1 //===- llvm/unittest/IR/OpenMPIRBuilderTest.cpp - OpenMPIRBuilder tests ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/Frontend/OpenMP/OMPConstants.h"
10 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
11 #include "llvm/IR/BasicBlock.h"
12 #include "llvm/IR/DIBuilder.h"
13 #include "llvm/IR/Function.h"
14 #include "llvm/IR/InstIterator.h"
15 #include "llvm/IR/LLVMContext.h"
16 #include "llvm/IR/Module.h"
17 #include "llvm/IR/Verifier.h"
18 #include "llvm/Passes/PassBuilder.h"
19 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
20 #include "gtest/gtest.h"
21 
22 using namespace llvm;
23 using namespace omp;
24 
25 namespace {
26 
27 /// Create an instruction that uses the values in \p Values. We use "printf"
28 /// just because it is often used for this purpose in test code, but it is never
29 /// executed here.
30 static CallInst *createPrintfCall(IRBuilder<> &Builder, StringRef FormatStr,
31                                   ArrayRef<Value *> Values) {
32   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
33 
34   GlobalVariable *GV = Builder.CreateGlobalString(FormatStr, "", 0, M);
35   Constant *Zero = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
36   Constant *Indices[] = {Zero, Zero};
37   Constant *FormatStrConst =
38       ConstantExpr::getInBoundsGetElementPtr(GV->getValueType(), GV, Indices);
39 
40   Function *PrintfDecl = M->getFunction("printf");
41   if (!PrintfDecl) {
42     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
43     FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(), true);
44     PrintfDecl = Function::Create(Ty, Linkage, "printf", M);
45   }
46 
47   SmallVector<Value *, 4> Args;
48   Args.push_back(FormatStrConst);
49   Args.append(Values.begin(), Values.end());
50   return Builder.CreateCall(PrintfDecl, Args);
51 }
52 
53 /// Verify that blocks in \p RefOrder are corresponds to the depth-first visit
54 /// order the control flow of \p F.
55 ///
56 /// This is an easy way to verify the branching structure of the CFG without
57 /// checking every branch instruction individually. For the CFG of a
58 /// CanonicalLoopInfo, the Cond BB's terminating branch's first edge is entering
59 /// the body, i.e. the DFS order corresponds to the execution order with one
60 /// loop iteration.
61 static testing::AssertionResult
62 verifyDFSOrder(Function *F, ArrayRef<BasicBlock *> RefOrder) {
63   ArrayRef<BasicBlock *>::iterator It = RefOrder.begin();
64   ArrayRef<BasicBlock *>::iterator E = RefOrder.end();
65 
66   df_iterator_default_set<BasicBlock *, 16> Visited;
67   auto DFS = llvm::depth_first_ext(&F->getEntryBlock(), Visited);
68 
69   BasicBlock *Prev = nullptr;
70   for (BasicBlock *BB : DFS) {
71     if (It != E && BB == *It) {
72       Prev = *It;
73       ++It;
74     }
75   }
76 
77   if (It == E)
78     return testing::AssertionSuccess();
79   if (!Prev)
80     return testing::AssertionFailure()
81            << "Did not find " << (*It)->getName() << " in control flow";
82   return testing::AssertionFailure()
83          << "Expected " << Prev->getName() << " before " << (*It)->getName()
84          << " in control flow";
85 }
86 
87 /// Verify that blocks in \p RefOrder are in the same relative order in the
88 /// linked lists of blocks in \p F. The linked list may contain additional
89 /// blocks in-between.
90 ///
91 /// While the order in the linked list is not relevant for semantics, keeping
92 /// the order roughly in execution order makes its printout easier to read.
93 static testing::AssertionResult
94 verifyListOrder(Function *F, ArrayRef<BasicBlock *> RefOrder) {
95   ArrayRef<BasicBlock *>::iterator It = RefOrder.begin();
96   ArrayRef<BasicBlock *>::iterator E = RefOrder.end();
97 
98   BasicBlock *Prev = nullptr;
99   for (BasicBlock &BB : *F) {
100     if (It != E && &BB == *It) {
101       Prev = *It;
102       ++It;
103     }
104   }
105 
106   if (It == E)
107     return testing::AssertionSuccess();
108   if (!Prev)
109     return testing::AssertionFailure() << "Did not find " << (*It)->getName()
110                                        << " in function " << F->getName();
111   return testing::AssertionFailure()
112          << "Expected " << Prev->getName() << " before " << (*It)->getName()
113          << " in function " << F->getName();
114 }
115 
116 /// Populate Calls with call instructions calling the function with the given
117 /// FnID from the given function F.
118 static void findCalls(Function *F, omp::RuntimeFunction FnID,
119                       OpenMPIRBuilder &OMPBuilder,
120                       SmallVectorImpl<CallInst *> &Calls) {
121   Function *Fn = OMPBuilder.getOrCreateRuntimeFunctionPtr(FnID);
122   for (BasicBlock &BB : *F) {
123     for (Instruction &I : BB) {
124       auto *Call = dyn_cast<CallInst>(&I);
125       if (Call && Call->getCalledFunction() == Fn)
126         Calls.push_back(Call);
127     }
128   }
129 }
130 
131 /// Assuming \p F contains only one call to the function with the given \p FnID,
132 /// return that call.
133 static CallInst *findSingleCall(Function *F, omp::RuntimeFunction FnID,
134                                 OpenMPIRBuilder &OMPBuilder) {
135   SmallVector<CallInst *, 1> Calls;
136   findCalls(F, FnID, OMPBuilder, Calls);
137   EXPECT_EQ(1u, Calls.size());
138   if (Calls.size() != 1)
139     return nullptr;
140   return Calls.front();
141 }
142 
143 static omp::ScheduleKind getSchedKind(omp::OMPScheduleType SchedType) {
144   switch (SchedType & ~omp::OMPScheduleType::ModifierMask) {
145   case omp::OMPScheduleType::BaseDynamicChunked:
146     return omp::OMP_SCHEDULE_Dynamic;
147   case omp::OMPScheduleType::BaseGuidedChunked:
148     return omp::OMP_SCHEDULE_Guided;
149   case omp::OMPScheduleType::BaseAuto:
150     return omp::OMP_SCHEDULE_Auto;
151   case omp::OMPScheduleType::BaseRuntime:
152     return omp::OMP_SCHEDULE_Runtime;
153   default:
154     llvm_unreachable("unknown type for this test");
155   }
156 }
157 
158 class OpenMPIRBuilderTest : public testing::Test {
159 protected:
160   void SetUp() override {
161     Ctx.setOpaquePointers(false); // TODO: Update tests for opaque pointers.
162     M.reset(new Module("MyModule", Ctx));
163     FunctionType *FTy =
164         FunctionType::get(Type::getVoidTy(Ctx), {Type::getInt32Ty(Ctx)},
165                           /*isVarArg=*/false);
166     F = Function::Create(FTy, Function::ExternalLinkage, "", M.get());
167     BB = BasicBlock::Create(Ctx, "", F);
168 
169     DIBuilder DIB(*M);
170     auto File = DIB.createFile("test.dbg", "/src", llvm::None,
171                                Optional<StringRef>("/src/test.dbg"));
172     auto CU =
173         DIB.createCompileUnit(dwarf::DW_LANG_C, File, "llvm-C", true, "", 0);
174     auto Type = DIB.createSubroutineType(DIB.getOrCreateTypeArray(None));
175     auto SP = DIB.createFunction(
176         CU, "foo", "", File, 1, Type, 1, DINode::FlagZero,
177         DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
178     F->setSubprogram(SP);
179     auto Scope = DIB.createLexicalBlockFile(SP, File, 0);
180     DIB.finalize();
181     DL = DILocation::get(Ctx, 3, 7, Scope);
182   }
183 
184   void TearDown() override {
185     BB = nullptr;
186     M.reset();
187   }
188 
189   /// Create a function with a simple loop that calls printf using the logical
190   /// loop counter for use with tests that need a CanonicalLoopInfo object.
191   CanonicalLoopInfo *buildSingleLoopFunction(DebugLoc DL,
192                                              OpenMPIRBuilder &OMPBuilder,
193                                              int UseIVBits,
194                                              CallInst **Call = nullptr,
195                                              BasicBlock **BodyCode = nullptr) {
196     OMPBuilder.initialize();
197     F->setName("func");
198 
199     IRBuilder<> Builder(BB);
200     OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
201     Value *TripCount = F->getArg(0);
202 
203     Type *IVType = Type::getIntNTy(Builder.getContext(), UseIVBits);
204     Value *CastedTripCount =
205         Builder.CreateZExtOrTrunc(TripCount, IVType, "tripcount");
206 
207     auto LoopBodyGenCB = [&](OpenMPIRBuilder::InsertPointTy CodeGenIP,
208                              llvm::Value *LC) {
209       Builder.restoreIP(CodeGenIP);
210       if (BodyCode)
211         *BodyCode = Builder.GetInsertBlock();
212 
213       // Add something that consumes the induction variable to the body.
214       CallInst *CallInst = createPrintfCall(Builder, "%d\\n", {LC});
215       if (Call)
216         *Call = CallInst;
217     };
218     CanonicalLoopInfo *Loop =
219         OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, CastedTripCount);
220 
221     // Finalize the function.
222     Builder.restoreIP(Loop->getAfterIP());
223     Builder.CreateRetVoid();
224 
225     return Loop;
226   }
227 
228   LLVMContext Ctx;
229   std::unique_ptr<Module> M;
230   Function *F;
231   BasicBlock *BB;
232   DebugLoc DL;
233 };
234 
235 class OpenMPIRBuilderTestWithParams
236     : public OpenMPIRBuilderTest,
237       public ::testing::WithParamInterface<omp::OMPScheduleType> {};
238 
239 class OpenMPIRBuilderTestWithIVBits
240     : public OpenMPIRBuilderTest,
241       public ::testing::WithParamInterface<int> {};
242 
243 // Returns the value stored in the given allocation. Returns null if the given
244 // value is not a result of an InstTy instruction, if no value is stored or if
245 // there is more than one store.
246 template <typename InstTy> static Value *findStoredValue(Value *AllocaValue) {
247   Instruction *Inst = dyn_cast<InstTy>(AllocaValue);
248   if (!Inst)
249     return nullptr;
250   StoreInst *Store = nullptr;
251   for (Use &U : Inst->uses()) {
252     if (auto *CandidateStore = dyn_cast<StoreInst>(U.getUser())) {
253       EXPECT_EQ(Store, nullptr);
254       Store = CandidateStore;
255     }
256   }
257   if (!Store)
258     return nullptr;
259   return Store->getValueOperand();
260 }
261 
262 // Returns the value stored in the aggregate argument of an outlined function,
263 // or nullptr if it is not found.
264 static Value *findStoredValueInAggregateAt(LLVMContext &Ctx, Value *Aggregate,
265                                            unsigned Idx) {
266   GetElementPtrInst *GEPAtIdx = nullptr;
267   // Find GEP instruction at that index.
268   for (User *Usr : Aggregate->users()) {
269     GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Usr);
270     if (!GEP)
271       continue;
272 
273     if (GEP->getOperand(2) != ConstantInt::get(Type::getInt32Ty(Ctx), Idx))
274       continue;
275 
276     EXPECT_EQ(GEPAtIdx, nullptr);
277     GEPAtIdx = GEP;
278   }
279 
280   EXPECT_NE(GEPAtIdx, nullptr);
281   EXPECT_EQ(GEPAtIdx->getNumUses(), 1U);
282 
283   // Find the value stored to the aggregate.
284   StoreInst *StoreToAgg = dyn_cast<StoreInst>(*GEPAtIdx->user_begin());
285   Value *StoredAggValue = StoreToAgg->getValueOperand();
286 
287   Value *StoredValue = nullptr;
288 
289   // Find the value stored to the value stored in the aggregate.
290   for (User *Usr : StoredAggValue->users()) {
291     StoreInst *Store = dyn_cast<StoreInst>(Usr);
292     if (!Store)
293       continue;
294 
295     if (Store->getPointerOperand() != StoredAggValue)
296       continue;
297 
298     EXPECT_EQ(StoredValue, nullptr);
299     StoredValue = Store->getValueOperand();
300   }
301 
302   return StoredValue;
303 }
304 
305 // Returns the aggregate that the value is originating from.
306 static Value *findAggregateFromValue(Value *V) {
307   // Expects a load instruction that loads from the aggregate.
308   LoadInst *Load = dyn_cast<LoadInst>(V);
309   EXPECT_NE(Load, nullptr);
310   // Find the GEP instruction used in the load instruction.
311   GetElementPtrInst *GEP =
312       dyn_cast<GetElementPtrInst>(Load->getPointerOperand());
313   EXPECT_NE(GEP, nullptr);
314   // Find the aggregate used in the GEP instruction.
315   Value *Aggregate = GEP->getPointerOperand();
316 
317   return Aggregate;
318 }
319 
320 TEST_F(OpenMPIRBuilderTest, CreateBarrier) {
321   OpenMPIRBuilder OMPBuilder(*M);
322   OMPBuilder.initialize();
323 
324   IRBuilder<> Builder(BB);
325 
326   OMPBuilder.createBarrier({IRBuilder<>::InsertPoint()}, OMPD_for);
327   EXPECT_TRUE(M->global_empty());
328   EXPECT_EQ(M->size(), 1U);
329   EXPECT_EQ(F->size(), 1U);
330   EXPECT_EQ(BB->size(), 0U);
331 
332   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()});
333   OMPBuilder.createBarrier(Loc, OMPD_for);
334   EXPECT_FALSE(M->global_empty());
335   EXPECT_EQ(M->size(), 3U);
336   EXPECT_EQ(F->size(), 1U);
337   EXPECT_EQ(BB->size(), 2U);
338 
339   CallInst *GTID = dyn_cast<CallInst>(&BB->front());
340   EXPECT_NE(GTID, nullptr);
341   EXPECT_EQ(GTID->arg_size(), 1U);
342   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
343   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
344   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
345 
346   CallInst *Barrier = dyn_cast<CallInst>(GTID->getNextNode());
347   EXPECT_NE(Barrier, nullptr);
348   EXPECT_EQ(Barrier->arg_size(), 2U);
349   EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_barrier");
350   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory());
351   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory());
352 
353   EXPECT_EQ(cast<CallInst>(Barrier)->getArgOperand(1), GTID);
354 
355   Builder.CreateUnreachable();
356   EXPECT_FALSE(verifyModule(*M, &errs()));
357 }
358 
359 TEST_F(OpenMPIRBuilderTest, CreateCancel) {
360   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
361   OpenMPIRBuilder OMPBuilder(*M);
362   OMPBuilder.initialize();
363 
364   BasicBlock *CBB = BasicBlock::Create(Ctx, "", F);
365   new UnreachableInst(Ctx, CBB);
366   auto FiniCB = [&](InsertPointTy IP) {
367     ASSERT_NE(IP.getBlock(), nullptr);
368     ASSERT_EQ(IP.getBlock()->end(), IP.getPoint());
369     BranchInst::Create(CBB, IP.getBlock());
370   };
371   OMPBuilder.pushFinalizationCB({FiniCB, OMPD_parallel, true});
372 
373   IRBuilder<> Builder(BB);
374 
375   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()});
376   auto NewIP = OMPBuilder.createCancel(Loc, nullptr, OMPD_parallel);
377   Builder.restoreIP(NewIP);
378   EXPECT_FALSE(M->global_empty());
379   EXPECT_EQ(M->size(), 4U);
380   EXPECT_EQ(F->size(), 4U);
381   EXPECT_EQ(BB->size(), 4U);
382 
383   CallInst *GTID = dyn_cast<CallInst>(&BB->front());
384   EXPECT_NE(GTID, nullptr);
385   EXPECT_EQ(GTID->arg_size(), 1U);
386   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
387   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
388   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
389 
390   CallInst *Cancel = dyn_cast<CallInst>(GTID->getNextNode());
391   EXPECT_NE(Cancel, nullptr);
392   EXPECT_EQ(Cancel->arg_size(), 3U);
393   EXPECT_EQ(Cancel->getCalledFunction()->getName(), "__kmpc_cancel");
394   EXPECT_FALSE(Cancel->getCalledFunction()->doesNotAccessMemory());
395   EXPECT_FALSE(Cancel->getCalledFunction()->doesNotFreeMemory());
396   EXPECT_EQ(Cancel->getNumUses(), 1U);
397   Instruction *CancelBBTI = Cancel->getParent()->getTerminator();
398   EXPECT_EQ(CancelBBTI->getNumSuccessors(), 2U);
399   EXPECT_EQ(CancelBBTI->getSuccessor(0), NewIP.getBlock());
400   EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U);
401   CallInst *GTID1 = dyn_cast<CallInst>(&CancelBBTI->getSuccessor(1)->front());
402   EXPECT_NE(GTID1, nullptr);
403   EXPECT_EQ(GTID1->arg_size(), 1U);
404   EXPECT_EQ(GTID1->getCalledFunction()->getName(), "__kmpc_global_thread_num");
405   EXPECT_FALSE(GTID1->getCalledFunction()->doesNotAccessMemory());
406   EXPECT_FALSE(GTID1->getCalledFunction()->doesNotFreeMemory());
407   CallInst *Barrier = dyn_cast<CallInst>(GTID1->getNextNode());
408   EXPECT_NE(Barrier, nullptr);
409   EXPECT_EQ(Barrier->arg_size(), 2U);
410   EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier");
411   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory());
412   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory());
413   EXPECT_EQ(Barrier->getNumUses(), 0U);
414   EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(),
415             1U);
416   EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), CBB);
417 
418   EXPECT_EQ(cast<CallInst>(Cancel)->getArgOperand(1), GTID);
419 
420   OMPBuilder.popFinalizationCB();
421 
422   Builder.CreateUnreachable();
423   EXPECT_FALSE(verifyModule(*M, &errs()));
424 }
425 
426 TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) {
427   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
428   OpenMPIRBuilder OMPBuilder(*M);
429   OMPBuilder.initialize();
430 
431   BasicBlock *CBB = BasicBlock::Create(Ctx, "", F);
432   new UnreachableInst(Ctx, CBB);
433   auto FiniCB = [&](InsertPointTy IP) {
434     ASSERT_NE(IP.getBlock(), nullptr);
435     ASSERT_EQ(IP.getBlock()->end(), IP.getPoint());
436     BranchInst::Create(CBB, IP.getBlock());
437   };
438   OMPBuilder.pushFinalizationCB({FiniCB, OMPD_parallel, true});
439 
440   IRBuilder<> Builder(BB);
441 
442   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()});
443   auto NewIP = OMPBuilder.createCancel(Loc, Builder.getTrue(), OMPD_parallel);
444   Builder.restoreIP(NewIP);
445   EXPECT_FALSE(M->global_empty());
446   EXPECT_EQ(M->size(), 4U);
447   EXPECT_EQ(F->size(), 7U);
448   EXPECT_EQ(BB->size(), 1U);
449   ASSERT_TRUE(isa<BranchInst>(BB->getTerminator()));
450   ASSERT_EQ(BB->getTerminator()->getNumSuccessors(), 2U);
451   BB = BB->getTerminator()->getSuccessor(0);
452   EXPECT_EQ(BB->size(), 4U);
453 
454   CallInst *GTID = dyn_cast<CallInst>(&BB->front());
455   EXPECT_NE(GTID, nullptr);
456   EXPECT_EQ(GTID->arg_size(), 1U);
457   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
458   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
459   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
460 
461   CallInst *Cancel = dyn_cast<CallInst>(GTID->getNextNode());
462   EXPECT_NE(Cancel, nullptr);
463   EXPECT_EQ(Cancel->arg_size(), 3U);
464   EXPECT_EQ(Cancel->getCalledFunction()->getName(), "__kmpc_cancel");
465   EXPECT_FALSE(Cancel->getCalledFunction()->doesNotAccessMemory());
466   EXPECT_FALSE(Cancel->getCalledFunction()->doesNotFreeMemory());
467   EXPECT_EQ(Cancel->getNumUses(), 1U);
468   Instruction *CancelBBTI = Cancel->getParent()->getTerminator();
469   EXPECT_EQ(CancelBBTI->getNumSuccessors(), 2U);
470   EXPECT_EQ(CancelBBTI->getSuccessor(0)->size(), 1U);
471   EXPECT_EQ(CancelBBTI->getSuccessor(0)->getUniqueSuccessor(),
472             NewIP.getBlock());
473   EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U);
474   CallInst *GTID1 = dyn_cast<CallInst>(&CancelBBTI->getSuccessor(1)->front());
475   EXPECT_NE(GTID1, nullptr);
476   EXPECT_EQ(GTID1->arg_size(), 1U);
477   EXPECT_EQ(GTID1->getCalledFunction()->getName(), "__kmpc_global_thread_num");
478   EXPECT_FALSE(GTID1->getCalledFunction()->doesNotAccessMemory());
479   EXPECT_FALSE(GTID1->getCalledFunction()->doesNotFreeMemory());
480   CallInst *Barrier = dyn_cast<CallInst>(GTID1->getNextNode());
481   EXPECT_NE(Barrier, nullptr);
482   EXPECT_EQ(Barrier->arg_size(), 2U);
483   EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier");
484   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory());
485   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory());
486   EXPECT_EQ(Barrier->getNumUses(), 0U);
487   EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(),
488             1U);
489   EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), CBB);
490 
491   EXPECT_EQ(cast<CallInst>(Cancel)->getArgOperand(1), GTID);
492 
493   OMPBuilder.popFinalizationCB();
494 
495   Builder.CreateUnreachable();
496   EXPECT_FALSE(verifyModule(*M, &errs()));
497 }
498 
499 TEST_F(OpenMPIRBuilderTest, CreateCancelBarrier) {
500   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
501   OpenMPIRBuilder OMPBuilder(*M);
502   OMPBuilder.initialize();
503 
504   BasicBlock *CBB = BasicBlock::Create(Ctx, "", F);
505   new UnreachableInst(Ctx, CBB);
506   auto FiniCB = [&](InsertPointTy IP) {
507     ASSERT_NE(IP.getBlock(), nullptr);
508     ASSERT_EQ(IP.getBlock()->end(), IP.getPoint());
509     BranchInst::Create(CBB, IP.getBlock());
510   };
511   OMPBuilder.pushFinalizationCB({FiniCB, OMPD_parallel, true});
512 
513   IRBuilder<> Builder(BB);
514 
515   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()});
516   auto NewIP = OMPBuilder.createBarrier(Loc, OMPD_for);
517   Builder.restoreIP(NewIP);
518   EXPECT_FALSE(M->global_empty());
519   EXPECT_EQ(M->size(), 3U);
520   EXPECT_EQ(F->size(), 4U);
521   EXPECT_EQ(BB->size(), 4U);
522 
523   CallInst *GTID = dyn_cast<CallInst>(&BB->front());
524   EXPECT_NE(GTID, nullptr);
525   EXPECT_EQ(GTID->arg_size(), 1U);
526   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
527   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
528   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
529 
530   CallInst *Barrier = dyn_cast<CallInst>(GTID->getNextNode());
531   EXPECT_NE(Barrier, nullptr);
532   EXPECT_EQ(Barrier->arg_size(), 2U);
533   EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier");
534   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory());
535   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory());
536   EXPECT_EQ(Barrier->getNumUses(), 1U);
537   Instruction *BarrierBBTI = Barrier->getParent()->getTerminator();
538   EXPECT_EQ(BarrierBBTI->getNumSuccessors(), 2U);
539   EXPECT_EQ(BarrierBBTI->getSuccessor(0), NewIP.getBlock());
540   EXPECT_EQ(BarrierBBTI->getSuccessor(1)->size(), 1U);
541   EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(),
542             1U);
543   EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0),
544             CBB);
545 
546   EXPECT_EQ(cast<CallInst>(Barrier)->getArgOperand(1), GTID);
547 
548   OMPBuilder.popFinalizationCB();
549 
550   Builder.CreateUnreachable();
551   EXPECT_FALSE(verifyModule(*M, &errs()));
552 }
553 
554 TEST_F(OpenMPIRBuilderTest, DbgLoc) {
555   OpenMPIRBuilder OMPBuilder(*M);
556   OMPBuilder.initialize();
557   F->setName("func");
558 
559   IRBuilder<> Builder(BB);
560 
561   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
562   OMPBuilder.createBarrier(Loc, OMPD_for);
563   CallInst *GTID = dyn_cast<CallInst>(&BB->front());
564   CallInst *Barrier = dyn_cast<CallInst>(GTID->getNextNode());
565   EXPECT_EQ(GTID->getDebugLoc(), DL);
566   EXPECT_EQ(Barrier->getDebugLoc(), DL);
567   EXPECT_TRUE(isa<GlobalVariable>(Barrier->getOperand(0)));
568   if (!isa<GlobalVariable>(Barrier->getOperand(0)))
569     return;
570   GlobalVariable *Ident = cast<GlobalVariable>(Barrier->getOperand(0));
571   EXPECT_TRUE(Ident->hasInitializer());
572   if (!Ident->hasInitializer())
573     return;
574   Constant *Initializer = Ident->getInitializer();
575   EXPECT_TRUE(
576       isa<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts()));
577   GlobalVariable *SrcStrGlob =
578       cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts());
579   if (!SrcStrGlob)
580     return;
581   EXPECT_TRUE(isa<ConstantDataArray>(SrcStrGlob->getInitializer()));
582   ConstantDataArray *SrcSrc =
583       dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer());
584   if (!SrcSrc)
585     return;
586   EXPECT_EQ(SrcSrc->getAsCString(), ";/src/test.dbg;foo;3;7;;");
587 }
588 
589 TEST_F(OpenMPIRBuilderTest, ParallelSimple) {
590   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
591   OpenMPIRBuilder OMPBuilder(*M);
592   OMPBuilder.initialize();
593   F->setName("func");
594   IRBuilder<> Builder(BB);
595 
596   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
597   Builder.CreateBr(EnterBB);
598   Builder.SetInsertPoint(EnterBB);
599   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
600 
601   AllocaInst *PrivAI = nullptr;
602 
603   unsigned NumBodiesGenerated = 0;
604   unsigned NumPrivatizedVars = 0;
605   unsigned NumFinalizationPoints = 0;
606 
607   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
608     ++NumBodiesGenerated;
609 
610     Builder.restoreIP(AllocaIP);
611     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
612     Builder.CreateStore(F->arg_begin(), PrivAI);
613 
614     Builder.restoreIP(CodeGenIP);
615     Value *PrivLoad =
616         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
617     Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
618     Instruction *ThenTerm, *ElseTerm;
619     SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
620                                   &ThenTerm, &ElseTerm);
621   };
622 
623   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
624                     Value &Orig, Value &Inner,
625                     Value *&ReplacementValue) -> InsertPointTy {
626     ++NumPrivatizedVars;
627 
628     if (!isa<AllocaInst>(Orig)) {
629       EXPECT_EQ(&Orig, F->arg_begin());
630       ReplacementValue = &Inner;
631       return CodeGenIP;
632     }
633 
634     // Since the original value is an allocation, it has a pointer type and
635     // therefore no additional wrapping should happen.
636     EXPECT_EQ(&Orig, &Inner);
637 
638     // Trivial copy (=firstprivate).
639     Builder.restoreIP(AllocaIP);
640     Type *VTy = ReplacementValue->getType();
641     Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
642     ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
643     Builder.restoreIP(CodeGenIP);
644     Builder.CreateStore(V, ReplacementValue);
645     return CodeGenIP;
646   };
647 
648   auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; };
649 
650   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
651                                     F->getEntryBlock().getFirstInsertionPt());
652   IRBuilder<>::InsertPoint AfterIP =
653       OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
654                                 nullptr, nullptr, OMP_PROC_BIND_default, false);
655   EXPECT_EQ(NumBodiesGenerated, 1U);
656   EXPECT_EQ(NumPrivatizedVars, 1U);
657   EXPECT_EQ(NumFinalizationPoints, 1U);
658 
659   Builder.restoreIP(AfterIP);
660   Builder.CreateRetVoid();
661 
662   OMPBuilder.finalize();
663 
664   EXPECT_NE(PrivAI, nullptr);
665   Function *OutlinedFn = PrivAI->getFunction();
666   EXPECT_NE(F, OutlinedFn);
667   EXPECT_FALSE(verifyModule(*M, &errs()));
668   EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoUnwind));
669   EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoRecurse));
670   EXPECT_TRUE(OutlinedFn->hasParamAttribute(0, Attribute::NoAlias));
671   EXPECT_TRUE(OutlinedFn->hasParamAttribute(1, Attribute::NoAlias));
672 
673   EXPECT_TRUE(OutlinedFn->hasInternalLinkage());
674   EXPECT_EQ(OutlinedFn->arg_size(), 3U);
675 
676   EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent());
677   EXPECT_EQ(OutlinedFn->getNumUses(), 1U);
678   User *Usr = OutlinedFn->user_back();
679   ASSERT_TRUE(isa<ConstantExpr>(Usr));
680   CallInst *ForkCI = dyn_cast<CallInst>(Usr->user_back());
681   ASSERT_NE(ForkCI, nullptr);
682 
683   EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
684   EXPECT_EQ(ForkCI->arg_size(), 4U);
685   EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
686   EXPECT_EQ(ForkCI->getArgOperand(1),
687             ConstantInt::get(Type::getInt32Ty(Ctx), 1U));
688   EXPECT_EQ(ForkCI->getArgOperand(2), Usr);
689   Value *StoredValue =
690       findStoredValueInAggregateAt(Ctx, ForkCI->getArgOperand(3), 0);
691   EXPECT_EQ(StoredValue, F->arg_begin());
692 }
693 
694 TEST_F(OpenMPIRBuilderTest, ParallelNested) {
695   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
696   OpenMPIRBuilder OMPBuilder(*M);
697   OMPBuilder.initialize();
698   F->setName("func");
699   IRBuilder<> Builder(BB);
700 
701   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
702   Builder.CreateBr(EnterBB);
703   Builder.SetInsertPoint(EnterBB);
704   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
705 
706   unsigned NumInnerBodiesGenerated = 0;
707   unsigned NumOuterBodiesGenerated = 0;
708   unsigned NumFinalizationPoints = 0;
709 
710   auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
711     ++NumInnerBodiesGenerated;
712   };
713 
714   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
715                     Value &Orig, Value &Inner,
716                     Value *&ReplacementValue) -> InsertPointTy {
717     // Trivial copy (=firstprivate).
718     Builder.restoreIP(AllocaIP);
719     Type *VTy = ReplacementValue->getType();
720     Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
721     ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
722     Builder.restoreIP(CodeGenIP);
723     Builder.CreateStore(V, ReplacementValue);
724     return CodeGenIP;
725   };
726 
727   auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; };
728 
729   auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
730     ++NumOuterBodiesGenerated;
731     Builder.restoreIP(CodeGenIP);
732     BasicBlock *CGBB = CodeGenIP.getBlock();
733     BasicBlock *NewBB = SplitBlock(CGBB, &*CodeGenIP.getPoint());
734     CGBB->getTerminator()->eraseFromParent();
735     ;
736 
737     IRBuilder<>::InsertPoint AfterIP = OMPBuilder.createParallel(
738         InsertPointTy(CGBB, CGBB->end()), AllocaIP, InnerBodyGenCB, PrivCB,
739         FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false);
740 
741     Builder.restoreIP(AfterIP);
742     Builder.CreateBr(NewBB);
743   };
744 
745   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
746                                     F->getEntryBlock().getFirstInsertionPt());
747   IRBuilder<>::InsertPoint AfterIP =
748       OMPBuilder.createParallel(Loc, AllocaIP, OuterBodyGenCB, PrivCB, FiniCB,
749                                 nullptr, nullptr, OMP_PROC_BIND_default, false);
750 
751   EXPECT_EQ(NumInnerBodiesGenerated, 1U);
752   EXPECT_EQ(NumOuterBodiesGenerated, 1U);
753   EXPECT_EQ(NumFinalizationPoints, 2U);
754 
755   Builder.restoreIP(AfterIP);
756   Builder.CreateRetVoid();
757 
758   OMPBuilder.finalize();
759 
760   EXPECT_EQ(M->size(), 5U);
761   for (Function &OutlinedFn : *M) {
762     if (F == &OutlinedFn || OutlinedFn.isDeclaration())
763       continue;
764     EXPECT_FALSE(verifyModule(*M, &errs()));
765     EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoUnwind));
766     EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoRecurse));
767     EXPECT_TRUE(OutlinedFn.hasParamAttribute(0, Attribute::NoAlias));
768     EXPECT_TRUE(OutlinedFn.hasParamAttribute(1, Attribute::NoAlias));
769 
770     EXPECT_TRUE(OutlinedFn.hasInternalLinkage());
771     EXPECT_EQ(OutlinedFn.arg_size(), 2U);
772 
773     EXPECT_EQ(OutlinedFn.getNumUses(), 1U);
774     User *Usr = OutlinedFn.user_back();
775     ASSERT_TRUE(isa<ConstantExpr>(Usr));
776     CallInst *ForkCI = dyn_cast<CallInst>(Usr->user_back());
777     ASSERT_NE(ForkCI, nullptr);
778 
779     EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
780     EXPECT_EQ(ForkCI->arg_size(), 3U);
781     EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
782     EXPECT_EQ(ForkCI->getArgOperand(1),
783               ConstantInt::get(Type::getInt32Ty(Ctx), 0U));
784     EXPECT_EQ(ForkCI->getArgOperand(2), Usr);
785   }
786 }
787 
788 TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) {
789   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
790   OpenMPIRBuilder OMPBuilder(*M);
791   OMPBuilder.initialize();
792   F->setName("func");
793   IRBuilder<> Builder(BB);
794 
795   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
796   Builder.CreateBr(EnterBB);
797   Builder.SetInsertPoint(EnterBB);
798   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
799 
800   unsigned NumInnerBodiesGenerated = 0;
801   unsigned NumOuterBodiesGenerated = 0;
802   unsigned NumFinalizationPoints = 0;
803 
804   auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
805     ++NumInnerBodiesGenerated;
806   };
807 
808   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
809                     Value &Orig, Value &Inner,
810                     Value *&ReplacementValue) -> InsertPointTy {
811     // Trivial copy (=firstprivate).
812     Builder.restoreIP(AllocaIP);
813     Type *VTy = ReplacementValue->getType();
814     Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
815     ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
816     Builder.restoreIP(CodeGenIP);
817     Builder.CreateStore(V, ReplacementValue);
818     return CodeGenIP;
819   };
820 
821   auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; };
822 
823   auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
824     ++NumOuterBodiesGenerated;
825     Builder.restoreIP(CodeGenIP);
826     BasicBlock *CGBB = CodeGenIP.getBlock();
827     BasicBlock *NewBB1 = SplitBlock(CGBB, &*CodeGenIP.getPoint());
828     BasicBlock *NewBB2 = SplitBlock(NewBB1, &*NewBB1->getFirstInsertionPt());
829     CGBB->getTerminator()->eraseFromParent();
830     ;
831     NewBB1->getTerminator()->eraseFromParent();
832     ;
833 
834     IRBuilder<>::InsertPoint AfterIP1 = OMPBuilder.createParallel(
835         InsertPointTy(CGBB, CGBB->end()), AllocaIP, InnerBodyGenCB, PrivCB,
836         FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false);
837 
838     Builder.restoreIP(AfterIP1);
839     Builder.CreateBr(NewBB1);
840 
841     IRBuilder<>::InsertPoint AfterIP2 = OMPBuilder.createParallel(
842         InsertPointTy(NewBB1, NewBB1->end()), AllocaIP, InnerBodyGenCB, PrivCB,
843         FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false);
844 
845     Builder.restoreIP(AfterIP2);
846     Builder.CreateBr(NewBB2);
847   };
848 
849   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
850                                     F->getEntryBlock().getFirstInsertionPt());
851   IRBuilder<>::InsertPoint AfterIP =
852       OMPBuilder.createParallel(Loc, AllocaIP, OuterBodyGenCB, PrivCB, FiniCB,
853                                 nullptr, nullptr, OMP_PROC_BIND_default, false);
854 
855   EXPECT_EQ(NumInnerBodiesGenerated, 2U);
856   EXPECT_EQ(NumOuterBodiesGenerated, 1U);
857   EXPECT_EQ(NumFinalizationPoints, 3U);
858 
859   Builder.restoreIP(AfterIP);
860   Builder.CreateRetVoid();
861 
862   OMPBuilder.finalize();
863 
864   EXPECT_EQ(M->size(), 6U);
865   for (Function &OutlinedFn : *M) {
866     if (F == &OutlinedFn || OutlinedFn.isDeclaration())
867       continue;
868     EXPECT_FALSE(verifyModule(*M, &errs()));
869     EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoUnwind));
870     EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoRecurse));
871     EXPECT_TRUE(OutlinedFn.hasParamAttribute(0, Attribute::NoAlias));
872     EXPECT_TRUE(OutlinedFn.hasParamAttribute(1, Attribute::NoAlias));
873 
874     EXPECT_TRUE(OutlinedFn.hasInternalLinkage());
875     EXPECT_EQ(OutlinedFn.arg_size(), 2U);
876 
877     unsigned NumAllocas = 0;
878     for (Instruction &I : instructions(OutlinedFn))
879       NumAllocas += isa<AllocaInst>(I);
880     EXPECT_EQ(NumAllocas, 1U);
881 
882     EXPECT_EQ(OutlinedFn.getNumUses(), 1U);
883     User *Usr = OutlinedFn.user_back();
884     ASSERT_TRUE(isa<ConstantExpr>(Usr));
885     CallInst *ForkCI = dyn_cast<CallInst>(Usr->user_back());
886     ASSERT_NE(ForkCI, nullptr);
887 
888     EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
889     EXPECT_EQ(ForkCI->arg_size(), 3U);
890     EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
891     EXPECT_EQ(ForkCI->getArgOperand(1),
892               ConstantInt::get(Type::getInt32Ty(Ctx), 0U));
893     EXPECT_EQ(ForkCI->getArgOperand(2), Usr);
894   }
895 }
896 
897 TEST_F(OpenMPIRBuilderTest, ParallelIfCond) {
898   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
899   OpenMPIRBuilder OMPBuilder(*M);
900   OMPBuilder.initialize();
901   F->setName("func");
902   IRBuilder<> Builder(BB);
903 
904   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
905   Builder.CreateBr(EnterBB);
906   Builder.SetInsertPoint(EnterBB);
907   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
908 
909   AllocaInst *PrivAI = nullptr;
910 
911   unsigned NumBodiesGenerated = 0;
912   unsigned NumPrivatizedVars = 0;
913   unsigned NumFinalizationPoints = 0;
914 
915   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
916     ++NumBodiesGenerated;
917 
918     Builder.restoreIP(AllocaIP);
919     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
920     Builder.CreateStore(F->arg_begin(), PrivAI);
921 
922     Builder.restoreIP(CodeGenIP);
923     Value *PrivLoad =
924         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
925     Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
926     Instruction *ThenTerm, *ElseTerm;
927     SplitBlockAndInsertIfThenElse(Cmp, &*Builder.GetInsertPoint(), &ThenTerm,
928                                   &ElseTerm);
929   };
930 
931   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
932                     Value &Orig, Value &Inner,
933                     Value *&ReplacementValue) -> InsertPointTy {
934     ++NumPrivatizedVars;
935 
936     if (!isa<AllocaInst>(Orig)) {
937       EXPECT_EQ(&Orig, F->arg_begin());
938       ReplacementValue = &Inner;
939       return CodeGenIP;
940     }
941 
942     // Since the original value is an allocation, it has a pointer type and
943     // therefore no additional wrapping should happen.
944     EXPECT_EQ(&Orig, &Inner);
945 
946     // Trivial copy (=firstprivate).
947     Builder.restoreIP(AllocaIP);
948     Type *VTy = ReplacementValue->getType();
949     Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
950     ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
951     Builder.restoreIP(CodeGenIP);
952     Builder.CreateStore(V, ReplacementValue);
953     return CodeGenIP;
954   };
955 
956   auto FiniCB = [&](InsertPointTy CodeGenIP) {
957     ++NumFinalizationPoints;
958     // No destructors.
959   };
960 
961   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
962                                     F->getEntryBlock().getFirstInsertionPt());
963   IRBuilder<>::InsertPoint AfterIP =
964       OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
965                                 Builder.CreateIsNotNull(F->arg_begin()),
966                                 nullptr, OMP_PROC_BIND_default, false);
967 
968   EXPECT_EQ(NumBodiesGenerated, 1U);
969   EXPECT_EQ(NumPrivatizedVars, 1U);
970   EXPECT_EQ(NumFinalizationPoints, 1U);
971 
972   Builder.restoreIP(AfterIP);
973   Builder.CreateRetVoid();
974   OMPBuilder.finalize();
975 
976   EXPECT_NE(PrivAI, nullptr);
977   Function *OutlinedFn = PrivAI->getFunction();
978   EXPECT_NE(F, OutlinedFn);
979   EXPECT_FALSE(verifyModule(*M, &errs()));
980 
981   EXPECT_TRUE(OutlinedFn->hasInternalLinkage());
982   EXPECT_EQ(OutlinedFn->arg_size(), 3U);
983 
984   EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent());
985   ASSERT_EQ(OutlinedFn->getNumUses(), 2U);
986 
987   CallInst *DirectCI = nullptr;
988   CallInst *ForkCI = nullptr;
989   for (User *Usr : OutlinedFn->users()) {
990     if (isa<CallInst>(Usr)) {
991       ASSERT_EQ(DirectCI, nullptr);
992       DirectCI = cast<CallInst>(Usr);
993     } else {
994       ASSERT_TRUE(isa<ConstantExpr>(Usr));
995       ASSERT_EQ(Usr->getNumUses(), 1U);
996       ASSERT_TRUE(isa<CallInst>(Usr->user_back()));
997       ForkCI = cast<CallInst>(Usr->user_back());
998     }
999   }
1000 
1001   EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
1002   EXPECT_EQ(ForkCI->arg_size(), 4U);
1003   EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
1004   EXPECT_EQ(ForkCI->getArgOperand(1),
1005             ConstantInt::get(Type::getInt32Ty(Ctx), 1));
1006   Value *StoredForkArg =
1007       findStoredValueInAggregateAt(Ctx, ForkCI->getArgOperand(3), 0);
1008   EXPECT_EQ(StoredForkArg, F->arg_begin());
1009 
1010   EXPECT_EQ(DirectCI->getCalledFunction(), OutlinedFn);
1011   EXPECT_EQ(DirectCI->arg_size(), 3U);
1012   EXPECT_TRUE(isa<AllocaInst>(DirectCI->getArgOperand(0)));
1013   EXPECT_TRUE(isa<AllocaInst>(DirectCI->getArgOperand(1)));
1014   Value *StoredDirectArg =
1015       findStoredValueInAggregateAt(Ctx, DirectCI->getArgOperand(2), 0);
1016   EXPECT_EQ(StoredDirectArg, F->arg_begin());
1017 }
1018 
1019 TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) {
1020   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1021   OpenMPIRBuilder OMPBuilder(*M);
1022   OMPBuilder.initialize();
1023   F->setName("func");
1024   IRBuilder<> Builder(BB);
1025 
1026   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
1027   Builder.CreateBr(EnterBB);
1028   Builder.SetInsertPoint(EnterBB);
1029   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1030 
1031   unsigned NumBodiesGenerated = 0;
1032   unsigned NumPrivatizedVars = 0;
1033   unsigned NumFinalizationPoints = 0;
1034 
1035   CallInst *CheckedBarrier = nullptr;
1036   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
1037     ++NumBodiesGenerated;
1038 
1039     Builder.restoreIP(CodeGenIP);
1040 
1041     // Create three barriers, two cancel barriers but only one checked.
1042     Function *CBFn, *BFn;
1043 
1044     Builder.restoreIP(
1045         OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel));
1046 
1047     CBFn = M->getFunction("__kmpc_cancel_barrier");
1048     BFn = M->getFunction("__kmpc_barrier");
1049     ASSERT_NE(CBFn, nullptr);
1050     ASSERT_EQ(BFn, nullptr);
1051     ASSERT_EQ(CBFn->getNumUses(), 1U);
1052     ASSERT_TRUE(isa<CallInst>(CBFn->user_back()));
1053     ASSERT_EQ(CBFn->user_back()->getNumUses(), 1U);
1054     CheckedBarrier = cast<CallInst>(CBFn->user_back());
1055 
1056     Builder.restoreIP(
1057         OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel, true));
1058     CBFn = M->getFunction("__kmpc_cancel_barrier");
1059     BFn = M->getFunction("__kmpc_barrier");
1060     ASSERT_NE(CBFn, nullptr);
1061     ASSERT_NE(BFn, nullptr);
1062     ASSERT_EQ(CBFn->getNumUses(), 1U);
1063     ASSERT_EQ(BFn->getNumUses(), 1U);
1064     ASSERT_TRUE(isa<CallInst>(BFn->user_back()));
1065     ASSERT_EQ(BFn->user_back()->getNumUses(), 0U);
1066 
1067     Builder.restoreIP(OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel,
1068                                                false, false));
1069     ASSERT_EQ(CBFn->getNumUses(), 2U);
1070     ASSERT_EQ(BFn->getNumUses(), 1U);
1071     ASSERT_TRUE(CBFn->user_back() != CheckedBarrier);
1072     ASSERT_TRUE(isa<CallInst>(CBFn->user_back()));
1073     ASSERT_EQ(CBFn->user_back()->getNumUses(), 0U);
1074   };
1075 
1076   auto PrivCB = [&](InsertPointTy, InsertPointTy, Value &V, Value &,
1077                     Value *&) -> InsertPointTy {
1078     ++NumPrivatizedVars;
1079     llvm_unreachable("No privatization callback call expected!");
1080   };
1081 
1082   FunctionType *FakeDestructorTy =
1083       FunctionType::get(Type::getVoidTy(Ctx), {Type::getInt32Ty(Ctx)},
1084                         /*isVarArg=*/false);
1085   auto *FakeDestructor = Function::Create(
1086       FakeDestructorTy, Function::ExternalLinkage, "fakeDestructor", M.get());
1087 
1088   auto FiniCB = [&](InsertPointTy IP) {
1089     ++NumFinalizationPoints;
1090     Builder.restoreIP(IP);
1091     Builder.CreateCall(FakeDestructor,
1092                        {Builder.getInt32(NumFinalizationPoints)});
1093   };
1094 
1095   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
1096                                     F->getEntryBlock().getFirstInsertionPt());
1097   IRBuilder<>::InsertPoint AfterIP =
1098       OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
1099                                 Builder.CreateIsNotNull(F->arg_begin()),
1100                                 nullptr, OMP_PROC_BIND_default, true);
1101 
1102   EXPECT_EQ(NumBodiesGenerated, 1U);
1103   EXPECT_EQ(NumPrivatizedVars, 0U);
1104   EXPECT_EQ(NumFinalizationPoints, 2U);
1105   EXPECT_EQ(FakeDestructor->getNumUses(), 2U);
1106 
1107   Builder.restoreIP(AfterIP);
1108   Builder.CreateRetVoid();
1109   OMPBuilder.finalize();
1110 
1111   EXPECT_FALSE(verifyModule(*M, &errs()));
1112 
1113   BasicBlock *ExitBB = nullptr;
1114   for (const User *Usr : FakeDestructor->users()) {
1115     const CallInst *CI = dyn_cast<CallInst>(Usr);
1116     ASSERT_EQ(CI->getCalledFunction(), FakeDestructor);
1117     ASSERT_TRUE(isa<BranchInst>(CI->getNextNode()));
1118     ASSERT_EQ(CI->getNextNode()->getNumSuccessors(), 1U);
1119     if (ExitBB)
1120       ASSERT_EQ(CI->getNextNode()->getSuccessor(0), ExitBB);
1121     else
1122       ExitBB = CI->getNextNode()->getSuccessor(0);
1123     ASSERT_EQ(ExitBB->size(), 1U);
1124     if (!isa<ReturnInst>(ExitBB->front())) {
1125       ASSERT_TRUE(isa<BranchInst>(ExitBB->front()));
1126       ASSERT_EQ(cast<BranchInst>(ExitBB->front()).getNumSuccessors(), 1U);
1127       ASSERT_TRUE(isa<ReturnInst>(
1128           cast<BranchInst>(ExitBB->front()).getSuccessor(0)->front()));
1129     }
1130   }
1131 }
1132 
1133 TEST_F(OpenMPIRBuilderTest, ParallelForwardAsPointers) {
1134   OpenMPIRBuilder OMPBuilder(*M);
1135   OMPBuilder.initialize();
1136   F->setName("func");
1137   IRBuilder<> Builder(BB);
1138   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1139   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1140 
1141   Type *I32Ty = Type::getInt32Ty(M->getContext());
1142   Type *I32PtrTy = Type::getInt32PtrTy(M->getContext());
1143   Type *StructTy = StructType::get(I32Ty, I32PtrTy);
1144   Type *StructPtrTy = StructTy->getPointerTo();
1145   StructType *ArgStructTy =
1146       StructType::get(I32PtrTy, StructPtrTy, I32PtrTy, StructPtrTy);
1147   Type *VoidTy = Type::getVoidTy(M->getContext());
1148   FunctionCallee RetI32Func = M->getOrInsertFunction("ret_i32", I32Ty);
1149   FunctionCallee TakeI32Func =
1150       M->getOrInsertFunction("take_i32", VoidTy, I32Ty);
1151   FunctionCallee RetI32PtrFunc = M->getOrInsertFunction("ret_i32ptr", I32PtrTy);
1152   FunctionCallee TakeI32PtrFunc =
1153       M->getOrInsertFunction("take_i32ptr", VoidTy, I32PtrTy);
1154   FunctionCallee RetStructFunc = M->getOrInsertFunction("ret_struct", StructTy);
1155   FunctionCallee TakeStructFunc =
1156       M->getOrInsertFunction("take_struct", VoidTy, StructTy);
1157   FunctionCallee RetStructPtrFunc =
1158       M->getOrInsertFunction("ret_structptr", StructPtrTy);
1159   FunctionCallee TakeStructPtrFunc =
1160       M->getOrInsertFunction("take_structPtr", VoidTy, StructPtrTy);
1161   Value *I32Val = Builder.CreateCall(RetI32Func);
1162   Value *I32PtrVal = Builder.CreateCall(RetI32PtrFunc);
1163   Value *StructVal = Builder.CreateCall(RetStructFunc);
1164   Value *StructPtrVal = Builder.CreateCall(RetStructPtrFunc);
1165 
1166   Instruction *Internal;
1167   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
1168     IRBuilder<>::InsertPointGuard Guard(Builder);
1169     Builder.restoreIP(CodeGenIP);
1170     Internal = Builder.CreateCall(TakeI32Func, I32Val);
1171     Builder.CreateCall(TakeI32PtrFunc, I32PtrVal);
1172     Builder.CreateCall(TakeStructFunc, StructVal);
1173     Builder.CreateCall(TakeStructPtrFunc, StructPtrVal);
1174   };
1175   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &,
1176                     Value &Inner, Value *&ReplacementValue) {
1177     ReplacementValue = &Inner;
1178     return CodeGenIP;
1179   };
1180   auto FiniCB = [](InsertPointTy) {};
1181 
1182   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
1183                                     F->getEntryBlock().getFirstInsertionPt());
1184   IRBuilder<>::InsertPoint AfterIP =
1185       OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
1186                                 nullptr, nullptr, OMP_PROC_BIND_default, false);
1187   Builder.restoreIP(AfterIP);
1188   Builder.CreateRetVoid();
1189 
1190   OMPBuilder.finalize();
1191 
1192   EXPECT_FALSE(verifyModule(*M, &errs()));
1193   Function *OutlinedFn = Internal->getFunction();
1194 
1195   Type *Arg2Type = OutlinedFn->getArg(2)->getType();
1196   EXPECT_TRUE(Arg2Type->isPointerTy());
1197   EXPECT_TRUE(
1198       cast<PointerType>(Arg2Type)->isOpaqueOrPointeeTypeMatches(ArgStructTy));
1199 }
1200 
1201 TEST_F(OpenMPIRBuilderTest, CanonicalLoopSimple) {
1202   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1203   OpenMPIRBuilder OMPBuilder(*M);
1204   OMPBuilder.initialize();
1205   IRBuilder<> Builder(BB);
1206   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1207   Value *TripCount = F->getArg(0);
1208 
1209   unsigned NumBodiesGenerated = 0;
1210   auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {
1211     NumBodiesGenerated += 1;
1212 
1213     Builder.restoreIP(CodeGenIP);
1214 
1215     Value *Cmp = Builder.CreateICmpEQ(LC, TripCount);
1216     Instruction *ThenTerm, *ElseTerm;
1217     SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
1218                                   &ThenTerm, &ElseTerm);
1219   };
1220 
1221   CanonicalLoopInfo *Loop =
1222       OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, TripCount);
1223 
1224   Builder.restoreIP(Loop->getAfterIP());
1225   ReturnInst *RetInst = Builder.CreateRetVoid();
1226   OMPBuilder.finalize();
1227 
1228   Loop->assertOK();
1229   EXPECT_FALSE(verifyModule(*M, &errs()));
1230 
1231   EXPECT_EQ(NumBodiesGenerated, 1U);
1232 
1233   // Verify control flow structure (in addition to Loop->assertOK()).
1234   EXPECT_EQ(Loop->getPreheader()->getSinglePredecessor(), &F->getEntryBlock());
1235   EXPECT_EQ(Loop->getAfter(), Builder.GetInsertBlock());
1236 
1237   Instruction *IndVar = Loop->getIndVar();
1238   EXPECT_TRUE(isa<PHINode>(IndVar));
1239   EXPECT_EQ(IndVar->getType(), TripCount->getType());
1240   EXPECT_EQ(IndVar->getParent(), Loop->getHeader());
1241 
1242   EXPECT_EQ(Loop->getTripCount(), TripCount);
1243 
1244   BasicBlock *Body = Loop->getBody();
1245   Instruction *CmpInst = &Body->getInstList().front();
1246   EXPECT_TRUE(isa<ICmpInst>(CmpInst));
1247   EXPECT_EQ(CmpInst->getOperand(0), IndVar);
1248 
1249   BasicBlock *LatchPred = Loop->getLatch()->getSinglePredecessor();
1250   EXPECT_TRUE(llvm::all_of(successors(Body), [=](BasicBlock *SuccBB) {
1251     return SuccBB->getSingleSuccessor() == LatchPred;
1252   }));
1253 
1254   EXPECT_EQ(&Loop->getAfter()->front(), RetInst);
1255 }
1256 
1257 TEST_F(OpenMPIRBuilderTest, CanonicalLoopBounds) {
1258   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1259   OpenMPIRBuilder OMPBuilder(*M);
1260   OMPBuilder.initialize();
1261   IRBuilder<> Builder(BB);
1262 
1263   // Check the trip count is computed correctly. We generate the canonical loop
1264   // but rely on the IRBuilder's constant folder to compute the final result
1265   // since all inputs are constant. To verify overflow situations, limit the
1266   // trip count / loop counter widths to 16 bits.
1267   auto EvalTripCount = [&](int64_t Start, int64_t Stop, int64_t Step,
1268                            bool IsSigned, bool InclusiveStop) -> int64_t {
1269     OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1270     Type *LCTy = Type::getInt16Ty(Ctx);
1271     Value *StartVal = ConstantInt::get(LCTy, Start);
1272     Value *StopVal = ConstantInt::get(LCTy, Stop);
1273     Value *StepVal = ConstantInt::get(LCTy, Step);
1274     auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {};
1275     CanonicalLoopInfo *Loop =
1276         OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, StartVal, StopVal,
1277                                        StepVal, IsSigned, InclusiveStop);
1278     Loop->assertOK();
1279     Builder.restoreIP(Loop->getAfterIP());
1280     Value *TripCount = Loop->getTripCount();
1281     return cast<ConstantInt>(TripCount)->getValue().getZExtValue();
1282   };
1283 
1284   EXPECT_EQ(EvalTripCount(0, 0, 1, false, false), 0);
1285   EXPECT_EQ(EvalTripCount(0, 1, 2, false, false), 1);
1286   EXPECT_EQ(EvalTripCount(0, 42, 1, false, false), 42);
1287   EXPECT_EQ(EvalTripCount(0, 42, 2, false, false), 21);
1288   EXPECT_EQ(EvalTripCount(21, 42, 1, false, false), 21);
1289   EXPECT_EQ(EvalTripCount(0, 5, 5, false, false), 1);
1290   EXPECT_EQ(EvalTripCount(0, 9, 5, false, false), 2);
1291   EXPECT_EQ(EvalTripCount(0, 11, 5, false, false), 3);
1292   EXPECT_EQ(EvalTripCount(0, 0xFFFF, 1, false, false), 0xFFFF);
1293   EXPECT_EQ(EvalTripCount(0xFFFF, 0, 1, false, false), 0);
1294   EXPECT_EQ(EvalTripCount(0xFFFE, 0xFFFF, 1, false, false), 1);
1295   EXPECT_EQ(EvalTripCount(0, 0xFFFF, 0x100, false, false), 0x100);
1296   EXPECT_EQ(EvalTripCount(0, 0xFFFF, 0xFFFF, false, false), 1);
1297 
1298   EXPECT_EQ(EvalTripCount(0, 6, 5, false, false), 2);
1299   EXPECT_EQ(EvalTripCount(0, 0xFFFF, 0xFFFE, false, false), 2);
1300   EXPECT_EQ(EvalTripCount(0, 0, 1, false, true), 1);
1301   EXPECT_EQ(EvalTripCount(0, 0, 0xFFFF, false, true), 1);
1302   EXPECT_EQ(EvalTripCount(0, 0xFFFE, 1, false, true), 0xFFFF);
1303   EXPECT_EQ(EvalTripCount(0, 0xFFFE, 2, false, true), 0x8000);
1304 
1305   EXPECT_EQ(EvalTripCount(0, 0, -1, true, false), 0);
1306   EXPECT_EQ(EvalTripCount(0, 1, -1, true, true), 0);
1307   EXPECT_EQ(EvalTripCount(20, 5, -5, true, false), 3);
1308   EXPECT_EQ(EvalTripCount(20, 5, -5, true, true), 4);
1309   EXPECT_EQ(EvalTripCount(-4, -2, 2, true, false), 1);
1310   EXPECT_EQ(EvalTripCount(-4, -3, 2, true, false), 1);
1311   EXPECT_EQ(EvalTripCount(-4, -2, 2, true, true), 2);
1312 
1313   EXPECT_EQ(EvalTripCount(INT16_MIN, 0, 1, true, false), 0x8000);
1314   EXPECT_EQ(EvalTripCount(INT16_MIN, 0, 1, true, true), 0x8001);
1315   EXPECT_EQ(EvalTripCount(INT16_MIN, 0x7FFF, 1, true, false), 0xFFFF);
1316   EXPECT_EQ(EvalTripCount(INT16_MIN + 1, 0x7FFF, 1, true, true), 0xFFFF);
1317   EXPECT_EQ(EvalTripCount(INT16_MIN, 0, 0x7FFF, true, false), 2);
1318   EXPECT_EQ(EvalTripCount(0x7FFF, 0, -1, true, false), 0x7FFF);
1319   EXPECT_EQ(EvalTripCount(0, INT16_MIN, -1, true, false), 0x8000);
1320   EXPECT_EQ(EvalTripCount(0, INT16_MIN, -16, true, false), 0x800);
1321   EXPECT_EQ(EvalTripCount(0x7FFF, INT16_MIN, -1, true, false), 0xFFFF);
1322   EXPECT_EQ(EvalTripCount(0x7FFF, 1, INT16_MIN, true, false), 1);
1323   EXPECT_EQ(EvalTripCount(0x7FFF, -1, INT16_MIN, true, true), 2);
1324 
1325   // Finalize the function and verify it.
1326   Builder.CreateRetVoid();
1327   OMPBuilder.finalize();
1328   EXPECT_FALSE(verifyModule(*M, &errs()));
1329 }
1330 
1331 TEST_F(OpenMPIRBuilderTest, CollapseNestedLoops) {
1332   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1333   OpenMPIRBuilder OMPBuilder(*M);
1334   OMPBuilder.initialize();
1335   F->setName("func");
1336 
1337   IRBuilder<> Builder(BB);
1338 
1339   Type *LCTy = F->getArg(0)->getType();
1340   Constant *One = ConstantInt::get(LCTy, 1);
1341   Constant *Two = ConstantInt::get(LCTy, 2);
1342   Value *OuterTripCount =
1343       Builder.CreateAdd(F->getArg(0), Two, "tripcount.outer");
1344   Value *InnerTripCount =
1345       Builder.CreateAdd(F->getArg(0), One, "tripcount.inner");
1346 
1347   // Fix an insertion point for ComputeIP.
1348   BasicBlock *LoopNextEnter =
1349       BasicBlock::Create(M->getContext(), "loopnest.enter", F,
1350                          Builder.GetInsertBlock()->getNextNode());
1351   BranchInst *EnterBr = Builder.CreateBr(LoopNextEnter);
1352   InsertPointTy ComputeIP{EnterBr->getParent(), EnterBr->getIterator()};
1353 
1354   Builder.SetInsertPoint(LoopNextEnter);
1355   OpenMPIRBuilder::LocationDescription OuterLoc(Builder.saveIP(), DL);
1356 
1357   CanonicalLoopInfo *InnerLoop = nullptr;
1358   CallInst *InbetweenLead = nullptr;
1359   CallInst *InbetweenTrail = nullptr;
1360   CallInst *Call = nullptr;
1361   auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP, Value *OuterLC) {
1362     Builder.restoreIP(OuterCodeGenIP);
1363     InbetweenLead =
1364         createPrintfCall(Builder, "In-between lead i=%d\\n", {OuterLC});
1365 
1366     auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP,
1367                                   Value *InnerLC) {
1368       Builder.restoreIP(InnerCodeGenIP);
1369       Call = createPrintfCall(Builder, "body i=%d j=%d\\n", {OuterLC, InnerLC});
1370     };
1371     InnerLoop = OMPBuilder.createCanonicalLoop(
1372         Builder.saveIP(), InnerLoopBodyGenCB, InnerTripCount, "inner");
1373 
1374     Builder.restoreIP(InnerLoop->getAfterIP());
1375     InbetweenTrail =
1376         createPrintfCall(Builder, "In-between trail i=%d\\n", {OuterLC});
1377   };
1378   CanonicalLoopInfo *OuterLoop = OMPBuilder.createCanonicalLoop(
1379       OuterLoc, OuterLoopBodyGenCB, OuterTripCount, "outer");
1380 
1381   // Finish the function.
1382   Builder.restoreIP(OuterLoop->getAfterIP());
1383   Builder.CreateRetVoid();
1384 
1385   CanonicalLoopInfo *Collapsed =
1386       OMPBuilder.collapseLoops(DL, {OuterLoop, InnerLoop}, ComputeIP);
1387 
1388   OMPBuilder.finalize();
1389   EXPECT_FALSE(verifyModule(*M, &errs()));
1390 
1391   // Verify control flow and BB order.
1392   BasicBlock *RefOrder[] = {
1393       Collapsed->getPreheader(),   Collapsed->getHeader(),
1394       Collapsed->getCond(),        Collapsed->getBody(),
1395       InbetweenLead->getParent(),  Call->getParent(),
1396       InbetweenTrail->getParent(), Collapsed->getLatch(),
1397       Collapsed->getExit(),        Collapsed->getAfter(),
1398   };
1399   EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
1400   EXPECT_TRUE(verifyListOrder(F, RefOrder));
1401 
1402   // Verify the total trip count.
1403   auto *TripCount = cast<MulOperator>(Collapsed->getTripCount());
1404   EXPECT_EQ(TripCount->getOperand(0), OuterTripCount);
1405   EXPECT_EQ(TripCount->getOperand(1), InnerTripCount);
1406 
1407   // Verify the changed indvar.
1408   auto *OuterIV = cast<BinaryOperator>(Call->getOperand(1));
1409   EXPECT_EQ(OuterIV->getOpcode(), Instruction::UDiv);
1410   EXPECT_EQ(OuterIV->getParent(), Collapsed->getBody());
1411   EXPECT_EQ(OuterIV->getOperand(1), InnerTripCount);
1412   EXPECT_EQ(OuterIV->getOperand(0), Collapsed->getIndVar());
1413 
1414   auto *InnerIV = cast<BinaryOperator>(Call->getOperand(2));
1415   EXPECT_EQ(InnerIV->getOpcode(), Instruction::URem);
1416   EXPECT_EQ(InnerIV->getParent(), Collapsed->getBody());
1417   EXPECT_EQ(InnerIV->getOperand(0), Collapsed->getIndVar());
1418   EXPECT_EQ(InnerIV->getOperand(1), InnerTripCount);
1419 
1420   EXPECT_EQ(InbetweenLead->getOperand(1), OuterIV);
1421   EXPECT_EQ(InbetweenTrail->getOperand(1), OuterIV);
1422 }
1423 
1424 TEST_F(OpenMPIRBuilderTest, TileSingleLoop) {
1425   OpenMPIRBuilder OMPBuilder(*M);
1426   CallInst *Call;
1427   BasicBlock *BodyCode;
1428   CanonicalLoopInfo *Loop =
1429       buildSingleLoopFunction(DL, OMPBuilder, 32, &Call, &BodyCode);
1430 
1431   Instruction *OrigIndVar = Loop->getIndVar();
1432   EXPECT_EQ(Call->getOperand(1), OrigIndVar);
1433 
1434   // Tile the loop.
1435   Constant *TileSize = ConstantInt::get(Loop->getIndVarType(), APInt(32, 7));
1436   std::vector<CanonicalLoopInfo *> GenLoops =
1437       OMPBuilder.tileLoops(DL, {Loop}, {TileSize});
1438 
1439   OMPBuilder.finalize();
1440   EXPECT_FALSE(verifyModule(*M, &errs()));
1441 
1442   EXPECT_EQ(GenLoops.size(), 2u);
1443   CanonicalLoopInfo *Floor = GenLoops[0];
1444   CanonicalLoopInfo *Tile = GenLoops[1];
1445 
1446   BasicBlock *RefOrder[] = {
1447       Floor->getPreheader(), Floor->getHeader(),   Floor->getCond(),
1448       Floor->getBody(),      Tile->getPreheader(), Tile->getHeader(),
1449       Tile->getCond(),       Tile->getBody(),      BodyCode,
1450       Tile->getLatch(),      Tile->getExit(),      Tile->getAfter(),
1451       Floor->getLatch(),     Floor->getExit(),     Floor->getAfter(),
1452   };
1453   EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
1454   EXPECT_TRUE(verifyListOrder(F, RefOrder));
1455 
1456   // Check the induction variable.
1457   EXPECT_EQ(Call->getParent(), BodyCode);
1458   auto *Shift = cast<AddOperator>(Call->getOperand(1));
1459   EXPECT_EQ(cast<Instruction>(Shift)->getParent(), Tile->getBody());
1460   EXPECT_EQ(Shift->getOperand(1), Tile->getIndVar());
1461   auto *Scale = cast<MulOperator>(Shift->getOperand(0));
1462   EXPECT_EQ(cast<Instruction>(Scale)->getParent(), Tile->getBody());
1463   EXPECT_EQ(Scale->getOperand(0), TileSize);
1464   EXPECT_EQ(Scale->getOperand(1), Floor->getIndVar());
1465 }
1466 
1467 TEST_F(OpenMPIRBuilderTest, TileNestedLoops) {
1468   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1469   OpenMPIRBuilder OMPBuilder(*M);
1470   OMPBuilder.initialize();
1471   F->setName("func");
1472 
1473   IRBuilder<> Builder(BB);
1474   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1475   Value *TripCount = F->getArg(0);
1476   Type *LCTy = TripCount->getType();
1477 
1478   BasicBlock *BodyCode = nullptr;
1479   CanonicalLoopInfo *InnerLoop = nullptr;
1480   auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP,
1481                                 llvm::Value *OuterLC) {
1482     auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP,
1483                                   llvm::Value *InnerLC) {
1484       Builder.restoreIP(InnerCodeGenIP);
1485       BodyCode = Builder.GetInsertBlock();
1486 
1487       // Add something that consumes the induction variables to the body.
1488       createPrintfCall(Builder, "i=%d j=%d\\n", {OuterLC, InnerLC});
1489     };
1490     InnerLoop = OMPBuilder.createCanonicalLoop(
1491         OuterCodeGenIP, InnerLoopBodyGenCB, TripCount, "inner");
1492   };
1493   CanonicalLoopInfo *OuterLoop = OMPBuilder.createCanonicalLoop(
1494       Loc, OuterLoopBodyGenCB, TripCount, "outer");
1495 
1496   // Finalize the function.
1497   Builder.restoreIP(OuterLoop->getAfterIP());
1498   Builder.CreateRetVoid();
1499 
1500   // Tile to loop nest.
1501   Constant *OuterTileSize = ConstantInt::get(LCTy, APInt(32, 11));
1502   Constant *InnerTileSize = ConstantInt::get(LCTy, APInt(32, 7));
1503   std::vector<CanonicalLoopInfo *> GenLoops = OMPBuilder.tileLoops(
1504       DL, {OuterLoop, InnerLoop}, {OuterTileSize, InnerTileSize});
1505 
1506   OMPBuilder.finalize();
1507   EXPECT_FALSE(verifyModule(*M, &errs()));
1508 
1509   EXPECT_EQ(GenLoops.size(), 4u);
1510   CanonicalLoopInfo *Floor1 = GenLoops[0];
1511   CanonicalLoopInfo *Floor2 = GenLoops[1];
1512   CanonicalLoopInfo *Tile1 = GenLoops[2];
1513   CanonicalLoopInfo *Tile2 = GenLoops[3];
1514 
1515   BasicBlock *RefOrder[] = {
1516       Floor1->getPreheader(),
1517       Floor1->getHeader(),
1518       Floor1->getCond(),
1519       Floor1->getBody(),
1520       Floor2->getPreheader(),
1521       Floor2->getHeader(),
1522       Floor2->getCond(),
1523       Floor2->getBody(),
1524       Tile1->getPreheader(),
1525       Tile1->getHeader(),
1526       Tile1->getCond(),
1527       Tile1->getBody(),
1528       Tile2->getPreheader(),
1529       Tile2->getHeader(),
1530       Tile2->getCond(),
1531       Tile2->getBody(),
1532       BodyCode,
1533       Tile2->getLatch(),
1534       Tile2->getExit(),
1535       Tile2->getAfter(),
1536       Tile1->getLatch(),
1537       Tile1->getExit(),
1538       Tile1->getAfter(),
1539       Floor2->getLatch(),
1540       Floor2->getExit(),
1541       Floor2->getAfter(),
1542       Floor1->getLatch(),
1543       Floor1->getExit(),
1544       Floor1->getAfter(),
1545   };
1546   EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
1547   EXPECT_TRUE(verifyListOrder(F, RefOrder));
1548 }
1549 
1550 TEST_F(OpenMPIRBuilderTest, TileNestedLoopsWithBounds) {
1551   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1552   OpenMPIRBuilder OMPBuilder(*M);
1553   OMPBuilder.initialize();
1554   F->setName("func");
1555 
1556   IRBuilder<> Builder(BB);
1557   Value *TripCount = F->getArg(0);
1558   Type *LCTy = TripCount->getType();
1559 
1560   Value *OuterStartVal = ConstantInt::get(LCTy, 2);
1561   Value *OuterStopVal = TripCount;
1562   Value *OuterStep = ConstantInt::get(LCTy, 5);
1563   Value *InnerStartVal = ConstantInt::get(LCTy, 13);
1564   Value *InnerStopVal = TripCount;
1565   Value *InnerStep = ConstantInt::get(LCTy, 3);
1566 
1567   // Fix an insertion point for ComputeIP.
1568   BasicBlock *LoopNextEnter =
1569       BasicBlock::Create(M->getContext(), "loopnest.enter", F,
1570                          Builder.GetInsertBlock()->getNextNode());
1571   BranchInst *EnterBr = Builder.CreateBr(LoopNextEnter);
1572   InsertPointTy ComputeIP{EnterBr->getParent(), EnterBr->getIterator()};
1573 
1574   InsertPointTy LoopIP{LoopNextEnter, LoopNextEnter->begin()};
1575   OpenMPIRBuilder::LocationDescription Loc({LoopIP, DL});
1576 
1577   BasicBlock *BodyCode = nullptr;
1578   CanonicalLoopInfo *InnerLoop = nullptr;
1579   CallInst *Call = nullptr;
1580   auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP,
1581                                 llvm::Value *OuterLC) {
1582     auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP,
1583                                   llvm::Value *InnerLC) {
1584       Builder.restoreIP(InnerCodeGenIP);
1585       BodyCode = Builder.GetInsertBlock();
1586 
1587       // Add something that consumes the induction variable to the body.
1588       Call = createPrintfCall(Builder, "i=%d j=%d\\n", {OuterLC, InnerLC});
1589     };
1590     InnerLoop = OMPBuilder.createCanonicalLoop(
1591         OuterCodeGenIP, InnerLoopBodyGenCB, InnerStartVal, InnerStopVal,
1592         InnerStep, false, false, ComputeIP, "inner");
1593   };
1594   CanonicalLoopInfo *OuterLoop = OMPBuilder.createCanonicalLoop(
1595       Loc, OuterLoopBodyGenCB, OuterStartVal, OuterStopVal, OuterStep, false,
1596       false, ComputeIP, "outer");
1597 
1598   // Finalize the function
1599   Builder.restoreIP(OuterLoop->getAfterIP());
1600   Builder.CreateRetVoid();
1601 
1602   // Tile the loop nest.
1603   Constant *TileSize0 = ConstantInt::get(LCTy, APInt(32, 11));
1604   Constant *TileSize1 = ConstantInt::get(LCTy, APInt(32, 7));
1605   std::vector<CanonicalLoopInfo *> GenLoops =
1606       OMPBuilder.tileLoops(DL, {OuterLoop, InnerLoop}, {TileSize0, TileSize1});
1607 
1608   OMPBuilder.finalize();
1609   EXPECT_FALSE(verifyModule(*M, &errs()));
1610 
1611   EXPECT_EQ(GenLoops.size(), 4u);
1612   CanonicalLoopInfo *Floor0 = GenLoops[0];
1613   CanonicalLoopInfo *Floor1 = GenLoops[1];
1614   CanonicalLoopInfo *Tile0 = GenLoops[2];
1615   CanonicalLoopInfo *Tile1 = GenLoops[3];
1616 
1617   BasicBlock *RefOrder[] = {
1618       Floor0->getPreheader(),
1619       Floor0->getHeader(),
1620       Floor0->getCond(),
1621       Floor0->getBody(),
1622       Floor1->getPreheader(),
1623       Floor1->getHeader(),
1624       Floor1->getCond(),
1625       Floor1->getBody(),
1626       Tile0->getPreheader(),
1627       Tile0->getHeader(),
1628       Tile0->getCond(),
1629       Tile0->getBody(),
1630       Tile1->getPreheader(),
1631       Tile1->getHeader(),
1632       Tile1->getCond(),
1633       Tile1->getBody(),
1634       BodyCode,
1635       Tile1->getLatch(),
1636       Tile1->getExit(),
1637       Tile1->getAfter(),
1638       Tile0->getLatch(),
1639       Tile0->getExit(),
1640       Tile0->getAfter(),
1641       Floor1->getLatch(),
1642       Floor1->getExit(),
1643       Floor1->getAfter(),
1644       Floor0->getLatch(),
1645       Floor0->getExit(),
1646       Floor0->getAfter(),
1647   };
1648   EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
1649   EXPECT_TRUE(verifyListOrder(F, RefOrder));
1650 
1651   EXPECT_EQ(Call->getParent(), BodyCode);
1652 
1653   auto *RangeShift0 = cast<AddOperator>(Call->getOperand(1));
1654   EXPECT_EQ(RangeShift0->getOperand(1), OuterStartVal);
1655   auto *RangeScale0 = cast<MulOperator>(RangeShift0->getOperand(0));
1656   EXPECT_EQ(RangeScale0->getOperand(1), OuterStep);
1657   auto *TileShift0 = cast<AddOperator>(RangeScale0->getOperand(0));
1658   EXPECT_EQ(cast<Instruction>(TileShift0)->getParent(), Tile1->getBody());
1659   EXPECT_EQ(TileShift0->getOperand(1), Tile0->getIndVar());
1660   auto *TileScale0 = cast<MulOperator>(TileShift0->getOperand(0));
1661   EXPECT_EQ(cast<Instruction>(TileScale0)->getParent(), Tile1->getBody());
1662   EXPECT_EQ(TileScale0->getOperand(0), TileSize0);
1663   EXPECT_EQ(TileScale0->getOperand(1), Floor0->getIndVar());
1664 
1665   auto *RangeShift1 = cast<AddOperator>(Call->getOperand(2));
1666   EXPECT_EQ(cast<Instruction>(RangeShift1)->getParent(), BodyCode);
1667   EXPECT_EQ(RangeShift1->getOperand(1), InnerStartVal);
1668   auto *RangeScale1 = cast<MulOperator>(RangeShift1->getOperand(0));
1669   EXPECT_EQ(cast<Instruction>(RangeScale1)->getParent(), BodyCode);
1670   EXPECT_EQ(RangeScale1->getOperand(1), InnerStep);
1671   auto *TileShift1 = cast<AddOperator>(RangeScale1->getOperand(0));
1672   EXPECT_EQ(cast<Instruction>(TileShift1)->getParent(), Tile1->getBody());
1673   EXPECT_EQ(TileShift1->getOperand(1), Tile1->getIndVar());
1674   auto *TileScale1 = cast<MulOperator>(TileShift1->getOperand(0));
1675   EXPECT_EQ(cast<Instruction>(TileScale1)->getParent(), Tile1->getBody());
1676   EXPECT_EQ(TileScale1->getOperand(0), TileSize1);
1677   EXPECT_EQ(TileScale1->getOperand(1), Floor1->getIndVar());
1678 }
1679 
1680 TEST_F(OpenMPIRBuilderTest, TileSingleLoopCounts) {
1681   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1682   OpenMPIRBuilder OMPBuilder(*M);
1683   OMPBuilder.initialize();
1684   IRBuilder<> Builder(BB);
1685 
1686   // Create a loop, tile it, and extract its trip count. All input values are
1687   // constant and IRBuilder evaluates all-constant arithmetic inplace, such that
1688   // the floor trip count itself will be a ConstantInt. Unfortunately we cannot
1689   // do the same for the tile loop.
1690   auto GetFloorCount = [&](int64_t Start, int64_t Stop, int64_t Step,
1691                            bool IsSigned, bool InclusiveStop,
1692                            int64_t TileSize) -> uint64_t {
1693     OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
1694     Type *LCTy = Type::getInt16Ty(Ctx);
1695     Value *StartVal = ConstantInt::get(LCTy, Start);
1696     Value *StopVal = ConstantInt::get(LCTy, Stop);
1697     Value *StepVal = ConstantInt::get(LCTy, Step);
1698 
1699     // Generate a loop.
1700     auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {};
1701     CanonicalLoopInfo *Loop =
1702         OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, StartVal, StopVal,
1703                                        StepVal, IsSigned, InclusiveStop);
1704     InsertPointTy AfterIP = Loop->getAfterIP();
1705 
1706     // Tile the loop.
1707     Value *TileSizeVal = ConstantInt::get(LCTy, TileSize);
1708     std::vector<CanonicalLoopInfo *> GenLoops =
1709         OMPBuilder.tileLoops(Loc.DL, {Loop}, {TileSizeVal});
1710 
1711     // Set the insertion pointer to after loop, where the next loop will be
1712     // emitted.
1713     Builder.restoreIP(AfterIP);
1714 
1715     // Extract the trip count.
1716     CanonicalLoopInfo *FloorLoop = GenLoops[0];
1717     Value *FloorTripCount = FloorLoop->getTripCount();
1718     return cast<ConstantInt>(FloorTripCount)->getValue().getZExtValue();
1719   };
1720 
1721   // Empty iteration domain.
1722   EXPECT_EQ(GetFloorCount(0, 0, 1, false, false, 7), 0u);
1723   EXPECT_EQ(GetFloorCount(0, -1, 1, false, true, 7), 0u);
1724   EXPECT_EQ(GetFloorCount(-1, -1, -1, true, false, 7), 0u);
1725   EXPECT_EQ(GetFloorCount(-1, 0, -1, true, true, 7), 0u);
1726   EXPECT_EQ(GetFloorCount(-1, -1, 3, true, false, 7), 0u);
1727 
1728   // Only complete tiles.
1729   EXPECT_EQ(GetFloorCount(0, 14, 1, false, false, 7), 2u);
1730   EXPECT_EQ(GetFloorCount(0, 14, 1, false, false, 7), 2u);
1731   EXPECT_EQ(GetFloorCount(1, 15, 1, false, false, 7), 2u);
1732   EXPECT_EQ(GetFloorCount(0, -14, -1, true, false, 7), 2u);
1733   EXPECT_EQ(GetFloorCount(-1, -14, -1, true, true, 7), 2u);
1734   EXPECT_EQ(GetFloorCount(0, 3 * 7 * 2, 3, false, false, 7), 2u);
1735 
1736   // Only a partial tile.
1737   EXPECT_EQ(GetFloorCount(0, 1, 1, false, false, 7), 1u);
1738   EXPECT_EQ(GetFloorCount(0, 6, 1, false, false, 7), 1u);
1739   EXPECT_EQ(GetFloorCount(-1, 1, 3, true, false, 7), 1u);
1740   EXPECT_EQ(GetFloorCount(-1, -2, -1, true, false, 7), 1u);
1741   EXPECT_EQ(GetFloorCount(0, 2, 3, false, false, 7), 1u);
1742 
1743   // Complete and partial tiles.
1744   EXPECT_EQ(GetFloorCount(0, 13, 1, false, false, 7), 2u);
1745   EXPECT_EQ(GetFloorCount(0, 15, 1, false, false, 7), 3u);
1746   EXPECT_EQ(GetFloorCount(-1, -14, -1, true, false, 7), 2u);
1747   EXPECT_EQ(GetFloorCount(0, 3 * 7 * 5 - 1, 3, false, false, 7), 5u);
1748   EXPECT_EQ(GetFloorCount(-1, -3 * 7 * 5, -3, true, false, 7), 5u);
1749 
1750   // Close to 16-bit integer range.
1751   EXPECT_EQ(GetFloorCount(0, 0xFFFF, 1, false, false, 1), 0xFFFFu);
1752   EXPECT_EQ(GetFloorCount(0, 0xFFFF, 1, false, false, 7), 0xFFFFu / 7 + 1);
1753   EXPECT_EQ(GetFloorCount(0, 0xFFFE, 1, false, true, 7), 0xFFFFu / 7 + 1);
1754   EXPECT_EQ(GetFloorCount(-0x8000, 0x7FFF, 1, true, false, 7), 0xFFFFu / 7 + 1);
1755   EXPECT_EQ(GetFloorCount(-0x7FFF, 0x7FFF, 1, true, true, 7), 0xFFFFu / 7 + 1);
1756   EXPECT_EQ(GetFloorCount(0, 0xFFFE, 1, false, false, 0xFFFF), 1u);
1757   EXPECT_EQ(GetFloorCount(-0x8000, 0x7FFF, 1, true, false, 0xFFFF), 1u);
1758 
1759   // Finalize the function.
1760   Builder.CreateRetVoid();
1761   OMPBuilder.finalize();
1762 
1763   EXPECT_FALSE(verifyModule(*M, &errs()));
1764 }
1765 
1766 TEST_F(OpenMPIRBuilderTest, ApplySimd) {
1767   OpenMPIRBuilder OMPBuilder(*M);
1768 
1769   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
1770 
1771   // Simd-ize the loop.
1772   OMPBuilder.applySimd(CLI, nullptr);
1773 
1774   OMPBuilder.finalize();
1775   EXPECT_FALSE(verifyModule(*M, &errs()));
1776 
1777   PassBuilder PB;
1778   FunctionAnalysisManager FAM;
1779   PB.registerFunctionAnalyses(FAM);
1780   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
1781 
1782   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
1783   EXPECT_EQ(TopLvl.size(), 1u);
1784 
1785   Loop *L = TopLvl.front();
1786   EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
1787   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
1788 
1789   // Check for llvm.access.group metadata attached to the printf
1790   // function in the loop body.
1791   BasicBlock *LoopBody = CLI->getBody();
1792   EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) {
1793     return I.getMetadata("llvm.access.group") != nullptr;
1794   }));
1795 }
1796 
1797 TEST_F(OpenMPIRBuilderTest, ApplySimdlen) {
1798   OpenMPIRBuilder OMPBuilder(*M);
1799 
1800   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
1801 
1802   // Simd-ize the loop.
1803   OMPBuilder.applySimd(CLI, ConstantInt::get(Type::getInt32Ty(Ctx), 3));
1804 
1805   OMPBuilder.finalize();
1806   EXPECT_FALSE(verifyModule(*M, &errs()));
1807 
1808   PassBuilder PB;
1809   FunctionAnalysisManager FAM;
1810   PB.registerFunctionAnalyses(FAM);
1811   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
1812 
1813   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
1814   EXPECT_EQ(TopLvl.size(), 1u);
1815 
1816   Loop *L = TopLvl.front();
1817   EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
1818   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
1819   EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3);
1820 
1821   // Check for llvm.access.group metadata attached to the printf
1822   // function in the loop body.
1823   BasicBlock *LoopBody = CLI->getBody();
1824   EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) {
1825     return I.getMetadata("llvm.access.group") != nullptr;
1826   }));
1827 }
1828 
1829 TEST_F(OpenMPIRBuilderTest, UnrollLoopFull) {
1830   OpenMPIRBuilder OMPBuilder(*M);
1831 
1832   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
1833 
1834   // Unroll the loop.
1835   OMPBuilder.unrollLoopFull(DL, CLI);
1836 
1837   OMPBuilder.finalize();
1838   EXPECT_FALSE(verifyModule(*M, &errs()));
1839 
1840   PassBuilder PB;
1841   FunctionAnalysisManager FAM;
1842   PB.registerFunctionAnalyses(FAM);
1843   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
1844 
1845   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
1846   EXPECT_EQ(TopLvl.size(), 1u);
1847 
1848   Loop *L = TopLvl.front();
1849   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"));
1850   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.full"));
1851 }
1852 
1853 TEST_F(OpenMPIRBuilderTest, UnrollLoopPartial) {
1854   OpenMPIRBuilder OMPBuilder(*M);
1855   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
1856 
1857   // Unroll the loop.
1858   CanonicalLoopInfo *UnrolledLoop = nullptr;
1859   OMPBuilder.unrollLoopPartial(DL, CLI, 5, &UnrolledLoop);
1860   ASSERT_NE(UnrolledLoop, nullptr);
1861 
1862   OMPBuilder.finalize();
1863   EXPECT_FALSE(verifyModule(*M, &errs()));
1864   UnrolledLoop->assertOK();
1865 
1866   PassBuilder PB;
1867   FunctionAnalysisManager FAM;
1868   PB.registerFunctionAnalyses(FAM);
1869   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
1870 
1871   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
1872   EXPECT_EQ(TopLvl.size(), 1u);
1873   Loop *Outer = TopLvl.front();
1874   EXPECT_EQ(Outer->getHeader(), UnrolledLoop->getHeader());
1875   EXPECT_EQ(Outer->getLoopLatch(), UnrolledLoop->getLatch());
1876   EXPECT_EQ(Outer->getExitingBlock(), UnrolledLoop->getCond());
1877   EXPECT_EQ(Outer->getExitBlock(), UnrolledLoop->getExit());
1878 
1879   EXPECT_EQ(Outer->getSubLoops().size(), 1u);
1880   Loop *Inner = Outer->getSubLoops().front();
1881 
1882   EXPECT_TRUE(getBooleanLoopAttribute(Inner, "llvm.loop.unroll.enable"));
1883   EXPECT_EQ(getIntLoopAttribute(Inner, "llvm.loop.unroll.count"), 5);
1884 }
1885 
1886 TEST_F(OpenMPIRBuilderTest, UnrollLoopHeuristic) {
1887   OpenMPIRBuilder OMPBuilder(*M);
1888 
1889   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
1890 
1891   // Unroll the loop.
1892   OMPBuilder.unrollLoopHeuristic(DL, CLI);
1893 
1894   OMPBuilder.finalize();
1895   EXPECT_FALSE(verifyModule(*M, &errs()));
1896 
1897   PassBuilder PB;
1898   FunctionAnalysisManager FAM;
1899   PB.registerFunctionAnalyses(FAM);
1900   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
1901 
1902   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
1903   EXPECT_EQ(TopLvl.size(), 1u);
1904 
1905   Loop *L = TopLvl.front();
1906   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"));
1907 }
1908 
1909 TEST_F(OpenMPIRBuilderTest, StaticWorkShareLoop) {
1910   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1911   OpenMPIRBuilder OMPBuilder(*M);
1912   OMPBuilder.initialize();
1913   IRBuilder<> Builder(BB);
1914   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1915 
1916   Type *LCTy = Type::getInt32Ty(Ctx);
1917   Value *StartVal = ConstantInt::get(LCTy, 10);
1918   Value *StopVal = ConstantInt::get(LCTy, 52);
1919   Value *StepVal = ConstantInt::get(LCTy, 2);
1920   auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {};
1921 
1922   CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop(
1923       Loc, LoopBodyGen, StartVal, StopVal, StepVal,
1924       /*IsSigned=*/false, /*InclusiveStop=*/false);
1925   BasicBlock *Preheader = CLI->getPreheader();
1926   BasicBlock *Body = CLI->getBody();
1927   Value *IV = CLI->getIndVar();
1928   BasicBlock *ExitBlock = CLI->getExit();
1929 
1930   Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
1931   InsertPointTy AllocaIP = Builder.saveIP();
1932 
1933   OMPBuilder.applyWorkshareLoop(DL, CLI, AllocaIP, /*NeedsBarrier=*/true,
1934                                 OMP_SCHEDULE_Static);
1935 
1936   BasicBlock *Cond = Body->getSinglePredecessor();
1937   Instruction *Cmp = &*Cond->begin();
1938   Value *TripCount = Cmp->getOperand(1);
1939 
1940   auto AllocaIter = BB->begin();
1941   ASSERT_GE(std::distance(BB->begin(), BB->end()), 4);
1942   AllocaInst *PLastIter = dyn_cast<AllocaInst>(&*(AllocaIter++));
1943   AllocaInst *PLowerBound = dyn_cast<AllocaInst>(&*(AllocaIter++));
1944   AllocaInst *PUpperBound = dyn_cast<AllocaInst>(&*(AllocaIter++));
1945   AllocaInst *PStride = dyn_cast<AllocaInst>(&*(AllocaIter++));
1946   EXPECT_NE(PLastIter, nullptr);
1947   EXPECT_NE(PLowerBound, nullptr);
1948   EXPECT_NE(PUpperBound, nullptr);
1949   EXPECT_NE(PStride, nullptr);
1950 
1951   auto PreheaderIter = Preheader->begin();
1952   ASSERT_GE(std::distance(Preheader->begin(), Preheader->end()), 7);
1953   StoreInst *LowerBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
1954   StoreInst *UpperBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
1955   StoreInst *StrideStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
1956   ASSERT_NE(LowerBoundStore, nullptr);
1957   ASSERT_NE(UpperBoundStore, nullptr);
1958   ASSERT_NE(StrideStore, nullptr);
1959 
1960   auto *OrigLowerBound =
1961       dyn_cast<ConstantInt>(LowerBoundStore->getValueOperand());
1962   auto *OrigUpperBound =
1963       dyn_cast<ConstantInt>(UpperBoundStore->getValueOperand());
1964   auto *OrigStride = dyn_cast<ConstantInt>(StrideStore->getValueOperand());
1965   ASSERT_NE(OrigLowerBound, nullptr);
1966   ASSERT_NE(OrigUpperBound, nullptr);
1967   ASSERT_NE(OrigStride, nullptr);
1968   EXPECT_EQ(OrigLowerBound->getValue(), 0);
1969   EXPECT_EQ(OrigUpperBound->getValue(), 20);
1970   EXPECT_EQ(OrigStride->getValue(), 1);
1971 
1972   // Check that the loop IV is updated to account for the lower bound returned
1973   // by the OpenMP runtime call.
1974   BinaryOperator *Add = dyn_cast<BinaryOperator>(&Body->front());
1975   EXPECT_EQ(Add->getOperand(0), IV);
1976   auto *LoadedLowerBound = dyn_cast<LoadInst>(Add->getOperand(1));
1977   ASSERT_NE(LoadedLowerBound, nullptr);
1978   EXPECT_EQ(LoadedLowerBound->getPointerOperand(), PLowerBound);
1979 
1980   // Check that the trip count is updated to account for the lower and upper
1981   // bounds return by the OpenMP runtime call.
1982   auto *AddOne = dyn_cast<Instruction>(TripCount);
1983   ASSERT_NE(AddOne, nullptr);
1984   ASSERT_TRUE(AddOne->isBinaryOp());
1985   auto *One = dyn_cast<ConstantInt>(AddOne->getOperand(1));
1986   ASSERT_NE(One, nullptr);
1987   EXPECT_EQ(One->getValue(), 1);
1988   auto *Difference = dyn_cast<Instruction>(AddOne->getOperand(0));
1989   ASSERT_NE(Difference, nullptr);
1990   ASSERT_TRUE(Difference->isBinaryOp());
1991   EXPECT_EQ(Difference->getOperand(1), LoadedLowerBound);
1992   auto *LoadedUpperBound = dyn_cast<LoadInst>(Difference->getOperand(0));
1993   ASSERT_NE(LoadedUpperBound, nullptr);
1994   EXPECT_EQ(LoadedUpperBound->getPointerOperand(), PUpperBound);
1995 
1996   // The original loop iterator should only be used in the condition, in the
1997   // increment and in the statement that adds the lower bound to it.
1998   EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3);
1999 
2000   // The exit block should contain the "fini" call and the barrier call,
2001   // plus the call to obtain the thread ID.
2002   size_t NumCallsInExitBlock =
2003       count_if(*ExitBlock, [](Instruction &I) { return isa<CallInst>(I); });
2004   EXPECT_EQ(NumCallsInExitBlock, 3u);
2005 }
2006 
2007 TEST_P(OpenMPIRBuilderTestWithIVBits, StaticChunkedWorkshareLoop) {
2008   unsigned IVBits = GetParam();
2009 
2010   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2011   OpenMPIRBuilder OMPBuilder(*M);
2012 
2013   BasicBlock *Body;
2014   CallInst *Call;
2015   CanonicalLoopInfo *CLI =
2016       buildSingleLoopFunction(DL, OMPBuilder, IVBits, &Call, &Body);
2017 
2018   Instruction *OrigIndVar = CLI->getIndVar();
2019   EXPECT_EQ(Call->getOperand(1), OrigIndVar);
2020 
2021   Type *LCTy = Type::getInt32Ty(Ctx);
2022   Value *ChunkSize = ConstantInt::get(LCTy, 5);
2023   InsertPointTy AllocaIP{&F->getEntryBlock(),
2024                          F->getEntryBlock().getFirstInsertionPt()};
2025   OMPBuilder.applyWorkshareLoop(DL, CLI, AllocaIP, /*NeedsBarrier=*/true,
2026                                 OMP_SCHEDULE_Static, ChunkSize);
2027 
2028   OMPBuilder.finalize();
2029   EXPECT_FALSE(verifyModule(*M, &errs()));
2030 
2031   BasicBlock *Entry = &F->getEntryBlock();
2032   BasicBlock *Preheader = Entry->getSingleSuccessor();
2033 
2034   BasicBlock *DispatchPreheader = Preheader->getSingleSuccessor();
2035   BasicBlock *DispatchHeader = DispatchPreheader->getSingleSuccessor();
2036   BasicBlock *DispatchCond = DispatchHeader->getSingleSuccessor();
2037   BasicBlock *DispatchBody = succ_begin(DispatchCond)[0];
2038   BasicBlock *DispatchExit = succ_begin(DispatchCond)[1];
2039   BasicBlock *DispatchAfter = DispatchExit->getSingleSuccessor();
2040   BasicBlock *Return = DispatchAfter->getSingleSuccessor();
2041 
2042   BasicBlock *ChunkPreheader = DispatchBody->getSingleSuccessor();
2043   BasicBlock *ChunkHeader = ChunkPreheader->getSingleSuccessor();
2044   BasicBlock *ChunkCond = ChunkHeader->getSingleSuccessor();
2045   BasicBlock *ChunkBody = succ_begin(ChunkCond)[0];
2046   BasicBlock *ChunkExit = succ_begin(ChunkCond)[1];
2047   BasicBlock *ChunkInc = ChunkBody->getSingleSuccessor();
2048   BasicBlock *ChunkAfter = ChunkExit->getSingleSuccessor();
2049 
2050   BasicBlock *DispatchInc = ChunkAfter;
2051 
2052   EXPECT_EQ(ChunkBody, Body);
2053   EXPECT_EQ(ChunkInc->getSingleSuccessor(), ChunkHeader);
2054   EXPECT_EQ(DispatchInc->getSingleSuccessor(), DispatchHeader);
2055 
2056   EXPECT_TRUE(isa<ReturnInst>(Return->front()));
2057 
2058   Value *NewIV = Call->getOperand(1);
2059   EXPECT_EQ(NewIV->getType()->getScalarSizeInBits(), IVBits);
2060 
2061   CallInst *InitCall = findSingleCall(
2062       F,
2063       (IVBits > 32) ? omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u
2064                     : omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u,
2065       OMPBuilder);
2066   EXPECT_EQ(InitCall->getParent(), Preheader);
2067   EXPECT_EQ(cast<ConstantInt>(InitCall->getArgOperand(2))->getSExtValue(), 33);
2068   EXPECT_EQ(cast<ConstantInt>(InitCall->getArgOperand(7))->getSExtValue(), 1);
2069   EXPECT_EQ(cast<ConstantInt>(InitCall->getArgOperand(8))->getSExtValue(), 5);
2070 
2071   CallInst *FiniCall = findSingleCall(
2072       F, omp::RuntimeFunction::OMPRTL___kmpc_for_static_fini, OMPBuilder);
2073   EXPECT_EQ(FiniCall->getParent(), DispatchExit);
2074 
2075   CallInst *BarrierCall = findSingleCall(
2076       F, omp::RuntimeFunction::OMPRTL___kmpc_barrier, OMPBuilder);
2077   EXPECT_EQ(BarrierCall->getParent(), DispatchExit);
2078 }
2079 
2080 INSTANTIATE_TEST_SUITE_P(IVBits, OpenMPIRBuilderTestWithIVBits,
2081                          ::testing::Values(8, 16, 32, 64));
2082 
2083 TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) {
2084   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2085   OpenMPIRBuilder OMPBuilder(*M);
2086   OMPBuilder.initialize();
2087   IRBuilder<> Builder(BB);
2088   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2089 
2090   omp::OMPScheduleType SchedType = GetParam();
2091   uint32_t ChunkSize = 1;
2092   switch (SchedType & ~OMPScheduleType::ModifierMask) {
2093   case omp::OMPScheduleType::BaseDynamicChunked:
2094   case omp::OMPScheduleType::BaseGuidedChunked:
2095     ChunkSize = 7;
2096     break;
2097   case omp::OMPScheduleType::BaseAuto:
2098   case omp::OMPScheduleType::BaseRuntime:
2099     ChunkSize = 1;
2100     break;
2101   default:
2102     assert(0 && "unknown type for this test");
2103     break;
2104   }
2105 
2106   Type *LCTy = Type::getInt32Ty(Ctx);
2107   Value *StartVal = ConstantInt::get(LCTy, 10);
2108   Value *StopVal = ConstantInt::get(LCTy, 52);
2109   Value *StepVal = ConstantInt::get(LCTy, 2);
2110   Value *ChunkVal =
2111       (ChunkSize == 1) ? nullptr : ConstantInt::get(LCTy, ChunkSize);
2112   auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {};
2113 
2114   CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop(
2115       Loc, LoopBodyGen, StartVal, StopVal, StepVal,
2116       /*IsSigned=*/false, /*InclusiveStop=*/false);
2117 
2118   Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
2119   InsertPointTy AllocaIP = Builder.saveIP();
2120 
2121   // Collect all the info from CLI, as it isn't usable after the call to
2122   // createDynamicWorkshareLoop.
2123   InsertPointTy AfterIP = CLI->getAfterIP();
2124   BasicBlock *Preheader = CLI->getPreheader();
2125   BasicBlock *ExitBlock = CLI->getExit();
2126   BasicBlock *LatchBlock = CLI->getLatch();
2127   Value *IV = CLI->getIndVar();
2128 
2129   InsertPointTy EndIP = OMPBuilder.applyWorkshareLoop(
2130       DL, CLI, AllocaIP, /*NeedsBarrier=*/true, getSchedKind(SchedType),
2131       ChunkVal, /*Simd=*/false,
2132       (SchedType & omp::OMPScheduleType::ModifierMonotonic) ==
2133           omp::OMPScheduleType::ModifierMonotonic,
2134       (SchedType & omp::OMPScheduleType::ModifierNonmonotonic) ==
2135           omp::OMPScheduleType::ModifierNonmonotonic,
2136       /*Ordered=*/false);
2137 
2138   // The returned value should be the "after" point.
2139   ASSERT_EQ(EndIP.getBlock(), AfterIP.getBlock());
2140   ASSERT_EQ(EndIP.getPoint(), AfterIP.getPoint());
2141 
2142   auto AllocaIter = BB->begin();
2143   ASSERT_GE(std::distance(BB->begin(), BB->end()), 4);
2144   AllocaInst *PLastIter = dyn_cast<AllocaInst>(&*(AllocaIter++));
2145   AllocaInst *PLowerBound = dyn_cast<AllocaInst>(&*(AllocaIter++));
2146   AllocaInst *PUpperBound = dyn_cast<AllocaInst>(&*(AllocaIter++));
2147   AllocaInst *PStride = dyn_cast<AllocaInst>(&*(AllocaIter++));
2148   EXPECT_NE(PLastIter, nullptr);
2149   EXPECT_NE(PLowerBound, nullptr);
2150   EXPECT_NE(PUpperBound, nullptr);
2151   EXPECT_NE(PStride, nullptr);
2152 
2153   auto PreheaderIter = Preheader->begin();
2154   ASSERT_GE(std::distance(Preheader->begin(), Preheader->end()), 6);
2155   StoreInst *LowerBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
2156   StoreInst *UpperBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
2157   StoreInst *StrideStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
2158   ASSERT_NE(LowerBoundStore, nullptr);
2159   ASSERT_NE(UpperBoundStore, nullptr);
2160   ASSERT_NE(StrideStore, nullptr);
2161 
2162   CallInst *ThreadIdCall = dyn_cast<CallInst>(&*(PreheaderIter++));
2163   ASSERT_NE(ThreadIdCall, nullptr);
2164   EXPECT_EQ(ThreadIdCall->getCalledFunction()->getName(),
2165             "__kmpc_global_thread_num");
2166 
2167   CallInst *InitCall = dyn_cast<CallInst>(&*PreheaderIter);
2168 
2169   ASSERT_NE(InitCall, nullptr);
2170   EXPECT_EQ(InitCall->getCalledFunction()->getName(),
2171             "__kmpc_dispatch_init_4u");
2172   EXPECT_EQ(InitCall->arg_size(), 7U);
2173   EXPECT_EQ(InitCall->getArgOperand(6), ConstantInt::get(LCTy, ChunkSize));
2174   ConstantInt *SchedVal = cast<ConstantInt>(InitCall->getArgOperand(2));
2175   if ((SchedType & OMPScheduleType::MonotonicityMask) ==
2176       OMPScheduleType::None) {
2177     // Implementation is allowed to add default nonmonotonicity flag
2178     EXPECT_EQ(
2179         static_cast<OMPScheduleType>(SchedVal->getValue().getZExtValue()) |
2180             OMPScheduleType::ModifierNonmonotonic,
2181         SchedType | OMPScheduleType::ModifierNonmonotonic);
2182   } else {
2183     EXPECT_EQ(static_cast<OMPScheduleType>(SchedVal->getValue().getZExtValue()),
2184               SchedType);
2185   }
2186 
2187   ConstantInt *OrigLowerBound =
2188       dyn_cast<ConstantInt>(LowerBoundStore->getValueOperand());
2189   ConstantInt *OrigUpperBound =
2190       dyn_cast<ConstantInt>(UpperBoundStore->getValueOperand());
2191   ConstantInt *OrigStride =
2192       dyn_cast<ConstantInt>(StrideStore->getValueOperand());
2193   ASSERT_NE(OrigLowerBound, nullptr);
2194   ASSERT_NE(OrigUpperBound, nullptr);
2195   ASSERT_NE(OrigStride, nullptr);
2196   EXPECT_EQ(OrigLowerBound->getValue(), 1);
2197   EXPECT_EQ(OrigUpperBound->getValue(), 21);
2198   EXPECT_EQ(OrigStride->getValue(), 1);
2199 
2200   CallInst *FiniCall = dyn_cast<CallInst>(
2201       &*(LatchBlock->getTerminator()->getPrevNonDebugInstruction(true)));
2202   EXPECT_EQ(FiniCall, nullptr);
2203 
2204   // The original loop iterator should only be used in the condition, in the
2205   // increment and in the statement that adds the lower bound to it.
2206   EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3);
2207 
2208   // The exit block should contain the barrier call, plus the call to obtain
2209   // the thread ID.
2210   size_t NumCallsInExitBlock =
2211       count_if(*ExitBlock, [](Instruction &I) { return isa<CallInst>(I); });
2212   EXPECT_EQ(NumCallsInExitBlock, 2u);
2213 
2214   // Add a termination to our block and check that it is internally consistent.
2215   Builder.restoreIP(EndIP);
2216   Builder.CreateRetVoid();
2217   OMPBuilder.finalize();
2218   EXPECT_FALSE(verifyModule(*M, &errs()));
2219 }
2220 
2221 INSTANTIATE_TEST_SUITE_P(
2222     OpenMPWSLoopSchedulingTypes, OpenMPIRBuilderTestWithParams,
2223     ::testing::Values(omp::OMPScheduleType::UnorderedDynamicChunked,
2224                       omp::OMPScheduleType::UnorderedGuidedChunked,
2225                       omp::OMPScheduleType::UnorderedAuto,
2226                       omp::OMPScheduleType::UnorderedRuntime,
2227                       omp::OMPScheduleType::UnorderedDynamicChunked |
2228                           omp::OMPScheduleType::ModifierMonotonic,
2229                       omp::OMPScheduleType::UnorderedDynamicChunked |
2230                           omp::OMPScheduleType::ModifierNonmonotonic,
2231                       omp::OMPScheduleType::UnorderedGuidedChunked |
2232                           omp::OMPScheduleType::ModifierMonotonic,
2233                       omp::OMPScheduleType::UnorderedGuidedChunked |
2234                           omp::OMPScheduleType::ModifierNonmonotonic,
2235                       omp::OMPScheduleType::UnorderedAuto |
2236                           omp::OMPScheduleType::ModifierMonotonic,
2237                       omp::OMPScheduleType::UnorderedRuntime |
2238                           omp::OMPScheduleType::ModifierMonotonic));
2239 
2240 TEST_F(OpenMPIRBuilderTest, DynamicWorkShareLoopOrdered) {
2241   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2242   OpenMPIRBuilder OMPBuilder(*M);
2243   OMPBuilder.initialize();
2244   IRBuilder<> Builder(BB);
2245   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2246 
2247   uint32_t ChunkSize = 1;
2248   Type *LCTy = Type::getInt32Ty(Ctx);
2249   Value *StartVal = ConstantInt::get(LCTy, 10);
2250   Value *StopVal = ConstantInt::get(LCTy, 52);
2251   Value *StepVal = ConstantInt::get(LCTy, 2);
2252   Value *ChunkVal = ConstantInt::get(LCTy, ChunkSize);
2253   auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {};
2254 
2255   CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop(
2256       Loc, LoopBodyGen, StartVal, StopVal, StepVal,
2257       /*IsSigned=*/false, /*InclusiveStop=*/false);
2258 
2259   Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
2260   InsertPointTy AllocaIP = Builder.saveIP();
2261 
2262   // Collect all the info from CLI, as it isn't usable after the call to
2263   // createDynamicWorkshareLoop.
2264   BasicBlock *Preheader = CLI->getPreheader();
2265   BasicBlock *ExitBlock = CLI->getExit();
2266   BasicBlock *LatchBlock = CLI->getLatch();
2267   Value *IV = CLI->getIndVar();
2268 
2269   InsertPointTy EndIP = OMPBuilder.applyWorkshareLoop(
2270       DL, CLI, AllocaIP, /*NeedsBarrier=*/true, OMP_SCHEDULE_Static, ChunkVal,
2271       /*HasSimdModifier=*/false, /*HasMonotonicModifier=*/false,
2272       /*HasNonmonotonicModifier=*/false,
2273       /*HasOrderedClause=*/true);
2274 
2275   // Add a termination to our block and check that it is internally consistent.
2276   Builder.restoreIP(EndIP);
2277   Builder.CreateRetVoid();
2278   OMPBuilder.finalize();
2279   EXPECT_FALSE(verifyModule(*M, &errs()));
2280 
2281   CallInst *InitCall = nullptr;
2282   for (Instruction &EI : *Preheader) {
2283     Instruction *Cur = &EI;
2284     if (isa<CallInst>(Cur)) {
2285       InitCall = cast<CallInst>(Cur);
2286       if (InitCall->getCalledFunction()->getName() == "__kmpc_dispatch_init_4u")
2287         break;
2288       InitCall = nullptr;
2289     }
2290   }
2291   EXPECT_NE(InitCall, nullptr);
2292   EXPECT_EQ(InitCall->arg_size(), 7U);
2293   ConstantInt *SchedVal = cast<ConstantInt>(InitCall->getArgOperand(2));
2294   EXPECT_EQ(SchedVal->getValue(),
2295             static_cast<uint64_t>(OMPScheduleType::OrderedStaticChunked));
2296 
2297   CallInst *FiniCall = dyn_cast<CallInst>(
2298       &*(LatchBlock->getTerminator()->getPrevNonDebugInstruction(true)));
2299   ASSERT_NE(FiniCall, nullptr);
2300   EXPECT_EQ(FiniCall->getCalledFunction()->getName(),
2301             "__kmpc_dispatch_fini_4u");
2302   EXPECT_EQ(FiniCall->arg_size(), 2U);
2303   EXPECT_EQ(InitCall->getArgOperand(0), FiniCall->getArgOperand(0));
2304   EXPECT_EQ(InitCall->getArgOperand(1), FiniCall->getArgOperand(1));
2305 
2306   // The original loop iterator should only be used in the condition, in the
2307   // increment and in the statement that adds the lower bound to it.
2308   EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3);
2309 
2310   // The exit block should contain the barrier call, plus the call to obtain
2311   // the thread ID.
2312   size_t NumCallsInExitBlock =
2313       count_if(*ExitBlock, [](Instruction &I) { return isa<CallInst>(I); });
2314   EXPECT_EQ(NumCallsInExitBlock, 2u);
2315 }
2316 
2317 TEST_F(OpenMPIRBuilderTest, MasterDirective) {
2318   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2319   OpenMPIRBuilder OMPBuilder(*M);
2320   OMPBuilder.initialize();
2321   F->setName("func");
2322   IRBuilder<> Builder(BB);
2323 
2324   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2325 
2326   AllocaInst *PrivAI = nullptr;
2327 
2328   BasicBlock *EntryBB = nullptr;
2329   BasicBlock *ThenBB = nullptr;
2330 
2331   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
2332     if (AllocaIP.isSet())
2333       Builder.restoreIP(AllocaIP);
2334     else
2335       Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
2336     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
2337     Builder.CreateStore(F->arg_begin(), PrivAI);
2338 
2339     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
2340     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
2341     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
2342 
2343     Builder.restoreIP(CodeGenIP);
2344 
2345     // collect some info for checks later
2346     ThenBB = Builder.GetInsertBlock();
2347     EntryBB = ThenBB->getUniquePredecessor();
2348 
2349     // simple instructions for body
2350     Value *PrivLoad =
2351         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
2352     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
2353   };
2354 
2355   auto FiniCB = [&](InsertPointTy IP) {
2356     BasicBlock *IPBB = IP.getBlock();
2357     EXPECT_NE(IPBB->end(), IP.getPoint());
2358   };
2359 
2360   Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB));
2361   Value *EntryBBTI = EntryBB->getTerminator();
2362   EXPECT_NE(EntryBBTI, nullptr);
2363   EXPECT_TRUE(isa<BranchInst>(EntryBBTI));
2364   BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator());
2365   EXPECT_TRUE(EntryBr->isConditional());
2366   EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB);
2367   BasicBlock *ExitBB = ThenBB->getUniqueSuccessor();
2368   EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB);
2369 
2370   CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition());
2371   EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0)));
2372 
2373   CallInst *MasterEntryCI = cast<CallInst>(CondInst->getOperand(0));
2374   EXPECT_EQ(MasterEntryCI->arg_size(), 2U);
2375   EXPECT_EQ(MasterEntryCI->getCalledFunction()->getName(), "__kmpc_master");
2376   EXPECT_TRUE(isa<GlobalVariable>(MasterEntryCI->getArgOperand(0)));
2377 
2378   CallInst *MasterEndCI = nullptr;
2379   for (auto &FI : *ThenBB) {
2380     Instruction *cur = &FI;
2381     if (isa<CallInst>(cur)) {
2382       MasterEndCI = cast<CallInst>(cur);
2383       if (MasterEndCI->getCalledFunction()->getName() == "__kmpc_end_master")
2384         break;
2385       MasterEndCI = nullptr;
2386     }
2387   }
2388   EXPECT_NE(MasterEndCI, nullptr);
2389   EXPECT_EQ(MasterEndCI->arg_size(), 2U);
2390   EXPECT_TRUE(isa<GlobalVariable>(MasterEndCI->getArgOperand(0)));
2391   EXPECT_EQ(MasterEndCI->getArgOperand(1), MasterEntryCI->getArgOperand(1));
2392 }
2393 
2394 TEST_F(OpenMPIRBuilderTest, MaskedDirective) {
2395   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2396   OpenMPIRBuilder OMPBuilder(*M);
2397   OMPBuilder.initialize();
2398   F->setName("func");
2399   IRBuilder<> Builder(BB);
2400 
2401   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2402 
2403   AllocaInst *PrivAI = nullptr;
2404 
2405   BasicBlock *EntryBB = nullptr;
2406   BasicBlock *ThenBB = nullptr;
2407 
2408   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
2409     if (AllocaIP.isSet())
2410       Builder.restoreIP(AllocaIP);
2411     else
2412       Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
2413     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
2414     Builder.CreateStore(F->arg_begin(), PrivAI);
2415 
2416     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
2417     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
2418     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
2419 
2420     Builder.restoreIP(CodeGenIP);
2421 
2422     // collect some info for checks later
2423     ThenBB = Builder.GetInsertBlock();
2424     EntryBB = ThenBB->getUniquePredecessor();
2425 
2426     // simple instructions for body
2427     Value *PrivLoad =
2428         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
2429     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
2430   };
2431 
2432   auto FiniCB = [&](InsertPointTy IP) {
2433     BasicBlock *IPBB = IP.getBlock();
2434     EXPECT_NE(IPBB->end(), IP.getPoint());
2435   };
2436 
2437   Constant *Filter = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
2438   Builder.restoreIP(
2439       OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, Filter));
2440   Value *EntryBBTI = EntryBB->getTerminator();
2441   EXPECT_NE(EntryBBTI, nullptr);
2442   EXPECT_TRUE(isa<BranchInst>(EntryBBTI));
2443   BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator());
2444   EXPECT_TRUE(EntryBr->isConditional());
2445   EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB);
2446   BasicBlock *ExitBB = ThenBB->getUniqueSuccessor();
2447   EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB);
2448 
2449   CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition());
2450   EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0)));
2451 
2452   CallInst *MaskedEntryCI = cast<CallInst>(CondInst->getOperand(0));
2453   EXPECT_EQ(MaskedEntryCI->arg_size(), 3U);
2454   EXPECT_EQ(MaskedEntryCI->getCalledFunction()->getName(), "__kmpc_masked");
2455   EXPECT_TRUE(isa<GlobalVariable>(MaskedEntryCI->getArgOperand(0)));
2456 
2457   CallInst *MaskedEndCI = nullptr;
2458   for (auto &FI : *ThenBB) {
2459     Instruction *cur = &FI;
2460     if (isa<CallInst>(cur)) {
2461       MaskedEndCI = cast<CallInst>(cur);
2462       if (MaskedEndCI->getCalledFunction()->getName() == "__kmpc_end_masked")
2463         break;
2464       MaskedEndCI = nullptr;
2465     }
2466   }
2467   EXPECT_NE(MaskedEndCI, nullptr);
2468   EXPECT_EQ(MaskedEndCI->arg_size(), 2U);
2469   EXPECT_TRUE(isa<GlobalVariable>(MaskedEndCI->getArgOperand(0)));
2470   EXPECT_EQ(MaskedEndCI->getArgOperand(1), MaskedEntryCI->getArgOperand(1));
2471 }
2472 
2473 TEST_F(OpenMPIRBuilderTest, CriticalDirective) {
2474   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2475   OpenMPIRBuilder OMPBuilder(*M);
2476   OMPBuilder.initialize();
2477   F->setName("func");
2478   IRBuilder<> Builder(BB);
2479 
2480   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2481 
2482   AllocaInst *PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
2483 
2484   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
2485     // actual start for bodyCB
2486     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
2487     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
2488     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
2489 
2490     // body begin
2491     Builder.restoreIP(CodeGenIP);
2492     Builder.CreateStore(F->arg_begin(), PrivAI);
2493     Value *PrivLoad =
2494         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
2495     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
2496   };
2497 
2498   auto FiniCB = [&](InsertPointTy IP) {
2499     BasicBlock *IPBB = IP.getBlock();
2500     EXPECT_NE(IPBB->end(), IP.getPoint());
2501   };
2502   BasicBlock *EntryBB = Builder.GetInsertBlock();
2503 
2504   Builder.restoreIP(OMPBuilder.createCritical(Builder, BodyGenCB, FiniCB,
2505                                               "testCRT", nullptr));
2506 
2507   CallInst *CriticalEntryCI = nullptr;
2508   for (auto &EI : *EntryBB) {
2509     Instruction *cur = &EI;
2510     if (isa<CallInst>(cur)) {
2511       CriticalEntryCI = cast<CallInst>(cur);
2512       if (CriticalEntryCI->getCalledFunction()->getName() == "__kmpc_critical")
2513         break;
2514       CriticalEntryCI = nullptr;
2515     }
2516   }
2517   EXPECT_NE(CriticalEntryCI, nullptr);
2518   EXPECT_EQ(CriticalEntryCI->arg_size(), 3U);
2519   EXPECT_EQ(CriticalEntryCI->getCalledFunction()->getName(), "__kmpc_critical");
2520   EXPECT_TRUE(isa<GlobalVariable>(CriticalEntryCI->getArgOperand(0)));
2521 
2522   CallInst *CriticalEndCI = nullptr;
2523   for (auto &FI : *EntryBB) {
2524     Instruction *cur = &FI;
2525     if (isa<CallInst>(cur)) {
2526       CriticalEndCI = cast<CallInst>(cur);
2527       if (CriticalEndCI->getCalledFunction()->getName() ==
2528           "__kmpc_end_critical")
2529         break;
2530       CriticalEndCI = nullptr;
2531     }
2532   }
2533   EXPECT_NE(CriticalEndCI, nullptr);
2534   EXPECT_EQ(CriticalEndCI->arg_size(), 3U);
2535   EXPECT_TRUE(isa<GlobalVariable>(CriticalEndCI->getArgOperand(0)));
2536   EXPECT_EQ(CriticalEndCI->getArgOperand(1), CriticalEntryCI->getArgOperand(1));
2537   PointerType *CriticalNamePtrTy =
2538       PointerType::getUnqual(ArrayType::get(Type::getInt32Ty(Ctx), 8));
2539   EXPECT_EQ(CriticalEndCI->getArgOperand(2), CriticalEntryCI->getArgOperand(2));
2540   EXPECT_EQ(CriticalEndCI->getArgOperand(2)->getType(), CriticalNamePtrTy);
2541 }
2542 
2543 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveDependSource) {
2544   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2545   OpenMPIRBuilder OMPBuilder(*M);
2546   OMPBuilder.initialize();
2547   F->setName("func");
2548   IRBuilder<> Builder(BB);
2549   LLVMContext &Ctx = M->getContext();
2550 
2551   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2552 
2553   InsertPointTy AllocaIP(&F->getEntryBlock(),
2554                          F->getEntryBlock().getFirstInsertionPt());
2555 
2556   unsigned NumLoops = 2;
2557   SmallVector<Value *, 2> StoreValues;
2558   Type *LCTy = Type::getInt64Ty(Ctx);
2559   StoreValues.emplace_back(ConstantInt::get(LCTy, 1));
2560   StoreValues.emplace_back(ConstantInt::get(LCTy, 2));
2561 
2562   // Test for "#omp ordered depend(source)"
2563   Builder.restoreIP(OMPBuilder.createOrderedDepend(Builder, AllocaIP, NumLoops,
2564                                                    StoreValues, ".cnt.addr",
2565                                                    /*IsDependSource=*/true));
2566 
2567   Builder.CreateRetVoid();
2568   OMPBuilder.finalize();
2569   EXPECT_FALSE(verifyModule(*M, &errs()));
2570 
2571   AllocaInst *AllocInst = dyn_cast<AllocaInst>(&BB->front());
2572   ASSERT_NE(AllocInst, nullptr);
2573   ArrayType *ArrType = dyn_cast<ArrayType>(AllocInst->getAllocatedType());
2574   EXPECT_EQ(ArrType->getNumElements(), NumLoops);
2575   EXPECT_TRUE(
2576       AllocInst->getAllocatedType()->getArrayElementType()->isIntegerTy(64));
2577 
2578   Instruction *IterInst = dyn_cast<Instruction>(AllocInst);
2579   for (unsigned Iter = 0; Iter < NumLoops; Iter++) {
2580     GetElementPtrInst *DependAddrGEPIter =
2581         dyn_cast<GetElementPtrInst>(IterInst->getNextNode());
2582     ASSERT_NE(DependAddrGEPIter, nullptr);
2583     EXPECT_EQ(DependAddrGEPIter->getPointerOperand(), AllocInst);
2584     EXPECT_EQ(DependAddrGEPIter->getNumIndices(), (unsigned)2);
2585     auto *FirstIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(1));
2586     auto *SecondIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(2));
2587     ASSERT_NE(FirstIdx, nullptr);
2588     ASSERT_NE(SecondIdx, nullptr);
2589     EXPECT_EQ(FirstIdx->getValue(), 0);
2590     EXPECT_EQ(SecondIdx->getValue(), Iter);
2591     StoreInst *StoreValue =
2592         dyn_cast<StoreInst>(DependAddrGEPIter->getNextNode());
2593     ASSERT_NE(StoreValue, nullptr);
2594     EXPECT_EQ(StoreValue->getValueOperand(), StoreValues[Iter]);
2595     EXPECT_EQ(StoreValue->getPointerOperand(), DependAddrGEPIter);
2596     EXPECT_EQ(StoreValue->getAlign(), Align(8));
2597     IterInst = dyn_cast<Instruction>(StoreValue);
2598   }
2599 
2600   GetElementPtrInst *DependBaseAddrGEP =
2601       dyn_cast<GetElementPtrInst>(IterInst->getNextNode());
2602   ASSERT_NE(DependBaseAddrGEP, nullptr);
2603   EXPECT_EQ(DependBaseAddrGEP->getPointerOperand(), AllocInst);
2604   EXPECT_EQ(DependBaseAddrGEP->getNumIndices(), (unsigned)2);
2605   auto *FirstIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(1));
2606   auto *SecondIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(2));
2607   ASSERT_NE(FirstIdx, nullptr);
2608   ASSERT_NE(SecondIdx, nullptr);
2609   EXPECT_EQ(FirstIdx->getValue(), 0);
2610   EXPECT_EQ(SecondIdx->getValue(), 0);
2611 
2612   CallInst *GTID = dyn_cast<CallInst>(DependBaseAddrGEP->getNextNode());
2613   ASSERT_NE(GTID, nullptr);
2614   EXPECT_EQ(GTID->arg_size(), 1U);
2615   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
2616   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
2617   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
2618 
2619   CallInst *Depend = dyn_cast<CallInst>(GTID->getNextNode());
2620   ASSERT_NE(Depend, nullptr);
2621   EXPECT_EQ(Depend->arg_size(), 3U);
2622   EXPECT_EQ(Depend->getCalledFunction()->getName(), "__kmpc_doacross_post");
2623   EXPECT_TRUE(isa<GlobalVariable>(Depend->getArgOperand(0)));
2624   EXPECT_EQ(Depend->getArgOperand(1), GTID);
2625   EXPECT_EQ(Depend->getArgOperand(2), DependBaseAddrGEP);
2626 }
2627 
2628 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveDependSink) {
2629   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2630   OpenMPIRBuilder OMPBuilder(*M);
2631   OMPBuilder.initialize();
2632   F->setName("func");
2633   IRBuilder<> Builder(BB);
2634   LLVMContext &Ctx = M->getContext();
2635 
2636   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2637 
2638   InsertPointTy AllocaIP(&F->getEntryBlock(),
2639                          F->getEntryBlock().getFirstInsertionPt());
2640 
2641   unsigned NumLoops = 2;
2642   SmallVector<Value *, 2> StoreValues;
2643   Type *LCTy = Type::getInt64Ty(Ctx);
2644   StoreValues.emplace_back(ConstantInt::get(LCTy, 1));
2645   StoreValues.emplace_back(ConstantInt::get(LCTy, 2));
2646 
2647   // Test for "#omp ordered depend(sink: vec)"
2648   Builder.restoreIP(OMPBuilder.createOrderedDepend(Builder, AllocaIP, NumLoops,
2649                                                    StoreValues, ".cnt.addr",
2650                                                    /*IsDependSource=*/false));
2651 
2652   Builder.CreateRetVoid();
2653   OMPBuilder.finalize();
2654   EXPECT_FALSE(verifyModule(*M, &errs()));
2655 
2656   AllocaInst *AllocInst = dyn_cast<AllocaInst>(&BB->front());
2657   ASSERT_NE(AllocInst, nullptr);
2658   ArrayType *ArrType = dyn_cast<ArrayType>(AllocInst->getAllocatedType());
2659   EXPECT_EQ(ArrType->getNumElements(), NumLoops);
2660   EXPECT_TRUE(
2661       AllocInst->getAllocatedType()->getArrayElementType()->isIntegerTy(64));
2662 
2663   Instruction *IterInst = dyn_cast<Instruction>(AllocInst);
2664   for (unsigned Iter = 0; Iter < NumLoops; Iter++) {
2665     GetElementPtrInst *DependAddrGEPIter =
2666         dyn_cast<GetElementPtrInst>(IterInst->getNextNode());
2667     ASSERT_NE(DependAddrGEPIter, nullptr);
2668     EXPECT_EQ(DependAddrGEPIter->getPointerOperand(), AllocInst);
2669     EXPECT_EQ(DependAddrGEPIter->getNumIndices(), (unsigned)2);
2670     auto *FirstIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(1));
2671     auto *SecondIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(2));
2672     ASSERT_NE(FirstIdx, nullptr);
2673     ASSERT_NE(SecondIdx, nullptr);
2674     EXPECT_EQ(FirstIdx->getValue(), 0);
2675     EXPECT_EQ(SecondIdx->getValue(), Iter);
2676     StoreInst *StoreValue =
2677         dyn_cast<StoreInst>(DependAddrGEPIter->getNextNode());
2678     ASSERT_NE(StoreValue, nullptr);
2679     EXPECT_EQ(StoreValue->getValueOperand(), StoreValues[Iter]);
2680     EXPECT_EQ(StoreValue->getPointerOperand(), DependAddrGEPIter);
2681     EXPECT_EQ(StoreValue->getAlign(), Align(8));
2682     IterInst = dyn_cast<Instruction>(StoreValue);
2683   }
2684 
2685   GetElementPtrInst *DependBaseAddrGEP =
2686       dyn_cast<GetElementPtrInst>(IterInst->getNextNode());
2687   ASSERT_NE(DependBaseAddrGEP, nullptr);
2688   EXPECT_EQ(DependBaseAddrGEP->getPointerOperand(), AllocInst);
2689   EXPECT_EQ(DependBaseAddrGEP->getNumIndices(), (unsigned)2);
2690   auto *FirstIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(1));
2691   auto *SecondIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(2));
2692   ASSERT_NE(FirstIdx, nullptr);
2693   ASSERT_NE(SecondIdx, nullptr);
2694   EXPECT_EQ(FirstIdx->getValue(), 0);
2695   EXPECT_EQ(SecondIdx->getValue(), 0);
2696 
2697   CallInst *GTID = dyn_cast<CallInst>(DependBaseAddrGEP->getNextNode());
2698   ASSERT_NE(GTID, nullptr);
2699   EXPECT_EQ(GTID->arg_size(), 1U);
2700   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
2701   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
2702   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
2703 
2704   CallInst *Depend = dyn_cast<CallInst>(GTID->getNextNode());
2705   ASSERT_NE(Depend, nullptr);
2706   EXPECT_EQ(Depend->arg_size(), 3U);
2707   EXPECT_EQ(Depend->getCalledFunction()->getName(), "__kmpc_doacross_wait");
2708   EXPECT_TRUE(isa<GlobalVariable>(Depend->getArgOperand(0)));
2709   EXPECT_EQ(Depend->getArgOperand(1), GTID);
2710   EXPECT_EQ(Depend->getArgOperand(2), DependBaseAddrGEP);
2711 }
2712 
2713 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveThreads) {
2714   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2715   OpenMPIRBuilder OMPBuilder(*M);
2716   OMPBuilder.initialize();
2717   F->setName("func");
2718   IRBuilder<> Builder(BB);
2719 
2720   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2721 
2722   AllocaInst *PrivAI =
2723       Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst");
2724 
2725   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
2726     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
2727     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
2728     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
2729 
2730     Builder.restoreIP(CodeGenIP);
2731     Builder.CreateStore(F->arg_begin(), PrivAI);
2732     Value *PrivLoad =
2733         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
2734     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
2735   };
2736 
2737   auto FiniCB = [&](InsertPointTy IP) {
2738     BasicBlock *IPBB = IP.getBlock();
2739     EXPECT_NE(IPBB->end(), IP.getPoint());
2740   };
2741 
2742   // Test for "#omp ordered [threads]"
2743   BasicBlock *EntryBB = Builder.GetInsertBlock();
2744   Builder.restoreIP(
2745       OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, true));
2746 
2747   Builder.CreateRetVoid();
2748   OMPBuilder.finalize();
2749   EXPECT_FALSE(verifyModule(*M, &errs()));
2750 
2751   EXPECT_NE(EntryBB->getTerminator(), nullptr);
2752 
2753   CallInst *OrderedEntryCI = nullptr;
2754   for (auto &EI : *EntryBB) {
2755     Instruction *Cur = &EI;
2756     if (isa<CallInst>(Cur)) {
2757       OrderedEntryCI = cast<CallInst>(Cur);
2758       if (OrderedEntryCI->getCalledFunction()->getName() == "__kmpc_ordered")
2759         break;
2760       OrderedEntryCI = nullptr;
2761     }
2762   }
2763   EXPECT_NE(OrderedEntryCI, nullptr);
2764   EXPECT_EQ(OrderedEntryCI->arg_size(), 2U);
2765   EXPECT_EQ(OrderedEntryCI->getCalledFunction()->getName(), "__kmpc_ordered");
2766   EXPECT_TRUE(isa<GlobalVariable>(OrderedEntryCI->getArgOperand(0)));
2767 
2768   CallInst *OrderedEndCI = nullptr;
2769   for (auto &FI : *EntryBB) {
2770     Instruction *Cur = &FI;
2771     if (isa<CallInst>(Cur)) {
2772       OrderedEndCI = cast<CallInst>(Cur);
2773       if (OrderedEndCI->getCalledFunction()->getName() == "__kmpc_end_ordered")
2774         break;
2775       OrderedEndCI = nullptr;
2776     }
2777   }
2778   EXPECT_NE(OrderedEndCI, nullptr);
2779   EXPECT_EQ(OrderedEndCI->arg_size(), 2U);
2780   EXPECT_TRUE(isa<GlobalVariable>(OrderedEndCI->getArgOperand(0)));
2781   EXPECT_EQ(OrderedEndCI->getArgOperand(1), OrderedEntryCI->getArgOperand(1));
2782 }
2783 
2784 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveSimd) {
2785   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2786   OpenMPIRBuilder OMPBuilder(*M);
2787   OMPBuilder.initialize();
2788   F->setName("func");
2789   IRBuilder<> Builder(BB);
2790 
2791   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2792 
2793   AllocaInst *PrivAI =
2794       Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst");
2795 
2796   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
2797     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
2798     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
2799     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
2800 
2801     Builder.restoreIP(CodeGenIP);
2802     Builder.CreateStore(F->arg_begin(), PrivAI);
2803     Value *PrivLoad =
2804         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
2805     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
2806   };
2807 
2808   auto FiniCB = [&](InsertPointTy IP) {
2809     BasicBlock *IPBB = IP.getBlock();
2810     EXPECT_NE(IPBB->end(), IP.getPoint());
2811   };
2812 
2813   // Test for "#omp ordered simd"
2814   BasicBlock *EntryBB = Builder.GetInsertBlock();
2815   Builder.restoreIP(
2816       OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, false));
2817 
2818   Builder.CreateRetVoid();
2819   OMPBuilder.finalize();
2820   EXPECT_FALSE(verifyModule(*M, &errs()));
2821 
2822   EXPECT_NE(EntryBB->getTerminator(), nullptr);
2823 
2824   CallInst *OrderedEntryCI = nullptr;
2825   for (auto &EI : *EntryBB) {
2826     Instruction *Cur = &EI;
2827     if (isa<CallInst>(Cur)) {
2828       OrderedEntryCI = cast<CallInst>(Cur);
2829       if (OrderedEntryCI->getCalledFunction()->getName() == "__kmpc_ordered")
2830         break;
2831       OrderedEntryCI = nullptr;
2832     }
2833   }
2834   EXPECT_EQ(OrderedEntryCI, nullptr);
2835 
2836   CallInst *OrderedEndCI = nullptr;
2837   for (auto &FI : *EntryBB) {
2838     Instruction *Cur = &FI;
2839     if (isa<CallInst>(Cur)) {
2840       OrderedEndCI = cast<CallInst>(Cur);
2841       if (OrderedEndCI->getCalledFunction()->getName() == "__kmpc_end_ordered")
2842         break;
2843       OrderedEndCI = nullptr;
2844     }
2845   }
2846   EXPECT_EQ(OrderedEndCI, nullptr);
2847 }
2848 
2849 TEST_F(OpenMPIRBuilderTest, CopyinBlocks) {
2850   OpenMPIRBuilder OMPBuilder(*M);
2851   OMPBuilder.initialize();
2852   F->setName("func");
2853   IRBuilder<> Builder(BB);
2854 
2855   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2856 
2857   IntegerType *Int32 = Type::getInt32Ty(M->getContext());
2858   AllocaInst *MasterAddress = Builder.CreateAlloca(Int32->getPointerTo());
2859   AllocaInst *PrivAddress = Builder.CreateAlloca(Int32->getPointerTo());
2860 
2861   BasicBlock *EntryBB = BB;
2862 
2863   OMPBuilder.createCopyinClauseBlocks(Builder.saveIP(), MasterAddress,
2864                                       PrivAddress, Int32, /*BranchtoEnd*/ true);
2865 
2866   BranchInst *EntryBr = dyn_cast_or_null<BranchInst>(EntryBB->getTerminator());
2867 
2868   EXPECT_NE(EntryBr, nullptr);
2869   EXPECT_TRUE(EntryBr->isConditional());
2870 
2871   BasicBlock *NotMasterBB = EntryBr->getSuccessor(0);
2872   BasicBlock *CopyinEnd = EntryBr->getSuccessor(1);
2873   CmpInst *CMP = dyn_cast_or_null<CmpInst>(EntryBr->getCondition());
2874 
2875   EXPECT_NE(CMP, nullptr);
2876   EXPECT_NE(NotMasterBB, nullptr);
2877   EXPECT_NE(CopyinEnd, nullptr);
2878 
2879   BranchInst *NotMasterBr =
2880       dyn_cast_or_null<BranchInst>(NotMasterBB->getTerminator());
2881   EXPECT_NE(NotMasterBr, nullptr);
2882   EXPECT_FALSE(NotMasterBr->isConditional());
2883   EXPECT_EQ(CopyinEnd, NotMasterBr->getSuccessor(0));
2884 }
2885 
2886 TEST_F(OpenMPIRBuilderTest, SingleDirective) {
2887   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2888   OpenMPIRBuilder OMPBuilder(*M);
2889   OMPBuilder.initialize();
2890   F->setName("func");
2891   IRBuilder<> Builder(BB);
2892 
2893   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2894 
2895   AllocaInst *PrivAI = nullptr;
2896 
2897   BasicBlock *EntryBB = nullptr;
2898   BasicBlock *ThenBB = nullptr;
2899 
2900   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
2901     if (AllocaIP.isSet())
2902       Builder.restoreIP(AllocaIP);
2903     else
2904       Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
2905     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
2906     Builder.CreateStore(F->arg_begin(), PrivAI);
2907 
2908     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
2909     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
2910     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
2911 
2912     Builder.restoreIP(CodeGenIP);
2913 
2914     // collect some info for checks later
2915     ThenBB = Builder.GetInsertBlock();
2916     EntryBB = ThenBB->getUniquePredecessor();
2917 
2918     // simple instructions for body
2919     Value *PrivLoad =
2920         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
2921     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
2922   };
2923 
2924   auto FiniCB = [&](InsertPointTy IP) {
2925     BasicBlock *IPBB = IP.getBlock();
2926     EXPECT_NE(IPBB->end(), IP.getPoint());
2927   };
2928 
2929   Builder.restoreIP(OMPBuilder.createSingle(
2930       Builder, BodyGenCB, FiniCB, /*IsNowait*/ false, /*DidIt*/ nullptr));
2931   Value *EntryBBTI = EntryBB->getTerminator();
2932   EXPECT_NE(EntryBBTI, nullptr);
2933   EXPECT_TRUE(isa<BranchInst>(EntryBBTI));
2934   BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator());
2935   EXPECT_TRUE(EntryBr->isConditional());
2936   EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB);
2937   BasicBlock *ExitBB = ThenBB->getUniqueSuccessor();
2938   EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB);
2939 
2940   CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition());
2941   EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0)));
2942 
2943   CallInst *SingleEntryCI = cast<CallInst>(CondInst->getOperand(0));
2944   EXPECT_EQ(SingleEntryCI->arg_size(), 2U);
2945   EXPECT_EQ(SingleEntryCI->getCalledFunction()->getName(), "__kmpc_single");
2946   EXPECT_TRUE(isa<GlobalVariable>(SingleEntryCI->getArgOperand(0)));
2947 
2948   CallInst *SingleEndCI = nullptr;
2949   for (auto &FI : *ThenBB) {
2950     Instruction *cur = &FI;
2951     if (isa<CallInst>(cur)) {
2952       SingleEndCI = cast<CallInst>(cur);
2953       if (SingleEndCI->getCalledFunction()->getName() == "__kmpc_end_single")
2954         break;
2955       SingleEndCI = nullptr;
2956     }
2957   }
2958   EXPECT_NE(SingleEndCI, nullptr);
2959   EXPECT_EQ(SingleEndCI->arg_size(), 2U);
2960   EXPECT_TRUE(isa<GlobalVariable>(SingleEndCI->getArgOperand(0)));
2961   EXPECT_EQ(SingleEndCI->getArgOperand(1), SingleEntryCI->getArgOperand(1));
2962 
2963   bool FoundBarrier = false;
2964   for (auto &FI : *ExitBB) {
2965     Instruction *cur = &FI;
2966     if (auto CI = dyn_cast<CallInst>(cur)) {
2967       if (CI->getCalledFunction()->getName() == "__kmpc_barrier") {
2968         FoundBarrier = true;
2969         break;
2970       }
2971     }
2972   }
2973   EXPECT_TRUE(FoundBarrier);
2974 }
2975 
2976 TEST_F(OpenMPIRBuilderTest, SingleDirectiveNowait) {
2977   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2978   OpenMPIRBuilder OMPBuilder(*M);
2979   OMPBuilder.initialize();
2980   F->setName("func");
2981   IRBuilder<> Builder(BB);
2982 
2983   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2984 
2985   AllocaInst *PrivAI = nullptr;
2986 
2987   BasicBlock *EntryBB = nullptr;
2988   BasicBlock *ThenBB = nullptr;
2989 
2990   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
2991     if (AllocaIP.isSet())
2992       Builder.restoreIP(AllocaIP);
2993     else
2994       Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
2995     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
2996     Builder.CreateStore(F->arg_begin(), PrivAI);
2997 
2998     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
2999     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
3000     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
3001 
3002     Builder.restoreIP(CodeGenIP);
3003 
3004     // collect some info for checks later
3005     ThenBB = Builder.GetInsertBlock();
3006     EntryBB = ThenBB->getUniquePredecessor();
3007 
3008     // simple instructions for body
3009     Value *PrivLoad =
3010         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
3011     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
3012   };
3013 
3014   auto FiniCB = [&](InsertPointTy IP) {
3015     BasicBlock *IPBB = IP.getBlock();
3016     EXPECT_NE(IPBB->end(), IP.getPoint());
3017   };
3018 
3019   Builder.restoreIP(OMPBuilder.createSingle(
3020       Builder, BodyGenCB, FiniCB, /*IsNowait*/ true, /*DidIt*/ nullptr));
3021   Value *EntryBBTI = EntryBB->getTerminator();
3022   EXPECT_NE(EntryBBTI, nullptr);
3023   EXPECT_TRUE(isa<BranchInst>(EntryBBTI));
3024   BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator());
3025   EXPECT_TRUE(EntryBr->isConditional());
3026   EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB);
3027   BasicBlock *ExitBB = ThenBB->getUniqueSuccessor();
3028   EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB);
3029 
3030   CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition());
3031   EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0)));
3032 
3033   CallInst *SingleEntryCI = cast<CallInst>(CondInst->getOperand(0));
3034   EXPECT_EQ(SingleEntryCI->arg_size(), 2U);
3035   EXPECT_EQ(SingleEntryCI->getCalledFunction()->getName(), "__kmpc_single");
3036   EXPECT_TRUE(isa<GlobalVariable>(SingleEntryCI->getArgOperand(0)));
3037 
3038   CallInst *SingleEndCI = nullptr;
3039   for (auto &FI : *ThenBB) {
3040     Instruction *cur = &FI;
3041     if (isa<CallInst>(cur)) {
3042       SingleEndCI = cast<CallInst>(cur);
3043       if (SingleEndCI->getCalledFunction()->getName() == "__kmpc_end_single")
3044         break;
3045       SingleEndCI = nullptr;
3046     }
3047   }
3048   EXPECT_NE(SingleEndCI, nullptr);
3049   EXPECT_EQ(SingleEndCI->arg_size(), 2U);
3050   EXPECT_TRUE(isa<GlobalVariable>(SingleEndCI->getArgOperand(0)));
3051   EXPECT_EQ(SingleEndCI->getArgOperand(1), SingleEntryCI->getArgOperand(1));
3052 
3053   CallInst *ExitBarrier = nullptr;
3054   for (auto &FI : *ExitBB) {
3055     Instruction *cur = &FI;
3056     if (auto CI = dyn_cast<CallInst>(cur)) {
3057       if (CI->getCalledFunction()->getName() == "__kmpc_barrier") {
3058         ExitBarrier = CI;
3059         break;
3060       }
3061     }
3062   }
3063   EXPECT_EQ(ExitBarrier, nullptr);
3064 }
3065 
3066 TEST_F(OpenMPIRBuilderTest, OMPAtomicReadFlt) {
3067   OpenMPIRBuilder OMPBuilder(*M);
3068   OMPBuilder.initialize();
3069   F->setName("func");
3070   IRBuilder<> Builder(BB);
3071 
3072   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3073 
3074   Type *Float32 = Type::getFloatTy(M->getContext());
3075   AllocaInst *XVal = Builder.CreateAlloca(Float32);
3076   XVal->setName("AtomicVar");
3077   AllocaInst *VVal = Builder.CreateAlloca(Float32);
3078   VVal->setName("AtomicRead");
3079   AtomicOrdering AO = AtomicOrdering::Monotonic;
3080   OpenMPIRBuilder::AtomicOpValue X = {XVal, Float32, false, false};
3081   OpenMPIRBuilder::AtomicOpValue V = {VVal, Float32, false, false};
3082 
3083   Builder.restoreIP(OMPBuilder.createAtomicRead(Loc, X, V, AO));
3084 
3085   IntegerType *IntCastTy =
3086       IntegerType::get(M->getContext(), Float32->getScalarSizeInBits());
3087 
3088   BitCastInst *CastFrmFlt = cast<BitCastInst>(VVal->getNextNode());
3089   EXPECT_EQ(CastFrmFlt->getSrcTy(), Float32->getPointerTo());
3090   EXPECT_EQ(CastFrmFlt->getDestTy(), IntCastTy->getPointerTo());
3091   EXPECT_EQ(CastFrmFlt->getOperand(0), XVal);
3092 
3093   LoadInst *AtomicLoad = cast<LoadInst>(CastFrmFlt->getNextNode());
3094   EXPECT_TRUE(AtomicLoad->isAtomic());
3095   EXPECT_EQ(AtomicLoad->getPointerOperand(), CastFrmFlt);
3096 
3097   BitCastInst *CastToFlt = cast<BitCastInst>(AtomicLoad->getNextNode());
3098   EXPECT_EQ(CastToFlt->getSrcTy(), IntCastTy);
3099   EXPECT_EQ(CastToFlt->getDestTy(), Float32);
3100   EXPECT_EQ(CastToFlt->getOperand(0), AtomicLoad);
3101 
3102   StoreInst *StoreofAtomic = cast<StoreInst>(CastToFlt->getNextNode());
3103   EXPECT_EQ(StoreofAtomic->getValueOperand(), CastToFlt);
3104   EXPECT_EQ(StoreofAtomic->getPointerOperand(), VVal);
3105 
3106   Builder.CreateRetVoid();
3107   OMPBuilder.finalize();
3108   EXPECT_FALSE(verifyModule(*M, &errs()));
3109 }
3110 
3111 TEST_F(OpenMPIRBuilderTest, OMPAtomicReadInt) {
3112   OpenMPIRBuilder OMPBuilder(*M);
3113   OMPBuilder.initialize();
3114   F->setName("func");
3115   IRBuilder<> Builder(BB);
3116 
3117   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3118 
3119   IntegerType *Int32 = Type::getInt32Ty(M->getContext());
3120   AllocaInst *XVal = Builder.CreateAlloca(Int32);
3121   XVal->setName("AtomicVar");
3122   AllocaInst *VVal = Builder.CreateAlloca(Int32);
3123   VVal->setName("AtomicRead");
3124   AtomicOrdering AO = AtomicOrdering::Monotonic;
3125   OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false};
3126   OpenMPIRBuilder::AtomicOpValue V = {VVal, Int32, false, false};
3127 
3128   BasicBlock *EntryBB = BB;
3129 
3130   Builder.restoreIP(OMPBuilder.createAtomicRead(Loc, X, V, AO));
3131   LoadInst *AtomicLoad = nullptr;
3132   StoreInst *StoreofAtomic = nullptr;
3133 
3134   for (Instruction &Cur : *EntryBB) {
3135     if (isa<LoadInst>(Cur)) {
3136       AtomicLoad = cast<LoadInst>(&Cur);
3137       if (AtomicLoad->getPointerOperand() == XVal)
3138         continue;
3139       AtomicLoad = nullptr;
3140     } else if (isa<StoreInst>(Cur)) {
3141       StoreofAtomic = cast<StoreInst>(&Cur);
3142       if (StoreofAtomic->getPointerOperand() == VVal)
3143         continue;
3144       StoreofAtomic = nullptr;
3145     }
3146   }
3147 
3148   EXPECT_NE(AtomicLoad, nullptr);
3149   EXPECT_TRUE(AtomicLoad->isAtomic());
3150 
3151   EXPECT_NE(StoreofAtomic, nullptr);
3152   EXPECT_EQ(StoreofAtomic->getValueOperand(), AtomicLoad);
3153 
3154   Builder.CreateRetVoid();
3155   OMPBuilder.finalize();
3156 
3157   EXPECT_FALSE(verifyModule(*M, &errs()));
3158 }
3159 
3160 TEST_F(OpenMPIRBuilderTest, OMPAtomicWriteFlt) {
3161   OpenMPIRBuilder OMPBuilder(*M);
3162   OMPBuilder.initialize();
3163   F->setName("func");
3164   IRBuilder<> Builder(BB);
3165 
3166   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3167 
3168   LLVMContext &Ctx = M->getContext();
3169   Type *Float32 = Type::getFloatTy(Ctx);
3170   AllocaInst *XVal = Builder.CreateAlloca(Float32);
3171   XVal->setName("AtomicVar");
3172   OpenMPIRBuilder::AtomicOpValue X = {XVal, Float32, false, false};
3173   AtomicOrdering AO = AtomicOrdering::Monotonic;
3174   Constant *ValToWrite = ConstantFP::get(Float32, 1.0);
3175 
3176   Builder.restoreIP(OMPBuilder.createAtomicWrite(Loc, X, ValToWrite, AO));
3177 
3178   IntegerType *IntCastTy =
3179       IntegerType::get(M->getContext(), Float32->getScalarSizeInBits());
3180 
3181   BitCastInst *CastFrmFlt = cast<BitCastInst>(XVal->getNextNode());
3182   EXPECT_EQ(CastFrmFlt->getSrcTy(), Float32->getPointerTo());
3183   EXPECT_EQ(CastFrmFlt->getDestTy(), IntCastTy->getPointerTo());
3184   EXPECT_EQ(CastFrmFlt->getOperand(0), XVal);
3185 
3186   Value *ExprCast = Builder.CreateBitCast(ValToWrite, IntCastTy);
3187 
3188   StoreInst *StoreofAtomic = cast<StoreInst>(CastFrmFlt->getNextNode());
3189   EXPECT_EQ(StoreofAtomic->getValueOperand(), ExprCast);
3190   EXPECT_EQ(StoreofAtomic->getPointerOperand(), CastFrmFlt);
3191   EXPECT_TRUE(StoreofAtomic->isAtomic());
3192 
3193   Builder.CreateRetVoid();
3194   OMPBuilder.finalize();
3195   EXPECT_FALSE(verifyModule(*M, &errs()));
3196 }
3197 
3198 TEST_F(OpenMPIRBuilderTest, OMPAtomicWriteInt) {
3199   OpenMPIRBuilder OMPBuilder(*M);
3200   OMPBuilder.initialize();
3201   F->setName("func");
3202   IRBuilder<> Builder(BB);
3203 
3204   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3205 
3206   LLVMContext &Ctx = M->getContext();
3207   IntegerType *Int32 = Type::getInt32Ty(Ctx);
3208   AllocaInst *XVal = Builder.CreateAlloca(Int32);
3209   XVal->setName("AtomicVar");
3210   OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false};
3211   AtomicOrdering AO = AtomicOrdering::Monotonic;
3212   ConstantInt *ValToWrite = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
3213 
3214   BasicBlock *EntryBB = BB;
3215 
3216   Builder.restoreIP(OMPBuilder.createAtomicWrite(Loc, X, ValToWrite, AO));
3217 
3218   StoreInst *StoreofAtomic = nullptr;
3219 
3220   for (Instruction &Cur : *EntryBB) {
3221     if (isa<StoreInst>(Cur)) {
3222       StoreofAtomic = cast<StoreInst>(&Cur);
3223       if (StoreofAtomic->getPointerOperand() == XVal)
3224         continue;
3225       StoreofAtomic = nullptr;
3226     }
3227   }
3228 
3229   EXPECT_NE(StoreofAtomic, nullptr);
3230   EXPECT_TRUE(StoreofAtomic->isAtomic());
3231   EXPECT_EQ(StoreofAtomic->getValueOperand(), ValToWrite);
3232 
3233   Builder.CreateRetVoid();
3234   OMPBuilder.finalize();
3235   EXPECT_FALSE(verifyModule(*M, &errs()));
3236 }
3237 
3238 TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdate) {
3239   OpenMPIRBuilder OMPBuilder(*M);
3240   OMPBuilder.initialize();
3241   F->setName("func");
3242   IRBuilder<> Builder(BB);
3243 
3244   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3245 
3246   IntegerType *Int32 = Type::getInt32Ty(M->getContext());
3247   AllocaInst *XVal = Builder.CreateAlloca(Int32);
3248   XVal->setName("AtomicVar");
3249   Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal);
3250   OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false};
3251   AtomicOrdering AO = AtomicOrdering::Monotonic;
3252   ConstantInt *ConstVal = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
3253   Value *Expr = nullptr;
3254   AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::Sub;
3255   bool IsXLHSInRHSPart = false;
3256 
3257   BasicBlock *EntryBB = BB;
3258   OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB,
3259                                           EntryBB->getFirstInsertionPt());
3260   Value *Sub = nullptr;
3261 
3262   auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) {
3263     Sub = IRB.CreateSub(ConstVal, Atomic);
3264     return Sub;
3265   };
3266   Builder.restoreIP(OMPBuilder.createAtomicUpdate(
3267       Builder, AllocaIP, X, Expr, AO, RMWOp, UpdateOp, IsXLHSInRHSPart));
3268   BasicBlock *ContBB = EntryBB->getSingleSuccessor();
3269   BranchInst *ContTI = dyn_cast<BranchInst>(ContBB->getTerminator());
3270   EXPECT_NE(ContTI, nullptr);
3271   BasicBlock *EndBB = ContTI->getSuccessor(0);
3272   EXPECT_TRUE(ContTI->isConditional());
3273   EXPECT_EQ(ContTI->getSuccessor(1), ContBB);
3274   EXPECT_NE(EndBB, nullptr);
3275 
3276   PHINode *Phi = dyn_cast<PHINode>(&ContBB->front());
3277   EXPECT_NE(Phi, nullptr);
3278   EXPECT_EQ(Phi->getNumIncomingValues(), 2U);
3279   EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
3280   EXPECT_EQ(Phi->getIncomingBlock(1), ContBB);
3281 
3282   EXPECT_EQ(Sub->getNumUses(), 1U);
3283   StoreInst *St = dyn_cast<StoreInst>(Sub->user_back());
3284   AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand());
3285 
3286   ExtractValueInst *ExVI1 =
3287       dyn_cast<ExtractValueInst>(Phi->getIncomingValueForBlock(ContBB));
3288   EXPECT_NE(ExVI1, nullptr);
3289   AtomicCmpXchgInst *CmpExchg =
3290       dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand());
3291   EXPECT_NE(CmpExchg, nullptr);
3292   EXPECT_EQ(CmpExchg->getPointerOperand(), XVal);
3293   EXPECT_EQ(CmpExchg->getCompareOperand(), Phi);
3294   EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic);
3295 
3296   LoadInst *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand());
3297   EXPECT_NE(Ld, nullptr);
3298   EXPECT_EQ(UpdateTemp, Ld->getPointerOperand());
3299 
3300   Builder.CreateRetVoid();
3301   OMPBuilder.finalize();
3302   EXPECT_FALSE(verifyModule(*M, &errs()));
3303 }
3304 
3305 TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateFloat) {
3306   OpenMPIRBuilder OMPBuilder(*M);
3307   OMPBuilder.initialize();
3308   F->setName("func");
3309   IRBuilder<> Builder(BB);
3310 
3311   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3312 
3313   Type *FloatTy = Type::getFloatTy(M->getContext());
3314   AllocaInst *XVal = Builder.CreateAlloca(FloatTy);
3315   XVal->setName("AtomicVar");
3316   Builder.CreateStore(ConstantFP::get(Type::getFloatTy(Ctx), 0.0), XVal);
3317   OpenMPIRBuilder::AtomicOpValue X = {XVal, FloatTy, false, false};
3318   AtomicOrdering AO = AtomicOrdering::Monotonic;
3319   Constant *ConstVal = ConstantFP::get(Type::getFloatTy(Ctx), 1.0);
3320   Value *Expr = nullptr;
3321   AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::FSub;
3322   bool IsXLHSInRHSPart = false;
3323 
3324   BasicBlock *EntryBB = BB;
3325   OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB,
3326                                           EntryBB->getFirstInsertionPt());
3327   Value *Sub = nullptr;
3328 
3329   auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) {
3330     Sub = IRB.CreateFSub(ConstVal, Atomic);
3331     return Sub;
3332   };
3333   Builder.restoreIP(OMPBuilder.createAtomicUpdate(
3334       Builder, AllocaIP, X, Expr, AO, RMWOp, UpdateOp, IsXLHSInRHSPart));
3335   BasicBlock *ContBB = EntryBB->getSingleSuccessor();
3336   BranchInst *ContTI = dyn_cast<BranchInst>(ContBB->getTerminator());
3337   EXPECT_NE(ContTI, nullptr);
3338   BasicBlock *EndBB = ContTI->getSuccessor(0);
3339   EXPECT_TRUE(ContTI->isConditional());
3340   EXPECT_EQ(ContTI->getSuccessor(1), ContBB);
3341   EXPECT_NE(EndBB, nullptr);
3342 
3343   PHINode *Phi = dyn_cast<PHINode>(&ContBB->front());
3344   EXPECT_NE(Phi, nullptr);
3345   EXPECT_EQ(Phi->getNumIncomingValues(), 2U);
3346   EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
3347   EXPECT_EQ(Phi->getIncomingBlock(1), ContBB);
3348 
3349   EXPECT_EQ(Sub->getNumUses(), 1U);
3350   StoreInst *St = dyn_cast<StoreInst>(Sub->user_back());
3351   AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand());
3352 
3353   ExtractValueInst *ExVI1 =
3354       dyn_cast<ExtractValueInst>(Phi->getIncomingValueForBlock(ContBB));
3355   EXPECT_NE(ExVI1, nullptr);
3356   AtomicCmpXchgInst *CmpExchg =
3357       dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand());
3358   EXPECT_NE(CmpExchg, nullptr);
3359   BitCastInst *BitCastNew =
3360       dyn_cast<BitCastInst>(CmpExchg->getPointerOperand());
3361   EXPECT_NE(BitCastNew, nullptr);
3362   EXPECT_EQ(BitCastNew->getOperand(0), XVal);
3363   EXPECT_EQ(CmpExchg->getCompareOperand(), Phi);
3364   EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic);
3365 
3366   LoadInst *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand());
3367   EXPECT_NE(Ld, nullptr);
3368   BitCastInst *BitCastOld = dyn_cast<BitCastInst>(Ld->getPointerOperand());
3369   EXPECT_NE(BitCastOld, nullptr);
3370   EXPECT_EQ(UpdateTemp, BitCastOld->getOperand(0));
3371 
3372   Builder.CreateRetVoid();
3373   OMPBuilder.finalize();
3374   EXPECT_FALSE(verifyModule(*M, &errs()));
3375 }
3376 
3377 TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateIntr) {
3378   OpenMPIRBuilder OMPBuilder(*M);
3379   OMPBuilder.initialize();
3380   F->setName("func");
3381   IRBuilder<> Builder(BB);
3382 
3383   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3384 
3385   Type *IntTy = Type::getInt32Ty(M->getContext());
3386   AllocaInst *XVal = Builder.CreateAlloca(IntTy);
3387   XVal->setName("AtomicVar");
3388   Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0), XVal);
3389   OpenMPIRBuilder::AtomicOpValue X = {XVal, IntTy, false, false};
3390   AtomicOrdering AO = AtomicOrdering::Monotonic;
3391   Constant *ConstVal = ConstantInt::get(Type::getInt32Ty(Ctx), 1);
3392   Value *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1);
3393   AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::UMax;
3394   bool IsXLHSInRHSPart = false;
3395 
3396   BasicBlock *EntryBB = BB;
3397   OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB,
3398                                           EntryBB->getFirstInsertionPt());
3399   Value *Sub = nullptr;
3400 
3401   auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) {
3402     Sub = IRB.CreateSub(ConstVal, Atomic);
3403     return Sub;
3404   };
3405   Builder.restoreIP(OMPBuilder.createAtomicUpdate(
3406       Builder, AllocaIP, X, Expr, AO, RMWOp, UpdateOp, IsXLHSInRHSPart));
3407   BasicBlock *ContBB = EntryBB->getSingleSuccessor();
3408   BranchInst *ContTI = dyn_cast<BranchInst>(ContBB->getTerminator());
3409   EXPECT_NE(ContTI, nullptr);
3410   BasicBlock *EndBB = ContTI->getSuccessor(0);
3411   EXPECT_TRUE(ContTI->isConditional());
3412   EXPECT_EQ(ContTI->getSuccessor(1), ContBB);
3413   EXPECT_NE(EndBB, nullptr);
3414 
3415   PHINode *Phi = dyn_cast<PHINode>(&ContBB->front());
3416   EXPECT_NE(Phi, nullptr);
3417   EXPECT_EQ(Phi->getNumIncomingValues(), 2U);
3418   EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
3419   EXPECT_EQ(Phi->getIncomingBlock(1), ContBB);
3420 
3421   EXPECT_EQ(Sub->getNumUses(), 1U);
3422   StoreInst *St = dyn_cast<StoreInst>(Sub->user_back());
3423   AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand());
3424 
3425   ExtractValueInst *ExVI1 =
3426       dyn_cast<ExtractValueInst>(Phi->getIncomingValueForBlock(ContBB));
3427   EXPECT_NE(ExVI1, nullptr);
3428   AtomicCmpXchgInst *CmpExchg =
3429       dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand());
3430   EXPECT_NE(CmpExchg, nullptr);
3431   EXPECT_EQ(CmpExchg->getPointerOperand(), XVal);
3432   EXPECT_EQ(CmpExchg->getCompareOperand(), Phi);
3433   EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic);
3434 
3435   LoadInst *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand());
3436   EXPECT_NE(Ld, nullptr);
3437   EXPECT_EQ(UpdateTemp, Ld->getPointerOperand());
3438 
3439   Builder.CreateRetVoid();
3440   OMPBuilder.finalize();
3441   EXPECT_FALSE(verifyModule(*M, &errs()));
3442 }
3443 
3444 TEST_F(OpenMPIRBuilderTest, OMPAtomicCapture) {
3445   OpenMPIRBuilder OMPBuilder(*M);
3446   OMPBuilder.initialize();
3447   F->setName("func");
3448   IRBuilder<> Builder(BB);
3449 
3450   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3451 
3452   LLVMContext &Ctx = M->getContext();
3453   IntegerType *Int32 = Type::getInt32Ty(Ctx);
3454   AllocaInst *XVal = Builder.CreateAlloca(Int32);
3455   XVal->setName("AtomicVar");
3456   AllocaInst *VVal = Builder.CreateAlloca(Int32);
3457   VVal->setName("AtomicCapTar");
3458   StoreInst *Init =
3459       Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal);
3460 
3461   OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false};
3462   OpenMPIRBuilder::AtomicOpValue V = {VVal, Int32, false, false};
3463   AtomicOrdering AO = AtomicOrdering::Monotonic;
3464   ConstantInt *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
3465   AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::Add;
3466   bool IsXLHSInRHSPart = true;
3467   bool IsPostfixUpdate = true;
3468   bool UpdateExpr = true;
3469 
3470   BasicBlock *EntryBB = BB;
3471   OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB,
3472                                           EntryBB->getFirstInsertionPt());
3473 
3474   // integer update - not used
3475   auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) { return nullptr; };
3476 
3477   Builder.restoreIP(OMPBuilder.createAtomicCapture(
3478       Builder, AllocaIP, X, V, Expr, AO, RMWOp, UpdateOp, UpdateExpr,
3479       IsPostfixUpdate, IsXLHSInRHSPart));
3480   EXPECT_EQ(EntryBB->getParent()->size(), 1U);
3481   AtomicRMWInst *ARWM = dyn_cast<AtomicRMWInst>(Init->getNextNode());
3482   EXPECT_NE(ARWM, nullptr);
3483   EXPECT_EQ(ARWM->getPointerOperand(), XVal);
3484   EXPECT_EQ(ARWM->getOperation(), RMWOp);
3485   StoreInst *St = dyn_cast<StoreInst>(ARWM->user_back());
3486   EXPECT_NE(St, nullptr);
3487   EXPECT_EQ(St->getPointerOperand(), VVal);
3488 
3489   Builder.CreateRetVoid();
3490   OMPBuilder.finalize();
3491   EXPECT_FALSE(verifyModule(*M, &errs()));
3492 }
3493 
3494 TEST_F(OpenMPIRBuilderTest, OMPAtomicCompare) {
3495   OpenMPIRBuilder OMPBuilder(*M);
3496   OMPBuilder.initialize();
3497   F->setName("func");
3498   IRBuilder<> Builder(BB);
3499 
3500   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3501 
3502   LLVMContext &Ctx = M->getContext();
3503   IntegerType *Int32 = Type::getInt32Ty(Ctx);
3504   AllocaInst *XVal = Builder.CreateAlloca(Int32);
3505   XVal->setName("x");
3506   StoreInst *Init =
3507       Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal);
3508 
3509   OpenMPIRBuilder::AtomicOpValue XSigned = {XVal, Int32, true, false};
3510   OpenMPIRBuilder::AtomicOpValue XUnsigned = {XVal, Int32, false, false};
3511   // V and R are not used in atomic compare
3512   OpenMPIRBuilder::AtomicOpValue V = {nullptr, nullptr, false, false};
3513   OpenMPIRBuilder::AtomicOpValue R = {nullptr, nullptr, false, false};
3514   AtomicOrdering AO = AtomicOrdering::Monotonic;
3515   ConstantInt *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
3516   ConstantInt *D = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
3517   OMPAtomicCompareOp OpMax = OMPAtomicCompareOp::MAX;
3518   OMPAtomicCompareOp OpEQ = OMPAtomicCompareOp::EQ;
3519 
3520   Builder.restoreIP(OMPBuilder.createAtomicCompare(
3521       Builder, XSigned, V, R, Expr, nullptr, AO, OpMax, true, false, false));
3522   Builder.restoreIP(OMPBuilder.createAtomicCompare(
3523       Builder, XUnsigned, V, R, Expr, nullptr, AO, OpMax, false, false, false));
3524   Builder.restoreIP(OMPBuilder.createAtomicCompare(
3525       Builder, XSigned, V, R, Expr, D, AO, OpEQ, true, false, false));
3526 
3527   BasicBlock *EntryBB = BB;
3528   EXPECT_EQ(EntryBB->getParent()->size(), 1U);
3529   EXPECT_EQ(EntryBB->size(), 5U);
3530 
3531   AtomicRMWInst *ARWM1 = dyn_cast<AtomicRMWInst>(Init->getNextNode());
3532   EXPECT_NE(ARWM1, nullptr);
3533   EXPECT_EQ(ARWM1->getPointerOperand(), XVal);
3534   EXPECT_EQ(ARWM1->getValOperand(), Expr);
3535   EXPECT_EQ(ARWM1->getOperation(), AtomicRMWInst::Min);
3536 
3537   AtomicRMWInst *ARWM2 = dyn_cast<AtomicRMWInst>(ARWM1->getNextNode());
3538   EXPECT_NE(ARWM2, nullptr);
3539   EXPECT_EQ(ARWM2->getPointerOperand(), XVal);
3540   EXPECT_EQ(ARWM2->getValOperand(), Expr);
3541   EXPECT_EQ(ARWM2->getOperation(), AtomicRMWInst::UMax);
3542 
3543   AtomicCmpXchgInst *AXCHG = dyn_cast<AtomicCmpXchgInst>(ARWM2->getNextNode());
3544   EXPECT_NE(AXCHG, nullptr);
3545   EXPECT_EQ(AXCHG->getPointerOperand(), XVal);
3546   EXPECT_EQ(AXCHG->getCompareOperand(), Expr);
3547   EXPECT_EQ(AXCHG->getNewValOperand(), D);
3548 
3549   Builder.CreateRetVoid();
3550   OMPBuilder.finalize();
3551   EXPECT_FALSE(verifyModule(*M, &errs()));
3552 }
3553 
3554 TEST_F(OpenMPIRBuilderTest, OMPAtomicCompareCapture) {
3555   OpenMPIRBuilder OMPBuilder(*M);
3556   OMPBuilder.initialize();
3557   F->setName("func");
3558   IRBuilder<> Builder(BB);
3559 
3560   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3561 
3562   LLVMContext &Ctx = M->getContext();
3563   IntegerType *Int32 = Type::getInt32Ty(Ctx);
3564   AllocaInst *XVal = Builder.CreateAlloca(Int32);
3565   XVal->setName("x");
3566   AllocaInst *VVal = Builder.CreateAlloca(Int32);
3567   VVal->setName("v");
3568   AllocaInst *RVal = Builder.CreateAlloca(Int32);
3569   RVal->setName("r");
3570 
3571   StoreInst *Init =
3572       Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal);
3573 
3574   OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, true, false};
3575   OpenMPIRBuilder::AtomicOpValue V = {VVal, Int32, false, false};
3576   OpenMPIRBuilder::AtomicOpValue NoV = {nullptr, nullptr, false, false};
3577   OpenMPIRBuilder::AtomicOpValue R = {RVal, Int32, false, false};
3578   OpenMPIRBuilder::AtomicOpValue NoR = {nullptr, nullptr, false, false};
3579 
3580   AtomicOrdering AO = AtomicOrdering::Monotonic;
3581   ConstantInt *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
3582   ConstantInt *D = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
3583   OMPAtomicCompareOp OpMax = OMPAtomicCompareOp::MAX;
3584   OMPAtomicCompareOp OpEQ = OMPAtomicCompareOp::EQ;
3585 
3586   // { cond-update-stmt v = x; }
3587   Builder.restoreIP(OMPBuilder.createAtomicCompare(
3588       Builder, X, V, NoR, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true,
3589       /* IsPostfixUpdate */ false,
3590       /* IsFailOnly */ false));
3591   // { v = x; cond-update-stmt }
3592   Builder.restoreIP(OMPBuilder.createAtomicCompare(
3593       Builder, X, V, NoR, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true,
3594       /* IsPostfixUpdate */ true,
3595       /* IsFailOnly */ false));
3596   // if(x == e) { x = d; } else { v = x; }
3597   Builder.restoreIP(OMPBuilder.createAtomicCompare(
3598       Builder, X, V, NoR, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true,
3599       /* IsPostfixUpdate */ false,
3600       /* IsFailOnly */ true));
3601   // { r = x == e; if(r) { x = d; } }
3602   Builder.restoreIP(OMPBuilder.createAtomicCompare(
3603       Builder, X, NoV, R, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true,
3604       /* IsPostfixUpdate */ false,
3605       /* IsFailOnly */ false));
3606   // { r = x == e; if(r) { x = d; } else { v = x; } }
3607   Builder.restoreIP(OMPBuilder.createAtomicCompare(
3608       Builder, X, V, R, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true,
3609       /* IsPostfixUpdate */ false,
3610       /* IsFailOnly */ true));
3611 
3612   // { v = x; cond-update-stmt }
3613   Builder.restoreIP(OMPBuilder.createAtomicCompare(
3614       Builder, X, V, NoR, Expr, nullptr, AO, OpMax, /* IsXBinopExpr */ true,
3615       /* IsPostfixUpdate */ true,
3616       /* IsFailOnly */ false));
3617   // { cond-update-stmt v = x; }
3618   Builder.restoreIP(OMPBuilder.createAtomicCompare(
3619       Builder, X, V, NoR, Expr, nullptr, AO, OpMax, /* IsXBinopExpr */ false,
3620       /* IsPostfixUpdate */ false,
3621       /* IsFailOnly */ false));
3622 
3623   BasicBlock *EntryBB = BB;
3624   EXPECT_EQ(EntryBB->getParent()->size(), 5U);
3625   BasicBlock *Cont1 = dyn_cast<BasicBlock>(EntryBB->getNextNode());
3626   EXPECT_NE(Cont1, nullptr);
3627   BasicBlock *Exit1 = dyn_cast<BasicBlock>(Cont1->getNextNode());
3628   EXPECT_NE(Exit1, nullptr);
3629   BasicBlock *Cont2 = dyn_cast<BasicBlock>(Exit1->getNextNode());
3630   EXPECT_NE(Cont2, nullptr);
3631   BasicBlock *Exit2 = dyn_cast<BasicBlock>(Cont2->getNextNode());
3632   EXPECT_NE(Exit2, nullptr);
3633 
3634   AtomicCmpXchgInst *CmpXchg1 =
3635       dyn_cast<AtomicCmpXchgInst>(Init->getNextNode());
3636   EXPECT_NE(CmpXchg1, nullptr);
3637   EXPECT_EQ(CmpXchg1->getPointerOperand(), XVal);
3638   EXPECT_EQ(CmpXchg1->getCompareOperand(), Expr);
3639   EXPECT_EQ(CmpXchg1->getNewValOperand(), D);
3640   ExtractValueInst *ExtVal1 =
3641       dyn_cast<ExtractValueInst>(CmpXchg1->getNextNode());
3642   EXPECT_NE(ExtVal1, nullptr);
3643   EXPECT_EQ(ExtVal1->getAggregateOperand(), CmpXchg1);
3644   EXPECT_EQ(ExtVal1->getIndices(), ArrayRef<unsigned int>(0U));
3645   ExtractValueInst *ExtVal2 =
3646       dyn_cast<ExtractValueInst>(ExtVal1->getNextNode());
3647   EXPECT_NE(ExtVal2, nullptr);
3648   EXPECT_EQ(ExtVal2->getAggregateOperand(), CmpXchg1);
3649   EXPECT_EQ(ExtVal2->getIndices(), ArrayRef<unsigned int>(1U));
3650   SelectInst *Sel1 = dyn_cast<SelectInst>(ExtVal2->getNextNode());
3651   EXPECT_NE(Sel1, nullptr);
3652   EXPECT_EQ(Sel1->getCondition(), ExtVal2);
3653   EXPECT_EQ(Sel1->getTrueValue(), Expr);
3654   EXPECT_EQ(Sel1->getFalseValue(), ExtVal1);
3655   StoreInst *Store1 = dyn_cast<StoreInst>(Sel1->getNextNode());
3656   EXPECT_NE(Store1, nullptr);
3657   EXPECT_EQ(Store1->getPointerOperand(), VVal);
3658   EXPECT_EQ(Store1->getValueOperand(), Sel1);
3659 
3660   AtomicCmpXchgInst *CmpXchg2 =
3661       dyn_cast<AtomicCmpXchgInst>(Store1->getNextNode());
3662   EXPECT_NE(CmpXchg2, nullptr);
3663   EXPECT_EQ(CmpXchg2->getPointerOperand(), XVal);
3664   EXPECT_EQ(CmpXchg2->getCompareOperand(), Expr);
3665   EXPECT_EQ(CmpXchg2->getNewValOperand(), D);
3666   ExtractValueInst *ExtVal3 =
3667       dyn_cast<ExtractValueInst>(CmpXchg2->getNextNode());
3668   EXPECT_NE(ExtVal3, nullptr);
3669   EXPECT_EQ(ExtVal3->getAggregateOperand(), CmpXchg2);
3670   EXPECT_EQ(ExtVal3->getIndices(), ArrayRef<unsigned int>(0U));
3671   StoreInst *Store2 = dyn_cast<StoreInst>(ExtVal3->getNextNode());
3672   EXPECT_NE(Store2, nullptr);
3673   EXPECT_EQ(Store2->getPointerOperand(), VVal);
3674   EXPECT_EQ(Store2->getValueOperand(), ExtVal3);
3675 
3676   AtomicCmpXchgInst *CmpXchg3 =
3677       dyn_cast<AtomicCmpXchgInst>(Store2->getNextNode());
3678   EXPECT_NE(CmpXchg3, nullptr);
3679   EXPECT_EQ(CmpXchg3->getPointerOperand(), XVal);
3680   EXPECT_EQ(CmpXchg3->getCompareOperand(), Expr);
3681   EXPECT_EQ(CmpXchg3->getNewValOperand(), D);
3682   ExtractValueInst *ExtVal4 =
3683       dyn_cast<ExtractValueInst>(CmpXchg3->getNextNode());
3684   EXPECT_NE(ExtVal4, nullptr);
3685   EXPECT_EQ(ExtVal4->getAggregateOperand(), CmpXchg3);
3686   EXPECT_EQ(ExtVal4->getIndices(), ArrayRef<unsigned int>(0U));
3687   ExtractValueInst *ExtVal5 =
3688       dyn_cast<ExtractValueInst>(ExtVal4->getNextNode());
3689   EXPECT_NE(ExtVal5, nullptr);
3690   EXPECT_EQ(ExtVal5->getAggregateOperand(), CmpXchg3);
3691   EXPECT_EQ(ExtVal5->getIndices(), ArrayRef<unsigned int>(1U));
3692   BranchInst *Br1 = dyn_cast<BranchInst>(ExtVal5->getNextNode());
3693   EXPECT_NE(Br1, nullptr);
3694   EXPECT_EQ(Br1->isConditional(), true);
3695   EXPECT_EQ(Br1->getCondition(), ExtVal5);
3696   EXPECT_EQ(Br1->getSuccessor(0), Exit1);
3697   EXPECT_EQ(Br1->getSuccessor(1), Cont1);
3698 
3699   StoreInst *Store3 = dyn_cast<StoreInst>(&Cont1->front());
3700   EXPECT_NE(Store3, nullptr);
3701   EXPECT_EQ(Store3->getPointerOperand(), VVal);
3702   EXPECT_EQ(Store3->getValueOperand(), ExtVal4);
3703   BranchInst *Br2 = dyn_cast<BranchInst>(Store3->getNextNode());
3704   EXPECT_NE(Br2, nullptr);
3705   EXPECT_EQ(Br2->isUnconditional(), true);
3706   EXPECT_EQ(Br2->getSuccessor(0), Exit1);
3707 
3708   AtomicCmpXchgInst *CmpXchg4 = dyn_cast<AtomicCmpXchgInst>(&Exit1->front());
3709   EXPECT_NE(CmpXchg4, nullptr);
3710   EXPECT_EQ(CmpXchg4->getPointerOperand(), XVal);
3711   EXPECT_EQ(CmpXchg4->getCompareOperand(), Expr);
3712   EXPECT_EQ(CmpXchg4->getNewValOperand(), D);
3713   ExtractValueInst *ExtVal6 =
3714       dyn_cast<ExtractValueInst>(CmpXchg4->getNextNode());
3715   EXPECT_NE(ExtVal6, nullptr);
3716   EXPECT_EQ(ExtVal6->getAggregateOperand(), CmpXchg4);
3717   EXPECT_EQ(ExtVal6->getIndices(), ArrayRef<unsigned int>(1U));
3718   ZExtInst *ZExt1 = dyn_cast<ZExtInst>(ExtVal6->getNextNode());
3719   EXPECT_NE(ZExt1, nullptr);
3720   EXPECT_EQ(ZExt1->getDestTy(), Int32);
3721   StoreInst *Store4 = dyn_cast<StoreInst>(ZExt1->getNextNode());
3722   EXPECT_NE(Store4, nullptr);
3723   EXPECT_EQ(Store4->getPointerOperand(), RVal);
3724   EXPECT_EQ(Store4->getValueOperand(), ZExt1);
3725 
3726   AtomicCmpXchgInst *CmpXchg5 =
3727       dyn_cast<AtomicCmpXchgInst>(Store4->getNextNode());
3728   EXPECT_NE(CmpXchg5, nullptr);
3729   EXPECT_EQ(CmpXchg5->getPointerOperand(), XVal);
3730   EXPECT_EQ(CmpXchg5->getCompareOperand(), Expr);
3731   EXPECT_EQ(CmpXchg5->getNewValOperand(), D);
3732   ExtractValueInst *ExtVal7 =
3733       dyn_cast<ExtractValueInst>(CmpXchg5->getNextNode());
3734   EXPECT_NE(ExtVal7, nullptr);
3735   EXPECT_EQ(ExtVal7->getAggregateOperand(), CmpXchg5);
3736   EXPECT_EQ(ExtVal7->getIndices(), ArrayRef<unsigned int>(0U));
3737   ExtractValueInst *ExtVal8 =
3738       dyn_cast<ExtractValueInst>(ExtVal7->getNextNode());
3739   EXPECT_NE(ExtVal8, nullptr);
3740   EXPECT_EQ(ExtVal8->getAggregateOperand(), CmpXchg5);
3741   EXPECT_EQ(ExtVal8->getIndices(), ArrayRef<unsigned int>(1U));
3742   BranchInst *Br3 = dyn_cast<BranchInst>(ExtVal8->getNextNode());
3743   EXPECT_NE(Br3, nullptr);
3744   EXPECT_EQ(Br3->isConditional(), true);
3745   EXPECT_EQ(Br3->getCondition(), ExtVal8);
3746   EXPECT_EQ(Br3->getSuccessor(0), Exit2);
3747   EXPECT_EQ(Br3->getSuccessor(1), Cont2);
3748 
3749   StoreInst *Store5 = dyn_cast<StoreInst>(&Cont2->front());
3750   EXPECT_NE(Store5, nullptr);
3751   EXPECT_EQ(Store5->getPointerOperand(), VVal);
3752   EXPECT_EQ(Store5->getValueOperand(), ExtVal7);
3753   BranchInst *Br4 = dyn_cast<BranchInst>(Store5->getNextNode());
3754   EXPECT_NE(Br4, nullptr);
3755   EXPECT_EQ(Br4->isUnconditional(), true);
3756   EXPECT_EQ(Br4->getSuccessor(0), Exit2);
3757 
3758   ExtractValueInst *ExtVal9 = dyn_cast<ExtractValueInst>(&Exit2->front());
3759   EXPECT_NE(ExtVal9, nullptr);
3760   EXPECT_EQ(ExtVal9->getAggregateOperand(), CmpXchg5);
3761   EXPECT_EQ(ExtVal9->getIndices(), ArrayRef<unsigned int>(1U));
3762   ZExtInst *ZExt2 = dyn_cast<ZExtInst>(ExtVal9->getNextNode());
3763   EXPECT_NE(ZExt2, nullptr);
3764   EXPECT_EQ(ZExt2->getDestTy(), Int32);
3765   StoreInst *Store6 = dyn_cast<StoreInst>(ZExt2->getNextNode());
3766   EXPECT_NE(Store6, nullptr);
3767   EXPECT_EQ(Store6->getPointerOperand(), RVal);
3768   EXPECT_EQ(Store6->getValueOperand(), ZExt2);
3769 
3770   AtomicRMWInst *ARWM1 = dyn_cast<AtomicRMWInst>(Store6->getNextNode());
3771   EXPECT_NE(ARWM1, nullptr);
3772   EXPECT_EQ(ARWM1->getPointerOperand(), XVal);
3773   EXPECT_EQ(ARWM1->getValOperand(), Expr);
3774   EXPECT_EQ(ARWM1->getOperation(), AtomicRMWInst::Min);
3775   StoreInst *Store7 = dyn_cast<StoreInst>(ARWM1->getNextNode());
3776   EXPECT_NE(Store7, nullptr);
3777   EXPECT_EQ(Store7->getPointerOperand(), VVal);
3778   EXPECT_EQ(Store7->getValueOperand(), ARWM1);
3779 
3780   AtomicRMWInst *ARWM2 = dyn_cast<AtomicRMWInst>(Store7->getNextNode());
3781   EXPECT_NE(ARWM2, nullptr);
3782   EXPECT_EQ(ARWM2->getPointerOperand(), XVal);
3783   EXPECT_EQ(ARWM2->getValOperand(), Expr);
3784   EXPECT_EQ(ARWM2->getOperation(), AtomicRMWInst::Max);
3785   CmpInst *Cmp1 = dyn_cast<CmpInst>(ARWM2->getNextNode());
3786   EXPECT_NE(Cmp1, nullptr);
3787   EXPECT_EQ(Cmp1->getPredicate(), CmpInst::ICMP_SGT);
3788   EXPECT_EQ(Cmp1->getOperand(0), ARWM2);
3789   EXPECT_EQ(Cmp1->getOperand(1), Expr);
3790   SelectInst *Sel2 = dyn_cast<SelectInst>(Cmp1->getNextNode());
3791   EXPECT_NE(Sel2, nullptr);
3792   EXPECT_EQ(Sel2->getCondition(), Cmp1);
3793   EXPECT_EQ(Sel2->getTrueValue(), Expr);
3794   EXPECT_EQ(Sel2->getFalseValue(), ARWM2);
3795   StoreInst *Store8 = dyn_cast<StoreInst>(Sel2->getNextNode());
3796   EXPECT_NE(Store8, nullptr);
3797   EXPECT_EQ(Store8->getPointerOperand(), VVal);
3798   EXPECT_EQ(Store8->getValueOperand(), Sel2);
3799 
3800   Builder.CreateRetVoid();
3801   OMPBuilder.finalize();
3802   EXPECT_FALSE(verifyModule(*M, &errs()));
3803 }
3804 
3805 /// Returns the single instruction of InstTy type in BB that uses the value V.
3806 /// If there is more than one such instruction, returns null.
3807 template <typename InstTy>
3808 static InstTy *findSingleUserInBlock(Value *V, BasicBlock *BB) {
3809   InstTy *Result = nullptr;
3810   for (User *U : V->users()) {
3811     auto *Inst = dyn_cast<InstTy>(U);
3812     if (!Inst || Inst->getParent() != BB)
3813       continue;
3814     if (Result)
3815       return nullptr;
3816     Result = Inst;
3817   }
3818   return Result;
3819 }
3820 
3821 /// Returns true if BB contains a simple binary reduction that loads a value
3822 /// from Accum, performs some binary operation with it, and stores it back to
3823 /// Accum.
3824 static bool isSimpleBinaryReduction(Value *Accum, BasicBlock *BB,
3825                                     Instruction::BinaryOps *OpCode = nullptr) {
3826   StoreInst *Store = findSingleUserInBlock<StoreInst>(Accum, BB);
3827   if (!Store)
3828     return false;
3829   auto *Stored = dyn_cast<BinaryOperator>(Store->getOperand(0));
3830   if (!Stored)
3831     return false;
3832   if (OpCode && *OpCode != Stored->getOpcode())
3833     return false;
3834   auto *Load = dyn_cast<LoadInst>(Stored->getOperand(0));
3835   return Load && Load->getOperand(0) == Accum;
3836 }
3837 
3838 /// Returns true if BB contains a binary reduction that reduces V using a binary
3839 /// operator into an accumulator that is a function argument.
3840 static bool isValueReducedToFuncArg(Value *V, BasicBlock *BB) {
3841   auto *ReductionOp = findSingleUserInBlock<BinaryOperator>(V, BB);
3842   if (!ReductionOp)
3843     return false;
3844 
3845   auto *GlobalLoad = dyn_cast<LoadInst>(ReductionOp->getOperand(0));
3846   if (!GlobalLoad)
3847     return false;
3848 
3849   auto *Store = findSingleUserInBlock<StoreInst>(ReductionOp, BB);
3850   if (!Store)
3851     return false;
3852 
3853   return Store->getPointerOperand() == GlobalLoad->getPointerOperand() &&
3854          isa<Argument>(findAggregateFromValue(GlobalLoad->getPointerOperand()));
3855 }
3856 
3857 /// Finds among users of Ptr a pair of GEP instructions with indices [0, 0] and
3858 /// [0, 1], respectively, and assigns results of these instructions to Zero and
3859 /// One. Returns true on success, false on failure or if such instructions are
3860 /// not unique among the users of Ptr.
3861 static bool findGEPZeroOne(Value *Ptr, Value *&Zero, Value *&One) {
3862   Zero = nullptr;
3863   One = nullptr;
3864   for (User *U : Ptr->users()) {
3865     if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
3866       if (GEP->getNumIndices() != 2)
3867         continue;
3868       auto *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
3869       auto *SecondIdx = dyn_cast<ConstantInt>(GEP->getOperand(2));
3870       EXPECT_NE(FirstIdx, nullptr);
3871       EXPECT_NE(SecondIdx, nullptr);
3872 
3873       EXPECT_TRUE(FirstIdx->isZero());
3874       if (SecondIdx->isZero()) {
3875         if (Zero)
3876           return false;
3877         Zero = GEP;
3878       } else if (SecondIdx->isOne()) {
3879         if (One)
3880           return false;
3881         One = GEP;
3882       } else {
3883         return false;
3884       }
3885     }
3886   }
3887   return Zero != nullptr && One != nullptr;
3888 }
3889 
3890 static OpenMPIRBuilder::InsertPointTy
3891 sumReduction(OpenMPIRBuilder::InsertPointTy IP, Value *LHS, Value *RHS,
3892              Value *&Result) {
3893   IRBuilder<> Builder(IP.getBlock(), IP.getPoint());
3894   Result = Builder.CreateFAdd(LHS, RHS, "red.add");
3895   return Builder.saveIP();
3896 }
3897 
3898 static OpenMPIRBuilder::InsertPointTy
3899 sumAtomicReduction(OpenMPIRBuilder::InsertPointTy IP, Type *Ty, Value *LHS,
3900                    Value *RHS) {
3901   IRBuilder<> Builder(IP.getBlock(), IP.getPoint());
3902   Value *Partial = Builder.CreateLoad(Ty, RHS, "red.partial");
3903   Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, LHS, Partial, None,
3904                           AtomicOrdering::Monotonic);
3905   return Builder.saveIP();
3906 }
3907 
3908 static OpenMPIRBuilder::InsertPointTy
3909 xorReduction(OpenMPIRBuilder::InsertPointTy IP, Value *LHS, Value *RHS,
3910              Value *&Result) {
3911   IRBuilder<> Builder(IP.getBlock(), IP.getPoint());
3912   Result = Builder.CreateXor(LHS, RHS, "red.xor");
3913   return Builder.saveIP();
3914 }
3915 
3916 static OpenMPIRBuilder::InsertPointTy
3917 xorAtomicReduction(OpenMPIRBuilder::InsertPointTy IP, Type *Ty, Value *LHS,
3918                    Value *RHS) {
3919   IRBuilder<> Builder(IP.getBlock(), IP.getPoint());
3920   Value *Partial = Builder.CreateLoad(Ty, RHS, "red.partial");
3921   Builder.CreateAtomicRMW(AtomicRMWInst::Xor, LHS, Partial, None,
3922                           AtomicOrdering::Monotonic);
3923   return Builder.saveIP();
3924 }
3925 
3926 TEST_F(OpenMPIRBuilderTest, CreateReductions) {
3927   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3928   OpenMPIRBuilder OMPBuilder(*M);
3929   OMPBuilder.initialize();
3930   F->setName("func");
3931   IRBuilder<> Builder(BB);
3932 
3933   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
3934   Builder.CreateBr(EnterBB);
3935   Builder.SetInsertPoint(EnterBB);
3936   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3937 
3938   // Create variables to be reduced.
3939   InsertPointTy OuterAllocaIP(&F->getEntryBlock(),
3940                               F->getEntryBlock().getFirstInsertionPt());
3941   Type *SumType = Builder.getFloatTy();
3942   Type *XorType = Builder.getInt32Ty();
3943   Value *SumReduced;
3944   Value *XorReduced;
3945   {
3946     IRBuilderBase::InsertPointGuard Guard(Builder);
3947     Builder.restoreIP(OuterAllocaIP);
3948     SumReduced = Builder.CreateAlloca(SumType);
3949     XorReduced = Builder.CreateAlloca(XorType);
3950   }
3951 
3952   // Store initial values of reductions into global variables.
3953   Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0), SumReduced);
3954   Builder.CreateStore(Builder.getInt32(1), XorReduced);
3955 
3956   // The loop body computes two reductions:
3957   //   sum of (float) thread-id;
3958   //   xor of thread-id;
3959   // and store the result in global variables.
3960   InsertPointTy BodyIP, BodyAllocaIP;
3961   auto BodyGenCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP) {
3962     IRBuilderBase::InsertPointGuard Guard(Builder);
3963     Builder.restoreIP(CodeGenIP);
3964 
3965     uint32_t StrSize;
3966     Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize);
3967     Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize);
3968     Value *TID = OMPBuilder.getOrCreateThreadID(Ident);
3969     Value *SumLocal =
3970         Builder.CreateUIToFP(TID, Builder.getFloatTy(), "sum.local");
3971     Value *SumPartial = Builder.CreateLoad(SumType, SumReduced, "sum.partial");
3972     Value *XorPartial = Builder.CreateLoad(XorType, XorReduced, "xor.partial");
3973     Value *Sum = Builder.CreateFAdd(SumPartial, SumLocal, "sum");
3974     Value *Xor = Builder.CreateXor(XorPartial, TID, "xor");
3975     Builder.CreateStore(Sum, SumReduced);
3976     Builder.CreateStore(Xor, XorReduced);
3977 
3978     BodyIP = Builder.saveIP();
3979     BodyAllocaIP = InnerAllocaIP;
3980   };
3981 
3982   // Privatization for reduction creates local copies of reduction variables and
3983   // initializes them to reduction-neutral values.
3984   Value *SumPrivatized;
3985   Value *XorPrivatized;
3986   auto PrivCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP,
3987                     Value &Original, Value &Inner, Value *&ReplVal) {
3988     IRBuilderBase::InsertPointGuard Guard(Builder);
3989     Builder.restoreIP(InnerAllocaIP);
3990     if (&Original == SumReduced) {
3991       SumPrivatized = Builder.CreateAlloca(Builder.getFloatTy());
3992       ReplVal = SumPrivatized;
3993     } else if (&Original == XorReduced) {
3994       XorPrivatized = Builder.CreateAlloca(Builder.getInt32Ty());
3995       ReplVal = XorPrivatized;
3996     } else {
3997       ReplVal = &Inner;
3998       return CodeGenIP;
3999     }
4000 
4001     Builder.restoreIP(CodeGenIP);
4002     if (&Original == SumReduced)
4003       Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0),
4004                           SumPrivatized);
4005     else if (&Original == XorReduced)
4006       Builder.CreateStore(Builder.getInt32(0), XorPrivatized);
4007 
4008     return Builder.saveIP();
4009   };
4010 
4011   // Do nothing in finalization.
4012   auto FiniCB = [&](InsertPointTy CodeGenIP) { return CodeGenIP; };
4013 
4014   InsertPointTy AfterIP =
4015       OMPBuilder.createParallel(Loc, OuterAllocaIP, BodyGenCB, PrivCB, FiniCB,
4016                                 /* IfCondition */ nullptr,
4017                                 /* NumThreads */ nullptr, OMP_PROC_BIND_default,
4018                                 /* IsCancellable */ false);
4019   Builder.restoreIP(AfterIP);
4020 
4021   OpenMPIRBuilder::ReductionInfo ReductionInfos[] = {
4022       {SumType, SumReduced, SumPrivatized, sumReduction, sumAtomicReduction},
4023       {XorType, XorReduced, XorPrivatized, xorReduction, xorAtomicReduction}};
4024 
4025   OMPBuilder.createReductions(BodyIP, BodyAllocaIP, ReductionInfos);
4026 
4027   Builder.restoreIP(AfterIP);
4028   Builder.CreateRetVoid();
4029 
4030   OMPBuilder.finalize(F);
4031 
4032   // The IR must be valid.
4033   EXPECT_FALSE(verifyModule(*M));
4034 
4035   // Outlining must have happened.
4036   SmallVector<CallInst *> ForkCalls;
4037   findCalls(F, omp::RuntimeFunction::OMPRTL___kmpc_fork_call, OMPBuilder,
4038             ForkCalls);
4039   ASSERT_EQ(ForkCalls.size(), 1u);
4040   Value *CalleeVal = cast<Constant>(ForkCalls[0]->getOperand(2))->getOperand(0);
4041   Function *Outlined = dyn_cast<Function>(CalleeVal);
4042   EXPECT_NE(Outlined, nullptr);
4043 
4044   // Check that the lock variable was created with the expected name.
4045   GlobalVariable *LockVar =
4046       M->getGlobalVariable(".gomp_critical_user_.reduction.var");
4047   EXPECT_NE(LockVar, nullptr);
4048 
4049   // Find the allocation of a local array that will be used to call the runtime
4050   // reduciton function.
4051   BasicBlock &AllocBlock = Outlined->getEntryBlock();
4052   Value *LocalArray = nullptr;
4053   for (Instruction &I : AllocBlock) {
4054     if (AllocaInst *Alloc = dyn_cast<AllocaInst>(&I)) {
4055       if (!Alloc->getAllocatedType()->isArrayTy() ||
4056           !Alloc->getAllocatedType()->getArrayElementType()->isPointerTy())
4057         continue;
4058       LocalArray = Alloc;
4059       break;
4060     }
4061   }
4062   ASSERT_NE(LocalArray, nullptr);
4063 
4064   // Find the call to the runtime reduction function.
4065   BasicBlock *BB = AllocBlock.getUniqueSuccessor();
4066   Value *LocalArrayPtr = nullptr;
4067   Value *ReductionFnVal = nullptr;
4068   Value *SwitchArg = nullptr;
4069   for (Instruction &I : *BB) {
4070     if (CallInst *Call = dyn_cast<CallInst>(&I)) {
4071       if (Call->getCalledFunction() !=
4072           OMPBuilder.getOrCreateRuntimeFunctionPtr(
4073               RuntimeFunction::OMPRTL___kmpc_reduce))
4074         continue;
4075       LocalArrayPtr = Call->getOperand(4);
4076       ReductionFnVal = Call->getOperand(5);
4077       SwitchArg = Call;
4078       break;
4079     }
4080   }
4081 
4082   // Check that the local array is passed to the function.
4083   ASSERT_NE(LocalArrayPtr, nullptr);
4084   BitCastInst *BitCast = dyn_cast<BitCastInst>(LocalArrayPtr);
4085   ASSERT_NE(BitCast, nullptr);
4086   EXPECT_EQ(BitCast->getOperand(0), LocalArray);
4087 
4088   // Find the GEP instructions preceding stores to the local array.
4089   Value *FirstArrayElemPtr = nullptr;
4090   Value *SecondArrayElemPtr = nullptr;
4091   EXPECT_EQ(LocalArray->getNumUses(), 3u);
4092   ASSERT_TRUE(
4093       findGEPZeroOne(LocalArray, FirstArrayElemPtr, SecondArrayElemPtr));
4094 
4095   // Check that the values stored into the local array are privatized reduction
4096   // variables.
4097   auto *FirstStored = dyn_cast_or_null<BitCastInst>(
4098       findStoredValue<GetElementPtrInst>(FirstArrayElemPtr));
4099   auto *SecondStored = dyn_cast_or_null<BitCastInst>(
4100       findStoredValue<GetElementPtrInst>(SecondArrayElemPtr));
4101   ASSERT_NE(FirstStored, nullptr);
4102   ASSERT_NE(SecondStored, nullptr);
4103   Value *FirstPrivatized = FirstStored->getOperand(0);
4104   Value *SecondPrivatized = SecondStored->getOperand(0);
4105   EXPECT_TRUE(
4106       isSimpleBinaryReduction(FirstPrivatized, FirstStored->getParent()));
4107   EXPECT_TRUE(
4108       isSimpleBinaryReduction(SecondPrivatized, SecondStored->getParent()));
4109 
4110   // Check that the result of the runtime reduction call is used for further
4111   // dispatch.
4112   ASSERT_EQ(SwitchArg->getNumUses(), 1u);
4113   SwitchInst *Switch = dyn_cast<SwitchInst>(*SwitchArg->user_begin());
4114   ASSERT_NE(Switch, nullptr);
4115   EXPECT_EQ(Switch->getNumSuccessors(), 3u);
4116   BasicBlock *NonAtomicBB = Switch->case_begin()->getCaseSuccessor();
4117   BasicBlock *AtomicBB = std::next(Switch->case_begin())->getCaseSuccessor();
4118 
4119   // Non-atomic block contains reductions to the global reduction variable,
4120   // which is passed into the outlined function as an argument.
4121   Value *FirstLoad =
4122       findSingleUserInBlock<LoadInst>(FirstPrivatized, NonAtomicBB);
4123   Value *SecondLoad =
4124       findSingleUserInBlock<LoadInst>(SecondPrivatized, NonAtomicBB);
4125   EXPECT_TRUE(isValueReducedToFuncArg(FirstLoad, NonAtomicBB));
4126   EXPECT_TRUE(isValueReducedToFuncArg(SecondLoad, NonAtomicBB));
4127 
4128   // Atomic block also constains reductions to the global reduction variable.
4129   FirstLoad = findSingleUserInBlock<LoadInst>(FirstPrivatized, AtomicBB);
4130   SecondLoad = findSingleUserInBlock<LoadInst>(SecondPrivatized, AtomicBB);
4131   auto *FirstAtomic = findSingleUserInBlock<AtomicRMWInst>(FirstLoad, AtomicBB);
4132   auto *SecondAtomic =
4133       findSingleUserInBlock<AtomicRMWInst>(SecondLoad, AtomicBB);
4134   ASSERT_NE(FirstAtomic, nullptr);
4135   Value *AtomicStorePointer = FirstAtomic->getPointerOperand();
4136   EXPECT_TRUE(isa<Argument>(findAggregateFromValue(AtomicStorePointer)));
4137   ASSERT_NE(SecondAtomic, nullptr);
4138   AtomicStorePointer = SecondAtomic->getPointerOperand();
4139   EXPECT_TRUE(isa<Argument>(findAggregateFromValue(AtomicStorePointer)));
4140 
4141   // Check that the separate reduction function also performs (non-atomic)
4142   // reductions after extracting reduction variables from its arguments.
4143   Function *ReductionFn = cast<Function>(ReductionFnVal);
4144   BasicBlock *FnReductionBB = &ReductionFn->getEntryBlock();
4145   auto *Bitcast =
4146       findSingleUserInBlock<BitCastInst>(ReductionFn->getArg(0), FnReductionBB);
4147   Value *FirstLHSPtr;
4148   Value *SecondLHSPtr;
4149   ASSERT_TRUE(findGEPZeroOne(Bitcast, FirstLHSPtr, SecondLHSPtr));
4150   Value *Opaque = findSingleUserInBlock<LoadInst>(FirstLHSPtr, FnReductionBB);
4151   ASSERT_NE(Opaque, nullptr);
4152   Bitcast = findSingleUserInBlock<BitCastInst>(Opaque, FnReductionBB);
4153   ASSERT_NE(Bitcast, nullptr);
4154   EXPECT_TRUE(isSimpleBinaryReduction(Bitcast, FnReductionBB));
4155   Opaque = findSingleUserInBlock<LoadInst>(SecondLHSPtr, FnReductionBB);
4156   ASSERT_NE(Opaque, nullptr);
4157   Bitcast = findSingleUserInBlock<BitCastInst>(Opaque, FnReductionBB);
4158   ASSERT_NE(Bitcast, nullptr);
4159   EXPECT_TRUE(isSimpleBinaryReduction(Bitcast, FnReductionBB));
4160 
4161   Bitcast =
4162       findSingleUserInBlock<BitCastInst>(ReductionFn->getArg(1), FnReductionBB);
4163   Value *FirstRHS;
4164   Value *SecondRHS;
4165   EXPECT_TRUE(findGEPZeroOne(Bitcast, FirstRHS, SecondRHS));
4166 }
4167 
4168 TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) {
4169   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4170   OpenMPIRBuilder OMPBuilder(*M);
4171   OMPBuilder.initialize();
4172   F->setName("func");
4173   IRBuilder<> Builder(BB);
4174 
4175   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
4176   Builder.CreateBr(EnterBB);
4177   Builder.SetInsertPoint(EnterBB);
4178   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4179 
4180   // Create variables to be reduced.
4181   InsertPointTy OuterAllocaIP(&F->getEntryBlock(),
4182                               F->getEntryBlock().getFirstInsertionPt());
4183   Type *SumType = Builder.getFloatTy();
4184   Type *XorType = Builder.getInt32Ty();
4185   Value *SumReduced;
4186   Value *XorReduced;
4187   {
4188     IRBuilderBase::InsertPointGuard Guard(Builder);
4189     Builder.restoreIP(OuterAllocaIP);
4190     SumReduced = Builder.CreateAlloca(SumType);
4191     XorReduced = Builder.CreateAlloca(XorType);
4192   }
4193 
4194   // Store initial values of reductions into global variables.
4195   Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0), SumReduced);
4196   Builder.CreateStore(Builder.getInt32(1), XorReduced);
4197 
4198   InsertPointTy FirstBodyIP, FirstBodyAllocaIP;
4199   auto FirstBodyGenCB = [&](InsertPointTy InnerAllocaIP,
4200                             InsertPointTy CodeGenIP) {
4201     IRBuilderBase::InsertPointGuard Guard(Builder);
4202     Builder.restoreIP(CodeGenIP);
4203 
4204     uint32_t StrSize;
4205     Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize);
4206     Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize);
4207     Value *TID = OMPBuilder.getOrCreateThreadID(Ident);
4208     Value *SumLocal =
4209         Builder.CreateUIToFP(TID, Builder.getFloatTy(), "sum.local");
4210     Value *SumPartial = Builder.CreateLoad(SumType, SumReduced, "sum.partial");
4211     Value *Sum = Builder.CreateFAdd(SumPartial, SumLocal, "sum");
4212     Builder.CreateStore(Sum, SumReduced);
4213 
4214     FirstBodyIP = Builder.saveIP();
4215     FirstBodyAllocaIP = InnerAllocaIP;
4216   };
4217 
4218   InsertPointTy SecondBodyIP, SecondBodyAllocaIP;
4219   auto SecondBodyGenCB = [&](InsertPointTy InnerAllocaIP,
4220                              InsertPointTy CodeGenIP) {
4221     IRBuilderBase::InsertPointGuard Guard(Builder);
4222     Builder.restoreIP(CodeGenIP);
4223 
4224     uint32_t StrSize;
4225     Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize);
4226     Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize);
4227     Value *TID = OMPBuilder.getOrCreateThreadID(Ident);
4228     Value *XorPartial = Builder.CreateLoad(XorType, XorReduced, "xor.partial");
4229     Value *Xor = Builder.CreateXor(XorPartial, TID, "xor");
4230     Builder.CreateStore(Xor, XorReduced);
4231 
4232     SecondBodyIP = Builder.saveIP();
4233     SecondBodyAllocaIP = InnerAllocaIP;
4234   };
4235 
4236   // Privatization for reduction creates local copies of reduction variables and
4237   // initializes them to reduction-neutral values. The same privatization
4238   // callback is used for both loops, with dispatch based on the value being
4239   // privatized.
4240   Value *SumPrivatized;
4241   Value *XorPrivatized;
4242   auto PrivCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP,
4243                     Value &Original, Value &Inner, Value *&ReplVal) {
4244     IRBuilderBase::InsertPointGuard Guard(Builder);
4245     Builder.restoreIP(InnerAllocaIP);
4246     if (&Original == SumReduced) {
4247       SumPrivatized = Builder.CreateAlloca(Builder.getFloatTy());
4248       ReplVal = SumPrivatized;
4249     } else if (&Original == XorReduced) {
4250       XorPrivatized = Builder.CreateAlloca(Builder.getInt32Ty());
4251       ReplVal = XorPrivatized;
4252     } else {
4253       ReplVal = &Inner;
4254       return CodeGenIP;
4255     }
4256 
4257     Builder.restoreIP(CodeGenIP);
4258     if (&Original == SumReduced)
4259       Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0),
4260                           SumPrivatized);
4261     else if (&Original == XorReduced)
4262       Builder.CreateStore(Builder.getInt32(0), XorPrivatized);
4263 
4264     return Builder.saveIP();
4265   };
4266 
4267   // Do nothing in finalization.
4268   auto FiniCB = [&](InsertPointTy CodeGenIP) { return CodeGenIP; };
4269 
4270   Builder.restoreIP(
4271       OMPBuilder.createParallel(Loc, OuterAllocaIP, FirstBodyGenCB, PrivCB,
4272                                 FiniCB, /* IfCondition */ nullptr,
4273                                 /* NumThreads */ nullptr, OMP_PROC_BIND_default,
4274                                 /* IsCancellable */ false));
4275   InsertPointTy AfterIP = OMPBuilder.createParallel(
4276       {Builder.saveIP(), DL}, OuterAllocaIP, SecondBodyGenCB, PrivCB, FiniCB,
4277       /* IfCondition */ nullptr,
4278       /* NumThreads */ nullptr, OMP_PROC_BIND_default,
4279       /* IsCancellable */ false);
4280 
4281   OMPBuilder.createReductions(
4282       FirstBodyIP, FirstBodyAllocaIP,
4283       {{SumType, SumReduced, SumPrivatized, sumReduction, sumAtomicReduction}});
4284   OMPBuilder.createReductions(
4285       SecondBodyIP, SecondBodyAllocaIP,
4286       {{XorType, XorReduced, XorPrivatized, xorReduction, xorAtomicReduction}});
4287 
4288   Builder.restoreIP(AfterIP);
4289   Builder.CreateRetVoid();
4290 
4291   OMPBuilder.finalize(F);
4292 
4293   // The IR must be valid.
4294   EXPECT_FALSE(verifyModule(*M));
4295 
4296   // Two different outlined functions must have been created.
4297   SmallVector<CallInst *> ForkCalls;
4298   findCalls(F, omp::RuntimeFunction::OMPRTL___kmpc_fork_call, OMPBuilder,
4299             ForkCalls);
4300   ASSERT_EQ(ForkCalls.size(), 2u);
4301   Value *CalleeVal = cast<Constant>(ForkCalls[0]->getOperand(2))->getOperand(0);
4302   Function *FirstCallee = cast<Function>(CalleeVal);
4303   CalleeVal = cast<Constant>(ForkCalls[1]->getOperand(2))->getOperand(0);
4304   Function *SecondCallee = cast<Function>(CalleeVal);
4305   EXPECT_NE(FirstCallee, SecondCallee);
4306 
4307   // Two different reduction functions must have been created.
4308   SmallVector<CallInst *> ReduceCalls;
4309   findCalls(FirstCallee, omp::RuntimeFunction::OMPRTL___kmpc_reduce, OMPBuilder,
4310             ReduceCalls);
4311   ASSERT_EQ(ReduceCalls.size(), 1u);
4312   auto *AddReduction = cast<Function>(ReduceCalls[0]->getOperand(5));
4313   ReduceCalls.clear();
4314   findCalls(SecondCallee, omp::RuntimeFunction::OMPRTL___kmpc_reduce,
4315             OMPBuilder, ReduceCalls);
4316   auto *XorReduction = cast<Function>(ReduceCalls[0]->getOperand(5));
4317   EXPECT_NE(AddReduction, XorReduction);
4318 
4319   // Each reduction function does its own kind of reduction.
4320   BasicBlock *FnReductionBB = &AddReduction->getEntryBlock();
4321   auto *Bitcast = findSingleUserInBlock<BitCastInst>(AddReduction->getArg(0),
4322                                                      FnReductionBB);
4323   ASSERT_NE(Bitcast, nullptr);
4324   Value *FirstLHSPtr =
4325       findSingleUserInBlock<GetElementPtrInst>(Bitcast, FnReductionBB);
4326   ASSERT_NE(FirstLHSPtr, nullptr);
4327   Value *Opaque = findSingleUserInBlock<LoadInst>(FirstLHSPtr, FnReductionBB);
4328   ASSERT_NE(Opaque, nullptr);
4329   Bitcast = findSingleUserInBlock<BitCastInst>(Opaque, FnReductionBB);
4330   ASSERT_NE(Bitcast, nullptr);
4331   Instruction::BinaryOps Opcode = Instruction::FAdd;
4332   EXPECT_TRUE(isSimpleBinaryReduction(Bitcast, FnReductionBB, &Opcode));
4333 
4334   FnReductionBB = &XorReduction->getEntryBlock();
4335   Bitcast = findSingleUserInBlock<BitCastInst>(XorReduction->getArg(0),
4336                                                FnReductionBB);
4337   ASSERT_NE(Bitcast, nullptr);
4338   Value *SecondLHSPtr =
4339       findSingleUserInBlock<GetElementPtrInst>(Bitcast, FnReductionBB);
4340   ASSERT_NE(FirstLHSPtr, nullptr);
4341   Opaque = findSingleUserInBlock<LoadInst>(SecondLHSPtr, FnReductionBB);
4342   ASSERT_NE(Opaque, nullptr);
4343   Bitcast = findSingleUserInBlock<BitCastInst>(Opaque, FnReductionBB);
4344   ASSERT_NE(Bitcast, nullptr);
4345   Opcode = Instruction::Xor;
4346   EXPECT_TRUE(isSimpleBinaryReduction(Bitcast, FnReductionBB, &Opcode));
4347 }
4348 
4349 TEST_F(OpenMPIRBuilderTest, CreateSectionsSimple) {
4350   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4351   using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
4352   OpenMPIRBuilder OMPBuilder(*M);
4353   OMPBuilder.initialize();
4354   F->setName("func");
4355   IRBuilder<> Builder(BB);
4356 
4357   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "sections.enter", F);
4358   Builder.CreateBr(EnterBB);
4359   Builder.SetInsertPoint(EnterBB);
4360   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4361 
4362   llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
4363   llvm::SmallVector<BasicBlock *, 4> CaseBBs;
4364 
4365   auto FiniCB = [&](InsertPointTy IP) {};
4366   auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {};
4367   SectionCBVector.push_back(SectionCB);
4368 
4369   auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
4370                    llvm::Value &, llvm::Value &Val,
4371                    llvm::Value *&ReplVal) { return CodeGenIP; };
4372   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
4373                                     F->getEntryBlock().getFirstInsertionPt());
4374   Builder.restoreIP(OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector,
4375                                               PrivCB, FiniCB, false, false));
4376   Builder.CreateRetVoid(); // Required at the end of the function
4377   EXPECT_NE(F->getEntryBlock().getTerminator(), nullptr);
4378   EXPECT_FALSE(verifyModule(*M, &errs()));
4379 }
4380 
4381 TEST_F(OpenMPIRBuilderTest, CreateSections) {
4382   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4383   using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
4384   OpenMPIRBuilder OMPBuilder(*M);
4385   OMPBuilder.initialize();
4386   F->setName("func");
4387   IRBuilder<> Builder(BB);
4388 
4389   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4390   llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
4391   llvm::SmallVector<BasicBlock *, 4> CaseBBs;
4392 
4393   BasicBlock *SwitchBB = nullptr;
4394   AllocaInst *PrivAI = nullptr;
4395   SwitchInst *Switch = nullptr;
4396 
4397   unsigned NumBodiesGenerated = 0;
4398   unsigned NumFiniCBCalls = 0;
4399   PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
4400 
4401   auto FiniCB = [&](InsertPointTy IP) {
4402     ++NumFiniCBCalls;
4403     BasicBlock *IPBB = IP.getBlock();
4404     EXPECT_NE(IPBB->end(), IP.getPoint());
4405   };
4406 
4407   auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4408     ++NumBodiesGenerated;
4409     CaseBBs.push_back(CodeGenIP.getBlock());
4410     SwitchBB = CodeGenIP.getBlock()->getSinglePredecessor();
4411     Builder.restoreIP(CodeGenIP);
4412     Builder.CreateStore(F->arg_begin(), PrivAI);
4413     Value *PrivLoad =
4414         Builder.CreateLoad(F->arg_begin()->getType(), PrivAI, "local.alloca");
4415     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
4416   };
4417   auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
4418                    llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
4419     // TODO: Privatization not implemented yet
4420     return CodeGenIP;
4421   };
4422 
4423   SectionCBVector.push_back(SectionCB);
4424   SectionCBVector.push_back(SectionCB);
4425 
4426   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
4427                                     F->getEntryBlock().getFirstInsertionPt());
4428   Builder.restoreIP(OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector,
4429                                               PrivCB, FiniCB, false, false));
4430   Builder.CreateRetVoid(); // Required at the end of the function
4431 
4432   // Switch BB's predecessor is loop condition BB, whose successor at index 1 is
4433   // loop's exit BB
4434   BasicBlock *ForExitBB =
4435       SwitchBB->getSinglePredecessor()->getTerminator()->getSuccessor(1);
4436   EXPECT_NE(ForExitBB, nullptr);
4437 
4438   EXPECT_NE(PrivAI, nullptr);
4439   Function *OutlinedFn = PrivAI->getFunction();
4440   EXPECT_EQ(F, OutlinedFn);
4441   EXPECT_FALSE(verifyModule(*M, &errs()));
4442   EXPECT_EQ(OutlinedFn->arg_size(), 1U);
4443 
4444   BasicBlock *LoopPreheaderBB =
4445       OutlinedFn->getEntryBlock().getSingleSuccessor();
4446   // loop variables are 5 - lower bound, upper bound, stride, islastiter, and
4447   // iterator/counter
4448   bool FoundForInit = false;
4449   for (Instruction &Inst : *LoopPreheaderBB) {
4450     if (isa<CallInst>(Inst)) {
4451       if (cast<CallInst>(&Inst)->getCalledFunction()->getName() ==
4452           "__kmpc_for_static_init_4u") {
4453         FoundForInit = true;
4454       }
4455     }
4456   }
4457   EXPECT_EQ(FoundForInit, true);
4458 
4459   bool FoundForExit = false;
4460   bool FoundBarrier = false;
4461   for (Instruction &Inst : *ForExitBB) {
4462     if (isa<CallInst>(Inst)) {
4463       if (cast<CallInst>(&Inst)->getCalledFunction()->getName() ==
4464           "__kmpc_for_static_fini") {
4465         FoundForExit = true;
4466       }
4467       if (cast<CallInst>(&Inst)->getCalledFunction()->getName() ==
4468           "__kmpc_barrier") {
4469         FoundBarrier = true;
4470       }
4471       if (FoundForExit && FoundBarrier)
4472         break;
4473     }
4474   }
4475   EXPECT_EQ(FoundForExit, true);
4476   EXPECT_EQ(FoundBarrier, true);
4477 
4478   EXPECT_NE(SwitchBB, nullptr);
4479   EXPECT_NE(SwitchBB->getTerminator(), nullptr);
4480   EXPECT_EQ(isa<SwitchInst>(SwitchBB->getTerminator()), true);
4481   Switch = cast<SwitchInst>(SwitchBB->getTerminator());
4482   EXPECT_EQ(Switch->getNumCases(), 2U);
4483 
4484   EXPECT_EQ(CaseBBs.size(), 2U);
4485   for (auto *&CaseBB : CaseBBs) {
4486     EXPECT_EQ(CaseBB->getParent(), OutlinedFn);
4487   }
4488 
4489   ASSERT_EQ(NumBodiesGenerated, 2U);
4490   ASSERT_EQ(NumFiniCBCalls, 1U);
4491   EXPECT_FALSE(verifyModule(*M, &errs()));
4492 }
4493 
4494 TEST_F(OpenMPIRBuilderTest, CreateSectionsNoWait) {
4495   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4496   using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
4497   OpenMPIRBuilder OMPBuilder(*M);
4498   OMPBuilder.initialize();
4499   F->setName("func");
4500   IRBuilder<> Builder(BB);
4501 
4502   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "sections.enter", F);
4503   Builder.CreateBr(EnterBB);
4504   Builder.SetInsertPoint(EnterBB);
4505   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4506 
4507   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
4508                                     F->getEntryBlock().getFirstInsertionPt());
4509   llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
4510   auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
4511                    llvm::Value &, llvm::Value &Val,
4512                    llvm::Value *&ReplVal) { return CodeGenIP; };
4513   auto FiniCB = [&](InsertPointTy IP) {};
4514 
4515   Builder.restoreIP(OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector,
4516                                               PrivCB, FiniCB, false, true));
4517   Builder.CreateRetVoid(); // Required at the end of the function
4518   for (auto &Inst : instructions(*F)) {
4519     EXPECT_FALSE(isa<CallInst>(Inst) &&
4520                  cast<CallInst>(&Inst)->getCalledFunction()->getName() ==
4521                      "__kmpc_barrier" &&
4522                  "call to function __kmpc_barrier found with nowait");
4523   }
4524 }
4525 
4526 TEST_F(OpenMPIRBuilderTest, CreateOffloadMaptypes) {
4527   OpenMPIRBuilder OMPBuilder(*M);
4528   OMPBuilder.initialize();
4529 
4530   IRBuilder<> Builder(BB);
4531 
4532   SmallVector<uint64_t> Mappings = {0, 1};
4533   GlobalVariable *OffloadMaptypesGlobal =
4534       OMPBuilder.createOffloadMaptypes(Mappings, "offload_maptypes");
4535   EXPECT_FALSE(M->global_empty());
4536   EXPECT_EQ(OffloadMaptypesGlobal->getName(), "offload_maptypes");
4537   EXPECT_TRUE(OffloadMaptypesGlobal->isConstant());
4538   EXPECT_TRUE(OffloadMaptypesGlobal->hasGlobalUnnamedAddr());
4539   EXPECT_TRUE(OffloadMaptypesGlobal->hasPrivateLinkage());
4540   EXPECT_TRUE(OffloadMaptypesGlobal->hasInitializer());
4541   Constant *Initializer = OffloadMaptypesGlobal->getInitializer();
4542   EXPECT_TRUE(isa<ConstantDataArray>(Initializer));
4543   ConstantDataArray *MappingInit = dyn_cast<ConstantDataArray>(Initializer);
4544   EXPECT_EQ(MappingInit->getNumElements(), Mappings.size());
4545   EXPECT_TRUE(MappingInit->getType()->getElementType()->isIntegerTy(64));
4546   Constant *CA = ConstantDataArray::get(Builder.getContext(), Mappings);
4547   EXPECT_EQ(MappingInit, CA);
4548 }
4549 
4550 TEST_F(OpenMPIRBuilderTest, CreateOffloadMapnames) {
4551   OpenMPIRBuilder OMPBuilder(*M);
4552   OMPBuilder.initialize();
4553 
4554   IRBuilder<> Builder(BB);
4555 
4556   uint32_t StrSize;
4557   Constant *Cst1 =
4558       OMPBuilder.getOrCreateSrcLocStr("array1", "file1", 2, 5, StrSize);
4559   Constant *Cst2 =
4560       OMPBuilder.getOrCreateSrcLocStr("array2", "file1", 3, 5, StrSize);
4561   SmallVector<llvm::Constant *> Names = {Cst1, Cst2};
4562 
4563   GlobalVariable *OffloadMaptypesGlobal =
4564       OMPBuilder.createOffloadMapnames(Names, "offload_mapnames");
4565   EXPECT_FALSE(M->global_empty());
4566   EXPECT_EQ(OffloadMaptypesGlobal->getName(), "offload_mapnames");
4567   EXPECT_TRUE(OffloadMaptypesGlobal->isConstant());
4568   EXPECT_FALSE(OffloadMaptypesGlobal->hasGlobalUnnamedAddr());
4569   EXPECT_TRUE(OffloadMaptypesGlobal->hasPrivateLinkage());
4570   EXPECT_TRUE(OffloadMaptypesGlobal->hasInitializer());
4571   Constant *Initializer = OffloadMaptypesGlobal->getInitializer();
4572   EXPECT_TRUE(isa<Constant>(Initializer->getOperand(0)->stripPointerCasts()));
4573   EXPECT_TRUE(isa<Constant>(Initializer->getOperand(1)->stripPointerCasts()));
4574 
4575   GlobalVariable *Name1Gbl =
4576       cast<GlobalVariable>(Initializer->getOperand(0)->stripPointerCasts());
4577   EXPECT_TRUE(isa<ConstantDataArray>(Name1Gbl->getInitializer()));
4578   ConstantDataArray *Name1GblCA =
4579       dyn_cast<ConstantDataArray>(Name1Gbl->getInitializer());
4580   EXPECT_EQ(Name1GblCA->getAsCString(), ";file1;array1;2;5;;");
4581 
4582   GlobalVariable *Name2Gbl =
4583       cast<GlobalVariable>(Initializer->getOperand(1)->stripPointerCasts());
4584   EXPECT_TRUE(isa<ConstantDataArray>(Name2Gbl->getInitializer()));
4585   ConstantDataArray *Name2GblCA =
4586       dyn_cast<ConstantDataArray>(Name2Gbl->getInitializer());
4587   EXPECT_EQ(Name2GblCA->getAsCString(), ";file1;array2;3;5;;");
4588 
4589   EXPECT_TRUE(Initializer->getType()->getArrayElementType()->isPointerTy());
4590   EXPECT_EQ(Initializer->getType()->getArrayNumElements(), Names.size());
4591 }
4592 
4593 TEST_F(OpenMPIRBuilderTest, CreateMapperAllocas) {
4594   OpenMPIRBuilder OMPBuilder(*M);
4595   OMPBuilder.initialize();
4596   F->setName("func");
4597   IRBuilder<> Builder(BB);
4598 
4599   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4600 
4601   unsigned TotalNbOperand = 2;
4602 
4603   OpenMPIRBuilder::MapperAllocas MapperAllocas;
4604   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
4605                                     F->getEntryBlock().getFirstInsertionPt());
4606   OMPBuilder.createMapperAllocas(Loc, AllocaIP, TotalNbOperand, MapperAllocas);
4607   EXPECT_NE(MapperAllocas.ArgsBase, nullptr);
4608   EXPECT_NE(MapperAllocas.Args, nullptr);
4609   EXPECT_NE(MapperAllocas.ArgSizes, nullptr);
4610   EXPECT_TRUE(MapperAllocas.ArgsBase->getAllocatedType()->isArrayTy());
4611   ArrayType *ArrType =
4612       dyn_cast<ArrayType>(MapperAllocas.ArgsBase->getAllocatedType());
4613   EXPECT_EQ(ArrType->getNumElements(), TotalNbOperand);
4614   EXPECT_TRUE(MapperAllocas.ArgsBase->getAllocatedType()
4615                   ->getArrayElementType()
4616                   ->isPointerTy());
4617   EXPECT_TRUE(
4618       cast<PointerType>(
4619           MapperAllocas.ArgsBase->getAllocatedType()->getArrayElementType())
4620           ->isOpaqueOrPointeeTypeMatches(Builder.getInt8Ty()));
4621 
4622   EXPECT_TRUE(MapperAllocas.Args->getAllocatedType()->isArrayTy());
4623   ArrType = dyn_cast<ArrayType>(MapperAllocas.Args->getAllocatedType());
4624   EXPECT_EQ(ArrType->getNumElements(), TotalNbOperand);
4625   EXPECT_TRUE(MapperAllocas.Args->getAllocatedType()
4626                   ->getArrayElementType()
4627                   ->isPointerTy());
4628   EXPECT_TRUE(cast<PointerType>(
4629                   MapperAllocas.Args->getAllocatedType()->getArrayElementType())
4630                   ->isOpaqueOrPointeeTypeMatches(Builder.getInt8Ty()));
4631 
4632   EXPECT_TRUE(MapperAllocas.ArgSizes->getAllocatedType()->isArrayTy());
4633   ArrType = dyn_cast<ArrayType>(MapperAllocas.ArgSizes->getAllocatedType());
4634   EXPECT_EQ(ArrType->getNumElements(), TotalNbOperand);
4635   EXPECT_TRUE(MapperAllocas.ArgSizes->getAllocatedType()
4636                   ->getArrayElementType()
4637                   ->isIntegerTy(64));
4638 }
4639 
4640 TEST_F(OpenMPIRBuilderTest, EmitMapperCall) {
4641   OpenMPIRBuilder OMPBuilder(*M);
4642   OMPBuilder.initialize();
4643   F->setName("func");
4644   IRBuilder<> Builder(BB);
4645   LLVMContext &Ctx = M->getContext();
4646 
4647   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4648 
4649   unsigned TotalNbOperand = 2;
4650 
4651   OpenMPIRBuilder::MapperAllocas MapperAllocas;
4652   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
4653                                     F->getEntryBlock().getFirstInsertionPt());
4654   OMPBuilder.createMapperAllocas(Loc, AllocaIP, TotalNbOperand, MapperAllocas);
4655 
4656   auto *BeginMapperFunc = OMPBuilder.getOrCreateRuntimeFunctionPtr(
4657       omp::OMPRTL___tgt_target_data_begin_mapper);
4658 
4659   SmallVector<uint64_t> Flags = {0, 2};
4660 
4661   uint32_t StrSize;
4662   Constant *SrcLocCst =
4663       OMPBuilder.getOrCreateSrcLocStr("", "file1", 2, 5, StrSize);
4664   Value *SrcLocInfo = OMPBuilder.getOrCreateIdent(SrcLocCst, StrSize);
4665 
4666   Constant *Cst1 =
4667       OMPBuilder.getOrCreateSrcLocStr("array1", "file1", 2, 5, StrSize);
4668   Constant *Cst2 =
4669       OMPBuilder.getOrCreateSrcLocStr("array2", "file1", 3, 5, StrSize);
4670   SmallVector<llvm::Constant *> Names = {Cst1, Cst2};
4671 
4672   GlobalVariable *Maptypes =
4673       OMPBuilder.createOffloadMaptypes(Flags, ".offload_maptypes");
4674   Value *MaptypesArg = Builder.CreateConstInBoundsGEP2_32(
4675       ArrayType::get(Type::getInt64Ty(Ctx), TotalNbOperand), Maptypes,
4676       /*Idx0=*/0, /*Idx1=*/0);
4677 
4678   GlobalVariable *Mapnames =
4679       OMPBuilder.createOffloadMapnames(Names, ".offload_mapnames");
4680   Value *MapnamesArg = Builder.CreateConstInBoundsGEP2_32(
4681       ArrayType::get(Type::getInt8PtrTy(Ctx), TotalNbOperand), Mapnames,
4682       /*Idx0=*/0, /*Idx1=*/0);
4683 
4684   OMPBuilder.emitMapperCall(Builder.saveIP(), BeginMapperFunc, SrcLocInfo,
4685                             MaptypesArg, MapnamesArg, MapperAllocas, -1,
4686                             TotalNbOperand);
4687 
4688   CallInst *MapperCall = dyn_cast<CallInst>(&BB->back());
4689   EXPECT_NE(MapperCall, nullptr);
4690   EXPECT_EQ(MapperCall->arg_size(), 9U);
4691   EXPECT_EQ(MapperCall->getCalledFunction()->getName(),
4692             "__tgt_target_data_begin_mapper");
4693   EXPECT_EQ(MapperCall->getOperand(0), SrcLocInfo);
4694   EXPECT_TRUE(MapperCall->getOperand(1)->getType()->isIntegerTy(64));
4695   EXPECT_TRUE(MapperCall->getOperand(2)->getType()->isIntegerTy(32));
4696 
4697   EXPECT_EQ(MapperCall->getOperand(6), MaptypesArg);
4698   EXPECT_EQ(MapperCall->getOperand(7), MapnamesArg);
4699   EXPECT_TRUE(MapperCall->getOperand(8)->getType()->isPointerTy());
4700 }
4701 
4702 TEST_F(OpenMPIRBuilderTest, CreateTask) {
4703   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4704   OpenMPIRBuilder OMPBuilder(*M);
4705   OMPBuilder.initialize();
4706   F->setName("func");
4707   IRBuilder<> Builder(BB);
4708 
4709   AllocaInst *ValPtr32 = Builder.CreateAlloca(Builder.getInt32Ty());
4710   AllocaInst *ValPtr128 = Builder.CreateAlloca(Builder.getInt128Ty());
4711   Value *Val128 =
4712       Builder.CreateLoad(Builder.getInt128Ty(), ValPtr128, "bodygen.load");
4713 
4714   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4715     Builder.restoreIP(AllocaIP);
4716     AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr,
4717                                                 "bodygen.alloca128");
4718 
4719     Builder.restoreIP(CodeGenIP);
4720     // Loading and storing captured pointer and values
4721     Builder.CreateStore(Val128, Local128);
4722     Value *Val32 = Builder.CreateLoad(ValPtr32->getAllocatedType(), ValPtr32,
4723                                       "bodygen.load32");
4724 
4725     LoadInst *PrivLoad128 = Builder.CreateLoad(
4726         Local128->getAllocatedType(), Local128, "bodygen.local.load128");
4727     Value *Cmp = Builder.CreateICmpNE(
4728         Val32, Builder.CreateTrunc(PrivLoad128, Val32->getType()));
4729     Instruction *ThenTerm, *ElseTerm;
4730     SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
4731                                   &ThenTerm, &ElseTerm);
4732   };
4733 
4734   BasicBlock *AllocaBB = Builder.GetInsertBlock();
4735   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
4736   OpenMPIRBuilder::LocationDescription Loc(
4737       InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
4738   Builder.restoreIP(OMPBuilder.createTask(
4739       Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
4740       BodyGenCB));
4741   OMPBuilder.finalize();
4742   Builder.CreateRetVoid();
4743 
4744   EXPECT_FALSE(verifyModule(*M, &errs()));
4745 
4746   CallInst *TaskAllocCall = dyn_cast<CallInst>(
4747       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
4748           ->user_back());
4749 
4750   // Verify the Ident argument
4751   GlobalVariable *Ident = cast<GlobalVariable>(TaskAllocCall->getArgOperand(0));
4752   ASSERT_NE(Ident, nullptr);
4753   EXPECT_TRUE(Ident->hasInitializer());
4754   Constant *Initializer = Ident->getInitializer();
4755   GlobalVariable *SrcStrGlob =
4756       cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts());
4757   ASSERT_NE(SrcStrGlob, nullptr);
4758   ConstantDataArray *SrcSrc =
4759       dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer());
4760   ASSERT_NE(SrcSrc, nullptr);
4761 
4762   // Verify the num_threads argument.
4763   CallInst *GTID = dyn_cast<CallInst>(TaskAllocCall->getArgOperand(1));
4764   ASSERT_NE(GTID, nullptr);
4765   EXPECT_EQ(GTID->arg_size(), 1U);
4766   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
4767 
4768   // Verify the flags
4769   // TODO: Check for others flags. Currently testing only for tiedness.
4770   ConstantInt *Flags = dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(2));
4771   ASSERT_NE(Flags, nullptr);
4772   EXPECT_EQ(Flags->getSExtValue(), 1);
4773 
4774   // Verify the data size
4775   ConstantInt *DataSize =
4776       dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(3));
4777   ASSERT_NE(DataSize, nullptr);
4778   EXPECT_EQ(DataSize->getSExtValue(), 24); // 64-bit pointer + 128-bit integer
4779 
4780   // TODO: Verify size of shared clause variables
4781 
4782   // Verify Wrapper function
4783   Function *WrapperFunc =
4784       dyn_cast<Function>(TaskAllocCall->getArgOperand(5)->stripPointerCasts());
4785   ASSERT_NE(WrapperFunc, nullptr);
4786   EXPECT_FALSE(WrapperFunc->isDeclaration());
4787   CallInst *OutlinedFnCall = dyn_cast<CallInst>(WrapperFunc->begin()->begin());
4788   ASSERT_NE(OutlinedFnCall, nullptr);
4789   EXPECT_EQ(WrapperFunc->getArg(0)->getType(), Builder.getInt32Ty());
4790   EXPECT_EQ(OutlinedFnCall->getArgOperand(0), WrapperFunc->getArg(1));
4791 
4792   // Verify the presence of `trunc` and `icmp` instructions in Outlined function
4793   Function *OutlinedFn = OutlinedFnCall->getCalledFunction();
4794   ASSERT_NE(OutlinedFn, nullptr);
4795   EXPECT_TRUE(any_of(instructions(OutlinedFn),
4796                      [](Instruction &inst) { return isa<TruncInst>(&inst); }));
4797   EXPECT_TRUE(any_of(instructions(OutlinedFn),
4798                      [](Instruction &inst) { return isa<ICmpInst>(&inst); }));
4799 
4800   // Verify the execution of the task
4801   CallInst *TaskCall = dyn_cast<CallInst>(
4802       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task)
4803           ->user_back());
4804   ASSERT_NE(TaskCall, nullptr);
4805   EXPECT_EQ(TaskCall->getArgOperand(0), Ident);
4806   EXPECT_EQ(TaskCall->getArgOperand(1), GTID);
4807   EXPECT_EQ(TaskCall->getArgOperand(2), TaskAllocCall);
4808 
4809   // Verify that the argument data has been copied
4810   for (User *in : TaskAllocCall->users()) {
4811     if (MemCpyInst *memCpyInst = dyn_cast<MemCpyInst>(in)) {
4812       EXPECT_EQ(memCpyInst->getDest(), TaskAllocCall);
4813     }
4814   }
4815 }
4816 
4817 TEST_F(OpenMPIRBuilderTest, CreateTaskNoArgs) {
4818   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4819   OpenMPIRBuilder OMPBuilder(*M);
4820   OMPBuilder.initialize();
4821   F->setName("func");
4822   IRBuilder<> Builder(BB);
4823 
4824   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {};
4825 
4826   BasicBlock *AllocaBB = Builder.GetInsertBlock();
4827   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
4828   OpenMPIRBuilder::LocationDescription Loc(
4829       InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
4830   Builder.restoreIP(OMPBuilder.createTask(
4831       Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
4832       BodyGenCB));
4833   OMPBuilder.finalize();
4834   Builder.CreateRetVoid();
4835 
4836   EXPECT_FALSE(verifyModule(*M, &errs()));
4837 }
4838 
4839 TEST_F(OpenMPIRBuilderTest, CreateTaskUntied) {
4840   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4841   OpenMPIRBuilder OMPBuilder(*M);
4842   OMPBuilder.initialize();
4843   F->setName("func");
4844   IRBuilder<> Builder(BB);
4845   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {};
4846   BasicBlock *AllocaBB = Builder.GetInsertBlock();
4847   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
4848   OpenMPIRBuilder::LocationDescription Loc(
4849       InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
4850   Builder.restoreIP(OMPBuilder.createTask(
4851       Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), BodyGenCB,
4852       /*Tied=*/false));
4853   OMPBuilder.finalize();
4854   Builder.CreateRetVoid();
4855 
4856   // Check for the `Tied` argument
4857   CallInst *TaskAllocCall = dyn_cast<CallInst>(
4858       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
4859           ->user_back());
4860   ASSERT_NE(TaskAllocCall, nullptr);
4861   ConstantInt *Flags = dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(2));
4862   ASSERT_NE(Flags, nullptr);
4863   EXPECT_EQ(Flags->getZExtValue() & 1U, 0U);
4864 
4865   EXPECT_FALSE(verifyModule(*M, &errs()));
4866 }
4867 
4868 TEST_F(OpenMPIRBuilderTest, CreateTaskFinal) {
4869   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4870   OpenMPIRBuilder OMPBuilder(*M);
4871   OMPBuilder.initialize();
4872   F->setName("func");
4873   IRBuilder<> Builder(BB);
4874   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {};
4875   IRBuilderBase::InsertPoint AllocaIP = Builder.saveIP();
4876   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
4877   Builder.SetInsertPoint(BodyBB);
4878   Value *Final = Builder.CreateICmp(
4879       CmpInst::Predicate::ICMP_EQ, F->getArg(0),
4880       ConstantInt::get(Type::getInt32Ty(M->getContext()), 0U));
4881   OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
4882   Builder.restoreIP(OMPBuilder.createTask(Loc, AllocaIP, BodyGenCB,
4883                                           /*Tied=*/false, Final));
4884   OMPBuilder.finalize();
4885   Builder.CreateRetVoid();
4886 
4887   // Check for the `Tied` argument
4888   CallInst *TaskAllocCall = dyn_cast<CallInst>(
4889       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
4890           ->user_back());
4891   ASSERT_NE(TaskAllocCall, nullptr);
4892   BinaryOperator *OrInst =
4893       dyn_cast<BinaryOperator>(TaskAllocCall->getArgOperand(2));
4894   ASSERT_NE(OrInst, nullptr);
4895   EXPECT_EQ(OrInst->getOpcode(), BinaryOperator::BinaryOps::Or);
4896 
4897   // One of the arguments to `or` instruction is the tied flag, which is equal
4898   // to zero.
4899   EXPECT_TRUE(any_of(OrInst->operands(), [](Value *op) {
4900     if (ConstantInt *TiedValue = dyn_cast<ConstantInt>(op))
4901       return TiedValue->getSExtValue() == 0;
4902     return false;
4903   }));
4904 
4905   // One of the arguments to `or` instruction is the final condition.
4906   EXPECT_TRUE(any_of(OrInst->operands(), [Final](Value *op) {
4907     if (SelectInst *Select = dyn_cast<SelectInst>(op)) {
4908       ConstantInt *TrueValue = dyn_cast<ConstantInt>(Select->getTrueValue());
4909       ConstantInt *FalseValue = dyn_cast<ConstantInt>(Select->getFalseValue());
4910       if (!TrueValue || !FalseValue)
4911         return false;
4912       return Select->getCondition() == Final &&
4913              TrueValue->getSExtValue() == 2 && FalseValue->getSExtValue() == 0;
4914     }
4915     return false;
4916   }));
4917 
4918   EXPECT_FALSE(verifyModule(*M, &errs()));
4919 }
4920 
4921 } // namespace
4922