1 //===- OpenMPToLLVMIRTranslation.cpp - Translate OpenMP dialect to LLVM IR-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a translation between the MLIR OpenMP dialect and LLVM
10 // IR.
11 //
12 //===----------------------------------------------------------------------===//
13 #include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h"
14 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
15 #include "mlir/IR/BlockAndValueMapping.h"
16 #include "mlir/IR/Operation.h"
17 #include "mlir/Support/LLVM.h"
18 #include "mlir/Target/LLVMIR/ModuleTranslation.h"
19 
20 #include "llvm/ADT/SetVector.h"
21 #include "llvm/ADT/TypeSwitch.h"
22 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
23 #include "llvm/IR/DebugInfoMetadata.h"
24 #include "llvm/IR/IRBuilder.h"
25 
26 using namespace mlir;
27 
28 namespace {
29 /// ModuleTranslation stack frame for OpenMP operations. This keeps track of the
30 /// insertion points for allocas.
31 class OpenMPAllocaStackFrame
32     : public LLVM::ModuleTranslation::StackFrameBase<OpenMPAllocaStackFrame> {
33 public:
34   explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP)
35       : allocaInsertPoint(allocaIP) {}
36   llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
37 };
38 
39 /// ModuleTranslation stack frame containing the partial mapping between MLIR
40 /// values and their LLVM IR equivalents.
41 class OpenMPVarMappingStackFrame
42     : public LLVM::ModuleTranslation::StackFrameBase<
43           OpenMPVarMappingStackFrame> {
44 public:
45   explicit OpenMPVarMappingStackFrame(
46       const DenseMap<Value, llvm::Value *> &mapping)
47       : mapping(mapping) {}
48 
49   DenseMap<Value, llvm::Value *> mapping;
50 };
51 } // namespace
52 
53 /// Find the insertion point for allocas given the current insertion point for
54 /// normal operations in the builder.
55 static llvm::OpenMPIRBuilder::InsertPointTy
56 findAllocaInsertPoint(llvm::IRBuilderBase &builder,
57                       const LLVM::ModuleTranslation &moduleTranslation) {
58   // If there is an alloca insertion point on stack, i.e. we are in a nested
59   // operation and a specific point was provided by some surrounding operation,
60   // use it.
61   llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
62   WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocaStackFrame>(
63       [&](const OpenMPAllocaStackFrame &frame) {
64         allocaInsertPoint = frame.allocaInsertPoint;
65         return WalkResult::interrupt();
66       });
67   if (walkResult.wasInterrupted())
68     return allocaInsertPoint;
69 
70   // Otherwise, insert to the entry block of the surrounding function.
71   llvm::BasicBlock &funcEntryBlock =
72       builder.GetInsertBlock()->getParent()->getEntryBlock();
73   return llvm::OpenMPIRBuilder::InsertPointTy(
74       &funcEntryBlock, funcEntryBlock.getFirstInsertionPt());
75 }
76 
77 /// Converts the given region that appears within an OpenMP dialect operation to
78 /// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the
79 /// region, and a branch from any block with an successor-less OpenMP terminator
80 /// to `continuationBlock`. Populates `continuationBlockPHIs` with the PHI nodes
81 /// of the continuation block if provided.
82 static void convertOmpOpRegions(
83     Region &region, StringRef blockName, llvm::BasicBlock &sourceBlock,
84     llvm::BasicBlock &continuationBlock, llvm::IRBuilderBase &builder,
85     LLVM::ModuleTranslation &moduleTranslation, LogicalResult &bodyGenStatus,
86     SmallVectorImpl<llvm::PHINode *> *continuationBlockPHIs = nullptr) {
87   llvm::LLVMContext &llvmContext = builder.getContext();
88   for (Block &bb : region) {
89     llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create(
90         llvmContext, blockName, builder.GetInsertBlock()->getParent(),
91         builder.GetInsertBlock()->getNextNode());
92     moduleTranslation.mapBlock(&bb, llvmBB);
93   }
94 
95   llvm::Instruction *sourceTerminator = sourceBlock.getTerminator();
96 
97   // Terminators (namely YieldOp) may be forwarding values to the region that
98   // need to be available in the continuation block. Collect the types of these
99   // operands in preparation of creating PHI nodes.
100   SmallVector<llvm::Type *> continuationBlockPHITypes;
101   bool operandsProcessed = false;
102   unsigned numYields = 0;
103   for (Block &bb : region.getBlocks()) {
104     if (omp::YieldOp yield = dyn_cast<omp::YieldOp>(bb.getTerminator())) {
105       if (!operandsProcessed) {
106         for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
107           continuationBlockPHITypes.push_back(
108               moduleTranslation.convertType(yield->getOperand(i).getType()));
109         }
110         operandsProcessed = true;
111       } else {
112         assert(continuationBlockPHITypes.size() == yield->getNumOperands() &&
113                "mismatching number of values yielded from the region");
114         for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
115           llvm::Type *operandType =
116               moduleTranslation.convertType(yield->getOperand(i).getType());
117           (void)operandType;
118           assert(continuationBlockPHITypes[i] == operandType &&
119                  "values of mismatching types yielded from the region");
120         }
121       }
122       numYields++;
123     }
124   }
125 
126   // Insert PHI nodes in the continuation block for any values forwarded by the
127   // terminators in this region.
128   if (!continuationBlockPHITypes.empty())
129     assert(
130         continuationBlockPHIs &&
131         "expected continuation block PHIs if converted regions yield values");
132   if (continuationBlockPHIs) {
133     llvm::IRBuilderBase::InsertPointGuard guard(builder);
134     continuationBlockPHIs->reserve(continuationBlockPHITypes.size());
135     builder.SetInsertPoint(&continuationBlock, continuationBlock.begin());
136     for (llvm::Type *ty : continuationBlockPHITypes)
137       continuationBlockPHIs->push_back(builder.CreatePHI(ty, numYields));
138   }
139 
140   // Convert blocks one by one in topological order to ensure
141   // defs are converted before uses.
142   SetVector<Block *> blocks =
143       LLVM::detail::getTopologicallySortedBlocks(region);
144   for (Block *bb : blocks) {
145     llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb);
146     // Retarget the branch of the entry block to the entry block of the
147     // converted region (regions are single-entry).
148     if (bb->isEntryBlock()) {
149       assert(sourceTerminator->getNumSuccessors() == 1 &&
150              "provided entry block has multiple successors");
151       assert(sourceTerminator->getSuccessor(0) == &continuationBlock &&
152              "ContinuationBlock is not the successor of the entry block");
153       sourceTerminator->setSuccessor(0, llvmBB);
154     }
155 
156     llvm::IRBuilderBase::InsertPointGuard guard(builder);
157     if (failed(
158             moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder))) {
159       bodyGenStatus = failure();
160       return;
161     }
162 
163     // Special handling for `omp.yield` and `omp.terminator` (we may have more
164     // than one): they return the control to the parent OpenMP dialect operation
165     // so replace them with the branch to the continuation block. We handle this
166     // here to avoid relying inter-function communication through the
167     // ModuleTranslation class to set up the correct insertion point. This is
168     // also consistent with MLIR's idiom of handling special region terminators
169     // in the same code that handles the region-owning operation.
170     Operation *terminator = bb->getTerminator();
171     if (isa<omp::TerminatorOp, omp::YieldOp>(terminator)) {
172       builder.CreateBr(&continuationBlock);
173 
174       for (unsigned i = 0, e = terminator->getNumOperands(); i < e; ++i)
175         (*continuationBlockPHIs)[i]->addIncoming(
176             moduleTranslation.lookupValue(terminator->getOperand(i)), llvmBB);
177     }
178   }
179   // After all blocks have been traversed and values mapped, connect the PHI
180   // nodes to the results of preceding blocks.
181   LLVM::detail::connectPHINodes(region, moduleTranslation);
182 
183   // Remove the blocks and values defined in this region from the mapping since
184   // they are not visible outside of this region. This allows the same region to
185   // be converted several times, that is cloned, without clashes, and slightly
186   // speeds up the lookups.
187   moduleTranslation.forgetMapping(region);
188 }
189 
190 /// Convert ProcBindKind from MLIR-generated enum to LLVM enum.
191 static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) {
192   switch (kind) {
193   case omp::ClauseProcBindKind::Close:
194     return llvm::omp::ProcBindKind::OMP_PROC_BIND_close;
195   case omp::ClauseProcBindKind::Master:
196     return llvm::omp::ProcBindKind::OMP_PROC_BIND_master;
197   case omp::ClauseProcBindKind::Primary:
198     return llvm::omp::ProcBindKind::OMP_PROC_BIND_primary;
199   case omp::ClauseProcBindKind::Spread:
200     return llvm::omp::ProcBindKind::OMP_PROC_BIND_spread;
201   }
202   llvm_unreachable("Unknown ClauseProcBindKind kind");
203 }
204 
205 /// Converts the OpenMP parallel operation to LLVM IR.
206 static LogicalResult
207 convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
208                    LLVM::ModuleTranslation &moduleTranslation) {
209   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
210   // TODO: support error propagation in OpenMPIRBuilder and use it instead of
211   // relying on captured variables.
212   LogicalResult bodyGenStatus = success();
213 
214   auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
215                        llvm::BasicBlock &continuationBlock) {
216     // Save the alloca insertion point on ModuleTranslation stack for use in
217     // nested regions.
218     LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
219         moduleTranslation, allocaIP);
220 
221     // ParallelOp has only one region associated with it.
222     convertOmpOpRegions(opInst.getRegion(), "omp.par.region",
223                         *codeGenIP.getBlock(), continuationBlock, builder,
224                         moduleTranslation, bodyGenStatus);
225   };
226 
227   // TODO: Perform appropriate actions according to the data-sharing
228   // attribute (shared, private, firstprivate, ...) of variables.
229   // Currently defaults to shared.
230   auto privCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
231                     llvm::Value &, llvm::Value &vPtr,
232                     llvm::Value *&replacementValue) -> InsertPointTy {
233     replacementValue = &vPtr;
234 
235     return codeGenIP;
236   };
237 
238   // TODO: Perform finalization actions for variables. This has to be
239   // called for variables which have destructors/finalizers.
240   auto finiCB = [&](InsertPointTy codeGenIP) {};
241 
242   llvm::Value *ifCond = nullptr;
243   if (auto ifExprVar = opInst.if_expr_var())
244     ifCond = moduleTranslation.lookupValue(ifExprVar);
245   llvm::Value *numThreads = nullptr;
246   if (auto numThreadsVar = opInst.num_threads_var())
247     numThreads = moduleTranslation.lookupValue(numThreadsVar);
248   auto pbKind = llvm::omp::OMP_PROC_BIND_default;
249   if (auto bind = opInst.proc_bind_val())
250     pbKind = getProcBindKind(*bind);
251   // TODO: Is the Parallel construct cancellable?
252   bool isCancellable = false;
253 
254   // Ensure that the BasicBlock for the the parallel region is sparate from the
255   // function entry which we may need to insert allocas.
256   if (builder.GetInsertBlock() ==
257       &builder.GetInsertBlock()->getParent()->getEntryBlock()) {
258     assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() &&
259            "Assuming end of basic block");
260     llvm::BasicBlock *entryBB =
261         llvm::BasicBlock::Create(builder.getContext(), "parallel.entry",
262                                  builder.GetInsertBlock()->getParent(),
263                                  builder.GetInsertBlock()->getNextNode());
264     builder.CreateBr(entryBB);
265     builder.SetInsertPoint(entryBB);
266   }
267   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
268   builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createParallel(
269       ompLoc, findAllocaInsertPoint(builder, moduleTranslation), bodyGenCB,
270       privCB, finiCB, ifCond, numThreads, pbKind, isCancellable));
271 
272   return bodyGenStatus;
273 }
274 
275 /// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
276 static LogicalResult
277 convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder,
278                  LLVM::ModuleTranslation &moduleTranslation) {
279   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
280   // TODO: support error propagation in OpenMPIRBuilder and use it instead of
281   // relying on captured variables.
282   LogicalResult bodyGenStatus = success();
283 
284   auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
285                        llvm::BasicBlock &continuationBlock) {
286     // MasterOp has only one region associated with it.
287     auto &region = cast<omp::MasterOp>(opInst).getRegion();
288     convertOmpOpRegions(region, "omp.master.region", *codeGenIP.getBlock(),
289                         continuationBlock, builder, moduleTranslation,
290                         bodyGenStatus);
291   };
292 
293   // TODO: Perform finalization actions for variables. This has to be
294   // called for variables which have destructors/finalizers.
295   auto finiCB = [&](InsertPointTy codeGenIP) {};
296 
297   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
298   builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createMaster(
299       ompLoc, bodyGenCB, finiCB));
300   return success();
301 }
302 
303 /// Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder.
304 static LogicalResult
305 convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder,
306                    LLVM::ModuleTranslation &moduleTranslation) {
307   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
308   auto criticalOp = cast<omp::CriticalOp>(opInst);
309   // TODO: support error propagation in OpenMPIRBuilder and use it instead of
310   // relying on captured variables.
311   LogicalResult bodyGenStatus = success();
312 
313   auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
314                        llvm::BasicBlock &continuationBlock) {
315     // CriticalOp has only one region associated with it.
316     auto &region = cast<omp::CriticalOp>(opInst).getRegion();
317     convertOmpOpRegions(region, "omp.critical.region", *codeGenIP.getBlock(),
318                         continuationBlock, builder, moduleTranslation,
319                         bodyGenStatus);
320   };
321 
322   // TODO: Perform finalization actions for variables. This has to be
323   // called for variables which have destructors/finalizers.
324   auto finiCB = [&](InsertPointTy codeGenIP) {};
325 
326   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
327   llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
328   llvm::Constant *hint = nullptr;
329 
330   // If it has a name, it probably has a hint too.
331   if (criticalOp.nameAttr()) {
332     // The verifiers in OpenMP Dialect guarentee that all the pointers are
333     // non-null
334     auto symbolRef = criticalOp.nameAttr().cast<SymbolRefAttr>();
335     auto criticalDeclareOp =
336         SymbolTable::lookupNearestSymbolFrom<omp::CriticalDeclareOp>(criticalOp,
337                                                                      symbolRef);
338     hint =
339         llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext),
340                                static_cast<int>(criticalDeclareOp.hint_val()));
341   }
342   builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createCritical(
343       ompLoc, bodyGenCB, finiCB, criticalOp.name().getValueOr(""), hint));
344   return success();
345 }
346 
347 /// Returns a reduction declaration that corresponds to the given reduction
348 /// operation in the given container. Currently only supports reductions inside
349 /// WsLoopOp but can be easily extended.
350 static omp::ReductionDeclareOp findReductionDecl(omp::WsLoopOp container,
351                                                  omp::ReductionOp reduction) {
352   SymbolRefAttr reductionSymbol;
353   for (unsigned i = 0, e = container.getNumReductionVars(); i < e; ++i) {
354     if (container.reduction_vars()[i] != reduction.accumulator())
355       continue;
356     reductionSymbol = (*container.reductions())[i].cast<SymbolRefAttr>();
357     break;
358   }
359   assert(reductionSymbol &&
360          "reduction operation must be associated with a declaration");
361 
362   return SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>(
363       container, reductionSymbol);
364 }
365 
366 /// Populates `reductions` with reduction declarations used in the given loop.
367 static void
368 collectReductionDecls(omp::WsLoopOp loop,
369                       SmallVectorImpl<omp::ReductionDeclareOp> &reductions) {
370   Optional<ArrayAttr> attr = loop.reductions();
371   if (!attr)
372     return;
373 
374   reductions.reserve(reductions.size() + loop.getNumReductionVars());
375   for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
376     reductions.push_back(
377         SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>(
378             loop, symbolRef));
379   }
380 }
381 
382 /// Translates the blocks contained in the given region and appends them to at
383 /// the current insertion point of `builder`. The operations of the entry block
384 /// are appended to the current insertion block, which is not expected to have a
385 /// terminator. If set, `continuationBlockArgs` is populated with translated
386 /// values that correspond to the values omp.yield'ed from the region.
387 static LogicalResult inlineConvertOmpRegions(
388     Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
389     LLVM::ModuleTranslation &moduleTranslation,
390     SmallVectorImpl<llvm::Value *> *continuationBlockArgs = nullptr) {
391   if (region.empty())
392     return success();
393 
394   // Special case for single-block regions that don't create additional blocks:
395   // insert operations without creating additional blocks.
396   if (llvm::hasSingleElement(region)) {
397     moduleTranslation.mapBlock(&region.front(), builder.GetInsertBlock());
398     if (failed(moduleTranslation.convertBlock(
399             region.front(), /*ignoreArguments=*/true, builder)))
400       return failure();
401 
402     // The continuation arguments are simply the translated terminator operands.
403     if (continuationBlockArgs)
404       llvm::append_range(
405           *continuationBlockArgs,
406           moduleTranslation.lookupValues(region.front().back().getOperands()));
407 
408     // Drop the mapping that is no longer necessary so that the same region can
409     // be processed multiple times.
410     moduleTranslation.forgetMapping(region);
411     return success();
412   }
413 
414   // Create the continuation block manually instead of calling splitBlock
415   // because the current insertion block may not have a terminator.
416   llvm::BasicBlock *continuationBlock =
417       llvm::BasicBlock::Create(builder.getContext(), blockName + ".cont",
418                                builder.GetInsertBlock()->getParent(),
419                                builder.GetInsertBlock()->getNextNode());
420   builder.CreateBr(continuationBlock);
421 
422   LogicalResult bodyGenStatus = success();
423   SmallVector<llvm::PHINode *> phis;
424   convertOmpOpRegions(region, blockName, *builder.GetInsertBlock(),
425                       *continuationBlock, builder, moduleTranslation,
426                       bodyGenStatus, &phis);
427   if (failed(bodyGenStatus))
428     return failure();
429   if (continuationBlockArgs)
430     llvm::append_range(*continuationBlockArgs, phis);
431   builder.SetInsertPoint(continuationBlock,
432                          continuationBlock->getFirstInsertionPt());
433   return success();
434 }
435 
namespace {
/// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used to
/// store lambdas with capture.

/// Non-atomic reduction generator: takes an insertion point, two values and a
/// value out-parameter, and returns an insertion point.
using OwningReductionGen = std::function<llvm::OpenMPIRBuilder::InsertPointTy(
    llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *,
    llvm::Value *&)>;
/// Atomic reduction generator: takes an insertion point, a type and two
/// values, and returns an insertion point.
using OwningAtomicReductionGen =
    std::function<llvm::OpenMPIRBuilder::InsertPointTy(
        llvm::OpenMPIRBuilder::InsertPointTy, llvm::Type *, llvm::Value *,
        llvm::Value *)>;
} // namespace
447 
448 /// Create an OpenMPIRBuilder-compatible reduction generator for the given
449 /// reduction declaration. The generator uses `builder` but ignores its
450 /// insertion point.
451 static OwningReductionGen
452 makeReductionGen(omp::ReductionDeclareOp decl, llvm::IRBuilderBase &builder,
453                  LLVM::ModuleTranslation &moduleTranslation) {
454   // The lambda is mutable because we need access to non-const methods of decl
455   // (which aren't actually mutating it), and we must capture decl by-value to
456   // avoid the dangling reference after the parent function returns.
457   OwningReductionGen gen =
458       [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint,
459                 llvm::Value *lhs, llvm::Value *rhs,
460                 llvm::Value *&result) mutable {
461         Region &reductionRegion = decl.reductionRegion();
462         moduleTranslation.mapValue(reductionRegion.front().getArgument(0), lhs);
463         moduleTranslation.mapValue(reductionRegion.front().getArgument(1), rhs);
464         builder.restoreIP(insertPoint);
465         SmallVector<llvm::Value *> phis;
466         if (failed(inlineConvertOmpRegions(reductionRegion,
467                                            "omp.reduction.nonatomic.body",
468                                            builder, moduleTranslation, &phis)))
469           return llvm::OpenMPIRBuilder::InsertPointTy();
470         assert(phis.size() == 1);
471         result = phis[0];
472         return builder.saveIP();
473       };
474   return gen;
475 }
476 
477 /// Create an OpenMPIRBuilder-compatible atomic reduction generator for the
478 /// given reduction declaration. The generator uses `builder` but ignores its
479 /// insertion point. Returns null if there is no atomic region available in the
480 /// reduction declaration.
481 static OwningAtomicReductionGen
482 makeAtomicReductionGen(omp::ReductionDeclareOp decl,
483                        llvm::IRBuilderBase &builder,
484                        LLVM::ModuleTranslation &moduleTranslation) {
485   if (decl.atomicReductionRegion().empty())
486     return OwningAtomicReductionGen();
487 
488   // The lambda is mutable because we need access to non-const methods of decl
489   // (which aren't actually mutating it), and we must capture decl by-value to
490   // avoid the dangling reference after the parent function returns.
491   OwningAtomicReductionGen atomicGen =
492       [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, llvm::Type *,
493                 llvm::Value *lhs, llvm::Value *rhs) mutable {
494         Region &atomicRegion = decl.atomicReductionRegion();
495         moduleTranslation.mapValue(atomicRegion.front().getArgument(0), lhs);
496         moduleTranslation.mapValue(atomicRegion.front().getArgument(1), rhs);
497         builder.restoreIP(insertPoint);
498         SmallVector<llvm::Value *> phis;
499         if (failed(inlineConvertOmpRegions(atomicRegion,
500                                            "omp.reduction.atomic.body", builder,
501                                            moduleTranslation, &phis)))
502           return llvm::OpenMPIRBuilder::InsertPointTy();
503         assert(phis.empty());
504         return builder.saveIP();
505       };
506   return atomicGen;
507 }
508 
509 /// Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
510 static LogicalResult
511 convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder,
512                   LLVM::ModuleTranslation &moduleTranslation) {
513   auto orderedOp = cast<omp::OrderedOp>(opInst);
514 
515   omp::ClauseDepend dependType = *orderedOp.depend_type_val();
516   bool isDependSource = dependType == omp::ClauseDepend::dependsource;
517   unsigned numLoops = orderedOp.num_loops_val().getValue();
518   SmallVector<llvm::Value *> vecValues =
519       moduleTranslation.lookupValues(orderedOp.depend_vec_vars());
520 
521   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
522   size_t indexVecValues = 0;
523   while (indexVecValues < vecValues.size()) {
524     SmallVector<llvm::Value *> storeValues;
525     storeValues.reserve(numLoops);
526     for (unsigned i = 0; i < numLoops; i++) {
527       storeValues.push_back(vecValues[indexVecValues]);
528       indexVecValues++;
529     }
530     builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend(
531         ompLoc, findAllocaInsertPoint(builder, moduleTranslation), numLoops,
532         storeValues, ".cnt.addr", isDependSource));
533   }
534   return success();
535 }
536 
537 /// Converts an OpenMP 'ordered_region' operation into LLVM IR using
538 /// OpenMPIRBuilder.
539 static LogicalResult
540 convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder,
541                         LLVM::ModuleTranslation &moduleTranslation) {
542   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
543   auto orderedRegionOp = cast<omp::OrderedRegionOp>(opInst);
544 
545   // TODO: The code generation for ordered simd directive is not supported yet.
546   if (orderedRegionOp.simd())
547     return failure();
548 
549   // TODO: support error propagation in OpenMPIRBuilder and use it instead of
550   // relying on captured variables.
551   LogicalResult bodyGenStatus = success();
552 
553   auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
554                        llvm::BasicBlock &continuationBlock) {
555     // OrderedOp has only one region associated with it.
556     auto &region = cast<omp::OrderedRegionOp>(opInst).getRegion();
557     convertOmpOpRegions(region, "omp.ordered.region", *codeGenIP.getBlock(),
558                         continuationBlock, builder, moduleTranslation,
559                         bodyGenStatus);
560   };
561 
562   // TODO: Perform finalization actions for variables. This has to be
563   // called for variables which have destructors/finalizers.
564   auto finiCB = [&](InsertPointTy codeGenIP) {};
565 
566   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
567   builder.restoreIP(
568       moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd(
569           ompLoc, bodyGenCB, finiCB, !orderedRegionOp.simd()));
570   return bodyGenStatus;
571 }
572 
573 static LogicalResult
574 convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
575                    LLVM::ModuleTranslation &moduleTranslation) {
576   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
577   using StorableBodyGenCallbackTy =
578       llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
579 
580   auto sectionsOp = cast<omp::SectionsOp>(opInst);
581 
582   // TODO: Support the following clauses: private, firstprivate, lastprivate,
583   // reduction, allocate
584   if (!sectionsOp.reduction_vars().empty() || sectionsOp.reductions() ||
585       !sectionsOp.allocate_vars().empty() ||
586       !sectionsOp.allocators_vars().empty())
587     return emitError(sectionsOp.getLoc())
588            << "reduction and allocate clauses are not supported for sections "
589               "construct";
590 
591   LogicalResult bodyGenStatus = success();
592   SmallVector<StorableBodyGenCallbackTy> sectionCBs;
593 
594   for (Operation &op : *sectionsOp.region().begin()) {
595     auto sectionOp = dyn_cast<omp::SectionOp>(op);
596     if (!sectionOp) // omp.terminator
597       continue;
598 
599     Region &region = sectionOp.region();
600     auto sectionCB = [&region, &builder, &moduleTranslation, &bodyGenStatus](
601                          InsertPointTy allocaIP, InsertPointTy codeGenIP,
602                          llvm::BasicBlock &finiBB) {
603       builder.restoreIP(codeGenIP);
604       builder.CreateBr(&finiBB);
605       convertOmpOpRegions(region, "omp.section.region", *codeGenIP.getBlock(),
606                           finiBB, builder, moduleTranslation, bodyGenStatus);
607     };
608     sectionCBs.push_back(sectionCB);
609   }
610 
611   // No sections within omp.sections operation - skip generation. This situation
612   // is only possible if there is only a terminator operation inside the
613   // sections operation
614   if (sectionCBs.empty())
615     return success();
616 
617   assert(isa<omp::SectionOp>(*sectionsOp.region().op_begin()));
618 
619   // TODO: Perform appropriate actions according to the data-sharing
620   // attribute (shared, private, firstprivate, ...) of variables.
621   // Currently defaults to shared.
622   auto privCB = [&](InsertPointTy, InsertPointTy codeGenIP, llvm::Value &,
623                     llvm::Value &vPtr,
624                     llvm::Value *&replacementValue) -> InsertPointTy {
625     replacementValue = &vPtr;
626     return codeGenIP;
627   };
628 
629   // TODO: Perform finalization actions for variables. This has to be
630   // called for variables which have destructors/finalizers.
631   auto finiCB = [&](InsertPointTy codeGenIP) {};
632 
633   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
634   builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSections(
635       ompLoc, findAllocaInsertPoint(builder, moduleTranslation), sectionCBs,
636       privCB, finiCB, false, sectionsOp.nowait()));
637   return bodyGenStatus;
638 }
639 
640 /// Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder.
641 static LogicalResult
642 convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder,
643                  LLVM::ModuleTranslation &moduleTranslation) {
644   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
645   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
646   LogicalResult bodyGenStatus = success();
647   auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP,
648                     llvm::BasicBlock &continuationBB) {
649     convertOmpOpRegions(singleOp.region(), "omp.single.region",
650                         *codegenIP.getBlock(), continuationBB, builder,
651                         moduleTranslation, bodyGenStatus);
652   };
653   auto finiCB = [&](InsertPointTy codeGenIP) {};
654   builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSingle(
655       ompLoc, bodyCB, finiCB, singleOp.nowait(), /*DidIt=*/nullptr));
656   return bodyGenStatus;
657 }
658 
659 /// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
660 static LogicalResult
661 convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
662                  LLVM::ModuleTranslation &moduleTranslation) {
663   auto loop = cast<omp::WsLoopOp>(opInst);
664   // TODO: this should be in the op verifier instead.
665   if (loop.lowerBound().empty())
666     return failure();
667 
668   // Static is the default.
669   auto schedule =
670       loop.schedule_val().getValueOr(omp::ClauseScheduleKind::Static);
671 
672   // Find the loop configuration.
673   llvm::Value *step = moduleTranslation.lookupValue(loop.step()[0]);
674   llvm::Type *ivType = step->getType();
675   llvm::Value *chunk = nullptr;
676   if (loop.schedule_chunk_var()) {
677     llvm::Value *chunkVar =
678         moduleTranslation.lookupValue(loop.schedule_chunk_var());
679     llvm::Type *chunkVarType = chunkVar->getType();
680     assert(chunkVarType->isIntegerTy() &&
681            "chunk size must be one integer expression");
682     if (chunkVarType->getIntegerBitWidth() < ivType->getIntegerBitWidth())
683       chunk = builder.CreateSExt(chunkVar, ivType);
684     else if (chunkVarType->getIntegerBitWidth() > ivType->getIntegerBitWidth())
685       chunk = builder.CreateTrunc(chunkVar, ivType);
686     else
687       chunk = chunkVar;
688   }
689 
690   SmallVector<omp::ReductionDeclareOp> reductionDecls;
691   collectReductionDecls(loop, reductionDecls);
692   llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
693       findAllocaInsertPoint(builder, moduleTranslation);
694 
695   // Allocate space for privatized reduction variables.
696   SmallVector<llvm::Value *> privateReductionVariables;
697   DenseMap<Value, llvm::Value *> reductionVariableMap;
698   unsigned numReductions = loop.getNumReductionVars();
699   privateReductionVariables.reserve(numReductions);
700   if (numReductions != 0) {
701     llvm::IRBuilderBase::InsertPointGuard guard(builder);
702     builder.restoreIP(allocaIP);
703     for (unsigned i = 0; i < numReductions; ++i) {
704       auto reductionType =
705           loop.reduction_vars()[i].getType().cast<LLVM::LLVMPointerType>();
706       llvm::Value *var = builder.CreateAlloca(
707           moduleTranslation.convertType(reductionType.getElementType()));
708       privateReductionVariables.push_back(var);
709       reductionVariableMap.try_emplace(loop.reduction_vars()[i], var);
710     }
711   }
712 
713   // Store the mapping between reduction variables and their private copies on
714   // ModuleTranslation stack. It can be then recovered when translating
715   // omp.reduce operations in a separate call.
716   LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard(
717       moduleTranslation, reductionVariableMap);
718 
719   // Before the loop, store the initial values of reductions into reduction
720   // variables. Although this could be done after allocas, we don't want to mess
721   // up with the alloca insertion point.
722   for (unsigned i = 0; i < numReductions; ++i) {
723     SmallVector<llvm::Value *> phis;
724     if (failed(inlineConvertOmpRegions(reductionDecls[i].initializerRegion(),
725                                        "omp.reduction.neutral", builder,
726                                        moduleTranslation, &phis)))
727       return failure();
728     assert(phis.size() == 1 && "expected one value to be yielded from the "
729                                "reduction neutral element declaration region");
730     builder.CreateStore(phis[0], privateReductionVariables[i]);
731   }
732 
733   // Set up the source location value for OpenMP runtime.
734   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
735 
736   // Generator of the canonical loop body.
737   // TODO: support error propagation in OpenMPIRBuilder and use it instead of
738   // relying on captured variables.
739   SmallVector<llvm::CanonicalLoopInfo *> loopInfos;
740   SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints;
741   LogicalResult bodyGenStatus = success();
742   auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) {
743     // Make sure further conversions know about the induction variable.
744     moduleTranslation.mapValue(
745         loop.getRegion().front().getArgument(loopInfos.size()), iv);
746 
747     // Capture the body insertion point for use in nested loops. BodyIP of the
748     // CanonicalLoopInfo always points to the beginning of the entry block of
749     // the body.
750     bodyInsertPoints.push_back(ip);
751 
752     if (loopInfos.size() != loop.getNumLoops() - 1)
753       return;
754 
755     // Convert the body of the loop.
756     llvm::BasicBlock *entryBlock = ip.getBlock();
757     llvm::BasicBlock *exitBlock =
758         entryBlock->splitBasicBlock(ip.getPoint(), "omp.wsloop.exit");
759     convertOmpOpRegions(loop.region(), "omp.wsloop.region", *entryBlock,
760                         *exitBlock, builder, moduleTranslation, bodyGenStatus);
761   };
762 
763   // Delegate actual loop construction to the OpenMP IRBuilder.
764   // TODO: this currently assumes WsLoop is semantically similar to SCF loop,
765   // i.e. it has a positive step, uses signed integer semantics. Reconsider
766   // this code when WsLoop clearly supports more cases.
767   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
768   for (unsigned i = 0, e = loop.getNumLoops(); i < e; ++i) {
769     llvm::Value *lowerBound =
770         moduleTranslation.lookupValue(loop.lowerBound()[i]);
771     llvm::Value *upperBound =
772         moduleTranslation.lookupValue(loop.upperBound()[i]);
773     llvm::Value *step = moduleTranslation.lookupValue(loop.step()[i]);
774 
775     // Make sure loop trip count are emitted in the preheader of the outermost
776     // loop at the latest so that they are all available for the new collapsed
777     // loop will be created below.
778     llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
779     llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
780     if (i != 0) {
781       loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back());
782       computeIP = loopInfos.front()->getPreheaderIP();
783     }
784     loopInfos.push_back(ompBuilder->createCanonicalLoop(
785         loc, bodyGen, lowerBound, upperBound, step,
786         /*IsSigned=*/true, loop.inclusive(), computeIP));
787 
788     if (failed(bodyGenStatus))
789       return failure();
790   }
791 
792   // Collapse loops. Store the insertion point because LoopInfos may get
793   // invalidated.
794   llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP();
795   llvm::CanonicalLoopInfo *loopInfo =
796       ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
797 
798   allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
799 
800   bool isSimd = loop.simd_modifier();
801 
802   // The orderedVal refers to the value obtained from the ordered[(n)] clause.
803   //   orderedVal == -1: No ordered[(n)] clause specified.
804   //   orderedVal == 0: The ordered clause specified without a parameter.
805   //   orderedVal > 0: The ordered clause specified with a parameter (n).
806   // TODO: Handle doacross loop init when orderedVal is greater than 0.
807   int64_t orderedVal =
808       loop.ordered_val().hasValue() ? loop.ordered_val().getValue() : -1;
809   if (schedule == omp::ClauseScheduleKind::Static && orderedVal != 0) {
810     ompBuilder->applyWorkshareLoop(ompLoc.DL, loopInfo, allocaIP,
811                                    !loop.nowait(),
812                                    llvm::omp::OMP_SCHEDULE_Static, chunk);
813   } else {
814     llvm::omp::OMPScheduleType schedType;
815     switch (schedule) {
816     case omp::ClauseScheduleKind::Static:
817       if (loop.schedule_chunk_var())
818         schedType = llvm::omp::OMPScheduleType::OrderedStaticChunked;
819       else
820         schedType = llvm::omp::OMPScheduleType::OrderedStatic;
821       break;
822     case omp::ClauseScheduleKind::Dynamic:
823       if (orderedVal == 0)
824         schedType = llvm::omp::OMPScheduleType::OrderedDynamicChunked;
825       else
826         schedType = llvm::omp::OMPScheduleType::DynamicChunked;
827       break;
828     case omp::ClauseScheduleKind::Guided:
829       if (orderedVal == 0) {
830         schedType = llvm::omp::OMPScheduleType::OrderedGuidedChunked;
831       } else {
832         if (isSimd)
833           schedType = llvm::omp::OMPScheduleType::GuidedSimd;
834         else
835           schedType = llvm::omp::OMPScheduleType::GuidedChunked;
836       }
837       break;
838     case omp::ClauseScheduleKind::Auto:
839       if (orderedVal == 0)
840         schedType = llvm::omp::OMPScheduleType::OrderedAuto;
841       else
842         schedType = llvm::omp::OMPScheduleType::Auto;
843       break;
844     case omp::ClauseScheduleKind::Runtime:
845       if (orderedVal == 0) {
846         schedType = llvm::omp::OMPScheduleType::OrderedRuntime;
847       } else {
848         if (isSimd)
849           schedType = llvm::omp::OMPScheduleType::RuntimeSimd;
850         else
851           schedType = llvm::omp::OMPScheduleType::Runtime;
852       }
853       break;
854     default:
855       if (orderedVal == 0) {
856         schedType = llvm::omp::OMPScheduleType::OrderedStatic;
857         break;
858       }
859       llvm_unreachable("Unknown schedule value");
860       break;
861     }
862 
863     if (Optional<omp::ScheduleModifier> modifier = loop.schedule_modifier()) {
864       switch (*modifier) {
865       case omp::ScheduleModifier::monotonic:
866         schedType |= llvm::omp::OMPScheduleType::ModifierMonotonic;
867         break;
868       case omp::ScheduleModifier::nonmonotonic:
869         schedType |= llvm::omp::OMPScheduleType::ModifierNonmonotonic;
870         break;
871       default:
872         // Nothing to do here.
873         break;
874       }
875     } else {
876       // OpenMP 5.1, 2.11.4 Worksharing-Loop Construct, Description.
877       // If the static schedule kind is specified or if the ordered clause is
878       // specified, and if the nonmonotonic modifier is not specified, the
879       // effect is as if the monotonic modifier is specified. Otherwise, unless
880       // the monotonic modifier is specified, the effect is as if the
881       // nonmonotonic modifier is specified.
882       // The monotonic is used by default in openmp runtime library, so no need
883       // to set it.
884       if (!(schedType == llvm::omp::OMPScheduleType::OrderedStatic ||
885             schedType == llvm::omp::OMPScheduleType::OrderedStaticChunked))
886         schedType |= llvm::omp::OMPScheduleType::ModifierNonmonotonic;
887     }
888 
889     ompBuilder->applyDynamicWorkshareLoop(ompLoc.DL, loopInfo, allocaIP,
890                                           schedType, !loop.nowait(), chunk,
891                                           /*ordered*/ orderedVal == 0);
892   }
893 
894   // Continue building IR after the loop. Note that the LoopInfo returned by
895   // `collapseLoops` points inside the outermost loop and is intended for
896   // potential further loop transformations. Use the insertion point stored
897   // before collapsing loops instead.
898   builder.restoreIP(afterIP);
899 
900   // Process the reductions if required.
901   if (numReductions == 0)
902     return success();
903 
904   // Create the reduction generators. We need to own them here because
905   // ReductionInfo only accepts references to the generators.
906   SmallVector<OwningReductionGen> owningReductionGens;
907   SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
908   for (unsigned i = 0; i < numReductions; ++i) {
909     owningReductionGens.push_back(
910         makeReductionGen(reductionDecls[i], builder, moduleTranslation));
911     owningAtomicReductionGens.push_back(
912         makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation));
913   }
914 
915   // Collect the reduction information.
916   SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos;
917   reductionInfos.reserve(numReductions);
918   for (unsigned i = 0; i < numReductions; ++i) {
919     llvm::OpenMPIRBuilder::AtomicReductionGenTy atomicGen = nullptr;
920     if (owningAtomicReductionGens[i])
921       atomicGen = owningAtomicReductionGens[i];
922     auto reductionType =
923         loop.reduction_vars()[i].getType().cast<LLVM::LLVMPointerType>();
924     llvm::Value *variable =
925         moduleTranslation.lookupValue(loop.reduction_vars()[i]);
926     reductionInfos.push_back(
927         {moduleTranslation.convertType(reductionType.getElementType()),
928          variable, privateReductionVariables[i], owningReductionGens[i],
929          atomicGen});
930   }
931 
932   // The call to createReductions below expects the block to have a
933   // terminator. Create an unreachable instruction to serve as terminator
934   // and remove it later.
935   llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
936   builder.SetInsertPoint(tempTerminator);
937   llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint =
938       ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos,
939                                    loop.nowait());
940   if (!contInsertPoint.getBlock())
941     return loop->emitOpError() << "failed to convert reductions";
942   auto nextInsertionPoint =
943       ompBuilder->createBarrier(contInsertPoint, llvm::omp::OMPD_for);
944   tempTerminator->eraseFromParent();
945   builder.restoreIP(nextInsertionPoint);
946 
947   return success();
948 }
949 
950 /// Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
951 static LogicalResult
952 convertOmpSimdLoop(Operation &opInst, llvm::IRBuilderBase &builder,
953                    LLVM::ModuleTranslation &moduleTranslation) {
954   auto loop = cast<omp::SimdLoopOp>(opInst);
955 
956   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
957 
958   // Generator of the canonical loop body.
959   // TODO: support error propagation in OpenMPIRBuilder and use it instead of
960   // relying on captured variables.
961   SmallVector<llvm::CanonicalLoopInfo *> loopInfos;
962   SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints;
963   LogicalResult bodyGenStatus = success();
964   auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) {
965     // Make sure further conversions know about the induction variable.
966     moduleTranslation.mapValue(
967         loop.getRegion().front().getArgument(loopInfos.size()), iv);
968 
969     // Capture the body insertion point for use in nested loops. BodyIP of the
970     // CanonicalLoopInfo always points to the beginning of the entry block of
971     // the body.
972     bodyInsertPoints.push_back(ip);
973 
974     if (loopInfos.size() != loop.getNumLoops() - 1)
975       return;
976 
977     // Convert the body of the loop.
978     llvm::BasicBlock *entryBlock = ip.getBlock();
979     llvm::BasicBlock *exitBlock =
980         entryBlock->splitBasicBlock(ip.getPoint(), "omp.simdloop.exit");
981     convertOmpOpRegions(loop.region(), "omp.simdloop.region", *entryBlock,
982                         *exitBlock, builder, moduleTranslation, bodyGenStatus);
983   };
984 
985   // Delegate actual loop construction to the OpenMP IRBuilder.
986   // TODO: this currently assumes SimdLoop is semantically similar to SCF loop,
987   // i.e. it has a positive step, uses signed integer semantics. Reconsider
988   // this code when SimdLoop clearly supports more cases.
989   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
990   for (unsigned i = 0, e = loop.getNumLoops(); i < e; ++i) {
991     llvm::Value *lowerBound =
992         moduleTranslation.lookupValue(loop.lowerBound()[i]);
993     llvm::Value *upperBound =
994         moduleTranslation.lookupValue(loop.upperBound()[i]);
995     llvm::Value *step = moduleTranslation.lookupValue(loop.step()[i]);
996 
997     // Make sure loop trip count are emitted in the preheader of the outermost
998     // loop at the latest so that they are all available for the new collapsed
999     // loop will be created below.
1000     llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
1001     llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
1002     if (i != 0) {
1003       loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(),
1004                                                        ompLoc.DL);
1005       computeIP = loopInfos.front()->getPreheaderIP();
1006     }
1007     loopInfos.push_back(ompBuilder->createCanonicalLoop(
1008         loc, bodyGen, lowerBound, upperBound, step,
1009         /*IsSigned=*/true, /*Inclusive=*/true, computeIP));
1010 
1011     if (failed(bodyGenStatus))
1012       return failure();
1013   }
1014 
1015   // Collapse loops.
1016   llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP();
1017   llvm::CanonicalLoopInfo *loopInfo =
1018       ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
1019 
1020   ompBuilder->applySimd(ompLoc.DL, loopInfo);
1021 
1022   builder.restoreIP(afterIP);
1023   return success();
1024 }
1025 
1026 /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
1027 llvm::AtomicOrdering
1028 convertAtomicOrdering(Optional<omp::ClauseMemoryOrderKind> ao) {
1029   if (!ao)
1030     return llvm::AtomicOrdering::Monotonic; // Default Memory Ordering
1031 
1032   switch (*ao) {
1033   case omp::ClauseMemoryOrderKind::Seq_cst:
1034     return llvm::AtomicOrdering::SequentiallyConsistent;
1035   case omp::ClauseMemoryOrderKind::Acq_rel:
1036     return llvm::AtomicOrdering::AcquireRelease;
1037   case omp::ClauseMemoryOrderKind::Acquire:
1038     return llvm::AtomicOrdering::Acquire;
1039   case omp::ClauseMemoryOrderKind::Release:
1040     return llvm::AtomicOrdering::Release;
1041   case omp::ClauseMemoryOrderKind::Relaxed:
1042     return llvm::AtomicOrdering::Monotonic;
1043   }
1044   llvm_unreachable("Unknown ClauseMemoryOrderKind kind");
1045 }
1046 
1047 /// Convert omp.atomic.read operation to LLVM IR.
1048 static LogicalResult
1049 convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder,
1050                      LLVM::ModuleTranslation &moduleTranslation) {
1051 
1052   auto readOp = cast<omp::AtomicReadOp>(opInst);
1053   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1054 
1055   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1056 
1057   llvm::AtomicOrdering AO = convertAtomicOrdering(readOp.memory_order_val());
1058   llvm::Value *x = moduleTranslation.lookupValue(readOp.x());
1059   Type xTy = readOp.x().getType().cast<omp::PointerLikeType>().getElementType();
1060   llvm::Value *v = moduleTranslation.lookupValue(readOp.v());
1061   Type vTy = readOp.v().getType().cast<omp::PointerLikeType>().getElementType();
1062   llvm::OpenMPIRBuilder::AtomicOpValue V = {
1063       v, moduleTranslation.convertType(vTy), false, false};
1064   llvm::OpenMPIRBuilder::AtomicOpValue X = {
1065       x, moduleTranslation.convertType(xTy), false, false};
1066   builder.restoreIP(ompBuilder->createAtomicRead(ompLoc, X, V, AO));
1067   return success();
1068 }
1069 
1070 /// Converts an omp.atomic.write operation to LLVM IR.
1071 static LogicalResult
1072 convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder,
1073                       LLVM::ModuleTranslation &moduleTranslation) {
1074   auto writeOp = cast<omp::AtomicWriteOp>(opInst);
1075   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1076 
1077   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1078   llvm::AtomicOrdering ao = convertAtomicOrdering(writeOp.memory_order_val());
1079   llvm::Value *expr = moduleTranslation.lookupValue(writeOp.value());
1080   llvm::Value *dest = moduleTranslation.lookupValue(writeOp.address());
1081   llvm::Type *ty = moduleTranslation.convertType(writeOp.value().getType());
1082   llvm::OpenMPIRBuilder::AtomicOpValue x = {dest, ty, /*isSigned=*/false,
1083                                             /*isVolatile=*/false};
1084   builder.restoreIP(ompBuilder->createAtomicWrite(ompLoc, x, expr, ao));
1085   return success();
1086 }
1087 
1088 /// Converts an LLVM dialect binary operation to the corresponding enum value
1089 /// for `atomicrmw` supported binary operation.
1090 llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op) {
1091   return llvm::TypeSwitch<Operation *, llvm::AtomicRMWInst::BinOp>(&op)
1092       .Case([&](LLVM::AddOp) { return llvm::AtomicRMWInst::BinOp::Add; })
1093       .Case([&](LLVM::SubOp) { return llvm::AtomicRMWInst::BinOp::Sub; })
1094       .Case([&](LLVM::AndOp) { return llvm::AtomicRMWInst::BinOp::And; })
1095       .Case([&](LLVM::OrOp) { return llvm::AtomicRMWInst::BinOp::Or; })
1096       .Case([&](LLVM::XOrOp) { return llvm::AtomicRMWInst::BinOp::Xor; })
1097       .Case([&](LLVM::UMaxOp) { return llvm::AtomicRMWInst::BinOp::UMax; })
1098       .Case([&](LLVM::UMinOp) { return llvm::AtomicRMWInst::BinOp::UMin; })
1099       .Case([&](LLVM::FAddOp) { return llvm::AtomicRMWInst::BinOp::FAdd; })
1100       .Case([&](LLVM::FSubOp) { return llvm::AtomicRMWInst::BinOp::FSub; })
1101       .Default(llvm::AtomicRMWInst::BinOp::BAD_BINOP);
1102 }
1103 
1104 /// Converts an OpenMP atomic update operation using OpenMPIRBuilder.
1105 static LogicalResult
1106 convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst,
1107                        llvm::IRBuilderBase &builder,
1108                        LLVM::ModuleTranslation &moduleTranslation) {
1109   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1110   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1111 
1112   // Convert values and types.
1113   auto &innerOpList = opInst.region().front().getOperations();
1114   if (innerOpList.size() != 2)
1115     return opInst.emitError("exactly two operations are allowed inside an "
1116                             "atomic update region while lowering to LLVM IR");
1117 
1118   Operation &innerUpdateOp = innerOpList.front();
1119 
1120   if (innerUpdateOp.getNumOperands() != 2 ||
1121       !llvm::is_contained(innerUpdateOp.getOperands(),
1122                           opInst.getRegion().getArgument(0)))
1123     return opInst.emitError(
1124         "the update operation inside the region must be a binary operation and "
1125         "that update operation must have the region argument as an operand");
1126 
1127   llvm::AtomicRMWInst::BinOp binop = convertBinOpToAtomic(innerUpdateOp);
1128 
1129   bool isXBinopExpr =
1130       innerUpdateOp.getNumOperands() > 0 &&
1131       innerUpdateOp.getOperand(0) == opInst.getRegion().getArgument(0);
1132 
1133   mlir::Value mlirExpr = (isXBinopExpr ? innerUpdateOp.getOperand(1)
1134                                        : innerUpdateOp.getOperand(0));
1135   llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr);
1136   llvm::Value *llvmX = moduleTranslation.lookupValue(opInst.x());
1137   LLVM::LLVMPointerType mlirXType =
1138       opInst.x().getType().cast<LLVM::LLVMPointerType>();
1139   llvm::Type *llvmXElementType =
1140       moduleTranslation.convertType(mlirXType.getElementType());
1141   llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
1142                                                       /*isSigned=*/false,
1143                                                       /*isVolatile=*/false};
1144 
1145   llvm::AtomicOrdering atomicOrdering =
1146       convertAtomicOrdering(opInst.memory_order_val());
1147 
1148   // Generate update code.
1149   LogicalResult updateGenStatus = success();
1150   auto updateFn = [&opInst, &moduleTranslation, &updateGenStatus](
1151                       llvm::Value *atomicx,
1152                       llvm::IRBuilder<> &builder) -> llvm::Value * {
1153     Block &bb = *opInst.region().begin();
1154     moduleTranslation.mapValue(*opInst.region().args_begin(), atomicx);
1155     moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
1156     if (failed(moduleTranslation.convertBlock(bb, true, builder))) {
1157       updateGenStatus = (opInst.emitError()
1158                          << "unable to convert update operation to llvm IR");
1159       return nullptr;
1160     }
1161     omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
1162     assert(yieldop && yieldop.results().size() == 1 &&
1163            "terminator must be omp.yield op and it must have exactly one "
1164            "argument");
1165     return moduleTranslation.lookupValue(yieldop.results()[0]);
1166   };
1167 
1168   // Handle ambiguous alloca, if any.
1169   auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
1170   if (allocaIP.getPoint() == ompLoc.IP.getPoint()) {
1171     // Same point => split basic block and make them unambigous.
1172     llvm::UnreachableInst *unreachableInst = builder.CreateUnreachable();
1173     builder.SetInsertPoint(builder.GetInsertBlock()->splitBasicBlock(
1174         unreachableInst, "alloca_split"));
1175     ompLoc.IP = builder.saveIP();
1176     unreachableInst->eraseFromParent();
1177   }
1178   builder.restoreIP(ompBuilder->createAtomicUpdate(
1179       ompLoc, findAllocaInsertPoint(builder, moduleTranslation), llvmAtomicX,
1180       llvmExpr, atomicOrdering, binop, updateFn, isXBinopExpr));
1181   return updateGenStatus;
1182 }
1183 
1184 static LogicalResult
1185 convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
1186                         llvm::IRBuilderBase &builder,
1187                         LLVM::ModuleTranslation &moduleTranslation) {
1188   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1189   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1190   mlir::Value mlirExpr;
1191   bool isXBinopExpr = false, isPostfixUpdate = false;
1192   llvm::AtomicRMWInst::BinOp binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;
1193 
1194   omp::AtomicUpdateOp atomicUpdateOp = atomicCaptureOp.getAtomicUpdateOp();
1195   omp::AtomicWriteOp atomicWriteOp = atomicCaptureOp.getAtomicWriteOp();
1196 
1197   assert((atomicUpdateOp || atomicWriteOp) &&
1198          "internal op must be an atomic.update or atomic.write op");
1199 
1200   if (atomicWriteOp) {
1201     isPostfixUpdate = true;
1202     mlirExpr = atomicWriteOp.value();
1203   } else {
1204     isPostfixUpdate = atomicCaptureOp.getSecondOp() ==
1205                       atomicCaptureOp.getAtomicUpdateOp().getOperation();
1206     auto &innerOpList = atomicUpdateOp.region().front().getOperations();
1207     if (innerOpList.size() != 2)
1208       return atomicUpdateOp.emitError(
1209           "exactly two operations are allowed inside an "
1210           "atomic update region while lowering to LLVM IR");
1211     Operation *innerUpdateOp = atomicUpdateOp.getFirstOp();
1212     if (innerUpdateOp->getNumOperands() != 2 ||
1213         !llvm::is_contained(innerUpdateOp->getOperands(),
1214                             atomicUpdateOp.getRegion().getArgument(0)))
1215       return atomicUpdateOp.emitError(
1216           "the update operation inside the region must be a binary operation "
1217           "and that update operation must have the region argument as an "
1218           "operand");
1219     binop = convertBinOpToAtomic(*innerUpdateOp);
1220 
1221     isXBinopExpr = innerUpdateOp->getOperand(0) ==
1222                    atomicUpdateOp.getRegion().getArgument(0);
1223 
1224     mlirExpr = (isXBinopExpr ? innerUpdateOp->getOperand(1)
1225                              : innerUpdateOp->getOperand(0));
1226   }
1227 
1228   llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr);
1229   llvm::Value *llvmX =
1230       moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().x());
1231   llvm::Value *llvmV =
1232       moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().v());
1233   auto mlirXType = atomicCaptureOp.getAtomicReadOp()
1234                        .x()
1235                        .getType()
1236                        .cast<LLVM::LLVMPointerType>();
1237   llvm::Type *llvmXElementType =
1238       moduleTranslation.convertType(mlirXType.getElementType());
1239   llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
1240                                                       /*isSigned=*/false,
1241                                                       /*isVolatile=*/false};
1242   llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicV = {llvmV, llvmXElementType,
1243                                                       /*isSigned=*/false,
1244                                                       /*isVolatile=*/false};
1245 
1246   llvm::AtomicOrdering atomicOrdering =
1247       convertAtomicOrdering(atomicCaptureOp.memory_order_val());
1248 
1249   LogicalResult updateGenStatus = success();
1250   auto updateFn = [&](llvm::Value *atomicx,
1251                       llvm::IRBuilder<> &builder) -> llvm::Value * {
1252     if (atomicWriteOp)
1253       return moduleTranslation.lookupValue(atomicWriteOp.value());
1254     Block &bb = *atomicUpdateOp.region().begin();
1255     moduleTranslation.mapValue(*atomicUpdateOp.region().args_begin(), atomicx);
1256     moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
1257     if (failed(moduleTranslation.convertBlock(bb, true, builder))) {
1258       updateGenStatus = (atomicUpdateOp.emitError()
1259                          << "unable to convert update operation to llvm IR");
1260       return nullptr;
1261     }
1262     omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
1263     assert(yieldop && yieldop.results().size() == 1 &&
1264            "terminator must be omp.yield op and it must have exactly one "
1265            "argument");
1266     return moduleTranslation.lookupValue(yieldop.results()[0]);
1267   };
1268   // Handle ambiguous alloca, if any.
1269   auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
1270   if (allocaIP.getPoint() == ompLoc.IP.getPoint()) {
1271     // Same point => split basic block and make them unambigous.
1272     llvm::UnreachableInst *unreachableInst = builder.CreateUnreachable();
1273     builder.SetInsertPoint(builder.GetInsertBlock()->splitBasicBlock(
1274         unreachableInst, "alloca_split"));
1275     ompLoc.IP = builder.saveIP();
1276     unreachableInst->eraseFromParent();
1277   }
1278   builder.restoreIP(ompBuilder->createAtomicCapture(
1279       ompLoc, findAllocaInsertPoint(builder, moduleTranslation), llvmAtomicX,
1280       llvmAtomicV, llvmExpr, atomicOrdering, binop, updateFn, atomicUpdateOp,
1281       isPostfixUpdate, isXBinopExpr));
1282   return updateGenStatus;
1283 }
1284 
1285 /// Converts an OpenMP reduction operation using OpenMPIRBuilder. Expects the
1286 /// mapping between reduction variables and their private equivalents to have
1287 /// been stored on the ModuleTranslation stack. Currently only supports
1288 /// reduction within WsLoopOp, but can be easily extended.
1289 static LogicalResult
1290 convertOmpReductionOp(omp::ReductionOp reductionOp,
1291                       llvm::IRBuilderBase &builder,
1292                       LLVM::ModuleTranslation &moduleTranslation) {
1293   // Find the declaration that corresponds to the reduction op.
1294   auto reductionContainer = reductionOp->getParentOfType<omp::WsLoopOp>();
1295   omp::ReductionDeclareOp declaration =
1296       findReductionDecl(reductionContainer, reductionOp);
1297   assert(declaration && "could not find reduction declaration");
1298 
1299   // Retrieve the mapping between reduction variables and their private
1300   // equivalents.
1301   const DenseMap<Value, llvm::Value *> *reductionVariableMap = nullptr;
1302   moduleTranslation.stackWalk<OpenMPVarMappingStackFrame>(
1303       [&](const OpenMPVarMappingStackFrame &frame) {
1304         reductionVariableMap = &frame.mapping;
1305         return WalkResult::interrupt();
1306       });
1307   assert(reductionVariableMap && "couldn't find private reduction variables");
1308 
1309   // Translate the reduction operation by emitting the body of the corresponding
1310   // reduction declaration.
1311   Region &reductionRegion = declaration.reductionRegion();
1312   llvm::Value *privateReductionVar =
1313       reductionVariableMap->lookup(reductionOp.accumulator());
1314   llvm::Value *reductionVal = builder.CreateLoad(
1315       moduleTranslation.convertType(reductionOp.operand().getType()),
1316       privateReductionVar);
1317 
1318   moduleTranslation.mapValue(reductionRegion.front().getArgument(0),
1319                              reductionVal);
1320   moduleTranslation.mapValue(
1321       reductionRegion.front().getArgument(1),
1322       moduleTranslation.lookupValue(reductionOp.operand()));
1323 
1324   SmallVector<llvm::Value *> phis;
1325   if (failed(inlineConvertOmpRegions(reductionRegion, "omp.reduction.body",
1326                                      builder, moduleTranslation, &phis)))
1327     return failure();
1328   assert(phis.size() == 1 && "expected one value to be yielded from "
1329                              "the reduction body declaration region");
1330   builder.CreateStore(phis[0], privateReductionVar);
1331   return success();
1332 }
1333 
namespace {

/// Dialect interface through which operations of the OpenMP dialect are
/// converted to LLVM IR. The class holds no state of its own; it only
/// overrides `convertOperation`.
class OpenMPDialectLLVMIRTranslationInterface
    : public LLVMTranslationDialectInterface {
public:
  // Reuse the constructors of the base interface unchanged.
  using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface;

  /// Translates `op` to LLVM IR using the provided IR builder `builder` and
  /// saving the state in `moduleTranslation`. The returned LogicalResult
  /// tells the caller whether the translation succeeded.
  LogicalResult
  convertOperation(Operation *op, llvm::IRBuilderBase &builder,
                   LLVM::ModuleTranslation &moduleTranslation) const final;
};

} // namespace
1351 
1352 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR
1353 /// (including OpenMP runtime calls).
1354 LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
1355     Operation *op, llvm::IRBuilderBase &builder,
1356     LLVM::ModuleTranslation &moduleTranslation) const {
1357 
1358   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1359 
1360   return llvm::TypeSwitch<Operation *, LogicalResult>(op)
1361       .Case([&](omp::BarrierOp) {
1362         ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier);
1363         return success();
1364       })
1365       .Case([&](omp::TaskwaitOp) {
1366         ompBuilder->createTaskwait(builder.saveIP());
1367         return success();
1368       })
1369       .Case([&](omp::TaskyieldOp) {
1370         ompBuilder->createTaskyield(builder.saveIP());
1371         return success();
1372       })
1373       .Case([&](omp::FlushOp) {
1374         // No support in Openmp runtime function (__kmpc_flush) to accept
1375         // the argument list.
1376         // OpenMP standard states the following:
1377         //  "An implementation may implement a flush with a list by ignoring
1378         //   the list, and treating it the same as a flush without a list."
1379         //
1380         // The argument list is discarded so that, flush with a list is treated
1381         // same as a flush without a list.
1382         ompBuilder->createFlush(builder.saveIP());
1383         return success();
1384       })
1385       .Case([&](omp::ParallelOp op) {
1386         return convertOmpParallel(op, builder, moduleTranslation);
1387       })
1388       .Case([&](omp::ReductionOp reductionOp) {
1389         return convertOmpReductionOp(reductionOp, builder, moduleTranslation);
1390       })
1391       .Case([&](omp::MasterOp) {
1392         return convertOmpMaster(*op, builder, moduleTranslation);
1393       })
1394       .Case([&](omp::CriticalOp) {
1395         return convertOmpCritical(*op, builder, moduleTranslation);
1396       })
1397       .Case([&](omp::OrderedRegionOp) {
1398         return convertOmpOrderedRegion(*op, builder, moduleTranslation);
1399       })
1400       .Case([&](omp::OrderedOp) {
1401         return convertOmpOrdered(*op, builder, moduleTranslation);
1402       })
1403       .Case([&](omp::WsLoopOp) {
1404         return convertOmpWsLoop(*op, builder, moduleTranslation);
1405       })
1406       .Case([&](omp::SimdLoopOp) {
1407         return convertOmpSimdLoop(*op, builder, moduleTranslation);
1408       })
1409       .Case([&](omp::AtomicReadOp) {
1410         return convertOmpAtomicRead(*op, builder, moduleTranslation);
1411       })
1412       .Case([&](omp::AtomicWriteOp) {
1413         return convertOmpAtomicWrite(*op, builder, moduleTranslation);
1414       })
1415       .Case([&](omp::AtomicUpdateOp op) {
1416         return convertOmpAtomicUpdate(op, builder, moduleTranslation);
1417       })
1418       .Case([&](omp::AtomicCaptureOp op) {
1419         return convertOmpAtomicCapture(op, builder, moduleTranslation);
1420       })
1421       .Case([&](omp::SectionsOp) {
1422         return convertOmpSections(*op, builder, moduleTranslation);
1423       })
1424       .Case([&](omp::SingleOp op) {
1425         return convertOmpSingle(op, builder, moduleTranslation);
1426       })
1427       .Case<omp::YieldOp, omp::TerminatorOp, omp::ReductionDeclareOp,
1428             omp::CriticalDeclareOp>([](auto op) {
1429         // `yield` and `terminator` can be just omitted. The block structure
1430         // was created in the region that handles their parent operation.
1431         // `reduction.declare` will be used by reductions and is not
1432         // converted directly, skip it.
1433         // `critical.declare` is only used to declare names of critical
1434         // sections which will be used by `critical` ops and hence can be
1435         // ignored for lowering. The OpenMP IRBuilder will create unique
1436         // name for critical section names.
1437         return success();
1438       })
1439       .Default([&](Operation *inst) {
1440         return inst->emitError("unsupported OpenMP operation: ")
1441                << inst->getName();
1442       });
1443 }
1444 
1445 void mlir::registerOpenMPDialectTranslation(DialectRegistry &registry) {
1446   registry.insert<omp::OpenMPDialect>();
1447   registry.addExtension(+[](MLIRContext *ctx, omp::OpenMPDialect *dialect) {
1448     dialect->addInterfaces<OpenMPDialectLLVMIRTranslationInterface>();
1449   });
1450 }
1451 
1452 void mlir::registerOpenMPDialectTranslation(MLIRContext &context) {
1453   DialectRegistry registry;
1454   registerOpenMPDialectTranslation(registry);
1455   context.appendDialectRegistry(registry);
1456 }
1457