1 //===- OpenMPToLLVMIRTranslation.cpp - Translate OpenMP dialect to LLVM IR-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a translation between the MLIR OpenMP dialect and LLVM
10 // IR.
11 //
12 //===----------------------------------------------------------------------===//
13 #include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h"
14 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
15 #include "mlir/IR/BlockAndValueMapping.h"
16 #include "mlir/IR/Operation.h"
17 #include "mlir/Support/LLVM.h"
18 #include "mlir/Target/LLVMIR/ModuleTranslation.h"
19 
20 #include "llvm/ADT/SetVector.h"
21 #include "llvm/ADT/TypeSwitch.h"
22 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
23 #include "llvm/IR/DebugInfoMetadata.h"
24 #include "llvm/IR/IRBuilder.h"
25 
26 using namespace mlir;
27 
28 namespace {
29 /// ModuleTranslation stack frame for OpenMP operations. This keeps track of the
30 /// insertion points for allocas.
31 class OpenMPAllocaStackFrame
32     : public LLVM::ModuleTranslation::StackFrameBase<OpenMPAllocaStackFrame> {
33 public:
34   MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPAllocaStackFrame)
35 
36   explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP)
37       : allocaInsertPoint(allocaIP) {}
38   llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
39 };
40 
41 /// ModuleTranslation stack frame containing the partial mapping between MLIR
42 /// values and their LLVM IR equivalents.
43 class OpenMPVarMappingStackFrame
44     : public LLVM::ModuleTranslation::StackFrameBase<
45           OpenMPVarMappingStackFrame> {
46 public:
47   MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPVarMappingStackFrame)
48 
49   explicit OpenMPVarMappingStackFrame(
50       const DenseMap<Value, llvm::Value *> &mapping)
51       : mapping(mapping) {}
52 
53   DenseMap<Value, llvm::Value *> mapping;
54 };
55 } // namespace
56 
57 /// Find the insertion point for allocas given the current insertion point for
58 /// normal operations in the builder.
59 static llvm::OpenMPIRBuilder::InsertPointTy
60 findAllocaInsertPoint(llvm::IRBuilderBase &builder,
61                       const LLVM::ModuleTranslation &moduleTranslation) {
62   // If there is an alloca insertion point on stack, i.e. we are in a nested
63   // operation and a specific point was provided by some surrounding operation,
64   // use it.
65   llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
66   WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocaStackFrame>(
67       [&](const OpenMPAllocaStackFrame &frame) {
68         allocaInsertPoint = frame.allocaInsertPoint;
69         return WalkResult::interrupt();
70       });
71   if (walkResult.wasInterrupted())
72     return allocaInsertPoint;
73 
74   // Otherwise, insert to the entry block of the surrounding function.
75   // If the current IRBuilder InsertPoint is the function's entry, it cannot
76   // also be used for alloca insertion which would result in insertion order
77   // confusion. Create a new BasicBlock for the Builder and use the entry block
78   // for the allocs.
79   if (builder.GetInsertBlock() ==
80       &builder.GetInsertBlock()->getParent()->getEntryBlock()) {
81     assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() &&
82            "Assuming end of basic block");
83     llvm::BasicBlock *entryBB = llvm::BasicBlock::Create(
84         builder.getContext(), "entry", builder.GetInsertBlock()->getParent(),
85         builder.GetInsertBlock()->getNextNode());
86     builder.CreateBr(entryBB);
87     builder.SetInsertPoint(entryBB);
88   }
89 
90   llvm::BasicBlock &funcEntryBlock =
91       builder.GetInsertBlock()->getParent()->getEntryBlock();
92   return llvm::OpenMPIRBuilder::InsertPointTy(
93       &funcEntryBlock, funcEntryBlock.getFirstInsertionPt());
94 }
95 
/// Converts the given region that appears within an OpenMP dialect operation to
/// LLVM IR, retargeting the terminator of `sourceBlock` to the entry block of
/// the region, and creating a branch from any block with a successor-less
/// OpenMP terminator (`omp.terminator` or `omp.yield`) to `continuationBlock`.
/// Populates `continuationBlockPHIs` with the PHI nodes of the continuation
/// block if provided.
///
/// Preconditions (checked by assertions only):
///   - `sourceBlock` already ends in a terminator with exactly one successor,
///     and that successor is `continuationBlock`;
///   - all `omp.yield` terminators in the region yield the same number of
///     values with the same converted types;
///   - `continuationBlockPHIs` is non-null if any `omp.yield` has operands.
///
/// New LLVM blocks are named `blockName`. If the conversion of any block
/// fails, `bodyGenStatus` is set to failure and the function returns
/// immediately, without connecting PHI nodes or dropping the region's mapping.
static void convertOmpOpRegions(
    Region &region, StringRef blockName, llvm::BasicBlock &sourceBlock,
    llvm::BasicBlock &continuationBlock, llvm::IRBuilderBase &builder,
    LLVM::ModuleTranslation &moduleTranslation, LogicalResult &bodyGenStatus,
    SmallVectorImpl<llvm::PHINode *> *continuationBlockPHIs = nullptr) {
  llvm::LLVMContext &llvmContext = builder.getContext();
  // Create one LLVM block per MLIR block up front and register the mapping so
  // that branches between blocks of the region can be resolved later.
  for (Block &bb : region) {
    llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create(
        llvmContext, blockName, builder.GetInsertBlock()->getParent(),
        builder.GetInsertBlock()->getNextNode());
    moduleTranslation.mapBlock(&bb, llvmBB);
  }

  llvm::Instruction *sourceTerminator = sourceBlock.getTerminator();

  // Terminators (namely YieldOp) may be forwarding values to the region that
  // need to be available in the continuation block. Collect the types of these
  // operands in preparation of creating PHI nodes.
  SmallVector<llvm::Type *> continuationBlockPHITypes;
  bool operandsProcessed = false;
  // Number of yielding blocks; used as the reserved number of incoming values
  // for each PHI node created below.
  unsigned numYields = 0;
  for (Block &bb : region.getBlocks()) {
    if (omp::YieldOp yield = dyn_cast<omp::YieldOp>(bb.getTerminator())) {
      if (!operandsProcessed) {
        // The first yield encountered defines the expected operand types.
        for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
          continuationBlockPHITypes.push_back(
              moduleTranslation.convertType(yield->getOperand(i).getType()));
        }
        operandsProcessed = true;
      } else {
        // Every further yield must agree with the first one.
        assert(continuationBlockPHITypes.size() == yield->getNumOperands() &&
               "mismatching number of values yielded from the region");
        for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
          llvm::Type *operandType =
              moduleTranslation.convertType(yield->getOperand(i).getType());
          (void)operandType;
          assert(continuationBlockPHITypes[i] == operandType &&
                 "values of mismatching types yielded from the region");
        }
      }
      numYields++;
    }
  }

  // Insert PHI nodes in the continuation block for any values forwarded by the
  // terminators in this region.
  if (!continuationBlockPHITypes.empty())
    assert(
        continuationBlockPHIs &&
        "expected continuation block PHIs if converted regions yield values");
  if (continuationBlockPHIs) {
    // The guard restores the builder's insertion point once the PHI nodes have
    // been created at the beginning of the continuation block.
    llvm::IRBuilderBase::InsertPointGuard guard(builder);
    continuationBlockPHIs->reserve(continuationBlockPHITypes.size());
    builder.SetInsertPoint(&continuationBlock, continuationBlock.begin());
    for (llvm::Type *ty : continuationBlockPHITypes)
      continuationBlockPHIs->push_back(builder.CreatePHI(ty, numYields));
  }

  // Convert blocks one by one in topological order to ensure
  // defs are converted before uses.
  SetVector<Block *> blocks =
      LLVM::detail::getTopologicallySortedBlocks(region);
  for (Block *bb : blocks) {
    llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb);
    // Retarget the branch of the entry block to the entry block of the
    // converted region (regions are single-entry).
    if (bb->isEntryBlock()) {
      assert(sourceTerminator->getNumSuccessors() == 1 &&
             "provided entry block has multiple successors");
      assert(sourceTerminator->getSuccessor(0) == &continuationBlock &&
             "ContinuationBlock is not the successor of the entry block");
      sourceTerminator->setSuccessor(0, llvmBB);
    }

    // Restore the builder's insertion point at the end of each iteration (and
    // on the early return below).
    llvm::IRBuilderBase::InsertPointGuard guard(builder);
    if (failed(
            moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder))) {
      bodyGenStatus = failure();
      return;
    }

    // Special handling for `omp.yield` and `omp.terminator` (we may have more
    // than one): they return the control to the parent OpenMP dialect operation
    // so replace them with the branch to the continuation block. We handle this
    // here to avoid relying on inter-function communication through the
    // ModuleTranslation class to set up the correct insertion point. This is
    // also consistent with MLIR's idiom of handling special region terminators
    // in the same code that handles the region-owning operation.
    Operation *terminator = bb->getTerminator();
    if (isa<omp::TerminatorOp, omp::YieldOp>(terminator)) {
      builder.CreateBr(&continuationBlock);

      // Forward each terminator operand to the matching continuation PHI.
      // `continuationBlockPHIs` is only dereferenced when the terminator has
      // operands, which the assertion above requires it to be non-null for.
      for (unsigned i = 0, e = terminator->getNumOperands(); i < e; ++i)
        (*continuationBlockPHIs)[i]->addIncoming(
            moduleTranslation.lookupValue(terminator->getOperand(i)), llvmBB);
    }
  }
  // After all blocks have been traversed and values mapped, connect the PHI
  // nodes to the results of preceding blocks.
  LLVM::detail::connectPHINodes(region, moduleTranslation);

  // Remove the blocks and values defined in this region from the mapping since
  // they are not visible outside of this region. This allows the same region to
  // be converted several times, that is cloned, without clashes, and slightly
  // speeds up the lookups.
  moduleTranslation.forgetMapping(region);
}
208 
209 /// Convert ProcBindKind from MLIR-generated enum to LLVM enum.
210 static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) {
211   switch (kind) {
212   case omp::ClauseProcBindKind::Close:
213     return llvm::omp::ProcBindKind::OMP_PROC_BIND_close;
214   case omp::ClauseProcBindKind::Master:
215     return llvm::omp::ProcBindKind::OMP_PROC_BIND_master;
216   case omp::ClauseProcBindKind::Primary:
217     return llvm::omp::ProcBindKind::OMP_PROC_BIND_primary;
218   case omp::ClauseProcBindKind::Spread:
219     return llvm::omp::ProcBindKind::OMP_PROC_BIND_spread;
220   }
221   llvm_unreachable("Unknown ClauseProcBindKind kind");
222 }
223 
224 /// Converts the OpenMP parallel operation to LLVM IR.
225 static LogicalResult
226 convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
227                    LLVM::ModuleTranslation &moduleTranslation) {
228   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
229   // TODO: support error propagation in OpenMPIRBuilder and use it instead of
230   // relying on captured variables.
231   LogicalResult bodyGenStatus = success();
232 
233   auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
234                        llvm::BasicBlock &continuationBlock) {
235     // Save the alloca insertion point on ModuleTranslation stack for use in
236     // nested regions.
237     LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
238         moduleTranslation, allocaIP);
239 
240     // ParallelOp has only one region associated with it.
241     convertOmpOpRegions(opInst.getRegion(), "omp.par.region",
242                         *codeGenIP.getBlock(), continuationBlock, builder,
243                         moduleTranslation, bodyGenStatus);
244   };
245 
246   // TODO: Perform appropriate actions according to the data-sharing
247   // attribute (shared, private, firstprivate, ...) of variables.
248   // Currently defaults to shared.
249   auto privCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
250                     llvm::Value &, llvm::Value &vPtr,
251                     llvm::Value *&replacementValue) -> InsertPointTy {
252     replacementValue = &vPtr;
253 
254     return codeGenIP;
255   };
256 
257   // TODO: Perform finalization actions for variables. This has to be
258   // called for variables which have destructors/finalizers.
259   auto finiCB = [&](InsertPointTy codeGenIP) {};
260 
261   llvm::Value *ifCond = nullptr;
262   if (auto ifExprVar = opInst.if_expr_var())
263     ifCond = moduleTranslation.lookupValue(ifExprVar);
264   llvm::Value *numThreads = nullptr;
265   if (auto numThreadsVar = opInst.num_threads_var())
266     numThreads = moduleTranslation.lookupValue(numThreadsVar);
267   auto pbKind = llvm::omp::OMP_PROC_BIND_default;
268   if (auto bind = opInst.proc_bind_val())
269     pbKind = getProcBindKind(*bind);
270   // TODO: Is the Parallel construct cancellable?
271   bool isCancellable = false;
272 
273   llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
274       findAllocaInsertPoint(builder, moduleTranslation);
275   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
276   builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createParallel(
277       ompLoc, allocaIP, bodyGenCB, privCB, finiCB, ifCond, numThreads, pbKind,
278       isCancellable));
279 
280   return bodyGenStatus;
281 }
282 
283 /// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
284 static LogicalResult
285 convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder,
286                  LLVM::ModuleTranslation &moduleTranslation) {
287   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
288   // TODO: support error propagation in OpenMPIRBuilder and use it instead of
289   // relying on captured variables.
290   LogicalResult bodyGenStatus = success();
291 
292   auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
293                        llvm::BasicBlock &continuationBlock) {
294     // MasterOp has only one region associated with it.
295     auto &region = cast<omp::MasterOp>(opInst).getRegion();
296     convertOmpOpRegions(region, "omp.master.region", *codeGenIP.getBlock(),
297                         continuationBlock, builder, moduleTranslation,
298                         bodyGenStatus);
299   };
300 
301   // TODO: Perform finalization actions for variables. This has to be
302   // called for variables which have destructors/finalizers.
303   auto finiCB = [&](InsertPointTy codeGenIP) {};
304 
305   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
306   builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createMaster(
307       ompLoc, bodyGenCB, finiCB));
308   return success();
309 }
310 
311 /// Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder.
312 static LogicalResult
313 convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder,
314                    LLVM::ModuleTranslation &moduleTranslation) {
315   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
316   auto criticalOp = cast<omp::CriticalOp>(opInst);
317   // TODO: support error propagation in OpenMPIRBuilder and use it instead of
318   // relying on captured variables.
319   LogicalResult bodyGenStatus = success();
320 
321   auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
322                        llvm::BasicBlock &continuationBlock) {
323     // CriticalOp has only one region associated with it.
324     auto &region = cast<omp::CriticalOp>(opInst).getRegion();
325     convertOmpOpRegions(region, "omp.critical.region", *codeGenIP.getBlock(),
326                         continuationBlock, builder, moduleTranslation,
327                         bodyGenStatus);
328   };
329 
330   // TODO: Perform finalization actions for variables. This has to be
331   // called for variables which have destructors/finalizers.
332   auto finiCB = [&](InsertPointTy codeGenIP) {};
333 
334   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
335   llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
336   llvm::Constant *hint = nullptr;
337 
338   // If it has a name, it probably has a hint too.
339   if (criticalOp.nameAttr()) {
340     // The verifiers in OpenMP Dialect guarentee that all the pointers are
341     // non-null
342     auto symbolRef = criticalOp.nameAttr().cast<SymbolRefAttr>();
343     auto criticalDeclareOp =
344         SymbolTable::lookupNearestSymbolFrom<omp::CriticalDeclareOp>(criticalOp,
345                                                                      symbolRef);
346     hint =
347         llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext),
348                                static_cast<int>(criticalDeclareOp.hint_val()));
349   }
350   builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createCritical(
351       ompLoc, bodyGenCB, finiCB, criticalOp.name().getValueOr(""), hint));
352   return success();
353 }
354 
355 /// Returns a reduction declaration that corresponds to the given reduction
356 /// operation in the given container. Currently only supports reductions inside
357 /// WsLoopOp but can be easily extended.
358 static omp::ReductionDeclareOp findReductionDecl(omp::WsLoopOp container,
359                                                  omp::ReductionOp reduction) {
360   SymbolRefAttr reductionSymbol;
361   for (unsigned i = 0, e = container.getNumReductionVars(); i < e; ++i) {
362     if (container.reduction_vars()[i] != reduction.accumulator())
363       continue;
364     reductionSymbol = (*container.reductions())[i].cast<SymbolRefAttr>();
365     break;
366   }
367   assert(reductionSymbol &&
368          "reduction operation must be associated with a declaration");
369 
370   return SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>(
371       container, reductionSymbol);
372 }
373 
374 /// Populates `reductions` with reduction declarations used in the given loop.
375 static void
376 collectReductionDecls(omp::WsLoopOp loop,
377                       SmallVectorImpl<omp::ReductionDeclareOp> &reductions) {
378   Optional<ArrayAttr> attr = loop.reductions();
379   if (!attr)
380     return;
381 
382   reductions.reserve(reductions.size() + loop.getNumReductionVars());
383   for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
384     reductions.push_back(
385         SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>(
386             loop, symbolRef));
387   }
388 }
389 
390 /// Translates the blocks contained in the given region and appends them to at
391 /// the current insertion point of `builder`. The operations of the entry block
392 /// are appended to the current insertion block, which is not expected to have a
393 /// terminator. If set, `continuationBlockArgs` is populated with translated
394 /// values that correspond to the values omp.yield'ed from the region.
395 static LogicalResult inlineConvertOmpRegions(
396     Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
397     LLVM::ModuleTranslation &moduleTranslation,
398     SmallVectorImpl<llvm::Value *> *continuationBlockArgs = nullptr) {
399   if (region.empty())
400     return success();
401 
402   // Special case for single-block regions that don't create additional blocks:
403   // insert operations without creating additional blocks.
404   if (llvm::hasSingleElement(region)) {
405     moduleTranslation.mapBlock(&region.front(), builder.GetInsertBlock());
406     if (failed(moduleTranslation.convertBlock(
407             region.front(), /*ignoreArguments=*/true, builder)))
408       return failure();
409 
410     // The continuation arguments are simply the translated terminator operands.
411     if (continuationBlockArgs)
412       llvm::append_range(
413           *continuationBlockArgs,
414           moduleTranslation.lookupValues(region.front().back().getOperands()));
415 
416     // Drop the mapping that is no longer necessary so that the same region can
417     // be processed multiple times.
418     moduleTranslation.forgetMapping(region);
419     return success();
420   }
421 
422   // Create the continuation block manually instead of calling splitBlock
423   // because the current insertion block may not have a terminator.
424   llvm::BasicBlock *continuationBlock =
425       llvm::BasicBlock::Create(builder.getContext(), blockName + ".cont",
426                                builder.GetInsertBlock()->getParent(),
427                                builder.GetInsertBlock()->getNextNode());
428   builder.CreateBr(continuationBlock);
429 
430   LogicalResult bodyGenStatus = success();
431   SmallVector<llvm::PHINode *> phis;
432   convertOmpOpRegions(region, blockName, *builder.GetInsertBlock(),
433                       *continuationBlock, builder, moduleTranslation,
434                       bodyGenStatus, &phis);
435   if (failed(bodyGenStatus))
436     return failure();
437   if (continuationBlockArgs)
438     llvm::append_range(*continuationBlockArgs, phis);
439   builder.SetInsertPoint(continuationBlock,
440                          continuationBlock->getFirstInsertionPt());
441   return success();
442 }
443 
namespace {
/// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used to
/// store lambdas with capture.
///
/// Non-atomic generator. As used by makeReductionGen in this file, the
/// arguments are: the insertion point at which to emit the reduction, the two
/// values to combine (lhs, rhs), and an output reference that receives the
/// combined value. The returned insertion point is where code generation
/// continues; a default-constructed insertion point signals failure.
using OwningReductionGen = std::function<llvm::OpenMPIRBuilder::InsertPointTy(
    llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *,
    llvm::Value *&)>;
/// Atomic generator. As used by makeAtomicReductionGen in this file, the
/// arguments are: the insertion point at which to emit the reduction, a type
/// (ignored by that generator), and the two values bound to the arguments of
/// the atomic reduction region (lhs, rhs). The returned insertion point is
/// where code generation continues; a default-constructed insertion point
/// signals failure.
using OwningAtomicReductionGen =
    std::function<llvm::OpenMPIRBuilder::InsertPointTy(
        llvm::OpenMPIRBuilder::InsertPointTy, llvm::Type *, llvm::Value *,
        llvm::Value *)>;
} // namespace
455 
456 /// Create an OpenMPIRBuilder-compatible reduction generator for the given
457 /// reduction declaration. The generator uses `builder` but ignores its
458 /// insertion point.
459 static OwningReductionGen
460 makeReductionGen(omp::ReductionDeclareOp decl, llvm::IRBuilderBase &builder,
461                  LLVM::ModuleTranslation &moduleTranslation) {
462   // The lambda is mutable because we need access to non-const methods of decl
463   // (which aren't actually mutating it), and we must capture decl by-value to
464   // avoid the dangling reference after the parent function returns.
465   OwningReductionGen gen =
466       [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint,
467                 llvm::Value *lhs, llvm::Value *rhs,
468                 llvm::Value *&result) mutable {
469         Region &reductionRegion = decl.reductionRegion();
470         moduleTranslation.mapValue(reductionRegion.front().getArgument(0), lhs);
471         moduleTranslation.mapValue(reductionRegion.front().getArgument(1), rhs);
472         builder.restoreIP(insertPoint);
473         SmallVector<llvm::Value *> phis;
474         if (failed(inlineConvertOmpRegions(reductionRegion,
475                                            "omp.reduction.nonatomic.body",
476                                            builder, moduleTranslation, &phis)))
477           return llvm::OpenMPIRBuilder::InsertPointTy();
478         assert(phis.size() == 1);
479         result = phis[0];
480         return builder.saveIP();
481       };
482   return gen;
483 }
484 
485 /// Create an OpenMPIRBuilder-compatible atomic reduction generator for the
486 /// given reduction declaration. The generator uses `builder` but ignores its
487 /// insertion point. Returns null if there is no atomic region available in the
488 /// reduction declaration.
489 static OwningAtomicReductionGen
490 makeAtomicReductionGen(omp::ReductionDeclareOp decl,
491                        llvm::IRBuilderBase &builder,
492                        LLVM::ModuleTranslation &moduleTranslation) {
493   if (decl.atomicReductionRegion().empty())
494     return OwningAtomicReductionGen();
495 
496   // The lambda is mutable because we need access to non-const methods of decl
497   // (which aren't actually mutating it), and we must capture decl by-value to
498   // avoid the dangling reference after the parent function returns.
499   OwningAtomicReductionGen atomicGen =
500       [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, llvm::Type *,
501                 llvm::Value *lhs, llvm::Value *rhs) mutable {
502         Region &atomicRegion = decl.atomicReductionRegion();
503         moduleTranslation.mapValue(atomicRegion.front().getArgument(0), lhs);
504         moduleTranslation.mapValue(atomicRegion.front().getArgument(1), rhs);
505         builder.restoreIP(insertPoint);
506         SmallVector<llvm::Value *> phis;
507         if (failed(inlineConvertOmpRegions(atomicRegion,
508                                            "omp.reduction.atomic.body", builder,
509                                            moduleTranslation, &phis)))
510           return llvm::OpenMPIRBuilder::InsertPointTy();
511         assert(phis.empty());
512         return builder.saveIP();
513       };
514   return atomicGen;
515 }
516 
517 /// Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
518 static LogicalResult
519 convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder,
520                   LLVM::ModuleTranslation &moduleTranslation) {
521   auto orderedOp = cast<omp::OrderedOp>(opInst);
522 
523   omp::ClauseDepend dependType = *orderedOp.depend_type_val();
524   bool isDependSource = dependType == omp::ClauseDepend::dependsource;
525   unsigned numLoops = orderedOp.num_loops_val().getValue();
526   SmallVector<llvm::Value *> vecValues =
527       moduleTranslation.lookupValues(orderedOp.depend_vec_vars());
528 
529   size_t indexVecValues = 0;
530   while (indexVecValues < vecValues.size()) {
531     SmallVector<llvm::Value *> storeValues;
532     storeValues.reserve(numLoops);
533     for (unsigned i = 0; i < numLoops; i++) {
534       storeValues.push_back(vecValues[indexVecValues]);
535       indexVecValues++;
536     }
537     llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
538         findAllocaInsertPoint(builder, moduleTranslation);
539     llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
540     builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend(
541         ompLoc, allocaIP, numLoops, storeValues, ".cnt.addr", isDependSource));
542   }
543   return success();
544 }
545 
546 /// Converts an OpenMP 'ordered_region' operation into LLVM IR using
547 /// OpenMPIRBuilder.
548 static LogicalResult
549 convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder,
550                         LLVM::ModuleTranslation &moduleTranslation) {
551   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
552   auto orderedRegionOp = cast<omp::OrderedRegionOp>(opInst);
553 
554   // TODO: The code generation for ordered simd directive is not supported yet.
555   if (orderedRegionOp.simd())
556     return failure();
557 
558   // TODO: support error propagation in OpenMPIRBuilder and use it instead of
559   // relying on captured variables.
560   LogicalResult bodyGenStatus = success();
561 
562   auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
563                        llvm::BasicBlock &continuationBlock) {
564     // OrderedOp has only one region associated with it.
565     auto &region = cast<omp::OrderedRegionOp>(opInst).getRegion();
566     convertOmpOpRegions(region, "omp.ordered.region", *codeGenIP.getBlock(),
567                         continuationBlock, builder, moduleTranslation,
568                         bodyGenStatus);
569   };
570 
571   // TODO: Perform finalization actions for variables. This has to be
572   // called for variables which have destructors/finalizers.
573   auto finiCB = [&](InsertPointTy codeGenIP) {};
574 
575   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
576   builder.restoreIP(
577       moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd(
578           ompLoc, bodyGenCB, finiCB, !orderedRegionOp.simd()));
579   return bodyGenStatus;
580 }
581 
582 static LogicalResult
583 convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
584                    LLVM::ModuleTranslation &moduleTranslation) {
585   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
586   using StorableBodyGenCallbackTy =
587       llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
588 
589   auto sectionsOp = cast<omp::SectionsOp>(opInst);
590 
591   // TODO: Support the following clauses: private, firstprivate, lastprivate,
592   // reduction, allocate
593   if (!sectionsOp.reduction_vars().empty() || sectionsOp.reductions() ||
594       !sectionsOp.allocate_vars().empty() ||
595       !sectionsOp.allocators_vars().empty())
596     return emitError(sectionsOp.getLoc())
597            << "reduction and allocate clauses are not supported for sections "
598               "construct";
599 
600   LogicalResult bodyGenStatus = success();
601   SmallVector<StorableBodyGenCallbackTy> sectionCBs;
602 
603   for (Operation &op : *sectionsOp.region().begin()) {
604     auto sectionOp = dyn_cast<omp::SectionOp>(op);
605     if (!sectionOp) // omp.terminator
606       continue;
607 
608     Region &region = sectionOp.region();
609     auto sectionCB = [&region, &builder, &moduleTranslation, &bodyGenStatus](
610                          InsertPointTy allocaIP, InsertPointTy codeGenIP,
611                          llvm::BasicBlock &finiBB) {
612       builder.restoreIP(codeGenIP);
613       builder.CreateBr(&finiBB);
614       convertOmpOpRegions(region, "omp.section.region", *codeGenIP.getBlock(),
615                           finiBB, builder, moduleTranslation, bodyGenStatus);
616     };
617     sectionCBs.push_back(sectionCB);
618   }
619 
620   // No sections within omp.sections operation - skip generation. This situation
621   // is only possible if there is only a terminator operation inside the
622   // sections operation
623   if (sectionCBs.empty())
624     return success();
625 
626   assert(isa<omp::SectionOp>(*sectionsOp.region().op_begin()));
627 
628   // TODO: Perform appropriate actions according to the data-sharing
629   // attribute (shared, private, firstprivate, ...) of variables.
630   // Currently defaults to shared.
631   auto privCB = [&](InsertPointTy, InsertPointTy codeGenIP, llvm::Value &,
632                     llvm::Value &vPtr,
633                     llvm::Value *&replacementValue) -> InsertPointTy {
634     replacementValue = &vPtr;
635     return codeGenIP;
636   };
637 
638   // TODO: Perform finalization actions for variables. This has to be
639   // called for variables which have destructors/finalizers.
640   auto finiCB = [&](InsertPointTy codeGenIP) {};
641 
642   llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
643       findAllocaInsertPoint(builder, moduleTranslation);
644   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
645   builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSections(
646       ompLoc, allocaIP, sectionCBs, privCB, finiCB, false,
647       sectionsOp.nowait()));
648   return bodyGenStatus;
649 }
650 
651 /// Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder.
652 static LogicalResult
653 convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder,
654                  LLVM::ModuleTranslation &moduleTranslation) {
655   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
656   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
657   LogicalResult bodyGenStatus = success();
658   auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP,
659                     llvm::BasicBlock &continuationBB) {
660     convertOmpOpRegions(singleOp.region(), "omp.single.region",
661                         *codegenIP.getBlock(), continuationBB, builder,
662                         moduleTranslation, bodyGenStatus);
663   };
664   auto finiCB = [&](InsertPointTy codeGenIP) {};
665   builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSingle(
666       ompLoc, bodyCB, finiCB, singleOp.nowait(), /*DidIt=*/nullptr));
667   return bodyGenStatus;
668 }
669 
670 /// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
671 static LogicalResult
672 convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
673                  LLVM::ModuleTranslation &moduleTranslation) {
674   auto loop = cast<omp::WsLoopOp>(opInst);
675   // TODO: this should be in the op verifier instead.
676   if (loop.lowerBound().empty())
677     return failure();
678 
679   // Static is the default.
680   auto schedule =
681       loop.schedule_val().getValueOr(omp::ClauseScheduleKind::Static);
682 
683   // Find the loop configuration.
684   llvm::Value *step = moduleTranslation.lookupValue(loop.step()[0]);
685   llvm::Type *ivType = step->getType();
686   llvm::Value *chunk = nullptr;
687   if (loop.schedule_chunk_var()) {
688     llvm::Value *chunkVar =
689         moduleTranslation.lookupValue(loop.schedule_chunk_var());
690     llvm::Type *chunkVarType = chunkVar->getType();
691     assert(chunkVarType->isIntegerTy() &&
692            "chunk size must be one integer expression");
693     if (chunkVarType->getIntegerBitWidth() < ivType->getIntegerBitWidth())
694       chunk = builder.CreateSExt(chunkVar, ivType);
695     else if (chunkVarType->getIntegerBitWidth() > ivType->getIntegerBitWidth())
696       chunk = builder.CreateTrunc(chunkVar, ivType);
697     else
698       chunk = chunkVar;
699   }
700 
701   SmallVector<omp::ReductionDeclareOp> reductionDecls;
702   collectReductionDecls(loop, reductionDecls);
703   llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
704       findAllocaInsertPoint(builder, moduleTranslation);
705 
706   // Allocate space for privatized reduction variables.
707   SmallVector<llvm::Value *> privateReductionVariables;
708   DenseMap<Value, llvm::Value *> reductionVariableMap;
709   unsigned numReductions = loop.getNumReductionVars();
710   privateReductionVariables.reserve(numReductions);
711   if (numReductions != 0) {
712     llvm::IRBuilderBase::InsertPointGuard guard(builder);
713     builder.restoreIP(allocaIP);
714     for (unsigned i = 0; i < numReductions; ++i) {
715       auto reductionType =
716           loop.reduction_vars()[i].getType().cast<LLVM::LLVMPointerType>();
717       llvm::Value *var = builder.CreateAlloca(
718           moduleTranslation.convertType(reductionType.getElementType()));
719       privateReductionVariables.push_back(var);
720       reductionVariableMap.try_emplace(loop.reduction_vars()[i], var);
721     }
722   }
723 
724   // Store the mapping between reduction variables and their private copies on
725   // ModuleTranslation stack. It can be then recovered when translating
726   // omp.reduce operations in a separate call.
727   LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard(
728       moduleTranslation, reductionVariableMap);
729 
730   // Before the loop, store the initial values of reductions into reduction
731   // variables. Although this could be done after allocas, we don't want to mess
732   // up with the alloca insertion point.
733   for (unsigned i = 0; i < numReductions; ++i) {
734     SmallVector<llvm::Value *> phis;
735     if (failed(inlineConvertOmpRegions(reductionDecls[i].initializerRegion(),
736                                        "omp.reduction.neutral", builder,
737                                        moduleTranslation, &phis)))
738       return failure();
739     assert(phis.size() == 1 && "expected one value to be yielded from the "
740                                "reduction neutral element declaration region");
741     builder.CreateStore(phis[0], privateReductionVariables[i]);
742   }
743 
744   // Set up the source location value for OpenMP runtime.
745   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
746 
747   // Generator of the canonical loop body.
748   // TODO: support error propagation in OpenMPIRBuilder and use it instead of
749   // relying on captured variables.
750   SmallVector<llvm::CanonicalLoopInfo *> loopInfos;
751   SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints;
752   LogicalResult bodyGenStatus = success();
753   auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) {
754     // Make sure further conversions know about the induction variable.
755     moduleTranslation.mapValue(
756         loop.getRegion().front().getArgument(loopInfos.size()), iv);
757 
758     // Capture the body insertion point for use in nested loops. BodyIP of the
759     // CanonicalLoopInfo always points to the beginning of the entry block of
760     // the body.
761     bodyInsertPoints.push_back(ip);
762 
763     if (loopInfos.size() != loop.getNumLoops() - 1)
764       return;
765 
766     // Convert the body of the loop.
767     llvm::BasicBlock *entryBlock = ip.getBlock();
768     llvm::BasicBlock *exitBlock =
769         entryBlock->splitBasicBlock(ip.getPoint(), "omp.wsloop.exit");
770     convertOmpOpRegions(loop.region(), "omp.wsloop.region", *entryBlock,
771                         *exitBlock, builder, moduleTranslation, bodyGenStatus);
772   };
773 
774   // Delegate actual loop construction to the OpenMP IRBuilder.
775   // TODO: this currently assumes WsLoop is semantically similar to SCF loop,
776   // i.e. it has a positive step, uses signed integer semantics. Reconsider
777   // this code when WsLoop clearly supports more cases.
778   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
779   for (unsigned i = 0, e = loop.getNumLoops(); i < e; ++i) {
780     llvm::Value *lowerBound =
781         moduleTranslation.lookupValue(loop.lowerBound()[i]);
782     llvm::Value *upperBound =
783         moduleTranslation.lookupValue(loop.upperBound()[i]);
784     llvm::Value *step = moduleTranslation.lookupValue(loop.step()[i]);
785 
786     // Make sure loop trip count are emitted in the preheader of the outermost
787     // loop at the latest so that they are all available for the new collapsed
788     // loop will be created below.
789     llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
790     llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
791     if (i != 0) {
792       loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back());
793       computeIP = loopInfos.front()->getPreheaderIP();
794     }
795     loopInfos.push_back(ompBuilder->createCanonicalLoop(
796         loc, bodyGen, lowerBound, upperBound, step,
797         /*IsSigned=*/true, loop.inclusive(), computeIP));
798 
799     if (failed(bodyGenStatus))
800       return failure();
801   }
802 
803   // Collapse loops. Store the insertion point because LoopInfos may get
804   // invalidated.
805   llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP();
806   llvm::CanonicalLoopInfo *loopInfo =
807       ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
808 
809   allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
810 
811   bool isSimd = loop.simd_modifier();
812 
813   // The orderedVal refers to the value obtained from the ordered[(n)] clause.
814   //   orderedVal == -1: No ordered[(n)] clause specified.
815   //   orderedVal == 0: The ordered clause specified without a parameter.
816   //   orderedVal > 0: The ordered clause specified with a parameter (n).
817   // TODO: Handle doacross loop init when orderedVal is greater than 0.
818   int64_t orderedVal =
819       loop.ordered_val().hasValue() ? loop.ordered_val().getValue() : -1;
820   if (schedule == omp::ClauseScheduleKind::Static && orderedVal != 0) {
821     ompBuilder->applyWorkshareLoop(ompLoc.DL, loopInfo, allocaIP,
822                                    !loop.nowait(),
823                                    llvm::omp::OMP_SCHEDULE_Static, chunk);
824   } else {
825     llvm::omp::OMPScheduleType schedType;
826     switch (schedule) {
827     case omp::ClauseScheduleKind::Static:
828       if (loop.schedule_chunk_var())
829         schedType = llvm::omp::OMPScheduleType::OrderedStaticChunked;
830       else
831         schedType = llvm::omp::OMPScheduleType::OrderedStatic;
832       break;
833     case omp::ClauseScheduleKind::Dynamic:
834       if (orderedVal == 0)
835         schedType = llvm::omp::OMPScheduleType::OrderedDynamicChunked;
836       else
837         schedType = llvm::omp::OMPScheduleType::DynamicChunked;
838       break;
839     case omp::ClauseScheduleKind::Guided:
840       if (orderedVal == 0) {
841         schedType = llvm::omp::OMPScheduleType::OrderedGuidedChunked;
842       } else {
843         if (isSimd)
844           schedType = llvm::omp::OMPScheduleType::GuidedSimd;
845         else
846           schedType = llvm::omp::OMPScheduleType::GuidedChunked;
847       }
848       break;
849     case omp::ClauseScheduleKind::Auto:
850       if (orderedVal == 0)
851         schedType = llvm::omp::OMPScheduleType::OrderedAuto;
852       else
853         schedType = llvm::omp::OMPScheduleType::Auto;
854       break;
855     case omp::ClauseScheduleKind::Runtime:
856       if (orderedVal == 0) {
857         schedType = llvm::omp::OMPScheduleType::OrderedRuntime;
858       } else {
859         if (isSimd)
860           schedType = llvm::omp::OMPScheduleType::RuntimeSimd;
861         else
862           schedType = llvm::omp::OMPScheduleType::Runtime;
863       }
864       break;
865     }
866 
867     if (Optional<omp::ScheduleModifier> modifier = loop.schedule_modifier()) {
868       switch (*modifier) {
869       case omp::ScheduleModifier::monotonic:
870         schedType |= llvm::omp::OMPScheduleType::ModifierMonotonic;
871         break;
872       case omp::ScheduleModifier::nonmonotonic:
873         schedType |= llvm::omp::OMPScheduleType::ModifierNonmonotonic;
874         break;
875       default:
876         // Nothing to do here.
877         break;
878       }
879     } else {
880       // OpenMP 5.1, 2.11.4 Worksharing-Loop Construct, Description.
881       // If the static schedule kind is specified or if the ordered clause is
882       // specified, and if the nonmonotonic modifier is not specified, the
883       // effect is as if the monotonic modifier is specified. Otherwise, unless
884       // the monotonic modifier is specified, the effect is as if the
885       // nonmonotonic modifier is specified.
886       // The monotonic is used by default in openmp runtime library, so no need
887       // to set it.
888       if (!(schedType == llvm::omp::OMPScheduleType::OrderedStatic ||
889             schedType == llvm::omp::OMPScheduleType::OrderedStaticChunked))
890         schedType |= llvm::omp::OMPScheduleType::ModifierNonmonotonic;
891     }
892 
893     ompBuilder->applyDynamicWorkshareLoop(ompLoc.DL, loopInfo, allocaIP,
894                                           schedType, !loop.nowait(), chunk,
895                                           /*ordered*/ orderedVal == 0);
896   }
897 
898   // Continue building IR after the loop. Note that the LoopInfo returned by
899   // `collapseLoops` points inside the outermost loop and is intended for
900   // potential further loop transformations. Use the insertion point stored
901   // before collapsing loops instead.
902   builder.restoreIP(afterIP);
903 
904   // Process the reductions if required.
905   if (numReductions == 0)
906     return success();
907 
908   // Create the reduction generators. We need to own them here because
909   // ReductionInfo only accepts references to the generators.
910   SmallVector<OwningReductionGen> owningReductionGens;
911   SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
912   for (unsigned i = 0; i < numReductions; ++i) {
913     owningReductionGens.push_back(
914         makeReductionGen(reductionDecls[i], builder, moduleTranslation));
915     owningAtomicReductionGens.push_back(
916         makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation));
917   }
918 
919   // Collect the reduction information.
920   SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos;
921   reductionInfos.reserve(numReductions);
922   for (unsigned i = 0; i < numReductions; ++i) {
923     llvm::OpenMPIRBuilder::AtomicReductionGenTy atomicGen = nullptr;
924     if (owningAtomicReductionGens[i])
925       atomicGen = owningAtomicReductionGens[i];
926     auto reductionType =
927         loop.reduction_vars()[i].getType().cast<LLVM::LLVMPointerType>();
928     llvm::Value *variable =
929         moduleTranslation.lookupValue(loop.reduction_vars()[i]);
930     reductionInfos.push_back(
931         {moduleTranslation.convertType(reductionType.getElementType()),
932          variable, privateReductionVariables[i], owningReductionGens[i],
933          atomicGen});
934   }
935 
936   // The call to createReductions below expects the block to have a
937   // terminator. Create an unreachable instruction to serve as terminator
938   // and remove it later.
939   llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
940   builder.SetInsertPoint(tempTerminator);
941   llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint =
942       ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos,
943                                    loop.nowait());
944   if (!contInsertPoint.getBlock())
945     return loop->emitOpError() << "failed to convert reductions";
946   auto nextInsertionPoint =
947       ompBuilder->createBarrier(contInsertPoint, llvm::omp::OMPD_for);
948   tempTerminator->eraseFromParent();
949   builder.restoreIP(nextInsertionPoint);
950 
951   return success();
952 }
953 
954 /// Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
955 static LogicalResult
956 convertOmpSimdLoop(Operation &opInst, llvm::IRBuilderBase &builder,
957                    LLVM::ModuleTranslation &moduleTranslation) {
958   auto loop = cast<omp::SimdLoopOp>(opInst);
959 
960   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
961 
962   // Generator of the canonical loop body.
963   // TODO: support error propagation in OpenMPIRBuilder and use it instead of
964   // relying on captured variables.
965   SmallVector<llvm::CanonicalLoopInfo *> loopInfos;
966   SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints;
967   LogicalResult bodyGenStatus = success();
968   auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) {
969     // Make sure further conversions know about the induction variable.
970     moduleTranslation.mapValue(
971         loop.getRegion().front().getArgument(loopInfos.size()), iv);
972 
973     // Capture the body insertion point for use in nested loops. BodyIP of the
974     // CanonicalLoopInfo always points to the beginning of the entry block of
975     // the body.
976     bodyInsertPoints.push_back(ip);
977 
978     if (loopInfos.size() != loop.getNumLoops() - 1)
979       return;
980 
981     // Convert the body of the loop.
982     llvm::BasicBlock *entryBlock = ip.getBlock();
983     llvm::BasicBlock *exitBlock =
984         entryBlock->splitBasicBlock(ip.getPoint(), "omp.simdloop.exit");
985     convertOmpOpRegions(loop.region(), "omp.simdloop.region", *entryBlock,
986                         *exitBlock, builder, moduleTranslation, bodyGenStatus);
987   };
988 
989   // Delegate actual loop construction to the OpenMP IRBuilder.
990   // TODO: this currently assumes SimdLoop is semantically similar to SCF loop,
991   // i.e. it has a positive step, uses signed integer semantics. Reconsider
992   // this code when SimdLoop clearly supports more cases.
993   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
994   for (unsigned i = 0, e = loop.getNumLoops(); i < e; ++i) {
995     llvm::Value *lowerBound =
996         moduleTranslation.lookupValue(loop.lowerBound()[i]);
997     llvm::Value *upperBound =
998         moduleTranslation.lookupValue(loop.upperBound()[i]);
999     llvm::Value *step = moduleTranslation.lookupValue(loop.step()[i]);
1000 
1001     // Make sure loop trip count are emitted in the preheader of the outermost
1002     // loop at the latest so that they are all available for the new collapsed
1003     // loop will be created below.
1004     llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
1005     llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
1006     if (i != 0) {
1007       loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(),
1008                                                        ompLoc.DL);
1009       computeIP = loopInfos.front()->getPreheaderIP();
1010     }
1011     loopInfos.push_back(ompBuilder->createCanonicalLoop(
1012         loc, bodyGen, lowerBound, upperBound, step,
1013         /*IsSigned=*/true, /*Inclusive=*/true, computeIP));
1014 
1015     if (failed(bodyGenStatus))
1016       return failure();
1017   }
1018 
1019   // Collapse loops.
1020   llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP();
1021   llvm::CanonicalLoopInfo *loopInfo =
1022       ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
1023 
1024   ompBuilder->applySimd(ompLoc.DL, loopInfo);
1025 
1026   builder.restoreIP(afterIP);
1027   return success();
1028 }
1029 
1030 /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
1031 llvm::AtomicOrdering
1032 convertAtomicOrdering(Optional<omp::ClauseMemoryOrderKind> ao) {
1033   if (!ao)
1034     return llvm::AtomicOrdering::Monotonic; // Default Memory Ordering
1035 
1036   switch (*ao) {
1037   case omp::ClauseMemoryOrderKind::Seq_cst:
1038     return llvm::AtomicOrdering::SequentiallyConsistent;
1039   case omp::ClauseMemoryOrderKind::Acq_rel:
1040     return llvm::AtomicOrdering::AcquireRelease;
1041   case omp::ClauseMemoryOrderKind::Acquire:
1042     return llvm::AtomicOrdering::Acquire;
1043   case omp::ClauseMemoryOrderKind::Release:
1044     return llvm::AtomicOrdering::Release;
1045   case omp::ClauseMemoryOrderKind::Relaxed:
1046     return llvm::AtomicOrdering::Monotonic;
1047   }
1048   llvm_unreachable("Unknown ClauseMemoryOrderKind kind");
1049 }
1050 
1051 /// Convert omp.atomic.read operation to LLVM IR.
1052 static LogicalResult
1053 convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder,
1054                      LLVM::ModuleTranslation &moduleTranslation) {
1055 
1056   auto readOp = cast<omp::AtomicReadOp>(opInst);
1057   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1058 
1059   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1060 
1061   llvm::AtomicOrdering AO = convertAtomicOrdering(readOp.memory_order_val());
1062   llvm::Value *x = moduleTranslation.lookupValue(readOp.x());
1063   Type xTy = readOp.x().getType().cast<omp::PointerLikeType>().getElementType();
1064   llvm::Value *v = moduleTranslation.lookupValue(readOp.v());
1065   Type vTy = readOp.v().getType().cast<omp::PointerLikeType>().getElementType();
1066   llvm::OpenMPIRBuilder::AtomicOpValue V = {
1067       v, moduleTranslation.convertType(vTy), false, false};
1068   llvm::OpenMPIRBuilder::AtomicOpValue X = {
1069       x, moduleTranslation.convertType(xTy), false, false};
1070   builder.restoreIP(ompBuilder->createAtomicRead(ompLoc, X, V, AO));
1071   return success();
1072 }
1073 
1074 /// Converts an omp.atomic.write operation to LLVM IR.
1075 static LogicalResult
1076 convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder,
1077                       LLVM::ModuleTranslation &moduleTranslation) {
1078   auto writeOp = cast<omp::AtomicWriteOp>(opInst);
1079   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1080 
1081   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1082   llvm::AtomicOrdering ao = convertAtomicOrdering(writeOp.memory_order_val());
1083   llvm::Value *expr = moduleTranslation.lookupValue(writeOp.value());
1084   llvm::Value *dest = moduleTranslation.lookupValue(writeOp.address());
1085   llvm::Type *ty = moduleTranslation.convertType(writeOp.value().getType());
1086   llvm::OpenMPIRBuilder::AtomicOpValue x = {dest, ty, /*isSigned=*/false,
1087                                             /*isVolatile=*/false};
1088   builder.restoreIP(ompBuilder->createAtomicWrite(ompLoc, x, expr, ao));
1089   return success();
1090 }
1091 
1092 /// Converts an LLVM dialect binary operation to the corresponding enum value
1093 /// for `atomicrmw` supported binary operation.
1094 llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op) {
1095   return llvm::TypeSwitch<Operation *, llvm::AtomicRMWInst::BinOp>(&op)
1096       .Case([&](LLVM::AddOp) { return llvm::AtomicRMWInst::BinOp::Add; })
1097       .Case([&](LLVM::SubOp) { return llvm::AtomicRMWInst::BinOp::Sub; })
1098       .Case([&](LLVM::AndOp) { return llvm::AtomicRMWInst::BinOp::And; })
1099       .Case([&](LLVM::OrOp) { return llvm::AtomicRMWInst::BinOp::Or; })
1100       .Case([&](LLVM::XOrOp) { return llvm::AtomicRMWInst::BinOp::Xor; })
1101       .Case([&](LLVM::UMaxOp) { return llvm::AtomicRMWInst::BinOp::UMax; })
1102       .Case([&](LLVM::UMinOp) { return llvm::AtomicRMWInst::BinOp::UMin; })
1103       .Case([&](LLVM::FAddOp) { return llvm::AtomicRMWInst::BinOp::FAdd; })
1104       .Case([&](LLVM::FSubOp) { return llvm::AtomicRMWInst::BinOp::FSub; })
1105       .Default(llvm::AtomicRMWInst::BinOp::BAD_BINOP);
1106 }
1107 
1108 /// Converts an OpenMP atomic update operation using OpenMPIRBuilder.
1109 static LogicalResult
1110 convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst,
1111                        llvm::IRBuilderBase &builder,
1112                        LLVM::ModuleTranslation &moduleTranslation) {
1113   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1114 
1115   // Convert values and types.
1116   auto &innerOpList = opInst.region().front().getOperations();
1117   if (innerOpList.size() != 2)
1118     return opInst.emitError("exactly two operations are allowed inside an "
1119                             "atomic update region while lowering to LLVM IR");
1120 
1121   Operation &innerUpdateOp = innerOpList.front();
1122 
1123   if (innerUpdateOp.getNumOperands() != 2 ||
1124       !llvm::is_contained(innerUpdateOp.getOperands(),
1125                           opInst.getRegion().getArgument(0)))
1126     return opInst.emitError(
1127         "the update operation inside the region must be a binary operation and "
1128         "that update operation must have the region argument as an operand");
1129 
1130   llvm::AtomicRMWInst::BinOp binop = convertBinOpToAtomic(innerUpdateOp);
1131 
1132   bool isXBinopExpr =
1133       innerUpdateOp.getNumOperands() > 0 &&
1134       innerUpdateOp.getOperand(0) == opInst.getRegion().getArgument(0);
1135 
1136   mlir::Value mlirExpr = (isXBinopExpr ? innerUpdateOp.getOperand(1)
1137                                        : innerUpdateOp.getOperand(0));
1138   llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr);
1139   llvm::Value *llvmX = moduleTranslation.lookupValue(opInst.x());
1140   LLVM::LLVMPointerType mlirXType =
1141       opInst.x().getType().cast<LLVM::LLVMPointerType>();
1142   llvm::Type *llvmXElementType =
1143       moduleTranslation.convertType(mlirXType.getElementType());
1144   llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
1145                                                       /*isSigned=*/false,
1146                                                       /*isVolatile=*/false};
1147 
1148   llvm::AtomicOrdering atomicOrdering =
1149       convertAtomicOrdering(opInst.memory_order_val());
1150 
1151   // Generate update code.
1152   LogicalResult updateGenStatus = success();
1153   auto updateFn = [&opInst, &moduleTranslation, &updateGenStatus](
1154                       llvm::Value *atomicx,
1155                       llvm::IRBuilder<> &builder) -> llvm::Value * {
1156     Block &bb = *opInst.region().begin();
1157     moduleTranslation.mapValue(*opInst.region().args_begin(), atomicx);
1158     moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
1159     if (failed(moduleTranslation.convertBlock(bb, true, builder))) {
1160       updateGenStatus = (opInst.emitError()
1161                          << "unable to convert update operation to llvm IR");
1162       return nullptr;
1163     }
1164     omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
1165     assert(yieldop && yieldop.results().size() == 1 &&
1166            "terminator must be omp.yield op and it must have exactly one "
1167            "argument");
1168     return moduleTranslation.lookupValue(yieldop.results()[0]);
1169   };
1170 
1171   // Handle ambiguous alloca, if any.
1172   auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
1173   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1174   builder.restoreIP(ompBuilder->createAtomicUpdate(
1175       ompLoc, allocaIP, llvmAtomicX, llvmExpr, atomicOrdering, binop, updateFn,
1176       isXBinopExpr));
1177   return updateGenStatus;
1178 }
1179 
1180 static LogicalResult
1181 convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
1182                         llvm::IRBuilderBase &builder,
1183                         LLVM::ModuleTranslation &moduleTranslation) {
1184   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1185   mlir::Value mlirExpr;
1186   bool isXBinopExpr = false, isPostfixUpdate = false;
1187   llvm::AtomicRMWInst::BinOp binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;
1188 
1189   omp::AtomicUpdateOp atomicUpdateOp = atomicCaptureOp.getAtomicUpdateOp();
1190   omp::AtomicWriteOp atomicWriteOp = atomicCaptureOp.getAtomicWriteOp();
1191 
1192   assert((atomicUpdateOp || atomicWriteOp) &&
1193          "internal op must be an atomic.update or atomic.write op");
1194 
1195   if (atomicWriteOp) {
1196     isPostfixUpdate = true;
1197     mlirExpr = atomicWriteOp.value();
1198   } else {
1199     isPostfixUpdate = atomicCaptureOp.getSecondOp() ==
1200                       atomicCaptureOp.getAtomicUpdateOp().getOperation();
1201     auto &innerOpList = atomicUpdateOp.region().front().getOperations();
1202     if (innerOpList.size() != 2)
1203       return atomicUpdateOp.emitError(
1204           "exactly two operations are allowed inside an "
1205           "atomic update region while lowering to LLVM IR");
1206     Operation *innerUpdateOp = atomicUpdateOp.getFirstOp();
1207     if (innerUpdateOp->getNumOperands() != 2 ||
1208         !llvm::is_contained(innerUpdateOp->getOperands(),
1209                             atomicUpdateOp.getRegion().getArgument(0)))
1210       return atomicUpdateOp.emitError(
1211           "the update operation inside the region must be a binary operation "
1212           "and that update operation must have the region argument as an "
1213           "operand");
1214     binop = convertBinOpToAtomic(*innerUpdateOp);
1215 
1216     isXBinopExpr = innerUpdateOp->getOperand(0) ==
1217                    atomicUpdateOp.getRegion().getArgument(0);
1218 
1219     mlirExpr = (isXBinopExpr ? innerUpdateOp->getOperand(1)
1220                              : innerUpdateOp->getOperand(0));
1221   }
1222 
1223   llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr);
1224   llvm::Value *llvmX =
1225       moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().x());
1226   llvm::Value *llvmV =
1227       moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().v());
1228   auto mlirXType = atomicCaptureOp.getAtomicReadOp()
1229                        .x()
1230                        .getType()
1231                        .cast<LLVM::LLVMPointerType>();
1232   llvm::Type *llvmXElementType =
1233       moduleTranslation.convertType(mlirXType.getElementType());
1234   llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
1235                                                       /*isSigned=*/false,
1236                                                       /*isVolatile=*/false};
1237   llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicV = {llvmV, llvmXElementType,
1238                                                       /*isSigned=*/false,
1239                                                       /*isVolatile=*/false};
1240 
1241   llvm::AtomicOrdering atomicOrdering =
1242       convertAtomicOrdering(atomicCaptureOp.memory_order_val());
1243 
1244   LogicalResult updateGenStatus = success();
1245   auto updateFn = [&](llvm::Value *atomicx,
1246                       llvm::IRBuilder<> &builder) -> llvm::Value * {
1247     if (atomicWriteOp)
1248       return moduleTranslation.lookupValue(atomicWriteOp.value());
1249     Block &bb = *atomicUpdateOp.region().begin();
1250     moduleTranslation.mapValue(*atomicUpdateOp.region().args_begin(), atomicx);
1251     moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
1252     if (failed(moduleTranslation.convertBlock(bb, true, builder))) {
1253       updateGenStatus = (atomicUpdateOp.emitError()
1254                          << "unable to convert update operation to llvm IR");
1255       return nullptr;
1256     }
1257     omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
1258     assert(yieldop && yieldop.results().size() == 1 &&
1259            "terminator must be omp.yield op and it must have exactly one "
1260            "argument");
1261     return moduleTranslation.lookupValue(yieldop.results()[0]);
1262   };
1263 
1264   // Handle ambiguous alloca, if any.
1265   auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
1266   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1267   builder.restoreIP(ompBuilder->createAtomicCapture(
1268       ompLoc, allocaIP, llvmAtomicX, llvmAtomicV, llvmExpr, atomicOrdering,
1269       binop, updateFn, atomicUpdateOp, isPostfixUpdate, isXBinopExpr));
1270   return updateGenStatus;
1271 }
1272 
1273 /// Converts an OpenMP reduction operation using OpenMPIRBuilder. Expects the
1274 /// mapping between reduction variables and their private equivalents to have
1275 /// been stored on the ModuleTranslation stack. Currently only supports
1276 /// reduction within WsLoopOp, but can be easily extended.
1277 static LogicalResult
1278 convertOmpReductionOp(omp::ReductionOp reductionOp,
1279                       llvm::IRBuilderBase &builder,
1280                       LLVM::ModuleTranslation &moduleTranslation) {
1281   // Find the declaration that corresponds to the reduction op.
1282   auto reductionContainer = reductionOp->getParentOfType<omp::WsLoopOp>();
1283   omp::ReductionDeclareOp declaration =
1284       findReductionDecl(reductionContainer, reductionOp);
1285   assert(declaration && "could not find reduction declaration");
1286 
1287   // Retrieve the mapping between reduction variables and their private
1288   // equivalents.
1289   const DenseMap<Value, llvm::Value *> *reductionVariableMap = nullptr;
1290   moduleTranslation.stackWalk<OpenMPVarMappingStackFrame>(
1291       [&](const OpenMPVarMappingStackFrame &frame) {
1292         reductionVariableMap = &frame.mapping;
1293         return WalkResult::interrupt();
1294       });
1295   assert(reductionVariableMap && "couldn't find private reduction variables");
1296 
1297   // Translate the reduction operation by emitting the body of the corresponding
1298   // reduction declaration.
1299   Region &reductionRegion = declaration.reductionRegion();
1300   llvm::Value *privateReductionVar =
1301       reductionVariableMap->lookup(reductionOp.accumulator());
1302   llvm::Value *reductionVal = builder.CreateLoad(
1303       moduleTranslation.convertType(reductionOp.operand().getType()),
1304       privateReductionVar);
1305 
1306   moduleTranslation.mapValue(reductionRegion.front().getArgument(0),
1307                              reductionVal);
1308   moduleTranslation.mapValue(
1309       reductionRegion.front().getArgument(1),
1310       moduleTranslation.lookupValue(reductionOp.operand()));
1311 
1312   SmallVector<llvm::Value *> phis;
1313   if (failed(inlineConvertOmpRegions(reductionRegion, "omp.reduction.body",
1314                                      builder, moduleTranslation, &phis)))
1315     return failure();
1316   assert(phis.size() == 1 && "expected one value to be yielded from "
1317                              "the reduction body declaration region");
1318   builder.CreateStore(phis[0], privateReductionVar);
1319   return success();
1320 }
1321 
1322 namespace {
1323 
1324 /// Implementation of the dialect interface that converts operations belonging
1325 /// to the OpenMP dialect to LLVM IR.
1326 class OpenMPDialectLLVMIRTranslationInterface
1327     : public LLVMTranslationDialectInterface {
1328 public:
1329   using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface;
1330 
1331   /// Translates the given operation to LLVM IR using the provided IR builder
1332   /// and saving the state in `moduleTranslation`.
1333   LogicalResult
1334   convertOperation(Operation *op, llvm::IRBuilderBase &builder,
1335                    LLVM::ModuleTranslation &moduleTranslation) const final;
1336 };
1337 
1338 } // namespace
1339 
1340 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR
1341 /// (including OpenMP runtime calls).
1342 LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
1343     Operation *op, llvm::IRBuilderBase &builder,
1344     LLVM::ModuleTranslation &moduleTranslation) const {
1345 
1346   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1347 
1348   return llvm::TypeSwitch<Operation *, LogicalResult>(op)
1349       .Case([&](omp::BarrierOp) {
1350         ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier);
1351         return success();
1352       })
1353       .Case([&](omp::TaskwaitOp) {
1354         ompBuilder->createTaskwait(builder.saveIP());
1355         return success();
1356       })
1357       .Case([&](omp::TaskyieldOp) {
1358         ompBuilder->createTaskyield(builder.saveIP());
1359         return success();
1360       })
1361       .Case([&](omp::FlushOp) {
1362         // No support in Openmp runtime function (__kmpc_flush) to accept
1363         // the argument list.
1364         // OpenMP standard states the following:
1365         //  "An implementation may implement a flush with a list by ignoring
1366         //   the list, and treating it the same as a flush without a list."
1367         //
1368         // The argument list is discarded so that, flush with a list is treated
1369         // same as a flush without a list.
1370         ompBuilder->createFlush(builder.saveIP());
1371         return success();
1372       })
1373       .Case([&](omp::ParallelOp op) {
1374         return convertOmpParallel(op, builder, moduleTranslation);
1375       })
1376       .Case([&](omp::ReductionOp reductionOp) {
1377         return convertOmpReductionOp(reductionOp, builder, moduleTranslation);
1378       })
1379       .Case([&](omp::MasterOp) {
1380         return convertOmpMaster(*op, builder, moduleTranslation);
1381       })
1382       .Case([&](omp::CriticalOp) {
1383         return convertOmpCritical(*op, builder, moduleTranslation);
1384       })
1385       .Case([&](omp::OrderedRegionOp) {
1386         return convertOmpOrderedRegion(*op, builder, moduleTranslation);
1387       })
1388       .Case([&](omp::OrderedOp) {
1389         return convertOmpOrdered(*op, builder, moduleTranslation);
1390       })
1391       .Case([&](omp::WsLoopOp) {
1392         return convertOmpWsLoop(*op, builder, moduleTranslation);
1393       })
1394       .Case([&](omp::SimdLoopOp) {
1395         return convertOmpSimdLoop(*op, builder, moduleTranslation);
1396       })
1397       .Case([&](omp::AtomicReadOp) {
1398         return convertOmpAtomicRead(*op, builder, moduleTranslation);
1399       })
1400       .Case([&](omp::AtomicWriteOp) {
1401         return convertOmpAtomicWrite(*op, builder, moduleTranslation);
1402       })
1403       .Case([&](omp::AtomicUpdateOp op) {
1404         return convertOmpAtomicUpdate(op, builder, moduleTranslation);
1405       })
1406       .Case([&](omp::AtomicCaptureOp op) {
1407         return convertOmpAtomicCapture(op, builder, moduleTranslation);
1408       })
1409       .Case([&](omp::SectionsOp) {
1410         return convertOmpSections(*op, builder, moduleTranslation);
1411       })
1412       .Case([&](omp::SingleOp op) {
1413         return convertOmpSingle(op, builder, moduleTranslation);
1414       })
1415       .Case<omp::YieldOp, omp::TerminatorOp, omp::ReductionDeclareOp,
1416             omp::CriticalDeclareOp>([](auto op) {
1417         // `yield` and `terminator` can be just omitted. The block structure
1418         // was created in the region that handles their parent operation.
1419         // `reduction.declare` will be used by reductions and is not
1420         // converted directly, skip it.
1421         // `critical.declare` is only used to declare names of critical
1422         // sections which will be used by `critical` ops and hence can be
1423         // ignored for lowering. The OpenMP IRBuilder will create unique
1424         // name for critical section names.
1425         return success();
1426       })
1427       .Default([&](Operation *inst) {
1428         return inst->emitError("unsupported OpenMP operation: ")
1429                << inst->getName();
1430       });
1431 }
1432 
1433 void mlir::registerOpenMPDialectTranslation(DialectRegistry &registry) {
1434   registry.insert<omp::OpenMPDialect>();
1435   registry.addExtension(+[](MLIRContext *ctx, omp::OpenMPDialect *dialect) {
1436     dialect->addInterfaces<OpenMPDialectLLVMIRTranslationInterface>();
1437   });
1438 }
1439 
1440 void mlir::registerOpenMPDialectTranslation(MLIRContext &context) {
1441   DialectRegistry registry;
1442   registerOpenMPDialectTranslation(registry);
1443   context.appendDialectRegistry(registry);
1444 }
1445