1 //===- OpenMPToLLVMIRTranslation.cpp - Translate OpenMP dialect to LLVM IR-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a translation between the MLIR OpenMP dialect and LLVM
10 // IR.
11 //
12 //===----------------------------------------------------------------------===//
13 #include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h"
14 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
15 #include "mlir/IR/BlockAndValueMapping.h"
16 #include "mlir/IR/Operation.h"
17 #include "mlir/Support/LLVM.h"
18 #include "mlir/Target/LLVMIR/ModuleTranslation.h"
19 
20 #include "llvm/ADT/SetVector.h"
21 #include "llvm/ADT/TypeSwitch.h"
22 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
23 #include "llvm/IR/DebugInfoMetadata.h"
24 #include "llvm/IR/IRBuilder.h"
25 
26 using namespace mlir;
27 
28 namespace {
29 static llvm::omp::ScheduleKind
30 convertToScheduleKind(Optional<omp::ClauseScheduleKind> schedKind) {
31   if (!schedKind.has_value())
32     return llvm::omp::OMP_SCHEDULE_Default;
33   switch (schedKind.value()) {
34   case omp::ClauseScheduleKind::Static:
35     return llvm::omp::OMP_SCHEDULE_Static;
36   case omp::ClauseScheduleKind::Dynamic:
37     return llvm::omp::OMP_SCHEDULE_Dynamic;
38   case omp::ClauseScheduleKind::Guided:
39     return llvm::omp::OMP_SCHEDULE_Guided;
40   case omp::ClauseScheduleKind::Auto:
41     return llvm::omp::OMP_SCHEDULE_Auto;
42   case omp::ClauseScheduleKind::Runtime:
43     return llvm::omp::OMP_SCHEDULE_Runtime;
44   }
45   llvm_unreachable("unhandled schedule clause argument");
46 }
47 
48 /// ModuleTranslation stack frame for OpenMP operations. This keeps track of the
49 /// insertion points for allocas.
50 class OpenMPAllocaStackFrame
51     : public LLVM::ModuleTranslation::StackFrameBase<OpenMPAllocaStackFrame> {
52 public:
53   MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPAllocaStackFrame)
54 
55   explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP)
56       : allocaInsertPoint(allocaIP) {}
57   llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
58 };
59 
60 /// ModuleTranslation stack frame containing the partial mapping between MLIR
61 /// values and their LLVM IR equivalents.
62 class OpenMPVarMappingStackFrame
63     : public LLVM::ModuleTranslation::StackFrameBase<
64           OpenMPVarMappingStackFrame> {
65 public:
66   MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPVarMappingStackFrame)
67 
68   explicit OpenMPVarMappingStackFrame(
69       const DenseMap<Value, llvm::Value *> &mapping)
70       : mapping(mapping) {}
71 
72   DenseMap<Value, llvm::Value *> mapping;
73 };
74 } // namespace
75 
76 /// Find the insertion point for allocas given the current insertion point for
77 /// normal operations in the builder.
78 static llvm::OpenMPIRBuilder::InsertPointTy
79 findAllocaInsertPoint(llvm::IRBuilderBase &builder,
80                       const LLVM::ModuleTranslation &moduleTranslation) {
81   // If there is an alloca insertion point on stack, i.e. we are in a nested
82   // operation and a specific point was provided by some surrounding operation,
83   // use it.
84   llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
85   WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocaStackFrame>(
86       [&](const OpenMPAllocaStackFrame &frame) {
87         allocaInsertPoint = frame.allocaInsertPoint;
88         return WalkResult::interrupt();
89       });
90   if (walkResult.wasInterrupted())
91     return allocaInsertPoint;
92 
93   // Otherwise, insert to the entry block of the surrounding function.
94   // If the current IRBuilder InsertPoint is the function's entry, it cannot
95   // also be used for alloca insertion which would result in insertion order
96   // confusion. Create a new BasicBlock for the Builder and use the entry block
97   // for the allocs.
98   // TODO: Create a dedicated alloca BasicBlock at function creation such that
99   // we do not need to move the current InertPoint here.
100   if (builder.GetInsertBlock() ==
101       &builder.GetInsertBlock()->getParent()->getEntryBlock()) {
102     assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() &&
103            "Assuming end of basic block");
104     llvm::BasicBlock *entryBB = llvm::BasicBlock::Create(
105         builder.getContext(), "entry", builder.GetInsertBlock()->getParent(),
106         builder.GetInsertBlock()->getNextNode());
107     builder.CreateBr(entryBB);
108     builder.SetInsertPoint(entryBB);
109   }
110 
111   llvm::BasicBlock &funcEntryBlock =
112       builder.GetInsertBlock()->getParent()->getEntryBlock();
113   return llvm::OpenMPIRBuilder::InsertPointTy(
114       &funcEntryBlock, funcEntryBlock.getFirstInsertionPt());
115 }
116 
117 /// Converts the given region that appears within an OpenMP dialect operation to
118 /// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the
119 /// region, and a branch from any block with an successor-less OpenMP terminator
120 /// to `continuationBlock`. Populates `continuationBlockPHIs` with the PHI nodes
121 /// of the continuation block if provided.
122 static llvm::BasicBlock *convertOmpOpRegions(
123     Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
124     LLVM::ModuleTranslation &moduleTranslation, LogicalResult &bodyGenStatus,
125     SmallVectorImpl<llvm::PHINode *> *continuationBlockPHIs = nullptr) {
126   llvm::BasicBlock *continuationBlock =
127       splitBB(builder, true, "omp.region.cont");
128   llvm::BasicBlock *sourceBlock = builder.GetInsertBlock();
129 
130   llvm::LLVMContext &llvmContext = builder.getContext();
131   for (Block &bb : region) {
132     llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create(
133         llvmContext, blockName, builder.GetInsertBlock()->getParent(),
134         builder.GetInsertBlock()->getNextNode());
135     moduleTranslation.mapBlock(&bb, llvmBB);
136   }
137 
138   llvm::Instruction *sourceTerminator = sourceBlock->getTerminator();
139 
140   // Terminators (namely YieldOp) may be forwarding values to the region that
141   // need to be available in the continuation block. Collect the types of these
142   // operands in preparation of creating PHI nodes.
143   SmallVector<llvm::Type *> continuationBlockPHITypes;
144   bool operandsProcessed = false;
145   unsigned numYields = 0;
146   for (Block &bb : region.getBlocks()) {
147     if (omp::YieldOp yield = dyn_cast<omp::YieldOp>(bb.getTerminator())) {
148       if (!operandsProcessed) {
149         for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
150           continuationBlockPHITypes.push_back(
151               moduleTranslation.convertType(yield->getOperand(i).getType()));
152         }
153         operandsProcessed = true;
154       } else {
155         assert(continuationBlockPHITypes.size() == yield->getNumOperands() &&
156                "mismatching number of values yielded from the region");
157         for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
158           llvm::Type *operandType =
159               moduleTranslation.convertType(yield->getOperand(i).getType());
160           (void)operandType;
161           assert(continuationBlockPHITypes[i] == operandType &&
162                  "values of mismatching types yielded from the region");
163         }
164       }
165       numYields++;
166     }
167   }
168 
169   // Insert PHI nodes in the continuation block for any values forwarded by the
170   // terminators in this region.
171   if (!continuationBlockPHITypes.empty())
172     assert(
173         continuationBlockPHIs &&
174         "expected continuation block PHIs if converted regions yield values");
175   if (continuationBlockPHIs) {
176     llvm::IRBuilderBase::InsertPointGuard guard(builder);
177     continuationBlockPHIs->reserve(continuationBlockPHITypes.size());
178     builder.SetInsertPoint(continuationBlock, continuationBlock->begin());
179     for (llvm::Type *ty : continuationBlockPHITypes)
180       continuationBlockPHIs->push_back(builder.CreatePHI(ty, numYields));
181   }
182 
183   // Convert blocks one by one in topological order to ensure
184   // defs are converted before uses.
185   SetVector<Block *> blocks =
186       LLVM::detail::getTopologicallySortedBlocks(region);
187   for (Block *bb : blocks) {
188     llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb);
189     // Retarget the branch of the entry block to the entry block of the
190     // converted region (regions are single-entry).
191     if (bb->isEntryBlock()) {
192       assert(sourceTerminator->getNumSuccessors() == 1 &&
193              "provided entry block has multiple successors");
194       assert(sourceTerminator->getSuccessor(0) == continuationBlock &&
195              "ContinuationBlock is not the successor of the entry block");
196       sourceTerminator->setSuccessor(0, llvmBB);
197     }
198 
199     llvm::IRBuilderBase::InsertPointGuard guard(builder);
200     if (failed(
201             moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder))) {
202       bodyGenStatus = failure();
203       return continuationBlock;
204     }
205 
206     // Special handling for `omp.yield` and `omp.terminator` (we may have more
207     // than one): they return the control to the parent OpenMP dialect operation
208     // so replace them with the branch to the continuation block. We handle this
209     // here to avoid relying inter-function communication through the
210     // ModuleTranslation class to set up the correct insertion point. This is
211     // also consistent with MLIR's idiom of handling special region terminators
212     // in the same code that handles the region-owning operation.
213     Operation *terminator = bb->getTerminator();
214     if (isa<omp::TerminatorOp, omp::YieldOp>(terminator)) {
215       builder.CreateBr(continuationBlock);
216 
217       for (unsigned i = 0, e = terminator->getNumOperands(); i < e; ++i)
218         (*continuationBlockPHIs)[i]->addIncoming(
219             moduleTranslation.lookupValue(terminator->getOperand(i)), llvmBB);
220     }
221   }
222   // After all blocks have been traversed and values mapped, connect the PHI
223   // nodes to the results of preceding blocks.
224   LLVM::detail::connectPHINodes(region, moduleTranslation);
225 
226   // Remove the blocks and values defined in this region from the mapping since
227   // they are not visible outside of this region. This allows the same region to
228   // be converted several times, that is cloned, without clashes, and slightly
229   // speeds up the lookups.
230   moduleTranslation.forgetMapping(region);
231 
232   return continuationBlock;
233 }
234 
235 /// Convert ProcBindKind from MLIR-generated enum to LLVM enum.
236 static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) {
237   switch (kind) {
238   case omp::ClauseProcBindKind::Close:
239     return llvm::omp::ProcBindKind::OMP_PROC_BIND_close;
240   case omp::ClauseProcBindKind::Master:
241     return llvm::omp::ProcBindKind::OMP_PROC_BIND_master;
242   case omp::ClauseProcBindKind::Primary:
243     return llvm::omp::ProcBindKind::OMP_PROC_BIND_primary;
244   case omp::ClauseProcBindKind::Spread:
245     return llvm::omp::ProcBindKind::OMP_PROC_BIND_spread;
246   }
247   llvm_unreachable("Unknown ClauseProcBindKind kind");
248 }
249 
250 /// Converts the OpenMP parallel operation to LLVM IR.
251 static LogicalResult
252 convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
253                    LLVM::ModuleTranslation &moduleTranslation) {
254   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
255   // TODO: support error propagation in OpenMPIRBuilder and use it instead of
256   // relying on captured variables.
257   LogicalResult bodyGenStatus = success();
258 
259   auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
260     // Save the alloca insertion point on ModuleTranslation stack for use in
261     // nested regions.
262     LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
263         moduleTranslation, allocaIP);
264 
265     // ParallelOp has only one region associated with it.
266     builder.restoreIP(codeGenIP);
267     convertOmpOpRegions(opInst.getRegion(), "omp.par.region", builder,
268                         moduleTranslation, bodyGenStatus);
269   };
270 
271   // TODO: Perform appropriate actions according to the data-sharing
272   // attribute (shared, private, firstprivate, ...) of variables.
273   // Currently defaults to shared.
274   auto privCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
275                     llvm::Value &, llvm::Value &vPtr,
276                     llvm::Value *&replacementValue) -> InsertPointTy {
277     replacementValue = &vPtr;
278 
279     return codeGenIP;
280   };
281 
282   // TODO: Perform finalization actions for variables. This has to be
283   // called for variables which have destructors/finalizers.
284   auto finiCB = [&](InsertPointTy codeGenIP) {};
285 
286   llvm::Value *ifCond = nullptr;
287   if (auto ifExprVar = opInst.if_expr_var())
288     ifCond = moduleTranslation.lookupValue(ifExprVar);
289   llvm::Value *numThreads = nullptr;
290   if (auto numThreadsVar = opInst.num_threads_var())
291     numThreads = moduleTranslation.lookupValue(numThreadsVar);
292   auto pbKind = llvm::omp::OMP_PROC_BIND_default;
293   if (auto bind = opInst.proc_bind_val())
294     pbKind = getProcBindKind(*bind);
295   // TODO: Is the Parallel construct cancellable?
296   bool isCancellable = false;
297 
298   llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
299       findAllocaInsertPoint(builder, moduleTranslation);
300   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
301   builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createParallel(
302       ompLoc, allocaIP, bodyGenCB, privCB, finiCB, ifCond, numThreads, pbKind,
303       isCancellable));
304 
305   return bodyGenStatus;
306 }
307 
308 /// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
309 static LogicalResult
310 convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder,
311                  LLVM::ModuleTranslation &moduleTranslation) {
312   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
313   // TODO: support error propagation in OpenMPIRBuilder and use it instead of
314   // relying on captured variables.
315   LogicalResult bodyGenStatus = success();
316 
317   auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
318     // MasterOp has only one region associated with it.
319     auto &region = cast<omp::MasterOp>(opInst).getRegion();
320     builder.restoreIP(codeGenIP);
321     convertOmpOpRegions(region, "omp.master.region", builder, moduleTranslation,
322                         bodyGenStatus);
323   };
324 
325   // TODO: Perform finalization actions for variables. This has to be
326   // called for variables which have destructors/finalizers.
327   auto finiCB = [&](InsertPointTy codeGenIP) {};
328 
329   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
330   builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createMaster(
331       ompLoc, bodyGenCB, finiCB));
332   return success();
333 }
334 
335 /// Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder.
336 static LogicalResult
337 convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder,
338                    LLVM::ModuleTranslation &moduleTranslation) {
339   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
340   auto criticalOp = cast<omp::CriticalOp>(opInst);
341   // TODO: support error propagation in OpenMPIRBuilder and use it instead of
342   // relying on captured variables.
343   LogicalResult bodyGenStatus = success();
344 
345   auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
346     // CriticalOp has only one region associated with it.
347     auto &region = cast<omp::CriticalOp>(opInst).getRegion();
348     builder.restoreIP(codeGenIP);
349     convertOmpOpRegions(region, "omp.critical.region", builder,
350                         moduleTranslation, bodyGenStatus);
351   };
352 
353   // TODO: Perform finalization actions for variables. This has to be
354   // called for variables which have destructors/finalizers.
355   auto finiCB = [&](InsertPointTy codeGenIP) {};
356 
357   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
358   llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
359   llvm::Constant *hint = nullptr;
360 
361   // If it has a name, it probably has a hint too.
362   if (criticalOp.nameAttr()) {
363     // The verifiers in OpenMP Dialect guarentee that all the pointers are
364     // non-null
365     auto symbolRef = criticalOp.nameAttr().cast<SymbolRefAttr>();
366     auto criticalDeclareOp =
367         SymbolTable::lookupNearestSymbolFrom<omp::CriticalDeclareOp>(criticalOp,
368                                                                      symbolRef);
369     hint =
370         llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext),
371                                static_cast<int>(criticalDeclareOp.hint_val()));
372   }
373   builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createCritical(
374       ompLoc, bodyGenCB, finiCB, criticalOp.name().value_or(""), hint));
375   return success();
376 }
377 
378 /// Returns a reduction declaration that corresponds to the given reduction
379 /// operation in the given container. Currently only supports reductions inside
380 /// WsLoopOp but can be easily extended.
381 static omp::ReductionDeclareOp findReductionDecl(omp::WsLoopOp container,
382                                                  omp::ReductionOp reduction) {
383   SymbolRefAttr reductionSymbol;
384   for (unsigned i = 0, e = container.getNumReductionVars(); i < e; ++i) {
385     if (container.reduction_vars()[i] != reduction.accumulator())
386       continue;
387     reductionSymbol = (*container.reductions())[i].cast<SymbolRefAttr>();
388     break;
389   }
390   assert(reductionSymbol &&
391          "reduction operation must be associated with a declaration");
392 
393   return SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>(
394       container, reductionSymbol);
395 }
396 
397 /// Populates `reductions` with reduction declarations used in the given loop.
398 static void
399 collectReductionDecls(omp::WsLoopOp loop,
400                       SmallVectorImpl<omp::ReductionDeclareOp> &reductions) {
401   Optional<ArrayAttr> attr = loop.reductions();
402   if (!attr)
403     return;
404 
405   reductions.reserve(reductions.size() + loop.getNumReductionVars());
406   for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
407     reductions.push_back(
408         SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>(
409             loop, symbolRef));
410   }
411 }
412 
413 /// Translates the blocks contained in the given region and appends them to at
414 /// the current insertion point of `builder`. The operations of the entry block
415 /// are appended to the current insertion block, which is not expected to have a
416 /// terminator. If set, `continuationBlockArgs` is populated with translated
417 /// values that correspond to the values omp.yield'ed from the region.
418 static LogicalResult inlineConvertOmpRegions(
419     Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
420     LLVM::ModuleTranslation &moduleTranslation,
421     SmallVectorImpl<llvm::Value *> *continuationBlockArgs = nullptr) {
422   if (region.empty())
423     return success();
424 
425   // Special case for single-block regions that don't create additional blocks:
426   // insert operations without creating additional blocks.
427   if (llvm::hasSingleElement(region)) {
428     moduleTranslation.mapBlock(&region.front(), builder.GetInsertBlock());
429     if (failed(moduleTranslation.convertBlock(
430             region.front(), /*ignoreArguments=*/true, builder)))
431       return failure();
432 
433     // The continuation arguments are simply the translated terminator operands.
434     if (continuationBlockArgs)
435       llvm::append_range(
436           *continuationBlockArgs,
437           moduleTranslation.lookupValues(region.front().back().getOperands()));
438 
439     // Drop the mapping that is no longer necessary so that the same region can
440     // be processed multiple times.
441     moduleTranslation.forgetMapping(region);
442     return success();
443   }
444 
445   LogicalResult bodyGenStatus = success();
446   SmallVector<llvm::PHINode *> phis;
447   llvm::BasicBlock *continuationBlock = convertOmpOpRegions(
448       region, blockName, builder, moduleTranslation, bodyGenStatus, &phis);
449   if (failed(bodyGenStatus))
450     return failure();
451   if (continuationBlockArgs)
452     llvm::append_range(*continuationBlockArgs, phis);
453   builder.SetInsertPoint(continuationBlock,
454                          continuationBlock->getFirstInsertionPt());
455   return success();
456 }
457 
458 namespace {
459 /// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used to
460 /// store lambdas with capture.
461 using OwningReductionGen = std::function<llvm::OpenMPIRBuilder::InsertPointTy(
462     llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *,
463     llvm::Value *&)>;
464 using OwningAtomicReductionGen =
465     std::function<llvm::OpenMPIRBuilder::InsertPointTy(
466         llvm::OpenMPIRBuilder::InsertPointTy, llvm::Type *, llvm::Value *,
467         llvm::Value *)>;
468 } // namespace
469 
470 /// Create an OpenMPIRBuilder-compatible reduction generator for the given
471 /// reduction declaration. The generator uses `builder` but ignores its
472 /// insertion point.
473 static OwningReductionGen
474 makeReductionGen(omp::ReductionDeclareOp decl, llvm::IRBuilderBase &builder,
475                  LLVM::ModuleTranslation &moduleTranslation) {
476   // The lambda is mutable because we need access to non-const methods of decl
477   // (which aren't actually mutating it), and we must capture decl by-value to
478   // avoid the dangling reference after the parent function returns.
479   OwningReductionGen gen =
480       [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint,
481                 llvm::Value *lhs, llvm::Value *rhs,
482                 llvm::Value *&result) mutable {
483         Region &reductionRegion = decl.reductionRegion();
484         moduleTranslation.mapValue(reductionRegion.front().getArgument(0), lhs);
485         moduleTranslation.mapValue(reductionRegion.front().getArgument(1), rhs);
486         builder.restoreIP(insertPoint);
487         SmallVector<llvm::Value *> phis;
488         if (failed(inlineConvertOmpRegions(reductionRegion,
489                                            "omp.reduction.nonatomic.body",
490                                            builder, moduleTranslation, &phis)))
491           return llvm::OpenMPIRBuilder::InsertPointTy();
492         assert(phis.size() == 1);
493         result = phis[0];
494         return builder.saveIP();
495       };
496   return gen;
497 }
498 
499 /// Create an OpenMPIRBuilder-compatible atomic reduction generator for the
500 /// given reduction declaration. The generator uses `builder` but ignores its
501 /// insertion point. Returns null if there is no atomic region available in the
502 /// reduction declaration.
503 static OwningAtomicReductionGen
504 makeAtomicReductionGen(omp::ReductionDeclareOp decl,
505                        llvm::IRBuilderBase &builder,
506                        LLVM::ModuleTranslation &moduleTranslation) {
507   if (decl.atomicReductionRegion().empty())
508     return OwningAtomicReductionGen();
509 
510   // The lambda is mutable because we need access to non-const methods of decl
511   // (which aren't actually mutating it), and we must capture decl by-value to
512   // avoid the dangling reference after the parent function returns.
513   OwningAtomicReductionGen atomicGen =
514       [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, llvm::Type *,
515                 llvm::Value *lhs, llvm::Value *rhs) mutable {
516         Region &atomicRegion = decl.atomicReductionRegion();
517         moduleTranslation.mapValue(atomicRegion.front().getArgument(0), lhs);
518         moduleTranslation.mapValue(atomicRegion.front().getArgument(1), rhs);
519         builder.restoreIP(insertPoint);
520         SmallVector<llvm::Value *> phis;
521         if (failed(inlineConvertOmpRegions(atomicRegion,
522                                            "omp.reduction.atomic.body", builder,
523                                            moduleTranslation, &phis)))
524           return llvm::OpenMPIRBuilder::InsertPointTy();
525         assert(phis.empty());
526         return builder.saveIP();
527       };
528   return atomicGen;
529 }
530 
531 /// Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
532 static LogicalResult
533 convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder,
534                   LLVM::ModuleTranslation &moduleTranslation) {
535   auto orderedOp = cast<omp::OrderedOp>(opInst);
536 
537   omp::ClauseDepend dependType = *orderedOp.depend_type_val();
538   bool isDependSource = dependType == omp::ClauseDepend::dependsource;
539   unsigned numLoops = *orderedOp.num_loops_val();
540   SmallVector<llvm::Value *> vecValues =
541       moduleTranslation.lookupValues(orderedOp.depend_vec_vars());
542 
543   size_t indexVecValues = 0;
544   while (indexVecValues < vecValues.size()) {
545     SmallVector<llvm::Value *> storeValues;
546     storeValues.reserve(numLoops);
547     for (unsigned i = 0; i < numLoops; i++) {
548       storeValues.push_back(vecValues[indexVecValues]);
549       indexVecValues++;
550     }
551     llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
552         findAllocaInsertPoint(builder, moduleTranslation);
553     llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
554     builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend(
555         ompLoc, allocaIP, numLoops, storeValues, ".cnt.addr", isDependSource));
556   }
557   return success();
558 }
559 
560 /// Converts an OpenMP 'ordered_region' operation into LLVM IR using
561 /// OpenMPIRBuilder.
562 static LogicalResult
563 convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder,
564                         LLVM::ModuleTranslation &moduleTranslation) {
565   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
566   auto orderedRegionOp = cast<omp::OrderedRegionOp>(opInst);
567 
568   // TODO: The code generation for ordered simd directive is not supported yet.
569   if (orderedRegionOp.simd())
570     return failure();
571 
572   // TODO: support error propagation in OpenMPIRBuilder and use it instead of
573   // relying on captured variables.
574   LogicalResult bodyGenStatus = success();
575 
576   auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
577     // OrderedOp has only one region associated with it.
578     auto &region = cast<omp::OrderedRegionOp>(opInst).getRegion();
579     builder.restoreIP(codeGenIP);
580     convertOmpOpRegions(region, "omp.ordered.region", builder,
581                         moduleTranslation, bodyGenStatus);
582   };
583 
584   // TODO: Perform finalization actions for variables. This has to be
585   // called for variables which have destructors/finalizers.
586   auto finiCB = [&](InsertPointTy codeGenIP) {};
587 
588   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
589   builder.restoreIP(
590       moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd(
591           ompLoc, bodyGenCB, finiCB, !orderedRegionOp.simd()));
592   return bodyGenStatus;
593 }
594 
595 static LogicalResult
596 convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
597                    LLVM::ModuleTranslation &moduleTranslation) {
598   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
599   using StorableBodyGenCallbackTy =
600       llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
601 
602   auto sectionsOp = cast<omp::SectionsOp>(opInst);
603 
604   // TODO: Support the following clauses: private, firstprivate, lastprivate,
605   // reduction, allocate
606   if (!sectionsOp.reduction_vars().empty() || sectionsOp.reductions() ||
607       !sectionsOp.allocate_vars().empty() ||
608       !sectionsOp.allocators_vars().empty())
609     return emitError(sectionsOp.getLoc())
610            << "reduction and allocate clauses are not supported for sections "
611               "construct";
612 
613   LogicalResult bodyGenStatus = success();
614   SmallVector<StorableBodyGenCallbackTy> sectionCBs;
615 
616   for (Operation &op : *sectionsOp.region().begin()) {
617     auto sectionOp = dyn_cast<omp::SectionOp>(op);
618     if (!sectionOp) // omp.terminator
619       continue;
620 
621     Region &region = sectionOp.region();
622     auto sectionCB = [&region, &builder, &moduleTranslation, &bodyGenStatus](
623                          InsertPointTy allocaIP, InsertPointTy codeGenIP) {
624       builder.restoreIP(codeGenIP);
625       convertOmpOpRegions(region, "omp.section.region", builder,
626                           moduleTranslation, bodyGenStatus);
627     };
628     sectionCBs.push_back(sectionCB);
629   }
630 
631   // No sections within omp.sections operation - skip generation. This situation
632   // is only possible if there is only a terminator operation inside the
633   // sections operation
634   if (sectionCBs.empty())
635     return success();
636 
637   assert(isa<omp::SectionOp>(*sectionsOp.region().op_begin()));
638 
639   // TODO: Perform appropriate actions according to the data-sharing
640   // attribute (shared, private, firstprivate, ...) of variables.
641   // Currently defaults to shared.
642   auto privCB = [&](InsertPointTy, InsertPointTy codeGenIP, llvm::Value &,
643                     llvm::Value &vPtr,
644                     llvm::Value *&replacementValue) -> InsertPointTy {
645     replacementValue = &vPtr;
646     return codeGenIP;
647   };
648 
649   // TODO: Perform finalization actions for variables. This has to be
650   // called for variables which have destructors/finalizers.
651   auto finiCB = [&](InsertPointTy codeGenIP) {};
652 
653   llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
654       findAllocaInsertPoint(builder, moduleTranslation);
655   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
656   builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSections(
657       ompLoc, allocaIP, sectionCBs, privCB, finiCB, false,
658       sectionsOp.nowait()));
659   return bodyGenStatus;
660 }
661 
662 /// Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder.
663 static LogicalResult
664 convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder,
665                  LLVM::ModuleTranslation &moduleTranslation) {
666   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
667   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
668   LogicalResult bodyGenStatus = success();
669   auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
670     builder.restoreIP(codegenIP);
671     convertOmpOpRegions(singleOp.region(), "omp.single.region", builder,
672                         moduleTranslation, bodyGenStatus);
673   };
674   auto finiCB = [&](InsertPointTy codeGenIP) {};
675   builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSingle(
676       ompLoc, bodyCB, finiCB, singleOp.nowait(), /*DidIt=*/nullptr));
677   return bodyGenStatus;
678 }
679 
680 /// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
681 static LogicalResult
682 convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
683                  LLVM::ModuleTranslation &moduleTranslation) {
684   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
685   LogicalResult bodyGenStatus = success();
686   if (taskOp.if_expr() || taskOp.final_expr() || taskOp.untiedAttr() ||
687       taskOp.mergeableAttr() || taskOp.in_reductions() || taskOp.priority() ||
688       !taskOp.allocate_vars().empty()) {
689     return taskOp.emitError("unhandled clauses for translation to LLVM IR");
690   }
691   auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
692     builder.restoreIP(codegenIP);
693     convertOmpOpRegions(taskOp.region(), "omp.task.region", builder,
694                         moduleTranslation, bodyGenStatus);
695   };
696   llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
697       findAllocaInsertPoint(builder, moduleTranslation);
698   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
699   builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTask(
700       ompLoc, allocaIP, bodyCB, !taskOp.untied()));
701   return bodyGenStatus;
702 }
703 
704 /// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
705 static LogicalResult
706 convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
707                  LLVM::ModuleTranslation &moduleTranslation) {
708   auto loop = cast<omp::WsLoopOp>(opInst);
709   // TODO: this should be in the op verifier instead.
710   if (loop.lowerBound().empty())
711     return failure();
712 
713   // Static is the default.
714   auto schedule = loop.schedule_val().value_or(omp::ClauseScheduleKind::Static);
715 
716   // Find the loop configuration.
717   llvm::Value *step = moduleTranslation.lookupValue(loop.step()[0]);
718   llvm::Type *ivType = step->getType();
719   llvm::Value *chunk = nullptr;
720   if (loop.schedule_chunk_var()) {
721     llvm::Value *chunkVar =
722         moduleTranslation.lookupValue(loop.schedule_chunk_var());
723     chunk = builder.CreateSExtOrTrunc(chunkVar, ivType);
724   }
725 
726   SmallVector<omp::ReductionDeclareOp> reductionDecls;
727   collectReductionDecls(loop, reductionDecls);
728   llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
729       findAllocaInsertPoint(builder, moduleTranslation);
730 
731   // Allocate space for privatized reduction variables.
732   SmallVector<llvm::Value *> privateReductionVariables;
733   DenseMap<Value, llvm::Value *> reductionVariableMap;
734   unsigned numReductions = loop.getNumReductionVars();
735   privateReductionVariables.reserve(numReductions);
736   if (numReductions != 0) {
737     llvm::IRBuilderBase::InsertPointGuard guard(builder);
738     builder.restoreIP(allocaIP);
739     for (unsigned i = 0; i < numReductions; ++i) {
740       auto reductionType =
741           loop.reduction_vars()[i].getType().cast<LLVM::LLVMPointerType>();
742       llvm::Value *var = builder.CreateAlloca(
743           moduleTranslation.convertType(reductionType.getElementType()));
744       privateReductionVariables.push_back(var);
745       reductionVariableMap.try_emplace(loop.reduction_vars()[i], var);
746     }
747   }
748 
749   // Store the mapping between reduction variables and their private copies on
750   // ModuleTranslation stack. It can be then recovered when translating
751   // omp.reduce operations in a separate call.
752   LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard(
753       moduleTranslation, reductionVariableMap);
754 
755   // Before the loop, store the initial values of reductions into reduction
756   // variables. Although this could be done after allocas, we don't want to mess
757   // up with the alloca insertion point.
758   for (unsigned i = 0; i < numReductions; ++i) {
759     SmallVector<llvm::Value *> phis;
760     if (failed(inlineConvertOmpRegions(reductionDecls[i].initializerRegion(),
761                                        "omp.reduction.neutral", builder,
762                                        moduleTranslation, &phis)))
763       return failure();
764     assert(phis.size() == 1 && "expected one value to be yielded from the "
765                                "reduction neutral element declaration region");
766     builder.CreateStore(phis[0], privateReductionVariables[i]);
767   }
768 
769   // Set up the source location value for OpenMP runtime.
770   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
771 
772   // Generator of the canonical loop body.
773   // TODO: support error propagation in OpenMPIRBuilder and use it instead of
774   // relying on captured variables.
775   SmallVector<llvm::CanonicalLoopInfo *> loopInfos;
776   SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints;
777   LogicalResult bodyGenStatus = success();
778   auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) {
779     // Make sure further conversions know about the induction variable.
780     moduleTranslation.mapValue(
781         loop.getRegion().front().getArgument(loopInfos.size()), iv);
782 
783     // Capture the body insertion point for use in nested loops. BodyIP of the
784     // CanonicalLoopInfo always points to the beginning of the entry block of
785     // the body.
786     bodyInsertPoints.push_back(ip);
787 
788     if (loopInfos.size() != loop.getNumLoops() - 1)
789       return;
790 
791     // Convert the body of the loop.
792     builder.restoreIP(ip);
793     convertOmpOpRegions(loop.region(), "omp.wsloop.region", builder,
794                         moduleTranslation, bodyGenStatus);
795   };
796 
797   // Delegate actual loop construction to the OpenMP IRBuilder.
798   // TODO: this currently assumes WsLoop is semantically similar to SCF loop,
799   // i.e. it has a positive step, uses signed integer semantics. Reconsider
800   // this code when WsLoop clearly supports more cases.
801   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
802   for (unsigned i = 0, e = loop.getNumLoops(); i < e; ++i) {
803     llvm::Value *lowerBound =
804         moduleTranslation.lookupValue(loop.lowerBound()[i]);
805     llvm::Value *upperBound =
806         moduleTranslation.lookupValue(loop.upperBound()[i]);
807     llvm::Value *step = moduleTranslation.lookupValue(loop.step()[i]);
808 
809     // Make sure loop trip count are emitted in the preheader of the outermost
810     // loop at the latest so that they are all available for the new collapsed
811     // loop will be created below.
812     llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
813     llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
814     if (i != 0) {
815       loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back());
816       computeIP = loopInfos.front()->getPreheaderIP();
817     }
818     loopInfos.push_back(ompBuilder->createCanonicalLoop(
819         loc, bodyGen, lowerBound, upperBound, step,
820         /*IsSigned=*/true, loop.inclusive(), computeIP));
821 
822     if (failed(bodyGenStatus))
823       return failure();
824   }
825 
826   // Collapse loops. Store the insertion point because LoopInfos may get
827   // invalidated.
828   llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP();
829   llvm::CanonicalLoopInfo *loopInfo =
830       ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
831 
832   allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
833 
834   // TODO: Handle doacross loops when the ordered clause has a parameter.
835   bool isOrdered = loop.ordered_val().has_value();
836   Optional<omp::ScheduleModifier> scheduleModifier = loop.schedule_modifier();
837   bool isSimd = loop.simd_modifier();
838 
839   ompBuilder->applyWorkshareLoop(
840       ompLoc.DL, loopInfo, allocaIP, !loop.nowait(),
841       convertToScheduleKind(schedule), chunk, isSimd,
842       scheduleModifier == omp::ScheduleModifier::monotonic,
843       scheduleModifier == omp::ScheduleModifier::nonmonotonic, isOrdered);
844 
845   // Continue building IR after the loop. Note that the LoopInfo returned by
846   // `collapseLoops` points inside the outermost loop and is intended for
847   // potential further loop transformations. Use the insertion point stored
848   // before collapsing loops instead.
849   builder.restoreIP(afterIP);
850 
851   // Process the reductions if required.
852   if (numReductions == 0)
853     return success();
854 
855   // Create the reduction generators. We need to own them here because
856   // ReductionInfo only accepts references to the generators.
857   SmallVector<OwningReductionGen> owningReductionGens;
858   SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
859   for (unsigned i = 0; i < numReductions; ++i) {
860     owningReductionGens.push_back(
861         makeReductionGen(reductionDecls[i], builder, moduleTranslation));
862     owningAtomicReductionGens.push_back(
863         makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation));
864   }
865 
866   // Collect the reduction information.
867   SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos;
868   reductionInfos.reserve(numReductions);
869   for (unsigned i = 0; i < numReductions; ++i) {
870     llvm::OpenMPIRBuilder::AtomicReductionGenTy atomicGen = nullptr;
871     if (owningAtomicReductionGens[i])
872       atomicGen = owningAtomicReductionGens[i];
873     auto reductionType =
874         loop.reduction_vars()[i].getType().cast<LLVM::LLVMPointerType>();
875     llvm::Value *variable =
876         moduleTranslation.lookupValue(loop.reduction_vars()[i]);
877     reductionInfos.push_back(
878         {moduleTranslation.convertType(reductionType.getElementType()),
879          variable, privateReductionVariables[i], owningReductionGens[i],
880          atomicGen});
881   }
882 
883   // The call to createReductions below expects the block to have a
884   // terminator. Create an unreachable instruction to serve as terminator
885   // and remove it later.
886   llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
887   builder.SetInsertPoint(tempTerminator);
888   llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint =
889       ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos,
890                                    loop.nowait());
891   if (!contInsertPoint.getBlock())
892     return loop->emitOpError() << "failed to convert reductions";
893   auto nextInsertionPoint =
894       ompBuilder->createBarrier(contInsertPoint, llvm::omp::OMPD_for);
895   tempTerminator->eraseFromParent();
896   builder.restoreIP(nextInsertionPoint);
897 
898   return success();
899 }
900 
901 /// Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
902 static LogicalResult
903 convertOmpSimdLoop(Operation &opInst, llvm::IRBuilderBase &builder,
904                    LLVM::ModuleTranslation &moduleTranslation) {
905   auto loop = cast<omp::SimdLoopOp>(opInst);
906 
907   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
908 
909   // Generator of the canonical loop body.
910   // TODO: support error propagation in OpenMPIRBuilder and use it instead of
911   // relying on captured variables.
912   SmallVector<llvm::CanonicalLoopInfo *> loopInfos;
913   SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints;
914   LogicalResult bodyGenStatus = success();
915 
916   // TODO: The code generation for if clause is not supported yet.
917   if (loop.if_expr())
918     return failure();
919 
920   auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) {
921     // Make sure further conversions know about the induction variable.
922     moduleTranslation.mapValue(
923         loop.getRegion().front().getArgument(loopInfos.size()), iv);
924 
925     // Capture the body insertion point for use in nested loops. BodyIP of the
926     // CanonicalLoopInfo always points to the beginning of the entry block of
927     // the body.
928     bodyInsertPoints.push_back(ip);
929 
930     if (loopInfos.size() != loop.getNumLoops() - 1)
931       return;
932 
933     // Convert the body of the loop.
934     builder.restoreIP(ip);
935     convertOmpOpRegions(loop.region(), "omp.simdloop.region", builder,
936                         moduleTranslation, bodyGenStatus);
937   };
938 
939   // Delegate actual loop construction to the OpenMP IRBuilder.
940   // TODO: this currently assumes SimdLoop is semantically similar to SCF loop,
941   // i.e. it has a positive step, uses signed integer semantics. Reconsider
942   // this code when SimdLoop clearly supports more cases.
943   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
944   for (unsigned i = 0, e = loop.getNumLoops(); i < e; ++i) {
945     llvm::Value *lowerBound =
946         moduleTranslation.lookupValue(loop.lowerBound()[i]);
947     llvm::Value *upperBound =
948         moduleTranslation.lookupValue(loop.upperBound()[i]);
949     llvm::Value *step = moduleTranslation.lookupValue(loop.step()[i]);
950 
951     // Make sure loop trip count are emitted in the preheader of the outermost
952     // loop at the latest so that they are all available for the new collapsed
953     // loop will be created below.
954     llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
955     llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
956     if (i != 0) {
957       loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(),
958                                                        ompLoc.DL);
959       computeIP = loopInfos.front()->getPreheaderIP();
960     }
961     loopInfos.push_back(ompBuilder->createCanonicalLoop(
962         loc, bodyGen, lowerBound, upperBound, step,
963         /*IsSigned=*/true, /*Inclusive=*/true, computeIP));
964 
965     if (failed(bodyGenStatus))
966       return failure();
967   }
968 
969   // Collapse loops.
970   llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP();
971   llvm::CanonicalLoopInfo *loopInfo =
972       ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
973 
974   ompBuilder->applySimd(loopInfo, nullptr);
975 
976   builder.restoreIP(afterIP);
977   return success();
978 }
979 
980 /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
981 llvm::AtomicOrdering
982 convertAtomicOrdering(Optional<omp::ClauseMemoryOrderKind> ao) {
983   if (!ao)
984     return llvm::AtomicOrdering::Monotonic; // Default Memory Ordering
985 
986   switch (*ao) {
987   case omp::ClauseMemoryOrderKind::Seq_cst:
988     return llvm::AtomicOrdering::SequentiallyConsistent;
989   case omp::ClauseMemoryOrderKind::Acq_rel:
990     return llvm::AtomicOrdering::AcquireRelease;
991   case omp::ClauseMemoryOrderKind::Acquire:
992     return llvm::AtomicOrdering::Acquire;
993   case omp::ClauseMemoryOrderKind::Release:
994     return llvm::AtomicOrdering::Release;
995   case omp::ClauseMemoryOrderKind::Relaxed:
996     return llvm::AtomicOrdering::Monotonic;
997   }
998   llvm_unreachable("Unknown ClauseMemoryOrderKind kind");
999 }
1000 
1001 /// Convert omp.atomic.read operation to LLVM IR.
1002 static LogicalResult
1003 convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder,
1004                      LLVM::ModuleTranslation &moduleTranslation) {
1005 
1006   auto readOp = cast<omp::AtomicReadOp>(opInst);
1007   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1008 
1009   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1010 
1011   llvm::AtomicOrdering AO = convertAtomicOrdering(readOp.memory_order_val());
1012   llvm::Value *x = moduleTranslation.lookupValue(readOp.x());
1013   Type xTy = readOp.x().getType().cast<omp::PointerLikeType>().getElementType();
1014   llvm::Value *v = moduleTranslation.lookupValue(readOp.v());
1015   Type vTy = readOp.v().getType().cast<omp::PointerLikeType>().getElementType();
1016   llvm::OpenMPIRBuilder::AtomicOpValue V = {
1017       v, moduleTranslation.convertType(vTy), false, false};
1018   llvm::OpenMPIRBuilder::AtomicOpValue X = {
1019       x, moduleTranslation.convertType(xTy), false, false};
1020   builder.restoreIP(ompBuilder->createAtomicRead(ompLoc, X, V, AO));
1021   return success();
1022 }
1023 
1024 /// Converts an omp.atomic.write operation to LLVM IR.
1025 static LogicalResult
1026 convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder,
1027                       LLVM::ModuleTranslation &moduleTranslation) {
1028   auto writeOp = cast<omp::AtomicWriteOp>(opInst);
1029   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1030 
1031   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1032   llvm::AtomicOrdering ao = convertAtomicOrdering(writeOp.memory_order_val());
1033   llvm::Value *expr = moduleTranslation.lookupValue(writeOp.value());
1034   llvm::Value *dest = moduleTranslation.lookupValue(writeOp.address());
1035   llvm::Type *ty = moduleTranslation.convertType(writeOp.value().getType());
1036   llvm::OpenMPIRBuilder::AtomicOpValue x = {dest, ty, /*isSigned=*/false,
1037                                             /*isVolatile=*/false};
1038   builder.restoreIP(ompBuilder->createAtomicWrite(ompLoc, x, expr, ao));
1039   return success();
1040 }
1041 
1042 /// Converts an LLVM dialect binary operation to the corresponding enum value
1043 /// for `atomicrmw` supported binary operation.
1044 llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op) {
1045   return llvm::TypeSwitch<Operation *, llvm::AtomicRMWInst::BinOp>(&op)
1046       .Case([&](LLVM::AddOp) { return llvm::AtomicRMWInst::BinOp::Add; })
1047       .Case([&](LLVM::SubOp) { return llvm::AtomicRMWInst::BinOp::Sub; })
1048       .Case([&](LLVM::AndOp) { return llvm::AtomicRMWInst::BinOp::And; })
1049       .Case([&](LLVM::OrOp) { return llvm::AtomicRMWInst::BinOp::Or; })
1050       .Case([&](LLVM::XOrOp) { return llvm::AtomicRMWInst::BinOp::Xor; })
1051       .Case([&](LLVM::UMaxOp) { return llvm::AtomicRMWInst::BinOp::UMax; })
1052       .Case([&](LLVM::UMinOp) { return llvm::AtomicRMWInst::BinOp::UMin; })
1053       .Case([&](LLVM::FAddOp) { return llvm::AtomicRMWInst::BinOp::FAdd; })
1054       .Case([&](LLVM::FSubOp) { return llvm::AtomicRMWInst::BinOp::FSub; })
1055       .Default(llvm::AtomicRMWInst::BinOp::BAD_BINOP);
1056 }
1057 
1058 /// Converts an OpenMP atomic update operation using OpenMPIRBuilder.
1059 static LogicalResult
1060 convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst,
1061                        llvm::IRBuilderBase &builder,
1062                        LLVM::ModuleTranslation &moduleTranslation) {
1063   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1064 
1065   // Convert values and types.
1066   auto &innerOpList = opInst.region().front().getOperations();
1067   if (innerOpList.size() != 2)
1068     return opInst.emitError("exactly two operations are allowed inside an "
1069                             "atomic update region while lowering to LLVM IR");
1070 
1071   Operation &innerUpdateOp = innerOpList.front();
1072 
1073   if (innerUpdateOp.getNumOperands() != 2 ||
1074       !llvm::is_contained(innerUpdateOp.getOperands(),
1075                           opInst.getRegion().getArgument(0)))
1076     return opInst.emitError(
1077         "the update operation inside the region must be a binary operation and "
1078         "that update operation must have the region argument as an operand");
1079 
1080   llvm::AtomicRMWInst::BinOp binop = convertBinOpToAtomic(innerUpdateOp);
1081 
1082   bool isXBinopExpr =
1083       innerUpdateOp.getNumOperands() > 0 &&
1084       innerUpdateOp.getOperand(0) == opInst.getRegion().getArgument(0);
1085 
1086   mlir::Value mlirExpr = (isXBinopExpr ? innerUpdateOp.getOperand(1)
1087                                        : innerUpdateOp.getOperand(0));
1088   llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr);
1089   llvm::Value *llvmX = moduleTranslation.lookupValue(opInst.x());
1090   LLVM::LLVMPointerType mlirXType =
1091       opInst.x().getType().cast<LLVM::LLVMPointerType>();
1092   llvm::Type *llvmXElementType =
1093       moduleTranslation.convertType(mlirXType.getElementType());
1094   llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
1095                                                       /*isSigned=*/false,
1096                                                       /*isVolatile=*/false};
1097 
1098   llvm::AtomicOrdering atomicOrdering =
1099       convertAtomicOrdering(opInst.memory_order_val());
1100 
1101   // Generate update code.
1102   LogicalResult updateGenStatus = success();
1103   auto updateFn = [&opInst, &moduleTranslation, &updateGenStatus](
1104                       llvm::Value *atomicx,
1105                       llvm::IRBuilder<> &builder) -> llvm::Value * {
1106     Block &bb = *opInst.region().begin();
1107     moduleTranslation.mapValue(*opInst.region().args_begin(), atomicx);
1108     moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
1109     if (failed(moduleTranslation.convertBlock(bb, true, builder))) {
1110       updateGenStatus = (opInst.emitError()
1111                          << "unable to convert update operation to llvm IR");
1112       return nullptr;
1113     }
1114     omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
1115     assert(yieldop && yieldop.results().size() == 1 &&
1116            "terminator must be omp.yield op and it must have exactly one "
1117            "argument");
1118     return moduleTranslation.lookupValue(yieldop.results()[0]);
1119   };
1120 
1121   // Handle ambiguous alloca, if any.
1122   auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
1123   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1124   builder.restoreIP(ompBuilder->createAtomicUpdate(
1125       ompLoc, allocaIP, llvmAtomicX, llvmExpr, atomicOrdering, binop, updateFn,
1126       isXBinopExpr));
1127   return updateGenStatus;
1128 }
1129 
1130 static LogicalResult
1131 convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
1132                         llvm::IRBuilderBase &builder,
1133                         LLVM::ModuleTranslation &moduleTranslation) {
1134   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1135   mlir::Value mlirExpr;
1136   bool isXBinopExpr = false, isPostfixUpdate = false;
1137   llvm::AtomicRMWInst::BinOp binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;
1138 
1139   omp::AtomicUpdateOp atomicUpdateOp = atomicCaptureOp.getAtomicUpdateOp();
1140   omp::AtomicWriteOp atomicWriteOp = atomicCaptureOp.getAtomicWriteOp();
1141 
1142   assert((atomicUpdateOp || atomicWriteOp) &&
1143          "internal op must be an atomic.update or atomic.write op");
1144 
1145   if (atomicWriteOp) {
1146     isPostfixUpdate = true;
1147     mlirExpr = atomicWriteOp.value();
1148   } else {
1149     isPostfixUpdate = atomicCaptureOp.getSecondOp() ==
1150                       atomicCaptureOp.getAtomicUpdateOp().getOperation();
1151     auto &innerOpList = atomicUpdateOp.region().front().getOperations();
1152     if (innerOpList.size() != 2)
1153       return atomicUpdateOp.emitError(
1154           "exactly two operations are allowed inside an "
1155           "atomic update region while lowering to LLVM IR");
1156     Operation *innerUpdateOp = atomicUpdateOp.getFirstOp();
1157     if (innerUpdateOp->getNumOperands() != 2 ||
1158         !llvm::is_contained(innerUpdateOp->getOperands(),
1159                             atomicUpdateOp.getRegion().getArgument(0)))
1160       return atomicUpdateOp.emitError(
1161           "the update operation inside the region must be a binary operation "
1162           "and that update operation must have the region argument as an "
1163           "operand");
1164     binop = convertBinOpToAtomic(*innerUpdateOp);
1165 
1166     isXBinopExpr = innerUpdateOp->getOperand(0) ==
1167                    atomicUpdateOp.getRegion().getArgument(0);
1168 
1169     mlirExpr = (isXBinopExpr ? innerUpdateOp->getOperand(1)
1170                              : innerUpdateOp->getOperand(0));
1171   }
1172 
1173   llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr);
1174   llvm::Value *llvmX =
1175       moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().x());
1176   llvm::Value *llvmV =
1177       moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().v());
1178   auto mlirXType = atomicCaptureOp.getAtomicReadOp()
1179                        .x()
1180                        .getType()
1181                        .cast<LLVM::LLVMPointerType>();
1182   llvm::Type *llvmXElementType =
1183       moduleTranslation.convertType(mlirXType.getElementType());
1184   llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
1185                                                       /*isSigned=*/false,
1186                                                       /*isVolatile=*/false};
1187   llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicV = {llvmV, llvmXElementType,
1188                                                       /*isSigned=*/false,
1189                                                       /*isVolatile=*/false};
1190 
1191   llvm::AtomicOrdering atomicOrdering =
1192       convertAtomicOrdering(atomicCaptureOp.memory_order_val());
1193 
1194   LogicalResult updateGenStatus = success();
1195   auto updateFn = [&](llvm::Value *atomicx,
1196                       llvm::IRBuilder<> &builder) -> llvm::Value * {
1197     if (atomicWriteOp)
1198       return moduleTranslation.lookupValue(atomicWriteOp.value());
1199     Block &bb = *atomicUpdateOp.region().begin();
1200     moduleTranslation.mapValue(*atomicUpdateOp.region().args_begin(), atomicx);
1201     moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
1202     if (failed(moduleTranslation.convertBlock(bb, true, builder))) {
1203       updateGenStatus = (atomicUpdateOp.emitError()
1204                          << "unable to convert update operation to llvm IR");
1205       return nullptr;
1206     }
1207     omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
1208     assert(yieldop && yieldop.results().size() == 1 &&
1209            "terminator must be omp.yield op and it must have exactly one "
1210            "argument");
1211     return moduleTranslation.lookupValue(yieldop.results()[0]);
1212   };
1213 
1214   // Handle ambiguous alloca, if any.
1215   auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
1216   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1217   builder.restoreIP(ompBuilder->createAtomicCapture(
1218       ompLoc, allocaIP, llvmAtomicX, llvmAtomicV, llvmExpr, atomicOrdering,
1219       binop, updateFn, atomicUpdateOp, isPostfixUpdate, isXBinopExpr));
1220   return updateGenStatus;
1221 }
1222 
1223 /// Converts an OpenMP reduction operation using OpenMPIRBuilder. Expects the
1224 /// mapping between reduction variables and their private equivalents to have
1225 /// been stored on the ModuleTranslation stack. Currently only supports
1226 /// reduction within WsLoopOp, but can be easily extended.
1227 static LogicalResult
1228 convertOmpReductionOp(omp::ReductionOp reductionOp,
1229                       llvm::IRBuilderBase &builder,
1230                       LLVM::ModuleTranslation &moduleTranslation) {
1231   // Find the declaration that corresponds to the reduction op.
1232   auto reductionContainer = reductionOp->getParentOfType<omp::WsLoopOp>();
1233   omp::ReductionDeclareOp declaration =
1234       findReductionDecl(reductionContainer, reductionOp);
1235   assert(declaration && "could not find reduction declaration");
1236 
1237   // Retrieve the mapping between reduction variables and their private
1238   // equivalents.
1239   const DenseMap<Value, llvm::Value *> *reductionVariableMap = nullptr;
1240   moduleTranslation.stackWalk<OpenMPVarMappingStackFrame>(
1241       [&](const OpenMPVarMappingStackFrame &frame) {
1242         reductionVariableMap = &frame.mapping;
1243         return WalkResult::interrupt();
1244       });
1245   assert(reductionVariableMap && "couldn't find private reduction variables");
1246 
1247   // Translate the reduction operation by emitting the body of the corresponding
1248   // reduction declaration.
1249   Region &reductionRegion = declaration.reductionRegion();
1250   llvm::Value *privateReductionVar =
1251       reductionVariableMap->lookup(reductionOp.accumulator());
1252   llvm::Value *reductionVal = builder.CreateLoad(
1253       moduleTranslation.convertType(reductionOp.operand().getType()),
1254       privateReductionVar);
1255 
1256   moduleTranslation.mapValue(reductionRegion.front().getArgument(0),
1257                              reductionVal);
1258   moduleTranslation.mapValue(
1259       reductionRegion.front().getArgument(1),
1260       moduleTranslation.lookupValue(reductionOp.operand()));
1261 
1262   SmallVector<llvm::Value *> phis;
1263   if (failed(inlineConvertOmpRegions(reductionRegion, "omp.reduction.body",
1264                                      builder, moduleTranslation, &phis)))
1265     return failure();
1266   assert(phis.size() == 1 && "expected one value to be yielded from "
1267                              "the reduction body declaration region");
1268   builder.CreateStore(phis[0], privateReductionVar);
1269   return success();
1270 }
1271 
1272 /// Converts an OpenMP Threadprivate operation into LLVM IR using
1273 /// OpenMPIRBuilder.
1274 static LogicalResult
1275 convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder,
1276                         LLVM::ModuleTranslation &moduleTranslation) {
1277   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1278   auto threadprivateOp = cast<omp::ThreadprivateOp>(opInst);
1279 
1280   Value symAddr = threadprivateOp.sym_addr();
1281   auto *symOp = symAddr.getDefiningOp();
1282   if (!isa<LLVM::AddressOfOp>(symOp))
1283     return opInst.emitError("Addressing symbol not found");
1284   LLVM::AddressOfOp addressOfOp = dyn_cast<LLVM::AddressOfOp>(symOp);
1285 
1286   LLVM::GlobalOp global = addressOfOp.getGlobal();
1287   llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
1288   llvm::Value *data =
1289       builder.CreateBitCast(globalValue, builder.getInt8PtrTy());
1290   llvm::Type *type = globalValue->getValueType();
1291   llvm::TypeSize typeSize =
1292       builder.GetInsertBlock()->getModule()->getDataLayout().getTypeStoreSize(
1293           type);
1294   llvm::ConstantInt *size = builder.getInt64(typeSize.getFixedSize());
1295   llvm::StringRef suffix = llvm::StringRef(".cache", 6);
1296   std::string cacheName = (Twine(global.getSymName()).concat(suffix)).str();
1297   // Emit runtime function and bitcast its type (i8*) to real data type.
1298   llvm::Value *callInst =
1299       moduleTranslation.getOpenMPBuilder()->createCachedThreadPrivate(
1300           ompLoc, data, size, cacheName);
1301   llvm::Value *result = builder.CreateBitCast(callInst, globalValue->getType());
1302   moduleTranslation.mapValue(opInst.getResult(0), result);
1303   return success();
1304 }
1305 
namespace {

/// Implementation of the dialect interface that converts operations belonging
/// to the OpenMP dialect to LLVM IR. The class adds no state of its own; it
/// only overrides `convertOperation`.
class OpenMPDialectLLVMIRTranslationInterface
    : public LLVMTranslationDialectInterface {
public:
  // Inherit the constructors of the base interface.
  using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface;

  /// Translates the given operation to LLVM IR using the provided IR builder
  /// and saving the state in `moduleTranslation`. The definition is provided
  /// out of line.
  LogicalResult
  convertOperation(Operation *op, llvm::IRBuilderBase &builder,
                   LLVM::ModuleTranslation &moduleTranslation) const final;
};

} // namespace
1323 
1324 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR
1325 /// (including OpenMP runtime calls).
1326 LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
1327     Operation *op, llvm::IRBuilderBase &builder,
1328     LLVM::ModuleTranslation &moduleTranslation) const {
1329 
1330   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1331 
1332   return llvm::TypeSwitch<Operation *, LogicalResult>(op)
1333       .Case([&](omp::BarrierOp) {
1334         ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier);
1335         return success();
1336       })
1337       .Case([&](omp::TaskwaitOp) {
1338         ompBuilder->createTaskwait(builder.saveIP());
1339         return success();
1340       })
1341       .Case([&](omp::TaskyieldOp) {
1342         ompBuilder->createTaskyield(builder.saveIP());
1343         return success();
1344       })
1345       .Case([&](omp::FlushOp) {
1346         // No support in Openmp runtime function (__kmpc_flush) to accept
1347         // the argument list.
1348         // OpenMP standard states the following:
1349         //  "An implementation may implement a flush with a list by ignoring
1350         //   the list, and treating it the same as a flush without a list."
1351         //
1352         // The argument list is discarded so that, flush with a list is treated
1353         // same as a flush without a list.
1354         ompBuilder->createFlush(builder.saveIP());
1355         return success();
1356       })
1357       .Case([&](omp::ParallelOp op) {
1358         return convertOmpParallel(op, builder, moduleTranslation);
1359       })
1360       .Case([&](omp::ReductionOp reductionOp) {
1361         return convertOmpReductionOp(reductionOp, builder, moduleTranslation);
1362       })
1363       .Case([&](omp::MasterOp) {
1364         return convertOmpMaster(*op, builder, moduleTranslation);
1365       })
1366       .Case([&](omp::CriticalOp) {
1367         return convertOmpCritical(*op, builder, moduleTranslation);
1368       })
1369       .Case([&](omp::OrderedRegionOp) {
1370         return convertOmpOrderedRegion(*op, builder, moduleTranslation);
1371       })
1372       .Case([&](omp::OrderedOp) {
1373         return convertOmpOrdered(*op, builder, moduleTranslation);
1374       })
1375       .Case([&](omp::WsLoopOp) {
1376         return convertOmpWsLoop(*op, builder, moduleTranslation);
1377       })
1378       .Case([&](omp::SimdLoopOp) {
1379         return convertOmpSimdLoop(*op, builder, moduleTranslation);
1380       })
1381       .Case([&](omp::AtomicReadOp) {
1382         return convertOmpAtomicRead(*op, builder, moduleTranslation);
1383       })
1384       .Case([&](omp::AtomicWriteOp) {
1385         return convertOmpAtomicWrite(*op, builder, moduleTranslation);
1386       })
1387       .Case([&](omp::AtomicUpdateOp op) {
1388         return convertOmpAtomicUpdate(op, builder, moduleTranslation);
1389       })
1390       .Case([&](omp::AtomicCaptureOp op) {
1391         return convertOmpAtomicCapture(op, builder, moduleTranslation);
1392       })
1393       .Case([&](omp::SectionsOp) {
1394         return convertOmpSections(*op, builder, moduleTranslation);
1395       })
1396       .Case([&](omp::SingleOp op) {
1397         return convertOmpSingle(op, builder, moduleTranslation);
1398       })
1399       .Case([&](omp::TaskOp op) {
1400         return convertOmpTaskOp(op, builder, moduleTranslation);
1401       })
1402       .Case<omp::YieldOp, omp::TerminatorOp, omp::ReductionDeclareOp,
1403             omp::CriticalDeclareOp>([](auto op) {
1404         // `yield` and `terminator` can be just omitted. The block structure
1405         // was created in the region that handles their parent operation.
1406         // `reduction.declare` will be used by reductions and is not
1407         // converted directly, skip it.
1408         // `critical.declare` is only used to declare names of critical
1409         // sections which will be used by `critical` ops and hence can be
1410         // ignored for lowering. The OpenMP IRBuilder will create unique
1411         // name for critical section names.
1412         return success();
1413       })
1414       .Case([&](omp::ThreadprivateOp) {
1415         return convertOmpThreadprivate(*op, builder, moduleTranslation);
1416       })
1417       .Default([&](Operation *inst) {
1418         return inst->emitError("unsupported OpenMP operation: ")
1419                << inst->getName();
1420       });
1421 }
1422 
1423 void mlir::registerOpenMPDialectTranslation(DialectRegistry &registry) {
1424   registry.insert<omp::OpenMPDialect>();
1425   registry.addExtension(+[](MLIRContext *ctx, omp::OpenMPDialect *dialect) {
1426     dialect->addInterfaces<OpenMPDialectLLVMIRTranslationInterface>();
1427   });
1428 }
1429 
1430 void mlir::registerOpenMPDialectTranslation(MLIRContext &context) {
1431   DialectRegistry registry;
1432   registerOpenMPDialectTranslation(registry);
1433   context.appendDialectRegistry(registry);
1434 }
1435