1 //===- OpenMPToLLVMIRTranslation.cpp - Translate OpenMP dialect to LLVM IR-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a translation between the MLIR OpenMP dialect and LLVM
10 // IR.
11 //
12 //===----------------------------------------------------------------------===//
13 #include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h"
14 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
15 #include "mlir/IR/BlockAndValueMapping.h"
16 #include "mlir/IR/Operation.h"
17 #include "mlir/Support/LLVM.h"
18 #include "mlir/Target/LLVMIR/ModuleTranslation.h"
19 
20 #include "llvm/ADT/SetVector.h"
21 #include "llvm/ADT/TypeSwitch.h"
22 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
23 #include "llvm/IR/IRBuilder.h"
24 
25 using namespace mlir;
26 
27 namespace {
28 /// ModuleTranslation stack frame for OpenMP operations. This keeps track of the
29 /// insertion points for allocas.
30 class OpenMPAllocaStackFrame
31     : public LLVM::ModuleTranslation::StackFrameBase<OpenMPAllocaStackFrame> {
32 public:
33   explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP)
34       : allocaInsertPoint(allocaIP) {}
35   llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
36 };
37 
38 /// ModuleTranslation stack frame containing the partial mapping between MLIR
39 /// values and their LLVM IR equivalents.
40 class OpenMPVarMappingStackFrame
41     : public LLVM::ModuleTranslation::StackFrameBase<
42           OpenMPVarMappingStackFrame> {
43 public:
44   explicit OpenMPVarMappingStackFrame(
45       const DenseMap<Value, llvm::Value *> &mapping)
46       : mapping(mapping) {}
47 
48   DenseMap<Value, llvm::Value *> mapping;
49 };
50 } // namespace
51 
52 /// Find the insertion point for allocas given the current insertion point for
53 /// normal operations in the builder.
54 static llvm::OpenMPIRBuilder::InsertPointTy
55 findAllocaInsertPoint(llvm::IRBuilderBase &builder,
56                       const LLVM::ModuleTranslation &moduleTranslation) {
57   // If there is an alloca insertion point on stack, i.e. we are in a nested
58   // operation and a specific point was provided by some surrounding operation,
59   // use it.
60   llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
61   WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocaStackFrame>(
62       [&](const OpenMPAllocaStackFrame &frame) {
63         allocaInsertPoint = frame.allocaInsertPoint;
64         return WalkResult::interrupt();
65       });
66   if (walkResult.wasInterrupted())
67     return allocaInsertPoint;
68 
69   // Otherwise, insert to the entry block of the surrounding function.
70   llvm::BasicBlock &funcEntryBlock =
71       builder.GetInsertBlock()->getParent()->getEntryBlock();
72   return llvm::OpenMPIRBuilder::InsertPointTy(
73       &funcEntryBlock, funcEntryBlock.getFirstInsertionPt());
74 }
75 
76 /// Converts the given region that appears within an OpenMP dialect operation to
77 /// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the
78 /// region, and a branch from any block with an successor-less OpenMP terminator
79 /// to `continuationBlock`. Populates `continuationBlockPHIs` with the PHI nodes
80 /// of the continuation block if provided.
81 static void convertOmpOpRegions(
82     Region &region, StringRef blockName, llvm::BasicBlock &sourceBlock,
83     llvm::BasicBlock &continuationBlock, llvm::IRBuilderBase &builder,
84     LLVM::ModuleTranslation &moduleTranslation, LogicalResult &bodyGenStatus,
85     SmallVectorImpl<llvm::PHINode *> *continuationBlockPHIs = nullptr) {
86   llvm::LLVMContext &llvmContext = builder.getContext();
87   for (Block &bb : region) {
88     llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create(
89         llvmContext, blockName, builder.GetInsertBlock()->getParent(),
90         builder.GetInsertBlock()->getNextNode());
91     moduleTranslation.mapBlock(&bb, llvmBB);
92   }
93 
94   llvm::Instruction *sourceTerminator = sourceBlock.getTerminator();
95 
96   // Terminators (namely YieldOp) may be forwarding values to the region that
97   // need to be available in the continuation block. Collect the types of these
98   // operands in preparation of creating PHI nodes.
99   SmallVector<llvm::Type *> continuationBlockPHITypes;
100   bool operandsProcessed = false;
101   unsigned numYields = 0;
102   for (Block &bb : region.getBlocks()) {
103     if (omp::YieldOp yield = dyn_cast<omp::YieldOp>(bb.getTerminator())) {
104       if (!operandsProcessed) {
105         for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
106           continuationBlockPHITypes.push_back(
107               moduleTranslation.convertType(yield->getOperand(i).getType()));
108         }
109         operandsProcessed = true;
110       } else {
111         assert(continuationBlockPHITypes.size() == yield->getNumOperands() &&
112                "mismatching number of values yielded from the region");
113         for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
114           llvm::Type *operandType =
115               moduleTranslation.convertType(yield->getOperand(i).getType());
116           (void)operandType;
117           assert(continuationBlockPHITypes[i] == operandType &&
118                  "values of mismatching types yielded from the region");
119         }
120       }
121       numYields++;
122     }
123   }
124 
125   // Insert PHI nodes in the continuation block for any values forwarded by the
126   // terminators in this region.
127   if (!continuationBlockPHITypes.empty())
128     assert(
129         continuationBlockPHIs &&
130         "expected continuation block PHIs if converted regions yield values");
131   if (continuationBlockPHIs) {
132     llvm::IRBuilderBase::InsertPointGuard guard(builder);
133     continuationBlockPHIs->reserve(continuationBlockPHITypes.size());
134     builder.SetInsertPoint(&continuationBlock, continuationBlock.begin());
135     for (llvm::Type *ty : continuationBlockPHITypes)
136       continuationBlockPHIs->push_back(builder.CreatePHI(ty, numYields));
137   }
138 
139   // Convert blocks one by one in topological order to ensure
140   // defs are converted before uses.
141   SetVector<Block *> blocks =
142       LLVM::detail::getTopologicallySortedBlocks(region);
143   for (Block *bb : blocks) {
144     llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb);
145     // Retarget the branch of the entry block to the entry block of the
146     // converted region (regions are single-entry).
147     if (bb->isEntryBlock()) {
148       assert(sourceTerminator->getNumSuccessors() == 1 &&
149              "provided entry block has multiple successors");
150       assert(sourceTerminator->getSuccessor(0) == &continuationBlock &&
151              "ContinuationBlock is not the successor of the entry block");
152       sourceTerminator->setSuccessor(0, llvmBB);
153     }
154 
155     llvm::IRBuilderBase::InsertPointGuard guard(builder);
156     if (failed(
157             moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder))) {
158       bodyGenStatus = failure();
159       return;
160     }
161 
162     // Special handling for `omp.yield` and `omp.terminator` (we may have more
163     // than one): they return the control to the parent OpenMP dialect operation
164     // so replace them with the branch to the continuation block. We handle this
165     // here to avoid relying inter-function communication through the
166     // ModuleTranslation class to set up the correct insertion point. This is
167     // also consistent with MLIR's idiom of handling special region terminators
168     // in the same code that handles the region-owning operation.
169     Operation *terminator = bb->getTerminator();
170     if (isa<omp::TerminatorOp, omp::YieldOp>(terminator)) {
171       builder.CreateBr(&continuationBlock);
172 
173       for (unsigned i = 0, e = terminator->getNumOperands(); i < e; ++i)
174         (*continuationBlockPHIs)[i]->addIncoming(
175             moduleTranslation.lookupValue(terminator->getOperand(i)), llvmBB);
176     }
177   }
178   // After all blocks have been traversed and values mapped, connect the PHI
179   // nodes to the results of preceding blocks.
180   LLVM::detail::connectPHINodes(region, moduleTranslation);
181 
182   // Remove the blocks and values defined in this region from the mapping since
183   // they are not visible outside of this region. This allows the same region to
184   // be converted several times, that is cloned, without clashes, and slightly
185   // speeds up the lookups.
186   moduleTranslation.forgetMapping(region);
187 }
188 
189 /// Convert ProcBindKind from MLIR-generated enum to LLVM enum.
190 static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) {
191   switch (kind) {
192   case omp::ClauseProcBindKind::close:
193     return llvm::omp::ProcBindKind::OMP_PROC_BIND_close;
194   case omp::ClauseProcBindKind::master:
195     return llvm::omp::ProcBindKind::OMP_PROC_BIND_master;
196   case omp::ClauseProcBindKind::primary:
197     return llvm::omp::ProcBindKind::OMP_PROC_BIND_primary;
198   case omp::ClauseProcBindKind::spread:
199     return llvm::omp::ProcBindKind::OMP_PROC_BIND_spread;
200   }
201   llvm_unreachable("Unknown ClauseProcBindKind kind");
202 }
203 
204 /// Converts the OpenMP parallel operation to LLVM IR.
205 static LogicalResult
206 convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
207                    LLVM::ModuleTranslation &moduleTranslation) {
208   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
209   // TODO: support error propagation in OpenMPIRBuilder and use it instead of
210   // relying on captured variables.
211   LogicalResult bodyGenStatus = success();
212 
213   auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
214                        llvm::BasicBlock &continuationBlock) {
215     // Save the alloca insertion point on ModuleTranslation stack for use in
216     // nested regions.
217     LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
218         moduleTranslation, allocaIP);
219 
220     // ParallelOp has only one region associated with it.
221     convertOmpOpRegions(opInst.getRegion(), "omp.par.region",
222                         *codeGenIP.getBlock(), continuationBlock, builder,
223                         moduleTranslation, bodyGenStatus);
224   };
225 
226   // TODO: Perform appropriate actions according to the data-sharing
227   // attribute (shared, private, firstprivate, ...) of variables.
228   // Currently defaults to shared.
229   auto privCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
230                     llvm::Value &, llvm::Value &vPtr,
231                     llvm::Value *&replacementValue) -> InsertPointTy {
232     replacementValue = &vPtr;
233 
234     return codeGenIP;
235   };
236 
237   // TODO: Perform finalization actions for variables. This has to be
238   // called for variables which have destructors/finalizers.
239   auto finiCB = [&](InsertPointTy codeGenIP) {};
240 
241   llvm::Value *ifCond = nullptr;
242   if (auto ifExprVar = opInst.if_expr_var())
243     ifCond = moduleTranslation.lookupValue(ifExprVar);
244   llvm::Value *numThreads = nullptr;
245   if (auto numThreadsVar = opInst.num_threads_var())
246     numThreads = moduleTranslation.lookupValue(numThreadsVar);
247   auto pbKind = llvm::omp::OMP_PROC_BIND_default;
248   if (auto bind = opInst.proc_bind_val())
249     pbKind = getProcBindKind(*bind);
250   // TODO: Is the Parallel construct cancellable?
251   bool isCancellable = false;
252 
253   // Ensure that the BasicBlock for the the parallel region is sparate from the
254   // function entry which we may need to insert allocas.
255   if (builder.GetInsertBlock() ==
256       &builder.GetInsertBlock()->getParent()->getEntryBlock()) {
257     assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() &&
258            "Assuming end of basic block");
259     llvm::BasicBlock *entryBB =
260         llvm::BasicBlock::Create(builder.getContext(), "parallel.entry",
261                                  builder.GetInsertBlock()->getParent(),
262                                  builder.GetInsertBlock()->getNextNode());
263     builder.CreateBr(entryBB);
264     builder.SetInsertPoint(entryBB);
265   }
266   llvm::OpenMPIRBuilder::LocationDescription ompLoc(
267       builder.saveIP(), builder.getCurrentDebugLocation());
268   builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createParallel(
269       ompLoc, findAllocaInsertPoint(builder, moduleTranslation), bodyGenCB,
270       privCB, finiCB, ifCond, numThreads, pbKind, isCancellable));
271 
272   return bodyGenStatus;
273 }
274 
275 /// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
276 static LogicalResult
277 convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder,
278                  LLVM::ModuleTranslation &moduleTranslation) {
279   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
280   // TODO: support error propagation in OpenMPIRBuilder and use it instead of
281   // relying on captured variables.
282   LogicalResult bodyGenStatus = success();
283 
284   auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
285                        llvm::BasicBlock &continuationBlock) {
286     // MasterOp has only one region associated with it.
287     auto &region = cast<omp::MasterOp>(opInst).getRegion();
288     convertOmpOpRegions(region, "omp.master.region", *codeGenIP.getBlock(),
289                         continuationBlock, builder, moduleTranslation,
290                         bodyGenStatus);
291   };
292 
293   // TODO: Perform finalization actions for variables. This has to be
294   // called for variables which have destructors/finalizers.
295   auto finiCB = [&](InsertPointTy codeGenIP) {};
296 
297   llvm::OpenMPIRBuilder::LocationDescription ompLoc(
298       builder.saveIP(), builder.getCurrentDebugLocation());
299   builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createMaster(
300       ompLoc, bodyGenCB, finiCB));
301   return success();
302 }
303 
304 /// Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder.
305 static LogicalResult
306 convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder,
307                    LLVM::ModuleTranslation &moduleTranslation) {
308   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
309   auto criticalOp = cast<omp::CriticalOp>(opInst);
310   // TODO: support error propagation in OpenMPIRBuilder and use it instead of
311   // relying on captured variables.
312   LogicalResult bodyGenStatus = success();
313 
314   auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
315                        llvm::BasicBlock &continuationBlock) {
316     // CriticalOp has only one region associated with it.
317     auto &region = cast<omp::CriticalOp>(opInst).getRegion();
318     convertOmpOpRegions(region, "omp.critical.region", *codeGenIP.getBlock(),
319                         continuationBlock, builder, moduleTranslation,
320                         bodyGenStatus);
321   };
322 
323   // TODO: Perform finalization actions for variables. This has to be
324   // called for variables which have destructors/finalizers.
325   auto finiCB = [&](InsertPointTy codeGenIP) {};
326 
327   llvm::OpenMPIRBuilder::LocationDescription ompLoc(
328       builder.saveIP(), builder.getCurrentDebugLocation());
329   llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
330   llvm::Constant *hint = nullptr;
331 
332   // If it has a name, it probably has a hint too.
333   if (criticalOp.nameAttr()) {
334     // The verifiers in OpenMP Dialect guarentee that all the pointers are
335     // non-null
336     auto symbolRef = criticalOp.nameAttr().cast<SymbolRefAttr>();
337     auto criticalDeclareOp =
338         SymbolTable::lookupNearestSymbolFrom<omp::CriticalDeclareOp>(criticalOp,
339                                                                      symbolRef);
340     hint = llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext),
341                                   static_cast<int>(criticalDeclareOp.hint()));
342   }
343   builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createCritical(
344       ompLoc, bodyGenCB, finiCB, criticalOp.name().getValueOr(""), hint));
345   return success();
346 }
347 
348 /// Returns a reduction declaration that corresponds to the given reduction
349 /// operation in the given container. Currently only supports reductions inside
350 /// WsLoopOp but can be easily extended.
351 static omp::ReductionDeclareOp findReductionDecl(omp::WsLoopOp container,
352                                                  omp::ReductionOp reduction) {
353   SymbolRefAttr reductionSymbol;
354   for (unsigned i = 0, e = container.getNumReductionVars(); i < e; ++i) {
355     if (container.reduction_vars()[i] != reduction.accumulator())
356       continue;
357     reductionSymbol = (*container.reductions())[i].cast<SymbolRefAttr>();
358     break;
359   }
360   assert(reductionSymbol &&
361          "reduction operation must be associated with a declaration");
362 
363   return SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>(
364       container, reductionSymbol);
365 }
366 
367 /// Populates `reductions` with reduction declarations used in the given loop.
368 static void
369 collectReductionDecls(omp::WsLoopOp loop,
370                       SmallVectorImpl<omp::ReductionDeclareOp> &reductions) {
371   Optional<ArrayAttr> attr = loop.reductions();
372   if (!attr)
373     return;
374 
375   reductions.reserve(reductions.size() + loop.getNumReductionVars());
376   for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
377     reductions.push_back(
378         SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>(
379             loop, symbolRef));
380   }
381 }
382 
383 /// Translates the blocks contained in the given region and appends them to at
384 /// the current insertion point of `builder`. The operations of the entry block
385 /// are appended to the current insertion block, which is not expected to have a
386 /// terminator. If set, `continuationBlockArgs` is populated with translated
387 /// values that correspond to the values omp.yield'ed from the region.
388 static LogicalResult inlineConvertOmpRegions(
389     Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
390     LLVM::ModuleTranslation &moduleTranslation,
391     SmallVectorImpl<llvm::Value *> *continuationBlockArgs = nullptr) {
392   if (region.empty())
393     return success();
394 
395   // Special case for single-block regions that don't create additional blocks:
396   // insert operations without creating additional blocks.
397   if (llvm::hasSingleElement(region)) {
398     moduleTranslation.mapBlock(&region.front(), builder.GetInsertBlock());
399     if (failed(moduleTranslation.convertBlock(
400             region.front(), /*ignoreArguments=*/true, builder)))
401       return failure();
402 
403     // The continuation arguments are simply the translated terminator operands.
404     if (continuationBlockArgs)
405       llvm::append_range(
406           *continuationBlockArgs,
407           moduleTranslation.lookupValues(region.front().back().getOperands()));
408 
409     // Drop the mapping that is no longer necessary so that the same region can
410     // be processed multiple times.
411     moduleTranslation.forgetMapping(region);
412     return success();
413   }
414 
415   // Create the continuation block manually instead of calling splitBlock
416   // because the current insertion block may not have a terminator.
417   llvm::BasicBlock *continuationBlock =
418       llvm::BasicBlock::Create(builder.getContext(), blockName + ".cont",
419                                builder.GetInsertBlock()->getParent(),
420                                builder.GetInsertBlock()->getNextNode());
421   builder.CreateBr(continuationBlock);
422 
423   LogicalResult bodyGenStatus = success();
424   SmallVector<llvm::PHINode *> phis;
425   convertOmpOpRegions(region, blockName, *builder.GetInsertBlock(),
426                       *continuationBlock, builder, moduleTranslation,
427                       bodyGenStatus, &phis);
428   if (failed(bodyGenStatus))
429     return failure();
430   if (continuationBlockArgs)
431     llvm::append_range(*continuationBlockArgs, phis);
432   builder.SetInsertPoint(continuationBlock,
433                          continuationBlock->getFirstInsertionPt());
434   return success();
435 }
436 
437 namespace {
438 /// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used to
439 /// store lambdas with capture.
440 using OwningReductionGen = std::function<llvm::OpenMPIRBuilder::InsertPointTy(
441     llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *,
442     llvm::Value *&)>;
443 using OwningAtomicReductionGen =
444     std::function<llvm::OpenMPIRBuilder::InsertPointTy(
445         llvm::OpenMPIRBuilder::InsertPointTy, llvm::Type *, llvm::Value *,
446         llvm::Value *)>;
447 } // namespace
448 
449 /// Create an OpenMPIRBuilder-compatible reduction generator for the given
450 /// reduction declaration. The generator uses `builder` but ignores its
451 /// insertion point.
452 static OwningReductionGen
453 makeReductionGen(omp::ReductionDeclareOp decl, llvm::IRBuilderBase &builder,
454                  LLVM::ModuleTranslation &moduleTranslation) {
455   // The lambda is mutable because we need access to non-const methods of decl
456   // (which aren't actually mutating it), and we must capture decl by-value to
457   // avoid the dangling reference after the parent function returns.
458   OwningReductionGen gen =
459       [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint,
460                 llvm::Value *lhs, llvm::Value *rhs,
461                 llvm::Value *&result) mutable {
462         Region &reductionRegion = decl.reductionRegion();
463         moduleTranslation.mapValue(reductionRegion.front().getArgument(0), lhs);
464         moduleTranslation.mapValue(reductionRegion.front().getArgument(1), rhs);
465         builder.restoreIP(insertPoint);
466         SmallVector<llvm::Value *> phis;
467         if (failed(inlineConvertOmpRegions(reductionRegion,
468                                            "omp.reduction.nonatomic.body",
469                                            builder, moduleTranslation, &phis)))
470           return llvm::OpenMPIRBuilder::InsertPointTy();
471         assert(phis.size() == 1);
472         result = phis[0];
473         return builder.saveIP();
474       };
475   return gen;
476 }
477 
478 /// Create an OpenMPIRBuilder-compatible atomic reduction generator for the
479 /// given reduction declaration. The generator uses `builder` but ignores its
480 /// insertion point. Returns null if there is no atomic region available in the
481 /// reduction declaration.
482 static OwningAtomicReductionGen
483 makeAtomicReductionGen(omp::ReductionDeclareOp decl,
484                        llvm::IRBuilderBase &builder,
485                        LLVM::ModuleTranslation &moduleTranslation) {
486   if (decl.atomicReductionRegion().empty())
487     return OwningAtomicReductionGen();
488 
489   // The lambda is mutable because we need access to non-const methods of decl
490   // (which aren't actually mutating it), and we must capture decl by-value to
491   // avoid the dangling reference after the parent function returns.
492   OwningAtomicReductionGen atomicGen =
493       [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, llvm::Type *,
494                 llvm::Value *lhs, llvm::Value *rhs) mutable {
495         Region &atomicRegion = decl.atomicReductionRegion();
496         moduleTranslation.mapValue(atomicRegion.front().getArgument(0), lhs);
497         moduleTranslation.mapValue(atomicRegion.front().getArgument(1), rhs);
498         builder.restoreIP(insertPoint);
499         SmallVector<llvm::Value *> phis;
500         if (failed(inlineConvertOmpRegions(atomicRegion,
501                                            "omp.reduction.atomic.body", builder,
502                                            moduleTranslation, &phis)))
503           return llvm::OpenMPIRBuilder::InsertPointTy();
504         assert(phis.empty());
505         return builder.saveIP();
506       };
507   return atomicGen;
508 }
509 
510 /// Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
511 static LogicalResult
512 convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder,
513                   LLVM::ModuleTranslation &moduleTranslation) {
514   auto orderedOp = cast<omp::OrderedOp>(opInst);
515 
516   omp::ClauseDepend dependType = *orderedOp.depend_type_val();
517   bool isDependSource = dependType == omp::ClauseDepend::dependsource;
518   unsigned numLoops = orderedOp.num_loops_val().getValue();
519   SmallVector<llvm::Value *> vecValues =
520       moduleTranslation.lookupValues(orderedOp.depend_vec_vars());
521 
522   llvm::OpenMPIRBuilder::LocationDescription ompLoc(
523       builder.saveIP(), builder.getCurrentDebugLocation());
524   size_t indexVecValues = 0;
525   while (indexVecValues < vecValues.size()) {
526     SmallVector<llvm::Value *> storeValues;
527     storeValues.reserve(numLoops);
528     for (unsigned i = 0; i < numLoops; i++) {
529       storeValues.push_back(vecValues[indexVecValues]);
530       indexVecValues++;
531     }
532     builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend(
533         ompLoc, findAllocaInsertPoint(builder, moduleTranslation), numLoops,
534         storeValues, ".cnt.addr", isDependSource));
535   }
536   return success();
537 }
538 
539 /// Converts an OpenMP 'ordered_region' operation into LLVM IR using
540 /// OpenMPIRBuilder.
541 static LogicalResult
542 convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder,
543                         LLVM::ModuleTranslation &moduleTranslation) {
544   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
545   auto orderedRegionOp = cast<omp::OrderedRegionOp>(opInst);
546 
547   // TODO: The code generation for ordered simd directive is not supported yet.
548   if (orderedRegionOp.simd())
549     return failure();
550 
551   // TODO: support error propagation in OpenMPIRBuilder and use it instead of
552   // relying on captured variables.
553   LogicalResult bodyGenStatus = success();
554 
555   auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
556                        llvm::BasicBlock &continuationBlock) {
557     // OrderedOp has only one region associated with it.
558     auto &region = cast<omp::OrderedRegionOp>(opInst).getRegion();
559     convertOmpOpRegions(region, "omp.ordered.region", *codeGenIP.getBlock(),
560                         continuationBlock, builder, moduleTranslation,
561                         bodyGenStatus);
562   };
563 
564   // TODO: Perform finalization actions for variables. This has to be
565   // called for variables which have destructors/finalizers.
566   auto finiCB = [&](InsertPointTy codeGenIP) {};
567 
568   llvm::OpenMPIRBuilder::LocationDescription ompLoc(
569       builder.saveIP(), builder.getCurrentDebugLocation());
570   builder.restoreIP(
571       moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd(
572           ompLoc, bodyGenCB, finiCB, !orderedRegionOp.simd()));
573   return bodyGenStatus;
574 }
575 
576 static LogicalResult
577 convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
578                    LLVM::ModuleTranslation &moduleTranslation) {
579   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
580   using StorableBodyGenCallbackTy =
581       llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
582 
583   auto sectionsOp = cast<omp::SectionsOp>(opInst);
584 
585   // TODO: Support the following clauses: private, firstprivate, lastprivate,
586   // reduction, allocate
587   if (!sectionsOp.private_vars().empty() ||
588       !sectionsOp.firstprivate_vars().empty() ||
589       !sectionsOp.lastprivate_vars().empty() ||
590       !sectionsOp.reduction_vars().empty() || sectionsOp.reductions() ||
591       !sectionsOp.allocate_vars().empty() ||
592       !sectionsOp.allocators_vars().empty())
593     return emitError(sectionsOp.getLoc())
594            << "private, firstprivate, lastprivate, reduction and allocate "
595               "clauses are not supported for sections construct";
596 
597   LogicalResult bodyGenStatus = success();
598   SmallVector<StorableBodyGenCallbackTy> sectionCBs;
599 
600   for (Operation &op : *sectionsOp.region().begin()) {
601     auto sectionOp = dyn_cast<omp::SectionOp>(op);
602     if (!sectionOp) // omp.terminator
603       continue;
604 
605     Region &region = sectionOp.region();
606     auto sectionCB = [&region, &builder, &moduleTranslation, &bodyGenStatus](
607                          InsertPointTy allocaIP, InsertPointTy codeGenIP,
608                          llvm::BasicBlock &finiBB) {
609       builder.restoreIP(codeGenIP);
610       builder.CreateBr(&finiBB);
611       convertOmpOpRegions(region, "omp.section.region", *codeGenIP.getBlock(),
612                           finiBB, builder, moduleTranslation, bodyGenStatus);
613     };
614     sectionCBs.push_back(sectionCB);
615   }
616 
617   // No sections within omp.sections operation - skip generation. This situation
618   // is only possible if there is only a terminator operation inside the
619   // sections operation
620   if (sectionCBs.empty())
621     return success();
622 
623   assert(isa<omp::SectionOp>(*sectionsOp.region().op_begin()));
624 
625   // TODO: Perform appropriate actions according to the data-sharing
626   // attribute (shared, private, firstprivate, ...) of variables.
627   // Currently defaults to shared.
628   auto privCB = [&](InsertPointTy, InsertPointTy codeGenIP, llvm::Value &,
629                     llvm::Value &vPtr,
630                     llvm::Value *&replacementValue) -> InsertPointTy {
631     replacementValue = &vPtr;
632     return codeGenIP;
633   };
634 
635   // TODO: Perform finalization actions for variables. This has to be
636   // called for variables which have destructors/finalizers.
637   auto finiCB = [&](InsertPointTy codeGenIP) {};
638 
639   llvm::OpenMPIRBuilder::LocationDescription ompLoc(
640       builder.saveIP(), builder.getCurrentDebugLocation());
641   builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSections(
642       ompLoc, findAllocaInsertPoint(builder, moduleTranslation), sectionCBs,
643       privCB, finiCB, false, sectionsOp.nowait()));
644   return bodyGenStatus;
645 }
646 
/// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
///
/// The translation proceeds in the following steps:
///   1. determine the schedule kind (static when unspecified) and, if a chunk
///      size operand is present, sign-extend or truncate it to the type of
///      the first step value;
///   2. allocate a private copy of every reduction variable at the alloca
///      insertion point and initialize it with the value yielded by the
///      initializer region of the matching reduction declaration;
///   3. create one canonical loop per loop dimension, each placed at the body
///      insertion point of the previous one, and collapse them into a single
///      loop;
///   4. apply either the static or the dynamic workshare-loop lowering to the
///      collapsed loop;
///   5. when reductions are present, emit the reduction code followed by a
///      barrier.
///
/// Returns failure when the op has no lower bounds, when the conversion of a
/// nested region fails, or when the reductions cannot be created (in which
/// case an error is emitted on the op).
static LogicalResult
convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
                 LLVM::ModuleTranslation &moduleTranslation) {
  auto loop = cast<omp::WsLoopOp>(opInst);
  // TODO: this should be in the op verifier instead.
  if (loop.lowerBound().empty())
    return failure();

  // Static is the default.
  auto schedule =
      loop.schedule_val().getValueOr(omp::ClauseScheduleKind::Static);

  // Find the loop configuration. The type of the first step value serves as
  // the type the chunk size is converted to below.
  llvm::Value *step = moduleTranslation.lookupValue(loop.step()[0]);
  llvm::Type *ivType = step->getType();
  // `chunk` stays null when the op carries no chunk size operand.
  llvm::Value *chunk = nullptr;
  if (loop.schedule_chunk_var()) {
    llvm::Value *chunkVar =
        moduleTranslation.lookupValue(loop.schedule_chunk_var());
    llvm::Type *chunkVarType = chunkVar->getType();
    assert(chunkVarType->isIntegerTy() &&
           "chunk size must be one integer expression");
    // Bring the chunk size to the bit width of `ivType`: sign-extend a
    // narrower value, truncate a wider one, and reuse it as is otherwise.
    if (chunkVarType->getIntegerBitWidth() < ivType->getIntegerBitWidth())
      chunk = builder.CreateSExt(chunkVar, ivType);
    else if (chunkVarType->getIntegerBitWidth() > ivType->getIntegerBitWidth())
      chunk = builder.CreateTrunc(chunkVar, ivType);
    else
      chunk = chunkVar;
  }

  // Gather the reduction declarations used by this loop; they are indexed in
  // parallel with the reduction variables below.
  SmallVector<omp::ReductionDeclareOp> reductionDecls;
  collectReductionDecls(loop, reductionDecls);
  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
      findAllocaInsertPoint(builder, moduleTranslation);

  // Allocate space for privatized reduction variables.
  SmallVector<llvm::Value *> privateReductionVariables;
  DenseMap<Value, llvm::Value *> reductionVariableMap;
  unsigned numReductions = loop.getNumReductionVars();
  privateReductionVariables.reserve(numReductions);
  if (numReductions != 0) {
    // The builder is moved to the alloca insertion point only within this
    // scope; the guard is there to put it back afterwards.
    llvm::IRBuilderBase::InsertPointGuard guard(builder);
    builder.restoreIP(allocaIP);
    for (unsigned i = 0; i < numReductions; ++i) {
      // The private copy has the element type of the (pointer-typed)
      // reduction variable.
      auto reductionType =
          loop.reduction_vars()[i].getType().cast<LLVM::LLVMPointerType>();
      llvm::Value *var = builder.CreateAlloca(
          moduleTranslation.convertType(reductionType.getElementType()));
      privateReductionVariables.push_back(var);
      reductionVariableMap.try_emplace(loop.reduction_vars()[i], var);
    }
  }

  // Store the mapping between reduction variables and their private copies on
  // ModuleTranslation stack. It can be then recovered when translating
  // omp.reduce operations in a separate call.
  LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard(
      moduleTranslation, reductionVariableMap);

  // Before the loop, store the initial values of reductions into reduction
  // variables. Although this could be done after allocas, we don't want to mess
  // up with the alloca insertion point.
  for (unsigned i = 0; i < numReductions; ++i) {
    SmallVector<llvm::Value *> phis;
    if (failed(inlineConvertOmpRegions(reductionDecls[i].initializerRegion(),
                                       "omp.reduction.neutral", builder,
                                       moduleTranslation, &phis)))
      return failure();
    assert(phis.size() == 1 && "expected one value to be yielded from the "
                               "reduction neutral element declaration region");
    builder.CreateStore(phis[0], privateReductionVariables[i]);
  }

  // Set up the source location value for OpenMP runtime.
  llvm::DISubprogram *subprogram =
      builder.GetInsertBlock()->getParent()->getSubprogram();
  const llvm::DILocation *diLoc =
      moduleTranslation.translateLoc(opInst.getLoc(), subprogram);
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder.saveIP(),
                                                    llvm::DebugLoc(diLoc));

  // Generator of the canonical loop body.
  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
  // relying on captured variables.
  SmallVector<llvm::CanonicalLoopInfo *> loopInfos;
  SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints;
  LogicalResult bodyGenStatus = success();
  auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) {
    // Make sure further conversions know about the induction variable.
    // `loopInfos` holds only the loops created so far, so its size is used as
    // the index of the region argument that belongs to the current loop.
    moduleTranslation.mapValue(
        loop.getRegion().front().getArgument(loopInfos.size()), iv);

    // Capture the body insertion point for use in nested loops. BodyIP of the
    // CanonicalLoopInfo always points to the beginning of the entry block of
    // the body.
    bodyInsertPoints.push_back(ip);

    // Only the last loop receives the translated region; for the others,
    // recording the insertion point above is all that is needed.
    if (loopInfos.size() != loop.getNumLoops() - 1)
      return;

    // Convert the body of the loop. The block is split at the insertion point
    // so that the region is emitted between the entry block and the newly
    // created exit block. Failures are reported through `bodyGenStatus`.
    llvm::BasicBlock *entryBlock = ip.getBlock();
    llvm::BasicBlock *exitBlock =
        entryBlock->splitBasicBlock(ip.getPoint(), "omp.wsloop.exit");
    convertOmpOpRegions(loop.region(), "omp.wsloop.region", *entryBlock,
                        *exitBlock, builder, moduleTranslation, bodyGenStatus);
  };

  // Delegate actual loop construction to the OpenMP IRBuilder.
  // TODO: this currently assumes WsLoop is semantically similar to SCF loop,
  // i.e. it has a positive step, uses signed integer semantics. Reconsider
  // this code when WsLoop clearly supports more cases.
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  for (unsigned i = 0, e = loop.getNumLoops(); i < e; ++i) {
    llvm::Value *lowerBound =
        moduleTranslation.lookupValue(loop.lowerBound()[i]);
    llvm::Value *upperBound =
        moduleTranslation.lookupValue(loop.upperBound()[i]);
    llvm::Value *step = moduleTranslation.lookupValue(loop.step()[i]);

    // Make sure loop trip counts are emitted in the preheader of the outermost
    // loop at the latest so that they are all available for the new collapsed
    // loop that will be created below. The first loop is created at the
    // location of the op itself; every following loop is created at the body
    // insertion point recorded for the previous one.
    llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
    llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
    if (i != 0) {
      loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(),
                                                       llvm::DebugLoc(diLoc));
      computeIP = loopInfos.front()->getPreheaderIP();
    }
    loopInfos.push_back(ompBuilder->createCanonicalLoop(
        loc, bodyGen, lowerBound, upperBound, step,
        /*IsSigned=*/true, loop.inclusive(), computeIP));

    // `bodyGen` cannot return an error itself, so check the status it
    // captured after every loop creation.
    if (failed(bodyGenStatus))
      return failure();
  }

  // Collapse loops. Store the insertion point because LoopInfos may get
  // invalidated.
  llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP();
  llvm::CanonicalLoopInfo *loopInfo =
      ompBuilder->collapseLoops(diLoc, loopInfos, {});

  // Look the alloca insertion point up again now that the loops have been
  // created.
  allocaIP = findAllocaInsertPoint(builder, moduleTranslation);

  bool isSimd = loop.simd_modifier();

  if (schedule == omp::ClauseScheduleKind::Static) {
    // Static schedule: `afterIP` keeps the value stored before collapsing.
    ompBuilder->applyStaticWorkshareLoop(ompLoc.DL, loopInfo, allocaIP,
                                         !loop.nowait(), chunk);
  } else {
    // Map the remaining schedule kinds onto the runtime schedule types. The
    // simd modifier selects the simd variant for guided and runtime schedules.
    llvm::omp::OMPScheduleType schedType;
    switch (schedule) {
    case omp::ClauseScheduleKind::Dynamic:
      schedType = llvm::omp::OMPScheduleType::DynamicChunked;
      break;
    case omp::ClauseScheduleKind::Guided:
      if (isSimd)
        schedType = llvm::omp::OMPScheduleType::GuidedSimd;
      else
        schedType = llvm::omp::OMPScheduleType::GuidedChunked;
      break;
    case omp::ClauseScheduleKind::Auto:
      schedType = llvm::omp::OMPScheduleType::Auto;
      break;
    case omp::ClauseScheduleKind::Runtime:
      if (isSimd)
        schedType = llvm::omp::OMPScheduleType::RuntimeSimd;
      else
        schedType = llvm::omp::OMPScheduleType::Runtime;
      break;
    default:
      llvm_unreachable("Unknown schedule value");
      break;
    }

    // Add the monotonic/nonmonotonic modifier bit when one was specified.
    if (Optional<omp::ScheduleModifier> modifier = loop.schedule_modifier()) {
      switch (*modifier) {
      case omp::ScheduleModifier::monotonic:
        schedType |= llvm::omp::OMPScheduleType::ModifierMonotonic;
        break;
      case omp::ScheduleModifier::nonmonotonic:
        schedType |= llvm::omp::OMPScheduleType::ModifierNonmonotonic;
        break;
      default:
        // Nothing to do here.
        break;
      }
    }
    // For the dynamic lowering, continue at the insertion point it returns.
    afterIP = ompBuilder->applyDynamicWorkshareLoop(
        ompLoc.DL, loopInfo, allocaIP, schedType, !loop.nowait(), chunk);
  }

  // Continue building IR after the loop. Note that the LoopInfo returned by
  // `collapseLoops` points inside the outermost loop and is intended for
  // potential further loop transformations. Use the insertion point stored
  // before collapsing loops (or the one returned by the dynamic workshare
  // lowering) instead.
  builder.restoreIP(afterIP);

  // Process the reductions if required.
  if (numReductions == 0)
    return success();

  // Create the reduction generators. We need to own them here because
  // ReductionInfo only accepts references to the generators.
  SmallVector<OwningReductionGen> owningReductionGens;
  SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
  for (unsigned i = 0; i < numReductions; ++i) {
    owningReductionGens.push_back(
        makeReductionGen(reductionDecls[i], builder, moduleTranslation));
    owningAtomicReductionGens.push_back(
        makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation));
  }

  // Collect the reduction information.
  SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos;
  reductionInfos.reserve(numReductions);
  for (unsigned i = 0; i < numReductions; ++i) {
    // The atomic generator is passed on only when the owning generator is
    // non-empty; otherwise it stays null.
    llvm::OpenMPIRBuilder::AtomicReductionGenTy atomicGen = nullptr;
    if (owningAtomicReductionGens[i])
      atomicGen = owningAtomicReductionGens[i];
    llvm::Value *variable =
        moduleTranslation.lookupValue(loop.reduction_vars()[i]);
    reductionInfos.push_back({variable->getType()->getPointerElementType(),
                              variable, privateReductionVariables[i],
                              owningReductionGens[i], atomicGen});
  }

  // The call to createReductions below expects the block to have a
  // terminator. Create an unreachable instruction to serve as terminator
  // and remove it later.
  llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
  builder.SetInsertPoint(tempTerminator);
  llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint =
      ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos,
                                   loop.nowait());
  // An insertion point without a block signals that the reductions could not
  // be created.
  if (!contInsertPoint.getBlock())
    return loop->emitOpError() << "failed to convert reductions";
  // Emit a barrier after the reductions and continue building IR after it.
  auto nextInsertionPoint =
      ompBuilder->createBarrier(contInsertPoint, llvm::omp::OMPD_for);
  tempTerminator->eraseFromParent();
  builder.restoreIP(nextInsertionPoint);

  return success();
}
894 
895 /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
896 llvm::AtomicOrdering
897 convertAtomicOrdering(Optional<omp::ClauseMemoryOrderKind> ao) {
898   if (!ao)
899     return llvm::AtomicOrdering::Monotonic; // Default Memory Ordering
900 
901   switch (*ao) {
902   case omp::ClauseMemoryOrderKind::seq_cst:
903     return llvm::AtomicOrdering::SequentiallyConsistent;
904   case omp::ClauseMemoryOrderKind::acq_rel:
905     return llvm::AtomicOrdering::AcquireRelease;
906   case omp::ClauseMemoryOrderKind::acquire:
907     return llvm::AtomicOrdering::Acquire;
908   case omp::ClauseMemoryOrderKind::release:
909     return llvm::AtomicOrdering::Release;
910   case omp::ClauseMemoryOrderKind::relaxed:
911     return llvm::AtomicOrdering::Monotonic;
912   }
913   llvm_unreachable("Unknown ClauseMemoryOrderKind kind");
914 }
915 
916 /// Convert omp.atomic.read operation to LLVM IR.
917 static LogicalResult
918 convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder,
919                      LLVM::ModuleTranslation &moduleTranslation) {
920 
921   auto readOp = cast<omp::AtomicReadOp>(opInst);
922   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
923 
924   // Set up the source location value for OpenMP runtime.
925   llvm::DISubprogram *subprogram =
926       builder.GetInsertBlock()->getParent()->getSubprogram();
927   const llvm::DILocation *diLoc =
928       moduleTranslation.translateLoc(opInst.getLoc(), subprogram);
929   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder.saveIP(),
930                                                     llvm::DebugLoc(diLoc));
931   llvm::AtomicOrdering AO = convertAtomicOrdering(readOp.memory_order());
932   llvm::Value *x = moduleTranslation.lookupValue(readOp.x());
933   Type xTy = readOp.x().getType().cast<omp::PointerLikeType>().getElementType();
934   llvm::Value *v = moduleTranslation.lookupValue(readOp.v());
935   Type vTy = readOp.v().getType().cast<omp::PointerLikeType>().getElementType();
936   llvm::OpenMPIRBuilder::AtomicOpValue V = {
937       v, moduleTranslation.convertType(vTy), false, false};
938   llvm::OpenMPIRBuilder::AtomicOpValue X = {
939       x, moduleTranslation.convertType(xTy), false, false};
940   builder.restoreIP(ompBuilder->createAtomicRead(ompLoc, X, V, AO));
941   return success();
942 }
943 
944 /// Converts an omp.atomic.write operation to LLVM IR.
945 static LogicalResult
946 convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder,
947                       LLVM::ModuleTranslation &moduleTranslation) {
948   auto writeOp = cast<omp::AtomicWriteOp>(opInst);
949   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
950 
951   // Set up the source location value for OpenMP runtime.
952   llvm::DISubprogram *subprogram =
953       builder.GetInsertBlock()->getParent()->getSubprogram();
954   const llvm::DILocation *diLoc =
955       moduleTranslation.translateLoc(opInst.getLoc(), subprogram);
956   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder.saveIP(),
957                                                     llvm::DebugLoc(diLoc));
958   llvm::AtomicOrdering ao = convertAtomicOrdering(writeOp.memory_order());
959   llvm::Value *expr = moduleTranslation.lookupValue(writeOp.value());
960   llvm::Value *dest = moduleTranslation.lookupValue(writeOp.address());
961   llvm::Type *ty = moduleTranslation.convertType(writeOp.value().getType());
962   llvm::OpenMPIRBuilder::AtomicOpValue x = {dest, ty, /*isSigned=*/false,
963                                             /*isVolatile=*/false};
964   builder.restoreIP(ompBuilder->createAtomicWrite(ompLoc, x, expr, ao));
965   return success();
966 }
967 
968 /// Converts an OpenMP reduction operation using OpenMPIRBuilder. Expects the
969 /// mapping between reduction variables and their private equivalents to have
970 /// been stored on the ModuleTranslation stack. Currently only supports
971 /// reduction within WsLoopOp, but can be easily extended.
972 static LogicalResult
973 convertOmpReductionOp(omp::ReductionOp reductionOp,
974                       llvm::IRBuilderBase &builder,
975                       LLVM::ModuleTranslation &moduleTranslation) {
976   // Find the declaration that corresponds to the reduction op.
977   auto reductionContainer = reductionOp->getParentOfType<omp::WsLoopOp>();
978   omp::ReductionDeclareOp declaration =
979       findReductionDecl(reductionContainer, reductionOp);
980   assert(declaration && "could not find reduction declaration");
981 
982   // Retrieve the mapping between reduction variables and their private
983   // equivalents.
984   const DenseMap<Value, llvm::Value *> *reductionVariableMap = nullptr;
985   moduleTranslation.stackWalk<OpenMPVarMappingStackFrame>(
986       [&](const OpenMPVarMappingStackFrame &frame) {
987         reductionVariableMap = &frame.mapping;
988         return WalkResult::interrupt();
989       });
990   assert(reductionVariableMap && "couldn't find private reduction variables");
991 
992   // Translate the reduction operation by emitting the body of the corresponding
993   // reduction declaration.
994   Region &reductionRegion = declaration.reductionRegion();
995   llvm::Value *privateReductionVar =
996       reductionVariableMap->lookup(reductionOp.accumulator());
997   llvm::Value *reductionVal = builder.CreateLoad(
998       moduleTranslation.convertType(reductionOp.operand().getType()),
999       privateReductionVar);
1000 
1001   moduleTranslation.mapValue(reductionRegion.front().getArgument(0),
1002                              reductionVal);
1003   moduleTranslation.mapValue(
1004       reductionRegion.front().getArgument(1),
1005       moduleTranslation.lookupValue(reductionOp.operand()));
1006 
1007   SmallVector<llvm::Value *> phis;
1008   if (failed(inlineConvertOmpRegions(reductionRegion, "omp.reduction.body",
1009                                      builder, moduleTranslation, &phis)))
1010     return failure();
1011   assert(phis.size() == 1 && "expected one value to be yielded from "
1012                              "the reduction body declaration region");
1013   builder.CreateStore(phis[0], privateReductionVar);
1014   return success();
1015 }
1016 
1017 namespace {
1018 
/// Implementation of the dialect interface that converts operations belonging
/// to the OpenMP dialect to LLVM IR. It is attached to the OpenMP dialect by
/// `registerOpenMPDialectTranslation`.
class OpenMPDialectLLVMIRTranslationInterface
    : public LLVMTranslationDialectInterface {
public:
  // Reuse the constructors of the base interface.
  using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface;

  /// Translates the given operation to LLVM IR using the provided IR builder
  /// and saving the state in `moduleTranslation`. The out-of-line definition
  /// dispatches on the type of `op` and emits an error for OpenMP operations
  /// it does not support.
  LogicalResult
  convertOperation(Operation *op, llvm::IRBuilderBase &builder,
                   LLVM::ModuleTranslation &moduleTranslation) const final;
};
1032 
1033 } // namespace
1034 
1035 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR
1036 /// (including OpenMP runtime calls).
1037 LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
1038     Operation *op, llvm::IRBuilderBase &builder,
1039     LLVM::ModuleTranslation &moduleTranslation) const {
1040 
1041   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1042 
1043   return llvm::TypeSwitch<Operation *, LogicalResult>(op)
1044       .Case([&](omp::BarrierOp) {
1045         ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier);
1046         return success();
1047       })
1048       .Case([&](omp::TaskwaitOp) {
1049         ompBuilder->createTaskwait(builder.saveIP());
1050         return success();
1051       })
1052       .Case([&](omp::TaskyieldOp) {
1053         ompBuilder->createTaskyield(builder.saveIP());
1054         return success();
1055       })
1056       .Case([&](omp::FlushOp) {
1057         // No support in Openmp runtime function (__kmpc_flush) to accept
1058         // the argument list.
1059         // OpenMP standard states the following:
1060         //  "An implementation may implement a flush with a list by ignoring
1061         //   the list, and treating it the same as a flush without a list."
1062         //
1063         // The argument list is discarded so that, flush with a list is treated
1064         // same as a flush without a list.
1065         ompBuilder->createFlush(builder.saveIP());
1066         return success();
1067       })
1068       .Case([&](omp::ParallelOp op) {
1069         return convertOmpParallel(op, builder, moduleTranslation);
1070       })
1071       .Case([&](omp::ReductionOp reductionOp) {
1072         return convertOmpReductionOp(reductionOp, builder, moduleTranslation);
1073       })
1074       .Case([&](omp::MasterOp) {
1075         return convertOmpMaster(*op, builder, moduleTranslation);
1076       })
1077       .Case([&](omp::CriticalOp) {
1078         return convertOmpCritical(*op, builder, moduleTranslation);
1079       })
1080       .Case([&](omp::OrderedRegionOp) {
1081         return convertOmpOrderedRegion(*op, builder, moduleTranslation);
1082       })
1083       .Case([&](omp::OrderedOp) {
1084         return convertOmpOrdered(*op, builder, moduleTranslation);
1085       })
1086       .Case([&](omp::WsLoopOp) {
1087         return convertOmpWsLoop(*op, builder, moduleTranslation);
1088       })
1089       .Case([&](omp::AtomicReadOp) {
1090         return convertOmpAtomicRead(*op, builder, moduleTranslation);
1091       })
1092       .Case([&](omp::AtomicWriteOp) {
1093         return convertOmpAtomicWrite(*op, builder, moduleTranslation);
1094       })
1095       .Case([&](omp::SectionsOp) {
1096         return convertOmpSections(*op, builder, moduleTranslation);
1097       })
1098       .Case<omp::YieldOp, omp::TerminatorOp, omp::ReductionDeclareOp,
1099             omp::CriticalDeclareOp>([](auto op) {
1100         // `yield` and `terminator` can be just omitted. The block structure
1101         // was created in the region that handles their parent operation.
1102         // `reduction.declare` will be used by reductions and is not
1103         // converted directly, skip it.
1104         // `critical.declare` is only used to declare names of critical
1105         // sections which will be used by `critical` ops and hence can be
1106         // ignored for lowering. The OpenMP IRBuilder will create unique
1107         // name for critical section names.
1108         return success();
1109       })
1110       .Default([&](Operation *inst) {
1111         return inst->emitError("unsupported OpenMP operation: ")
1112                << inst->getName();
1113       });
1114 }
1115 
1116 void mlir::registerOpenMPDialectTranslation(DialectRegistry &registry) {
1117   registry.insert<omp::OpenMPDialect>();
1118   registry.addDialectInterface<omp::OpenMPDialect,
1119                                OpenMPDialectLLVMIRTranslationInterface>();
1120 }
1121 
1122 void mlir::registerOpenMPDialectTranslation(MLIRContext &context) {
1123   DialectRegistry registry;
1124   registerOpenMPDialectTranslation(registry);
1125   context.appendDialectRegistry(registry);
1126 }
1127