1 //===- OpenMPToLLVMIRTranslation.cpp - Translate OpenMP dialect to LLVM IR-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a translation between the MLIR OpenMP dialect and LLVM
10 // IR.
11 //
12 //===----------------------------------------------------------------------===//
13 #include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h"
14 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
15 #include "mlir/IR/BlockAndValueMapping.h"
16 #include "mlir/IR/Operation.h"
17 #include "mlir/Support/LLVM.h"
18 #include "mlir/Target/LLVMIR/ModuleTranslation.h"
19 
20 #include "llvm/ADT/SetVector.h"
21 #include "llvm/ADT/TypeSwitch.h"
22 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
23 #include "llvm/IR/DebugInfoMetadata.h"
24 #include "llvm/IR/IRBuilder.h"
25 
26 using namespace mlir;
27 
28 namespace {
29 /// ModuleTranslation stack frame for OpenMP operations. This keeps track of the
30 /// insertion points for allocas.
31 class OpenMPAllocaStackFrame
32     : public LLVM::ModuleTranslation::StackFrameBase<OpenMPAllocaStackFrame> {
33 public:
34   explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP)
35       : allocaInsertPoint(allocaIP) {}
36   llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
37 };
38 
39 /// ModuleTranslation stack frame containing the partial mapping between MLIR
40 /// values and their LLVM IR equivalents.
41 class OpenMPVarMappingStackFrame
42     : public LLVM::ModuleTranslation::StackFrameBase<
43           OpenMPVarMappingStackFrame> {
44 public:
45   explicit OpenMPVarMappingStackFrame(
46       const DenseMap<Value, llvm::Value *> &mapping)
47       : mapping(mapping) {}
48 
49   DenseMap<Value, llvm::Value *> mapping;
50 };
51 } // namespace
52 
53 /// Find the insertion point for allocas given the current insertion point for
54 /// normal operations in the builder.
55 static llvm::OpenMPIRBuilder::InsertPointTy
56 findAllocaInsertPoint(llvm::IRBuilderBase &builder,
57                       const LLVM::ModuleTranslation &moduleTranslation) {
58   // If there is an alloca insertion point on stack, i.e. we are in a nested
59   // operation and a specific point was provided by some surrounding operation,
60   // use it.
61   llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
62   WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocaStackFrame>(
63       [&](const OpenMPAllocaStackFrame &frame) {
64         allocaInsertPoint = frame.allocaInsertPoint;
65         return WalkResult::interrupt();
66       });
67   if (walkResult.wasInterrupted())
68     return allocaInsertPoint;
69 
70   // Otherwise, insert to the entry block of the surrounding function.
71   llvm::BasicBlock &funcEntryBlock =
72       builder.GetInsertBlock()->getParent()->getEntryBlock();
73   return llvm::OpenMPIRBuilder::InsertPointTy(
74       &funcEntryBlock, funcEntryBlock.getFirstInsertionPt());
75 }
76 
77 /// Converts the given region that appears within an OpenMP dialect operation to
78 /// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the
79 /// region, and a branch from any block with an successor-less OpenMP terminator
80 /// to `continuationBlock`. Populates `continuationBlockPHIs` with the PHI nodes
81 /// of the continuation block if provided.
82 static void convertOmpOpRegions(
83     Region &region, StringRef blockName, llvm::BasicBlock &sourceBlock,
84     llvm::BasicBlock &continuationBlock, llvm::IRBuilderBase &builder,
85     LLVM::ModuleTranslation &moduleTranslation, LogicalResult &bodyGenStatus,
86     SmallVectorImpl<llvm::PHINode *> *continuationBlockPHIs = nullptr) {
87   llvm::LLVMContext &llvmContext = builder.getContext();
88   for (Block &bb : region) {
89     llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create(
90         llvmContext, blockName, builder.GetInsertBlock()->getParent(),
91         builder.GetInsertBlock()->getNextNode());
92     moduleTranslation.mapBlock(&bb, llvmBB);
93   }
94 
95   llvm::Instruction *sourceTerminator = sourceBlock.getTerminator();
96 
97   // Terminators (namely YieldOp) may be forwarding values to the region that
98   // need to be available in the continuation block. Collect the types of these
99   // operands in preparation of creating PHI nodes.
100   SmallVector<llvm::Type *> continuationBlockPHITypes;
101   bool operandsProcessed = false;
102   unsigned numYields = 0;
103   for (Block &bb : region.getBlocks()) {
104     if (omp::YieldOp yield = dyn_cast<omp::YieldOp>(bb.getTerminator())) {
105       if (!operandsProcessed) {
106         for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
107           continuationBlockPHITypes.push_back(
108               moduleTranslation.convertType(yield->getOperand(i).getType()));
109         }
110         operandsProcessed = true;
111       } else {
112         assert(continuationBlockPHITypes.size() == yield->getNumOperands() &&
113                "mismatching number of values yielded from the region");
114         for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
115           llvm::Type *operandType =
116               moduleTranslation.convertType(yield->getOperand(i).getType());
117           (void)operandType;
118           assert(continuationBlockPHITypes[i] == operandType &&
119                  "values of mismatching types yielded from the region");
120         }
121       }
122       numYields++;
123     }
124   }
125 
126   // Insert PHI nodes in the continuation block for any values forwarded by the
127   // terminators in this region.
128   if (!continuationBlockPHITypes.empty())
129     assert(
130         continuationBlockPHIs &&
131         "expected continuation block PHIs if converted regions yield values");
132   if (continuationBlockPHIs) {
133     llvm::IRBuilderBase::InsertPointGuard guard(builder);
134     continuationBlockPHIs->reserve(continuationBlockPHITypes.size());
135     builder.SetInsertPoint(&continuationBlock, continuationBlock.begin());
136     for (llvm::Type *ty : continuationBlockPHITypes)
137       continuationBlockPHIs->push_back(builder.CreatePHI(ty, numYields));
138   }
139 
140   // Convert blocks one by one in topological order to ensure
141   // defs are converted before uses.
142   SetVector<Block *> blocks =
143       LLVM::detail::getTopologicallySortedBlocks(region);
144   for (Block *bb : blocks) {
145     llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb);
146     // Retarget the branch of the entry block to the entry block of the
147     // converted region (regions are single-entry).
148     if (bb->isEntryBlock()) {
149       assert(sourceTerminator->getNumSuccessors() == 1 &&
150              "provided entry block has multiple successors");
151       assert(sourceTerminator->getSuccessor(0) == &continuationBlock &&
152              "ContinuationBlock is not the successor of the entry block");
153       sourceTerminator->setSuccessor(0, llvmBB);
154     }
155 
156     llvm::IRBuilderBase::InsertPointGuard guard(builder);
157     if (failed(
158             moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder))) {
159       bodyGenStatus = failure();
160       return;
161     }
162 
163     // Special handling for `omp.yield` and `omp.terminator` (we may have more
164     // than one): they return the control to the parent OpenMP dialect operation
165     // so replace them with the branch to the continuation block. We handle this
166     // here to avoid relying inter-function communication through the
167     // ModuleTranslation class to set up the correct insertion point. This is
168     // also consistent with MLIR's idiom of handling special region terminators
169     // in the same code that handles the region-owning operation.
170     Operation *terminator = bb->getTerminator();
171     if (isa<omp::TerminatorOp, omp::YieldOp>(terminator)) {
172       builder.CreateBr(&continuationBlock);
173 
174       for (unsigned i = 0, e = terminator->getNumOperands(); i < e; ++i)
175         (*continuationBlockPHIs)[i]->addIncoming(
176             moduleTranslation.lookupValue(terminator->getOperand(i)), llvmBB);
177     }
178   }
179   // After all blocks have been traversed and values mapped, connect the PHI
180   // nodes to the results of preceding blocks.
181   LLVM::detail::connectPHINodes(region, moduleTranslation);
182 
183   // Remove the blocks and values defined in this region from the mapping since
184   // they are not visible outside of this region. This allows the same region to
185   // be converted several times, that is cloned, without clashes, and slightly
186   // speeds up the lookups.
187   moduleTranslation.forgetMapping(region);
188 }
189 
190 /// Convert ProcBindKind from MLIR-generated enum to LLVM enum.
191 static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) {
192   switch (kind) {
193   case omp::ClauseProcBindKind::close:
194     return llvm::omp::ProcBindKind::OMP_PROC_BIND_close;
195   case omp::ClauseProcBindKind::master:
196     return llvm::omp::ProcBindKind::OMP_PROC_BIND_master;
197   case omp::ClauseProcBindKind::primary:
198     return llvm::omp::ProcBindKind::OMP_PROC_BIND_primary;
199   case omp::ClauseProcBindKind::spread:
200     return llvm::omp::ProcBindKind::OMP_PROC_BIND_spread;
201   }
202   llvm_unreachable("Unknown ClauseProcBindKind kind");
203 }
204 
205 /// Converts the OpenMP parallel operation to LLVM IR.
206 static LogicalResult
207 convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
208                    LLVM::ModuleTranslation &moduleTranslation) {
209   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
210   // TODO: support error propagation in OpenMPIRBuilder and use it instead of
211   // relying on captured variables.
212   LogicalResult bodyGenStatus = success();
213 
214   auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
215                        llvm::BasicBlock &continuationBlock) {
216     // Save the alloca insertion point on ModuleTranslation stack for use in
217     // nested regions.
218     LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
219         moduleTranslation, allocaIP);
220 
221     // ParallelOp has only one region associated with it.
222     convertOmpOpRegions(opInst.getRegion(), "omp.par.region",
223                         *codeGenIP.getBlock(), continuationBlock, builder,
224                         moduleTranslation, bodyGenStatus);
225   };
226 
227   // TODO: Perform appropriate actions according to the data-sharing
228   // attribute (shared, private, firstprivate, ...) of variables.
229   // Currently defaults to shared.
230   auto privCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
231                     llvm::Value &, llvm::Value &vPtr,
232                     llvm::Value *&replacementValue) -> InsertPointTy {
233     replacementValue = &vPtr;
234 
235     return codeGenIP;
236   };
237 
238   // TODO: Perform finalization actions for variables. This has to be
239   // called for variables which have destructors/finalizers.
240   auto finiCB = [&](InsertPointTy codeGenIP) {};
241 
242   llvm::Value *ifCond = nullptr;
243   if (auto ifExprVar = opInst.if_expr_var())
244     ifCond = moduleTranslation.lookupValue(ifExprVar);
245   llvm::Value *numThreads = nullptr;
246   if (auto numThreadsVar = opInst.num_threads_var())
247     numThreads = moduleTranslation.lookupValue(numThreadsVar);
248   auto pbKind = llvm::omp::OMP_PROC_BIND_default;
249   if (auto bind = opInst.proc_bind_val())
250     pbKind = getProcBindKind(*bind);
251   // TODO: Is the Parallel construct cancellable?
252   bool isCancellable = false;
253 
254   // Ensure that the BasicBlock for the the parallel region is sparate from the
255   // function entry which we may need to insert allocas.
256   if (builder.GetInsertBlock() ==
257       &builder.GetInsertBlock()->getParent()->getEntryBlock()) {
258     assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() &&
259            "Assuming end of basic block");
260     llvm::BasicBlock *entryBB =
261         llvm::BasicBlock::Create(builder.getContext(), "parallel.entry",
262                                  builder.GetInsertBlock()->getParent(),
263                                  builder.GetInsertBlock()->getNextNode());
264     builder.CreateBr(entryBB);
265     builder.SetInsertPoint(entryBB);
266   }
267   llvm::OpenMPIRBuilder::LocationDescription ompLoc(
268       builder.saveIP(), builder.getCurrentDebugLocation());
269   builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createParallel(
270       ompLoc, findAllocaInsertPoint(builder, moduleTranslation), bodyGenCB,
271       privCB, finiCB, ifCond, numThreads, pbKind, isCancellable));
272 
273   return bodyGenStatus;
274 }
275 
276 /// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
277 static LogicalResult
278 convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder,
279                  LLVM::ModuleTranslation &moduleTranslation) {
280   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
281   // TODO: support error propagation in OpenMPIRBuilder and use it instead of
282   // relying on captured variables.
283   LogicalResult bodyGenStatus = success();
284 
285   auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
286                        llvm::BasicBlock &continuationBlock) {
287     // MasterOp has only one region associated with it.
288     auto &region = cast<omp::MasterOp>(opInst).getRegion();
289     convertOmpOpRegions(region, "omp.master.region", *codeGenIP.getBlock(),
290                         continuationBlock, builder, moduleTranslation,
291                         bodyGenStatus);
292   };
293 
294   // TODO: Perform finalization actions for variables. This has to be
295   // called for variables which have destructors/finalizers.
296   auto finiCB = [&](InsertPointTy codeGenIP) {};
297 
298   llvm::OpenMPIRBuilder::LocationDescription ompLoc(
299       builder.saveIP(), builder.getCurrentDebugLocation());
300   builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createMaster(
301       ompLoc, bodyGenCB, finiCB));
302   return success();
303 }
304 
305 /// Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder.
306 static LogicalResult
307 convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder,
308                    LLVM::ModuleTranslation &moduleTranslation) {
309   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
310   auto criticalOp = cast<omp::CriticalOp>(opInst);
311   // TODO: support error propagation in OpenMPIRBuilder and use it instead of
312   // relying on captured variables.
313   LogicalResult bodyGenStatus = success();
314 
315   auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
316                        llvm::BasicBlock &continuationBlock) {
317     // CriticalOp has only one region associated with it.
318     auto &region = cast<omp::CriticalOp>(opInst).getRegion();
319     convertOmpOpRegions(region, "omp.critical.region", *codeGenIP.getBlock(),
320                         continuationBlock, builder, moduleTranslation,
321                         bodyGenStatus);
322   };
323 
324   // TODO: Perform finalization actions for variables. This has to be
325   // called for variables which have destructors/finalizers.
326   auto finiCB = [&](InsertPointTy codeGenIP) {};
327 
328   llvm::OpenMPIRBuilder::LocationDescription ompLoc(
329       builder.saveIP(), builder.getCurrentDebugLocation());
330   llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
331   llvm::Constant *hint = nullptr;
332 
333   // If it has a name, it probably has a hint too.
334   if (criticalOp.nameAttr()) {
335     // The verifiers in OpenMP Dialect guarentee that all the pointers are
336     // non-null
337     auto symbolRef = criticalOp.nameAttr().cast<SymbolRefAttr>();
338     auto criticalDeclareOp =
339         SymbolTable::lookupNearestSymbolFrom<omp::CriticalDeclareOp>(criticalOp,
340                                                                      symbolRef);
341     hint = llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext),
342                                   static_cast<int>(criticalDeclareOp.hint()));
343   }
344   builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createCritical(
345       ompLoc, bodyGenCB, finiCB, criticalOp.name().getValueOr(""), hint));
346   return success();
347 }
348 
349 /// Returns a reduction declaration that corresponds to the given reduction
350 /// operation in the given container. Currently only supports reductions inside
351 /// WsLoopOp but can be easily extended.
352 static omp::ReductionDeclareOp findReductionDecl(omp::WsLoopOp container,
353                                                  omp::ReductionOp reduction) {
354   SymbolRefAttr reductionSymbol;
355   for (unsigned i = 0, e = container.getNumReductionVars(); i < e; ++i) {
356     if (container.reduction_vars()[i] != reduction.accumulator())
357       continue;
358     reductionSymbol = (*container.reductions())[i].cast<SymbolRefAttr>();
359     break;
360   }
361   assert(reductionSymbol &&
362          "reduction operation must be associated with a declaration");
363 
364   return SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>(
365       container, reductionSymbol);
366 }
367 
368 /// Populates `reductions` with reduction declarations used in the given loop.
369 static void
370 collectReductionDecls(omp::WsLoopOp loop,
371                       SmallVectorImpl<omp::ReductionDeclareOp> &reductions) {
372   Optional<ArrayAttr> attr = loop.reductions();
373   if (!attr)
374     return;
375 
376   reductions.reserve(reductions.size() + loop.getNumReductionVars());
377   for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
378     reductions.push_back(
379         SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>(
380             loop, symbolRef));
381   }
382 }
383 
384 /// Translates the blocks contained in the given region and appends them to at
385 /// the current insertion point of `builder`. The operations of the entry block
386 /// are appended to the current insertion block, which is not expected to have a
387 /// terminator. If set, `continuationBlockArgs` is populated with translated
388 /// values that correspond to the values omp.yield'ed from the region.
389 static LogicalResult inlineConvertOmpRegions(
390     Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
391     LLVM::ModuleTranslation &moduleTranslation,
392     SmallVectorImpl<llvm::Value *> *continuationBlockArgs = nullptr) {
393   if (region.empty())
394     return success();
395 
396   // Special case for single-block regions that don't create additional blocks:
397   // insert operations without creating additional blocks.
398   if (llvm::hasSingleElement(region)) {
399     moduleTranslation.mapBlock(&region.front(), builder.GetInsertBlock());
400     if (failed(moduleTranslation.convertBlock(
401             region.front(), /*ignoreArguments=*/true, builder)))
402       return failure();
403 
404     // The continuation arguments are simply the translated terminator operands.
405     if (continuationBlockArgs)
406       llvm::append_range(
407           *continuationBlockArgs,
408           moduleTranslation.lookupValues(region.front().back().getOperands()));
409 
410     // Drop the mapping that is no longer necessary so that the same region can
411     // be processed multiple times.
412     moduleTranslation.forgetMapping(region);
413     return success();
414   }
415 
416   // Create the continuation block manually instead of calling splitBlock
417   // because the current insertion block may not have a terminator.
418   llvm::BasicBlock *continuationBlock =
419       llvm::BasicBlock::Create(builder.getContext(), blockName + ".cont",
420                                builder.GetInsertBlock()->getParent(),
421                                builder.GetInsertBlock()->getNextNode());
422   builder.CreateBr(continuationBlock);
423 
424   LogicalResult bodyGenStatus = success();
425   SmallVector<llvm::PHINode *> phis;
426   convertOmpOpRegions(region, blockName, *builder.GetInsertBlock(),
427                       *continuationBlock, builder, moduleTranslation,
428                       bodyGenStatus, &phis);
429   if (failed(bodyGenStatus))
430     return failure();
431   if (continuationBlockArgs)
432     llvm::append_range(*continuationBlockArgs, phis);
433   builder.SetInsertPoint(continuationBlock,
434                          continuationBlock->getFirstInsertionPt());
435   return success();
436 }
437 
namespace {
/// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used to
/// store lambdas with capture.

/// Callable that takes an insertion point, two values and a reference to a
/// value pointer, and returns an insertion point. The generators created in
/// this file pass the two values as the `lhs` and `rhs` of the reduction and
/// write the result of the reduction through the reference.
using OwningReductionGen = std::function<llvm::OpenMPIRBuilder::InsertPointTy(
    llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *,
    llvm::Value *&)>;
/// Callable that takes an insertion point, a type and two values, and returns
/// an insertion point. The generators created in this file ignore the type and
/// use the two values as the `lhs` and `rhs` of the atomic reduction.
using OwningAtomicReductionGen =
    std::function<llvm::OpenMPIRBuilder::InsertPointTy(
        llvm::OpenMPIRBuilder::InsertPointTy, llvm::Type *, llvm::Value *,
        llvm::Value *)>;
} // namespace
449 
450 /// Create an OpenMPIRBuilder-compatible reduction generator for the given
451 /// reduction declaration. The generator uses `builder` but ignores its
452 /// insertion point.
453 static OwningReductionGen
454 makeReductionGen(omp::ReductionDeclareOp decl, llvm::IRBuilderBase &builder,
455                  LLVM::ModuleTranslation &moduleTranslation) {
456   // The lambda is mutable because we need access to non-const methods of decl
457   // (which aren't actually mutating it), and we must capture decl by-value to
458   // avoid the dangling reference after the parent function returns.
459   OwningReductionGen gen =
460       [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint,
461                 llvm::Value *lhs, llvm::Value *rhs,
462                 llvm::Value *&result) mutable {
463         Region &reductionRegion = decl.reductionRegion();
464         moduleTranslation.mapValue(reductionRegion.front().getArgument(0), lhs);
465         moduleTranslation.mapValue(reductionRegion.front().getArgument(1), rhs);
466         builder.restoreIP(insertPoint);
467         SmallVector<llvm::Value *> phis;
468         if (failed(inlineConvertOmpRegions(reductionRegion,
469                                            "omp.reduction.nonatomic.body",
470                                            builder, moduleTranslation, &phis)))
471           return llvm::OpenMPIRBuilder::InsertPointTy();
472         assert(phis.size() == 1);
473         result = phis[0];
474         return builder.saveIP();
475       };
476   return gen;
477 }
478 
479 /// Create an OpenMPIRBuilder-compatible atomic reduction generator for the
480 /// given reduction declaration. The generator uses `builder` but ignores its
481 /// insertion point. Returns null if there is no atomic region available in the
482 /// reduction declaration.
483 static OwningAtomicReductionGen
484 makeAtomicReductionGen(omp::ReductionDeclareOp decl,
485                        llvm::IRBuilderBase &builder,
486                        LLVM::ModuleTranslation &moduleTranslation) {
487   if (decl.atomicReductionRegion().empty())
488     return OwningAtomicReductionGen();
489 
490   // The lambda is mutable because we need access to non-const methods of decl
491   // (which aren't actually mutating it), and we must capture decl by-value to
492   // avoid the dangling reference after the parent function returns.
493   OwningAtomicReductionGen atomicGen =
494       [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, llvm::Type *,
495                 llvm::Value *lhs, llvm::Value *rhs) mutable {
496         Region &atomicRegion = decl.atomicReductionRegion();
497         moduleTranslation.mapValue(atomicRegion.front().getArgument(0), lhs);
498         moduleTranslation.mapValue(atomicRegion.front().getArgument(1), rhs);
499         builder.restoreIP(insertPoint);
500         SmallVector<llvm::Value *> phis;
501         if (failed(inlineConvertOmpRegions(atomicRegion,
502                                            "omp.reduction.atomic.body", builder,
503                                            moduleTranslation, &phis)))
504           return llvm::OpenMPIRBuilder::InsertPointTy();
505         assert(phis.empty());
506         return builder.saveIP();
507       };
508   return atomicGen;
509 }
510 
511 /// Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
512 static LogicalResult
513 convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder,
514                   LLVM::ModuleTranslation &moduleTranslation) {
515   auto orderedOp = cast<omp::OrderedOp>(opInst);
516 
517   omp::ClauseDepend dependType = *orderedOp.depend_type_val();
518   bool isDependSource = dependType == omp::ClauseDepend::dependsource;
519   unsigned numLoops = orderedOp.num_loops_val().getValue();
520   SmallVector<llvm::Value *> vecValues =
521       moduleTranslation.lookupValues(orderedOp.depend_vec_vars());
522 
523   llvm::OpenMPIRBuilder::LocationDescription ompLoc(
524       builder.saveIP(), builder.getCurrentDebugLocation());
525   size_t indexVecValues = 0;
526   while (indexVecValues < vecValues.size()) {
527     SmallVector<llvm::Value *> storeValues;
528     storeValues.reserve(numLoops);
529     for (unsigned i = 0; i < numLoops; i++) {
530       storeValues.push_back(vecValues[indexVecValues]);
531       indexVecValues++;
532     }
533     builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend(
534         ompLoc, findAllocaInsertPoint(builder, moduleTranslation), numLoops,
535         storeValues, ".cnt.addr", isDependSource));
536   }
537   return success();
538 }
539 
540 /// Converts an OpenMP 'ordered_region' operation into LLVM IR using
541 /// OpenMPIRBuilder.
542 static LogicalResult
543 convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder,
544                         LLVM::ModuleTranslation &moduleTranslation) {
545   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
546   auto orderedRegionOp = cast<omp::OrderedRegionOp>(opInst);
547 
548   // TODO: The code generation for ordered simd directive is not supported yet.
549   if (orderedRegionOp.simd())
550     return failure();
551 
552   // TODO: support error propagation in OpenMPIRBuilder and use it instead of
553   // relying on captured variables.
554   LogicalResult bodyGenStatus = success();
555 
556   auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
557                        llvm::BasicBlock &continuationBlock) {
558     // OrderedOp has only one region associated with it.
559     auto &region = cast<omp::OrderedRegionOp>(opInst).getRegion();
560     convertOmpOpRegions(region, "omp.ordered.region", *codeGenIP.getBlock(),
561                         continuationBlock, builder, moduleTranslation,
562                         bodyGenStatus);
563   };
564 
565   // TODO: Perform finalization actions for variables. This has to be
566   // called for variables which have destructors/finalizers.
567   auto finiCB = [&](InsertPointTy codeGenIP) {};
568 
569   llvm::OpenMPIRBuilder::LocationDescription ompLoc(
570       builder.saveIP(), builder.getCurrentDebugLocation());
571   builder.restoreIP(
572       moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd(
573           ompLoc, bodyGenCB, finiCB, !orderedRegionOp.simd()));
574   return bodyGenStatus;
575 }
576 
577 static LogicalResult
578 convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
579                    LLVM::ModuleTranslation &moduleTranslation) {
580   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
581   using StorableBodyGenCallbackTy =
582       llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
583 
584   auto sectionsOp = cast<omp::SectionsOp>(opInst);
585 
586   // TODO: Support the following clauses: private, firstprivate, lastprivate,
587   // reduction, allocate
588   if (!sectionsOp.private_vars().empty() ||
589       !sectionsOp.firstprivate_vars().empty() ||
590       !sectionsOp.lastprivate_vars().empty() ||
591       !sectionsOp.reduction_vars().empty() || sectionsOp.reductions() ||
592       !sectionsOp.allocate_vars().empty() ||
593       !sectionsOp.allocators_vars().empty())
594     return emitError(sectionsOp.getLoc())
595            << "private, firstprivate, lastprivate, reduction and allocate "
596               "clauses are not supported for sections construct";
597 
598   LogicalResult bodyGenStatus = success();
599   SmallVector<StorableBodyGenCallbackTy> sectionCBs;
600 
601   for (Operation &op : *sectionsOp.region().begin()) {
602     auto sectionOp = dyn_cast<omp::SectionOp>(op);
603     if (!sectionOp) // omp.terminator
604       continue;
605 
606     Region &region = sectionOp.region();
607     auto sectionCB = [&region, &builder, &moduleTranslation, &bodyGenStatus](
608                          InsertPointTy allocaIP, InsertPointTy codeGenIP,
609                          llvm::BasicBlock &finiBB) {
610       builder.restoreIP(codeGenIP);
611       builder.CreateBr(&finiBB);
612       convertOmpOpRegions(region, "omp.section.region", *codeGenIP.getBlock(),
613                           finiBB, builder, moduleTranslation, bodyGenStatus);
614     };
615     sectionCBs.push_back(sectionCB);
616   }
617 
618   // No sections within omp.sections operation - skip generation. This situation
619   // is only possible if there is only a terminator operation inside the
620   // sections operation
621   if (sectionCBs.empty())
622     return success();
623 
624   assert(isa<omp::SectionOp>(*sectionsOp.region().op_begin()));
625 
626   // TODO: Perform appropriate actions according to the data-sharing
627   // attribute (shared, private, firstprivate, ...) of variables.
628   // Currently defaults to shared.
629   auto privCB = [&](InsertPointTy, InsertPointTy codeGenIP, llvm::Value &,
630                     llvm::Value &vPtr,
631                     llvm::Value *&replacementValue) -> InsertPointTy {
632     replacementValue = &vPtr;
633     return codeGenIP;
634   };
635 
636   // TODO: Perform finalization actions for variables. This has to be
637   // called for variables which have destructors/finalizers.
638   auto finiCB = [&](InsertPointTy codeGenIP) {};
639 
640   llvm::OpenMPIRBuilder::LocationDescription ompLoc(
641       builder.saveIP(), builder.getCurrentDebugLocation());
642   builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSections(
643       ompLoc, findAllocaInsertPoint(builder, moduleTranslation), sectionCBs,
644       privCB, finiCB, false, sectionsOp.nowait()));
645   return bodyGenStatus;
646 }
647 
648 /// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
649 static LogicalResult
650 convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
651                  LLVM::ModuleTranslation &moduleTranslation) {
652   auto loop = cast<omp::WsLoopOp>(opInst);
653   // TODO: this should be in the op verifier instead.
654   if (loop.lowerBound().empty())
655     return failure();
656 
657   // Static is the default.
658   auto schedule =
659       loop.schedule_val().getValueOr(omp::ClauseScheduleKind::Static);
660 
661   // Find the loop configuration.
662   llvm::Value *step = moduleTranslation.lookupValue(loop.step()[0]);
663   llvm::Type *ivType = step->getType();
664   llvm::Value *chunk = nullptr;
665   if (loop.schedule_chunk_var()) {
666     llvm::Value *chunkVar =
667         moduleTranslation.lookupValue(loop.schedule_chunk_var());
668     llvm::Type *chunkVarType = chunkVar->getType();
669     assert(chunkVarType->isIntegerTy() &&
670            "chunk size must be one integer expression");
671     if (chunkVarType->getIntegerBitWidth() < ivType->getIntegerBitWidth())
672       chunk = builder.CreateSExt(chunkVar, ivType);
673     else if (chunkVarType->getIntegerBitWidth() > ivType->getIntegerBitWidth())
674       chunk = builder.CreateTrunc(chunkVar, ivType);
675     else
676       chunk = chunkVar;
677   }
678 
679   SmallVector<omp::ReductionDeclareOp> reductionDecls;
680   collectReductionDecls(loop, reductionDecls);
681   llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
682       findAllocaInsertPoint(builder, moduleTranslation);
683 
684   // Allocate space for privatized reduction variables.
685   SmallVector<llvm::Value *> privateReductionVariables;
686   DenseMap<Value, llvm::Value *> reductionVariableMap;
687   unsigned numReductions = loop.getNumReductionVars();
688   privateReductionVariables.reserve(numReductions);
689   if (numReductions != 0) {
690     llvm::IRBuilderBase::InsertPointGuard guard(builder);
691     builder.restoreIP(allocaIP);
692     for (unsigned i = 0; i < numReductions; ++i) {
693       auto reductionType =
694           loop.reduction_vars()[i].getType().cast<LLVM::LLVMPointerType>();
695       llvm::Value *var = builder.CreateAlloca(
696           moduleTranslation.convertType(reductionType.getElementType()));
697       privateReductionVariables.push_back(var);
698       reductionVariableMap.try_emplace(loop.reduction_vars()[i], var);
699     }
700   }
701 
702   // Store the mapping between reduction variables and their private copies on
703   // ModuleTranslation stack. It can be then recovered when translating
704   // omp.reduce operations in a separate call.
705   LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard(
706       moduleTranslation, reductionVariableMap);
707 
708   // Before the loop, store the initial values of reductions into reduction
709   // variables. Although this could be done after allocas, we don't want to mess
710   // up with the alloca insertion point.
711   for (unsigned i = 0; i < numReductions; ++i) {
712     SmallVector<llvm::Value *> phis;
713     if (failed(inlineConvertOmpRegions(reductionDecls[i].initializerRegion(),
714                                        "omp.reduction.neutral", builder,
715                                        moduleTranslation, &phis)))
716       return failure();
717     assert(phis.size() == 1 && "expected one value to be yielded from the "
718                                "reduction neutral element declaration region");
719     builder.CreateStore(phis[0], privateReductionVariables[i]);
720   }
721 
722   // Set up the source location value for OpenMP runtime.
723   llvm::DISubprogram *subprogram =
724       builder.GetInsertBlock()->getParent()->getSubprogram();
725   const llvm::DILocation *diLoc =
726       moduleTranslation.translateLoc(opInst.getLoc(), subprogram);
727   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder.saveIP(),
728                                                     llvm::DebugLoc(diLoc));
729 
730   // Generator of the canonical loop body.
731   // TODO: support error propagation in OpenMPIRBuilder and use it instead of
732   // relying on captured variables.
733   SmallVector<llvm::CanonicalLoopInfo *> loopInfos;
734   SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints;
735   LogicalResult bodyGenStatus = success();
736   auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) {
737     // Make sure further conversions know about the induction variable.
738     moduleTranslation.mapValue(
739         loop.getRegion().front().getArgument(loopInfos.size()), iv);
740 
741     // Capture the body insertion point for use in nested loops. BodyIP of the
742     // CanonicalLoopInfo always points to the beginning of the entry block of
743     // the body.
744     bodyInsertPoints.push_back(ip);
745 
746     if (loopInfos.size() != loop.getNumLoops() - 1)
747       return;
748 
749     // Convert the body of the loop.
750     llvm::BasicBlock *entryBlock = ip.getBlock();
751     llvm::BasicBlock *exitBlock =
752         entryBlock->splitBasicBlock(ip.getPoint(), "omp.wsloop.exit");
753     convertOmpOpRegions(loop.region(), "omp.wsloop.region", *entryBlock,
754                         *exitBlock, builder, moduleTranslation, bodyGenStatus);
755   };
756 
757   // Delegate actual loop construction to the OpenMP IRBuilder.
758   // TODO: this currently assumes WsLoop is semantically similar to SCF loop,
759   // i.e. it has a positive step, uses signed integer semantics. Reconsider
760   // this code when WsLoop clearly supports more cases.
761   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
762   for (unsigned i = 0, e = loop.getNumLoops(); i < e; ++i) {
763     llvm::Value *lowerBound =
764         moduleTranslation.lookupValue(loop.lowerBound()[i]);
765     llvm::Value *upperBound =
766         moduleTranslation.lookupValue(loop.upperBound()[i]);
767     llvm::Value *step = moduleTranslation.lookupValue(loop.step()[i]);
768 
769     // Make sure loop trip count are emitted in the preheader of the outermost
770     // loop at the latest so that they are all available for the new collapsed
771     // loop will be created below.
772     llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
773     llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
774     if (i != 0) {
775       loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(),
776                                                        llvm::DebugLoc(diLoc));
777       computeIP = loopInfos.front()->getPreheaderIP();
778     }
779     loopInfos.push_back(ompBuilder->createCanonicalLoop(
780         loc, bodyGen, lowerBound, upperBound, step,
781         /*IsSigned=*/true, loop.inclusive(), computeIP));
782 
783     if (failed(bodyGenStatus))
784       return failure();
785   }
786 
787   // Collapse loops. Store the insertion point because LoopInfos may get
788   // invalidated.
789   llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP();
790   llvm::CanonicalLoopInfo *loopInfo =
791       ompBuilder->collapseLoops(diLoc, loopInfos, {});
792 
793   allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
794 
795   bool isSimd = loop.simd_modifier();
796 
797   if (schedule == omp::ClauseScheduleKind::Static) {
798     ompBuilder->applyStaticWorkshareLoop(ompLoc.DL, loopInfo, allocaIP,
799                                          !loop.nowait(), chunk);
800   } else {
801     llvm::omp::OMPScheduleType schedType;
802     switch (schedule) {
803     case omp::ClauseScheduleKind::Dynamic:
804       schedType = llvm::omp::OMPScheduleType::DynamicChunked;
805       break;
806     case omp::ClauseScheduleKind::Guided:
807       if (isSimd)
808         schedType = llvm::omp::OMPScheduleType::GuidedSimd;
809       else
810         schedType = llvm::omp::OMPScheduleType::GuidedChunked;
811       break;
812     case omp::ClauseScheduleKind::Auto:
813       schedType = llvm::omp::OMPScheduleType::Auto;
814       break;
815     case omp::ClauseScheduleKind::Runtime:
816       if (isSimd)
817         schedType = llvm::omp::OMPScheduleType::RuntimeSimd;
818       else
819         schedType = llvm::omp::OMPScheduleType::Runtime;
820       break;
821     default:
822       llvm_unreachable("Unknown schedule value");
823       break;
824     }
825 
826     if (Optional<omp::ScheduleModifier> modifier = loop.schedule_modifier()) {
827       switch (*modifier) {
828       case omp::ScheduleModifier::monotonic:
829         schedType |= llvm::omp::OMPScheduleType::ModifierMonotonic;
830         break;
831       case omp::ScheduleModifier::nonmonotonic:
832         schedType |= llvm::omp::OMPScheduleType::ModifierNonmonotonic;
833         break;
834       default:
835         // Nothing to do here.
836         break;
837       }
838     }
839     afterIP = ompBuilder->applyDynamicWorkshareLoop(
840         ompLoc.DL, loopInfo, allocaIP, schedType, !loop.nowait(), chunk);
841   }
842 
843   // Continue building IR after the loop. Note that the LoopInfo returned by
844   // `collapseLoops` points inside the outermost loop and is intended for
845   // potential further loop transformations. Use the insertion point stored
846   // before collapsing loops instead.
847   builder.restoreIP(afterIP);
848 
849   // Process the reductions if required.
850   if (numReductions == 0)
851     return success();
852 
853   // Create the reduction generators. We need to own them here because
854   // ReductionInfo only accepts references to the generators.
855   SmallVector<OwningReductionGen> owningReductionGens;
856   SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
857   for (unsigned i = 0; i < numReductions; ++i) {
858     owningReductionGens.push_back(
859         makeReductionGen(reductionDecls[i], builder, moduleTranslation));
860     owningAtomicReductionGens.push_back(
861         makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation));
862   }
863 
864   // Collect the reduction information.
865   SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos;
866   reductionInfos.reserve(numReductions);
867   for (unsigned i = 0; i < numReductions; ++i) {
868     llvm::OpenMPIRBuilder::AtomicReductionGenTy atomicGen = nullptr;
869     if (owningAtomicReductionGens[i])
870       atomicGen = owningAtomicReductionGens[i];
871     llvm::Value *variable =
872         moduleTranslation.lookupValue(loop.reduction_vars()[i]);
873     reductionInfos.push_back({variable->getType()->getPointerElementType(),
874                               variable, privateReductionVariables[i],
875                               owningReductionGens[i], atomicGen});
876   }
877 
878   // The call to createReductions below expects the block to have a
879   // terminator. Create an unreachable instruction to serve as terminator
880   // and remove it later.
881   llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
882   builder.SetInsertPoint(tempTerminator);
883   llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint =
884       ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos,
885                                    loop.nowait());
886   if (!contInsertPoint.getBlock())
887     return loop->emitOpError() << "failed to convert reductions";
888   auto nextInsertionPoint =
889       ompBuilder->createBarrier(contInsertPoint, llvm::omp::OMPD_for);
890   tempTerminator->eraseFromParent();
891   builder.restoreIP(nextInsertionPoint);
892 
893   return success();
894 }
895 
896 /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
897 llvm::AtomicOrdering
898 convertAtomicOrdering(Optional<omp::ClauseMemoryOrderKind> ao) {
899   if (!ao)
900     return llvm::AtomicOrdering::Monotonic; // Default Memory Ordering
901 
902   switch (*ao) {
903   case omp::ClauseMemoryOrderKind::seq_cst:
904     return llvm::AtomicOrdering::SequentiallyConsistent;
905   case omp::ClauseMemoryOrderKind::acq_rel:
906     return llvm::AtomicOrdering::AcquireRelease;
907   case omp::ClauseMemoryOrderKind::acquire:
908     return llvm::AtomicOrdering::Acquire;
909   case omp::ClauseMemoryOrderKind::release:
910     return llvm::AtomicOrdering::Release;
911   case omp::ClauseMemoryOrderKind::relaxed:
912     return llvm::AtomicOrdering::Monotonic;
913   }
914   llvm_unreachable("Unknown ClauseMemoryOrderKind kind");
915 }
916 
917 /// Convert omp.atomic.read operation to LLVM IR.
918 static LogicalResult
919 convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder,
920                      LLVM::ModuleTranslation &moduleTranslation) {
921 
922   auto readOp = cast<omp::AtomicReadOp>(opInst);
923   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
924 
925   // Set up the source location value for OpenMP runtime.
926   llvm::DISubprogram *subprogram =
927       builder.GetInsertBlock()->getParent()->getSubprogram();
928   const llvm::DILocation *diLoc =
929       moduleTranslation.translateLoc(opInst.getLoc(), subprogram);
930   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder.saveIP(),
931                                                     llvm::DebugLoc(diLoc));
932   llvm::AtomicOrdering AO = convertAtomicOrdering(readOp.memory_order());
933   llvm::Value *x = moduleTranslation.lookupValue(readOp.x());
934   Type xTy = readOp.x().getType().cast<omp::PointerLikeType>().getElementType();
935   llvm::Value *v = moduleTranslation.lookupValue(readOp.v());
936   Type vTy = readOp.v().getType().cast<omp::PointerLikeType>().getElementType();
937   llvm::OpenMPIRBuilder::AtomicOpValue V = {
938       v, moduleTranslation.convertType(vTy), false, false};
939   llvm::OpenMPIRBuilder::AtomicOpValue X = {
940       x, moduleTranslation.convertType(xTy), false, false};
941   builder.restoreIP(ompBuilder->createAtomicRead(ompLoc, X, V, AO));
942   return success();
943 }
944 
945 /// Converts an omp.atomic.write operation to LLVM IR.
946 static LogicalResult
947 convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder,
948                       LLVM::ModuleTranslation &moduleTranslation) {
949   auto writeOp = cast<omp::AtomicWriteOp>(opInst);
950   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
951 
952   // Set up the source location value for OpenMP runtime.
953   llvm::DISubprogram *subprogram =
954       builder.GetInsertBlock()->getParent()->getSubprogram();
955   const llvm::DILocation *diLoc =
956       moduleTranslation.translateLoc(opInst.getLoc(), subprogram);
957   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder.saveIP(),
958                                                     llvm::DebugLoc(diLoc));
959   llvm::AtomicOrdering ao = convertAtomicOrdering(writeOp.memory_order());
960   llvm::Value *expr = moduleTranslation.lookupValue(writeOp.value());
961   llvm::Value *dest = moduleTranslation.lookupValue(writeOp.address());
962   llvm::Type *ty = moduleTranslation.convertType(writeOp.value().getType());
963   llvm::OpenMPIRBuilder::AtomicOpValue x = {dest, ty, /*isSigned=*/false,
964                                             /*isVolatile=*/false};
965   builder.restoreIP(ompBuilder->createAtomicWrite(ompLoc, x, expr, ao));
966   return success();
967 }
968 
969 /// Converts an OpenMP reduction operation using OpenMPIRBuilder. Expects the
970 /// mapping between reduction variables and their private equivalents to have
971 /// been stored on the ModuleTranslation stack. Currently only supports
972 /// reduction within WsLoopOp, but can be easily extended.
973 static LogicalResult
974 convertOmpReductionOp(omp::ReductionOp reductionOp,
975                       llvm::IRBuilderBase &builder,
976                       LLVM::ModuleTranslation &moduleTranslation) {
977   // Find the declaration that corresponds to the reduction op.
978   auto reductionContainer = reductionOp->getParentOfType<omp::WsLoopOp>();
979   omp::ReductionDeclareOp declaration =
980       findReductionDecl(reductionContainer, reductionOp);
981   assert(declaration && "could not find reduction declaration");
982 
983   // Retrieve the mapping between reduction variables and their private
984   // equivalents.
985   const DenseMap<Value, llvm::Value *> *reductionVariableMap = nullptr;
986   moduleTranslation.stackWalk<OpenMPVarMappingStackFrame>(
987       [&](const OpenMPVarMappingStackFrame &frame) {
988         reductionVariableMap = &frame.mapping;
989         return WalkResult::interrupt();
990       });
991   assert(reductionVariableMap && "couldn't find private reduction variables");
992 
993   // Translate the reduction operation by emitting the body of the corresponding
994   // reduction declaration.
995   Region &reductionRegion = declaration.reductionRegion();
996   llvm::Value *privateReductionVar =
997       reductionVariableMap->lookup(reductionOp.accumulator());
998   llvm::Value *reductionVal = builder.CreateLoad(
999       moduleTranslation.convertType(reductionOp.operand().getType()),
1000       privateReductionVar);
1001 
1002   moduleTranslation.mapValue(reductionRegion.front().getArgument(0),
1003                              reductionVal);
1004   moduleTranslation.mapValue(
1005       reductionRegion.front().getArgument(1),
1006       moduleTranslation.lookupValue(reductionOp.operand()));
1007 
1008   SmallVector<llvm::Value *> phis;
1009   if (failed(inlineConvertOmpRegions(reductionRegion, "omp.reduction.body",
1010                                      builder, moduleTranslation, &phis)))
1011     return failure();
1012   assert(phis.size() == 1 && "expected one value to be yielded from "
1013                              "the reduction body declaration region");
1014   builder.CreateStore(phis[0], privateReductionVar);
1015   return success();
1016 }
1017 
1018 namespace {
1019 
1020 /// Implementation of the dialect interface that converts operations belonging
1021 /// to the OpenMP dialect to LLVM IR.
1022 class OpenMPDialectLLVMIRTranslationInterface
1023     : public LLVMTranslationDialectInterface {
1024 public:
1025   using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface;
1026 
1027   /// Translates the given operation to LLVM IR using the provided IR builder
1028   /// and saving the state in `moduleTranslation`.
1029   LogicalResult
1030   convertOperation(Operation *op, llvm::IRBuilderBase &builder,
1031                    LLVM::ModuleTranslation &moduleTranslation) const final;
1032 };
1033 
1034 } // namespace
1035 
1036 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR
1037 /// (including OpenMP runtime calls).
1038 LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
1039     Operation *op, llvm::IRBuilderBase &builder,
1040     LLVM::ModuleTranslation &moduleTranslation) const {
1041 
1042   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1043 
1044   return llvm::TypeSwitch<Operation *, LogicalResult>(op)
1045       .Case([&](omp::BarrierOp) {
1046         ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier);
1047         return success();
1048       })
1049       .Case([&](omp::TaskwaitOp) {
1050         ompBuilder->createTaskwait(builder.saveIP());
1051         return success();
1052       })
1053       .Case([&](omp::TaskyieldOp) {
1054         ompBuilder->createTaskyield(builder.saveIP());
1055         return success();
1056       })
1057       .Case([&](omp::FlushOp) {
1058         // No support in Openmp runtime function (__kmpc_flush) to accept
1059         // the argument list.
1060         // OpenMP standard states the following:
1061         //  "An implementation may implement a flush with a list by ignoring
1062         //   the list, and treating it the same as a flush without a list."
1063         //
1064         // The argument list is discarded so that, flush with a list is treated
1065         // same as a flush without a list.
1066         ompBuilder->createFlush(builder.saveIP());
1067         return success();
1068       })
1069       .Case([&](omp::ParallelOp op) {
1070         return convertOmpParallel(op, builder, moduleTranslation);
1071       })
1072       .Case([&](omp::ReductionOp reductionOp) {
1073         return convertOmpReductionOp(reductionOp, builder, moduleTranslation);
1074       })
1075       .Case([&](omp::MasterOp) {
1076         return convertOmpMaster(*op, builder, moduleTranslation);
1077       })
1078       .Case([&](omp::CriticalOp) {
1079         return convertOmpCritical(*op, builder, moduleTranslation);
1080       })
1081       .Case([&](omp::OrderedRegionOp) {
1082         return convertOmpOrderedRegion(*op, builder, moduleTranslation);
1083       })
1084       .Case([&](omp::OrderedOp) {
1085         return convertOmpOrdered(*op, builder, moduleTranslation);
1086       })
1087       .Case([&](omp::WsLoopOp) {
1088         return convertOmpWsLoop(*op, builder, moduleTranslation);
1089       })
1090       .Case([&](omp::AtomicReadOp) {
1091         return convertOmpAtomicRead(*op, builder, moduleTranslation);
1092       })
1093       .Case([&](omp::AtomicWriteOp) {
1094         return convertOmpAtomicWrite(*op, builder, moduleTranslation);
1095       })
1096       .Case([&](omp::SectionsOp) {
1097         return convertOmpSections(*op, builder, moduleTranslation);
1098       })
1099       .Case<omp::YieldOp, omp::TerminatorOp, omp::ReductionDeclareOp,
1100             omp::CriticalDeclareOp>([](auto op) {
1101         // `yield` and `terminator` can be just omitted. The block structure
1102         // was created in the region that handles their parent operation.
1103         // `reduction.declare` will be used by reductions and is not
1104         // converted directly, skip it.
1105         // `critical.declare` is only used to declare names of critical
1106         // sections which will be used by `critical` ops and hence can be
1107         // ignored for lowering. The OpenMP IRBuilder will create unique
1108         // name for critical section names.
1109         return success();
1110       })
1111       .Default([&](Operation *inst) {
1112         return inst->emitError("unsupported OpenMP operation: ")
1113                << inst->getName();
1114       });
1115 }
1116 
1117 void mlir::registerOpenMPDialectTranslation(DialectRegistry &registry) {
1118   registry.insert<omp::OpenMPDialect>();
1119   registry.addDialectInterface<omp::OpenMPDialect,
1120                                OpenMPDialectLLVMIRTranslationInterface>();
1121 }
1122 
1123 void mlir::registerOpenMPDialectTranslation(MLIRContext &context) {
1124   DialectRegistry registry;
1125   registerOpenMPDialectTranslation(registry);
1126   context.appendDialectRegistry(registry);
1127 }
1128