1 //===- OpenMPToLLVMIRTranslation.cpp - Translate OpenMP dialect to LLVM IR-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a translation between the MLIR OpenMP dialect and LLVM
10 // IR.
11 //
12 //===----------------------------------------------------------------------===//
#include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h"
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/IR/BlockAndValueMapping.h"
#include "mlir/IR/Operation.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Target/LLVMIR/ModuleTranslation.h"

#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/ErrorHandling.h"
24 
25 using namespace mlir;
26 
27 namespace {
28 /// ModuleTranslation stack frame for OpenMP operations. This keeps track of the
29 /// insertion points for allocas.
30 class OpenMPAllocaStackFrame
31     : public LLVM::ModuleTranslation::StackFrameBase<OpenMPAllocaStackFrame> {
32 public:
33   explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP)
34       : allocaInsertPoint(allocaIP) {}
35   llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
36 };
37 
38 /// ModuleTranslation stack frame containing the partial mapping between MLIR
39 /// values and their LLVM IR equivalents.
40 class OpenMPVarMappingStackFrame
41     : public LLVM::ModuleTranslation::StackFrameBase<
42           OpenMPVarMappingStackFrame> {
43 public:
44   explicit OpenMPVarMappingStackFrame(
45       const DenseMap<Value, llvm::Value *> &mapping)
46       : mapping(mapping) {}
47 
48   DenseMap<Value, llvm::Value *> mapping;
49 };
50 } // namespace
51 
52 /// Find the insertion point for allocas given the current insertion point for
53 /// normal operations in the builder.
54 static llvm::OpenMPIRBuilder::InsertPointTy
55 findAllocaInsertPoint(llvm::IRBuilderBase &builder,
56                       const LLVM::ModuleTranslation &moduleTranslation) {
57   // If there is an alloca insertion point on stack, i.e. we are in a nested
58   // operation and a specific point was provided by some surrounding operation,
59   // use it.
60   llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
61   WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocaStackFrame>(
62       [&](const OpenMPAllocaStackFrame &frame) {
63         allocaInsertPoint = frame.allocaInsertPoint;
64         return WalkResult::interrupt();
65       });
66   if (walkResult.wasInterrupted())
67     return allocaInsertPoint;
68 
69   // Otherwise, insert to the entry block of the surrounding function.
70   llvm::BasicBlock &funcEntryBlock =
71       builder.GetInsertBlock()->getParent()->getEntryBlock();
72   return llvm::OpenMPIRBuilder::InsertPointTy(
73       &funcEntryBlock, funcEntryBlock.getFirstInsertionPt());
74 }
75 
76 /// Converts the given region that appears within an OpenMP dialect operation to
77 /// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the
78 /// region, and a branch from any block with an successor-less OpenMP terminator
79 /// to `continuationBlock`. Populates `continuationBlockPHIs` with the PHI nodes
80 /// of the continuation block if provided.
81 static void convertOmpOpRegions(
82     Region &region, StringRef blockName, llvm::BasicBlock &sourceBlock,
83     llvm::BasicBlock &continuationBlock, llvm::IRBuilderBase &builder,
84     LLVM::ModuleTranslation &moduleTranslation, LogicalResult &bodyGenStatus,
85     SmallVectorImpl<llvm::PHINode *> *continuationBlockPHIs = nullptr) {
86   llvm::LLVMContext &llvmContext = builder.getContext();
87   for (Block &bb : region) {
88     llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create(
89         llvmContext, blockName, builder.GetInsertBlock()->getParent(),
90         builder.GetInsertBlock()->getNextNode());
91     moduleTranslation.mapBlock(&bb, llvmBB);
92   }
93 
94   llvm::Instruction *sourceTerminator = sourceBlock.getTerminator();
95 
96   // Terminators (namely YieldOp) may be forwarding values to the region that
97   // need to be available in the continuation block. Collect the types of these
98   // operands in preparation of creating PHI nodes.
99   SmallVector<llvm::Type *> continuationBlockPHITypes;
100   bool operandsProcessed = false;
101   unsigned numYields = 0;
102   for (Block &bb : region.getBlocks()) {
103     if (omp::YieldOp yield = dyn_cast<omp::YieldOp>(bb.getTerminator())) {
104       if (!operandsProcessed) {
105         for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
106           continuationBlockPHITypes.push_back(
107               moduleTranslation.convertType(yield->getOperand(i).getType()));
108         }
109         operandsProcessed = true;
110       } else {
111         assert(continuationBlockPHITypes.size() == yield->getNumOperands() &&
112                "mismatching number of values yielded from the region");
113         for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
114           llvm::Type *operandType =
115               moduleTranslation.convertType(yield->getOperand(i).getType());
116           (void)operandType;
117           assert(continuationBlockPHITypes[i] == operandType &&
118                  "values of mismatching types yielded from the region");
119         }
120       }
121       numYields++;
122     }
123   }
124 
125   // Insert PHI nodes in the continuation block for any values forwarded by the
126   // terminators in this region.
127   if (!continuationBlockPHITypes.empty())
128     assert(
129         continuationBlockPHIs &&
130         "expected continuation block PHIs if converted regions yield values");
131   if (continuationBlockPHIs) {
132     llvm::IRBuilderBase::InsertPointGuard guard(builder);
133     continuationBlockPHIs->reserve(continuationBlockPHITypes.size());
134     builder.SetInsertPoint(&continuationBlock, continuationBlock.begin());
135     for (llvm::Type *ty : continuationBlockPHITypes)
136       continuationBlockPHIs->push_back(builder.CreatePHI(ty, numYields));
137   }
138 
139   // Convert blocks one by one in topological order to ensure
140   // defs are converted before uses.
141   SetVector<Block *> blocks =
142       LLVM::detail::getTopologicallySortedBlocks(region);
143   for (Block *bb : blocks) {
144     llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb);
145     // Retarget the branch of the entry block to the entry block of the
146     // converted region (regions are single-entry).
147     if (bb->isEntryBlock()) {
148       assert(sourceTerminator->getNumSuccessors() == 1 &&
149              "provided entry block has multiple successors");
150       assert(sourceTerminator->getSuccessor(0) == &continuationBlock &&
151              "ContinuationBlock is not the successor of the entry block");
152       sourceTerminator->setSuccessor(0, llvmBB);
153     }
154 
155     llvm::IRBuilderBase::InsertPointGuard guard(builder);
156     if (failed(
157             moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder))) {
158       bodyGenStatus = failure();
159       return;
160     }
161 
162     // Special handling for `omp.yield` and `omp.terminator` (we may have more
163     // than one): they return the control to the parent OpenMP dialect operation
164     // so replace them with the branch to the continuation block. We handle this
165     // here to avoid relying inter-function communication through the
166     // ModuleTranslation class to set up the correct insertion point. This is
167     // also consistent with MLIR's idiom of handling special region terminators
168     // in the same code that handles the region-owning operation.
169     Operation *terminator = bb->getTerminator();
170     if (isa<omp::TerminatorOp, omp::YieldOp>(terminator)) {
171       builder.CreateBr(&continuationBlock);
172 
173       for (unsigned i = 0, e = terminator->getNumOperands(); i < e; ++i)
174         (*continuationBlockPHIs)[i]->addIncoming(
175             moduleTranslation.lookupValue(terminator->getOperand(i)), llvmBB);
176     }
177   }
178   // After all blocks have been traversed and values mapped, connect the PHI
179   // nodes to the results of preceding blocks.
180   LLVM::detail::connectPHINodes(region, moduleTranslation);
181 
182   // Remove the blocks and values defined in this region from the mapping since
183   // they are not visible outside of this region. This allows the same region to
184   // be converted several times, that is cloned, without clashes, and slightly
185   // speeds up the lookups.
186   moduleTranslation.forgetMapping(region);
187 }
188 
189 /// Convert ProcBindKind from MLIR-generated enum to LLVM enum.
190 static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) {
191   switch (kind) {
192   case omp::ClauseProcBindKind::close:
193     return llvm::omp::ProcBindKind::OMP_PROC_BIND_close;
194   case omp::ClauseProcBindKind::master:
195     return llvm::omp::ProcBindKind::OMP_PROC_BIND_master;
196   case omp::ClauseProcBindKind::primary:
197     return llvm::omp::ProcBindKind::OMP_PROC_BIND_primary;
198   case omp::ClauseProcBindKind::spread:
199     return llvm::omp::ProcBindKind::OMP_PROC_BIND_spread;
200   }
201 }
202 
203 /// Converts the OpenMP parallel operation to LLVM IR.
204 static LogicalResult
205 convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
206                    LLVM::ModuleTranslation &moduleTranslation) {
207   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
208   // TODO: support error propagation in OpenMPIRBuilder and use it instead of
209   // relying on captured variables.
210   LogicalResult bodyGenStatus = success();
211 
212   auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
213                        llvm::BasicBlock &continuationBlock) {
214     // Save the alloca insertion point on ModuleTranslation stack for use in
215     // nested regions.
216     LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
217         moduleTranslation, allocaIP);
218 
219     // ParallelOp has only one region associated with it.
220     convertOmpOpRegions(opInst.getRegion(), "omp.par.region",
221                         *codeGenIP.getBlock(), continuationBlock, builder,
222                         moduleTranslation, bodyGenStatus);
223   };
224 
225   // TODO: Perform appropriate actions according to the data-sharing
226   // attribute (shared, private, firstprivate, ...) of variables.
227   // Currently defaults to shared.
228   auto privCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
229                     llvm::Value &, llvm::Value &vPtr,
230                     llvm::Value *&replacementValue) -> InsertPointTy {
231     replacementValue = &vPtr;
232 
233     return codeGenIP;
234   };
235 
236   // TODO: Perform finalization actions for variables. This has to be
237   // called for variables which have destructors/finalizers.
238   auto finiCB = [&](InsertPointTy codeGenIP) {};
239 
240   llvm::Value *ifCond = nullptr;
241   if (auto ifExprVar = opInst.if_expr_var())
242     ifCond = moduleTranslation.lookupValue(ifExprVar);
243   llvm::Value *numThreads = nullptr;
244   if (auto numThreadsVar = opInst.num_threads_var())
245     numThreads = moduleTranslation.lookupValue(numThreadsVar);
246   auto pbKind = llvm::omp::OMP_PROC_BIND_default;
247   if (auto bind = opInst.proc_bind_val())
248     pbKind = getProcBindKind(*bind);
249   // TODO: Is the Parallel construct cancellable?
250   bool isCancellable = false;
251 
252   // Ensure that the BasicBlock for the the parallel region is sparate from the
253   // function entry which we may need to insert allocas.
254   if (builder.GetInsertBlock() ==
255       &builder.GetInsertBlock()->getParent()->getEntryBlock()) {
256     assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() &&
257            "Assuming end of basic block");
258     llvm::BasicBlock *entryBB =
259         llvm::BasicBlock::Create(builder.getContext(), "parallel.entry",
260                                  builder.GetInsertBlock()->getParent(),
261                                  builder.GetInsertBlock()->getNextNode());
262     builder.CreateBr(entryBB);
263     builder.SetInsertPoint(entryBB);
264   }
265   llvm::OpenMPIRBuilder::LocationDescription ompLoc(
266       builder.saveIP(), builder.getCurrentDebugLocation());
267   builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createParallel(
268       ompLoc, findAllocaInsertPoint(builder, moduleTranslation), bodyGenCB,
269       privCB, finiCB, ifCond, numThreads, pbKind, isCancellable));
270 
271   return bodyGenStatus;
272 }
273 
274 /// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
275 static LogicalResult
276 convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder,
277                  LLVM::ModuleTranslation &moduleTranslation) {
278   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
279   // TODO: support error propagation in OpenMPIRBuilder and use it instead of
280   // relying on captured variables.
281   LogicalResult bodyGenStatus = success();
282 
283   auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
284                        llvm::BasicBlock &continuationBlock) {
285     // MasterOp has only one region associated with it.
286     auto &region = cast<omp::MasterOp>(opInst).getRegion();
287     convertOmpOpRegions(region, "omp.master.region", *codeGenIP.getBlock(),
288                         continuationBlock, builder, moduleTranslation,
289                         bodyGenStatus);
290   };
291 
292   // TODO: Perform finalization actions for variables. This has to be
293   // called for variables which have destructors/finalizers.
294   auto finiCB = [&](InsertPointTy codeGenIP) {};
295 
296   llvm::OpenMPIRBuilder::LocationDescription ompLoc(
297       builder.saveIP(), builder.getCurrentDebugLocation());
298   builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createMaster(
299       ompLoc, bodyGenCB, finiCB));
300   return success();
301 }
302 
303 /// Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder.
304 static LogicalResult
305 convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder,
306                    LLVM::ModuleTranslation &moduleTranslation) {
307   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
308   auto criticalOp = cast<omp::CriticalOp>(opInst);
309   // TODO: support error propagation in OpenMPIRBuilder and use it instead of
310   // relying on captured variables.
311   LogicalResult bodyGenStatus = success();
312 
313   auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
314                        llvm::BasicBlock &continuationBlock) {
315     // CriticalOp has only one region associated with it.
316     auto &region = cast<omp::CriticalOp>(opInst).getRegion();
317     convertOmpOpRegions(region, "omp.critical.region", *codeGenIP.getBlock(),
318                         continuationBlock, builder, moduleTranslation,
319                         bodyGenStatus);
320   };
321 
322   // TODO: Perform finalization actions for variables. This has to be
323   // called for variables which have destructors/finalizers.
324   auto finiCB = [&](InsertPointTy codeGenIP) {};
325 
326   llvm::OpenMPIRBuilder::LocationDescription ompLoc(
327       builder.saveIP(), builder.getCurrentDebugLocation());
328   llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
329   llvm::Constant *hint = nullptr;
330 
331   // If it has a name, it probably has a hint too.
332   if (criticalOp.nameAttr()) {
333     // The verifiers in OpenMP Dialect guarentee that all the pointers are
334     // non-null
335     auto symbolRef = criticalOp.nameAttr().cast<SymbolRefAttr>();
336     auto criticalDeclareOp =
337         SymbolTable::lookupNearestSymbolFrom<omp::CriticalDeclareOp>(criticalOp,
338                                                                      symbolRef);
339     hint = llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext),
340                                   static_cast<int>(criticalDeclareOp.hint()));
341   }
342   builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createCritical(
343       ompLoc, bodyGenCB, finiCB, criticalOp.name().getValueOr(""), hint));
344   return success();
345 }
346 
347 /// Returns a reduction declaration that corresponds to the given reduction
348 /// operation in the given container. Currently only supports reductions inside
349 /// WsLoopOp but can be easily extended.
350 static omp::ReductionDeclareOp findReductionDecl(omp::WsLoopOp container,
351                                                  omp::ReductionOp reduction) {
352   SymbolRefAttr reductionSymbol;
353   for (unsigned i = 0, e = container.getNumReductionVars(); i < e; ++i) {
354     if (container.reduction_vars()[i] != reduction.accumulator())
355       continue;
356     reductionSymbol = (*container.reductions())[i].cast<SymbolRefAttr>();
357     break;
358   }
359   assert(reductionSymbol &&
360          "reduction operation must be associated with a declaration");
361 
362   return SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>(
363       container, reductionSymbol);
364 }
365 
366 /// Populates `reductions` with reduction declarations used in the given loop.
367 static void
368 collectReductionDecls(omp::WsLoopOp loop,
369                       SmallVectorImpl<omp::ReductionDeclareOp> &reductions) {
370   Optional<ArrayAttr> attr = loop.reductions();
371   if (!attr)
372     return;
373 
374   reductions.reserve(reductions.size() + loop.getNumReductionVars());
375   for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
376     reductions.push_back(
377         SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>(
378             loop, symbolRef));
379   }
380 }
381 
382 /// Translates the blocks contained in the given region and appends them to at
383 /// the current insertion point of `builder`. The operations of the entry block
384 /// are appended to the current insertion block, which is not expected to have a
385 /// terminator. If set, `continuationBlockArgs` is populated with translated
386 /// values that correspond to the values omp.yield'ed from the region.
387 static LogicalResult inlineConvertOmpRegions(
388     Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
389     LLVM::ModuleTranslation &moduleTranslation,
390     SmallVectorImpl<llvm::Value *> *continuationBlockArgs = nullptr) {
391   if (region.empty())
392     return success();
393 
394   // Special case for single-block regions that don't create additional blocks:
395   // insert operations without creating additional blocks.
396   if (llvm::hasSingleElement(region)) {
397     moduleTranslation.mapBlock(&region.front(), builder.GetInsertBlock());
398     if (failed(moduleTranslation.convertBlock(
399             region.front(), /*ignoreArguments=*/true, builder)))
400       return failure();
401 
402     // The continuation arguments are simply the translated terminator operands.
403     if (continuationBlockArgs)
404       llvm::append_range(
405           *continuationBlockArgs,
406           moduleTranslation.lookupValues(region.front().back().getOperands()));
407 
408     // Drop the mapping that is no longer necessary so that the same region can
409     // be processed multiple times.
410     moduleTranslation.forgetMapping(region);
411     return success();
412   }
413 
414   // Create the continuation block manually instead of calling splitBlock
415   // because the current insertion block may not have a terminator.
416   llvm::BasicBlock *continuationBlock =
417       llvm::BasicBlock::Create(builder.getContext(), blockName + ".cont",
418                                builder.GetInsertBlock()->getParent(),
419                                builder.GetInsertBlock()->getNextNode());
420   builder.CreateBr(continuationBlock);
421 
422   LogicalResult bodyGenStatus = success();
423   SmallVector<llvm::PHINode *> phis;
424   convertOmpOpRegions(region, blockName, *builder.GetInsertBlock(),
425                       *continuationBlock, builder, moduleTranslation,
426                       bodyGenStatus, &phis);
427   if (failed(bodyGenStatus))
428     return failure();
429   if (continuationBlockArgs)
430     llvm::append_range(*continuationBlockArgs, phis);
431   builder.SetInsertPoint(continuationBlock,
432                          continuationBlock->getFirstInsertionPt());
433   return success();
434 }
435 
436 namespace {
437 /// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used to
438 /// store lambdas with capture.
439 using OwningReductionGen = std::function<llvm::OpenMPIRBuilder::InsertPointTy(
440     llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *,
441     llvm::Value *&)>;
442 using OwningAtomicReductionGen =
443     std::function<llvm::OpenMPIRBuilder::InsertPointTy(
444         llvm::OpenMPIRBuilder::InsertPointTy, llvm::Type *, llvm::Value *,
445         llvm::Value *)>;
446 } // namespace
447 
448 /// Create an OpenMPIRBuilder-compatible reduction generator for the given
449 /// reduction declaration. The generator uses `builder` but ignores its
450 /// insertion point.
451 static OwningReductionGen
452 makeReductionGen(omp::ReductionDeclareOp decl, llvm::IRBuilderBase &builder,
453                  LLVM::ModuleTranslation &moduleTranslation) {
454   // The lambda is mutable because we need access to non-const methods of decl
455   // (which aren't actually mutating it), and we must capture decl by-value to
456   // avoid the dangling reference after the parent function returns.
457   OwningReductionGen gen =
458       [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint,
459                 llvm::Value *lhs, llvm::Value *rhs,
460                 llvm::Value *&result) mutable {
461         Region &reductionRegion = decl.reductionRegion();
462         moduleTranslation.mapValue(reductionRegion.front().getArgument(0), lhs);
463         moduleTranslation.mapValue(reductionRegion.front().getArgument(1), rhs);
464         builder.restoreIP(insertPoint);
465         SmallVector<llvm::Value *> phis;
466         if (failed(inlineConvertOmpRegions(reductionRegion,
467                                            "omp.reduction.nonatomic.body",
468                                            builder, moduleTranslation, &phis)))
469           return llvm::OpenMPIRBuilder::InsertPointTy();
470         assert(phis.size() == 1);
471         result = phis[0];
472         return builder.saveIP();
473       };
474   return gen;
475 }
476 
477 /// Create an OpenMPIRBuilder-compatible atomic reduction generator for the
478 /// given reduction declaration. The generator uses `builder` but ignores its
479 /// insertion point. Returns null if there is no atomic region available in the
480 /// reduction declaration.
481 static OwningAtomicReductionGen
482 makeAtomicReductionGen(omp::ReductionDeclareOp decl,
483                        llvm::IRBuilderBase &builder,
484                        LLVM::ModuleTranslation &moduleTranslation) {
485   if (decl.atomicReductionRegion().empty())
486     return OwningAtomicReductionGen();
487 
488   // The lambda is mutable because we need access to non-const methods of decl
489   // (which aren't actually mutating it), and we must capture decl by-value to
490   // avoid the dangling reference after the parent function returns.
491   OwningAtomicReductionGen atomicGen =
492       [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, llvm::Type *,
493                 llvm::Value *lhs, llvm::Value *rhs) mutable {
494         Region &atomicRegion = decl.atomicReductionRegion();
495         moduleTranslation.mapValue(atomicRegion.front().getArgument(0), lhs);
496         moduleTranslation.mapValue(atomicRegion.front().getArgument(1), rhs);
497         builder.restoreIP(insertPoint);
498         SmallVector<llvm::Value *> phis;
499         if (failed(inlineConvertOmpRegions(atomicRegion,
500                                            "omp.reduction.atomic.body", builder,
501                                            moduleTranslation, &phis)))
502           return llvm::OpenMPIRBuilder::InsertPointTy();
503         assert(phis.empty());
504         return builder.saveIP();
505       };
506   return atomicGen;
507 }
508 
509 /// Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
510 static LogicalResult
511 convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder,
512                   LLVM::ModuleTranslation &moduleTranslation) {
513   auto orderedOp = cast<omp::OrderedOp>(opInst);
514 
515   omp::ClauseDepend dependType = *orderedOp.depend_type_val();
516   bool isDependSource = dependType == omp::ClauseDepend::dependsource;
517   unsigned numLoops = orderedOp.num_loops_val().getValue();
518   SmallVector<llvm::Value *> vecValues =
519       moduleTranslation.lookupValues(orderedOp.depend_vec_vars());
520 
521   llvm::OpenMPIRBuilder::LocationDescription ompLoc(
522       builder.saveIP(), builder.getCurrentDebugLocation());
523   size_t indexVecValues = 0;
524   while (indexVecValues < vecValues.size()) {
525     SmallVector<llvm::Value *> storeValues;
526     storeValues.reserve(numLoops);
527     for (unsigned i = 0; i < numLoops; i++) {
528       storeValues.push_back(vecValues[indexVecValues]);
529       indexVecValues++;
530     }
531     builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend(
532         ompLoc, findAllocaInsertPoint(builder, moduleTranslation), numLoops,
533         storeValues, ".cnt.addr", isDependSource));
534   }
535   return success();
536 }
537 
538 /// Converts an OpenMP 'ordered_region' operation into LLVM IR using
539 /// OpenMPIRBuilder.
540 static LogicalResult
541 convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder,
542                         LLVM::ModuleTranslation &moduleTranslation) {
543   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
544   auto orderedRegionOp = cast<omp::OrderedRegionOp>(opInst);
545 
546   // TODO: The code generation for ordered simd directive is not supported yet.
547   if (orderedRegionOp.simd())
548     return failure();
549 
550   // TODO: support error propagation in OpenMPIRBuilder and use it instead of
551   // relying on captured variables.
552   LogicalResult bodyGenStatus = success();
553 
554   auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
555                        llvm::BasicBlock &continuationBlock) {
556     // OrderedOp has only one region associated with it.
557     auto &region = cast<omp::OrderedRegionOp>(opInst).getRegion();
558     convertOmpOpRegions(region, "omp.ordered.region", *codeGenIP.getBlock(),
559                         continuationBlock, builder, moduleTranslation,
560                         bodyGenStatus);
561   };
562 
563   // TODO: Perform finalization actions for variables. This has to be
564   // called for variables which have destructors/finalizers.
565   auto finiCB = [&](InsertPointTy codeGenIP) {};
566 
567   llvm::OpenMPIRBuilder::LocationDescription ompLoc(
568       builder.saveIP(), builder.getCurrentDebugLocation());
569   builder.restoreIP(
570       moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd(
571           ompLoc, bodyGenCB, finiCB, !orderedRegionOp.simd()));
572   return bodyGenStatus;
573 }
574 
575 static LogicalResult
576 convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
577                    LLVM::ModuleTranslation &moduleTranslation) {
578   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
579   using StorableBodyGenCallbackTy =
580       llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
581 
582   auto sectionsOp = cast<omp::SectionsOp>(opInst);
583 
584   // TODO: Support the following clauses: private, firstprivate, lastprivate,
585   // reduction, allocate
586   if (!sectionsOp.private_vars().empty() ||
587       !sectionsOp.firstprivate_vars().empty() ||
588       !sectionsOp.lastprivate_vars().empty() ||
589       !sectionsOp.reduction_vars().empty() || sectionsOp.reductions() ||
590       !sectionsOp.allocate_vars().empty() ||
591       !sectionsOp.allocators_vars().empty())
592     return emitError(sectionsOp.getLoc())
593            << "private, firstprivate, lastprivate, reduction and allocate "
594               "clauses are not supported for sections construct";
595 
596   LogicalResult bodyGenStatus = success();
597   SmallVector<StorableBodyGenCallbackTy> sectionCBs;
598 
599   for (Operation &op : *sectionsOp.region().begin()) {
600     auto sectionOp = dyn_cast<omp::SectionOp>(op);
601     if (!sectionOp) // omp.terminator
602       continue;
603 
604     Region &region = sectionOp.region();
605     auto sectionCB = [&region, &builder, &moduleTranslation, &bodyGenStatus](
606                          InsertPointTy allocaIP, InsertPointTy codeGenIP,
607                          llvm::BasicBlock &finiBB) {
608       builder.restoreIP(codeGenIP);
609       builder.CreateBr(&finiBB);
610       convertOmpOpRegions(region, "omp.section.region", *codeGenIP.getBlock(),
611                           finiBB, builder, moduleTranslation, bodyGenStatus);
612     };
613     sectionCBs.push_back(sectionCB);
614   }
615 
616   // No sections within omp.sections operation - skip generation. This situation
617   // is only possible if there is only a terminator operation inside the
618   // sections operation
619   if (sectionCBs.empty())
620     return success();
621 
622   assert(isa<omp::SectionOp>(*sectionsOp.region().op_begin()));
623 
624   // TODO: Perform appropriate actions according to the data-sharing
625   // attribute (shared, private, firstprivate, ...) of variables.
626   // Currently defaults to shared.
627   auto privCB = [&](InsertPointTy, InsertPointTy codeGenIP, llvm::Value &,
628                     llvm::Value &vPtr,
629                     llvm::Value *&replacementValue) -> InsertPointTy {
630     replacementValue = &vPtr;
631     return codeGenIP;
632   };
633 
634   // TODO: Perform finalization actions for variables. This has to be
635   // called for variables which have destructors/finalizers.
636   auto finiCB = [&](InsertPointTy codeGenIP) {};
637 
638   llvm::OpenMPIRBuilder::LocationDescription ompLoc(
639       builder.saveIP(), builder.getCurrentDebugLocation());
640   builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSections(
641       ompLoc, findAllocaInsertPoint(builder, moduleTranslation), sectionCBs,
642       privCB, finiCB, false, sectionsOp.nowait()));
643   return bodyGenStatus;
644 }
645 
/// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
///
/// The translation proceeds in the following order:
///   1. adjust the optional schedule chunk size to the type of the first loop
///      step;
///   2. allocate private copies of the reduction variables at the alloca
///      insertion point and initialize them from the initializer regions of
///      the reduction declarations;
///   3. create one canonical loop per loop dimension, each placed in the body
///      of the previous one, and convert the region of the op into the body
///      of the innermost loop;
///   4. collapse the canonical loops into one and apply either the static or
///      the dynamic workshare schedule to it;
///   5. if there are reductions, emit the reduction code followed by a
///      barrier.
///
/// `opInst` must be an `omp::WsLoopOp`. On success, `builder` is positioned
/// after the generated code. Returns failure if the op has no lower bounds, or
/// if the conversion of a reduction initializer region, of the loop body, or
/// of the reductions fails.
static LogicalResult
convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
                 LLVM::ModuleTranslation &moduleTranslation) {
  auto loop = cast<omp::WsLoopOp>(opInst);
  // TODO: this should be in the op verifier instead.
  if (loop.lowerBound().empty())
    return failure();

  // Static is the default.
  auto schedule =
      loop.schedule_val().getValueOr(omp::ClauseScheduleKind::Static);

  // Find the loop configuration. The type of the first step is used as the
  // induction variable type when adjusting the chunk size below.
  llvm::Value *step = moduleTranslation.lookupValue(loop.step()[0]);
  llvm::Type *ivType = step->getType();
  llvm::Value *chunk = nullptr;
  if (loop.schedule_chunk_var()) {
    llvm::Value *chunkVar =
        moduleTranslation.lookupValue(loop.schedule_chunk_var());
    llvm::Type *chunkVarType = chunkVar->getType();
    assert(chunkVarType->isIntegerTy() &&
           "chunk size must be one integer expression");
    // Make the chunk size as wide as the induction variable: sign-extend a
    // narrower value, truncate a wider one, and use it as is otherwise.
    if (chunkVarType->getIntegerBitWidth() < ivType->getIntegerBitWidth())
      chunk = builder.CreateSExt(chunkVar, ivType);
    else if (chunkVarType->getIntegerBitWidth() > ivType->getIntegerBitWidth())
      chunk = builder.CreateTrunc(chunkVar, ivType);
    else
      chunk = chunkVar;
  }

  // Collect the reduction declarations of the loop; `reductionDecls[i]` is
  // used below together with the i-th reduction variable.
  SmallVector<omp::ReductionDeclareOp> reductionDecls;
  collectReductionDecls(loop, reductionDecls);
  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
      findAllocaInsertPoint(builder, moduleTranslation);

  // Allocate space for privatized reduction variables.
  SmallVector<llvm::Value *> privateReductionVariables;
  DenseMap<Value, llvm::Value *> reductionVariableMap;
  unsigned numReductions = loop.getNumReductionVars();
  privateReductionVariables.reserve(numReductions);
  if (numReductions != 0) {
    // Temporarily move the builder to the alloca insertion point; the guard
    // restores the current insertion point at the end of this scope.
    llvm::IRBuilderBase::InsertPointGuard guard(builder);
    builder.restoreIP(allocaIP);
    for (unsigned i = 0; i < numReductions; ++i) {
      // Reduction variables are LLVM pointers; the private copy has the
      // pointee type.
      auto reductionType =
          loop.reduction_vars()[i].getType().cast<LLVM::LLVMPointerType>();
      llvm::Value *var = builder.CreateAlloca(
          moduleTranslation.convertType(reductionType.getElementType()));
      privateReductionVariables.push_back(var);
      reductionVariableMap.try_emplace(loop.reduction_vars()[i], var);
    }
  }

  // Store the mapping between reduction variables and their private copies on
  // ModuleTranslation stack. It can be then recovered when translating
  // omp.reduce operations in a separate call.
  LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard(
      moduleTranslation, reductionVariableMap);

  // Before the loop, store the initial values of reductions into reduction
  // variables. Although this could be done after allocas, we don't want to
  // interfere with the alloca insertion point.
  for (unsigned i = 0; i < numReductions; ++i) {
    SmallVector<llvm::Value *> phis;
    if (failed(inlineConvertOmpRegions(reductionDecls[i].initializerRegion(),
                                       "omp.reduction.neutral", builder,
                                       moduleTranslation, &phis)))
      return failure();
    assert(phis.size() == 1 && "expected one value to be yielded from the "
                               "reduction neutral element declaration region");
    builder.CreateStore(phis[0], privateReductionVariables[i]);
  }

  // Set up the source location value for OpenMP runtime.
  llvm::DISubprogram *subprogram =
      builder.GetInsertBlock()->getParent()->getSubprogram();
  const llvm::DILocation *diLoc =
      moduleTranslation.translateLoc(opInst.getLoc(), subprogram);
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder.saveIP(),
                                                    llvm::DebugLoc(diLoc));

  // Generator of the canonical loop body.
  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
  // relying on captured variables.
  SmallVector<llvm::CanonicalLoopInfo *> loopInfos;
  SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints;
  LogicalResult bodyGenStatus = success();
  auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) {
    // Make sure further conversions know about the induction variable.
    // `loopInfos.size()` serves as the index of the loop being generated: the
    // loop info of the current loop is appended to `loopInfos` only once
    // `createCanonicalLoop` has returned.
    moduleTranslation.mapValue(
        loop.getRegion().front().getArgument(loopInfos.size()), iv);

    // Capture the body insertion point for use in nested loops. BodyIP of the
    // CanonicalLoopInfo always points to the beginning of the entry block of
    // the body.
    bodyInsertPoints.push_back(ip);

    // Only the innermost loop receives the converted region of the op.
    if (loopInfos.size() != loop.getNumLoops() - 1)
      return;

    // Convert the body of the loop. The body block is split at the insertion
    // point so that the converted region sits between `entryBlock` and
    // `exitBlock`. Failures are reported through `bodyGenStatus`.
    llvm::BasicBlock *entryBlock = ip.getBlock();
    llvm::BasicBlock *exitBlock =
        entryBlock->splitBasicBlock(ip.getPoint(), "omp.wsloop.exit");
    convertOmpOpRegions(loop.region(), "omp.wsloop.region", *entryBlock,
                        *exitBlock, builder, moduleTranslation, bodyGenStatus);
  };

  // Delegate actual loop construction to the OpenMP IRBuilder.
  // TODO: this currently assumes WsLoop is semantically similar to SCF loop,
  // i.e. it has a positive step, uses signed integer semantics. Reconsider
  // this code when WsLoop clearly supports more cases.
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  for (unsigned i = 0, e = loop.getNumLoops(); i < e; ++i) {
    llvm::Value *lowerBound =
        moduleTranslation.lookupValue(loop.lowerBound()[i]);
    llvm::Value *upperBound =
        moduleTranslation.lookupValue(loop.upperBound()[i]);
    llvm::Value *step = moduleTranslation.lookupValue(loop.step()[i]);

    // Make sure the loop trip counts are emitted in the preheader of the
    // outermost loop at the latest, so that they are all available to the
    // collapsed loop that is created below. The outermost loop (i == 0) is
    // created at the location of the op; every other loop is created at the
    // body insertion point recorded for the previous loop.
    llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
    llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
    if (i != 0) {
      loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(),
                                                       llvm::DebugLoc(diLoc));
      computeIP = loopInfos.front()->getPreheaderIP();
    }
    loopInfos.push_back(ompBuilder->createCanonicalLoop(
        loc, bodyGen, lowerBound, upperBound, step,
        /*IsSigned=*/true, loop.inclusive(), computeIP));

    // `bodyGen` cannot return an error directly; check the captured status.
    if (failed(bodyGenStatus))
      return failure();
  }

  // Collapse loops. Store the insertion point because LoopInfos may get
  // invalidated.
  llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP();
  llvm::CanonicalLoopInfo *loopInfo =
      ompBuilder->collapseLoops(diLoc, loopInfos, {});

  // Look up the alloca insertion point again now that the loops have been
  // created.
  allocaIP = findAllocaInsertPoint(builder, moduleTranslation);

  bool isSimd = loop.simd_modifier();

  if (schedule == omp::ClauseScheduleKind::Static) {
    // The static schedule keeps the insertion point saved before collapsing.
    ompBuilder->applyStaticWorkshareLoop(ompLoc.DL, loopInfo, allocaIP,
                                         !loop.nowait(), chunk);
  } else {
    // Select the schedule type for the non-static schedules. Only the guided
    // and runtime schedules have a dedicated variant for the simd modifier
    // here.
    llvm::omp::OMPScheduleType schedType;
    switch (schedule) {
    case omp::ClauseScheduleKind::Dynamic:
      schedType = llvm::omp::OMPScheduleType::DynamicChunked;
      break;
    case omp::ClauseScheduleKind::Guided:
      if (isSimd)
        schedType = llvm::omp::OMPScheduleType::GuidedSimd;
      else
        schedType = llvm::omp::OMPScheduleType::GuidedChunked;
      break;
    case omp::ClauseScheduleKind::Auto:
      schedType = llvm::omp::OMPScheduleType::Auto;
      break;
    case omp::ClauseScheduleKind::Runtime:
      if (isSimd)
        schedType = llvm::omp::OMPScheduleType::RuntimeSimd;
      else
        schedType = llvm::omp::OMPScheduleType::Runtime;
      break;
    default:
      llvm_unreachable("Unknown schedule value");
      break;
    }

    // Combine the monotonic/nonmonotonic modifier, if any, with the schedule
    // type.
    if (Optional<omp::ScheduleModifier> modifier = loop.schedule_modifier()) {
      switch (*modifier) {
      case omp::ScheduleModifier::monotonic:
        schedType |= llvm::omp::OMPScheduleType::ModifierMonotonic;
        break;
      case omp::ScheduleModifier::nonmonotonic:
        schedType |= llvm::omp::OMPScheduleType::ModifierNonmonotonic;
        break;
      default:
        // Nothing to do here.
        break;
      }
    }
    // Unlike the static case, the insertion point returned here replaces the
    // one saved before collapsing.
    afterIP = ompBuilder->applyDynamicWorkshareLoop(
        ompLoc.DL, loopInfo, allocaIP, schedType, !loop.nowait(), chunk);
  }

  // Continue building IR after the loop. Note that the LoopInfo returned by
  // `collapseLoops` points inside the outermost loop and is intended for
  // potential further loop transformations. Use the insertion point stored
  // before collapsing loops instead.
  builder.restoreIP(afterIP);

  // Process the reductions if required.
  if (numReductions == 0)
    return success();

  // Create the reduction generators. We need to own them here because
  // ReductionInfo only accepts references to the generators.
  SmallVector<OwningReductionGen> owningReductionGens;
  SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
  for (unsigned i = 0; i < numReductions; ++i) {
    owningReductionGens.push_back(
        makeReductionGen(reductionDecls[i], builder, moduleTranslation));
    owningAtomicReductionGens.push_back(
        makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation));
  }

  // Collect the reduction information.
  SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos;
  reductionInfos.reserve(numReductions);
  for (unsigned i = 0; i < numReductions; ++i) {
    // Pass a null atomic generator when the owning generator for this
    // reduction is empty.
    llvm::OpenMPIRBuilder::AtomicReductionGenTy atomicGen = nullptr;
    if (owningAtomicReductionGens[i])
      atomicGen = owningAtomicReductionGens[i];
    llvm::Value *variable =
        moduleTranslation.lookupValue(loop.reduction_vars()[i]);
    reductionInfos.push_back({variable->getType()->getPointerElementType(),
                              variable, privateReductionVariables[i],
                              owningReductionGens[i], atomicGen});
  }

  // The call to createReductions below expects the block to have a
  // terminator. Create an unreachable instruction to serve as terminator
  // and remove it later.
  llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
  builder.SetInsertPoint(tempTerminator);
  llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint =
      ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos,
                                   loop.nowait());
  // NOTE(review): this early return leaves the temporary terminator in place.
  if (!contInsertPoint.getBlock())
    return loop->emitOpError() << "failed to convert reductions";
  auto nextInsertionPoint =
      ompBuilder->createBarrier(contInsertPoint, llvm::omp::OMPD_for);
  tempTerminator->eraseFromParent();
  builder.restoreIP(nextInsertionPoint);

  return success();
}
893 
894 // Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
895 llvm::AtomicOrdering
896 convertAtomicOrdering(Optional<omp::ClauseMemoryOrderKind> ao) {
897   if (!ao)
898     return llvm::AtomicOrdering::Monotonic; // Default Memory Ordering
899 
900   switch (*ao) {
901   case omp::ClauseMemoryOrderKind::seq_cst:
902     return llvm::AtomicOrdering::SequentiallyConsistent;
903   case omp::ClauseMemoryOrderKind::acq_rel:
904     return llvm::AtomicOrdering::AcquireRelease;
905   case omp::ClauseMemoryOrderKind::acquire:
906     return llvm::AtomicOrdering::Acquire;
907   case omp::ClauseMemoryOrderKind::release:
908     return llvm::AtomicOrdering::Release;
909   case omp::ClauseMemoryOrderKind::relaxed:
910     return llvm::AtomicOrdering::Monotonic;
911   }
912 }
913 
914 // Convert omp.atomic.read operation to LLVM IR.
915 static LogicalResult
916 convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder,
917                      LLVM::ModuleTranslation &moduleTranslation) {
918 
919   auto readOp = cast<omp::AtomicReadOp>(opInst);
920   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
921 
922   // Set up the source location value for OpenMP runtime.
923   llvm::DISubprogram *subprogram =
924       builder.GetInsertBlock()->getParent()->getSubprogram();
925   const llvm::DILocation *diLoc =
926       moduleTranslation.translateLoc(opInst.getLoc(), subprogram);
927   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder.saveIP(),
928                                                     llvm::DebugLoc(diLoc));
929   llvm::AtomicOrdering AO = convertAtomicOrdering(readOp.memory_order());
930   llvm::Value *x = moduleTranslation.lookupValue(readOp.x());
931   llvm::Value *v = moduleTranslation.lookupValue(readOp.v());
932   llvm::OpenMPIRBuilder::AtomicOpValue V = {v, false, false};
933   llvm::OpenMPIRBuilder::AtomicOpValue X = {x, false, false};
934   builder.restoreIP(ompBuilder->createAtomicRead(ompLoc, X, V, AO));
935   return success();
936 }
937 
938 /// Converts an omp.atomic.write operation to LLVM IR.
939 static LogicalResult
940 convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder,
941                       LLVM::ModuleTranslation &moduleTranslation) {
942   auto writeOp = cast<omp::AtomicWriteOp>(opInst);
943   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
944 
945   // Set up the source location value for OpenMP runtime.
946   llvm::DISubprogram *subprogram =
947       builder.GetInsertBlock()->getParent()->getSubprogram();
948   const llvm::DILocation *diLoc =
949       moduleTranslation.translateLoc(opInst.getLoc(), subprogram);
950   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder.saveIP(),
951                                                     llvm::DebugLoc(diLoc));
952   llvm::AtomicOrdering ao = convertAtomicOrdering(writeOp.memory_order());
953   llvm::Value *expr = moduleTranslation.lookupValue(writeOp.value());
954   llvm::Value *dest = moduleTranslation.lookupValue(writeOp.address());
955   llvm::OpenMPIRBuilder::AtomicOpValue x = {dest, /*isSigned=*/false,
956                                             /*isVolatile=*/false};
957   builder.restoreIP(ompBuilder->createAtomicWrite(ompLoc, x, expr, ao));
958   return success();
959 }
960 
961 /// Converts an OpenMP reduction operation using OpenMPIRBuilder. Expects the
962 /// mapping between reduction variables and their private equivalents to have
963 /// been stored on the ModuleTranslation stack. Currently only supports
964 /// reduction within WsLoopOp, but can be easily extended.
965 static LogicalResult
966 convertOmpReductionOp(omp::ReductionOp reductionOp,
967                       llvm::IRBuilderBase &builder,
968                       LLVM::ModuleTranslation &moduleTranslation) {
969   // Find the declaration that corresponds to the reduction op.
970   auto reductionContainer = reductionOp->getParentOfType<omp::WsLoopOp>();
971   omp::ReductionDeclareOp declaration =
972       findReductionDecl(reductionContainer, reductionOp);
973   assert(declaration && "could not find reduction declaration");
974 
975   // Retrieve the mapping between reduction variables and their private
976   // equivalents.
977   const DenseMap<Value, llvm::Value *> *reductionVariableMap = nullptr;
978   moduleTranslation.stackWalk<OpenMPVarMappingStackFrame>(
979       [&](const OpenMPVarMappingStackFrame &frame) {
980         reductionVariableMap = &frame.mapping;
981         return WalkResult::interrupt();
982       });
983   assert(reductionVariableMap && "couldn't find private reduction variables");
984 
985   // Translate the reduction operation by emitting the body of the corresponding
986   // reduction declaration.
987   Region &reductionRegion = declaration.reductionRegion();
988   llvm::Value *privateReductionVar =
989       reductionVariableMap->lookup(reductionOp.accumulator());
990   llvm::Value *reductionVal = builder.CreateLoad(
991       moduleTranslation.convertType(reductionOp.operand().getType()),
992       privateReductionVar);
993 
994   moduleTranslation.mapValue(reductionRegion.front().getArgument(0),
995                              reductionVal);
996   moduleTranslation.mapValue(
997       reductionRegion.front().getArgument(1),
998       moduleTranslation.lookupValue(reductionOp.operand()));
999 
1000   SmallVector<llvm::Value *> phis;
1001   if (failed(inlineConvertOmpRegions(reductionRegion, "omp.reduction.body",
1002                                      builder, moduleTranslation, &phis)))
1003     return failure();
1004   assert(phis.size() == 1 && "expected one value to be yielded from "
1005                              "the reduction body declaration region");
1006   builder.CreateStore(phis[0], privateReductionVar);
1007   return success();
1008 }
1009 
namespace {

/// Implementation of the dialect interface that converts operations belonging
/// to the OpenMP dialect to LLVM IR. The class lives in an anonymous namespace
/// and is therefore only usable from this file.
class OpenMPDialectLLVMIRTranslationInterface
    : public LLVMTranslationDialectInterface {
public:
  // Inherit the constructors of the base interface.
  using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface;

  /// Translates the given operation to LLVM IR using the provided IR builder
  /// and saving the state in `moduleTranslation`. Returns a LogicalResult that
  /// indicates whether the translation succeeded.
  LogicalResult
  convertOperation(Operation *op, llvm::IRBuilderBase &builder,
                   LLVM::ModuleTranslation &moduleTranslation) const final;
};

} // namespace
1027 
1028 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR
1029 /// (including OpenMP runtime calls).
1030 LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
1031     Operation *op, llvm::IRBuilderBase &builder,
1032     LLVM::ModuleTranslation &moduleTranslation) const {
1033 
1034   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1035 
1036   return llvm::TypeSwitch<Operation *, LogicalResult>(op)
1037       .Case([&](omp::BarrierOp) {
1038         ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier);
1039         return success();
1040       })
1041       .Case([&](omp::TaskwaitOp) {
1042         ompBuilder->createTaskwait(builder.saveIP());
1043         return success();
1044       })
1045       .Case([&](omp::TaskyieldOp) {
1046         ompBuilder->createTaskyield(builder.saveIP());
1047         return success();
1048       })
1049       .Case([&](omp::FlushOp) {
1050         // No support in Openmp runtime function (__kmpc_flush) to accept
1051         // the argument list.
1052         // OpenMP standard states the following:
1053         //  "An implementation may implement a flush with a list by ignoring
1054         //   the list, and treating it the same as a flush without a list."
1055         //
1056         // The argument list is discarded so that, flush with a list is treated
1057         // same as a flush without a list.
1058         ompBuilder->createFlush(builder.saveIP());
1059         return success();
1060       })
1061       .Case([&](omp::ParallelOp op) {
1062         return convertOmpParallel(op, builder, moduleTranslation);
1063       })
1064       .Case([&](omp::ReductionOp reductionOp) {
1065         return convertOmpReductionOp(reductionOp, builder, moduleTranslation);
1066       })
1067       .Case([&](omp::MasterOp) {
1068         return convertOmpMaster(*op, builder, moduleTranslation);
1069       })
1070       .Case([&](omp::CriticalOp) {
1071         return convertOmpCritical(*op, builder, moduleTranslation);
1072       })
1073       .Case([&](omp::OrderedRegionOp) {
1074         return convertOmpOrderedRegion(*op, builder, moduleTranslation);
1075       })
1076       .Case([&](omp::OrderedOp) {
1077         return convertOmpOrdered(*op, builder, moduleTranslation);
1078       })
1079       .Case([&](omp::WsLoopOp) {
1080         return convertOmpWsLoop(*op, builder, moduleTranslation);
1081       })
1082       .Case([&](omp::AtomicReadOp) {
1083         return convertOmpAtomicRead(*op, builder, moduleTranslation);
1084       })
1085       .Case([&](omp::AtomicWriteOp) {
1086         return convertOmpAtomicWrite(*op, builder, moduleTranslation);
1087       })
1088       .Case([&](omp::SectionsOp) {
1089         return convertOmpSections(*op, builder, moduleTranslation);
1090       })
1091       .Case<omp::YieldOp, omp::TerminatorOp, omp::ReductionDeclareOp,
1092             omp::CriticalDeclareOp>([](auto op) {
1093         // `yield` and `terminator` can be just omitted. The block structure
1094         // was created in the region that handles their parent operation.
1095         // `reduction.declare` will be used by reductions and is not
1096         // converted directly, skip it.
1097         // `critical.declare` is only used to declare names of critical
1098         // sections which will be used by `critical` ops and hence can be
1099         // ignored for lowering. The OpenMP IRBuilder will create unique
1100         // name for critical section names.
1101         return success();
1102       })
1103       .Default([&](Operation *inst) {
1104         return inst->emitError("unsupported OpenMP operation: ")
1105                << inst->getName();
1106       });
1107 }
1108 
1109 void mlir::registerOpenMPDialectTranslation(DialectRegistry &registry) {
1110   registry.insert<omp::OpenMPDialect>();
1111   registry.addDialectInterface<omp::OpenMPDialect,
1112                                OpenMPDialectLLVMIRTranslationInterface>();
1113 }
1114 
1115 void mlir::registerOpenMPDialectTranslation(MLIRContext &context) {
1116   DialectRegistry registry;
1117   registerOpenMPDialectTranslation(registry);
1118   context.appendDialectRegistry(registry);
1119 }
1120