//===- OpenMPToLLVMIRTranslation.cpp - Translate OpenMP dialect to LLVM IR-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements a translation between the MLIR OpenMP dialect and LLVM // IR. // //===----------------------------------------------------------------------===// #include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/IR/Operation.h" #include "mlir/Support/LLVM.h" #include "mlir/Target/LLVMIR/ModuleTranslation.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/TypeSwitch.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/IRBuilder.h" using namespace mlir; /// Converts the given region that appears within an OpenMP dialect operation to /// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the /// region, and a branch from any block with an successor-less OpenMP terminator /// to `continuationBlock`. static void convertOmpOpRegions(Region ®ion, StringRef blockName, llvm::BasicBlock &sourceBlock, llvm::BasicBlock &continuationBlock, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, LogicalResult &bodyGenStatus) { llvm::LLVMContext &llvmContext = builder.getContext(); for (Block &bb : region) { llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create( llvmContext, blockName, builder.GetInsertBlock()->getParent()); moduleTranslation.mapBlock(&bb, llvmBB); } llvm::Instruction *sourceTerminator = sourceBlock.getTerminator(); // Convert blocks one by one in topological order to ensure // defs are converted before uses. llvm::SetVector blocks = LLVM::detail::getTopologicallySortedBlocks(region); for (Block *bb : blocks) { llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb); // Retarget the branch of the entry block to the entry block of the // converted region (regions are single-entry). if (bb->isEntryBlock()) { assert(sourceTerminator->getNumSuccessors() == 1 && "provided entry block has multiple successors"); assert(sourceTerminator->getSuccessor(0) == &continuationBlock && "ContinuationBlock is not the successor of the entry block"); sourceTerminator->setSuccessor(0, llvmBB); } llvm::IRBuilderBase::InsertPointGuard guard(builder); if (failed( moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder))) { bodyGenStatus = failure(); return; } // Special handling for `omp.yield` and `omp.terminator` (we may have more // than one): they return the control to the parent OpenMP dialect operation // so replace them with the branch to the continuation block. We handle this // here to avoid relying inter-function communication through the // ModuleTranslation class to set up the correct insertion point. This is // also consistent with MLIR's idiom of handling special region terminators // in the same code that handles the region-owning operation. if (isa(bb->getTerminator())) builder.CreateBr(&continuationBlock); } // Finally, after all blocks have been traversed and values mapped, // connect the PHI nodes to the results of preceding blocks. LLVM::detail::connectPHINodes(region, moduleTranslation); } /// Converts the OpenMP parallel operation to LLVM IR. static LogicalResult convertOmpParallel(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; // TODO: support error propagation in OpenMPIRBuilder and use it instead of // relying on captured variables. LogicalResult bodyGenStatus = success(); auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, llvm::BasicBlock &continuationBlock) { // ParallelOp has only one region associated with it. auto ®ion = cast(opInst).getRegion(); convertOmpOpRegions(region, "omp.par.region", *codeGenIP.getBlock(), continuationBlock, builder, moduleTranslation, bodyGenStatus); }; // TODO: Perform appropriate actions according to the data-sharing // attribute (shared, private, firstprivate, ...) of variables. // Currently defaults to shared. auto privCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, llvm::Value &, llvm::Value &vPtr, llvm::Value *&replacementValue) -> InsertPointTy { replacementValue = &vPtr; return codeGenIP; }; // TODO: Perform finalization actions for variables. This has to be // called for variables which have destructors/finalizers. auto finiCB = [&](InsertPointTy codeGenIP) {}; llvm::Value *ifCond = nullptr; if (auto ifExprVar = cast(opInst).if_expr_var()) ifCond = moduleTranslation.lookupValue(ifExprVar); llvm::Value *numThreads = nullptr; if (auto numThreadsVar = cast(opInst).num_threads_var()) numThreads = moduleTranslation.lookupValue(numThreadsVar); llvm::omp::ProcBindKind pbKind = llvm::omp::OMP_PROC_BIND_default; if (auto bind = cast(opInst).proc_bind_val()) pbKind = llvm::omp::getProcBindKind(bind.getValue()); // TODO: Is the Parallel construct cancellable? bool isCancellable = false; // TODO: Determine the actual alloca insertion point, e.g., the function // entry or the alloca insertion point as provided by the body callback // above. llvm::OpenMPIRBuilder::InsertPointTy allocaIP(builder.saveIP()); if (failed(bodyGenStatus)) return failure(); llvm::OpenMPIRBuilder::LocationDescription ompLoc( builder.saveIP(), builder.getCurrentDebugLocation()); builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createParallel( ompLoc, allocaIP, bodyGenCB, privCB, finiCB, ifCond, numThreads, pbKind, isCancellable)); return success(); } /// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder. static LogicalResult convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; // TODO: support error propagation in OpenMPIRBuilder and use it instead of // relying on captured variables. LogicalResult bodyGenStatus = success(); auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, llvm::BasicBlock &continuationBlock) { // MasterOp has only one region associated with it. auto ®ion = cast(opInst).getRegion(); convertOmpOpRegions(region, "omp.master.region", *codeGenIP.getBlock(), continuationBlock, builder, moduleTranslation, bodyGenStatus); }; // TODO: Perform finalization actions for variables. This has to be // called for variables which have destructors/finalizers. auto finiCB = [&](InsertPointTy codeGenIP) {}; llvm::OpenMPIRBuilder::LocationDescription ompLoc( builder.saveIP(), builder.getCurrentDebugLocation()); builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createMaster( ompLoc, bodyGenCB, finiCB)); return success(); } /// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder. static LogicalResult convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { auto loop = cast(opInst); // TODO: this should be in the op verifier instead. if (loop.lowerBound().empty()) return failure(); if (loop.getNumLoops() != 1) return opInst.emitOpError("collapsed loops not yet supported"); if (loop.schedule_val().hasValue() && omp::symbolizeClauseScheduleKind(loop.schedule_val().getValue()) != omp::ClauseScheduleKind::Static) return opInst.emitOpError( "only static (default) loop schedule is currently supported"); // Find the loop configuration. llvm::Value *lowerBound = moduleTranslation.lookupValue(loop.lowerBound()[0]); llvm::Value *upperBound = moduleTranslation.lookupValue(loop.upperBound()[0]); llvm::Value *step = moduleTranslation.lookupValue(loop.step()[0]); llvm::Type *ivType = step->getType(); llvm::Value *chunk = loop.schedule_chunk_var() ? moduleTranslation.lookupValue(loop.schedule_chunk_var()) : llvm::ConstantInt::get(ivType, 1); // Set up the source location value for OpenMP runtime. llvm::DISubprogram *subprogram = builder.GetInsertBlock()->getParent()->getSubprogram(); const llvm::DILocation *diLoc = moduleTranslation.translateLoc(opInst.getLoc(), subprogram); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder.saveIP(), llvm::DebugLoc(diLoc)); // Generator of the canonical loop body. Produces an SESE region of basic // blocks. // TODO: support error propagation in OpenMPIRBuilder and use it instead of // relying on captured variables. LogicalResult bodyGenStatus = success(); auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) { llvm::IRBuilder<>::InsertPointGuard guard(builder); // Make sure further conversions know about the induction variable. moduleTranslation.mapValue(loop.getRegion().front().getArgument(0), iv); llvm::BasicBlock *entryBlock = ip.getBlock(); llvm::BasicBlock *exitBlock = entryBlock->splitBasicBlock(ip.getPoint(), "omp.wsloop.exit"); // Convert the body of the loop. convertOmpOpRegions(loop.region(), "omp.wsloop.region", *entryBlock, *exitBlock, builder, moduleTranslation, bodyGenStatus); }; // Delegate actual loop construction to the OpenMP IRBuilder. // TODO: this currently assumes WsLoop is semantically similar to SCF loop, // i.e. it has a positive step, uses signed integer semantics. Reconsider // this code when WsLoop clearly supports more cases. llvm::BasicBlock *insertBlock = builder.GetInsertBlock(); llvm::CanonicalLoopInfo *loopInfo = moduleTranslation.getOpenMPBuilder()->createCanonicalLoop( ompLoc, bodyGen, lowerBound, upperBound, step, /*IsSigned=*/true, /*InclusiveStop=*/loop.inclusive()); if (failed(bodyGenStatus)) return failure(); // TODO: get the alloca insertion point from the parallel operation builder. // If we insert the at the top of the current function, they will be passed as // extra arguments into the function the parallel operation builder outlines. // Put them at the start of the current block for now. llvm::OpenMPIRBuilder::InsertPointTy allocaIP( insertBlock, insertBlock->getFirstInsertionPt()); loopInfo = moduleTranslation.getOpenMPBuilder()->createStaticWorkshareLoop( ompLoc, loopInfo, allocaIP, !loop.nowait(), chunk); // Continue building IR after the loop. builder.restoreIP(loopInfo->getAfterIP()); return success(); } namespace { /// Implementation of the dialect interface that converts operations belonging /// to the OpenMP dialect to LLVM IR. class OpenMPDialectLLVMIRTranslationInterface : public LLVMTranslationDialectInterface { public: using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface; /// Translates the given operation to LLVM IR using the provided IR builder /// and saving the state in `moduleTranslation`. LogicalResult convertOperation(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) const final; }; } // end namespace /// Given an OpenMP MLIR operation, create the corresponding LLVM IR /// (including OpenMP runtime calls). LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation( Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) const { llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); return llvm::TypeSwitch(op) .Case([&](omp::BarrierOp) { ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier); return success(); }) .Case([&](omp::TaskwaitOp) { ompBuilder->createTaskwait(builder.saveIP()); return success(); }) .Case([&](omp::TaskyieldOp) { ompBuilder->createTaskyield(builder.saveIP()); return success(); }) .Case([&](omp::FlushOp) { // No support in Openmp runtime function (__kmpc_flush) to accept // the argument list. // OpenMP standard states the following: // "An implementation may implement a flush with a list by ignoring // the list, and treating it the same as a flush without a list." // // The argument list is discarded so that, flush with a list is treated // same as a flush without a list. ompBuilder->createFlush(builder.saveIP()); return success(); }) .Case([&](omp::ParallelOp) { return convertOmpParallel(*op, builder, moduleTranslation); }) .Case([&](omp::MasterOp) { return convertOmpMaster(*op, builder, moduleTranslation); }) .Case([&](omp::WsLoopOp) { return convertOmpWsLoop(*op, builder, moduleTranslation); }) .Case([](auto op) { // `yield` and `terminator` can be just omitted. The block structure was // created in the function that handles their parent operation. assert(op->getNumOperands() == 0 && "unexpected OpenMP terminator with operands"); return success(); }) .Default([&](Operation *inst) { return inst->emitError("unsupported OpenMP operation: ") << inst->getName(); }); } void mlir::registerOpenMPDialectTranslation(DialectRegistry ®istry) { registry.insert(); registry.addDialectInterface(); } void mlir::registerOpenMPDialectTranslation(MLIRContext &context) { DialectRegistry registry; registerOpenMPDialectTranslation(registry); context.appendDialectRegistry(registry); }