1 //===- OpenMPToLLVMIRTranslation.cpp - Translate OpenMP dialect to LLVM IR-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a translation between the MLIR OpenMP dialect and LLVM 10 // IR. 11 // 12 //===----------------------------------------------------------------------===// 13 #include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h" 14 #include "mlir/Dialect/OpenMP/OpenMPDialect.h" 15 #include "mlir/IR/BlockAndValueMapping.h" 16 #include "mlir/IR/Operation.h" 17 #include "mlir/Support/LLVM.h" 18 #include "mlir/Target/LLVMIR/ModuleTranslation.h" 19 20 #include "llvm/ADT/SetVector.h" 21 #include "llvm/ADT/TypeSwitch.h" 22 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" 23 #include "llvm/IR/IRBuilder.h" 24 25 using namespace mlir; 26 27 namespace { 28 /// ModuleTranslation stack frame for OpenMP operations. This keeps track of the 29 /// insertion points for allocas. 30 class OpenMPAllocaStackFrame 31 : public LLVM::ModuleTranslation::StackFrameBase<OpenMPAllocaStackFrame> { 32 public: 33 explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP) 34 : allocaInsertPoint(allocaIP) {} 35 llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint; 36 }; 37 38 /// ModuleTranslation stack frame containing the partial mapping between MLIR 39 /// values and their LLVM IR equivalents. 40 class OpenMPVarMappingStackFrame 41 : public LLVM::ModuleTranslation::StackFrameBase< 42 OpenMPVarMappingStackFrame> { 43 public: 44 explicit OpenMPVarMappingStackFrame( 45 const DenseMap<Value, llvm::Value *> &mapping) 46 : mapping(mapping) {} 47 48 DenseMap<Value, llvm::Value *> mapping; 49 }; 50 } // namespace 51 52 /// Find the insertion point for allocas given the current insertion point for 53 /// normal operations in the builder. 54 static llvm::OpenMPIRBuilder::InsertPointTy 55 findAllocaInsertPoint(llvm::IRBuilderBase &builder, 56 const LLVM::ModuleTranslation &moduleTranslation) { 57 // If there is an alloca insertion point on stack, i.e. we are in a nested 58 // operation and a specific point was provided by some surrounding operation, 59 // use it. 60 llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint; 61 WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocaStackFrame>( 62 [&](const OpenMPAllocaStackFrame &frame) { 63 allocaInsertPoint = frame.allocaInsertPoint; 64 return WalkResult::interrupt(); 65 }); 66 if (walkResult.wasInterrupted()) 67 return allocaInsertPoint; 68 69 // Otherwise, insert to the entry block of the surrounding function. 70 llvm::BasicBlock &funcEntryBlock = 71 builder.GetInsertBlock()->getParent()->getEntryBlock(); 72 return llvm::OpenMPIRBuilder::InsertPointTy( 73 &funcEntryBlock, funcEntryBlock.getFirstInsertionPt()); 74 } 75 76 /// Converts the given region that appears within an OpenMP dialect operation to 77 /// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the 78 /// region, and a branch from any block with an successor-less OpenMP terminator 79 /// to `continuationBlock`. Populates `continuationBlockPHIs` with the PHI nodes 80 /// of the continuation block if provided. 81 static void convertOmpOpRegions( 82 Region ®ion, StringRef blockName, llvm::BasicBlock &sourceBlock, 83 llvm::BasicBlock &continuationBlock, llvm::IRBuilderBase &builder, 84 LLVM::ModuleTranslation &moduleTranslation, LogicalResult &bodyGenStatus, 85 SmallVectorImpl<llvm::PHINode *> *continuationBlockPHIs = nullptr) { 86 llvm::LLVMContext &llvmContext = builder.getContext(); 87 for (Block &bb : region) { 88 llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create( 89 llvmContext, blockName, builder.GetInsertBlock()->getParent(), 90 builder.GetInsertBlock()->getNextNode()); 91 moduleTranslation.mapBlock(&bb, llvmBB); 92 } 93 94 llvm::Instruction *sourceTerminator = sourceBlock.getTerminator(); 95 96 // Terminators (namely YieldOp) may be forwarding values to the region that 97 // need to be available in the continuation block. Collect the types of these 98 // operands in preparation of creating PHI nodes. 99 SmallVector<llvm::Type *> continuationBlockPHITypes; 100 bool operandsProcessed = false; 101 unsigned numYields = 0; 102 for (Block &bb : region.getBlocks()) { 103 if (omp::YieldOp yield = dyn_cast<omp::YieldOp>(bb.getTerminator())) { 104 if (!operandsProcessed) { 105 for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) { 106 continuationBlockPHITypes.push_back( 107 moduleTranslation.convertType(yield->getOperand(i).getType())); 108 } 109 operandsProcessed = true; 110 } else { 111 assert(continuationBlockPHITypes.size() == yield->getNumOperands() && 112 "mismatching number of values yielded from the region"); 113 for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) { 114 llvm::Type *operandType = 115 moduleTranslation.convertType(yield->getOperand(i).getType()); 116 (void)operandType; 117 assert(continuationBlockPHITypes[i] == operandType && 118 "values of mismatching types yielded from the region"); 119 } 120 } 121 numYields++; 122 } 123 } 124 125 // Insert PHI nodes in the continuation block for any values forwarded by the 126 // terminators in this region. 127 if (!continuationBlockPHITypes.empty()) 128 assert( 129 continuationBlockPHIs && 130 "expected continuation block PHIs if converted regions yield values"); 131 if (continuationBlockPHIs) { 132 llvm::IRBuilderBase::InsertPointGuard guard(builder); 133 continuationBlockPHIs->reserve(continuationBlockPHITypes.size()); 134 builder.SetInsertPoint(&continuationBlock, continuationBlock.begin()); 135 for (llvm::Type *ty : continuationBlockPHITypes) 136 continuationBlockPHIs->push_back(builder.CreatePHI(ty, numYields)); 137 } 138 139 // Convert blocks one by one in topological order to ensure 140 // defs are converted before uses. 141 SetVector<Block *> blocks = 142 LLVM::detail::getTopologicallySortedBlocks(region); 143 for (Block *bb : blocks) { 144 llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb); 145 // Retarget the branch of the entry block to the entry block of the 146 // converted region (regions are single-entry). 147 if (bb->isEntryBlock()) { 148 assert(sourceTerminator->getNumSuccessors() == 1 && 149 "provided entry block has multiple successors"); 150 assert(sourceTerminator->getSuccessor(0) == &continuationBlock && 151 "ContinuationBlock is not the successor of the entry block"); 152 sourceTerminator->setSuccessor(0, llvmBB); 153 } 154 155 llvm::IRBuilderBase::InsertPointGuard guard(builder); 156 if (failed( 157 moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder))) { 158 bodyGenStatus = failure(); 159 return; 160 } 161 162 // Special handling for `omp.yield` and `omp.terminator` (we may have more 163 // than one): they return the control to the parent OpenMP dialect operation 164 // so replace them with the branch to the continuation block. We handle this 165 // here to avoid relying inter-function communication through the 166 // ModuleTranslation class to set up the correct insertion point. This is 167 // also consistent with MLIR's idiom of handling special region terminators 168 // in the same code that handles the region-owning operation. 169 Operation *terminator = bb->getTerminator(); 170 if (isa<omp::TerminatorOp, omp::YieldOp>(terminator)) { 171 builder.CreateBr(&continuationBlock); 172 173 for (unsigned i = 0, e = terminator->getNumOperands(); i < e; ++i) 174 (*continuationBlockPHIs)[i]->addIncoming( 175 moduleTranslation.lookupValue(terminator->getOperand(i)), llvmBB); 176 } 177 } 178 // After all blocks have been traversed and values mapped, connect the PHI 179 // nodes to the results of preceding blocks. 180 LLVM::detail::connectPHINodes(region, moduleTranslation); 181 182 // Remove the blocks and values defined in this region from the mapping since 183 // they are not visible outside of this region. This allows the same region to 184 // be converted several times, that is cloned, without clashes, and slightly 185 // speeds up the lookups. 186 moduleTranslation.forgetMapping(region); 187 } 188 189 /// Convert ProcBindKind from MLIR-generated enum to LLVM enum. 190 static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) { 191 switch (kind) { 192 case omp::ClauseProcBindKind::close: 193 return llvm::omp::ProcBindKind::OMP_PROC_BIND_close; 194 case omp::ClauseProcBindKind::master: 195 return llvm::omp::ProcBindKind::OMP_PROC_BIND_master; 196 case omp::ClauseProcBindKind::primary: 197 return llvm::omp::ProcBindKind::OMP_PROC_BIND_primary; 198 case omp::ClauseProcBindKind::spread: 199 return llvm::omp::ProcBindKind::OMP_PROC_BIND_spread; 200 } 201 llvm_unreachable("Unknown ClauseProcBindKind kind"); 202 } 203 204 /// Converts the OpenMP parallel operation to LLVM IR. 205 static LogicalResult 206 convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, 207 LLVM::ModuleTranslation &moduleTranslation) { 208 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 209 // TODO: support error propagation in OpenMPIRBuilder and use it instead of 210 // relying on captured variables. 211 LogicalResult bodyGenStatus = success(); 212 213 auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, 214 llvm::BasicBlock &continuationBlock) { 215 // Save the alloca insertion point on ModuleTranslation stack for use in 216 // nested regions. 217 LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame( 218 moduleTranslation, allocaIP); 219 220 // ParallelOp has only one region associated with it. 221 convertOmpOpRegions(opInst.getRegion(), "omp.par.region", 222 *codeGenIP.getBlock(), continuationBlock, builder, 223 moduleTranslation, bodyGenStatus); 224 }; 225 226 // TODO: Perform appropriate actions according to the data-sharing 227 // attribute (shared, private, firstprivate, ...) of variables. 228 // Currently defaults to shared. 229 auto privCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, 230 llvm::Value &, llvm::Value &vPtr, 231 llvm::Value *&replacementValue) -> InsertPointTy { 232 replacementValue = &vPtr; 233 234 return codeGenIP; 235 }; 236 237 // TODO: Perform finalization actions for variables. This has to be 238 // called for variables which have destructors/finalizers. 239 auto finiCB = [&](InsertPointTy codeGenIP) {}; 240 241 llvm::Value *ifCond = nullptr; 242 if (auto ifExprVar = opInst.if_expr_var()) 243 ifCond = moduleTranslation.lookupValue(ifExprVar); 244 llvm::Value *numThreads = nullptr; 245 if (auto numThreadsVar = opInst.num_threads_var()) 246 numThreads = moduleTranslation.lookupValue(numThreadsVar); 247 auto pbKind = llvm::omp::OMP_PROC_BIND_default; 248 if (auto bind = opInst.proc_bind_val()) 249 pbKind = getProcBindKind(*bind); 250 // TODO: Is the Parallel construct cancellable? 251 bool isCancellable = false; 252 253 // Ensure that the BasicBlock for the the parallel region is sparate from the 254 // function entry which we may need to insert allocas. 255 if (builder.GetInsertBlock() == 256 &builder.GetInsertBlock()->getParent()->getEntryBlock()) { 257 assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() && 258 "Assuming end of basic block"); 259 llvm::BasicBlock *entryBB = 260 llvm::BasicBlock::Create(builder.getContext(), "parallel.entry", 261 builder.GetInsertBlock()->getParent(), 262 builder.GetInsertBlock()->getNextNode()); 263 builder.CreateBr(entryBB); 264 builder.SetInsertPoint(entryBB); 265 } 266 llvm::OpenMPIRBuilder::LocationDescription ompLoc( 267 builder.saveIP(), builder.getCurrentDebugLocation()); 268 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createParallel( 269 ompLoc, findAllocaInsertPoint(builder, moduleTranslation), bodyGenCB, 270 privCB, finiCB, ifCond, numThreads, pbKind, isCancellable)); 271 272 return bodyGenStatus; 273 } 274 275 /// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder. 276 static LogicalResult 277 convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder, 278 LLVM::ModuleTranslation &moduleTranslation) { 279 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 280 // TODO: support error propagation in OpenMPIRBuilder and use it instead of 281 // relying on captured variables. 282 LogicalResult bodyGenStatus = success(); 283 284 auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, 285 llvm::BasicBlock &continuationBlock) { 286 // MasterOp has only one region associated with it. 287 auto ®ion = cast<omp::MasterOp>(opInst).getRegion(); 288 convertOmpOpRegions(region, "omp.master.region", *codeGenIP.getBlock(), 289 continuationBlock, builder, moduleTranslation, 290 bodyGenStatus); 291 }; 292 293 // TODO: Perform finalization actions for variables. This has to be 294 // called for variables which have destructors/finalizers. 295 auto finiCB = [&](InsertPointTy codeGenIP) {}; 296 297 llvm::OpenMPIRBuilder::LocationDescription ompLoc( 298 builder.saveIP(), builder.getCurrentDebugLocation()); 299 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createMaster( 300 ompLoc, bodyGenCB, finiCB)); 301 return success(); 302 } 303 304 /// Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder. 305 static LogicalResult 306 convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder, 307 LLVM::ModuleTranslation &moduleTranslation) { 308 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 309 auto criticalOp = cast<omp::CriticalOp>(opInst); 310 // TODO: support error propagation in OpenMPIRBuilder and use it instead of 311 // relying on captured variables. 312 LogicalResult bodyGenStatus = success(); 313 314 auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, 315 llvm::BasicBlock &continuationBlock) { 316 // CriticalOp has only one region associated with it. 317 auto ®ion = cast<omp::CriticalOp>(opInst).getRegion(); 318 convertOmpOpRegions(region, "omp.critical.region", *codeGenIP.getBlock(), 319 continuationBlock, builder, moduleTranslation, 320 bodyGenStatus); 321 }; 322 323 // TODO: Perform finalization actions for variables. This has to be 324 // called for variables which have destructors/finalizers. 325 auto finiCB = [&](InsertPointTy codeGenIP) {}; 326 327 llvm::OpenMPIRBuilder::LocationDescription ompLoc( 328 builder.saveIP(), builder.getCurrentDebugLocation()); 329 llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext(); 330 llvm::Constant *hint = nullptr; 331 332 // If it has a name, it probably has a hint too. 333 if (criticalOp.nameAttr()) { 334 // The verifiers in OpenMP Dialect guarentee that all the pointers are 335 // non-null 336 auto symbolRef = criticalOp.nameAttr().cast<SymbolRefAttr>(); 337 auto criticalDeclareOp = 338 SymbolTable::lookupNearestSymbolFrom<omp::CriticalDeclareOp>(criticalOp, 339 symbolRef); 340 hint = llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext), 341 static_cast<int>(criticalDeclareOp.hint())); 342 } 343 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createCritical( 344 ompLoc, bodyGenCB, finiCB, criticalOp.name().getValueOr(""), hint)); 345 return success(); 346 } 347 348 /// Returns a reduction declaration that corresponds to the given reduction 349 /// operation in the given container. Currently only supports reductions inside 350 /// WsLoopOp but can be easily extended. 351 static omp::ReductionDeclareOp findReductionDecl(omp::WsLoopOp container, 352 omp::ReductionOp reduction) { 353 SymbolRefAttr reductionSymbol; 354 for (unsigned i = 0, e = container.getNumReductionVars(); i < e; ++i) { 355 if (container.reduction_vars()[i] != reduction.accumulator()) 356 continue; 357 reductionSymbol = (*container.reductions())[i].cast<SymbolRefAttr>(); 358 break; 359 } 360 assert(reductionSymbol && 361 "reduction operation must be associated with a declaration"); 362 363 return SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>( 364 container, reductionSymbol); 365 } 366 367 /// Populates `reductions` with reduction declarations used in the given loop. 368 static void 369 collectReductionDecls(omp::WsLoopOp loop, 370 SmallVectorImpl<omp::ReductionDeclareOp> &reductions) { 371 Optional<ArrayAttr> attr = loop.reductions(); 372 if (!attr) 373 return; 374 375 reductions.reserve(reductions.size() + loop.getNumReductionVars()); 376 for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) { 377 reductions.push_back( 378 SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>( 379 loop, symbolRef)); 380 } 381 } 382 383 /// Translates the blocks contained in the given region and appends them to at 384 /// the current insertion point of `builder`. The operations of the entry block 385 /// are appended to the current insertion block, which is not expected to have a 386 /// terminator. If set, `continuationBlockArgs` is populated with translated 387 /// values that correspond to the values omp.yield'ed from the region. 388 static LogicalResult inlineConvertOmpRegions( 389 Region ®ion, StringRef blockName, llvm::IRBuilderBase &builder, 390 LLVM::ModuleTranslation &moduleTranslation, 391 SmallVectorImpl<llvm::Value *> *continuationBlockArgs = nullptr) { 392 if (region.empty()) 393 return success(); 394 395 // Special case for single-block regions that don't create additional blocks: 396 // insert operations without creating additional blocks. 397 if (llvm::hasSingleElement(region)) { 398 moduleTranslation.mapBlock(®ion.front(), builder.GetInsertBlock()); 399 if (failed(moduleTranslation.convertBlock( 400 region.front(), /*ignoreArguments=*/true, builder))) 401 return failure(); 402 403 // The continuation arguments are simply the translated terminator operands. 404 if (continuationBlockArgs) 405 llvm::append_range( 406 *continuationBlockArgs, 407 moduleTranslation.lookupValues(region.front().back().getOperands())); 408 409 // Drop the mapping that is no longer necessary so that the same region can 410 // be processed multiple times. 411 moduleTranslation.forgetMapping(region); 412 return success(); 413 } 414 415 // Create the continuation block manually instead of calling splitBlock 416 // because the current insertion block may not have a terminator. 417 llvm::BasicBlock *continuationBlock = 418 llvm::BasicBlock::Create(builder.getContext(), blockName + ".cont", 419 builder.GetInsertBlock()->getParent(), 420 builder.GetInsertBlock()->getNextNode()); 421 builder.CreateBr(continuationBlock); 422 423 LogicalResult bodyGenStatus = success(); 424 SmallVector<llvm::PHINode *> phis; 425 convertOmpOpRegions(region, blockName, *builder.GetInsertBlock(), 426 *continuationBlock, builder, moduleTranslation, 427 bodyGenStatus, &phis); 428 if (failed(bodyGenStatus)) 429 return failure(); 430 if (continuationBlockArgs) 431 llvm::append_range(*continuationBlockArgs, phis); 432 builder.SetInsertPoint(continuationBlock, 433 continuationBlock->getFirstInsertionPt()); 434 return success(); 435 } 436 437 namespace { 438 /// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used to 439 /// store lambdas with capture. 440 using OwningReductionGen = std::function<llvm::OpenMPIRBuilder::InsertPointTy( 441 llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *, 442 llvm::Value *&)>; 443 using OwningAtomicReductionGen = 444 std::function<llvm::OpenMPIRBuilder::InsertPointTy( 445 llvm::OpenMPIRBuilder::InsertPointTy, llvm::Type *, llvm::Value *, 446 llvm::Value *)>; 447 } // namespace 448 449 /// Create an OpenMPIRBuilder-compatible reduction generator for the given 450 /// reduction declaration. The generator uses `builder` but ignores its 451 /// insertion point. 452 static OwningReductionGen 453 makeReductionGen(omp::ReductionDeclareOp decl, llvm::IRBuilderBase &builder, 454 LLVM::ModuleTranslation &moduleTranslation) { 455 // The lambda is mutable because we need access to non-const methods of decl 456 // (which aren't actually mutating it), and we must capture decl by-value to 457 // avoid the dangling reference after the parent function returns. 458 OwningReductionGen gen = 459 [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, 460 llvm::Value *lhs, llvm::Value *rhs, 461 llvm::Value *&result) mutable { 462 Region &reductionRegion = decl.reductionRegion(); 463 moduleTranslation.mapValue(reductionRegion.front().getArgument(0), lhs); 464 moduleTranslation.mapValue(reductionRegion.front().getArgument(1), rhs); 465 builder.restoreIP(insertPoint); 466 SmallVector<llvm::Value *> phis; 467 if (failed(inlineConvertOmpRegions(reductionRegion, 468 "omp.reduction.nonatomic.body", 469 builder, moduleTranslation, &phis))) 470 return llvm::OpenMPIRBuilder::InsertPointTy(); 471 assert(phis.size() == 1); 472 result = phis[0]; 473 return builder.saveIP(); 474 }; 475 return gen; 476 } 477 478 /// Create an OpenMPIRBuilder-compatible atomic reduction generator for the 479 /// given reduction declaration. The generator uses `builder` but ignores its 480 /// insertion point. Returns null if there is no atomic region available in the 481 /// reduction declaration. 482 static OwningAtomicReductionGen 483 makeAtomicReductionGen(omp::ReductionDeclareOp decl, 484 llvm::IRBuilderBase &builder, 485 LLVM::ModuleTranslation &moduleTranslation) { 486 if (decl.atomicReductionRegion().empty()) 487 return OwningAtomicReductionGen(); 488 489 // The lambda is mutable because we need access to non-const methods of decl 490 // (which aren't actually mutating it), and we must capture decl by-value to 491 // avoid the dangling reference after the parent function returns. 492 OwningAtomicReductionGen atomicGen = 493 [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, llvm::Type *, 494 llvm::Value *lhs, llvm::Value *rhs) mutable { 495 Region &atomicRegion = decl.atomicReductionRegion(); 496 moduleTranslation.mapValue(atomicRegion.front().getArgument(0), lhs); 497 moduleTranslation.mapValue(atomicRegion.front().getArgument(1), rhs); 498 builder.restoreIP(insertPoint); 499 SmallVector<llvm::Value *> phis; 500 if (failed(inlineConvertOmpRegions(atomicRegion, 501 "omp.reduction.atomic.body", builder, 502 moduleTranslation, &phis))) 503 return llvm::OpenMPIRBuilder::InsertPointTy(); 504 assert(phis.empty()); 505 return builder.saveIP(); 506 }; 507 return atomicGen; 508 } 509 510 /// Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder. 511 static LogicalResult 512 convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder, 513 LLVM::ModuleTranslation &moduleTranslation) { 514 auto orderedOp = cast<omp::OrderedOp>(opInst); 515 516 omp::ClauseDepend dependType = *orderedOp.depend_type_val(); 517 bool isDependSource = dependType == omp::ClauseDepend::dependsource; 518 unsigned numLoops = orderedOp.num_loops_val().getValue(); 519 SmallVector<llvm::Value *> vecValues = 520 moduleTranslation.lookupValues(orderedOp.depend_vec_vars()); 521 522 llvm::OpenMPIRBuilder::LocationDescription ompLoc( 523 builder.saveIP(), builder.getCurrentDebugLocation()); 524 size_t indexVecValues = 0; 525 while (indexVecValues < vecValues.size()) { 526 SmallVector<llvm::Value *> storeValues; 527 storeValues.reserve(numLoops); 528 for (unsigned i = 0; i < numLoops; i++) { 529 storeValues.push_back(vecValues[indexVecValues]); 530 indexVecValues++; 531 } 532 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend( 533 ompLoc, findAllocaInsertPoint(builder, moduleTranslation), numLoops, 534 storeValues, ".cnt.addr", isDependSource)); 535 } 536 return success(); 537 } 538 539 /// Converts an OpenMP 'ordered_region' operation into LLVM IR using 540 /// OpenMPIRBuilder. 541 static LogicalResult 542 convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder, 543 LLVM::ModuleTranslation &moduleTranslation) { 544 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 545 auto orderedRegionOp = cast<omp::OrderedRegionOp>(opInst); 546 547 // TODO: The code generation for ordered simd directive is not supported yet. 548 if (orderedRegionOp.simd()) 549 return failure(); 550 551 // TODO: support error propagation in OpenMPIRBuilder and use it instead of 552 // relying on captured variables. 553 LogicalResult bodyGenStatus = success(); 554 555 auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, 556 llvm::BasicBlock &continuationBlock) { 557 // OrderedOp has only one region associated with it. 558 auto ®ion = cast<omp::OrderedRegionOp>(opInst).getRegion(); 559 convertOmpOpRegions(region, "omp.ordered.region", *codeGenIP.getBlock(), 560 continuationBlock, builder, moduleTranslation, 561 bodyGenStatus); 562 }; 563 564 // TODO: Perform finalization actions for variables. This has to be 565 // called for variables which have destructors/finalizers. 566 auto finiCB = [&](InsertPointTy codeGenIP) {}; 567 568 llvm::OpenMPIRBuilder::LocationDescription ompLoc( 569 builder.saveIP(), builder.getCurrentDebugLocation()); 570 builder.restoreIP( 571 moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd( 572 ompLoc, bodyGenCB, finiCB, !orderedRegionOp.simd())); 573 return bodyGenStatus; 574 } 575 576 static LogicalResult 577 convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, 578 LLVM::ModuleTranslation &moduleTranslation) { 579 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 580 using StorableBodyGenCallbackTy = 581 llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy; 582 583 auto sectionsOp = cast<omp::SectionsOp>(opInst); 584 585 // TODO: Support the following clauses: private, firstprivate, lastprivate, 586 // reduction, allocate 587 if (!sectionsOp.private_vars().empty() || 588 !sectionsOp.firstprivate_vars().empty() || 589 !sectionsOp.lastprivate_vars().empty() || 590 !sectionsOp.reduction_vars().empty() || sectionsOp.reductions() || 591 !sectionsOp.allocate_vars().empty() || 592 !sectionsOp.allocators_vars().empty()) 593 return emitError(sectionsOp.getLoc()) 594 << "private, firstprivate, lastprivate, reduction and allocate " 595 "clauses are not supported for sections construct"; 596 597 LogicalResult bodyGenStatus = success(); 598 SmallVector<StorableBodyGenCallbackTy> sectionCBs; 599 600 for (Operation &op : *sectionsOp.region().begin()) { 601 auto sectionOp = dyn_cast<omp::SectionOp>(op); 602 if (!sectionOp) // omp.terminator 603 continue; 604 605 Region ®ion = sectionOp.region(); 606 auto sectionCB = [®ion, &builder, &moduleTranslation, &bodyGenStatus]( 607 InsertPointTy allocaIP, InsertPointTy codeGenIP, 608 llvm::BasicBlock &finiBB) { 609 builder.restoreIP(codeGenIP); 610 builder.CreateBr(&finiBB); 611 convertOmpOpRegions(region, "omp.section.region", *codeGenIP.getBlock(), 612 finiBB, builder, moduleTranslation, bodyGenStatus); 613 }; 614 sectionCBs.push_back(sectionCB); 615 } 616 617 // No sections within omp.sections operation - skip generation. This situation 618 // is only possible if there is only a terminator operation inside the 619 // sections operation 620 if (sectionCBs.empty()) 621 return success(); 622 623 assert(isa<omp::SectionOp>(*sectionsOp.region().op_begin())); 624 625 // TODO: Perform appropriate actions according to the data-sharing 626 // attribute (shared, private, firstprivate, ...) of variables. 627 // Currently defaults to shared. 628 auto privCB = [&](InsertPointTy, InsertPointTy codeGenIP, llvm::Value &, 629 llvm::Value &vPtr, 630 llvm::Value *&replacementValue) -> InsertPointTy { 631 replacementValue = &vPtr; 632 return codeGenIP; 633 }; 634 635 // TODO: Perform finalization actions for variables. This has to be 636 // called for variables which have destructors/finalizers. 637 auto finiCB = [&](InsertPointTy codeGenIP) {}; 638 639 llvm::OpenMPIRBuilder::LocationDescription ompLoc( 640 builder.saveIP(), builder.getCurrentDebugLocation()); 641 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSections( 642 ompLoc, findAllocaInsertPoint(builder, moduleTranslation), sectionCBs, 643 privCB, finiCB, false, sectionsOp.nowait())); 644 return bodyGenStatus; 645 } 646 647 /// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder. 648 static LogicalResult 649 convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, 650 LLVM::ModuleTranslation &moduleTranslation) { 651 auto loop = cast<omp::WsLoopOp>(opInst); 652 // TODO: this should be in the op verifier instead. 653 if (loop.lowerBound().empty()) 654 return failure(); 655 656 // Static is the default. 657 auto schedule = 658 loop.schedule_val().getValueOr(omp::ClauseScheduleKind::Static); 659 660 // Find the loop configuration. 661 llvm::Value *step = moduleTranslation.lookupValue(loop.step()[0]); 662 llvm::Type *ivType = step->getType(); 663 llvm::Value *chunk = nullptr; 664 if (loop.schedule_chunk_var()) { 665 llvm::Value *chunkVar = 666 moduleTranslation.lookupValue(loop.schedule_chunk_var()); 667 llvm::Type *chunkVarType = chunkVar->getType(); 668 assert(chunkVarType->isIntegerTy() && 669 "chunk size must be one integer expression"); 670 if (chunkVarType->getIntegerBitWidth() < ivType->getIntegerBitWidth()) 671 chunk = builder.CreateSExt(chunkVar, ivType); 672 else if (chunkVarType->getIntegerBitWidth() > ivType->getIntegerBitWidth()) 673 chunk = builder.CreateTrunc(chunkVar, ivType); 674 else 675 chunk = chunkVar; 676 } 677 678 SmallVector<omp::ReductionDeclareOp> reductionDecls; 679 collectReductionDecls(loop, reductionDecls); 680 llvm::OpenMPIRBuilder::InsertPointTy allocaIP = 681 findAllocaInsertPoint(builder, moduleTranslation); 682 683 // Allocate space for privatized reduction variables. 684 SmallVector<llvm::Value *> privateReductionVariables; 685 DenseMap<Value, llvm::Value *> reductionVariableMap; 686 unsigned numReductions = loop.getNumReductionVars(); 687 privateReductionVariables.reserve(numReductions); 688 if (numReductions != 0) { 689 llvm::IRBuilderBase::InsertPointGuard guard(builder); 690 builder.restoreIP(allocaIP); 691 for (unsigned i = 0; i < numReductions; ++i) { 692 auto reductionType = 693 loop.reduction_vars()[i].getType().cast<LLVM::LLVMPointerType>(); 694 llvm::Value *var = builder.CreateAlloca( 695 moduleTranslation.convertType(reductionType.getElementType())); 696 privateReductionVariables.push_back(var); 697 reductionVariableMap.try_emplace(loop.reduction_vars()[i], var); 698 } 699 } 700 701 // Store the mapping between reduction variables and their private copies on 702 // ModuleTranslation stack. It can be then recovered when translating 703 // omp.reduce operations in a separate call. 704 LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard( 705 moduleTranslation, reductionVariableMap); 706 707 // Before the loop, store the initial values of reductions into reduction 708 // variables. Although this could be done after allocas, we don't want to mess 709 // up with the alloca insertion point. 710 for (unsigned i = 0; i < numReductions; ++i) { 711 SmallVector<llvm::Value *> phis; 712 if (failed(inlineConvertOmpRegions(reductionDecls[i].initializerRegion(), 713 "omp.reduction.neutral", builder, 714 moduleTranslation, &phis))) 715 return failure(); 716 assert(phis.size() == 1 && "expected one value to be yielded from the " 717 "reduction neutral element declaration region"); 718 builder.CreateStore(phis[0], privateReductionVariables[i]); 719 } 720 721 // Set up the source location value for OpenMP runtime. 722 llvm::DISubprogram *subprogram = 723 builder.GetInsertBlock()->getParent()->getSubprogram(); 724 const llvm::DILocation *diLoc = 725 moduleTranslation.translateLoc(opInst.getLoc(), subprogram); 726 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder.saveIP(), 727 llvm::DebugLoc(diLoc)); 728 729 // Generator of the canonical loop body. 730 // TODO: support error propagation in OpenMPIRBuilder and use it instead of 731 // relying on captured variables. 732 SmallVector<llvm::CanonicalLoopInfo *> loopInfos; 733 SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints; 734 LogicalResult bodyGenStatus = success(); 735 auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) { 736 // Make sure further conversions know about the induction variable. 737 moduleTranslation.mapValue( 738 loop.getRegion().front().getArgument(loopInfos.size()), iv); 739 740 // Capture the body insertion point for use in nested loops. BodyIP of the 741 // CanonicalLoopInfo always points to the beginning of the entry block of 742 // the body. 743 bodyInsertPoints.push_back(ip); 744 745 if (loopInfos.size() != loop.getNumLoops() - 1) 746 return; 747 748 // Convert the body of the loop. 749 llvm::BasicBlock *entryBlock = ip.getBlock(); 750 llvm::BasicBlock *exitBlock = 751 entryBlock->splitBasicBlock(ip.getPoint(), "omp.wsloop.exit"); 752 convertOmpOpRegions(loop.region(), "omp.wsloop.region", *entryBlock, 753 *exitBlock, builder, moduleTranslation, bodyGenStatus); 754 }; 755 756 // Delegate actual loop construction to the OpenMP IRBuilder. 757 // TODO: this currently assumes WsLoop is semantically similar to SCF loop, 758 // i.e. it has a positive step, uses signed integer semantics. Reconsider 759 // this code when WsLoop clearly supports more cases. 760 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 761 for (unsigned i = 0, e = loop.getNumLoops(); i < e; ++i) { 762 llvm::Value *lowerBound = 763 moduleTranslation.lookupValue(loop.lowerBound()[i]); 764 llvm::Value *upperBound = 765 moduleTranslation.lookupValue(loop.upperBound()[i]); 766 llvm::Value *step = moduleTranslation.lookupValue(loop.step()[i]); 767 768 // Make sure loop trip count are emitted in the preheader of the outermost 769 // loop at the latest so that they are all available for the new collapsed 770 // loop will be created below. 771 llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc; 772 llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP; 773 if (i != 0) { 774 loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(), 775 llvm::DebugLoc(diLoc)); 776 computeIP = loopInfos.front()->getPreheaderIP(); 777 } 778 loopInfos.push_back(ompBuilder->createCanonicalLoop( 779 loc, bodyGen, lowerBound, upperBound, step, 780 /*IsSigned=*/true, loop.inclusive(), computeIP)); 781 782 if (failed(bodyGenStatus)) 783 return failure(); 784 } 785 786 // Collapse loops. Store the insertion point because LoopInfos may get 787 // invalidated. 788 llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP(); 789 llvm::CanonicalLoopInfo *loopInfo = 790 ompBuilder->collapseLoops(diLoc, loopInfos, {}); 791 792 allocaIP = findAllocaInsertPoint(builder, moduleTranslation); 793 794 bool isSimd = loop.simd_modifier(); 795 796 if (schedule == omp::ClauseScheduleKind::Static) { 797 ompBuilder->applyStaticWorkshareLoop(ompLoc.DL, loopInfo, allocaIP, 798 !loop.nowait(), chunk); 799 } else { 800 llvm::omp::OMPScheduleType schedType; 801 switch (schedule) { 802 case omp::ClauseScheduleKind::Dynamic: 803 schedType = llvm::omp::OMPScheduleType::DynamicChunked; 804 break; 805 case omp::ClauseScheduleKind::Guided: 806 if (isSimd) 807 schedType = llvm::omp::OMPScheduleType::GuidedSimd; 808 else 809 schedType = llvm::omp::OMPScheduleType::GuidedChunked; 810 break; 811 case omp::ClauseScheduleKind::Auto: 812 schedType = llvm::omp::OMPScheduleType::Auto; 813 break; 814 case omp::ClauseScheduleKind::Runtime: 815 if (isSimd) 816 schedType = llvm::omp::OMPScheduleType::RuntimeSimd; 817 else 818 schedType = llvm::omp::OMPScheduleType::Runtime; 819 break; 820 default: 821 llvm_unreachable("Unknown schedule value"); 822 break; 823 } 824 825 if (Optional<omp::ScheduleModifier> modifier = loop.schedule_modifier()) { 826 switch (*modifier) { 827 case omp::ScheduleModifier::monotonic: 828 schedType |= llvm::omp::OMPScheduleType::ModifierMonotonic; 829 break; 830 case omp::ScheduleModifier::nonmonotonic: 831 schedType |= llvm::omp::OMPScheduleType::ModifierNonmonotonic; 832 break; 833 default: 834 // Nothing to do here. 835 break; 836 } 837 } 838 afterIP = ompBuilder->applyDynamicWorkshareLoop( 839 ompLoc.DL, loopInfo, allocaIP, schedType, !loop.nowait(), chunk); 840 } 841 842 // Continue building IR after the loop. Note that the LoopInfo returned by 843 // `collapseLoops` points inside the outermost loop and is intended for 844 // potential further loop transformations. Use the insertion point stored 845 // before collapsing loops instead. 846 builder.restoreIP(afterIP); 847 848 // Process the reductions if required. 849 if (numReductions == 0) 850 return success(); 851 852 // Create the reduction generators. We need to own them here because 853 // ReductionInfo only accepts references to the generators. 854 SmallVector<OwningReductionGen> owningReductionGens; 855 SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens; 856 for (unsigned i = 0; i < numReductions; ++i) { 857 owningReductionGens.push_back( 858 makeReductionGen(reductionDecls[i], builder, moduleTranslation)); 859 owningAtomicReductionGens.push_back( 860 makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation)); 861 } 862 863 // Collect the reduction information. 864 SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos; 865 reductionInfos.reserve(numReductions); 866 for (unsigned i = 0; i < numReductions; ++i) { 867 llvm::OpenMPIRBuilder::AtomicReductionGenTy atomicGen = nullptr; 868 if (owningAtomicReductionGens[i]) 869 atomicGen = owningAtomicReductionGens[i]; 870 llvm::Value *variable = 871 moduleTranslation.lookupValue(loop.reduction_vars()[i]); 872 reductionInfos.push_back({variable->getType()->getPointerElementType(), 873 variable, privateReductionVariables[i], 874 owningReductionGens[i], atomicGen}); 875 } 876 877 // The call to createReductions below expects the block to have a 878 // terminator. Create an unreachable instruction to serve as terminator 879 // and remove it later. 880 llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable(); 881 builder.SetInsertPoint(tempTerminator); 882 llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint = 883 ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos, 884 loop.nowait()); 885 if (!contInsertPoint.getBlock()) 886 return loop->emitOpError() << "failed to convert reductions"; 887 auto nextInsertionPoint = 888 ompBuilder->createBarrier(contInsertPoint, llvm::omp::OMPD_for); 889 tempTerminator->eraseFromParent(); 890 builder.restoreIP(nextInsertionPoint); 891 892 return success(); 893 } 894 895 /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering. 896 llvm::AtomicOrdering 897 convertAtomicOrdering(Optional<omp::ClauseMemoryOrderKind> ao) { 898 if (!ao) 899 return llvm::AtomicOrdering::Monotonic; // Default Memory Ordering 900 901 switch (*ao) { 902 case omp::ClauseMemoryOrderKind::seq_cst: 903 return llvm::AtomicOrdering::SequentiallyConsistent; 904 case omp::ClauseMemoryOrderKind::acq_rel: 905 return llvm::AtomicOrdering::AcquireRelease; 906 case omp::ClauseMemoryOrderKind::acquire: 907 return llvm::AtomicOrdering::Acquire; 908 case omp::ClauseMemoryOrderKind::release: 909 return llvm::AtomicOrdering::Release; 910 case omp::ClauseMemoryOrderKind::relaxed: 911 return llvm::AtomicOrdering::Monotonic; 912 } 913 llvm_unreachable("Unknown ClauseMemoryOrderKind kind"); 914 } 915 916 /// Convert omp.atomic.read operation to LLVM IR. 917 static LogicalResult 918 convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder, 919 LLVM::ModuleTranslation &moduleTranslation) { 920 921 auto readOp = cast<omp::AtomicReadOp>(opInst); 922 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 923 924 // Set up the source location value for OpenMP runtime. 925 llvm::DISubprogram *subprogram = 926 builder.GetInsertBlock()->getParent()->getSubprogram(); 927 const llvm::DILocation *diLoc = 928 moduleTranslation.translateLoc(opInst.getLoc(), subprogram); 929 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder.saveIP(), 930 llvm::DebugLoc(diLoc)); 931 llvm::AtomicOrdering AO = convertAtomicOrdering(readOp.memory_order()); 932 llvm::Value *x = moduleTranslation.lookupValue(readOp.x()); 933 llvm::Value *v = moduleTranslation.lookupValue(readOp.v()); 934 llvm::OpenMPIRBuilder::AtomicOpValue V = {v, false, false}; 935 llvm::OpenMPIRBuilder::AtomicOpValue X = {x, false, false}; 936 builder.restoreIP(ompBuilder->createAtomicRead(ompLoc, X, V, AO)); 937 return success(); 938 } 939 940 /// Converts an omp.atomic.write operation to LLVM IR. 941 static LogicalResult 942 convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder, 943 LLVM::ModuleTranslation &moduleTranslation) { 944 auto writeOp = cast<omp::AtomicWriteOp>(opInst); 945 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 946 947 // Set up the source location value for OpenMP runtime. 948 llvm::DISubprogram *subprogram = 949 builder.GetInsertBlock()->getParent()->getSubprogram(); 950 const llvm::DILocation *diLoc = 951 moduleTranslation.translateLoc(opInst.getLoc(), subprogram); 952 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder.saveIP(), 953 llvm::DebugLoc(diLoc)); 954 llvm::AtomicOrdering ao = convertAtomicOrdering(writeOp.memory_order()); 955 llvm::Value *expr = moduleTranslation.lookupValue(writeOp.value()); 956 llvm::Value *dest = moduleTranslation.lookupValue(writeOp.address()); 957 llvm::OpenMPIRBuilder::AtomicOpValue x = {dest, /*isSigned=*/false, 958 /*isVolatile=*/false}; 959 builder.restoreIP(ompBuilder->createAtomicWrite(ompLoc, x, expr, ao)); 960 return success(); 961 } 962 963 /// Converts an OpenMP reduction operation using OpenMPIRBuilder. Expects the 964 /// mapping between reduction variables and their private equivalents to have 965 /// been stored on the ModuleTranslation stack. Currently only supports 966 /// reduction within WsLoopOp, but can be easily extended. 967 static LogicalResult 968 convertOmpReductionOp(omp::ReductionOp reductionOp, 969 llvm::IRBuilderBase &builder, 970 LLVM::ModuleTranslation &moduleTranslation) { 971 // Find the declaration that corresponds to the reduction op. 972 auto reductionContainer = reductionOp->getParentOfType<omp::WsLoopOp>(); 973 omp::ReductionDeclareOp declaration = 974 findReductionDecl(reductionContainer, reductionOp); 975 assert(declaration && "could not find reduction declaration"); 976 977 // Retrieve the mapping between reduction variables and their private 978 // equivalents. 979 const DenseMap<Value, llvm::Value *> *reductionVariableMap = nullptr; 980 moduleTranslation.stackWalk<OpenMPVarMappingStackFrame>( 981 [&](const OpenMPVarMappingStackFrame &frame) { 982 reductionVariableMap = &frame.mapping; 983 return WalkResult::interrupt(); 984 }); 985 assert(reductionVariableMap && "couldn't find private reduction variables"); 986 987 // Translate the reduction operation by emitting the body of the corresponding 988 // reduction declaration. 989 Region &reductionRegion = declaration.reductionRegion(); 990 llvm::Value *privateReductionVar = 991 reductionVariableMap->lookup(reductionOp.accumulator()); 992 llvm::Value *reductionVal = builder.CreateLoad( 993 moduleTranslation.convertType(reductionOp.operand().getType()), 994 privateReductionVar); 995 996 moduleTranslation.mapValue(reductionRegion.front().getArgument(0), 997 reductionVal); 998 moduleTranslation.mapValue( 999 reductionRegion.front().getArgument(1), 1000 moduleTranslation.lookupValue(reductionOp.operand())); 1001 1002 SmallVector<llvm::Value *> phis; 1003 if (failed(inlineConvertOmpRegions(reductionRegion, "omp.reduction.body", 1004 builder, moduleTranslation, &phis))) 1005 return failure(); 1006 assert(phis.size() == 1 && "expected one value to be yielded from " 1007 "the reduction body declaration region"); 1008 builder.CreateStore(phis[0], privateReductionVar); 1009 return success(); 1010 } 1011 1012 namespace { 1013 1014 /// Implementation of the dialect interface that converts operations belonging 1015 /// to the OpenMP dialect to LLVM IR. 1016 class OpenMPDialectLLVMIRTranslationInterface 1017 : public LLVMTranslationDialectInterface { 1018 public: 1019 using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface; 1020 1021 /// Translates the given operation to LLVM IR using the provided IR builder 1022 /// and saving the state in `moduleTranslation`. 1023 LogicalResult 1024 convertOperation(Operation *op, llvm::IRBuilderBase &builder, 1025 LLVM::ModuleTranslation &moduleTranslation) const final; 1026 }; 1027 1028 } // namespace 1029 1030 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR 1031 /// (including OpenMP runtime calls). 1032 LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation( 1033 Operation *op, llvm::IRBuilderBase &builder, 1034 LLVM::ModuleTranslation &moduleTranslation) const { 1035 1036 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 1037 1038 return llvm::TypeSwitch<Operation *, LogicalResult>(op) 1039 .Case([&](omp::BarrierOp) { 1040 ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier); 1041 return success(); 1042 }) 1043 .Case([&](omp::TaskwaitOp) { 1044 ompBuilder->createTaskwait(builder.saveIP()); 1045 return success(); 1046 }) 1047 .Case([&](omp::TaskyieldOp) { 1048 ompBuilder->createTaskyield(builder.saveIP()); 1049 return success(); 1050 }) 1051 .Case([&](omp::FlushOp) { 1052 // No support in Openmp runtime function (__kmpc_flush) to accept 1053 // the argument list. 1054 // OpenMP standard states the following: 1055 // "An implementation may implement a flush with a list by ignoring 1056 // the list, and treating it the same as a flush without a list." 1057 // 1058 // The argument list is discarded so that, flush with a list is treated 1059 // same as a flush without a list. 1060 ompBuilder->createFlush(builder.saveIP()); 1061 return success(); 1062 }) 1063 .Case([&](omp::ParallelOp op) { 1064 return convertOmpParallel(op, builder, moduleTranslation); 1065 }) 1066 .Case([&](omp::ReductionOp reductionOp) { 1067 return convertOmpReductionOp(reductionOp, builder, moduleTranslation); 1068 }) 1069 .Case([&](omp::MasterOp) { 1070 return convertOmpMaster(*op, builder, moduleTranslation); 1071 }) 1072 .Case([&](omp::CriticalOp) { 1073 return convertOmpCritical(*op, builder, moduleTranslation); 1074 }) 1075 .Case([&](omp::OrderedRegionOp) { 1076 return convertOmpOrderedRegion(*op, builder, moduleTranslation); 1077 }) 1078 .Case([&](omp::OrderedOp) { 1079 return convertOmpOrdered(*op, builder, moduleTranslation); 1080 }) 1081 .Case([&](omp::WsLoopOp) { 1082 return convertOmpWsLoop(*op, builder, moduleTranslation); 1083 }) 1084 .Case([&](omp::AtomicReadOp) { 1085 return convertOmpAtomicRead(*op, builder, moduleTranslation); 1086 }) 1087 .Case([&](omp::AtomicWriteOp) { 1088 return convertOmpAtomicWrite(*op, builder, moduleTranslation); 1089 }) 1090 .Case([&](omp::SectionsOp) { 1091 return convertOmpSections(*op, builder, moduleTranslation); 1092 }) 1093 .Case<omp::YieldOp, omp::TerminatorOp, omp::ReductionDeclareOp, 1094 omp::CriticalDeclareOp>([](auto op) { 1095 // `yield` and `terminator` can be just omitted. The block structure 1096 // was created in the region that handles their parent operation. 1097 // `reduction.declare` will be used by reductions and is not 1098 // converted directly, skip it. 1099 // `critical.declare` is only used to declare names of critical 1100 // sections which will be used by `critical` ops and hence can be 1101 // ignored for lowering. The OpenMP IRBuilder will create unique 1102 // name for critical section names. 1103 return success(); 1104 }) 1105 .Default([&](Operation *inst) { 1106 return inst->emitError("unsupported OpenMP operation: ") 1107 << inst->getName(); 1108 }); 1109 } 1110 1111 void mlir::registerOpenMPDialectTranslation(DialectRegistry ®istry) { 1112 registry.insert<omp::OpenMPDialect>(); 1113 registry.addDialectInterface<omp::OpenMPDialect, 1114 OpenMPDialectLLVMIRTranslationInterface>(); 1115 } 1116 1117 void mlir::registerOpenMPDialectTranslation(MLIRContext &context) { 1118 DialectRegistry registry; 1119 registerOpenMPDialectTranslation(registry); 1120 context.appendDialectRegistry(registry); 1121 } 1122