1 //===- OpenMPToLLVMIRTranslation.cpp - Translate OpenMP dialect to LLVM IR-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a translation between the MLIR OpenMP dialect and LLVM 10 // IR. 11 // 12 //===----------------------------------------------------------------------===// 13 #include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h" 14 #include "mlir/Dialect/OpenMP/OpenMPDialect.h" 15 #include "mlir/IR/BlockAndValueMapping.h" 16 #include "mlir/IR/Operation.h" 17 #include "mlir/Support/LLVM.h" 18 #include "mlir/Target/LLVMIR/ModuleTranslation.h" 19 20 #include "llvm/ADT/SetVector.h" 21 #include "llvm/ADT/TypeSwitch.h" 22 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" 23 #include "llvm/IR/DebugInfoMetadata.h" 24 #include "llvm/IR/IRBuilder.h" 25 26 using namespace mlir; 27 28 namespace { 29 /// ModuleTranslation stack frame for OpenMP operations. This keeps track of the 30 /// insertion points for allocas. 31 class OpenMPAllocaStackFrame 32 : public LLVM::ModuleTranslation::StackFrameBase<OpenMPAllocaStackFrame> { 33 public: 34 explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP) 35 : allocaInsertPoint(allocaIP) {} 36 llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint; 37 }; 38 39 /// ModuleTranslation stack frame containing the partial mapping between MLIR 40 /// values and their LLVM IR equivalents. 41 class OpenMPVarMappingStackFrame 42 : public LLVM::ModuleTranslation::StackFrameBase< 43 OpenMPVarMappingStackFrame> { 44 public: 45 explicit OpenMPVarMappingStackFrame( 46 const DenseMap<Value, llvm::Value *> &mapping) 47 : mapping(mapping) {} 48 49 DenseMap<Value, llvm::Value *> mapping; 50 }; 51 } // namespace 52 53 /// Find the insertion point for allocas given the current insertion point for 54 /// normal operations in the builder. 55 static llvm::OpenMPIRBuilder::InsertPointTy 56 findAllocaInsertPoint(llvm::IRBuilderBase &builder, 57 const LLVM::ModuleTranslation &moduleTranslation) { 58 // If there is an alloca insertion point on stack, i.e. we are in a nested 59 // operation and a specific point was provided by some surrounding operation, 60 // use it. 61 llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint; 62 WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocaStackFrame>( 63 [&](const OpenMPAllocaStackFrame &frame) { 64 allocaInsertPoint = frame.allocaInsertPoint; 65 return WalkResult::interrupt(); 66 }); 67 if (walkResult.wasInterrupted()) 68 return allocaInsertPoint; 69 70 // Otherwise, insert to the entry block of the surrounding function. 71 llvm::BasicBlock &funcEntryBlock = 72 builder.GetInsertBlock()->getParent()->getEntryBlock(); 73 return llvm::OpenMPIRBuilder::InsertPointTy( 74 &funcEntryBlock, funcEntryBlock.getFirstInsertionPt()); 75 } 76 77 /// Converts the given region that appears within an OpenMP dialect operation to 78 /// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the 79 /// region, and a branch from any block with an successor-less OpenMP terminator 80 /// to `continuationBlock`. Populates `continuationBlockPHIs` with the PHI nodes 81 /// of the continuation block if provided. 82 static void convertOmpOpRegions( 83 Region ®ion, StringRef blockName, llvm::BasicBlock &sourceBlock, 84 llvm::BasicBlock &continuationBlock, llvm::IRBuilderBase &builder, 85 LLVM::ModuleTranslation &moduleTranslation, LogicalResult &bodyGenStatus, 86 SmallVectorImpl<llvm::PHINode *> *continuationBlockPHIs = nullptr) { 87 llvm::LLVMContext &llvmContext = builder.getContext(); 88 for (Block &bb : region) { 89 llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create( 90 llvmContext, blockName, builder.GetInsertBlock()->getParent(), 91 builder.GetInsertBlock()->getNextNode()); 92 moduleTranslation.mapBlock(&bb, llvmBB); 93 } 94 95 llvm::Instruction *sourceTerminator = sourceBlock.getTerminator(); 96 97 // Terminators (namely YieldOp) may be forwarding values to the region that 98 // need to be available in the continuation block. Collect the types of these 99 // operands in preparation of creating PHI nodes. 100 SmallVector<llvm::Type *> continuationBlockPHITypes; 101 bool operandsProcessed = false; 102 unsigned numYields = 0; 103 for (Block &bb : region.getBlocks()) { 104 if (omp::YieldOp yield = dyn_cast<omp::YieldOp>(bb.getTerminator())) { 105 if (!operandsProcessed) { 106 for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) { 107 continuationBlockPHITypes.push_back( 108 moduleTranslation.convertType(yield->getOperand(i).getType())); 109 } 110 operandsProcessed = true; 111 } else { 112 assert(continuationBlockPHITypes.size() == yield->getNumOperands() && 113 "mismatching number of values yielded from the region"); 114 for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) { 115 llvm::Type *operandType = 116 moduleTranslation.convertType(yield->getOperand(i).getType()); 117 (void)operandType; 118 assert(continuationBlockPHITypes[i] == operandType && 119 "values of mismatching types yielded from the region"); 120 } 121 } 122 numYields++; 123 } 124 } 125 126 // Insert PHI nodes in the continuation block for any values forwarded by the 127 // terminators in this region. 128 if (!continuationBlockPHITypes.empty()) 129 assert( 130 continuationBlockPHIs && 131 "expected continuation block PHIs if converted regions yield values"); 132 if (continuationBlockPHIs) { 133 llvm::IRBuilderBase::InsertPointGuard guard(builder); 134 continuationBlockPHIs->reserve(continuationBlockPHITypes.size()); 135 builder.SetInsertPoint(&continuationBlock, continuationBlock.begin()); 136 for (llvm::Type *ty : continuationBlockPHITypes) 137 continuationBlockPHIs->push_back(builder.CreatePHI(ty, numYields)); 138 } 139 140 // Convert blocks one by one in topological order to ensure 141 // defs are converted before uses. 142 SetVector<Block *> blocks = 143 LLVM::detail::getTopologicallySortedBlocks(region); 144 for (Block *bb : blocks) { 145 llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb); 146 // Retarget the branch of the entry block to the entry block of the 147 // converted region (regions are single-entry). 148 if (bb->isEntryBlock()) { 149 assert(sourceTerminator->getNumSuccessors() == 1 && 150 "provided entry block has multiple successors"); 151 assert(sourceTerminator->getSuccessor(0) == &continuationBlock && 152 "ContinuationBlock is not the successor of the entry block"); 153 sourceTerminator->setSuccessor(0, llvmBB); 154 } 155 156 llvm::IRBuilderBase::InsertPointGuard guard(builder); 157 if (failed( 158 moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder))) { 159 bodyGenStatus = failure(); 160 return; 161 } 162 163 // Special handling for `omp.yield` and `omp.terminator` (we may have more 164 // than one): they return the control to the parent OpenMP dialect operation 165 // so replace them with the branch to the continuation block. We handle this 166 // here to avoid relying inter-function communication through the 167 // ModuleTranslation class to set up the correct insertion point. This is 168 // also consistent with MLIR's idiom of handling special region terminators 169 // in the same code that handles the region-owning operation. 170 Operation *terminator = bb->getTerminator(); 171 if (isa<omp::TerminatorOp, omp::YieldOp>(terminator)) { 172 builder.CreateBr(&continuationBlock); 173 174 for (unsigned i = 0, e = terminator->getNumOperands(); i < e; ++i) 175 (*continuationBlockPHIs)[i]->addIncoming( 176 moduleTranslation.lookupValue(terminator->getOperand(i)), llvmBB); 177 } 178 } 179 // After all blocks have been traversed and values mapped, connect the PHI 180 // nodes to the results of preceding blocks. 181 LLVM::detail::connectPHINodes(region, moduleTranslation); 182 183 // Remove the blocks and values defined in this region from the mapping since 184 // they are not visible outside of this region. This allows the same region to 185 // be converted several times, that is cloned, without clashes, and slightly 186 // speeds up the lookups. 187 moduleTranslation.forgetMapping(region); 188 } 189 190 /// Convert ProcBindKind from MLIR-generated enum to LLVM enum. 191 static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) { 192 switch (kind) { 193 case omp::ClauseProcBindKind::close: 194 return llvm::omp::ProcBindKind::OMP_PROC_BIND_close; 195 case omp::ClauseProcBindKind::master: 196 return llvm::omp::ProcBindKind::OMP_PROC_BIND_master; 197 case omp::ClauseProcBindKind::primary: 198 return llvm::omp::ProcBindKind::OMP_PROC_BIND_primary; 199 case omp::ClauseProcBindKind::spread: 200 return llvm::omp::ProcBindKind::OMP_PROC_BIND_spread; 201 } 202 llvm_unreachable("Unknown ClauseProcBindKind kind"); 203 } 204 205 /// Converts the OpenMP parallel operation to LLVM IR. 206 static LogicalResult 207 convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, 208 LLVM::ModuleTranslation &moduleTranslation) { 209 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 210 // TODO: support error propagation in OpenMPIRBuilder and use it instead of 211 // relying on captured variables. 212 LogicalResult bodyGenStatus = success(); 213 214 auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, 215 llvm::BasicBlock &continuationBlock) { 216 // Save the alloca insertion point on ModuleTranslation stack for use in 217 // nested regions. 218 LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame( 219 moduleTranslation, allocaIP); 220 221 // ParallelOp has only one region associated with it. 222 convertOmpOpRegions(opInst.getRegion(), "omp.par.region", 223 *codeGenIP.getBlock(), continuationBlock, builder, 224 moduleTranslation, bodyGenStatus); 225 }; 226 227 // TODO: Perform appropriate actions according to the data-sharing 228 // attribute (shared, private, firstprivate, ...) of variables. 229 // Currently defaults to shared. 230 auto privCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, 231 llvm::Value &, llvm::Value &vPtr, 232 llvm::Value *&replacementValue) -> InsertPointTy { 233 replacementValue = &vPtr; 234 235 return codeGenIP; 236 }; 237 238 // TODO: Perform finalization actions for variables. This has to be 239 // called for variables which have destructors/finalizers. 240 auto finiCB = [&](InsertPointTy codeGenIP) {}; 241 242 llvm::Value *ifCond = nullptr; 243 if (auto ifExprVar = opInst.if_expr_var()) 244 ifCond = moduleTranslation.lookupValue(ifExprVar); 245 llvm::Value *numThreads = nullptr; 246 if (auto numThreadsVar = opInst.num_threads_var()) 247 numThreads = moduleTranslation.lookupValue(numThreadsVar); 248 auto pbKind = llvm::omp::OMP_PROC_BIND_default; 249 if (auto bind = opInst.proc_bind_val()) 250 pbKind = getProcBindKind(*bind); 251 // TODO: Is the Parallel construct cancellable? 252 bool isCancellable = false; 253 254 // Ensure that the BasicBlock for the the parallel region is sparate from the 255 // function entry which we may need to insert allocas. 256 if (builder.GetInsertBlock() == 257 &builder.GetInsertBlock()->getParent()->getEntryBlock()) { 258 assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() && 259 "Assuming end of basic block"); 260 llvm::BasicBlock *entryBB = 261 llvm::BasicBlock::Create(builder.getContext(), "parallel.entry", 262 builder.GetInsertBlock()->getParent(), 263 builder.GetInsertBlock()->getNextNode()); 264 builder.CreateBr(entryBB); 265 builder.SetInsertPoint(entryBB); 266 } 267 llvm::OpenMPIRBuilder::LocationDescription ompLoc( 268 builder.saveIP(), builder.getCurrentDebugLocation()); 269 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createParallel( 270 ompLoc, findAllocaInsertPoint(builder, moduleTranslation), bodyGenCB, 271 privCB, finiCB, ifCond, numThreads, pbKind, isCancellable)); 272 273 return bodyGenStatus; 274 } 275 276 /// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder. 277 static LogicalResult 278 convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder, 279 LLVM::ModuleTranslation &moduleTranslation) { 280 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 281 // TODO: support error propagation in OpenMPIRBuilder and use it instead of 282 // relying on captured variables. 283 LogicalResult bodyGenStatus = success(); 284 285 auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, 286 llvm::BasicBlock &continuationBlock) { 287 // MasterOp has only one region associated with it. 288 auto ®ion = cast<omp::MasterOp>(opInst).getRegion(); 289 convertOmpOpRegions(region, "omp.master.region", *codeGenIP.getBlock(), 290 continuationBlock, builder, moduleTranslation, 291 bodyGenStatus); 292 }; 293 294 // TODO: Perform finalization actions for variables. This has to be 295 // called for variables which have destructors/finalizers. 296 auto finiCB = [&](InsertPointTy codeGenIP) {}; 297 298 llvm::OpenMPIRBuilder::LocationDescription ompLoc( 299 builder.saveIP(), builder.getCurrentDebugLocation()); 300 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createMaster( 301 ompLoc, bodyGenCB, finiCB)); 302 return success(); 303 } 304 305 /// Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder. 306 static LogicalResult 307 convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder, 308 LLVM::ModuleTranslation &moduleTranslation) { 309 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 310 auto criticalOp = cast<omp::CriticalOp>(opInst); 311 // TODO: support error propagation in OpenMPIRBuilder and use it instead of 312 // relying on captured variables. 313 LogicalResult bodyGenStatus = success(); 314 315 auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, 316 llvm::BasicBlock &continuationBlock) { 317 // CriticalOp has only one region associated with it. 318 auto ®ion = cast<omp::CriticalOp>(opInst).getRegion(); 319 convertOmpOpRegions(region, "omp.critical.region", *codeGenIP.getBlock(), 320 continuationBlock, builder, moduleTranslation, 321 bodyGenStatus); 322 }; 323 324 // TODO: Perform finalization actions for variables. This has to be 325 // called for variables which have destructors/finalizers. 326 auto finiCB = [&](InsertPointTy codeGenIP) {}; 327 328 llvm::OpenMPIRBuilder::LocationDescription ompLoc( 329 builder.saveIP(), builder.getCurrentDebugLocation()); 330 llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext(); 331 llvm::Constant *hint = nullptr; 332 333 // If it has a name, it probably has a hint too. 334 if (criticalOp.nameAttr()) { 335 // The verifiers in OpenMP Dialect guarentee that all the pointers are 336 // non-null 337 auto symbolRef = criticalOp.nameAttr().cast<SymbolRefAttr>(); 338 auto criticalDeclareOp = 339 SymbolTable::lookupNearestSymbolFrom<omp::CriticalDeclareOp>(criticalOp, 340 symbolRef); 341 hint = llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext), 342 static_cast<int>(criticalDeclareOp.hint())); 343 } 344 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createCritical( 345 ompLoc, bodyGenCB, finiCB, criticalOp.name().getValueOr(""), hint)); 346 return success(); 347 } 348 349 /// Returns a reduction declaration that corresponds to the given reduction 350 /// operation in the given container. Currently only supports reductions inside 351 /// WsLoopOp but can be easily extended. 352 static omp::ReductionDeclareOp findReductionDecl(omp::WsLoopOp container, 353 omp::ReductionOp reduction) { 354 SymbolRefAttr reductionSymbol; 355 for (unsigned i = 0, e = container.getNumReductionVars(); i < e; ++i) { 356 if (container.reduction_vars()[i] != reduction.accumulator()) 357 continue; 358 reductionSymbol = (*container.reductions())[i].cast<SymbolRefAttr>(); 359 break; 360 } 361 assert(reductionSymbol && 362 "reduction operation must be associated with a declaration"); 363 364 return SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>( 365 container, reductionSymbol); 366 } 367 368 /// Populates `reductions` with reduction declarations used in the given loop. 369 static void 370 collectReductionDecls(omp::WsLoopOp loop, 371 SmallVectorImpl<omp::ReductionDeclareOp> &reductions) { 372 Optional<ArrayAttr> attr = loop.reductions(); 373 if (!attr) 374 return; 375 376 reductions.reserve(reductions.size() + loop.getNumReductionVars()); 377 for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) { 378 reductions.push_back( 379 SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>( 380 loop, symbolRef)); 381 } 382 } 383 384 /// Translates the blocks contained in the given region and appends them to at 385 /// the current insertion point of `builder`. The operations of the entry block 386 /// are appended to the current insertion block, which is not expected to have a 387 /// terminator. If set, `continuationBlockArgs` is populated with translated 388 /// values that correspond to the values omp.yield'ed from the region. 389 static LogicalResult inlineConvertOmpRegions( 390 Region ®ion, StringRef blockName, llvm::IRBuilderBase &builder, 391 LLVM::ModuleTranslation &moduleTranslation, 392 SmallVectorImpl<llvm::Value *> *continuationBlockArgs = nullptr) { 393 if (region.empty()) 394 return success(); 395 396 // Special case for single-block regions that don't create additional blocks: 397 // insert operations without creating additional blocks. 398 if (llvm::hasSingleElement(region)) { 399 moduleTranslation.mapBlock(®ion.front(), builder.GetInsertBlock()); 400 if (failed(moduleTranslation.convertBlock( 401 region.front(), /*ignoreArguments=*/true, builder))) 402 return failure(); 403 404 // The continuation arguments are simply the translated terminator operands. 405 if (continuationBlockArgs) 406 llvm::append_range( 407 *continuationBlockArgs, 408 moduleTranslation.lookupValues(region.front().back().getOperands())); 409 410 // Drop the mapping that is no longer necessary so that the same region can 411 // be processed multiple times. 412 moduleTranslation.forgetMapping(region); 413 return success(); 414 } 415 416 // Create the continuation block manually instead of calling splitBlock 417 // because the current insertion block may not have a terminator. 418 llvm::BasicBlock *continuationBlock = 419 llvm::BasicBlock::Create(builder.getContext(), blockName + ".cont", 420 builder.GetInsertBlock()->getParent(), 421 builder.GetInsertBlock()->getNextNode()); 422 builder.CreateBr(continuationBlock); 423 424 LogicalResult bodyGenStatus = success(); 425 SmallVector<llvm::PHINode *> phis; 426 convertOmpOpRegions(region, blockName, *builder.GetInsertBlock(), 427 *continuationBlock, builder, moduleTranslation, 428 bodyGenStatus, &phis); 429 if (failed(bodyGenStatus)) 430 return failure(); 431 if (continuationBlockArgs) 432 llvm::append_range(*continuationBlockArgs, phis); 433 builder.SetInsertPoint(continuationBlock, 434 continuationBlock->getFirstInsertionPt()); 435 return success(); 436 } 437 438 namespace { 439 /// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used to 440 /// store lambdas with capture. 441 using OwningReductionGen = std::function<llvm::OpenMPIRBuilder::InsertPointTy( 442 llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *, 443 llvm::Value *&)>; 444 using OwningAtomicReductionGen = 445 std::function<llvm::OpenMPIRBuilder::InsertPointTy( 446 llvm::OpenMPIRBuilder::InsertPointTy, llvm::Type *, llvm::Value *, 447 llvm::Value *)>; 448 } // namespace 449 450 /// Create an OpenMPIRBuilder-compatible reduction generator for the given 451 /// reduction declaration. The generator uses `builder` but ignores its 452 /// insertion point. 453 static OwningReductionGen 454 makeReductionGen(omp::ReductionDeclareOp decl, llvm::IRBuilderBase &builder, 455 LLVM::ModuleTranslation &moduleTranslation) { 456 // The lambda is mutable because we need access to non-const methods of decl 457 // (which aren't actually mutating it), and we must capture decl by-value to 458 // avoid the dangling reference after the parent function returns. 459 OwningReductionGen gen = 460 [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, 461 llvm::Value *lhs, llvm::Value *rhs, 462 llvm::Value *&result) mutable { 463 Region &reductionRegion = decl.reductionRegion(); 464 moduleTranslation.mapValue(reductionRegion.front().getArgument(0), lhs); 465 moduleTranslation.mapValue(reductionRegion.front().getArgument(1), rhs); 466 builder.restoreIP(insertPoint); 467 SmallVector<llvm::Value *> phis; 468 if (failed(inlineConvertOmpRegions(reductionRegion, 469 "omp.reduction.nonatomic.body", 470 builder, moduleTranslation, &phis))) 471 return llvm::OpenMPIRBuilder::InsertPointTy(); 472 assert(phis.size() == 1); 473 result = phis[0]; 474 return builder.saveIP(); 475 }; 476 return gen; 477 } 478 479 /// Create an OpenMPIRBuilder-compatible atomic reduction generator for the 480 /// given reduction declaration. The generator uses `builder` but ignores its 481 /// insertion point. Returns null if there is no atomic region available in the 482 /// reduction declaration. 483 static OwningAtomicReductionGen 484 makeAtomicReductionGen(omp::ReductionDeclareOp decl, 485 llvm::IRBuilderBase &builder, 486 LLVM::ModuleTranslation &moduleTranslation) { 487 if (decl.atomicReductionRegion().empty()) 488 return OwningAtomicReductionGen(); 489 490 // The lambda is mutable because we need access to non-const methods of decl 491 // (which aren't actually mutating it), and we must capture decl by-value to 492 // avoid the dangling reference after the parent function returns. 493 OwningAtomicReductionGen atomicGen = 494 [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, llvm::Type *, 495 llvm::Value *lhs, llvm::Value *rhs) mutable { 496 Region &atomicRegion = decl.atomicReductionRegion(); 497 moduleTranslation.mapValue(atomicRegion.front().getArgument(0), lhs); 498 moduleTranslation.mapValue(atomicRegion.front().getArgument(1), rhs); 499 builder.restoreIP(insertPoint); 500 SmallVector<llvm::Value *> phis; 501 if (failed(inlineConvertOmpRegions(atomicRegion, 502 "omp.reduction.atomic.body", builder, 503 moduleTranslation, &phis))) 504 return llvm::OpenMPIRBuilder::InsertPointTy(); 505 assert(phis.empty()); 506 return builder.saveIP(); 507 }; 508 return atomicGen; 509 } 510 511 /// Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder. 512 static LogicalResult 513 convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder, 514 LLVM::ModuleTranslation &moduleTranslation) { 515 auto orderedOp = cast<omp::OrderedOp>(opInst); 516 517 omp::ClauseDepend dependType = *orderedOp.depend_type_val(); 518 bool isDependSource = dependType == omp::ClauseDepend::dependsource; 519 unsigned numLoops = orderedOp.num_loops_val().getValue(); 520 SmallVector<llvm::Value *> vecValues = 521 moduleTranslation.lookupValues(orderedOp.depend_vec_vars()); 522 523 llvm::OpenMPIRBuilder::LocationDescription ompLoc( 524 builder.saveIP(), builder.getCurrentDebugLocation()); 525 size_t indexVecValues = 0; 526 while (indexVecValues < vecValues.size()) { 527 SmallVector<llvm::Value *> storeValues; 528 storeValues.reserve(numLoops); 529 for (unsigned i = 0; i < numLoops; i++) { 530 storeValues.push_back(vecValues[indexVecValues]); 531 indexVecValues++; 532 } 533 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend( 534 ompLoc, findAllocaInsertPoint(builder, moduleTranslation), numLoops, 535 storeValues, ".cnt.addr", isDependSource)); 536 } 537 return success(); 538 } 539 540 /// Converts an OpenMP 'ordered_region' operation into LLVM IR using 541 /// OpenMPIRBuilder. 542 static LogicalResult 543 convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder, 544 LLVM::ModuleTranslation &moduleTranslation) { 545 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 546 auto orderedRegionOp = cast<omp::OrderedRegionOp>(opInst); 547 548 // TODO: The code generation for ordered simd directive is not supported yet. 549 if (orderedRegionOp.simd()) 550 return failure(); 551 552 // TODO: support error propagation in OpenMPIRBuilder and use it instead of 553 // relying on captured variables. 554 LogicalResult bodyGenStatus = success(); 555 556 auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, 557 llvm::BasicBlock &continuationBlock) { 558 // OrderedOp has only one region associated with it. 559 auto ®ion = cast<omp::OrderedRegionOp>(opInst).getRegion(); 560 convertOmpOpRegions(region, "omp.ordered.region", *codeGenIP.getBlock(), 561 continuationBlock, builder, moduleTranslation, 562 bodyGenStatus); 563 }; 564 565 // TODO: Perform finalization actions for variables. This has to be 566 // called for variables which have destructors/finalizers. 567 auto finiCB = [&](InsertPointTy codeGenIP) {}; 568 569 llvm::OpenMPIRBuilder::LocationDescription ompLoc( 570 builder.saveIP(), builder.getCurrentDebugLocation()); 571 builder.restoreIP( 572 moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd( 573 ompLoc, bodyGenCB, finiCB, !orderedRegionOp.simd())); 574 return bodyGenStatus; 575 } 576 577 static LogicalResult 578 convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, 579 LLVM::ModuleTranslation &moduleTranslation) { 580 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 581 using StorableBodyGenCallbackTy = 582 llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy; 583 584 auto sectionsOp = cast<omp::SectionsOp>(opInst); 585 586 // TODO: Support the following clauses: private, firstprivate, lastprivate, 587 // reduction, allocate 588 if (!sectionsOp.private_vars().empty() || 589 !sectionsOp.firstprivate_vars().empty() || 590 !sectionsOp.lastprivate_vars().empty() || 591 !sectionsOp.reduction_vars().empty() || sectionsOp.reductions() || 592 !sectionsOp.allocate_vars().empty() || 593 !sectionsOp.allocators_vars().empty()) 594 return emitError(sectionsOp.getLoc()) 595 << "private, firstprivate, lastprivate, reduction and allocate " 596 "clauses are not supported for sections construct"; 597 598 LogicalResult bodyGenStatus = success(); 599 SmallVector<StorableBodyGenCallbackTy> sectionCBs; 600 601 for (Operation &op : *sectionsOp.region().begin()) { 602 auto sectionOp = dyn_cast<omp::SectionOp>(op); 603 if (!sectionOp) // omp.terminator 604 continue; 605 606 Region ®ion = sectionOp.region(); 607 auto sectionCB = [®ion, &builder, &moduleTranslation, &bodyGenStatus]( 608 InsertPointTy allocaIP, InsertPointTy codeGenIP, 609 llvm::BasicBlock &finiBB) { 610 builder.restoreIP(codeGenIP); 611 builder.CreateBr(&finiBB); 612 convertOmpOpRegions(region, "omp.section.region", *codeGenIP.getBlock(), 613 finiBB, builder, moduleTranslation, bodyGenStatus); 614 }; 615 sectionCBs.push_back(sectionCB); 616 } 617 618 // No sections within omp.sections operation - skip generation. This situation 619 // is only possible if there is only a terminator operation inside the 620 // sections operation 621 if (sectionCBs.empty()) 622 return success(); 623 624 assert(isa<omp::SectionOp>(*sectionsOp.region().op_begin())); 625 626 // TODO: Perform appropriate actions according to the data-sharing 627 // attribute (shared, private, firstprivate, ...) of variables. 628 // Currently defaults to shared. 629 auto privCB = [&](InsertPointTy, InsertPointTy codeGenIP, llvm::Value &, 630 llvm::Value &vPtr, 631 llvm::Value *&replacementValue) -> InsertPointTy { 632 replacementValue = &vPtr; 633 return codeGenIP; 634 }; 635 636 // TODO: Perform finalization actions for variables. This has to be 637 // called for variables which have destructors/finalizers. 638 auto finiCB = [&](InsertPointTy codeGenIP) {}; 639 640 llvm::OpenMPIRBuilder::LocationDescription ompLoc( 641 builder.saveIP(), builder.getCurrentDebugLocation()); 642 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSections( 643 ompLoc, findAllocaInsertPoint(builder, moduleTranslation), sectionCBs, 644 privCB, finiCB, false, sectionsOp.nowait())); 645 return bodyGenStatus; 646 } 647 648 /// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder. 649 static LogicalResult 650 convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, 651 LLVM::ModuleTranslation &moduleTranslation) { 652 auto loop = cast<omp::WsLoopOp>(opInst); 653 // TODO: this should be in the op verifier instead. 654 if (loop.lowerBound().empty()) 655 return failure(); 656 657 // Static is the default. 658 auto schedule = 659 loop.schedule_val().getValueOr(omp::ClauseScheduleKind::Static); 660 661 // Find the loop configuration. 662 llvm::Value *step = moduleTranslation.lookupValue(loop.step()[0]); 663 llvm::Type *ivType = step->getType(); 664 llvm::Value *chunk = nullptr; 665 if (loop.schedule_chunk_var()) { 666 llvm::Value *chunkVar = 667 moduleTranslation.lookupValue(loop.schedule_chunk_var()); 668 llvm::Type *chunkVarType = chunkVar->getType(); 669 assert(chunkVarType->isIntegerTy() && 670 "chunk size must be one integer expression"); 671 if (chunkVarType->getIntegerBitWidth() < ivType->getIntegerBitWidth()) 672 chunk = builder.CreateSExt(chunkVar, ivType); 673 else if (chunkVarType->getIntegerBitWidth() > ivType->getIntegerBitWidth()) 674 chunk = builder.CreateTrunc(chunkVar, ivType); 675 else 676 chunk = chunkVar; 677 } 678 679 SmallVector<omp::ReductionDeclareOp> reductionDecls; 680 collectReductionDecls(loop, reductionDecls); 681 llvm::OpenMPIRBuilder::InsertPointTy allocaIP = 682 findAllocaInsertPoint(builder, moduleTranslation); 683 684 // Allocate space for privatized reduction variables. 685 SmallVector<llvm::Value *> privateReductionVariables; 686 DenseMap<Value, llvm::Value *> reductionVariableMap; 687 unsigned numReductions = loop.getNumReductionVars(); 688 privateReductionVariables.reserve(numReductions); 689 if (numReductions != 0) { 690 llvm::IRBuilderBase::InsertPointGuard guard(builder); 691 builder.restoreIP(allocaIP); 692 for (unsigned i = 0; i < numReductions; ++i) { 693 auto reductionType = 694 loop.reduction_vars()[i].getType().cast<LLVM::LLVMPointerType>(); 695 llvm::Value *var = builder.CreateAlloca( 696 moduleTranslation.convertType(reductionType.getElementType())); 697 privateReductionVariables.push_back(var); 698 reductionVariableMap.try_emplace(loop.reduction_vars()[i], var); 699 } 700 } 701 702 // Store the mapping between reduction variables and their private copies on 703 // ModuleTranslation stack. It can be then recovered when translating 704 // omp.reduce operations in a separate call. 705 LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard( 706 moduleTranslation, reductionVariableMap); 707 708 // Before the loop, store the initial values of reductions into reduction 709 // variables. Although this could be done after allocas, we don't want to mess 710 // up with the alloca insertion point. 711 for (unsigned i = 0; i < numReductions; ++i) { 712 SmallVector<llvm::Value *> phis; 713 if (failed(inlineConvertOmpRegions(reductionDecls[i].initializerRegion(), 714 "omp.reduction.neutral", builder, 715 moduleTranslation, &phis))) 716 return failure(); 717 assert(phis.size() == 1 && "expected one value to be yielded from the " 718 "reduction neutral element declaration region"); 719 builder.CreateStore(phis[0], privateReductionVariables[i]); 720 } 721 722 // Set up the source location value for OpenMP runtime. 723 llvm::DISubprogram *subprogram = 724 builder.GetInsertBlock()->getParent()->getSubprogram(); 725 const llvm::DILocation *diLoc = 726 moduleTranslation.translateLoc(opInst.getLoc(), subprogram); 727 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder.saveIP(), 728 llvm::DebugLoc(diLoc)); 729 730 // Generator of the canonical loop body. 731 // TODO: support error propagation in OpenMPIRBuilder and use it instead of 732 // relying on captured variables. 733 SmallVector<llvm::CanonicalLoopInfo *> loopInfos; 734 SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints; 735 LogicalResult bodyGenStatus = success(); 736 auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) { 737 // Make sure further conversions know about the induction variable. 738 moduleTranslation.mapValue( 739 loop.getRegion().front().getArgument(loopInfos.size()), iv); 740 741 // Capture the body insertion point for use in nested loops. BodyIP of the 742 // CanonicalLoopInfo always points to the beginning of the entry block of 743 // the body. 744 bodyInsertPoints.push_back(ip); 745 746 if (loopInfos.size() != loop.getNumLoops() - 1) 747 return; 748 749 // Convert the body of the loop. 750 llvm::BasicBlock *entryBlock = ip.getBlock(); 751 llvm::BasicBlock *exitBlock = 752 entryBlock->splitBasicBlock(ip.getPoint(), "omp.wsloop.exit"); 753 convertOmpOpRegions(loop.region(), "omp.wsloop.region", *entryBlock, 754 *exitBlock, builder, moduleTranslation, bodyGenStatus); 755 }; 756 757 // Delegate actual loop construction to the OpenMP IRBuilder. 758 // TODO: this currently assumes WsLoop is semantically similar to SCF loop, 759 // i.e. it has a positive step, uses signed integer semantics. Reconsider 760 // this code when WsLoop clearly supports more cases. 761 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 762 for (unsigned i = 0, e = loop.getNumLoops(); i < e; ++i) { 763 llvm::Value *lowerBound = 764 moduleTranslation.lookupValue(loop.lowerBound()[i]); 765 llvm::Value *upperBound = 766 moduleTranslation.lookupValue(loop.upperBound()[i]); 767 llvm::Value *step = moduleTranslation.lookupValue(loop.step()[i]); 768 769 // Make sure loop trip count are emitted in the preheader of the outermost 770 // loop at the latest so that they are all available for the new collapsed 771 // loop will be created below. 772 llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc; 773 llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP; 774 if (i != 0) { 775 loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(), 776 llvm::DebugLoc(diLoc)); 777 computeIP = loopInfos.front()->getPreheaderIP(); 778 } 779 loopInfos.push_back(ompBuilder->createCanonicalLoop( 780 loc, bodyGen, lowerBound, upperBound, step, 781 /*IsSigned=*/true, loop.inclusive(), computeIP)); 782 783 if (failed(bodyGenStatus)) 784 return failure(); 785 } 786 787 // Collapse loops. Store the insertion point because LoopInfos may get 788 // invalidated. 789 llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP(); 790 llvm::CanonicalLoopInfo *loopInfo = 791 ompBuilder->collapseLoops(diLoc, loopInfos, {}); 792 793 allocaIP = findAllocaInsertPoint(builder, moduleTranslation); 794 795 bool isSimd = loop.simd_modifier(); 796 797 if (schedule == omp::ClauseScheduleKind::Static) { 798 ompBuilder->applyStaticWorkshareLoop(ompLoc.DL, loopInfo, allocaIP, 799 !loop.nowait(), chunk); 800 } else { 801 llvm::omp::OMPScheduleType schedType; 802 switch (schedule) { 803 case omp::ClauseScheduleKind::Dynamic: 804 schedType = llvm::omp::OMPScheduleType::DynamicChunked; 805 break; 806 case omp::ClauseScheduleKind::Guided: 807 if (isSimd) 808 schedType = llvm::omp::OMPScheduleType::GuidedSimd; 809 else 810 schedType = llvm::omp::OMPScheduleType::GuidedChunked; 811 break; 812 case omp::ClauseScheduleKind::Auto: 813 schedType = llvm::omp::OMPScheduleType::Auto; 814 break; 815 case omp::ClauseScheduleKind::Runtime: 816 if (isSimd) 817 schedType = llvm::omp::OMPScheduleType::RuntimeSimd; 818 else 819 schedType = llvm::omp::OMPScheduleType::Runtime; 820 break; 821 default: 822 llvm_unreachable("Unknown schedule value"); 823 break; 824 } 825 826 if (Optional<omp::ScheduleModifier> modifier = loop.schedule_modifier()) { 827 switch (*modifier) { 828 case omp::ScheduleModifier::monotonic: 829 schedType |= llvm::omp::OMPScheduleType::ModifierMonotonic; 830 break; 831 case omp::ScheduleModifier::nonmonotonic: 832 schedType |= llvm::omp::OMPScheduleType::ModifierNonmonotonic; 833 break; 834 default: 835 // Nothing to do here. 836 break; 837 } 838 } 839 afterIP = ompBuilder->applyDynamicWorkshareLoop( 840 ompLoc.DL, loopInfo, allocaIP, schedType, !loop.nowait(), chunk); 841 } 842 843 // Continue building IR after the loop. Note that the LoopInfo returned by 844 // `collapseLoops` points inside the outermost loop and is intended for 845 // potential further loop transformations. Use the insertion point stored 846 // before collapsing loops instead. 847 builder.restoreIP(afterIP); 848 849 // Process the reductions if required. 850 if (numReductions == 0) 851 return success(); 852 853 // Create the reduction generators. We need to own them here because 854 // ReductionInfo only accepts references to the generators. 855 SmallVector<OwningReductionGen> owningReductionGens; 856 SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens; 857 for (unsigned i = 0; i < numReductions; ++i) { 858 owningReductionGens.push_back( 859 makeReductionGen(reductionDecls[i], builder, moduleTranslation)); 860 owningAtomicReductionGens.push_back( 861 makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation)); 862 } 863 864 // Collect the reduction information. 865 SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos; 866 reductionInfos.reserve(numReductions); 867 for (unsigned i = 0; i < numReductions; ++i) { 868 llvm::OpenMPIRBuilder::AtomicReductionGenTy atomicGen = nullptr; 869 if (owningAtomicReductionGens[i]) 870 atomicGen = owningAtomicReductionGens[i]; 871 llvm::Value *variable = 872 moduleTranslation.lookupValue(loop.reduction_vars()[i]); 873 reductionInfos.push_back({variable->getType()->getPointerElementType(), 874 variable, privateReductionVariables[i], 875 owningReductionGens[i], atomicGen}); 876 } 877 878 // The call to createReductions below expects the block to have a 879 // terminator. Create an unreachable instruction to serve as terminator 880 // and remove it later. 881 llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable(); 882 builder.SetInsertPoint(tempTerminator); 883 llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint = 884 ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos, 885 loop.nowait()); 886 if (!contInsertPoint.getBlock()) 887 return loop->emitOpError() << "failed to convert reductions"; 888 auto nextInsertionPoint = 889 ompBuilder->createBarrier(contInsertPoint, llvm::omp::OMPD_for); 890 tempTerminator->eraseFromParent(); 891 builder.restoreIP(nextInsertionPoint); 892 893 return success(); 894 } 895 896 /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering. 897 llvm::AtomicOrdering 898 convertAtomicOrdering(Optional<omp::ClauseMemoryOrderKind> ao) { 899 if (!ao) 900 return llvm::AtomicOrdering::Monotonic; // Default Memory Ordering 901 902 switch (*ao) { 903 case omp::ClauseMemoryOrderKind::seq_cst: 904 return llvm::AtomicOrdering::SequentiallyConsistent; 905 case omp::ClauseMemoryOrderKind::acq_rel: 906 return llvm::AtomicOrdering::AcquireRelease; 907 case omp::ClauseMemoryOrderKind::acquire: 908 return llvm::AtomicOrdering::Acquire; 909 case omp::ClauseMemoryOrderKind::release: 910 return llvm::AtomicOrdering::Release; 911 case omp::ClauseMemoryOrderKind::relaxed: 912 return llvm::AtomicOrdering::Monotonic; 913 } 914 llvm_unreachable("Unknown ClauseMemoryOrderKind kind"); 915 } 916 917 /// Convert omp.atomic.read operation to LLVM IR. 918 static LogicalResult 919 convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder, 920 LLVM::ModuleTranslation &moduleTranslation) { 921 922 auto readOp = cast<omp::AtomicReadOp>(opInst); 923 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 924 925 // Set up the source location value for OpenMP runtime. 926 llvm::DISubprogram *subprogram = 927 builder.GetInsertBlock()->getParent()->getSubprogram(); 928 const llvm::DILocation *diLoc = 929 moduleTranslation.translateLoc(opInst.getLoc(), subprogram); 930 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder.saveIP(), 931 llvm::DebugLoc(diLoc)); 932 llvm::AtomicOrdering AO = convertAtomicOrdering(readOp.memory_order()); 933 llvm::Value *x = moduleTranslation.lookupValue(readOp.x()); 934 Type xTy = readOp.x().getType().cast<omp::PointerLikeType>().getElementType(); 935 llvm::Value *v = moduleTranslation.lookupValue(readOp.v()); 936 Type vTy = readOp.v().getType().cast<omp::PointerLikeType>().getElementType(); 937 llvm::OpenMPIRBuilder::AtomicOpValue V = { 938 v, moduleTranslation.convertType(vTy), false, false}; 939 llvm::OpenMPIRBuilder::AtomicOpValue X = { 940 x, moduleTranslation.convertType(xTy), false, false}; 941 builder.restoreIP(ompBuilder->createAtomicRead(ompLoc, X, V, AO)); 942 return success(); 943 } 944 945 /// Converts an omp.atomic.write operation to LLVM IR. 946 static LogicalResult 947 convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder, 948 LLVM::ModuleTranslation &moduleTranslation) { 949 auto writeOp = cast<omp::AtomicWriteOp>(opInst); 950 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 951 952 // Set up the source location value for OpenMP runtime. 953 llvm::DISubprogram *subprogram = 954 builder.GetInsertBlock()->getParent()->getSubprogram(); 955 const llvm::DILocation *diLoc = 956 moduleTranslation.translateLoc(opInst.getLoc(), subprogram); 957 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder.saveIP(), 958 llvm::DebugLoc(diLoc)); 959 llvm::AtomicOrdering ao = convertAtomicOrdering(writeOp.memory_order()); 960 llvm::Value *expr = moduleTranslation.lookupValue(writeOp.value()); 961 llvm::Value *dest = moduleTranslation.lookupValue(writeOp.address()); 962 llvm::Type *ty = moduleTranslation.convertType(writeOp.value().getType()); 963 llvm::OpenMPIRBuilder::AtomicOpValue x = {dest, ty, /*isSigned=*/false, 964 /*isVolatile=*/false}; 965 builder.restoreIP(ompBuilder->createAtomicWrite(ompLoc, x, expr, ao)); 966 return success(); 967 } 968 969 /// Converts an OpenMP reduction operation using OpenMPIRBuilder. Expects the 970 /// mapping between reduction variables and their private equivalents to have 971 /// been stored on the ModuleTranslation stack. Currently only supports 972 /// reduction within WsLoopOp, but can be easily extended. 973 static LogicalResult 974 convertOmpReductionOp(omp::ReductionOp reductionOp, 975 llvm::IRBuilderBase &builder, 976 LLVM::ModuleTranslation &moduleTranslation) { 977 // Find the declaration that corresponds to the reduction op. 978 auto reductionContainer = reductionOp->getParentOfType<omp::WsLoopOp>(); 979 omp::ReductionDeclareOp declaration = 980 findReductionDecl(reductionContainer, reductionOp); 981 assert(declaration && "could not find reduction declaration"); 982 983 // Retrieve the mapping between reduction variables and their private 984 // equivalents. 985 const DenseMap<Value, llvm::Value *> *reductionVariableMap = nullptr; 986 moduleTranslation.stackWalk<OpenMPVarMappingStackFrame>( 987 [&](const OpenMPVarMappingStackFrame &frame) { 988 reductionVariableMap = &frame.mapping; 989 return WalkResult::interrupt(); 990 }); 991 assert(reductionVariableMap && "couldn't find private reduction variables"); 992 993 // Translate the reduction operation by emitting the body of the corresponding 994 // reduction declaration. 995 Region &reductionRegion = declaration.reductionRegion(); 996 llvm::Value *privateReductionVar = 997 reductionVariableMap->lookup(reductionOp.accumulator()); 998 llvm::Value *reductionVal = builder.CreateLoad( 999 moduleTranslation.convertType(reductionOp.operand().getType()), 1000 privateReductionVar); 1001 1002 moduleTranslation.mapValue(reductionRegion.front().getArgument(0), 1003 reductionVal); 1004 moduleTranslation.mapValue( 1005 reductionRegion.front().getArgument(1), 1006 moduleTranslation.lookupValue(reductionOp.operand())); 1007 1008 SmallVector<llvm::Value *> phis; 1009 if (failed(inlineConvertOmpRegions(reductionRegion, "omp.reduction.body", 1010 builder, moduleTranslation, &phis))) 1011 return failure(); 1012 assert(phis.size() == 1 && "expected one value to be yielded from " 1013 "the reduction body declaration region"); 1014 builder.CreateStore(phis[0], privateReductionVar); 1015 return success(); 1016 } 1017 1018 namespace { 1019 1020 /// Implementation of the dialect interface that converts operations belonging 1021 /// to the OpenMP dialect to LLVM IR. 1022 class OpenMPDialectLLVMIRTranslationInterface 1023 : public LLVMTranslationDialectInterface { 1024 public: 1025 using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface; 1026 1027 /// Translates the given operation to LLVM IR using the provided IR builder 1028 /// and saving the state in `moduleTranslation`. 1029 LogicalResult 1030 convertOperation(Operation *op, llvm::IRBuilderBase &builder, 1031 LLVM::ModuleTranslation &moduleTranslation) const final; 1032 }; 1033 1034 } // namespace 1035 1036 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR 1037 /// (including OpenMP runtime calls). 1038 LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation( 1039 Operation *op, llvm::IRBuilderBase &builder, 1040 LLVM::ModuleTranslation &moduleTranslation) const { 1041 1042 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 1043 1044 return llvm::TypeSwitch<Operation *, LogicalResult>(op) 1045 .Case([&](omp::BarrierOp) { 1046 ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier); 1047 return success(); 1048 }) 1049 .Case([&](omp::TaskwaitOp) { 1050 ompBuilder->createTaskwait(builder.saveIP()); 1051 return success(); 1052 }) 1053 .Case([&](omp::TaskyieldOp) { 1054 ompBuilder->createTaskyield(builder.saveIP()); 1055 return success(); 1056 }) 1057 .Case([&](omp::FlushOp) { 1058 // No support in Openmp runtime function (__kmpc_flush) to accept 1059 // the argument list. 1060 // OpenMP standard states the following: 1061 // "An implementation may implement a flush with a list by ignoring 1062 // the list, and treating it the same as a flush without a list." 1063 // 1064 // The argument list is discarded so that, flush with a list is treated 1065 // same as a flush without a list. 1066 ompBuilder->createFlush(builder.saveIP()); 1067 return success(); 1068 }) 1069 .Case([&](omp::ParallelOp op) { 1070 return convertOmpParallel(op, builder, moduleTranslation); 1071 }) 1072 .Case([&](omp::ReductionOp reductionOp) { 1073 return convertOmpReductionOp(reductionOp, builder, moduleTranslation); 1074 }) 1075 .Case([&](omp::MasterOp) { 1076 return convertOmpMaster(*op, builder, moduleTranslation); 1077 }) 1078 .Case([&](omp::CriticalOp) { 1079 return convertOmpCritical(*op, builder, moduleTranslation); 1080 }) 1081 .Case([&](omp::OrderedRegionOp) { 1082 return convertOmpOrderedRegion(*op, builder, moduleTranslation); 1083 }) 1084 .Case([&](omp::OrderedOp) { 1085 return convertOmpOrdered(*op, builder, moduleTranslation); 1086 }) 1087 .Case([&](omp::WsLoopOp) { 1088 return convertOmpWsLoop(*op, builder, moduleTranslation); 1089 }) 1090 .Case([&](omp::AtomicReadOp) { 1091 return convertOmpAtomicRead(*op, builder, moduleTranslation); 1092 }) 1093 .Case([&](omp::AtomicWriteOp) { 1094 return convertOmpAtomicWrite(*op, builder, moduleTranslation); 1095 }) 1096 .Case([&](omp::SectionsOp) { 1097 return convertOmpSections(*op, builder, moduleTranslation); 1098 }) 1099 .Case<omp::YieldOp, omp::TerminatorOp, omp::ReductionDeclareOp, 1100 omp::CriticalDeclareOp>([](auto op) { 1101 // `yield` and `terminator` can be just omitted. The block structure 1102 // was created in the region that handles their parent operation. 1103 // `reduction.declare` will be used by reductions and is not 1104 // converted directly, skip it. 1105 // `critical.declare` is only used to declare names of critical 1106 // sections which will be used by `critical` ops and hence can be 1107 // ignored for lowering. The OpenMP IRBuilder will create unique 1108 // name for critical section names. 1109 return success(); 1110 }) 1111 .Default([&](Operation *inst) { 1112 return inst->emitError("unsupported OpenMP operation: ") 1113 << inst->getName(); 1114 }); 1115 } 1116 1117 void mlir::registerOpenMPDialectTranslation(DialectRegistry ®istry) { 1118 registry.insert<omp::OpenMPDialect>(); 1119 registry.addDialectInterface<omp::OpenMPDialect, 1120 OpenMPDialectLLVMIRTranslationInterface>(); 1121 } 1122 1123 void mlir::registerOpenMPDialectTranslation(MLIRContext &context) { 1124 DialectRegistry registry; 1125 registerOpenMPDialectTranslation(registry); 1126 context.appendDialectRegistry(registry); 1127 } 1128