1 //===- OpenMPToLLVMIRTranslation.cpp - Translate OpenMP dialect to LLVM IR-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a translation between the MLIR OpenMP dialect and LLVM 10 // IR. 11 // 12 //===----------------------------------------------------------------------===// 13 #include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h" 14 #include "mlir/Dialect/OpenMP/OpenMPDialect.h" 15 #include "mlir/IR/BlockAndValueMapping.h" 16 #include "mlir/IR/Operation.h" 17 #include "mlir/Support/LLVM.h" 18 #include "mlir/Target/LLVMIR/ModuleTranslation.h" 19 20 #include "llvm/ADT/SetVector.h" 21 #include "llvm/ADT/TypeSwitch.h" 22 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" 23 #include "llvm/IR/DebugInfoMetadata.h" 24 #include "llvm/IR/IRBuilder.h" 25 26 using namespace mlir; 27 28 namespace { 29 /// ModuleTranslation stack frame for OpenMP operations. This keeps track of the 30 /// insertion points for allocas. 31 class OpenMPAllocaStackFrame 32 : public LLVM::ModuleTranslation::StackFrameBase<OpenMPAllocaStackFrame> { 33 public: 34 MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPAllocaStackFrame) 35 36 explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP) 37 : allocaInsertPoint(allocaIP) {} 38 llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint; 39 }; 40 41 /// ModuleTranslation stack frame containing the partial mapping between MLIR 42 /// values and their LLVM IR equivalents. 43 class OpenMPVarMappingStackFrame 44 : public LLVM::ModuleTranslation::StackFrameBase< 45 OpenMPVarMappingStackFrame> { 46 public: 47 MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPVarMappingStackFrame) 48 49 explicit OpenMPVarMappingStackFrame( 50 const DenseMap<Value, llvm::Value *> &mapping) 51 : mapping(mapping) {} 52 53 DenseMap<Value, llvm::Value *> mapping; 54 }; 55 } // namespace 56 57 /// Find the insertion point for allocas given the current insertion point for 58 /// normal operations in the builder. 59 static llvm::OpenMPIRBuilder::InsertPointTy 60 findAllocaInsertPoint(llvm::IRBuilderBase &builder, 61 const LLVM::ModuleTranslation &moduleTranslation) { 62 // If there is an alloca insertion point on stack, i.e. we are in a nested 63 // operation and a specific point was provided by some surrounding operation, 64 // use it. 65 llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint; 66 WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocaStackFrame>( 67 [&](const OpenMPAllocaStackFrame &frame) { 68 allocaInsertPoint = frame.allocaInsertPoint; 69 return WalkResult::interrupt(); 70 }); 71 if (walkResult.wasInterrupted()) 72 return allocaInsertPoint; 73 74 // Otherwise, insert to the entry block of the surrounding function. 75 // If the current IRBuilder InsertPoint is the function's entry, it cannot 76 // also be used for alloca insertion which would result in insertion order 77 // confusion. Create a new BasicBlock for the Builder and use the entry block 78 // for the allocs. 79 if (builder.GetInsertBlock() == 80 &builder.GetInsertBlock()->getParent()->getEntryBlock()) { 81 assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() && 82 "Assuming end of basic block"); 83 llvm::BasicBlock *entryBB = llvm::BasicBlock::Create( 84 builder.getContext(), "entry", builder.GetInsertBlock()->getParent(), 85 builder.GetInsertBlock()->getNextNode()); 86 builder.CreateBr(entryBB); 87 builder.SetInsertPoint(entryBB); 88 } 89 90 llvm::BasicBlock &funcEntryBlock = 91 builder.GetInsertBlock()->getParent()->getEntryBlock(); 92 return llvm::OpenMPIRBuilder::InsertPointTy( 93 &funcEntryBlock, funcEntryBlock.getFirstInsertionPt()); 94 } 95 96 /// Converts the given region that appears within an OpenMP dialect operation to 97 /// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the 98 /// region, and a branch from any block with an successor-less OpenMP terminator 99 /// to `continuationBlock`. Populates `continuationBlockPHIs` with the PHI nodes 100 /// of the continuation block if provided. 101 static void convertOmpOpRegions( 102 Region ®ion, StringRef blockName, llvm::BasicBlock &sourceBlock, 103 llvm::BasicBlock &continuationBlock, llvm::IRBuilderBase &builder, 104 LLVM::ModuleTranslation &moduleTranslation, LogicalResult &bodyGenStatus, 105 SmallVectorImpl<llvm::PHINode *> *continuationBlockPHIs = nullptr) { 106 llvm::LLVMContext &llvmContext = builder.getContext(); 107 for (Block &bb : region) { 108 llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create( 109 llvmContext, blockName, builder.GetInsertBlock()->getParent(), 110 builder.GetInsertBlock()->getNextNode()); 111 moduleTranslation.mapBlock(&bb, llvmBB); 112 } 113 114 llvm::Instruction *sourceTerminator = sourceBlock.getTerminator(); 115 116 // Terminators (namely YieldOp) may be forwarding values to the region that 117 // need to be available in the continuation block. Collect the types of these 118 // operands in preparation of creating PHI nodes. 119 SmallVector<llvm::Type *> continuationBlockPHITypes; 120 bool operandsProcessed = false; 121 unsigned numYields = 0; 122 for (Block &bb : region.getBlocks()) { 123 if (omp::YieldOp yield = dyn_cast<omp::YieldOp>(bb.getTerminator())) { 124 if (!operandsProcessed) { 125 for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) { 126 continuationBlockPHITypes.push_back( 127 moduleTranslation.convertType(yield->getOperand(i).getType())); 128 } 129 operandsProcessed = true; 130 } else { 131 assert(continuationBlockPHITypes.size() == yield->getNumOperands() && 132 "mismatching number of values yielded from the region"); 133 for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) { 134 llvm::Type *operandType = 135 moduleTranslation.convertType(yield->getOperand(i).getType()); 136 (void)operandType; 137 assert(continuationBlockPHITypes[i] == operandType && 138 "values of mismatching types yielded from the region"); 139 } 140 } 141 numYields++; 142 } 143 } 144 145 // Insert PHI nodes in the continuation block for any values forwarded by the 146 // terminators in this region. 147 if (!continuationBlockPHITypes.empty()) 148 assert( 149 continuationBlockPHIs && 150 "expected continuation block PHIs if converted regions yield values"); 151 if (continuationBlockPHIs) { 152 llvm::IRBuilderBase::InsertPointGuard guard(builder); 153 continuationBlockPHIs->reserve(continuationBlockPHITypes.size()); 154 builder.SetInsertPoint(&continuationBlock, continuationBlock.begin()); 155 for (llvm::Type *ty : continuationBlockPHITypes) 156 continuationBlockPHIs->push_back(builder.CreatePHI(ty, numYields)); 157 } 158 159 // Convert blocks one by one in topological order to ensure 160 // defs are converted before uses. 161 SetVector<Block *> blocks = 162 LLVM::detail::getTopologicallySortedBlocks(region); 163 for (Block *bb : blocks) { 164 llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb); 165 // Retarget the branch of the entry block to the entry block of the 166 // converted region (regions are single-entry). 167 if (bb->isEntryBlock()) { 168 assert(sourceTerminator->getNumSuccessors() == 1 && 169 "provided entry block has multiple successors"); 170 assert(sourceTerminator->getSuccessor(0) == &continuationBlock && 171 "ContinuationBlock is not the successor of the entry block"); 172 sourceTerminator->setSuccessor(0, llvmBB); 173 } 174 175 llvm::IRBuilderBase::InsertPointGuard guard(builder); 176 if (failed( 177 moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder))) { 178 bodyGenStatus = failure(); 179 return; 180 } 181 182 // Special handling for `omp.yield` and `omp.terminator` (we may have more 183 // than one): they return the control to the parent OpenMP dialect operation 184 // so replace them with the branch to the continuation block. We handle this 185 // here to avoid relying inter-function communication through the 186 // ModuleTranslation class to set up the correct insertion point. This is 187 // also consistent with MLIR's idiom of handling special region terminators 188 // in the same code that handles the region-owning operation. 189 Operation *terminator = bb->getTerminator(); 190 if (isa<omp::TerminatorOp, omp::YieldOp>(terminator)) { 191 builder.CreateBr(&continuationBlock); 192 193 for (unsigned i = 0, e = terminator->getNumOperands(); i < e; ++i) 194 (*continuationBlockPHIs)[i]->addIncoming( 195 moduleTranslation.lookupValue(terminator->getOperand(i)), llvmBB); 196 } 197 } 198 // After all blocks have been traversed and values mapped, connect the PHI 199 // nodes to the results of preceding blocks. 200 LLVM::detail::connectPHINodes(region, moduleTranslation); 201 202 // Remove the blocks and values defined in this region from the mapping since 203 // they are not visible outside of this region. This allows the same region to 204 // be converted several times, that is cloned, without clashes, and slightly 205 // speeds up the lookups. 206 moduleTranslation.forgetMapping(region); 207 } 208 209 /// Convert ProcBindKind from MLIR-generated enum to LLVM enum. 210 static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) { 211 switch (kind) { 212 case omp::ClauseProcBindKind::Close: 213 return llvm::omp::ProcBindKind::OMP_PROC_BIND_close; 214 case omp::ClauseProcBindKind::Master: 215 return llvm::omp::ProcBindKind::OMP_PROC_BIND_master; 216 case omp::ClauseProcBindKind::Primary: 217 return llvm::omp::ProcBindKind::OMP_PROC_BIND_primary; 218 case omp::ClauseProcBindKind::Spread: 219 return llvm::omp::ProcBindKind::OMP_PROC_BIND_spread; 220 } 221 llvm_unreachable("Unknown ClauseProcBindKind kind"); 222 } 223 224 /// Converts the OpenMP parallel operation to LLVM IR. 225 static LogicalResult 226 convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, 227 LLVM::ModuleTranslation &moduleTranslation) { 228 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 229 // TODO: support error propagation in OpenMPIRBuilder and use it instead of 230 // relying on captured variables. 231 LogicalResult bodyGenStatus = success(); 232 233 auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, 234 llvm::BasicBlock &continuationBlock) { 235 // Save the alloca insertion point on ModuleTranslation stack for use in 236 // nested regions. 237 LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame( 238 moduleTranslation, allocaIP); 239 240 // ParallelOp has only one region associated with it. 241 convertOmpOpRegions(opInst.getRegion(), "omp.par.region", 242 *codeGenIP.getBlock(), continuationBlock, builder, 243 moduleTranslation, bodyGenStatus); 244 }; 245 246 // TODO: Perform appropriate actions according to the data-sharing 247 // attribute (shared, private, firstprivate, ...) of variables. 248 // Currently defaults to shared. 249 auto privCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, 250 llvm::Value &, llvm::Value &vPtr, 251 llvm::Value *&replacementValue) -> InsertPointTy { 252 replacementValue = &vPtr; 253 254 return codeGenIP; 255 }; 256 257 // TODO: Perform finalization actions for variables. This has to be 258 // called for variables which have destructors/finalizers. 259 auto finiCB = [&](InsertPointTy codeGenIP) {}; 260 261 llvm::Value *ifCond = nullptr; 262 if (auto ifExprVar = opInst.if_expr_var()) 263 ifCond = moduleTranslation.lookupValue(ifExprVar); 264 llvm::Value *numThreads = nullptr; 265 if (auto numThreadsVar = opInst.num_threads_var()) 266 numThreads = moduleTranslation.lookupValue(numThreadsVar); 267 auto pbKind = llvm::omp::OMP_PROC_BIND_default; 268 if (auto bind = opInst.proc_bind_val()) 269 pbKind = getProcBindKind(*bind); 270 // TODO: Is the Parallel construct cancellable? 271 bool isCancellable = false; 272 273 llvm::OpenMPIRBuilder::InsertPointTy allocaIP = 274 findAllocaInsertPoint(builder, moduleTranslation); 275 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 276 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createParallel( 277 ompLoc, allocaIP, bodyGenCB, privCB, finiCB, ifCond, numThreads, pbKind, 278 isCancellable)); 279 280 return bodyGenStatus; 281 } 282 283 /// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder. 284 static LogicalResult 285 convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder, 286 LLVM::ModuleTranslation &moduleTranslation) { 287 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 288 // TODO: support error propagation in OpenMPIRBuilder and use it instead of 289 // relying on captured variables. 290 LogicalResult bodyGenStatus = success(); 291 292 auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, 293 llvm::BasicBlock &continuationBlock) { 294 // MasterOp has only one region associated with it. 295 auto ®ion = cast<omp::MasterOp>(opInst).getRegion(); 296 convertOmpOpRegions(region, "omp.master.region", *codeGenIP.getBlock(), 297 continuationBlock, builder, moduleTranslation, 298 bodyGenStatus); 299 }; 300 301 // TODO: Perform finalization actions for variables. This has to be 302 // called for variables which have destructors/finalizers. 303 auto finiCB = [&](InsertPointTy codeGenIP) {}; 304 305 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 306 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createMaster( 307 ompLoc, bodyGenCB, finiCB)); 308 return success(); 309 } 310 311 /// Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder. 312 static LogicalResult 313 convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder, 314 LLVM::ModuleTranslation &moduleTranslation) { 315 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 316 auto criticalOp = cast<omp::CriticalOp>(opInst); 317 // TODO: support error propagation in OpenMPIRBuilder and use it instead of 318 // relying on captured variables. 319 LogicalResult bodyGenStatus = success(); 320 321 auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, 322 llvm::BasicBlock &continuationBlock) { 323 // CriticalOp has only one region associated with it. 324 auto ®ion = cast<omp::CriticalOp>(opInst).getRegion(); 325 convertOmpOpRegions(region, "omp.critical.region", *codeGenIP.getBlock(), 326 continuationBlock, builder, moduleTranslation, 327 bodyGenStatus); 328 }; 329 330 // TODO: Perform finalization actions for variables. This has to be 331 // called for variables which have destructors/finalizers. 332 auto finiCB = [&](InsertPointTy codeGenIP) {}; 333 334 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 335 llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext(); 336 llvm::Constant *hint = nullptr; 337 338 // If it has a name, it probably has a hint too. 339 if (criticalOp.nameAttr()) { 340 // The verifiers in OpenMP Dialect guarentee that all the pointers are 341 // non-null 342 auto symbolRef = criticalOp.nameAttr().cast<SymbolRefAttr>(); 343 auto criticalDeclareOp = 344 SymbolTable::lookupNearestSymbolFrom<omp::CriticalDeclareOp>(criticalOp, 345 symbolRef); 346 hint = 347 llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext), 348 static_cast<int>(criticalDeclareOp.hint_val())); 349 } 350 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createCritical( 351 ompLoc, bodyGenCB, finiCB, criticalOp.name().getValueOr(""), hint)); 352 return success(); 353 } 354 355 /// Returns a reduction declaration that corresponds to the given reduction 356 /// operation in the given container. Currently only supports reductions inside 357 /// WsLoopOp but can be easily extended. 358 static omp::ReductionDeclareOp findReductionDecl(omp::WsLoopOp container, 359 omp::ReductionOp reduction) { 360 SymbolRefAttr reductionSymbol; 361 for (unsigned i = 0, e = container.getNumReductionVars(); i < e; ++i) { 362 if (container.reduction_vars()[i] != reduction.accumulator()) 363 continue; 364 reductionSymbol = (*container.reductions())[i].cast<SymbolRefAttr>(); 365 break; 366 } 367 assert(reductionSymbol && 368 "reduction operation must be associated with a declaration"); 369 370 return SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>( 371 container, reductionSymbol); 372 } 373 374 /// Populates `reductions` with reduction declarations used in the given loop. 375 static void 376 collectReductionDecls(omp::WsLoopOp loop, 377 SmallVectorImpl<omp::ReductionDeclareOp> &reductions) { 378 Optional<ArrayAttr> attr = loop.reductions(); 379 if (!attr) 380 return; 381 382 reductions.reserve(reductions.size() + loop.getNumReductionVars()); 383 for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) { 384 reductions.push_back( 385 SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>( 386 loop, symbolRef)); 387 } 388 } 389 390 /// Translates the blocks contained in the given region and appends them to at 391 /// the current insertion point of `builder`. The operations of the entry block 392 /// are appended to the current insertion block, which is not expected to have a 393 /// terminator. If set, `continuationBlockArgs` is populated with translated 394 /// values that correspond to the values omp.yield'ed from the region. 395 static LogicalResult inlineConvertOmpRegions( 396 Region ®ion, StringRef blockName, llvm::IRBuilderBase &builder, 397 LLVM::ModuleTranslation &moduleTranslation, 398 SmallVectorImpl<llvm::Value *> *continuationBlockArgs = nullptr) { 399 if (region.empty()) 400 return success(); 401 402 // Special case for single-block regions that don't create additional blocks: 403 // insert operations without creating additional blocks. 404 if (llvm::hasSingleElement(region)) { 405 moduleTranslation.mapBlock(®ion.front(), builder.GetInsertBlock()); 406 if (failed(moduleTranslation.convertBlock( 407 region.front(), /*ignoreArguments=*/true, builder))) 408 return failure(); 409 410 // The continuation arguments are simply the translated terminator operands. 411 if (continuationBlockArgs) 412 llvm::append_range( 413 *continuationBlockArgs, 414 moduleTranslation.lookupValues(region.front().back().getOperands())); 415 416 // Drop the mapping that is no longer necessary so that the same region can 417 // be processed multiple times. 418 moduleTranslation.forgetMapping(region); 419 return success(); 420 } 421 422 // Create the continuation block manually instead of calling splitBlock 423 // because the current insertion block may not have a terminator. 424 llvm::BasicBlock *continuationBlock = 425 llvm::BasicBlock::Create(builder.getContext(), blockName + ".cont", 426 builder.GetInsertBlock()->getParent(), 427 builder.GetInsertBlock()->getNextNode()); 428 builder.CreateBr(continuationBlock); 429 430 LogicalResult bodyGenStatus = success(); 431 SmallVector<llvm::PHINode *> phis; 432 convertOmpOpRegions(region, blockName, *builder.GetInsertBlock(), 433 *continuationBlock, builder, moduleTranslation, 434 bodyGenStatus, &phis); 435 if (failed(bodyGenStatus)) 436 return failure(); 437 if (continuationBlockArgs) 438 llvm::append_range(*continuationBlockArgs, phis); 439 builder.SetInsertPoint(continuationBlock, 440 continuationBlock->getFirstInsertionPt()); 441 return success(); 442 } 443 444 namespace { 445 /// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used to 446 /// store lambdas with capture. 447 using OwningReductionGen = std::function<llvm::OpenMPIRBuilder::InsertPointTy( 448 llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *, 449 llvm::Value *&)>; 450 using OwningAtomicReductionGen = 451 std::function<llvm::OpenMPIRBuilder::InsertPointTy( 452 llvm::OpenMPIRBuilder::InsertPointTy, llvm::Type *, llvm::Value *, 453 llvm::Value *)>; 454 } // namespace 455 456 /// Create an OpenMPIRBuilder-compatible reduction generator for the given 457 /// reduction declaration. The generator uses `builder` but ignores its 458 /// insertion point. 459 static OwningReductionGen 460 makeReductionGen(omp::ReductionDeclareOp decl, llvm::IRBuilderBase &builder, 461 LLVM::ModuleTranslation &moduleTranslation) { 462 // The lambda is mutable because we need access to non-const methods of decl 463 // (which aren't actually mutating it), and we must capture decl by-value to 464 // avoid the dangling reference after the parent function returns. 465 OwningReductionGen gen = 466 [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, 467 llvm::Value *lhs, llvm::Value *rhs, 468 llvm::Value *&result) mutable { 469 Region &reductionRegion = decl.reductionRegion(); 470 moduleTranslation.mapValue(reductionRegion.front().getArgument(0), lhs); 471 moduleTranslation.mapValue(reductionRegion.front().getArgument(1), rhs); 472 builder.restoreIP(insertPoint); 473 SmallVector<llvm::Value *> phis; 474 if (failed(inlineConvertOmpRegions(reductionRegion, 475 "omp.reduction.nonatomic.body", 476 builder, moduleTranslation, &phis))) 477 return llvm::OpenMPIRBuilder::InsertPointTy(); 478 assert(phis.size() == 1); 479 result = phis[0]; 480 return builder.saveIP(); 481 }; 482 return gen; 483 } 484 485 /// Create an OpenMPIRBuilder-compatible atomic reduction generator for the 486 /// given reduction declaration. The generator uses `builder` but ignores its 487 /// insertion point. Returns null if there is no atomic region available in the 488 /// reduction declaration. 489 static OwningAtomicReductionGen 490 makeAtomicReductionGen(omp::ReductionDeclareOp decl, 491 llvm::IRBuilderBase &builder, 492 LLVM::ModuleTranslation &moduleTranslation) { 493 if (decl.atomicReductionRegion().empty()) 494 return OwningAtomicReductionGen(); 495 496 // The lambda is mutable because we need access to non-const methods of decl 497 // (which aren't actually mutating it), and we must capture decl by-value to 498 // avoid the dangling reference after the parent function returns. 499 OwningAtomicReductionGen atomicGen = 500 [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, llvm::Type *, 501 llvm::Value *lhs, llvm::Value *rhs) mutable { 502 Region &atomicRegion = decl.atomicReductionRegion(); 503 moduleTranslation.mapValue(atomicRegion.front().getArgument(0), lhs); 504 moduleTranslation.mapValue(atomicRegion.front().getArgument(1), rhs); 505 builder.restoreIP(insertPoint); 506 SmallVector<llvm::Value *> phis; 507 if (failed(inlineConvertOmpRegions(atomicRegion, 508 "omp.reduction.atomic.body", builder, 509 moduleTranslation, &phis))) 510 return llvm::OpenMPIRBuilder::InsertPointTy(); 511 assert(phis.empty()); 512 return builder.saveIP(); 513 }; 514 return atomicGen; 515 } 516 517 /// Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder. 518 static LogicalResult 519 convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder, 520 LLVM::ModuleTranslation &moduleTranslation) { 521 auto orderedOp = cast<omp::OrderedOp>(opInst); 522 523 omp::ClauseDepend dependType = *orderedOp.depend_type_val(); 524 bool isDependSource = dependType == omp::ClauseDepend::dependsource; 525 unsigned numLoops = orderedOp.num_loops_val().getValue(); 526 SmallVector<llvm::Value *> vecValues = 527 moduleTranslation.lookupValues(orderedOp.depend_vec_vars()); 528 529 size_t indexVecValues = 0; 530 while (indexVecValues < vecValues.size()) { 531 SmallVector<llvm::Value *> storeValues; 532 storeValues.reserve(numLoops); 533 for (unsigned i = 0; i < numLoops; i++) { 534 storeValues.push_back(vecValues[indexVecValues]); 535 indexVecValues++; 536 } 537 llvm::OpenMPIRBuilder::InsertPointTy allocaIP = 538 findAllocaInsertPoint(builder, moduleTranslation); 539 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 540 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend( 541 ompLoc, allocaIP, numLoops, storeValues, ".cnt.addr", isDependSource)); 542 } 543 return success(); 544 } 545 546 /// Converts an OpenMP 'ordered_region' operation into LLVM IR using 547 /// OpenMPIRBuilder. 548 static LogicalResult 549 convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder, 550 LLVM::ModuleTranslation &moduleTranslation) { 551 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 552 auto orderedRegionOp = cast<omp::OrderedRegionOp>(opInst); 553 554 // TODO: The code generation for ordered simd directive is not supported yet. 555 if (orderedRegionOp.simd()) 556 return failure(); 557 558 // TODO: support error propagation in OpenMPIRBuilder and use it instead of 559 // relying on captured variables. 560 LogicalResult bodyGenStatus = success(); 561 562 auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, 563 llvm::BasicBlock &continuationBlock) { 564 // OrderedOp has only one region associated with it. 565 auto ®ion = cast<omp::OrderedRegionOp>(opInst).getRegion(); 566 convertOmpOpRegions(region, "omp.ordered.region", *codeGenIP.getBlock(), 567 continuationBlock, builder, moduleTranslation, 568 bodyGenStatus); 569 }; 570 571 // TODO: Perform finalization actions for variables. This has to be 572 // called for variables which have destructors/finalizers. 573 auto finiCB = [&](InsertPointTy codeGenIP) {}; 574 575 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 576 builder.restoreIP( 577 moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd( 578 ompLoc, bodyGenCB, finiCB, !orderedRegionOp.simd())); 579 return bodyGenStatus; 580 } 581 582 static LogicalResult 583 convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, 584 LLVM::ModuleTranslation &moduleTranslation) { 585 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 586 using StorableBodyGenCallbackTy = 587 llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy; 588 589 auto sectionsOp = cast<omp::SectionsOp>(opInst); 590 591 // TODO: Support the following clauses: private, firstprivate, lastprivate, 592 // reduction, allocate 593 if (!sectionsOp.reduction_vars().empty() || sectionsOp.reductions() || 594 !sectionsOp.allocate_vars().empty() || 595 !sectionsOp.allocators_vars().empty()) 596 return emitError(sectionsOp.getLoc()) 597 << "reduction and allocate clauses are not supported for sections " 598 "construct"; 599 600 LogicalResult bodyGenStatus = success(); 601 SmallVector<StorableBodyGenCallbackTy> sectionCBs; 602 603 for (Operation &op : *sectionsOp.region().begin()) { 604 auto sectionOp = dyn_cast<omp::SectionOp>(op); 605 if (!sectionOp) // omp.terminator 606 continue; 607 608 Region ®ion = sectionOp.region(); 609 auto sectionCB = [®ion, &builder, &moduleTranslation, &bodyGenStatus]( 610 InsertPointTy allocaIP, InsertPointTy codeGenIP, 611 llvm::BasicBlock &finiBB) { 612 builder.restoreIP(codeGenIP); 613 builder.CreateBr(&finiBB); 614 convertOmpOpRegions(region, "omp.section.region", *codeGenIP.getBlock(), 615 finiBB, builder, moduleTranslation, bodyGenStatus); 616 }; 617 sectionCBs.push_back(sectionCB); 618 } 619 620 // No sections within omp.sections operation - skip generation. This situation 621 // is only possible if there is only a terminator operation inside the 622 // sections operation 623 if (sectionCBs.empty()) 624 return success(); 625 626 assert(isa<omp::SectionOp>(*sectionsOp.region().op_begin())); 627 628 // TODO: Perform appropriate actions according to the data-sharing 629 // attribute (shared, private, firstprivate, ...) of variables. 630 // Currently defaults to shared. 631 auto privCB = [&](InsertPointTy, InsertPointTy codeGenIP, llvm::Value &, 632 llvm::Value &vPtr, 633 llvm::Value *&replacementValue) -> InsertPointTy { 634 replacementValue = &vPtr; 635 return codeGenIP; 636 }; 637 638 // TODO: Perform finalization actions for variables. This has to be 639 // called for variables which have destructors/finalizers. 640 auto finiCB = [&](InsertPointTy codeGenIP) {}; 641 642 llvm::OpenMPIRBuilder::InsertPointTy allocaIP = 643 findAllocaInsertPoint(builder, moduleTranslation); 644 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 645 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSections( 646 ompLoc, allocaIP, sectionCBs, privCB, finiCB, false, 647 sectionsOp.nowait())); 648 return bodyGenStatus; 649 } 650 651 /// Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder. 652 static LogicalResult 653 convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder, 654 LLVM::ModuleTranslation &moduleTranslation) { 655 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 656 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 657 LogicalResult bodyGenStatus = success(); 658 auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP, 659 llvm::BasicBlock &continuationBB) { 660 convertOmpOpRegions(singleOp.region(), "omp.single.region", 661 *codegenIP.getBlock(), continuationBB, builder, 662 moduleTranslation, bodyGenStatus); 663 }; 664 auto finiCB = [&](InsertPointTy codeGenIP) {}; 665 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSingle( 666 ompLoc, bodyCB, finiCB, singleOp.nowait(), /*DidIt=*/nullptr)); 667 return bodyGenStatus; 668 } 669 670 /// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder. 671 static LogicalResult 672 convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, 673 LLVM::ModuleTranslation &moduleTranslation) { 674 auto loop = cast<omp::WsLoopOp>(opInst); 675 // TODO: this should be in the op verifier instead. 676 if (loop.lowerBound().empty()) 677 return failure(); 678 679 // Static is the default. 680 auto schedule = 681 loop.schedule_val().getValueOr(omp::ClauseScheduleKind::Static); 682 683 // Find the loop configuration. 684 llvm::Value *step = moduleTranslation.lookupValue(loop.step()[0]); 685 llvm::Type *ivType = step->getType(); 686 llvm::Value *chunk = nullptr; 687 if (loop.schedule_chunk_var()) { 688 llvm::Value *chunkVar = 689 moduleTranslation.lookupValue(loop.schedule_chunk_var()); 690 llvm::Type *chunkVarType = chunkVar->getType(); 691 assert(chunkVarType->isIntegerTy() && 692 "chunk size must be one integer expression"); 693 if (chunkVarType->getIntegerBitWidth() < ivType->getIntegerBitWidth()) 694 chunk = builder.CreateSExt(chunkVar, ivType); 695 else if (chunkVarType->getIntegerBitWidth() > ivType->getIntegerBitWidth()) 696 chunk = builder.CreateTrunc(chunkVar, ivType); 697 else 698 chunk = chunkVar; 699 } 700 701 SmallVector<omp::ReductionDeclareOp> reductionDecls; 702 collectReductionDecls(loop, reductionDecls); 703 llvm::OpenMPIRBuilder::InsertPointTy allocaIP = 704 findAllocaInsertPoint(builder, moduleTranslation); 705 706 // Allocate space for privatized reduction variables. 707 SmallVector<llvm::Value *> privateReductionVariables; 708 DenseMap<Value, llvm::Value *> reductionVariableMap; 709 unsigned numReductions = loop.getNumReductionVars(); 710 privateReductionVariables.reserve(numReductions); 711 if (numReductions != 0) { 712 llvm::IRBuilderBase::InsertPointGuard guard(builder); 713 builder.restoreIP(allocaIP); 714 for (unsigned i = 0; i < numReductions; ++i) { 715 auto reductionType = 716 loop.reduction_vars()[i].getType().cast<LLVM::LLVMPointerType>(); 717 llvm::Value *var = builder.CreateAlloca( 718 moduleTranslation.convertType(reductionType.getElementType())); 719 privateReductionVariables.push_back(var); 720 reductionVariableMap.try_emplace(loop.reduction_vars()[i], var); 721 } 722 } 723 724 // Store the mapping between reduction variables and their private copies on 725 // ModuleTranslation stack. It can be then recovered when translating 726 // omp.reduce operations in a separate call. 727 LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard( 728 moduleTranslation, reductionVariableMap); 729 730 // Before the loop, store the initial values of reductions into reduction 731 // variables. Although this could be done after allocas, we don't want to mess 732 // up with the alloca insertion point. 733 for (unsigned i = 0; i < numReductions; ++i) { 734 SmallVector<llvm::Value *> phis; 735 if (failed(inlineConvertOmpRegions(reductionDecls[i].initializerRegion(), 736 "omp.reduction.neutral", builder, 737 moduleTranslation, &phis))) 738 return failure(); 739 assert(phis.size() == 1 && "expected one value to be yielded from the " 740 "reduction neutral element declaration region"); 741 builder.CreateStore(phis[0], privateReductionVariables[i]); 742 } 743 744 // Set up the source location value for OpenMP runtime. 745 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 746 747 // Generator of the canonical loop body. 748 // TODO: support error propagation in OpenMPIRBuilder and use it instead of 749 // relying on captured variables. 750 SmallVector<llvm::CanonicalLoopInfo *> loopInfos; 751 SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints; 752 LogicalResult bodyGenStatus = success(); 753 auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) { 754 // Make sure further conversions know about the induction variable. 755 moduleTranslation.mapValue( 756 loop.getRegion().front().getArgument(loopInfos.size()), iv); 757 758 // Capture the body insertion point for use in nested loops. BodyIP of the 759 // CanonicalLoopInfo always points to the beginning of the entry block of 760 // the body. 761 bodyInsertPoints.push_back(ip); 762 763 if (loopInfos.size() != loop.getNumLoops() - 1) 764 return; 765 766 // Convert the body of the loop. 767 llvm::BasicBlock *entryBlock = ip.getBlock(); 768 llvm::BasicBlock *exitBlock = 769 entryBlock->splitBasicBlock(ip.getPoint(), "omp.wsloop.exit"); 770 convertOmpOpRegions(loop.region(), "omp.wsloop.region", *entryBlock, 771 *exitBlock, builder, moduleTranslation, bodyGenStatus); 772 }; 773 774 // Delegate actual loop construction to the OpenMP IRBuilder. 775 // TODO: this currently assumes WsLoop is semantically similar to SCF loop, 776 // i.e. it has a positive step, uses signed integer semantics. Reconsider 777 // this code when WsLoop clearly supports more cases. 778 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 779 for (unsigned i = 0, e = loop.getNumLoops(); i < e; ++i) { 780 llvm::Value *lowerBound = 781 moduleTranslation.lookupValue(loop.lowerBound()[i]); 782 llvm::Value *upperBound = 783 moduleTranslation.lookupValue(loop.upperBound()[i]); 784 llvm::Value *step = moduleTranslation.lookupValue(loop.step()[i]); 785 786 // Make sure loop trip count are emitted in the preheader of the outermost 787 // loop at the latest so that they are all available for the new collapsed 788 // loop will be created below. 789 llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc; 790 llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP; 791 if (i != 0) { 792 loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back()); 793 computeIP = loopInfos.front()->getPreheaderIP(); 794 } 795 loopInfos.push_back(ompBuilder->createCanonicalLoop( 796 loc, bodyGen, lowerBound, upperBound, step, 797 /*IsSigned=*/true, loop.inclusive(), computeIP)); 798 799 if (failed(bodyGenStatus)) 800 return failure(); 801 } 802 803 // Collapse loops. Store the insertion point because LoopInfos may get 804 // invalidated. 805 llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP(); 806 llvm::CanonicalLoopInfo *loopInfo = 807 ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {}); 808 809 allocaIP = findAllocaInsertPoint(builder, moduleTranslation); 810 811 bool isSimd = loop.simd_modifier(); 812 813 // The orderedVal refers to the value obtained from the ordered[(n)] clause. 814 // orderedVal == -1: No ordered[(n)] clause specified. 815 // orderedVal == 0: The ordered clause specified without a parameter. 816 // orderedVal > 0: The ordered clause specified with a parameter (n). 817 // TODO: Handle doacross loop init when orderedVal is greater than 0. 818 int64_t orderedVal = 819 loop.ordered_val().hasValue() ? loop.ordered_val().getValue() : -1; 820 if (schedule == omp::ClauseScheduleKind::Static && orderedVal != 0) { 821 ompBuilder->applyWorkshareLoop(ompLoc.DL, loopInfo, allocaIP, 822 !loop.nowait(), 823 llvm::omp::OMP_SCHEDULE_Static, chunk); 824 } else { 825 llvm::omp::OMPScheduleType schedType; 826 switch (schedule) { 827 case omp::ClauseScheduleKind::Static: 828 if (loop.schedule_chunk_var()) 829 schedType = llvm::omp::OMPScheduleType::OrderedStaticChunked; 830 else 831 schedType = llvm::omp::OMPScheduleType::OrderedStatic; 832 break; 833 case omp::ClauseScheduleKind::Dynamic: 834 if (orderedVal == 0) 835 schedType = llvm::omp::OMPScheduleType::OrderedDynamicChunked; 836 else 837 schedType = llvm::omp::OMPScheduleType::DynamicChunked; 838 break; 839 case omp::ClauseScheduleKind::Guided: 840 if (orderedVal == 0) { 841 schedType = llvm::omp::OMPScheduleType::OrderedGuidedChunked; 842 } else { 843 if (isSimd) 844 schedType = llvm::omp::OMPScheduleType::GuidedSimd; 845 else 846 schedType = llvm::omp::OMPScheduleType::GuidedChunked; 847 } 848 break; 849 case omp::ClauseScheduleKind::Auto: 850 if (orderedVal == 0) 851 schedType = llvm::omp::OMPScheduleType::OrderedAuto; 852 else 853 schedType = llvm::omp::OMPScheduleType::Auto; 854 break; 855 case omp::ClauseScheduleKind::Runtime: 856 if (orderedVal == 0) { 857 schedType = llvm::omp::OMPScheduleType::OrderedRuntime; 858 } else { 859 if (isSimd) 860 schedType = llvm::omp::OMPScheduleType::RuntimeSimd; 861 else 862 schedType = llvm::omp::OMPScheduleType::Runtime; 863 } 864 break; 865 } 866 867 if (Optional<omp::ScheduleModifier> modifier = loop.schedule_modifier()) { 868 switch (*modifier) { 869 case omp::ScheduleModifier::monotonic: 870 schedType |= llvm::omp::OMPScheduleType::ModifierMonotonic; 871 break; 872 case omp::ScheduleModifier::nonmonotonic: 873 schedType |= llvm::omp::OMPScheduleType::ModifierNonmonotonic; 874 break; 875 default: 876 // Nothing to do here. 877 break; 878 } 879 } else { 880 // OpenMP 5.1, 2.11.4 Worksharing-Loop Construct, Description. 881 // If the static schedule kind is specified or if the ordered clause is 882 // specified, and if the nonmonotonic modifier is not specified, the 883 // effect is as if the monotonic modifier is specified. Otherwise, unless 884 // the monotonic modifier is specified, the effect is as if the 885 // nonmonotonic modifier is specified. 886 // The monotonic is used by default in openmp runtime library, so no need 887 // to set it. 888 if (!(schedType == llvm::omp::OMPScheduleType::OrderedStatic || 889 schedType == llvm::omp::OMPScheduleType::OrderedStaticChunked)) 890 schedType |= llvm::omp::OMPScheduleType::ModifierNonmonotonic; 891 } 892 893 ompBuilder->applyDynamicWorkshareLoop(ompLoc.DL, loopInfo, allocaIP, 894 schedType, !loop.nowait(), chunk, 895 /*ordered*/ orderedVal == 0); 896 } 897 898 // Continue building IR after the loop. Note that the LoopInfo returned by 899 // `collapseLoops` points inside the outermost loop and is intended for 900 // potential further loop transformations. Use the insertion point stored 901 // before collapsing loops instead. 902 builder.restoreIP(afterIP); 903 904 // Process the reductions if required. 905 if (numReductions == 0) 906 return success(); 907 908 // Create the reduction generators. We need to own them here because 909 // ReductionInfo only accepts references to the generators. 910 SmallVector<OwningReductionGen> owningReductionGens; 911 SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens; 912 for (unsigned i = 0; i < numReductions; ++i) { 913 owningReductionGens.push_back( 914 makeReductionGen(reductionDecls[i], builder, moduleTranslation)); 915 owningAtomicReductionGens.push_back( 916 makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation)); 917 } 918 919 // Collect the reduction information. 920 SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos; 921 reductionInfos.reserve(numReductions); 922 for (unsigned i = 0; i < numReductions; ++i) { 923 llvm::OpenMPIRBuilder::AtomicReductionGenTy atomicGen = nullptr; 924 if (owningAtomicReductionGens[i]) 925 atomicGen = owningAtomicReductionGens[i]; 926 auto reductionType = 927 loop.reduction_vars()[i].getType().cast<LLVM::LLVMPointerType>(); 928 llvm::Value *variable = 929 moduleTranslation.lookupValue(loop.reduction_vars()[i]); 930 reductionInfos.push_back( 931 {moduleTranslation.convertType(reductionType.getElementType()), 932 variable, privateReductionVariables[i], owningReductionGens[i], 933 atomicGen}); 934 } 935 936 // The call to createReductions below expects the block to have a 937 // terminator. Create an unreachable instruction to serve as terminator 938 // and remove it later. 939 llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable(); 940 builder.SetInsertPoint(tempTerminator); 941 llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint = 942 ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos, 943 loop.nowait()); 944 if (!contInsertPoint.getBlock()) 945 return loop->emitOpError() << "failed to convert reductions"; 946 auto nextInsertionPoint = 947 ompBuilder->createBarrier(contInsertPoint, llvm::omp::OMPD_for); 948 tempTerminator->eraseFromParent(); 949 builder.restoreIP(nextInsertionPoint); 950 951 return success(); 952 } 953 954 /// Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder. 955 static LogicalResult 956 convertOmpSimdLoop(Operation &opInst, llvm::IRBuilderBase &builder, 957 LLVM::ModuleTranslation &moduleTranslation) { 958 auto loop = cast<omp::SimdLoopOp>(opInst); 959 960 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 961 962 // Generator of the canonical loop body. 963 // TODO: support error propagation in OpenMPIRBuilder and use it instead of 964 // relying on captured variables. 965 SmallVector<llvm::CanonicalLoopInfo *> loopInfos; 966 SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints; 967 LogicalResult bodyGenStatus = success(); 968 auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) { 969 // Make sure further conversions know about the induction variable. 970 moduleTranslation.mapValue( 971 loop.getRegion().front().getArgument(loopInfos.size()), iv); 972 973 // Capture the body insertion point for use in nested loops. BodyIP of the 974 // CanonicalLoopInfo always points to the beginning of the entry block of 975 // the body. 976 bodyInsertPoints.push_back(ip); 977 978 if (loopInfos.size() != loop.getNumLoops() - 1) 979 return; 980 981 // Convert the body of the loop. 982 llvm::BasicBlock *entryBlock = ip.getBlock(); 983 llvm::BasicBlock *exitBlock = 984 entryBlock->splitBasicBlock(ip.getPoint(), "omp.simdloop.exit"); 985 convertOmpOpRegions(loop.region(), "omp.simdloop.region", *entryBlock, 986 *exitBlock, builder, moduleTranslation, bodyGenStatus); 987 }; 988 989 // Delegate actual loop construction to the OpenMP IRBuilder. 990 // TODO: this currently assumes SimdLoop is semantically similar to SCF loop, 991 // i.e. it has a positive step, uses signed integer semantics. Reconsider 992 // this code when SimdLoop clearly supports more cases. 993 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 994 for (unsigned i = 0, e = loop.getNumLoops(); i < e; ++i) { 995 llvm::Value *lowerBound = 996 moduleTranslation.lookupValue(loop.lowerBound()[i]); 997 llvm::Value *upperBound = 998 moduleTranslation.lookupValue(loop.upperBound()[i]); 999 llvm::Value *step = moduleTranslation.lookupValue(loop.step()[i]); 1000 1001 // Make sure loop trip count are emitted in the preheader of the outermost 1002 // loop at the latest so that they are all available for the new collapsed 1003 // loop will be created below. 1004 llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc; 1005 llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP; 1006 if (i != 0) { 1007 loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(), 1008 ompLoc.DL); 1009 computeIP = loopInfos.front()->getPreheaderIP(); 1010 } 1011 loopInfos.push_back(ompBuilder->createCanonicalLoop( 1012 loc, bodyGen, lowerBound, upperBound, step, 1013 /*IsSigned=*/true, /*Inclusive=*/true, computeIP)); 1014 1015 if (failed(bodyGenStatus)) 1016 return failure(); 1017 } 1018 1019 // Collapse loops. 1020 llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP(); 1021 llvm::CanonicalLoopInfo *loopInfo = 1022 ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {}); 1023 1024 ompBuilder->applySimd(ompLoc.DL, loopInfo); 1025 1026 builder.restoreIP(afterIP); 1027 return success(); 1028 } 1029 1030 /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering. 1031 llvm::AtomicOrdering 1032 convertAtomicOrdering(Optional<omp::ClauseMemoryOrderKind> ao) { 1033 if (!ao) 1034 return llvm::AtomicOrdering::Monotonic; // Default Memory Ordering 1035 1036 switch (*ao) { 1037 case omp::ClauseMemoryOrderKind::Seq_cst: 1038 return llvm::AtomicOrdering::SequentiallyConsistent; 1039 case omp::ClauseMemoryOrderKind::Acq_rel: 1040 return llvm::AtomicOrdering::AcquireRelease; 1041 case omp::ClauseMemoryOrderKind::Acquire: 1042 return llvm::AtomicOrdering::Acquire; 1043 case omp::ClauseMemoryOrderKind::Release: 1044 return llvm::AtomicOrdering::Release; 1045 case omp::ClauseMemoryOrderKind::Relaxed: 1046 return llvm::AtomicOrdering::Monotonic; 1047 } 1048 llvm_unreachable("Unknown ClauseMemoryOrderKind kind"); 1049 } 1050 1051 /// Convert omp.atomic.read operation to LLVM IR. 1052 static LogicalResult 1053 convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder, 1054 LLVM::ModuleTranslation &moduleTranslation) { 1055 1056 auto readOp = cast<omp::AtomicReadOp>(opInst); 1057 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 1058 1059 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 1060 1061 llvm::AtomicOrdering AO = convertAtomicOrdering(readOp.memory_order_val()); 1062 llvm::Value *x = moduleTranslation.lookupValue(readOp.x()); 1063 Type xTy = readOp.x().getType().cast<omp::PointerLikeType>().getElementType(); 1064 llvm::Value *v = moduleTranslation.lookupValue(readOp.v()); 1065 Type vTy = readOp.v().getType().cast<omp::PointerLikeType>().getElementType(); 1066 llvm::OpenMPIRBuilder::AtomicOpValue V = { 1067 v, moduleTranslation.convertType(vTy), false, false}; 1068 llvm::OpenMPIRBuilder::AtomicOpValue X = { 1069 x, moduleTranslation.convertType(xTy), false, false}; 1070 builder.restoreIP(ompBuilder->createAtomicRead(ompLoc, X, V, AO)); 1071 return success(); 1072 } 1073 1074 /// Converts an omp.atomic.write operation to LLVM IR. 1075 static LogicalResult 1076 convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder, 1077 LLVM::ModuleTranslation &moduleTranslation) { 1078 auto writeOp = cast<omp::AtomicWriteOp>(opInst); 1079 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 1080 1081 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 1082 llvm::AtomicOrdering ao = convertAtomicOrdering(writeOp.memory_order_val()); 1083 llvm::Value *expr = moduleTranslation.lookupValue(writeOp.value()); 1084 llvm::Value *dest = moduleTranslation.lookupValue(writeOp.address()); 1085 llvm::Type *ty = moduleTranslation.convertType(writeOp.value().getType()); 1086 llvm::OpenMPIRBuilder::AtomicOpValue x = {dest, ty, /*isSigned=*/false, 1087 /*isVolatile=*/false}; 1088 builder.restoreIP(ompBuilder->createAtomicWrite(ompLoc, x, expr, ao)); 1089 return success(); 1090 } 1091 1092 /// Converts an LLVM dialect binary operation to the corresponding enum value 1093 /// for `atomicrmw` supported binary operation. 1094 llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op) { 1095 return llvm::TypeSwitch<Operation *, llvm::AtomicRMWInst::BinOp>(&op) 1096 .Case([&](LLVM::AddOp) { return llvm::AtomicRMWInst::BinOp::Add; }) 1097 .Case([&](LLVM::SubOp) { return llvm::AtomicRMWInst::BinOp::Sub; }) 1098 .Case([&](LLVM::AndOp) { return llvm::AtomicRMWInst::BinOp::And; }) 1099 .Case([&](LLVM::OrOp) { return llvm::AtomicRMWInst::BinOp::Or; }) 1100 .Case([&](LLVM::XOrOp) { return llvm::AtomicRMWInst::BinOp::Xor; }) 1101 .Case([&](LLVM::UMaxOp) { return llvm::AtomicRMWInst::BinOp::UMax; }) 1102 .Case([&](LLVM::UMinOp) { return llvm::AtomicRMWInst::BinOp::UMin; }) 1103 .Case([&](LLVM::FAddOp) { return llvm::AtomicRMWInst::BinOp::FAdd; }) 1104 .Case([&](LLVM::FSubOp) { return llvm::AtomicRMWInst::BinOp::FSub; }) 1105 .Default(llvm::AtomicRMWInst::BinOp::BAD_BINOP); 1106 } 1107 1108 /// Converts an OpenMP atomic update operation using OpenMPIRBuilder. 1109 static LogicalResult 1110 convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst, 1111 llvm::IRBuilderBase &builder, 1112 LLVM::ModuleTranslation &moduleTranslation) { 1113 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 1114 1115 // Convert values and types. 1116 auto &innerOpList = opInst.region().front().getOperations(); 1117 if (innerOpList.size() != 2) 1118 return opInst.emitError("exactly two operations are allowed inside an " 1119 "atomic update region while lowering to LLVM IR"); 1120 1121 Operation &innerUpdateOp = innerOpList.front(); 1122 1123 if (innerUpdateOp.getNumOperands() != 2 || 1124 !llvm::is_contained(innerUpdateOp.getOperands(), 1125 opInst.getRegion().getArgument(0))) 1126 return opInst.emitError( 1127 "the update operation inside the region must be a binary operation and " 1128 "that update operation must have the region argument as an operand"); 1129 1130 llvm::AtomicRMWInst::BinOp binop = convertBinOpToAtomic(innerUpdateOp); 1131 1132 bool isXBinopExpr = 1133 innerUpdateOp.getNumOperands() > 0 && 1134 innerUpdateOp.getOperand(0) == opInst.getRegion().getArgument(0); 1135 1136 mlir::Value mlirExpr = (isXBinopExpr ? innerUpdateOp.getOperand(1) 1137 : innerUpdateOp.getOperand(0)); 1138 llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr); 1139 llvm::Value *llvmX = moduleTranslation.lookupValue(opInst.x()); 1140 LLVM::LLVMPointerType mlirXType = 1141 opInst.x().getType().cast<LLVM::LLVMPointerType>(); 1142 llvm::Type *llvmXElementType = 1143 moduleTranslation.convertType(mlirXType.getElementType()); 1144 llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType, 1145 /*isSigned=*/false, 1146 /*isVolatile=*/false}; 1147 1148 llvm::AtomicOrdering atomicOrdering = 1149 convertAtomicOrdering(opInst.memory_order_val()); 1150 1151 // Generate update code. 1152 LogicalResult updateGenStatus = success(); 1153 auto updateFn = [&opInst, &moduleTranslation, &updateGenStatus]( 1154 llvm::Value *atomicx, 1155 llvm::IRBuilder<> &builder) -> llvm::Value * { 1156 Block &bb = *opInst.region().begin(); 1157 moduleTranslation.mapValue(*opInst.region().args_begin(), atomicx); 1158 moduleTranslation.mapBlock(&bb, builder.GetInsertBlock()); 1159 if (failed(moduleTranslation.convertBlock(bb, true, builder))) { 1160 updateGenStatus = (opInst.emitError() 1161 << "unable to convert update operation to llvm IR"); 1162 return nullptr; 1163 } 1164 omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator()); 1165 assert(yieldop && yieldop.results().size() == 1 && 1166 "terminator must be omp.yield op and it must have exactly one " 1167 "argument"); 1168 return moduleTranslation.lookupValue(yieldop.results()[0]); 1169 }; 1170 1171 // Handle ambiguous alloca, if any. 1172 auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation); 1173 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 1174 builder.restoreIP(ompBuilder->createAtomicUpdate( 1175 ompLoc, allocaIP, llvmAtomicX, llvmExpr, atomicOrdering, binop, updateFn, 1176 isXBinopExpr)); 1177 return updateGenStatus; 1178 } 1179 1180 static LogicalResult 1181 convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp, 1182 llvm::IRBuilderBase &builder, 1183 LLVM::ModuleTranslation &moduleTranslation) { 1184 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 1185 mlir::Value mlirExpr; 1186 bool isXBinopExpr = false, isPostfixUpdate = false; 1187 llvm::AtomicRMWInst::BinOp binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP; 1188 1189 omp::AtomicUpdateOp atomicUpdateOp = atomicCaptureOp.getAtomicUpdateOp(); 1190 omp::AtomicWriteOp atomicWriteOp = atomicCaptureOp.getAtomicWriteOp(); 1191 1192 assert((atomicUpdateOp || atomicWriteOp) && 1193 "internal op must be an atomic.update or atomic.write op"); 1194 1195 if (atomicWriteOp) { 1196 isPostfixUpdate = true; 1197 mlirExpr = atomicWriteOp.value(); 1198 } else { 1199 isPostfixUpdate = atomicCaptureOp.getSecondOp() == 1200 atomicCaptureOp.getAtomicUpdateOp().getOperation(); 1201 auto &innerOpList = atomicUpdateOp.region().front().getOperations(); 1202 if (innerOpList.size() != 2) 1203 return atomicUpdateOp.emitError( 1204 "exactly two operations are allowed inside an " 1205 "atomic update region while lowering to LLVM IR"); 1206 Operation *innerUpdateOp = atomicUpdateOp.getFirstOp(); 1207 if (innerUpdateOp->getNumOperands() != 2 || 1208 !llvm::is_contained(innerUpdateOp->getOperands(), 1209 atomicUpdateOp.getRegion().getArgument(0))) 1210 return atomicUpdateOp.emitError( 1211 "the update operation inside the region must be a binary operation " 1212 "and that update operation must have the region argument as an " 1213 "operand"); 1214 binop = convertBinOpToAtomic(*innerUpdateOp); 1215 1216 isXBinopExpr = innerUpdateOp->getOperand(0) == 1217 atomicUpdateOp.getRegion().getArgument(0); 1218 1219 mlirExpr = (isXBinopExpr ? innerUpdateOp->getOperand(1) 1220 : innerUpdateOp->getOperand(0)); 1221 } 1222 1223 llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr); 1224 llvm::Value *llvmX = 1225 moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().x()); 1226 llvm::Value *llvmV = 1227 moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().v()); 1228 auto mlirXType = atomicCaptureOp.getAtomicReadOp() 1229 .x() 1230 .getType() 1231 .cast<LLVM::LLVMPointerType>(); 1232 llvm::Type *llvmXElementType = 1233 moduleTranslation.convertType(mlirXType.getElementType()); 1234 llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType, 1235 /*isSigned=*/false, 1236 /*isVolatile=*/false}; 1237 llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicV = {llvmV, llvmXElementType, 1238 /*isSigned=*/false, 1239 /*isVolatile=*/false}; 1240 1241 llvm::AtomicOrdering atomicOrdering = 1242 convertAtomicOrdering(atomicCaptureOp.memory_order_val()); 1243 1244 LogicalResult updateGenStatus = success(); 1245 auto updateFn = [&](llvm::Value *atomicx, 1246 llvm::IRBuilder<> &builder) -> llvm::Value * { 1247 if (atomicWriteOp) 1248 return moduleTranslation.lookupValue(atomicWriteOp.value()); 1249 Block &bb = *atomicUpdateOp.region().begin(); 1250 moduleTranslation.mapValue(*atomicUpdateOp.region().args_begin(), atomicx); 1251 moduleTranslation.mapBlock(&bb, builder.GetInsertBlock()); 1252 if (failed(moduleTranslation.convertBlock(bb, true, builder))) { 1253 updateGenStatus = (atomicUpdateOp.emitError() 1254 << "unable to convert update operation to llvm IR"); 1255 return nullptr; 1256 } 1257 omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator()); 1258 assert(yieldop && yieldop.results().size() == 1 && 1259 "terminator must be omp.yield op and it must have exactly one " 1260 "argument"); 1261 return moduleTranslation.lookupValue(yieldop.results()[0]); 1262 }; 1263 1264 // Handle ambiguous alloca, if any. 1265 auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation); 1266 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 1267 builder.restoreIP(ompBuilder->createAtomicCapture( 1268 ompLoc, allocaIP, llvmAtomicX, llvmAtomicV, llvmExpr, atomicOrdering, 1269 binop, updateFn, atomicUpdateOp, isPostfixUpdate, isXBinopExpr)); 1270 return updateGenStatus; 1271 } 1272 1273 /// Converts an OpenMP reduction operation using OpenMPIRBuilder. Expects the 1274 /// mapping between reduction variables and their private equivalents to have 1275 /// been stored on the ModuleTranslation stack. Currently only supports 1276 /// reduction within WsLoopOp, but can be easily extended. 1277 static LogicalResult 1278 convertOmpReductionOp(omp::ReductionOp reductionOp, 1279 llvm::IRBuilderBase &builder, 1280 LLVM::ModuleTranslation &moduleTranslation) { 1281 // Find the declaration that corresponds to the reduction op. 1282 auto reductionContainer = reductionOp->getParentOfType<omp::WsLoopOp>(); 1283 omp::ReductionDeclareOp declaration = 1284 findReductionDecl(reductionContainer, reductionOp); 1285 assert(declaration && "could not find reduction declaration"); 1286 1287 // Retrieve the mapping between reduction variables and their private 1288 // equivalents. 1289 const DenseMap<Value, llvm::Value *> *reductionVariableMap = nullptr; 1290 moduleTranslation.stackWalk<OpenMPVarMappingStackFrame>( 1291 [&](const OpenMPVarMappingStackFrame &frame) { 1292 reductionVariableMap = &frame.mapping; 1293 return WalkResult::interrupt(); 1294 }); 1295 assert(reductionVariableMap && "couldn't find private reduction variables"); 1296 1297 // Translate the reduction operation by emitting the body of the corresponding 1298 // reduction declaration. 1299 Region &reductionRegion = declaration.reductionRegion(); 1300 llvm::Value *privateReductionVar = 1301 reductionVariableMap->lookup(reductionOp.accumulator()); 1302 llvm::Value *reductionVal = builder.CreateLoad( 1303 moduleTranslation.convertType(reductionOp.operand().getType()), 1304 privateReductionVar); 1305 1306 moduleTranslation.mapValue(reductionRegion.front().getArgument(0), 1307 reductionVal); 1308 moduleTranslation.mapValue( 1309 reductionRegion.front().getArgument(1), 1310 moduleTranslation.lookupValue(reductionOp.operand())); 1311 1312 SmallVector<llvm::Value *> phis; 1313 if (failed(inlineConvertOmpRegions(reductionRegion, "omp.reduction.body", 1314 builder, moduleTranslation, &phis))) 1315 return failure(); 1316 assert(phis.size() == 1 && "expected one value to be yielded from " 1317 "the reduction body declaration region"); 1318 builder.CreateStore(phis[0], privateReductionVar); 1319 return success(); 1320 } 1321 1322 namespace { 1323 1324 /// Implementation of the dialect interface that converts operations belonging 1325 /// to the OpenMP dialect to LLVM IR. 1326 class OpenMPDialectLLVMIRTranslationInterface 1327 : public LLVMTranslationDialectInterface { 1328 public: 1329 using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface; 1330 1331 /// Translates the given operation to LLVM IR using the provided IR builder 1332 /// and saving the state in `moduleTranslation`. 1333 LogicalResult 1334 convertOperation(Operation *op, llvm::IRBuilderBase &builder, 1335 LLVM::ModuleTranslation &moduleTranslation) const final; 1336 }; 1337 1338 } // namespace 1339 1340 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR 1341 /// (including OpenMP runtime calls). 1342 LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation( 1343 Operation *op, llvm::IRBuilderBase &builder, 1344 LLVM::ModuleTranslation &moduleTranslation) const { 1345 1346 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 1347 1348 return llvm::TypeSwitch<Operation *, LogicalResult>(op) 1349 .Case([&](omp::BarrierOp) { 1350 ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier); 1351 return success(); 1352 }) 1353 .Case([&](omp::TaskwaitOp) { 1354 ompBuilder->createTaskwait(builder.saveIP()); 1355 return success(); 1356 }) 1357 .Case([&](omp::TaskyieldOp) { 1358 ompBuilder->createTaskyield(builder.saveIP()); 1359 return success(); 1360 }) 1361 .Case([&](omp::FlushOp) { 1362 // No support in Openmp runtime function (__kmpc_flush) to accept 1363 // the argument list. 1364 // OpenMP standard states the following: 1365 // "An implementation may implement a flush with a list by ignoring 1366 // the list, and treating it the same as a flush without a list." 1367 // 1368 // The argument list is discarded so that, flush with a list is treated 1369 // same as a flush without a list. 1370 ompBuilder->createFlush(builder.saveIP()); 1371 return success(); 1372 }) 1373 .Case([&](omp::ParallelOp op) { 1374 return convertOmpParallel(op, builder, moduleTranslation); 1375 }) 1376 .Case([&](omp::ReductionOp reductionOp) { 1377 return convertOmpReductionOp(reductionOp, builder, moduleTranslation); 1378 }) 1379 .Case([&](omp::MasterOp) { 1380 return convertOmpMaster(*op, builder, moduleTranslation); 1381 }) 1382 .Case([&](omp::CriticalOp) { 1383 return convertOmpCritical(*op, builder, moduleTranslation); 1384 }) 1385 .Case([&](omp::OrderedRegionOp) { 1386 return convertOmpOrderedRegion(*op, builder, moduleTranslation); 1387 }) 1388 .Case([&](omp::OrderedOp) { 1389 return convertOmpOrdered(*op, builder, moduleTranslation); 1390 }) 1391 .Case([&](omp::WsLoopOp) { 1392 return convertOmpWsLoop(*op, builder, moduleTranslation); 1393 }) 1394 .Case([&](omp::SimdLoopOp) { 1395 return convertOmpSimdLoop(*op, builder, moduleTranslation); 1396 }) 1397 .Case([&](omp::AtomicReadOp) { 1398 return convertOmpAtomicRead(*op, builder, moduleTranslation); 1399 }) 1400 .Case([&](omp::AtomicWriteOp) { 1401 return convertOmpAtomicWrite(*op, builder, moduleTranslation); 1402 }) 1403 .Case([&](omp::AtomicUpdateOp op) { 1404 return convertOmpAtomicUpdate(op, builder, moduleTranslation); 1405 }) 1406 .Case([&](omp::AtomicCaptureOp op) { 1407 return convertOmpAtomicCapture(op, builder, moduleTranslation); 1408 }) 1409 .Case([&](omp::SectionsOp) { 1410 return convertOmpSections(*op, builder, moduleTranslation); 1411 }) 1412 .Case([&](omp::SingleOp op) { 1413 return convertOmpSingle(op, builder, moduleTranslation); 1414 }) 1415 .Case<omp::YieldOp, omp::TerminatorOp, omp::ReductionDeclareOp, 1416 omp::CriticalDeclareOp>([](auto op) { 1417 // `yield` and `terminator` can be just omitted. The block structure 1418 // was created in the region that handles their parent operation. 1419 // `reduction.declare` will be used by reductions and is not 1420 // converted directly, skip it. 1421 // `critical.declare` is only used to declare names of critical 1422 // sections which will be used by `critical` ops and hence can be 1423 // ignored for lowering. The OpenMP IRBuilder will create unique 1424 // name for critical section names. 1425 return success(); 1426 }) 1427 .Default([&](Operation *inst) { 1428 return inst->emitError("unsupported OpenMP operation: ") 1429 << inst->getName(); 1430 }); 1431 } 1432 1433 void mlir::registerOpenMPDialectTranslation(DialectRegistry ®istry) { 1434 registry.insert<omp::OpenMPDialect>(); 1435 registry.addExtension(+[](MLIRContext *ctx, omp::OpenMPDialect *dialect) { 1436 dialect->addInterfaces<OpenMPDialectLLVMIRTranslationInterface>(); 1437 }); 1438 } 1439 1440 void mlir::registerOpenMPDialectTranslation(MLIRContext &context) { 1441 DialectRegistry registry; 1442 registerOpenMPDialectTranslation(registry); 1443 context.appendDialectRegistry(registry); 1444 } 1445