1 //===- OpenMPToLLVMIRTranslation.cpp - Translate OpenMP dialect to LLVM IR-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a translation between the MLIR OpenMP dialect and LLVM 10 // IR. 11 // 12 //===----------------------------------------------------------------------===// 13 #include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h" 14 #include "mlir/Dialect/OpenMP/OpenMPDialect.h" 15 #include "mlir/IR/BlockAndValueMapping.h" 16 #include "mlir/IR/Operation.h" 17 #include "mlir/Support/LLVM.h" 18 #include "mlir/Target/LLVMIR/ModuleTranslation.h" 19 20 #include "llvm/ADT/SetVector.h" 21 #include "llvm/ADT/TypeSwitch.h" 22 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" 23 #include "llvm/IR/DebugInfoMetadata.h" 24 #include "llvm/IR/IRBuilder.h" 25 26 using namespace mlir; 27 28 namespace { 29 /// ModuleTranslation stack frame for OpenMP operations. This keeps track of the 30 /// insertion points for allocas. 31 class OpenMPAllocaStackFrame 32 : public LLVM::ModuleTranslation::StackFrameBase<OpenMPAllocaStackFrame> { 33 public: 34 explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP) 35 : allocaInsertPoint(allocaIP) {} 36 llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint; 37 }; 38 39 /// ModuleTranslation stack frame containing the partial mapping between MLIR 40 /// values and their LLVM IR equivalents. 41 class OpenMPVarMappingStackFrame 42 : public LLVM::ModuleTranslation::StackFrameBase< 43 OpenMPVarMappingStackFrame> { 44 public: 45 explicit OpenMPVarMappingStackFrame( 46 const DenseMap<Value, llvm::Value *> &mapping) 47 : mapping(mapping) {} 48 49 DenseMap<Value, llvm::Value *> mapping; 50 }; 51 } // namespace 52 53 /// Find the insertion point for allocas given the current insertion point for 54 /// normal operations in the builder. 55 static llvm::OpenMPIRBuilder::InsertPointTy 56 findAllocaInsertPoint(llvm::IRBuilderBase &builder, 57 const LLVM::ModuleTranslation &moduleTranslation) { 58 // If there is an alloca insertion point on stack, i.e. we are in a nested 59 // operation and a specific point was provided by some surrounding operation, 60 // use it. 61 llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint; 62 WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocaStackFrame>( 63 [&](const OpenMPAllocaStackFrame &frame) { 64 allocaInsertPoint = frame.allocaInsertPoint; 65 return WalkResult::interrupt(); 66 }); 67 if (walkResult.wasInterrupted()) 68 return allocaInsertPoint; 69 70 // Otherwise, insert to the entry block of the surrounding function. 71 llvm::BasicBlock &funcEntryBlock = 72 builder.GetInsertBlock()->getParent()->getEntryBlock(); 73 return llvm::OpenMPIRBuilder::InsertPointTy( 74 &funcEntryBlock, funcEntryBlock.getFirstInsertionPt()); 75 } 76 77 /// Converts the given region that appears within an OpenMP dialect operation to 78 /// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the 79 /// region, and a branch from any block with an successor-less OpenMP terminator 80 /// to `continuationBlock`. Populates `continuationBlockPHIs` with the PHI nodes 81 /// of the continuation block if provided. 82 static void convertOmpOpRegions( 83 Region ®ion, StringRef blockName, llvm::BasicBlock &sourceBlock, 84 llvm::BasicBlock &continuationBlock, llvm::IRBuilderBase &builder, 85 LLVM::ModuleTranslation &moduleTranslation, LogicalResult &bodyGenStatus, 86 SmallVectorImpl<llvm::PHINode *> *continuationBlockPHIs = nullptr) { 87 llvm::LLVMContext &llvmContext = builder.getContext(); 88 for (Block &bb : region) { 89 llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create( 90 llvmContext, blockName, builder.GetInsertBlock()->getParent(), 91 builder.GetInsertBlock()->getNextNode()); 92 moduleTranslation.mapBlock(&bb, llvmBB); 93 } 94 95 llvm::Instruction *sourceTerminator = sourceBlock.getTerminator(); 96 97 // Terminators (namely YieldOp) may be forwarding values to the region that 98 // need to be available in the continuation block. Collect the types of these 99 // operands in preparation of creating PHI nodes. 100 SmallVector<llvm::Type *> continuationBlockPHITypes; 101 bool operandsProcessed = false; 102 unsigned numYields = 0; 103 for (Block &bb : region.getBlocks()) { 104 if (omp::YieldOp yield = dyn_cast<omp::YieldOp>(bb.getTerminator())) { 105 if (!operandsProcessed) { 106 for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) { 107 continuationBlockPHITypes.push_back( 108 moduleTranslation.convertType(yield->getOperand(i).getType())); 109 } 110 operandsProcessed = true; 111 } else { 112 assert(continuationBlockPHITypes.size() == yield->getNumOperands() && 113 "mismatching number of values yielded from the region"); 114 for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) { 115 llvm::Type *operandType = 116 moduleTranslation.convertType(yield->getOperand(i).getType()); 117 (void)operandType; 118 assert(continuationBlockPHITypes[i] == operandType && 119 "values of mismatching types yielded from the region"); 120 } 121 } 122 numYields++; 123 } 124 } 125 126 // Insert PHI nodes in the continuation block for any values forwarded by the 127 // terminators in this region. 128 if (!continuationBlockPHITypes.empty()) 129 assert( 130 continuationBlockPHIs && 131 "expected continuation block PHIs if converted regions yield values"); 132 if (continuationBlockPHIs) { 133 llvm::IRBuilderBase::InsertPointGuard guard(builder); 134 continuationBlockPHIs->reserve(continuationBlockPHITypes.size()); 135 builder.SetInsertPoint(&continuationBlock, continuationBlock.begin()); 136 for (llvm::Type *ty : continuationBlockPHITypes) 137 continuationBlockPHIs->push_back(builder.CreatePHI(ty, numYields)); 138 } 139 140 // Convert blocks one by one in topological order to ensure 141 // defs are converted before uses. 142 SetVector<Block *> blocks = 143 LLVM::detail::getTopologicallySortedBlocks(region); 144 for (Block *bb : blocks) { 145 llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb); 146 // Retarget the branch of the entry block to the entry block of the 147 // converted region (regions are single-entry). 148 if (bb->isEntryBlock()) { 149 assert(sourceTerminator->getNumSuccessors() == 1 && 150 "provided entry block has multiple successors"); 151 assert(sourceTerminator->getSuccessor(0) == &continuationBlock && 152 "ContinuationBlock is not the successor of the entry block"); 153 sourceTerminator->setSuccessor(0, llvmBB); 154 } 155 156 llvm::IRBuilderBase::InsertPointGuard guard(builder); 157 if (failed( 158 moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder))) { 159 bodyGenStatus = failure(); 160 return; 161 } 162 163 // Special handling for `omp.yield` and `omp.terminator` (we may have more 164 // than one): they return the control to the parent OpenMP dialect operation 165 // so replace them with the branch to the continuation block. We handle this 166 // here to avoid relying inter-function communication through the 167 // ModuleTranslation class to set up the correct insertion point. This is 168 // also consistent with MLIR's idiom of handling special region terminators 169 // in the same code that handles the region-owning operation. 170 Operation *terminator = bb->getTerminator(); 171 if (isa<omp::TerminatorOp, omp::YieldOp>(terminator)) { 172 builder.CreateBr(&continuationBlock); 173 174 for (unsigned i = 0, e = terminator->getNumOperands(); i < e; ++i) 175 (*continuationBlockPHIs)[i]->addIncoming( 176 moduleTranslation.lookupValue(terminator->getOperand(i)), llvmBB); 177 } 178 } 179 // After all blocks have been traversed and values mapped, connect the PHI 180 // nodes to the results of preceding blocks. 181 LLVM::detail::connectPHINodes(region, moduleTranslation); 182 183 // Remove the blocks and values defined in this region from the mapping since 184 // they are not visible outside of this region. This allows the same region to 185 // be converted several times, that is cloned, without clashes, and slightly 186 // speeds up the lookups. 187 moduleTranslation.forgetMapping(region); 188 } 189 190 /// Convert ProcBindKind from MLIR-generated enum to LLVM enum. 191 static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) { 192 switch (kind) { 193 case omp::ClauseProcBindKind::Close: 194 return llvm::omp::ProcBindKind::OMP_PROC_BIND_close; 195 case omp::ClauseProcBindKind::Master: 196 return llvm::omp::ProcBindKind::OMP_PROC_BIND_master; 197 case omp::ClauseProcBindKind::Primary: 198 return llvm::omp::ProcBindKind::OMP_PROC_BIND_primary; 199 case omp::ClauseProcBindKind::Spread: 200 return llvm::omp::ProcBindKind::OMP_PROC_BIND_spread; 201 } 202 llvm_unreachable("Unknown ClauseProcBindKind kind"); 203 } 204 205 /// Converts the OpenMP parallel operation to LLVM IR. 206 static LogicalResult 207 convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, 208 LLVM::ModuleTranslation &moduleTranslation) { 209 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 210 // TODO: support error propagation in OpenMPIRBuilder and use it instead of 211 // relying on captured variables. 212 LogicalResult bodyGenStatus = success(); 213 214 auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, 215 llvm::BasicBlock &continuationBlock) { 216 // Save the alloca insertion point on ModuleTranslation stack for use in 217 // nested regions. 218 LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame( 219 moduleTranslation, allocaIP); 220 221 // ParallelOp has only one region associated with it. 222 convertOmpOpRegions(opInst.getRegion(), "omp.par.region", 223 *codeGenIP.getBlock(), continuationBlock, builder, 224 moduleTranslation, bodyGenStatus); 225 }; 226 227 // TODO: Perform appropriate actions according to the data-sharing 228 // attribute (shared, private, firstprivate, ...) of variables. 229 // Currently defaults to shared. 230 auto privCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, 231 llvm::Value &, llvm::Value &vPtr, 232 llvm::Value *&replacementValue) -> InsertPointTy { 233 replacementValue = &vPtr; 234 235 return codeGenIP; 236 }; 237 238 // TODO: Perform finalization actions for variables. This has to be 239 // called for variables which have destructors/finalizers. 240 auto finiCB = [&](InsertPointTy codeGenIP) {}; 241 242 llvm::Value *ifCond = nullptr; 243 if (auto ifExprVar = opInst.if_expr_var()) 244 ifCond = moduleTranslation.lookupValue(ifExprVar); 245 llvm::Value *numThreads = nullptr; 246 if (auto numThreadsVar = opInst.num_threads_var()) 247 numThreads = moduleTranslation.lookupValue(numThreadsVar); 248 auto pbKind = llvm::omp::OMP_PROC_BIND_default; 249 if (auto bind = opInst.proc_bind_val()) 250 pbKind = getProcBindKind(*bind); 251 // TODO: Is the Parallel construct cancellable? 252 bool isCancellable = false; 253 254 // Ensure that the BasicBlock for the the parallel region is sparate from the 255 // function entry which we may need to insert allocas. 256 if (builder.GetInsertBlock() == 257 &builder.GetInsertBlock()->getParent()->getEntryBlock()) { 258 assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() && 259 "Assuming end of basic block"); 260 llvm::BasicBlock *entryBB = 261 llvm::BasicBlock::Create(builder.getContext(), "parallel.entry", 262 builder.GetInsertBlock()->getParent(), 263 builder.GetInsertBlock()->getNextNode()); 264 builder.CreateBr(entryBB); 265 builder.SetInsertPoint(entryBB); 266 } 267 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 268 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createParallel( 269 ompLoc, findAllocaInsertPoint(builder, moduleTranslation), bodyGenCB, 270 privCB, finiCB, ifCond, numThreads, pbKind, isCancellable)); 271 272 return bodyGenStatus; 273 } 274 275 /// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder. 276 static LogicalResult 277 convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder, 278 LLVM::ModuleTranslation &moduleTranslation) { 279 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 280 // TODO: support error propagation in OpenMPIRBuilder and use it instead of 281 // relying on captured variables. 282 LogicalResult bodyGenStatus = success(); 283 284 auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, 285 llvm::BasicBlock &continuationBlock) { 286 // MasterOp has only one region associated with it. 287 auto ®ion = cast<omp::MasterOp>(opInst).getRegion(); 288 convertOmpOpRegions(region, "omp.master.region", *codeGenIP.getBlock(), 289 continuationBlock, builder, moduleTranslation, 290 bodyGenStatus); 291 }; 292 293 // TODO: Perform finalization actions for variables. This has to be 294 // called for variables which have destructors/finalizers. 295 auto finiCB = [&](InsertPointTy codeGenIP) {}; 296 297 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 298 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createMaster( 299 ompLoc, bodyGenCB, finiCB)); 300 return success(); 301 } 302 303 /// Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder. 304 static LogicalResult 305 convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder, 306 LLVM::ModuleTranslation &moduleTranslation) { 307 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 308 auto criticalOp = cast<omp::CriticalOp>(opInst); 309 // TODO: support error propagation in OpenMPIRBuilder and use it instead of 310 // relying on captured variables. 311 LogicalResult bodyGenStatus = success(); 312 313 auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, 314 llvm::BasicBlock &continuationBlock) { 315 // CriticalOp has only one region associated with it. 316 auto ®ion = cast<omp::CriticalOp>(opInst).getRegion(); 317 convertOmpOpRegions(region, "omp.critical.region", *codeGenIP.getBlock(), 318 continuationBlock, builder, moduleTranslation, 319 bodyGenStatus); 320 }; 321 322 // TODO: Perform finalization actions for variables. This has to be 323 // called for variables which have destructors/finalizers. 324 auto finiCB = [&](InsertPointTy codeGenIP) {}; 325 326 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 327 llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext(); 328 llvm::Constant *hint = nullptr; 329 330 // If it has a name, it probably has a hint too. 331 if (criticalOp.nameAttr()) { 332 // The verifiers in OpenMP Dialect guarentee that all the pointers are 333 // non-null 334 auto symbolRef = criticalOp.nameAttr().cast<SymbolRefAttr>(); 335 auto criticalDeclareOp = 336 SymbolTable::lookupNearestSymbolFrom<omp::CriticalDeclareOp>(criticalOp, 337 symbolRef); 338 hint = 339 llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext), 340 static_cast<int>(criticalDeclareOp.hint_val())); 341 } 342 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createCritical( 343 ompLoc, bodyGenCB, finiCB, criticalOp.name().getValueOr(""), hint)); 344 return success(); 345 } 346 347 /// Returns a reduction declaration that corresponds to the given reduction 348 /// operation in the given container. Currently only supports reductions inside 349 /// WsLoopOp but can be easily extended. 350 static omp::ReductionDeclareOp findReductionDecl(omp::WsLoopOp container, 351 omp::ReductionOp reduction) { 352 SymbolRefAttr reductionSymbol; 353 for (unsigned i = 0, e = container.getNumReductionVars(); i < e; ++i) { 354 if (container.reduction_vars()[i] != reduction.accumulator()) 355 continue; 356 reductionSymbol = (*container.reductions())[i].cast<SymbolRefAttr>(); 357 break; 358 } 359 assert(reductionSymbol && 360 "reduction operation must be associated with a declaration"); 361 362 return SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>( 363 container, reductionSymbol); 364 } 365 366 /// Populates `reductions` with reduction declarations used in the given loop. 367 static void 368 collectReductionDecls(omp::WsLoopOp loop, 369 SmallVectorImpl<omp::ReductionDeclareOp> &reductions) { 370 Optional<ArrayAttr> attr = loop.reductions(); 371 if (!attr) 372 return; 373 374 reductions.reserve(reductions.size() + loop.getNumReductionVars()); 375 for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) { 376 reductions.push_back( 377 SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>( 378 loop, symbolRef)); 379 } 380 } 381 382 /// Translates the blocks contained in the given region and appends them to at 383 /// the current insertion point of `builder`. The operations of the entry block 384 /// are appended to the current insertion block, which is not expected to have a 385 /// terminator. If set, `continuationBlockArgs` is populated with translated 386 /// values that correspond to the values omp.yield'ed from the region. 387 static LogicalResult inlineConvertOmpRegions( 388 Region ®ion, StringRef blockName, llvm::IRBuilderBase &builder, 389 LLVM::ModuleTranslation &moduleTranslation, 390 SmallVectorImpl<llvm::Value *> *continuationBlockArgs = nullptr) { 391 if (region.empty()) 392 return success(); 393 394 // Special case for single-block regions that don't create additional blocks: 395 // insert operations without creating additional blocks. 396 if (llvm::hasSingleElement(region)) { 397 moduleTranslation.mapBlock(®ion.front(), builder.GetInsertBlock()); 398 if (failed(moduleTranslation.convertBlock( 399 region.front(), /*ignoreArguments=*/true, builder))) 400 return failure(); 401 402 // The continuation arguments are simply the translated terminator operands. 403 if (continuationBlockArgs) 404 llvm::append_range( 405 *continuationBlockArgs, 406 moduleTranslation.lookupValues(region.front().back().getOperands())); 407 408 // Drop the mapping that is no longer necessary so that the same region can 409 // be processed multiple times. 410 moduleTranslation.forgetMapping(region); 411 return success(); 412 } 413 414 // Create the continuation block manually instead of calling splitBlock 415 // because the current insertion block may not have a terminator. 416 llvm::BasicBlock *continuationBlock = 417 llvm::BasicBlock::Create(builder.getContext(), blockName + ".cont", 418 builder.GetInsertBlock()->getParent(), 419 builder.GetInsertBlock()->getNextNode()); 420 builder.CreateBr(continuationBlock); 421 422 LogicalResult bodyGenStatus = success(); 423 SmallVector<llvm::PHINode *> phis; 424 convertOmpOpRegions(region, blockName, *builder.GetInsertBlock(), 425 *continuationBlock, builder, moduleTranslation, 426 bodyGenStatus, &phis); 427 if (failed(bodyGenStatus)) 428 return failure(); 429 if (continuationBlockArgs) 430 llvm::append_range(*continuationBlockArgs, phis); 431 builder.SetInsertPoint(continuationBlock, 432 continuationBlock->getFirstInsertionPt()); 433 return success(); 434 } 435 436 namespace { 437 /// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used to 438 /// store lambdas with capture. 439 using OwningReductionGen = std::function<llvm::OpenMPIRBuilder::InsertPointTy( 440 llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *, 441 llvm::Value *&)>; 442 using OwningAtomicReductionGen = 443 std::function<llvm::OpenMPIRBuilder::InsertPointTy( 444 llvm::OpenMPIRBuilder::InsertPointTy, llvm::Type *, llvm::Value *, 445 llvm::Value *)>; 446 } // namespace 447 448 /// Create an OpenMPIRBuilder-compatible reduction generator for the given 449 /// reduction declaration. The generator uses `builder` but ignores its 450 /// insertion point. 451 static OwningReductionGen 452 makeReductionGen(omp::ReductionDeclareOp decl, llvm::IRBuilderBase &builder, 453 LLVM::ModuleTranslation &moduleTranslation) { 454 // The lambda is mutable because we need access to non-const methods of decl 455 // (which aren't actually mutating it), and we must capture decl by-value to 456 // avoid the dangling reference after the parent function returns. 457 OwningReductionGen gen = 458 [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, 459 llvm::Value *lhs, llvm::Value *rhs, 460 llvm::Value *&result) mutable { 461 Region &reductionRegion = decl.reductionRegion(); 462 moduleTranslation.mapValue(reductionRegion.front().getArgument(0), lhs); 463 moduleTranslation.mapValue(reductionRegion.front().getArgument(1), rhs); 464 builder.restoreIP(insertPoint); 465 SmallVector<llvm::Value *> phis; 466 if (failed(inlineConvertOmpRegions(reductionRegion, 467 "omp.reduction.nonatomic.body", 468 builder, moduleTranslation, &phis))) 469 return llvm::OpenMPIRBuilder::InsertPointTy(); 470 assert(phis.size() == 1); 471 result = phis[0]; 472 return builder.saveIP(); 473 }; 474 return gen; 475 } 476 477 /// Create an OpenMPIRBuilder-compatible atomic reduction generator for the 478 /// given reduction declaration. The generator uses `builder` but ignores its 479 /// insertion point. Returns null if there is no atomic region available in the 480 /// reduction declaration. 481 static OwningAtomicReductionGen 482 makeAtomicReductionGen(omp::ReductionDeclareOp decl, 483 llvm::IRBuilderBase &builder, 484 LLVM::ModuleTranslation &moduleTranslation) { 485 if (decl.atomicReductionRegion().empty()) 486 return OwningAtomicReductionGen(); 487 488 // The lambda is mutable because we need access to non-const methods of decl 489 // (which aren't actually mutating it), and we must capture decl by-value to 490 // avoid the dangling reference after the parent function returns. 491 OwningAtomicReductionGen atomicGen = 492 [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, llvm::Type *, 493 llvm::Value *lhs, llvm::Value *rhs) mutable { 494 Region &atomicRegion = decl.atomicReductionRegion(); 495 moduleTranslation.mapValue(atomicRegion.front().getArgument(0), lhs); 496 moduleTranslation.mapValue(atomicRegion.front().getArgument(1), rhs); 497 builder.restoreIP(insertPoint); 498 SmallVector<llvm::Value *> phis; 499 if (failed(inlineConvertOmpRegions(atomicRegion, 500 "omp.reduction.atomic.body", builder, 501 moduleTranslation, &phis))) 502 return llvm::OpenMPIRBuilder::InsertPointTy(); 503 assert(phis.empty()); 504 return builder.saveIP(); 505 }; 506 return atomicGen; 507 } 508 509 /// Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder. 510 static LogicalResult 511 convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder, 512 LLVM::ModuleTranslation &moduleTranslation) { 513 auto orderedOp = cast<omp::OrderedOp>(opInst); 514 515 omp::ClauseDepend dependType = *orderedOp.depend_type_val(); 516 bool isDependSource = dependType == omp::ClauseDepend::dependsource; 517 unsigned numLoops = orderedOp.num_loops_val().getValue(); 518 SmallVector<llvm::Value *> vecValues = 519 moduleTranslation.lookupValues(orderedOp.depend_vec_vars()); 520 521 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 522 size_t indexVecValues = 0; 523 while (indexVecValues < vecValues.size()) { 524 SmallVector<llvm::Value *> storeValues; 525 storeValues.reserve(numLoops); 526 for (unsigned i = 0; i < numLoops; i++) { 527 storeValues.push_back(vecValues[indexVecValues]); 528 indexVecValues++; 529 } 530 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend( 531 ompLoc, findAllocaInsertPoint(builder, moduleTranslation), numLoops, 532 storeValues, ".cnt.addr", isDependSource)); 533 } 534 return success(); 535 } 536 537 /// Converts an OpenMP 'ordered_region' operation into LLVM IR using 538 /// OpenMPIRBuilder. 539 static LogicalResult 540 convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder, 541 LLVM::ModuleTranslation &moduleTranslation) { 542 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 543 auto orderedRegionOp = cast<omp::OrderedRegionOp>(opInst); 544 545 // TODO: The code generation for ordered simd directive is not supported yet. 546 if (orderedRegionOp.simd()) 547 return failure(); 548 549 // TODO: support error propagation in OpenMPIRBuilder and use it instead of 550 // relying on captured variables. 551 LogicalResult bodyGenStatus = success(); 552 553 auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, 554 llvm::BasicBlock &continuationBlock) { 555 // OrderedOp has only one region associated with it. 556 auto ®ion = cast<omp::OrderedRegionOp>(opInst).getRegion(); 557 convertOmpOpRegions(region, "omp.ordered.region", *codeGenIP.getBlock(), 558 continuationBlock, builder, moduleTranslation, 559 bodyGenStatus); 560 }; 561 562 // TODO: Perform finalization actions for variables. This has to be 563 // called for variables which have destructors/finalizers. 564 auto finiCB = [&](InsertPointTy codeGenIP) {}; 565 566 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 567 builder.restoreIP( 568 moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd( 569 ompLoc, bodyGenCB, finiCB, !orderedRegionOp.simd())); 570 return bodyGenStatus; 571 } 572 573 static LogicalResult 574 convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, 575 LLVM::ModuleTranslation &moduleTranslation) { 576 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 577 using StorableBodyGenCallbackTy = 578 llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy; 579 580 auto sectionsOp = cast<omp::SectionsOp>(opInst); 581 582 // TODO: Support the following clauses: private, firstprivate, lastprivate, 583 // reduction, allocate 584 if (!sectionsOp.reduction_vars().empty() || sectionsOp.reductions() || 585 !sectionsOp.allocate_vars().empty() || 586 !sectionsOp.allocators_vars().empty()) 587 return emitError(sectionsOp.getLoc()) 588 << "reduction and allocate clauses are not supported for sections " 589 "construct"; 590 591 LogicalResult bodyGenStatus = success(); 592 SmallVector<StorableBodyGenCallbackTy> sectionCBs; 593 594 for (Operation &op : *sectionsOp.region().begin()) { 595 auto sectionOp = dyn_cast<omp::SectionOp>(op); 596 if (!sectionOp) // omp.terminator 597 continue; 598 599 Region ®ion = sectionOp.region(); 600 auto sectionCB = [®ion, &builder, &moduleTranslation, &bodyGenStatus]( 601 InsertPointTy allocaIP, InsertPointTy codeGenIP, 602 llvm::BasicBlock &finiBB) { 603 builder.restoreIP(codeGenIP); 604 builder.CreateBr(&finiBB); 605 convertOmpOpRegions(region, "omp.section.region", *codeGenIP.getBlock(), 606 finiBB, builder, moduleTranslation, bodyGenStatus); 607 }; 608 sectionCBs.push_back(sectionCB); 609 } 610 611 // No sections within omp.sections operation - skip generation. This situation 612 // is only possible if there is only a terminator operation inside the 613 // sections operation 614 if (sectionCBs.empty()) 615 return success(); 616 617 assert(isa<omp::SectionOp>(*sectionsOp.region().op_begin())); 618 619 // TODO: Perform appropriate actions according to the data-sharing 620 // attribute (shared, private, firstprivate, ...) of variables. 621 // Currently defaults to shared. 622 auto privCB = [&](InsertPointTy, InsertPointTy codeGenIP, llvm::Value &, 623 llvm::Value &vPtr, 624 llvm::Value *&replacementValue) -> InsertPointTy { 625 replacementValue = &vPtr; 626 return codeGenIP; 627 }; 628 629 // TODO: Perform finalization actions for variables. This has to be 630 // called for variables which have destructors/finalizers. 631 auto finiCB = [&](InsertPointTy codeGenIP) {}; 632 633 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 634 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSections( 635 ompLoc, findAllocaInsertPoint(builder, moduleTranslation), sectionCBs, 636 privCB, finiCB, false, sectionsOp.nowait())); 637 return bodyGenStatus; 638 } 639 640 /// Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder. 641 static LogicalResult 642 convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder, 643 LLVM::ModuleTranslation &moduleTranslation) { 644 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 645 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 646 LogicalResult bodyGenStatus = success(); 647 auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP, 648 llvm::BasicBlock &continuationBB) { 649 convertOmpOpRegions(singleOp.region(), "omp.single.region", 650 *codegenIP.getBlock(), continuationBB, builder, 651 moduleTranslation, bodyGenStatus); 652 }; 653 auto finiCB = [&](InsertPointTy codeGenIP) {}; 654 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSingle( 655 ompLoc, bodyCB, finiCB, singleOp.nowait(), /*DidIt=*/nullptr)); 656 return bodyGenStatus; 657 } 658 659 /// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder. 660 static LogicalResult 661 convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, 662 LLVM::ModuleTranslation &moduleTranslation) { 663 auto loop = cast<omp::WsLoopOp>(opInst); 664 // TODO: this should be in the op verifier instead. 665 if (loop.lowerBound().empty()) 666 return failure(); 667 668 // Static is the default. 669 auto schedule = 670 loop.schedule_val().getValueOr(omp::ClauseScheduleKind::Static); 671 672 // Find the loop configuration. 673 llvm::Value *step = moduleTranslation.lookupValue(loop.step()[0]); 674 llvm::Type *ivType = step->getType(); 675 llvm::Value *chunk = nullptr; 676 if (loop.schedule_chunk_var()) { 677 llvm::Value *chunkVar = 678 moduleTranslation.lookupValue(loop.schedule_chunk_var()); 679 llvm::Type *chunkVarType = chunkVar->getType(); 680 assert(chunkVarType->isIntegerTy() && 681 "chunk size must be one integer expression"); 682 if (chunkVarType->getIntegerBitWidth() < ivType->getIntegerBitWidth()) 683 chunk = builder.CreateSExt(chunkVar, ivType); 684 else if (chunkVarType->getIntegerBitWidth() > ivType->getIntegerBitWidth()) 685 chunk = builder.CreateTrunc(chunkVar, ivType); 686 else 687 chunk = chunkVar; 688 } 689 690 SmallVector<omp::ReductionDeclareOp> reductionDecls; 691 collectReductionDecls(loop, reductionDecls); 692 llvm::OpenMPIRBuilder::InsertPointTy allocaIP = 693 findAllocaInsertPoint(builder, moduleTranslation); 694 695 // Allocate space for privatized reduction variables. 696 SmallVector<llvm::Value *> privateReductionVariables; 697 DenseMap<Value, llvm::Value *> reductionVariableMap; 698 unsigned numReductions = loop.getNumReductionVars(); 699 privateReductionVariables.reserve(numReductions); 700 if (numReductions != 0) { 701 llvm::IRBuilderBase::InsertPointGuard guard(builder); 702 builder.restoreIP(allocaIP); 703 for (unsigned i = 0; i < numReductions; ++i) { 704 auto reductionType = 705 loop.reduction_vars()[i].getType().cast<LLVM::LLVMPointerType>(); 706 llvm::Value *var = builder.CreateAlloca( 707 moduleTranslation.convertType(reductionType.getElementType())); 708 privateReductionVariables.push_back(var); 709 reductionVariableMap.try_emplace(loop.reduction_vars()[i], var); 710 } 711 } 712 713 // Store the mapping between reduction variables and their private copies on 714 // ModuleTranslation stack. It can be then recovered when translating 715 // omp.reduce operations in a separate call. 716 LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard( 717 moduleTranslation, reductionVariableMap); 718 719 // Before the loop, store the initial values of reductions into reduction 720 // variables. Although this could be done after allocas, we don't want to mess 721 // up with the alloca insertion point. 722 for (unsigned i = 0; i < numReductions; ++i) { 723 SmallVector<llvm::Value *> phis; 724 if (failed(inlineConvertOmpRegions(reductionDecls[i].initializerRegion(), 725 "omp.reduction.neutral", builder, 726 moduleTranslation, &phis))) 727 return failure(); 728 assert(phis.size() == 1 && "expected one value to be yielded from the " 729 "reduction neutral element declaration region"); 730 builder.CreateStore(phis[0], privateReductionVariables[i]); 731 } 732 733 // Set up the source location value for OpenMP runtime. 734 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 735 736 // Generator of the canonical loop body. 737 // TODO: support error propagation in OpenMPIRBuilder and use it instead of 738 // relying on captured variables. 739 SmallVector<llvm::CanonicalLoopInfo *> loopInfos; 740 SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints; 741 LogicalResult bodyGenStatus = success(); 742 auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) { 743 // Make sure further conversions know about the induction variable. 744 moduleTranslation.mapValue( 745 loop.getRegion().front().getArgument(loopInfos.size()), iv); 746 747 // Capture the body insertion point for use in nested loops. BodyIP of the 748 // CanonicalLoopInfo always points to the beginning of the entry block of 749 // the body. 750 bodyInsertPoints.push_back(ip); 751 752 if (loopInfos.size() != loop.getNumLoops() - 1) 753 return; 754 755 // Convert the body of the loop. 756 llvm::BasicBlock *entryBlock = ip.getBlock(); 757 llvm::BasicBlock *exitBlock = 758 entryBlock->splitBasicBlock(ip.getPoint(), "omp.wsloop.exit"); 759 convertOmpOpRegions(loop.region(), "omp.wsloop.region", *entryBlock, 760 *exitBlock, builder, moduleTranslation, bodyGenStatus); 761 }; 762 763 // Delegate actual loop construction to the OpenMP IRBuilder. 764 // TODO: this currently assumes WsLoop is semantically similar to SCF loop, 765 // i.e. it has a positive step, uses signed integer semantics. Reconsider 766 // this code when WsLoop clearly supports more cases. 767 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 768 for (unsigned i = 0, e = loop.getNumLoops(); i < e; ++i) { 769 llvm::Value *lowerBound = 770 moduleTranslation.lookupValue(loop.lowerBound()[i]); 771 llvm::Value *upperBound = 772 moduleTranslation.lookupValue(loop.upperBound()[i]); 773 llvm::Value *step = moduleTranslation.lookupValue(loop.step()[i]); 774 775 // Make sure loop trip count are emitted in the preheader of the outermost 776 // loop at the latest so that they are all available for the new collapsed 777 // loop will be created below. 778 llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc; 779 llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP; 780 if (i != 0) { 781 loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back()); 782 computeIP = loopInfos.front()->getPreheaderIP(); 783 } 784 loopInfos.push_back(ompBuilder->createCanonicalLoop( 785 loc, bodyGen, lowerBound, upperBound, step, 786 /*IsSigned=*/true, loop.inclusive(), computeIP)); 787 788 if (failed(bodyGenStatus)) 789 return failure(); 790 } 791 792 // Collapse loops. Store the insertion point because LoopInfos may get 793 // invalidated. 794 llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP(); 795 llvm::CanonicalLoopInfo *loopInfo = 796 ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {}); 797 798 allocaIP = findAllocaInsertPoint(builder, moduleTranslation); 799 800 bool isSimd = loop.simd_modifier(); 801 802 // The orderedVal refers to the value obtained from the ordered[(n)] clause. 803 // orderedVal == -1: No ordered[(n)] clause specified. 804 // orderedVal == 0: The ordered clause specified without a parameter. 805 // orderedVal > 0: The ordered clause specified with a parameter (n). 806 // TODO: Handle doacross loop init when orderedVal is greater than 0. 807 int64_t orderedVal = 808 loop.ordered_val().hasValue() ? loop.ordered_val().getValue() : -1; 809 if (schedule == omp::ClauseScheduleKind::Static && orderedVal != 0) { 810 ompBuilder->applyWorkshareLoop(ompLoc.DL, loopInfo, allocaIP, 811 !loop.nowait(), 812 llvm::omp::OMP_SCHEDULE_Static, chunk); 813 } else { 814 llvm::omp::OMPScheduleType schedType; 815 switch (schedule) { 816 case omp::ClauseScheduleKind::Static: 817 if (loop.schedule_chunk_var()) 818 schedType = llvm::omp::OMPScheduleType::OrderedStaticChunked; 819 else 820 schedType = llvm::omp::OMPScheduleType::OrderedStatic; 821 break; 822 case omp::ClauseScheduleKind::Dynamic: 823 if (orderedVal == 0) 824 schedType = llvm::omp::OMPScheduleType::OrderedDynamicChunked; 825 else 826 schedType = llvm::omp::OMPScheduleType::DynamicChunked; 827 break; 828 case omp::ClauseScheduleKind::Guided: 829 if (orderedVal == 0) { 830 schedType = llvm::omp::OMPScheduleType::OrderedGuidedChunked; 831 } else { 832 if (isSimd) 833 schedType = llvm::omp::OMPScheduleType::GuidedSimd; 834 else 835 schedType = llvm::omp::OMPScheduleType::GuidedChunked; 836 } 837 break; 838 case omp::ClauseScheduleKind::Auto: 839 if (orderedVal == 0) 840 schedType = llvm::omp::OMPScheduleType::OrderedAuto; 841 else 842 schedType = llvm::omp::OMPScheduleType::Auto; 843 break; 844 case omp::ClauseScheduleKind::Runtime: 845 if (orderedVal == 0) { 846 schedType = llvm::omp::OMPScheduleType::OrderedRuntime; 847 } else { 848 if (isSimd) 849 schedType = llvm::omp::OMPScheduleType::RuntimeSimd; 850 else 851 schedType = llvm::omp::OMPScheduleType::Runtime; 852 } 853 break; 854 default: 855 if (orderedVal == 0) { 856 schedType = llvm::omp::OMPScheduleType::OrderedStatic; 857 break; 858 } 859 llvm_unreachable("Unknown schedule value"); 860 break; 861 } 862 863 if (Optional<omp::ScheduleModifier> modifier = loop.schedule_modifier()) { 864 switch (*modifier) { 865 case omp::ScheduleModifier::monotonic: 866 schedType |= llvm::omp::OMPScheduleType::ModifierMonotonic; 867 break; 868 case omp::ScheduleModifier::nonmonotonic: 869 schedType |= llvm::omp::OMPScheduleType::ModifierNonmonotonic; 870 break; 871 default: 872 // Nothing to do here. 873 break; 874 } 875 } else { 876 // OpenMP 5.1, 2.11.4 Worksharing-Loop Construct, Description. 877 // If the static schedule kind is specified or if the ordered clause is 878 // specified, and if the nonmonotonic modifier is not specified, the 879 // effect is as if the monotonic modifier is specified. Otherwise, unless 880 // the monotonic modifier is specified, the effect is as if the 881 // nonmonotonic modifier is specified. 882 // The monotonic is used by default in openmp runtime library, so no need 883 // to set it. 884 if (!(schedType == llvm::omp::OMPScheduleType::OrderedStatic || 885 schedType == llvm::omp::OMPScheduleType::OrderedStaticChunked)) 886 schedType |= llvm::omp::OMPScheduleType::ModifierNonmonotonic; 887 } 888 889 ompBuilder->applyDynamicWorkshareLoop(ompLoc.DL, loopInfo, allocaIP, 890 schedType, !loop.nowait(), chunk, 891 /*ordered*/ orderedVal == 0); 892 } 893 894 // Continue building IR after the loop. Note that the LoopInfo returned by 895 // `collapseLoops` points inside the outermost loop and is intended for 896 // potential further loop transformations. Use the insertion point stored 897 // before collapsing loops instead. 898 builder.restoreIP(afterIP); 899 900 // Process the reductions if required. 901 if (numReductions == 0) 902 return success(); 903 904 // Create the reduction generators. We need to own them here because 905 // ReductionInfo only accepts references to the generators. 906 SmallVector<OwningReductionGen> owningReductionGens; 907 SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens; 908 for (unsigned i = 0; i < numReductions; ++i) { 909 owningReductionGens.push_back( 910 makeReductionGen(reductionDecls[i], builder, moduleTranslation)); 911 owningAtomicReductionGens.push_back( 912 makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation)); 913 } 914 915 // Collect the reduction information. 916 SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos; 917 reductionInfos.reserve(numReductions); 918 for (unsigned i = 0; i < numReductions; ++i) { 919 llvm::OpenMPIRBuilder::AtomicReductionGenTy atomicGen = nullptr; 920 if (owningAtomicReductionGens[i]) 921 atomicGen = owningAtomicReductionGens[i]; 922 auto reductionType = 923 loop.reduction_vars()[i].getType().cast<LLVM::LLVMPointerType>(); 924 llvm::Value *variable = 925 moduleTranslation.lookupValue(loop.reduction_vars()[i]); 926 reductionInfos.push_back( 927 {moduleTranslation.convertType(reductionType.getElementType()), 928 variable, privateReductionVariables[i], owningReductionGens[i], 929 atomicGen}); 930 } 931 932 // The call to createReductions below expects the block to have a 933 // terminator. Create an unreachable instruction to serve as terminator 934 // and remove it later. 935 llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable(); 936 builder.SetInsertPoint(tempTerminator); 937 llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint = 938 ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos, 939 loop.nowait()); 940 if (!contInsertPoint.getBlock()) 941 return loop->emitOpError() << "failed to convert reductions"; 942 auto nextInsertionPoint = 943 ompBuilder->createBarrier(contInsertPoint, llvm::omp::OMPD_for); 944 tempTerminator->eraseFromParent(); 945 builder.restoreIP(nextInsertionPoint); 946 947 return success(); 948 } 949 950 /// Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder. 951 static LogicalResult 952 convertOmpSimdLoop(Operation &opInst, llvm::IRBuilderBase &builder, 953 LLVM::ModuleTranslation &moduleTranslation) { 954 auto loop = cast<omp::SimdLoopOp>(opInst); 955 956 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 957 958 // Generator of the canonical loop body. 959 // TODO: support error propagation in OpenMPIRBuilder and use it instead of 960 // relying on captured variables. 961 SmallVector<llvm::CanonicalLoopInfo *> loopInfos; 962 SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints; 963 LogicalResult bodyGenStatus = success(); 964 auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) { 965 // Make sure further conversions know about the induction variable. 966 moduleTranslation.mapValue( 967 loop.getRegion().front().getArgument(loopInfos.size()), iv); 968 969 // Capture the body insertion point for use in nested loops. BodyIP of the 970 // CanonicalLoopInfo always points to the beginning of the entry block of 971 // the body. 972 bodyInsertPoints.push_back(ip); 973 974 if (loopInfos.size() != loop.getNumLoops() - 1) 975 return; 976 977 // Convert the body of the loop. 978 llvm::BasicBlock *entryBlock = ip.getBlock(); 979 llvm::BasicBlock *exitBlock = 980 entryBlock->splitBasicBlock(ip.getPoint(), "omp.simdloop.exit"); 981 convertOmpOpRegions(loop.region(), "omp.simdloop.region", *entryBlock, 982 *exitBlock, builder, moduleTranslation, bodyGenStatus); 983 }; 984 985 // Delegate actual loop construction to the OpenMP IRBuilder. 986 // TODO: this currently assumes SimdLoop is semantically similar to SCF loop, 987 // i.e. it has a positive step, uses signed integer semantics. Reconsider 988 // this code when SimdLoop clearly supports more cases. 989 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 990 for (unsigned i = 0, e = loop.getNumLoops(); i < e; ++i) { 991 llvm::Value *lowerBound = 992 moduleTranslation.lookupValue(loop.lowerBound()[i]); 993 llvm::Value *upperBound = 994 moduleTranslation.lookupValue(loop.upperBound()[i]); 995 llvm::Value *step = moduleTranslation.lookupValue(loop.step()[i]); 996 997 // Make sure loop trip count are emitted in the preheader of the outermost 998 // loop at the latest so that they are all available for the new collapsed 999 // loop will be created below. 1000 llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc; 1001 llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP; 1002 if (i != 0) { 1003 loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(), 1004 ompLoc.DL); 1005 computeIP = loopInfos.front()->getPreheaderIP(); 1006 } 1007 loopInfos.push_back(ompBuilder->createCanonicalLoop( 1008 loc, bodyGen, lowerBound, upperBound, step, 1009 /*IsSigned=*/true, /*Inclusive=*/true, computeIP)); 1010 1011 if (failed(bodyGenStatus)) 1012 return failure(); 1013 } 1014 1015 // Collapse loops. 1016 llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP(); 1017 llvm::CanonicalLoopInfo *loopInfo = 1018 ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {}); 1019 1020 ompBuilder->applySimd(ompLoc.DL, loopInfo); 1021 1022 builder.restoreIP(afterIP); 1023 return success(); 1024 } 1025 1026 /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering. 1027 llvm::AtomicOrdering 1028 convertAtomicOrdering(Optional<omp::ClauseMemoryOrderKind> ao) { 1029 if (!ao) 1030 return llvm::AtomicOrdering::Monotonic; // Default Memory Ordering 1031 1032 switch (*ao) { 1033 case omp::ClauseMemoryOrderKind::Seq_cst: 1034 return llvm::AtomicOrdering::SequentiallyConsistent; 1035 case omp::ClauseMemoryOrderKind::Acq_rel: 1036 return llvm::AtomicOrdering::AcquireRelease; 1037 case omp::ClauseMemoryOrderKind::Acquire: 1038 return llvm::AtomicOrdering::Acquire; 1039 case omp::ClauseMemoryOrderKind::Release: 1040 return llvm::AtomicOrdering::Release; 1041 case omp::ClauseMemoryOrderKind::Relaxed: 1042 return llvm::AtomicOrdering::Monotonic; 1043 } 1044 llvm_unreachable("Unknown ClauseMemoryOrderKind kind"); 1045 } 1046 1047 /// Convert omp.atomic.read operation to LLVM IR. 1048 static LogicalResult 1049 convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder, 1050 LLVM::ModuleTranslation &moduleTranslation) { 1051 1052 auto readOp = cast<omp::AtomicReadOp>(opInst); 1053 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 1054 1055 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 1056 1057 llvm::AtomicOrdering AO = convertAtomicOrdering(readOp.memory_order_val()); 1058 llvm::Value *x = moduleTranslation.lookupValue(readOp.x()); 1059 Type xTy = readOp.x().getType().cast<omp::PointerLikeType>().getElementType(); 1060 llvm::Value *v = moduleTranslation.lookupValue(readOp.v()); 1061 Type vTy = readOp.v().getType().cast<omp::PointerLikeType>().getElementType(); 1062 llvm::OpenMPIRBuilder::AtomicOpValue V = { 1063 v, moduleTranslation.convertType(vTy), false, false}; 1064 llvm::OpenMPIRBuilder::AtomicOpValue X = { 1065 x, moduleTranslation.convertType(xTy), false, false}; 1066 builder.restoreIP(ompBuilder->createAtomicRead(ompLoc, X, V, AO)); 1067 return success(); 1068 } 1069 1070 /// Converts an omp.atomic.write operation to LLVM IR. 1071 static LogicalResult 1072 convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder, 1073 LLVM::ModuleTranslation &moduleTranslation) { 1074 auto writeOp = cast<omp::AtomicWriteOp>(opInst); 1075 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 1076 1077 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 1078 llvm::AtomicOrdering ao = convertAtomicOrdering(writeOp.memory_order_val()); 1079 llvm::Value *expr = moduleTranslation.lookupValue(writeOp.value()); 1080 llvm::Value *dest = moduleTranslation.lookupValue(writeOp.address()); 1081 llvm::Type *ty = moduleTranslation.convertType(writeOp.value().getType()); 1082 llvm::OpenMPIRBuilder::AtomicOpValue x = {dest, ty, /*isSigned=*/false, 1083 /*isVolatile=*/false}; 1084 builder.restoreIP(ompBuilder->createAtomicWrite(ompLoc, x, expr, ao)); 1085 return success(); 1086 } 1087 1088 /// Converts an LLVM dialect binary operation to the corresponding enum value 1089 /// for `atomicrmw` supported binary operation. 1090 llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op) { 1091 return llvm::TypeSwitch<Operation *, llvm::AtomicRMWInst::BinOp>(&op) 1092 .Case([&](LLVM::AddOp) { return llvm::AtomicRMWInst::BinOp::Add; }) 1093 .Case([&](LLVM::SubOp) { return llvm::AtomicRMWInst::BinOp::Sub; }) 1094 .Case([&](LLVM::AndOp) { return llvm::AtomicRMWInst::BinOp::And; }) 1095 .Case([&](LLVM::OrOp) { return llvm::AtomicRMWInst::BinOp::Or; }) 1096 .Case([&](LLVM::XOrOp) { return llvm::AtomicRMWInst::BinOp::Xor; }) 1097 .Case([&](LLVM::UMaxOp) { return llvm::AtomicRMWInst::BinOp::UMax; }) 1098 .Case([&](LLVM::UMinOp) { return llvm::AtomicRMWInst::BinOp::UMin; }) 1099 .Case([&](LLVM::FAddOp) { return llvm::AtomicRMWInst::BinOp::FAdd; }) 1100 .Case([&](LLVM::FSubOp) { return llvm::AtomicRMWInst::BinOp::FSub; }) 1101 .Default(llvm::AtomicRMWInst::BinOp::BAD_BINOP); 1102 } 1103 1104 /// Converts an OpenMP atomic update operation using OpenMPIRBuilder. 1105 static LogicalResult 1106 convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst, 1107 llvm::IRBuilderBase &builder, 1108 LLVM::ModuleTranslation &moduleTranslation) { 1109 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 1110 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 1111 1112 // Convert values and types. 1113 auto &innerOpList = opInst.region().front().getOperations(); 1114 if (innerOpList.size() != 2) 1115 return opInst.emitError("exactly two operations are allowed inside an " 1116 "atomic update region while lowering to LLVM IR"); 1117 1118 Operation &innerUpdateOp = innerOpList.front(); 1119 1120 if (innerUpdateOp.getNumOperands() != 2 || 1121 !llvm::is_contained(innerUpdateOp.getOperands(), 1122 opInst.getRegion().getArgument(0))) 1123 return opInst.emitError( 1124 "the update operation inside the region must be a binary operation and " 1125 "that update operation must have the region argument as an operand"); 1126 1127 llvm::AtomicRMWInst::BinOp binop = convertBinOpToAtomic(innerUpdateOp); 1128 1129 bool isXBinopExpr = 1130 innerUpdateOp.getNumOperands() > 0 && 1131 innerUpdateOp.getOperand(0) == opInst.getRegion().getArgument(0); 1132 1133 mlir::Value mlirExpr = (isXBinopExpr ? innerUpdateOp.getOperand(1) 1134 : innerUpdateOp.getOperand(0)); 1135 llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr); 1136 llvm::Value *llvmX = moduleTranslation.lookupValue(opInst.x()); 1137 LLVM::LLVMPointerType mlirXType = 1138 opInst.x().getType().cast<LLVM::LLVMPointerType>(); 1139 llvm::Type *llvmXElementType = 1140 moduleTranslation.convertType(mlirXType.getElementType()); 1141 llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType, 1142 /*isSigned=*/false, 1143 /*isVolatile=*/false}; 1144 1145 llvm::AtomicOrdering atomicOrdering = 1146 convertAtomicOrdering(opInst.memory_order_val()); 1147 1148 // Generate update code. 1149 LogicalResult updateGenStatus = success(); 1150 auto updateFn = [&opInst, &moduleTranslation, &updateGenStatus]( 1151 llvm::Value *atomicx, 1152 llvm::IRBuilder<> &builder) -> llvm::Value * { 1153 Block &bb = *opInst.region().begin(); 1154 moduleTranslation.mapValue(*opInst.region().args_begin(), atomicx); 1155 moduleTranslation.mapBlock(&bb, builder.GetInsertBlock()); 1156 if (failed(moduleTranslation.convertBlock(bb, true, builder))) { 1157 updateGenStatus = (opInst.emitError() 1158 << "unable to convert update operation to llvm IR"); 1159 return nullptr; 1160 } 1161 omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator()); 1162 assert(yieldop && yieldop.results().size() == 1 && 1163 "terminator must be omp.yield op and it must have exactly one " 1164 "argument"); 1165 return moduleTranslation.lookupValue(yieldop.results()[0]); 1166 }; 1167 1168 // Handle ambiguous alloca, if any. 1169 auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation); 1170 if (allocaIP.getPoint() == ompLoc.IP.getPoint()) { 1171 // Same point => split basic block and make them unambigous. 1172 llvm::UnreachableInst *unreachableInst = builder.CreateUnreachable(); 1173 builder.SetInsertPoint(builder.GetInsertBlock()->splitBasicBlock( 1174 unreachableInst, "alloca_split")); 1175 ompLoc.IP = builder.saveIP(); 1176 unreachableInst->eraseFromParent(); 1177 } 1178 builder.restoreIP(ompBuilder->createAtomicUpdate( 1179 ompLoc, findAllocaInsertPoint(builder, moduleTranslation), llvmAtomicX, 1180 llvmExpr, atomicOrdering, binop, updateFn, isXBinopExpr)); 1181 return updateGenStatus; 1182 } 1183 1184 static LogicalResult 1185 convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp, 1186 llvm::IRBuilderBase &builder, 1187 LLVM::ModuleTranslation &moduleTranslation) { 1188 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 1189 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 1190 mlir::Value mlirExpr; 1191 bool isXBinopExpr = false, isPostfixUpdate = false; 1192 llvm::AtomicRMWInst::BinOp binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP; 1193 1194 omp::AtomicUpdateOp atomicUpdateOp = atomicCaptureOp.getAtomicUpdateOp(); 1195 omp::AtomicWriteOp atomicWriteOp = atomicCaptureOp.getAtomicWriteOp(); 1196 1197 assert((atomicUpdateOp || atomicWriteOp) && 1198 "internal op must be an atomic.update or atomic.write op"); 1199 1200 if (atomicWriteOp) { 1201 isPostfixUpdate = true; 1202 mlirExpr = atomicWriteOp.value(); 1203 } else { 1204 isPostfixUpdate = atomicCaptureOp.getSecondOp() == 1205 atomicCaptureOp.getAtomicUpdateOp().getOperation(); 1206 auto &innerOpList = atomicUpdateOp.region().front().getOperations(); 1207 if (innerOpList.size() != 2) 1208 return atomicUpdateOp.emitError( 1209 "exactly two operations are allowed inside an " 1210 "atomic update region while lowering to LLVM IR"); 1211 Operation *innerUpdateOp = atomicUpdateOp.getFirstOp(); 1212 if (innerUpdateOp->getNumOperands() != 2 || 1213 !llvm::is_contained(innerUpdateOp->getOperands(), 1214 atomicUpdateOp.getRegion().getArgument(0))) 1215 return atomicUpdateOp.emitError( 1216 "the update operation inside the region must be a binary operation " 1217 "and that update operation must have the region argument as an " 1218 "operand"); 1219 binop = convertBinOpToAtomic(*innerUpdateOp); 1220 1221 isXBinopExpr = innerUpdateOp->getOperand(0) == 1222 atomicUpdateOp.getRegion().getArgument(0); 1223 1224 mlirExpr = (isXBinopExpr ? innerUpdateOp->getOperand(1) 1225 : innerUpdateOp->getOperand(0)); 1226 } 1227 1228 llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr); 1229 llvm::Value *llvmX = 1230 moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().x()); 1231 llvm::Value *llvmV = 1232 moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().v()); 1233 auto mlirXType = atomicCaptureOp.getAtomicReadOp() 1234 .x() 1235 .getType() 1236 .cast<LLVM::LLVMPointerType>(); 1237 llvm::Type *llvmXElementType = 1238 moduleTranslation.convertType(mlirXType.getElementType()); 1239 llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType, 1240 /*isSigned=*/false, 1241 /*isVolatile=*/false}; 1242 llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicV = {llvmV, llvmXElementType, 1243 /*isSigned=*/false, 1244 /*isVolatile=*/false}; 1245 1246 llvm::AtomicOrdering atomicOrdering = 1247 convertAtomicOrdering(atomicCaptureOp.memory_order_val()); 1248 1249 LogicalResult updateGenStatus = success(); 1250 auto updateFn = [&](llvm::Value *atomicx, 1251 llvm::IRBuilder<> &builder) -> llvm::Value * { 1252 if (atomicWriteOp) 1253 return moduleTranslation.lookupValue(atomicWriteOp.value()); 1254 Block &bb = *atomicUpdateOp.region().begin(); 1255 moduleTranslation.mapValue(*atomicUpdateOp.region().args_begin(), atomicx); 1256 moduleTranslation.mapBlock(&bb, builder.GetInsertBlock()); 1257 if (failed(moduleTranslation.convertBlock(bb, true, builder))) { 1258 updateGenStatus = (atomicUpdateOp.emitError() 1259 << "unable to convert update operation to llvm IR"); 1260 return nullptr; 1261 } 1262 omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator()); 1263 assert(yieldop && yieldop.results().size() == 1 && 1264 "terminator must be omp.yield op and it must have exactly one " 1265 "argument"); 1266 return moduleTranslation.lookupValue(yieldop.results()[0]); 1267 }; 1268 // Handle ambiguous alloca, if any. 1269 auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation); 1270 if (allocaIP.getPoint() == ompLoc.IP.getPoint()) { 1271 // Same point => split basic block and make them unambigous. 1272 llvm::UnreachableInst *unreachableInst = builder.CreateUnreachable(); 1273 builder.SetInsertPoint(builder.GetInsertBlock()->splitBasicBlock( 1274 unreachableInst, "alloca_split")); 1275 ompLoc.IP = builder.saveIP(); 1276 unreachableInst->eraseFromParent(); 1277 } 1278 builder.restoreIP(ompBuilder->createAtomicCapture( 1279 ompLoc, findAllocaInsertPoint(builder, moduleTranslation), llvmAtomicX, 1280 llvmAtomicV, llvmExpr, atomicOrdering, binop, updateFn, atomicUpdateOp, 1281 isPostfixUpdate, isXBinopExpr)); 1282 return updateGenStatus; 1283 } 1284 1285 /// Converts an OpenMP reduction operation using OpenMPIRBuilder. Expects the 1286 /// mapping between reduction variables and their private equivalents to have 1287 /// been stored on the ModuleTranslation stack. Currently only supports 1288 /// reduction within WsLoopOp, but can be easily extended. 1289 static LogicalResult 1290 convertOmpReductionOp(omp::ReductionOp reductionOp, 1291 llvm::IRBuilderBase &builder, 1292 LLVM::ModuleTranslation &moduleTranslation) { 1293 // Find the declaration that corresponds to the reduction op. 1294 auto reductionContainer = reductionOp->getParentOfType<omp::WsLoopOp>(); 1295 omp::ReductionDeclareOp declaration = 1296 findReductionDecl(reductionContainer, reductionOp); 1297 assert(declaration && "could not find reduction declaration"); 1298 1299 // Retrieve the mapping between reduction variables and their private 1300 // equivalents. 1301 const DenseMap<Value, llvm::Value *> *reductionVariableMap = nullptr; 1302 moduleTranslation.stackWalk<OpenMPVarMappingStackFrame>( 1303 [&](const OpenMPVarMappingStackFrame &frame) { 1304 reductionVariableMap = &frame.mapping; 1305 return WalkResult::interrupt(); 1306 }); 1307 assert(reductionVariableMap && "couldn't find private reduction variables"); 1308 1309 // Translate the reduction operation by emitting the body of the corresponding 1310 // reduction declaration. 1311 Region &reductionRegion = declaration.reductionRegion(); 1312 llvm::Value *privateReductionVar = 1313 reductionVariableMap->lookup(reductionOp.accumulator()); 1314 llvm::Value *reductionVal = builder.CreateLoad( 1315 moduleTranslation.convertType(reductionOp.operand().getType()), 1316 privateReductionVar); 1317 1318 moduleTranslation.mapValue(reductionRegion.front().getArgument(0), 1319 reductionVal); 1320 moduleTranslation.mapValue( 1321 reductionRegion.front().getArgument(1), 1322 moduleTranslation.lookupValue(reductionOp.operand())); 1323 1324 SmallVector<llvm::Value *> phis; 1325 if (failed(inlineConvertOmpRegions(reductionRegion, "omp.reduction.body", 1326 builder, moduleTranslation, &phis))) 1327 return failure(); 1328 assert(phis.size() == 1 && "expected one value to be yielded from " 1329 "the reduction body declaration region"); 1330 builder.CreateStore(phis[0], privateReductionVar); 1331 return success(); 1332 } 1333 1334 namespace { 1335 1336 /// Implementation of the dialect interface that converts operations belonging 1337 /// to the OpenMP dialect to LLVM IR. 1338 class OpenMPDialectLLVMIRTranslationInterface 1339 : public LLVMTranslationDialectInterface { 1340 public: 1341 using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface; 1342 1343 /// Translates the given operation to LLVM IR using the provided IR builder 1344 /// and saving the state in `moduleTranslation`. 1345 LogicalResult 1346 convertOperation(Operation *op, llvm::IRBuilderBase &builder, 1347 LLVM::ModuleTranslation &moduleTranslation) const final; 1348 }; 1349 1350 } // namespace 1351 1352 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR 1353 /// (including OpenMP runtime calls). 1354 LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation( 1355 Operation *op, llvm::IRBuilderBase &builder, 1356 LLVM::ModuleTranslation &moduleTranslation) const { 1357 1358 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 1359 1360 return llvm::TypeSwitch<Operation *, LogicalResult>(op) 1361 .Case([&](omp::BarrierOp) { 1362 ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier); 1363 return success(); 1364 }) 1365 .Case([&](omp::TaskwaitOp) { 1366 ompBuilder->createTaskwait(builder.saveIP()); 1367 return success(); 1368 }) 1369 .Case([&](omp::TaskyieldOp) { 1370 ompBuilder->createTaskyield(builder.saveIP()); 1371 return success(); 1372 }) 1373 .Case([&](omp::FlushOp) { 1374 // No support in Openmp runtime function (__kmpc_flush) to accept 1375 // the argument list. 1376 // OpenMP standard states the following: 1377 // "An implementation may implement a flush with a list by ignoring 1378 // the list, and treating it the same as a flush without a list." 1379 // 1380 // The argument list is discarded so that, flush with a list is treated 1381 // same as a flush without a list. 1382 ompBuilder->createFlush(builder.saveIP()); 1383 return success(); 1384 }) 1385 .Case([&](omp::ParallelOp op) { 1386 return convertOmpParallel(op, builder, moduleTranslation); 1387 }) 1388 .Case([&](omp::ReductionOp reductionOp) { 1389 return convertOmpReductionOp(reductionOp, builder, moduleTranslation); 1390 }) 1391 .Case([&](omp::MasterOp) { 1392 return convertOmpMaster(*op, builder, moduleTranslation); 1393 }) 1394 .Case([&](omp::CriticalOp) { 1395 return convertOmpCritical(*op, builder, moduleTranslation); 1396 }) 1397 .Case([&](omp::OrderedRegionOp) { 1398 return convertOmpOrderedRegion(*op, builder, moduleTranslation); 1399 }) 1400 .Case([&](omp::OrderedOp) { 1401 return convertOmpOrdered(*op, builder, moduleTranslation); 1402 }) 1403 .Case([&](omp::WsLoopOp) { 1404 return convertOmpWsLoop(*op, builder, moduleTranslation); 1405 }) 1406 .Case([&](omp::SimdLoopOp) { 1407 return convertOmpSimdLoop(*op, builder, moduleTranslation); 1408 }) 1409 .Case([&](omp::AtomicReadOp) { 1410 return convertOmpAtomicRead(*op, builder, moduleTranslation); 1411 }) 1412 .Case([&](omp::AtomicWriteOp) { 1413 return convertOmpAtomicWrite(*op, builder, moduleTranslation); 1414 }) 1415 .Case([&](omp::AtomicUpdateOp op) { 1416 return convertOmpAtomicUpdate(op, builder, moduleTranslation); 1417 }) 1418 .Case([&](omp::AtomicCaptureOp op) { 1419 return convertOmpAtomicCapture(op, builder, moduleTranslation); 1420 }) 1421 .Case([&](omp::SectionsOp) { 1422 return convertOmpSections(*op, builder, moduleTranslation); 1423 }) 1424 .Case([&](omp::SingleOp op) { 1425 return convertOmpSingle(op, builder, moduleTranslation); 1426 }) 1427 .Case<omp::YieldOp, omp::TerminatorOp, omp::ReductionDeclareOp, 1428 omp::CriticalDeclareOp>([](auto op) { 1429 // `yield` and `terminator` can be just omitted. The block structure 1430 // was created in the region that handles their parent operation. 1431 // `reduction.declare` will be used by reductions and is not 1432 // converted directly, skip it. 1433 // `critical.declare` is only used to declare names of critical 1434 // sections which will be used by `critical` ops and hence can be 1435 // ignored for lowering. The OpenMP IRBuilder will create unique 1436 // name for critical section names. 1437 return success(); 1438 }) 1439 .Default([&](Operation *inst) { 1440 return inst->emitError("unsupported OpenMP operation: ") 1441 << inst->getName(); 1442 }); 1443 } 1444 1445 void mlir::registerOpenMPDialectTranslation(DialectRegistry ®istry) { 1446 registry.insert<omp::OpenMPDialect>(); 1447 registry.addExtension(+[](MLIRContext *ctx, omp::OpenMPDialect *dialect) { 1448 dialect->addInterfaces<OpenMPDialectLLVMIRTranslationInterface>(); 1449 }); 1450 } 1451 1452 void mlir::registerOpenMPDialectTranslation(MLIRContext &context) { 1453 DialectRegistry registry; 1454 registerOpenMPDialectTranslation(registry); 1455 context.appendDialectRegistry(registry); 1456 } 1457