1 //===- OpenMPToLLVMIRTranslation.cpp - Translate OpenMP dialect to LLVM IR-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a translation between the MLIR OpenMP dialect and LLVM 10 // IR. 11 // 12 //===----------------------------------------------------------------------===// 13 #include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h" 14 #include "mlir/Dialect/OpenMP/OpenMPDialect.h" 15 #include "mlir/IR/BlockAndValueMapping.h" 16 #include "mlir/IR/Operation.h" 17 #include "mlir/Support/LLVM.h" 18 #include "mlir/Target/LLVMIR/ModuleTranslation.h" 19 20 #include "llvm/ADT/SetVector.h" 21 #include "llvm/ADT/TypeSwitch.h" 22 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" 23 #include "llvm/IR/DebugInfoMetadata.h" 24 #include "llvm/IR/IRBuilder.h" 25 26 using namespace mlir; 27 28 namespace { 29 static llvm::omp::ScheduleKind 30 convertToScheduleKind(Optional<omp::ClauseScheduleKind> schedKind) { 31 if (!schedKind.has_value()) 32 return llvm::omp::OMP_SCHEDULE_Default; 33 switch (schedKind.value()) { 34 case omp::ClauseScheduleKind::Static: 35 return llvm::omp::OMP_SCHEDULE_Static; 36 case omp::ClauseScheduleKind::Dynamic: 37 return llvm::omp::OMP_SCHEDULE_Dynamic; 38 case omp::ClauseScheduleKind::Guided: 39 return llvm::omp::OMP_SCHEDULE_Guided; 40 case omp::ClauseScheduleKind::Auto: 41 return llvm::omp::OMP_SCHEDULE_Auto; 42 case omp::ClauseScheduleKind::Runtime: 43 return llvm::omp::OMP_SCHEDULE_Runtime; 44 } 45 llvm_unreachable("unhandled schedule clause argument"); 46 } 47 48 /// ModuleTranslation stack frame for OpenMP operations. This keeps track of the 49 /// insertion points for allocas. 
50 class OpenMPAllocaStackFrame 51 : public LLVM::ModuleTranslation::StackFrameBase<OpenMPAllocaStackFrame> { 52 public: 53 MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPAllocaStackFrame) 54 55 explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP) 56 : allocaInsertPoint(allocaIP) {} 57 llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint; 58 }; 59 60 /// ModuleTranslation stack frame containing the partial mapping between MLIR 61 /// values and their LLVM IR equivalents. 62 class OpenMPVarMappingStackFrame 63 : public LLVM::ModuleTranslation::StackFrameBase< 64 OpenMPVarMappingStackFrame> { 65 public: 66 MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPVarMappingStackFrame) 67 68 explicit OpenMPVarMappingStackFrame( 69 const DenseMap<Value, llvm::Value *> &mapping) 70 : mapping(mapping) {} 71 72 DenseMap<Value, llvm::Value *> mapping; 73 }; 74 } // namespace 75 76 /// Find the insertion point for allocas given the current insertion point for 77 /// normal operations in the builder. 78 static llvm::OpenMPIRBuilder::InsertPointTy 79 findAllocaInsertPoint(llvm::IRBuilderBase &builder, 80 const LLVM::ModuleTranslation &moduleTranslation) { 81 // If there is an alloca insertion point on stack, i.e. we are in a nested 82 // operation and a specific point was provided by some surrounding operation, 83 // use it. 84 llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint; 85 WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocaStackFrame>( 86 [&](const OpenMPAllocaStackFrame &frame) { 87 allocaInsertPoint = frame.allocaInsertPoint; 88 return WalkResult::interrupt(); 89 }); 90 if (walkResult.wasInterrupted()) 91 return allocaInsertPoint; 92 93 // Otherwise, insert to the entry block of the surrounding function. 94 // If the current IRBuilder InsertPoint is the function's entry, it cannot 95 // also be used for alloca insertion which would result in insertion order 96 // confusion. 
Create a new BasicBlock for the Builder and use the entry block 97 // for the allocs. 98 // TODO: Create a dedicated alloca BasicBlock at function creation such that 99 // we do not need to move the current InertPoint here. 100 if (builder.GetInsertBlock() == 101 &builder.GetInsertBlock()->getParent()->getEntryBlock()) { 102 assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() && 103 "Assuming end of basic block"); 104 llvm::BasicBlock *entryBB = llvm::BasicBlock::Create( 105 builder.getContext(), "entry", builder.GetInsertBlock()->getParent(), 106 builder.GetInsertBlock()->getNextNode()); 107 builder.CreateBr(entryBB); 108 builder.SetInsertPoint(entryBB); 109 } 110 111 llvm::BasicBlock &funcEntryBlock = 112 builder.GetInsertBlock()->getParent()->getEntryBlock(); 113 return llvm::OpenMPIRBuilder::InsertPointTy( 114 &funcEntryBlock, funcEntryBlock.getFirstInsertionPt()); 115 } 116 117 /// Converts the given region that appears within an OpenMP dialect operation to 118 /// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the 119 /// region, and a branch from any block with an successor-less OpenMP terminator 120 /// to `continuationBlock`. Populates `continuationBlockPHIs` with the PHI nodes 121 /// of the continuation block if provided. 
122 static llvm::BasicBlock *convertOmpOpRegions( 123 Region ®ion, StringRef blockName, llvm::IRBuilderBase &builder, 124 LLVM::ModuleTranslation &moduleTranslation, LogicalResult &bodyGenStatus, 125 SmallVectorImpl<llvm::PHINode *> *continuationBlockPHIs = nullptr) { 126 llvm::BasicBlock *continuationBlock = 127 splitBB(builder, true, "omp.region.cont"); 128 llvm::BasicBlock *sourceBlock = builder.GetInsertBlock(); 129 130 llvm::LLVMContext &llvmContext = builder.getContext(); 131 for (Block &bb : region) { 132 llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create( 133 llvmContext, blockName, builder.GetInsertBlock()->getParent(), 134 builder.GetInsertBlock()->getNextNode()); 135 moduleTranslation.mapBlock(&bb, llvmBB); 136 } 137 138 llvm::Instruction *sourceTerminator = sourceBlock->getTerminator(); 139 140 // Terminators (namely YieldOp) may be forwarding values to the region that 141 // need to be available in the continuation block. Collect the types of these 142 // operands in preparation of creating PHI nodes. 
143 SmallVector<llvm::Type *> continuationBlockPHITypes; 144 bool operandsProcessed = false; 145 unsigned numYields = 0; 146 for (Block &bb : region.getBlocks()) { 147 if (omp::YieldOp yield = dyn_cast<omp::YieldOp>(bb.getTerminator())) { 148 if (!operandsProcessed) { 149 for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) { 150 continuationBlockPHITypes.push_back( 151 moduleTranslation.convertType(yield->getOperand(i).getType())); 152 } 153 operandsProcessed = true; 154 } else { 155 assert(continuationBlockPHITypes.size() == yield->getNumOperands() && 156 "mismatching number of values yielded from the region"); 157 for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) { 158 llvm::Type *operandType = 159 moduleTranslation.convertType(yield->getOperand(i).getType()); 160 (void)operandType; 161 assert(continuationBlockPHITypes[i] == operandType && 162 "values of mismatching types yielded from the region"); 163 } 164 } 165 numYields++; 166 } 167 } 168 169 // Insert PHI nodes in the continuation block for any values forwarded by the 170 // terminators in this region. 171 if (!continuationBlockPHITypes.empty()) 172 assert( 173 continuationBlockPHIs && 174 "expected continuation block PHIs if converted regions yield values"); 175 if (continuationBlockPHIs) { 176 llvm::IRBuilderBase::InsertPointGuard guard(builder); 177 continuationBlockPHIs->reserve(continuationBlockPHITypes.size()); 178 builder.SetInsertPoint(continuationBlock, continuationBlock->begin()); 179 for (llvm::Type *ty : continuationBlockPHITypes) 180 continuationBlockPHIs->push_back(builder.CreatePHI(ty, numYields)); 181 } 182 183 // Convert blocks one by one in topological order to ensure 184 // defs are converted before uses. 
185 SetVector<Block *> blocks = 186 LLVM::detail::getTopologicallySortedBlocks(region); 187 for (Block *bb : blocks) { 188 llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb); 189 // Retarget the branch of the entry block to the entry block of the 190 // converted region (regions are single-entry). 191 if (bb->isEntryBlock()) { 192 assert(sourceTerminator->getNumSuccessors() == 1 && 193 "provided entry block has multiple successors"); 194 assert(sourceTerminator->getSuccessor(0) == continuationBlock && 195 "ContinuationBlock is not the successor of the entry block"); 196 sourceTerminator->setSuccessor(0, llvmBB); 197 } 198 199 llvm::IRBuilderBase::InsertPointGuard guard(builder); 200 if (failed( 201 moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder))) { 202 bodyGenStatus = failure(); 203 return continuationBlock; 204 } 205 206 // Special handling for `omp.yield` and `omp.terminator` (we may have more 207 // than one): they return the control to the parent OpenMP dialect operation 208 // so replace them with the branch to the continuation block. We handle this 209 // here to avoid relying inter-function communication through the 210 // ModuleTranslation class to set up the correct insertion point. This is 211 // also consistent with MLIR's idiom of handling special region terminators 212 // in the same code that handles the region-owning operation. 213 Operation *terminator = bb->getTerminator(); 214 if (isa<omp::TerminatorOp, omp::YieldOp>(terminator)) { 215 builder.CreateBr(continuationBlock); 216 217 for (unsigned i = 0, e = terminator->getNumOperands(); i < e; ++i) 218 (*continuationBlockPHIs)[i]->addIncoming( 219 moduleTranslation.lookupValue(terminator->getOperand(i)), llvmBB); 220 } 221 } 222 // After all blocks have been traversed and values mapped, connect the PHI 223 // nodes to the results of preceding blocks. 
224 LLVM::detail::connectPHINodes(region, moduleTranslation); 225 226 // Remove the blocks and values defined in this region from the mapping since 227 // they are not visible outside of this region. This allows the same region to 228 // be converted several times, that is cloned, without clashes, and slightly 229 // speeds up the lookups. 230 moduleTranslation.forgetMapping(region); 231 232 return continuationBlock; 233 } 234 235 /// Convert ProcBindKind from MLIR-generated enum to LLVM enum. 236 static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) { 237 switch (kind) { 238 case omp::ClauseProcBindKind::Close: 239 return llvm::omp::ProcBindKind::OMP_PROC_BIND_close; 240 case omp::ClauseProcBindKind::Master: 241 return llvm::omp::ProcBindKind::OMP_PROC_BIND_master; 242 case omp::ClauseProcBindKind::Primary: 243 return llvm::omp::ProcBindKind::OMP_PROC_BIND_primary; 244 case omp::ClauseProcBindKind::Spread: 245 return llvm::omp::ProcBindKind::OMP_PROC_BIND_spread; 246 } 247 llvm_unreachable("Unknown ClauseProcBindKind kind"); 248 } 249 250 /// Converts the OpenMP parallel operation to LLVM IR. 251 static LogicalResult 252 convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, 253 LLVM::ModuleTranslation &moduleTranslation) { 254 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 255 // TODO: support error propagation in OpenMPIRBuilder and use it instead of 256 // relying on captured variables. 257 LogicalResult bodyGenStatus = success(); 258 259 auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { 260 // Save the alloca insertion point on ModuleTranslation stack for use in 261 // nested regions. 262 LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame( 263 moduleTranslation, allocaIP); 264 265 // ParallelOp has only one region associated with it. 
266 builder.restoreIP(codeGenIP); 267 convertOmpOpRegions(opInst.getRegion(), "omp.par.region", builder, 268 moduleTranslation, bodyGenStatus); 269 }; 270 271 // TODO: Perform appropriate actions according to the data-sharing 272 // attribute (shared, private, firstprivate, ...) of variables. 273 // Currently defaults to shared. 274 auto privCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, 275 llvm::Value &, llvm::Value &vPtr, 276 llvm::Value *&replacementValue) -> InsertPointTy { 277 replacementValue = &vPtr; 278 279 return codeGenIP; 280 }; 281 282 // TODO: Perform finalization actions for variables. This has to be 283 // called for variables which have destructors/finalizers. 284 auto finiCB = [&](InsertPointTy codeGenIP) {}; 285 286 llvm::Value *ifCond = nullptr; 287 if (auto ifExprVar = opInst.if_expr_var()) 288 ifCond = moduleTranslation.lookupValue(ifExprVar); 289 llvm::Value *numThreads = nullptr; 290 if (auto numThreadsVar = opInst.num_threads_var()) 291 numThreads = moduleTranslation.lookupValue(numThreadsVar); 292 auto pbKind = llvm::omp::OMP_PROC_BIND_default; 293 if (auto bind = opInst.proc_bind_val()) 294 pbKind = getProcBindKind(*bind); 295 // TODO: Is the Parallel construct cancellable? 296 bool isCancellable = false; 297 298 llvm::OpenMPIRBuilder::InsertPointTy allocaIP = 299 findAllocaInsertPoint(builder, moduleTranslation); 300 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 301 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createParallel( 302 ompLoc, allocaIP, bodyGenCB, privCB, finiCB, ifCond, numThreads, pbKind, 303 isCancellable)); 304 305 return bodyGenStatus; 306 } 307 308 /// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder. 
309 static LogicalResult 310 convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder, 311 LLVM::ModuleTranslation &moduleTranslation) { 312 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 313 // TODO: support error propagation in OpenMPIRBuilder and use it instead of 314 // relying on captured variables. 315 LogicalResult bodyGenStatus = success(); 316 317 auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { 318 // MasterOp has only one region associated with it. 319 auto ®ion = cast<omp::MasterOp>(opInst).getRegion(); 320 builder.restoreIP(codeGenIP); 321 convertOmpOpRegions(region, "omp.master.region", builder, moduleTranslation, 322 bodyGenStatus); 323 }; 324 325 // TODO: Perform finalization actions for variables. This has to be 326 // called for variables which have destructors/finalizers. 327 auto finiCB = [&](InsertPointTy codeGenIP) {}; 328 329 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 330 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createMaster( 331 ompLoc, bodyGenCB, finiCB)); 332 return success(); 333 } 334 335 /// Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder. 336 static LogicalResult 337 convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder, 338 LLVM::ModuleTranslation &moduleTranslation) { 339 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 340 auto criticalOp = cast<omp::CriticalOp>(opInst); 341 // TODO: support error propagation in OpenMPIRBuilder and use it instead of 342 // relying on captured variables. 343 LogicalResult bodyGenStatus = success(); 344 345 auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { 346 // CriticalOp has only one region associated with it. 
347 auto ®ion = cast<omp::CriticalOp>(opInst).getRegion(); 348 builder.restoreIP(codeGenIP); 349 convertOmpOpRegions(region, "omp.critical.region", builder, 350 moduleTranslation, bodyGenStatus); 351 }; 352 353 // TODO: Perform finalization actions for variables. This has to be 354 // called for variables which have destructors/finalizers. 355 auto finiCB = [&](InsertPointTy codeGenIP) {}; 356 357 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 358 llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext(); 359 llvm::Constant *hint = nullptr; 360 361 // If it has a name, it probably has a hint too. 362 if (criticalOp.nameAttr()) { 363 // The verifiers in OpenMP Dialect guarentee that all the pointers are 364 // non-null 365 auto symbolRef = criticalOp.nameAttr().cast<SymbolRefAttr>(); 366 auto criticalDeclareOp = 367 SymbolTable::lookupNearestSymbolFrom<omp::CriticalDeclareOp>(criticalOp, 368 symbolRef); 369 hint = 370 llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext), 371 static_cast<int>(criticalDeclareOp.hint_val())); 372 } 373 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createCritical( 374 ompLoc, bodyGenCB, finiCB, criticalOp.name().value_or(""), hint)); 375 return success(); 376 } 377 378 /// Returns a reduction declaration that corresponds to the given reduction 379 /// operation in the given container. Currently only supports reductions inside 380 /// WsLoopOp but can be easily extended. 
381 static omp::ReductionDeclareOp findReductionDecl(omp::WsLoopOp container, 382 omp::ReductionOp reduction) { 383 SymbolRefAttr reductionSymbol; 384 for (unsigned i = 0, e = container.getNumReductionVars(); i < e; ++i) { 385 if (container.reduction_vars()[i] != reduction.accumulator()) 386 continue; 387 reductionSymbol = (*container.reductions())[i].cast<SymbolRefAttr>(); 388 break; 389 } 390 assert(reductionSymbol && 391 "reduction operation must be associated with a declaration"); 392 393 return SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>( 394 container, reductionSymbol); 395 } 396 397 /// Populates `reductions` with reduction declarations used in the given loop. 398 static void 399 collectReductionDecls(omp::WsLoopOp loop, 400 SmallVectorImpl<omp::ReductionDeclareOp> &reductions) { 401 Optional<ArrayAttr> attr = loop.reductions(); 402 if (!attr) 403 return; 404 405 reductions.reserve(reductions.size() + loop.getNumReductionVars()); 406 for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) { 407 reductions.push_back( 408 SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>( 409 loop, symbolRef)); 410 } 411 } 412 413 /// Translates the blocks contained in the given region and appends them to at 414 /// the current insertion point of `builder`. The operations of the entry block 415 /// are appended to the current insertion block, which is not expected to have a 416 /// terminator. If set, `continuationBlockArgs` is populated with translated 417 /// values that correspond to the values omp.yield'ed from the region. 
418 static LogicalResult inlineConvertOmpRegions( 419 Region ®ion, StringRef blockName, llvm::IRBuilderBase &builder, 420 LLVM::ModuleTranslation &moduleTranslation, 421 SmallVectorImpl<llvm::Value *> *continuationBlockArgs = nullptr) { 422 if (region.empty()) 423 return success(); 424 425 // Special case for single-block regions that don't create additional blocks: 426 // insert operations without creating additional blocks. 427 if (llvm::hasSingleElement(region)) { 428 moduleTranslation.mapBlock(®ion.front(), builder.GetInsertBlock()); 429 if (failed(moduleTranslation.convertBlock( 430 region.front(), /*ignoreArguments=*/true, builder))) 431 return failure(); 432 433 // The continuation arguments are simply the translated terminator operands. 434 if (continuationBlockArgs) 435 llvm::append_range( 436 *continuationBlockArgs, 437 moduleTranslation.lookupValues(region.front().back().getOperands())); 438 439 // Drop the mapping that is no longer necessary so that the same region can 440 // be processed multiple times. 441 moduleTranslation.forgetMapping(region); 442 return success(); 443 } 444 445 LogicalResult bodyGenStatus = success(); 446 SmallVector<llvm::PHINode *> phis; 447 llvm::BasicBlock *continuationBlock = convertOmpOpRegions( 448 region, blockName, builder, moduleTranslation, bodyGenStatus, &phis); 449 if (failed(bodyGenStatus)) 450 return failure(); 451 if (continuationBlockArgs) 452 llvm::append_range(*continuationBlockArgs, phis); 453 builder.SetInsertPoint(continuationBlock, 454 continuationBlock->getFirstInsertionPt()); 455 return success(); 456 } 457 458 namespace { 459 /// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used to 460 /// store lambdas with capture. 
461 using OwningReductionGen = std::function<llvm::OpenMPIRBuilder::InsertPointTy( 462 llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *, 463 llvm::Value *&)>; 464 using OwningAtomicReductionGen = 465 std::function<llvm::OpenMPIRBuilder::InsertPointTy( 466 llvm::OpenMPIRBuilder::InsertPointTy, llvm::Type *, llvm::Value *, 467 llvm::Value *)>; 468 } // namespace 469 470 /// Create an OpenMPIRBuilder-compatible reduction generator for the given 471 /// reduction declaration. The generator uses `builder` but ignores its 472 /// insertion point. 473 static OwningReductionGen 474 makeReductionGen(omp::ReductionDeclareOp decl, llvm::IRBuilderBase &builder, 475 LLVM::ModuleTranslation &moduleTranslation) { 476 // The lambda is mutable because we need access to non-const methods of decl 477 // (which aren't actually mutating it), and we must capture decl by-value to 478 // avoid the dangling reference after the parent function returns. 479 OwningReductionGen gen = 480 [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, 481 llvm::Value *lhs, llvm::Value *rhs, 482 llvm::Value *&result) mutable { 483 Region &reductionRegion = decl.reductionRegion(); 484 moduleTranslation.mapValue(reductionRegion.front().getArgument(0), lhs); 485 moduleTranslation.mapValue(reductionRegion.front().getArgument(1), rhs); 486 builder.restoreIP(insertPoint); 487 SmallVector<llvm::Value *> phis; 488 if (failed(inlineConvertOmpRegions(reductionRegion, 489 "omp.reduction.nonatomic.body", 490 builder, moduleTranslation, &phis))) 491 return llvm::OpenMPIRBuilder::InsertPointTy(); 492 assert(phis.size() == 1); 493 result = phis[0]; 494 return builder.saveIP(); 495 }; 496 return gen; 497 } 498 499 /// Create an OpenMPIRBuilder-compatible atomic reduction generator for the 500 /// given reduction declaration. The generator uses `builder` but ignores its 501 /// insertion point. Returns null if there is no atomic region available in the 502 /// reduction declaration. 
503 static OwningAtomicReductionGen 504 makeAtomicReductionGen(omp::ReductionDeclareOp decl, 505 llvm::IRBuilderBase &builder, 506 LLVM::ModuleTranslation &moduleTranslation) { 507 if (decl.atomicReductionRegion().empty()) 508 return OwningAtomicReductionGen(); 509 510 // The lambda is mutable because we need access to non-const methods of decl 511 // (which aren't actually mutating it), and we must capture decl by-value to 512 // avoid the dangling reference after the parent function returns. 513 OwningAtomicReductionGen atomicGen = 514 [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, llvm::Type *, 515 llvm::Value *lhs, llvm::Value *rhs) mutable { 516 Region &atomicRegion = decl.atomicReductionRegion(); 517 moduleTranslation.mapValue(atomicRegion.front().getArgument(0), lhs); 518 moduleTranslation.mapValue(atomicRegion.front().getArgument(1), rhs); 519 builder.restoreIP(insertPoint); 520 SmallVector<llvm::Value *> phis; 521 if (failed(inlineConvertOmpRegions(atomicRegion, 522 "omp.reduction.atomic.body", builder, 523 moduleTranslation, &phis))) 524 return llvm::OpenMPIRBuilder::InsertPointTy(); 525 assert(phis.empty()); 526 return builder.saveIP(); 527 }; 528 return atomicGen; 529 } 530 531 /// Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder. 
532 static LogicalResult 533 convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder, 534 LLVM::ModuleTranslation &moduleTranslation) { 535 auto orderedOp = cast<omp::OrderedOp>(opInst); 536 537 omp::ClauseDepend dependType = *orderedOp.depend_type_val(); 538 bool isDependSource = dependType == omp::ClauseDepend::dependsource; 539 unsigned numLoops = *orderedOp.num_loops_val(); 540 SmallVector<llvm::Value *> vecValues = 541 moduleTranslation.lookupValues(orderedOp.depend_vec_vars()); 542 543 size_t indexVecValues = 0; 544 while (indexVecValues < vecValues.size()) { 545 SmallVector<llvm::Value *> storeValues; 546 storeValues.reserve(numLoops); 547 for (unsigned i = 0; i < numLoops; i++) { 548 storeValues.push_back(vecValues[indexVecValues]); 549 indexVecValues++; 550 } 551 llvm::OpenMPIRBuilder::InsertPointTy allocaIP = 552 findAllocaInsertPoint(builder, moduleTranslation); 553 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 554 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend( 555 ompLoc, allocaIP, numLoops, storeValues, ".cnt.addr", isDependSource)); 556 } 557 return success(); 558 } 559 560 /// Converts an OpenMP 'ordered_region' operation into LLVM IR using 561 /// OpenMPIRBuilder. 562 static LogicalResult 563 convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder, 564 LLVM::ModuleTranslation &moduleTranslation) { 565 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 566 auto orderedRegionOp = cast<omp::OrderedRegionOp>(opInst); 567 568 // TODO: The code generation for ordered simd directive is not supported yet. 569 if (orderedRegionOp.simd()) 570 return failure(); 571 572 // TODO: support error propagation in OpenMPIRBuilder and use it instead of 573 // relying on captured variables. 574 LogicalResult bodyGenStatus = success(); 575 576 auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { 577 // OrderedOp has only one region associated with it. 
578 auto ®ion = cast<omp::OrderedRegionOp>(opInst).getRegion(); 579 builder.restoreIP(codeGenIP); 580 convertOmpOpRegions(region, "omp.ordered.region", builder, 581 moduleTranslation, bodyGenStatus); 582 }; 583 584 // TODO: Perform finalization actions for variables. This has to be 585 // called for variables which have destructors/finalizers. 586 auto finiCB = [&](InsertPointTy codeGenIP) {}; 587 588 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 589 builder.restoreIP( 590 moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd( 591 ompLoc, bodyGenCB, finiCB, !orderedRegionOp.simd())); 592 return bodyGenStatus; 593 } 594 595 static LogicalResult 596 convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, 597 LLVM::ModuleTranslation &moduleTranslation) { 598 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 599 using StorableBodyGenCallbackTy = 600 llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy; 601 602 auto sectionsOp = cast<omp::SectionsOp>(opInst); 603 604 // TODO: Support the following clauses: private, firstprivate, lastprivate, 605 // reduction, allocate 606 if (!sectionsOp.reduction_vars().empty() || sectionsOp.reductions() || 607 !sectionsOp.allocate_vars().empty() || 608 !sectionsOp.allocators_vars().empty()) 609 return emitError(sectionsOp.getLoc()) 610 << "reduction and allocate clauses are not supported for sections " 611 "construct"; 612 613 LogicalResult bodyGenStatus = success(); 614 SmallVector<StorableBodyGenCallbackTy> sectionCBs; 615 616 for (Operation &op : *sectionsOp.region().begin()) { 617 auto sectionOp = dyn_cast<omp::SectionOp>(op); 618 if (!sectionOp) // omp.terminator 619 continue; 620 621 Region ®ion = sectionOp.region(); 622 auto sectionCB = [®ion, &builder, &moduleTranslation, &bodyGenStatus]( 623 InsertPointTy allocaIP, InsertPointTy codeGenIP) { 624 builder.restoreIP(codeGenIP); 625 convertOmpOpRegions(region, "omp.section.region", builder, 626 moduleTranslation, bodyGenStatus); 627 }; 628 
sectionCBs.push_back(sectionCB); 629 } 630 631 // No sections within omp.sections operation - skip generation. This situation 632 // is only possible if there is only a terminator operation inside the 633 // sections operation 634 if (sectionCBs.empty()) 635 return success(); 636 637 assert(isa<omp::SectionOp>(*sectionsOp.region().op_begin())); 638 639 // TODO: Perform appropriate actions according to the data-sharing 640 // attribute (shared, private, firstprivate, ...) of variables. 641 // Currently defaults to shared. 642 auto privCB = [&](InsertPointTy, InsertPointTy codeGenIP, llvm::Value &, 643 llvm::Value &vPtr, 644 llvm::Value *&replacementValue) -> InsertPointTy { 645 replacementValue = &vPtr; 646 return codeGenIP; 647 }; 648 649 // TODO: Perform finalization actions for variables. This has to be 650 // called for variables which have destructors/finalizers. 651 auto finiCB = [&](InsertPointTy codeGenIP) {}; 652 653 llvm::OpenMPIRBuilder::InsertPointTy allocaIP = 654 findAllocaInsertPoint(builder, moduleTranslation); 655 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 656 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSections( 657 ompLoc, allocaIP, sectionCBs, privCB, finiCB, false, 658 sectionsOp.nowait())); 659 return bodyGenStatus; 660 } 661 662 /// Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder. 
663 static LogicalResult 664 convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder, 665 LLVM::ModuleTranslation &moduleTranslation) { 666 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 667 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 668 LogicalResult bodyGenStatus = success(); 669 auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) { 670 builder.restoreIP(codegenIP); 671 convertOmpOpRegions(singleOp.region(), "omp.single.region", builder, 672 moduleTranslation, bodyGenStatus); 673 }; 674 auto finiCB = [&](InsertPointTy codeGenIP) {}; 675 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSingle( 676 ompLoc, bodyCB, finiCB, singleOp.nowait(), /*DidIt=*/nullptr)); 677 return bodyGenStatus; 678 } 679 680 /// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder. 681 static LogicalResult 682 convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, 683 LLVM::ModuleTranslation &moduleTranslation) { 684 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 685 LogicalResult bodyGenStatus = success(); 686 if (taskOp.if_expr() || taskOp.final_expr() || taskOp.untiedAttr() || 687 taskOp.mergeableAttr() || taskOp.in_reductions() || taskOp.priority() || 688 !taskOp.allocate_vars().empty()) { 689 return taskOp.emitError("unhandled clauses for translation to LLVM IR"); 690 } 691 auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) { 692 builder.restoreIP(codegenIP); 693 convertOmpOpRegions(taskOp.region(), "omp.task.region", builder, 694 moduleTranslation, bodyGenStatus); 695 }; 696 llvm::OpenMPIRBuilder::InsertPointTy allocaIP = 697 findAllocaInsertPoint(builder, moduleTranslation); 698 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 699 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTask( 700 ompLoc, allocaIP, bodyCB, !taskOp.untied())); 701 return bodyGenStatus; 702 } 703 704 /// Converts an OpenMP workshare loop into LLVM IR using 
/// OpenMPIRBuilder.
///
/// One canonical loop is created for every (lower bound, upper bound, step)
/// triple of the operation; the loops are then collapsed into a single loop to
/// which the worksharing schedule is applied. When the operation has reduction
/// variables, a private copy of each is allocated and initialized before the
/// loop nest, and the copies are combined after it via `createReductions`,
/// followed by a barrier.
///
/// Returns failure when the operation has no lower bounds, when a reduction
/// initializer region or the loop body cannot be converted, or (with an emitted
/// op error) when `createReductions` does not return a valid insertion point.
static LogicalResult
convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
                 LLVM::ModuleTranslation &moduleTranslation) {
  auto loop = cast<omp::WsLoopOp>(opInst);
  // TODO: this should be in the op verifier instead.
  if (loop.lowerBound().empty())
    return failure();

  // Static is the default.
  auto schedule = loop.schedule_val().value_or(omp::ClauseScheduleKind::Static);

  // Find the loop configuration. The type of the first step value is used as
  // the induction variable type, and the chunk size (if any) is sign-extended
  // or truncated to that type.
  llvm::Value *step = moduleTranslation.lookupValue(loop.step()[0]);
  llvm::Type *ivType = step->getType();
  llvm::Value *chunk = nullptr;
  if (loop.schedule_chunk_var()) {
    llvm::Value *chunkVar =
        moduleTranslation.lookupValue(loop.schedule_chunk_var());
    chunk = builder.CreateSExtOrTrunc(chunkVar, ivType);
  }

  SmallVector<omp::ReductionDeclareOp> reductionDecls;
  collectReductionDecls(loop, reductionDecls);
  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
      findAllocaInsertPoint(builder, moduleTranslation);

  // Allocate space for privatized reduction variables.
  SmallVector<llvm::Value *> privateReductionVariables;
  DenseMap<Value, llvm::Value *> reductionVariableMap;
  unsigned numReductions = loop.getNumReductionVars();
  privateReductionVariables.reserve(numReductions);
  if (numReductions != 0) {
    // The guard puts the builder back to its current insertion point once the
    // allocas have been emitted at `allocaIP`.
    llvm::IRBuilderBase::InsertPointGuard guard(builder);
    builder.restoreIP(allocaIP);
    for (unsigned i = 0; i < numReductions; ++i) {
      auto reductionType =
          loop.reduction_vars()[i].getType().cast<LLVM::LLVMPointerType>();
      llvm::Value *var = builder.CreateAlloca(
          moduleTranslation.convertType(reductionType.getElementType()));
      privateReductionVariables.push_back(var);
      reductionVariableMap.try_emplace(loop.reduction_vars()[i], var);
    }
  }

  // Store the mapping between reduction variables and their private copies on
  // the ModuleTranslation stack. It can then be recovered when translating
  // omp.reduction operations in a separate call.
  LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard(
      moduleTranslation, reductionVariableMap);

  // Before the loop, store the initial values of reductions into reduction
  // variables. Although this could be done after allocas, we don't want to mess
  // up with the alloca insertion point.
  for (unsigned i = 0; i < numReductions; ++i) {
    SmallVector<llvm::Value *> phis;
    if (failed(inlineConvertOmpRegions(reductionDecls[i].initializerRegion(),
                                       "omp.reduction.neutral", builder,
                                       moduleTranslation, &phis)))
      return failure();
    assert(phis.size() == 1 && "expected one value to be yielded from the "
                               "reduction neutral element declaration region");
    builder.CreateStore(phis[0], privateReductionVariables[i]);
  }

  // Set up the source location value for OpenMP runtime.
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);

  // Generator of the canonical loop body.
  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
  // relying on captured variables.
  SmallVector<llvm::CanonicalLoopInfo *> loopInfos;
  SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints;
  LogicalResult bodyGenStatus = success();
  auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) {
    // Make sure further conversions know about the induction variable. The
    // number of loops recorded so far is used as the index of the region
    // argument that corresponds to the loop currently being generated.
    moduleTranslation.mapValue(
        loop.getRegion().front().getArgument(loopInfos.size()), iv);

    // Capture the body insertion point for use in nested loops. BodyIP of the
    // CanonicalLoopInfo always points to the beginning of the entry block of
    // the body.
    bodyInsertPoints.push_back(ip);

    // Only the last loop of the nest receives the converted region; for the
    // enclosing loops recording the induction variable and the insertion point
    // is all that is needed.
    if (loopInfos.size() != loop.getNumLoops() - 1)
      return;

    // Convert the body of the loop.
    builder.restoreIP(ip);
    convertOmpOpRegions(loop.region(), "omp.wsloop.region", builder,
                        moduleTranslation, bodyGenStatus);
  };

  // Delegate actual loop construction to the OpenMP IRBuilder.
  // TODO: this currently assumes WsLoop is semantically similar to SCF loop,
  // i.e. it has a positive step, uses signed integer semantics. Reconsider
  // this code when WsLoop clearly supports more cases.
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  for (unsigned i = 0, e = loop.getNumLoops(); i < e; ++i) {
    llvm::Value *lowerBound =
        moduleTranslation.lookupValue(loop.lowerBound()[i]);
    llvm::Value *upperBound =
        moduleTranslation.lookupValue(loop.upperBound()[i]);
    llvm::Value *step = moduleTranslation.lookupValue(loop.step()[i]);

    // Make sure the loop trip counts are emitted in the preheader of the
    // outermost loop at the latest so that they are all available to the
    // collapsed loop that will be created below.
    llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
    llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
    if (i != 0) {
      // NOTE(review): unlike convertOmpSimdLoop, the location of a nested loop
      // is built without `ompLoc.DL` here - confirm whether dropping the debug
      // location is intended.
      loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back());
      computeIP = loopInfos.front()->getPreheaderIP();
    }
    loopInfos.push_back(ompBuilder->createCanonicalLoop(
        loc, bodyGen, lowerBound, upperBound, step,
        /*IsSigned=*/true, loop.inclusive(), computeIP));

    // `bodyGen` ran as part of the call above and may have recorded a failure.
    if (failed(bodyGenStatus))
      return failure();
  }

  // Collapse loops. Store the insertion point because LoopInfos may get
  // invalidated.
  llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP();
  llvm::CanonicalLoopInfo *loopInfo =
      ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});

  // NOTE(review): the alloca insertion point is looked up again instead of
  // reusing the value computed before the loop nest was built - presumably
  // because the IR has changed in between; confirm.
  allocaIP = findAllocaInsertPoint(builder, moduleTranslation);

  // TODO: Handle doacross loops when the ordered clause has a parameter.
  bool isOrdered = loop.ordered_val().has_value();
  Optional<omp::ScheduleModifier> scheduleModifier = loop.schedule_modifier();
  bool isSimd = loop.simd_modifier();

  // The two comparisons below are false when no schedule modifier is present.
  ompBuilder->applyWorkshareLoop(
      ompLoc.DL, loopInfo, allocaIP, !loop.nowait(),
      convertToScheduleKind(schedule), chunk, isSimd,
      scheduleModifier == omp::ScheduleModifier::monotonic,
      scheduleModifier == omp::ScheduleModifier::nonmonotonic, isOrdered);

  // Continue building IR after the loop. Note that the LoopInfo returned by
  // `collapseLoops` points inside the outermost loop and is intended for
  // potential further loop transformations. Use the insertion point stored
  // before collapsing loops instead.
  builder.restoreIP(afterIP);

  // Process the reductions if required.
  if (numReductions == 0)
    return success();

  // Create the reduction generators. We need to own them here because
  // ReductionInfo only accepts references to the generators.
  SmallVector<OwningReductionGen> owningReductionGens;
  SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
  for (unsigned i = 0; i < numReductions; ++i) {
    owningReductionGens.push_back(
        makeReductionGen(reductionDecls[i], builder, moduleTranslation));
    owningAtomicReductionGens.push_back(
        makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation));
  }

  // Collect the reduction information.
  SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos;
  reductionInfos.reserve(numReductions);
  for (unsigned i = 0; i < numReductions; ++i) {
    // The atomic generator stays null when no atomic generator was produced
    // for this declaration.
    llvm::OpenMPIRBuilder::AtomicReductionGenTy atomicGen = nullptr;
    if (owningAtomicReductionGens[i])
      atomicGen = owningAtomicReductionGens[i];
    auto reductionType =
        loop.reduction_vars()[i].getType().cast<LLVM::LLVMPointerType>();
    llvm::Value *variable =
        moduleTranslation.lookupValue(loop.reduction_vars()[i]);
    reductionInfos.push_back(
        {moduleTranslation.convertType(reductionType.getElementType()),
         variable, privateReductionVariables[i], owningReductionGens[i],
         atomicGen});
  }

  // The call to createReductions below expects the block to have a
  // terminator. Create an unreachable instruction to serve as terminator
  // and remove it later.
  llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
  builder.SetInsertPoint(tempTerminator);
  llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint =
      ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos,
                                   loop.nowait());
  // Note that the temporary terminator is left in place on this error path.
  if (!contInsertPoint.getBlock())
    return loop->emitOpError() << "failed to convert reductions";
  auto nextInsertionPoint =
      ompBuilder->createBarrier(contInsertPoint, llvm::omp::OMPD_for);
  tempTerminator->eraseFromParent();
  builder.restoreIP(nextInsertionPoint);

  return success();
}

/// Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
///
/// As for workshare loops, one canonical loop is created per loop of the
/// operation and the nest is collapsed before the simd transformation is
/// applied. Returns failure when the `if` clause is present or when the loop
/// body cannot be converted.
static LogicalResult
convertOmpSimdLoop(Operation &opInst, llvm::IRBuilderBase &builder,
                   LLVM::ModuleTranslation &moduleTranslation) {
  auto loop = cast<omp::SimdLoopOp>(opInst);

  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);

  // Generator of the canonical loop body.
  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
  // relying on captured variables.
  SmallVector<llvm::CanonicalLoopInfo *> loopInfos;
  SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints;
  LogicalResult bodyGenStatus = success();

  // TODO: The code generation for if clause is not supported yet.
  if (loop.if_expr())
    return failure();

  auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) {
    // Make sure further conversions know about the induction variable. The
    // number of loops recorded so far is used as the index of the region
    // argument that corresponds to the loop currently being generated.
    moduleTranslation.mapValue(
        loop.getRegion().front().getArgument(loopInfos.size()), iv);

    // Capture the body insertion point for use in nested loops. BodyIP of the
    // CanonicalLoopInfo always points to the beginning of the entry block of
    // the body.
    bodyInsertPoints.push_back(ip);

    // Only the last loop of the nest receives the converted region.
    if (loopInfos.size() != loop.getNumLoops() - 1)
      return;

    // Convert the body of the loop.
    builder.restoreIP(ip);
    convertOmpOpRegions(loop.region(), "omp.simdloop.region", builder,
                        moduleTranslation, bodyGenStatus);
  };

  // Delegate actual loop construction to the OpenMP IRBuilder.
  // TODO: this currently assumes SimdLoop is semantically similar to SCF loop,
  // i.e. it has a positive step, uses signed integer semantics. Reconsider
  // this code when SimdLoop clearly supports more cases.
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  for (unsigned i = 0, e = loop.getNumLoops(); i < e; ++i) {
    llvm::Value *lowerBound =
        moduleTranslation.lookupValue(loop.lowerBound()[i]);
    llvm::Value *upperBound =
        moduleTranslation.lookupValue(loop.upperBound()[i]);
    llvm::Value *step = moduleTranslation.lookupValue(loop.step()[i]);

    // Make sure the loop trip counts are emitted in the preheader of the
    // outermost loop at the latest so that they are all available to the
    // collapsed loop that will be created below.
    llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
    llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
    if (i != 0) {
      loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(),
                                                       ompLoc.DL);
      computeIP = loopInfos.front()->getPreheaderIP();
    }
    // NOTE(review): the upper bound is always treated as inclusive here,
    // whereas convertOmpWsLoop forwards `loop.inclusive()` - confirm that this
    // difference is intended.
    loopInfos.push_back(ompBuilder->createCanonicalLoop(
        loc, bodyGen, lowerBound, upperBound, step,
        /*IsSigned=*/true, /*Inclusive=*/true, computeIP));

    // `bodyGen` ran as part of the call above and may have recorded a failure.
    if (failed(bodyGenStatus))
      return failure();
  }

  // Collapse loops. The insertion point after the outermost loop is saved
  // first and restored once the simd transformation has been applied.
  llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP();
  llvm::CanonicalLoopInfo *loopInfo =
      ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});

  ompBuilder->applySimd(loopInfo, nullptr);

  builder.restoreIP(afterIP);
  return success();
}

/// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
///
/// An absent memory order is translated to `Monotonic`, the same value that
/// `Relaxed` maps to.
llvm::AtomicOrdering
convertAtomicOrdering(Optional<omp::ClauseMemoryOrderKind> ao) {
  if (!ao)
    return llvm::AtomicOrdering::Monotonic; // Default Memory Ordering

  switch (*ao) {
  case omp::ClauseMemoryOrderKind::Seq_cst:
    return llvm::AtomicOrdering::SequentiallyConsistent;
  case omp::ClauseMemoryOrderKind::Acq_rel:
    return llvm::AtomicOrdering::AcquireRelease;
  case omp::ClauseMemoryOrderKind::Acquire:
    return llvm::AtomicOrdering::Acquire;
  case omp::ClauseMemoryOrderKind::Release:
    return llvm::AtomicOrdering::Release;
  case omp::ClauseMemoryOrderKind::Relaxed:
    return llvm::AtomicOrdering::Monotonic;
  }
  llvm_unreachable("Unknown ClauseMemoryOrderKind kind");
}

/// Convert omp.atomic.read operation to LLVM IR.
1002 static LogicalResult 1003 convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder, 1004 LLVM::ModuleTranslation &moduleTranslation) { 1005 1006 auto readOp = cast<omp::AtomicReadOp>(opInst); 1007 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 1008 1009 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 1010 1011 llvm::AtomicOrdering AO = convertAtomicOrdering(readOp.memory_order_val()); 1012 llvm::Value *x = moduleTranslation.lookupValue(readOp.x()); 1013 Type xTy = readOp.x().getType().cast<omp::PointerLikeType>().getElementType(); 1014 llvm::Value *v = moduleTranslation.lookupValue(readOp.v()); 1015 Type vTy = readOp.v().getType().cast<omp::PointerLikeType>().getElementType(); 1016 llvm::OpenMPIRBuilder::AtomicOpValue V = { 1017 v, moduleTranslation.convertType(vTy), false, false}; 1018 llvm::OpenMPIRBuilder::AtomicOpValue X = { 1019 x, moduleTranslation.convertType(xTy), false, false}; 1020 builder.restoreIP(ompBuilder->createAtomicRead(ompLoc, X, V, AO)); 1021 return success(); 1022 } 1023 1024 /// Converts an omp.atomic.write operation to LLVM IR. 
1025 static LogicalResult 1026 convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder, 1027 LLVM::ModuleTranslation &moduleTranslation) { 1028 auto writeOp = cast<omp::AtomicWriteOp>(opInst); 1029 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 1030 1031 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 1032 llvm::AtomicOrdering ao = convertAtomicOrdering(writeOp.memory_order_val()); 1033 llvm::Value *expr = moduleTranslation.lookupValue(writeOp.value()); 1034 llvm::Value *dest = moduleTranslation.lookupValue(writeOp.address()); 1035 llvm::Type *ty = moduleTranslation.convertType(writeOp.value().getType()); 1036 llvm::OpenMPIRBuilder::AtomicOpValue x = {dest, ty, /*isSigned=*/false, 1037 /*isVolatile=*/false}; 1038 builder.restoreIP(ompBuilder->createAtomicWrite(ompLoc, x, expr, ao)); 1039 return success(); 1040 } 1041 1042 /// Converts an LLVM dialect binary operation to the corresponding enum value 1043 /// for `atomicrmw` supported binary operation. 1044 llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op) { 1045 return llvm::TypeSwitch<Operation *, llvm::AtomicRMWInst::BinOp>(&op) 1046 .Case([&](LLVM::AddOp) { return llvm::AtomicRMWInst::BinOp::Add; }) 1047 .Case([&](LLVM::SubOp) { return llvm::AtomicRMWInst::BinOp::Sub; }) 1048 .Case([&](LLVM::AndOp) { return llvm::AtomicRMWInst::BinOp::And; }) 1049 .Case([&](LLVM::OrOp) { return llvm::AtomicRMWInst::BinOp::Or; }) 1050 .Case([&](LLVM::XOrOp) { return llvm::AtomicRMWInst::BinOp::Xor; }) 1051 .Case([&](LLVM::UMaxOp) { return llvm::AtomicRMWInst::BinOp::UMax; }) 1052 .Case([&](LLVM::UMinOp) { return llvm::AtomicRMWInst::BinOp::UMin; }) 1053 .Case([&](LLVM::FAddOp) { return llvm::AtomicRMWInst::BinOp::FAdd; }) 1054 .Case([&](LLVM::FSubOp) { return llvm::AtomicRMWInst::BinOp::FSub; }) 1055 .Default(llvm::AtomicRMWInst::BinOp::BAD_BINOP); 1056 } 1057 1058 /// Converts an OpenMP atomic update operation using OpenMPIRBuilder. 
1059 static LogicalResult 1060 convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst, 1061 llvm::IRBuilderBase &builder, 1062 LLVM::ModuleTranslation &moduleTranslation) { 1063 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 1064 1065 // Convert values and types. 1066 auto &innerOpList = opInst.region().front().getOperations(); 1067 if (innerOpList.size() != 2) 1068 return opInst.emitError("exactly two operations are allowed inside an " 1069 "atomic update region while lowering to LLVM IR"); 1070 1071 Operation &innerUpdateOp = innerOpList.front(); 1072 1073 if (innerUpdateOp.getNumOperands() != 2 || 1074 !llvm::is_contained(innerUpdateOp.getOperands(), 1075 opInst.getRegion().getArgument(0))) 1076 return opInst.emitError( 1077 "the update operation inside the region must be a binary operation and " 1078 "that update operation must have the region argument as an operand"); 1079 1080 llvm::AtomicRMWInst::BinOp binop = convertBinOpToAtomic(innerUpdateOp); 1081 1082 bool isXBinopExpr = 1083 innerUpdateOp.getNumOperands() > 0 && 1084 innerUpdateOp.getOperand(0) == opInst.getRegion().getArgument(0); 1085 1086 mlir::Value mlirExpr = (isXBinopExpr ? innerUpdateOp.getOperand(1) 1087 : innerUpdateOp.getOperand(0)); 1088 llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr); 1089 llvm::Value *llvmX = moduleTranslation.lookupValue(opInst.x()); 1090 LLVM::LLVMPointerType mlirXType = 1091 opInst.x().getType().cast<LLVM::LLVMPointerType>(); 1092 llvm::Type *llvmXElementType = 1093 moduleTranslation.convertType(mlirXType.getElementType()); 1094 llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType, 1095 /*isSigned=*/false, 1096 /*isVolatile=*/false}; 1097 1098 llvm::AtomicOrdering atomicOrdering = 1099 convertAtomicOrdering(opInst.memory_order_val()); 1100 1101 // Generate update code. 
1102 LogicalResult updateGenStatus = success(); 1103 auto updateFn = [&opInst, &moduleTranslation, &updateGenStatus]( 1104 llvm::Value *atomicx, 1105 llvm::IRBuilder<> &builder) -> llvm::Value * { 1106 Block &bb = *opInst.region().begin(); 1107 moduleTranslation.mapValue(*opInst.region().args_begin(), atomicx); 1108 moduleTranslation.mapBlock(&bb, builder.GetInsertBlock()); 1109 if (failed(moduleTranslation.convertBlock(bb, true, builder))) { 1110 updateGenStatus = (opInst.emitError() 1111 << "unable to convert update operation to llvm IR"); 1112 return nullptr; 1113 } 1114 omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator()); 1115 assert(yieldop && yieldop.results().size() == 1 && 1116 "terminator must be omp.yield op and it must have exactly one " 1117 "argument"); 1118 return moduleTranslation.lookupValue(yieldop.results()[0]); 1119 }; 1120 1121 // Handle ambiguous alloca, if any. 1122 auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation); 1123 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 1124 builder.restoreIP(ompBuilder->createAtomicUpdate( 1125 ompLoc, allocaIP, llvmAtomicX, llvmExpr, atomicOrdering, binop, updateFn, 1126 isXBinopExpr)); 1127 return updateGenStatus; 1128 } 1129 1130 static LogicalResult 1131 convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp, 1132 llvm::IRBuilderBase &builder, 1133 LLVM::ModuleTranslation &moduleTranslation) { 1134 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 1135 mlir::Value mlirExpr; 1136 bool isXBinopExpr = false, isPostfixUpdate = false; 1137 llvm::AtomicRMWInst::BinOp binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP; 1138 1139 omp::AtomicUpdateOp atomicUpdateOp = atomicCaptureOp.getAtomicUpdateOp(); 1140 omp::AtomicWriteOp atomicWriteOp = atomicCaptureOp.getAtomicWriteOp(); 1141 1142 assert((atomicUpdateOp || atomicWriteOp) && 1143 "internal op must be an atomic.update or atomic.write op"); 1144 1145 if (atomicWriteOp) { 1146 isPostfixUpdate = 
true; 1147 mlirExpr = atomicWriteOp.value(); 1148 } else { 1149 isPostfixUpdate = atomicCaptureOp.getSecondOp() == 1150 atomicCaptureOp.getAtomicUpdateOp().getOperation(); 1151 auto &innerOpList = atomicUpdateOp.region().front().getOperations(); 1152 if (innerOpList.size() != 2) 1153 return atomicUpdateOp.emitError( 1154 "exactly two operations are allowed inside an " 1155 "atomic update region while lowering to LLVM IR"); 1156 Operation *innerUpdateOp = atomicUpdateOp.getFirstOp(); 1157 if (innerUpdateOp->getNumOperands() != 2 || 1158 !llvm::is_contained(innerUpdateOp->getOperands(), 1159 atomicUpdateOp.getRegion().getArgument(0))) 1160 return atomicUpdateOp.emitError( 1161 "the update operation inside the region must be a binary operation " 1162 "and that update operation must have the region argument as an " 1163 "operand"); 1164 binop = convertBinOpToAtomic(*innerUpdateOp); 1165 1166 isXBinopExpr = innerUpdateOp->getOperand(0) == 1167 atomicUpdateOp.getRegion().getArgument(0); 1168 1169 mlirExpr = (isXBinopExpr ? 
innerUpdateOp->getOperand(1) 1170 : innerUpdateOp->getOperand(0)); 1171 } 1172 1173 llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr); 1174 llvm::Value *llvmX = 1175 moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().x()); 1176 llvm::Value *llvmV = 1177 moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().v()); 1178 auto mlirXType = atomicCaptureOp.getAtomicReadOp() 1179 .x() 1180 .getType() 1181 .cast<LLVM::LLVMPointerType>(); 1182 llvm::Type *llvmXElementType = 1183 moduleTranslation.convertType(mlirXType.getElementType()); 1184 llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType, 1185 /*isSigned=*/false, 1186 /*isVolatile=*/false}; 1187 llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicV = {llvmV, llvmXElementType, 1188 /*isSigned=*/false, 1189 /*isVolatile=*/false}; 1190 1191 llvm::AtomicOrdering atomicOrdering = 1192 convertAtomicOrdering(atomicCaptureOp.memory_order_val()); 1193 1194 LogicalResult updateGenStatus = success(); 1195 auto updateFn = [&](llvm::Value *atomicx, 1196 llvm::IRBuilder<> &builder) -> llvm::Value * { 1197 if (atomicWriteOp) 1198 return moduleTranslation.lookupValue(atomicWriteOp.value()); 1199 Block &bb = *atomicUpdateOp.region().begin(); 1200 moduleTranslation.mapValue(*atomicUpdateOp.region().args_begin(), atomicx); 1201 moduleTranslation.mapBlock(&bb, builder.GetInsertBlock()); 1202 if (failed(moduleTranslation.convertBlock(bb, true, builder))) { 1203 updateGenStatus = (atomicUpdateOp.emitError() 1204 << "unable to convert update operation to llvm IR"); 1205 return nullptr; 1206 } 1207 omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator()); 1208 assert(yieldop && yieldop.results().size() == 1 && 1209 "terminator must be omp.yield op and it must have exactly one " 1210 "argument"); 1211 return moduleTranslation.lookupValue(yieldop.results()[0]); 1212 }; 1213 1214 // Handle ambiguous alloca, if any. 
1215 auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation); 1216 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 1217 builder.restoreIP(ompBuilder->createAtomicCapture( 1218 ompLoc, allocaIP, llvmAtomicX, llvmAtomicV, llvmExpr, atomicOrdering, 1219 binop, updateFn, atomicUpdateOp, isPostfixUpdate, isXBinopExpr)); 1220 return updateGenStatus; 1221 } 1222 1223 /// Converts an OpenMP reduction operation using OpenMPIRBuilder. Expects the 1224 /// mapping between reduction variables and their private equivalents to have 1225 /// been stored on the ModuleTranslation stack. Currently only supports 1226 /// reduction within WsLoopOp, but can be easily extended. 1227 static LogicalResult 1228 convertOmpReductionOp(omp::ReductionOp reductionOp, 1229 llvm::IRBuilderBase &builder, 1230 LLVM::ModuleTranslation &moduleTranslation) { 1231 // Find the declaration that corresponds to the reduction op. 1232 auto reductionContainer = reductionOp->getParentOfType<omp::WsLoopOp>(); 1233 omp::ReductionDeclareOp declaration = 1234 findReductionDecl(reductionContainer, reductionOp); 1235 assert(declaration && "could not find reduction declaration"); 1236 1237 // Retrieve the mapping between reduction variables and their private 1238 // equivalents. 1239 const DenseMap<Value, llvm::Value *> *reductionVariableMap = nullptr; 1240 moduleTranslation.stackWalk<OpenMPVarMappingStackFrame>( 1241 [&](const OpenMPVarMappingStackFrame &frame) { 1242 reductionVariableMap = &frame.mapping; 1243 return WalkResult::interrupt(); 1244 }); 1245 assert(reductionVariableMap && "couldn't find private reduction variables"); 1246 1247 // Translate the reduction operation by emitting the body of the corresponding 1248 // reduction declaration. 
1249 Region &reductionRegion = declaration.reductionRegion(); 1250 llvm::Value *privateReductionVar = 1251 reductionVariableMap->lookup(reductionOp.accumulator()); 1252 llvm::Value *reductionVal = builder.CreateLoad( 1253 moduleTranslation.convertType(reductionOp.operand().getType()), 1254 privateReductionVar); 1255 1256 moduleTranslation.mapValue(reductionRegion.front().getArgument(0), 1257 reductionVal); 1258 moduleTranslation.mapValue( 1259 reductionRegion.front().getArgument(1), 1260 moduleTranslation.lookupValue(reductionOp.operand())); 1261 1262 SmallVector<llvm::Value *> phis; 1263 if (failed(inlineConvertOmpRegions(reductionRegion, "omp.reduction.body", 1264 builder, moduleTranslation, &phis))) 1265 return failure(); 1266 assert(phis.size() == 1 && "expected one value to be yielded from " 1267 "the reduction body declaration region"); 1268 builder.CreateStore(phis[0], privateReductionVar); 1269 return success(); 1270 } 1271 1272 /// Converts an OpenMP Threadprivate operation into LLVM IR using 1273 /// OpenMPIRBuilder. 
1274 static LogicalResult 1275 convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder, 1276 LLVM::ModuleTranslation &moduleTranslation) { 1277 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 1278 auto threadprivateOp = cast<omp::ThreadprivateOp>(opInst); 1279 1280 Value symAddr = threadprivateOp.sym_addr(); 1281 auto *symOp = symAddr.getDefiningOp(); 1282 if (!isa<LLVM::AddressOfOp>(symOp)) 1283 return opInst.emitError("Addressing symbol not found"); 1284 LLVM::AddressOfOp addressOfOp = dyn_cast<LLVM::AddressOfOp>(symOp); 1285 1286 LLVM::GlobalOp global = addressOfOp.getGlobal(); 1287 llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global); 1288 llvm::Value *data = 1289 builder.CreateBitCast(globalValue, builder.getInt8PtrTy()); 1290 llvm::Type *type = globalValue->getValueType(); 1291 llvm::TypeSize typeSize = 1292 builder.GetInsertBlock()->getModule()->getDataLayout().getTypeStoreSize( 1293 type); 1294 llvm::ConstantInt *size = builder.getInt64(typeSize.getFixedSize()); 1295 llvm::StringRef suffix = llvm::StringRef(".cache", 6); 1296 std::string cacheName = (Twine(global.getSymName()).concat(suffix)).str(); 1297 // Emit runtime function and bitcast its type (i8*) to real data type. 1298 llvm::Value *callInst = 1299 moduleTranslation.getOpenMPBuilder()->createCachedThreadPrivate( 1300 ompLoc, data, size, cacheName); 1301 llvm::Value *result = builder.CreateBitCast(callInst, globalValue->getType()); 1302 moduleTranslation.mapValue(opInst.getResult(0), result); 1303 return success(); 1304 } 1305 1306 namespace { 1307 1308 /// Implementation of the dialect interface that converts operations belonging 1309 /// to the OpenMP dialect to LLVM IR. 
1310 class OpenMPDialectLLVMIRTranslationInterface 1311 : public LLVMTranslationDialectInterface { 1312 public: 1313 using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface; 1314 1315 /// Translates the given operation to LLVM IR using the provided IR builder 1316 /// and saving the state in `moduleTranslation`. 1317 LogicalResult 1318 convertOperation(Operation *op, llvm::IRBuilderBase &builder, 1319 LLVM::ModuleTranslation &moduleTranslation) const final; 1320 }; 1321 1322 } // namespace 1323 1324 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR 1325 /// (including OpenMP runtime calls). 1326 LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation( 1327 Operation *op, llvm::IRBuilderBase &builder, 1328 LLVM::ModuleTranslation &moduleTranslation) const { 1329 1330 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 1331 1332 return llvm::TypeSwitch<Operation *, LogicalResult>(op) 1333 .Case([&](omp::BarrierOp) { 1334 ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier); 1335 return success(); 1336 }) 1337 .Case([&](omp::TaskwaitOp) { 1338 ompBuilder->createTaskwait(builder.saveIP()); 1339 return success(); 1340 }) 1341 .Case([&](omp::TaskyieldOp) { 1342 ompBuilder->createTaskyield(builder.saveIP()); 1343 return success(); 1344 }) 1345 .Case([&](omp::FlushOp) { 1346 // No support in Openmp runtime function (__kmpc_flush) to accept 1347 // the argument list. 1348 // OpenMP standard states the following: 1349 // "An implementation may implement a flush with a list by ignoring 1350 // the list, and treating it the same as a flush without a list." 1351 // 1352 // The argument list is discarded so that, flush with a list is treated 1353 // same as a flush without a list. 
1354 ompBuilder->createFlush(builder.saveIP()); 1355 return success(); 1356 }) 1357 .Case([&](omp::ParallelOp op) { 1358 return convertOmpParallel(op, builder, moduleTranslation); 1359 }) 1360 .Case([&](omp::ReductionOp reductionOp) { 1361 return convertOmpReductionOp(reductionOp, builder, moduleTranslation); 1362 }) 1363 .Case([&](omp::MasterOp) { 1364 return convertOmpMaster(*op, builder, moduleTranslation); 1365 }) 1366 .Case([&](omp::CriticalOp) { 1367 return convertOmpCritical(*op, builder, moduleTranslation); 1368 }) 1369 .Case([&](omp::OrderedRegionOp) { 1370 return convertOmpOrderedRegion(*op, builder, moduleTranslation); 1371 }) 1372 .Case([&](omp::OrderedOp) { 1373 return convertOmpOrdered(*op, builder, moduleTranslation); 1374 }) 1375 .Case([&](omp::WsLoopOp) { 1376 return convertOmpWsLoop(*op, builder, moduleTranslation); 1377 }) 1378 .Case([&](omp::SimdLoopOp) { 1379 return convertOmpSimdLoop(*op, builder, moduleTranslation); 1380 }) 1381 .Case([&](omp::AtomicReadOp) { 1382 return convertOmpAtomicRead(*op, builder, moduleTranslation); 1383 }) 1384 .Case([&](omp::AtomicWriteOp) { 1385 return convertOmpAtomicWrite(*op, builder, moduleTranslation); 1386 }) 1387 .Case([&](omp::AtomicUpdateOp op) { 1388 return convertOmpAtomicUpdate(op, builder, moduleTranslation); 1389 }) 1390 .Case([&](omp::AtomicCaptureOp op) { 1391 return convertOmpAtomicCapture(op, builder, moduleTranslation); 1392 }) 1393 .Case([&](omp::SectionsOp) { 1394 return convertOmpSections(*op, builder, moduleTranslation); 1395 }) 1396 .Case([&](omp::SingleOp op) { 1397 return convertOmpSingle(op, builder, moduleTranslation); 1398 }) 1399 .Case([&](omp::TaskOp op) { 1400 return convertOmpTaskOp(op, builder, moduleTranslation); 1401 }) 1402 .Case<omp::YieldOp, omp::TerminatorOp, omp::ReductionDeclareOp, 1403 omp::CriticalDeclareOp>([](auto op) { 1404 // `yield` and `terminator` can be just omitted. The block structure 1405 // was created in the region that handles their parent operation. 
1406 // `reduction.declare` will be used by reductions and is not 1407 // converted directly, skip it. 1408 // `critical.declare` is only used to declare names of critical 1409 // sections which will be used by `critical` ops and hence can be 1410 // ignored for lowering. The OpenMP IRBuilder will create unique 1411 // name for critical section names. 1412 return success(); 1413 }) 1414 .Case([&](omp::ThreadprivateOp) { 1415 return convertOmpThreadprivate(*op, builder, moduleTranslation); 1416 }) 1417 .Default([&](Operation *inst) { 1418 return inst->emitError("unsupported OpenMP operation: ") 1419 << inst->getName(); 1420 }); 1421 } 1422 1423 void mlir::registerOpenMPDialectTranslation(DialectRegistry ®istry) { 1424 registry.insert<omp::OpenMPDialect>(); 1425 registry.addExtension(+[](MLIRContext *ctx, omp::OpenMPDialect *dialect) { 1426 dialect->addInterfaces<OpenMPDialectLLVMIRTranslationInterface>(); 1427 }); 1428 } 1429 1430 void mlir::registerOpenMPDialectTranslation(MLIRContext &context) { 1431 DialectRegistry registry; 1432 registerOpenMPDialectTranslation(registry); 1433 context.appendDialectRegistry(registry); 1434 } 1435