1 //===- OpenMPToLLVMIRTranslation.cpp - Translate OpenMP dialect to LLVM IR-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a translation between the MLIR OpenMP dialect and LLVM 10 // IR. 11 // 12 //===----------------------------------------------------------------------===// 13 #include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h" 14 #include "mlir/Dialect/OpenMP/OpenMPDialect.h" 15 #include "mlir/IR/BlockAndValueMapping.h" 16 #include "mlir/IR/Operation.h" 17 #include "mlir/Support/LLVM.h" 18 #include "mlir/Target/LLVMIR/ModuleTranslation.h" 19 20 #include "llvm/ADT/SetVector.h" 21 #include "llvm/ADT/TypeSwitch.h" 22 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" 23 #include "llvm/IR/DebugInfoMetadata.h" 24 #include "llvm/IR/IRBuilder.h" 25 26 using namespace mlir; 27 28 namespace { 29 static llvm::omp::ScheduleKind 30 convertToScheduleKind(Optional<omp::ClauseScheduleKind> schedKind) { 31 if (!schedKind.has_value()) 32 return llvm::omp::OMP_SCHEDULE_Default; 33 switch (schedKind.value()) { 34 case omp::ClauseScheduleKind::Static: 35 return llvm::omp::OMP_SCHEDULE_Static; 36 case omp::ClauseScheduleKind::Dynamic: 37 return llvm::omp::OMP_SCHEDULE_Dynamic; 38 case omp::ClauseScheduleKind::Guided: 39 return llvm::omp::OMP_SCHEDULE_Guided; 40 case omp::ClauseScheduleKind::Auto: 41 return llvm::omp::OMP_SCHEDULE_Auto; 42 case omp::ClauseScheduleKind::Runtime: 43 return llvm::omp::OMP_SCHEDULE_Runtime; 44 } 45 llvm_unreachable("unhandled schedule clause argument"); 46 } 47 48 /// ModuleTranslation stack frame for OpenMP operations. This keeps track of the 49 /// insertion points for allocas. 50 class OpenMPAllocaStackFrame 51 : public LLVM::ModuleTranslation::StackFrameBase<OpenMPAllocaStackFrame> { 52 public: 53 MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPAllocaStackFrame) 54 55 explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP) 56 : allocaInsertPoint(allocaIP) {} 57 llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint; 58 }; 59 60 /// ModuleTranslation stack frame containing the partial mapping between MLIR 61 /// values and their LLVM IR equivalents. 62 class OpenMPVarMappingStackFrame 63 : public LLVM::ModuleTranslation::StackFrameBase< 64 OpenMPVarMappingStackFrame> { 65 public: 66 MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPVarMappingStackFrame) 67 68 explicit OpenMPVarMappingStackFrame( 69 const DenseMap<Value, llvm::Value *> &mapping) 70 : mapping(mapping) {} 71 72 DenseMap<Value, llvm::Value *> mapping; 73 }; 74 } // namespace 75 76 /// Find the insertion point for allocas given the current insertion point for 77 /// normal operations in the builder. 78 static llvm::OpenMPIRBuilder::InsertPointTy 79 findAllocaInsertPoint(llvm::IRBuilderBase &builder, 80 const LLVM::ModuleTranslation &moduleTranslation) { 81 // If there is an alloca insertion point on stack, i.e. we are in a nested 82 // operation and a specific point was provided by some surrounding operation, 83 // use it. 84 llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint; 85 WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocaStackFrame>( 86 [&](const OpenMPAllocaStackFrame &frame) { 87 allocaInsertPoint = frame.allocaInsertPoint; 88 return WalkResult::interrupt(); 89 }); 90 if (walkResult.wasInterrupted()) 91 return allocaInsertPoint; 92 93 // Otherwise, insert to the entry block of the surrounding function. 94 // If the current IRBuilder InsertPoint is the function's entry, it cannot 95 // also be used for alloca insertion which would result in insertion order 96 // confusion. Create a new BasicBlock for the Builder and use the entry block 97 // for the allocs. 98 // TODO: Create a dedicated alloca BasicBlock at function creation such that 99 // we do not need to move the current InertPoint here. 100 if (builder.GetInsertBlock() == 101 &builder.GetInsertBlock()->getParent()->getEntryBlock()) { 102 assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() && 103 "Assuming end of basic block"); 104 llvm::BasicBlock *entryBB = llvm::BasicBlock::Create( 105 builder.getContext(), "entry", builder.GetInsertBlock()->getParent(), 106 builder.GetInsertBlock()->getNextNode()); 107 builder.CreateBr(entryBB); 108 builder.SetInsertPoint(entryBB); 109 } 110 111 llvm::BasicBlock &funcEntryBlock = 112 builder.GetInsertBlock()->getParent()->getEntryBlock(); 113 return llvm::OpenMPIRBuilder::InsertPointTy( 114 &funcEntryBlock, funcEntryBlock.getFirstInsertionPt()); 115 } 116 117 /// Converts the given region that appears within an OpenMP dialect operation to 118 /// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the 119 /// region, and a branch from any block with an successor-less OpenMP terminator 120 /// to `continuationBlock`. Populates `continuationBlockPHIs` with the PHI nodes 121 /// of the continuation block if provided. 122 static llvm::BasicBlock *convertOmpOpRegions( 123 Region ®ion, StringRef blockName, llvm::IRBuilderBase &builder, 124 LLVM::ModuleTranslation &moduleTranslation, LogicalResult &bodyGenStatus, 125 SmallVectorImpl<llvm::PHINode *> *continuationBlockPHIs = nullptr) { 126 llvm::BasicBlock *continuationBlock = 127 splitBB(builder, true, "omp.region.cont"); 128 llvm::BasicBlock *sourceBlock = builder.GetInsertBlock(); 129 130 llvm::LLVMContext &llvmContext = builder.getContext(); 131 for (Block &bb : region) { 132 llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create( 133 llvmContext, blockName, builder.GetInsertBlock()->getParent(), 134 builder.GetInsertBlock()->getNextNode()); 135 moduleTranslation.mapBlock(&bb, llvmBB); 136 } 137 138 llvm::Instruction *sourceTerminator = sourceBlock->getTerminator(); 139 140 // Terminators (namely YieldOp) may be forwarding values to the region that 141 // need to be available in the continuation block. Collect the types of these 142 // operands in preparation of creating PHI nodes. 143 SmallVector<llvm::Type *> continuationBlockPHITypes; 144 bool operandsProcessed = false; 145 unsigned numYields = 0; 146 for (Block &bb : region.getBlocks()) { 147 if (omp::YieldOp yield = dyn_cast<omp::YieldOp>(bb.getTerminator())) { 148 if (!operandsProcessed) { 149 for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) { 150 continuationBlockPHITypes.push_back( 151 moduleTranslation.convertType(yield->getOperand(i).getType())); 152 } 153 operandsProcessed = true; 154 } else { 155 assert(continuationBlockPHITypes.size() == yield->getNumOperands() && 156 "mismatching number of values yielded from the region"); 157 for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) { 158 llvm::Type *operandType = 159 moduleTranslation.convertType(yield->getOperand(i).getType()); 160 (void)operandType; 161 assert(continuationBlockPHITypes[i] == operandType && 162 "values of mismatching types yielded from the region"); 163 } 164 } 165 numYields++; 166 } 167 } 168 169 // Insert PHI nodes in the continuation block for any values forwarded by the 170 // terminators in this region. 171 if (!continuationBlockPHITypes.empty()) 172 assert( 173 continuationBlockPHIs && 174 "expected continuation block PHIs if converted regions yield values"); 175 if (continuationBlockPHIs) { 176 llvm::IRBuilderBase::InsertPointGuard guard(builder); 177 continuationBlockPHIs->reserve(continuationBlockPHITypes.size()); 178 builder.SetInsertPoint(continuationBlock, continuationBlock->begin()); 179 for (llvm::Type *ty : continuationBlockPHITypes) 180 continuationBlockPHIs->push_back(builder.CreatePHI(ty, numYields)); 181 } 182 183 // Convert blocks one by one in topological order to ensure 184 // defs are converted before uses. 185 SetVector<Block *> blocks = 186 LLVM::detail::getTopologicallySortedBlocks(region); 187 for (Block *bb : blocks) { 188 llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb); 189 // Retarget the branch of the entry block to the entry block of the 190 // converted region (regions are single-entry). 191 if (bb->isEntryBlock()) { 192 assert(sourceTerminator->getNumSuccessors() == 1 && 193 "provided entry block has multiple successors"); 194 assert(sourceTerminator->getSuccessor(0) == continuationBlock && 195 "ContinuationBlock is not the successor of the entry block"); 196 sourceTerminator->setSuccessor(0, llvmBB); 197 } 198 199 llvm::IRBuilderBase::InsertPointGuard guard(builder); 200 if (failed( 201 moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder))) { 202 bodyGenStatus = failure(); 203 return continuationBlock; 204 } 205 206 // Special handling for `omp.yield` and `omp.terminator` (we may have more 207 // than one): they return the control to the parent OpenMP dialect operation 208 // so replace them with the branch to the continuation block. We handle this 209 // here to avoid relying inter-function communication through the 210 // ModuleTranslation class to set up the correct insertion point. This is 211 // also consistent with MLIR's idiom of handling special region terminators 212 // in the same code that handles the region-owning operation. 213 Operation *terminator = bb->getTerminator(); 214 if (isa<omp::TerminatorOp, omp::YieldOp>(terminator)) { 215 builder.CreateBr(continuationBlock); 216 217 for (unsigned i = 0, e = terminator->getNumOperands(); i < e; ++i) 218 (*continuationBlockPHIs)[i]->addIncoming( 219 moduleTranslation.lookupValue(terminator->getOperand(i)), llvmBB); 220 } 221 } 222 // After all blocks have been traversed and values mapped, connect the PHI 223 // nodes to the results of preceding blocks. 224 LLVM::detail::connectPHINodes(region, moduleTranslation); 225 226 // Remove the blocks and values defined in this region from the mapping since 227 // they are not visible outside of this region. This allows the same region to 228 // be converted several times, that is cloned, without clashes, and slightly 229 // speeds up the lookups. 230 moduleTranslation.forgetMapping(region); 231 232 return continuationBlock; 233 } 234 235 /// Convert ProcBindKind from MLIR-generated enum to LLVM enum. 236 static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) { 237 switch (kind) { 238 case omp::ClauseProcBindKind::Close: 239 return llvm::omp::ProcBindKind::OMP_PROC_BIND_close; 240 case omp::ClauseProcBindKind::Master: 241 return llvm::omp::ProcBindKind::OMP_PROC_BIND_master; 242 case omp::ClauseProcBindKind::Primary: 243 return llvm::omp::ProcBindKind::OMP_PROC_BIND_primary; 244 case omp::ClauseProcBindKind::Spread: 245 return llvm::omp::ProcBindKind::OMP_PROC_BIND_spread; 246 } 247 llvm_unreachable("Unknown ClauseProcBindKind kind"); 248 } 249 250 /// Converts the OpenMP parallel operation to LLVM IR. 251 static LogicalResult 252 convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, 253 LLVM::ModuleTranslation &moduleTranslation) { 254 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 255 // TODO: support error propagation in OpenMPIRBuilder and use it instead of 256 // relying on captured variables. 257 LogicalResult bodyGenStatus = success(); 258 259 auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { 260 // Save the alloca insertion point on ModuleTranslation stack for use in 261 // nested regions. 262 LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame( 263 moduleTranslation, allocaIP); 264 265 // ParallelOp has only one region associated with it. 266 builder.restoreIP(codeGenIP); 267 convertOmpOpRegions(opInst.getRegion(), "omp.par.region", builder, 268 moduleTranslation, bodyGenStatus); 269 }; 270 271 // TODO: Perform appropriate actions according to the data-sharing 272 // attribute (shared, private, firstprivate, ...) of variables. 273 // Currently defaults to shared. 274 auto privCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, 275 llvm::Value &, llvm::Value &vPtr, 276 llvm::Value *&replacementValue) -> InsertPointTy { 277 replacementValue = &vPtr; 278 279 return codeGenIP; 280 }; 281 282 // TODO: Perform finalization actions for variables. This has to be 283 // called for variables which have destructors/finalizers. 284 auto finiCB = [&](InsertPointTy codeGenIP) {}; 285 286 llvm::Value *ifCond = nullptr; 287 if (auto ifExprVar = opInst.if_expr_var()) 288 ifCond = moduleTranslation.lookupValue(ifExprVar); 289 llvm::Value *numThreads = nullptr; 290 if (auto numThreadsVar = opInst.num_threads_var()) 291 numThreads = moduleTranslation.lookupValue(numThreadsVar); 292 auto pbKind = llvm::omp::OMP_PROC_BIND_default; 293 if (auto bind = opInst.proc_bind_val()) 294 pbKind = getProcBindKind(*bind); 295 // TODO: Is the Parallel construct cancellable? 296 bool isCancellable = false; 297 298 llvm::OpenMPIRBuilder::InsertPointTy allocaIP = 299 findAllocaInsertPoint(builder, moduleTranslation); 300 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 301 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createParallel( 302 ompLoc, allocaIP, bodyGenCB, privCB, finiCB, ifCond, numThreads, pbKind, 303 isCancellable)); 304 305 return bodyGenStatus; 306 } 307 308 /// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder. 309 static LogicalResult 310 convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder, 311 LLVM::ModuleTranslation &moduleTranslation) { 312 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 313 // TODO: support error propagation in OpenMPIRBuilder and use it instead of 314 // relying on captured variables. 315 LogicalResult bodyGenStatus = success(); 316 317 auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { 318 // MasterOp has only one region associated with it. 319 auto ®ion = cast<omp::MasterOp>(opInst).getRegion(); 320 builder.restoreIP(codeGenIP); 321 convertOmpOpRegions(region, "omp.master.region", builder, moduleTranslation, 322 bodyGenStatus); 323 }; 324 325 // TODO: Perform finalization actions for variables. This has to be 326 // called for variables which have destructors/finalizers. 327 auto finiCB = [&](InsertPointTy codeGenIP) {}; 328 329 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 330 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createMaster( 331 ompLoc, bodyGenCB, finiCB)); 332 return success(); 333 } 334 335 /// Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder. 336 static LogicalResult 337 convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder, 338 LLVM::ModuleTranslation &moduleTranslation) { 339 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 340 auto criticalOp = cast<omp::CriticalOp>(opInst); 341 // TODO: support error propagation in OpenMPIRBuilder and use it instead of 342 // relying on captured variables. 343 LogicalResult bodyGenStatus = success(); 344 345 auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { 346 // CriticalOp has only one region associated with it. 347 auto ®ion = cast<omp::CriticalOp>(opInst).getRegion(); 348 builder.restoreIP(codeGenIP); 349 convertOmpOpRegions(region, "omp.critical.region", builder, 350 moduleTranslation, bodyGenStatus); 351 }; 352 353 // TODO: Perform finalization actions for variables. This has to be 354 // called for variables which have destructors/finalizers. 355 auto finiCB = [&](InsertPointTy codeGenIP) {}; 356 357 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 358 llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext(); 359 llvm::Constant *hint = nullptr; 360 361 // If it has a name, it probably has a hint too. 362 if (criticalOp.nameAttr()) { 363 // The verifiers in OpenMP Dialect guarentee that all the pointers are 364 // non-null 365 auto symbolRef = criticalOp.nameAttr().cast<SymbolRefAttr>(); 366 auto criticalDeclareOp = 367 SymbolTable::lookupNearestSymbolFrom<omp::CriticalDeclareOp>(criticalOp, 368 symbolRef); 369 hint = 370 llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext), 371 static_cast<int>(criticalDeclareOp.hint_val())); 372 } 373 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createCritical( 374 ompLoc, bodyGenCB, finiCB, criticalOp.name().value_or(""), hint)); 375 return success(); 376 } 377 378 /// Returns a reduction declaration that corresponds to the given reduction 379 /// operation in the given container. Currently only supports reductions inside 380 /// WsLoopOp but can be easily extended. 381 static omp::ReductionDeclareOp findReductionDecl(omp::WsLoopOp container, 382 omp::ReductionOp reduction) { 383 SymbolRefAttr reductionSymbol; 384 for (unsigned i = 0, e = container.getNumReductionVars(); i < e; ++i) { 385 if (container.reduction_vars()[i] != reduction.accumulator()) 386 continue; 387 reductionSymbol = (*container.reductions())[i].cast<SymbolRefAttr>(); 388 break; 389 } 390 assert(reductionSymbol && 391 "reduction operation must be associated with a declaration"); 392 393 return SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>( 394 container, reductionSymbol); 395 } 396 397 /// Populates `reductions` with reduction declarations used in the given loop. 398 static void 399 collectReductionDecls(omp::WsLoopOp loop, 400 SmallVectorImpl<omp::ReductionDeclareOp> &reductions) { 401 Optional<ArrayAttr> attr = loop.reductions(); 402 if (!attr) 403 return; 404 405 reductions.reserve(reductions.size() + loop.getNumReductionVars()); 406 for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) { 407 reductions.push_back( 408 SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>( 409 loop, symbolRef)); 410 } 411 } 412 413 /// Translates the blocks contained in the given region and appends them to at 414 /// the current insertion point of `builder`. The operations of the entry block 415 /// are appended to the current insertion block, which is not expected to have a 416 /// terminator. If set, `continuationBlockArgs` is populated with translated 417 /// values that correspond to the values omp.yield'ed from the region. 418 static LogicalResult inlineConvertOmpRegions( 419 Region ®ion, StringRef blockName, llvm::IRBuilderBase &builder, 420 LLVM::ModuleTranslation &moduleTranslation, 421 SmallVectorImpl<llvm::Value *> *continuationBlockArgs = nullptr) { 422 if (region.empty()) 423 return success(); 424 425 // Special case for single-block regions that don't create additional blocks: 426 // insert operations without creating additional blocks. 427 if (llvm::hasSingleElement(region)) { 428 moduleTranslation.mapBlock(®ion.front(), builder.GetInsertBlock()); 429 if (failed(moduleTranslation.convertBlock( 430 region.front(), /*ignoreArguments=*/true, builder))) 431 return failure(); 432 433 // The continuation arguments are simply the translated terminator operands. 434 if (continuationBlockArgs) 435 llvm::append_range( 436 *continuationBlockArgs, 437 moduleTranslation.lookupValues(region.front().back().getOperands())); 438 439 // Drop the mapping that is no longer necessary so that the same region can 440 // be processed multiple times. 441 moduleTranslation.forgetMapping(region); 442 return success(); 443 } 444 445 LogicalResult bodyGenStatus = success(); 446 SmallVector<llvm::PHINode *> phis; 447 llvm::BasicBlock *continuationBlock = convertOmpOpRegions( 448 region, blockName, builder, moduleTranslation, bodyGenStatus, &phis); 449 if (failed(bodyGenStatus)) 450 return failure(); 451 if (continuationBlockArgs) 452 llvm::append_range(*continuationBlockArgs, phis); 453 builder.SetInsertPoint(continuationBlock, 454 continuationBlock->getFirstInsertionPt()); 455 return success(); 456 } 457 458 namespace { 459 /// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used to 460 /// store lambdas with capture. 461 using OwningReductionGen = std::function<llvm::OpenMPIRBuilder::InsertPointTy( 462 llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *, 463 llvm::Value *&)>; 464 using OwningAtomicReductionGen = 465 std::function<llvm::OpenMPIRBuilder::InsertPointTy( 466 llvm::OpenMPIRBuilder::InsertPointTy, llvm::Type *, llvm::Value *, 467 llvm::Value *)>; 468 } // namespace 469 470 /// Create an OpenMPIRBuilder-compatible reduction generator for the given 471 /// reduction declaration. The generator uses `builder` but ignores its 472 /// insertion point. 473 static OwningReductionGen 474 makeReductionGen(omp::ReductionDeclareOp decl, llvm::IRBuilderBase &builder, 475 LLVM::ModuleTranslation &moduleTranslation) { 476 // The lambda is mutable because we need access to non-const methods of decl 477 // (which aren't actually mutating it), and we must capture decl by-value to 478 // avoid the dangling reference after the parent function returns. 479 OwningReductionGen gen = 480 [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, 481 llvm::Value *lhs, llvm::Value *rhs, 482 llvm::Value *&result) mutable { 483 Region &reductionRegion = decl.reductionRegion(); 484 moduleTranslation.mapValue(reductionRegion.front().getArgument(0), lhs); 485 moduleTranslation.mapValue(reductionRegion.front().getArgument(1), rhs); 486 builder.restoreIP(insertPoint); 487 SmallVector<llvm::Value *> phis; 488 if (failed(inlineConvertOmpRegions(reductionRegion, 489 "omp.reduction.nonatomic.body", 490 builder, moduleTranslation, &phis))) 491 return llvm::OpenMPIRBuilder::InsertPointTy(); 492 assert(phis.size() == 1); 493 result = phis[0]; 494 return builder.saveIP(); 495 }; 496 return gen; 497 } 498 499 /// Create an OpenMPIRBuilder-compatible atomic reduction generator for the 500 /// given reduction declaration. The generator uses `builder` but ignores its 501 /// insertion point. Returns null if there is no atomic region available in the 502 /// reduction declaration. 503 static OwningAtomicReductionGen 504 makeAtomicReductionGen(omp::ReductionDeclareOp decl, 505 llvm::IRBuilderBase &builder, 506 LLVM::ModuleTranslation &moduleTranslation) { 507 if (decl.atomicReductionRegion().empty()) 508 return OwningAtomicReductionGen(); 509 510 // The lambda is mutable because we need access to non-const methods of decl 511 // (which aren't actually mutating it), and we must capture decl by-value to 512 // avoid the dangling reference after the parent function returns. 513 OwningAtomicReductionGen atomicGen = 514 [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, llvm::Type *, 515 llvm::Value *lhs, llvm::Value *rhs) mutable { 516 Region &atomicRegion = decl.atomicReductionRegion(); 517 moduleTranslation.mapValue(atomicRegion.front().getArgument(0), lhs); 518 moduleTranslation.mapValue(atomicRegion.front().getArgument(1), rhs); 519 builder.restoreIP(insertPoint); 520 SmallVector<llvm::Value *> phis; 521 if (failed(inlineConvertOmpRegions(atomicRegion, 522 "omp.reduction.atomic.body", builder, 523 moduleTranslation, &phis))) 524 return llvm::OpenMPIRBuilder::InsertPointTy(); 525 assert(phis.empty()); 526 return builder.saveIP(); 527 }; 528 return atomicGen; 529 } 530 531 /// Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder. 532 static LogicalResult 533 convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder, 534 LLVM::ModuleTranslation &moduleTranslation) { 535 auto orderedOp = cast<omp::OrderedOp>(opInst); 536 537 omp::ClauseDepend dependType = *orderedOp.depend_type_val(); 538 bool isDependSource = dependType == omp::ClauseDepend::dependsource; 539 unsigned numLoops = *orderedOp.num_loops_val(); 540 SmallVector<llvm::Value *> vecValues = 541 moduleTranslation.lookupValues(orderedOp.depend_vec_vars()); 542 543 size_t indexVecValues = 0; 544 while (indexVecValues < vecValues.size()) { 545 SmallVector<llvm::Value *> storeValues; 546 storeValues.reserve(numLoops); 547 for (unsigned i = 0; i < numLoops; i++) { 548 storeValues.push_back(vecValues[indexVecValues]); 549 indexVecValues++; 550 } 551 llvm::OpenMPIRBuilder::InsertPointTy allocaIP = 552 findAllocaInsertPoint(builder, moduleTranslation); 553 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 554 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend( 555 ompLoc, allocaIP, numLoops, storeValues, ".cnt.addr", isDependSource)); 556 } 557 return success(); 558 } 559 560 /// Converts an OpenMP 'ordered_region' operation into LLVM IR using 561 /// OpenMPIRBuilder. 562 static LogicalResult 563 convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder, 564 LLVM::ModuleTranslation &moduleTranslation) { 565 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 566 auto orderedRegionOp = cast<omp::OrderedRegionOp>(opInst); 567 568 // TODO: The code generation for ordered simd directive is not supported yet. 569 if (orderedRegionOp.simd()) 570 return failure(); 571 572 // TODO: support error propagation in OpenMPIRBuilder and use it instead of 573 // relying on captured variables. 574 LogicalResult bodyGenStatus = success(); 575 576 auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { 577 // OrderedOp has only one region associated with it. 578 auto ®ion = cast<omp::OrderedRegionOp>(opInst).getRegion(); 579 builder.restoreIP(codeGenIP); 580 convertOmpOpRegions(region, "omp.ordered.region", builder, 581 moduleTranslation, bodyGenStatus); 582 }; 583 584 // TODO: Perform finalization actions for variables. This has to be 585 // called for variables which have destructors/finalizers. 586 auto finiCB = [&](InsertPointTy codeGenIP) {}; 587 588 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 589 builder.restoreIP( 590 moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd( 591 ompLoc, bodyGenCB, finiCB, !orderedRegionOp.simd())); 592 return bodyGenStatus; 593 } 594 595 static LogicalResult 596 convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, 597 LLVM::ModuleTranslation &moduleTranslation) { 598 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 599 using StorableBodyGenCallbackTy = 600 llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy; 601 602 auto sectionsOp = cast<omp::SectionsOp>(opInst); 603 604 // TODO: Support the following clauses: private, firstprivate, lastprivate, 605 // reduction, allocate 606 if (!sectionsOp.reduction_vars().empty() || sectionsOp.reductions() || 607 !sectionsOp.allocate_vars().empty() || 608 !sectionsOp.allocators_vars().empty()) 609 return emitError(sectionsOp.getLoc()) 610 << "reduction and allocate clauses are not supported for sections " 611 "construct"; 612 613 LogicalResult bodyGenStatus = success(); 614 SmallVector<StorableBodyGenCallbackTy> sectionCBs; 615 616 for (Operation &op : *sectionsOp.region().begin()) { 617 auto sectionOp = dyn_cast<omp::SectionOp>(op); 618 if (!sectionOp) // omp.terminator 619 continue; 620 621 Region ®ion = sectionOp.region(); 622 auto sectionCB = [®ion, &builder, &moduleTranslation, &bodyGenStatus]( 623 InsertPointTy allocaIP, InsertPointTy codeGenIP) { 624 builder.restoreIP(codeGenIP); 625 convertOmpOpRegions(region, "omp.section.region", builder, 626 moduleTranslation, bodyGenStatus); 627 }; 628 sectionCBs.push_back(sectionCB); 629 } 630 631 // No sections within omp.sections operation - skip generation. This situation 632 // is only possible if there is only a terminator operation inside the 633 // sections operation 634 if (sectionCBs.empty()) 635 return success(); 636 637 assert(isa<omp::SectionOp>(*sectionsOp.region().op_begin())); 638 639 // TODO: Perform appropriate actions according to the data-sharing 640 // attribute (shared, private, firstprivate, ...) of variables. 641 // Currently defaults to shared. 642 auto privCB = [&](InsertPointTy, InsertPointTy codeGenIP, llvm::Value &, 643 llvm::Value &vPtr, 644 llvm::Value *&replacementValue) -> InsertPointTy { 645 replacementValue = &vPtr; 646 return codeGenIP; 647 }; 648 649 // TODO: Perform finalization actions for variables. This has to be 650 // called for variables which have destructors/finalizers. 651 auto finiCB = [&](InsertPointTy codeGenIP) {}; 652 653 llvm::OpenMPIRBuilder::InsertPointTy allocaIP = 654 findAllocaInsertPoint(builder, moduleTranslation); 655 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 656 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSections( 657 ompLoc, allocaIP, sectionCBs, privCB, finiCB, false, 658 sectionsOp.nowait())); 659 return bodyGenStatus; 660 } 661 662 /// Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder. 663 static LogicalResult 664 convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder, 665 LLVM::ModuleTranslation &moduleTranslation) { 666 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 667 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 668 LogicalResult bodyGenStatus = success(); 669 auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) { 670 builder.restoreIP(codegenIP); 671 convertOmpOpRegions(singleOp.region(), "omp.single.region", builder, 672 moduleTranslation, bodyGenStatus); 673 }; 674 auto finiCB = [&](InsertPointTy codeGenIP) {}; 675 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSingle( 676 ompLoc, bodyCB, finiCB, singleOp.nowait(), /*DidIt=*/nullptr)); 677 return bodyGenStatus; 678 } 679 680 /// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder. 681 static LogicalResult 682 convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, 683 LLVM::ModuleTranslation &moduleTranslation) { 684 auto loop = cast<omp::WsLoopOp>(opInst); 685 // TODO: this should be in the op verifier instead. 686 if (loop.lowerBound().empty()) 687 return failure(); 688 689 // Static is the default. 690 auto schedule = loop.schedule_val().value_or(omp::ClauseScheduleKind::Static); 691 692 // Find the loop configuration. 693 llvm::Value *step = moduleTranslation.lookupValue(loop.step()[0]); 694 llvm::Type *ivType = step->getType(); 695 llvm::Value *chunk = nullptr; 696 if (loop.schedule_chunk_var()) { 697 llvm::Value *chunkVar = 698 moduleTranslation.lookupValue(loop.schedule_chunk_var()); 699 llvm::Type *chunkVarType = chunkVar->getType(); 700 assert(chunkVarType->isIntegerTy() && 701 "chunk size must be one integer expression"); 702 if (chunkVarType->getIntegerBitWidth() < ivType->getIntegerBitWidth()) 703 chunk = builder.CreateSExt(chunkVar, ivType); 704 else if (chunkVarType->getIntegerBitWidth() > ivType->getIntegerBitWidth()) 705 chunk = builder.CreateTrunc(chunkVar, ivType); 706 else 707 chunk = chunkVar; 708 } 709 710 SmallVector<omp::ReductionDeclareOp> reductionDecls; 711 collectReductionDecls(loop, reductionDecls); 712 llvm::OpenMPIRBuilder::InsertPointTy allocaIP = 713 findAllocaInsertPoint(builder, moduleTranslation); 714 715 // Allocate space for privatized reduction variables. 716 SmallVector<llvm::Value *> privateReductionVariables; 717 DenseMap<Value, llvm::Value *> reductionVariableMap; 718 unsigned numReductions = loop.getNumReductionVars(); 719 privateReductionVariables.reserve(numReductions); 720 if (numReductions != 0) { 721 llvm::IRBuilderBase::InsertPointGuard guard(builder); 722 builder.restoreIP(allocaIP); 723 for (unsigned i = 0; i < numReductions; ++i) { 724 auto reductionType = 725 loop.reduction_vars()[i].getType().cast<LLVM::LLVMPointerType>(); 726 llvm::Value *var = builder.CreateAlloca( 727 moduleTranslation.convertType(reductionType.getElementType())); 728 privateReductionVariables.push_back(var); 729 reductionVariableMap.try_emplace(loop.reduction_vars()[i], var); 730 } 731 } 732 733 // Store the mapping between reduction variables and their private copies on 734 // ModuleTranslation stack. It can be then recovered when translating 735 // omp.reduce operations in a separate call. 736 LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard( 737 moduleTranslation, reductionVariableMap); 738 739 // Before the loop, store the initial values of reductions into reduction 740 // variables. Although this could be done after allocas, we don't want to mess 741 // up with the alloca insertion point. 742 for (unsigned i = 0; i < numReductions; ++i) { 743 SmallVector<llvm::Value *> phis; 744 if (failed(inlineConvertOmpRegions(reductionDecls[i].initializerRegion(), 745 "omp.reduction.neutral", builder, 746 moduleTranslation, &phis))) 747 return failure(); 748 assert(phis.size() == 1 && "expected one value to be yielded from the " 749 "reduction neutral element declaration region"); 750 builder.CreateStore(phis[0], privateReductionVariables[i]); 751 } 752 753 // Set up the source location value for OpenMP runtime. 754 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 755 756 // Generator of the canonical loop body. 757 // TODO: support error propagation in OpenMPIRBuilder and use it instead of 758 // relying on captured variables. 759 SmallVector<llvm::CanonicalLoopInfo *> loopInfos; 760 SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints; 761 LogicalResult bodyGenStatus = success(); 762 auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) { 763 // Make sure further conversions know about the induction variable. 764 moduleTranslation.mapValue( 765 loop.getRegion().front().getArgument(loopInfos.size()), iv); 766 767 // Capture the body insertion point for use in nested loops. BodyIP of the 768 // CanonicalLoopInfo always points to the beginning of the entry block of 769 // the body. 770 bodyInsertPoints.push_back(ip); 771 772 if (loopInfos.size() != loop.getNumLoops() - 1) 773 return; 774 775 // Convert the body of the loop. 776 builder.restoreIP(ip); 777 convertOmpOpRegions(loop.region(), "omp.wsloop.region", builder, 778 moduleTranslation, bodyGenStatus); 779 }; 780 781 // Delegate actual loop construction to the OpenMP IRBuilder. 782 // TODO: this currently assumes WsLoop is semantically similar to SCF loop, 783 // i.e. it has a positive step, uses signed integer semantics. Reconsider 784 // this code when WsLoop clearly supports more cases. 785 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 786 for (unsigned i = 0, e = loop.getNumLoops(); i < e; ++i) { 787 llvm::Value *lowerBound = 788 moduleTranslation.lookupValue(loop.lowerBound()[i]); 789 llvm::Value *upperBound = 790 moduleTranslation.lookupValue(loop.upperBound()[i]); 791 llvm::Value *step = moduleTranslation.lookupValue(loop.step()[i]); 792 793 // Make sure loop trip count are emitted in the preheader of the outermost 794 // loop at the latest so that they are all available for the new collapsed 795 // loop will be created below. 796 llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc; 797 llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP; 798 if (i != 0) { 799 loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back()); 800 computeIP = loopInfos.front()->getPreheaderIP(); 801 } 802 loopInfos.push_back(ompBuilder->createCanonicalLoop( 803 loc, bodyGen, lowerBound, upperBound, step, 804 /*IsSigned=*/true, loop.inclusive(), computeIP)); 805 806 if (failed(bodyGenStatus)) 807 return failure(); 808 } 809 810 // Collapse loops. Store the insertion point because LoopInfos may get 811 // invalidated. 812 llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP(); 813 llvm::CanonicalLoopInfo *loopInfo = 814 ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {}); 815 816 allocaIP = findAllocaInsertPoint(builder, moduleTranslation); 817 818 // TODO: Handle doacross loops when the ordered clause has a parameter. 819 bool isOrdered = loop.ordered_val().has_value(); 820 Optional<omp::ScheduleModifier> scheduleModifier = loop.schedule_modifier(); 821 bool isSimd = loop.simd_modifier(); 822 823 ompBuilder->applyWorkshareLoop( 824 ompLoc.DL, loopInfo, allocaIP, !loop.nowait(), 825 convertToScheduleKind(schedule), chunk, isSimd, 826 scheduleModifier == omp::ScheduleModifier::monotonic, 827 scheduleModifier == omp::ScheduleModifier::nonmonotonic, isOrdered); 828 829 // Continue building IR after the loop. Note that the LoopInfo returned by 830 // `collapseLoops` points inside the outermost loop and is intended for 831 // potential further loop transformations. Use the insertion point stored 832 // before collapsing loops instead. 833 builder.restoreIP(afterIP); 834 835 // Process the reductions if required. 836 if (numReductions == 0) 837 return success(); 838 839 // Create the reduction generators. We need to own them here because 840 // ReductionInfo only accepts references to the generators. 841 SmallVector<OwningReductionGen> owningReductionGens; 842 SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens; 843 for (unsigned i = 0; i < numReductions; ++i) { 844 owningReductionGens.push_back( 845 makeReductionGen(reductionDecls[i], builder, moduleTranslation)); 846 owningAtomicReductionGens.push_back( 847 makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation)); 848 } 849 850 // Collect the reduction information. 851 SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos; 852 reductionInfos.reserve(numReductions); 853 for (unsigned i = 0; i < numReductions; ++i) { 854 llvm::OpenMPIRBuilder::AtomicReductionGenTy atomicGen = nullptr; 855 if (owningAtomicReductionGens[i]) 856 atomicGen = owningAtomicReductionGens[i]; 857 auto reductionType = 858 loop.reduction_vars()[i].getType().cast<LLVM::LLVMPointerType>(); 859 llvm::Value *variable = 860 moduleTranslation.lookupValue(loop.reduction_vars()[i]); 861 reductionInfos.push_back( 862 {moduleTranslation.convertType(reductionType.getElementType()), 863 variable, privateReductionVariables[i], owningReductionGens[i], 864 atomicGen}); 865 } 866 867 // The call to createReductions below expects the block to have a 868 // terminator. Create an unreachable instruction to serve as terminator 869 // and remove it later. 870 llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable(); 871 builder.SetInsertPoint(tempTerminator); 872 llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint = 873 ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos, 874 loop.nowait()); 875 if (!contInsertPoint.getBlock()) 876 return loop->emitOpError() << "failed to convert reductions"; 877 auto nextInsertionPoint = 878 ompBuilder->createBarrier(contInsertPoint, llvm::omp::OMPD_for); 879 tempTerminator->eraseFromParent(); 880 builder.restoreIP(nextInsertionPoint); 881 882 return success(); 883 } 884 885 /// Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder. 886 static LogicalResult 887 convertOmpSimdLoop(Operation &opInst, llvm::IRBuilderBase &builder, 888 LLVM::ModuleTranslation &moduleTranslation) { 889 auto loop = cast<omp::SimdLoopOp>(opInst); 890 891 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 892 893 // Generator of the canonical loop body. 894 // TODO: support error propagation in OpenMPIRBuilder and use it instead of 895 // relying on captured variables. 896 SmallVector<llvm::CanonicalLoopInfo *> loopInfos; 897 SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints; 898 LogicalResult bodyGenStatus = success(); 899 auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) { 900 // Make sure further conversions know about the induction variable. 901 moduleTranslation.mapValue( 902 loop.getRegion().front().getArgument(loopInfos.size()), iv); 903 904 // Capture the body insertion point for use in nested loops. BodyIP of the 905 // CanonicalLoopInfo always points to the beginning of the entry block of 906 // the body. 907 bodyInsertPoints.push_back(ip); 908 909 if (loopInfos.size() != loop.getNumLoops() - 1) 910 return; 911 912 // Convert the body of the loop. 913 builder.restoreIP(ip); 914 convertOmpOpRegions(loop.region(), "omp.simdloop.region", builder, 915 moduleTranslation, bodyGenStatus); 916 }; 917 918 // Delegate actual loop construction to the OpenMP IRBuilder. 919 // TODO: this currently assumes SimdLoop is semantically similar to SCF loop, 920 // i.e. it has a positive step, uses signed integer semantics. Reconsider 921 // this code when SimdLoop clearly supports more cases. 922 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 923 for (unsigned i = 0, e = loop.getNumLoops(); i < e; ++i) { 924 llvm::Value *lowerBound = 925 moduleTranslation.lookupValue(loop.lowerBound()[i]); 926 llvm::Value *upperBound = 927 moduleTranslation.lookupValue(loop.upperBound()[i]); 928 llvm::Value *step = moduleTranslation.lookupValue(loop.step()[i]); 929 930 // Make sure loop trip count are emitted in the preheader of the outermost 931 // loop at the latest so that they are all available for the new collapsed 932 // loop will be created below. 933 llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc; 934 llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP; 935 if (i != 0) { 936 loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(), 937 ompLoc.DL); 938 computeIP = loopInfos.front()->getPreheaderIP(); 939 } 940 loopInfos.push_back(ompBuilder->createCanonicalLoop( 941 loc, bodyGen, lowerBound, upperBound, step, 942 /*IsSigned=*/true, /*Inclusive=*/true, computeIP)); 943 944 if (failed(bodyGenStatus)) 945 return failure(); 946 } 947 948 // Collapse loops. 949 llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP(); 950 llvm::CanonicalLoopInfo *loopInfo = 951 ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {}); 952 953 ompBuilder->applySimd(ompLoc.DL, loopInfo); 954 955 builder.restoreIP(afterIP); 956 return success(); 957 } 958 959 /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering. 960 llvm::AtomicOrdering 961 convertAtomicOrdering(Optional<omp::ClauseMemoryOrderKind> ao) { 962 if (!ao) 963 return llvm::AtomicOrdering::Monotonic; // Default Memory Ordering 964 965 switch (*ao) { 966 case omp::ClauseMemoryOrderKind::Seq_cst: 967 return llvm::AtomicOrdering::SequentiallyConsistent; 968 case omp::ClauseMemoryOrderKind::Acq_rel: 969 return llvm::AtomicOrdering::AcquireRelease; 970 case omp::ClauseMemoryOrderKind::Acquire: 971 return llvm::AtomicOrdering::Acquire; 972 case omp::ClauseMemoryOrderKind::Release: 973 return llvm::AtomicOrdering::Release; 974 case omp::ClauseMemoryOrderKind::Relaxed: 975 return llvm::AtomicOrdering::Monotonic; 976 } 977 llvm_unreachable("Unknown ClauseMemoryOrderKind kind"); 978 } 979 980 /// Convert omp.atomic.read operation to LLVM IR. 981 static LogicalResult 982 convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder, 983 LLVM::ModuleTranslation &moduleTranslation) { 984 985 auto readOp = cast<omp::AtomicReadOp>(opInst); 986 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 987 988 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 989 990 llvm::AtomicOrdering AO = convertAtomicOrdering(readOp.memory_order_val()); 991 llvm::Value *x = moduleTranslation.lookupValue(readOp.x()); 992 Type xTy = readOp.x().getType().cast<omp::PointerLikeType>().getElementType(); 993 llvm::Value *v = moduleTranslation.lookupValue(readOp.v()); 994 Type vTy = readOp.v().getType().cast<omp::PointerLikeType>().getElementType(); 995 llvm::OpenMPIRBuilder::AtomicOpValue V = { 996 v, moduleTranslation.convertType(vTy), false, false}; 997 llvm::OpenMPIRBuilder::AtomicOpValue X = { 998 x, moduleTranslation.convertType(xTy), false, false}; 999 builder.restoreIP(ompBuilder->createAtomicRead(ompLoc, X, V, AO)); 1000 return success(); 1001 } 1002 1003 /// Converts an omp.atomic.write operation to LLVM IR. 1004 static LogicalResult 1005 convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder, 1006 LLVM::ModuleTranslation &moduleTranslation) { 1007 auto writeOp = cast<omp::AtomicWriteOp>(opInst); 1008 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 1009 1010 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 1011 llvm::AtomicOrdering ao = convertAtomicOrdering(writeOp.memory_order_val()); 1012 llvm::Value *expr = moduleTranslation.lookupValue(writeOp.value()); 1013 llvm::Value *dest = moduleTranslation.lookupValue(writeOp.address()); 1014 llvm::Type *ty = moduleTranslation.convertType(writeOp.value().getType()); 1015 llvm::OpenMPIRBuilder::AtomicOpValue x = {dest, ty, /*isSigned=*/false, 1016 /*isVolatile=*/false}; 1017 builder.restoreIP(ompBuilder->createAtomicWrite(ompLoc, x, expr, ao)); 1018 return success(); 1019 } 1020 1021 /// Converts an LLVM dialect binary operation to the corresponding enum value 1022 /// for `atomicrmw` supported binary operation. 1023 llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op) { 1024 return llvm::TypeSwitch<Operation *, llvm::AtomicRMWInst::BinOp>(&op) 1025 .Case([&](LLVM::AddOp) { return llvm::AtomicRMWInst::BinOp::Add; }) 1026 .Case([&](LLVM::SubOp) { return llvm::AtomicRMWInst::BinOp::Sub; }) 1027 .Case([&](LLVM::AndOp) { return llvm::AtomicRMWInst::BinOp::And; }) 1028 .Case([&](LLVM::OrOp) { return llvm::AtomicRMWInst::BinOp::Or; }) 1029 .Case([&](LLVM::XOrOp) { return llvm::AtomicRMWInst::BinOp::Xor; }) 1030 .Case([&](LLVM::UMaxOp) { return llvm::AtomicRMWInst::BinOp::UMax; }) 1031 .Case([&](LLVM::UMinOp) { return llvm::AtomicRMWInst::BinOp::UMin; }) 1032 .Case([&](LLVM::FAddOp) { return llvm::AtomicRMWInst::BinOp::FAdd; }) 1033 .Case([&](LLVM::FSubOp) { return llvm::AtomicRMWInst::BinOp::FSub; }) 1034 .Default(llvm::AtomicRMWInst::BinOp::BAD_BINOP); 1035 } 1036 1037 /// Converts an OpenMP atomic update operation using OpenMPIRBuilder. 1038 static LogicalResult 1039 convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst, 1040 llvm::IRBuilderBase &builder, 1041 LLVM::ModuleTranslation &moduleTranslation) { 1042 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 1043 1044 // Convert values and types. 1045 auto &innerOpList = opInst.region().front().getOperations(); 1046 if (innerOpList.size() != 2) 1047 return opInst.emitError("exactly two operations are allowed inside an " 1048 "atomic update region while lowering to LLVM IR"); 1049 1050 Operation &innerUpdateOp = innerOpList.front(); 1051 1052 if (innerUpdateOp.getNumOperands() != 2 || 1053 !llvm::is_contained(innerUpdateOp.getOperands(), 1054 opInst.getRegion().getArgument(0))) 1055 return opInst.emitError( 1056 "the update operation inside the region must be a binary operation and " 1057 "that update operation must have the region argument as an operand"); 1058 1059 llvm::AtomicRMWInst::BinOp binop = convertBinOpToAtomic(innerUpdateOp); 1060 1061 bool isXBinopExpr = 1062 innerUpdateOp.getNumOperands() > 0 && 1063 innerUpdateOp.getOperand(0) == opInst.getRegion().getArgument(0); 1064 1065 mlir::Value mlirExpr = (isXBinopExpr ? innerUpdateOp.getOperand(1) 1066 : innerUpdateOp.getOperand(0)); 1067 llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr); 1068 llvm::Value *llvmX = moduleTranslation.lookupValue(opInst.x()); 1069 LLVM::LLVMPointerType mlirXType = 1070 opInst.x().getType().cast<LLVM::LLVMPointerType>(); 1071 llvm::Type *llvmXElementType = 1072 moduleTranslation.convertType(mlirXType.getElementType()); 1073 llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType, 1074 /*isSigned=*/false, 1075 /*isVolatile=*/false}; 1076 1077 llvm::AtomicOrdering atomicOrdering = 1078 convertAtomicOrdering(opInst.memory_order_val()); 1079 1080 // Generate update code. 1081 LogicalResult updateGenStatus = success(); 1082 auto updateFn = [&opInst, &moduleTranslation, &updateGenStatus]( 1083 llvm::Value *atomicx, 1084 llvm::IRBuilder<> &builder) -> llvm::Value * { 1085 Block &bb = *opInst.region().begin(); 1086 moduleTranslation.mapValue(*opInst.region().args_begin(), atomicx); 1087 moduleTranslation.mapBlock(&bb, builder.GetInsertBlock()); 1088 if (failed(moduleTranslation.convertBlock(bb, true, builder))) { 1089 updateGenStatus = (opInst.emitError() 1090 << "unable to convert update operation to llvm IR"); 1091 return nullptr; 1092 } 1093 omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator()); 1094 assert(yieldop && yieldop.results().size() == 1 && 1095 "terminator must be omp.yield op and it must have exactly one " 1096 "argument"); 1097 return moduleTranslation.lookupValue(yieldop.results()[0]); 1098 }; 1099 1100 // Handle ambiguous alloca, if any. 1101 auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation); 1102 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 1103 builder.restoreIP(ompBuilder->createAtomicUpdate( 1104 ompLoc, allocaIP, llvmAtomicX, llvmExpr, atomicOrdering, binop, updateFn, 1105 isXBinopExpr)); 1106 return updateGenStatus; 1107 } 1108 1109 static LogicalResult 1110 convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp, 1111 llvm::IRBuilderBase &builder, 1112 LLVM::ModuleTranslation &moduleTranslation) { 1113 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 1114 mlir::Value mlirExpr; 1115 bool isXBinopExpr = false, isPostfixUpdate = false; 1116 llvm::AtomicRMWInst::BinOp binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP; 1117 1118 omp::AtomicUpdateOp atomicUpdateOp = atomicCaptureOp.getAtomicUpdateOp(); 1119 omp::AtomicWriteOp atomicWriteOp = atomicCaptureOp.getAtomicWriteOp(); 1120 1121 assert((atomicUpdateOp || atomicWriteOp) && 1122 "internal op must be an atomic.update or atomic.write op"); 1123 1124 if (atomicWriteOp) { 1125 isPostfixUpdate = true; 1126 mlirExpr = atomicWriteOp.value(); 1127 } else { 1128 isPostfixUpdate = atomicCaptureOp.getSecondOp() == 1129 atomicCaptureOp.getAtomicUpdateOp().getOperation(); 1130 auto &innerOpList = atomicUpdateOp.region().front().getOperations(); 1131 if (innerOpList.size() != 2) 1132 return atomicUpdateOp.emitError( 1133 "exactly two operations are allowed inside an " 1134 "atomic update region while lowering to LLVM IR"); 1135 Operation *innerUpdateOp = atomicUpdateOp.getFirstOp(); 1136 if (innerUpdateOp->getNumOperands() != 2 || 1137 !llvm::is_contained(innerUpdateOp->getOperands(), 1138 atomicUpdateOp.getRegion().getArgument(0))) 1139 return atomicUpdateOp.emitError( 1140 "the update operation inside the region must be a binary operation " 1141 "and that update operation must have the region argument as an " 1142 "operand"); 1143 binop = convertBinOpToAtomic(*innerUpdateOp); 1144 1145 isXBinopExpr = innerUpdateOp->getOperand(0) == 1146 atomicUpdateOp.getRegion().getArgument(0); 1147 1148 mlirExpr = (isXBinopExpr ? innerUpdateOp->getOperand(1) 1149 : innerUpdateOp->getOperand(0)); 1150 } 1151 1152 llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr); 1153 llvm::Value *llvmX = 1154 moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().x()); 1155 llvm::Value *llvmV = 1156 moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().v()); 1157 auto mlirXType = atomicCaptureOp.getAtomicReadOp() 1158 .x() 1159 .getType() 1160 .cast<LLVM::LLVMPointerType>(); 1161 llvm::Type *llvmXElementType = 1162 moduleTranslation.convertType(mlirXType.getElementType()); 1163 llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType, 1164 /*isSigned=*/false, 1165 /*isVolatile=*/false}; 1166 llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicV = {llvmV, llvmXElementType, 1167 /*isSigned=*/false, 1168 /*isVolatile=*/false}; 1169 1170 llvm::AtomicOrdering atomicOrdering = 1171 convertAtomicOrdering(atomicCaptureOp.memory_order_val()); 1172 1173 LogicalResult updateGenStatus = success(); 1174 auto updateFn = [&](llvm::Value *atomicx, 1175 llvm::IRBuilder<> &builder) -> llvm::Value * { 1176 if (atomicWriteOp) 1177 return moduleTranslation.lookupValue(atomicWriteOp.value()); 1178 Block &bb = *atomicUpdateOp.region().begin(); 1179 moduleTranslation.mapValue(*atomicUpdateOp.region().args_begin(), atomicx); 1180 moduleTranslation.mapBlock(&bb, builder.GetInsertBlock()); 1181 if (failed(moduleTranslation.convertBlock(bb, true, builder))) { 1182 updateGenStatus = (atomicUpdateOp.emitError() 1183 << "unable to convert update operation to llvm IR"); 1184 return nullptr; 1185 } 1186 omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator()); 1187 assert(yieldop && yieldop.results().size() == 1 && 1188 "terminator must be omp.yield op and it must have exactly one " 1189 "argument"); 1190 return moduleTranslation.lookupValue(yieldop.results()[0]); 1191 }; 1192 1193 // Handle ambiguous alloca, if any. 1194 auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation); 1195 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 1196 builder.restoreIP(ompBuilder->createAtomicCapture( 1197 ompLoc, allocaIP, llvmAtomicX, llvmAtomicV, llvmExpr, atomicOrdering, 1198 binop, updateFn, atomicUpdateOp, isPostfixUpdate, isXBinopExpr)); 1199 return updateGenStatus; 1200 } 1201 1202 /// Converts an OpenMP reduction operation using OpenMPIRBuilder. Expects the 1203 /// mapping between reduction variables and their private equivalents to have 1204 /// been stored on the ModuleTranslation stack. Currently only supports 1205 /// reduction within WsLoopOp, but can be easily extended. 1206 static LogicalResult 1207 convertOmpReductionOp(omp::ReductionOp reductionOp, 1208 llvm::IRBuilderBase &builder, 1209 LLVM::ModuleTranslation &moduleTranslation) { 1210 // Find the declaration that corresponds to the reduction op. 1211 auto reductionContainer = reductionOp->getParentOfType<omp::WsLoopOp>(); 1212 omp::ReductionDeclareOp declaration = 1213 findReductionDecl(reductionContainer, reductionOp); 1214 assert(declaration && "could not find reduction declaration"); 1215 1216 // Retrieve the mapping between reduction variables and their private 1217 // equivalents. 1218 const DenseMap<Value, llvm::Value *> *reductionVariableMap = nullptr; 1219 moduleTranslation.stackWalk<OpenMPVarMappingStackFrame>( 1220 [&](const OpenMPVarMappingStackFrame &frame) { 1221 reductionVariableMap = &frame.mapping; 1222 return WalkResult::interrupt(); 1223 }); 1224 assert(reductionVariableMap && "couldn't find private reduction variables"); 1225 1226 // Translate the reduction operation by emitting the body of the corresponding 1227 // reduction declaration. 1228 Region &reductionRegion = declaration.reductionRegion(); 1229 llvm::Value *privateReductionVar = 1230 reductionVariableMap->lookup(reductionOp.accumulator()); 1231 llvm::Value *reductionVal = builder.CreateLoad( 1232 moduleTranslation.convertType(reductionOp.operand().getType()), 1233 privateReductionVar); 1234 1235 moduleTranslation.mapValue(reductionRegion.front().getArgument(0), 1236 reductionVal); 1237 moduleTranslation.mapValue( 1238 reductionRegion.front().getArgument(1), 1239 moduleTranslation.lookupValue(reductionOp.operand())); 1240 1241 SmallVector<llvm::Value *> phis; 1242 if (failed(inlineConvertOmpRegions(reductionRegion, "omp.reduction.body", 1243 builder, moduleTranslation, &phis))) 1244 return failure(); 1245 assert(phis.size() == 1 && "expected one value to be yielded from " 1246 "the reduction body declaration region"); 1247 builder.CreateStore(phis[0], privateReductionVar); 1248 return success(); 1249 } 1250 1251 /// Converts an OpenMP Threadprivate operation into LLVM IR using 1252 /// OpenMPIRBuilder. 1253 static LogicalResult 1254 convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder, 1255 LLVM::ModuleTranslation &moduleTranslation) { 1256 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 1257 auto threadprivateOp = cast<omp::ThreadprivateOp>(opInst); 1258 1259 Value symAddr = threadprivateOp.sym_addr(); 1260 auto *symOp = symAddr.getDefiningOp(); 1261 if (!isa<LLVM::AddressOfOp>(symOp)) 1262 return opInst.emitError("Addressing symbol not found"); 1263 LLVM::AddressOfOp addressOfOp = dyn_cast<LLVM::AddressOfOp>(symOp); 1264 1265 LLVM::GlobalOp global = addressOfOp.getGlobal(); 1266 llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global); 1267 llvm::Value *data = 1268 builder.CreateBitCast(globalValue, builder.getInt8PtrTy()); 1269 llvm::Type *type = globalValue->getValueType(); 1270 llvm::TypeSize typeSize = 1271 builder.GetInsertBlock()->getModule()->getDataLayout().getTypeStoreSize( 1272 type); 1273 llvm::ConstantInt *size = builder.getInt64(typeSize.getFixedSize()); 1274 llvm::StringRef suffix = llvm::StringRef(".cache", 6); 1275 std::string cacheName = (Twine(global.getSymName()).concat(suffix)).str(); 1276 // Emit runtime function and bitcast its type (i8*) to real data type. 1277 llvm::Value *callInst = 1278 moduleTranslation.getOpenMPBuilder()->createCachedThreadPrivate( 1279 ompLoc, data, size, cacheName); 1280 llvm::Value *result = builder.CreateBitCast(callInst, globalValue->getType()); 1281 moduleTranslation.mapValue(opInst.getResult(0), result); 1282 return success(); 1283 } 1284 1285 namespace { 1286 1287 /// Implementation of the dialect interface that converts operations belonging 1288 /// to the OpenMP dialect to LLVM IR. 1289 class OpenMPDialectLLVMIRTranslationInterface 1290 : public LLVMTranslationDialectInterface { 1291 public: 1292 using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface; 1293 1294 /// Translates the given operation to LLVM IR using the provided IR builder 1295 /// and saving the state in `moduleTranslation`. 1296 LogicalResult 1297 convertOperation(Operation *op, llvm::IRBuilderBase &builder, 1298 LLVM::ModuleTranslation &moduleTranslation) const final; 1299 }; 1300 1301 } // namespace 1302 1303 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR 1304 /// (including OpenMP runtime calls). 1305 LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation( 1306 Operation *op, llvm::IRBuilderBase &builder, 1307 LLVM::ModuleTranslation &moduleTranslation) const { 1308 1309 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 1310 1311 return llvm::TypeSwitch<Operation *, LogicalResult>(op) 1312 .Case([&](omp::BarrierOp) { 1313 ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier); 1314 return success(); 1315 }) 1316 .Case([&](omp::TaskwaitOp) { 1317 ompBuilder->createTaskwait(builder.saveIP()); 1318 return success(); 1319 }) 1320 .Case([&](omp::TaskyieldOp) { 1321 ompBuilder->createTaskyield(builder.saveIP()); 1322 return success(); 1323 }) 1324 .Case([&](omp::FlushOp) { 1325 // No support in Openmp runtime function (__kmpc_flush) to accept 1326 // the argument list. 1327 // OpenMP standard states the following: 1328 // "An implementation may implement a flush with a list by ignoring 1329 // the list, and treating it the same as a flush without a list." 1330 // 1331 // The argument list is discarded so that, flush with a list is treated 1332 // same as a flush without a list. 1333 ompBuilder->createFlush(builder.saveIP()); 1334 return success(); 1335 }) 1336 .Case([&](omp::ParallelOp op) { 1337 return convertOmpParallel(op, builder, moduleTranslation); 1338 }) 1339 .Case([&](omp::ReductionOp reductionOp) { 1340 return convertOmpReductionOp(reductionOp, builder, moduleTranslation); 1341 }) 1342 .Case([&](omp::MasterOp) { 1343 return convertOmpMaster(*op, builder, moduleTranslation); 1344 }) 1345 .Case([&](omp::CriticalOp) { 1346 return convertOmpCritical(*op, builder, moduleTranslation); 1347 }) 1348 .Case([&](omp::OrderedRegionOp) { 1349 return convertOmpOrderedRegion(*op, builder, moduleTranslation); 1350 }) 1351 .Case([&](omp::OrderedOp) { 1352 return convertOmpOrdered(*op, builder, moduleTranslation); 1353 }) 1354 .Case([&](omp::WsLoopOp) { 1355 return convertOmpWsLoop(*op, builder, moduleTranslation); 1356 }) 1357 .Case([&](omp::SimdLoopOp) { 1358 return convertOmpSimdLoop(*op, builder, moduleTranslation); 1359 }) 1360 .Case([&](omp::AtomicReadOp) { 1361 return convertOmpAtomicRead(*op, builder, moduleTranslation); 1362 }) 1363 .Case([&](omp::AtomicWriteOp) { 1364 return convertOmpAtomicWrite(*op, builder, moduleTranslation); 1365 }) 1366 .Case([&](omp::AtomicUpdateOp op) { 1367 return convertOmpAtomicUpdate(op, builder, moduleTranslation); 1368 }) 1369 .Case([&](omp::AtomicCaptureOp op) { 1370 return convertOmpAtomicCapture(op, builder, moduleTranslation); 1371 }) 1372 .Case([&](omp::SectionsOp) { 1373 return convertOmpSections(*op, builder, moduleTranslation); 1374 }) 1375 .Case([&](omp::SingleOp op) { 1376 return convertOmpSingle(op, builder, moduleTranslation); 1377 }) 1378 .Case<omp::YieldOp, omp::TerminatorOp, omp::ReductionDeclareOp, 1379 omp::CriticalDeclareOp>([](auto op) { 1380 // `yield` and `terminator` can be just omitted. The block structure 1381 // was created in the region that handles their parent operation. 1382 // `reduction.declare` will be used by reductions and is not 1383 // converted directly, skip it. 1384 // `critical.declare` is only used to declare names of critical 1385 // sections which will be used by `critical` ops and hence can be 1386 // ignored for lowering. The OpenMP IRBuilder will create unique 1387 // name for critical section names. 1388 return success(); 1389 }) 1390 .Case([&](omp::ThreadprivateOp) { 1391 return convertOmpThreadprivate(*op, builder, moduleTranslation); 1392 }) 1393 .Default([&](Operation *inst) { 1394 return inst->emitError("unsupported OpenMP operation: ") 1395 << inst->getName(); 1396 }); 1397 } 1398 1399 void mlir::registerOpenMPDialectTranslation(DialectRegistry ®istry) { 1400 registry.insert<omp::OpenMPDialect>(); 1401 registry.addExtension(+[](MLIRContext *ctx, omp::OpenMPDialect *dialect) { 1402 dialect->addInterfaces<OpenMPDialectLLVMIRTranslationInterface>(); 1403 }); 1404 } 1405 1406 void mlir::registerOpenMPDialectTranslation(MLIRContext &context) { 1407 DialectRegistry registry; 1408 registerOpenMPDialectTranslation(registry); 1409 context.appendDialectRegistry(registry); 1410 } 1411