1 //===- OpenMPToLLVMIRTranslation.cpp - Translate OpenMP dialect to LLVM IR-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a translation between the MLIR OpenMP dialect and LLVM 10 // IR. 11 // 12 //===----------------------------------------------------------------------===// 13 #include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h" 14 #include "mlir/Dialect/OpenMP/OpenMPDialect.h" 15 #include "mlir/IR/BlockAndValueMapping.h" 16 #include "mlir/IR/Operation.h" 17 #include "mlir/Support/LLVM.h" 18 #include "mlir/Target/LLVMIR/ModuleTranslation.h" 19 20 #include "llvm/ADT/SetVector.h" 21 #include "llvm/ADT/TypeSwitch.h" 22 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" 23 #include "llvm/IR/DebugInfoMetadata.h" 24 #include "llvm/IR/IRBuilder.h" 25 26 using namespace mlir; 27 28 namespace { 29 static llvm::omp::ScheduleKind 30 convertToScheduleKind(Optional<omp::ClauseScheduleKind> schedKind) { 31 if (!schedKind.hasValue()) 32 return llvm::omp::OMP_SCHEDULE_Default; 33 switch (schedKind.getValue()) { 34 case omp::ClauseScheduleKind::Static: 35 return llvm::omp::OMP_SCHEDULE_Static; 36 case omp::ClauseScheduleKind::Dynamic: 37 return llvm::omp::OMP_SCHEDULE_Dynamic; 38 case omp::ClauseScheduleKind::Guided: 39 return llvm::omp::OMP_SCHEDULE_Guided; 40 case omp::ClauseScheduleKind::Auto: 41 return llvm::omp::OMP_SCHEDULE_Auto; 42 case omp::ClauseScheduleKind::Runtime: 43 return llvm::omp::OMP_SCHEDULE_Runtime; 44 } 45 llvm_unreachable("unhandled schedule clause argument"); 46 } 47 48 /// ModuleTranslation stack frame for OpenMP operations. This keeps track of the 49 /// insertion points for allocas. 50 class OpenMPAllocaStackFrame 51 : public LLVM::ModuleTranslation::StackFrameBase<OpenMPAllocaStackFrame> { 52 public: 53 MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPAllocaStackFrame) 54 55 explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP) 56 : allocaInsertPoint(allocaIP) {} 57 llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint; 58 }; 59 60 /// ModuleTranslation stack frame containing the partial mapping between MLIR 61 /// values and their LLVM IR equivalents. 62 class OpenMPVarMappingStackFrame 63 : public LLVM::ModuleTranslation::StackFrameBase< 64 OpenMPVarMappingStackFrame> { 65 public: 66 MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPVarMappingStackFrame) 67 68 explicit OpenMPVarMappingStackFrame( 69 const DenseMap<Value, llvm::Value *> &mapping) 70 : mapping(mapping) {} 71 72 DenseMap<Value, llvm::Value *> mapping; 73 }; 74 } // namespace 75 76 /// Find the insertion point for allocas given the current insertion point for 77 /// normal operations in the builder. 78 static llvm::OpenMPIRBuilder::InsertPointTy 79 findAllocaInsertPoint(llvm::IRBuilderBase &builder, 80 const LLVM::ModuleTranslation &moduleTranslation) { 81 // If there is an alloca insertion point on stack, i.e. we are in a nested 82 // operation and a specific point was provided by some surrounding operation, 83 // use it. 84 llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint; 85 WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocaStackFrame>( 86 [&](const OpenMPAllocaStackFrame &frame) { 87 allocaInsertPoint = frame.allocaInsertPoint; 88 return WalkResult::interrupt(); 89 }); 90 if (walkResult.wasInterrupted()) 91 return allocaInsertPoint; 92 93 // Otherwise, insert to the entry block of the surrounding function. 94 // If the current IRBuilder InsertPoint is the function's entry, it cannot 95 // also be used for alloca insertion which would result in insertion order 96 // confusion. Create a new BasicBlock for the Builder and use the entry block 97 // for the allocs. 98 // TODO: Create a dedicated alloca BasicBlock at function creation such that 99 // we do not need to move the current InertPoint here. 100 if (builder.GetInsertBlock() == 101 &builder.GetInsertBlock()->getParent()->getEntryBlock()) { 102 assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() && 103 "Assuming end of basic block"); 104 llvm::BasicBlock *entryBB = llvm::BasicBlock::Create( 105 builder.getContext(), "entry", builder.GetInsertBlock()->getParent(), 106 builder.GetInsertBlock()->getNextNode()); 107 builder.CreateBr(entryBB); 108 builder.SetInsertPoint(entryBB); 109 } 110 111 llvm::BasicBlock &funcEntryBlock = 112 builder.GetInsertBlock()->getParent()->getEntryBlock(); 113 return llvm::OpenMPIRBuilder::InsertPointTy( 114 &funcEntryBlock, funcEntryBlock.getFirstInsertionPt()); 115 } 116 117 /// Converts the given region that appears within an OpenMP dialect operation to 118 /// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the 119 /// region, and a branch from any block with an successor-less OpenMP terminator 120 /// to `continuationBlock`. Populates `continuationBlockPHIs` with the PHI nodes 121 /// of the continuation block if provided. 122 static llvm::BasicBlock *convertOmpOpRegions( 123 Region ®ion, StringRef blockName, llvm::IRBuilderBase &builder, 124 LLVM::ModuleTranslation &moduleTranslation, LogicalResult &bodyGenStatus, 125 SmallVectorImpl<llvm::PHINode *> *continuationBlockPHIs = nullptr) { 126 llvm::BasicBlock *continuationBlock = 127 splitBB(builder, true, "omp.region.cont"); 128 llvm::BasicBlock *sourceBlock = builder.GetInsertBlock(); 129 130 llvm::LLVMContext &llvmContext = builder.getContext(); 131 for (Block &bb : region) { 132 llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create( 133 llvmContext, blockName, builder.GetInsertBlock()->getParent(), 134 builder.GetInsertBlock()->getNextNode()); 135 moduleTranslation.mapBlock(&bb, llvmBB); 136 } 137 138 llvm::Instruction *sourceTerminator = sourceBlock->getTerminator(); 139 140 // Terminators (namely YieldOp) may be forwarding values to the region that 141 // need to be available in the continuation block. Collect the types of these 142 // operands in preparation of creating PHI nodes. 143 SmallVector<llvm::Type *> continuationBlockPHITypes; 144 bool operandsProcessed = false; 145 unsigned numYields = 0; 146 for (Block &bb : region.getBlocks()) { 147 if (omp::YieldOp yield = dyn_cast<omp::YieldOp>(bb.getTerminator())) { 148 if (!operandsProcessed) { 149 for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) { 150 continuationBlockPHITypes.push_back( 151 moduleTranslation.convertType(yield->getOperand(i).getType())); 152 } 153 operandsProcessed = true; 154 } else { 155 assert(continuationBlockPHITypes.size() == yield->getNumOperands() && 156 "mismatching number of values yielded from the region"); 157 for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) { 158 llvm::Type *operandType = 159 moduleTranslation.convertType(yield->getOperand(i).getType()); 160 (void)operandType; 161 assert(continuationBlockPHITypes[i] == operandType && 162 "values of mismatching types yielded from the region"); 163 } 164 } 165 numYields++; 166 } 167 } 168 169 // Insert PHI nodes in the continuation block for any values forwarded by the 170 // terminators in this region. 171 if (!continuationBlockPHITypes.empty()) 172 assert( 173 continuationBlockPHIs && 174 "expected continuation block PHIs if converted regions yield values"); 175 if (continuationBlockPHIs) { 176 llvm::IRBuilderBase::InsertPointGuard guard(builder); 177 continuationBlockPHIs->reserve(continuationBlockPHITypes.size()); 178 builder.SetInsertPoint(continuationBlock, continuationBlock->begin()); 179 for (llvm::Type *ty : continuationBlockPHITypes) 180 continuationBlockPHIs->push_back(builder.CreatePHI(ty, numYields)); 181 } 182 183 // Convert blocks one by one in topological order to ensure 184 // defs are converted before uses. 185 SetVector<Block *> blocks = 186 LLVM::detail::getTopologicallySortedBlocks(region); 187 for (Block *bb : blocks) { 188 llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb); 189 // Retarget the branch of the entry block to the entry block of the 190 // converted region (regions are single-entry). 191 if (bb->isEntryBlock()) { 192 assert(sourceTerminator->getNumSuccessors() == 1 && 193 "provided entry block has multiple successors"); 194 assert(sourceTerminator->getSuccessor(0) == continuationBlock && 195 "ContinuationBlock is not the successor of the entry block"); 196 sourceTerminator->setSuccessor(0, llvmBB); 197 } 198 199 llvm::IRBuilderBase::InsertPointGuard guard(builder); 200 if (failed( 201 moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder))) { 202 bodyGenStatus = failure(); 203 return continuationBlock; 204 } 205 206 // Special handling for `omp.yield` and `omp.terminator` (we may have more 207 // than one): they return the control to the parent OpenMP dialect operation 208 // so replace them with the branch to the continuation block. We handle this 209 // here to avoid relying inter-function communication through the 210 // ModuleTranslation class to set up the correct insertion point. This is 211 // also consistent with MLIR's idiom of handling special region terminators 212 // in the same code that handles the region-owning operation. 213 Operation *terminator = bb->getTerminator(); 214 if (isa<omp::TerminatorOp, omp::YieldOp>(terminator)) { 215 builder.CreateBr(continuationBlock); 216 217 for (unsigned i = 0, e = terminator->getNumOperands(); i < e; ++i) 218 (*continuationBlockPHIs)[i]->addIncoming( 219 moduleTranslation.lookupValue(terminator->getOperand(i)), llvmBB); 220 } 221 } 222 // After all blocks have been traversed and values mapped, connect the PHI 223 // nodes to the results of preceding blocks. 224 LLVM::detail::connectPHINodes(region, moduleTranslation); 225 226 // Remove the blocks and values defined in this region from the mapping since 227 // they are not visible outside of this region. This allows the same region to 228 // be converted several times, that is cloned, without clashes, and slightly 229 // speeds up the lookups. 230 moduleTranslation.forgetMapping(region); 231 232 return continuationBlock; 233 } 234 235 /// Convert ProcBindKind from MLIR-generated enum to LLVM enum. 236 static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) { 237 switch (kind) { 238 case omp::ClauseProcBindKind::Close: 239 return llvm::omp::ProcBindKind::OMP_PROC_BIND_close; 240 case omp::ClauseProcBindKind::Master: 241 return llvm::omp::ProcBindKind::OMP_PROC_BIND_master; 242 case omp::ClauseProcBindKind::Primary: 243 return llvm::omp::ProcBindKind::OMP_PROC_BIND_primary; 244 case omp::ClauseProcBindKind::Spread: 245 return llvm::omp::ProcBindKind::OMP_PROC_BIND_spread; 246 } 247 llvm_unreachable("Unknown ClauseProcBindKind kind"); 248 } 249 250 /// Converts the OpenMP parallel operation to LLVM IR. 251 static LogicalResult 252 convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, 253 LLVM::ModuleTranslation &moduleTranslation) { 254 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 255 // TODO: support error propagation in OpenMPIRBuilder and use it instead of 256 // relying on captured variables. 257 LogicalResult bodyGenStatus = success(); 258 259 auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { 260 // Save the alloca insertion point on ModuleTranslation stack for use in 261 // nested regions. 262 LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame( 263 moduleTranslation, allocaIP); 264 265 // ParallelOp has only one region associated with it. 266 builder.restoreIP(codeGenIP); 267 convertOmpOpRegions(opInst.getRegion(), "omp.par.region", builder, 268 moduleTranslation, bodyGenStatus); 269 }; 270 271 // TODO: Perform appropriate actions according to the data-sharing 272 // attribute (shared, private, firstprivate, ...) of variables. 273 // Currently defaults to shared. 274 auto privCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, 275 llvm::Value &, llvm::Value &vPtr, 276 llvm::Value *&replacementValue) -> InsertPointTy { 277 replacementValue = &vPtr; 278 279 return codeGenIP; 280 }; 281 282 // TODO: Perform finalization actions for variables. This has to be 283 // called for variables which have destructors/finalizers. 284 auto finiCB = [&](InsertPointTy codeGenIP) {}; 285 286 llvm::Value *ifCond = nullptr; 287 if (auto ifExprVar = opInst.if_expr_var()) 288 ifCond = moduleTranslation.lookupValue(ifExprVar); 289 llvm::Value *numThreads = nullptr; 290 if (auto numThreadsVar = opInst.num_threads_var()) 291 numThreads = moduleTranslation.lookupValue(numThreadsVar); 292 auto pbKind = llvm::omp::OMP_PROC_BIND_default; 293 if (auto bind = opInst.proc_bind_val()) 294 pbKind = getProcBindKind(*bind); 295 // TODO: Is the Parallel construct cancellable? 296 bool isCancellable = false; 297 298 llvm::OpenMPIRBuilder::InsertPointTy allocaIP = 299 findAllocaInsertPoint(builder, moduleTranslation); 300 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 301 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createParallel( 302 ompLoc, allocaIP, bodyGenCB, privCB, finiCB, ifCond, numThreads, pbKind, 303 isCancellable)); 304 305 return bodyGenStatus; 306 } 307 308 /// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder. 309 static LogicalResult 310 convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder, 311 LLVM::ModuleTranslation &moduleTranslation) { 312 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 313 // TODO: support error propagation in OpenMPIRBuilder and use it instead of 314 // relying on captured variables. 315 LogicalResult bodyGenStatus = success(); 316 317 auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { 318 // MasterOp has only one region associated with it. 319 auto ®ion = cast<omp::MasterOp>(opInst).getRegion(); 320 builder.restoreIP(codeGenIP); 321 convertOmpOpRegions(region, "omp.master.region", builder, moduleTranslation, 322 bodyGenStatus); 323 }; 324 325 // TODO: Perform finalization actions for variables. This has to be 326 // called for variables which have destructors/finalizers. 327 auto finiCB = [&](InsertPointTy codeGenIP) {}; 328 329 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 330 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createMaster( 331 ompLoc, bodyGenCB, finiCB)); 332 return success(); 333 } 334 335 /// Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder. 336 static LogicalResult 337 convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder, 338 LLVM::ModuleTranslation &moduleTranslation) { 339 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 340 auto criticalOp = cast<omp::CriticalOp>(opInst); 341 // TODO: support error propagation in OpenMPIRBuilder and use it instead of 342 // relying on captured variables. 343 LogicalResult bodyGenStatus = success(); 344 345 auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { 346 // CriticalOp has only one region associated with it. 347 auto ®ion = cast<omp::CriticalOp>(opInst).getRegion(); 348 builder.restoreIP(codeGenIP); 349 convertOmpOpRegions(region, "omp.critical.region", builder, 350 moduleTranslation, bodyGenStatus); 351 }; 352 353 // TODO: Perform finalization actions for variables. This has to be 354 // called for variables which have destructors/finalizers. 355 auto finiCB = [&](InsertPointTy codeGenIP) {}; 356 357 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 358 llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext(); 359 llvm::Constant *hint = nullptr; 360 361 // If it has a name, it probably has a hint too. 362 if (criticalOp.nameAttr()) { 363 // The verifiers in OpenMP Dialect guarentee that all the pointers are 364 // non-null 365 auto symbolRef = criticalOp.nameAttr().cast<SymbolRefAttr>(); 366 auto criticalDeclareOp = 367 SymbolTable::lookupNearestSymbolFrom<omp::CriticalDeclareOp>(criticalOp, 368 symbolRef); 369 hint = 370 llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext), 371 static_cast<int>(criticalDeclareOp.hint_val())); 372 } 373 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createCritical( 374 ompLoc, bodyGenCB, finiCB, criticalOp.name().getValueOr(""), hint)); 375 return success(); 376 } 377 378 /// Returns a reduction declaration that corresponds to the given reduction 379 /// operation in the given container. Currently only supports reductions inside 380 /// WsLoopOp but can be easily extended. 381 static omp::ReductionDeclareOp findReductionDecl(omp::WsLoopOp container, 382 omp::ReductionOp reduction) { 383 SymbolRefAttr reductionSymbol; 384 for (unsigned i = 0, e = container.getNumReductionVars(); i < e; ++i) { 385 if (container.reduction_vars()[i] != reduction.accumulator()) 386 continue; 387 reductionSymbol = (*container.reductions())[i].cast<SymbolRefAttr>(); 388 break; 389 } 390 assert(reductionSymbol && 391 "reduction operation must be associated with a declaration"); 392 393 return SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>( 394 container, reductionSymbol); 395 } 396 397 /// Populates `reductions` with reduction declarations used in the given loop. 398 static void 399 collectReductionDecls(omp::WsLoopOp loop, 400 SmallVectorImpl<omp::ReductionDeclareOp> &reductions) { 401 Optional<ArrayAttr> attr = loop.reductions(); 402 if (!attr) 403 return; 404 405 reductions.reserve(reductions.size() + loop.getNumReductionVars()); 406 for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) { 407 reductions.push_back( 408 SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>( 409 loop, symbolRef)); 410 } 411 } 412 413 /// Translates the blocks contained in the given region and appends them to at 414 /// the current insertion point of `builder`. The operations of the entry block 415 /// are appended to the current insertion block, which is not expected to have a 416 /// terminator. If set, `continuationBlockArgs` is populated with translated 417 /// values that correspond to the values omp.yield'ed from the region. 418 static LogicalResult inlineConvertOmpRegions( 419 Region ®ion, StringRef blockName, llvm::IRBuilderBase &builder, 420 LLVM::ModuleTranslation &moduleTranslation, 421 SmallVectorImpl<llvm::Value *> *continuationBlockArgs = nullptr) { 422 if (region.empty()) 423 return success(); 424 425 // Special case for single-block regions that don't create additional blocks: 426 // insert operations without creating additional blocks. 427 if (llvm::hasSingleElement(region)) { 428 moduleTranslation.mapBlock(®ion.front(), builder.GetInsertBlock()); 429 if (failed(moduleTranslation.convertBlock( 430 region.front(), /*ignoreArguments=*/true, builder))) 431 return failure(); 432 433 // The continuation arguments are simply the translated terminator operands. 434 if (continuationBlockArgs) 435 llvm::append_range( 436 *continuationBlockArgs, 437 moduleTranslation.lookupValues(region.front().back().getOperands())); 438 439 // Drop the mapping that is no longer necessary so that the same region can 440 // be processed multiple times. 441 moduleTranslation.forgetMapping(region); 442 return success(); 443 } 444 445 LogicalResult bodyGenStatus = success(); 446 SmallVector<llvm::PHINode *> phis; 447 llvm::BasicBlock *continuationBlock = convertOmpOpRegions( 448 region, blockName, builder, moduleTranslation, bodyGenStatus, &phis); 449 if (failed(bodyGenStatus)) 450 return failure(); 451 if (continuationBlockArgs) 452 llvm::append_range(*continuationBlockArgs, phis); 453 builder.SetInsertPoint(continuationBlock, 454 continuationBlock->getFirstInsertionPt()); 455 return success(); 456 } 457 458 namespace { 459 /// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used to 460 /// store lambdas with capture. 461 using OwningReductionGen = std::function<llvm::OpenMPIRBuilder::InsertPointTy( 462 llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *, 463 llvm::Value *&)>; 464 using OwningAtomicReductionGen = 465 std::function<llvm::OpenMPIRBuilder::InsertPointTy( 466 llvm::OpenMPIRBuilder::InsertPointTy, llvm::Type *, llvm::Value *, 467 llvm::Value *)>; 468 } // namespace 469 470 /// Create an OpenMPIRBuilder-compatible reduction generator for the given 471 /// reduction declaration. The generator uses `builder` but ignores its 472 /// insertion point. 473 static OwningReductionGen 474 makeReductionGen(omp::ReductionDeclareOp decl, llvm::IRBuilderBase &builder, 475 LLVM::ModuleTranslation &moduleTranslation) { 476 // The lambda is mutable because we need access to non-const methods of decl 477 // (which aren't actually mutating it), and we must capture decl by-value to 478 // avoid the dangling reference after the parent function returns. 479 OwningReductionGen gen = 480 [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, 481 llvm::Value *lhs, llvm::Value *rhs, 482 llvm::Value *&result) mutable { 483 Region &reductionRegion = decl.reductionRegion(); 484 moduleTranslation.mapValue(reductionRegion.front().getArgument(0), lhs); 485 moduleTranslation.mapValue(reductionRegion.front().getArgument(1), rhs); 486 builder.restoreIP(insertPoint); 487 SmallVector<llvm::Value *> phis; 488 if (failed(inlineConvertOmpRegions(reductionRegion, 489 "omp.reduction.nonatomic.body", 490 builder, moduleTranslation, &phis))) 491 return llvm::OpenMPIRBuilder::InsertPointTy(); 492 assert(phis.size() == 1); 493 result = phis[0]; 494 return builder.saveIP(); 495 }; 496 return gen; 497 } 498 499 /// Create an OpenMPIRBuilder-compatible atomic reduction generator for the 500 /// given reduction declaration. The generator uses `builder` but ignores its 501 /// insertion point. Returns null if there is no atomic region available in the 502 /// reduction declaration. 503 static OwningAtomicReductionGen 504 makeAtomicReductionGen(omp::ReductionDeclareOp decl, 505 llvm::IRBuilderBase &builder, 506 LLVM::ModuleTranslation &moduleTranslation) { 507 if (decl.atomicReductionRegion().empty()) 508 return OwningAtomicReductionGen(); 509 510 // The lambda is mutable because we need access to non-const methods of decl 511 // (which aren't actually mutating it), and we must capture decl by-value to 512 // avoid the dangling reference after the parent function returns. 513 OwningAtomicReductionGen atomicGen = 514 [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, llvm::Type *, 515 llvm::Value *lhs, llvm::Value *rhs) mutable { 516 Region &atomicRegion = decl.atomicReductionRegion(); 517 moduleTranslation.mapValue(atomicRegion.front().getArgument(0), lhs); 518 moduleTranslation.mapValue(atomicRegion.front().getArgument(1), rhs); 519 builder.restoreIP(insertPoint); 520 SmallVector<llvm::Value *> phis; 521 if (failed(inlineConvertOmpRegions(atomicRegion, 522 "omp.reduction.atomic.body", builder, 523 moduleTranslation, &phis))) 524 return llvm::OpenMPIRBuilder::InsertPointTy(); 525 assert(phis.empty()); 526 return builder.saveIP(); 527 }; 528 return atomicGen; 529 } 530 531 /// Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder. 532 static LogicalResult 533 convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder, 534 LLVM::ModuleTranslation &moduleTranslation) { 535 auto orderedOp = cast<omp::OrderedOp>(opInst); 536 537 omp::ClauseDepend dependType = *orderedOp.depend_type_val(); 538 bool isDependSource = dependType == omp::ClauseDepend::dependsource; 539 unsigned numLoops = orderedOp.num_loops_val().getValue(); 540 SmallVector<llvm::Value *> vecValues = 541 moduleTranslation.lookupValues(orderedOp.depend_vec_vars()); 542 543 size_t indexVecValues = 0; 544 while (indexVecValues < vecValues.size()) { 545 SmallVector<llvm::Value *> storeValues; 546 storeValues.reserve(numLoops); 547 for (unsigned i = 0; i < numLoops; i++) { 548 storeValues.push_back(vecValues[indexVecValues]); 549 indexVecValues++; 550 } 551 llvm::OpenMPIRBuilder::InsertPointTy allocaIP = 552 findAllocaInsertPoint(builder, moduleTranslation); 553 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 554 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend( 555 ompLoc, allocaIP, numLoops, storeValues, ".cnt.addr", isDependSource)); 556 } 557 return success(); 558 } 559 560 /// Converts an OpenMP 'ordered_region' operation into LLVM IR using 561 /// OpenMPIRBuilder. 562 static LogicalResult 563 convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder, 564 LLVM::ModuleTranslation &moduleTranslation) { 565 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 566 auto orderedRegionOp = cast<omp::OrderedRegionOp>(opInst); 567 568 // TODO: The code generation for ordered simd directive is not supported yet. 569 if (orderedRegionOp.simd()) 570 return failure(); 571 572 // TODO: support error propagation in OpenMPIRBuilder and use it instead of 573 // relying on captured variables. 574 LogicalResult bodyGenStatus = success(); 575 576 auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { 577 // OrderedOp has only one region associated with it. 578 auto ®ion = cast<omp::OrderedRegionOp>(opInst).getRegion(); 579 builder.restoreIP(codeGenIP); 580 convertOmpOpRegions(region, "omp.ordered.region", builder, 581 moduleTranslation, bodyGenStatus); 582 }; 583 584 // TODO: Perform finalization actions for variables. This has to be 585 // called for variables which have destructors/finalizers. 586 auto finiCB = [&](InsertPointTy codeGenIP) {}; 587 588 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 589 builder.restoreIP( 590 moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd( 591 ompLoc, bodyGenCB, finiCB, !orderedRegionOp.simd())); 592 return bodyGenStatus; 593 } 594 595 static LogicalResult 596 convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, 597 LLVM::ModuleTranslation &moduleTranslation) { 598 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 599 using StorableBodyGenCallbackTy = 600 llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy; 601 602 auto sectionsOp = cast<omp::SectionsOp>(opInst); 603 604 // TODO: Support the following clauses: private, firstprivate, lastprivate, 605 // reduction, allocate 606 if (!sectionsOp.reduction_vars().empty() || sectionsOp.reductions() || 607 !sectionsOp.allocate_vars().empty() || 608 !sectionsOp.allocators_vars().empty()) 609 return emitError(sectionsOp.getLoc()) 610 << "reduction and allocate clauses are not supported for sections " 611 "construct"; 612 613 LogicalResult bodyGenStatus = success(); 614 SmallVector<StorableBodyGenCallbackTy> sectionCBs; 615 616 for (Operation &op : *sectionsOp.region().begin()) { 617 auto sectionOp = dyn_cast<omp::SectionOp>(op); 618 if (!sectionOp) // omp.terminator 619 continue; 620 621 Region ®ion = sectionOp.region(); 622 auto sectionCB = [®ion, &builder, &moduleTranslation, &bodyGenStatus]( 623 InsertPointTy allocaIP, InsertPointTy codeGenIP) { 624 builder.restoreIP(codeGenIP); 625 convertOmpOpRegions(region, "omp.section.region", builder, 626 moduleTranslation, bodyGenStatus); 627 }; 628 sectionCBs.push_back(sectionCB); 629 } 630 631 // No sections within omp.sections operation - skip generation. This situation 632 // is only possible if there is only a terminator operation inside the 633 // sections operation 634 if (sectionCBs.empty()) 635 return success(); 636 637 assert(isa<omp::SectionOp>(*sectionsOp.region().op_begin())); 638 639 // TODO: Perform appropriate actions according to the data-sharing 640 // attribute (shared, private, firstprivate, ...) of variables. 641 // Currently defaults to shared. 642 auto privCB = [&](InsertPointTy, InsertPointTy codeGenIP, llvm::Value &, 643 llvm::Value &vPtr, 644 llvm::Value *&replacementValue) -> InsertPointTy { 645 replacementValue = &vPtr; 646 return codeGenIP; 647 }; 648 649 // TODO: Perform finalization actions for variables. This has to be 650 // called for variables which have destructors/finalizers. 651 auto finiCB = [&](InsertPointTy codeGenIP) {}; 652 653 llvm::OpenMPIRBuilder::InsertPointTy allocaIP = 654 findAllocaInsertPoint(builder, moduleTranslation); 655 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 656 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSections( 657 ompLoc, allocaIP, sectionCBs, privCB, finiCB, false, 658 sectionsOp.nowait())); 659 return bodyGenStatus; 660 } 661 662 /// Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder. 663 static LogicalResult 664 convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder, 665 LLVM::ModuleTranslation &moduleTranslation) { 666 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 667 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 668 LogicalResult bodyGenStatus = success(); 669 auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) { 670 builder.restoreIP(codegenIP); 671 convertOmpOpRegions(singleOp.region(), "omp.single.region", builder, 672 moduleTranslation, bodyGenStatus); 673 }; 674 auto finiCB = [&](InsertPointTy codeGenIP) {}; 675 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSingle( 676 ompLoc, bodyCB, finiCB, singleOp.nowait(), /*DidIt=*/nullptr)); 677 return bodyGenStatus; 678 } 679 680 /// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder. 681 static LogicalResult 682 convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, 683 LLVM::ModuleTranslation &moduleTranslation) { 684 auto loop = cast<omp::WsLoopOp>(opInst); 685 // TODO: this should be in the op verifier instead. 686 if (loop.lowerBound().empty()) 687 return failure(); 688 689 // Static is the default. 690 auto schedule = 691 loop.schedule_val().getValueOr(omp::ClauseScheduleKind::Static); 692 693 // Find the loop configuration. 694 llvm::Value *step = moduleTranslation.lookupValue(loop.step()[0]); 695 llvm::Type *ivType = step->getType(); 696 llvm::Value *chunk = nullptr; 697 if (loop.schedule_chunk_var()) { 698 llvm::Value *chunkVar = 699 moduleTranslation.lookupValue(loop.schedule_chunk_var()); 700 llvm::Type *chunkVarType = chunkVar->getType(); 701 assert(chunkVarType->isIntegerTy() && 702 "chunk size must be one integer expression"); 703 if (chunkVarType->getIntegerBitWidth() < ivType->getIntegerBitWidth()) 704 chunk = builder.CreateSExt(chunkVar, ivType); 705 else if (chunkVarType->getIntegerBitWidth() > ivType->getIntegerBitWidth()) 706 chunk = builder.CreateTrunc(chunkVar, ivType); 707 else 708 chunk = chunkVar; 709 } 710 711 SmallVector<omp::ReductionDeclareOp> reductionDecls; 712 collectReductionDecls(loop, reductionDecls); 713 llvm::OpenMPIRBuilder::InsertPointTy allocaIP = 714 findAllocaInsertPoint(builder, moduleTranslation); 715 716 // Allocate space for privatized reduction variables. 717 SmallVector<llvm::Value *> privateReductionVariables; 718 DenseMap<Value, llvm::Value *> reductionVariableMap; 719 unsigned numReductions = loop.getNumReductionVars(); 720 privateReductionVariables.reserve(numReductions); 721 if (numReductions != 0) { 722 llvm::IRBuilderBase::InsertPointGuard guard(builder); 723 builder.restoreIP(allocaIP); 724 for (unsigned i = 0; i < numReductions; ++i) { 725 auto reductionType = 726 loop.reduction_vars()[i].getType().cast<LLVM::LLVMPointerType>(); 727 llvm::Value *var = builder.CreateAlloca( 728 moduleTranslation.convertType(reductionType.getElementType())); 729 privateReductionVariables.push_back(var); 730 reductionVariableMap.try_emplace(loop.reduction_vars()[i], var); 731 } 732 } 733 734 // Store the mapping between reduction variables and their private copies on 735 // ModuleTranslation stack. It can be then recovered when translating 736 // omp.reduce operations in a separate call. 737 LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard( 738 moduleTranslation, reductionVariableMap); 739 740 // Before the loop, store the initial values of reductions into reduction 741 // variables. Although this could be done after allocas, we don't want to mess 742 // up with the alloca insertion point. 743 for (unsigned i = 0; i < numReductions; ++i) { 744 SmallVector<llvm::Value *> phis; 745 if (failed(inlineConvertOmpRegions(reductionDecls[i].initializerRegion(), 746 "omp.reduction.neutral", builder, 747 moduleTranslation, &phis))) 748 return failure(); 749 assert(phis.size() == 1 && "expected one value to be yielded from the " 750 "reduction neutral element declaration region"); 751 builder.CreateStore(phis[0], privateReductionVariables[i]); 752 } 753 754 // Set up the source location value for OpenMP runtime. 755 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 756 757 // Generator of the canonical loop body. 758 // TODO: support error propagation in OpenMPIRBuilder and use it instead of 759 // relying on captured variables. 760 SmallVector<llvm::CanonicalLoopInfo *> loopInfos; 761 SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints; 762 LogicalResult bodyGenStatus = success(); 763 auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) { 764 // Make sure further conversions know about the induction variable. 765 moduleTranslation.mapValue( 766 loop.getRegion().front().getArgument(loopInfos.size()), iv); 767 768 // Capture the body insertion point for use in nested loops. BodyIP of the 769 // CanonicalLoopInfo always points to the beginning of the entry block of 770 // the body. 771 bodyInsertPoints.push_back(ip); 772 773 if (loopInfos.size() != loop.getNumLoops() - 1) 774 return; 775 776 // Convert the body of the loop. 777 builder.restoreIP(ip); 778 convertOmpOpRegions(loop.region(), "omp.wsloop.region", builder, 779 moduleTranslation, bodyGenStatus); 780 }; 781 782 // Delegate actual loop construction to the OpenMP IRBuilder. 783 // TODO: this currently assumes WsLoop is semantically similar to SCF loop, 784 // i.e. it has a positive step, uses signed integer semantics. Reconsider 785 // this code when WsLoop clearly supports more cases. 786 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 787 for (unsigned i = 0, e = loop.getNumLoops(); i < e; ++i) { 788 llvm::Value *lowerBound = 789 moduleTranslation.lookupValue(loop.lowerBound()[i]); 790 llvm::Value *upperBound = 791 moduleTranslation.lookupValue(loop.upperBound()[i]); 792 llvm::Value *step = moduleTranslation.lookupValue(loop.step()[i]); 793 794 // Make sure loop trip count are emitted in the preheader of the outermost 795 // loop at the latest so that they are all available for the new collapsed 796 // loop will be created below. 797 llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc; 798 llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP; 799 if (i != 0) { 800 loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back()); 801 computeIP = loopInfos.front()->getPreheaderIP(); 802 } 803 loopInfos.push_back(ompBuilder->createCanonicalLoop( 804 loc, bodyGen, lowerBound, upperBound, step, 805 /*IsSigned=*/true, loop.inclusive(), computeIP)); 806 807 if (failed(bodyGenStatus)) 808 return failure(); 809 } 810 811 // Collapse loops. Store the insertion point because LoopInfos may get 812 // invalidated. 813 llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP(); 814 llvm::CanonicalLoopInfo *loopInfo = 815 ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {}); 816 817 allocaIP = findAllocaInsertPoint(builder, moduleTranslation); 818 819 // TODO: Handle doacross loops when the ordered clause has a parameter. 820 bool isOrdered = loop.ordered_val().hasValue(); 821 Optional<omp::ScheduleModifier> scheduleModifier = loop.schedule_modifier(); 822 bool isSimd = loop.simd_modifier(); 823 824 ompBuilder->applyWorkshareLoop( 825 ompLoc.DL, loopInfo, allocaIP, !loop.nowait(), 826 convertToScheduleKind(schedule), chunk, isSimd, 827 scheduleModifier == omp::ScheduleModifier::monotonic, 828 scheduleModifier == omp::ScheduleModifier::nonmonotonic, isOrdered); 829 830 // Continue building IR after the loop. Note that the LoopInfo returned by 831 // `collapseLoops` points inside the outermost loop and is intended for 832 // potential further loop transformations. Use the insertion point stored 833 // before collapsing loops instead. 834 builder.restoreIP(afterIP); 835 836 // Process the reductions if required. 837 if (numReductions == 0) 838 return success(); 839 840 // Create the reduction generators. We need to own them here because 841 // ReductionInfo only accepts references to the generators. 842 SmallVector<OwningReductionGen> owningReductionGens; 843 SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens; 844 for (unsigned i = 0; i < numReductions; ++i) { 845 owningReductionGens.push_back( 846 makeReductionGen(reductionDecls[i], builder, moduleTranslation)); 847 owningAtomicReductionGens.push_back( 848 makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation)); 849 } 850 851 // Collect the reduction information. 852 SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos; 853 reductionInfos.reserve(numReductions); 854 for (unsigned i = 0; i < numReductions; ++i) { 855 llvm::OpenMPIRBuilder::AtomicReductionGenTy atomicGen = nullptr; 856 if (owningAtomicReductionGens[i]) 857 atomicGen = owningAtomicReductionGens[i]; 858 auto reductionType = 859 loop.reduction_vars()[i].getType().cast<LLVM::LLVMPointerType>(); 860 llvm::Value *variable = 861 moduleTranslation.lookupValue(loop.reduction_vars()[i]); 862 reductionInfos.push_back( 863 {moduleTranslation.convertType(reductionType.getElementType()), 864 variable, privateReductionVariables[i], owningReductionGens[i], 865 atomicGen}); 866 } 867 868 // The call to createReductions below expects the block to have a 869 // terminator. Create an unreachable instruction to serve as terminator 870 // and remove it later. 871 llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable(); 872 builder.SetInsertPoint(tempTerminator); 873 llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint = 874 ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos, 875 loop.nowait()); 876 if (!contInsertPoint.getBlock()) 877 return loop->emitOpError() << "failed to convert reductions"; 878 auto nextInsertionPoint = 879 ompBuilder->createBarrier(contInsertPoint, llvm::omp::OMPD_for); 880 tempTerminator->eraseFromParent(); 881 builder.restoreIP(nextInsertionPoint); 882 883 return success(); 884 } 885 886 /// Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder. 887 static LogicalResult 888 convertOmpSimdLoop(Operation &opInst, llvm::IRBuilderBase &builder, 889 LLVM::ModuleTranslation &moduleTranslation) { 890 auto loop = cast<omp::SimdLoopOp>(opInst); 891 892 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 893 894 // Generator of the canonical loop body. 895 // TODO: support error propagation in OpenMPIRBuilder and use it instead of 896 // relying on captured variables. 897 SmallVector<llvm::CanonicalLoopInfo *> loopInfos; 898 SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints; 899 LogicalResult bodyGenStatus = success(); 900 auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) { 901 // Make sure further conversions know about the induction variable. 902 moduleTranslation.mapValue( 903 loop.getRegion().front().getArgument(loopInfos.size()), iv); 904 905 // Capture the body insertion point for use in nested loops. BodyIP of the 906 // CanonicalLoopInfo always points to the beginning of the entry block of 907 // the body. 908 bodyInsertPoints.push_back(ip); 909 910 if (loopInfos.size() != loop.getNumLoops() - 1) 911 return; 912 913 // Convert the body of the loop. 914 builder.restoreIP(ip); 915 convertOmpOpRegions(loop.region(), "omp.simdloop.region", builder, 916 moduleTranslation, bodyGenStatus); 917 }; 918 919 // Delegate actual loop construction to the OpenMP IRBuilder. 920 // TODO: this currently assumes SimdLoop is semantically similar to SCF loop, 921 // i.e. it has a positive step, uses signed integer semantics. Reconsider 922 // this code when SimdLoop clearly supports more cases. 923 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 924 for (unsigned i = 0, e = loop.getNumLoops(); i < e; ++i) { 925 llvm::Value *lowerBound = 926 moduleTranslation.lookupValue(loop.lowerBound()[i]); 927 llvm::Value *upperBound = 928 moduleTranslation.lookupValue(loop.upperBound()[i]); 929 llvm::Value *step = moduleTranslation.lookupValue(loop.step()[i]); 930 931 // Make sure loop trip count are emitted in the preheader of the outermost 932 // loop at the latest so that they are all available for the new collapsed 933 // loop will be created below. 934 llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc; 935 llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP; 936 if (i != 0) { 937 loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(), 938 ompLoc.DL); 939 computeIP = loopInfos.front()->getPreheaderIP(); 940 } 941 loopInfos.push_back(ompBuilder->createCanonicalLoop( 942 loc, bodyGen, lowerBound, upperBound, step, 943 /*IsSigned=*/true, /*Inclusive=*/true, computeIP)); 944 945 if (failed(bodyGenStatus)) 946 return failure(); 947 } 948 949 // Collapse loops. 950 llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP(); 951 llvm::CanonicalLoopInfo *loopInfo = 952 ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {}); 953 954 ompBuilder->applySimd(ompLoc.DL, loopInfo); 955 956 builder.restoreIP(afterIP); 957 return success(); 958 } 959 960 /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering. 961 llvm::AtomicOrdering 962 convertAtomicOrdering(Optional<omp::ClauseMemoryOrderKind> ao) { 963 if (!ao) 964 return llvm::AtomicOrdering::Monotonic; // Default Memory Ordering 965 966 switch (*ao) { 967 case omp::ClauseMemoryOrderKind::Seq_cst: 968 return llvm::AtomicOrdering::SequentiallyConsistent; 969 case omp::ClauseMemoryOrderKind::Acq_rel: 970 return llvm::AtomicOrdering::AcquireRelease; 971 case omp::ClauseMemoryOrderKind::Acquire: 972 return llvm::AtomicOrdering::Acquire; 973 case omp::ClauseMemoryOrderKind::Release: 974 return llvm::AtomicOrdering::Release; 975 case omp::ClauseMemoryOrderKind::Relaxed: 976 return llvm::AtomicOrdering::Monotonic; 977 } 978 llvm_unreachable("Unknown ClauseMemoryOrderKind kind"); 979 } 980 981 /// Convert omp.atomic.read operation to LLVM IR. 982 static LogicalResult 983 convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder, 984 LLVM::ModuleTranslation &moduleTranslation) { 985 986 auto readOp = cast<omp::AtomicReadOp>(opInst); 987 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 988 989 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 990 991 llvm::AtomicOrdering AO = convertAtomicOrdering(readOp.memory_order_val()); 992 llvm::Value *x = moduleTranslation.lookupValue(readOp.x()); 993 Type xTy = readOp.x().getType().cast<omp::PointerLikeType>().getElementType(); 994 llvm::Value *v = moduleTranslation.lookupValue(readOp.v()); 995 Type vTy = readOp.v().getType().cast<omp::PointerLikeType>().getElementType(); 996 llvm::OpenMPIRBuilder::AtomicOpValue V = { 997 v, moduleTranslation.convertType(vTy), false, false}; 998 llvm::OpenMPIRBuilder::AtomicOpValue X = { 999 x, moduleTranslation.convertType(xTy), false, false}; 1000 builder.restoreIP(ompBuilder->createAtomicRead(ompLoc, X, V, AO)); 1001 return success(); 1002 } 1003 1004 /// Converts an omp.atomic.write operation to LLVM IR. 1005 static LogicalResult 1006 convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder, 1007 LLVM::ModuleTranslation &moduleTranslation) { 1008 auto writeOp = cast<omp::AtomicWriteOp>(opInst); 1009 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 1010 1011 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 1012 llvm::AtomicOrdering ao = convertAtomicOrdering(writeOp.memory_order_val()); 1013 llvm::Value *expr = moduleTranslation.lookupValue(writeOp.value()); 1014 llvm::Value *dest = moduleTranslation.lookupValue(writeOp.address()); 1015 llvm::Type *ty = moduleTranslation.convertType(writeOp.value().getType()); 1016 llvm::OpenMPIRBuilder::AtomicOpValue x = {dest, ty, /*isSigned=*/false, 1017 /*isVolatile=*/false}; 1018 builder.restoreIP(ompBuilder->createAtomicWrite(ompLoc, x, expr, ao)); 1019 return success(); 1020 } 1021 1022 /// Converts an LLVM dialect binary operation to the corresponding enum value 1023 /// for `atomicrmw` supported binary operation. 1024 llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op) { 1025 return llvm::TypeSwitch<Operation *, llvm::AtomicRMWInst::BinOp>(&op) 1026 .Case([&](LLVM::AddOp) { return llvm::AtomicRMWInst::BinOp::Add; }) 1027 .Case([&](LLVM::SubOp) { return llvm::AtomicRMWInst::BinOp::Sub; }) 1028 .Case([&](LLVM::AndOp) { return llvm::AtomicRMWInst::BinOp::And; }) 1029 .Case([&](LLVM::OrOp) { return llvm::AtomicRMWInst::BinOp::Or; }) 1030 .Case([&](LLVM::XOrOp) { return llvm::AtomicRMWInst::BinOp::Xor; }) 1031 .Case([&](LLVM::UMaxOp) { return llvm::AtomicRMWInst::BinOp::UMax; }) 1032 .Case([&](LLVM::UMinOp) { return llvm::AtomicRMWInst::BinOp::UMin; }) 1033 .Case([&](LLVM::FAddOp) { return llvm::AtomicRMWInst::BinOp::FAdd; }) 1034 .Case([&](LLVM::FSubOp) { return llvm::AtomicRMWInst::BinOp::FSub; }) 1035 .Default(llvm::AtomicRMWInst::BinOp::BAD_BINOP); 1036 } 1037 1038 /// Converts an OpenMP atomic update operation using OpenMPIRBuilder. 1039 static LogicalResult 1040 convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst, 1041 llvm::IRBuilderBase &builder, 1042 LLVM::ModuleTranslation &moduleTranslation) { 1043 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 1044 1045 // Convert values and types. 1046 auto &innerOpList = opInst.region().front().getOperations(); 1047 if (innerOpList.size() != 2) 1048 return opInst.emitError("exactly two operations are allowed inside an " 1049 "atomic update region while lowering to LLVM IR"); 1050 1051 Operation &innerUpdateOp = innerOpList.front(); 1052 1053 if (innerUpdateOp.getNumOperands() != 2 || 1054 !llvm::is_contained(innerUpdateOp.getOperands(), 1055 opInst.getRegion().getArgument(0))) 1056 return opInst.emitError( 1057 "the update operation inside the region must be a binary operation and " 1058 "that update operation must have the region argument as an operand"); 1059 1060 llvm::AtomicRMWInst::BinOp binop = convertBinOpToAtomic(innerUpdateOp); 1061 1062 bool isXBinopExpr = 1063 innerUpdateOp.getNumOperands() > 0 && 1064 innerUpdateOp.getOperand(0) == opInst.getRegion().getArgument(0); 1065 1066 mlir::Value mlirExpr = (isXBinopExpr ? innerUpdateOp.getOperand(1) 1067 : innerUpdateOp.getOperand(0)); 1068 llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr); 1069 llvm::Value *llvmX = moduleTranslation.lookupValue(opInst.x()); 1070 LLVM::LLVMPointerType mlirXType = 1071 opInst.x().getType().cast<LLVM::LLVMPointerType>(); 1072 llvm::Type *llvmXElementType = 1073 moduleTranslation.convertType(mlirXType.getElementType()); 1074 llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType, 1075 /*isSigned=*/false, 1076 /*isVolatile=*/false}; 1077 1078 llvm::AtomicOrdering atomicOrdering = 1079 convertAtomicOrdering(opInst.memory_order_val()); 1080 1081 // Generate update code. 1082 LogicalResult updateGenStatus = success(); 1083 auto updateFn = [&opInst, &moduleTranslation, &updateGenStatus]( 1084 llvm::Value *atomicx, 1085 llvm::IRBuilder<> &builder) -> llvm::Value * { 1086 Block &bb = *opInst.region().begin(); 1087 moduleTranslation.mapValue(*opInst.region().args_begin(), atomicx); 1088 moduleTranslation.mapBlock(&bb, builder.GetInsertBlock()); 1089 if (failed(moduleTranslation.convertBlock(bb, true, builder))) { 1090 updateGenStatus = (opInst.emitError() 1091 << "unable to convert update operation to llvm IR"); 1092 return nullptr; 1093 } 1094 omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator()); 1095 assert(yieldop && yieldop.results().size() == 1 && 1096 "terminator must be omp.yield op and it must have exactly one " 1097 "argument"); 1098 return moduleTranslation.lookupValue(yieldop.results()[0]); 1099 }; 1100 1101 // Handle ambiguous alloca, if any. 1102 auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation); 1103 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 1104 builder.restoreIP(ompBuilder->createAtomicUpdate( 1105 ompLoc, allocaIP, llvmAtomicX, llvmExpr, atomicOrdering, binop, updateFn, 1106 isXBinopExpr)); 1107 return updateGenStatus; 1108 } 1109 1110 static LogicalResult 1111 convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp, 1112 llvm::IRBuilderBase &builder, 1113 LLVM::ModuleTranslation &moduleTranslation) { 1114 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 1115 mlir::Value mlirExpr; 1116 bool isXBinopExpr = false, isPostfixUpdate = false; 1117 llvm::AtomicRMWInst::BinOp binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP; 1118 1119 omp::AtomicUpdateOp atomicUpdateOp = atomicCaptureOp.getAtomicUpdateOp(); 1120 omp::AtomicWriteOp atomicWriteOp = atomicCaptureOp.getAtomicWriteOp(); 1121 1122 assert((atomicUpdateOp || atomicWriteOp) && 1123 "internal op must be an atomic.update or atomic.write op"); 1124 1125 if (atomicWriteOp) { 1126 isPostfixUpdate = true; 1127 mlirExpr = atomicWriteOp.value(); 1128 } else { 1129 isPostfixUpdate = atomicCaptureOp.getSecondOp() == 1130 atomicCaptureOp.getAtomicUpdateOp().getOperation(); 1131 auto &innerOpList = atomicUpdateOp.region().front().getOperations(); 1132 if (innerOpList.size() != 2) 1133 return atomicUpdateOp.emitError( 1134 "exactly two operations are allowed inside an " 1135 "atomic update region while lowering to LLVM IR"); 1136 Operation *innerUpdateOp = atomicUpdateOp.getFirstOp(); 1137 if (innerUpdateOp->getNumOperands() != 2 || 1138 !llvm::is_contained(innerUpdateOp->getOperands(), 1139 atomicUpdateOp.getRegion().getArgument(0))) 1140 return atomicUpdateOp.emitError( 1141 "the update operation inside the region must be a binary operation " 1142 "and that update operation must have the region argument as an " 1143 "operand"); 1144 binop = convertBinOpToAtomic(*innerUpdateOp); 1145 1146 isXBinopExpr = innerUpdateOp->getOperand(0) == 1147 atomicUpdateOp.getRegion().getArgument(0); 1148 1149 mlirExpr = (isXBinopExpr ? innerUpdateOp->getOperand(1) 1150 : innerUpdateOp->getOperand(0)); 1151 } 1152 1153 llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr); 1154 llvm::Value *llvmX = 1155 moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().x()); 1156 llvm::Value *llvmV = 1157 moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().v()); 1158 auto mlirXType = atomicCaptureOp.getAtomicReadOp() 1159 .x() 1160 .getType() 1161 .cast<LLVM::LLVMPointerType>(); 1162 llvm::Type *llvmXElementType = 1163 moduleTranslation.convertType(mlirXType.getElementType()); 1164 llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType, 1165 /*isSigned=*/false, 1166 /*isVolatile=*/false}; 1167 llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicV = {llvmV, llvmXElementType, 1168 /*isSigned=*/false, 1169 /*isVolatile=*/false}; 1170 1171 llvm::AtomicOrdering atomicOrdering = 1172 convertAtomicOrdering(atomicCaptureOp.memory_order_val()); 1173 1174 LogicalResult updateGenStatus = success(); 1175 auto updateFn = [&](llvm::Value *atomicx, 1176 llvm::IRBuilder<> &builder) -> llvm::Value * { 1177 if (atomicWriteOp) 1178 return moduleTranslation.lookupValue(atomicWriteOp.value()); 1179 Block &bb = *atomicUpdateOp.region().begin(); 1180 moduleTranslation.mapValue(*atomicUpdateOp.region().args_begin(), atomicx); 1181 moduleTranslation.mapBlock(&bb, builder.GetInsertBlock()); 1182 if (failed(moduleTranslation.convertBlock(bb, true, builder))) { 1183 updateGenStatus = (atomicUpdateOp.emitError() 1184 << "unable to convert update operation to llvm IR"); 1185 return nullptr; 1186 } 1187 omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator()); 1188 assert(yieldop && yieldop.results().size() == 1 && 1189 "terminator must be omp.yield op and it must have exactly one " 1190 "argument"); 1191 return moduleTranslation.lookupValue(yieldop.results()[0]); 1192 }; 1193 1194 // Handle ambiguous alloca, if any. 1195 auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation); 1196 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 1197 builder.restoreIP(ompBuilder->createAtomicCapture( 1198 ompLoc, allocaIP, llvmAtomicX, llvmAtomicV, llvmExpr, atomicOrdering, 1199 binop, updateFn, atomicUpdateOp, isPostfixUpdate, isXBinopExpr)); 1200 return updateGenStatus; 1201 } 1202 1203 /// Converts an OpenMP reduction operation using OpenMPIRBuilder. Expects the 1204 /// mapping between reduction variables and their private equivalents to have 1205 /// been stored on the ModuleTranslation stack. Currently only supports 1206 /// reduction within WsLoopOp, but can be easily extended. 1207 static LogicalResult 1208 convertOmpReductionOp(omp::ReductionOp reductionOp, 1209 llvm::IRBuilderBase &builder, 1210 LLVM::ModuleTranslation &moduleTranslation) { 1211 // Find the declaration that corresponds to the reduction op. 1212 auto reductionContainer = reductionOp->getParentOfType<omp::WsLoopOp>(); 1213 omp::ReductionDeclareOp declaration = 1214 findReductionDecl(reductionContainer, reductionOp); 1215 assert(declaration && "could not find reduction declaration"); 1216 1217 // Retrieve the mapping between reduction variables and their private 1218 // equivalents. 1219 const DenseMap<Value, llvm::Value *> *reductionVariableMap = nullptr; 1220 moduleTranslation.stackWalk<OpenMPVarMappingStackFrame>( 1221 [&](const OpenMPVarMappingStackFrame &frame) { 1222 reductionVariableMap = &frame.mapping; 1223 return WalkResult::interrupt(); 1224 }); 1225 assert(reductionVariableMap && "couldn't find private reduction variables"); 1226 1227 // Translate the reduction operation by emitting the body of the corresponding 1228 // reduction declaration. 1229 Region &reductionRegion = declaration.reductionRegion(); 1230 llvm::Value *privateReductionVar = 1231 reductionVariableMap->lookup(reductionOp.accumulator()); 1232 llvm::Value *reductionVal = builder.CreateLoad( 1233 moduleTranslation.convertType(reductionOp.operand().getType()), 1234 privateReductionVar); 1235 1236 moduleTranslation.mapValue(reductionRegion.front().getArgument(0), 1237 reductionVal); 1238 moduleTranslation.mapValue( 1239 reductionRegion.front().getArgument(1), 1240 moduleTranslation.lookupValue(reductionOp.operand())); 1241 1242 SmallVector<llvm::Value *> phis; 1243 if (failed(inlineConvertOmpRegions(reductionRegion, "omp.reduction.body", 1244 builder, moduleTranslation, &phis))) 1245 return failure(); 1246 assert(phis.size() == 1 && "expected one value to be yielded from " 1247 "the reduction body declaration region"); 1248 builder.CreateStore(phis[0], privateReductionVar); 1249 return success(); 1250 } 1251 1252 /// Converts an OpenMP Threadprivate operation into LLVM IR using 1253 /// OpenMPIRBuilder. 1254 static LogicalResult 1255 convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder, 1256 LLVM::ModuleTranslation &moduleTranslation) { 1257 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); 1258 auto threadprivateOp = cast<omp::ThreadprivateOp>(opInst); 1259 1260 Value symAddr = threadprivateOp.sym_addr(); 1261 auto *symOp = symAddr.getDefiningOp(); 1262 if (!isa<LLVM::AddressOfOp>(symOp)) 1263 return opInst.emitError("Addressing symbol not found"); 1264 LLVM::AddressOfOp addressOfOp = dyn_cast<LLVM::AddressOfOp>(symOp); 1265 1266 LLVM::GlobalOp global = addressOfOp.getGlobal(); 1267 llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global); 1268 llvm::Value *data = 1269 builder.CreateBitCast(globalValue, builder.getInt8PtrTy()); 1270 llvm::Type *type = globalValue->getValueType(); 1271 llvm::TypeSize typeSize = 1272 builder.GetInsertBlock()->getModule()->getDataLayout().getTypeStoreSize( 1273 type); 1274 llvm::ConstantInt *size = builder.getInt64(typeSize.getFixedSize()); 1275 llvm::StringRef suffix = llvm::StringRef(".cache", 6); 1276 std::string cacheName = (Twine(global.getSymName()).concat(suffix)).str(); 1277 // Emit runtime function and bitcast its type (i8*) to real data type. 1278 llvm::Value *callInst = 1279 moduleTranslation.getOpenMPBuilder()->createCachedThreadPrivate( 1280 ompLoc, data, size, cacheName); 1281 llvm::Value *result = builder.CreateBitCast(callInst, globalValue->getType()); 1282 moduleTranslation.mapValue(opInst.getResult(0), result); 1283 return success(); 1284 } 1285 1286 namespace { 1287 1288 /// Implementation of the dialect interface that converts operations belonging 1289 /// to the OpenMP dialect to LLVM IR. 1290 class OpenMPDialectLLVMIRTranslationInterface 1291 : public LLVMTranslationDialectInterface { 1292 public: 1293 using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface; 1294 1295 /// Translates the given operation to LLVM IR using the provided IR builder 1296 /// and saving the state in `moduleTranslation`. 1297 LogicalResult 1298 convertOperation(Operation *op, llvm::IRBuilderBase &builder, 1299 LLVM::ModuleTranslation &moduleTranslation) const final; 1300 }; 1301 1302 } // namespace 1303 1304 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR 1305 /// (including OpenMP runtime calls). 1306 LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation( 1307 Operation *op, llvm::IRBuilderBase &builder, 1308 LLVM::ModuleTranslation &moduleTranslation) const { 1309 1310 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 1311 1312 return llvm::TypeSwitch<Operation *, LogicalResult>(op) 1313 .Case([&](omp::BarrierOp) { 1314 ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier); 1315 return success(); 1316 }) 1317 .Case([&](omp::TaskwaitOp) { 1318 ompBuilder->createTaskwait(builder.saveIP()); 1319 return success(); 1320 }) 1321 .Case([&](omp::TaskyieldOp) { 1322 ompBuilder->createTaskyield(builder.saveIP()); 1323 return success(); 1324 }) 1325 .Case([&](omp::FlushOp) { 1326 // No support in Openmp runtime function (__kmpc_flush) to accept 1327 // the argument list. 1328 // OpenMP standard states the following: 1329 // "An implementation may implement a flush with a list by ignoring 1330 // the list, and treating it the same as a flush without a list." 1331 // 1332 // The argument list is discarded so that, flush with a list is treated 1333 // same as a flush without a list. 1334 ompBuilder->createFlush(builder.saveIP()); 1335 return success(); 1336 }) 1337 .Case([&](omp::ParallelOp op) { 1338 return convertOmpParallel(op, builder, moduleTranslation); 1339 }) 1340 .Case([&](omp::ReductionOp reductionOp) { 1341 return convertOmpReductionOp(reductionOp, builder, moduleTranslation); 1342 }) 1343 .Case([&](omp::MasterOp) { 1344 return convertOmpMaster(*op, builder, moduleTranslation); 1345 }) 1346 .Case([&](omp::CriticalOp) { 1347 return convertOmpCritical(*op, builder, moduleTranslation); 1348 }) 1349 .Case([&](omp::OrderedRegionOp) { 1350 return convertOmpOrderedRegion(*op, builder, moduleTranslation); 1351 }) 1352 .Case([&](omp::OrderedOp) { 1353 return convertOmpOrdered(*op, builder, moduleTranslation); 1354 }) 1355 .Case([&](omp::WsLoopOp) { 1356 return convertOmpWsLoop(*op, builder, moduleTranslation); 1357 }) 1358 .Case([&](omp::SimdLoopOp) { 1359 return convertOmpSimdLoop(*op, builder, moduleTranslation); 1360 }) 1361 .Case([&](omp::AtomicReadOp) { 1362 return convertOmpAtomicRead(*op, builder, moduleTranslation); 1363 }) 1364 .Case([&](omp::AtomicWriteOp) { 1365 return convertOmpAtomicWrite(*op, builder, moduleTranslation); 1366 }) 1367 .Case([&](omp::AtomicUpdateOp op) { 1368 return convertOmpAtomicUpdate(op, builder, moduleTranslation); 1369 }) 1370 .Case([&](omp::AtomicCaptureOp op) { 1371 return convertOmpAtomicCapture(op, builder, moduleTranslation); 1372 }) 1373 .Case([&](omp::SectionsOp) { 1374 return convertOmpSections(*op, builder, moduleTranslation); 1375 }) 1376 .Case([&](omp::SingleOp op) { 1377 return convertOmpSingle(op, builder, moduleTranslation); 1378 }) 1379 .Case<omp::YieldOp, omp::TerminatorOp, omp::ReductionDeclareOp, 1380 omp::CriticalDeclareOp>([](auto op) { 1381 // `yield` and `terminator` can be just omitted. The block structure 1382 // was created in the region that handles their parent operation. 1383 // `reduction.declare` will be used by reductions and is not 1384 // converted directly, skip it. 1385 // `critical.declare` is only used to declare names of critical 1386 // sections which will be used by `critical` ops and hence can be 1387 // ignored for lowering. The OpenMP IRBuilder will create unique 1388 // name for critical section names. 1389 return success(); 1390 }) 1391 .Case([&](omp::ThreadprivateOp) { 1392 return convertOmpThreadprivate(*op, builder, moduleTranslation); 1393 }) 1394 .Default([&](Operation *inst) { 1395 return inst->emitError("unsupported OpenMP operation: ") 1396 << inst->getName(); 1397 }); 1398 } 1399 1400 void mlir::registerOpenMPDialectTranslation(DialectRegistry ®istry) { 1401 registry.insert<omp::OpenMPDialect>(); 1402 registry.addExtension(+[](MLIRContext *ctx, omp::OpenMPDialect *dialect) { 1403 dialect->addInterfaces<OpenMPDialectLLVMIRTranslationInterface>(); 1404 }); 1405 } 1406 1407 void mlir::registerOpenMPDialectTranslation(MLIRContext &context) { 1408 DialectRegistry registry; 1409 registerOpenMPDialectTranslation(registry); 1410 context.appendDialectRegistry(registry); 1411 } 1412