//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
using namespace clang;
using namespace CodeGen;

llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  auto &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (auto *VAT = C.getAsVariableArrayType(Ty)) {
      llvm::Value *ArraySize;
      std::tie(ArraySize, Ty) = getVLASize(VAT);
      Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  } else
    Size = CGM.getSize(SizeInChars);
  return Size;
}

void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      auto VAT = CurField->getCapturedVLAType();
      auto *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis())
      CapturedVars.push_back(CXXThisValue);
    else if (CurCap->capturesVariableByCopy())
      CapturedVars.push_back(
          EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal());
    else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
    }
  }
}

static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
                                    StringRef Name, LValue AddrLV,
                                    bool isReferenceType = false) {
  ASTContext &Ctx = CGF.getContext();

  auto *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), SourceLocation());
  auto TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
          .getAddress();

  // If we are dealing with references we need to return the address of the
  // reference instead of the referenced value.
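  // For example, a captured 'int &r' arrives here as a uintptr_t holding the
  // address of the referent; we stash that pointer in a temporary named
  // "<Name>.ref" so the caller can bind the reference to it.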
  if (isReferenceType) {
    QualType RefType = Ctx.getLValueReferenceType(DstType);
    auto *RefVal = TmpAddr.getPointer();
    TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
    auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
    CGF.EmitScalarInit(RefVal, TmpLVal);
  }

  return TmpAddr;
}

llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  const RecordDecl *RD = S.getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  // Build the argument list.
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList Args;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = S.captures().begin();
  for (auto *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly cast to
    // uintptr. This is necessary given that the runtime library is only able
    // to deal with pointers. The VLA type sizes can be passed to the outlined
    // function in the same way.
    if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
        I->capturesVariableArrayType())
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis())
      II = &getContext().Idents.get("this");
    else {
      assert(I->capturesVariableArrayType());
      II = &getContext().Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getContext().getVariableArrayDecayedType(ArgType);
    Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr,
                                             FD->getLocation(), II, ArgType));
    ++I;
  }
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  FunctionType::ExtInfo ExtInfo;
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeFreeFunctionDeclaration(Ctx.VoidTy, Args, ExtInfo,
                                                    /*IsVariadic=*/false);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  llvm::Function *F = llvm::Function::Create(
      FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
      CapturedStmtInfo->getHelperName(), &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->addFnAttr(llvm::Attribute::NoUnwind);

  // Generate the function.
  StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
                CD->getBody()->getLocStart());
  unsigned Cnt = CD->getContextParamPosition();
  I = S.captures().begin();
  for (auto *FD : RD->fields()) {
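    // Bind each outlined-function argument back to the entity it captures so
    // that the emitted body can refer to the original declarations.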
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      setAddrOfLocalVar(I->getCapturedVar(), GetAddrOfLocalVar(Args[Cnt]));
      ++Cnt, ++I;
      continue;
    }

    LValue ArgLVal =
        MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(),
                       AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      LValue CastedArgLVal =
          MakeAddrLValue(castValueFromUintptr(*this, FD->getType(),
                                              Args[Cnt]->getName(), ArgLVal),
                         FD->getType(), AlignmentSource::Decl);
      auto *ExprArg =
          EmitLoadOfLValue(CastedArgLVal, SourceLocation()).getScalarVal();
      auto VAT = FD->getCapturedVLAType();
      VLASizeMap[VAT->getSizeExpr()] = ExprArg;
    } else if (I->capturesVariable()) {
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (!VarTy->isReferenceType()) {
        ArgAddr = EmitLoadOfReference(
            ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
      }
      setAddrOfLocalVar(
          Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var)));
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      setAddrOfLocalVar(I->getCapturedVar(),
                        castValueFromUintptr(*this, FD->getType(),
                                             Args[Cnt]->getName(), ArgLVal,
                                             VarTy->isReferenceType()));
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue =
          EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal();
    }
    ++Cnt, ++I;
  }

  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  FinishFunction(CD->getBodyRBrace());

  return F;
}

//===----------------------------------------------------------------------===//
//                              OpenMP Directive Emission
//===----------------------------------------------------------------------===//
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> &CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  auto SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Compute the address past the last element.
  auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = createBasicBlock("omp.arraycpy.body");
  auto DoneBB = createBasicBlock("omp.arraycpy.done");
  auto IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
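  // The emitted control flow is roughly:
  //
  //   %isempty = icmp eq %dest.begin, %dest.end
  //   br %isempty, label %omp.arraycpy.done, label %omp.arraycpy.body
  // omp.arraycpy.body:  ; PHIs advance over the source/destination elements
  //   ...copy one element, step both pointers, branch back or to done...
  // omp.arraycpy.done: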
  // Enter the loop body, making that address the current address.
  auto EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  auto DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// \brief Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of the array.
/// \param Init Initial expression of the array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, const Expr *Init) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = Type->getAsArrayTypeUnsafe();
  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());

  auto DestBegin = DestAddr.getPointer();
  // Compute the address past the last element.
  auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  auto IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit the initialization for one element.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                         /*IsInitializer=*/false);
  }
  // Shift the address forward by one element.
  auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  auto Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
    } else {
      // For arrays with complex element types perform element by element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // We are working with a single array element, so we have to remap
            // the destination and source variables to the corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() -> Address {
              return DestElement;
            });
            Remap.addPrivate(
                SrcVD, [SrcElement]() -> Address { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}

bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsFirstprivate.count(OrigVD) == 0) {
        EmittedAsFirstprivate.insert(OrigVD);
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(
            const_cast<VarDecl *>(OrigVD),
            /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                OrigVD) != nullptr,
            (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        Address OriginalAddr = EmitLValue(&DRE).getAddress();
        QualType Type = OrigVD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            auto Emission = EmitAutoVarAlloca(*VD);
            auto *Init = VD->getInit();
            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
                                  Type);
            } else {
              EmitOMPAggregateAssign(
                  Emission.getAllocatedAddress(), OriginalAddr, Type,
                  [this, VDInit, Init](Address DestElement,
                                       Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    RunCleanupsScope InitScope(*this);
                    // Emit initialization for single element.
                    setAddrOfLocalVar(VDInit, SrcElement);
                    EmitAnyExprToMem(Init, DestElement,
                                     Init->getType().getQualifiers(),
                                     /*IsInitializer*/ false);
                    LocalDeclMap.erase(VDInit);
                  });
            }
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
        } else {
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            // Remap temp VDInit variable to the address of the original
            // variable (for proper handling of captured global variables).
            setAddrOfLocalVar(VDInit, OriginalAddr);
            EmitDecl(*VD);
            LocalDeclMap.erase(VDInit);
            return GetAddrOfLocalVar(VD);
          });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef, ++InitsRef;
    }
  }
  return !EmittedAsFirstprivate.empty();
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, [&]() -> Address {
              // Emit private VarDecl with copy init.
              EmitDecl(*VD);
              return GetAddrOfLocalVar(VD);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
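        // For example, given '#pragma omp parallel copyin(tp)' for a
        // threadprivate variable 'tp', each non-master thread copies the
        // master thread's value of 'tp' into its own copy before the region
        // body runs.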
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as a field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
                          VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // First, check whether the current thread is the master thread. If
          // it is, no copy is needed.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(),
                                         CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
          DeclRefExpr DRE(
              const_cast<VarDecl *>(OrigVD),
              /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                  OrigVD) != nullptr,
              (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in the
        // codegen for the 'firstprivate' clause.
        if (IInit) {
          auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered =
              PrivateScope.addPrivate(OrigVD, [&]() -> Address {
                // Emit private VarDecl with copy init.
                EmitDecl(*VD);
                return GetAddrOfLocalVar(VD);
              });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef, ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit the following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseMap<const Decl *, const Expr *> LoopCountersAndUpdates;
  const Expr *LastIterVal = nullptr;
  const Expr *IVExpr = nullptr;
  const Expr *IncExpr = nullptr;
  if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    if (isOpenMPWorksharingDirective(D.getDirectiveKind())) {
      LastIterVal = cast<VarDecl>(cast<DeclRefExpr>(
                                      LoopDirective->getUpperBoundVariable())
                                      ->getDecl())
                        ->getAnyInitializer();
      IVExpr = LoopDirective->getIterationVariable();
      IncExpr = LoopDirective->getInc();
      auto IUpdate = LoopDirective->updates().begin();
      for (auto *E : LoopDirective->counters()) {
        auto *D = cast<DeclRefExpr>(E)->getDecl()->getCanonicalDecl();
        LoopCountersAndUpdates[D] = *IUpdate;
        ++IUpdate;
      }
    }
  }
  {
    llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
    bool FirstLCV = true;
    for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
      auto IRef = C->varlist_begin();
      auto ISrcRef = C->source_exprs().begin();
      auto IDestRef = C->destination_exprs().begin();
      for (auto *AssignOp : C->assignment_ops()) {
        auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
        QualType Type = PrivateVD->getType();
        auto *CanonicalVD = PrivateVD->getCanonicalDecl();
        if (AlreadyEmittedVars.insert(CanonicalVD).second) {
          // If the lastprivate variable is a loop control variable of a
          // loop-based directive, update its value before copying it back to
          // the original variable.
          if (auto *UpExpr = LoopCountersAndUpdates.lookup(CanonicalVD)) {
            if (FirstLCV && LastIterVal) {
              EmitAnyExprToMem(LastIterVal, EmitLValue(IVExpr).getAddress(),
                               IVExpr->getType().getQualifiers(),
                               /*IsInitializer=*/false);
              EmitIgnoredExpr(IncExpr);
              FirstLCV = false;
            }
            EmitIgnoredExpr(UpExpr);
          }
          auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
          auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
          // Get the address of the original variable.
          Address OriginalAddr = GetAddrOfLocalVar(DestVD);
          // Get the address of the private variable.
          Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
          if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
            PrivateAddr =
                Address(Builder.CreateLoad(PrivateAddr),
                        getNaturalTypeAlignment(RefTy->getPointeeType()));
          EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
        }
        ++IRef;
        ++ISrcRef;
        ++IDestRef;
      }
    }
  }
  if (IsLastIterCond) {
    EmitBlock(DoneBB, /*IsFinished=*/true);
  }
}

void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    auto IPriv = C->privates().begin();
    for (auto IRef : C->varlists()) {
      auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
      if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) {
        auto *Base = OASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
          Base = TempOASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
          Base = TempASE->getBase()->IgnoreParenImpCasts();
        auto *DE = cast<DeclRefExpr>(Base);
        auto *OrigVD = cast<VarDecl>(DE->getDecl());
        auto OASELValueLB = EmitOMPArraySectionExpr(OASE);
        auto OASELValueUB =
            EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
        auto OriginalBaseLValue = EmitLValue(DE);
        auto BaseLValue = OriginalBaseLValue;
        auto *Zero = Builder.getInt64(/*C=*/0);
        llvm::SmallVector<llvm::Value *, 4> Indexes;
        Indexes.push_back(Zero);
        auto *ItemTy =
            OASELValueLB.getPointer()->getType()->getPointerElementType();
        auto *Ty = BaseLValue.getPointer()->getType()->getPointerElementType();
        while (Ty != ItemTy) {
          Indexes.push_back(Zero);
          Ty = Ty->getPointerElementType();
        }
        BaseLValue = MakeAddrLValue(
            Address(Builder.CreateInBoundsGEP(BaseLValue.getPointer(), Indexes),
                    OASELValueLB.getAlignment()),
            OASELValueLB.getType(), OASELValueLB.getAlignmentSource());
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        PrivateScope.addPrivate(LHSVD, [this, OASELValueLB]() -> Address {
          return OASELValueLB.getAddress();
        });
        // Emit reduction copy.
        bool IsRegistered = PrivateScope.addPrivate(
            OrigVD, [this, PrivateVD, BaseLValue, OASELValueLB, OASELValueUB,
                     OriginalBaseLValue]() -> Address {
              // Emit VarDecl with copy init for arrays.
              // Get the address of the original variable captured in current
              // captured region.
              auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(),
                                                 OASELValueLB.getPointer());
              Size = Builder.CreateNUWAdd(
                  Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  *this, cast<OpaqueValueExpr>(
                             getContext()
                                 .getAsVariableArrayType(PrivateVD->getType())
                                 ->getSizeExpr()),
                  RValue::get(Size));
              EmitVariablyModifiedType(PrivateVD->getType());
              auto Emission = EmitAutoVarAlloca(*PrivateVD);
              auto Addr = Emission.getAllocatedAddress();
              auto *Init = PrivateVD->getInit();
              EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), Init);
              EmitAutoVarCleanups(Emission);
              // Emit private VarDecl with reduction init.
              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
                                                   OASELValueLB.getPointer());
              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
              Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast(
                  Ptr, OriginalBaseLValue.getPointer()->getType());
              return Address(Ptr, OriginalBaseLValue.getAlignment());
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
          return GetAddrOfLocalVar(PrivateVD);
        });
      } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(IRef)) {
        auto *Base = ASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
          Base = TempASE->getBase()->IgnoreParenImpCasts();
        auto *DE = cast<DeclRefExpr>(Base);
        auto *OrigVD = cast<VarDecl>(DE->getDecl());
        auto ASELValue = EmitLValue(ASE);
        auto OriginalBaseLValue = EmitLValue(DE);
        auto BaseLValue = OriginalBaseLValue;
        auto *Zero = Builder.getInt64(/*C=*/0);
        llvm::SmallVector<llvm::Value *, 4> Indexes;
        Indexes.push_back(Zero);
        auto *ItemTy =
            ASELValue.getPointer()->getType()->getPointerElementType();
        auto *Ty = BaseLValue.getPointer()->getType()->getPointerElementType();
        while (Ty != ItemTy) {
          Indexes.push_back(Zero);
          Ty = Ty->getPointerElementType();
        }
        BaseLValue = MakeAddrLValue(
            Address(Builder.CreateInBoundsGEP(BaseLValue.getPointer(), Indexes),
                    ASELValue.getAlignment()),
            ASELValue.getType(), ASELValue.getAlignmentSource());
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        PrivateScope.addPrivate(LHSVD, [this, ASELValue]() -> Address {
          return ASELValue.getAddress();
        });
        // Emit reduction copy.
        bool IsRegistered = PrivateScope.addPrivate(
            OrigVD, [this, PrivateVD, BaseLValue, ASELValue,
                     OriginalBaseLValue]() -> Address {
              // Emit private VarDecl with reduction init.
              EmitDecl(*PrivateVD);
              auto Addr = GetAddrOfLocalVar(PrivateVD);
              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
                                                   ASELValue.getPointer());
              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
              Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast(
                  Ptr, OriginalBaseLValue.getPointer()->getType());
              return Address(Ptr, OriginalBaseLValue.getAlignment());
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
          return Builder.CreateElementBitCast(
              GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()),
              "rhs.begin");
        });
      } else {
        auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
        QualType Type = PrivateVD->getType();
        if (getContext().getAsArrayType(Type)) {
          // Store the address of the original variable associated with the LHS
          // implicit variable.
          DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                          CapturedStmtInfo->lookup(OrigVD) != nullptr,
                          IRef->getType(), VK_LValue, IRef->getExprLoc());
          Address OriginalAddr = EmitLValue(&DRE).getAddress();
          PrivateScope.addPrivate(LHSVD, [this, OriginalAddr,
                                          LHSVD]() -> Address {
            return Builder.CreateElementBitCast(
                OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
          });
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            if (Type->isVariablyModifiedType()) {
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  *this, cast<OpaqueValueExpr>(
                             getContext()
                                 .getAsVariableArrayType(PrivateVD->getType())
                                 ->getSizeExpr()),
                  RValue::get(
                      getTypeSize(OrigVD->getType().getNonReferenceType())));
              EmitVariablyModifiedType(Type);
            }
            auto Emission = EmitAutoVarAlloca(*PrivateVD);
            auto Addr = Emission.getAllocatedAddress();
            auto *Init = PrivateVD->getInit();
            EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), Init);
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
          assert(IsRegistered && "private var already registered as private");
          // Silence the warning about unused variable.
          (void)IsRegistered;
          PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
            return Builder.CreateElementBitCast(
                GetAddrOfLocalVar(PrivateVD),
                ConvertTypeForMem(RHSVD->getType()), "rhs.begin");
          });
        } else {
          // Store the address of the original variable associated with the LHS
          // implicit variable.
          PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef]() -> Address {
            DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                            CapturedStmtInfo->lookup(OrigVD) != nullptr,
                            IRef->getType(), VK_LValue, IRef->getExprLoc());
            return EmitLValue(&DRE).getAddress();
          });
          // Emit reduction copy.
          bool IsRegistered =
              PrivateScope.addPrivate(OrigVD, [this, PrivateVD]() -> Address {
                // Emit private VarDecl with reduction init.
                EmitDecl(*PrivateVD);
                return GetAddrOfLocalVar(PrivateVD);
              });
          assert(IsRegistered && "private var already registered as private");
          // Silence the warning about unused variable.
          (void)IsRegistered;
          PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
            return GetAddrOfLocalVar(PrivateVD);
          });
        }
      }
      ++ILHS, ++IRHS, ++IPriv;
    }
  }
}

void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
  }
  if (HasAtLeastOneReduction) {
    // Emit a nowait reduction if the nowait clause is present or the directive
    // is a parallel directive (it always has an implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
        D.getSingleClause<OMPNowaitClause>() ||
            isOpenMPParallelDirective(D.getDirectiveKind()) ||
            D.getDirectiveKind() == OMPD_simd,
        D.getDirectiveKind() == OMPD_simd);
  }
}

static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
                                           const OMPExecutableDirective &S,
                                           OpenMPDirectiveKind InnermostKind,
                                           const RegionCodeGenTy &CodeGen) {
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
      S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                         /*IgnoreResultAssign*/ true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getLocStart());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
  }
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
                                              CapturedVars, IfCond);
}

void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
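  // A '#pragma omp parallel' is lowered to an outlined function containing the
  // region body; emitParallelCall above then invokes it through the OpenMP
  // runtime (__kmpc_fork_call in the KMP runtime), passing the captured
  // variables as arguments.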
  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    bool Firstprivates = CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins || Firstprivates) {
      // Emit an implicit barrier to synchronize threads and avoid data races
      // on initialization of firstprivate variables, or on propagation of the
      // master thread's values of threadprivate variables to the local
      // instances of those variables in all other implicit threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen);
}

void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update counter values for the current iteration.
  for (auto I : D.updates()) {
    EmitIgnoredExpr(I);
  }
  // Update the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto U : C->updates()) {
      EmitIgnoredExpr(U);
    }
  }

  // On a continue in the body, jump to the end.
  auto Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  // Emit loop body.
  EmitStmt(D.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  // TODO: Update lastprivates if the SeparateIter flag is true.
  // This will be implemented in a follow-up OMPLastprivateClause patch, but
  // the result should still be correct without it, as we do not make these
  // variables private yet.
}

void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);
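  // The emitted loop has the shape:
  //
  // omp.inner.for.cond:
  //   br <LoopCond>, label %omp.inner.for.body, label %omp.inner.for.end
  // omp.inner.for.body:
  //   <body>
  // omp.inner.for.inc:
  //   IV = IV + 1; br label %omp.inner.for.cond
  // omp.inner.for.end: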
  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}

void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return;
  // Emit inits for the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto Init : C->inits()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      auto *OrigVD = cast<VarDecl>(
          cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())->getDecl());
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      VD->getInit()->getType(), VK_LValue,
                      VD->getInit()->getExprLoc());
      AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
      EmitExprAsInit(&DRE, VD,
                     MakeAddrLValue(Emission.getAllocatedAddress(),
                                    VD->getType()),
                     /*capturedByInit=*/false);
      EmitAutoVarCleanups(Emission);
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
}

static void emitLinearClauseFinal(CodeGenFunction &CGF,
                                  const OMPLoopDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (auto F : C->finals()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = CGF.EmitLValue(&DRE).getAddress();
      CodeGenFunction::OMPPrivateScope VarScope(CGF);
      VarScope.addPrivate(OrigVD,
                          [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      CGF.EmitIgnoredExpr(F);
      ++IC;
    }
  }
}

static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    unsigned ClauseAlignment = 0;
    if (auto AlignmentExpr = Clause->getAlignment()) {
      auto AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
    }
    for (auto E : Clause->varlists()) {
      unsigned Alignment = ClauseAlignment;
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined
        // default alignments for SIMD instructions on the target platforms
        // are assumed.
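        // (For example, a target with 256-bit SIMD registers would typically
        // report a 32-byte default here; the exact value is target-defined.)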
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
             "alignment is not power of 2");
      if (Alignment != 0) {
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
        CGF.EmitAlignmentAssumption(PtrValue, Alignment);
      }
    }
  }
}

static void emitPrivateLoopCounters(CodeGenFunction &CGF,
                                    CodeGenFunction::OMPPrivateScope &LoopScope,
                                    ArrayRef<Expr *> Counters,
                                    ArrayRef<Expr *> PrivateCounters) {
  if (!CGF.HaveInsertPoint())
    return;
  auto I = PrivateCounters.begin();
  for (auto *E : Counters) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    Address Addr = Address::invalid();
    (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
      // Emit var without initialization.
      auto VarEmission = CGF.EmitAutoVarAlloca(*PrivateVD);
      CGF.EmitAutoVarCleanups(VarEmission);
      Addr = VarEmission.getAllocatedAddress();
      return Addr;
    });
    (void)LoopScope.addPrivate(VD, [&]() -> Address { return Addr; });
    ++I;
  }
}

static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
  if (!CGF.HaveInsertPoint())
    return;
  {
    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
    emitPrivateLoopCounters(CGF, PreCondScope, S.counters(),
                            S.private_counters());
    (void)PreCondScope.Privatize();
    // Get initial values of real counters.
    for (auto I : S.inits()) {
      CGF.EmitIgnoredExpr(I);
    }
  }
  // Check that the loop is executed at least one time.
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
}

static void
emitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D,
                      CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto CurPrivate = C->privates().begin();
    for (auto *E : C->varlists()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
      bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
        // Emit private VarDecl with copy init.
        CGF.EmitVarDecl(*PrivateVD);
        return CGF.GetAddrOfLocalVar(PrivateVD);
      });
      assert(IsRegistered && "linear var already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
      ++CurPrivate;
    }
  }
}

static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     bool IsMonotonic) {
  if (!CGF.HaveInsertPoint())
    return;
  if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
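    // Note that 'simdlen' is only the preferred vector width, while 'safelen'
    // is a hard upper bound on how many iterations may execute concurrently;
    // hence the more conservative handling of 'safelen' below.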
    // In presence of finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    if (!IsMonotonic)
      CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
  } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In presence of finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(false);
  }
}

void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
                                      bool IsMonotonic) {
  // Walk clauses and process safelen/simdlen.
  LoopStack.setParallel(!IsMonotonic);
  LoopStack.setVectorizeEnable(true);
  emitSimdlenSafelenClause(*this, D, IsMonotonic);
}

void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return;
  auto IC = D.counters().begin();
  for (auto F : D.finals()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD)) {
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress();
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD,
                          [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
    }
    ++IC;
  }
  emitLinearClauseFinal(*this, D);
}

void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
      ContBlock = CGF.createBasicBlock("simd.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate the iterations count
    // on each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    CGF.EmitOMPSimdInit(S);

    emitAlignedClause(CGF, S);
    CGF.EmitOMPLinearClauseInit(S);
    bool HasLastprivateClause;
    {
      OMPPrivateScope LoopScope(CGF);
      emitPrivateLoopCounters(CGF, LoopScope, S.counters(),
                              S.private_counters());
      emitPrivateLinearVars(CGF, S, LoopScope);
      CGF.EmitOMPPrivateClause(S, LoopScope);
      CGF.EmitOMPReductionClauseInit(S, LoopScope);
      HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
      (void)LoopScope.Privatize();
      CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                           S.getInc(),
                           [&S](CodeGenFunction &CGF) {
                             CGF.EmitOMPLoopBody(S, JumpDest());
                             CGF.EmitStopPoint(&S);
                           },
                           [](CodeGenFunction &) {});
      // Emit final copy of the lastprivate variables at the end of loops.
      if (HasLastprivateClause) {
        CGF.EmitOMPLastprivateClauseFinal(S);
      }
      CGF.EmitOMPReductionClauseFinal(S);
    }
    CGF.EmitOMPSimdFinal(S);
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}

void CodeGenFunction::EmitOMPForOuterLoop(
    OpenMPScheduleClauseKind ScheduleKind, bool IsMonotonic,
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
    Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
  auto &RT = CGM.getOpenMPRuntime();

  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
  const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind);

  assert((Ordered ||
          !RT.isStaticNonchunked(ScheduleKind, /*Chunked=*/Chunk != nullptr)) &&
         "static non-chunked schedule does not need outer loop");

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from
  // the run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined.
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx;
  //     __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
  //   } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided
  // into chunks of size chunk_size, and the chunks are assigned to the threads
  // in the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  if (DynamicOrOrdered) {
    llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration());
    RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind,
                           IVSize, IVSigned, Ordered, UBVal, Chunk);
  } else {
    RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind,
                         IVSize, IVSigned, Ordered, IL, LB, UB, ST, Chunk);
  }

  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB)
    EmitIgnoredExpr(S.getEnsureUpperBound());
    // IV = LB
    EmitIgnoredExpr(S.getInit());
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(S.getCond());
  } else {
    BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned,
                                 IL, LB, UB, ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(S.getInit());

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  // Generate !llvm.loop.parallel metadata for loads and stores for loops
  // with dynamic/guided scheduling and without ordered clause.
1446 if (!isOpenMPSimdDirective(S.getDirectiveKind())) 1447 LoopStack.setParallel(!IsMonotonic); 1448 else 1449 EmitOMPSimdInit(S, IsMonotonic); 1450 1451 SourceLocation Loc = S.getLocStart(); 1452 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), 1453 [&S, LoopExit](CodeGenFunction &CGF) { 1454 CGF.EmitOMPLoopBody(S, LoopExit); 1455 CGF.EmitStopPoint(&S); 1456 }, 1457 [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) { 1458 if (Ordered) { 1459 CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd( 1460 CGF, Loc, IVSize, IVSigned); 1461 } 1462 }); 1463 1464 EmitBlock(Continue.getBlock()); 1465 BreakContinueStack.pop_back(); 1466 if (!DynamicOrOrdered) { 1467 // Emit "LB = LB + Stride", "UB = UB + Stride". 1468 EmitIgnoredExpr(S.getNextLowerBound()); 1469 EmitIgnoredExpr(S.getNextUpperBound()); 1470 } 1471 1472 EmitBranch(CondBlock); 1473 LoopStack.pop(); 1474 // Emit the fall-through block. 1475 EmitBlock(LoopExit.getBlock()); 1476 1477 // Tell the runtime we are done. 1478 if (!DynamicOrOrdered) 1479 RT.emitForStaticFinish(*this, S.getLocEnd()); 1480 } 1481 1482 /// \brief Emit a helper variable and return corresponding lvalue. 1483 static LValue EmitOMPHelperVar(CodeGenFunction &CGF, 1484 const DeclRefExpr *Helper) { 1485 auto VDecl = cast<VarDecl>(Helper->getDecl()); 1486 CGF.EmitVarDecl(*VDecl); 1487 return CGF.EmitLValue(Helper); 1488 } 1489 1490 namespace { 1491 struct ScheduleKindModifiersTy { 1492 OpenMPScheduleClauseKind Kind; 1493 OpenMPScheduleClauseModifier M1; 1494 OpenMPScheduleClauseModifier M2; 1495 ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind, 1496 OpenMPScheduleClauseModifier M1, 1497 OpenMPScheduleClauseModifier M2) 1498 : Kind(Kind), M1(M1), M2(M2) {} 1499 }; 1500 } // namespace 1501 1502 static std::pair<llvm::Value * /*Chunk*/, ScheduleKindModifiersTy> 1503 emitScheduleClause(CodeGenFunction &CGF, const OMPLoopDirective &S, 1504 bool OuterRegion) { 1505 // Detect the loop schedule kind and chunk. 1506 auto ScheduleKind = OMPC_SCHEDULE_unknown; 1507 OpenMPScheduleClauseModifier M1 = OMPC_SCHEDULE_MODIFIER_unknown; 1508 OpenMPScheduleClauseModifier M2 = OMPC_SCHEDULE_MODIFIER_unknown; 1509 llvm::Value *Chunk = nullptr; 1510 if (const auto *C = S.getSingleClause<OMPScheduleClause>()) { 1511 ScheduleKind = C->getScheduleKind(); 1512 M1 = C->getFirstScheduleModifier(); 1513 M2 = C->getSecondScheduleModifier(); 1514 if (const auto *Ch = C->getChunkSize()) { 1515 if (auto *ImpRef = cast_or_null<DeclRefExpr>(C->getHelperChunkSize())) { 1516 if (OuterRegion) { 1517 const VarDecl *ImpVar = cast<VarDecl>(ImpRef->getDecl()); 1518 CGF.EmitVarDecl(*ImpVar); 1519 CGF.EmitStoreThroughLValue( 1520 CGF.EmitAnyExpr(Ch), 1521 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(ImpVar), 1522 ImpVar->getType())); 1523 } else { 1524 Ch = ImpRef; 1525 } 1526 } 1527 if (!C->getHelperChunkSize() || !OuterRegion) { 1528 Chunk = CGF.EmitScalarExpr(Ch); 1529 Chunk = CGF.EmitScalarConversion(Chunk, Ch->getType(), 1530 S.getIterationVariable()->getType(), 1531 S.getLocStart()); 1532 } 1533 } 1534 } 1535 return std::make_pair(Chunk, ScheduleKindModifiersTy(ScheduleKind, M1, M2)); 1536 } 1537 1538 bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { 1539 // Emit the loop iteration variable. 1540 auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 1541 auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); 1542 EmitVarDecl(*IVDecl); 1543 1544 // Emit the iterations count variable. 
1545 // If it is not a variable, Sema decided to calculate iterations count on each 1546 // iteration (e.g., it is foldable into a constant). 1547 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 1548 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 1549 // Emit calculation of the iterations count. 1550 EmitIgnoredExpr(S.getCalcLastIteration()); 1551 } 1552 1553 auto &RT = CGM.getOpenMPRuntime(); 1554 1555 bool HasLastprivateClause; 1556 // Check pre-condition. 1557 { 1558 // Skip the entire loop if we don't meet the precondition. 1559 // If the condition constant folds and can be elided, avoid emitting the 1560 // whole loop. 1561 bool CondConstant; 1562 llvm::BasicBlock *ContBlock = nullptr; 1563 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 1564 if (!CondConstant) 1565 return false; 1566 } else { 1567 auto *ThenBlock = createBasicBlock("omp.precond.then"); 1568 ContBlock = createBasicBlock("omp.precond.end"); 1569 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 1570 getProfileCount(&S)); 1571 EmitBlock(ThenBlock); 1572 incrementProfileCounter(&S); 1573 } 1574 1575 emitAlignedClause(*this, S); 1576 EmitOMPLinearClauseInit(S); 1577 // Emit 'then' code. 1578 { 1579 // Emit helper vars inits. 1580 LValue LB = 1581 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable())); 1582 LValue UB = 1583 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable())); 1584 LValue ST = 1585 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 1586 LValue IL = 1587 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 1588 1589 OMPPrivateScope LoopScope(*this); 1590 if (EmitOMPFirstprivateClause(S, LoopScope)) { 1591 // Emit implicit barrier to synchronize threads and avoid data races on 1592 // initialization of firstprivate variables. 1593 CGM.getOpenMPRuntime().emitBarrierCall( 1594 *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 1595 /*ForceSimpleCall=*/true); 1596 } 1597 EmitOMPPrivateClause(S, LoopScope); 1598 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 1599 EmitOMPReductionClauseInit(S, LoopScope); 1600 emitPrivateLoopCounters(*this, LoopScope, S.counters(), 1601 S.private_counters()); 1602 emitPrivateLinearVars(*this, S, LoopScope); 1603 (void)LoopScope.Privatize(); 1604 1605 // Detect the loop schedule kind and chunk. 1606 llvm::Value *Chunk; 1607 OpenMPScheduleClauseKind ScheduleKind; 1608 auto ScheduleInfo = 1609 emitScheduleClause(*this, S, /*OuterRegion=*/false); 1610 Chunk = ScheduleInfo.first; 1611 ScheduleKind = ScheduleInfo.second.Kind; 1612 const OpenMPScheduleClauseModifier M1 = ScheduleInfo.second.M1; 1613 const OpenMPScheduleClauseModifier M2 = ScheduleInfo.second.M2; 1614 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1615 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1616 const bool Ordered = S.getSingleClause<OMPOrderedClause>() != nullptr; 1617 // OpenMP 4.5, 2.7.1 Loop Construct, Description. 1618 // If the static schedule kind is specified or if the ordered clause is 1619 // specified, and if no monotonic modifier is specified, the effect will 1620 // be as if the monotonic modifier was specified. 
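// E.g., plain 'schedule(static)' is handled below exactly as if
// 'schedule(monotonic: static)' had been written.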
1621 if (RT.isStaticNonchunked(ScheduleKind, 1622 /* Chunked */ Chunk != nullptr) && 1623 !Ordered) { 1624 if (isOpenMPSimdDirective(S.getDirectiveKind())) 1625 EmitOMPSimdInit(S, /*IsMonotonic=*/true); 1626 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 1627 // When no chunk_size is specified, the iteration space is divided into 1628 // chunks that are approximately equal in size, and at most one chunk is 1629 // distributed to each thread. Note that the size of the chunks is 1630 // unspecified in this case. 1631 RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, 1632 IVSize, IVSigned, Ordered, 1633 IL.getAddress(), LB.getAddress(), 1634 UB.getAddress(), ST.getAddress()); 1635 auto LoopExit = 1636 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 1637 // UB = min(UB, GlobalUB); 1638 EmitIgnoredExpr(S.getEnsureUpperBound()); 1639 // IV = LB; 1640 EmitIgnoredExpr(S.getInit()); 1641 // while (idx <= UB) { BODY; ++idx; } 1642 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 1643 S.getInc(), 1644 [&S, LoopExit](CodeGenFunction &CGF) { 1645 CGF.EmitOMPLoopBody(S, LoopExit); 1646 CGF.EmitStopPoint(&S); 1647 }, 1648 [](CodeGenFunction &) {}); 1649 EmitBlock(LoopExit.getBlock()); 1650 // Tell the runtime we are done. 1651 RT.emitForStaticFinish(*this, S.getLocStart()); 1652 } else { 1653 const bool IsMonotonic = Ordered || 1654 ScheduleKind == OMPC_SCHEDULE_static || 1655 ScheduleKind == OMPC_SCHEDULE_unknown || 1656 M1 == OMPC_SCHEDULE_MODIFIER_monotonic || 1657 M2 == OMPC_SCHEDULE_MODIFIER_monotonic; 1658 // Emit the outer loop, which requests its work chunk [LB..UB] from 1659 // runtime and runs the inner loop to process it. 1660 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, 1661 LB.getAddress(), UB.getAddress(), ST.getAddress(), 1662 IL.getAddress(), Chunk); 1663 } 1664 EmitOMPReductionClauseFinal(S); 1665 // Emit final copy of the lastprivate variables if IsLastIter != 0. 1666 if (HasLastprivateClause) 1667 EmitOMPLastprivateClauseFinal( 1668 S, Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart()))); 1669 } 1670 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 1671 EmitOMPSimdFinal(S); 1672 } 1673 // We're now done with the loop, so jump to the continuation block. 1674 if (ContBlock) { 1675 EmitBranch(ContBlock); 1676 EmitBlock(ContBlock, true); 1677 } 1678 } 1679 return HasLastprivateClause; 1680 } 1681 1682 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { 1683 LexicalScope Scope(*this, S.getSourceRange()); 1684 bool HasLastprivates = false; 1685 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) { 1686 HasLastprivates = CGF.EmitOMPWorksharingLoop(S); 1687 }; 1688 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, 1689 S.hasCancel()); 1690 1691 // Emit an implicit barrier at the end. 1692 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { 1693 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); 1694 } 1695 } 1696 1697 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { 1698 LexicalScope Scope(*this, S.getSourceRange()); 1699 bool HasLastprivates = false; 1700 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) { 1701 HasLastprivates = CGF.EmitOMPWorksharingLoop(S); 1702 }; 1703 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 1704 1705 // Emit an implicit barrier at the end. 
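// The barrier can be dropped only when 'nowait' is present and there are no
// lastprivate variables whose final values must be visible to other threads.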
1706 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { 1707 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); 1708 } 1709 } 1710 1711 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, 1712 const Twine &Name, 1713 llvm::Value *Init = nullptr) { 1714 auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); 1715 if (Init) 1716 CGF.EmitScalarInit(Init, LVal); 1717 return LVal; 1718 } 1719 1720 OpenMPDirectiveKind 1721 CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { 1722 auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt(); 1723 auto *CS = dyn_cast<CompoundStmt>(Stmt); 1724 bool HasLastprivates = false; 1725 auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF) { 1726 auto &C = CGF.CGM.getContext(); 1727 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 1728 // Emit helper vars inits. 1729 LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.", 1730 CGF.Builder.getInt32(0)); 1731 auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1) 1732 : CGF.Builder.getInt32(0); 1733 LValue UB = 1734 createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal); 1735 LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.", 1736 CGF.Builder.getInt32(1)); 1737 LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.", 1738 CGF.Builder.getInt32(0)); 1739 // Loop counter. 1740 LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv."); 1741 OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); 1742 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); 1743 OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); 1744 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); 1745 // Generate condition for loop. 1746 BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, 1747 OK_Ordinary, S.getLocStart(), 1748 /*fpContractable=*/false); 1749 // Increment for loop counter. 1750 UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, 1751 S.getLocStart()); 1752 auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) { 1753 // Iterate through all sections and emit a switch construct: 1754 // switch (IV) { 1755 // case 0: 1756 // <SectionStmt[0]>; 1757 // break; 1758 // ... 1759 // case <NumSection> - 1: 1760 // <SectionStmt[<NumSection> - 1]>; 1761 // break; 1762 // } 1763 // .omp.sections.exit: 1764 auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); 1765 auto *SwitchStmt = CGF.Builder.CreateSwitch( 1766 CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB, 1767 CS == nullptr ? 
1 : CS->size()); 1768 if (CS) { 1769 unsigned CaseNumber = 0; 1770 for (auto *SubStmt : CS->children()) { 1771 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 1772 CGF.EmitBlock(CaseBB); 1773 SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB); 1774 CGF.EmitStmt(SubStmt); 1775 CGF.EmitBranch(ExitBB); 1776 ++CaseNumber; 1777 } 1778 } else { 1779 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 1780 CGF.EmitBlock(CaseBB); 1781 SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB); 1782 CGF.EmitStmt(Stmt); 1783 CGF.EmitBranch(ExitBB); 1784 } 1785 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 1786 }; 1787 1788 CodeGenFunction::OMPPrivateScope LoopScope(CGF); 1789 if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) { 1790 // Emit implicit barrier to synchronize threads and avoid data races on 1791 // initialization of firstprivate variables. 1792 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 1793 CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 1794 /*ForceSimpleCall=*/true); 1795 } 1796 CGF.EmitOMPPrivateClause(S, LoopScope); 1797 HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 1798 CGF.EmitOMPReductionClauseInit(S, LoopScope); 1799 (void)LoopScope.Privatize(); 1800 1801 // Emit static non-chunked loop. 1802 CGF.CGM.getOpenMPRuntime().emitForStaticInit( 1803 CGF, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32, 1804 /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(), 1805 UB.getAddress(), ST.getAddress()); 1806 // UB = min(UB, GlobalUB); 1807 auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart()); 1808 auto *MinUBGlobalUB = CGF.Builder.CreateSelect( 1809 CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal); 1810 CGF.EmitStoreOfScalar(MinUBGlobalUB, UB); 1811 // IV = LB; 1812 CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV); 1813 // while (idx <= UB) { BODY; ++idx; } 1814 CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen, 1815 [](CodeGenFunction &) {}); 1816 // Tell the runtime we are done. 1817 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocStart()); 1818 CGF.EmitOMPReductionClauseFinal(S); 1819 1820 // Emit final copy of the lastprivate variables if IsLastIter != 0. 1821 if (HasLastprivates) 1822 CGF.EmitOMPLastprivateClauseFinal( 1823 S, CGF.Builder.CreateIsNotNull( 1824 CGF.EmitLoadOfScalar(IL, S.getLocStart()))); 1825 }; 1826 1827 bool HasCancel = false; 1828 if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S)) 1829 HasCancel = OSD->hasCancel(); 1830 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S)) 1831 HasCancel = OPSD->hasCancel(); 1832 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen, 1833 HasCancel); 1834 // Emit barrier for lastprivates only if 'sections' directive has 'nowait' 1835 // clause. Otherwise the barrier will be generated by the codegen for the 1836 // directive. 1837 if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) { 1838 // Emit implicit barrier to synchronize threads and avoid data races on 1839 // initialization of firstprivate variables. 1840 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), 1841 OMPD_unknown); 1842 } 1843 return OMPD_sections; 1844 } 1845 1846 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { 1847 LexicalScope Scope(*this, S.getSourceRange()); 1848 OpenMPDirectiveKind EmittedAs = EmitSections(S); 1849 // Emit an implicit barrier at the end. 
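// EmitSections has already emitted any barrier required for lastprivates, so
// only the directive-level barrier, elided by 'nowait', remains to be done.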
1850 if (!S.getSingleClause<OMPNowaitClause>()) {
1851 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), EmittedAs);
1852 }
1853 }
1854 
1855 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
1856 LexicalScope Scope(*this, S.getSourceRange());
1857 auto &&CodeGen = [&S](CodeGenFunction &CGF) {
1858 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1859 };
1860 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
1861 S.hasCancel());
1862 }
1863 
1864 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
1865 llvm::SmallVector<const Expr *, 8> CopyprivateVars;
1866 llvm::SmallVector<const Expr *, 8> DestExprs;
1867 llvm::SmallVector<const Expr *, 8> SrcExprs;
1868 llvm::SmallVector<const Expr *, 8> AssignmentOps;
1869 // Check if there are any 'copyprivate' clauses associated with this
1870 // 'single' construct.
1871 //
1872 // Build a list of copyprivate variables along with helper expressions
1873 // (<source>, <destination>, <destination> = <source> expressions).
1874 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
1875 CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
1876 DestExprs.append(C->destination_exprs().begin(),
1877 C->destination_exprs().end());
1878 SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
1879 AssignmentOps.append(C->assignment_ops().begin(),
1880 C->assignment_ops().end());
1881 }
1882 LexicalScope Scope(*this, S.getSourceRange());
1883 // Emit code for the 'single' region along with its 'copyprivate' clauses.
1884 bool HasFirstprivates;
1885 auto &&CodeGen = [&S, &HasFirstprivates](CodeGenFunction &CGF) {
1886 CodeGenFunction::OMPPrivateScope SingleScope(CGF);
1887 HasFirstprivates = CGF.EmitOMPFirstprivateClause(S, SingleScope);
1888 CGF.EmitOMPPrivateClause(S, SingleScope);
1889 (void)SingleScope.Privatize();
1890 
1891 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1892 };
1893 CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
1894 CopyprivateVars, DestExprs, SrcExprs,
1895 AssignmentOps);
1896 // Emit an implicit barrier at the end (to avoid data races on firstprivate
1897 // init, or if no 'nowait' clause was specified and no 'copyprivate' clause).
1898 if ((!S.getSingleClause<OMPNowaitClause>() || HasFirstprivates) &&
1899 CopyprivateVars.empty()) {
1900 CGM.getOpenMPRuntime().emitBarrierCall(
1901 *this, S.getLocStart(),
1902 S.getSingleClause<OMPNowaitClause>() ?
OMPD_unknown : OMPD_single); 1903 } 1904 } 1905 1906 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { 1907 LexicalScope Scope(*this, S.getSourceRange()); 1908 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 1909 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1910 }; 1911 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart()); 1912 } 1913 1914 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { 1915 LexicalScope Scope(*this, S.getSourceRange()); 1916 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 1917 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1918 }; 1919 Expr *Hint = nullptr; 1920 if (auto *HintClause = S.getSingleClause<OMPHintClause>()) 1921 Hint = HintClause->getHint(); 1922 CGM.getOpenMPRuntime().emitCriticalRegion(*this, 1923 S.getDirectiveName().getAsString(), 1924 CodeGen, S.getLocStart(), Hint); 1925 } 1926 1927 void CodeGenFunction::EmitOMPParallelForDirective( 1928 const OMPParallelForDirective &S) { 1929 // Emit directive as a combined directive that consists of two implicit 1930 // directives: 'parallel' with 'for' directive. 1931 LexicalScope Scope(*this, S.getSourceRange()); 1932 (void)emitScheduleClause(*this, S, /*OuterRegion=*/true); 1933 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 1934 CGF.EmitOMPWorksharingLoop(S); 1935 }; 1936 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen); 1937 } 1938 1939 void CodeGenFunction::EmitOMPParallelForSimdDirective( 1940 const OMPParallelForSimdDirective &S) { 1941 // Emit directive as a combined directive that consists of two implicit 1942 // directives: 'parallel' with 'for' directive. 1943 LexicalScope Scope(*this, S.getSourceRange()); 1944 (void)emitScheduleClause(*this, S, /*OuterRegion=*/true); 1945 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 1946 CGF.EmitOMPWorksharingLoop(S); 1947 }; 1948 emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen); 1949 } 1950 1951 void CodeGenFunction::EmitOMPParallelSectionsDirective( 1952 const OMPParallelSectionsDirective &S) { 1953 // Emit directive as a combined directive that consists of two implicit 1954 // directives: 'parallel' with 'sections' directive. 1955 LexicalScope Scope(*this, S.getSourceRange()); 1956 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 1957 (void)CGF.EmitSections(S); 1958 }; 1959 emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen); 1960 } 1961 1962 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { 1963 // Emit outlined function for task construct. 1964 LexicalScope Scope(*this, S.getSourceRange()); 1965 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 1966 auto CapturedStruct = GenerateCapturedStmtArgument(*CS); 1967 auto *I = CS->getCapturedDecl()->param_begin(); 1968 auto *PartId = std::next(I); 1969 // The first function argument for tasks is a thread id, the second one is a 1970 // part id (0 for tied tasks, >=0 for untied task). 1971 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; 1972 // Get list of private variables. 
1973 llvm::SmallVector<const Expr *, 8> PrivateVars; 1974 llvm::SmallVector<const Expr *, 8> PrivateCopies; 1975 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 1976 auto IRef = C->varlist_begin(); 1977 for (auto *IInit : C->private_copies()) { 1978 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 1979 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 1980 PrivateVars.push_back(*IRef); 1981 PrivateCopies.push_back(IInit); 1982 } 1983 ++IRef; 1984 } 1985 } 1986 EmittedAsPrivate.clear(); 1987 // Get list of firstprivate variables. 1988 llvm::SmallVector<const Expr *, 8> FirstprivateVars; 1989 llvm::SmallVector<const Expr *, 8> FirstprivateCopies; 1990 llvm::SmallVector<const Expr *, 8> FirstprivateInits; 1991 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 1992 auto IRef = C->varlist_begin(); 1993 auto IElemInitRef = C->inits().begin(); 1994 for (auto *IInit : C->private_copies()) { 1995 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 1996 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 1997 FirstprivateVars.push_back(*IRef); 1998 FirstprivateCopies.push_back(IInit); 1999 FirstprivateInits.push_back(*IElemInitRef); 2000 } 2001 ++IRef, ++IElemInitRef; 2002 } 2003 } 2004 // Build list of dependences. 2005 llvm::SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 8> 2006 Dependences; 2007 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) { 2008 for (auto *IRef : C->varlists()) { 2009 Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); 2010 } 2011 } 2012 auto &&CodeGen = [PartId, &S, &PrivateVars, &FirstprivateVars]( 2013 CodeGenFunction &CGF) { 2014 // Set proper addresses for generated private copies. 2015 auto *CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2016 OMPPrivateScope Scope(CGF); 2017 if (!PrivateVars.empty() || !FirstprivateVars.empty()) { 2018 auto *CopyFn = CGF.Builder.CreateLoad( 2019 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); 2020 auto *PrivatesPtr = CGF.Builder.CreateLoad( 2021 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2))); 2022 // Map privates. 2023 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> 2024 PrivatePtrs; 2025 llvm::SmallVector<llvm::Value *, 16> CallArgs; 2026 CallArgs.push_back(PrivatesPtr); 2027 for (auto *E : PrivateVars) { 2028 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2029 Address PrivatePtr = 2030 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType())); 2031 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); 2032 CallArgs.push_back(PrivatePtr.getPointer()); 2033 } 2034 for (auto *E : FirstprivateVars) { 2035 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2036 Address PrivatePtr = 2037 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType())); 2038 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); 2039 CallArgs.push_back(PrivatePtr.getPointer()); 2040 } 2041 CGF.EmitRuntimeCall(CopyFn, CallArgs); 2042 for (auto &&Pair : PrivatePtrs) { 2043 Address Replacement(CGF.Builder.CreateLoad(Pair.second), 2044 CGF.getContext().getDeclAlign(Pair.first)); 2045 Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); 2046 } 2047 } 2048 (void)Scope.Privatize(); 2049 if (*PartId) { 2050 // TODO: emit code for untied tasks. 
2051 } 2052 CGF.EmitStmt(CS->getCapturedStmt()); 2053 }; 2054 auto OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( 2055 S, *I, OMPD_task, CodeGen); 2056 // Check if we should emit tied or untied task. 2057 bool Tied = !S.getSingleClause<OMPUntiedClause>(); 2058 // Check if the task is final 2059 llvm::PointerIntPair<llvm::Value *, 1, bool> Final; 2060 if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) { 2061 // If the condition constant folds and can be elided, try to avoid emitting 2062 // the condition and the dead arm of the if/else. 2063 auto *Cond = Clause->getCondition(); 2064 bool CondConstant; 2065 if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) 2066 Final.setInt(CondConstant); 2067 else 2068 Final.setPointer(EvaluateExprAsBool(Cond)); 2069 } else { 2070 // By default the task is not final. 2071 Final.setInt(/*IntVal=*/false); 2072 } 2073 auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 2074 const Expr *IfCond = nullptr; 2075 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 2076 if (C->getNameModifier() == OMPD_unknown || 2077 C->getNameModifier() == OMPD_task) { 2078 IfCond = C->getCondition(); 2079 break; 2080 } 2081 } 2082 CGM.getOpenMPRuntime().emitTaskCall( 2083 *this, S.getLocStart(), S, Tied, Final, OutlinedFn, SharedsTy, 2084 CapturedStruct, IfCond, PrivateVars, PrivateCopies, FirstprivateVars, 2085 FirstprivateCopies, FirstprivateInits, Dependences); 2086 } 2087 2088 void CodeGenFunction::EmitOMPTaskyieldDirective( 2089 const OMPTaskyieldDirective &S) { 2090 CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart()); 2091 } 2092 2093 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { 2094 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier); 2095 } 2096 2097 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { 2098 CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart()); 2099 } 2100 2101 void CodeGenFunction::EmitOMPTaskgroupDirective( 2102 const OMPTaskgroupDirective &S) { 2103 LexicalScope Scope(*this, S.getSourceRange()); 2104 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2105 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2106 }; 2107 CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart()); 2108 } 2109 2110 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { 2111 CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> { 2112 if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) { 2113 return llvm::makeArrayRef(FlushClause->varlist_begin(), 2114 FlushClause->varlist_end()); 2115 } 2116 return llvm::None; 2117 }(), S.getLocStart()); 2118 } 2119 2120 void CodeGenFunction::EmitOMPDistributeDirective( 2121 const OMPDistributeDirective &S) { 2122 llvm_unreachable("CodeGen for 'omp distribute' is not supported yet."); 2123 } 2124 2125 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, 2126 const CapturedStmt *S) { 2127 CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); 2128 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; 2129 CGF.CapturedStmtInfo = &CapStmtInfo; 2130 auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S); 2131 Fn->addFnAttr(llvm::Attribute::NoInline); 2132 return Fn; 2133 } 2134 2135 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { 2136 if (!S.getAssociatedStmt()) 2137 return; 2138 LexicalScope Scope(*this, S.getSourceRange()); 2139 auto *C = 
S.getSingleClause<OMPSIMDClause>(); 2140 auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF) { 2141 if (C) { 2142 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2143 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 2144 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 2145 auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); 2146 CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars); 2147 } else { 2148 CGF.EmitStmt( 2149 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2150 } 2151 }; 2152 CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C); 2153 } 2154 2155 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val, 2156 QualType SrcType, QualType DestType, 2157 SourceLocation Loc) { 2158 assert(CGF.hasScalarEvaluationKind(DestType) && 2159 "DestType must have scalar evaluation kind."); 2160 assert(!Val.isAggregate() && "Must be a scalar or complex."); 2161 return Val.isScalar() 2162 ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType, 2163 Loc) 2164 : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType, 2165 DestType, Loc); 2166 } 2167 2168 static CodeGenFunction::ComplexPairTy 2169 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, 2170 QualType DestType, SourceLocation Loc) { 2171 assert(CGF.getEvaluationKind(DestType) == TEK_Complex && 2172 "DestType must have complex evaluation kind."); 2173 CodeGenFunction::ComplexPairTy ComplexVal; 2174 if (Val.isScalar()) { 2175 // Convert the input element to the element type of the complex. 2176 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 2177 auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, 2178 DestElementType, Loc); 2179 ComplexVal = CodeGenFunction::ComplexPairTy( 2180 ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType())); 2181 } else { 2182 assert(Val.isComplex() && "Must be a scalar or complex."); 2183 auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType(); 2184 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 2185 ComplexVal.first = CGF.EmitScalarConversion( 2186 Val.getComplexVal().first, SrcElementType, DestElementType, Loc); 2187 ComplexVal.second = CGF.EmitScalarConversion( 2188 Val.getComplexVal().second, SrcElementType, DestElementType, Loc); 2189 } 2190 return ComplexVal; 2191 } 2192 2193 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst, 2194 LValue LVal, RValue RVal) { 2195 if (LVal.isGlobalReg()) { 2196 CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal); 2197 } else { 2198 CGF.EmitAtomicStore(RVal, LVal, IsSeqCst ? 
llvm::SequentiallyConsistent 2199 : llvm::Monotonic, 2200 LVal.isVolatile(), /*IsInit=*/false); 2201 } 2202 } 2203 2204 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal, 2205 QualType RValTy, SourceLocation Loc) { 2206 switch (getEvaluationKind(LVal.getType())) { 2207 case TEK_Scalar: 2208 EmitStoreThroughLValue(RValue::get(convertToScalarValue( 2209 *this, RVal, RValTy, LVal.getType(), Loc)), 2210 LVal); 2211 break; 2212 case TEK_Complex: 2213 EmitStoreOfComplex( 2214 convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal, 2215 /*isInit=*/false); 2216 break; 2217 case TEK_Aggregate: 2218 llvm_unreachable("Must be a scalar or complex."); 2219 } 2220 } 2221 2222 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst, 2223 const Expr *X, const Expr *V, 2224 SourceLocation Loc) { 2225 // v = x; 2226 assert(V->isLValue() && "V of 'omp atomic read' is not lvalue"); 2227 assert(X->isLValue() && "X of 'omp atomic read' is not lvalue"); 2228 LValue XLValue = CGF.EmitLValue(X); 2229 LValue VLValue = CGF.EmitLValue(V); 2230 RValue Res = XLValue.isGlobalReg() 2231 ? CGF.EmitLoadOfLValue(XLValue, Loc) 2232 : CGF.EmitAtomicLoad(XLValue, Loc, 2233 IsSeqCst ? llvm::SequentiallyConsistent 2234 : llvm::Monotonic, 2235 XLValue.isVolatile()); 2236 // OpenMP, 2.12.6, atomic Construct 2237 // Any atomic construct with a seq_cst clause forces the atomically 2238 // performed operation to include an implicit flush operation without a 2239 // list. 2240 if (IsSeqCst) 2241 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 2242 CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc); 2243 } 2244 2245 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst, 2246 const Expr *X, const Expr *E, 2247 SourceLocation Loc) { 2248 // x = expr; 2249 assert(X->isLValue() && "X of 'omp atomic write' is not lvalue"); 2250 emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E)); 2251 // OpenMP, 2.12.6, atomic Construct 2252 // Any atomic construct with a seq_cst clause forces the atomically 2253 // performed operation to include an implicit flush operation without a 2254 // list. 2255 if (IsSeqCst) 2256 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 2257 } 2258 2259 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, 2260 RValue Update, 2261 BinaryOperatorKind BO, 2262 llvm::AtomicOrdering AO, 2263 bool IsXLHSInRHSPart) { 2264 auto &Context = CGF.CGM.getContext(); 2265 // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x' 2266 // expression is simple and atomic is allowed for the given type for the 2267 // target platform. 
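// For example, '#pragma omp atomic update' on 'x += 5;' with an integer 'x'
// can be lowered to a single 'atomicrmw add'; updates that fail these checks
// are handled by the callers via a compare-and-swap sequence instead.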
2268 if (BO == BO_Comma || !Update.isScalar() ||
2269 !Update.getScalarVal()->getType()->isIntegerTy() ||
2270 !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
2271 (Update.getScalarVal()->getType() !=
2272 X.getAddress().getElementType())) ||
2273 !X.getAddress().getElementType()->isIntegerTy() ||
2274 !Context.getTargetInfo().hasBuiltinAtomic(
2275 Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
2276 return std::make_pair(false, RValue::get(nullptr));
2277 
2278 llvm::AtomicRMWInst::BinOp RMWOp;
2279 switch (BO) {
2280 case BO_Add:
2281 RMWOp = llvm::AtomicRMWInst::Add;
2282 break;
2283 case BO_Sub:
2284 if (!IsXLHSInRHSPart)
2285 return std::make_pair(false, RValue::get(nullptr));
2286 RMWOp = llvm::AtomicRMWInst::Sub;
2287 break;
2288 case BO_And:
2289 RMWOp = llvm::AtomicRMWInst::And;
2290 break;
2291 case BO_Or:
2292 RMWOp = llvm::AtomicRMWInst::Or;
2293 break;
2294 case BO_Xor:
2295 RMWOp = llvm::AtomicRMWInst::Xor;
2296 break;
2297 case BO_LT:
2298 RMWOp = X.getType()->hasSignedIntegerRepresentation()
2299 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
2300 : llvm::AtomicRMWInst::Max)
2301 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
2302 : llvm::AtomicRMWInst::UMax);
2303 break;
2304 case BO_GT:
2305 RMWOp = X.getType()->hasSignedIntegerRepresentation()
2306 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
2307 : llvm::AtomicRMWInst::Min)
2308 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
2309 : llvm::AtomicRMWInst::UMin);
2310 break;
2311 case BO_Assign:
2312 RMWOp = llvm::AtomicRMWInst::Xchg;
2313 break;
2314 case BO_Mul:
2315 case BO_Div:
2316 case BO_Rem:
2317 case BO_Shl:
2318 case BO_Shr:
2319 case BO_LAnd:
2320 case BO_LOr:
2321 return std::make_pair(false, RValue::get(nullptr));
2322 case BO_PtrMemD:
2323 case BO_PtrMemI:
2324 case BO_LE:
2325 case BO_GE:
2326 case BO_EQ:
2327 case BO_NE:
2328 case BO_AddAssign:
2329 case BO_SubAssign:
2330 case BO_AndAssign:
2331 case BO_OrAssign:
2332 case BO_XorAssign:
2333 case BO_MulAssign:
2334 case BO_DivAssign:
2335 case BO_RemAssign:
2336 case BO_ShlAssign:
2337 case BO_ShrAssign:
2338 case BO_Comma:
2339 llvm_unreachable("Unsupported atomic update operation");
2340 }
2341 auto *UpdateVal = Update.getScalarVal();
2342 if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
2343 UpdateVal = CGF.Builder.CreateIntCast(
2344 IC, X.getAddress().getElementType(),
2345 X.getType()->hasSignedIntegerRepresentation());
2346 }
2347 auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
2348 return std::make_pair(true, RValue::get(Res));
2349 }
2350 
2351 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
2352 LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
2353 llvm::AtomicOrdering AO, SourceLocation Loc,
2354 const llvm::function_ref<RValue(RValue)> &CommonGen) {
2355 // Update expressions are allowed to have the following forms:
2356 // x binop= expr; -> xrval binop expr;
2357 // x++, ++x -> xrval + 1;
2358 // x--, --x -> xrval - 1;
2359 // x = x binop expr; -> xrval binop expr;
2360 // x = expr Op x; -> expr binop xrval;
2361 auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
2362 if (!Res.first) {
2363 if (X.isGlobalReg()) {
2364 // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
2365 // 'xrval'.
2366 EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
2367 } else {
2368 // Perform compare-and-swap procedure.
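// (EmitAtomicUpdate reads 'x', evaluates the update via CommonGen, and
// retries the store, e.g. in a cmpxchg loop, until no other thread has
// modified 'x' in between.)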
2369 EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
2370 }
2371 }
2372 return Res;
2373 }
2374 
2375 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
2376 const Expr *X, const Expr *E,
2377 const Expr *UE, bool IsXLHSInRHSPart,
2378 SourceLocation Loc) {
2379 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
2380 "Update expr in 'atomic update' must be a binary operator.");
2381 auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
2382 // Update expressions are allowed to have the following forms:
2383 // x binop= expr; -> xrval binop expr;
2384 // x++, ++x -> xrval + 1;
2385 // x--, --x -> xrval - 1;
2386 // x = x binop expr; -> xrval binop expr;
2387 // x = expr Op x; -> expr binop xrval;
2388 assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
2389 LValue XLValue = CGF.EmitLValue(X);
2390 RValue ExprRValue = CGF.EmitAnyExpr(E);
2391 auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;
2392 auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
2393 auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
2394 auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
2395 auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
2396 auto Gen =
2397 [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
2398 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
2399 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
2400 return CGF.EmitAnyExpr(UE);
2401 };
2402 (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
2403 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
2404 // OpenMP, 2.12.6, atomic Construct
2405 // Any atomic construct with a seq_cst clause forces the atomically
2406 // performed operation to include an implicit flush operation without a
2407 // list.
2408 if (IsSeqCst)
2409 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
2410 }
2411 
2412 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
2413 QualType SourceType, QualType ResType,
2414 SourceLocation Loc) {
2415 switch (CGF.getEvaluationKind(ResType)) {
2416 case TEK_Scalar:
2417 return RValue::get(
2418 convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
2419 case TEK_Complex: {
2420 auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
2421 return RValue::getComplex(Res.first, Res.second);
2422 }
2423 case TEK_Aggregate:
2424 break;
2425 }
2426 llvm_unreachable("Must be a scalar or complex.");
2427 }
2428 
2429 static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
2430 bool IsPostfixUpdate, const Expr *V,
2431 const Expr *X, const Expr *E,
2432 const Expr *UE, bool IsXLHSInRHSPart,
2433 SourceLocation Loc) {
2434 assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
2435 assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
2436 RValue NewVVal;
2437 LValue VLValue = CGF.EmitLValue(V);
2438 LValue XLValue = CGF.EmitLValue(X);
2439 RValue ExprRValue = CGF.EmitAnyExpr(E);
2440 auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;
2441 QualType NewVValType;
2442 if (UE) {
2443 // 'x' is updated with some additional value.
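// E.g. 'v = x++;' or the block form '{v = x; x binop= expr;}', as opposed
// to the plain exchange form 'v = x = expr;' handled in the else branch.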
2444 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
2445 "Update expr in 'atomic capture' must be a binary operator.");
2446 auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
2447 // Update expressions are allowed to have the following forms:
2448 // x binop= expr; -> xrval binop expr;
2449 // x++, ++x -> xrval + 1;
2450 // x--, --x -> xrval - 1;
2451 // x = x binop expr; -> xrval binop expr;
2452 // x = expr Op x; -> expr binop xrval;
2453 auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
2454 auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
2455 auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
2456 NewVValType = XRValExpr->getType();
2457 auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
2458 auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
2459 IsSeqCst, IsPostfixUpdate](RValue XRValue) -> RValue {
2460 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
2461 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
2462 RValue Res = CGF.EmitAnyExpr(UE);
2463 NewVVal = IsPostfixUpdate ? XRValue : Res;
2464 return Res;
2465 };
2466 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
2467 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
2468 if (Res.first) {
2469 // 'atomicrmw' instruction was generated.
2470 if (IsPostfixUpdate) {
2471 // Use old value from 'atomicrmw'.
2472 NewVVal = Res.second;
2473 } else {
2474 // 'atomicrmw' does not provide new value, so evaluate it using old
2475 // value of 'x'.
2476 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
2477 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
2478 NewVVal = CGF.EmitAnyExpr(UE);
2479 }
2480 }
2481 } else {
2482 // 'x' is simply rewritten with some 'expr'.
2483 NewVValType = X->getType().getNonReferenceType();
2484 ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
2485 X->getType().getNonReferenceType(), Loc);
2486 auto &&Gen = [&CGF, &NewVVal, ExprRValue](RValue XRValue) -> RValue {
2487 NewVVal = XRValue;
2488 return ExprRValue;
2489 };
2490 // Try to perform atomicrmw xchg, otherwise simple exchange.
2491 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
2492 XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
2493 Loc, Gen);
2494 if (Res.first) {
2495 // 'atomicrmw' instruction was generated.
2496 NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
2497 }
2498 }
2499 // Emit post-update store to 'v' of old/new 'x' value.
2500 CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
2501 // OpenMP, 2.12.6, atomic Construct
2502 // Any atomic construct with a seq_cst clause forces the atomically
2503 // performed operation to include an implicit flush operation without a
2504 // list.
2505 if (IsSeqCst) 2506 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 2507 } 2508 2509 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, 2510 bool IsSeqCst, bool IsPostfixUpdate, 2511 const Expr *X, const Expr *V, const Expr *E, 2512 const Expr *UE, bool IsXLHSInRHSPart, 2513 SourceLocation Loc) { 2514 switch (Kind) { 2515 case OMPC_read: 2516 EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc); 2517 break; 2518 case OMPC_write: 2519 EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc); 2520 break; 2521 case OMPC_unknown: 2522 case OMPC_update: 2523 EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc); 2524 break; 2525 case OMPC_capture: 2526 EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE, 2527 IsXLHSInRHSPart, Loc); 2528 break; 2529 case OMPC_if: 2530 case OMPC_final: 2531 case OMPC_num_threads: 2532 case OMPC_private: 2533 case OMPC_firstprivate: 2534 case OMPC_lastprivate: 2535 case OMPC_reduction: 2536 case OMPC_safelen: 2537 case OMPC_simdlen: 2538 case OMPC_collapse: 2539 case OMPC_default: 2540 case OMPC_seq_cst: 2541 case OMPC_shared: 2542 case OMPC_linear: 2543 case OMPC_aligned: 2544 case OMPC_copyin: 2545 case OMPC_copyprivate: 2546 case OMPC_flush: 2547 case OMPC_proc_bind: 2548 case OMPC_schedule: 2549 case OMPC_ordered: 2550 case OMPC_nowait: 2551 case OMPC_untied: 2552 case OMPC_threadprivate: 2553 case OMPC_depend: 2554 case OMPC_mergeable: 2555 case OMPC_device: 2556 case OMPC_threads: 2557 case OMPC_simd: 2558 case OMPC_map: 2559 case OMPC_num_teams: 2560 case OMPC_thread_limit: 2561 case OMPC_priority: 2562 case OMPC_grainsize: 2563 case OMPC_nogroup: 2564 case OMPC_num_tasks: 2565 case OMPC_hint: 2566 case OMPC_dist_schedule: 2567 case OMPC_defaultmap: 2568 llvm_unreachable("Clause is not allowed in 'omp atomic'."); 2569 } 2570 } 2571 2572 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { 2573 bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>(); 2574 OpenMPClauseKind Kind = OMPC_unknown; 2575 for (auto *C : S.clauses()) { 2576 // Find first clause (skip seq_cst clause, if it is first). 2577 if (C->getClauseKind() != OMPC_seq_cst) { 2578 Kind = C->getClauseKind(); 2579 break; 2580 } 2581 } 2582 2583 const auto *CS = 2584 S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 2585 if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) { 2586 enterFullExpression(EWC); 2587 } 2588 // Processing for statements under 'atomic capture'. 
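// A capture written as a structured block, e.g. '{v = x; x binop= expr;}',
// arrives here as a CompoundStmt; enter each full expression so that its
// cleanups are set up before the atomic expression is emitted.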
2589 if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) { 2590 for (const auto *C : Compound->body()) { 2591 if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) { 2592 enterFullExpression(EWC); 2593 } 2594 } 2595 } 2596 2597 LexicalScope Scope(*this, S.getSourceRange()); 2598 auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF) { 2599 CGF.EmitStopPoint(CS); 2600 EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(), 2601 S.getV(), S.getExpr(), S.getUpdateExpr(), 2602 S.isXLHSInRHSPart(), S.getLocStart()); 2603 }; 2604 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen); 2605 } 2606 2607 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { 2608 LexicalScope Scope(*this, S.getSourceRange()); 2609 const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt()); 2610 2611 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 2612 GenerateOpenMPCapturedVars(CS, CapturedVars); 2613 2614 llvm::Function *Fn = nullptr; 2615 llvm::Constant *FnID = nullptr; 2616 2617 // Check if we have any if clause associated with the directive. 2618 const Expr *IfCond = nullptr; 2619 2620 if (auto *C = S.getSingleClause<OMPIfClause>()) { 2621 IfCond = C->getCondition(); 2622 } 2623 2624 // Check if we have any device clause associated with the directive. 2625 const Expr *Device = nullptr; 2626 if (auto *C = S.getSingleClause<OMPDeviceClause>()) { 2627 Device = C->getDevice(); 2628 } 2629 2630 // Check if we have an if clause whose conditional always evaluates to false 2631 // or if we do not have any targets specified. If so the target region is not 2632 // an offload entry point. 2633 bool IsOffloadEntry = true; 2634 if (IfCond) { 2635 bool Val; 2636 if (ConstantFoldsToSimpleInteger(IfCond, Val) && !Val) 2637 IsOffloadEntry = false; 2638 } 2639 if (CGM.getLangOpts().OMPTargetTriples.empty()) 2640 IsOffloadEntry = false; 2641 2642 assert(CurFuncDecl && "No parent declaration for target region!"); 2643 StringRef ParentName; 2644 // In case we have Ctors/Dtors we use the complete type variant to produce 2645 // the mangling of the device outlined kernel. 
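// (Using one fixed variant keeps the name of the offloaded region stable no
// matter which structor variant the host happens to be emitting.)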
2646 if (auto *D = dyn_cast<CXXConstructorDecl>(CurFuncDecl)) 2647 ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete)); 2648 else if (auto *D = dyn_cast<CXXDestructorDecl>(CurFuncDecl)) 2649 ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete)); 2650 else 2651 ParentName = 2652 CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CurFuncDecl))); 2653 2654 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, 2655 IsOffloadEntry); 2656 2657 CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, FnID, IfCond, Device, 2658 CapturedVars); 2659 } 2660 2661 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) { 2662 llvm_unreachable("CodeGen for 'omp teams' is not supported yet."); 2663 } 2664 2665 void CodeGenFunction::EmitOMPCancellationPointDirective( 2666 const OMPCancellationPointDirective &S) { 2667 CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(), 2668 S.getCancelRegion()); 2669 } 2670 2671 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { 2672 const Expr *IfCond = nullptr; 2673 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 2674 if (C->getNameModifier() == OMPD_unknown || 2675 C->getNameModifier() == OMPD_cancel) { 2676 IfCond = C->getCondition(); 2677 break; 2678 } 2679 } 2680 CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond, 2681 S.getCancelRegion()); 2682 } 2683 2684 CodeGenFunction::JumpDest 2685 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { 2686 if (Kind == OMPD_parallel || Kind == OMPD_task) 2687 return ReturnBlock; 2688 assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections || 2689 Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for); 2690 return BreakContinueStack.back().BreakBlock; 2691 } 2692 2693 // Generate the instructions for '#pragma omp target data' directive. 2694 void CodeGenFunction::EmitOMPTargetDataDirective( 2695 const OMPTargetDataDirective &S) { 2696 // emit the code inside the construct for now 2697 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2698 CGM.getOpenMPRuntime().emitInlinedDirective( 2699 *this, OMPD_target_data, 2700 [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); }); 2701 } 2702 2703 void CodeGenFunction::EmitOMPTargetEnterDataDirective( 2704 const OMPTargetEnterDataDirective &S) { 2705 // TODO: codegen for target enter data. 2706 } 2707 2708 void CodeGenFunction::EmitOMPTargetExitDataDirective( 2709 const OMPTargetExitDataDirective &S) { 2710 // TODO: codegen for target exit data. 2711 } 2712 2713 void CodeGenFunction::EmitOMPTargetParallelDirective( 2714 const OMPTargetParallelDirective &S) { 2715 // TODO: codegen for target parallel. 2716 } 2717 2718 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { 2719 // emit the code inside the construct for now 2720 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2721 CGM.getOpenMPRuntime().emitInlinedDirective( 2722 *this, OMPD_taskloop, 2723 [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); }); 2724 } 2725 2726 void CodeGenFunction::EmitOMPTaskLoopSimdDirective( 2727 const OMPTaskLoopSimdDirective &S) { 2728 // emit the code inside the construct for now 2729 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2730 CGM.getOpenMPRuntime().emitInlinedDirective( 2731 *this, OMPD_taskloop_simd, 2732 [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); }); 2733 } 2734 2735