1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This contains code to emit OpenMP nodes as LLVM code. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CGCleanup.h" 15 #include "CGOpenMPRuntime.h" 16 #include "CodeGenFunction.h" 17 #include "CodeGenModule.h" 18 #include "TargetInfo.h" 19 #include "clang/AST/Stmt.h" 20 #include "clang/AST/StmtOpenMP.h" 21 #include "clang/AST/DeclOpenMP.h" 22 #include "llvm/IR/CallSite.h" 23 using namespace clang; 24 using namespace CodeGen; 25 26 namespace { 27 /// Lexical scope for OpenMP executable constructs, that handles correct codegen 28 /// for captured expressions. 29 class OMPLexicalScope : public CodeGenFunction::LexicalScope { 30 void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) { 31 for (const auto *C : S.clauses()) { 32 if (auto *CPI = OMPClauseWithPreInit::get(C)) { 33 if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) { 34 for (const auto *I : PreInit->decls()) { 35 if (!I->hasAttr<OMPCaptureNoInitAttr>()) 36 CGF.EmitVarDecl(cast<VarDecl>(*I)); 37 else { 38 CodeGenFunction::AutoVarEmission Emission = 39 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 40 CGF.EmitAutoVarCleanups(Emission); 41 } 42 } 43 } 44 } 45 } 46 } 47 CodeGenFunction::OMPPrivateScope InlinedShareds; 48 49 static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) { 50 return CGF.LambdaCaptureFields.lookup(VD) || 51 (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) || 52 (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl)); 53 } 54 55 public: 56 OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S, 57 bool AsInlined = false, bool 
EmitPreInitStmt = true) 58 : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()), 59 InlinedShareds(CGF) { 60 if (EmitPreInitStmt) 61 emitPreInitStmt(CGF, S); 62 if (AsInlined) { 63 if (S.hasAssociatedStmt()) { 64 auto *CS = cast<CapturedStmt>(S.getAssociatedStmt()); 65 for (auto &C : CS->captures()) { 66 if (C.capturesVariable() || C.capturesVariableByCopy()) { 67 auto *VD = C.getCapturedVar(); 68 DeclRefExpr DRE(const_cast<VarDecl *>(VD), 69 isCapturedVar(CGF, VD) || 70 (CGF.CapturedStmtInfo && 71 InlinedShareds.isGlobalVarCaptured(VD)), 72 VD->getType().getNonReferenceType(), VK_LValue, 73 SourceLocation()); 74 InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address { 75 return CGF.EmitLValue(&DRE).getAddress(); 76 }); 77 } 78 } 79 (void)InlinedShareds.Privatize(); 80 } 81 } 82 } 83 }; 84 85 /// Lexical scope for OpenMP parallel construct, that handles correct codegen 86 /// for captured expressions. 87 class OMPParallelScope final : public OMPLexicalScope { 88 bool EmitPreInitStmt(const OMPExecutableDirective &S) { 89 OpenMPDirectiveKind Kind = S.getDirectiveKind(); 90 return !(isOpenMPTargetExecutionDirective(Kind) || 91 isOpenMPLoopBoundSharingDirective(Kind)) && 92 isOpenMPParallelDirective(Kind); 93 } 94 95 public: 96 OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) 97 : OMPLexicalScope(CGF, S, 98 /*AsInlined=*/false, 99 /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {} 100 }; 101 102 /// Lexical scope for OpenMP teams construct, that handles correct codegen 103 /// for captured expressions. 
104 class OMPTeamsScope final : public OMPLexicalScope { 105 bool EmitPreInitStmt(const OMPExecutableDirective &S) { 106 OpenMPDirectiveKind Kind = S.getDirectiveKind(); 107 return !isOpenMPTargetExecutionDirective(Kind) && 108 isOpenMPTeamsDirective(Kind); 109 } 110 111 public: 112 OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) 113 : OMPLexicalScope(CGF, S, 114 /*AsInlined=*/false, 115 /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {} 116 }; 117 118 /// Private scope for OpenMP loop-based directives, that supports capturing 119 /// of used expression from loop statement. 120 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { 121 void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) { 122 if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) { 123 if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) { 124 for (const auto *I : PreInits->decls()) 125 CGF.EmitVarDecl(cast<VarDecl>(*I)); 126 } 127 } 128 } 129 130 public: 131 OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S) 132 : CodeGenFunction::RunCleanupsScope(CGF) { 133 emitPreInitStmt(CGF, S); 134 } 135 }; 136 137 } // namespace 138 139 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) { 140 auto &C = getContext(); 141 llvm::Value *Size = nullptr; 142 auto SizeInChars = C.getTypeSizeInChars(Ty); 143 if (SizeInChars.isZero()) { 144 // getTypeSizeInChars() returns 0 for a VLA. 145 while (auto *VAT = C.getAsVariableArrayType(Ty)) { 146 llvm::Value *ArraySize; 147 std::tie(ArraySize, Ty) = getVLASize(VAT); 148 Size = Size ? 
Builder.CreateNUWMul(Size, ArraySize) : ArraySize; 149 } 150 SizeInChars = C.getTypeSizeInChars(Ty); 151 if (SizeInChars.isZero()) 152 return llvm::ConstantInt::get(SizeTy, /*V=*/0); 153 Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars)); 154 } else 155 Size = CGM.getSize(SizeInChars); 156 return Size; 157 } 158 159 void CodeGenFunction::GenerateOpenMPCapturedVars( 160 const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) { 161 const RecordDecl *RD = S.getCapturedRecordDecl(); 162 auto CurField = RD->field_begin(); 163 auto CurCap = S.captures().begin(); 164 for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(), 165 E = S.capture_init_end(); 166 I != E; ++I, ++CurField, ++CurCap) { 167 if (CurField->hasCapturedVLAType()) { 168 auto VAT = CurField->getCapturedVLAType(); 169 auto *Val = VLASizeMap[VAT->getSizeExpr()]; 170 CapturedVars.push_back(Val); 171 } else if (CurCap->capturesThis()) 172 CapturedVars.push_back(CXXThisValue); 173 else if (CurCap->capturesVariableByCopy()) { 174 llvm::Value *CV = 175 EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal(); 176 177 // If the field is not a pointer, we need to save the actual value 178 // and load it as a void pointer. 179 if (!CurField->getType()->isAnyPointerType()) { 180 auto &Ctx = getContext(); 181 auto DstAddr = CreateMemTemp( 182 Ctx.getUIntPtrType(), 183 Twine(CurCap->getCapturedVar()->getName()) + ".casted"); 184 LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType()); 185 186 auto *SrcAddrVal = EmitScalarConversion( 187 DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()), 188 Ctx.getPointerType(CurField->getType()), SourceLocation()); 189 LValue SrcLV = 190 MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType()); 191 192 // Store the value using the source type pointer. 193 EmitStoreThroughLValue(RValue::get(CV), SrcLV); 194 195 // Load the value using the destination type pointer. 
196 CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal(); 197 } 198 CapturedVars.push_back(CV); 199 } else { 200 assert(CurCap->capturesVariable() && "Expected capture by reference."); 201 CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer()); 202 } 203 } 204 } 205 206 static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType, 207 StringRef Name, LValue AddrLV, 208 bool isReferenceType = false) { 209 ASTContext &Ctx = CGF.getContext(); 210 211 auto *CastedPtr = CGF.EmitScalarConversion( 212 AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(), 213 Ctx.getPointerType(DstType), SourceLocation()); 214 auto TmpAddr = 215 CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType)) 216 .getAddress(); 217 218 // If we are dealing with references we need to return the address of the 219 // reference instead of the reference of the value. 220 if (isReferenceType) { 221 QualType RefType = Ctx.getLValueReferenceType(DstType); 222 auto *RefVal = TmpAddr.getPointer(); 223 TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref"); 224 auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType); 225 CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true); 226 } 227 228 return TmpAddr; 229 } 230 231 static QualType getCanonicalParamType(ASTContext &C, QualType T) { 232 if (T->isLValueReferenceType()) { 233 return C.getLValueReferenceType( 234 getCanonicalParamType(C, T.getNonReferenceType()), 235 /*SpelledAsLValue=*/false); 236 } 237 if (T->isPointerType()) 238 return C.getPointerType(getCanonicalParamType(C, T->getPointeeType())); 239 return C.getCanonicalParamType(T); 240 } 241 242 namespace { 243 /// Contains required data for proper outlined function codegen. 244 struct FunctionOptions { 245 /// Captured statement for which the function is generated. 246 const CapturedStmt *S = nullptr; 247 /// true if cast to/from UIntPtr is required for variables captured by 248 /// value. 
249 bool UIntPtrCastRequired = true; 250 /// true if only casted argumefnts must be registered as local args or VLA 251 /// sizes. 252 bool RegisterCastedArgsOnly = false; 253 /// Name of the generated function. 254 StringRef FunctionName; 255 explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired, 256 bool RegisterCastedArgsOnly, 257 StringRef FunctionName) 258 : S(S), UIntPtrCastRequired(UIntPtrCastRequired), 259 RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly), 260 FunctionName(FunctionName) {} 261 }; 262 } 263 264 static std::pair<llvm::Function *, bool> emitOutlinedFunctionPrologue( 265 CodeGenFunction &CGF, FunctionArgList &Args, 266 llvm::DenseMap<const Decl *, std::pair<const VarDecl *, Address>> 267 &LocalAddrs, 268 llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> 269 &VLASizes, 270 llvm::Value *&CXXThisValue, const FunctionOptions &FO) { 271 const CapturedDecl *CD = FO.S->getCapturedDecl(); 272 const RecordDecl *RD = FO.S->getCapturedRecordDecl(); 273 assert(CD->hasBody() && "missing CapturedDecl body"); 274 275 CXXThisValue = nullptr; 276 // Build the argument list. 277 CodeGenModule &CGM = CGF.CGM; 278 ASTContext &Ctx = CGM.getContext(); 279 bool HasUIntPtrArgs = false; 280 Args.append(CD->param_begin(), 281 std::next(CD->param_begin(), CD->getContextParamPosition())); 282 auto I = FO.S->captures().begin(); 283 for (auto *FD : RD->fields()) { 284 QualType ArgType = FD->getType(); 285 IdentifierInfo *II = nullptr; 286 VarDecl *CapVar = nullptr; 287 288 // If this is a capture by copy and the type is not a pointer, the outlined 289 // function argument type should be uintptr and the value properly casted to 290 // uintptr. This is necessary given that the runtime library is only able to 291 // deal with pointers. We can pass in the same way the VLA type sizes to the 292 // outlined function. 
293 if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) || 294 I->capturesVariableArrayType()) { 295 HasUIntPtrArgs = true; 296 if (FO.UIntPtrCastRequired) 297 ArgType = Ctx.getUIntPtrType(); 298 } 299 300 if (I->capturesVariable() || I->capturesVariableByCopy()) { 301 CapVar = I->getCapturedVar(); 302 II = CapVar->getIdentifier(); 303 } else if (I->capturesThis()) 304 II = &Ctx.Idents.get("this"); 305 else { 306 assert(I->capturesVariableArrayType()); 307 II = &Ctx.Idents.get("vla"); 308 } 309 if (ArgType->isVariablyModifiedType()) 310 ArgType = getCanonicalParamType(Ctx, ArgType.getNonReferenceType()); 311 Args.push_back(ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, 312 FD->getLocation(), II, ArgType, 313 ImplicitParamDecl::Other)); 314 ++I; 315 } 316 Args.append( 317 std::next(CD->param_begin(), CD->getContextParamPosition() + 1), 318 CD->param_end()); 319 320 // Create the function declaration. 321 FunctionType::ExtInfo ExtInfo; 322 const CGFunctionInfo &FuncInfo = 323 CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args); 324 llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo); 325 326 llvm::Function *F = 327 llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage, 328 FO.FunctionName, &CGM.getModule()); 329 CGM.SetInternalFunctionAttributes(CD, F, FuncInfo); 330 if (CD->isNothrow()) 331 F->addFnAttr(llvm::Attribute::NoUnwind); 332 333 // Generate the function. 334 CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(), 335 CD->getBody()->getLocStart()); 336 unsigned Cnt = CD->getContextParamPosition(); 337 I = FO.S->captures().begin(); 338 for (auto *FD : RD->fields()) { 339 // If we are capturing a pointer by copy we don't need to do anything, just 340 // use the value that we get from the arguments. 
341 if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) { 342 const VarDecl *CurVD = I->getCapturedVar(); 343 Address LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]); 344 // If the variable is a reference we need to materialize it here. 345 if (CurVD->getType()->isReferenceType()) { 346 Address RefAddr = CGF.CreateMemTemp( 347 CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref"); 348 CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr, 349 /*Volatile=*/false, CurVD->getType()); 350 LocalAddr = RefAddr; 351 } 352 if (!FO.RegisterCastedArgsOnly) 353 LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}}); 354 ++Cnt; 355 ++I; 356 continue; 357 } 358 359 LValueBaseInfo BaseInfo(AlignmentSource::Decl, false); 360 LValue ArgLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(Args[Cnt]), 361 Args[Cnt]->getType(), BaseInfo); 362 if (FD->hasCapturedVLAType()) { 363 if (FO.UIntPtrCastRequired) { 364 ArgLVal = CGF.MakeAddrLValue(castValueFromUintptr(CGF, FD->getType(), 365 Args[Cnt]->getName(), 366 ArgLVal), 367 FD->getType(), BaseInfo); 368 } 369 auto *ExprArg = 370 CGF.EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal(); 371 auto VAT = FD->getCapturedVLAType(); 372 VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}}); 373 } else if (I->capturesVariable()) { 374 auto *Var = I->getCapturedVar(); 375 QualType VarTy = Var->getType(); 376 Address ArgAddr = ArgLVal.getAddress(); 377 if (!VarTy->isReferenceType()) { 378 if (ArgLVal.getType()->isLValueReferenceType()) { 379 ArgAddr = CGF.EmitLoadOfReference( 380 ArgAddr, ArgLVal.getType()->castAs<ReferenceType>()); 381 } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) { 382 assert(ArgLVal.getType()->isPointerType()); 383 ArgAddr = CGF.EmitLoadOfPointer( 384 ArgAddr, ArgLVal.getType()->castAs<PointerType>()); 385 } 386 } 387 if (!FO.RegisterCastedArgsOnly) { 388 LocalAddrs.insert( 389 {Args[Cnt], 390 {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}}); 391 } 392 } else 
if (I->capturesVariableByCopy()) { 393 assert(!FD->getType()->isAnyPointerType() && 394 "Not expecting a captured pointer."); 395 auto *Var = I->getCapturedVar(); 396 QualType VarTy = Var->getType(); 397 LocalAddrs.insert( 398 {Args[Cnt], 399 {Var, 400 FO.UIntPtrCastRequired 401 ? castValueFromUintptr(CGF, FD->getType(), Args[Cnt]->getName(), 402 ArgLVal, VarTy->isReferenceType()) 403 : ArgLVal.getAddress()}}); 404 } else { 405 // If 'this' is captured, load it into CXXThisValue. 406 assert(I->capturesThis()); 407 CXXThisValue = CGF.EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()) 408 .getScalarVal(); 409 LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}}); 410 } 411 ++Cnt; 412 ++I; 413 } 414 415 return {F, HasUIntPtrArgs}; 416 } 417 418 llvm::Function * 419 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { 420 assert( 421 CapturedStmtInfo && 422 "CapturedStmtInfo should be set when generating the captured function"); 423 const CapturedDecl *CD = S.getCapturedDecl(); 424 // Build the argument list. 
425 bool NeedWrapperFunction = 426 getDebugInfo() && 427 CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo; 428 FunctionArgList Args; 429 llvm::DenseMap<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs; 430 llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes; 431 FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false, 432 CapturedStmtInfo->getHelperName()); 433 llvm::Function *F; 434 bool HasUIntPtrArgs; 435 std::tie(F, HasUIntPtrArgs) = emitOutlinedFunctionPrologue( 436 *this, Args, LocalAddrs, VLASizes, CXXThisValue, FO); 437 for (const auto &LocalAddrPair : LocalAddrs) { 438 if (LocalAddrPair.second.first) { 439 setAddrOfLocalVar(LocalAddrPair.second.first, 440 LocalAddrPair.second.second); 441 } 442 } 443 for (const auto &VLASizePair : VLASizes) 444 VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second; 445 PGO.assignRegionCounters(GlobalDecl(CD), F); 446 CapturedStmtInfo->EmitBody(*this, CD->getBody()); 447 FinishFunction(CD->getBodyRBrace()); 448 if (!NeedWrapperFunction || !HasUIntPtrArgs) 449 return F; 450 451 FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true, 452 /*RegisterCastedArgsOnly=*/true, 453 ".nondebug_wrapper."); 454 CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true); 455 WrapperCGF.disableDebugInfo(); 456 Args.clear(); 457 LocalAddrs.clear(); 458 VLASizes.clear(); 459 llvm::Function *WrapperF = 460 emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes, 461 WrapperCGF.CXXThisValue, WrapperFO).first; 462 LValueBaseInfo BaseInfo(AlignmentSource::Decl, false); 463 llvm::SmallVector<llvm::Value *, 4> CallArgs; 464 for (const auto *Arg : Args) { 465 llvm::Value *CallArg; 466 auto I = LocalAddrs.find(Arg); 467 if (I != LocalAddrs.end()) { 468 LValue LV = 469 WrapperCGF.MakeAddrLValue(I->second.second, Arg->getType(), BaseInfo); 470 CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation()); 471 } else { 472 auto EI = 
VLASizes.find(Arg); 473 if (EI != VLASizes.end()) 474 CallArg = EI->second.second; 475 else { 476 LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg), 477 Arg->getType(), BaseInfo); 478 CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation()); 479 } 480 } 481 CallArgs.emplace_back(CallArg); 482 } 483 WrapperCGF.Builder.CreateCall(F, CallArgs); 484 WrapperCGF.FinishFunction(); 485 return WrapperF; 486 } 487 488 //===----------------------------------------------------------------------===// 489 // OpenMP Directive Emission 490 //===----------------------------------------------------------------------===// 491 void CodeGenFunction::EmitOMPAggregateAssign( 492 Address DestAddr, Address SrcAddr, QualType OriginalType, 493 const llvm::function_ref<void(Address, Address)> &CopyGen) { 494 // Perform element-by-element initialization. 495 QualType ElementTy; 496 497 // Drill down to the base element type on both arrays. 498 auto ArrayTy = OriginalType->getAsArrayTypeUnsafe(); 499 auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr); 500 SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 501 502 auto SrcBegin = SrcAddr.getPointer(); 503 auto DestBegin = DestAddr.getPointer(); 504 // Cast from pointer to array type to pointer to single element. 505 auto DestEnd = Builder.CreateGEP(DestBegin, NumElements); 506 // The basic structure here is a while-do loop. 507 auto BodyBB = createBasicBlock("omp.arraycpy.body"); 508 auto DoneBB = createBasicBlock("omp.arraycpy.done"); 509 auto IsEmpty = 510 Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty"); 511 Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 512 513 // Enter the loop body, making that address the current address. 
514 auto EntryBB = Builder.GetInsertBlock(); 515 EmitBlock(BodyBB); 516 517 CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy); 518 519 llvm::PHINode *SrcElementPHI = 520 Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 521 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 522 Address SrcElementCurrent = 523 Address(SrcElementPHI, 524 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 525 526 llvm::PHINode *DestElementPHI = 527 Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 528 DestElementPHI->addIncoming(DestBegin, EntryBB); 529 Address DestElementCurrent = 530 Address(DestElementPHI, 531 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 532 533 // Emit copy. 534 CopyGen(DestElementCurrent, SrcElementCurrent); 535 536 // Shift the address forward by one element. 537 auto DestElementNext = Builder.CreateConstGEP1_32( 538 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 539 auto SrcElementNext = Builder.CreateConstGEP1_32( 540 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 541 // Check whether we've reached the end. 542 auto Done = 543 Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 544 Builder.CreateCondBr(Done, DoneBB, BodyBB); 545 DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock()); 546 SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock()); 547 548 // Done. 549 EmitBlock(DoneBB, /*IsFinished=*/true); 550 } 551 552 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr, 553 Address SrcAddr, const VarDecl *DestVD, 554 const VarDecl *SrcVD, const Expr *Copy) { 555 if (OriginalType->isArrayType()) { 556 auto *BO = dyn_cast<BinaryOperator>(Copy); 557 if (BO && BO->getOpcode() == BO_Assign) { 558 // Perform simple memcpy for simple copying. 
559 EmitAggregateAssign(DestAddr, SrcAddr, OriginalType); 560 } else { 561 // For arrays with complex element types perform element by element 562 // copying. 563 EmitOMPAggregateAssign( 564 DestAddr, SrcAddr, OriginalType, 565 [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) { 566 // Working with the single array element, so have to remap 567 // destination and source variables to corresponding array 568 // elements. 569 CodeGenFunction::OMPPrivateScope Remap(*this); 570 Remap.addPrivate(DestVD, [DestElement]() -> Address { 571 return DestElement; 572 }); 573 Remap.addPrivate( 574 SrcVD, [SrcElement]() -> Address { return SrcElement; }); 575 (void)Remap.Privatize(); 576 EmitIgnoredExpr(Copy); 577 }); 578 } 579 } else { 580 // Remap pseudo source variable to private copy. 581 CodeGenFunction::OMPPrivateScope Remap(*this); 582 Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; }); 583 Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; }); 584 (void)Remap.Privatize(); 585 // Emit copying of the whole variable. 
586 EmitIgnoredExpr(Copy); 587 } 588 } 589 590 bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, 591 OMPPrivateScope &PrivateScope) { 592 if (!HaveInsertPoint()) 593 return false; 594 bool FirstprivateIsLastprivate = false; 595 llvm::DenseSet<const VarDecl *> Lastprivates; 596 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { 597 for (const auto *D : C->varlists()) 598 Lastprivates.insert( 599 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl()); 600 } 601 llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate; 602 CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt())); 603 for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) { 604 auto IRef = C->varlist_begin(); 605 auto InitsRef = C->inits().begin(); 606 for (auto IInit : C->private_copies()) { 607 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 608 bool ThisFirstprivateIsLastprivate = 609 Lastprivates.count(OrigVD->getCanonicalDecl()) > 0; 610 auto *CapFD = CapturesInfo.lookup(OrigVD); 611 auto *FD = CapturedStmtInfo->lookup(OrigVD); 612 if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) && 613 !FD->getType()->isReferenceType()) { 614 EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()); 615 ++IRef; 616 ++InitsRef; 617 continue; 618 } 619 FirstprivateIsLastprivate = 620 FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate; 621 if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) { 622 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); 623 auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl()); 624 bool IsRegistered; 625 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), 626 /*RefersToEnclosingVariableOrCapture=*/FD != nullptr, 627 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); 628 Address OriginalAddr = EmitLValue(&DRE).getAddress(); 629 QualType Type = VD->getType(); 630 if (Type->isArrayType()) { 631 // Emit VarDecl with copy init for 
arrays. 632 // Get the address of the original variable captured in current 633 // captured region. 634 IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { 635 auto Emission = EmitAutoVarAlloca(*VD); 636 auto *Init = VD->getInit(); 637 if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) { 638 // Perform simple memcpy. 639 EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr, 640 Type); 641 } else { 642 EmitOMPAggregateAssign( 643 Emission.getAllocatedAddress(), OriginalAddr, Type, 644 [this, VDInit, Init](Address DestElement, 645 Address SrcElement) { 646 // Clean up any temporaries needed by the initialization. 647 RunCleanupsScope InitScope(*this); 648 // Emit initialization for single element. 649 setAddrOfLocalVar(VDInit, SrcElement); 650 EmitAnyExprToMem(Init, DestElement, 651 Init->getType().getQualifiers(), 652 /*IsInitializer*/ false); 653 LocalDeclMap.erase(VDInit); 654 }); 655 } 656 EmitAutoVarCleanups(Emission); 657 return Emission.getAllocatedAddress(); 658 }); 659 } else { 660 IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { 661 // Emit private VarDecl with copy init. 662 // Remap temp VDInit variable to the address of the original 663 // variable 664 // (for proper handling of captured global variables). 665 setAddrOfLocalVar(VDInit, OriginalAddr); 666 EmitDecl(*VD); 667 LocalDeclMap.erase(VDInit); 668 return GetAddrOfLocalVar(VD); 669 }); 670 } 671 assert(IsRegistered && 672 "firstprivate var already registered as private"); 673 // Silence the warning about unused variable. 
674 (void)IsRegistered; 675 } 676 ++IRef; 677 ++InitsRef; 678 } 679 } 680 return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty(); 681 } 682 683 void CodeGenFunction::EmitOMPPrivateClause( 684 const OMPExecutableDirective &D, 685 CodeGenFunction::OMPPrivateScope &PrivateScope) { 686 if (!HaveInsertPoint()) 687 return; 688 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; 689 for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) { 690 auto IRef = C->varlist_begin(); 691 for (auto IInit : C->private_copies()) { 692 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 693 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 694 auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); 695 bool IsRegistered = 696 PrivateScope.addPrivate(OrigVD, [&]() -> Address { 697 // Emit private VarDecl with copy init. 698 EmitDecl(*VD); 699 return GetAddrOfLocalVar(VD); 700 }); 701 assert(IsRegistered && "private var already registered as private"); 702 // Silence the warning about unused variable. 703 (void)IsRegistered; 704 } 705 ++IRef; 706 } 707 } 708 } 709 710 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) { 711 if (!HaveInsertPoint()) 712 return false; 713 // threadprivate_var1 = master_threadprivate_var1; 714 // operator=(threadprivate_var2, master_threadprivate_var2); 715 // ... 716 // __kmpc_barrier(&loc, global_tid); 717 llvm::DenseSet<const VarDecl *> CopiedVars; 718 llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr; 719 for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) { 720 auto IRef = C->varlist_begin(); 721 auto ISrcRef = C->source_exprs().begin(); 722 auto IDestRef = C->destination_exprs().begin(); 723 for (auto *AssignOp : C->assignment_ops()) { 724 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 725 QualType Type = VD->getType(); 726 if (CopiedVars.insert(VD->getCanonicalDecl()).second) { 727 // Get the address of the master variable. 
If we are emitting code with 728 // TLS support, the address is passed from the master as field in the 729 // captured declaration. 730 Address MasterAddr = Address::invalid(); 731 if (getLangOpts().OpenMPUseTLS && 732 getContext().getTargetInfo().isTLSSupported()) { 733 assert(CapturedStmtInfo->lookup(VD) && 734 "Copyin threadprivates should have been captured!"); 735 DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(), 736 VK_LValue, (*IRef)->getExprLoc()); 737 MasterAddr = EmitLValue(&DRE).getAddress(); 738 LocalDeclMap.erase(VD); 739 } else { 740 MasterAddr = 741 Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD) 742 : CGM.GetAddrOfGlobal(VD), 743 getContext().getDeclAlign(VD)); 744 } 745 // Get the address of the threadprivate variable. 746 Address PrivateAddr = EmitLValue(*IRef).getAddress(); 747 if (CopiedVars.size() == 1) { 748 // At first check if current thread is a master thread. If it is, no 749 // need to copy data. 750 CopyBegin = createBasicBlock("copyin.not.master"); 751 CopyEnd = createBasicBlock("copyin.not.master.end"); 752 Builder.CreateCondBr( 753 Builder.CreateICmpNE( 754 Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy), 755 Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)), 756 CopyBegin, CopyEnd); 757 EmitBlock(CopyBegin); 758 } 759 auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl()); 760 auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); 761 EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp); 762 } 763 ++IRef; 764 ++ISrcRef; 765 ++IDestRef; 766 } 767 } 768 if (CopyEnd) { 769 // Exit out of copying procedure for non-master thread. 
770 EmitBlock(CopyEnd, /*IsFinished=*/true); 771 return true; 772 } 773 return false; 774 } 775 776 bool CodeGenFunction::EmitOMPLastprivateClauseInit( 777 const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) { 778 if (!HaveInsertPoint()) 779 return false; 780 bool HasAtLeastOneLastprivate = false; 781 llvm::DenseSet<const VarDecl *> SIMDLCVs; 782 if (isOpenMPSimdDirective(D.getDirectiveKind())) { 783 auto *LoopDirective = cast<OMPLoopDirective>(&D); 784 for (auto *C : LoopDirective->counters()) { 785 SIMDLCVs.insert( 786 cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl()); 787 } 788 } 789 llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; 790 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { 791 HasAtLeastOneLastprivate = true; 792 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) 793 break; 794 auto IRef = C->varlist_begin(); 795 auto IDestRef = C->destination_exprs().begin(); 796 for (auto *IInit : C->private_copies()) { 797 // Keep the address of the original variable for future update at the end 798 // of the loop. 799 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 800 // Taskloops do not require additional initialization, it is done in 801 // runtime support library. 802 if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) { 803 auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); 804 PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address { 805 DeclRefExpr DRE( 806 const_cast<VarDecl *>(OrigVD), 807 /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup( 808 OrigVD) != nullptr, 809 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); 810 return EmitLValue(&DRE).getAddress(); 811 }); 812 // Check if the variable is also a firstprivate: in this case IInit is 813 // not generated. Initialization of this variable will happen in codegen 814 // for 'firstprivate' clause. 
815 if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) { 816 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); 817 bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { 818 // Emit private VarDecl with copy init. 819 EmitDecl(*VD); 820 return GetAddrOfLocalVar(VD); 821 }); 822 assert(IsRegistered && 823 "lastprivate var already registered as private"); 824 (void)IsRegistered; 825 } 826 } 827 ++IRef; 828 ++IDestRef; 829 } 830 } 831 return HasAtLeastOneLastprivate; 832 } 833 834 void CodeGenFunction::EmitOMPLastprivateClauseFinal( 835 const OMPExecutableDirective &D, bool NoFinals, 836 llvm::Value *IsLastIterCond) { 837 if (!HaveInsertPoint()) 838 return; 839 // Emit following code: 840 // if (<IsLastIterCond>) { 841 // orig_var1 = private_orig_var1; 842 // ... 843 // orig_varn = private_orig_varn; 844 // } 845 llvm::BasicBlock *ThenBB = nullptr; 846 llvm::BasicBlock *DoneBB = nullptr; 847 if (IsLastIterCond) { 848 ThenBB = createBasicBlock(".omp.lastprivate.then"); 849 DoneBB = createBasicBlock(".omp.lastprivate.done"); 850 Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB); 851 EmitBlock(ThenBB); 852 } 853 llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; 854 llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates; 855 if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) { 856 auto IC = LoopDirective->counters().begin(); 857 for (auto F : LoopDirective->finals()) { 858 auto *D = 859 cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl(); 860 if (NoFinals) 861 AlreadyEmittedVars.insert(D); 862 else 863 LoopCountersAndUpdates[D] = F; 864 ++IC; 865 } 866 } 867 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { 868 auto IRef = C->varlist_begin(); 869 auto ISrcRef = C->source_exprs().begin(); 870 auto IDestRef = C->destination_exprs().begin(); 871 for (auto *AssignOp : C->assignment_ops()) { 872 auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 873 QualType Type 
= PrivateVD->getType(); 874 auto *CanonicalVD = PrivateVD->getCanonicalDecl(); 875 if (AlreadyEmittedVars.insert(CanonicalVD).second) { 876 // If lastprivate variable is a loop control variable for loop-based 877 // directive, update its value before copyin back to original 878 // variable. 879 if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD)) 880 EmitIgnoredExpr(FinalExpr); 881 auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl()); 882 auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); 883 // Get the address of the original variable. 884 Address OriginalAddr = GetAddrOfLocalVar(DestVD); 885 // Get the address of the private variable. 886 Address PrivateAddr = GetAddrOfLocalVar(PrivateVD); 887 if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>()) 888 PrivateAddr = 889 Address(Builder.CreateLoad(PrivateAddr), 890 getNaturalTypeAlignment(RefTy->getPointeeType())); 891 EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp); 892 } 893 ++IRef; 894 ++ISrcRef; 895 ++IDestRef; 896 } 897 if (auto *PostUpdate = C->getPostUpdateExpr()) 898 EmitIgnoredExpr(PostUpdate); 899 } 900 if (IsLastIterCond) 901 EmitBlock(DoneBB, /*IsFinished=*/true); 902 } 903 904 void CodeGenFunction::EmitOMPReductionClauseInit( 905 const OMPExecutableDirective &D, 906 CodeGenFunction::OMPPrivateScope &PrivateScope) { 907 if (!HaveInsertPoint()) 908 return; 909 SmallVector<const Expr *, 4> Shareds; 910 SmallVector<const Expr *, 4> Privates; 911 SmallVector<const Expr *, 4> ReductionOps; 912 SmallVector<const Expr *, 4> LHSs; 913 SmallVector<const Expr *, 4> RHSs; 914 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { 915 auto IPriv = C->privates().begin(); 916 auto IRed = C->reduction_ops().begin(); 917 auto ILHS = C->lhs_exprs().begin(); 918 auto IRHS = C->rhs_exprs().begin(); 919 for (const auto *Ref : C->varlists()) { 920 Shareds.emplace_back(Ref); 921 Privates.emplace_back(*IPriv); 922 
ReductionOps.emplace_back(*IRed); 923 LHSs.emplace_back(*ILHS); 924 RHSs.emplace_back(*IRHS); 925 std::advance(IPriv, 1); 926 std::advance(IRed, 1); 927 std::advance(ILHS, 1); 928 std::advance(IRHS, 1); 929 } 930 } 931 ReductionCodeGen RedCG(Shareds, Privates, ReductionOps); 932 unsigned Count = 0; 933 auto ILHS = LHSs.begin(); 934 auto IRHS = RHSs.begin(); 935 auto IPriv = Privates.begin(); 936 for (const auto *IRef : Shareds) { 937 auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl()); 938 // Emit private VarDecl with reduction init. 939 RedCG.emitSharedLValue(*this, Count); 940 RedCG.emitAggregateType(*this, Count); 941 auto Emission = EmitAutoVarAlloca(*PrivateVD); 942 RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(), 943 RedCG.getSharedLValue(Count), 944 [&Emission](CodeGenFunction &CGF) { 945 CGF.EmitAutoVarInit(Emission); 946 return true; 947 }); 948 EmitAutoVarCleanups(Emission); 949 Address BaseAddr = RedCG.adjustPrivateAddress( 950 *this, Count, Emission.getAllocatedAddress()); 951 bool IsRegistered = PrivateScope.addPrivate( 952 RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; }); 953 assert(IsRegistered && "private var already registered as private"); 954 // Silence the warning about unused variable. 955 (void)IsRegistered; 956 957 auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 958 auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 959 if (isa<OMPArraySectionExpr>(IRef)) { 960 // Store the address of the original variable associated with the LHS 961 // implicit variable. 962 PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address { 963 return RedCG.getSharedLValue(Count).getAddress(); 964 }); 965 PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { 966 return GetAddrOfLocalVar(PrivateVD); 967 }); 968 } else if (isa<ArraySubscriptExpr>(IRef)) { 969 // Store the address of the original variable associated with the LHS 970 // implicit variable. 
971 PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address { 972 return RedCG.getSharedLValue(Count).getAddress(); 973 }); 974 PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address { 975 return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD), 976 ConvertTypeForMem(RHSVD->getType()), 977 "rhs.begin"); 978 }); 979 } else { 980 QualType Type = PrivateVD->getType(); 981 bool IsArray = getContext().getAsArrayType(Type) != nullptr; 982 Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(); 983 // Store the address of the original variable associated with the LHS 984 // implicit variable. 985 if (IsArray) { 986 OriginalAddr = Builder.CreateElementBitCast( 987 OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin"); 988 } 989 PrivateScope.addPrivate( 990 LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; }); 991 PrivateScope.addPrivate( 992 RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address { 993 return IsArray 994 ? Builder.CreateElementBitCast( 995 GetAddrOfLocalVar(PrivateVD), 996 ConvertTypeForMem(RHSVD->getType()), "rhs.begin") 997 : GetAddrOfLocalVar(PrivateVD); 998 }); 999 } 1000 ++ILHS; 1001 ++IRHS; 1002 ++IPriv; 1003 ++Count; 1004 } 1005 } 1006 1007 void CodeGenFunction::EmitOMPReductionClauseFinal( 1008 const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) { 1009 if (!HaveInsertPoint()) 1010 return; 1011 llvm::SmallVector<const Expr *, 8> Privates; 1012 llvm::SmallVector<const Expr *, 8> LHSExprs; 1013 llvm::SmallVector<const Expr *, 8> RHSExprs; 1014 llvm::SmallVector<const Expr *, 8> ReductionOps; 1015 bool HasAtLeastOneReduction = false; 1016 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { 1017 HasAtLeastOneReduction = true; 1018 Privates.append(C->privates().begin(), C->privates().end()); 1019 LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); 1020 RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); 1021 
ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); 1022 } 1023 if (HasAtLeastOneReduction) { 1024 bool WithNowait = D.getSingleClause<OMPNowaitClause>() || 1025 isOpenMPParallelDirective(D.getDirectiveKind()) || 1026 D.getDirectiveKind() == OMPD_simd; 1027 bool SimpleReduction = D.getDirectiveKind() == OMPD_simd; 1028 // Emit nowait reduction if nowait clause is present or directive is a 1029 // parallel directive (it always has implicit barrier). 1030 CGM.getOpenMPRuntime().emitReduction( 1031 *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps, 1032 {WithNowait, SimpleReduction, ReductionKind}); 1033 } 1034 } 1035 1036 static void emitPostUpdateForReductionClause( 1037 CodeGenFunction &CGF, const OMPExecutableDirective &D, 1038 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) { 1039 if (!CGF.HaveInsertPoint()) 1040 return; 1041 llvm::BasicBlock *DoneBB = nullptr; 1042 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { 1043 if (auto *PostUpdate = C->getPostUpdateExpr()) { 1044 if (!DoneBB) { 1045 if (auto *Cond = CondGen(CGF)) { 1046 // If the first post-update expression is found, emit conditional 1047 // block if it was requested. 1048 auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu"); 1049 DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done"); 1050 CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB); 1051 CGF.EmitBlock(ThenBB); 1052 } 1053 } 1054 CGF.EmitIgnoredExpr(PostUpdate); 1055 } 1056 } 1057 if (DoneBB) 1058 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 1059 } 1060 1061 namespace { 1062 /// Codegen lambda for appending distribute lower and upper bounds to outlined 1063 /// parallel function. 
This is necessary for combined constructs such as 1064 /// 'distribute parallel for' 1065 typedef llvm::function_ref<void(CodeGenFunction &, 1066 const OMPExecutableDirective &, 1067 llvm::SmallVectorImpl<llvm::Value *> &)> 1068 CodeGenBoundParametersTy; 1069 } // anonymous namespace 1070 1071 static void emitCommonOMPParallelDirective( 1072 CodeGenFunction &CGF, const OMPExecutableDirective &S, 1073 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 1074 const CodeGenBoundParametersTy &CodeGenBoundParameters) { 1075 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); 1076 auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( 1077 S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); 1078 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) { 1079 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 1080 auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), 1081 /*IgnoreResultAssign*/ true); 1082 CGF.CGM.getOpenMPRuntime().emitNumThreadsClause( 1083 CGF, NumThreads, NumThreadsClause->getLocStart()); 1084 } 1085 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) { 1086 CodeGenFunction::RunCleanupsScope ProcBindScope(CGF); 1087 CGF.CGM.getOpenMPRuntime().emitProcBindClause( 1088 CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart()); 1089 } 1090 const Expr *IfCond = nullptr; 1091 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 1092 if (C->getNameModifier() == OMPD_unknown || 1093 C->getNameModifier() == OMPD_parallel) { 1094 IfCond = C->getCondition(); 1095 break; 1096 } 1097 } 1098 1099 OMPParallelScope Scope(CGF, S); 1100 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 1101 // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk 1102 // lower and upper bounds with the pragma 'for' chunking mechanism. 
1103 // The following lambda takes care of appending the lower and upper bound 1104 // parameters when necessary 1105 CodeGenBoundParameters(CGF, S, CapturedVars); 1106 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 1107 CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn, 1108 CapturedVars, IfCond); 1109 } 1110 1111 static void emitEmptyBoundParameters(CodeGenFunction &, 1112 const OMPExecutableDirective &, 1113 llvm::SmallVectorImpl<llvm::Value *> &) {} 1114 1115 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { 1116 // Emit parallel region as a standalone region. 1117 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1118 OMPPrivateScope PrivateScope(CGF); 1119 bool Copyins = CGF.EmitOMPCopyinClause(S); 1120 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 1121 if (Copyins) { 1122 // Emit implicit barrier to synchronize threads and avoid data races on 1123 // propagation master's thread values of threadprivate variables to local 1124 // instances of that variables of all other implicit threads. 1125 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 1126 CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 1127 /*ForceSimpleCall=*/true); 1128 } 1129 CGF.EmitOMPPrivateClause(S, PrivateScope); 1130 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 1131 (void)PrivateScope.Privatize(); 1132 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1133 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 1134 }; 1135 emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen, 1136 emitEmptyBoundParameters); 1137 emitPostUpdateForReductionClause( 1138 *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1139 } 1140 1141 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, 1142 JumpDest LoopExit) { 1143 RunCleanupsScope BodyScope(*this); 1144 // Update counters values on current iteration. 
1145 for (auto I : D.updates()) { 1146 EmitIgnoredExpr(I); 1147 } 1148 // Update the linear variables. 1149 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1150 for (auto *U : C->updates()) 1151 EmitIgnoredExpr(U); 1152 } 1153 1154 // On a continue in the body, jump to the end. 1155 auto Continue = getJumpDestInCurrentScope("omp.body.continue"); 1156 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1157 // Emit loop body. 1158 EmitStmt(D.getBody()); 1159 // The end (updates/cleanups). 1160 EmitBlock(Continue.getBlock()); 1161 BreakContinueStack.pop_back(); 1162 } 1163 1164 void CodeGenFunction::EmitOMPInnerLoop( 1165 const Stmt &S, bool RequiresCleanup, const Expr *LoopCond, 1166 const Expr *IncExpr, 1167 const llvm::function_ref<void(CodeGenFunction &)> &BodyGen, 1168 const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) { 1169 auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end"); 1170 1171 // Start the loop with a block that tests the condition. 1172 auto CondBlock = createBasicBlock("omp.inner.for.cond"); 1173 EmitBlock(CondBlock); 1174 const SourceRange &R = S.getSourceRange(); 1175 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), 1176 SourceLocToDebugLoc(R.getEnd())); 1177 1178 // If there are any cleanups between here and the loop-exit scope, 1179 // create a block to stage a loop exit along. 1180 auto ExitBlock = LoopExit.getBlock(); 1181 if (RequiresCleanup) 1182 ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup"); 1183 1184 auto LoopBody = createBasicBlock("omp.inner.for.body"); 1185 1186 // Emit condition. 1187 EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S)); 1188 if (ExitBlock != LoopExit.getBlock()) { 1189 EmitBlock(ExitBlock); 1190 EmitBranchThroughCleanup(LoopExit); 1191 } 1192 1193 EmitBlock(LoopBody); 1194 incrementProfileCounter(&S); 1195 1196 // Create a block for the increment. 
1197 auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc"); 1198 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1199 1200 BodyGen(*this); 1201 1202 // Emit "IV = IV + 1" and a back-edge to the condition block. 1203 EmitBlock(Continue.getBlock()); 1204 EmitIgnoredExpr(IncExpr); 1205 PostIncGen(*this); 1206 BreakContinueStack.pop_back(); 1207 EmitBranch(CondBlock); 1208 LoopStack.pop(); 1209 // Emit the fall-through block. 1210 EmitBlock(LoopExit.getBlock()); 1211 } 1212 1213 void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { 1214 if (!HaveInsertPoint()) 1215 return; 1216 // Emit inits for the linear variables. 1217 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1218 for (auto *Init : C->inits()) { 1219 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl()); 1220 if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) { 1221 AutoVarEmission Emission = EmitAutoVarAlloca(*VD); 1222 auto *OrigVD = cast<VarDecl>(Ref->getDecl()); 1223 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), 1224 CapturedStmtInfo->lookup(OrigVD) != nullptr, 1225 VD->getInit()->getType(), VK_LValue, 1226 VD->getInit()->getExprLoc()); 1227 EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(), 1228 VD->getType()), 1229 /*capturedByInit=*/false); 1230 EmitAutoVarCleanups(Emission); 1231 } else 1232 EmitVarDecl(*VD); 1233 } 1234 // Emit the linear steps for the linear clauses. 1235 // If a step is not constant, it is pre-calculated before the loop. 1236 if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep())) 1237 if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) { 1238 EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl())); 1239 // Emit calculation of the linear step. 
1240 EmitIgnoredExpr(CS); 1241 } 1242 } 1243 } 1244 1245 void CodeGenFunction::EmitOMPLinearClauseFinal( 1246 const OMPLoopDirective &D, 1247 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) { 1248 if (!HaveInsertPoint()) 1249 return; 1250 llvm::BasicBlock *DoneBB = nullptr; 1251 // Emit the final values of the linear variables. 1252 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1253 auto IC = C->varlist_begin(); 1254 for (auto *F : C->finals()) { 1255 if (!DoneBB) { 1256 if (auto *Cond = CondGen(*this)) { 1257 // If the first post-update expression is found, emit conditional 1258 // block if it was requested. 1259 auto *ThenBB = createBasicBlock(".omp.linear.pu"); 1260 DoneBB = createBasicBlock(".omp.linear.pu.done"); 1261 Builder.CreateCondBr(Cond, ThenBB, DoneBB); 1262 EmitBlock(ThenBB); 1263 } 1264 } 1265 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl()); 1266 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), 1267 CapturedStmtInfo->lookup(OrigVD) != nullptr, 1268 (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); 1269 Address OrigAddr = EmitLValue(&DRE).getAddress(); 1270 CodeGenFunction::OMPPrivateScope VarScope(*this); 1271 VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; }); 1272 (void)VarScope.Privatize(); 1273 EmitIgnoredExpr(F); 1274 ++IC; 1275 } 1276 if (auto *PostUpdate = C->getPostUpdateExpr()) 1277 EmitIgnoredExpr(PostUpdate); 1278 } 1279 if (DoneBB) 1280 EmitBlock(DoneBB, /*IsFinished=*/true); 1281 } 1282 1283 static void emitAlignedClause(CodeGenFunction &CGF, 1284 const OMPExecutableDirective &D) { 1285 if (!CGF.HaveInsertPoint()) 1286 return; 1287 for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) { 1288 unsigned ClauseAlignment = 0; 1289 if (auto AlignmentExpr = Clause->getAlignment()) { 1290 auto AlignmentCI = 1291 cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr)); 1292 ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue()); 1293 } 1294 for 
(auto E : Clause->varlists()) { 1295 unsigned Alignment = ClauseAlignment; 1296 if (Alignment == 0) { 1297 // OpenMP [2.8.1, Description] 1298 // If no optional parameter is specified, implementation-defined default 1299 // alignments for SIMD instructions on the target platforms are assumed. 1300 Alignment = 1301 CGF.getContext() 1302 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign( 1303 E->getType()->getPointeeType())) 1304 .getQuantity(); 1305 } 1306 assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) && 1307 "alignment is not power of 2"); 1308 if (Alignment != 0) { 1309 llvm::Value *PtrValue = CGF.EmitScalarExpr(E); 1310 CGF.EmitAlignmentAssumption(PtrValue, Alignment); 1311 } 1312 } 1313 } 1314 } 1315 1316 void CodeGenFunction::EmitOMPPrivateLoopCounters( 1317 const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) { 1318 if (!HaveInsertPoint()) 1319 return; 1320 auto I = S.private_counters().begin(); 1321 for (auto *E : S.counters()) { 1322 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 1323 auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()); 1324 (void)LoopScope.addPrivate(VD, [&]() -> Address { 1325 // Emit var without initialization. 
1326 if (!LocalDeclMap.count(PrivateVD)) { 1327 auto VarEmission = EmitAutoVarAlloca(*PrivateVD); 1328 EmitAutoVarCleanups(VarEmission); 1329 } 1330 DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD), 1331 /*RefersToEnclosingVariableOrCapture=*/false, 1332 (*I)->getType(), VK_LValue, (*I)->getExprLoc()); 1333 return EmitLValue(&DRE).getAddress(); 1334 }); 1335 if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) || 1336 VD->hasGlobalStorage()) { 1337 (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address { 1338 DeclRefExpr DRE(const_cast<VarDecl *>(VD), 1339 LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD), 1340 E->getType(), VK_LValue, E->getExprLoc()); 1341 return EmitLValue(&DRE).getAddress(); 1342 }); 1343 } 1344 ++I; 1345 } 1346 } 1347 1348 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, 1349 const Expr *Cond, llvm::BasicBlock *TrueBlock, 1350 llvm::BasicBlock *FalseBlock, uint64_t TrueCount) { 1351 if (!CGF.HaveInsertPoint()) 1352 return; 1353 { 1354 CodeGenFunction::OMPPrivateScope PreCondScope(CGF); 1355 CGF.EmitOMPPrivateLoopCounters(S, PreCondScope); 1356 (void)PreCondScope.Privatize(); 1357 // Get initial values of real counters. 1358 for (auto I : S.inits()) { 1359 CGF.EmitIgnoredExpr(I); 1360 } 1361 } 1362 // Check that loop is executed at least one time. 
1363 CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount); 1364 } 1365 1366 void CodeGenFunction::EmitOMPLinearClause( 1367 const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { 1368 if (!HaveInsertPoint()) 1369 return; 1370 llvm::DenseSet<const VarDecl *> SIMDLCVs; 1371 if (isOpenMPSimdDirective(D.getDirectiveKind())) { 1372 auto *LoopDirective = cast<OMPLoopDirective>(&D); 1373 for (auto *C : LoopDirective->counters()) { 1374 SIMDLCVs.insert( 1375 cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl()); 1376 } 1377 } 1378 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1379 auto CurPrivate = C->privates().begin(); 1380 for (auto *E : C->varlists()) { 1381 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 1382 auto *PrivateVD = 1383 cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl()); 1384 if (!SIMDLCVs.count(VD->getCanonicalDecl())) { 1385 bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address { 1386 // Emit private VarDecl with copy init. 1387 EmitVarDecl(*PrivateVD); 1388 return GetAddrOfLocalVar(PrivateVD); 1389 }); 1390 assert(IsRegistered && "linear var already registered as private"); 1391 // Silence the warning about unused variable. 
1392 (void)IsRegistered; 1393 } else 1394 EmitVarDecl(*PrivateVD); 1395 ++CurPrivate; 1396 } 1397 } 1398 } 1399 1400 static void emitSimdlenSafelenClause(CodeGenFunction &CGF, 1401 const OMPExecutableDirective &D, 1402 bool IsMonotonic) { 1403 if (!CGF.HaveInsertPoint()) 1404 return; 1405 if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) { 1406 RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(), 1407 /*ignoreResult=*/true); 1408 llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); 1409 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); 1410 // In presence of finite 'safelen', it may be unsafe to mark all 1411 // the memory instructions parallel, because loop-carried 1412 // dependences of 'safelen' iterations are possible. 1413 if (!IsMonotonic) 1414 CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>()); 1415 } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) { 1416 RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(), 1417 /*ignoreResult=*/true); 1418 llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); 1419 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); 1420 // In presence of finite 'safelen', it may be unsafe to mark all 1421 // the memory instructions parallel, because loop-carried 1422 // dependences of 'safelen' iterations are possible. 1423 CGF.LoopStack.setParallel(false); 1424 } 1425 } 1426 1427 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D, 1428 bool IsMonotonic) { 1429 // Walk clauses and process safelen/lastprivate. 
1430 LoopStack.setParallel(!IsMonotonic); 1431 LoopStack.setVectorizeEnable(true); 1432 emitSimdlenSafelenClause(*this, D, IsMonotonic); 1433 } 1434 1435 void CodeGenFunction::EmitOMPSimdFinal( 1436 const OMPLoopDirective &D, 1437 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) { 1438 if (!HaveInsertPoint()) 1439 return; 1440 llvm::BasicBlock *DoneBB = nullptr; 1441 auto IC = D.counters().begin(); 1442 auto IPC = D.private_counters().begin(); 1443 for (auto F : D.finals()) { 1444 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl()); 1445 auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl()); 1446 auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD); 1447 if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) || 1448 OrigVD->hasGlobalStorage() || CED) { 1449 if (!DoneBB) { 1450 if (auto *Cond = CondGen(*this)) { 1451 // If the first post-update expression is found, emit conditional 1452 // block if it was requested. 1453 auto *ThenBB = createBasicBlock(".omp.final.then"); 1454 DoneBB = createBasicBlock(".omp.final.done"); 1455 Builder.CreateCondBr(Cond, ThenBB, DoneBB); 1456 EmitBlock(ThenBB); 1457 } 1458 } 1459 Address OrigAddr = Address::invalid(); 1460 if (CED) 1461 OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(); 1462 else { 1463 DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD), 1464 /*RefersToEnclosingVariableOrCapture=*/false, 1465 (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc()); 1466 OrigAddr = EmitLValue(&DRE).getAddress(); 1467 } 1468 OMPPrivateScope VarScope(*this); 1469 VarScope.addPrivate(OrigVD, 1470 [OrigAddr]() -> Address { return OrigAddr; }); 1471 (void)VarScope.Privatize(); 1472 EmitIgnoredExpr(F); 1473 } 1474 ++IC; 1475 ++IPC; 1476 } 1477 if (DoneBB) 1478 EmitBlock(DoneBB, /*IsFinished=*/true); 1479 } 1480 1481 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF, 1482 const OMPLoopDirective &S, 1483 CodeGenFunction::JumpDest LoopExit) { 1484 
CGF.EmitOMPLoopBody(S, LoopExit); 1485 CGF.EmitStopPoint(&S); 1486 } 1487 1488 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { 1489 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1490 OMPLoopScope PreInitScope(CGF, S); 1491 // if (PreCond) { 1492 // for (IV in 0..LastIteration) BODY; 1493 // <Final counter/linear vars updates>; 1494 // } 1495 // 1496 1497 // Emit: if (PreCond) - begin. 1498 // If the condition constant folds and can be elided, avoid emitting the 1499 // whole loop. 1500 bool CondConstant; 1501 llvm::BasicBlock *ContBlock = nullptr; 1502 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 1503 if (!CondConstant) 1504 return; 1505 } else { 1506 auto *ThenBlock = CGF.createBasicBlock("simd.if.then"); 1507 ContBlock = CGF.createBasicBlock("simd.if.end"); 1508 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 1509 CGF.getProfileCount(&S)); 1510 CGF.EmitBlock(ThenBlock); 1511 CGF.incrementProfileCounter(&S); 1512 } 1513 1514 // Emit the loop iteration variable. 1515 const Expr *IVExpr = S.getIterationVariable(); 1516 const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 1517 CGF.EmitVarDecl(*IVDecl); 1518 CGF.EmitIgnoredExpr(S.getInit()); 1519 1520 // Emit the iterations count variable. 1521 // If it is not a variable, Sema decided to calculate iterations count on 1522 // each iteration (e.g., it is foldable into a constant). 1523 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 1524 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 1525 // Emit calculation of the iterations count. 
1526 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 1527 } 1528 1529 CGF.EmitOMPSimdInit(S); 1530 1531 emitAlignedClause(CGF, S); 1532 CGF.EmitOMPLinearClauseInit(S); 1533 { 1534 OMPPrivateScope LoopScope(CGF); 1535 CGF.EmitOMPPrivateLoopCounters(S, LoopScope); 1536 CGF.EmitOMPLinearClause(S, LoopScope); 1537 CGF.EmitOMPPrivateClause(S, LoopScope); 1538 CGF.EmitOMPReductionClauseInit(S, LoopScope); 1539 bool HasLastprivateClause = 1540 CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 1541 (void)LoopScope.Privatize(); 1542 CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 1543 S.getInc(), 1544 [&S](CodeGenFunction &CGF) { 1545 CGF.EmitOMPLoopBody(S, JumpDest()); 1546 CGF.EmitStopPoint(&S); 1547 }, 1548 [](CodeGenFunction &) {}); 1549 CGF.EmitOMPSimdFinal( 1550 S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1551 // Emit final copy of the lastprivate variables at the end of loops. 1552 if (HasLastprivateClause) 1553 CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true); 1554 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd); 1555 emitPostUpdateForReductionClause( 1556 CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1557 } 1558 CGF.EmitOMPLinearClauseFinal( 1559 S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1560 // Emit: if (PreCond) - end. 
1561 if (ContBlock) { 1562 CGF.EmitBranch(ContBlock); 1563 CGF.EmitBlock(ContBlock, true); 1564 } 1565 }; 1566 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1567 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 1568 } 1569 1570 void CodeGenFunction::EmitOMPOuterLoop( 1571 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, 1572 CodeGenFunction::OMPPrivateScope &LoopScope, 1573 const CodeGenFunction::OMPLoopArguments &LoopArgs, 1574 const CodeGenFunction::CodeGenLoopTy &CodeGenLoop, 1575 const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) { 1576 auto &RT = CGM.getOpenMPRuntime(); 1577 1578 const Expr *IVExpr = S.getIterationVariable(); 1579 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1580 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1581 1582 auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end"); 1583 1584 // Start the loop with a block that tests the condition. 1585 auto CondBlock = createBasicBlock("omp.dispatch.cond"); 1586 EmitBlock(CondBlock); 1587 const SourceRange &R = S.getSourceRange(); 1588 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), 1589 SourceLocToDebugLoc(R.getEnd())); 1590 1591 llvm::Value *BoolCondVal = nullptr; 1592 if (!DynamicOrOrdered) { 1593 // UB = min(UB, GlobalUB) or 1594 // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g. 1595 // 'distribute parallel for') 1596 EmitIgnoredExpr(LoopArgs.EUB); 1597 // IV = LB 1598 EmitIgnoredExpr(LoopArgs.Init); 1599 // IV < UB 1600 BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond); 1601 } else { 1602 BoolCondVal = 1603 RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL, 1604 LoopArgs.LB, LoopArgs.UB, LoopArgs.ST); 1605 } 1606 1607 // If there are any cleanups between here and the loop-exit scope, 1608 // create a block to stage a loop exit along. 
1609 auto ExitBlock = LoopExit.getBlock(); 1610 if (LoopScope.requiresCleanups()) 1611 ExitBlock = createBasicBlock("omp.dispatch.cleanup"); 1612 1613 auto LoopBody = createBasicBlock("omp.dispatch.body"); 1614 Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock); 1615 if (ExitBlock != LoopExit.getBlock()) { 1616 EmitBlock(ExitBlock); 1617 EmitBranchThroughCleanup(LoopExit); 1618 } 1619 EmitBlock(LoopBody); 1620 1621 // Emit "IV = LB" (in case of static schedule, we have already calculated new 1622 // LB for loop condition and emitted it above). 1623 if (DynamicOrOrdered) 1624 EmitIgnoredExpr(LoopArgs.Init); 1625 1626 // Create a block for the increment. 1627 auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); 1628 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1629 1630 // Generate !llvm.loop.parallel metadata for loads and stores for loops 1631 // with dynamic/guided scheduling and without ordered clause. 1632 if (!isOpenMPSimdDirective(S.getDirectiveKind())) 1633 LoopStack.setParallel(!IsMonotonic); 1634 else 1635 EmitOMPSimdInit(S, IsMonotonic); 1636 1637 SourceLocation Loc = S.getLocStart(); 1638 1639 // when 'distribute' is not combined with a 'for': 1640 // while (idx <= UB) { BODY; ++idx; } 1641 // when 'distribute' is combined with a 'for' 1642 // (e.g. 'distribute parallel for') 1643 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } 1644 EmitOMPInnerLoop( 1645 S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr, 1646 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { 1647 CodeGenLoop(CGF, S, LoopExit); 1648 }, 1649 [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) { 1650 CodeGenOrdered(CGF, Loc, IVSize, IVSigned); 1651 }); 1652 1653 EmitBlock(Continue.getBlock()); 1654 BreakContinueStack.pop_back(); 1655 if (!DynamicOrOrdered) { 1656 // Emit "LB = LB + Stride", "UB = UB + Stride". 
1657 EmitIgnoredExpr(LoopArgs.NextLB); 1658 EmitIgnoredExpr(LoopArgs.NextUB); 1659 } 1660 1661 EmitBranch(CondBlock); 1662 LoopStack.pop(); 1663 // Emit the fall-through block. 1664 EmitBlock(LoopExit.getBlock()); 1665 1666 // Tell the runtime we are done. 1667 auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) { 1668 if (!DynamicOrOrdered) 1669 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); 1670 }; 1671 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 1672 } 1673 1674 void CodeGenFunction::EmitOMPForOuterLoop( 1675 const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, 1676 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, 1677 const OMPLoopArguments &LoopArgs, 1678 const CodeGenDispatchBoundsTy &CGDispatchBounds) { 1679 auto &RT = CGM.getOpenMPRuntime(); 1680 1681 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). 1682 const bool DynamicOrOrdered = 1683 Ordered || RT.isDynamic(ScheduleKind.Schedule); 1684 1685 assert((Ordered || 1686 !RT.isStaticNonchunked(ScheduleKind.Schedule, 1687 LoopArgs.Chunk != nullptr)) && 1688 "static non-chunked schedule does not need outer loop"); 1689 1690 // Emit outer loop. 1691 // 1692 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 1693 // When schedule(dynamic,chunk_size) is specified, the iterations are 1694 // distributed to threads in the team in chunks as the threads request them. 1695 // Each thread executes a chunk of iterations, then requests another chunk, 1696 // until no chunks remain to be distributed. Each chunk contains chunk_size 1697 // iterations, except for the last chunk to be distributed, which may have 1698 // fewer iterations. When no chunk_size is specified, it defaults to 1. 1699 // 1700 // When schedule(guided,chunk_size) is specified, the iterations are assigned 1701 // to threads in the team in chunks as the executing threads request them. 
1702 // Each thread executes a chunk of iterations, then requests another chunk, 1703 // until no chunks remain to be assigned. For a chunk_size of 1, the size of 1704 // each chunk is proportional to the number of unassigned iterations divided 1705 // by the number of threads in the team, decreasing to 1. For a chunk_size 1706 // with value k (greater than 1), the size of each chunk is determined in the 1707 // same way, with the restriction that the chunks do not contain fewer than k 1708 // iterations (except for the last chunk to be assigned, which may have fewer 1709 // than k iterations). 1710 // 1711 // When schedule(auto) is specified, the decision regarding scheduling is 1712 // delegated to the compiler and/or runtime system. The programmer gives the 1713 // implementation the freedom to choose any possible mapping of iterations to 1714 // threads in the team. 1715 // 1716 // When schedule(runtime) is specified, the decision regarding scheduling is 1717 // deferred until run time, and the schedule and chunk size are taken from the 1718 // run-sched-var ICV. If the ICV is set to auto, the schedule is 1719 // implementation defined 1720 // 1721 // while(__kmpc_dispatch_next(&LB, &UB)) { 1722 // idx = LB; 1723 // while (idx <= UB) { BODY; ++idx; 1724 // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only. 1725 // } // inner loop 1726 // } 1727 // 1728 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 1729 // When schedule(static, chunk_size) is specified, iterations are divided into 1730 // chunks of size chunk_size, and the chunks are assigned to the threads in 1731 // the team in a round-robin fashion in the order of the thread number. 
1732 // 1733 // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) { 1734 // while (idx <= UB) { BODY; ++idx; } // inner loop 1735 // LB = LB + ST; 1736 // UB = UB + ST; 1737 // } 1738 // 1739 1740 const Expr *IVExpr = S.getIterationVariable(); 1741 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1742 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1743 1744 if (DynamicOrOrdered) { 1745 auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB); 1746 llvm::Value *LBVal = DispatchBounds.first; 1747 llvm::Value *UBVal = DispatchBounds.second; 1748 CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal, 1749 LoopArgs.Chunk}; 1750 RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize, 1751 IVSigned, Ordered, DipatchRTInputValues); 1752 } else { 1753 RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, 1754 Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB, 1755 LoopArgs.ST, LoopArgs.Chunk); 1756 } 1757 1758 auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc, 1759 const unsigned IVSize, 1760 const bool IVSigned) { 1761 if (Ordered) { 1762 CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize, 1763 IVSigned); 1764 } 1765 }; 1766 1767 OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST, 1768 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB); 1769 OuterLoopArgs.IncExpr = S.getInc(); 1770 OuterLoopArgs.Init = S.getInit(); 1771 OuterLoopArgs.Cond = S.getCond(); 1772 OuterLoopArgs.NextLB = S.getNextLowerBound(); 1773 OuterLoopArgs.NextUB = S.getNextUpperBound(); 1774 EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs, 1775 emitOMPLoopBodyWithStopPoint, CodeGenOrdered); 1776 } 1777 1778 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc, 1779 const unsigned IVSize, const bool IVSigned) {} 1780 1781 void CodeGenFunction::EmitOMPDistributeOuterLoop( 1782 
OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S, 1783 OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs, 1784 const CodeGenLoopTy &CodeGenLoopContent) { 1785 1786 auto &RT = CGM.getOpenMPRuntime(); 1787 1788 // Emit outer loop. 1789 // Same behavior as a OMPForOuterLoop, except that schedule cannot be 1790 // dynamic 1791 // 1792 1793 const Expr *IVExpr = S.getIterationVariable(); 1794 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1795 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1796 1797 RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, 1798 IVSigned, /* Ordered = */ false, LoopArgs.IL, 1799 LoopArgs.LB, LoopArgs.UB, LoopArgs.ST, 1800 LoopArgs.Chunk); 1801 1802 // for combined 'distribute' and 'for' the increment expression of distribute 1803 // is store in DistInc. For 'distribute' alone, it is in Inc. 1804 Expr *IncExpr; 1805 if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())) 1806 IncExpr = S.getDistInc(); 1807 else 1808 IncExpr = S.getInc(); 1809 1810 // this routine is shared by 'omp distribute parallel for' and 1811 // 'omp distribute': select the right EUB expression depending on the 1812 // directive 1813 OMPLoopArguments OuterLoopArgs; 1814 OuterLoopArgs.LB = LoopArgs.LB; 1815 OuterLoopArgs.UB = LoopArgs.UB; 1816 OuterLoopArgs.ST = LoopArgs.ST; 1817 OuterLoopArgs.IL = LoopArgs.IL; 1818 OuterLoopArgs.Chunk = LoopArgs.Chunk; 1819 OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 1820 ? S.getCombinedEnsureUpperBound() 1821 : S.getEnsureUpperBound(); 1822 OuterLoopArgs.IncExpr = IncExpr; 1823 OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 1824 ? S.getCombinedInit() 1825 : S.getInit(); 1826 OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 1827 ? 
S.getCombinedCond() 1828 : S.getCond(); 1829 OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 1830 ? S.getCombinedNextLowerBound() 1831 : S.getNextLowerBound(); 1832 OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 1833 ? S.getCombinedNextUpperBound() 1834 : S.getNextUpperBound(); 1835 1836 EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S, 1837 LoopScope, OuterLoopArgs, CodeGenLoopContent, 1838 emitEmptyOrdered); 1839 } 1840 1841 /// Emit a helper variable and return corresponding lvalue. 1842 static LValue EmitOMPHelperVar(CodeGenFunction &CGF, 1843 const DeclRefExpr *Helper) { 1844 auto VDecl = cast<VarDecl>(Helper->getDecl()); 1845 CGF.EmitVarDecl(*VDecl); 1846 return CGF.EmitLValue(Helper); 1847 } 1848 1849 static std::pair<LValue, LValue> 1850 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF, 1851 const OMPExecutableDirective &S) { 1852 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 1853 LValue LB = 1854 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); 1855 LValue UB = 1856 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); 1857 1858 // When composing 'distribute' with 'for' (e.g. as in 'distribute 1859 // parallel for') we need to use the 'distribute' 1860 // chunk lower and upper bounds rather than the whole loop iteration 1861 // space. These are parameters to the outlined function for 'parallel' 1862 // and we copy the bounds of the previous schedule into the 1863 // the current ones. 
1864 LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable()); 1865 LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable()); 1866 llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation()); 1867 PrevLBVal = CGF.EmitScalarConversion( 1868 PrevLBVal, LS.getPrevLowerBoundVariable()->getType(), 1869 LS.getIterationVariable()->getType(), SourceLocation()); 1870 llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation()); 1871 PrevUBVal = CGF.EmitScalarConversion( 1872 PrevUBVal, LS.getPrevUpperBoundVariable()->getType(), 1873 LS.getIterationVariable()->getType(), SourceLocation()); 1874 1875 CGF.EmitStoreOfScalar(PrevLBVal, LB); 1876 CGF.EmitStoreOfScalar(PrevUBVal, UB); 1877 1878 return {LB, UB}; 1879 } 1880 1881 /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then 1882 /// we need to use the LB and UB expressions generated by the worksharing 1883 /// code generation support, whereas in non combined situations we would 1884 /// just emit 0 and the LastIteration expression 1885 /// This function is necessary due to the difference of the LB and UB 1886 /// types for the RT emission routines for 'for_static_init' and 1887 /// 'for_dispatch_init' 1888 static std::pair<llvm::Value *, llvm::Value *> 1889 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF, 1890 const OMPExecutableDirective &S, 1891 Address LB, Address UB) { 1892 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 1893 const Expr *IVExpr = LS.getIterationVariable(); 1894 // when implementing a dynamic schedule for a 'for' combined with a 1895 // 'distribute' (e.g. 
'distribute parallel for'), the 'for' loop 1896 // is not normalized as each team only executes its own assigned 1897 // distribute chunk 1898 QualType IteratorTy = IVExpr->getType(); 1899 llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, 1900 SourceLocation()); 1901 llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, 1902 SourceLocation()); 1903 return {LBVal, UBVal}; 1904 } 1905 1906 static void emitDistributeParallelForDistributeInnerBoundParams( 1907 CodeGenFunction &CGF, const OMPExecutableDirective &S, 1908 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) { 1909 const auto &Dir = cast<OMPLoopDirective>(S); 1910 LValue LB = 1911 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable())); 1912 auto LBCast = CGF.Builder.CreateIntCast( 1913 CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false); 1914 CapturedVars.push_back(LBCast); 1915 LValue UB = 1916 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable())); 1917 1918 auto UBCast = CGF.Builder.CreateIntCast( 1919 CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false); 1920 CapturedVars.push_back(UBCast); 1921 } 1922 1923 static void 1924 emitInnerParallelForWhenCombined(CodeGenFunction &CGF, 1925 const OMPLoopDirective &S, 1926 CodeGenFunction::JumpDest LoopExit) { 1927 auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF, 1928 PrePostActionTy &) { 1929 CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(), 1930 emitDistributeParallelForInnerBounds, 1931 emitDistributeParallelForDispatchBounds); 1932 }; 1933 1934 emitCommonOMPParallelDirective( 1935 CGF, S, OMPD_for, CGInlinedWorksharingLoop, 1936 emitDistributeParallelForDistributeInnerBoundParams); 1937 } 1938 1939 void CodeGenFunction::EmitOMPDistributeParallelForDirective( 1940 const OMPDistributeParallelForDirective &S) { 1941 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1942 CGF.EmitOMPDistributeLoop(S, 
emitInnerParallelForWhenCombined, 1943 S.getDistInc()); 1944 }; 1945 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1946 OMPCancelStackRAII CancelRegion(*this, OMPD_distribute_parallel_for, 1947 /*HasCancel=*/false); 1948 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, 1949 /*HasCancel=*/false); 1950 } 1951 1952 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective( 1953 const OMPDistributeParallelForSimdDirective &S) { 1954 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1955 CGM.getOpenMPRuntime().emitInlinedDirective( 1956 *this, OMPD_distribute_parallel_for_simd, 1957 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1958 OMPLoopScope PreInitScope(CGF, S); 1959 CGF.EmitStmt( 1960 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1961 }); 1962 } 1963 1964 void CodeGenFunction::EmitOMPDistributeSimdDirective( 1965 const OMPDistributeSimdDirective &S) { 1966 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1967 CGM.getOpenMPRuntime().emitInlinedDirective( 1968 *this, OMPD_distribute_simd, 1969 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1970 OMPLoopScope PreInitScope(CGF, S); 1971 CGF.EmitStmt( 1972 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1973 }); 1974 } 1975 1976 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective( 1977 const OMPTargetParallelForSimdDirective &S) { 1978 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1979 CGM.getOpenMPRuntime().emitInlinedDirective( 1980 *this, OMPD_target_parallel_for_simd, 1981 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1982 OMPLoopScope PreInitScope(CGF, S); 1983 CGF.EmitStmt( 1984 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1985 }); 1986 } 1987 1988 void CodeGenFunction::EmitOMPTargetSimdDirective( 1989 const OMPTargetSimdDirective &S) { 1990 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1991 CGM.getOpenMPRuntime().emitInlinedDirective( 1992 *this, OMPD_target_simd, [&S](CodeGenFunction &CGF, 
PrePostActionTy &) { 1993 OMPLoopScope PreInitScope(CGF, S); 1994 CGF.EmitStmt( 1995 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1996 }); 1997 } 1998 1999 void CodeGenFunction::EmitOMPTeamsDistributeDirective( 2000 const OMPTeamsDistributeDirective &S) { 2001 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2002 CGM.getOpenMPRuntime().emitInlinedDirective( 2003 *this, OMPD_teams_distribute, 2004 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2005 OMPLoopScope PreInitScope(CGF, S); 2006 CGF.EmitStmt( 2007 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2008 }); 2009 } 2010 2011 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective( 2012 const OMPTeamsDistributeSimdDirective &S) { 2013 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2014 CGM.getOpenMPRuntime().emitInlinedDirective( 2015 *this, OMPD_teams_distribute_simd, 2016 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2017 OMPLoopScope PreInitScope(CGF, S); 2018 CGF.EmitStmt( 2019 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2020 }); 2021 } 2022 2023 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective( 2024 const OMPTeamsDistributeParallelForSimdDirective &S) { 2025 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2026 CGM.getOpenMPRuntime().emitInlinedDirective( 2027 *this, OMPD_teams_distribute_parallel_for_simd, 2028 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2029 OMPLoopScope PreInitScope(CGF, S); 2030 CGF.EmitStmt( 2031 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2032 }); 2033 } 2034 2035 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective( 2036 const OMPTeamsDistributeParallelForDirective &S) { 2037 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2038 CGM.getOpenMPRuntime().emitInlinedDirective( 2039 *this, OMPD_teams_distribute_parallel_for, 2040 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2041 OMPLoopScope PreInitScope(CGF, S); 2042 CGF.EmitStmt( 2043 
cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2044 }); 2045 } 2046 2047 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective( 2048 const OMPTargetTeamsDistributeDirective &S) { 2049 CGM.getOpenMPRuntime().emitInlinedDirective( 2050 *this, OMPD_target_teams_distribute, 2051 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2052 CGF.EmitStmt( 2053 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2054 }); 2055 } 2056 2057 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective( 2058 const OMPTargetTeamsDistributeParallelForDirective &S) { 2059 CGM.getOpenMPRuntime().emitInlinedDirective( 2060 *this, OMPD_target_teams_distribute_parallel_for, 2061 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2062 CGF.EmitStmt( 2063 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2064 }); 2065 } 2066 2067 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective( 2068 const OMPTargetTeamsDistributeParallelForSimdDirective &S) { 2069 CGM.getOpenMPRuntime().emitInlinedDirective( 2070 *this, OMPD_target_teams_distribute_parallel_for_simd, 2071 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2072 CGF.EmitStmt( 2073 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2074 }); 2075 } 2076 2077 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective( 2078 const OMPTargetTeamsDistributeSimdDirective &S) { 2079 CGM.getOpenMPRuntime().emitInlinedDirective( 2080 *this, OMPD_target_teams_distribute_simd, 2081 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2082 CGF.EmitStmt( 2083 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2084 }); 2085 } 2086 2087 namespace { 2088 struct ScheduleKindModifiersTy { 2089 OpenMPScheduleClauseKind Kind; 2090 OpenMPScheduleClauseModifier M1; 2091 OpenMPScheduleClauseModifier M2; 2092 ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind, 2093 OpenMPScheduleClauseModifier M1, 2094 OpenMPScheduleClauseModifier M2) 2095 : Kind(Kind), M1(M1), M2(M2) {} 
2096 }; 2097 } // namespace 2098 2099 bool CodeGenFunction::EmitOMPWorksharingLoop( 2100 const OMPLoopDirective &S, Expr *EUB, 2101 const CodeGenLoopBoundsTy &CodeGenLoopBounds, 2102 const CodeGenDispatchBoundsTy &CGDispatchBounds) { 2103 // Emit the loop iteration variable. 2104 auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 2105 auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); 2106 EmitVarDecl(*IVDecl); 2107 2108 // Emit the iterations count variable. 2109 // If it is not a variable, Sema decided to calculate iterations count on each 2110 // iteration (e.g., it is foldable into a constant). 2111 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 2112 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 2113 // Emit calculation of the iterations count. 2114 EmitIgnoredExpr(S.getCalcLastIteration()); 2115 } 2116 2117 auto &RT = CGM.getOpenMPRuntime(); 2118 2119 bool HasLastprivateClause; 2120 // Check pre-condition. 2121 { 2122 OMPLoopScope PreInitScope(*this, S); 2123 // Skip the entire loop if we don't meet the precondition. 2124 // If the condition constant folds and can be elided, avoid emitting the 2125 // whole loop. 
2126 bool CondConstant; 2127 llvm::BasicBlock *ContBlock = nullptr; 2128 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 2129 if (!CondConstant) 2130 return false; 2131 } else { 2132 auto *ThenBlock = createBasicBlock("omp.precond.then"); 2133 ContBlock = createBasicBlock("omp.precond.end"); 2134 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 2135 getProfileCount(&S)); 2136 EmitBlock(ThenBlock); 2137 incrementProfileCounter(&S); 2138 } 2139 2140 bool Ordered = false; 2141 if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) { 2142 if (OrderedClause->getNumForLoops()) 2143 RT.emitDoacrossInit(*this, S); 2144 else 2145 Ordered = true; 2146 } 2147 2148 llvm::DenseSet<const Expr *> EmittedFinals; 2149 emitAlignedClause(*this, S); 2150 EmitOMPLinearClauseInit(S); 2151 // Emit helper vars inits. 2152 2153 std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S); 2154 LValue LB = Bounds.first; 2155 LValue UB = Bounds.second; 2156 LValue ST = 2157 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 2158 LValue IL = 2159 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 2160 2161 // Emit 'then' code. 2162 { 2163 OMPPrivateScope LoopScope(*this); 2164 if (EmitOMPFirstprivateClause(S, LoopScope)) { 2165 // Emit implicit barrier to synchronize threads and avoid data races on 2166 // initialization of firstprivate variables and post-update of 2167 // lastprivate variables. 2168 CGM.getOpenMPRuntime().emitBarrierCall( 2169 *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 2170 /*ForceSimpleCall=*/true); 2171 } 2172 EmitOMPPrivateClause(S, LoopScope); 2173 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 2174 EmitOMPReductionClauseInit(S, LoopScope); 2175 EmitOMPPrivateLoopCounters(S, LoopScope); 2176 EmitOMPLinearClause(S, LoopScope); 2177 (void)LoopScope.Privatize(); 2178 2179 // Detect the loop schedule kind and chunk. 
2180 llvm::Value *Chunk = nullptr; 2181 OpenMPScheduleTy ScheduleKind; 2182 if (auto *C = S.getSingleClause<OMPScheduleClause>()) { 2183 ScheduleKind.Schedule = C->getScheduleKind(); 2184 ScheduleKind.M1 = C->getFirstScheduleModifier(); 2185 ScheduleKind.M2 = C->getSecondScheduleModifier(); 2186 if (const auto *Ch = C->getChunkSize()) { 2187 Chunk = EmitScalarExpr(Ch); 2188 Chunk = EmitScalarConversion(Chunk, Ch->getType(), 2189 S.getIterationVariable()->getType(), 2190 S.getLocStart()); 2191 } 2192 } 2193 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 2194 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 2195 // OpenMP 4.5, 2.7.1 Loop Construct, Description. 2196 // If the static schedule kind is specified or if the ordered clause is 2197 // specified, and if no monotonic modifier is specified, the effect will 2198 // be as if the monotonic modifier was specified. 2199 if (RT.isStaticNonchunked(ScheduleKind.Schedule, 2200 /* Chunked */ Chunk != nullptr) && 2201 !Ordered) { 2202 if (isOpenMPSimdDirective(S.getDirectiveKind())) 2203 EmitOMPSimdInit(S, /*IsMonotonic=*/true); 2204 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 2205 // When no chunk_size is specified, the iteration space is divided into 2206 // chunks that are approximately equal in size, and at most one chunk is 2207 // distributed to each thread. Note that the size of the chunks is 2208 // unspecified in this case. 
2209 RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, 2210 IVSize, IVSigned, Ordered, 2211 IL.getAddress(), LB.getAddress(), 2212 UB.getAddress(), ST.getAddress()); 2213 auto LoopExit = 2214 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 2215 // UB = min(UB, GlobalUB); 2216 EmitIgnoredExpr(S.getEnsureUpperBound()); 2217 // IV = LB; 2218 EmitIgnoredExpr(S.getInit()); 2219 // while (idx <= UB) { BODY; ++idx; } 2220 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 2221 S.getInc(), 2222 [&S, LoopExit](CodeGenFunction &CGF) { 2223 CGF.EmitOMPLoopBody(S, LoopExit); 2224 CGF.EmitStopPoint(&S); 2225 }, 2226 [](CodeGenFunction &) {}); 2227 EmitBlock(LoopExit.getBlock()); 2228 // Tell the runtime we are done. 2229 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2230 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); 2231 }; 2232 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 2233 } else { 2234 const bool IsMonotonic = 2235 Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static || 2236 ScheduleKind.Schedule == OMPC_SCHEDULE_unknown || 2237 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || 2238 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; 2239 // Emit the outer loop, which requests its work chunk [LB..UB] from 2240 // runtime and runs the inner loop to process it. 2241 const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(), 2242 ST.getAddress(), IL.getAddress(), 2243 Chunk, EUB); 2244 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, 2245 LoopArguments, CGDispatchBounds); 2246 } 2247 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 2248 EmitOMPSimdFinal(S, 2249 [&](CodeGenFunction &CGF) -> llvm::Value * { 2250 return CGF.Builder.CreateIsNotNull( 2251 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2252 }); 2253 } 2254 EmitOMPReductionClauseFinal( 2255 S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind()) 2256 ? 
/*Parallel and Simd*/ OMPD_parallel_for_simd 2257 : /*Parallel only*/ OMPD_parallel); 2258 // Emit post-update of the reduction variables if IsLastIter != 0. 2259 emitPostUpdateForReductionClause( 2260 *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2261 return CGF.Builder.CreateIsNotNull( 2262 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2263 }); 2264 // Emit final copy of the lastprivate variables if IsLastIter != 0. 2265 if (HasLastprivateClause) 2266 EmitOMPLastprivateClauseFinal( 2267 S, isOpenMPSimdDirective(S.getDirectiveKind()), 2268 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart()))); 2269 } 2270 EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2271 return CGF.Builder.CreateIsNotNull( 2272 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2273 }); 2274 // We're now done with the loop, so jump to the continuation block. 2275 if (ContBlock) { 2276 EmitBranch(ContBlock); 2277 EmitBlock(ContBlock, true); 2278 } 2279 } 2280 return HasLastprivateClause; 2281 } 2282 2283 /// The following two functions generate expressions for the loop lower 2284 /// and upper bounds in case of static and dynamic (dispatch) schedule 2285 /// of the associated 'for' or 'distribute' loop. 2286 static std::pair<LValue, LValue> 2287 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) { 2288 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 2289 LValue LB = 2290 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); 2291 LValue UB = 2292 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); 2293 return {LB, UB}; 2294 } 2295 2296 /// When dealing with dispatch schedules (e.g. 
dynamic, guided) we do not 2297 /// consider the lower and upper bound expressions generated by the 2298 /// worksharing loop support, but we use 0 and the iteration space size as 2299 /// constants 2300 static std::pair<llvm::Value *, llvm::Value *> 2301 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S, 2302 Address LB, Address UB) { 2303 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 2304 const Expr *IVExpr = LS.getIterationVariable(); 2305 const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType()); 2306 llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0); 2307 llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration()); 2308 return {LBVal, UBVal}; 2309 } 2310 2311 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { 2312 bool HasLastprivates = false; 2313 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 2314 PrePostActionTy &) { 2315 OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel()); 2316 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 2317 emitForLoopBounds, 2318 emitDispatchForLoopBounds); 2319 }; 2320 { 2321 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2322 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, 2323 S.hasCancel()); 2324 } 2325 2326 // Emit an implicit barrier at the end. 
  // The barrier is skipped only for 'nowait' loops without lastprivates.
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
  }
}

/// Emit code for '#pragma omp for simd': the worksharing loop is emitted as an
/// inlined 'simd' region, followed by the implicit barrier unless 'nowait' is
/// present and there are no lastprivates.
void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
  // Set by the region body; read after the inlined region has been emitted.
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                                 emitForLoopBounds,
                                                 emitDispatchForLoopBounds);
  };
  {
    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
  }

  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
  }
}

/// Create a memory temporary of type \p Ty named \p Name and return it as an
/// lvalue; if \p Init is non-null it is stored into the temporary as its
/// initial value.
static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
                                const Twine &Name,
                                llvm::Value *Init = nullptr) {
  auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
  if (Init)
    CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
  return LVal;
}

/// Emit the body of a 'sections'-like directive \p S as a statically scheduled
/// loop over section indices whose body is a switch selecting the section to
/// run.
///
/// If the captured statement is a compound statement, each child becomes one
/// case; otherwise the captured statement itself is the only case (index 0).
void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
  // Null when the captured statement is not a compound statement.
  auto *CS = dyn_cast<CompoundStmt>(Stmt);
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF,
                                                    PrePostActionTy &) {
    auto &C = CGF.CGM.getContext();
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    // Global upper bound: index of the last section (0 for a single one).
    auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1)
                                      : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    // Opaque expressions bound to IV and UB, so that the synthesized condition
    // and increment below refer to the temporaries created above.
    OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                        OK_Ordinary, S.getLocStart(), FPOptions());
    // Increment for loop counter.
    UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
                      S.getLocStart());
    auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      // ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      auto *SwitchStmt = CGF.Builder.CreateSwitch(
          CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
          CS == nullptr ? 1 : CS->size());
      if (CS) {
        // One case per child of the compound statement, numbered from 0.
        unsigned CaseNumber = 0;
        for (auto *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        // Single section: the captured statement itself is case 0.
        auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(Stmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of lastprivate
      // variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getLocStart(), ScheduleKind, /*IVSize=*/32,
        /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(),
        UB.getAddress(), ST.getAddress());
    // UB = min(UB, GlobalUB);
    auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
    auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd());
    };
    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(
        CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart()));
        });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart())));
  };

  // Only 'sections' and 'parallel sections' carry a cancel flag here; for any
  // other directive kind HasCancel stays false.
  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads after the final copy of the
    // lastprivate variables (this branch is taken only when lastprivates are
    // present).
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                           OMPD_unknown);
  }
}

/// Emit code for '#pragma omp sections': the sections loop is emitted inside
/// its own lexical scope, followed by the implicit barrier unless 'nowait' is
/// present.
void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  {
    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
    EmitSections(S);
  }
  // Emit an implicit barrier at the end.
2504 if (!S.getSingleClause<OMPNowaitClause>()) { 2505 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), 2506 OMPD_sections); 2507 } 2508 } 2509 2510 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { 2511 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2512 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2513 }; 2514 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2515 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen, 2516 S.hasCancel()); 2517 } 2518 2519 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { 2520 llvm::SmallVector<const Expr *, 8> CopyprivateVars; 2521 llvm::SmallVector<const Expr *, 8> DestExprs; 2522 llvm::SmallVector<const Expr *, 8> SrcExprs; 2523 llvm::SmallVector<const Expr *, 8> AssignmentOps; 2524 // Check if there are any 'copyprivate' clauses associated with this 2525 // 'single' construct. 2526 // Build a list of copyprivate variables along with helper expressions 2527 // (<source>, <destination>, <destination>=<source> expressions) 2528 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) { 2529 CopyprivateVars.append(C->varlists().begin(), C->varlists().end()); 2530 DestExprs.append(C->destination_exprs().begin(), 2531 C->destination_exprs().end()); 2532 SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end()); 2533 AssignmentOps.append(C->assignment_ops().begin(), 2534 C->assignment_ops().end()); 2535 } 2536 // Emit code for 'single' region along with 'copyprivate' clauses 2537 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2538 Action.Enter(CGF); 2539 OMPPrivateScope SingleScope(CGF); 2540 (void)CGF.EmitOMPFirstprivateClause(S, SingleScope); 2541 CGF.EmitOMPPrivateClause(S, SingleScope); 2542 (void)SingleScope.Privatize(); 2543 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2544 }; 2545 { 2546 OMPLexicalScope Scope(*this, S, 
/*AsInlined=*/true); 2547 CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(), 2548 CopyprivateVars, DestExprs, 2549 SrcExprs, AssignmentOps); 2550 } 2551 // Emit an implicit barrier at the end (to avoid data race on firstprivate 2552 // init or if no 'nowait' clause was specified and no 'copyprivate' clause). 2553 if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) { 2554 CGM.getOpenMPRuntime().emitBarrierCall( 2555 *this, S.getLocStart(), 2556 S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single); 2557 } 2558 } 2559 2560 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { 2561 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2562 Action.Enter(CGF); 2563 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2564 }; 2565 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2566 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart()); 2567 } 2568 2569 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { 2570 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2571 Action.Enter(CGF); 2572 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2573 }; 2574 Expr *Hint = nullptr; 2575 if (auto *HintClause = S.getSingleClause<OMPHintClause>()) 2576 Hint = HintClause->getHint(); 2577 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2578 CGM.getOpenMPRuntime().emitCriticalRegion(*this, 2579 S.getDirectiveName().getAsString(), 2580 CodeGen, S.getLocStart(), Hint); 2581 } 2582 2583 void CodeGenFunction::EmitOMPParallelForDirective( 2584 const OMPParallelForDirective &S) { 2585 // Emit directive as a combined directive that consists of two implicit 2586 // directives: 'parallel' with 'for' directive. 
2587 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2588 OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel()); 2589 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 2590 emitDispatchForLoopBounds); 2591 }; 2592 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, 2593 emitEmptyBoundParameters); 2594 } 2595 2596 void CodeGenFunction::EmitOMPParallelForSimdDirective( 2597 const OMPParallelForSimdDirective &S) { 2598 // Emit directive as a combined directive that consists of two implicit 2599 // directives: 'parallel' with 'for' directive. 2600 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2601 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 2602 emitDispatchForLoopBounds); 2603 }; 2604 emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen, 2605 emitEmptyBoundParameters); 2606 } 2607 2608 void CodeGenFunction::EmitOMPParallelSectionsDirective( 2609 const OMPParallelSectionsDirective &S) { 2610 // Emit directive as a combined directive that consists of two implicit 2611 // directives: 'parallel' with 'sections' directive. 2612 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2613 CGF.EmitSections(S); 2614 }; 2615 emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen, 2616 emitEmptyBoundParameters); 2617 } 2618 2619 void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, 2620 const RegionCodeGenTy &BodyGen, 2621 const TaskGenTy &TaskGen, 2622 OMPTaskDataTy &Data) { 2623 // Emit outlined function for task construct. 
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  // Parameters of the captured declaration are addressed by position: the
  // one after the first is used as the part id and the one at index 4 is
  // forwarded to the outlined-function emitter as TaskT.
  auto *I = CS->getCapturedDecl()->param_begin();
  auto *PartId = std::next(I);
  auto *TaskT = std::next(I, 4);
  // Check if the task is final
  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    auto *Cond = Clause->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Data.Final.setInt(CondConstant);
    else
      Data.Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Data.Final.setInt(/*IntVal=*/false);
  }
  // Check if the task has 'priority' clause.
  if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
    auto *Prio = Clause->getPriority();
    Data.Priority.setInt(/*IntVal=*/true);
    // The priority value is converted to a signed 32-bit integer.
    Data.Priority.setPointer(EmitScalarConversion(
        EmitScalarExpr(Prio), Prio->getType(),
        getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
        Prio->getExprLoc()));
  }
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >=0 for untied task).
  // Canonical declarations already recorded; a variable listed more than once
  // is only collected the first time.
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  // Get list of private variables.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.PrivateVars.push_back(*IRef);
        Data.PrivateCopies.push_back(IInit);
      }
      ++IRef;
    }
  }
  // The set is reset here, so the firstprivate and lastprivate lists below
  // are de-duplicated against each other but not against the private list.
  EmittedAsPrivate.clear();
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.FirstprivateVars.push_back(*IRef);
        Data.FirstprivateCopies.push_back(IInit);
        Data.FirstprivateInits.push_back(*IElemInitRef);
      }
      ++IRef;
      ++IElemInitRef;
    }
  }
  // Get list of lastprivate variables (for taskloops).
  // Maps the declaration behind each destination expression to the reference
  // to the original variable it was listed with.
  llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ID = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.LastprivateVars.push_back(*IRef);
        Data.LastprivateCopies.push_back(IInit);
      }
      // Recorded for every list item, including ones skipped above.
      LastprivateDstsOrigs.insert(
          {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
           cast<DeclRefExpr>(*IRef)});
      ++IRef;
      ++ID;
    }
  }
  // Collect reduction items; the helper lists of each clause are walked in
  // lock-step with its variable list.
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (const auto *Ref : C->varlists()) {
      Data.ReductionVars.emplace_back(Ref);
      Data.ReductionCopies.emplace_back(*IPriv);
      Data.ReductionOps.emplace_back(*IRed);
      LHSs.emplace_back(*ILHS);
      RHSs.emplace_back(*IRHS);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ILHS, 1);
      std::advance(IRHS, 1);
    }
  }
  Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
      *this, S.getLocStart(), LHSs, RHSs, Data);
  // Build list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>())
    for (auto *IRef : C->varlists())
      Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
  // Body generator for the outlined task function: remaps private,
  // firstprivate, lastprivate and reduction variables to their task-local
  // storage before the user-provided body is emitted.
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
        !Data.LastprivateVars.empty()) {
      // Captured-declaration parameter 3 is loaded as the copy function and
      // parameter 2 as the pointer to the privates.
      auto *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
      auto *PrivatesPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
      // Map privates.
      // One pointer-typed temporary is created per variable; the copy
      // function receives the privates pointer followed by the addresses of
      // these temporaries, in private/firstprivate/lastprivate order.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      CallArgs.push_back(PrivatesPtr);
      for (auto *E : Data.PrivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (auto *E : Data.FirstprivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (auto *E : Data.LastprivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".lastpriv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      CGF.EmitRuntimeCall(CopyFn, CallArgs);
      // Lastprivate destination declarations are mapped to the address of
      // the original variable.
      for (auto &&Pair : LastprivateDstsOrigs) {
        auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
        DeclRefExpr DRE(
            const_cast<VarDecl *>(OrigVD),
            /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
                OrigVD) != nullptr,
            Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
        Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
          return CGF.EmitLValue(&DRE).getAddress();
        });
      }
      // Each variable is mapped to the pointer loaded from its temporary.
      for (auto &&Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    if (Data.Reductions) {
      OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true);
      ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
                             Data.ReductionOps);
      // Captured-declaration parameter 9 is loaded as the reductions pointer.
      llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
      for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        // Convert the returned pointer from void* to a pointer to the type of
        // the private reduction copy, keeping the alignment.
        Replacement =
            Address(CGF.EmitScalarConversion(
                        Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                        CGF.getContext().getPointerType(
                            Data.ReductionCopies[Cnt]->getType()),
                        SourceLocation()),
                    Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        Scope.addPrivate(RedCG.getBaseDecl(Cnt),
                         [Replacement]() { return Replacement; });
        // FIXME: This must be removed once the runtime library is fixed.
2802 // Emit required threadprivate variables for 2803 // initilizer/combiner/finalizer. 2804 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(), 2805 RedCG, Cnt); 2806 } 2807 } 2808 (void)Scope.Privatize(); 2809 2810 Action.Enter(CGF); 2811 BodyGen(CGF); 2812 }; 2813 auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( 2814 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied, 2815 Data.NumberOfParts); 2816 OMPLexicalScope Scope(*this, S); 2817 TaskGen(*this, OutlinedFn, Data); 2818 } 2819 2820 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { 2821 // Emit outlined function for task construct. 2822 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 2823 auto CapturedStruct = GenerateCapturedStmtArgument(*CS); 2824 auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 2825 const Expr *IfCond = nullptr; 2826 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 2827 if (C->getNameModifier() == OMPD_unknown || 2828 C->getNameModifier() == OMPD_task) { 2829 IfCond = C->getCondition(); 2830 break; 2831 } 2832 } 2833 2834 OMPTaskDataTy Data; 2835 // Check if we should emit tied or untied task. 
2836 Data.Tied = !S.getSingleClause<OMPUntiedClause>(); 2837 auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { 2838 CGF.EmitStmt(CS->getCapturedStmt()); 2839 }; 2840 auto &&TaskGen = [&S, SharedsTy, CapturedStruct, 2841 IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn, 2842 const OMPTaskDataTy &Data) { 2843 CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn, 2844 SharedsTy, CapturedStruct, IfCond, 2845 Data); 2846 }; 2847 EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); 2848 } 2849 2850 void CodeGenFunction::EmitOMPTaskyieldDirective( 2851 const OMPTaskyieldDirective &S) { 2852 CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart()); 2853 } 2854 2855 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { 2856 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier); 2857 } 2858 2859 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { 2860 CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart()); 2861 } 2862 2863 void CodeGenFunction::EmitOMPTaskgroupDirective( 2864 const OMPTaskgroupDirective &S) { 2865 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2866 Action.Enter(CGF); 2867 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2868 }; 2869 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2870 CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart()); 2871 } 2872 2873 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { 2874 CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> { 2875 if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) { 2876 return llvm::makeArrayRef(FlushClause->varlist_begin(), 2877 FlushClause->varlist_end()); 2878 } 2879 return llvm::None; 2880 }(), S.getLocStart()); 2881 } 2882 2883 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, 2884 const CodeGenLoopTy 
&CodeGenLoop, 2885 Expr *IncExpr) { 2886 // Emit the loop iteration variable. 2887 auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 2888 auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); 2889 EmitVarDecl(*IVDecl); 2890 2891 // Emit the iterations count variable. 2892 // If it is not a variable, Sema decided to calculate iterations count on each 2893 // iteration (e.g., it is foldable into a constant). 2894 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 2895 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 2896 // Emit calculation of the iterations count. 2897 EmitIgnoredExpr(S.getCalcLastIteration()); 2898 } 2899 2900 auto &RT = CGM.getOpenMPRuntime(); 2901 2902 bool HasLastprivateClause = false; 2903 // Check pre-condition. 2904 { 2905 OMPLoopScope PreInitScope(*this, S); 2906 // Skip the entire loop if we don't meet the precondition. 2907 // If the condition constant folds and can be elided, avoid emitting the 2908 // whole loop. 2909 bool CondConstant; 2910 llvm::BasicBlock *ContBlock = nullptr; 2911 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 2912 if (!CondConstant) 2913 return; 2914 } else { 2915 auto *ThenBlock = createBasicBlock("omp.precond.then"); 2916 ContBlock = createBasicBlock("omp.precond.end"); 2917 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 2918 getProfileCount(&S)); 2919 EmitBlock(ThenBlock); 2920 incrementProfileCounter(&S); 2921 } 2922 2923 // Emit 'then' code. 2924 { 2925 // Emit helper vars inits. 2926 2927 LValue LB = EmitOMPHelperVar( 2928 *this, cast<DeclRefExpr>( 2929 (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 2930 ? S.getCombinedLowerBoundVariable() 2931 : S.getLowerBoundVariable()))); 2932 LValue UB = EmitOMPHelperVar( 2933 *this, cast<DeclRefExpr>( 2934 (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 2935 ? 
S.getCombinedUpperBoundVariable() 2936 : S.getUpperBoundVariable()))); 2937 LValue ST = 2938 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 2939 LValue IL = 2940 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 2941 2942 OMPPrivateScope LoopScope(*this); 2943 if (EmitOMPFirstprivateClause(S, LoopScope)) { 2944 // Emit implicit barrier to synchronize threads and avoid data races on 2945 // initialization of firstprivate variables and post-update of 2946 // lastprivate variables. 2947 CGM.getOpenMPRuntime().emitBarrierCall( 2948 *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 2949 /*ForceSimpleCall=*/true); 2950 } 2951 EmitOMPPrivateClause(S, LoopScope); 2952 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 2953 EmitOMPPrivateLoopCounters(S, LoopScope); 2954 (void)LoopScope.Privatize(); 2955 2956 // Detect the distribute schedule kind and chunk. 2957 llvm::Value *Chunk = nullptr; 2958 OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown; 2959 if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) { 2960 ScheduleKind = C->getDistScheduleKind(); 2961 if (const auto *Ch = C->getChunkSize()) { 2962 Chunk = EmitScalarExpr(Ch); 2963 Chunk = EmitScalarConversion(Chunk, Ch->getType(), 2964 S.getIterationVariable()->getType(), 2965 S.getLocStart()); 2966 } 2967 } 2968 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 2969 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 2970 2971 // OpenMP [2.10.8, distribute Construct, Description] 2972 // If dist_schedule is specified, kind must be static. If specified, 2973 // iterations are divided into chunks of size chunk_size, chunks are 2974 // assigned to the teams of the league in a round-robin fashion in the 2975 // order of the team number. 
When no chunk_size is specified, the 2976 // iteration space is divided into chunks that are approximately equal 2977 // in size, and at most one chunk is distributed to each team of the 2978 // league. The size of the chunks is unspecified in this case. 2979 if (RT.isStaticNonchunked(ScheduleKind, 2980 /* Chunked */ Chunk != nullptr)) { 2981 RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, 2982 IVSize, IVSigned, /* Ordered = */ false, 2983 IL.getAddress(), LB.getAddress(), 2984 UB.getAddress(), ST.getAddress()); 2985 auto LoopExit = 2986 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 2987 // UB = min(UB, GlobalUB); 2988 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 2989 ? S.getCombinedEnsureUpperBound() 2990 : S.getEnsureUpperBound()); 2991 // IV = LB; 2992 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 2993 ? S.getCombinedInit() 2994 : S.getInit()); 2995 2996 Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 2997 ? S.getCombinedCond() 2998 : S.getCond(); 2999 3000 // for distribute alone, codegen 3001 // while (idx <= UB) { BODY; ++idx; } 3002 // when combined with 'for' (e.g. as in 'distribute parallel for') 3003 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } 3004 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr, 3005 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { 3006 CodeGenLoop(CGF, S, LoopExit); 3007 }, 3008 [](CodeGenFunction &) {}); 3009 EmitBlock(LoopExit.getBlock()); 3010 // Tell the runtime we are done. 3011 RT.emitForStaticFinish(*this, S.getLocStart()); 3012 } else { 3013 // Emit the outer loop, which requests its work chunk [LB..UB] from 3014 // runtime and runs the inner loop to process it. 
3015 const OMPLoopArguments LoopArguments = { 3016 LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(), 3017 Chunk}; 3018 EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments, 3019 CodeGenLoop); 3020 } 3021 3022 // Emit final copy of the lastprivate variables if IsLastIter != 0. 3023 if (HasLastprivateClause) 3024 EmitOMPLastprivateClauseFinal( 3025 S, /*NoFinals=*/false, 3026 Builder.CreateIsNotNull( 3027 EmitLoadOfScalar(IL, S.getLocStart()))); 3028 } 3029 3030 // We're now done with the loop, so jump to the continuation block. 3031 if (ContBlock) { 3032 EmitBranch(ContBlock); 3033 EmitBlock(ContBlock, true); 3034 } 3035 } 3036 } 3037 3038 void CodeGenFunction::EmitOMPDistributeDirective( 3039 const OMPDistributeDirective &S) { 3040 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3041 3042 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 3043 }; 3044 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 3045 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, 3046 false); 3047 } 3048 3049 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, 3050 const CapturedStmt *S) { 3051 CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); 3052 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; 3053 CGF.CapturedStmtInfo = &CapStmtInfo; 3054 auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S); 3055 Fn->addFnAttr(llvm::Attribute::NoInline); 3056 return Fn; 3057 } 3058 3059 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { 3060 if (!S.getAssociatedStmt()) { 3061 for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) 3062 CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC); 3063 return; 3064 } 3065 auto *C = S.getSingleClause<OMPSIMDClause>(); 3066 auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF, 3067 PrePostActionTy &Action) { 3068 if (C) { 3069 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 3070 
llvm::SmallVector<llvm::Value *, 16> CapturedVars; 3071 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 3072 auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); 3073 CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars); 3074 } else { 3075 Action.Enter(CGF); 3076 CGF.EmitStmt( 3077 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3078 } 3079 }; 3080 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 3081 CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C); 3082 } 3083 3084 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val, 3085 QualType SrcType, QualType DestType, 3086 SourceLocation Loc) { 3087 assert(CGF.hasScalarEvaluationKind(DestType) && 3088 "DestType must have scalar evaluation kind."); 3089 assert(!Val.isAggregate() && "Must be a scalar or complex."); 3090 return Val.isScalar() 3091 ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType, 3092 Loc) 3093 : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType, 3094 DestType, Loc); 3095 } 3096 3097 static CodeGenFunction::ComplexPairTy 3098 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, 3099 QualType DestType, SourceLocation Loc) { 3100 assert(CGF.getEvaluationKind(DestType) == TEK_Complex && 3101 "DestType must have complex evaluation kind."); 3102 CodeGenFunction::ComplexPairTy ComplexVal; 3103 if (Val.isScalar()) { 3104 // Convert the input element to the element type of the complex. 
3105 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 3106 auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, 3107 DestElementType, Loc); 3108 ComplexVal = CodeGenFunction::ComplexPairTy( 3109 ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType())); 3110 } else { 3111 assert(Val.isComplex() && "Must be a scalar or complex."); 3112 auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType(); 3113 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 3114 ComplexVal.first = CGF.EmitScalarConversion( 3115 Val.getComplexVal().first, SrcElementType, DestElementType, Loc); 3116 ComplexVal.second = CGF.EmitScalarConversion( 3117 Val.getComplexVal().second, SrcElementType, DestElementType, Loc); 3118 } 3119 return ComplexVal; 3120 } 3121 3122 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst, 3123 LValue LVal, RValue RVal) { 3124 if (LVal.isGlobalReg()) { 3125 CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal); 3126 } else { 3127 CGF.EmitAtomicStore(RVal, LVal, 3128 IsSeqCst ? 
llvm::AtomicOrdering::SequentiallyConsistent 3129 : llvm::AtomicOrdering::Monotonic, 3130 LVal.isVolatile(), /*IsInit=*/false); 3131 } 3132 } 3133 3134 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal, 3135 QualType RValTy, SourceLocation Loc) { 3136 switch (getEvaluationKind(LVal.getType())) { 3137 case TEK_Scalar: 3138 EmitStoreThroughLValue(RValue::get(convertToScalarValue( 3139 *this, RVal, RValTy, LVal.getType(), Loc)), 3140 LVal); 3141 break; 3142 case TEK_Complex: 3143 EmitStoreOfComplex( 3144 convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal, 3145 /*isInit=*/false); 3146 break; 3147 case TEK_Aggregate: 3148 llvm_unreachable("Must be a scalar or complex."); 3149 } 3150 } 3151 3152 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst, 3153 const Expr *X, const Expr *V, 3154 SourceLocation Loc) { 3155 // v = x; 3156 assert(V->isLValue() && "V of 'omp atomic read' is not lvalue"); 3157 assert(X->isLValue() && "X of 'omp atomic read' is not lvalue"); 3158 LValue XLValue = CGF.EmitLValue(X); 3159 LValue VLValue = CGF.EmitLValue(V); 3160 RValue Res = XLValue.isGlobalReg() 3161 ? CGF.EmitLoadOfLValue(XLValue, Loc) 3162 : CGF.EmitAtomicLoad( 3163 XLValue, Loc, 3164 IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 3165 : llvm::AtomicOrdering::Monotonic, 3166 XLValue.isVolatile()); 3167 // OpenMP, 2.12.6, atomic Construct 3168 // Any atomic construct with a seq_cst clause forces the atomically 3169 // performed operation to include an implicit flush operation without a 3170 // list. 
3171 if (IsSeqCst) 3172 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3173 CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc); 3174 } 3175 3176 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst, 3177 const Expr *X, const Expr *E, 3178 SourceLocation Loc) { 3179 // x = expr; 3180 assert(X->isLValue() && "X of 'omp atomic write' is not lvalue"); 3181 emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E)); 3182 // OpenMP, 2.12.6, atomic Construct 3183 // Any atomic construct with a seq_cst clause forces the atomically 3184 // performed operation to include an implicit flush operation without a 3185 // list. 3186 if (IsSeqCst) 3187 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3188 } 3189 3190 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, 3191 RValue Update, 3192 BinaryOperatorKind BO, 3193 llvm::AtomicOrdering AO, 3194 bool IsXLHSInRHSPart) { 3195 auto &Context = CGF.CGM.getContext(); 3196 // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x' 3197 // expression is simple and atomic is allowed for the given type for the 3198 // target platform. 
3199 if (BO == BO_Comma || !Update.isScalar() || 3200 !Update.getScalarVal()->getType()->isIntegerTy() || 3201 !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) && 3202 (Update.getScalarVal()->getType() != 3203 X.getAddress().getElementType())) || 3204 !X.getAddress().getElementType()->isIntegerTy() || 3205 !Context.getTargetInfo().hasBuiltinAtomic( 3206 Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment()))) 3207 return std::make_pair(false, RValue::get(nullptr)); 3208 3209 llvm::AtomicRMWInst::BinOp RMWOp; 3210 switch (BO) { 3211 case BO_Add: 3212 RMWOp = llvm::AtomicRMWInst::Add; 3213 break; 3214 case BO_Sub: 3215 if (!IsXLHSInRHSPart) 3216 return std::make_pair(false, RValue::get(nullptr)); 3217 RMWOp = llvm::AtomicRMWInst::Sub; 3218 break; 3219 case BO_And: 3220 RMWOp = llvm::AtomicRMWInst::And; 3221 break; 3222 case BO_Or: 3223 RMWOp = llvm::AtomicRMWInst::Or; 3224 break; 3225 case BO_Xor: 3226 RMWOp = llvm::AtomicRMWInst::Xor; 3227 break; 3228 case BO_LT: 3229 RMWOp = X.getType()->hasSignedIntegerRepresentation() 3230 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min 3231 : llvm::AtomicRMWInst::Max) 3232 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin 3233 : llvm::AtomicRMWInst::UMax); 3234 break; 3235 case BO_GT: 3236 RMWOp = X.getType()->hasSignedIntegerRepresentation() 3237 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max 3238 : llvm::AtomicRMWInst::Min) 3239 : (IsXLHSInRHSPart ? 
llvm::AtomicRMWInst::UMax 3240 : llvm::AtomicRMWInst::UMin); 3241 break; 3242 case BO_Assign: 3243 RMWOp = llvm::AtomicRMWInst::Xchg; 3244 break; 3245 case BO_Mul: 3246 case BO_Div: 3247 case BO_Rem: 3248 case BO_Shl: 3249 case BO_Shr: 3250 case BO_LAnd: 3251 case BO_LOr: 3252 return std::make_pair(false, RValue::get(nullptr)); 3253 case BO_PtrMemD: 3254 case BO_PtrMemI: 3255 case BO_LE: 3256 case BO_GE: 3257 case BO_EQ: 3258 case BO_NE: 3259 case BO_AddAssign: 3260 case BO_SubAssign: 3261 case BO_AndAssign: 3262 case BO_OrAssign: 3263 case BO_XorAssign: 3264 case BO_MulAssign: 3265 case BO_DivAssign: 3266 case BO_RemAssign: 3267 case BO_ShlAssign: 3268 case BO_ShrAssign: 3269 case BO_Comma: 3270 llvm_unreachable("Unsupported atomic update operation"); 3271 } 3272 auto *UpdateVal = Update.getScalarVal(); 3273 if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) { 3274 UpdateVal = CGF.Builder.CreateIntCast( 3275 IC, X.getAddress().getElementType(), 3276 X.getType()->hasSignedIntegerRepresentation()); 3277 } 3278 auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO); 3279 return std::make_pair(true, RValue::get(Res)); 3280 } 3281 3282 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr( 3283 LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart, 3284 llvm::AtomicOrdering AO, SourceLocation Loc, 3285 const llvm::function_ref<RValue(RValue)> &CommonGen) { 3286 // Update expressions are allowed to have the following forms: 3287 // x binop= expr; -> xrval + expr; 3288 // x++, ++x -> xrval + 1; 3289 // x--, --x -> xrval - 1; 3290 // x = x binop expr; -> xrval binop expr 3291 // x = expr Op x; - > expr binop xrval; 3292 auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart); 3293 if (!Res.first) { 3294 if (X.isGlobalReg()) { 3295 // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop 3296 // 'xrval'. 
3297 EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X); 3298 } else { 3299 // Perform compare-and-swap procedure. 3300 EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified()); 3301 } 3302 } 3303 return Res; 3304 } 3305 3306 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst, 3307 const Expr *X, const Expr *E, 3308 const Expr *UE, bool IsXLHSInRHSPart, 3309 SourceLocation Loc) { 3310 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && 3311 "Update expr in 'atomic update' must be a binary operator."); 3312 auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts()); 3313 // Update expressions are allowed to have the following forms: 3314 // x binop= expr; -> xrval + expr; 3315 // x++, ++x -> xrval + 1; 3316 // x--, --x -> xrval - 1; 3317 // x = x binop expr; -> xrval binop expr 3318 // x = expr Op x; - > expr binop xrval; 3319 assert(X->isLValue() && "X of 'omp atomic update' is not lvalue"); 3320 LValue XLValue = CGF.EmitLValue(X); 3321 RValue ExprRValue = CGF.EmitAnyExpr(E); 3322 auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 3323 : llvm::AtomicOrdering::Monotonic; 3324 auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); 3325 auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); 3326 auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; 3327 auto *ERValExpr = IsXLHSInRHSPart ? 
RHS : LHS; 3328 auto Gen = 3329 [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue { 3330 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 3331 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); 3332 return CGF.EmitAnyExpr(UE); 3333 }; 3334 (void)CGF.EmitOMPAtomicSimpleUpdateExpr( 3335 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen); 3336 // OpenMP, 2.12.6, atomic Construct 3337 // Any atomic construct with a seq_cst clause forces the atomically 3338 // performed operation to include an implicit flush operation without a 3339 // list. 3340 if (IsSeqCst) 3341 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3342 } 3343 3344 static RValue convertToType(CodeGenFunction &CGF, RValue Value, 3345 QualType SourceType, QualType ResType, 3346 SourceLocation Loc) { 3347 switch (CGF.getEvaluationKind(ResType)) { 3348 case TEK_Scalar: 3349 return RValue::get( 3350 convertToScalarValue(CGF, Value, SourceType, ResType, Loc)); 3351 case TEK_Complex: { 3352 auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc); 3353 return RValue::getComplex(Res.first, Res.second); 3354 } 3355 case TEK_Aggregate: 3356 break; 3357 } 3358 llvm_unreachable("Must be a scalar or complex."); 3359 } 3360 3361 static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, 3362 bool IsPostfixUpdate, const Expr *V, 3363 const Expr *X, const Expr *E, 3364 const Expr *UE, bool IsXLHSInRHSPart, 3365 SourceLocation Loc) { 3366 assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue"); 3367 assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue"); 3368 RValue NewVVal; 3369 LValue VLValue = CGF.EmitLValue(V); 3370 LValue XLValue = CGF.EmitLValue(X); 3371 RValue ExprRValue = CGF.EmitAnyExpr(E); 3372 auto AO = IsSeqCst ? 
llvm::AtomicOrdering::SequentiallyConsistent 3373 : llvm::AtomicOrdering::Monotonic; 3374 QualType NewVValType; 3375 if (UE) { 3376 // 'x' is updated with some additional value. 3377 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && 3378 "Update expr in 'atomic capture' must be a binary operator."); 3379 auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts()); 3380 // Update expressions are allowed to have the following forms: 3381 // x binop= expr; -> xrval + expr; 3382 // x++, ++x -> xrval + 1; 3383 // x--, --x -> xrval - 1; 3384 // x = x binop expr; -> xrval binop expr 3385 // x = expr Op x; - > expr binop xrval; 3386 auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); 3387 auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); 3388 auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; 3389 NewVValType = XRValExpr->getType(); 3390 auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS; 3391 auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr, 3392 IsPostfixUpdate](RValue XRValue) -> RValue { 3393 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 3394 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); 3395 RValue Res = CGF.EmitAnyExpr(UE); 3396 NewVVal = IsPostfixUpdate ? XRValue : Res; 3397 return Res; 3398 }; 3399 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( 3400 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen); 3401 if (Res.first) { 3402 // 'atomicrmw' instruction was generated. 3403 if (IsPostfixUpdate) { 3404 // Use old value from 'atomicrmw'. 3405 NewVVal = Res.second; 3406 } else { 3407 // 'atomicrmw' does not provide new value, so evaluate it using old 3408 // value of 'x'. 3409 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 3410 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second); 3411 NewVVal = CGF.EmitAnyExpr(UE); 3412 } 3413 } 3414 } else { 3415 // 'x' is simply rewritten with some 'expr'. 
3416 NewVValType = X->getType().getNonReferenceType(); 3417 ExprRValue = convertToType(CGF, ExprRValue, E->getType(), 3418 X->getType().getNonReferenceType(), Loc); 3419 auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue { 3420 NewVVal = XRValue; 3421 return ExprRValue; 3422 }; 3423 // Try to perform atomicrmw xchg, otherwise simple exchange. 3424 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( 3425 XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO, 3426 Loc, Gen); 3427 if (Res.first) { 3428 // 'atomicrmw' instruction was generated. 3429 NewVVal = IsPostfixUpdate ? Res.second : ExprRValue; 3430 } 3431 } 3432 // Emit post-update store to 'v' of old/new 'x' value. 3433 CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc); 3434 // OpenMP, 2.12.6, atomic Construct 3435 // Any atomic construct with a seq_cst clause forces the atomically 3436 // performed operation to include an implicit flush operation without a 3437 // list. 3438 if (IsSeqCst) 3439 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3440 } 3441 3442 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, 3443 bool IsSeqCst, bool IsPostfixUpdate, 3444 const Expr *X, const Expr *V, const Expr *E, 3445 const Expr *UE, bool IsXLHSInRHSPart, 3446 SourceLocation Loc) { 3447 switch (Kind) { 3448 case OMPC_read: 3449 EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc); 3450 break; 3451 case OMPC_write: 3452 EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc); 3453 break; 3454 case OMPC_unknown: 3455 case OMPC_update: 3456 EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc); 3457 break; 3458 case OMPC_capture: 3459 EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE, 3460 IsXLHSInRHSPart, Loc); 3461 break; 3462 case OMPC_if: 3463 case OMPC_final: 3464 case OMPC_num_threads: 3465 case OMPC_private: 3466 case OMPC_firstprivate: 3467 case OMPC_lastprivate: 3468 case OMPC_reduction: 3469 case OMPC_task_reduction: 3470 case 
OMPC_in_reduction: 3471 case OMPC_safelen: 3472 case OMPC_simdlen: 3473 case OMPC_collapse: 3474 case OMPC_default: 3475 case OMPC_seq_cst: 3476 case OMPC_shared: 3477 case OMPC_linear: 3478 case OMPC_aligned: 3479 case OMPC_copyin: 3480 case OMPC_copyprivate: 3481 case OMPC_flush: 3482 case OMPC_proc_bind: 3483 case OMPC_schedule: 3484 case OMPC_ordered: 3485 case OMPC_nowait: 3486 case OMPC_untied: 3487 case OMPC_threadprivate: 3488 case OMPC_depend: 3489 case OMPC_mergeable: 3490 case OMPC_device: 3491 case OMPC_threads: 3492 case OMPC_simd: 3493 case OMPC_map: 3494 case OMPC_num_teams: 3495 case OMPC_thread_limit: 3496 case OMPC_priority: 3497 case OMPC_grainsize: 3498 case OMPC_nogroup: 3499 case OMPC_num_tasks: 3500 case OMPC_hint: 3501 case OMPC_dist_schedule: 3502 case OMPC_defaultmap: 3503 case OMPC_uniform: 3504 case OMPC_to: 3505 case OMPC_from: 3506 case OMPC_use_device_ptr: 3507 case OMPC_is_device_ptr: 3508 llvm_unreachable("Clause is not allowed in 'omp atomic'."); 3509 } 3510 } 3511 3512 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { 3513 bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>(); 3514 OpenMPClauseKind Kind = OMPC_unknown; 3515 for (auto *C : S.clauses()) { 3516 // Find first clause (skip seq_cst clause, if it is first). 3517 if (C->getClauseKind() != OMPC_seq_cst) { 3518 Kind = C->getClauseKind(); 3519 break; 3520 } 3521 } 3522 3523 const auto *CS = 3524 S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 3525 if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) { 3526 enterFullExpression(EWC); 3527 } 3528 // Processing for statements under 'atomic capture'. 
3529 if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) { 3530 for (const auto *C : Compound->body()) { 3531 if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) { 3532 enterFullExpression(EWC); 3533 } 3534 } 3535 } 3536 3537 auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF, 3538 PrePostActionTy &) { 3539 CGF.EmitStopPoint(CS); 3540 EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(), 3541 S.getV(), S.getExpr(), S.getUpdateExpr(), 3542 S.isXLHSInRHSPart(), S.getLocStart()); 3543 }; 3544 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 3545 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen); 3546 } 3547 3548 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, 3549 const OMPExecutableDirective &S, 3550 const RegionCodeGenTy &CodeGen) { 3551 assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind())); 3552 CodeGenModule &CGM = CGF.CGM; 3553 const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt()); 3554 3555 llvm::Function *Fn = nullptr; 3556 llvm::Constant *FnID = nullptr; 3557 3558 const Expr *IfCond = nullptr; 3559 // Check for the at most one if clause associated with the target region. 3560 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 3561 if (C->getNameModifier() == OMPD_unknown || 3562 C->getNameModifier() == OMPD_target) { 3563 IfCond = C->getCondition(); 3564 break; 3565 } 3566 } 3567 3568 // Check if we have any device clause associated with the directive. 3569 const Expr *Device = nullptr; 3570 if (auto *C = S.getSingleClause<OMPDeviceClause>()) { 3571 Device = C->getDevice(); 3572 } 3573 3574 // Check if we have an if clause whose conditional always evaluates to false 3575 // or if we do not have any targets specified. If so the target region is not 3576 // an offload entry point. 
3577 bool IsOffloadEntry = true; 3578 if (IfCond) { 3579 bool Val; 3580 if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val) 3581 IsOffloadEntry = false; 3582 } 3583 if (CGM.getLangOpts().OMPTargetTriples.empty()) 3584 IsOffloadEntry = false; 3585 3586 assert(CGF.CurFuncDecl && "No parent declaration for target region!"); 3587 StringRef ParentName; 3588 // In case we have Ctors/Dtors we use the complete type variant to produce 3589 // the mangling of the device outlined kernel. 3590 if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl)) 3591 ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete)); 3592 else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl)) 3593 ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete)); 3594 else 3595 ParentName = 3596 CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl))); 3597 3598 // Emit target region as a standalone region. 3599 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, 3600 IsOffloadEntry, CodeGen); 3601 OMPLexicalScope Scope(CGF, S); 3602 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 3603 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 3604 CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device, 3605 CapturedVars); 3606 } 3607 3608 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S, 3609 PrePostActionTy &Action) { 3610 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 3611 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 3612 CGF.EmitOMPPrivateClause(S, PrivateScope); 3613 (void)PrivateScope.Privatize(); 3614 3615 Action.Enter(CGF); 3616 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3617 } 3618 3619 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM, 3620 StringRef ParentName, 3621 const OMPTargetDirective &S) { 3622 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3623 emitTargetRegion(CGF, S, Action); 3624 }; 3625 
llvm::Function *Fn; 3626 llvm::Constant *Addr; 3627 // Emit target region as a standalone region. 3628 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 3629 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 3630 assert(Fn && Addr && "Target device function emission failed."); 3631 } 3632 3633 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { 3634 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3635 emitTargetRegion(CGF, S, Action); 3636 }; 3637 emitCommonOMPTargetDirective(*this, S, CodeGen); 3638 } 3639 3640 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, 3641 const OMPExecutableDirective &S, 3642 OpenMPDirectiveKind InnermostKind, 3643 const RegionCodeGenTy &CodeGen) { 3644 const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams); 3645 auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction( 3646 S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); 3647 3648 const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>(); 3649 const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>(); 3650 if (NT || TL) { 3651 Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr; 3652 Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr; 3653 3654 CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit, 3655 S.getLocStart()); 3656 } 3657 3658 OMPTeamsScope Scope(CGF, S); 3659 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 3660 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 3661 CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn, 3662 CapturedVars); 3663 } 3664 3665 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { 3666 // Emit teams region as a standalone region. 
3667 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3668 OMPPrivateScope PrivateScope(CGF); 3669 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 3670 CGF.EmitOMPPrivateClause(S, PrivateScope); 3671 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 3672 (void)PrivateScope.Privatize(); 3673 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3674 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 3675 }; 3676 emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen); 3677 emitPostUpdateForReductionClause( 3678 *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 3679 } 3680 3681 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action, 3682 const OMPTargetTeamsDirective &S) { 3683 auto *CS = S.getCapturedStmt(OMPD_teams); 3684 Action.Enter(CGF); 3685 auto &&CodeGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { 3686 // TODO: Add support for clauses. 3687 CGF.EmitStmt(CS->getCapturedStmt()); 3688 }; 3689 emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen); 3690 } 3691 3692 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 3693 CodeGenModule &CGM, StringRef ParentName, 3694 const OMPTargetTeamsDirective &S) { 3695 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3696 emitTargetTeamsRegion(CGF, Action, S); 3697 }; 3698 llvm::Function *Fn; 3699 llvm::Constant *Addr; 3700 // Emit target region as a standalone region. 
3701 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 3702 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 3703 assert(Fn && Addr && "Target device function emission failed."); 3704 } 3705 3706 void CodeGenFunction::EmitOMPTargetTeamsDirective( 3707 const OMPTargetTeamsDirective &S) { 3708 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3709 emitTargetTeamsRegion(CGF, Action, S); 3710 }; 3711 emitCommonOMPTargetDirective(*this, S, CodeGen); 3712 } 3713 3714 void CodeGenFunction::EmitOMPCancellationPointDirective( 3715 const OMPCancellationPointDirective &S) { 3716 CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(), 3717 S.getCancelRegion()); 3718 } 3719 3720 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { 3721 const Expr *IfCond = nullptr; 3722 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 3723 if (C->getNameModifier() == OMPD_unknown || 3724 C->getNameModifier() == OMPD_cancel) { 3725 IfCond = C->getCondition(); 3726 break; 3727 } 3728 } 3729 CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond, 3730 S.getCancelRegion()); 3731 } 3732 3733 CodeGenFunction::JumpDest 3734 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { 3735 if (Kind == OMPD_parallel || Kind == OMPD_task || 3736 Kind == OMPD_target_parallel) 3737 return ReturnBlock; 3738 assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections || 3739 Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for || 3740 Kind == OMPD_distribute_parallel_for || 3741 Kind == OMPD_target_parallel_for); 3742 return OMPCancelStack.getExitBlock(); 3743 } 3744 3745 void CodeGenFunction::EmitOMPUseDevicePtrClause( 3746 const OMPClause &NC, OMPPrivateScope &PrivateScope, 3747 const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) { 3748 const auto &C = cast<OMPUseDevicePtrClause>(NC); 3749 auto OrigVarIt = C.varlist_begin(); 3750 auto InitIt = C.inits().begin(); 
3751 for (auto PvtVarIt : C.private_copies()) { 3752 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl()); 3753 auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl()); 3754 auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl()); 3755 3756 // In order to identify the right initializer we need to match the 3757 // declaration used by the mapping logic. In some cases we may get 3758 // OMPCapturedExprDecl that refers to the original declaration. 3759 const ValueDecl *MatchingVD = OrigVD; 3760 if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) { 3761 // OMPCapturedExprDecl are used to privative fields of the current 3762 // structure. 3763 auto *ME = cast<MemberExpr>(OED->getInit()); 3764 assert(isa<CXXThisExpr>(ME->getBase()) && 3765 "Base should be the current struct!"); 3766 MatchingVD = ME->getMemberDecl(); 3767 } 3768 3769 // If we don't have information about the current list item, move on to 3770 // the next one. 3771 auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD); 3772 if (InitAddrIt == CaptureDeviceAddrMap.end()) 3773 continue; 3774 3775 bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { 3776 // Initialize the temporary initialization variable with the address we 3777 // get from the runtime library. We have to cast the source address 3778 // because it is always a void *. References are materialized in the 3779 // privatization scope, so the initialization here disregards the fact 3780 // the original variable is a reference. 3781 QualType AddrQTy = 3782 getContext().getPointerType(OrigVD->getType().getNonReferenceType()); 3783 llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy); 3784 Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy); 3785 setAddrOfLocalVar(InitVD, InitAddr); 3786 3787 // Emit private declaration, it will be initialized by the value we 3788 // declaration we just added to the local declarations map. 
3789 EmitDecl(*PvtVD); 3790 3791 // The initialization variables reached its purpose in the emission 3792 // ofthe previous declaration, so we don't need it anymore. 3793 LocalDeclMap.erase(InitVD); 3794 3795 // Return the address of the private variable. 3796 return GetAddrOfLocalVar(PvtVD); 3797 }); 3798 assert(IsRegistered && "firstprivate var already registered as private"); 3799 // Silence the warning about unused variable. 3800 (void)IsRegistered; 3801 3802 ++OrigVarIt; 3803 ++InitIt; 3804 } 3805 } 3806 3807 // Generate the instructions for '#pragma omp target data' directive. 3808 void CodeGenFunction::EmitOMPTargetDataDirective( 3809 const OMPTargetDataDirective &S) { 3810 CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true); 3811 3812 // Create a pre/post action to signal the privatization of the device pointer. 3813 // This action can be replaced by the OpenMP runtime code generation to 3814 // deactivate privatization. 3815 bool PrivatizeDevicePointers = false; 3816 class DevicePointerPrivActionTy : public PrePostActionTy { 3817 bool &PrivatizeDevicePointers; 3818 3819 public: 3820 explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers) 3821 : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {} 3822 void Enter(CodeGenFunction &CGF) override { 3823 PrivatizeDevicePointers = true; 3824 } 3825 }; 3826 DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers); 3827 3828 auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers]( 3829 CodeGenFunction &CGF, PrePostActionTy &Action) { 3830 auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3831 CGF.EmitStmt( 3832 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3833 }; 3834 3835 // Codegen that selects wheather to generate the privatization code or not. 
3836 auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers, 3837 &InnermostCodeGen](CodeGenFunction &CGF, 3838 PrePostActionTy &Action) { 3839 RegionCodeGenTy RCG(InnermostCodeGen); 3840 PrivatizeDevicePointers = false; 3841 3842 // Call the pre-action to change the status of PrivatizeDevicePointers if 3843 // needed. 3844 Action.Enter(CGF); 3845 3846 if (PrivatizeDevicePointers) { 3847 OMPPrivateScope PrivateScope(CGF); 3848 // Emit all instances of the use_device_ptr clause. 3849 for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>()) 3850 CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope, 3851 Info.CaptureDeviceAddrMap); 3852 (void)PrivateScope.Privatize(); 3853 RCG(CGF); 3854 } else 3855 RCG(CGF); 3856 }; 3857 3858 // Forward the provided action to the privatization codegen. 3859 RegionCodeGenTy PrivRCG(PrivCodeGen); 3860 PrivRCG.setAction(Action); 3861 3862 // Notwithstanding the body of the region is emitted as inlined directive, 3863 // we don't use an inline scope as changes in the references inside the 3864 // region are expected to be visible outside, so we do not privative them. 3865 OMPLexicalScope Scope(CGF, S); 3866 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data, 3867 PrivRCG); 3868 }; 3869 3870 RegionCodeGenTy RCG(CodeGen); 3871 3872 // If we don't have target devices, don't bother emitting the data mapping 3873 // code. 3874 if (CGM.getLangOpts().OMPTargetTriples.empty()) { 3875 RCG(*this); 3876 return; 3877 } 3878 3879 // Check if we have any if clause associated with the directive. 3880 const Expr *IfCond = nullptr; 3881 if (auto *C = S.getSingleClause<OMPIfClause>()) 3882 IfCond = C->getCondition(); 3883 3884 // Check if we have any device clause associated with the directive. 3885 const Expr *Device = nullptr; 3886 if (auto *C = S.getSingleClause<OMPDeviceClause>()) 3887 Device = C->getDevice(); 3888 3889 // Set the action to signal privatization of device pointers. 
3890 RCG.setAction(PrivAction); 3891 3892 // Emit region code. 3893 CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG, 3894 Info); 3895 } 3896 3897 void CodeGenFunction::EmitOMPTargetEnterDataDirective( 3898 const OMPTargetEnterDataDirective &S) { 3899 // If we don't have target devices, don't bother emitting the data mapping 3900 // code. 3901 if (CGM.getLangOpts().OMPTargetTriples.empty()) 3902 return; 3903 3904 // Check if we have any if clause associated with the directive. 3905 const Expr *IfCond = nullptr; 3906 if (auto *C = S.getSingleClause<OMPIfClause>()) 3907 IfCond = C->getCondition(); 3908 3909 // Check if we have any device clause associated with the directive. 3910 const Expr *Device = nullptr; 3911 if (auto *C = S.getSingleClause<OMPDeviceClause>()) 3912 Device = C->getDevice(); 3913 3914 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); 3915 } 3916 3917 void CodeGenFunction::EmitOMPTargetExitDataDirective( 3918 const OMPTargetExitDataDirective &S) { 3919 // If we don't have target devices, don't bother emitting the data mapping 3920 // code. 3921 if (CGM.getLangOpts().OMPTargetTriples.empty()) 3922 return; 3923 3924 // Check if we have any if clause associated with the directive. 3925 const Expr *IfCond = nullptr; 3926 if (auto *C = S.getSingleClause<OMPIfClause>()) 3927 IfCond = C->getCondition(); 3928 3929 // Check if we have any device clause associated with the directive. 3930 const Expr *Device = nullptr; 3931 if (auto *C = S.getSingleClause<OMPDeviceClause>()) 3932 Device = C->getDevice(); 3933 3934 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); 3935 } 3936 3937 static void emitTargetParallelRegion(CodeGenFunction &CGF, 3938 const OMPTargetParallelDirective &S, 3939 PrePostActionTy &Action) { 3940 // Get the captured statement associated with the 'parallel' region. 
3941 auto *CS = S.getCapturedStmt(OMPD_parallel); 3942 Action.Enter(CGF); 3943 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) { 3944 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 3945 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 3946 CGF.EmitOMPPrivateClause(S, PrivateScope); 3947 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 3948 (void)PrivateScope.Privatize(); 3949 // TODO: Add support for clauses. 3950 CGF.EmitStmt(CS->getCapturedStmt()); 3951 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 3952 }; 3953 emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen, 3954 emitEmptyBoundParameters); 3955 emitPostUpdateForReductionClause( 3956 CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 3957 } 3958 3959 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 3960 CodeGenModule &CGM, StringRef ParentName, 3961 const OMPTargetParallelDirective &S) { 3962 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3963 emitTargetParallelRegion(CGF, S, Action); 3964 }; 3965 llvm::Function *Fn; 3966 llvm::Constant *Addr; 3967 // Emit target region as a standalone region. 3968 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 3969 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 3970 assert(Fn && Addr && "Target device function emission failed."); 3971 } 3972 3973 void CodeGenFunction::EmitOMPTargetParallelDirective( 3974 const OMPTargetParallelDirective &S) { 3975 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3976 emitTargetParallelRegion(CGF, S, Action); 3977 }; 3978 emitCommonOMPTargetDirective(*this, S, CodeGen); 3979 } 3980 3981 void CodeGenFunction::EmitOMPTargetParallelForDirective( 3982 const OMPTargetParallelForDirective &S) { 3983 // TODO: codegen for target parallel for. 3984 } 3985 3986 /// Emit a helper variable and return corresponding lvalue. 
3987 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper, 3988 const ImplicitParamDecl *PVD, 3989 CodeGenFunction::OMPPrivateScope &Privates) { 3990 auto *VDecl = cast<VarDecl>(Helper->getDecl()); 3991 Privates.addPrivate( 3992 VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); }); 3993 } 3994 3995 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { 3996 assert(isOpenMPTaskLoopDirective(S.getDirectiveKind())); 3997 // Emit outlined function for task construct. 3998 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 3999 auto CapturedStruct = GenerateCapturedStmtArgument(*CS); 4000 auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 4001 const Expr *IfCond = nullptr; 4002 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 4003 if (C->getNameModifier() == OMPD_unknown || 4004 C->getNameModifier() == OMPD_taskloop) { 4005 IfCond = C->getCondition(); 4006 break; 4007 } 4008 } 4009 4010 OMPTaskDataTy Data; 4011 // Check if taskloop must be emitted without taskgroup. 4012 Data.Nogroup = S.getSingleClause<OMPNogroupClause>(); 4013 // TODO: Check if we should emit tied or untied task. 4014 Data.Tied = true; 4015 // Set scheduling for taskloop 4016 if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) { 4017 // grainsize clause 4018 Data.Schedule.setInt(/*IntVal=*/false); 4019 Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize())); 4020 } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) { 4021 // num_tasks clause 4022 Data.Schedule.setInt(/*IntVal=*/true); 4023 Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks())); 4024 } 4025 4026 auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) { 4027 // if (PreCond) { 4028 // for (IV in 0..LastIteration) BODY; 4029 // <Final counter/linear vars updates>; 4030 // } 4031 // 4032 4033 // Emit: if (PreCond) - begin. 
4034 // If the condition constant folds and can be elided, avoid emitting the 4035 // whole loop. 4036 bool CondConstant; 4037 llvm::BasicBlock *ContBlock = nullptr; 4038 OMPLoopScope PreInitScope(CGF, S); 4039 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 4040 if (!CondConstant) 4041 return; 4042 } else { 4043 auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then"); 4044 ContBlock = CGF.createBasicBlock("taskloop.if.end"); 4045 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 4046 CGF.getProfileCount(&S)); 4047 CGF.EmitBlock(ThenBlock); 4048 CGF.incrementProfileCounter(&S); 4049 } 4050 4051 if (isOpenMPSimdDirective(S.getDirectiveKind())) 4052 CGF.EmitOMPSimdInit(S); 4053 4054 OMPPrivateScope LoopScope(CGF); 4055 // Emit helper vars inits. 4056 enum { LowerBound = 5, UpperBound, Stride, LastIter }; 4057 auto *I = CS->getCapturedDecl()->param_begin(); 4058 auto *LBP = std::next(I, LowerBound); 4059 auto *UBP = std::next(I, UpperBound); 4060 auto *STP = std::next(I, Stride); 4061 auto *LIP = std::next(I, LastIter); 4062 mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP, 4063 LoopScope); 4064 mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP, 4065 LoopScope); 4066 mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope); 4067 mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP, 4068 LoopScope); 4069 CGF.EmitOMPPrivateLoopCounters(S, LoopScope); 4070 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 4071 (void)LoopScope.Privatize(); 4072 // Emit the loop iteration variable. 4073 const Expr *IVExpr = S.getIterationVariable(); 4074 const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 4075 CGF.EmitVarDecl(*IVDecl); 4076 CGF.EmitIgnoredExpr(S.getInit()); 4077 4078 // Emit the iterations count variable. 
4079 // If it is not a variable, Sema decided to calculate iterations count on 4080 // each iteration (e.g., it is foldable into a constant). 4081 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 4082 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 4083 // Emit calculation of the iterations count. 4084 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 4085 } 4086 4087 CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 4088 S.getInc(), 4089 [&S](CodeGenFunction &CGF) { 4090 CGF.EmitOMPLoopBody(S, JumpDest()); 4091 CGF.EmitStopPoint(&S); 4092 }, 4093 [](CodeGenFunction &) {}); 4094 // Emit: if (PreCond) - end. 4095 if (ContBlock) { 4096 CGF.EmitBranch(ContBlock); 4097 CGF.EmitBlock(ContBlock, true); 4098 } 4099 // Emit final copy of the lastprivate variables if IsLastIter != 0. 4100 if (HasLastprivateClause) { 4101 CGF.EmitOMPLastprivateClauseFinal( 4102 S, isOpenMPSimdDirective(S.getDirectiveKind()), 4103 CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar( 4104 CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false, 4105 (*LIP)->getType(), S.getLocStart()))); 4106 } 4107 }; 4108 auto &&TaskGen = [&S, SharedsTy, CapturedStruct, 4109 IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn, 4110 const OMPTaskDataTy &Data) { 4111 auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) { 4112 OMPLoopScope PreInitScope(CGF, S); 4113 CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S, 4114 OutlinedFn, SharedsTy, 4115 CapturedStruct, IfCond, Data); 4116 }; 4117 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop, 4118 CodeGen); 4119 }; 4120 if (Data.Nogroup) 4121 EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); 4122 else { 4123 CGM.getOpenMPRuntime().emitTaskgroupRegion( 4124 *this, 4125 [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF, 4126 PrePostActionTy &Action) { 4127 Action.Enter(CGF); 4128 CGF.EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); 4129 }, 4130 S.getLocStart()); 4131 } 4132 } 4133 
4134 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { 4135 EmitOMPTaskLoopBasedDirective(S); 4136 } 4137 4138 void CodeGenFunction::EmitOMPTaskLoopSimdDirective( 4139 const OMPTaskLoopSimdDirective &S) { 4140 EmitOMPTaskLoopBasedDirective(S); 4141 } 4142 4143 // Generate the instructions for '#pragma omp target update' directive. 4144 void CodeGenFunction::EmitOMPTargetUpdateDirective( 4145 const OMPTargetUpdateDirective &S) { 4146 // If we don't have target devices, don't bother emitting the data mapping 4147 // code. 4148 if (CGM.getLangOpts().OMPTargetTriples.empty()) 4149 return; 4150 4151 // Check if we have any if clause associated with the directive. 4152 const Expr *IfCond = nullptr; 4153 if (auto *C = S.getSingleClause<OMPIfClause>()) 4154 IfCond = C->getCondition(); 4155 4156 // Check if we have any device clause associated with the directive. 4157 const Expr *Device = nullptr; 4158 if (auto *C = S.getSingleClause<OMPDeviceClause>()) 4159 Device = C->getDevice(); 4160 4161 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); 4162 } 4163