//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/DeclOpenMP.h"
#include "llvm/IR/CallSite.h"
using namespace clang;
using namespace CodeGen;

namespace {
/// Lexical scope for OpenMP executable constructs, that handles correct codegen
/// for captured expressions.
///
/// On construction it optionally emits the clauses' pre-init declarations and,
/// when the directive is emitted inlined, remaps every captured variable of the
/// associated captured statement to the address it has in the enclosing
/// function.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  /// Emits the declarations of the pre-init statement of every clause of \p S
  /// that has one. Declarations carrying OMPCaptureNoInitAttr only get their
  /// storage allocated and cleanups registered (no initializer is emitted);
  /// all other declarations are emitted through EmitVarDecl.
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (auto *CPI = OMPClauseWithPreInit::get(C)) {
        // getPreInitStmt() may be null, hence cast_or_null.
        if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>())
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            else {
              // Allocate the variable and set up its cleanups, but skip the
              // initialization step.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  /// Private scope holding the remapped addresses of the captured variables;
  /// only populated by the constructor when AsInlined is true.
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  /// Returns true if \p VD has a lambda capture field in \p CGF, is found by
  /// the current CapturedStmtInfo, or the code currently being emitted is a
  /// BlockDecl.
  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
  }

public:
  /// \param CGF             Function being emitted.
  /// \param S               Directive whose source range delimits the scope.
  /// \param AsInlined       If true and \p S has an associated statement,
  ///                        remap its captured variables (by reference or by
  ///                        copy) to their addresses in \p CGF.
  /// \param EmitPreInitStmt If true, emit the clauses' pre-init declarations.
  OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
                  bool AsInlined = false, bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (AsInlined) {
      if (S.hasAssociatedStmt()) {
        auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
        for (auto &C : CS->captures()) {
          if (C.capturesVariable() || C.capturesVariableByCopy()) {
            auto *VD = C.getCapturedVar();
            // Temporary reference to the variable; the second argument tells
            // EmitLValue whether the variable must be looked up as a capture
            // of the enclosing context.
            DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                            isCapturedVar(CGF, VD) ||
                                (CGF.CapturedStmtInfo &&
                                 InlinedShareds.isGlobalVarCaptured(VD)),
                            VD->getType().getNonReferenceType(), VK_LValue,
                            SourceLocation());
            // DRE is captured by reference: the callback is expected to be
            // invoked before DRE goes out of scope.
            // NOTE(review): relies on addPrivate() running the callback
            // immediately - confirm against OMPPrivateScope.
            InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
              return CGF.EmitLValue(&DRE).getAddress();
            });
          }
        }
        (void)InlinedShareds.Privatize();
      }
    }
  }
};

/// Lexical scope for OpenMP parallel construct, that handles correct codegen
/// for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
  /// Pre-init statements are emitted only when the directive is a parallel
  /// directive that is neither a target-execution directive nor a
  /// loop-bound-sharing directive.
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !(isOpenMPTargetExecutionDirective(Kind) ||
             isOpenMPLoopBoundSharingDirective(Kind)) &&
           isOpenMPParallelDirective(Kind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S,
                        /*AsInlined=*/false,
                        /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
};

/// Lexical scope for OpenMP teams construct, that handles correct codegen
/// for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  /// Pre-init statements are emitted only when the directive is a teams
  /// directive that is not a target-execution directive.
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !isOpenMPTargetExecutionDirective(Kind) &&
           isOpenMPTeamsDirective(Kind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S,
                        /*AsInlined=*/false,
                        /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
};

/// Private scope for OpenMP loop-based directives, that supports capturing
/// of used expression from loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  /// Emits every variable declared in the loop directive's pre-init
  /// statement, if it has one.
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
    if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
      // getPreInits() may be null, hence cast_or_null.
      if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) {
        for (const auto *I : PreInits->decls())
          CGF.EmitVarDecl(cast<VarDecl>(*I));
      }
    }
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};

} // namespace

/// Returns a value holding the size of \p Ty in chars.
///
/// For types whose static size is non-zero this is a constant. Otherwise the
/// variable array dimensions are peeled off one by one, their element counts
/// are multiplied together, and the product is multiplied by the static size
/// of the remaining type. A constant 0 is returned if the remaining type is
/// itself zero-sized.
llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  auto &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (auto *VAT = C.getAsVariableArrayType(Ty)) {
      llvm::Value *ArraySize;
      // getVLASize() yields the element count and the element type; Ty is
      // advanced to the element type for the next iteration.
      std::tie(ArraySize, Ty) = getVLASize(VAT);
      Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  } else
    Size = CGM.getSize(SizeInChars);
  return Size;
}

/// Appends to \p CapturedVars one value per capture of \p S, in capture order.
///
/// The capture initializers, the fields of the captured record and the
/// captures themselves are walked in lockstep:
///  - VLA size field: the value recorded in VLASizeMap for the size
///    expression;
///  - captured 'this': CXXThisValue;
///  - capture by copy: the loaded scalar, re-read as a uintptr when the field
///    type is not a pointer;
///  - capture by reference: the pointer to the variable.
void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      auto VAT = CurField->getCapturedVLAType();
      auto *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis())
      CapturedVars.push_back(CXXThisValue);
    else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV =
          EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal();

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        auto &Ctx = getContext();
        // uintptr-typed temporary named "<var>.casted".
        auto DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName()) + ".casted");
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        // View the same temporary through a pointer to the field type.
        auto *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), SourceLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
    }
  }
}

/// Returns an address through which the storage of \p AddrLV can be accessed
/// as a value of type \p DstType.
///
/// \param CGF             Function being emitted.
/// \param DstType         Type the storage should be viewed as.
/// \param Name            Base name for the ".ref" temporary (only used when
///                        \p isReferenceType is true).
/// \param AddrLV          LValue whose address is converted.
/// \param isReferenceType If true, the converted pointer is stored into a new
///                        temporary of type 'DstType &' and the address of
///                        that temporary is returned instead.
static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
                                    StringRef Name, LValue AddrLV,
                                    bool isReferenceType = false) {
  ASTContext &Ctx = CGF.getContext();

  auto *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), SourceLocation());
  auto TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
          .getAddress();

  // If we are dealing with references we need to return the address of the
  // reference instead of the reference of the value.
  if (isReferenceType) {
    QualType RefType = Ctx.getLValueReferenceType(DstType);
    auto *RefVal = TmpAddr.getPointer();
    TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
    auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
    CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true);
  }

  return TmpAddr;
}

/// Computes the canonical parameter type of \p T while preserving its outer
/// lvalue-reference and pointer levels: those are stripped recursively, the
/// innermost type is canonicalized with ASTContext::getCanonicalParamType, and
/// the reference/pointer levels are rebuilt around the result.
static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType()) {
    return C.getLValueReferenceType(
        getCanonicalParamType(C, T.getNonReferenceType()),
        /*SpelledAsLValue=*/false);
  }
  if (T->isPointerType())
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
  return C.getCanonicalParamType(T);
}

namespace {
  /// Contains required data for proper outlined function codegen.
  struct FunctionOptions {
    /// Captured statement for which the function is generated.
    const CapturedStmt *S = nullptr;
    /// true if cast to/from UIntPtr is required for variables captured by
    /// value.
    bool UIntPtrCastRequired = true;
    /// true if only casted arguments must be registered as local args or VLA
    /// sizes.
    bool RegisterCastedArgsOnly = false;
    /// Name of the generated function.
    StringRef FunctionName;
    /// Note that RegisterCastedArgsOnly is forced to false whenever
    /// \p UIntPtrCastRequired is false.
    explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                             bool RegisterCastedArgsOnly,
                             StringRef FunctionName)
        : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
          RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
          FunctionName(FunctionName) {}
  };
}

/// Builds the argument list for, creates, and starts emission of the outlined
/// function for the captured statement in \p FO, then maps each incoming
/// argument back to what it captures.
///
/// \param CGF          Function emitter for the outlined function;
///                     StartFunction is called on it here.
/// \param Args         [out] The captured decl's parameters, with the context
///                     parameter replaced by one implicit parameter per field
///                     of the captured record.
/// \param LocalAddrs   [out] Argument -> (captured variable, address). The
///                     variable is null for the entry of the captured 'this'.
/// \param VLASizes     [out] Argument -> (VLA size expression, loaded value).
/// \param CXXThisValue [out] Loaded 'this' if it is captured, nullptr
///                     otherwise.
/// \param FO           Captured statement, function name and the
///                     uintptr-cast / registration flags.
/// \returns The created function, and whether any capture is a by-copy
///          non-pointer or a VLA size.
static std::pair<llvm::Function *, bool> emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::DenseMap<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  bool HasUIntPtrArgs = false;
  // Parameters that precede the context parameter are passed through as is.
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  for (auto *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly casted to
    // uintptr. This is necessary given that the runtime library is only able to
    // deal with pointers. We can pass in the same way the VLA type sizes to the
    // outlined function.
    if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
        I->capturesVariableArrayType()) {
      HasUIntPtrArgs = true;
      if (FO.UIntPtrCastRequired)
        ArgType = Ctx.getUIntPtrType();
    }

    // Name the parameter after what it captures.
    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis())
      II = &Ctx.Idents.get("this");
    else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType.getNonReferenceType());
    Args.push_back(ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr,
                                             FD->getLocation(), II, ArgType,
                                             ImplicitParamDecl::Other));
    ++I;
  }
  // Parameters that follow the context parameter are passed through as is.
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  FunctionType::ExtInfo ExtInfo;
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  llvm::Function *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->addFnAttr(llvm::Attribute::NoUnwind);

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
                    CD->getBody()->getLocStart());
  // Cnt indexes Args; the per-field parameters start at the position the
  // context parameter had in the captured decl.
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (auto *FD : RD->fields()) {
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      Address LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
      // If the variable is a reference we need to materialize it here.
      if (CurVD->getType()->isReferenceType()) {
        Address RefAddr = CGF.CreateMemTemp(
            CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref");
        CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr,
                              /*Volatile=*/false, CurVD->getType());
        LocalAddr = RefAddr;
      }
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
    LValue ArgLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(Args[Cnt]),
                                        Args[Cnt]->getType(), BaseInfo);
    if (FD->hasCapturedVLAType()) {
      // VLA size: undo the uintptr cast if one was applied, then record the
      // loaded size value against the size expression.
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(castValueFromUintptr(CGF, FD->getType(),
                                                          Args[Cnt]->getName(),
                                                          ArgLVal),
                                     FD->getType(), BaseInfo);
      }
      auto *ExprArg =
          CGF.EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal();
      auto VAT = FD->getCapturedVLAType();
      VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}});
    } else if (I->capturesVariable()) {
      // Capture by reference: for non-reference variables, load the argument
      // to obtain the address of the variable itself.
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (!VarTy->isReferenceType()) {
        if (ArgLVal.getType()->isLValueReferenceType()) {
          ArgAddr = CGF.EmitLoadOfReference(
              ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
        } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
          assert(ArgLVal.getType()->isPointerType());
          ArgAddr = CGF.EmitLoadOfPointer(
              ArgAddr, ArgLVal.getType()->castAs<PointerType>());
        }
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt],
             {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      // Non-pointer capture by copy (the pointer case was handled above).
      // Registered regardless of RegisterCastedArgsOnly.
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      LocalAddrs.insert(
          {Args[Cnt],
           {Var,
            FO.UIntPtrCastRequired
                ? castValueFromUintptr(CGF, FD->getType(), Args[Cnt]->getName(),
                                       ArgLVal, VarTy->isReferenceType())
                : ArgLVal.getAddress()}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation())
                         .getScalarVal();
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
    }
    ++Cnt;
    ++I;
  }

  return {F, HasUIntPtrArgs};
}

/// Emits the outlined function for the captured statement \p S and returns the
/// function that callers should use.
///
/// When debug info at LimitedDebugInfo level or above is being generated, the
/// function is emitted without uintptr casts. If it then has arguments that
/// would otherwise be passed as uintptr, a second function named
/// ".nondebug_wrapper." is emitted with debug info disabled; it takes the
/// casted arguments, forwards them to the first function, and is the one
/// returned.
llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
  bool NeedWrapperFunction =
      getDebugInfo() &&
      CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo;
  FunctionArgList Args;
  llvm::DenseMap<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     CapturedStmtInfo->getHelperName());
  llvm::Function *F;
  bool HasUIntPtrArgs;
  std::tie(F, HasUIntPtrArgs) = emitOutlinedFunctionPrologue(
      *this, Args, LocalAddrs, VLASizes, CXXThisValue, FO);
  // Register the addresses of the captured variables; entries without a
  // variable (the captured 'this') are skipped.
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      setAddrOfLocalVar(LocalAddrPair.second.first,
                        LocalAddrPair.second.second);
    }
  }
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction || !HasUIntPtrArgs)
    return F;

  // Emit the wrapper that receives uintptr-casted arguments and calls F.
  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            ".nondebug_wrapper.");
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  WrapperCGF.disableDebugInfo();
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO).first;
  LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      // Registered (casted) argument: load through the recorded address.
      LValue LV =
          WrapperCGF.MakeAddrLValue(I->second.second, Arg->getType(), BaseInfo);
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end())
        CallArg = EI->second.second;
      else {
        // Any other argument is forwarded unchanged.
        LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                              Arg->getType(), BaseInfo);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
      }
    }
    CallArgs.emplace_back(CallArg);
  }
  WrapperCGF.Builder.CreateCall(F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}

//===----------------------------------------------------------------------===//
//                              OpenMP Directive Emission
//===----------------------------------------------------------------------===//

/// Emits a loop over the elements of two arrays of type \p OriginalType and
/// invokes \p CopyGen once per element.
///
/// \param DestAddr     Address of the destination array.
/// \param SrcAddr      Address of the source array; it is bitcast to the
///                     destination's element type.
/// \param OriginalType Array type of the operands.
/// \param CopyGen      Callback receiving (destination element, source
///                     element) that emits the copy of a single element.
///
/// The loop body is skipped entirely when the destination has no elements.
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> &CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  auto SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = createBasicBlock("omp.arraycpy.body");
  auto DoneBB = createBasicBlock("omp.arraycpy.done");
  auto IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes carry the current source/destination element pointers; the
  // second incoming value is added after the body has been emitted.
  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI =
      Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  auto DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // The back edge comes from whatever block CopyGen left us in.
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emits a copy from \p SrcAddr to \p DestAddr using the expression \p Copy.
///
/// \p Copy refers to the pseudo variables \p SrcVD and \p DestVD; they are
/// temporarily remapped to the given addresses while it is emitted. Arrays
/// whose \p Copy is a plain assignment are copied with EmitAggregateAssign;
/// any other array is copied element by element, evaluating \p Copy once per
/// element.
void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
    } else {
      // For arrays with complex element types perform element by element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // Working with the single array element, so have to remap
            // destination and source variables to corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() -> Address {
              return DestElement;
            });
            Remap.addPrivate(
                SrcVD, [SrcElement]() -> Address { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}

/// Registers private copies for the variables of the 'firstprivate' clauses of
/// \p D in \p PrivateScope, initialized from the original variables.
///
/// A variable is skipped (only recorded as handled) when it is not also
/// lastprivate and is captured by a non-reference field that is the same field
/// in the directive's own captured statement. Each variable is processed at
/// most once even if it is listed several times.
///
/// \returns true if at least one processed firstprivate variable is also
///          listed in a 'lastprivate' clause; false if there is no insert
///          point.
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool FirstprivateIsLastprivate = false;
  // Canonical declarations of all lastprivate variables of the directive.
  llvm::DenseSet<const VarDecl *> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.insert(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt()));
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    // varlist, inits and private_copies are parallel sequences.
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      auto *CapFD = CapturesInfo.lookup(OrigVD);
      auto *FD = CapturedStmtInfo->lookup(OrigVD);
      if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) &&
          !FD->getType()->isReferenceType()) {
        // No private copy is emitted here for this variable.
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        Address OriginalAddr = EmitLValue(&DRE).getAddress();
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            auto Emission = EmitAutoVarAlloca(*VD);
            auto *Init = VD->getInit();
            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
                                  Type);
            } else {
              EmitOMPAggregateAssign(
                  Emission.getAllocatedAddress(), OriginalAddr, Type,
                  [this, VDInit, Init](Address DestElement,
                                       Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    RunCleanupsScope InitScope(*this);
                    // Emit initialization for single element.
                    setAddrOfLocalVar(VDInit, SrcElement);
                    EmitAnyExprToMem(Init, DestElement,
                                     Init->getType().getQualifiers(),
                                     /*IsInitializer*/ false);
                    // Drop the temporary mapping so the next element can
                    // register VDInit again.
                    LocalDeclMap.erase(VDInit);
                  });
            }
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
        } else {
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            // Remap temp VDInit variable to the address of the original
            // variable
            // (for proper handling of captured global variables).
            setAddrOfLocalVar(VDInit, OriginalAddr);
            EmitDecl(*VD);
            LocalDeclMap.erase(VDInit);
            return GetAddrOfLocalVar(VD);
          });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}

/// Registers in \p PrivateScope a private copy for every variable listed in
/// the 'private' clauses of \p D. Each variable is handled once; the private
/// declaration is emitted when the registered callback runs. Does nothing if
/// there is no insert point.
void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    // varlist and private_copies are parallel sequences.
    auto IRef = C->varlist_begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, [&]() -> Address {
              // Emit private VarDecl with copy init.
              EmitDecl(*VD);
              return GetAddrOfLocalVar(VD);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

/// Emits the copies required by the 'copyin' clauses of \p D: every listed
/// threadprivate variable is assigned from the master thread's instance. The
/// copies are guarded by a runtime check that the master and the private
/// address of the first copied variable differ.
///
/// \returns true if any copy code was emitted; false otherwise, including
///          when there is no insert point.
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    // varlist, source_exprs, destination_exprs and assignment_ops are
    // parallel sequences.
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
                          VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          // Remove any local mapping for VD before its threadprivate address
          // is computed below.
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // At first check if current thread is a master thread. If it is, no
          // need to copy data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

/// Prepares the variables of the 'lastprivate' clauses of \p D: the
/// destination pseudo variable of each is mapped to the address of the
/// original variable, and a private copy is registered for the variable
/// unless it has no private-copy expression (it is also firstprivate) or it
/// is a loop counter of a simd directive. For taskloop directives nothing is
/// registered.
///
/// \returns true if \p D has at least one 'lastprivate' clause; false
///          otherwise, including when there is no insert point.
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  // Canonical declarations of the loop counters of a simd directive.
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (auto *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()))
      break;
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization, it is done in
      // runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
          DeclRefExpr DRE(
              const_cast<VarDecl *>(OrigVD),
              /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                  OrigVD) != nullptr,
              (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in codegen
        // for 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

/// Emits the copy-back of the 'lastprivate' variables of \p D from their
/// private copies to the original variables.
///
/// \param D              Directive carrying the clauses.
/// \param NoFinals       If true, the loop counters of a loop directive are
///                       treated as already handled and are not copied back;
///                       if false, each counter's final-value expression is
///                       emitted before the counter is copied back.
/// \param IsLastIterCond If non-null, the copies are emitted under
///                       'if (IsLastIterCond)'; if null they are emitted
///                       unconditionally.
void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  // Loop counter -> expression computing its final value.
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    // counters and finals are parallel sequences.
    auto IC = LoopDirective->counters().begin();
    for (auto F : LoopDirective->finals()) {
      auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If lastprivate variable is a loop control variable for loop-based
        // directive, update its value before copying back to original
        // variable.
        if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        // For a reference-typed variable, copy from the referenced object.
        if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      getNaturalTypeAlignment(RefTy->getPointeeType()));
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (const auto *Ref : C->varlists()) {
      Shareds.emplace_back(Ref);
      Privates.emplace_back(*IPriv);
ReductionOps.emplace_back(*IRed); 923 LHSs.emplace_back(*ILHS); 924 RHSs.emplace_back(*IRHS); 925 std::advance(IPriv, 1); 926 std::advance(IRed, 1); 927 std::advance(ILHS, 1); 928 std::advance(IRHS, 1); 929 } 930 } 931 ReductionCodeGen RedCG(Shareds, Privates, ReductionOps); 932 unsigned Count = 0; 933 auto ILHS = LHSs.begin(); 934 auto IRHS = RHSs.begin(); 935 auto IPriv = Privates.begin(); 936 for (const auto *IRef : Shareds) { 937 auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl()); 938 // Emit private VarDecl with reduction init. 939 RedCG.emitSharedLValue(*this, Count); 940 RedCG.emitAggregateType(*this, Count); 941 auto Emission = EmitAutoVarAlloca(*PrivateVD); 942 RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(), 943 RedCG.getSharedLValue(Count), 944 [&Emission](CodeGenFunction &CGF) { 945 CGF.EmitAutoVarInit(Emission); 946 return true; 947 }); 948 EmitAutoVarCleanups(Emission); 949 Address BaseAddr = RedCG.adjustPrivateAddress( 950 *this, Count, Emission.getAllocatedAddress()); 951 bool IsRegistered = PrivateScope.addPrivate( 952 RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; }); 953 assert(IsRegistered && "private var already registered as private"); 954 // Silence the warning about unused variable. 955 (void)IsRegistered; 956 957 auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 958 auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 959 if (isa<OMPArraySectionExpr>(IRef)) { 960 // Store the address of the original variable associated with the LHS 961 // implicit variable. 962 PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address { 963 return RedCG.getSharedLValue(Count).getAddress(); 964 }); 965 PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { 966 return GetAddrOfLocalVar(PrivateVD); 967 }); 968 } else if (isa<ArraySubscriptExpr>(IRef)) { 969 // Store the address of the original variable associated with the LHS 970 // implicit variable. 
971 PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address { 972 return RedCG.getSharedLValue(Count).getAddress(); 973 }); 974 PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address { 975 return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD), 976 ConvertTypeForMem(RHSVD->getType()), 977 "rhs.begin"); 978 }); 979 } else { 980 QualType Type = PrivateVD->getType(); 981 bool IsArray = getContext().getAsArrayType(Type) != nullptr; 982 Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(); 983 // Store the address of the original variable associated with the LHS 984 // implicit variable. 985 if (IsArray) { 986 OriginalAddr = Builder.CreateElementBitCast( 987 OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin"); 988 } 989 PrivateScope.addPrivate( 990 LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; }); 991 PrivateScope.addPrivate( 992 RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address { 993 return IsArray 994 ? Builder.CreateElementBitCast( 995 GetAddrOfLocalVar(PrivateVD), 996 ConvertTypeForMem(RHSVD->getType()), "rhs.begin") 997 : GetAddrOfLocalVar(PrivateVD); 998 }); 999 } 1000 ++ILHS; 1001 ++IRHS; 1002 ++IPriv; 1003 ++Count; 1004 } 1005 } 1006 1007 void CodeGenFunction::EmitOMPReductionClauseFinal( 1008 const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) { 1009 if (!HaveInsertPoint()) 1010 return; 1011 llvm::SmallVector<const Expr *, 8> Privates; 1012 llvm::SmallVector<const Expr *, 8> LHSExprs; 1013 llvm::SmallVector<const Expr *, 8> RHSExprs; 1014 llvm::SmallVector<const Expr *, 8> ReductionOps; 1015 bool HasAtLeastOneReduction = false; 1016 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { 1017 HasAtLeastOneReduction = true; 1018 Privates.append(C->privates().begin(), C->privates().end()); 1019 LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); 1020 RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); 1021 
ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); 1022 } 1023 if (HasAtLeastOneReduction) { 1024 bool WithNowait = D.getSingleClause<OMPNowaitClause>() || 1025 isOpenMPParallelDirective(D.getDirectiveKind()) || 1026 D.getDirectiveKind() == OMPD_simd; 1027 bool SimpleReduction = D.getDirectiveKind() == OMPD_simd; 1028 // Emit nowait reduction if nowait clause is present or directive is a 1029 // parallel directive (it always has implicit barrier). 1030 CGM.getOpenMPRuntime().emitReduction( 1031 *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps, 1032 {WithNowait, SimpleReduction, ReductionKind}); 1033 } 1034 } 1035 1036 static void emitPostUpdateForReductionClause( 1037 CodeGenFunction &CGF, const OMPExecutableDirective &D, 1038 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) { 1039 if (!CGF.HaveInsertPoint()) 1040 return; 1041 llvm::BasicBlock *DoneBB = nullptr; 1042 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { 1043 if (auto *PostUpdate = C->getPostUpdateExpr()) { 1044 if (!DoneBB) { 1045 if (auto *Cond = CondGen(CGF)) { 1046 // If the first post-update expression is found, emit conditional 1047 // block if it was requested. 1048 auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu"); 1049 DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done"); 1050 CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB); 1051 CGF.EmitBlock(ThenBB); 1052 } 1053 } 1054 CGF.EmitIgnoredExpr(PostUpdate); 1055 } 1056 } 1057 if (DoneBB) 1058 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 1059 } 1060 1061 namespace { 1062 /// Codegen lambda for appending distribute lower and upper bounds to outlined 1063 /// parallel function. 
This is necessary for combined constructs such as 1064 /// 'distribute parallel for' 1065 typedef llvm::function_ref<void(CodeGenFunction &, 1066 const OMPExecutableDirective &, 1067 llvm::SmallVectorImpl<llvm::Value *> &)> 1068 CodeGenBoundParametersTy; 1069 } // anonymous namespace 1070 1071 static void emitCommonOMPParallelDirective( 1072 CodeGenFunction &CGF, const OMPExecutableDirective &S, 1073 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 1074 const CodeGenBoundParametersTy &CodeGenBoundParameters) { 1075 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); 1076 auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( 1077 S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); 1078 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) { 1079 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 1080 auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), 1081 /*IgnoreResultAssign*/ true); 1082 CGF.CGM.getOpenMPRuntime().emitNumThreadsClause( 1083 CGF, NumThreads, NumThreadsClause->getLocStart()); 1084 } 1085 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) { 1086 CodeGenFunction::RunCleanupsScope ProcBindScope(CGF); 1087 CGF.CGM.getOpenMPRuntime().emitProcBindClause( 1088 CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart()); 1089 } 1090 const Expr *IfCond = nullptr; 1091 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 1092 if (C->getNameModifier() == OMPD_unknown || 1093 C->getNameModifier() == OMPD_parallel) { 1094 IfCond = C->getCondition(); 1095 break; 1096 } 1097 } 1098 1099 OMPParallelScope Scope(CGF, S); 1100 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 1101 // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk 1102 // lower and upper bounds with the pragma 'for' chunking mechanism. 
1103 // The following lambda takes care of appending the lower and upper bound 1104 // parameters when necessary 1105 CodeGenBoundParameters(CGF, S, CapturedVars); 1106 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 1107 CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn, 1108 CapturedVars, IfCond); 1109 } 1110 1111 static void emitEmptyBoundParameters(CodeGenFunction &, 1112 const OMPExecutableDirective &, 1113 llvm::SmallVectorImpl<llvm::Value *> &) {} 1114 1115 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { 1116 // Emit parallel region as a standalone region. 1117 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1118 OMPPrivateScope PrivateScope(CGF); 1119 bool Copyins = CGF.EmitOMPCopyinClause(S); 1120 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 1121 if (Copyins) { 1122 // Emit implicit barrier to synchronize threads and avoid data races on 1123 // propagation master's thread values of threadprivate variables to local 1124 // instances of that variables of all other implicit threads. 1125 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 1126 CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 1127 /*ForceSimpleCall=*/true); 1128 } 1129 CGF.EmitOMPPrivateClause(S, PrivateScope); 1130 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 1131 (void)PrivateScope.Privatize(); 1132 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1133 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 1134 }; 1135 emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen, 1136 emitEmptyBoundParameters); 1137 emitPostUpdateForReductionClause( 1138 *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1139 } 1140 1141 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, 1142 JumpDest LoopExit) { 1143 RunCleanupsScope BodyScope(*this); 1144 // Update counters values on current iteration. 
1145 for (auto I : D.updates()) { 1146 EmitIgnoredExpr(I); 1147 } 1148 // Update the linear variables. 1149 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1150 for (auto *U : C->updates()) 1151 EmitIgnoredExpr(U); 1152 } 1153 1154 // On a continue in the body, jump to the end. 1155 auto Continue = getJumpDestInCurrentScope("omp.body.continue"); 1156 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1157 // Emit loop body. 1158 EmitStmt(D.getBody()); 1159 // The end (updates/cleanups). 1160 EmitBlock(Continue.getBlock()); 1161 BreakContinueStack.pop_back(); 1162 } 1163 1164 void CodeGenFunction::EmitOMPInnerLoop( 1165 const Stmt &S, bool RequiresCleanup, const Expr *LoopCond, 1166 const Expr *IncExpr, 1167 const llvm::function_ref<void(CodeGenFunction &)> &BodyGen, 1168 const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) { 1169 auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end"); 1170 1171 // Start the loop with a block that tests the condition. 1172 auto CondBlock = createBasicBlock("omp.inner.for.cond"); 1173 EmitBlock(CondBlock); 1174 const SourceRange &R = S.getSourceRange(); 1175 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), 1176 SourceLocToDebugLoc(R.getEnd())); 1177 1178 // If there are any cleanups between here and the loop-exit scope, 1179 // create a block to stage a loop exit along. 1180 auto ExitBlock = LoopExit.getBlock(); 1181 if (RequiresCleanup) 1182 ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup"); 1183 1184 auto LoopBody = createBasicBlock("omp.inner.for.body"); 1185 1186 // Emit condition. 1187 EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S)); 1188 if (ExitBlock != LoopExit.getBlock()) { 1189 EmitBlock(ExitBlock); 1190 EmitBranchThroughCleanup(LoopExit); 1191 } 1192 1193 EmitBlock(LoopBody); 1194 incrementProfileCounter(&S); 1195 1196 // Create a block for the increment. 
1197 auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc"); 1198 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1199 1200 BodyGen(*this); 1201 1202 // Emit "IV = IV + 1" and a back-edge to the condition block. 1203 EmitBlock(Continue.getBlock()); 1204 EmitIgnoredExpr(IncExpr); 1205 PostIncGen(*this); 1206 BreakContinueStack.pop_back(); 1207 EmitBranch(CondBlock); 1208 LoopStack.pop(); 1209 // Emit the fall-through block. 1210 EmitBlock(LoopExit.getBlock()); 1211 } 1212 1213 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { 1214 if (!HaveInsertPoint()) 1215 return false; 1216 // Emit inits for the linear variables. 1217 bool HasLinears = false; 1218 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1219 for (auto *Init : C->inits()) { 1220 HasLinears = true; 1221 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl()); 1222 if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) { 1223 AutoVarEmission Emission = EmitAutoVarAlloca(*VD); 1224 auto *OrigVD = cast<VarDecl>(Ref->getDecl()); 1225 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), 1226 CapturedStmtInfo->lookup(OrigVD) != nullptr, 1227 VD->getInit()->getType(), VK_LValue, 1228 VD->getInit()->getExprLoc()); 1229 EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(), 1230 VD->getType()), 1231 /*capturedByInit=*/false); 1232 EmitAutoVarCleanups(Emission); 1233 } else 1234 EmitVarDecl(*VD); 1235 } 1236 // Emit the linear steps for the linear clauses. 1237 // If a step is not constant, it is pre-calculated before the loop. 1238 if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep())) 1239 if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) { 1240 EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl())); 1241 // Emit calculation of the linear step. 
1242 EmitIgnoredExpr(CS); 1243 } 1244 } 1245 return HasLinears; 1246 } 1247 1248 void CodeGenFunction::EmitOMPLinearClauseFinal( 1249 const OMPLoopDirective &D, 1250 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) { 1251 if (!HaveInsertPoint()) 1252 return; 1253 llvm::BasicBlock *DoneBB = nullptr; 1254 // Emit the final values of the linear variables. 1255 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1256 auto IC = C->varlist_begin(); 1257 for (auto *F : C->finals()) { 1258 if (!DoneBB) { 1259 if (auto *Cond = CondGen(*this)) { 1260 // If the first post-update expression is found, emit conditional 1261 // block if it was requested. 1262 auto *ThenBB = createBasicBlock(".omp.linear.pu"); 1263 DoneBB = createBasicBlock(".omp.linear.pu.done"); 1264 Builder.CreateCondBr(Cond, ThenBB, DoneBB); 1265 EmitBlock(ThenBB); 1266 } 1267 } 1268 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl()); 1269 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), 1270 CapturedStmtInfo->lookup(OrigVD) != nullptr, 1271 (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); 1272 Address OrigAddr = EmitLValue(&DRE).getAddress(); 1273 CodeGenFunction::OMPPrivateScope VarScope(*this); 1274 VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; }); 1275 (void)VarScope.Privatize(); 1276 EmitIgnoredExpr(F); 1277 ++IC; 1278 } 1279 if (auto *PostUpdate = C->getPostUpdateExpr()) 1280 EmitIgnoredExpr(PostUpdate); 1281 } 1282 if (DoneBB) 1283 EmitBlock(DoneBB, /*IsFinished=*/true); 1284 } 1285 1286 static void emitAlignedClause(CodeGenFunction &CGF, 1287 const OMPExecutableDirective &D) { 1288 if (!CGF.HaveInsertPoint()) 1289 return; 1290 for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) { 1291 unsigned ClauseAlignment = 0; 1292 if (auto AlignmentExpr = Clause->getAlignment()) { 1293 auto AlignmentCI = 1294 cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr)); 1295 ClauseAlignment = 
static_cast<unsigned>(AlignmentCI->getZExtValue()); 1296 } 1297 for (auto E : Clause->varlists()) { 1298 unsigned Alignment = ClauseAlignment; 1299 if (Alignment == 0) { 1300 // OpenMP [2.8.1, Description] 1301 // If no optional parameter is specified, implementation-defined default 1302 // alignments for SIMD instructions on the target platforms are assumed. 1303 Alignment = 1304 CGF.getContext() 1305 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign( 1306 E->getType()->getPointeeType())) 1307 .getQuantity(); 1308 } 1309 assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) && 1310 "alignment is not power of 2"); 1311 if (Alignment != 0) { 1312 llvm::Value *PtrValue = CGF.EmitScalarExpr(E); 1313 CGF.EmitAlignmentAssumption(PtrValue, Alignment); 1314 } 1315 } 1316 } 1317 } 1318 1319 void CodeGenFunction::EmitOMPPrivateLoopCounters( 1320 const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) { 1321 if (!HaveInsertPoint()) 1322 return; 1323 auto I = S.private_counters().begin(); 1324 for (auto *E : S.counters()) { 1325 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 1326 auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()); 1327 (void)LoopScope.addPrivate(VD, [&]() -> Address { 1328 // Emit var without initialization. 
1329 if (!LocalDeclMap.count(PrivateVD)) { 1330 auto VarEmission = EmitAutoVarAlloca(*PrivateVD); 1331 EmitAutoVarCleanups(VarEmission); 1332 } 1333 DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD), 1334 /*RefersToEnclosingVariableOrCapture=*/false, 1335 (*I)->getType(), VK_LValue, (*I)->getExprLoc()); 1336 return EmitLValue(&DRE).getAddress(); 1337 }); 1338 if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) || 1339 VD->hasGlobalStorage()) { 1340 (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address { 1341 DeclRefExpr DRE(const_cast<VarDecl *>(VD), 1342 LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD), 1343 E->getType(), VK_LValue, E->getExprLoc()); 1344 return EmitLValue(&DRE).getAddress(); 1345 }); 1346 } 1347 ++I; 1348 } 1349 } 1350 1351 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, 1352 const Expr *Cond, llvm::BasicBlock *TrueBlock, 1353 llvm::BasicBlock *FalseBlock, uint64_t TrueCount) { 1354 if (!CGF.HaveInsertPoint()) 1355 return; 1356 { 1357 CodeGenFunction::OMPPrivateScope PreCondScope(CGF); 1358 CGF.EmitOMPPrivateLoopCounters(S, PreCondScope); 1359 (void)PreCondScope.Privatize(); 1360 // Get initial values of real counters. 1361 for (auto I : S.inits()) { 1362 CGF.EmitIgnoredExpr(I); 1363 } 1364 } 1365 // Check that loop is executed at least one time. 
1366 CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount); 1367 } 1368 1369 void CodeGenFunction::EmitOMPLinearClause( 1370 const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { 1371 if (!HaveInsertPoint()) 1372 return; 1373 llvm::DenseSet<const VarDecl *> SIMDLCVs; 1374 if (isOpenMPSimdDirective(D.getDirectiveKind())) { 1375 auto *LoopDirective = cast<OMPLoopDirective>(&D); 1376 for (auto *C : LoopDirective->counters()) { 1377 SIMDLCVs.insert( 1378 cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl()); 1379 } 1380 } 1381 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1382 auto CurPrivate = C->privates().begin(); 1383 for (auto *E : C->varlists()) { 1384 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 1385 auto *PrivateVD = 1386 cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl()); 1387 if (!SIMDLCVs.count(VD->getCanonicalDecl())) { 1388 bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address { 1389 // Emit private VarDecl with copy init. 1390 EmitVarDecl(*PrivateVD); 1391 return GetAddrOfLocalVar(PrivateVD); 1392 }); 1393 assert(IsRegistered && "linear var already registered as private"); 1394 // Silence the warning about unused variable. 
1395 (void)IsRegistered; 1396 } else 1397 EmitVarDecl(*PrivateVD); 1398 ++CurPrivate; 1399 } 1400 } 1401 } 1402 1403 static void emitSimdlenSafelenClause(CodeGenFunction &CGF, 1404 const OMPExecutableDirective &D, 1405 bool IsMonotonic) { 1406 if (!CGF.HaveInsertPoint()) 1407 return; 1408 if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) { 1409 RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(), 1410 /*ignoreResult=*/true); 1411 llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); 1412 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); 1413 // In presence of finite 'safelen', it may be unsafe to mark all 1414 // the memory instructions parallel, because loop-carried 1415 // dependences of 'safelen' iterations are possible. 1416 if (!IsMonotonic) 1417 CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>()); 1418 } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) { 1419 RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(), 1420 /*ignoreResult=*/true); 1421 llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); 1422 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); 1423 // In presence of finite 'safelen', it may be unsafe to mark all 1424 // the memory instructions parallel, because loop-carried 1425 // dependences of 'safelen' iterations are possible. 1426 CGF.LoopStack.setParallel(false); 1427 } 1428 } 1429 1430 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D, 1431 bool IsMonotonic) { 1432 // Walk clauses and process safelen/lastprivate. 
1433 LoopStack.setParallel(!IsMonotonic); 1434 LoopStack.setVectorizeEnable(true); 1435 emitSimdlenSafelenClause(*this, D, IsMonotonic); 1436 } 1437 1438 void CodeGenFunction::EmitOMPSimdFinal( 1439 const OMPLoopDirective &D, 1440 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) { 1441 if (!HaveInsertPoint()) 1442 return; 1443 llvm::BasicBlock *DoneBB = nullptr; 1444 auto IC = D.counters().begin(); 1445 auto IPC = D.private_counters().begin(); 1446 for (auto F : D.finals()) { 1447 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl()); 1448 auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl()); 1449 auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD); 1450 if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) || 1451 OrigVD->hasGlobalStorage() || CED) { 1452 if (!DoneBB) { 1453 if (auto *Cond = CondGen(*this)) { 1454 // If the first post-update expression is found, emit conditional 1455 // block if it was requested. 1456 auto *ThenBB = createBasicBlock(".omp.final.then"); 1457 DoneBB = createBasicBlock(".omp.final.done"); 1458 Builder.CreateCondBr(Cond, ThenBB, DoneBB); 1459 EmitBlock(ThenBB); 1460 } 1461 } 1462 Address OrigAddr = Address::invalid(); 1463 if (CED) 1464 OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(); 1465 else { 1466 DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD), 1467 /*RefersToEnclosingVariableOrCapture=*/false, 1468 (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc()); 1469 OrigAddr = EmitLValue(&DRE).getAddress(); 1470 } 1471 OMPPrivateScope VarScope(*this); 1472 VarScope.addPrivate(OrigVD, 1473 [OrigAddr]() -> Address { return OrigAddr; }); 1474 (void)VarScope.Privatize(); 1475 EmitIgnoredExpr(F); 1476 } 1477 ++IC; 1478 ++IPC; 1479 } 1480 if (DoneBB) 1481 EmitBlock(DoneBB, /*IsFinished=*/true); 1482 } 1483 1484 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF, 1485 const OMPLoopDirective &S, 1486 CodeGenFunction::JumpDest LoopExit) { 1487 
CGF.EmitOMPLoopBody(S, LoopExit); 1488 CGF.EmitStopPoint(&S); 1489 } 1490 1491 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { 1492 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1493 OMPLoopScope PreInitScope(CGF, S); 1494 // if (PreCond) { 1495 // for (IV in 0..LastIteration) BODY; 1496 // <Final counter/linear vars updates>; 1497 // } 1498 // 1499 1500 // Emit: if (PreCond) - begin. 1501 // If the condition constant folds and can be elided, avoid emitting the 1502 // whole loop. 1503 bool CondConstant; 1504 llvm::BasicBlock *ContBlock = nullptr; 1505 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 1506 if (!CondConstant) 1507 return; 1508 } else { 1509 auto *ThenBlock = CGF.createBasicBlock("simd.if.then"); 1510 ContBlock = CGF.createBasicBlock("simd.if.end"); 1511 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 1512 CGF.getProfileCount(&S)); 1513 CGF.EmitBlock(ThenBlock); 1514 CGF.incrementProfileCounter(&S); 1515 } 1516 1517 // Emit the loop iteration variable. 1518 const Expr *IVExpr = S.getIterationVariable(); 1519 const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 1520 CGF.EmitVarDecl(*IVDecl); 1521 CGF.EmitIgnoredExpr(S.getInit()); 1522 1523 // Emit the iterations count variable. 1524 // If it is not a variable, Sema decided to calculate iterations count on 1525 // each iteration (e.g., it is foldable into a constant). 1526 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 1527 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 1528 // Emit calculation of the iterations count. 
1529 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 1530 } 1531 1532 CGF.EmitOMPSimdInit(S); 1533 1534 emitAlignedClause(CGF, S); 1535 (void)CGF.EmitOMPLinearClauseInit(S); 1536 { 1537 OMPPrivateScope LoopScope(CGF); 1538 CGF.EmitOMPPrivateLoopCounters(S, LoopScope); 1539 CGF.EmitOMPLinearClause(S, LoopScope); 1540 CGF.EmitOMPPrivateClause(S, LoopScope); 1541 CGF.EmitOMPReductionClauseInit(S, LoopScope); 1542 bool HasLastprivateClause = 1543 CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 1544 (void)LoopScope.Privatize(); 1545 CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 1546 S.getInc(), 1547 [&S](CodeGenFunction &CGF) { 1548 CGF.EmitOMPLoopBody(S, JumpDest()); 1549 CGF.EmitStopPoint(&S); 1550 }, 1551 [](CodeGenFunction &) {}); 1552 CGF.EmitOMPSimdFinal( 1553 S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1554 // Emit final copy of the lastprivate variables at the end of loops. 1555 if (HasLastprivateClause) 1556 CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true); 1557 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd); 1558 emitPostUpdateForReductionClause( 1559 CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1560 } 1561 CGF.EmitOMPLinearClauseFinal( 1562 S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 1563 // Emit: if (PreCond) - end. 
1564 if (ContBlock) { 1565 CGF.EmitBranch(ContBlock); 1566 CGF.EmitBlock(ContBlock, true); 1567 } 1568 }; 1569 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1570 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 1571 } 1572 1573 void CodeGenFunction::EmitOMPOuterLoop( 1574 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, 1575 CodeGenFunction::OMPPrivateScope &LoopScope, 1576 const CodeGenFunction::OMPLoopArguments &LoopArgs, 1577 const CodeGenFunction::CodeGenLoopTy &CodeGenLoop, 1578 const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) { 1579 auto &RT = CGM.getOpenMPRuntime(); 1580 1581 const Expr *IVExpr = S.getIterationVariable(); 1582 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1583 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1584 1585 auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end"); 1586 1587 // Start the loop with a block that tests the condition. 1588 auto CondBlock = createBasicBlock("omp.dispatch.cond"); 1589 EmitBlock(CondBlock); 1590 const SourceRange &R = S.getSourceRange(); 1591 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), 1592 SourceLocToDebugLoc(R.getEnd())); 1593 1594 llvm::Value *BoolCondVal = nullptr; 1595 if (!DynamicOrOrdered) { 1596 // UB = min(UB, GlobalUB) or 1597 // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g. 1598 // 'distribute parallel for') 1599 EmitIgnoredExpr(LoopArgs.EUB); 1600 // IV = LB 1601 EmitIgnoredExpr(LoopArgs.Init); 1602 // IV < UB 1603 BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond); 1604 } else { 1605 BoolCondVal = 1606 RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL, 1607 LoopArgs.LB, LoopArgs.UB, LoopArgs.ST); 1608 } 1609 1610 // If there are any cleanups between here and the loop-exit scope, 1611 // create a block to stage a loop exit along. 
1612 auto ExitBlock = LoopExit.getBlock(); 1613 if (LoopScope.requiresCleanups()) 1614 ExitBlock = createBasicBlock("omp.dispatch.cleanup"); 1615 1616 auto LoopBody = createBasicBlock("omp.dispatch.body"); 1617 Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock); 1618 if (ExitBlock != LoopExit.getBlock()) { 1619 EmitBlock(ExitBlock); 1620 EmitBranchThroughCleanup(LoopExit); 1621 } 1622 EmitBlock(LoopBody); 1623 1624 // Emit "IV = LB" (in case of static schedule, we have already calculated new 1625 // LB for loop condition and emitted it above). 1626 if (DynamicOrOrdered) 1627 EmitIgnoredExpr(LoopArgs.Init); 1628 1629 // Create a block for the increment. 1630 auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); 1631 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1632 1633 // Generate !llvm.loop.parallel metadata for loads and stores for loops 1634 // with dynamic/guided scheduling and without ordered clause. 1635 if (!isOpenMPSimdDirective(S.getDirectiveKind())) 1636 LoopStack.setParallel(!IsMonotonic); 1637 else 1638 EmitOMPSimdInit(S, IsMonotonic); 1639 1640 SourceLocation Loc = S.getLocStart(); 1641 1642 // when 'distribute' is not combined with a 'for': 1643 // while (idx <= UB) { BODY; ++idx; } 1644 // when 'distribute' is combined with a 'for' 1645 // (e.g. 'distribute parallel for') 1646 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } 1647 EmitOMPInnerLoop( 1648 S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr, 1649 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { 1650 CodeGenLoop(CGF, S, LoopExit); 1651 }, 1652 [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) { 1653 CodeGenOrdered(CGF, Loc, IVSize, IVSigned); 1654 }); 1655 1656 EmitBlock(Continue.getBlock()); 1657 BreakContinueStack.pop_back(); 1658 if (!DynamicOrOrdered) { 1659 // Emit "LB = LB + Stride", "UB = UB + Stride". 
1660 EmitIgnoredExpr(LoopArgs.NextLB); 1661 EmitIgnoredExpr(LoopArgs.NextUB); 1662 } 1663 1664 EmitBranch(CondBlock); 1665 LoopStack.pop(); 1666 // Emit the fall-through block. 1667 EmitBlock(LoopExit.getBlock()); 1668 1669 // Tell the runtime we are done. 1670 auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) { 1671 if (!DynamicOrOrdered) 1672 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); 1673 }; 1674 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 1675 } 1676 1677 void CodeGenFunction::EmitOMPForOuterLoop( 1678 const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, 1679 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, 1680 const OMPLoopArguments &LoopArgs, 1681 const CodeGenDispatchBoundsTy &CGDispatchBounds) { 1682 auto &RT = CGM.getOpenMPRuntime(); 1683 1684 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). 1685 const bool DynamicOrOrdered = 1686 Ordered || RT.isDynamic(ScheduleKind.Schedule); 1687 1688 assert((Ordered || 1689 !RT.isStaticNonchunked(ScheduleKind.Schedule, 1690 LoopArgs.Chunk != nullptr)) && 1691 "static non-chunked schedule does not need outer loop"); 1692 1693 // Emit outer loop. 1694 // 1695 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 1696 // When schedule(dynamic,chunk_size) is specified, the iterations are 1697 // distributed to threads in the team in chunks as the threads request them. 1698 // Each thread executes a chunk of iterations, then requests another chunk, 1699 // until no chunks remain to be distributed. Each chunk contains chunk_size 1700 // iterations, except for the last chunk to be distributed, which may have 1701 // fewer iterations. When no chunk_size is specified, it defaults to 1. 1702 // 1703 // When schedule(guided,chunk_size) is specified, the iterations are assigned 1704 // to threads in the team in chunks as the executing threads request them. 
1705 // Each thread executes a chunk of iterations, then requests another chunk, 1706 // until no chunks remain to be assigned. For a chunk_size of 1, the size of 1707 // each chunk is proportional to the number of unassigned iterations divided 1708 // by the number of threads in the team, decreasing to 1. For a chunk_size 1709 // with value k (greater than 1), the size of each chunk is determined in the 1710 // same way, with the restriction that the chunks do not contain fewer than k 1711 // iterations (except for the last chunk to be assigned, which may have fewer 1712 // than k iterations). 1713 // 1714 // When schedule(auto) is specified, the decision regarding scheduling is 1715 // delegated to the compiler and/or runtime system. The programmer gives the 1716 // implementation the freedom to choose any possible mapping of iterations to 1717 // threads in the team. 1718 // 1719 // When schedule(runtime) is specified, the decision regarding scheduling is 1720 // deferred until run time, and the schedule and chunk size are taken from the 1721 // run-sched-var ICV. If the ICV is set to auto, the schedule is 1722 // implementation defined 1723 // 1724 // while(__kmpc_dispatch_next(&LB, &UB)) { 1725 // idx = LB; 1726 // while (idx <= UB) { BODY; ++idx; 1727 // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only. 1728 // } // inner loop 1729 // } 1730 // 1731 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 1732 // When schedule(static, chunk_size) is specified, iterations are divided into 1733 // chunks of size chunk_size, and the chunks are assigned to the threads in 1734 // the team in a round-robin fashion in the order of the thread number. 
1735 // 1736 // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) { 1737 // while (idx <= UB) { BODY; ++idx; } // inner loop 1738 // LB = LB + ST; 1739 // UB = UB + ST; 1740 // } 1741 // 1742 1743 const Expr *IVExpr = S.getIterationVariable(); 1744 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1745 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1746 1747 if (DynamicOrOrdered) { 1748 auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB); 1749 llvm::Value *LBVal = DispatchBounds.first; 1750 llvm::Value *UBVal = DispatchBounds.second; 1751 CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal, 1752 LoopArgs.Chunk}; 1753 RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize, 1754 IVSigned, Ordered, DipatchRTInputValues); 1755 } else { 1756 RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, 1757 Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB, 1758 LoopArgs.ST, LoopArgs.Chunk); 1759 } 1760 1761 auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc, 1762 const unsigned IVSize, 1763 const bool IVSigned) { 1764 if (Ordered) { 1765 CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize, 1766 IVSigned); 1767 } 1768 }; 1769 1770 OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST, 1771 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB); 1772 OuterLoopArgs.IncExpr = S.getInc(); 1773 OuterLoopArgs.Init = S.getInit(); 1774 OuterLoopArgs.Cond = S.getCond(); 1775 OuterLoopArgs.NextLB = S.getNextLowerBound(); 1776 OuterLoopArgs.NextUB = S.getNextUpperBound(); 1777 EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs, 1778 emitOMPLoopBodyWithStopPoint, CodeGenOrdered); 1779 } 1780 1781 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc, 1782 const unsigned IVSize, const bool IVSigned) {} 1783 1784 void CodeGenFunction::EmitOMPDistributeOuterLoop( 1785 
OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S, 1786 OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs, 1787 const CodeGenLoopTy &CodeGenLoopContent) { 1788 1789 auto &RT = CGM.getOpenMPRuntime(); 1790 1791 // Emit outer loop. 1792 // Same behavior as a OMPForOuterLoop, except that schedule cannot be 1793 // dynamic 1794 // 1795 1796 const Expr *IVExpr = S.getIterationVariable(); 1797 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1798 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1799 1800 RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, 1801 IVSigned, /* Ordered = */ false, LoopArgs.IL, 1802 LoopArgs.LB, LoopArgs.UB, LoopArgs.ST, 1803 LoopArgs.Chunk); 1804 1805 // for combined 'distribute' and 'for' the increment expression of distribute 1806 // is store in DistInc. For 'distribute' alone, it is in Inc. 1807 Expr *IncExpr; 1808 if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())) 1809 IncExpr = S.getDistInc(); 1810 else 1811 IncExpr = S.getInc(); 1812 1813 // this routine is shared by 'omp distribute parallel for' and 1814 // 'omp distribute': select the right EUB expression depending on the 1815 // directive 1816 OMPLoopArguments OuterLoopArgs; 1817 OuterLoopArgs.LB = LoopArgs.LB; 1818 OuterLoopArgs.UB = LoopArgs.UB; 1819 OuterLoopArgs.ST = LoopArgs.ST; 1820 OuterLoopArgs.IL = LoopArgs.IL; 1821 OuterLoopArgs.Chunk = LoopArgs.Chunk; 1822 OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 1823 ? S.getCombinedEnsureUpperBound() 1824 : S.getEnsureUpperBound(); 1825 OuterLoopArgs.IncExpr = IncExpr; 1826 OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 1827 ? S.getCombinedInit() 1828 : S.getInit(); 1829 OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 1830 ? 
S.getCombinedCond() 1831 : S.getCond(); 1832 OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 1833 ? S.getCombinedNextLowerBound() 1834 : S.getNextLowerBound(); 1835 OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 1836 ? S.getCombinedNextUpperBound() 1837 : S.getNextUpperBound(); 1838 1839 EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S, 1840 LoopScope, OuterLoopArgs, CodeGenLoopContent, 1841 emitEmptyOrdered); 1842 } 1843 1844 /// Emit a helper variable and return corresponding lvalue. 1845 static LValue EmitOMPHelperVar(CodeGenFunction &CGF, 1846 const DeclRefExpr *Helper) { 1847 auto VDecl = cast<VarDecl>(Helper->getDecl()); 1848 CGF.EmitVarDecl(*VDecl); 1849 return CGF.EmitLValue(Helper); 1850 } 1851 1852 static std::pair<LValue, LValue> 1853 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF, 1854 const OMPExecutableDirective &S) { 1855 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 1856 LValue LB = 1857 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); 1858 LValue UB = 1859 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); 1860 1861 // When composing 'distribute' with 'for' (e.g. as in 'distribute 1862 // parallel for') we need to use the 'distribute' 1863 // chunk lower and upper bounds rather than the whole loop iteration 1864 // space. These are parameters to the outlined function for 'parallel' 1865 // and we copy the bounds of the previous schedule into the 1866 // the current ones. 
1867 LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable()); 1868 LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable()); 1869 llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation()); 1870 PrevLBVal = CGF.EmitScalarConversion( 1871 PrevLBVal, LS.getPrevLowerBoundVariable()->getType(), 1872 LS.getIterationVariable()->getType(), SourceLocation()); 1873 llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation()); 1874 PrevUBVal = CGF.EmitScalarConversion( 1875 PrevUBVal, LS.getPrevUpperBoundVariable()->getType(), 1876 LS.getIterationVariable()->getType(), SourceLocation()); 1877 1878 CGF.EmitStoreOfScalar(PrevLBVal, LB); 1879 CGF.EmitStoreOfScalar(PrevUBVal, UB); 1880 1881 return {LB, UB}; 1882 } 1883 1884 /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then 1885 /// we need to use the LB and UB expressions generated by the worksharing 1886 /// code generation support, whereas in non combined situations we would 1887 /// just emit 0 and the LastIteration expression 1888 /// This function is necessary due to the difference of the LB and UB 1889 /// types for the RT emission routines for 'for_static_init' and 1890 /// 'for_dispatch_init' 1891 static std::pair<llvm::Value *, llvm::Value *> 1892 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF, 1893 const OMPExecutableDirective &S, 1894 Address LB, Address UB) { 1895 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 1896 const Expr *IVExpr = LS.getIterationVariable(); 1897 // when implementing a dynamic schedule for a 'for' combined with a 1898 // 'distribute' (e.g. 
'distribute parallel for'), the 'for' loop 1899 // is not normalized as each team only executes its own assigned 1900 // distribute chunk 1901 QualType IteratorTy = IVExpr->getType(); 1902 llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, 1903 SourceLocation()); 1904 llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, 1905 SourceLocation()); 1906 return {LBVal, UBVal}; 1907 } 1908 1909 static void emitDistributeParallelForDistributeInnerBoundParams( 1910 CodeGenFunction &CGF, const OMPExecutableDirective &S, 1911 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) { 1912 const auto &Dir = cast<OMPLoopDirective>(S); 1913 LValue LB = 1914 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable())); 1915 auto LBCast = CGF.Builder.CreateIntCast( 1916 CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false); 1917 CapturedVars.push_back(LBCast); 1918 LValue UB = 1919 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable())); 1920 1921 auto UBCast = CGF.Builder.CreateIntCast( 1922 CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false); 1923 CapturedVars.push_back(UBCast); 1924 } 1925 1926 static void 1927 emitInnerParallelForWhenCombined(CodeGenFunction &CGF, 1928 const OMPLoopDirective &S, 1929 CodeGenFunction::JumpDest LoopExit) { 1930 auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF, 1931 PrePostActionTy &) { 1932 CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(), 1933 emitDistributeParallelForInnerBounds, 1934 emitDistributeParallelForDispatchBounds); 1935 }; 1936 1937 emitCommonOMPParallelDirective( 1938 CGF, S, OMPD_for, CGInlinedWorksharingLoop, 1939 emitDistributeParallelForDistributeInnerBoundParams); 1940 } 1941 1942 void CodeGenFunction::EmitOMPDistributeParallelForDirective( 1943 const OMPDistributeParallelForDirective &S) { 1944 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1945 CGF.EmitOMPDistributeLoop(S, 
emitInnerParallelForWhenCombined, 1946 S.getDistInc()); 1947 }; 1948 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1949 OMPCancelStackRAII CancelRegion(*this, OMPD_distribute_parallel_for, 1950 /*HasCancel=*/false); 1951 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, 1952 /*HasCancel=*/false); 1953 } 1954 1955 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective( 1956 const OMPDistributeParallelForSimdDirective &S) { 1957 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1958 CGM.getOpenMPRuntime().emitInlinedDirective( 1959 *this, OMPD_distribute_parallel_for_simd, 1960 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1961 OMPLoopScope PreInitScope(CGF, S); 1962 CGF.EmitStmt( 1963 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1964 }); 1965 } 1966 1967 void CodeGenFunction::EmitOMPDistributeSimdDirective( 1968 const OMPDistributeSimdDirective &S) { 1969 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1970 CGM.getOpenMPRuntime().emitInlinedDirective( 1971 *this, OMPD_distribute_simd, 1972 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1973 OMPLoopScope PreInitScope(CGF, S); 1974 CGF.EmitStmt( 1975 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1976 }); 1977 } 1978 1979 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective( 1980 const OMPTargetParallelForSimdDirective &S) { 1981 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1982 CGM.getOpenMPRuntime().emitInlinedDirective( 1983 *this, OMPD_target_parallel_for_simd, 1984 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 1985 OMPLoopScope PreInitScope(CGF, S); 1986 CGF.EmitStmt( 1987 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1988 }); 1989 } 1990 1991 void CodeGenFunction::EmitOMPTargetSimdDirective( 1992 const OMPTargetSimdDirective &S) { 1993 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 1994 CGM.getOpenMPRuntime().emitInlinedDirective( 1995 *this, OMPD_target_simd, [&S](CodeGenFunction &CGF, 
PrePostActionTy &) { 1996 OMPLoopScope PreInitScope(CGF, S); 1997 CGF.EmitStmt( 1998 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 1999 }); 2000 } 2001 2002 void CodeGenFunction::EmitOMPTeamsDistributeDirective( 2003 const OMPTeamsDistributeDirective &S) { 2004 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2005 CGM.getOpenMPRuntime().emitInlinedDirective( 2006 *this, OMPD_teams_distribute, 2007 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2008 OMPLoopScope PreInitScope(CGF, S); 2009 CGF.EmitStmt( 2010 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2011 }); 2012 } 2013 2014 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective( 2015 const OMPTeamsDistributeSimdDirective &S) { 2016 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2017 CGM.getOpenMPRuntime().emitInlinedDirective( 2018 *this, OMPD_teams_distribute_simd, 2019 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2020 OMPLoopScope PreInitScope(CGF, S); 2021 CGF.EmitStmt( 2022 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2023 }); 2024 } 2025 2026 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective( 2027 const OMPTeamsDistributeParallelForSimdDirective &S) { 2028 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2029 CGM.getOpenMPRuntime().emitInlinedDirective( 2030 *this, OMPD_teams_distribute_parallel_for_simd, 2031 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2032 OMPLoopScope PreInitScope(CGF, S); 2033 CGF.EmitStmt( 2034 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2035 }); 2036 } 2037 2038 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective( 2039 const OMPTeamsDistributeParallelForDirective &S) { 2040 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2041 CGM.getOpenMPRuntime().emitInlinedDirective( 2042 *this, OMPD_teams_distribute_parallel_for, 2043 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2044 OMPLoopScope PreInitScope(CGF, S); 2045 CGF.EmitStmt( 2046 
cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2047 }); 2048 } 2049 2050 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective( 2051 const OMPTargetTeamsDistributeDirective &S) { 2052 CGM.getOpenMPRuntime().emitInlinedDirective( 2053 *this, OMPD_target_teams_distribute, 2054 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2055 CGF.EmitStmt( 2056 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2057 }); 2058 } 2059 2060 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective( 2061 const OMPTargetTeamsDistributeParallelForDirective &S) { 2062 CGM.getOpenMPRuntime().emitInlinedDirective( 2063 *this, OMPD_target_teams_distribute_parallel_for, 2064 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2065 CGF.EmitStmt( 2066 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2067 }); 2068 } 2069 2070 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective( 2071 const OMPTargetTeamsDistributeParallelForSimdDirective &S) { 2072 CGM.getOpenMPRuntime().emitInlinedDirective( 2073 *this, OMPD_target_teams_distribute_parallel_for_simd, 2074 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2075 CGF.EmitStmt( 2076 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2077 }); 2078 } 2079 2080 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective( 2081 const OMPTargetTeamsDistributeSimdDirective &S) { 2082 CGM.getOpenMPRuntime().emitInlinedDirective( 2083 *this, OMPD_target_teams_distribute_simd, 2084 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2085 CGF.EmitStmt( 2086 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2087 }); 2088 } 2089 2090 namespace { 2091 struct ScheduleKindModifiersTy { 2092 OpenMPScheduleClauseKind Kind; 2093 OpenMPScheduleClauseModifier M1; 2094 OpenMPScheduleClauseModifier M2; 2095 ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind, 2096 OpenMPScheduleClauseModifier M1, 2097 OpenMPScheduleClauseModifier M2) 2098 : Kind(Kind), M1(M1), M2(M2) {} 
2099 }; 2100 } // namespace 2101 2102 bool CodeGenFunction::EmitOMPWorksharingLoop( 2103 const OMPLoopDirective &S, Expr *EUB, 2104 const CodeGenLoopBoundsTy &CodeGenLoopBounds, 2105 const CodeGenDispatchBoundsTy &CGDispatchBounds) { 2106 // Emit the loop iteration variable. 2107 auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 2108 auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); 2109 EmitVarDecl(*IVDecl); 2110 2111 // Emit the iterations count variable. 2112 // If it is not a variable, Sema decided to calculate iterations count on each 2113 // iteration (e.g., it is foldable into a constant). 2114 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 2115 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 2116 // Emit calculation of the iterations count. 2117 EmitIgnoredExpr(S.getCalcLastIteration()); 2118 } 2119 2120 auto &RT = CGM.getOpenMPRuntime(); 2121 2122 bool HasLastprivateClause; 2123 // Check pre-condition. 2124 { 2125 OMPLoopScope PreInitScope(*this, S); 2126 // Skip the entire loop if we don't meet the precondition. 2127 // If the condition constant folds and can be elided, avoid emitting the 2128 // whole loop. 
2129 bool CondConstant; 2130 llvm::BasicBlock *ContBlock = nullptr; 2131 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 2132 if (!CondConstant) 2133 return false; 2134 } else { 2135 auto *ThenBlock = createBasicBlock("omp.precond.then"); 2136 ContBlock = createBasicBlock("omp.precond.end"); 2137 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 2138 getProfileCount(&S)); 2139 EmitBlock(ThenBlock); 2140 incrementProfileCounter(&S); 2141 } 2142 2143 bool Ordered = false; 2144 if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) { 2145 if (OrderedClause->getNumForLoops()) 2146 RT.emitDoacrossInit(*this, S); 2147 else 2148 Ordered = true; 2149 } 2150 2151 llvm::DenseSet<const Expr *> EmittedFinals; 2152 emitAlignedClause(*this, S); 2153 bool HasLinears = EmitOMPLinearClauseInit(S); 2154 // Emit helper vars inits. 2155 2156 std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S); 2157 LValue LB = Bounds.first; 2158 LValue UB = Bounds.second; 2159 LValue ST = 2160 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 2161 LValue IL = 2162 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 2163 2164 // Emit 'then' code. 2165 { 2166 OMPPrivateScope LoopScope(*this); 2167 if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) { 2168 // Emit implicit barrier to synchronize threads and avoid data races on 2169 // initialization of firstprivate variables and post-update of 2170 // lastprivate variables. 2171 CGM.getOpenMPRuntime().emitBarrierCall( 2172 *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, 2173 /*ForceSimpleCall=*/true); 2174 } 2175 EmitOMPPrivateClause(S, LoopScope); 2176 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 2177 EmitOMPReductionClauseInit(S, LoopScope); 2178 EmitOMPPrivateLoopCounters(S, LoopScope); 2179 EmitOMPLinearClause(S, LoopScope); 2180 (void)LoopScope.Privatize(); 2181 2182 // Detect the loop schedule kind and chunk. 
2183 llvm::Value *Chunk = nullptr; 2184 OpenMPScheduleTy ScheduleKind; 2185 if (auto *C = S.getSingleClause<OMPScheduleClause>()) { 2186 ScheduleKind.Schedule = C->getScheduleKind(); 2187 ScheduleKind.M1 = C->getFirstScheduleModifier(); 2188 ScheduleKind.M2 = C->getSecondScheduleModifier(); 2189 if (const auto *Ch = C->getChunkSize()) { 2190 Chunk = EmitScalarExpr(Ch); 2191 Chunk = EmitScalarConversion(Chunk, Ch->getType(), 2192 S.getIterationVariable()->getType(), 2193 S.getLocStart()); 2194 } 2195 } 2196 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 2197 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 2198 // OpenMP 4.5, 2.7.1 Loop Construct, Description. 2199 // If the static schedule kind is specified or if the ordered clause is 2200 // specified, and if no monotonic modifier is specified, the effect will 2201 // be as if the monotonic modifier was specified. 2202 if (RT.isStaticNonchunked(ScheduleKind.Schedule, 2203 /* Chunked */ Chunk != nullptr) && 2204 !Ordered) { 2205 if (isOpenMPSimdDirective(S.getDirectiveKind())) 2206 EmitOMPSimdInit(S, /*IsMonotonic=*/true); 2207 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 2208 // When no chunk_size is specified, the iteration space is divided into 2209 // chunks that are approximately equal in size, and at most one chunk is 2210 // distributed to each thread. Note that the size of the chunks is 2211 // unspecified in this case. 
2212 RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, 2213 IVSize, IVSigned, Ordered, 2214 IL.getAddress(), LB.getAddress(), 2215 UB.getAddress(), ST.getAddress()); 2216 auto LoopExit = 2217 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 2218 // UB = min(UB, GlobalUB); 2219 EmitIgnoredExpr(S.getEnsureUpperBound()); 2220 // IV = LB; 2221 EmitIgnoredExpr(S.getInit()); 2222 // while (idx <= UB) { BODY; ++idx; } 2223 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 2224 S.getInc(), 2225 [&S, LoopExit](CodeGenFunction &CGF) { 2226 CGF.EmitOMPLoopBody(S, LoopExit); 2227 CGF.EmitStopPoint(&S); 2228 }, 2229 [](CodeGenFunction &) {}); 2230 EmitBlock(LoopExit.getBlock()); 2231 // Tell the runtime we are done. 2232 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2233 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); 2234 }; 2235 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 2236 } else { 2237 const bool IsMonotonic = 2238 Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static || 2239 ScheduleKind.Schedule == OMPC_SCHEDULE_unknown || 2240 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || 2241 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; 2242 // Emit the outer loop, which requests its work chunk [LB..UB] from 2243 // runtime and runs the inner loop to process it. 2244 const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(), 2245 ST.getAddress(), IL.getAddress(), 2246 Chunk, EUB); 2247 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, 2248 LoopArguments, CGDispatchBounds); 2249 } 2250 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 2251 EmitOMPSimdFinal(S, 2252 [&](CodeGenFunction &CGF) -> llvm::Value * { 2253 return CGF.Builder.CreateIsNotNull( 2254 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2255 }); 2256 } 2257 EmitOMPReductionClauseFinal( 2258 S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind()) 2259 ? 
/*Parallel and Simd*/ OMPD_parallel_for_simd 2260 : /*Parallel only*/ OMPD_parallel); 2261 // Emit post-update of the reduction variables if IsLastIter != 0. 2262 emitPostUpdateForReductionClause( 2263 *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2264 return CGF.Builder.CreateIsNotNull( 2265 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2266 }); 2267 // Emit final copy of the lastprivate variables if IsLastIter != 0. 2268 if (HasLastprivateClause) 2269 EmitOMPLastprivateClauseFinal( 2270 S, isOpenMPSimdDirective(S.getDirectiveKind()), 2271 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart()))); 2272 } 2273 EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * { 2274 return CGF.Builder.CreateIsNotNull( 2275 CGF.EmitLoadOfScalar(IL, S.getLocStart())); 2276 }); 2277 // We're now done with the loop, so jump to the continuation block. 2278 if (ContBlock) { 2279 EmitBranch(ContBlock); 2280 EmitBlock(ContBlock, true); 2281 } 2282 } 2283 return HasLastprivateClause; 2284 } 2285 2286 /// The following two functions generate expressions for the loop lower 2287 /// and upper bounds in case of static and dynamic (dispatch) schedule 2288 /// of the associated 'for' or 'distribute' loop. 2289 static std::pair<LValue, LValue> 2290 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) { 2291 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 2292 LValue LB = 2293 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); 2294 LValue UB = 2295 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); 2296 return {LB, UB}; 2297 } 2298 2299 /// When dealing with dispatch schedules (e.g. 
dynamic, guided) we do not 2300 /// consider the lower and upper bound expressions generated by the 2301 /// worksharing loop support, but we use 0 and the iteration space size as 2302 /// constants 2303 static std::pair<llvm::Value *, llvm::Value *> 2304 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S, 2305 Address LB, Address UB) { 2306 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 2307 const Expr *IVExpr = LS.getIterationVariable(); 2308 const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType()); 2309 llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0); 2310 llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration()); 2311 return {LBVal, UBVal}; 2312 } 2313 2314 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { 2315 bool HasLastprivates = false; 2316 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 2317 PrePostActionTy &) { 2318 OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel()); 2319 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 2320 emitForLoopBounds, 2321 emitDispatchForLoopBounds); 2322 }; 2323 { 2324 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2325 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, 2326 S.hasCancel()); 2327 } 2328 2329 // Emit an implicit barrier at the end. 
2330 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { 2331 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); 2332 } 2333 } 2334 2335 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { 2336 bool HasLastprivates = false; 2337 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 2338 PrePostActionTy &) { 2339 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 2340 emitForLoopBounds, 2341 emitDispatchForLoopBounds); 2342 }; 2343 { 2344 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2345 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 2346 } 2347 2348 // Emit an implicit barrier at the end. 2349 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { 2350 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); 2351 } 2352 } 2353 2354 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, 2355 const Twine &Name, 2356 llvm::Value *Init = nullptr) { 2357 auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); 2358 if (Init) 2359 CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true); 2360 return LVal; 2361 } 2362 2363 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { 2364 auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt(); 2365 auto *CS = dyn_cast<CompoundStmt>(Stmt); 2366 bool HasLastprivates = false; 2367 auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF, 2368 PrePostActionTy &) { 2369 auto &C = CGF.CGM.getContext(); 2370 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2371 // Emit helper vars inits. 2372 LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.", 2373 CGF.Builder.getInt32(0)); 2374 auto *GlobalUBVal = CS != nullptr ? 
CGF.Builder.getInt32(CS->size() - 1)
                                      : CGF.Builder.getInt32(0);
    // The global upper bound is the index of the last section; a body that is
    // not a compound statement is treated as a single section (index 0).
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    // Opaque expressions bound to IV and UB so that the loop condition and
    // increment below can be expressed as AST nodes over the temporaries.
    OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop: IV <= UB.
    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                        OK_Ordinary, S.getLocStart(), FPOptions());
    // Increment for loop counter: ++IV.
    UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
                      S.getLocStart());
    auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      // ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      auto *SwitchStmt = CGF.Builder.CreateSwitch(
          CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
          CS == nullptr ? 1 : CS->size());
      if (CS) {
        // One case per child statement of the compound body, numbered in
        // source order.
        unsigned CaseNumber = 0;
        for (auto *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        // The body is not a compound statement: emit it as the only case.
        auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(Stmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of lastprivate
      // variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getLocStart(), ScheduleKind, /*IVSize=*/32,
        /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(),
        UB.getAddress(), ST.getAddress());
    // UB = min(UB, GlobalUB);
    auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
    auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd());
    };
    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(
        CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart()));
        });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart())));
  };

  // Only 'sections' and 'parallel sections' directives can carry a cancel
  // flag; any other directive kind is treated as not cancellable.
  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads and avoid data races on
    // the final copy of lastprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                           OMPD_unknown);
  }
}

void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  {
    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
    EmitSections(S);
  }
  // Emit an implicit barrier at the end.
2507 if (!S.getSingleClause<OMPNowaitClause>()) { 2508 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), 2509 OMPD_sections); 2510 } 2511 } 2512 2513 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { 2514 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2515 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2516 }; 2517 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2518 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen, 2519 S.hasCancel()); 2520 } 2521 2522 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { 2523 llvm::SmallVector<const Expr *, 8> CopyprivateVars; 2524 llvm::SmallVector<const Expr *, 8> DestExprs; 2525 llvm::SmallVector<const Expr *, 8> SrcExprs; 2526 llvm::SmallVector<const Expr *, 8> AssignmentOps; 2527 // Check if there are any 'copyprivate' clauses associated with this 2528 // 'single' construct. 2529 // Build a list of copyprivate variables along with helper expressions 2530 // (<source>, <destination>, <destination>=<source> expressions) 2531 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) { 2532 CopyprivateVars.append(C->varlists().begin(), C->varlists().end()); 2533 DestExprs.append(C->destination_exprs().begin(), 2534 C->destination_exprs().end()); 2535 SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end()); 2536 AssignmentOps.append(C->assignment_ops().begin(), 2537 C->assignment_ops().end()); 2538 } 2539 // Emit code for 'single' region along with 'copyprivate' clauses 2540 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2541 Action.Enter(CGF); 2542 OMPPrivateScope SingleScope(CGF); 2543 (void)CGF.EmitOMPFirstprivateClause(S, SingleScope); 2544 CGF.EmitOMPPrivateClause(S, SingleScope); 2545 (void)SingleScope.Privatize(); 2546 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2547 }; 2548 { 2549 OMPLexicalScope Scope(*this, S, 
/*AsInlined=*/true); 2550 CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(), 2551 CopyprivateVars, DestExprs, 2552 SrcExprs, AssignmentOps); 2553 } 2554 // Emit an implicit barrier at the end (to avoid data race on firstprivate 2555 // init or if no 'nowait' clause was specified and no 'copyprivate' clause). 2556 if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) { 2557 CGM.getOpenMPRuntime().emitBarrierCall( 2558 *this, S.getLocStart(), 2559 S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single); 2560 } 2561 } 2562 2563 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { 2564 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2565 Action.Enter(CGF); 2566 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2567 }; 2568 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2569 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart()); 2570 } 2571 2572 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { 2573 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2574 Action.Enter(CGF); 2575 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 2576 }; 2577 Expr *Hint = nullptr; 2578 if (auto *HintClause = S.getSingleClause<OMPHintClause>()) 2579 Hint = HintClause->getHint(); 2580 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 2581 CGM.getOpenMPRuntime().emitCriticalRegion(*this, 2582 S.getDirectiveName().getAsString(), 2583 CodeGen, S.getLocStart(), Hint); 2584 } 2585 2586 void CodeGenFunction::EmitOMPParallelForDirective( 2587 const OMPParallelForDirective &S) { 2588 // Emit directive as a combined directive that consists of two implicit 2589 // directives: 'parallel' with 'for' directive. 
2590 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2591 OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel()); 2592 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 2593 emitDispatchForLoopBounds); 2594 }; 2595 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, 2596 emitEmptyBoundParameters); 2597 } 2598 2599 void CodeGenFunction::EmitOMPParallelForSimdDirective( 2600 const OMPParallelForSimdDirective &S) { 2601 // Emit directive as a combined directive that consists of two implicit 2602 // directives: 'parallel' with 'for' directive. 2603 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2604 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 2605 emitDispatchForLoopBounds); 2606 }; 2607 emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen, 2608 emitEmptyBoundParameters); 2609 } 2610 2611 void CodeGenFunction::EmitOMPParallelSectionsDirective( 2612 const OMPParallelSectionsDirective &S) { 2613 // Emit directive as a combined directive that consists of two implicit 2614 // directives: 'parallel' with 'sections' directive. 2615 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2616 CGF.EmitSections(S); 2617 }; 2618 emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen, 2619 emitEmptyBoundParameters); 2620 } 2621 2622 void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, 2623 const RegionCodeGenTy &BodyGen, 2624 const TaskGenTy &TaskGen, 2625 OMPTaskDataTy &Data) { 2626 // Emit outlined function for task construct. 
auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  // Parameters of the captured declaration are addressed by position: the
  // part id is the second one and the task parameter is the fifth one.
  auto *I = CS->getCapturedDecl()->param_begin();
  auto *PartId = std::next(I);
  auto *TaskT = std::next(I, 4);
  // Check if the task is final
  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    auto *Cond = Clause->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Data.Final.setInt(CondConstant);
    else
      Data.Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Data.Final.setInt(/*IntVal=*/false);
  }
  // Check if the task has 'priority' clause.
  if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
    auto *Prio = Clause->getPriority();
    Data.Priority.setInt(/*IntVal=*/true);
    // The priority value is converted to a signed 32-bit integer.
    Data.Priority.setPointer(EmitScalarConversion(
        EmitScalarExpr(Prio), Prio->getType(),
        getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
        Prio->getExprLoc()));
  }
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >=0 for untied task).
  // De-duplicates variables (by canonical declaration) across clauses.
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  // Get list of private variables.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.PrivateVars.push_back(*IRef);
        Data.PrivateCopies.push_back(IInit);
      }
      ++IRef;
    }
  }
  // Restart de-duplication; the set is then shared by the 'firstprivate' and
  // 'lastprivate' scans below (it is not cleared between them).
  EmittedAsPrivate.clear();
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.FirstprivateVars.push_back(*IRef);
        Data.FirstprivateCopies.push_back(IInit);
        Data.FirstprivateInits.push_back(*IElemInitRef);
      }
      ++IRef;
      ++IElemInitRef;
    }
  }
  // Get list of lastprivate variables (for taskloops).
  // Maps the declaration of each destination expression to the reference to
  // the original variable.
  llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ID = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.LastprivateVars.push_back(*IRef);
        Data.LastprivateCopies.push_back(IInit);
      }
      // Recorded for every list item, even if it was already emitted as
      // private by an earlier clause.
      LastprivateDstsOrigs.insert(
          {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
           cast<DeclRefExpr>(*IRef)});
      ++IRef;
      ++ID;
    }
  }
  // Collect reduction items; LHS/RHS helper expressions are only needed for
  // the task reduction initialization call below.
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (const auto *Ref : C->varlists()) {
      Data.ReductionVars.emplace_back(Ref);
      Data.ReductionCopies.emplace_back(*IPriv);
      Data.ReductionOps.emplace_back(*IRed);
      LHSs.emplace_back(*ILHS);
      RHSs.emplace_back(*IRHS);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ILHS, 1);
      std::advance(IRHS, 1);
    }
  }
  Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
      *this, S.getLocStart(), LHSs, RHSs, Data);
  // Build list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>())
    for (auto *IRef : C->varlists())
      Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
  // Generates the body of the outlined task function: remaps privatized
  // variables to their task-local copies, then emits the user body.
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
        !Data.LastprivateVars.empty()) {
      // The copy function and the pointer to the privates are read from the
      // fourth and third parameters of the captured declaration.
      auto *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
      auto *PrivatesPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      CallArgs.push_back(PrivatesPtr);
      // For each privatized variable create a temporary that receives the
      // pointer to its private copy; the argument order is private,
      // firstprivate, lastprivate.
      for (auto *E : Data.PrivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (auto *E : Data.FirstprivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (auto *E : Data.LastprivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".lastpriv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      CGF.EmitRuntimeCall(CopyFn, CallArgs);
      // Lastprivate destinations are mapped to the address of the original
      // variable as seen from inside the captured region.
      for (auto &&Pair : LastprivateDstsOrigs) {
        auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
        DeclRefExpr DRE(
            const_cast<VarDecl *>(OrigVD),
            /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
                OrigVD) != nullptr,
            Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
        Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
          return CGF.EmitLValue(&DRE).getAddress();
        });
      }
      // Each privatized variable is replaced by the address loaded from its
      // pointer temporary (filled in by the copy function call above).
      for (auto &&Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    if (Data.Reductions) {
      OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true);
      ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
                             Data.ReductionOps);
      // The reductions descriptor is read from the tenth parameter of the
      // captured declaration.
      llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
      for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        // Convert the returned void pointer to a pointer to the type of the
        // private reduction copy.
        Replacement =
            Address(CGF.EmitScalarConversion(
                        Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                        CGF.getContext().getPointerType(
                            Data.ReductionCopies[Cnt]->getType()),
                        SourceLocation()),
                    Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        Scope.addPrivate(RedCG.getBaseDecl(Cnt),
                         [Replacement]() { return Replacement; });
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
                                                           RedCG, Cnt);
      }
    }
    (void)Scope.Privatize();

    Action.Enter(CGF);
    BodyGen(CGF);
  };
  auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
      Data.NumberOfParts);
  // The lexical scope is opened only after the outlined function has been
  // emitted and covers the task call itself.
  OMPLexicalScope Scope(*this, S);
  TaskGen(*this, OutlinedFn, Data);
}

/// Emit code for the 'task' directive: selects the applicable 'if' clause,
/// determines whether the task is tied and delegates to
/// EmitOMPTaskBasedDirective().
void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  // Use the first 'if' clause that has no directive-name modifier or whose
  // modifier is 'task'.
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_task) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if we should emit tied or untied task.
Data.Tied = !S.getSingleClause<OMPUntiedClause>();
  // The task body is simply the captured statement.
  auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(CS->getCapturedStmt());
  };
  // Emits the runtime task call for the outlined function.
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn,
                                            SharedsTy, CapturedStruct, IfCond,
                                            Data);
  };
  EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
}

/// Emit code for the 'taskyield' directive as a runtime taskyield call.
void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
}

/// Emit code for the 'barrier' directive as a runtime barrier call of kind
/// OMPD_barrier.
void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
}

/// Emit code for the 'taskwait' directive as a runtime taskwait call.
void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
  CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
}

/// Emit code for the 'taskgroup' directive: the captured statement becomes the
/// body of a taskgroup region emitted by the OpenMP runtime codegen.
void CodeGenFunction::EmitOMPTaskgroupDirective(
    const OMPTaskgroupDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
}

/// Emit code for the 'flush' directive. The variable list of the flush clause
/// is passed to the runtime codegen; without such a clause an empty list is
/// passed.
void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
  // The immediately-invoked lambda computes the list argument in place.
  CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
    if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
      return llvm::makeArrayRef(FlushClause->varlist_begin(),
                                FlushClause->varlist_end());
    }
    return llvm::None;
  }(), S.getLocStart());
}

/// Emit the loop of a 'distribute'-based directive.
///
/// \param S           Loop directive to emit.
/// \param CodeGenLoop Callback that emits the code run for each chunk; it is
///                    either called from the inner loop body or handed to
///                    EmitOMPDistributeOuterLoop().
/// \param IncExpr     Increment expression used by the inner loop in the
///                    static non-chunked case.
void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
                                            const CodeGenLoopTy &CodeGenLoop,
                                            Expr *IncExpr) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause = false;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    // Stays null when the precondition folds to a constant; in that case no
    // continuation block is needed.
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    // Emit 'then' code.
    {
      // Emit helper vars inits.

      // Directives that share loop bounds use the combined lower/upper bound
      // variables instead of the plain ones.
      LValue LB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedLowerBoundVariable()
                          : S.getLowerBoundVariable())));
      LValue UB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedUpperBoundVariable()
                          : S.getUpperBoundVariable())));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races on
        // initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();

      // Detect the distribute schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const auto *Ch = C->getChunkSize()) {
          // The chunk size is converted to the type of the iteration variable.
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getLocStart());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr)) {
        RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
                                    IVSize, IVSigned, /* Ordered = */ false,
                                    IL.getAddress(), LB.getAddress(),
                                    UB.getAddress(), ST.getAddress());
        auto LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedEnsureUpperBound()
                            : S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedInit()
                            : S.getInit());

        Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                         ? S.getCombinedCond()
                         : S.getCond();

        // for distribute alone,  codegen
        // while (idx <= UB) { BODY; ++idx; }
        // when combined with 'for' (e.g. as in 'distribute parallel for')
        // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
                         [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
                           CodeGenLoop(CGF, S, LoopExit);
                         },
                         [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getLocStart());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments = {
            LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
            Chunk};
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
                                   CodeGenLoop);
      }

      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, /*NoFinals=*/false,
            Builder.CreateIsNotNull(
                EmitLoadOfScalar(IL, S.getLocStart())));
    }

    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
}

/// Emit code for the 'distribute' directive: the distribute loop is emitted as
/// an inlined OMPD_distribute region, with S.getInc() as the increment
/// expression and the last emitInlinedDirective argument set to false.
void CodeGenFunction::EmitOMPDistributeDirective(
    const OMPDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {

    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
                                              false);
}

/// Generate a separate function for the captured statement \p S using a fresh
/// CodeGenFunction and mark the resulting function 'noinline'.
static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
                                                   const CapturedStmt *S) {
  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
  CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
  CGF.CapturedStmtInfo = &CapStmtInfo;
  auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
  Fn->addFnAttr(llvm::Attribute::NoInline);
  return Fn;
}

/// Emit code for the 'ordered' directive.
///
/// Without an associated statement only the doacross calls for the 'depend'
/// clauses are emitted. Otherwise an ordered region is emitted; with a 'simd'
/// clause the body is outlined into a separate function and called, without
/// it the body is emitted inline.
void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
  if (!S.getAssociatedStmt()) {
    for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
    return;
  }
  // Non-null iff the directive has a 'simd' clause.
  auto *C = S.getSingleClause<OMPSIMDClause>();
  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
                                 PrePostActionTy &Action) {
    if (C) {
      auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
llvm::SmallVector<llvm::Value *, 16> CapturedVars; 3074 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 3075 auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); 3076 CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars); 3077 } else { 3078 Action.Enter(CGF); 3079 CGF.EmitStmt( 3080 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3081 } 3082 }; 3083 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 3084 CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C); 3085 } 3086 3087 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val, 3088 QualType SrcType, QualType DestType, 3089 SourceLocation Loc) { 3090 assert(CGF.hasScalarEvaluationKind(DestType) && 3091 "DestType must have scalar evaluation kind."); 3092 assert(!Val.isAggregate() && "Must be a scalar or complex."); 3093 return Val.isScalar() 3094 ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType, 3095 Loc) 3096 : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType, 3097 DestType, Loc); 3098 } 3099 3100 static CodeGenFunction::ComplexPairTy 3101 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, 3102 QualType DestType, SourceLocation Loc) { 3103 assert(CGF.getEvaluationKind(DestType) == TEK_Complex && 3104 "DestType must have complex evaluation kind."); 3105 CodeGenFunction::ComplexPairTy ComplexVal; 3106 if (Val.isScalar()) { 3107 // Convert the input element to the element type of the complex. 
3108 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 3109 auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, 3110 DestElementType, Loc); 3111 ComplexVal = CodeGenFunction::ComplexPairTy( 3112 ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType())); 3113 } else { 3114 assert(Val.isComplex() && "Must be a scalar or complex."); 3115 auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType(); 3116 auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); 3117 ComplexVal.first = CGF.EmitScalarConversion( 3118 Val.getComplexVal().first, SrcElementType, DestElementType, Loc); 3119 ComplexVal.second = CGF.EmitScalarConversion( 3120 Val.getComplexVal().second, SrcElementType, DestElementType, Loc); 3121 } 3122 return ComplexVal; 3123 } 3124 3125 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst, 3126 LValue LVal, RValue RVal) { 3127 if (LVal.isGlobalReg()) { 3128 CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal); 3129 } else { 3130 CGF.EmitAtomicStore(RVal, LVal, 3131 IsSeqCst ? 
llvm::AtomicOrdering::SequentiallyConsistent 3132 : llvm::AtomicOrdering::Monotonic, 3133 LVal.isVolatile(), /*IsInit=*/false); 3134 } 3135 } 3136 3137 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal, 3138 QualType RValTy, SourceLocation Loc) { 3139 switch (getEvaluationKind(LVal.getType())) { 3140 case TEK_Scalar: 3141 EmitStoreThroughLValue(RValue::get(convertToScalarValue( 3142 *this, RVal, RValTy, LVal.getType(), Loc)), 3143 LVal); 3144 break; 3145 case TEK_Complex: 3146 EmitStoreOfComplex( 3147 convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal, 3148 /*isInit=*/false); 3149 break; 3150 case TEK_Aggregate: 3151 llvm_unreachable("Must be a scalar or complex."); 3152 } 3153 } 3154 3155 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst, 3156 const Expr *X, const Expr *V, 3157 SourceLocation Loc) { 3158 // v = x; 3159 assert(V->isLValue() && "V of 'omp atomic read' is not lvalue"); 3160 assert(X->isLValue() && "X of 'omp atomic read' is not lvalue"); 3161 LValue XLValue = CGF.EmitLValue(X); 3162 LValue VLValue = CGF.EmitLValue(V); 3163 RValue Res = XLValue.isGlobalReg() 3164 ? CGF.EmitLoadOfLValue(XLValue, Loc) 3165 : CGF.EmitAtomicLoad( 3166 XLValue, Loc, 3167 IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 3168 : llvm::AtomicOrdering::Monotonic, 3169 XLValue.isVolatile()); 3170 // OpenMP, 2.12.6, atomic Construct 3171 // Any atomic construct with a seq_cst clause forces the atomically 3172 // performed operation to include an implicit flush operation without a 3173 // list. 
3174 if (IsSeqCst) 3175 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3176 CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc); 3177 } 3178 3179 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst, 3180 const Expr *X, const Expr *E, 3181 SourceLocation Loc) { 3182 // x = expr; 3183 assert(X->isLValue() && "X of 'omp atomic write' is not lvalue"); 3184 emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E)); 3185 // OpenMP, 2.12.6, atomic Construct 3186 // Any atomic construct with a seq_cst clause forces the atomically 3187 // performed operation to include an implicit flush operation without a 3188 // list. 3189 if (IsSeqCst) 3190 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3191 } 3192 3193 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, 3194 RValue Update, 3195 BinaryOperatorKind BO, 3196 llvm::AtomicOrdering AO, 3197 bool IsXLHSInRHSPart) { 3198 auto &Context = CGF.CGM.getContext(); 3199 // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x' 3200 // expression is simple and atomic is allowed for the given type for the 3201 // target platform. 
if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() ||
      !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
                        (Update.getScalarVal()->getType() !=
                         X.getAddress().getElementType())) ||
      !X.getAddress().getElementType()->isIntegerTy() ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  // Map the binary operator to the corresponding atomicrmw operation, or bail
  // out if there is none.
  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    // Subtraction is only handled when 'x' is the left-hand operand.
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  // Comparisons map to min/max; the choice depends on the signedness of 'x'
  // and on which side of the comparison 'x' appears.
  case BO_LT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
  case BO_Assign:
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  // These operations are valid here but are not emitted as atomicrmw; the
  // caller has to fall back to another strategy.
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return std::make_pair(false, RValue::get(nullptr));
  // These operators are not expected as atomic update operations at all.
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  auto *UpdateVal = Update.getScalarVal();
  // A constant update value may have a different integer type than 'x' (see
  // the check above); cast it to the element type of 'x'.
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress().getElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}

/// Emit an atomic update of \p X with value \p E.
///
/// First tries a single 'atomicrmw' instruction; if that is not possible, the
/// new value is computed by \p CommonGen from the current value of 'x' and
/// stored either with a plain store (global register lvalues) or through
/// EmitAtomicUpdate().
///
/// \returns the result of emitOMPAtomicRMW(): whether 'atomicrmw' was emitted
/// and, if so, its result.
std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> &CommonGen) {
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval + expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr
  // x = expr Op x; -> expr binop xrval;
  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
  if (!Res.first) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
3300 EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X); 3301 } else { 3302 // Perform compare-and-swap procedure. 3303 EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified()); 3304 } 3305 } 3306 return Res; 3307 } 3308 3309 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst, 3310 const Expr *X, const Expr *E, 3311 const Expr *UE, bool IsXLHSInRHSPart, 3312 SourceLocation Loc) { 3313 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && 3314 "Update expr in 'atomic update' must be a binary operator."); 3315 auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts()); 3316 // Update expressions are allowed to have the following forms: 3317 // x binop= expr; -> xrval + expr; 3318 // x++, ++x -> xrval + 1; 3319 // x--, --x -> xrval - 1; 3320 // x = x binop expr; -> xrval binop expr 3321 // x = expr Op x; - > expr binop xrval; 3322 assert(X->isLValue() && "X of 'omp atomic update' is not lvalue"); 3323 LValue XLValue = CGF.EmitLValue(X); 3324 RValue ExprRValue = CGF.EmitAnyExpr(E); 3325 auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 3326 : llvm::AtomicOrdering::Monotonic; 3327 auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); 3328 auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); 3329 auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; 3330 auto *ERValExpr = IsXLHSInRHSPart ? 
RHS : LHS; 3331 auto Gen = 3332 [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue { 3333 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 3334 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); 3335 return CGF.EmitAnyExpr(UE); 3336 }; 3337 (void)CGF.EmitOMPAtomicSimpleUpdateExpr( 3338 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen); 3339 // OpenMP, 2.12.6, atomic Construct 3340 // Any atomic construct with a seq_cst clause forces the atomically 3341 // performed operation to include an implicit flush operation without a 3342 // list. 3343 if (IsSeqCst) 3344 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3345 } 3346 3347 static RValue convertToType(CodeGenFunction &CGF, RValue Value, 3348 QualType SourceType, QualType ResType, 3349 SourceLocation Loc) { 3350 switch (CGF.getEvaluationKind(ResType)) { 3351 case TEK_Scalar: 3352 return RValue::get( 3353 convertToScalarValue(CGF, Value, SourceType, ResType, Loc)); 3354 case TEK_Complex: { 3355 auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc); 3356 return RValue::getComplex(Res.first, Res.second); 3357 } 3358 case TEK_Aggregate: 3359 break; 3360 } 3361 llvm_unreachable("Must be a scalar or complex."); 3362 } 3363 3364 static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, 3365 bool IsPostfixUpdate, const Expr *V, 3366 const Expr *X, const Expr *E, 3367 const Expr *UE, bool IsXLHSInRHSPart, 3368 SourceLocation Loc) { 3369 assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue"); 3370 assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue"); 3371 RValue NewVVal; 3372 LValue VLValue = CGF.EmitLValue(V); 3373 LValue XLValue = CGF.EmitLValue(X); 3374 RValue ExprRValue = CGF.EmitAnyExpr(E); 3375 auto AO = IsSeqCst ? 
llvm::AtomicOrdering::SequentiallyConsistent 3376 : llvm::AtomicOrdering::Monotonic; 3377 QualType NewVValType; 3378 if (UE) { 3379 // 'x' is updated with some additional value. 3380 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && 3381 "Update expr in 'atomic capture' must be a binary operator."); 3382 auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts()); 3383 // Update expressions are allowed to have the following forms: 3384 // x binop= expr; -> xrval + expr; 3385 // x++, ++x -> xrval + 1; 3386 // x--, --x -> xrval - 1; 3387 // x = x binop expr; -> xrval binop expr 3388 // x = expr Op x; - > expr binop xrval; 3389 auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); 3390 auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); 3391 auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; 3392 NewVValType = XRValExpr->getType(); 3393 auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS; 3394 auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr, 3395 IsPostfixUpdate](RValue XRValue) -> RValue { 3396 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 3397 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); 3398 RValue Res = CGF.EmitAnyExpr(UE); 3399 NewVVal = IsPostfixUpdate ? XRValue : Res; 3400 return Res; 3401 }; 3402 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( 3403 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen); 3404 if (Res.first) { 3405 // 'atomicrmw' instruction was generated. 3406 if (IsPostfixUpdate) { 3407 // Use old value from 'atomicrmw'. 3408 NewVVal = Res.second; 3409 } else { 3410 // 'atomicrmw' does not provide new value, so evaluate it using old 3411 // value of 'x'. 3412 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 3413 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second); 3414 NewVVal = CGF.EmitAnyExpr(UE); 3415 } 3416 } 3417 } else { 3418 // 'x' is simply rewritten with some 'expr'. 
3419 NewVValType = X->getType().getNonReferenceType(); 3420 ExprRValue = convertToType(CGF, ExprRValue, E->getType(), 3421 X->getType().getNonReferenceType(), Loc); 3422 auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue { 3423 NewVVal = XRValue; 3424 return ExprRValue; 3425 }; 3426 // Try to perform atomicrmw xchg, otherwise simple exchange. 3427 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( 3428 XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO, 3429 Loc, Gen); 3430 if (Res.first) { 3431 // 'atomicrmw' instruction was generated. 3432 NewVVal = IsPostfixUpdate ? Res.second : ExprRValue; 3433 } 3434 } 3435 // Emit post-update store to 'v' of old/new 'x' value. 3436 CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc); 3437 // OpenMP, 2.12.6, atomic Construct 3438 // Any atomic construct with a seq_cst clause forces the atomically 3439 // performed operation to include an implicit flush operation without a 3440 // list. 3441 if (IsSeqCst) 3442 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3443 } 3444 3445 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, 3446 bool IsSeqCst, bool IsPostfixUpdate, 3447 const Expr *X, const Expr *V, const Expr *E, 3448 const Expr *UE, bool IsXLHSInRHSPart, 3449 SourceLocation Loc) { 3450 switch (Kind) { 3451 case OMPC_read: 3452 EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc); 3453 break; 3454 case OMPC_write: 3455 EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc); 3456 break; 3457 case OMPC_unknown: 3458 case OMPC_update: 3459 EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc); 3460 break; 3461 case OMPC_capture: 3462 EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE, 3463 IsXLHSInRHSPart, Loc); 3464 break; 3465 case OMPC_if: 3466 case OMPC_final: 3467 case OMPC_num_threads: 3468 case OMPC_private: 3469 case OMPC_firstprivate: 3470 case OMPC_lastprivate: 3471 case OMPC_reduction: 3472 case OMPC_task_reduction: 3473 case 
OMPC_safelen: 3474 case OMPC_simdlen: 3475 case OMPC_collapse: 3476 case OMPC_default: 3477 case OMPC_seq_cst: 3478 case OMPC_shared: 3479 case OMPC_linear: 3480 case OMPC_aligned: 3481 case OMPC_copyin: 3482 case OMPC_copyprivate: 3483 case OMPC_flush: 3484 case OMPC_proc_bind: 3485 case OMPC_schedule: 3486 case OMPC_ordered: 3487 case OMPC_nowait: 3488 case OMPC_untied: 3489 case OMPC_threadprivate: 3490 case OMPC_depend: 3491 case OMPC_mergeable: 3492 case OMPC_device: 3493 case OMPC_threads: 3494 case OMPC_simd: 3495 case OMPC_map: 3496 case OMPC_num_teams: 3497 case OMPC_thread_limit: 3498 case OMPC_priority: 3499 case OMPC_grainsize: 3500 case OMPC_nogroup: 3501 case OMPC_num_tasks: 3502 case OMPC_hint: 3503 case OMPC_dist_schedule: 3504 case OMPC_defaultmap: 3505 case OMPC_uniform: 3506 case OMPC_to: 3507 case OMPC_from: 3508 case OMPC_use_device_ptr: 3509 case OMPC_is_device_ptr: 3510 llvm_unreachable("Clause is not allowed in 'omp atomic'."); 3511 } 3512 } 3513 3514 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { 3515 bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>(); 3516 OpenMPClauseKind Kind = OMPC_unknown; 3517 for (auto *C : S.clauses()) { 3518 // Find first clause (skip seq_cst clause, if it is first). 3519 if (C->getClauseKind() != OMPC_seq_cst) { 3520 Kind = C->getClauseKind(); 3521 break; 3522 } 3523 } 3524 3525 const auto *CS = 3526 S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 3527 if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) { 3528 enterFullExpression(EWC); 3529 } 3530 // Processing for statements under 'atomic capture'. 
3531 if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) { 3532 for (const auto *C : Compound->body()) { 3533 if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) { 3534 enterFullExpression(EWC); 3535 } 3536 } 3537 } 3538 3539 auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF, 3540 PrePostActionTy &) { 3541 CGF.EmitStopPoint(CS); 3542 EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(), 3543 S.getV(), S.getExpr(), S.getUpdateExpr(), 3544 S.isXLHSInRHSPart(), S.getLocStart()); 3545 }; 3546 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); 3547 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen); 3548 } 3549 3550 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, 3551 const OMPExecutableDirective &S, 3552 const RegionCodeGenTy &CodeGen) { 3553 assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind())); 3554 CodeGenModule &CGM = CGF.CGM; 3555 const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt()); 3556 3557 llvm::Function *Fn = nullptr; 3558 llvm::Constant *FnID = nullptr; 3559 3560 const Expr *IfCond = nullptr; 3561 // Check for the at most one if clause associated with the target region. 3562 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 3563 if (C->getNameModifier() == OMPD_unknown || 3564 C->getNameModifier() == OMPD_target) { 3565 IfCond = C->getCondition(); 3566 break; 3567 } 3568 } 3569 3570 // Check if we have any device clause associated with the directive. 3571 const Expr *Device = nullptr; 3572 if (auto *C = S.getSingleClause<OMPDeviceClause>()) { 3573 Device = C->getDevice(); 3574 } 3575 3576 // Check if we have an if clause whose conditional always evaluates to false 3577 // or if we do not have any targets specified. If so the target region is not 3578 // an offload entry point. 
3579 bool IsOffloadEntry = true; 3580 if (IfCond) { 3581 bool Val; 3582 if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val) 3583 IsOffloadEntry = false; 3584 } 3585 if (CGM.getLangOpts().OMPTargetTriples.empty()) 3586 IsOffloadEntry = false; 3587 3588 assert(CGF.CurFuncDecl && "No parent declaration for target region!"); 3589 StringRef ParentName; 3590 // In case we have Ctors/Dtors we use the complete type variant to produce 3591 // the mangling of the device outlined kernel. 3592 if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl)) 3593 ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete)); 3594 else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl)) 3595 ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete)); 3596 else 3597 ParentName = 3598 CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl))); 3599 3600 // Emit target region as a standalone region. 3601 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, 3602 IsOffloadEntry, CodeGen); 3603 OMPLexicalScope Scope(CGF, S); 3604 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 3605 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 3606 CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device, 3607 CapturedVars); 3608 } 3609 3610 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S, 3611 PrePostActionTy &Action) { 3612 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 3613 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 3614 CGF.EmitOMPPrivateClause(S, PrivateScope); 3615 (void)PrivateScope.Privatize(); 3616 3617 Action.Enter(CGF); 3618 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3619 } 3620 3621 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM, 3622 StringRef ParentName, 3623 const OMPTargetDirective &S) { 3624 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3625 emitTargetRegion(CGF, S, Action); 3626 }; 3627 
llvm::Function *Fn; 3628 llvm::Constant *Addr; 3629 // Emit target region as a standalone region. 3630 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 3631 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 3632 assert(Fn && Addr && "Target device function emission failed."); 3633 } 3634 3635 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { 3636 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3637 emitTargetRegion(CGF, S, Action); 3638 }; 3639 emitCommonOMPTargetDirective(*this, S, CodeGen); 3640 } 3641 3642 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, 3643 const OMPExecutableDirective &S, 3644 OpenMPDirectiveKind InnermostKind, 3645 const RegionCodeGenTy &CodeGen) { 3646 const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams); 3647 auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction( 3648 S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); 3649 3650 const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>(); 3651 const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>(); 3652 if (NT || TL) { 3653 Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr; 3654 Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr; 3655 3656 CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit, 3657 S.getLocStart()); 3658 } 3659 3660 OMPTeamsScope Scope(CGF, S); 3661 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 3662 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 3663 CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn, 3664 CapturedVars); 3665 } 3666 3667 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { 3668 // Emit teams region as a standalone region. 
3669 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3670 OMPPrivateScope PrivateScope(CGF); 3671 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 3672 CGF.EmitOMPPrivateClause(S, PrivateScope); 3673 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 3674 (void)PrivateScope.Privatize(); 3675 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3676 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 3677 }; 3678 emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen); 3679 emitPostUpdateForReductionClause( 3680 *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 3681 } 3682 3683 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action, 3684 const OMPTargetTeamsDirective &S) { 3685 auto *CS = S.getCapturedStmt(OMPD_teams); 3686 Action.Enter(CGF); 3687 auto &&CodeGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { 3688 // TODO: Add support for clauses. 3689 CGF.EmitStmt(CS->getCapturedStmt()); 3690 }; 3691 emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen); 3692 } 3693 3694 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 3695 CodeGenModule &CGM, StringRef ParentName, 3696 const OMPTargetTeamsDirective &S) { 3697 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3698 emitTargetTeamsRegion(CGF, Action, S); 3699 }; 3700 llvm::Function *Fn; 3701 llvm::Constant *Addr; 3702 // Emit target region as a standalone region. 
3703 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 3704 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 3705 assert(Fn && Addr && "Target device function emission failed."); 3706 } 3707 3708 void CodeGenFunction::EmitOMPTargetTeamsDirective( 3709 const OMPTargetTeamsDirective &S) { 3710 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3711 emitTargetTeamsRegion(CGF, Action, S); 3712 }; 3713 emitCommonOMPTargetDirective(*this, S, CodeGen); 3714 } 3715 3716 void CodeGenFunction::EmitOMPCancellationPointDirective( 3717 const OMPCancellationPointDirective &S) { 3718 CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(), 3719 S.getCancelRegion()); 3720 } 3721 3722 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { 3723 const Expr *IfCond = nullptr; 3724 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 3725 if (C->getNameModifier() == OMPD_unknown || 3726 C->getNameModifier() == OMPD_cancel) { 3727 IfCond = C->getCondition(); 3728 break; 3729 } 3730 } 3731 CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond, 3732 S.getCancelRegion()); 3733 } 3734 3735 CodeGenFunction::JumpDest 3736 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { 3737 if (Kind == OMPD_parallel || Kind == OMPD_task || 3738 Kind == OMPD_target_parallel) 3739 return ReturnBlock; 3740 assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections || 3741 Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for || 3742 Kind == OMPD_distribute_parallel_for || 3743 Kind == OMPD_target_parallel_for); 3744 return OMPCancelStack.getExitBlock(); 3745 } 3746 3747 void CodeGenFunction::EmitOMPUseDevicePtrClause( 3748 const OMPClause &NC, OMPPrivateScope &PrivateScope, 3749 const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) { 3750 const auto &C = cast<OMPUseDevicePtrClause>(NC); 3751 auto OrigVarIt = C.varlist_begin(); 3752 auto InitIt = C.inits().begin(); 
3753 for (auto PvtVarIt : C.private_copies()) { 3754 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl()); 3755 auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl()); 3756 auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl()); 3757 3758 // In order to identify the right initializer we need to match the 3759 // declaration used by the mapping logic. In some cases we may get 3760 // OMPCapturedExprDecl that refers to the original declaration. 3761 const ValueDecl *MatchingVD = OrigVD; 3762 if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) { 3763 // OMPCapturedExprDecl are used to privative fields of the current 3764 // structure. 3765 auto *ME = cast<MemberExpr>(OED->getInit()); 3766 assert(isa<CXXThisExpr>(ME->getBase()) && 3767 "Base should be the current struct!"); 3768 MatchingVD = ME->getMemberDecl(); 3769 } 3770 3771 // If we don't have information about the current list item, move on to 3772 // the next one. 3773 auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD); 3774 if (InitAddrIt == CaptureDeviceAddrMap.end()) 3775 continue; 3776 3777 bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { 3778 // Initialize the temporary initialization variable with the address we 3779 // get from the runtime library. We have to cast the source address 3780 // because it is always a void *. References are materialized in the 3781 // privatization scope, so the initialization here disregards the fact 3782 // the original variable is a reference. 3783 QualType AddrQTy = 3784 getContext().getPointerType(OrigVD->getType().getNonReferenceType()); 3785 llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy); 3786 Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy); 3787 setAddrOfLocalVar(InitVD, InitAddr); 3788 3789 // Emit private declaration, it will be initialized by the value we 3790 // declaration we just added to the local declarations map. 
3791 EmitDecl(*PvtVD); 3792 3793 // The initialization variables reached its purpose in the emission 3794 // ofthe previous declaration, so we don't need it anymore. 3795 LocalDeclMap.erase(InitVD); 3796 3797 // Return the address of the private variable. 3798 return GetAddrOfLocalVar(PvtVD); 3799 }); 3800 assert(IsRegistered && "firstprivate var already registered as private"); 3801 // Silence the warning about unused variable. 3802 (void)IsRegistered; 3803 3804 ++OrigVarIt; 3805 ++InitIt; 3806 } 3807 } 3808 3809 // Generate the instructions for '#pragma omp target data' directive. 3810 void CodeGenFunction::EmitOMPTargetDataDirective( 3811 const OMPTargetDataDirective &S) { 3812 CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true); 3813 3814 // Create a pre/post action to signal the privatization of the device pointer. 3815 // This action can be replaced by the OpenMP runtime code generation to 3816 // deactivate privatization. 3817 bool PrivatizeDevicePointers = false; 3818 class DevicePointerPrivActionTy : public PrePostActionTy { 3819 bool &PrivatizeDevicePointers; 3820 3821 public: 3822 explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers) 3823 : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {} 3824 void Enter(CodeGenFunction &CGF) override { 3825 PrivatizeDevicePointers = true; 3826 } 3827 }; 3828 DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers); 3829 3830 auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers]( 3831 CodeGenFunction &CGF, PrePostActionTy &Action) { 3832 auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3833 CGF.EmitStmt( 3834 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); 3835 }; 3836 3837 // Codegen that selects wheather to generate the privatization code or not. 
3838 auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers, 3839 &InnermostCodeGen](CodeGenFunction &CGF, 3840 PrePostActionTy &Action) { 3841 RegionCodeGenTy RCG(InnermostCodeGen); 3842 PrivatizeDevicePointers = false; 3843 3844 // Call the pre-action to change the status of PrivatizeDevicePointers if 3845 // needed. 3846 Action.Enter(CGF); 3847 3848 if (PrivatizeDevicePointers) { 3849 OMPPrivateScope PrivateScope(CGF); 3850 // Emit all instances of the use_device_ptr clause. 3851 for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>()) 3852 CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope, 3853 Info.CaptureDeviceAddrMap); 3854 (void)PrivateScope.Privatize(); 3855 RCG(CGF); 3856 } else 3857 RCG(CGF); 3858 }; 3859 3860 // Forward the provided action to the privatization codegen. 3861 RegionCodeGenTy PrivRCG(PrivCodeGen); 3862 PrivRCG.setAction(Action); 3863 3864 // Notwithstanding the body of the region is emitted as inlined directive, 3865 // we don't use an inline scope as changes in the references inside the 3866 // region are expected to be visible outside, so we do not privative them. 3867 OMPLexicalScope Scope(CGF, S); 3868 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data, 3869 PrivRCG); 3870 }; 3871 3872 RegionCodeGenTy RCG(CodeGen); 3873 3874 // If we don't have target devices, don't bother emitting the data mapping 3875 // code. 3876 if (CGM.getLangOpts().OMPTargetTriples.empty()) { 3877 RCG(*this); 3878 return; 3879 } 3880 3881 // Check if we have any if clause associated with the directive. 3882 const Expr *IfCond = nullptr; 3883 if (auto *C = S.getSingleClause<OMPIfClause>()) 3884 IfCond = C->getCondition(); 3885 3886 // Check if we have any device clause associated with the directive. 3887 const Expr *Device = nullptr; 3888 if (auto *C = S.getSingleClause<OMPDeviceClause>()) 3889 Device = C->getDevice(); 3890 3891 // Set the action to signal privatization of device pointers. 
3892 RCG.setAction(PrivAction); 3893 3894 // Emit region code. 3895 CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG, 3896 Info); 3897 } 3898 3899 void CodeGenFunction::EmitOMPTargetEnterDataDirective( 3900 const OMPTargetEnterDataDirective &S) { 3901 // If we don't have target devices, don't bother emitting the data mapping 3902 // code. 3903 if (CGM.getLangOpts().OMPTargetTriples.empty()) 3904 return; 3905 3906 // Check if we have any if clause associated with the directive. 3907 const Expr *IfCond = nullptr; 3908 if (auto *C = S.getSingleClause<OMPIfClause>()) 3909 IfCond = C->getCondition(); 3910 3911 // Check if we have any device clause associated with the directive. 3912 const Expr *Device = nullptr; 3913 if (auto *C = S.getSingleClause<OMPDeviceClause>()) 3914 Device = C->getDevice(); 3915 3916 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); 3917 } 3918 3919 void CodeGenFunction::EmitOMPTargetExitDataDirective( 3920 const OMPTargetExitDataDirective &S) { 3921 // If we don't have target devices, don't bother emitting the data mapping 3922 // code. 3923 if (CGM.getLangOpts().OMPTargetTriples.empty()) 3924 return; 3925 3926 // Check if we have any if clause associated with the directive. 3927 const Expr *IfCond = nullptr; 3928 if (auto *C = S.getSingleClause<OMPIfClause>()) 3929 IfCond = C->getCondition(); 3930 3931 // Check if we have any device clause associated with the directive. 3932 const Expr *Device = nullptr; 3933 if (auto *C = S.getSingleClause<OMPDeviceClause>()) 3934 Device = C->getDevice(); 3935 3936 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); 3937 } 3938 3939 static void emitTargetParallelRegion(CodeGenFunction &CGF, 3940 const OMPTargetParallelDirective &S, 3941 PrePostActionTy &Action) { 3942 // Get the captured statement associated with the 'parallel' region. 
3943 auto *CS = S.getCapturedStmt(OMPD_parallel); 3944 Action.Enter(CGF); 3945 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) { 3946 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 3947 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 3948 CGF.EmitOMPPrivateClause(S, PrivateScope); 3949 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 3950 (void)PrivateScope.Privatize(); 3951 // TODO: Add support for clauses. 3952 CGF.EmitStmt(CS->getCapturedStmt()); 3953 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 3954 }; 3955 emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen, 3956 emitEmptyBoundParameters); 3957 emitPostUpdateForReductionClause( 3958 CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); 3959 } 3960 3961 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 3962 CodeGenModule &CGM, StringRef ParentName, 3963 const OMPTargetParallelDirective &S) { 3964 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3965 emitTargetParallelRegion(CGF, S, Action); 3966 }; 3967 llvm::Function *Fn; 3968 llvm::Constant *Addr; 3969 // Emit target region as a standalone region. 3970 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 3971 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 3972 assert(Fn && Addr && "Target device function emission failed."); 3973 } 3974 3975 void CodeGenFunction::EmitOMPTargetParallelDirective( 3976 const OMPTargetParallelDirective &S) { 3977 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3978 emitTargetParallelRegion(CGF, S, Action); 3979 }; 3980 emitCommonOMPTargetDirective(*this, S, CodeGen); 3981 } 3982 3983 void CodeGenFunction::EmitOMPTargetParallelForDirective( 3984 const OMPTargetParallelForDirective &S) { 3985 // TODO: codegen for target parallel for. 3986 } 3987 3988 /// Emit a helper variable and return corresponding lvalue. 
3989 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper, 3990 const ImplicitParamDecl *PVD, 3991 CodeGenFunction::OMPPrivateScope &Privates) { 3992 auto *VDecl = cast<VarDecl>(Helper->getDecl()); 3993 Privates.addPrivate( 3994 VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); }); 3995 } 3996 3997 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { 3998 assert(isOpenMPTaskLoopDirective(S.getDirectiveKind())); 3999 // Emit outlined function for task construct. 4000 auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); 4001 auto CapturedStruct = GenerateCapturedStmtArgument(*CS); 4002 auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 4003 const Expr *IfCond = nullptr; 4004 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 4005 if (C->getNameModifier() == OMPD_unknown || 4006 C->getNameModifier() == OMPD_taskloop) { 4007 IfCond = C->getCondition(); 4008 break; 4009 } 4010 } 4011 4012 OMPTaskDataTy Data; 4013 // Check if taskloop must be emitted without taskgroup. 4014 Data.Nogroup = S.getSingleClause<OMPNogroupClause>(); 4015 // TODO: Check if we should emit tied or untied task. 4016 Data.Tied = true; 4017 // Set scheduling for taskloop 4018 if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) { 4019 // grainsize clause 4020 Data.Schedule.setInt(/*IntVal=*/false); 4021 Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize())); 4022 } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) { 4023 // num_tasks clause 4024 Data.Schedule.setInt(/*IntVal=*/true); 4025 Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks())); 4026 } 4027 4028 auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) { 4029 // if (PreCond) { 4030 // for (IV in 0..LastIteration) BODY; 4031 // <Final counter/linear vars updates>; 4032 // } 4033 // 4034 4035 // Emit: if (PreCond) - begin. 
4036 // If the condition constant folds and can be elided, avoid emitting the 4037 // whole loop. 4038 bool CondConstant; 4039 llvm::BasicBlock *ContBlock = nullptr; 4040 OMPLoopScope PreInitScope(CGF, S); 4041 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 4042 if (!CondConstant) 4043 return; 4044 } else { 4045 auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then"); 4046 ContBlock = CGF.createBasicBlock("taskloop.if.end"); 4047 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 4048 CGF.getProfileCount(&S)); 4049 CGF.EmitBlock(ThenBlock); 4050 CGF.incrementProfileCounter(&S); 4051 } 4052 4053 if (isOpenMPSimdDirective(S.getDirectiveKind())) 4054 CGF.EmitOMPSimdInit(S); 4055 4056 OMPPrivateScope LoopScope(CGF); 4057 // Emit helper vars inits. 4058 enum { LowerBound = 5, UpperBound, Stride, LastIter }; 4059 auto *I = CS->getCapturedDecl()->param_begin(); 4060 auto *LBP = std::next(I, LowerBound); 4061 auto *UBP = std::next(I, UpperBound); 4062 auto *STP = std::next(I, Stride); 4063 auto *LIP = std::next(I, LastIter); 4064 mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP, 4065 LoopScope); 4066 mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP, 4067 LoopScope); 4068 mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope); 4069 mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP, 4070 LoopScope); 4071 CGF.EmitOMPPrivateLoopCounters(S, LoopScope); 4072 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 4073 (void)LoopScope.Privatize(); 4074 // Emit the loop iteration variable. 4075 const Expr *IVExpr = S.getIterationVariable(); 4076 const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 4077 CGF.EmitVarDecl(*IVDecl); 4078 CGF.EmitIgnoredExpr(S.getInit()); 4079 4080 // Emit the iterations count variable. 
4081 // If it is not a variable, Sema decided to calculate iterations count on 4082 // each iteration (e.g., it is foldable into a constant). 4083 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 4084 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 4085 // Emit calculation of the iterations count. 4086 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 4087 } 4088 4089 CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 4090 S.getInc(), 4091 [&S](CodeGenFunction &CGF) { 4092 CGF.EmitOMPLoopBody(S, JumpDest()); 4093 CGF.EmitStopPoint(&S); 4094 }, 4095 [](CodeGenFunction &) {}); 4096 // Emit: if (PreCond) - end. 4097 if (ContBlock) { 4098 CGF.EmitBranch(ContBlock); 4099 CGF.EmitBlock(ContBlock, true); 4100 } 4101 // Emit final copy of the lastprivate variables if IsLastIter != 0. 4102 if (HasLastprivateClause) { 4103 CGF.EmitOMPLastprivateClauseFinal( 4104 S, isOpenMPSimdDirective(S.getDirectiveKind()), 4105 CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar( 4106 CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false, 4107 (*LIP)->getType(), S.getLocStart()))); 4108 } 4109 }; 4110 auto &&TaskGen = [&S, SharedsTy, CapturedStruct, 4111 IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn, 4112 const OMPTaskDataTy &Data) { 4113 auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) { 4114 OMPLoopScope PreInitScope(CGF, S); 4115 CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S, 4116 OutlinedFn, SharedsTy, 4117 CapturedStruct, IfCond, Data); 4118 }; 4119 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop, 4120 CodeGen); 4121 }; 4122 if (Data.Nogroup) 4123 EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); 4124 else { 4125 CGM.getOpenMPRuntime().emitTaskgroupRegion( 4126 *this, 4127 [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF, 4128 PrePostActionTy &Action) { 4129 Action.Enter(CGF); 4130 CGF.EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); 4131 }, 4132 S.getLocStart()); 4133 } 4134 } 4135 
4136 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { 4137 EmitOMPTaskLoopBasedDirective(S); 4138 } 4139 4140 void CodeGenFunction::EmitOMPTaskLoopSimdDirective( 4141 const OMPTaskLoopSimdDirective &S) { 4142 EmitOMPTaskLoopBasedDirective(S); 4143 } 4144 4145 // Generate the instructions for '#pragma omp target update' directive. 4146 void CodeGenFunction::EmitOMPTargetUpdateDirective( 4147 const OMPTargetUpdateDirective &S) { 4148 // If we don't have target devices, don't bother emitting the data mapping 4149 // code. 4150 if (CGM.getLangOpts().OMPTargetTriples.empty()) 4151 return; 4152 4153 // Check if we have any if clause associated with the directive. 4154 const Expr *IfCond = nullptr; 4155 if (auto *C = S.getSingleClause<OMPIfClause>()) 4156 IfCond = C->getCondition(); 4157 4158 // Check if we have any device clause associated with the directive. 4159 const Expr *Device = nullptr; 4160 if (auto *C = S.getSingleClause<OMPDeviceClause>()) 4161 Device = C->getDevice(); 4162 4163 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); 4164 } 4165