//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
using namespace clang;
using namespace CodeGen;

void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars,
    bool UseOnlyReferences) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      auto VAT = CurField->getCapturedVLAType();
      auto *Val = VLASizeMap[VAT->getSizeExpr()];
      // If we need to use only references, create a temporary location for the
      // size of the VAT.
      if (UseOnlyReferences) {
        LValue LV =
            MakeAddrLValue(CreateMemTemp(CurField->getType(), "__vla_size_ref"),
                           CurField->getType());
        EmitStoreThroughLValue(RValue::get(Val), LV);
        Val = LV.getAddress().getPointer();
      }
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis())
      CapturedVars.push_back(CXXThisValue);
    else
      CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
  }
}

llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
                                                    bool UseOnlyReferences) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  const RecordDecl *RD = S.getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  // Build the argument list.
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList Args;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = S.captures().begin();
  for (auto *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;
    if (I->capturesVariable()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis())
      II = &getContext().Idents.get("this");
    else {
      assert(I->capturesVariableArrayType());
      II = &getContext().Idents.get("vla");
      if (UseOnlyReferences)
        ArgType = getContext().getLValueReferenceType(
            ArgType, /*SpelledAsLValue=*/false);
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getContext().getVariableArrayDecayedType(ArgType);
    Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr,
                                             FD->getLocation(), II, ArgType));
    ++I;
  }
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  FunctionType::ExtInfo ExtInfo;
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeFreeFunctionDeclaration(Ctx.VoidTy, Args, ExtInfo,
                                                    /*IsVariadic=*/false);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  llvm::Function *F = llvm::Function::Create(
      FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
      CapturedStmtInfo->getHelperName(), &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->addFnAttr(llvm::Attribute::NoUnwind);

  // Generate the function.
  StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
                CD->getBody()->getLocStart());
  unsigned Cnt = CD->getContextParamPosition();
  I = S.captures().begin();
  for (auto *FD : RD->fields()) {
    LValue ArgLVal =
        MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(),
                       AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      if (UseOnlyReferences)
        ArgLVal = EmitLoadOfReferenceLValue(
            ArgLVal.getAddress(), ArgLVal.getType()->castAs<ReferenceType>());
      auto *ExprArg =
          EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal();
      auto VAT = FD->getCapturedVLAType();
      VLASizeMap[VAT->getSizeExpr()] = ExprArg;
    } else if (I->capturesVariable()) {
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (!VarTy->isReferenceType()) {
        ArgAddr = EmitLoadOfReference(
            ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
      }
      setAddrOfLocalVar(
          Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var)));
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue =
          EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal();
    }
    ++Cnt, ++I;
  }

  PGO.assignRegionCounters(CD, F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  FinishFunction(CD->getBodyRBrace());

  return F;
}

//===----------------------------------------------------------------------===//
//                              OpenMP Directive Emission
//===----------------------------------------------------------------------===//
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> &CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  auto SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = createBasicBlock("omp.arraycpy.body");
  auto DoneBB = createBasicBlock("omp.arraycpy.done");
  auto IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
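  // A sketch of the emitted control flow (names match the basic blocks
  // created above; illustrative pseudo-IR, not literal output):
  //
  //   entry:
  //     br (DestBegin == DestEnd), omp.arraycpy.done, omp.arraycpy.body
  //   omp.arraycpy.body:
  //     Src  = phi [SrcBegin, entry], [SrcNext, omp.arraycpy.body]
  //     Dest = phi [DestBegin, entry], [DestNext, omp.arraycpy.body]
  //     <CopyGen(Dest, Src)>
  //     SrcNext = Src + 1; DestNext = Dest + 1;
  //     br (DestNext == DestEnd), omp.arraycpy.done, omp.arraycpy.body
  //   omp.arraycpy.done: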
  auto EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI =
      Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  auto DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
    } else {
      // For arrays with complex element types perform element-by-element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // Working with the single array element, so have to remap
            // destination and source variables to corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() -> Address {
              return DestElement;
            });
            Remap.addPrivate(
                SrcVD, [SrcElement]() -> Address { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}

bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsFirstprivate.count(OrigVD) == 0) {
        EmittedAsFirstprivate.insert(OrigVD);
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(
            const_cast<VarDecl *>(OrigVD),
            /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                OrigVD) != nullptr,
            (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        Address OriginalAddr = EmitLValue(&DRE).getAddress();
        QualType Type = OrigVD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in the current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            auto Emission = EmitAutoVarAlloca(*VD);
            auto *Init = VD->getInit();
            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
                                  Type);
            } else {
              EmitOMPAggregateAssign(
                  Emission.getAllocatedAddress(), OriginalAddr, Type,
                  [this, VDInit, Init](Address DestElement,
                                       Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    RunCleanupsScope InitScope(*this);
                    // Emit initialization for single element.
                    setAddrOfLocalVar(VDInit, SrcElement);
                    EmitAnyExprToMem(Init, DestElement,
                                     Init->getType().getQualifiers(),
                                     /*IsInitializer*/ false);
                    LocalDeclMap.erase(VDInit);
                  });
            }
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
        } else {
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            // Remap temp VDInit variable to the address of the original
            // variable (for proper handling of captured global variables).
            setAddrOfLocalVar(VDInit, OriginalAddr);
            EmitDecl(*VD);
            LocalDeclMap.erase(VDInit);
            return GetAddrOfLocalVar(VD);
          });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef, ++InitsRef;
    }
  }
  return !EmittedAsFirstprivate.empty();
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, [&]() -> Address {
              // Emit private VarDecl with copy init.
              EmitDecl(*VD);
              return GetAddrOfLocalVar(VD);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as a field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
                          VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // First check whether the current thread is the master thread. If it
          // is, there is no need to copy the data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(),
                                         CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
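      // E.g., for '#pragma omp for lastprivate(x)' the value of the private
      // 'x' from the sequentially last iteration is copied back into the
      // original 'x' after the loop.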
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
          DeclRefExpr DRE(
              const_cast<VarDecl *>(OrigVD),
              /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                  OrigVD) != nullptr,
              (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in the
        // codegen for the 'firstprivate' clause.
        if (IInit) {
          auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered =
              PrivateScope.addPrivate(OrigVD, [&]() -> Address {
                // Emit private VarDecl with copy init.
                EmitDecl(*VD);
                return GetAddrOfLocalVar(VD);
              });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef, ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, llvm::Value *IsLastIterCond) {
  // Emit the following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseMap<const Decl *, const Expr *> LoopCountersAndUpdates;
  const Expr *LastIterVal = nullptr;
  const Expr *IVExpr = nullptr;
  const Expr *IncExpr = nullptr;
  if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    if (isOpenMPWorksharingDirective(D.getDirectiveKind())) {
      LastIterVal = cast<VarDecl>(cast<DeclRefExpr>(
                                      LoopDirective->getUpperBoundVariable())
                                      ->getDecl())
                        ->getAnyInitializer();
      IVExpr = LoopDirective->getIterationVariable();
      IncExpr = LoopDirective->getInc();
      auto IUpdate = LoopDirective->updates().begin();
      for (auto *E : LoopDirective->counters()) {
        auto *D = cast<DeclRefExpr>(E)->getDecl()->getCanonicalDecl();
        LoopCountersAndUpdates[D] = *IUpdate;
        ++IUpdate;
      }
    }
  }
  {
    llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
    bool FirstLCV = true;
    for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
      auto IRef = C->varlist_begin();
      auto ISrcRef = C->source_exprs().begin();
      auto IDestRef = C->destination_exprs().begin();
      for (auto *AssignOp : C->assignment_ops()) {
        auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
        QualType Type = PrivateVD->getType();
        auto *CanonicalVD = PrivateVD->getCanonicalDecl();
        if (AlreadyEmittedVars.insert(CanonicalVD).second) {
          // If the lastprivate variable is a loop control variable for a
          // loop-based directive, update its value before copying it back to
          // the original variable.
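          // E.g., for '#pragma omp for lastprivate(i)' where 'i' is also the
          // loop counter, 'i' is first advanced to its value past the last
          // iteration before the copy-back below.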
          if (auto *UpExpr = LoopCountersAndUpdates.lookup(CanonicalVD)) {
            if (FirstLCV && LastIterVal) {
              EmitAnyExprToMem(LastIterVal, EmitLValue(IVExpr).getAddress(),
                               IVExpr->getType().getQualifiers(),
                               /*IsInitializer=*/false);
              EmitIgnoredExpr(IncExpr);
              FirstLCV = false;
            }
            EmitIgnoredExpr(UpExpr);
          }
          auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
          auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
          // Get the address of the original variable.
          Address OriginalAddr = GetAddrOfLocalVar(DestVD);
          // Get the address of the private variable.
          Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
          if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
            PrivateAddr =
                Address(Builder.CreateLoad(PrivateAddr),
                        getNaturalTypeAlignment(RefTy->getPointeeType()));
          EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
        }
        ++IRef;
        ++ISrcRef;
        ++IDestRef;
      }
    }
  }
  if (IsLastIterCond) {
    EmitBlock(DoneBB, /*IsFinished=*/true);
  }
}

void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (auto IRef : C->varlists()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
      auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef]() -> Address {
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        IRef->getType(), VK_LValue, IRef->getExprLoc());
        return EmitLValue(&DRE).getAddress();
      });
      // Emit reduction copy.
      bool IsRegistered =
          PrivateScope.addPrivate(OrigVD, [this, PrivateVD]() -> Address {
            // Emit private VarDecl with reduction init.
            EmitDecl(*PrivateVD);
            return GetAddrOfLocalVar(PrivateVD);
          });
      assert(IsRegistered && "private var already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
      ++ILHS, ++IRHS;
    }
  }
}

void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D) {
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    HasAtLeastOneReduction = true;
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
  }
  if (HasAtLeastOneReduction) {
    // Emit a nowait reduction if the nowait clause is present, or if the
    // directive is a parallel directive (it always has an implicit barrier).
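    // For example, in '#pragma omp parallel for reduction(+ : s)' the
    // enclosing 'parallel' region already ends with an implicit barrier, so
    // the reduction itself may be emitted in its nowait form.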
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getLocEnd(), LHSExprs, RHSExprs, ReductionOps,
        D.getSingleClause<OMPNowaitClause>() ||
            isOpenMPParallelDirective(D.getDirectiveKind()) ||
            D.getDirectiveKind() == OMPD_simd,
        D.getDirectiveKind() == OMPD_simd);
  }
}

static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
                                           const OMPExecutableDirective &S,
                                           OpenMPDirectiveKind InnermostKind,
                                           const RegionCodeGenTy &CodeGen) {
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
      S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                         /*IgnoreResultAssign*/ true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getLocStart());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
  }
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
                                              CapturedVars, IfCond);
}

void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    bool Firstprivates = CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins || Firstprivates) {
      // Emit an implicit barrier to synchronize threads and avoid data races
      // on initialization of firstprivate variables, or on propagation of the
      // master thread's values of threadprivate variables to the local
      // instances of those variables in all other implicit threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S);
    // Emit implicit barrier at the end of the 'parallel' directive.
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(
        CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
        /*ForceSimpleCall=*/true);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen);
}

void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update the counter values for the current iteration.
  for (auto I : D.updates()) {
    EmitIgnoredExpr(I);
  }
  // Update the linear variables.
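  // For 'linear(x : step)' the update expression built by Sema is roughly
  //   x_private = x_start + IV * step
  // (a sketch; the actual expression trees come from C->updates()).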
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto U : C->updates()) {
      EmitIgnoredExpr(U);
    }
  }

  // On a continue in the body, jump to the end.
  auto Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  // Emit loop body.
  EmitStmt(D.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  // TODO: Update lastprivates if the SeparateIter flag is true.
  // This will be implemented in a follow-up OMPLastprivateClause patch, but
  // the result should still be correct without it, as we do not make these
  // variables private yet.
}

void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}

void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  // Emit inits for the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto Init : C->inits()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      auto *OrigVD = cast<VarDecl>(
          cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())->getDecl());
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      VD->getInit()->getType(), VK_LValue,
                      VD->getInit()->getExprLoc());
      AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
      EmitExprAsInit(&DRE, VD,
                     MakeAddrLValue(Emission.getAllocatedAddress(),
                                    VD->getType()),
                     /*capturedByInit=*/false);
      EmitAutoVarCleanups(Emission);
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
}

static void emitLinearClauseFinal(CodeGenFunction &CGF,
                                  const OMPLoopDirective &D) {
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (auto F : C->finals()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = CGF.EmitLValue(&DRE).getAddress();
      CodeGenFunction::OMPPrivateScope VarScope(CGF);
      VarScope.addPrivate(OrigVD,
                          [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      CGF.EmitIgnoredExpr(F);
      ++IC;
    }
  }
}

static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    unsigned ClauseAlignment = 0;
    if (auto AlignmentExpr = Clause->getAlignment()) {
      auto AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
    }
    for (auto E : Clause->varlists()) {
      unsigned Alignment = ClauseAlignment;
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined
        // default alignments for SIMD instructions on the target platforms
        // are assumed.
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
             "alignment is not power of 2");
      if (Alignment != 0) {
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
        CGF.EmitAlignmentAssumption(PtrValue, Alignment);
      }
    }
  }
}

static void emitPrivateLoopCounters(CodeGenFunction &CGF,
                                    CodeGenFunction::OMPPrivateScope &LoopScope,
                                    ArrayRef<Expr *> Counters,
                                    ArrayRef<Expr *> PrivateCounters) {
  auto I = PrivateCounters.begin();
  for (auto *E : Counters) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    Address Addr = Address::invalid();
    (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
      // Emit var without initialization.
      auto VarEmission = CGF.EmitAutoVarAlloca(*PrivateVD);
      CGF.EmitAutoVarCleanups(VarEmission);
      Addr = VarEmission.getAllocatedAddress();
      return Addr;
    });
    (void)LoopScope.addPrivate(VD, [&]() -> Address { return Addr; });
    ++I;
  }
}

static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
  {
    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
    emitPrivateLoopCounters(CGF, PreCondScope, S.counters(),
                            S.private_counters());
    (void)PreCondScope.Privatize();
    // Get initial values of real counters.
    for (auto I : S.inits()) {
      CGF.EmitIgnoredExpr(I);
    }
  }
  // Check that the loop is executed at least once.
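  // For a canonical loop 'for (i = LB; i < UB; ++i)' this amounts to branching
  // on 'LB < UB' (a sketch; the actual precondition expression is built by
  // Sema and passed in as Cond).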
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
}

static void
emitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D,
                      CodeGenFunction::OMPPrivateScope &PrivateScope) {
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto CurPrivate = C->privates().begin();
    for (auto *E : C->varlists()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
      bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
        // Emit private VarDecl with copy init.
        CGF.EmitVarDecl(*PrivateVD);
        return CGF.GetAddrOfLocalVar(PrivateVD);
      });
      assert(IsRegistered && "linear var already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
      ++CurPrivate;
    }
  }
}

static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D) {
  if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In the presence of a finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
  } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In the presence of a finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(false);
  }
}

void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
  // Walk clauses and process safelen/lastprivate.
  LoopStack.setParallel();
  LoopStack.setVectorizeEnable(true);
  emitSimdlenSafelenClause(*this, D);
}

void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &D) {
  auto IC = D.counters().begin();
  for (auto F : D.finals()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD)) {
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress();
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD,
                          [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
    }
    ++IC;
  }
  emitLinearClauseFinal(*this, D);
}

void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
      ContBlock = CGF.createBasicBlock("simd.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate the iterations count
    // on each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    CGF.EmitOMPSimdInit(S);

    emitAlignedClause(CGF, S);
    CGF.EmitOMPLinearClauseInit(S);
    bool HasLastprivateClause;
    {
      OMPPrivateScope LoopScope(CGF);
      emitPrivateLoopCounters(CGF, LoopScope, S.counters(),
                              S.private_counters());
      emitPrivateLinearVars(CGF, S, LoopScope);
      CGF.EmitOMPPrivateClause(S, LoopScope);
      CGF.EmitOMPReductionClauseInit(S, LoopScope);
      HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
      (void)LoopScope.Privatize();
      CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                           S.getInc(),
                           [&S](CodeGenFunction &CGF) {
                             CGF.EmitOMPLoopBody(S, JumpDest());
                             CGF.EmitStopPoint(&S);
                           },
                           [](CodeGenFunction &) {});
      // Emit the final copy of the lastprivate variables at the end of the
      // loop.
      if (HasLastprivateClause) {
        CGF.EmitOMPLastprivateClauseFinal(S);
      }
      CGF.EmitOMPReductionClauseFinal(S);
    }
    CGF.EmitOMPSimdFinal(S);
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}

void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
                                          const OMPLoopDirective &S,
                                          OMPPrivateScope &LoopScope,
                                          bool Ordered, Address LB,
                                          Address UB, Address ST,
                                          Address IL, llvm::Value *Chunk) {
  auto &RT = CGM.getOpenMPRuntime();

  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
  const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind);

  assert((Ordered ||
          !RT.isStaticNonchunked(ScheduleKind, /*Chunked=*/Chunk != nullptr)) &&
         "static non-chunked schedule does not need outer loop");

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from
  // the run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined.
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx;
  //     __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
  //   } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided
  // into chunks of size chunk_size, and the chunks are assigned to the threads
  // in the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  if (DynamicOrOrdered) {
    llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration());
    RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind,
                           IVSize, IVSigned, Ordered, UBVal, Chunk);
  } else {
    RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind,
                         IVSize, IVSigned, Ordered, IL, LB, UB, ST, Chunk);
  }

  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB)
    EmitIgnoredExpr(S.getEnsureUpperBound());
    // IV = LB
    EmitIgnoredExpr(S.getInit());
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(S.getCond());
  } else {
    BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned,
                                 IL, LB, UB, ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for the loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(S.getInit());

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  // Generate !llvm.loop.parallel metadata for loads and stores for loops
  // with dynamic/guided scheduling and without an ordered clause.
  if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
    LoopStack.setParallel((ScheduleKind == OMPC_SCHEDULE_dynamic ||
                           ScheduleKind == OMPC_SCHEDULE_guided) &&
                          !Ordered);
  } else {
    EmitOMPSimdInit(S);
  }

  SourceLocation Loc = S.getLocStart();
  EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
                   [&S, LoopExit](CodeGenFunction &CGF) {
                     CGF.EmitOMPLoopBody(S, LoopExit);
                     CGF.EmitStopPoint(&S);
                   },
                   [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) {
                     if (Ordered) {
                       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(
                           CGF, Loc, IVSize, IVSigned);
                     }
                   });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(S.getNextLowerBound());
    EmitIgnoredExpr(S.getNextUpperBound());
  }

  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  if (!DynamicOrOrdered)
    RT.emitForStaticFinish(*this, S.getLocEnd());
}

/// \brief Emit a helper variable and return the corresponding lvalue.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                               const DeclRefExpr *Helper) {
  auto VDecl = cast<VarDecl>(Helper->getDecl());
  CGF.EmitVarDecl(*VDecl);
  return CGF.EmitLValue(Helper);
}

static std::pair<llvm::Value * /*Chunk*/, OpenMPScheduleClauseKind>
emitScheduleClause(CodeGenFunction &CGF, const OMPLoopDirective &S,
                   bool OuterRegion) {
  // Detect the loop schedule kind and chunk.
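  // E.g., for 'schedule(dynamic, n * 2)' on a combined directive, the chunk
  // expression is emitted once into the helper variable in the outer
  // ('parallel') region and re-read in the inner ('for') region, so it is
  // evaluated only once per team (a sketch of the intent of OuterRegion).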
  auto ScheduleKind = OMPC_SCHEDULE_unknown;
  llvm::Value *Chunk = nullptr;
  if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
    ScheduleKind = C->getScheduleKind();
    if (const auto *Ch = C->getChunkSize()) {
      if (auto *ImpRef = cast_or_null<DeclRefExpr>(C->getHelperChunkSize())) {
        if (OuterRegion) {
          const VarDecl *ImpVar = cast<VarDecl>(ImpRef->getDecl());
          CGF.EmitVarDecl(*ImpVar);
          CGF.EmitStoreThroughLValue(
              CGF.EmitAnyExpr(Ch),
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(ImpVar),
                                 ImpVar->getType()));
        } else {
          Ch = ImpRef;
        }
      }
      if (!C->getHelperChunkSize() || !OuterRegion) {
        Chunk = CGF.EmitScalarExpr(Ch);
        Chunk = CGF.EmitScalarConversion(Chunk, Ch->getType(),
                                         S.getIterationVariable()->getType(),
                                         S.getLocStart());
      }
    }
  }
  return std::make_pair(Chunk, ScheduleKind);
}

bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iterations count
  // on each iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause;
  // Check pre-condition.
  {
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return false;
    } else {
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    emitAlignedClause(*this, S);
    EmitOMPLinearClauseInit(S);
    // Emit 'then' code.
    {
      // Emit helper vars inits.
      LValue LB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
      LValue UB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit an implicit barrier to synchronize threads and avoid data races
        // on initialization of firstprivate variables.
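        // Without this barrier, a thread could start executing the loop body
        // while another thread is still reading the original variable to
        // initialize its own firstprivate copy.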
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPReductionClauseInit(S, LoopScope);
      emitPrivateLoopCounters(*this, LoopScope, S.counters(),
                              S.private_counters());
      emitPrivateLinearVars(*this, S, LoopScope);
      (void)LoopScope.Privatize();

      // Detect the loop schedule kind and chunk.
      llvm::Value *Chunk;
      OpenMPScheduleClauseKind ScheduleKind;
      auto ScheduleInfo =
          emitScheduleClause(*this, S, /*OuterRegion=*/false);
      Chunk = ScheduleInfo.first;
      ScheduleKind = ScheduleInfo.second;
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      const bool Ordered = S.getSingleClause<OMPOrderedClause>() != nullptr;
      if (RT.isStaticNonchunked(ScheduleKind,
                                /*Chunked=*/Chunk != nullptr) &&
          !Ordered) {
        if (isOpenMPSimdDirective(S.getDirectiveKind())) {
          EmitOMPSimdInit(S);
        }
        // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
        // When no chunk_size is specified, the iteration space is divided
        // into chunks that are approximately equal in size, and at most one
        // chunk is distributed to each thread. Note that the size of the
        // chunks is unspecified in this case.
        RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind,
                             IVSize, IVSigned, Ordered,
                             IL.getAddress(), LB.getAddress(),
                             UB.getAddress(), ST.getAddress());
        auto LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S, LoopExit](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, LoopExit);
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getLocStart());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // the runtime and runs the inner loop to process it.
        EmitOMPForOuterLoop(ScheduleKind, S, LoopScope, Ordered,
                            LB.getAddress(), UB.getAddress(), ST.getAddress(),
                            IL.getAddress(), Chunk);
      }
      EmitOMPReductionClauseFinal(S);
      // Emit the final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
    }
    if (isOpenMPSimdDirective(S.getDirectiveKind())) {
      EmitOMPSimdFinal(S);
    }
    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
  return HasLastprivateClause;
}

void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) {
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
                                              S.hasCancel());

  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
  }
}

void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) {
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);

  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
  }
}

static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
                                const Twine &Name,
                                llvm::Value *Init = nullptr) {
  auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
  if (Init)
    CGF.EmitScalarInit(Init, LVal);
  return LVal;
}

OpenMPDirectiveKind
CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
  auto *CS = dyn_cast<CompoundStmt>(Stmt);
  if (CS && CS->size() > 1) {
    bool HasLastprivates = false;
    auto &&CodeGen = [&S, CS, &HasLastprivates](CodeGenFunction &CGF) {
      auto &C = CGF.CGM.getContext();
      auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
      // Emit helper vars inits.
      LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                    CGF.Builder.getInt32(0));
      auto *GlobalUBVal = CGF.Builder.getInt32(CS->size() - 1);
      LValue UB =
          createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
      LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                    CGF.Builder.getInt32(1));
      LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                    CGF.Builder.getInt32(0));
      // Loop counter.
      LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
      OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
      OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
      // Generate condition for loop.
      BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                          OK_Ordinary, S.getLocStart(),
                          /*fpContractable=*/false);
      // Increment for loop counter.
      UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue,
                        OK_Ordinary, S.getLocStart());
      auto BodyGen = [CS, &S, &IV](CodeGenFunction &CGF) {
        // Iterate through all sections and emit a switch construct:
        // switch (IV) {
        // case 0:
        //   <SectionStmt[0]>;
        //   break;
        // ...
        // case <NumSection> - 1:
        //   <SectionStmt[<NumSection> - 1]>;
        //   break;
        // }
        // .omp.sections.exit:
        auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
        auto *SwitchStmt = CGF.Builder.CreateSwitch(
            CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
            CS->size());
        unsigned CaseNumber = 0;
        for (auto *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
        CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
      };

      CodeGenFunction::OMPPrivateScope LoopScope(CGF);
      if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit an implicit barrier to synchronize threads and avoid data
        // races on initialization of firstprivate variables.
        CGF.CGM.getOpenMPRuntime().emitBarrierCall(
            CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      CGF.EmitOMPPrivateClause(S, LoopScope);
      HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
      CGF.EmitOMPReductionClauseInit(S, LoopScope);
      (void)LoopScope.Privatize();

      // Emit static non-chunked loop.
      CGF.CGM.getOpenMPRuntime().emitForStaticInit(
          CGF, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32,
          /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
          LB.getAddress(), UB.getAddress(), ST.getAddress());
      // UB = min(UB, GlobalUB);
      auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
      auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
          CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
      CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
      // IV = LB;
      CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
      // while (idx <= UB) { BODY; ++idx; }
      CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
                           [](CodeGenFunction &) {});
      // Tell the runtime we are done.
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocStart());
      CGF.EmitOMPReductionClauseFinal(S);

      // Emit the final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivates)
        CGF.EmitOMPLastprivateClauseFinal(
            S, CGF.Builder.CreateIsNotNull(
                   CGF.EmitLoadOfScalar(IL, S.getLocStart())));
    };

    bool HasCancel = false;
    if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
      HasCancel = OSD->hasCancel();
    else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
      HasCancel = OPSD->hasCancel();
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                                HasCancel);
    // Emit a barrier for the lastprivates only if the 'sections' directive has
    // the 'nowait' clause. Otherwise the barrier will be generated by the
    // codegen for the directive.
    if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
      // Emit an implicit barrier to synchronize threads and avoid data races
      // on initialization of firstprivate variables.
      CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                             OMPD_unknown);
    }
    return OMPD_sections;
  }
  // If only one section is found, there is no need to generate a loop; emit
  // it as a single region.
  // If only one section is found, there is no need to generate a loop; emit
  // the section as a single region.
  bool HasFirstprivates = false;
  // No need to generate reductions for sections with a single section region;
  // we can use the original shared variables for all operations.
  bool HasReductions = S.hasClausesOfKind<OMPReductionClause>();
  // No need to generate lastprivates for sections with a single section
  // region; we can use the original shared variables for all calculations
  // with a barrier at the end of the sections.
  bool HasLastprivates = S.hasClausesOfKind<OMPLastprivateClause>();
  auto &&CodeGen = [Stmt, &S, &HasFirstprivates](CodeGenFunction &CGF) {
    CodeGenFunction::OMPPrivateScope SingleScope(CGF);
    HasFirstprivates = CGF.EmitOMPFirstprivateClause(S, SingleScope);
    CGF.EmitOMPPrivateClause(S, SingleScope);
    (void)SingleScope.Privatize();

    CGF.EmitStmt(Stmt);
  };
  CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
                                          llvm::None, llvm::None, llvm::None,
                                          llvm::None);
  // Emit barrier for firstprivates, lastprivates or reductions only if the
  // 'sections' directive has a 'nowait' clause. Otherwise the barrier will be
  // generated by the codegen for the directive.
  if ((HasFirstprivates || HasLastprivates || HasReductions) &&
      S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads and avoid data races on
    // initialization of firstprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_unknown,
                                           /*EmitChecks=*/false,
                                           /*ForceSimpleCall=*/true);
  }
  return OMPD_single;
}

void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  OpenMPDirectiveKind EmittedAs = EmitSections(S);
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>()) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), EmittedAs);
  }
}

void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EnsureInsertPoint();
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
                                              S.hasCancel());
}

void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
  llvm::SmallVector<const Expr *, 8> CopyprivateVars;
  llvm::SmallVector<const Expr *, 8> DestExprs;
  llvm::SmallVector<const Expr *, 8> SrcExprs;
  llvm::SmallVector<const Expr *, 8> AssignmentOps;
  // Check if there are any 'copyprivate' clauses associated with this 'single'
  // construct.
  // Build a list of copyprivate variables along with helper expressions
  // (<source>, <destination>, <destination> = <source> expressions).
  for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
    CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
    DestExprs.append(C->destination_exprs().begin(),
                     C->destination_exprs().end());
    SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
    AssignmentOps.append(C->assignment_ops().begin(),
                         C->assignment_ops().end());
  }
  LexicalScope Scope(*this, S.getSourceRange());
  // Emit code for the 'single' region along with the 'copyprivate' clauses.
  bool HasFirstprivates = false;
  auto &&CodeGen = [&S, &HasFirstprivates](CodeGenFunction &CGF) {
    CodeGenFunction::OMPPrivateScope SingleScope(CGF);
    HasFirstprivates = CGF.EmitOMPFirstprivateClause(S, SingleScope);
    CGF.EmitOMPPrivateClause(S, SingleScope);
    (void)SingleScope.Privatize();

    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EnsureInsertPoint();
  };
  CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
                                          CopyprivateVars, DestExprs, SrcExprs,
                                          AssignmentOps);
  // Emit an implicit barrier at the end (to avoid a data race on firstprivate
  // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
  if ((!S.getSingleClause<OMPNowaitClause>() || HasFirstprivates) &&
      CopyprivateVars.empty()) {
    CGM.getOpenMPRuntime().emitBarrierCall(
        *this, S.getLocStart(),
        S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
  }
}
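
// A hedged usage sketch for the 'single' emission above: given
//
//   #pragma omp single copyprivate(x)
//   x = compute();
//
// the one thread that executes the region broadcasts its value of 'x' to all
// other threads through the copyprivate machinery (the CopyprivateVars,
// DestExprs, SrcExprs and AssignmentOps lists collected above). That runtime
// path synchronizes by itself, which is why no extra barrier is emitted when
// CopyprivateVars is non-empty.
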
void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EnsureInsertPoint();
  };
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
}

void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EnsureInsertPoint();
  };
  CGM.getOpenMPRuntime().emitCriticalRegion(
      *this, S.getDirectiveName().getAsString(), CodeGen, S.getLocStart());
}
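
// Sketch of the expected shape of a critical region, using the entry-point
// names from libomp for orientation only (the actual calls are emitted by
// emitCriticalRegion):
//
//   __kmpc_critical(&loc, tid, &<lock-for-name>);
//   <body>
//   __kmpc_end_critical(&loc, tid, &<lock-for-name>);
//
// where the lock variable is keyed on the directive's name, so differently
// named critical regions do not exclude each other.
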
void CodeGenFunction::EmitOMPParallelForDirective(
    const OMPParallelForDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  LexicalScope Scope(*this, S.getSourceRange());
  (void)emitScheduleClause(*this, S, /*OuterRegion=*/true);
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitOMPWorksharingLoop(S);
    // Emit implicit barrier at the end of the parallel region. The barrier
    // belongs to the end of the 'for' directive, so emit it as the implicit
    // barrier for this 'for' directive.
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(
        CGF, S.getLocStart(), OMPD_parallel, /*EmitChecks=*/false,
        /*ForceSimpleCall=*/true);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen);
}

void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for simd' directive.
  LexicalScope Scope(*this, S.getSourceRange());
  (void)emitScheduleClause(*this, S, /*OuterRegion=*/true);
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitOMPWorksharingLoop(S);
    // Emit implicit barrier at the end of the parallel region. The barrier
    // belongs to the end of the 'for' directive, so emit it as the implicit
    // barrier for this 'for' directive.
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(
        CGF, S.getLocStart(), OMPD_parallel, /*EmitChecks=*/false,
        /*ForceSimpleCall=*/true);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen);
}

void CodeGenFunction::EmitOMPParallelSectionsDirective(
    const OMPParallelSectionsDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'sections' directive.
  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    (void)CGF.EmitSections(S);
    // Emit implicit barrier at the end of the parallel region.
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(
        CGF, S.getLocStart(), OMPD_parallel, /*EmitChecks=*/false,
        /*ForceSimpleCall=*/true);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen);
}

void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit outlined function for task construct.
  LexicalScope Scope(*this, S.getSourceRange());
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto *I = CS->getCapturedDecl()->param_begin();
  auto *PartId = std::next(I);
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >= 0 for untied tasks).
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  // Get the list of private variables.
  llvm::SmallVector<const Expr *, 8> PrivateVars;
  llvm::SmallVector<const Expr *, 8> PrivateCopies;
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        PrivateVars.push_back(*IRef);
        PrivateCopies.push_back(IInit);
      }
      ++IRef;
    }
  }
  EmittedAsPrivate.clear();
  // Get the list of firstprivate variables.
  llvm::SmallVector<const Expr *, 8> FirstprivateVars;
  llvm::SmallVector<const Expr *, 8> FirstprivateCopies;
  llvm::SmallVector<const Expr *, 8> FirstprivateInits;
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        FirstprivateVars.push_back(*IRef);
        FirstprivateCopies.push_back(IInit);
        FirstprivateInits.push_back(*IElemInitRef);
      }
      ++IRef, ++IElemInitRef;
    }
  }
  // Build the list of dependences.
  llvm::SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 8>
      Dependences;
  for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
    for (auto *IRef : C->varlists()) {
      Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
    }
  }
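  // Note on the parameter layout assumed by the lambda below: for tasks the
  // outlined CapturedDecl carries extra implicit parameters, where param(2) is
  // the pointer to the block of generated private copies and param(3) is the
  // helper function ('CopyFn') that initializes them and reports their real
  // addresses.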
  auto &&CodeGen = [PartId, &S, &PrivateVars, &FirstprivateVars](
      CodeGenFunction &CGF) {
    // Set proper addresses for generated private copies.
    auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
    OMPPrivateScope Scope(CGF);
    if (!PrivateVars.empty() || !FirstprivateVars.empty()) {
      auto *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
      auto *PrivatesPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      CallArgs.push_back(PrivatesPtr);
      for (auto *E : PrivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()));
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (auto *E : FirstprivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()));
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      CGF.EmitRuntimeCall(CopyFn, CallArgs);
      for (auto &&Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    (void)Scope.Privatize();
    if (*PartId) {
      // TODO: emit code for untied tasks.
    }
    CGF.EmitStmt(CS->getCapturedStmt());
  };
  auto OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, OMPD_task, CodeGen);
  // Check if we should emit a tied or an untied task.
  bool Tied = !S.getSingleClause<OMPUntiedClause>();
  // Check if the task is final.
  llvm::PointerIntPair<llvm::Value *, 1, bool> Final;
  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    auto *Cond = Clause->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Final.setInt(CondConstant);
    else
      Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Final.setInt(/*IntVal=*/false);
  }
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_task) {
      IfCond = C->getCondition();
      break;
    }
  }
  CGM.getOpenMPRuntime().emitTaskCall(
      *this, S.getLocStart(), S, Tied, Final, OutlinedFn, SharedsTy,
      CapturedStruct, IfCond, PrivateVars, PrivateCopies, FirstprivateVars,
      FirstprivateCopies, FirstprivateInits, Dependences);
}
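
// Hedged lowering sketch for a task (entry points named as in libomp, for
// orientation only; the actual calls are emitted by emitTaskCall):
//
//   #pragma omp task firstprivate(x)
//   use(x);
//
//   task = __kmpc_omp_task_alloc(&loc, tid, flags, /*task size*/...,
//                                /*shareds size*/..., &outlined_entry);
//   <copy firstprivates into the task's private block>
//   __kmpc_omp_task(&loc, tid, task); // or __kmpc_omp_task_with_deps
//
// where 'flags' encodes the Tied/Final values computed above.
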
void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
}

void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
}

void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
  CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
}

void CodeGenFunction::EmitOMPTaskgroupDirective(
    const OMPTaskgroupDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EnsureInsertPoint();
  };
  CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
}

void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
  CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
    if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
      return llvm::makeArrayRef(FlushClause->varlist_begin(),
                                FlushClause->varlist_end());
    }
    return llvm::None;
  }(), S.getLocStart());
}

static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
                                                   const CapturedStmt *S) {
  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
  CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
  CGF.CapturedStmtInfo = &CapStmtInfo;
  auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
  Fn->addFnAttr(llvm::Attribute::NoInline);
  return Fn;
}

void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  auto *C = S.getSingleClause<OMPSIMDClause>();
  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF) {
    if (C) {
      auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
      llvm::SmallVector<llvm::Value *, 16> CapturedVars;
      CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
      auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
      CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars);
    } else {
      CGF.EmitStmt(
          cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    }
    CGF.EnsureInsertPoint();
  };
  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
}
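
// Note: for '#pragma omp ordered simd' the associated statement is outlined
// above into a noinline helper and called directly, since no runtime ordering
// is needed inside a simd region; the plain 'ordered' form is emitted inline
// under emitOrderedRegion (IsThreads == !C), which brackets the body with the
// runtime's ordered entry/exit calls.
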
static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType,
                                         SourceLocation Loc) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  return Val.isScalar()
             ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
                                        Loc)
             : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
                                                 DestType, Loc);
}

static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType, SourceLocation Loc) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  CodeGenFunction::ComplexPairTy ComplexVal;
  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
                                              DestElementType, Loc);
    ComplexVal = CodeGenFunction::ComplexPairTy(
        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
  } else {
    assert(Val.isComplex() && "Must be a scalar or complex.");
    auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    ComplexVal.first = CGF.EmitScalarConversion(
        Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
    ComplexVal.second = CGF.EmitScalarConversion(
        Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
  }
  return ComplexVal;
}

static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
                                  LValue LVal, RValue RVal) {
  if (LVal.isGlobalReg()) {
    CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
  } else {
    CGF.EmitAtomicStore(RVal, LVal, IsSeqCst ? llvm::SequentiallyConsistent
                                             : llvm::Monotonic,
                        LVal.isVolatile(), /*IsInit=*/false);
  }
}

static void emitSimpleStore(CodeGenFunction &CGF, LValue LVal, RValue RVal,
                            QualType RValTy, SourceLocation Loc) {
  switch (CGF.getEvaluationKind(LVal.getType())) {
  case TEK_Scalar:
    CGF.EmitStoreThroughLValue(RValue::get(convertToScalarValue(
                                   CGF, RVal, RValTy, LVal.getType(), Loc)),
                               LVal);
    break;
  case TEK_Complex:
    CGF.EmitStoreOfComplex(
        convertToComplexValue(CGF, RVal, RValTy, LVal.getType(), Loc), LVal,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}

static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  RValue Res = XLValue.isGlobalReg()
                   ? CGF.EmitLoadOfLValue(XLValue, Loc)
                   : CGF.EmitAtomicLoad(XLValue, Loc,
                                        IsSeqCst ? llvm::SequentiallyConsistent
                                                 : llvm::Monotonic,
                                        XLValue.isVolatile());
  // OpenMP, 2.12.6, atomic Construct:
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
  emitSimpleStore(CGF, VLValue, Res, X->getType().getNonReferenceType(), Loc);
}
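
// A short example of the mapping implemented above:
//
//   #pragma omp atomic read seq_cst
//   v = x;
//
// loads 'x' atomically (sequentially consistent because of 'seq_cst',
// monotonic otherwise), emits the implicit flush that the spec requires for
// seq_cst, and only then stores the result into 'v' with an ordinary store,
// converting between the types of 'x' and 'v' if they differ.
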
static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                   const Expr *X, const Expr *E,
                                   SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
  // OpenMP, 2.12.6, atomic Construct:
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  auto &Context = CGF.CGM.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue
  // for the 'x' expression is simple, and atomic operations are supported for
  // the given type on the target platform.
  if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() ||
      (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
       (Update.getScalarVal()->getType() !=
        X.getAddress().getElementType())) ||
      !X.getAddress().getElementType()->isIntegerTy() ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
  case BO_Assign:
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    // There is no atomicrmw instruction for these operations.
    return std::make_pair(false, RValue::get(nullptr));
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  auto *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress().getElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}
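
// Sketch of the fast path implemented above: for an integer 'x', an update
// such as
//
//   #pragma omp atomic
//   x += 1;
//
// can be emitted as a single instruction, roughly
//
//   %old = atomicrmw add i32* %x, i32 1 monotonic
//
// while forms atomicrmw cannot express (e.g. 'x = expr - x', non-integer
// types, mismatched operand widths) return 'false' here and fall back to the
// compare-and-swap path in EmitOMPAtomicSimpleUpdateExpr.
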
std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> &CommonGen) {
  // Update expressions are allowed to have the following forms:
  //   x binop= expr;     -> xrval binop expr;
  //   x++, ++x           -> xrval + 1;
  //   x--, --x           -> xrval - 1;
  //   x = x binop expr;  -> xrval binop expr;
  //   x = expr Op x;     -> expr binop xrval;
  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
  if (!Res.first) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Perform compare-and-swap procedure.
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
  return Res;
}
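
// The generic fallback above (EmitAtomicUpdate) amounts to a compare-and-swap
// loop; schematically:
//
//   old = <atomic load of x>;
//   do {
//     desired = CommonGen(old); // 'xrval' binop 'expr' or 'expr' binop 'xrval'
//   } while (!<atomic compare-exchange of x: old -> desired>);
//
// This is only a sketch; the real emission in CGAtomic also handles values
// that must go through memory and falls back to libcalls for oversized
// atomics.
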
static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                    const Expr *X, const Expr *E,
                                    const Expr *UE, bool IsXLHSInRHSPart,
                                    SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  //   x binop= expr;     -> xrval binop expr;
  //   x++, ++x           -> xrval + 1;
  //   x--, --x           -> xrval - 1;
  //   x = x binop expr;  -> xrval binop expr;
  //   x = expr Op x;     -> expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;
  auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  auto Gen =
      [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
    CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
    CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
    return CGF.EmitAnyExpr(UE);
  };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
  // OpenMP, 2.12.6, atomic Construct:
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static RValue convertToType(CodeGenFunction &CGF, RValue Value,
                            QualType SourceType, QualType ResType,
                            SourceLocation Loc) {
  switch (CGF.getEvaluationKind(ResType)) {
  case TEK_Scalar:
    return RValue::get(
        convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
  case TEK_Complex: {
    auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
    return RValue::getComplex(Res.first, Res.second);
  }
  case TEK_Aggregate:
    break;
  }
  llvm_unreachable("Must be a scalar or complex.");
}

static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    //   x binop= expr;     -> xrval binop expr;
    //   x++, ++x           -> xrval + 1;
    //   x--, --x           -> xrval - 1;
    //   x = x binop expr;  -> xrval binop expr;
    //   x = expr Op x;     -> expr binop xrval;
    auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsSeqCst, IsPostfixUpdate](RValue XRValue) -> RValue {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide the new value, so evaluate it using the
        // old value of 'x'.
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    auto &&Gen = [&CGF, &NewVVal, ExprRValue](RValue XRValue) -> RValue {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  emitSimpleStore(CGF, VLValue, NewVVal, NewVValType, Loc);
  // OpenMP, 2.12.6, atomic Construct:
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
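
// A worked example of the capture protocol above:
//
//   #pragma omp atomic capture
//   v = x++;
//
// is a postfix update, so NewVVal must be the *old* value of 'x'. When the
// update maps onto 'atomicrmw', the instruction's result is exactly that old
// value and is stored to 'v' directly; otherwise the update lambda records
// the value seen before (or, for prefix forms, after) applying UE.
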
static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, bool IsPostfixUpdate,
                              const Expr *X, const Expr *V, const Expr *E,
                              const Expr *UE, bool IsXLHSInRHSPart,
                              SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
    EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    break;
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_safelen:
  case OMPC_simdlen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_depend:
  case OMPC_mergeable:
  case OMPC_device:
  case OMPC_threads:
  case OMPC_simd:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}

void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
  OpenMPClauseKind Kind = OMPC_unknown;
  for (auto *C : S.clauses()) {
    // Find the first clause (skip the seq_cst clause, if it is first).
    if (C->getClauseKind() != OMPC_seq_cst) {
      Kind = C->getClauseKind();
      break;
    }
  }

  const auto *CS =
      S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
    enterFullExpression(EWC);
  }
  // Processing for statements under 'atomic capture'.
  if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
    for (const auto *C : Compound->body()) {
      if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
        enterFullExpression(EWC);
      }
    }
  }

  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S, Kind, IsSeqCst](CodeGenFunction &CGF) {
    EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
                      S.getV(), S.getExpr(), S.getUpdateExpr(),
                      S.isXLHSInRHSPart(), S.getLocStart());
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
}

void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());

  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  GenerateOpenMPCapturedVars(CS, CapturedVars, /*UseOnlyReferences=*/true);

  // Emit target region as a standalone region.
  auto &&CodeGen = [&CS](CodeGenFunction &CGF) {
    CGF.EmitStmt(CS.getCapturedStmt());
  };

  // Obtain the target region outlined function.
  llvm::Value *Fn =
      CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, CodeGen);

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>()) {
    IfCond = C->getCondition();
  }

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
    Device = C->getDevice();
  }

  CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, IfCond, Device,
                                        CapturedVars);
}
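
// Orientation note for the target path above: the region body is outlined
// into a separate function, and emitTargetCall decides at run time whether to
// launch it on the device or fall back to the host version, guarded by the
// 'if' and 'device' clause expressions collected above, e.g.
//
//   #pragma omp target if(n > 1024) device(0)
//   heavy(n);
//
// VLA sizes are captured by reference here (UseOnlyReferences) so that every
// captured value is addressable for the argument-passing machinery.
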
void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) {
  llvm_unreachable("CodeGen for 'omp teams' is not supported yet.");
}

void CodeGenFunction::EmitOMPCancellationPointDirective(
    const OMPCancellationPointDirective &S) {
  CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
                                                   S.getCancelRegion());
}

void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_cancel) {
      IfCond = C->getCondition();
      break;
    }
  }
  CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
                                        S.getCancelRegion());
}

CodeGenFunction::JumpDest
CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
  if (Kind == OMPD_parallel || Kind == OMPD_task)
    return ReturnBlock;
  assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
         Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for);
  return BreakContinueStack.back().BreakBlock;
}

// Generate the instructions for the '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  // For now, just emit the code inside the construct; mapping of the data
  // environment is not implemented yet.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_target_data,
      [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); });
}