//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "llvm/IR/CallSite.h"
using namespace clang;
using namespace CodeGen;

namespace {
/// Lexical scope for OpenMP executable constructs that handles correct
/// codegen for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
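  // Emits the pre-init declarations attached to the directive's clauses.
  // As a rough illustration (not the exact AST), for
  //   #pragma omp parallel num_threads(x + 1)
  // the 'num_threads' clause carries a pre-init statement declaring a
  // captured temporary, roughly
  //   .capture_expr. = x + 1;
  // which has to be emitted before entering the region.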
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
  }

public:
  OMPLexicalScope(
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
      const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
      const bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (!CapturedRegion.hasValue())
      return;
    assert(S.hasAssociatedStmt() &&
           "Expected associated statement for inlined directive.");
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
    for (const auto &C : CS->captures()) {
      if (C.capturesVariable() || C.capturesVariableByCopy()) {
        auto *VD = C.getCapturedVar();
        assert(VD == VD->getCanonicalDecl() &&
               "Canonical decl must be captured.");
        DeclRefExpr DRE(
            CGF.getContext(), const_cast<VarDecl *>(VD),
            isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
                                       InlinedShareds.isGlobalVarCaptured(VD)),
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
        InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
          return CGF.EmitLValue(&DRE).getAddress();
        });
      }
    }
    (void)InlinedShareds.Privatize();
  }
};

/// Lexical scope for the OpenMP parallel construct that handles correct
/// codegen for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !(isOpenMPTargetExecutionDirective(Kind) ||
             isOpenMPLoopBoundSharingDirective(Kind)) &&
           isOpenMPParallelDirective(Kind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
                        EmitPreInitStmt(S)) {}
};

/// Lexical scope for the OpenMP teams construct that handles correct codegen
/// for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !isOpenMPTargetExecutionDirective(Kind) &&
           isOpenMPTeamsDirective(Kind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
                        EmitPreInitStmt(S)) {}
};

/// Private scope for OpenMP loop-based directives that supports capturing
/// of expressions used in the loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
    CodeGenFunction::OMPMapVars PreCondVars;
    for (const auto *E : S.counters()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      (void)PreCondVars.setVarAddr(
          CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
    }
    (void)PreCondVars.apply(CGF);
    if (const auto *PreInits = cast_or_null<DeclStmt>(S.getPreInits())) {
      for (const auto *I : PreInits->decls())
        CGF.EmitVarDecl(cast<VarDecl>(*I));
    }
    PreCondVars.restore(CGF);
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};

class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      }
    }
    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
      if (const Expr *E = TG->getReductionRef())
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
    }
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
    while (CS) {
      for (auto &C : CS->captures()) {
        if (C.capturesVariable() || C.capturesVariableByCopy()) {
          auto *VD = C.getCapturedVar();
          assert(VD == VD->getCanonicalDecl() &&
                 "Canonical decl must be captured.");
          DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                          isCapturedVar(CGF, VD) ||
                              (CGF.CapturedStmtInfo &&
                               InlinedShareds.isGlobalVarCaptured(VD)),
                          VD->getType().getNonReferenceType(), VK_LValue,
                          C.getLocation());
          InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
            return CGF.EmitLValue(&DRE).getAddress();
          });
        }
      }
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
    }
    (void)InlinedShareds.Privatize();
  }
};

} // namespace

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen);

LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
  if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
    if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
      OrigVD = OrigVD->getCanonicalDecl();
      bool IsCaptured =
          LambdaCaptureFields.lookup(OrigVD) ||
          (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
          (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
                      OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
      return EmitLValue(&DRE);
    }
  }
  return EmitLValue(E);
}

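// Computes the size of 'Ty' in bytes as an llvm::Value, multiplying out any
// VLA dimensions. Illustratively, for 'double a[n][m]' this produces
// n * m * sizeof(double) using no-unsigned-wrap multiplies.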
llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  ASTContext &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
      VlaSizePair VlaSize = getVLASize(VAT);
      Ty = VlaSize.Type;
      Size = Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts)
                  : VlaSize.NumElts;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  }
  return CGM.getSize(SizeInChars);
}

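// Collects the values to pass to the outlined function for each capture:
// VLA sizes, 'this', by-copy values, or addresses for by-reference captures.
// For a by-copy capture of a non-pointer 'int a', the value is funneled
// through an 'a.casted' uintptr temporary (stored with the source type,
// reloaded as uintptr) because the runtime only forwards pointer-sized
// values. A rough, illustrative IR sketch:
//   %a.casted = alloca i64
//   store i32 %a, i32* <bitcast of %a.casted>
//   %0 = load i64, i64* %a.casted   ; becomes the outlined-call argument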
void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      const VariableArrayType *VAT = CurField->getCapturedVLAType();
      llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis()) {
      CapturedVars.push_back(CXXThisValue);
    } else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        ASTContext &Ctx = getContext();
        Address DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName(), ".casted"));
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        llvm::Value *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
    }
  }
}

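// Performs the inverse of the cast in GenerateOpenMPCapturedVars: reinterpret
// a uintptr-typed argument slot as the destination type. For reference types
// a fresh '<Name>.ref' temporary is created to hold the address, so callers
// get an address usable as the backing store of a reference (the temporary's
// name is derived from 'Name').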
static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
                                    QualType DstType, StringRef Name,
                                    LValue AddrLV,
                                    bool isReferenceType = false) {
  ASTContext &Ctx = CGF.getContext();

  llvm::Value *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), Loc);
  Address TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
          .getAddress();

  // If we are dealing with references we need to return the address of the
  // reference instead of the reference of the value.
  if (isReferenceType) {
    QualType RefType = Ctx.getLValueReferenceType(DstType);
    llvm::Value *RefVal = TmpAddr.getPointer();
    TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name, ".ref"));
    LValue TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
    CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit=*/true);
  }

  return TmpAddr;
}

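// Canonicalizes an outlined-function parameter type. Illustratively, a VLA
// parameter such as 'int a[n]' decays to a canonical 'int *', while reference
// and pointer types are rebuilt around the canonicalized pointee type.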
static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType())
    return C.getLValueReferenceType(
        getCanonicalParamType(C, T.getNonReferenceType()),
        /*SpelledAsLValue=*/false);
  if (T->isPointerType())
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
  if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
    if (const auto *VLA = dyn_cast<VariableArrayType>(A))
      return getCanonicalParamType(C, VLA->getElementType());
    if (!A->isVariablyModifiedType())
      return C.getCanonicalType(T);
  }
  return C.getCanonicalParamType(T);
}

namespace {
/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only casted arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly,
                           StringRef FunctionName)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName) {}
};
} // namespace

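// Emits the prologue of an outlined function for a captured statement: the
// argument list (the CapturedDecl's context parameters plus one argument per
// captured field), the llvm::Function itself, and the mappings from arguments
// back to the original variables and VLA sizes. As a rough illustration
// (names are only indicative), when UIntPtrCastRequired is true a captured
// non-pointer 'int n' (by copy) and a VLA bound both become pointer-sized
// parameters:
//   define internal void @<helper>(..., i64 %n, i64 %vla)
// while the debug variant keeps the original parameter types.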
static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    FunctionProtoType::ExtProtoInfo EPI;
    QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI);
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
        SourceLocation(), DeclarationName(), FunctionTy,
        Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
        /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
  }
  for (const FieldDecl *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly cast to
    // uintptr. This is necessary given that the runtime library is only able
    // to deal with pointers. We can pass in the same way the VLA type sizes to
    // the outlined function.
    if (FO.UIntPtrCastRequired &&
        ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
         I->capturesVariableArrayType()))
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis()) {
      II = &Ctx.Idents.get("this");
    } else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamDecl::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  auto *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();
  F->setDoesNotRecurse();

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.S->getBeginLoc(), CD->getBody()->getBeginLoc());
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (const FieldDecl *FD : RD->fields()) {
    // Do not map arguments if we emit function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      // If the variable is a reference we need to materialize it here.
      if (CurVD->getType()->isReferenceType()) {
        Address RefAddr = CGF.CreateMemTemp(
            CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref");
        CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr,
                              /*Volatile=*/false, CurVD->getType());
        LocalAddr = RefAddr;
      }
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
                                 Args[Cnt]->getName(), ArgLVal),
            FD->getType(), AlignmentSource::Decl);
      }
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      const VariableArrayType *VAT = FD->getCapturedVLAType();
      VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
    } else if (I->capturesVariable()) {
      const VarDecl *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (!VarTy->isReferenceType()) {
        if (ArgLVal.getType()->isLValueReferenceType()) {
          ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
        } else if (!VarTy->isVariablyModifiedType() ||
                   !VarTy->isPointerType()) {
          assert(ArgLVal.getType()->isPointerType());
          ArgAddr = CGF.EmitLoadOfPointer(
              ArgAddr, ArgLVal.getType()->castAs<PointerType>());
        }
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt],
             {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      const VarDecl *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      LocalAddrs.insert(
          {Args[Cnt],
           {Var, FO.UIntPtrCastRequired
                     ? castValueFromUintptr(CGF, I->getLocation(),
                                            FD->getType(), Args[Cnt]->getName(),
                                            ArgLVal, VarTy->isReferenceType())
                     : ArgLVal.getAddress()}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}

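// When debug info is requested we emit two functions: a "<helper>_debug__"
// variant that keeps the original parameter types (so captured variables stay
// inspectable in a debugger) plus a thin wrapper with the runtime-friendly
// uintptr-cast signature that simply loads each argument and forwards to the
// debug variant. Without debug info only the single cast version is emitted.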
llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
  bool NeedWrapperFunction =
      getDebugInfo() &&
      CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo;
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  if (NeedWrapperFunction)
    Out << "_debug__";
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str());
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      setAddrOfLocalVar(LocalAddrPair.second.first,
                        LocalAddrPair.second.second);
    }
  }
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName());
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end()) {
        CallArg = EI->second.second;
      } else {
        LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                              Arg->getType(),
                                              AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getBeginLoc(),
                                                  F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}

//===----------------------------------------------------------------------===//
//                              OpenMP Directive Emission
//===----------------------------------------------------------------------===//
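// Emits an element-by-element copy between two arrays of the same type. The
// generated control flow is roughly:
//   if (dest == dest + n) goto omp.arraycpy.done;
//   omp.arraycpy.body:
//     <CopyGen(dest.element, src.element)>
//     advance both elements; branch back unless dest.element == dest + n
//   omp.arraycpy.done: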
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

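// Emits a copy of 'SrcVD' into 'DestVD' using the copy expression 'Copy'.
// Plain array assignments become a single aggregate copy; arrays whose
// elements need a nontrivial copy go element by element through
// EmitOMPAggregateAssign; everything else remaps the pseudo source and
// destination variables and emits 'Copy' directly.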
void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    const auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
      LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
      EmitAggregateAssign(Dest, Src, OriginalType);
    } else {
      // For arrays with complex element types perform element by element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // Working with the single array element, so have to remap
            // destination and source variables to corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() { return DestElement; });
            Remap.addPrivate(SrcVD, [SrcElement]() { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}

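// Emits the private copies for the 'firstprivate' clause. As a rough
// illustration, for
//   #pragma omp parallel firstprivate(a)
// each thread gets a local copy of 'a' that is copy-initialized from the
// original variable before the region body runs. Returns true if at least one
// firstprivate variable is also lastprivate, so the copy-back on the last
// iteration must still be emitted.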
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool FirstprivateIsLastprivate = false;
  llvm::DenseSet<const VarDecl *> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.insert(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  // Force emission of the firstprivate copy if the directive does not emit an
  // outlined function, like omp for, omp simd, omp distribute etc.
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType()) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        const auto *VDInit =
            cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal = EmitLValue(&DRE);
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(
              OrigVD, [this, VD, Type, OriginalLVal, VDInit]() {
                AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
                const Expr *Init = VD->getInit();
                if (!isa<CXXConstructExpr>(Init) ||
                    isTrivialInitializer(Init)) {
                  // Perform simple memcpy.
                  LValue Dest =
                      MakeAddrLValue(Emission.getAllocatedAddress(), Type);
                  EmitAggregateAssign(Dest, OriginalLVal, Type);
                } else {
                  EmitOMPAggregateAssign(
                      Emission.getAllocatedAddress(), OriginalLVal.getAddress(),
                      Type,
                      [this, VDInit, Init](Address DestElement,
                                           Address SrcElement) {
                        // Clean up any temporaries needed by the
                        // initialization.
                        RunCleanupsScope InitScope(*this);
                        // Emit initialization for single element.
                        setAddrOfLocalVar(VDInit, SrcElement);
                        EmitAnyExprToMem(Init, DestElement,
                                         Init->getType().getQualifiers(),
                                         /*IsInitializer*/ false);
                        LocalDeclMap.erase(VDInit);
                      });
                }
                EmitAutoVarCleanups(Emission);
                return Emission.getAllocatedAddress();
              });
        } else {
          Address OriginalAddr = OriginalLVal.getAddress();
          IsRegistered = PrivateScope.addPrivate(
              OrigVD, [this, VDInit, OriginalAddr, VD]() {
                // Emit private VarDecl with copy init.
                // Remap temp VDInit variable to the address of the original
                // variable (for proper handling of captured global variables).
                setAddrOfLocalVar(VDInit, OriginalAddr);
                EmitDecl(*VD);
                LocalDeclMap.erase(VDInit);
                return GetAddrOfLocalVar(VD);
              });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() {
          // Emit private VarDecl with copy init.
          EmitDecl(*VD);
          return GetAddrOfLocalVar(VD);
        });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as a field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // First check whether the current thread is the master thread. If it
          // is, there is no need to copy data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(),
                                         CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

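// Sets up privatization for the 'lastprivate' clause: captures the address of
// each original variable (needed for the copy-back after the last iteration)
// and, unless the variable is also firstprivate or a simd loop control
// variable, emits its default-initialized private copy. Returns true if any
// lastprivate clause was seen.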
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        !getLangOpts().OpenMPSimd)
      break;
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization; it is done in the
      // runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() {
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                          /*RefersToEnclosingVariableOrCapture=*/
                          CapturedStmtInfo->lookup(OrigVD) != nullptr,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in
        // codegen for the 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (const Expr *F : LoopDirective->finals()) {
      const auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If the lastprivate variable is a loop control variable for a
        // loop-based directive, update its value before copying it back to
        // the original variable.
        if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      getNaturalTypeAlignment(RefTy->getPointeeType()));
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

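// Emits the private copies for reduction variables. As a rough illustration,
// for
//   #pragma omp parallel reduction(+ : s)
// each thread gets a private 's' initialized with the identity of the
// operation (0 for '+'); the partial results are combined later by
// EmitOMPReductionClauseFinal through the runtime.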
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (const Expr *Ref : C->varlists()) {
      Shareds.emplace_back(Ref);
      Privates.emplace_back(*IPriv);
      ReductionOps.emplace_back(*IRed);
      LHSs.emplace_back(*ILHS);
      RHSs.emplace_back(*IRHS);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ILHS, 1);
      std::advance(IRHS, 1);
    }
  }
  ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
  unsigned Count = 0;
  auto ILHS = LHSs.begin();
  auto IRHS = RHSs.begin();
  auto IPriv = Privates.begin();
  for (const Expr *IRef : Shareds) {
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered = PrivateScope.addPrivate(
        RedCG.getBaseDecl(Count), [BaseAddr]() { return BaseAddr; });
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    QualType Type = PrivateVD->getType();
    bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
    if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD]() { return GetAddrOfLocalVar(PrivateVD); });
    } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
               isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() {
        return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
                                            ConvertTypeForMem(RHSVD->getType()),
                                            "rhs.begin");
      });
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        OriginalAddr = Builder.CreateElementBitCast(
            OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
      }
      PrivateScope.addPrivate(LHSVD, [OriginalAddr]() { return OriginalAddr; });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD, RHSVD, IsArray]() {
            return IsArray
                       ? Builder.CreateElementBitCast(
                             GetAddrOfLocalVar(PrivateVD),
                             ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
                       : GetAddrOfLocalVar(PrivateVD);
          });
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
}

void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
  }
  if (HasAtLeastOneReduction) {
    bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
                      isOpenMPParallelDirective(D.getDirectiveKind()) ||
                      ReductionKind == OMPD_simd;
    bool SimpleReduction = ReductionKind == OMPD_simd;
    // Emit nowait reduction if nowait clause is present or directive is a
    // parallel directive (it always has implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
        {WithNowait, SimpleReduction, ReductionKind});
  }
}

static void emitPostUpdateForReductionClause(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(CGF)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
          DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          CGF.EmitBlock(ThenBB);
        }
      }
      CGF.EmitIgnoredExpr(PostUpdate);
    }
  }
  if (DoneBB)
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

namespace {
/// Codegen lambda for appending distribute lower and upper bounds to outlined
/// parallel function. This is necessary for combined constructs such as
/// 'distribute parallel for'.
typedef llvm::function_ref<void(CodeGenFunction &,
                                const OMPExecutableDirective &,
                                llvm::SmallVectorImpl<llvm::Value *> &)>
    CodeGenBoundParametersTy;
} // anonymous namespace

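// Common lowering for parallel-like directives: emit the outlined function,
// apply 'num_threads' and 'proc_bind', find the applicable 'if' clause, and
// call into the runtime. With libomp this ends up, roughly, as a call like
// (illustrative only):
//   call void @__kmpc_fork_call(%ident_t* @loc, i32 <n>,
//                               void (...)* @.omp_outlined., <captures...>)
// or a serialized-parallel path when the 'if' condition is false.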
static void emitCommonOMPParallelDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    const CodeGenBoundParametersTy &CodeGenBoundParameters) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  llvm::Value *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
          S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    llvm::Value *NumThreads =
        CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                           /*IgnoreResultAssign=*/true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getBeginLoc());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
  }
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPParallelScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  // Combining 'distribute' with 'for' requires sharing each 'distribute'
  // chunk's lower and upper bounds with the pragma 'for' chunking mechanism.
  // The following lambda takes care of appending the lower and upper bound
  // parameters when necessary.
  CodeGenBoundParameters(CGF, S, CapturedVars);
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
                                              CapturedVars, IfCond);
}

static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}

void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit an implicit barrier to synchronize the threads and avoid data
      // races when propagating the master thread's values of threadprivate
      // variables to the local instances of those variables in the other
      // implicit threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update counters values on current iteration.
  for (const Expr *UE : D.updates())
    EmitIgnoredExpr(UE);
  // Update the linear variables.
  // In distribute directives only loop counters may be marked as linear;
  // there is no need to generate code for the others.
  if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
    for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
      for (const Expr *UE : C->updates())
        EmitIgnoredExpr(UE);
    }
  }

  // On a continue in the body, jump to the end.
  JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  // Emit loop body.
  EmitStmt(D.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}

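// Emits the innermost iteration loop. The generated control flow is roughly:
//   omp.inner.for.cond:  br i1 <LoopCond>, omp.inner.for.body, <exit>
//   omp.inner.for.body:  <BodyGen>
//   omp.inner.for.inc:   IV = IV + 1 (IncExpr), br omp.inner.for.cond
//   omp.inner.for.end:
// with an extra 'omp.inner.for.cond.cleanup' block staged in when the loop
// exit has to run cleanups.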
EmitOMPInnerLoop(const Stmt & S,bool RequiresCleanup,const Expr * LoopCond,const Expr * IncExpr,const llvm::function_ref<void (CodeGenFunction &)> BodyGen,const llvm::function_ref<void (CodeGenFunction &)> PostIncGen)1324 void CodeGenFunction::EmitOMPInnerLoop(
1325 const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
1326 const Expr *IncExpr,
1327 const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
1328 const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
1329 auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
1330
1331 // Start the loop with a block that tests the condition.
1332 auto CondBlock = createBasicBlock("omp.inner.for.cond");
1333 EmitBlock(CondBlock);
1334 const SourceRange R = S.getSourceRange();
1335 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
1336 SourceLocToDebugLoc(R.getEnd()));
1337
1338 // If there are any cleanups between here and the loop-exit scope,
1339 // create a block to stage a loop exit along.
1340 llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
1341 if (RequiresCleanup)
1342 ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
1343
1344 llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body");
1345
1346 // Emit condition.
1347 EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
1348 if (ExitBlock != LoopExit.getBlock()) {
1349 EmitBlock(ExitBlock);
1350 EmitBranchThroughCleanup(LoopExit);
1351 }
1352
1353 EmitBlock(LoopBody);
1354 incrementProfileCounter(&S);
1355
1356 // Create a block for the increment.
1357 JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
1358 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1359
1360 BodyGen(*this);
1361
1362 // Emit "IV = IV + 1" and a back-edge to the condition block.
1363 EmitBlock(Continue.getBlock());
1364 EmitIgnoredExpr(IncExpr);
1365 PostIncGen(*this);
1366 BreakContinueStack.pop_back();
1367 EmitBranch(CondBlock);
1368 LoopStack.pop();
1369 // Emit the fall-through block.
1370 EmitBlock(LoopExit.getBlock());
1371 }
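
// The emitted control flow for the inner loop looks like this (block names
// match the createBasicBlock calls above):
//
//   omp.inner.for.cond:  branch on LoopCond -> omp.inner.for.body / exit
//   omp.inner.for.body:  <BodyGen>
//   omp.inner.for.inc:   IV = IV + 1; <PostIncGen>; br omp.inner.for.cond
//   omp.inner.for.end:   fall-through continuation
//
// with an optional omp.inner.for.cond.cleanup block staged in between when
// the loop scope requires cleanups.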

bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // Emit inits for the linear variables.
  bool HasLinears = false;
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Init : C->inits()) {
      HasLinears = true;
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      if (const auto *Ref =
              dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
        AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
        const auto *OrigVD = cast<VarDecl>(Ref->getDecl());
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        VD->getInit()->getType(), VK_LValue,
                        VD->getInit()->getExprLoc());
        EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
                                                VD->getType()),
                       /*capturedByInit=*/false);
        EmitAutoVarCleanups(Emission);
      } else {
        EmitVarDecl(*VD);
      }
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
  return HasLinears;
}
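
// As an illustration (hypothetical source), for
//   #pragma omp simd linear(x : step)
// this emits a private copy of 'x' initialized from the original variable
// and, when 'step' is not a compile-time constant, a helper variable holding
// the pre-computed step value, so that the per-iteration linear updates
// replayed in EmitOMPLoopBody can use it.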

void CodeGenFunction::EmitOMPLinearClauseFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (const Expr *F : C->finals()) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(*this)) {
          // When the first final expression is found, emit the conditional
          // block if one was requested.
          llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu");
          DoneBB = createBasicBlock(".omp.linear.pu.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress();
      CodeGenFunction::OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
      ++IC;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    unsigned ClauseAlignment = 0;
    if (const Expr *AlignmentExpr = Clause->getAlignment()) {
      auto *AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
    }
    for (const Expr *E : Clause->varlists()) {
      unsigned Alignment = ClauseAlignment;
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined
        // default alignments for SIMD instructions on the target platforms
        // are assumed.
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
             "alignment is not power of 2");
      if (Alignment != 0) {
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
        CGF.EmitAlignmentAssumption(
            PtrValue, E, /*No second loc needed*/ SourceLocation(), Alignment);
      }
    }
  }
}
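
// As an illustration (hypothetical source), for
//   #pragma omp simd aligned(p : 64)
// this emits an alignment assumption on the value of 'p'; in Clang's codegen
// EmitAlignmentAssumption is typically lowered to an llvm.assume intrinsic
// encoding the 64-byte alignment, which the vectorizer can then rely on.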

void CodeGenFunction::EmitOMPPrivateLoopCounters(
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
  if (!HaveInsertPoint())
    return;
  auto I = S.private_counters().begin();
  for (const Expr *E : S.counters()) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    // Emit var without initialization.
    AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD);
    EmitAutoVarCleanups(VarEmission);
    LocalDeclMap.erase(PrivateVD);
    (void)LoopScope.addPrivate(VD, [&VarEmission]() {
      return VarEmission.getAllocatedAddress();
    });
    if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
        VD->hasGlobalStorage()) {
      (void)LoopScope.addPrivate(PrivateVD, [this, VD, E]() {
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
                        LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
                        E->getType(), VK_LValue, E->getExprLoc());
        return EmitLValue(&DRE).getAddress();
      });
    } else {
      (void)LoopScope.addPrivate(PrivateVD, [&VarEmission]() {
        return VarEmission.getAllocatedAddress();
      });
    }
    ++I;
  }
  // Privatize extra loop counters used in loops for ordered(n) clauses.
  for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
    if (!C->getNumForLoops())
      continue;
    for (unsigned I = S.getCollapsedNumber(),
                  E = C->getLoopNumIterations().size();
         I < E; ++I) {
      const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I));
      const auto *VD = cast<VarDecl>(DRE->getDecl());
      // Override only those variables that can be captured to avoid
      // re-emission of the variables declared within the loops.
      if (DRE->refersToEnclosingVariableOrCapture()) {
        (void)LoopScope.addPrivate(VD, [this, DRE, VD]() {
          return CreateMemTemp(DRE->getType(), VD->getName());
        });
      }
    }
  }
}
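
// As an illustration (hypothetical source), for
//   #pragma omp for ordered(2)
//   for (int i = ...; ...)
//     for (int j = ...; ...) ...
// only the first loop forms the worksharing iteration space (no collapse
// clause), but the second loop's counter 'j' participates in doacross
// dependences, so the ordered(n) handling above privatizes such extra
// counters as well.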

static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
  if (!CGF.HaveInsertPoint())
    return;
  {
    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
    (void)PreCondScope.Privatize();
    // Get initial values of real counters.
    for (const Expr *I : S.inits()) {
      CGF.EmitIgnoredExpr(I);
    }
  }
  // Check that the loop is executed at least once.
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
}

void CodeGenFunction::EmitOMPLinearClause(
    const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto CurPrivate = C->privates().begin();
    for (const Expr *E : C->varlists()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
      if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
        bool IsRegistered = PrivateScope.addPrivate(VD, [this, PrivateVD]() {
          // Emit private VarDecl with copy init.
          EmitVarDecl(*PrivateVD);
          return GetAddrOfLocalVar(PrivateVD);
        });
        assert(IsRegistered && "linear var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      } else {
        EmitVarDecl(*PrivateVD);
      }
      ++CurPrivate;
    }
  }
}

static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     bool IsMonotonic) {
  if (!CGF.HaveInsertPoint())
    return;
  if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In the presence of a finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    if (!IsMonotonic)
      CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
  } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In the presence of a finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(/*Enable=*/false);
  }
}
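
// As an illustration (hypothetical source):
//   #pragma omp simd simdlen(8)   // hint: a preferred vector width of 8
//   #pragma omp simd safelen(8)   // guarantee limited to distances < 8
// Both set the requested vectorization width on the loop metadata, but only
// 'safelen' disables the "all memory accesses parallel" marking, because
// loop-carried dependences at distances of 'safelen' or more may still
// exist.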

void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
                                      bool IsMonotonic) {
  // Walk clauses and process simdlen/safelen.
  LoopStack.setParallel(!IsMonotonic);
  LoopStack.setVectorizeEnable();
  emitSimdlenSafelenClause(*this, D, IsMonotonic);
}

void CodeGenFunction::EmitOMPSimdFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  auto IC = D.counters().begin();
  auto IPC = D.private_counters().begin();
  for (const Expr *F : D.finals()) {
    const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
    const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
        OrigVD->hasGlobalStorage() || CED) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(*this)) {
          // When the first final expression is found, emit the conditional
          // block if one was requested.
          llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then");
          DoneBB = createBasicBlock(".omp.final.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      Address OrigAddr = Address::invalid();
      if (CED) {
        OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
      } else {
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
                        /*RefersToEnclosingVariableOrCapture=*/false,
                        (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
        OrigAddr = EmitLValue(&DRE).getAddress();
      }
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
    }
    ++IC;
    ++IPC;
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
                                         const OMPLoopDirective &S,
                                         CodeGenFunction::JumpDest LoopExit) {
  CGF.EmitOMPLoopBody(S, LoopExit);
  CGF.EmitStopPoint(&S);
}

/// Emit a helper variable and return the corresponding lvalue.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                               const DeclRefExpr *Helper) {
  auto VDecl = cast<VarDecl>(Helper->getDecl());
  CGF.EmitVarDecl(*VDecl);
  return CGF.EmitLValue(Helper);
}

static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
                              PrePostActionTy &Action) {
  Action.Enter(CGF);
  assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
         "Expected simd directive");
  OMPLoopScope PreInitScope(CGF, S);
  // if (PreCond) {
  //   for (IV in 0..LastIteration) BODY;
  //   <Final counter/linear vars updates>;
  // }
  //
  if (isOpenMPDistributeDirective(S.getDirectiveKind()) ||
      isOpenMPWorksharingDirective(S.getDirectiveKind()) ||
      isOpenMPTaskLoopDirective(S.getDirectiveKind())) {
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
  }

  // Emit: if (PreCond) - begin.
  // If the condition constant folds and can be elided, avoid emitting the
  // whole loop.
  bool CondConstant;
  llvm::BasicBlock *ContBlock = nullptr;
  if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
    if (!CondConstant)
      return;
  } else {
    llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then");
    ContBlock = CGF.createBasicBlock("simd.if.end");
    emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                CGF.getProfileCount(&S));
    CGF.EmitBlock(ThenBlock);
    CGF.incrementProfileCounter(&S);
  }

  // Emit the loop iteration variable.
  const Expr *IVExpr = S.getIterationVariable();
  const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
  CGF.EmitVarDecl(*IVDecl);
  CGF.EmitIgnoredExpr(S.getInit());

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iteration count on
  // each iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    CGF.EmitIgnoredExpr(S.getCalcLastIteration());
  }

  CGF.EmitOMPSimdInit(S);

  emitAlignedClause(CGF, S);
  (void)CGF.EmitOMPLinearClauseInit(S);
  {
    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    CGF.EmitOMPLinearClause(S, LoopScope);
    CGF.EmitOMPPrivateClause(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest());
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
    CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; });
    // Emit final copy of the lastprivate variables at the end of loops.
    if (HasLastprivateClause)
      CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
    emitPostUpdateForReductionClause(CGF, S,
                                     [](CodeGenFunction &) { return nullptr; });
  }
  CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; });
  // Emit: if (PreCond) - end.
  if (ContBlock) {
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
}

void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}
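
// Note that, unlike 'parallel', a plain 'simd' directive does not outline its
// body into a separate function: emitInlinedDirective runs the CodeGen lambda
// in place, and the effect of the directive is carried by the loop metadata
// (vectorize width, parallel accesses) set up in EmitOMPSimdInit above.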

void CodeGenFunction::EmitOMPOuterLoop(
    bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
    CodeGenFunction::OMPPrivateScope &LoopScope,
    const CodeGenFunction::OMPLoopArguments &LoopArgs,
    const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
    const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  const SourceRange R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB) or
    // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
    // 'distribute parallel for')
    EmitIgnoredExpr(LoopArgs.EUB);
    // IV = LB
    EmitIgnoredExpr(LoopArgs.Init);
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
  } else {
    BoolCondVal =
        RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL,
                       LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(LoopArgs.Init);

  // Create a block for the increment.
  JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  // Generate !llvm.loop.parallel metadata for loads and stores for loops
  // with dynamic/guided scheduling and without ordered clause.
  if (!isOpenMPSimdDirective(S.getDirectiveKind()))
    LoopStack.setParallel(!IsMonotonic);
  else
    EmitOMPSimdInit(S, IsMonotonic);

  SourceLocation Loc = S.getBeginLoc();

  // when 'distribute' is not combined with a 'for':
  // while (idx <= UB) { BODY; ++idx; }
  // when 'distribute' is combined with a 'for'
  // (e.g. 'distribute parallel for')
  // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
  EmitOMPInnerLoop(
      S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
      [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
        CodeGenLoop(CGF, S, LoopExit);
      },
      [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
        CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
      });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(LoopArgs.NextLB);
    EmitIgnoredExpr(LoopArgs.NextUB);
  }

  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
    if (!DynamicOrOrdered)
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                     S.getDirectiveKind());
  };
  OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
}

void CodeGenFunction::EmitOMPForOuterLoop(
    const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
    const OMPLoopArguments &LoopArgs,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
  const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind.Schedule);

  assert((Ordered ||
          !RT.isStaticNonchunked(ScheduleKind.Schedule,
                                 LoopArgs.Chunk != nullptr)) &&
         "static non-chunked schedule does not need outer loop");

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from
  // the run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined.
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx;
  //     __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
  //   } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided
  // into chunks of size chunk_size, and the chunks are assigned to the threads
  // in the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  if (DynamicOrOrdered) {
    const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
        CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
    llvm::Value *LBVal = DispatchBounds.first;
    llvm::Value *UBVal = DispatchBounds.second;
    CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal,
                                                              LoopArgs.Chunk};
    RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
                           IVSigned, Ordered, DispatchRTInputValues);
  } else {
    CGOpenMPRuntime::StaticRTInput StaticInit(
        IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
        LoopArgs.ST, LoopArgs.Chunk);
    RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
                         ScheduleKind, StaticInit);
  }

  auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
                                    const unsigned IVSize,
                                    const bool IVSigned) {
    if (Ordered) {
      CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
                                                            IVSigned);
    }
  };

  OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
                                 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
  OuterLoopArgs.IncExpr = S.getInc();
  OuterLoopArgs.Init = S.getInit();
  OuterLoopArgs.Cond = S.getCond();
  OuterLoopArgs.NextLB = S.getNextLowerBound();
  OuterLoopArgs.NextUB = S.getNextUpperBound();
  EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
                   emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
}
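
// As an illustration (hypothetical source), for
//   #pragma omp for schedule(dynamic, 4)
// this takes the DynamicOrOrdered path above: emitForDispatchInit registers
// the iteration space and chunk size with the runtime, and each thread then
// repeatedly fetches a chunk via the runtime's dispatch-next entry point
// (__kmpc_dispatch_next_* with libomp) until no chunks remain.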

static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
                             const unsigned IVSize, const bool IVSigned) {}

void CodeGenFunction::EmitOMPDistributeOuterLoop(
    OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
    OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
    const CodeGenLoopTy &CodeGenLoopContent) {

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  // Emit outer loop.
  // Same behavior as an OMPForOuterLoop, except that the schedule cannot be
  // dynamic.
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  CGOpenMPRuntime::StaticRTInput StaticInit(
      IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
      LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
  RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);

  // For combined 'distribute' and 'for' directives, the increment expression
  // of distribute is stored in DistInc. For 'distribute' alone, it is in Inc.
  Expr *IncExpr;
  if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
    IncExpr = S.getDistInc();
  else
    IncExpr = S.getInc();

  // This routine is shared by 'omp distribute parallel for' and
  // 'omp distribute': select the right EUB expression depending on the
  // directive.
  OMPLoopArguments OuterLoopArgs;
  OuterLoopArgs.LB = LoopArgs.LB;
  OuterLoopArgs.UB = LoopArgs.UB;
  OuterLoopArgs.ST = LoopArgs.ST;
  OuterLoopArgs.IL = LoopArgs.IL;
  OuterLoopArgs.Chunk = LoopArgs.Chunk;
  OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedEnsureUpperBound()
                          : S.getEnsureUpperBound();
  OuterLoopArgs.IncExpr = IncExpr;
  OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                           ? S.getCombinedInit()
                           : S.getInit();
  OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                           ? S.getCombinedCond()
                           : S.getCond();
  OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                             ? S.getCombinedNextLowerBound()
                             : S.getNextLowerBound();
  OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                             ? S.getCombinedNextUpperBound()
                             : S.getNextUpperBound();

  EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
                   LoopScope, OuterLoopArgs, CodeGenLoopContent,
                   emitEmptyOrdered);
}
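
// As an illustration (hypothetical source), for
//   #pragma omp distribute parallel for
// isOpenMPLoopBoundSharingDirective is true, so the Combined* expressions are
// chosen above: the 'distribute' level hands each team a chunk [LB, UB] of
// the iteration space, and the nested 'parallel for' then workshares that
// chunk rather than the whole loop.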

static std::pair<LValue, LValue>
emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &S) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));

  // When composing 'distribute' with 'for' (e.g. as in 'distribute
  // parallel for') we need to use the 'distribute'
  // chunk lower and upper bounds rather than the whole loop iteration
  // space. These are parameters to the outlined function for 'parallel'
  // and we copy the bounds of the previous schedule into the
  // current ones.
  LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
  LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
  llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
      PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
  PrevLBVal = CGF.EmitScalarConversion(
      PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
      LS.getIterationVariable()->getType(),
      LS.getPrevLowerBoundVariable()->getExprLoc());
  llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
      PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
  PrevUBVal = CGF.EmitScalarConversion(
      PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
      LS.getIterationVariable()->getType(),
      LS.getPrevUpperBoundVariable()->getExprLoc());

  CGF.EmitStoreOfScalar(PrevLBVal, LB);
  CGF.EmitStoreOfScalar(PrevUBVal, UB);

  return {LB, UB};
}

/// If the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
/// we need to use the LB and UB expressions generated by the worksharing
/// code generation support, whereas in non-combined situations we would
/// just emit 0 and the LastIteration expression.
/// This function is necessary due to the difference between the LB and UB
/// types used by the RT emission routines for 'for_static_init' and
/// 'for_dispatch_init'.
static std::pair<llvm::Value *, llvm::Value *>
emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        Address LB, Address UB) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LS.getIterationVariable();
  // When implementing a dynamic schedule for a 'for' combined with a
  // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
  // is not normalized, as each team only executes its own assigned
  // distribute chunk.
  QualType IteratorTy = IVExpr->getType();
  llvm::Value *LBVal =
      CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
  llvm::Value *UBVal =
      CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
  return {LBVal, UBVal};
}

static void emitDistributeParallelForDistributeInnerBoundParams(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const auto &Dir = cast<OMPLoopDirective>(S);
  LValue LB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
  llvm::Value *LBCast = CGF.Builder.CreateIntCast(
      CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(LBCast);
  LValue UB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));

  llvm::Value *UBCast = CGF.Builder.CreateIntCast(
      CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(UBCast);
}

static void
emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
                                 const OMPLoopDirective &S,
                                 CodeGenFunction::JumpDest LoopExit) {
  auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
                                         PrePostActionTy &Action) {
    Action.Enter(CGF);
    bool HasCancel = false;
    if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
      if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D =
                   dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
    }
    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
                                                     HasCancel);
    CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
                               emitDistributeParallelForInnerBounds,
                               emitDistributeParallelForDispatchBounds);
  };

  emitCommonOMPParallelDirective(
      CGF, S,
      isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
      CGInlinedWorksharingLoop,
      emitDistributeParallelForDistributeInnerBoundParams);
}
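
// Putting the helpers above together for a combined construct: the
// 'distribute' chunk bounds are passed into the outlined 'parallel' function
// as size_t parameters (emitDistributeParallelForDistributeInnerBoundParams),
// copied into the worksharing loop's LB/UB helpers on entry
// (emitDistributeParallelForInnerBounds), and the nested 'for' then schedules
// only that sub-range across the threads of each team.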

void CodeGenFunction::EmitOMPDistributeParallelForDirective(
    const OMPDistributeParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
    const OMPDistributeParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

void CodeGenFunction::EmitOMPDistributeSimdDirective(
    const OMPDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}

void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
  // Emit target simd region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetSimdDirective(
    const OMPTargetSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

namespace {
struct ScheduleKindModifiersTy {
  OpenMPScheduleClauseKind Kind;
  OpenMPScheduleClauseModifier M1;
  OpenMPScheduleClauseModifier M2;
  ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
                          OpenMPScheduleClauseModifier M1,
                          OpenMPScheduleClauseModifier M2)
      : Kind(Kind), M1(M1), M2(M2) {}
};
} // namespace

bool CodeGenFunction::EmitOMPWorksharingLoop(
    const OMPLoopDirective &S, Expr *EUB,
    const CodeGenLoopBoundsTy &CodeGenLoopBounds,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  // Emit the loop iteration variable.
  const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iteration count on
  // each iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return false;
    } else {
      llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    RunCleanupsScope DoacrossCleanupScope(*this);
    bool Ordered = false;
    if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
      if (OrderedClause->getNumForLoops())
        RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations());
      else
        Ordered = true;
    }

    llvm::DenseSet<const Expr *> EmittedFinals;
    emitAlignedClause(*this, S);
    bool HasLinears = EmitOMPLinearClauseInit(S);
    // Emit helper vars inits.

    std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
    LValue LB = Bounds.first;
    LValue UB = Bounds.second;
    LValue ST =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
    LValue IL =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

    // Emit 'then' code.
    {
      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
        // Emit implicit barrier to synchronize threads and avoid data races
        // on initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPReductionClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      EmitOMPLinearClause(S, LoopScope);
      (void)LoopScope.Privatize();
      if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);

      // Detect the loop schedule kind and chunk.
      const Expr *ChunkExpr = nullptr;
      OpenMPScheduleTy ScheduleKind;
      if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
        ScheduleKind.Schedule = C->getScheduleKind();
        ScheduleKind.M1 = C->getFirstScheduleModifier();
        ScheduleKind.M2 = C->getSecondScheduleModifier();
        ChunkExpr = C->getChunkSize();
      } else {
        // Default behaviour for schedule clause.
        CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
            *this, S, ScheduleKind.Schedule, ChunkExpr);
      }
      bool HasChunkSizeOne = false;
      llvm::Value *Chunk = nullptr;
      if (ChunkExpr) {
        Chunk = EmitScalarExpr(ChunkExpr);
        Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(),
                                     S.getIterationVariable()->getType(),
                                     S.getBeginLoc());
        Expr::EvalResult Result;
        if (ChunkExpr->EvaluateAsInt(Result, getContext())) {
          llvm::APSInt EvaluatedChunk = Result.Val.getInt();
          HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      // OpenMP 4.5, 2.7.1 Loop Construct, Description.
      // If the static schedule kind is specified or if the ordered clause is
      // specified, and if no monotonic modifier is specified, the effect will
      // be as if the monotonic modifier was specified.
      bool StaticChunkedOne =
          RT.isStaticChunked(ScheduleKind.Schedule,
                             /* Chunked */ Chunk != nullptr) &&
          HasChunkSizeOne &&
          isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
      if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
                                 /* Chunked */ Chunk != nullptr) ||
           StaticChunkedOne) &&
          !Ordered) {
        if (isOpenMPSimdDirective(S.getDirectiveKind()))
          EmitOMPSimdInit(S, /*IsMonotonic=*/true);
        // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
        // When no chunk_size is specified, the iteration space is divided
        // into chunks that are approximately equal in size, and at most one
        // chunk is distributed to each thread. Note that the size of the
        // chunks is unspecified in this case.
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
            UB.getAddress(), ST.getAddress(),
            StaticChunkedOne ? Chunk : nullptr);
        RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
                             ScheduleKind, StaticInit);
        JumpDest LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        if (!StaticChunkedOne)
          EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // For unchunked static schedule generate:
        //
        // while (idx <= UB) {
        //   BODY;
        //   ++idx;
        // }
        //
        // For static schedule with chunk one:
        //
        // while (IV <= PrevUB) {
        //   BODY;
        //   IV += ST;
        // }
        EmitOMPInnerLoop(
            S, LoopScope.requiresCleanups(),
            StaticChunkedOne ? S.getCombinedParForInDistCond() : S.getCond(),
            StaticChunkedOne ? S.getDistInc() : S.getInc(),
            [&S, LoopExit](CodeGenFunction &CGF) {
              CGF.EmitOMPLoopBody(S, LoopExit);
              CGF.EmitStopPoint(&S);
            },
            [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        auto &&CodeGen = [&S](CodeGenFunction &CGF) {
          CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                         S.getDirectiveKind());
        };
        OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
      } else {
        const bool IsMonotonic =
            Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
            ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
            ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
            ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
                                             ST.getAddress(), IL.getAddress(),
                                             Chunk, EUB);
        EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
                            LoopArguments, CGDispatchBounds);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
        });
      }
      EmitOMPReductionClauseFinal(
          S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
                 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
                 : /*Parallel only*/ OMPD_parallel);
      // Emit post-update of the reduction variables if IsLastIter != 0.
      emitPostUpdateForReductionClause(
          *this, S, [IL, &S](CodeGenFunction &CGF) {
            return CGF.Builder.CreateIsNotNull(
                CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
          });
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, isOpenMPSimdDirective(S.getDirectiveKind()),
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
    }
    EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
    });
    DoacrossCleanupScope.ForceCleanup();
    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, /*IsFinished=*/true);
    }
  }
  return HasLastprivateClause;
}
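
// To summarize the dispatch above: a static non-chunked schedule (and the
// chunk-size-one case for combined distribute constructs) needs only a single
// runtime call pair (for-static-init / for-static-fini) around one inner
// loop, while every other schedule goes through EmitOMPForOuterLoop, which
// wraps the inner loop in a chunk-requesting dispatch loop.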

/// The following two functions generate expressions for the loop lower
/// and upper bounds in case of static and dynamic (dispatch) schedule
/// of the associated 'for' or 'distribute' loop.
static std::pair<LValue, LValue>
emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  const auto &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
  return {LB, UB};
}

/// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
/// consider the lower and upper bound expressions generated by the
/// worksharing loop support, but we use 0 and the iteration space size as
/// constants.
static std::pair<llvm::Value *, llvm::Value *>
emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
                          Address LB, Address UB) {
  const auto &LS = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LS.getIterationVariable();
  const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
  llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
  llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
  return {LBVal, UBVal};
}

void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                                 emitForLoopBounds,
                                                 emitDispatchForLoopBounds);
  };
  {
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
                                                S.hasCancel());
  }

  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
}

void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                                 emitForLoopBounds,
                                                 emitDispatchForLoopBounds);
  };
  {
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
  }

  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
}

static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
                                const Twine &Name,
                                llvm::Value *Init = nullptr) {
  LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
  if (Init)
    CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
  return LVal;
}
2516
EmitSections(const OMPExecutableDirective & S)2517 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
2518 const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
2519 const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
2520 bool HasLastprivates = false;
2521 auto &&CodeGen = [&S, CapturedStmt, CS,
2522 &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
2523 ASTContext &C = CGF.getContext();
2524 QualType KmpInt32Ty =
2525 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2526 // Emit helper vars inits.
2527 LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
2528 CGF.Builder.getInt32(0));
2529 llvm::ConstantInt *GlobalUBVal = CS != nullptr
2530 ? CGF.Builder.getInt32(CS->size() - 1)
2531 : CGF.Builder.getInt32(0);
2532 LValue UB =
2533 createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
2534 LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
2535 CGF.Builder.getInt32(1));
2536 LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
2537 CGF.Builder.getInt32(0));
2538 // Loop counter.
2539 LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
2540 OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
2541 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                        OK_Ordinary, S.getBeginLoc(), FPOptions());
    // Increment for loop counter.
    UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue,
                      OK_Ordinary, S.getBeginLoc(), true);
    auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      //   ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      llvm::SwitchInst *SwitchStmt =
          CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()),
                                   ExitBB, CS == nullptr ? 1 : CS->size());
      if (CS) {
        unsigned CaseNumber = 0;
        for (const Stmt *SubStmt : CS->children()) {
          auto *CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(CapturedStmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of
      // lastprivate variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGOpenMPRuntime::StaticRTInput StaticInit(
        /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
        LB.getAddress(), UB.getAddress(), ST.getAddress());
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit);
    // UB = min(UB, GlobalUB);
    llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc());
    llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                     S.getDirectiveKind());
    };
    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
    });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc())));
  };

  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads and avoid data races on
    // initialization of firstprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
                                           OMPD_unknown);
  }
}
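
// Illustrative sketch of the lowering performed by EmitSections above (a
// hand-written approximation, not compiler output). For
//
//   #pragma omp sections
//   {
//     #pragma omp section
//     work0();
//     #pragma omp section
//     work1();
//   }
//
// the section bodies are dispatched through the switch built in BodyGen,
// driven by a statically scheduled loop over the section indices:
//
//   <static-init runtime call>(&IL, &LB, &UB, &ST);
//   UB = min(UB, GlobalUB);  // GlobalUB == number of sections - 1
//   for (IV = LB; IV <= UB; ++IV)
//     switch (IV) { case 0: work0(); break; case 1: work1(); break; }
//   <static-fini runtime call>();
//
// With the libomp-based runtime the init/fini calls are typically
// __kmpc_for_static_init_4 / __kmpc_for_static_fini (an assumption here,
// selected by emitForStaticInit for a 32-bit signed induction variable).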

void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  {
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    EmitSections(S);
  }
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>()) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
                                           OMPD_sections);
  }
}

void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
                                              S.hasCancel());
}

void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
  llvm::SmallVector<const Expr *, 8> CopyprivateVars;
  llvm::SmallVector<const Expr *, 8> DestExprs;
  llvm::SmallVector<const Expr *, 8> SrcExprs;
  llvm::SmallVector<const Expr *, 8> AssignmentOps;
  // Check if there are any 'copyprivate' clauses associated with this
  // 'single' construct.
  // Build a list of copyprivate variables along with helper expressions
  // (<source>, <destination>, <destination> = <source> expressions).
  for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
    CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
    DestExprs.append(C->destination_exprs().begin(),
                     C->destination_exprs().end());
    SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
    AssignmentOps.append(C->assignment_ops().begin(),
                         C->assignment_ops().end());
  }
  // Emit code for the 'single' region along with the 'copyprivate' clauses.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope SingleScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
    CGF.EmitOMPPrivateClause(S, SingleScope);
    (void)SingleScope.Privatize();
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  {
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(),
                                            CopyprivateVars, DestExprs,
                                            SrcExprs, AssignmentOps);
  }
  // Emit an implicit barrier at the end (to avoid data races on firstprivate
  // initialization), unless a 'nowait' or 'copyprivate' clause was specified.
  if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
    CGM.getOpenMPRuntime().emitBarrierCall(
        *this, S.getBeginLoc(),
        S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
  }
}
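
// Illustrative sketch of the 'single'/'copyprivate' semantics handled above
// (not the exact emitted IR). Given
//
//   #pragma omp single copyprivate(x)
//   x = compute();
//
// exactly one thread executes the region, and the helper expressions
// collected above (<source>, <destination>, <destination> = <source>) let
// the runtime broadcast that thread's 'x' to every other thread, typically
// through a __kmpc_copyprivate call that invokes a generated copy function;
// the call name is the libomp convention and is only an assumption here.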

void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  const Expr *Hint = nullptr;
  if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
    Hint = HintClause->getHint();
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitCriticalRegion(*this,
                                            S.getDirectiveName().getAsString(),
                                            CodeGen, S.getBeginLoc(), Hint);
}
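
// Illustrative sketch: a directive such as
//
//   #pragma omp critical(name) hint(omp_lock_hint_contended)
//   { ... }
//
// is lowered by emitCriticalRegion to a runtime-guarded region, roughly
//
//   __kmpc_critical_with_hint(&loc, tid, &lock_for_name, hint);
//   ...body...
//   __kmpc_end_critical(&loc, tid, &lock_for_name);
//
// where 'lock_for_name' stands for the per-name lock variable the runtime
// library uses; entry-point names follow libomp and are assumptions here.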

void CodeGenFunction::EmitOMPParallelForDirective(
    const OMPParallelForDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for simd' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPParallelSectionsDirective(
    const OMPParallelSectionsDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'sections' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitSections(S);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTaskBasedDirective(
    const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
    const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
    OMPTaskDataTy &Data) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
  auto I = CS->getCapturedDecl()->param_begin();
  auto PartId = std::next(I);
  auto TaskT = std::next(I, 4);
  // Check if the task is final.
  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
    // If the condition constant folds and can be elided, try to avoid
    // emitting the condition and the dead arm of the if/else.
    const Expr *Cond = Clause->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Data.Final.setInt(CondConstant);
    else
      Data.Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Data.Final.setInt(/*IntVal=*/false);
  }
  // Check if the task has a 'priority' clause.
  if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
    const Expr *Prio = Clause->getPriority();
    Data.Priority.setInt(/*IntVal=*/true);
    Data.Priority.setPointer(EmitScalarConversion(
        EmitScalarExpr(Prio), Prio->getType(),
        getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
        Prio->getExprLoc()));
  }
  // The first function argument for tasks is a thread id, the second one is
  // a part id (0 for tied tasks, >= 0 for untied tasks).
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  // Get the list of private variables.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.PrivateVars.push_back(*IRef);
        Data.PrivateCopies.push_back(IInit);
      }
      ++IRef;
    }
  }
  EmittedAsPrivate.clear();
  // Get the list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.FirstprivateVars.push_back(*IRef);
        Data.FirstprivateCopies.push_back(IInit);
        Data.FirstprivateInits.push_back(*IElemInitRef);
      }
      ++IRef;
      ++IElemInitRef;
    }
  }
  // Get the list of lastprivate variables (for taskloops).
  llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ID = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.LastprivateVars.push_back(*IRef);
        Data.LastprivateCopies.push_back(IInit);
      }
      LastprivateDstsOrigs.insert(
          {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
           cast<DeclRefExpr>(*IRef)});
      ++IRef;
      ++ID;
    }
  }
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (const Expr *Ref : C->varlists()) {
      Data.ReductionVars.emplace_back(Ref);
      Data.ReductionCopies.emplace_back(*IPriv);
      Data.ReductionOps.emplace_back(*IRed);
      LHSs.emplace_back(*ILHS);
      RHSs.emplace_back(*IRHS);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ILHS, 1);
      std::advance(IRHS, 1);
    }
  }
  Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
      *this, S.getBeginLoc(), LHSs, RHSs, Data);
  // Build the list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>())
    for (const Expr *IRef : C->varlists())
      Data.Dependences.emplace_back(C->getDependencyKind(), IRef);
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
                    CapturedRegion](CodeGenFunction &CGF,
                                    PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
        !Data.LastprivateVars.empty()) {
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      llvm::Value *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
      llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
          CS->getCapturedDecl()->getParam(PrivatesParam)));
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      CallArgs.push_back(PrivatesPtr);
      for (const Expr *E : Data.PrivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (const Expr *E : Data.FirstprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (const Expr *E : Data.LastprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".lastpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
                                                          CopyFn, CallArgs);
      for (const auto &Pair : LastprivateDstsOrigs) {
        const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
        DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/
                        CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        Pair.second->getType(), VK_LValue,
                        Pair.second->getExprLoc());
        Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
          return CGF.EmitLValue(&DRE).getAddress();
        });
      }
      for (const auto &Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    if (Data.Reductions) {
      OMPLexicalScope LexScope(CGF, S, CapturedRegion);
      ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
                             Data.ReductionOps);
      llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
      for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                           RedCG, Cnt);
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement =
            Address(CGF.EmitScalarConversion(
                        Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                        CGF.getContext().getPointerType(
                            Data.ReductionCopies[Cnt]->getType()),
                        Data.ReductionCopies[Cnt]->getExprLoc()),
                    Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        Scope.addPrivate(RedCG.getBaseDecl(Cnt),
                         [Replacement]() { return Replacement; });
      }
    }
    // Privatize all private variables except for in_reduction items.
    (void)Scope.Privatize();
    SmallVector<const Expr *, 4> InRedVars;
    SmallVector<const Expr *, 4> InRedPrivs;
    SmallVector<const Expr *, 4> InRedOps;
    SmallVector<const Expr *, 4> TaskgroupDescriptors;
    for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
      auto IPriv = C->privates().begin();
      auto IRed = C->reduction_ops().begin();
      auto ITD = C->taskgroup_descriptors().begin();
      for (const Expr *Ref : C->varlists()) {
        InRedVars.emplace_back(Ref);
        InRedPrivs.emplace_back(*IPriv);
        InRedOps.emplace_back(*IRed);
        TaskgroupDescriptors.emplace_back(*ITD);
        std::advance(IPriv, 1);
        std::advance(IRed, 1);
        std::advance(ITD, 1);
      }
    }
    // Privatize in_reduction items here, because taskgroup descriptors must
    // be privatized earlier.
    OMPPrivateScope InRedScope(CGF);
    if (!InRedVars.empty()) {
      ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
      for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        // The taskgroup descriptor variable is always implicit firstprivate
        // and privatized already during processing of the firstprivates.
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                           RedCG, Cnt);
        llvm::Value *ReductionsPtr =
            CGF.EmitLoadOfScalar(CGF.EmitLValue(TaskgroupDescriptors[Cnt]),
                                 TaskgroupDescriptors[Cnt]->getExprLoc());
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement = Address(
            CGF.EmitScalarConversion(
                Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
                InRedPrivs[Cnt]->getExprLoc()),
            Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
                              [Replacement]() { return Replacement; });
      }
    }
    (void)InRedScope.Privatize();

    Action.Enter(CGF);
    BodyGen(CGF);
  };
  llvm::Value *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
      Data.NumberOfParts);
  OMPLexicalScope Scope(*this, S);
  TaskGen(*this, OutlinedFn, Data);
}
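
// Illustrative sketch of the outlining performed above (an approximation,
// not the literal emitted IR). For
//
//   #pragma omp task firstprivate(a)
//   use(a);
//
// the body is emitted into a task entry roughly of the form
//
//   kmp_int32 .omp_task_entry.(kmp_int32 tid, kmp_task_t *task) {
//     // the copy function materializes the addresses of private copies
//     use(*a_priv_ptr);
//     return 0;
//   }
//
// and TaskGen then emits the allocation/enqueue sequence (with libomp,
// typically __kmpc_omp_task_alloc followed by __kmpc_omp_task, or
// __kmpc_omp_task_with_deps when 'depend' clauses were collected above).
// Types and names follow the libomp task ABI and are assumptions here.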

static ImplicitParamDecl *
createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
                                  QualType Ty, CapturedDecl *CD,
                                  SourceLocation Loc) {
  auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
                                           ImplicitParamDecl::Other);
  auto *OrigRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
  auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
                                              ImplicitParamDecl::Other);
  auto *PrivateRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
  QualType ElemType = C.getBaseElementType(Ty);
  auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr,
                                           ElemType, ImplicitParamDecl::Other);
  auto *InitRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
  PrivateVD->setInitStyle(VarDecl::CInit);
  PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
                                              InitRef, /*BasePath=*/nullptr,
                                              VK_RValue));
  Data.FirstprivateVars.emplace_back(OrigRef);
  Data.FirstprivateCopies.emplace_back(PrivateRef);
  Data.FirstprivateInits.emplace_back(InitRef);
  return OrigVD;
}

void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
    const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
    OMPTargetDataInfo &InputInfo) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  auto I = CS->getCapturedDecl()->param_begin();
  auto PartId = std::next(I);
  auto TaskT = std::next(I, 4);
  OMPTaskDataTy Data;
  // The task is not final.
  Data.Final.setInt(/*IntVal=*/false);
  // Get the list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (auto *IInit : C->private_copies()) {
      Data.FirstprivateVars.push_back(*IRef);
      Data.FirstprivateCopies.push_back(IInit);
      Data.FirstprivateInits.push_back(*IElemInitRef);
      ++IRef;
      ++IElemInitRef;
    }
  }
  OMPPrivateScope TargetScope(*this);
  VarDecl *BPVD = nullptr;
  VarDecl *PVD = nullptr;
  VarDecl *SVD = nullptr;
  if (InputInfo.NumberOfTargetItems > 0) {
    auto *CD = CapturedDecl::Create(
        getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
    llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
    QualType BaseAndPointersType = getContext().getConstantArrayType(
        getContext().VoidPtrTy, ArrSize, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    BPVD = createImplicitFirstprivateForType(
        getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc());
    PVD = createImplicitFirstprivateForType(
        getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc());
    QualType SizesType = getContext().getConstantArrayType(
        getContext().getSizeType(), ArrSize, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
                                            S.getBeginLoc());
    TargetScope.addPrivate(
        BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; });
    TargetScope.addPrivate(PVD,
                           [&InputInfo]() { return InputInfo.PointersArray; });
    TargetScope.addPrivate(SVD,
                           [&InputInfo]() { return InputInfo.SizesArray; });
  }
  (void)TargetScope.Privatize();
  // Build the list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>())
    for (const Expr *IRef : C->varlists())
      Data.Dependences.emplace_back(C->getDependencyKind(), IRef);
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD,
                    &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    if (!Data.FirstprivateVars.empty()) {
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      llvm::Value *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
      llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
          CS->getCapturedDecl()->getParam(PrivatesParam)));
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      CallArgs.push_back(PrivatesPtr);
      for (const Expr *E : Data.FirstprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
                                                          CopyFn, CallArgs);
      for (const auto &Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    // Privatize all private variables except for in_reduction items.
    (void)Scope.Privatize();
    if (InputInfo.NumberOfTargetItems > 0) {
      InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0, CGF.getPointerSize());
      InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(PVD), /*Index=*/0, CGF.getPointerSize());
      InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(SVD), /*Index=*/0, CGF.getSizeSize());
    }

    Action.Enter(CGF);
    OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
    BodyGen(CGF);
  };
  llvm::Value *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
      Data.NumberOfParts);
  llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
  IntegerLiteral IfCond(getContext(), TrueOrFalse,
                        getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
                        SourceLocation());

  CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn,
                                      SharedsTy, CapturedStruct, &IfCond, Data);
}

void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_task) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if we should emit a tied or untied task.
  Data.Tied = !S.getSingleClause<OMPUntiedClause>();
  auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(CS->getCapturedStmt());
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn,
                                            SharedsTy, CapturedStruct, IfCond,
                                            Data);
  };
  EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
}
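
// Illustrative note on the 'if' clause handled above: when the condition is
// false, emitTaskCall must execute the task immediately as an undeferred
// task. Conceptually the dispatch is
//
//   if (cond)
//     __kmpc_omp_task(&loc, tid, task);
//   else {
//     __kmpc_omp_task_begin_if0(&loc, tid, task);
//     task_entry(tid, task);
//     __kmpc_omp_task_complete_if0(&loc, tid, task);
//   }
//
// The *_if0 entry points are the libomp names and are shown here as an
// assumption; this is a sketch of the logic, not the exact emitted IR.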

void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier);
}

void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
  CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPTaskgroupDirective(
    const OMPTaskgroupDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    if (const Expr *E = S.getReductionRef()) {
      SmallVector<const Expr *, 4> LHSs;
      SmallVector<const Expr *, 4> RHSs;
      OMPTaskDataTy Data;
      for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
        auto IPriv = C->privates().begin();
        auto IRed = C->reduction_ops().begin();
        auto ILHS = C->lhs_exprs().begin();
        auto IRHS = C->rhs_exprs().begin();
        for (const Expr *Ref : C->varlists()) {
          Data.ReductionVars.emplace_back(Ref);
          Data.ReductionCopies.emplace_back(*IPriv);
          Data.ReductionOps.emplace_back(*IRed);
          LHSs.emplace_back(*ILHS);
          RHSs.emplace_back(*IRHS);
          std::advance(IPriv, 1);
          std::advance(IRed, 1);
          std::advance(ILHS, 1);
          std::advance(IRHS, 1);
        }
      }
      llvm::Value *ReductionDesc =
          CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),
                                                           LHSs, RHSs, Data);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      CGF.EmitVarDecl(*VD);
      CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
                            /*Volatile=*/false, E->getType());
    }
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc());
}
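
// Illustrative sketch: for
//
//   #pragma omp taskgroup task_reduction(+: r)
//   { /* child tasks using in_reduction(+: r) */ }
//
// the code above materializes the reduction descriptor, conceptually
//
//   void *tg = __kmpc_task_reduction_init(tid, num_items, item_array);
//
// and stores it in the helper variable returned by getReductionRef(), so
// that child tasks can obtain their per-thread copies later (see the
// getTaskReductionItem calls in EmitOMPTaskBasedDirective). The entry-point
// name follows the libomp runtime and is an assumption here.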

void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
  CGM.getOpenMPRuntime().emitFlush(
      *this,
      [&S]() -> ArrayRef<const Expr *> {
        if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
          return llvm::makeArrayRef(FlushClause->varlist_begin(),
                                    FlushClause->varlist_end());
        return llvm::None;
      }(),
      S.getBeginLoc());
}
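
// Illustrative note: with the libomp-based lowering a '#pragma omp flush',
// with or without a list, reaches the runtime as a single call, conceptually
// '__kmpc_flush(&loc)'; the variable list gathered above is passed to
// emitFlush but does not change the emitted call in that lowering. This
// reflects the runtime behavior as an assumption, not a guarantee.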

void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
                                            const CodeGenLoopTy &CodeGenLoop,
                                            Expr *IncExpr) {
  // Emit the loop iteration variable.
  const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iteration count variable.
  // If it is not a variable, Sema decided to calculate the iteration count on
  // each iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iteration count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause = false;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    emitAlignedClause(*this, S);
    // Emit 'then' code.
    {
      // Emit helper vars inits.

      LValue LB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedLowerBoundVariable()
                          : S.getLowerBoundVariable())));
      LValue UB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedUpperBoundVariable()
                          : S.getUpperBoundVariable())));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races
        // on initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
          !isOpenMPParallelDirective(S.getDirectiveKind()) &&
          !isOpenMPTeamsDirective(S.getDirectiveKind()))
        EmitOMPReductionClauseInit(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();
      if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);

      // Detect the distribute schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const Expr *Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getBeginLoc());
        }
      } else {
        // Default behaviour for dist_schedule clause.
        CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
            *this, S, ScheduleKind, Chunk);
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
      bool StaticChunked = RT.isStaticChunked(
                               ScheduleKind, /*Chunked=*/Chunk != nullptr) &&
                           isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
      if (RT.isStaticNonchunked(ScheduleKind,
                                /*Chunked=*/Chunk != nullptr) ||
          StaticChunked) {
        if (isOpenMPSimdDirective(S.getDirectiveKind()))
          EmitOMPSimdInit(S, /*IsMonotonic=*/true);
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, /*Ordered=*/false, IL.getAddress(),
            LB.getAddress(), UB.getAddress(), ST.getAddress(),
            StaticChunked ? Chunk : nullptr);
        RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
                                    StaticInit);
        JumpDest LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedEnsureUpperBound()
                            : S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedInit()
                            : S.getInit());

        const Expr *Cond =
            isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                ? S.getCombinedCond()
                : S.getCond();

        if (StaticChunked)
          Cond = S.getCombinedDistCond();

        // For static unchunked schedules, generate:
        //
        // 1. For distribute alone, codegen
        //    while (idx <= UB) {
        //      BODY;
        //      ++idx;
        //    }
        //
        // 2. When combined with 'for' (e.g. as in 'distribute parallel for')
        //    while (idx <= UB) {
        //      <CodeGen rest of pragma>(LB, UB);
        //      idx += ST;
        //    }
        //
        // For static chunked schedules (combined with 'for'), generate:
        //
        // while (IV <= GlobalUB) {
        //   <CodeGen rest of pragma>(LB, UB);
        //   LB += ST;
        //   UB += ST;
        //   UB = min(UB, GlobalUB);
        //   IV = LB;
        // }
        //
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
                         [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
                           CodeGenLoop(CGF, S, LoopExit);
                         },
                         [&S, StaticChunked](CodeGenFunction &CGF) {
                           if (StaticChunked) {
                             CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
                             CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
                             CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
                             CGF.EmitIgnoredExpr(S.getCombinedInit());
                           }
                         });
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getBeginLoc(), S.getDirectiveKind());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // the runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments = {
            LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
            Chunk};
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
                                   CodeGenLoop);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
        });
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
          !isOpenMPParallelDirective(S.getDirectiveKind()) &&
          !isOpenMPTeamsDirective(S.getDirectiveKind())) {
        EmitOMPReductionClauseFinal(S, OMPD_simd);
        // Emit post-update of the reduction variables if IsLastIter != 0.
        emitPostUpdateForReductionClause(
            *this, S, [IL, &S](CodeGenFunction &CGF) {
              return CGF.Builder.CreateIsNotNull(
                  CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
            });
      }
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause) {
        EmitOMPLastprivateClauseFinal(
            S, /*NoFinals=*/false,
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
      }
    }

    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
}
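
// Illustrative sketch: for a standalone
//
//   #pragma omp distribute
//   for (int i = 0; i < N; ++i) body(i);
//
// with the default static, non-chunked schedule, the code above reduces to
//
//   <distribute static-init runtime call>(&IL, &LB, &UB, &ST);
//   UB = min(UB, GlobalUB);
//   for (IV = LB; IV <= UB; ++IV) body(IV);
//   <static-fini runtime call>();
//
// where each team receives one approximately equal chunk [LB..UB]. The
// runtime call placeholders stand for whatever entry points
// emitDistributeStaticInit/emitForStaticFinish select; this is a sketch of
// the control flow, not the exact emitted IR.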

void CodeGenFunction::EmitOMPDistributeDirective(
    const OMPDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
                                                   const CapturedStmt *S) {
  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
  CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
  CGF.CapturedStmtInfo = &CapStmtInfo;
  llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
  Fn->setDoesNotRecurse();
  return Fn;
}

void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
  if (S.hasClausesOfKind<OMPDependClause>()) {
    assert(!S.getAssociatedStmt() &&
           "'ordered' construct with 'depend' clause must not have an "
           "associated statement.");
    for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
    return;
  }
  const auto *C = S.getSingleClause<OMPSIMDClause>();
  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
                                 PrePostActionTy &Action) {
    const CapturedStmt *CS = S.getInnermostCapturedStmt();
    if (C) {
      llvm::SmallVector<llvm::Value *, 16> CapturedVars;
      CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
      llvm::Function *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
      CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
                                                      OutlinedFn, CapturedVars);
    } else {
      Action.Enter(CGF);
      CGF.EmitStmt(CS->getCapturedStmt());
    }
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C);
}
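
// Illustrative note: a stand-alone '#pragma omp ordered depend(sink: ...)'
// inside a doacross loop nest is emitted above as a doacross wait call (and
// 'depend(source)' as the matching post call); with libomp these are
// __kmpc_doacross_wait/__kmpc_doacross_post. A block-associated
//
//   #pragma omp ordered
//   { body(); }
//
// instead becomes a region bracketed by __kmpc_ordered/__kmpc_end_ordered,
// while the 'simd' form above bypasses the runtime and calls the outlined
// body directly. Entry-point names are libomp conventions, assumed here.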

static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType,
                                         SourceLocation Loc) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
                                                   DestType, Loc)
                        : CGF.EmitComplexToScalarConversion(
                              Val.getComplexVal(), SrcType, DestType, Loc);
}

static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType, SourceLocation Loc) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  CodeGenFunction::ComplexPairTy ComplexVal;
  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
    QualType DestElementType =
        DestType->castAs<ComplexType>()->getElementType();
    llvm::Value *ScalarVal = CGF.EmitScalarConversion(
        Val.getScalarVal(), SrcType, DestElementType, Loc);
    ComplexVal = CodeGenFunction::ComplexPairTy(
        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
  } else {
    assert(Val.isComplex() && "Must be a scalar or complex.");
    QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
    QualType DestElementType =
        DestType->castAs<ComplexType>()->getElementType();
    ComplexVal.first = CGF.EmitScalarConversion(
        Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
    ComplexVal.second = CGF.EmitScalarConversion(
        Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
  }
  return ComplexVal;
}

static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
                                  LValue LVal, RValue RVal) {
  if (LVal.isGlobalReg()) {
    CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
  } else {
    CGF.EmitAtomicStore(RVal, LVal,
                        IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                                 : llvm::AtomicOrdering::Monotonic,
                        LVal.isVolatile(), /*IsInit=*/false);
  }
}

void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
                                         QualType RValTy, SourceLocation Loc) {
  switch (getEvaluationKind(LVal.getType())) {
  case TEK_Scalar:
    EmitStoreThroughLValue(RValue::get(convertToScalarValue(
                               *this, RVal, RValTy, LVal.getType(), Loc)),
                           LVal);
    break;
  case TEK_Complex:
    EmitStoreOfComplex(
        convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}

static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  RValue Res = XLValue.isGlobalReg()
                   ? CGF.EmitLoadOfLValue(XLValue, Loc)
                   : CGF.EmitAtomicLoad(
                         XLValue, Loc,
                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                                  : llvm::AtomicOrdering::Monotonic,
                         XLValue.isVolatile());
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
  CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
}

static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                   const Expr *X, const Expr *E,
                                   SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  ASTContext &Context = CGF.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue
  // for the 'x' expression is simple, and atomics are supported for the given
  // type on the target platform.
  if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() ||
      (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
       (Update.getScalarVal()->getType() !=
        X.getAddress().getElementType())) ||
      !X.getAddress().getElementType()->isIntegerTy() ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
  case BO_Assign:
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return std::make_pair(false, RValue::get(nullptr));
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_Cmp:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  llvm::Value *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress().getElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  llvm::Value *Res =
      CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}
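
// Illustrative sketch: when the checks above succeed, an update such as
//
//   #pragma omp atomic
//   x += v;   // int x, v;
//
// maps onto a single instruction, roughly
//
//   %old = atomicrmw add i32* %x, i32 %v monotonic
//
// (seq_cst ordering is used instead when the 'seq_cst' clause is present).
// Forms that fail the checks, e.g. a floating-point 'x', 'x = expr - x'
// (subtraction with 'x' on the RHS), or mismatched operand types, fall back
// to the compare-and-swap update path in EmitOMPAtomicSimpleUpdateExpr
// below. The IR line is a hand-written sketch, not compiler output.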

std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> CommonGen) {
  // Update expressions are allowed to have the following forms:
  // x binop= expr;     -> xrval binop expr;
  // x++, ++x           -> xrval + 1;
  // x--, --x           -> xrval - 1;
  // x = x binop expr;  -> xrval binop expr;
  // x = expr Op x;     -> expr binop xrval;
  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
  if (!Res.first) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Perform compare-and-swap procedure.
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
  return Res;
}

static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                    const Expr *X, const Expr *E,
                                    const Expr *UE, bool IsXLHSInRHSPart,
                                    SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  // x binop= expr;     -> xrval binop expr;
  // x++, ++x           -> xrval + 1;
  // x--, --x           -> xrval - 1;
  // x = x binop expr;  -> xrval binop expr;
  // x = expr Op x;     -> expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  llvm::AtomicOrdering AO = IsSeqCst
                                ? llvm::AtomicOrdering::SequentiallyConsistent
                                : llvm::AtomicOrdering::Monotonic;
  const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
    CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
    CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
    return CGF.EmitAnyExpr(UE);
  };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static RValue convertToType(CodeGenFunction &CGF, RValue Value,
                            QualType SourceType, QualType ResType,
                            SourceLocation Loc) {
  switch (CGF.getEvaluationKind(ResType)) {
  case TEK_Scalar:
    return RValue::get(
        convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
  case TEK_Complex: {
    auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
    return RValue::getComplex(Res.first, Res.second);
  }
  case TEK_Aggregate:
    break;
  }
  llvm_unreachable("Must be a scalar or complex.");
}

static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  llvm::AtomicOrdering AO = IsSeqCst
                                ? llvm::AtomicOrdering::SequentiallyConsistent
                                : llvm::AtomicOrdering::Monotonic;
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    // x binop= expr;     -> xrval binop expr;
    // x++, ++x           -> xrval + 1;
    // x--, --x           -> xrval - 1;
    // x = x binop expr;  -> xrval binop expr;
    // x = expr Op x;     -> expr binop xrval;
    const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsPostfixUpdate](RValue XRValue) {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use the old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide the new value, so evaluate it using
        // the old value of 'x'.
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise a simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of the old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
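
// Illustrative sketch: for a postfix capture form such as
//
//   #pragma omp atomic capture
//   { v = x; x += expr; }   // 'v' receives the old value of 'x'
//
// the Gen lambda above returns the updated value while NewVVal records the
// old or new 'x' depending on IsPostfixUpdate. With integer operands this
// typically collapses to one atomicrmw whose returned old value feeds 'v'
// directly; otherwise the compare-and-swap path computes both values. This
// is a sketch of the semantics, not the exact emitted IR.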

static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, bool IsPostfixUpdate,
                              const Expr *X, const Expr *V, const Expr *E,
                              const Expr *UE, bool IsXLHSInRHSPart,
                              SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    emitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
    emitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    emitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    emitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    break;
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_task_reduction:
  case OMPC_in_reduction:
  case OMPC_safelen:
  case OMPC_simdlen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_depend:
  case OMPC_mergeable:
  case OMPC_device:
  case OMPC_threads:
  case OMPC_simd:
  case OMPC_map:
  case OMPC_num_teams:
  case OMPC_thread_limit:
  case OMPC_priority:
  case OMPC_grainsize:
  case OMPC_nogroup:
  case OMPC_num_tasks:
  case OMPC_hint:
  case OMPC_dist_schedule:
  case OMPC_defaultmap:
  case OMPC_uniform:
  case OMPC_to:
  case OMPC_from:
  case OMPC_use_device_ptr:
  case OMPC_is_device_ptr:
  case OMPC_unified_address:
  case OMPC_unified_shared_memory:
  case OMPC_reverse_offload:
  case OMPC_dynamic_allocators:
  case OMPC_atomic_default_mem_order:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}

void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
  OpenMPClauseKind Kind = OMPC_unknown;
  for (const OMPClause *C : S.clauses()) {
    // Find first clause (skip seq_cst clause, if it is first).
    if (C->getClauseKind() != OMPC_seq_cst) {
      Kind = C->getClauseKind();
      break;
    }
  }

  const Stmt *CS = S.getInnermostCapturedStmt()->IgnoreContainers();
  if (const auto *FE = dyn_cast<FullExpr>(CS))
    enterFullExpression(FE);
  // Processing for statements under 'atomic capture'.
  if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
    for (const Stmt *C : Compound->body()) {
      if (const auto *FE = dyn_cast<FullExpr>(C))
        enterFullExpression(FE);
    }
  }

  auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.EmitStopPoint(CS);
    emitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
                      S.getV(), S.getExpr(), S.getUpdateExpr(),
                      S.isXLHSInRHSPart(), S.getBeginLoc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
}
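
// For example (hypothetical user code), given
//   #pragma omp atomic capture seq_cst
//   v = x++;
// the clause scan above skips seq_cst and selects OMPC_capture as Kind, while
// IsSeqCst independently records the presence of the seq_cst clause.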

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen) {
  assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
  CodeGenModule &CGM = CGF.CGM;

  // On device emit this construct as inlined code.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    OMPLexicalScope Scope(CGF, S, OMPD_target);
    CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
        });
    return;
  }

  llvm::Function *Fn = nullptr;
  llvm::Constant *FnID = nullptr;

  const Expr *IfCond = nullptr;
  // Check for the (at most one) 'if' clause associated with the target region.
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_target) {
      IfCond = C->getCondition();
      break;
    }
  }

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Check if we have an 'if' clause whose condition always evaluates to false
  // or if we do not have any targets specified. If so, the target region is
  // not an offload entry point.
  bool IsOffloadEntry = true;
  if (IfCond) {
    bool Val;
    if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
      IsOffloadEntry = false;
  }
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    IsOffloadEntry = false;

  assert(CGF.CurFuncDecl && "No parent declaration for target region!");
  StringRef ParentName;
  // In case we have Ctors/Dtors we use the complete type variant to produce
  // the mangling of the device outlined kernel.
  if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
  else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
  else
    ParentName =
        CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));

  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
                                                    IsOffloadEntry, CodeGen);
  OMPLexicalScope Scope(CGF, S, OMPD_task);
  auto &&SizeEmitter = [](CodeGenFunction &CGF, const OMPLoopDirective &D) {
    // Use a named scope here; a temporary would be destroyed before the
    // iteration count below is emitted.
    OMPLoopScope LoopScope(CGF, D);
    // Emit calculation of the iterations count.
    llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
    NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
                                              /*IsSigned=*/false);
    return NumIterations;
  };
  CGM.getOpenMPRuntime().emitTargetNumIterationsCall(CGF, S, Device,
                                                     SizeEmitter);
  CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device);
}
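
// Host-side offload sketch for a target construct (illustrative summary, not
// the exact runtime ABI): the outlined kernel Fn/FnID, the evaluated 'if'
// condition, and the 'device' expression are handed to emitTargetCall, which
// emits the offload attempt plus a host fallback invocation of the outlined
// function when offloading is disabled or fails at run time.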

static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
                             PrePostActionTy &Action) {
  Action.Enter(CGF);
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
  CGF.EmitOMPPrivateClause(S, PrivateScope);
  (void)PrivateScope.Privatize();
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
    CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

  CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
}

void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
                                                  StringRef ParentName,
                                                  const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        OpenMPDirectiveKind InnermostKind,
                                        const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
  llvm::Value *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
          S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);

  const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
  const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
  if (NT || TL) {
    const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr;
    const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr;

    CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
                                                  S.getBeginLoc());
  }

  OMPTeamsScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn,
                                           CapturedVars);
}
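
// For example (hypothetical user code):
//   #pragma omp teams num_teams(4) thread_limit(64)
// makes both NT and TL non-null above, so the bounds are forwarded to the
// runtime via emitNumTeamsClause before the teams call is emitted.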

void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
                                  const OMPTargetTeamsDirective &S) {
  auto *CS = S.getCapturedStmt(OMPD_teams);
  Action.Enter(CGF);
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDirective(
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void
emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
                                const OMPTargetTeamsDistributeDirective &S) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
    const OMPTargetTeamsDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
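
// The same lowering shape is shared across the 'target teams distribute'
// family below. For example (hypothetical user code):
//   #pragma omp target teams distribute
//   for (int i = 0; i < n; ++i)
//     a[i] = b[i];
// becomes a teams region whose body is an inlined 'distribute' loop.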

static void emitTargetTeamsDistributeSimdRegion(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
    const OMPTargetTeamsDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPTeamsDistributeDirective(
    const OMPTeamsDistributeDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
    const OMPTeamsDistributeSimdDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
    const OMPTeamsDistributeParallelForDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
    const OMPTeamsDistributeParallelForSimdDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

static void emitTargetTeamsDistributeParallelForRegion(
    CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeParallelForDirective &S) {
  // Emit SPMD target teams distribute parallel for region as a standalone
  // region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
    const OMPTargetTeamsDistributeParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitTargetTeamsDistributeParallelForSimdRegion(
    CodeGenFunction &CGF,
    const OMPTargetTeamsDistributeParallelForSimdDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
  // Emit SPMD target teams distribute parallel for simd region as a standalone
  // region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPCancellationPointDirective(
    const OMPCancellationPointDirective &S) {
  CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(),
                                                   S.getCancelRegion());
}

void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_cancel) {
      IfCond = C->getCondition();
      break;
    }
  }
  CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
                                        S.getCancelRegion());
}

CodeGenFunction::JumpDest
CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
  if (Kind == OMPD_parallel || Kind == OMPD_task ||
      Kind == OMPD_target_parallel)
    return ReturnBlock;
  assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
         Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
         Kind == OMPD_distribute_parallel_for ||
         Kind == OMPD_target_parallel_for ||
         Kind == OMPD_teams_distribute_parallel_for ||
         Kind == OMPD_target_teams_distribute_parallel_for);
  return OMPCancelStack.getExitBlock();
}
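
// Illustrative mapping (a summary of the checks above): '#pragma omp cancel
// parallel' inside a parallel region exits through ReturnBlock of the
// outlined function, while '#pragma omp cancel for' branches to the exit
// block kept on OMPCancelStack for the enclosing worksharing loop.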

void CodeGenFunction::EmitOMPUseDevicePtrClause(
    const OMPClause &NC, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
  const auto &C = cast<OMPUseDevicePtrClause>(NC);
  auto OrigVarIt = C.varlist_begin();
  auto InitIt = C.inits().begin();
  for (const Expr *PvtVarIt : C.private_copies()) {
    const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
    const auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
    const auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());

    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get an
    // OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecls are used to privatize fields of the current
      // structure.
      const auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, OrigVD,
                                                         InitAddrIt, InitVD,
                                                         PvtVD]() {
      // Initialize the temporary initialization variable with the address we
      // get from the runtime library. We have to cast the source address
      // because it is always a void *. References are materialized in the
      // privatization scope, so the initialization here disregards the fact
      // that the original variable is a reference.
      QualType AddrQTy =
          getContext().getPointerType(OrigVD->getType().getNonReferenceType());
      llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
      Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
      setAddrOfLocalVar(InitVD, InitAddr);

      // Emit the private declaration; it will be initialized by the
      // declaration we just added to the local declarations map.
      EmitDecl(*PvtVD);

      // The initialization variable has served its purpose in the emission
      // of the previous declaration, so we don't need it anymore.
      LocalDeclMap.erase(InitVD);

      // Return the address of the private variable.
      return GetAddrOfLocalVar(PvtVD);
    });
    assert(IsRegistered && "firstprivate var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    ++OrigVarIt;
    ++InitIt;
  }
}
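
// Illustrative use (hypothetical user code; 'launch_kernel' is a placeholder):
//   #pragma omp target data map(to: a[0:n]) use_device_ptr(a)
//   launch_kernel(a); // inside the region 'a' is the device address that the
//                     // runtime reported via CaptureDeviceAddrMap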

// Generate the instructions for '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);

  // Create a pre/post action to signal the privatization of the device
  // pointer. This action can be replaced by the OpenMP runtime code generation
  // to deactivate privatization.
  bool PrivatizeDevicePointers = false;
  class DevicePointerPrivActionTy : public PrePostActionTy {
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
                          &InnermostCodeGen](CodeGenFunction &CGF,
                                             PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers
      // if needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else {
        RCG(CGF);
      }
    };

    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Although the body of the region is emitted as an inlined directive, we
    // don't use an inline scope: changes to the references inside the region
    // are expected to be visible outside, so we do not privatize them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}
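
// Note (illustrative summary): when no offload targets are configured, the
// code above emits the region body directly (RCG(*this)) and no runtime
// data-mapping calls are generated; otherwise emitTargetDataCalls brackets
// the body with the begin/end data-mapping runtime calls.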

void CodeGenFunction::EmitOMPTargetEnterDataDirective(
    const OMPTargetEnterDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

void CodeGenFunction::EmitOMPTargetExitDataDirective(
    const OMPTargetExitDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}
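
// For example (hypothetical user code):
//   #pragma omp target enter data map(to: a[0:n])
//   ...
//   #pragma omp target exit data map(from: a[0:n])
// Both directives lower to a single standalone data-mapping runtime call,
// guarded by the optional 'if' and 'device' clause values gathered above.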

static void emitTargetParallelRegion(CodeGenFunction &CGF,
                                     const OMPTargetParallelDirective &S,
                                     PrePostActionTy &Action) {
  // Get the captured statement associated with the 'parallel' region.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  Action.Enter(CGF);
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    // TODO: Add support for clauses.
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelDirective(
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitTargetParallelForRegion(CodeGenFunction &CGF,
                                        const OMPTargetParallelForDirective &S,
                                        PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPCancelStackRAII CancelRegion(
        CGF, OMPD_target_parallel_for, S.hasCancel());
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForDirective &S) {
  // Emit SPMD target parallel for region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelForDirective(
    const OMPTargetParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void
emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
                                const OMPTargetParallelForSimdDirective &S,
                                PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForSimdDirective &S) {
  // Emit SPMD target parallel for region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
    const OMPTargetParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

/// Emit a helper variable and return corresponding lvalue.
static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
                     const ImplicitParamDecl *PVD,
                     CodeGenFunction::OMPPrivateScope &Privates) {
  const auto *VDecl = cast<VarDecl>(Helper->getDecl());
  Privates.addPrivate(VDecl,
                      [&CGF, PVD]() { return CGF.GetAddrOfLocalVar(PVD); });
}

void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
  Data.Tied = true;
  // Set scheduling for taskloop.
  if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }
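
  // For example (hypothetical clauses), '#pragma omp taskloop grainsize(G)'
  // stores G with IntVal=false, while '#pragma omp taskloop num_tasks(N)'
  // stores N with IntVal=true; the int flag packed into Data.Schedule is how
  // the two scheduling modes are distinguished downstream.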

  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    if (isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPSimdInit(S);

    OMPPrivateScope LoopScope(CGF);
    // Emit helper vars inits.
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate the iteration count
    // on each iteration (e.g., it is foldable into a constant).
    if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, JumpDest());
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getBeginLoc())));
    }
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
                      &Data](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  if (Data.Nogroup) {
    EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
  } else {
    CGM.getOpenMPRuntime().emitTaskgroupRegion(
        *this,
        [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
                                        PrePostActionTy &Action) {
          Action.Enter(CGF);
          CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
                                        Data);
        },
        S.getBeginLoc());
  }
}

void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}

void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}

// Generate the instructions for '#pragma omp target update' directive.
void CodeGenFunction::EmitOMPTargetUpdateDirective(
    const OMPTargetUpdateDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}
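
// For example (hypothetical user code):
//   #pragma omp target update to(a[0:n]) if(n > 0) device(0)
// emits a standalone data-motion runtime call guarded by the 'if' condition
// and directed at the given device.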

void CodeGenFunction::EmitSimpleOMPExecutableDirective(
    const OMPExecutableDirective &D) {
  if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
    return;
  auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
    if (isOpenMPSimdDirective(D.getDirectiveKind())) {
      emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
    } else {
      OMPPrivateScope LoopGlobals(CGF);
      if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
        for (const Expr *E : LD->counters()) {
          const auto *VD = dyn_cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
          if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
            LValue GlobLVal = CGF.EmitLValue(E);
            LoopGlobals.addPrivate(
                VD, [&GlobLVal]() { return GlobLVal.getAddress(); });
          }
          if (isa<OMPCapturedExprDecl>(VD)) {
            // Emit only those that were not explicitly referenced in clauses.
            if (!CGF.LocalDeclMap.count(VD))
              CGF.EmitVarDecl(*VD);
          }
        }
        for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
          if (!C->getNumForLoops())
            continue;
          for (unsigned I = LD->getCollapsedNumber(),
                        E = C->getLoopNumIterations().size();
               I < E; ++I) {
            if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
                    cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) {
              // Emit only those that were not explicitly referenced in
              // clauses.
              if (!CGF.LocalDeclMap.count(VD))
                CGF.EmitVarDecl(*VD);
            }
          }
        }
      }
      LoopGlobals.Privatize();
      CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
    }
  };
  OMPSimdLexicalScope Scope(*this, D);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this,
      isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
                                                  : D.getDirectiveKind(),
      CodeGen);
}