1 //===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 /// \file
10 /// \brief This file implements semantic analysis for CUDA constructs.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/AST/ASTContext.h"
15 #include "clang/AST/Decl.h"
16 #include "clang/AST/ExprCXX.h"
17 #include "clang/Lex/Preprocessor.h"
18 #include "clang/Sema/Lookup.h"
19 #include "clang/Sema/Sema.h"
20 #include "clang/Sema/SemaDiagnostic.h"
21 #include "clang/Sema/SemaInternal.h"
22 #include "clang/Sema/Template.h"
23 #include "llvm/ADT/Optional.h"
24 #include "llvm/ADT/SmallVector.h"
25 using namespace clang;
26 
27 void Sema::PushForceCUDAHostDevice() {
28   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
29   ForceCUDAHostDeviceDepth++;
30 }
31 
32 bool Sema::PopForceCUDAHostDevice() {
33   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
34   if (ForceCUDAHostDeviceDepth == 0)
35     return false;
36   ForceCUDAHostDeviceDepth--;
37   return true;
38 }
39 
40 ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
41                                          MultiExprArg ExecConfig,
42                                          SourceLocation GGGLoc) {
43   FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl();
44   if (!ConfigDecl)
45     return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
46                      << "cudaConfigureCall");
47   QualType ConfigQTy = ConfigDecl->getType();
48 
49   DeclRefExpr *ConfigDR = new (Context)
50       DeclRefExpr(ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
51   MarkFunctionReferenced(LLLLoc, ConfigDecl);
52 
53   return ActOnCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
54                        /*IsExecConfig=*/true);
55 }
56 
57 /// IdentifyCUDATarget - Determine the CUDA compilation target for this function
58 Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D) {
59   // Code that lives outside a function is run on the host.
60   if (D == nullptr)
61     return CFT_Host;
62 
63   if (D->hasAttr<CUDAInvalidTargetAttr>())
64     return CFT_InvalidTarget;
65 
66   if (D->hasAttr<CUDAGlobalAttr>())
67     return CFT_Global;
68 
69   if (D->hasAttr<CUDADeviceAttr>()) {
70     if (D->hasAttr<CUDAHostAttr>())
71       return CFT_HostDevice;
72     return CFT_Device;
73   } else if (D->hasAttr<CUDAHostAttr>()) {
74     return CFT_Host;
75   } else if (D->isImplicit()) {
76     // Some implicit declarations (like intrinsic functions) are not marked.
77     // Set the most lenient target on them for maximal flexibility.
78     return CFT_HostDevice;
79   }
80 
81   return CFT_Host;
82 }
83 
84 // * CUDA Call preference table
85 //
86 // F - from,
87 // T - to
88 // Ph - preference in host mode
89 // Pd - preference in device mode
90 // H  - handled in (x)
91 // Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never.
92 //
93 // | F  | T  | Ph  | Pd  |  H  |
94 // |----+----+-----+-----+-----+
95 // | d  | d  | N   | N   | (c) |
96 // | d  | g  | --  | --  | (a) |
97 // | d  | h  | --  | --  | (e) |
98 // | d  | hd | HD  | HD  | (b) |
99 // | g  | d  | N   | N   | (c) |
100 // | g  | g  | --  | --  | (a) |
101 // | g  | h  | --  | --  | (e) |
102 // | g  | hd | HD  | HD  | (b) |
103 // | h  | d  | --  | --  | (e) |
104 // | h  | g  | N   | N   | (c) |
105 // | h  | h  | N   | N   | (c) |
106 // | h  | hd | HD  | HD  | (b) |
107 // | hd | d  | WS  | SS  | (d) |
108 // | hd | g  | SS  | --  |(d/a)|
109 // | hd | h  | SS  | WS  | (d) |
110 // | hd | hd | HD  | HD  | (b) |
111 
112 Sema::CUDAFunctionPreference
113 Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
114                              const FunctionDecl *Callee) {
115   assert(Callee && "Callee must be valid.");
116   CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller);
117   CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);
118 
119   // If one of the targets is invalid, the check always fails, no matter what
120   // the other target is.
121   if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
122     return CFP_Never;
123 
124   // (a) Can't call global from some contexts until we support CUDA's
125   // dynamic parallelism.
126   if (CalleeTarget == CFT_Global &&
127       (CallerTarget == CFT_Global || CallerTarget == CFT_Device))
128     return CFP_Never;
129 
130   // (b) Calling HostDevice is OK for everyone.
131   if (CalleeTarget == CFT_HostDevice)
132     return CFP_HostDevice;
133 
134   // (c) Best case scenarios
135   if (CalleeTarget == CallerTarget ||
136       (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
137       (CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
138     return CFP_Native;
139 
140   // (d) HostDevice behavior depends on compilation mode.
141   if (CallerTarget == CFT_HostDevice) {
142     // It's OK to call a compilation-mode matching function from an HD one.
143     if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) ||
144         (!getLangOpts().CUDAIsDevice &&
145          (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)))
146       return CFP_SameSide;
147 
148     // Calls from HD to non-mode-matching functions (i.e., to host functions
149     // when compiling in device mode or to device functions when compiling in
150     // host mode) are allowed at the sema level, but eventually rejected if
151     // they're ever codegened.  TODO: Reject said calls earlier.
152     return CFP_WrongSide;
153   }
154 
155   // (e) Calling across device/host boundary is not something you should do.
156   if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
157       (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
158       (CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
159     return CFP_Never;
160 
161   llvm_unreachable("All cases should've been handled by now.");
162 }
163 
164 void Sema::EraseUnwantedCUDAMatches(
165     const FunctionDecl *Caller,
166     SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
167   if (Matches.size() <= 1)
168     return;
169 
170   using Pair = std::pair<DeclAccessPair, FunctionDecl*>;
171 
172   // Gets the CUDA function preference for a call from Caller to Match.
173   auto GetCFP = [&](const Pair &Match) {
174     return IdentifyCUDAPreference(Caller, Match.second);
175   };
176 
177   // Find the best call preference among the functions in Matches.
178   CUDAFunctionPreference BestCFP = GetCFP(*std::max_element(
179       Matches.begin(), Matches.end(),
180       [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); }));
181 
182   // Erase all functions with lower priority.
183   Matches.erase(
184       llvm::remove_if(
185           Matches, [&](const Pair &Match) { return GetCFP(Match) < BestCFP; }),
186       Matches.end());
187 }
188 
189 /// When an implicitly-declared special member has to invoke more than one
190 /// base/field special member, conflicts may occur in the targets of these
191 /// members. For example, if one base's member __host__ and another's is
192 /// __device__, it's a conflict.
193 /// This function figures out if the given targets \param Target1 and
194 /// \param Target2 conflict, and if they do not it fills in
195 /// \param ResolvedTarget with a target that resolves for both calls.
196 /// \return true if there's a conflict, false otherwise.
197 static bool
198 resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1,
199                                 Sema::CUDAFunctionTarget Target2,
200                                 Sema::CUDAFunctionTarget *ResolvedTarget) {
201   // Only free functions and static member functions may be global.
202   assert(Target1 != Sema::CFT_Global);
203   assert(Target2 != Sema::CFT_Global);
204 
205   if (Target1 == Sema::CFT_HostDevice) {
206     *ResolvedTarget = Target2;
207   } else if (Target2 == Sema::CFT_HostDevice) {
208     *ResolvedTarget = Target1;
209   } else if (Target1 != Target2) {
210     return true;
211   } else {
212     *ResolvedTarget = Target1;
213   }
214 
215   return false;
216 }
217 
218 bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
219                                                    CXXSpecialMember CSM,
220                                                    CXXMethodDecl *MemberDecl,
221                                                    bool ConstRHS,
222                                                    bool Diagnose) {
223   llvm::Optional<CUDAFunctionTarget> InferredTarget;
224 
225   // We're going to invoke special member lookup; mark that these special
226   // members are called from this one, and not from its caller.
227   ContextRAII MethodContext(*this, MemberDecl);
228 
229   // Look for special members in base classes that should be invoked from here.
230   // Infer the target of this member base on the ones it should call.
231   // Skip direct and indirect virtual bases for abstract classes.
232   llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases;
233   for (const auto &B : ClassDecl->bases()) {
234     if (!B.isVirtual()) {
235       Bases.push_back(&B);
236     }
237   }
238 
239   if (!ClassDecl->isAbstract()) {
240     for (const auto &VB : ClassDecl->vbases()) {
241       Bases.push_back(&VB);
242     }
243   }
244 
245   for (const auto *B : Bases) {
246     const RecordType *BaseType = B->getType()->getAs<RecordType>();
247     if (!BaseType) {
248       continue;
249     }
250 
251     CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
252     Sema::SpecialMemberOverloadResult *SMOR =
253         LookupSpecialMember(BaseClassDecl, CSM,
254                             /* ConstArg */ ConstRHS,
255                             /* VolatileArg */ false,
256                             /* RValueThis */ false,
257                             /* ConstThis */ false,
258                             /* VolatileThis */ false);
259 
260     if (!SMOR || !SMOR->getMethod()) {
261       continue;
262     }
263 
264     CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR->getMethod());
265     if (!InferredTarget.hasValue()) {
266       InferredTarget = BaseMethodTarget;
267     } else {
268       bool ResolutionError = resolveCalleeCUDATargetConflict(
269           InferredTarget.getValue(), BaseMethodTarget,
270           InferredTarget.getPointer());
271       if (ResolutionError) {
272         if (Diagnose) {
273           Diag(ClassDecl->getLocation(),
274                diag::note_implicit_member_target_infer_collision)
275               << (unsigned)CSM << InferredTarget.getValue() << BaseMethodTarget;
276         }
277         MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
278         return true;
279       }
280     }
281   }
282 
283   // Same as for bases, but now for special members of fields.
284   for (const auto *F : ClassDecl->fields()) {
285     if (F->isInvalidDecl()) {
286       continue;
287     }
288 
289     const RecordType *FieldType =
290         Context.getBaseElementType(F->getType())->getAs<RecordType>();
291     if (!FieldType) {
292       continue;
293     }
294 
295     CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
296     Sema::SpecialMemberOverloadResult *SMOR =
297         LookupSpecialMember(FieldRecDecl, CSM,
298                             /* ConstArg */ ConstRHS && !F->isMutable(),
299                             /* VolatileArg */ false,
300                             /* RValueThis */ false,
301                             /* ConstThis */ false,
302                             /* VolatileThis */ false);
303 
304     if (!SMOR || !SMOR->getMethod()) {
305       continue;
306     }
307 
308     CUDAFunctionTarget FieldMethodTarget =
309         IdentifyCUDATarget(SMOR->getMethod());
310     if (!InferredTarget.hasValue()) {
311       InferredTarget = FieldMethodTarget;
312     } else {
313       bool ResolutionError = resolveCalleeCUDATargetConflict(
314           InferredTarget.getValue(), FieldMethodTarget,
315           InferredTarget.getPointer());
316       if (ResolutionError) {
317         if (Diagnose) {
318           Diag(ClassDecl->getLocation(),
319                diag::note_implicit_member_target_infer_collision)
320               << (unsigned)CSM << InferredTarget.getValue()
321               << FieldMethodTarget;
322         }
323         MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
324         return true;
325       }
326     }
327   }
328 
329   if (InferredTarget.hasValue()) {
330     if (InferredTarget.getValue() == CFT_Device) {
331       MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
332     } else if (InferredTarget.getValue() == CFT_Host) {
333       MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
334     } else {
335       MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
336       MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
337     }
338   } else {
339     // If no target was inferred, mark this member as __host__ __device__;
340     // it's the least restrictive option that can be invoked from any target.
341     MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
342     MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
343   }
344 
345   return false;
346 }
347 
348 bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
349   if (!CD->isDefined() && CD->isTemplateInstantiation())
350     InstantiateFunctionDefinition(Loc, CD->getFirstDecl());
351 
352   // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered
353   // empty at a point in the translation unit, if it is either a
354   // trivial constructor
355   if (CD->isTrivial())
356     return true;
357 
358   // ... or it satisfies all of the following conditions:
359   // The constructor function has been defined.
360   // The constructor function has no parameters,
361   // and the function body is an empty compound statement.
362   if (!(CD->hasTrivialBody() && CD->getNumParams() == 0))
363     return false;
364 
365   // Its class has no virtual functions and no virtual base classes.
366   if (CD->getParent()->isDynamicClass())
367     return false;
368 
369   // The only form of initializer allowed is an empty constructor.
370   // This will recursively check all base classes and member initializers
371   if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) {
372         if (const CXXConstructExpr *CE =
373                 dyn_cast<CXXConstructExpr>(CI->getInit()))
374           return isEmptyCudaConstructor(Loc, CE->getConstructor());
375         return false;
376       }))
377     return false;
378 
379   return true;
380 }
381 
382 bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
383   // No destructor -> no problem.
384   if (!DD)
385     return true;
386 
387   if (!DD->isDefined() && DD->isTemplateInstantiation())
388     InstantiateFunctionDefinition(Loc, DD->getFirstDecl());
389 
390   // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered
391   // empty at a point in the translation unit, if it is either a
392   // trivial constructor
393   if (DD->isTrivial())
394     return true;
395 
396   // ... or it satisfies all of the following conditions:
397   // The destructor function has been defined.
398   // and the function body is an empty compound statement.
399   if (!DD->hasTrivialBody())
400     return false;
401 
402   const CXXRecordDecl *ClassDecl = DD->getParent();
403 
404   // Its class has no virtual functions and no virtual base classes.
405   if (ClassDecl->isDynamicClass())
406     return false;
407 
408   // Only empty destructors are allowed. This will recursively check
409   // destructors for all base classes...
410   if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) {
411         if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl())
412           return isEmptyCudaDestructor(Loc, RD->getDestructor());
413         return true;
414       }))
415     return false;
416 
417   // ... and member fields.
418   if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) {
419         if (CXXRecordDecl *RD = Field->getType()
420                                     ->getBaseElementTypeUnsafe()
421                                     ->getAsCXXRecordDecl())
422           return isEmptyCudaDestructor(Loc, RD->getDestructor());
423         return true;
424       }))
425     return false;
426 
427   return true;
428 }
429 
430 // With -fcuda-host-device-constexpr, an unattributed constexpr function is
431 // treated as implicitly __host__ __device__, unless:
432 //  * it is a variadic function (device-side variadic functions are not
433 //    allowed), or
434 //  * a __device__ function with this signature was already declared, in which
435 //    case in which case we output an error, unless the __device__ decl is in a
436 //    system header, in which case we leave the constexpr function unattributed.
437 //
438 // In addition, all function decls are treated as __host__ __device__ when
439 // ForceCUDAHostDeviceDepth > 0 (corresponding to code within a
440 //   #pragma clang force_cuda_host_device_begin/end
441 // pair).
442 void Sema::maybeAddCUDAHostDeviceAttrs(Scope *S, FunctionDecl *NewD,
443                                        const LookupResult &Previous) {
444   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
445 
446   if (ForceCUDAHostDeviceDepth > 0) {
447     if (!NewD->hasAttr<CUDAHostAttr>())
448       NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
449     if (!NewD->hasAttr<CUDADeviceAttr>())
450       NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
451     return;
452   }
453 
454   if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
455       NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
456       NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
457     return;
458 
459   // Is D a __device__ function with the same signature as NewD, ignoring CUDA
460   // attributes?
461   auto IsMatchingDeviceFn = [&](NamedDecl *D) {
462     if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
463       D = Using->getTargetDecl();
464     FunctionDecl *OldD = D->getAsFunction();
465     return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
466            !OldD->hasAttr<CUDAHostAttr>() &&
467            !IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false,
468                        /* ConsiderCudaAttrs = */ false);
469   };
470   auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
471   if (It != Previous.end()) {
472     // We found a __device__ function with the same name and signature as NewD
473     // (ignoring CUDA attrs).  This is an error unless that function is defined
474     // in a system header, in which case we simply return without making NewD
475     // host+device.
476     NamedDecl *Match = *It;
477     if (!getSourceManager().isInSystemHeader(Match->getLocation())) {
478       Diag(NewD->getLocation(),
479            diag::err_cuda_unattributed_constexpr_cannot_overload_device)
480           << NewD->getName();
481       Diag(Match->getLocation(),
482            diag::note_cuda_conflicting_device_function_declared_here);
483     }
484     return;
485   }
486 
487   NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
488   NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
489 }
490 
491 Sema::CUDADiagBuilder::CUDADiagBuilder(Kind K, SourceLocation Loc,
492                                        unsigned DiagID, FunctionDecl *Fn,
493                                        Sema &S) {
494   switch (K) {
495   case K_Nop:
496     break;
497   case K_Immediate:
498     ImmediateDiagBuilder.emplace(S.Diag(Loc, DiagID));
499     break;
500   case K_Deferred:
501     assert(Fn && "Must have a function to attach the deferred diag to.");
502     PartialDiagInfo.emplace(S, Loc, S.PDiag(DiagID), Fn);
503     break;
504   }
505 }
506 
507 // In CUDA, there are some constructs which may appear in semantically-valid
508 // code, but trigger errors if we ever generate code for the function in which
509 // they appear.  Essentially every construct you're not allowed to use on the
510 // device falls into this category, because you are allowed to use these
511 // constructs in a __host__ __device__ function, but only if that function is
512 // never codegen'ed on the device.
513 //
514 // To handle semantic checking for these constructs, we keep track of the set of
515 // functions we know will be emitted, either because we could tell a priori that
516 // they would be emitted, or because they were transitively called by a
517 // known-emitted function.
518 //
519 // We also keep a partial call graph of which not-known-emitted functions call
520 // which other not-known-emitted functions.
521 //
522 // When we see something which is illegal if the current function is emitted
523 // (usually by way of CUDADiagIfDeviceCode, CUDADiagIfHostCode, or
524 // CheckCUDACall), we first check if the current function is known-emitted.  If
525 // so, we immediately output the diagnostic.
526 //
527 // Otherwise, we "defer" the diagnostic.  It sits in Sema::CUDADeferredDiags
528 // until we discover that the function is known-emitted, at which point we take
529 // it out of this map and emit the diagnostic.
530 
531 // Do we know that we will eventually codegen the given function?
532 static bool IsKnownEmitted(Sema &S, FunctionDecl *FD) {
533   // Templates are emitted when they're instantiated.
534   if (FD->isDependentContext())
535     return false;
536 
537   // When compiling for device, host functions are never emitted.  Similarly,
538   // when compiling for host, device and global functions are never emitted.
539   // (Technically, we do emit a host-side stub for global functions, but this
540   // doesn't count for our purposes here.)
541   Sema::CUDAFunctionTarget T = S.IdentifyCUDATarget(FD);
542   if (S.getLangOpts().CUDAIsDevice && T == Sema::CFT_Host)
543     return false;
544   if (!S.getLangOpts().CUDAIsDevice &&
545       (T == Sema::CFT_Device || T == Sema::CFT_Global))
546     return false;
547 
548   // Externally-visible and similar functions are always emitted.
549   if (!isDiscardableGVALinkage(S.getASTContext().GetGVALinkageForFunction(FD)))
550     return true;
551 
552   // Otherwise, the function is known-emitted if it's in our set of
553   // known-emitted functions.
554   return S.CUDAKnownEmittedFns.count(FD) > 0;
555 }
556 
557 Sema::CUDADiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
558                                                  unsigned DiagID) {
559   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
560   CUDADiagBuilder::Kind DiagKind = [&] {
561     switch (CurrentCUDATarget()) {
562     case CFT_Global:
563     case CFT_Device:
564       return CUDADiagBuilder::K_Immediate;
565     case CFT_HostDevice:
566       // An HD function counts as host code if we're compiling for host, and
567       // device code if we're compiling for device.  Defer any errors in device
568       // mode until the function is known-emitted.
569       if (getLangOpts().CUDAIsDevice) {
570         return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext))
571                    ? CUDADiagBuilder::K_Immediate
572                    : CUDADiagBuilder::K_Deferred;
573       }
574       return CUDADiagBuilder::K_Nop;
575 
576     default:
577       return CUDADiagBuilder::K_Nop;
578     }
579   }();
580   return CUDADiagBuilder(DiagKind, Loc, DiagID,
581                          dyn_cast<FunctionDecl>(CurContext), *this);
582 }
583 
584 Sema::CUDADiagBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc,
585                                                unsigned DiagID) {
586   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
587   CUDADiagBuilder::Kind DiagKind = [&] {
588     switch (CurrentCUDATarget()) {
589     case CFT_Host:
590       return CUDADiagBuilder::K_Immediate;
591     case CFT_HostDevice:
592       // An HD function counts as host code if we're compiling for host, and
593       // device code if we're compiling for device.  Defer any errors in device
594       // mode until the function is known-emitted.
595       if (getLangOpts().CUDAIsDevice)
596         return CUDADiagBuilder::K_Nop;
597 
598       return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext))
599                  ? CUDADiagBuilder::K_Immediate
600                  : CUDADiagBuilder::K_Deferred;
601     default:
602       return CUDADiagBuilder::K_Nop;
603     }
604   }();
605   return CUDADiagBuilder(DiagKind, Loc, DiagID,
606                          dyn_cast<FunctionDecl>(CurContext), *this);
607 }
608 
609 // Emit any deferred diagnostics for FD and erase them from the map in which
610 // they're stored.
611 static void EmitDeferredDiags(Sema &S, FunctionDecl *FD) {
612   auto It = S.CUDADeferredDiags.find(FD);
613   if (It == S.CUDADeferredDiags.end())
614     return;
615   for (PartialDiagnosticAt &PDAt : It->second) {
616     const SourceLocation &Loc = PDAt.first;
617     const PartialDiagnostic &PD = PDAt.second;
618     DiagnosticBuilder Builder(S.Diags.Report(Loc, PD.getDiagID()));
619     Builder.setForceEmit();
620     PD.Emit(Builder);
621   }
622   S.CUDADeferredDiags.erase(It);
623 }
624 
625 // Indicate that this function (and thus everything it transtively calls) will
626 // be codegen'ed, and emit any deferred diagnostics on this function and its
627 // (transitive) callees.
628 static void MarkKnownEmitted(Sema &S, FunctionDecl *FD) {
629   // Nothing to do if we already know that FD is emitted.
630   if (IsKnownEmitted(S, FD)) {
631     assert(!S.CUDACallGraph.count(FD));
632     return;
633   }
634 
635   // We've just discovered that FD is known-emitted.  Walk our call graph to see
636   // what else we can now discover also must be emitted.
637   llvm::SmallVector<FunctionDecl *, 4> Worklist = {FD};
638   llvm::SmallSet<FunctionDecl *, 4> Seen;
639   Seen.insert(FD);
640   while (!Worklist.empty()) {
641     FunctionDecl *Caller = Worklist.pop_back_val();
642     assert(!IsKnownEmitted(S, Caller) &&
643            "Worklist should not contain known-emitted functions.");
644     S.CUDAKnownEmittedFns.insert(Caller);
645     EmitDeferredDiags(S, Caller);
646 
647     // Deferred diags are often emitted on the template itself, so emit those as
648     // well.
649     if (auto *Templ = Caller->getPrimaryTemplate())
650       EmitDeferredDiags(S, Templ->getAsFunction());
651 
652     // Add all functions called by Caller to our worklist.
653     auto CGIt = S.CUDACallGraph.find(Caller);
654     if (CGIt == S.CUDACallGraph.end())
655       continue;
656 
657     for (FunctionDecl *Callee : CGIt->second) {
658       if (Seen.count(Callee) || IsKnownEmitted(S, Callee))
659         continue;
660       Seen.insert(Callee);
661       Worklist.push_back(Callee);
662     }
663 
664     // Caller is now known-emitted, so we no longer need to maintain its list of
665     // callees in CUDACallGraph.
666     S.CUDACallGraph.erase(CGIt);
667   }
668 }
669 
670 bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
671   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
672   assert(Callee && "Callee may not be null.");
673   // FIXME: Is bailing out early correct here?  Should we instead assume that
674   // the caller is a global initializer?
675   FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
676   if (!Caller)
677     return true;
678 
679   bool CallerKnownEmitted = IsKnownEmitted(*this, Caller);
680   if (CallerKnownEmitted)
681     MarkKnownEmitted(*this, Callee);
682   else
683     CUDACallGraph[Caller].insert(Callee);
684 
685   CUDADiagBuilder::Kind DiagKind = [&] {
686     switch (IdentifyCUDAPreference(Caller, Callee)) {
687     case CFP_Never:
688       return CUDADiagBuilder::K_Immediate;
689     case CFP_WrongSide:
690       assert(Caller && "WrongSide calls require a non-null caller");
691       // If we know the caller will be emitted, we know this wrong-side call
692       // will be emitted, so it's an immediate error.  Otherwise, defer the
693       // error until we know the caller is emitted.
694       return CallerKnownEmitted ? CUDADiagBuilder::K_Immediate
695                                 : CUDADiagBuilder::K_Deferred;
696     default:
697       return CUDADiagBuilder::K_Nop;
698     }
699   }();
700 
701   // Avoid emitting this error twice for the same location.  Using a hashtable
702   // like this is unfortunate, but because we must continue parsing as normal
703   // after encountering a deferred error, it's otherwise very tricky for us to
704   // ensure that we only emit this deferred error once.
705   if (!LocsWithCUDACallDiags.insert(Loc.getRawEncoding()).second)
706     return true;
707 
708   bool IsImmediateErr =
709       CUDADiagBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, *this)
710       << IdentifyCUDATarget(Callee) << Callee << IdentifyCUDATarget(Caller);
711   CUDADiagBuilder(DiagKind, Callee->getLocation(), diag::note_previous_decl,
712                   Caller, *this)
713       << Callee;
714   return !IsImmediateErr;
715 }
716 
717 void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) {
718   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
719   if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>())
720     return;
721   FunctionDecl *CurFn = dyn_cast<FunctionDecl>(CurContext);
722   if (!CurFn)
723     return;
724   CUDAFunctionTarget Target = IdentifyCUDATarget(CurFn);
725   if (Target == CFT_Global || Target == CFT_Device) {
726     Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
727   } else if (Target == CFT_HostDevice) {
728     Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
729     Method->addAttr(CUDAHostAttr::CreateImplicit(Context));
730   }
731 }
732