1 //===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 /// \file
10 /// \brief This file implements semantic analysis for CUDA constructs.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/AST/ASTContext.h"
15 #include "clang/AST/Decl.h"
16 #include "clang/AST/ExprCXX.h"
17 #include "clang/Lex/Preprocessor.h"
18 #include "clang/Sema/Lookup.h"
19 #include "clang/Sema/Sema.h"
20 #include "clang/Sema/SemaDiagnostic.h"
21 #include "clang/Sema/Template.h"
22 #include "llvm/ADT/Optional.h"
23 #include "llvm/ADT/SmallVector.h"
24 using namespace clang;
25 
26 ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
27                                          MultiExprArg ExecConfig,
28                                          SourceLocation GGGLoc) {
29   FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl();
30   if (!ConfigDecl)
31     return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
32                      << "cudaConfigureCall");
33   QualType ConfigQTy = ConfigDecl->getType();
34 
35   DeclRefExpr *ConfigDR = new (Context)
36       DeclRefExpr(ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
37   MarkFunctionReferenced(LLLLoc, ConfigDecl);
38 
39   return ActOnCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
40                        /*IsExecConfig=*/true);
41 }
42 
43 /// IdentifyCUDATarget - Determine the CUDA compilation target for this function
44 Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D) {
45   if (D->hasAttr<CUDAInvalidTargetAttr>())
46     return CFT_InvalidTarget;
47 
48   if (D->hasAttr<CUDAGlobalAttr>())
49     return CFT_Global;
50 
51   if (D->hasAttr<CUDADeviceAttr>()) {
52     if (D->hasAttr<CUDAHostAttr>())
53       return CFT_HostDevice;
54     return CFT_Device;
55   } else if (D->hasAttr<CUDAHostAttr>()) {
56     return CFT_Host;
57   } else if (D->isImplicit()) {
58     // Some implicit declarations (like intrinsic functions) are not marked.
59     // Set the most lenient target on them for maximal flexibility.
60     return CFT_HostDevice;
61   }
62 
63   return CFT_Host;
64 }
65 
66 // * CUDA Call preference table
67 //
68 // F - from,
69 // T - to
70 // Ph - preference in host mode
71 // Pd - preference in device mode
72 // H  - handled in (x)
73 // Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never.
74 //
75 // | F  | T  | Ph  | Pd  |  H  |
76 // |----+----+-----+-----+-----+
77 // | d  | d  | N   | N   | (c) |
78 // | d  | g  | --  | --  | (a) |
79 // | d  | h  | --  | --  | (e) |
80 // | d  | hd | HD  | HD  | (b) |
81 // | g  | d  | N   | N   | (c) |
82 // | g  | g  | --  | --  | (a) |
83 // | g  | h  | --  | --  | (e) |
84 // | g  | hd | HD  | HD  | (b) |
85 // | h  | d  | --  | --  | (e) |
86 // | h  | g  | N   | N   | (c) |
87 // | h  | h  | N   | N   | (c) |
88 // | h  | hd | HD  | HD  | (b) |
89 // | hd | d  | WS  | SS  | (d) |
90 // | hd | g  | SS  | --  |(d/a)|
91 // | hd | h  | SS  | WS  | (d) |
92 // | hd | hd | HD  | HD  | (b) |
93 
94 Sema::CUDAFunctionPreference
95 Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
96                              const FunctionDecl *Callee) {
97   assert(Callee && "Callee must be valid.");
98   CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);
99   CUDAFunctionTarget CallerTarget =
100       (Caller != nullptr) ? IdentifyCUDATarget(Caller) : Sema::CFT_Host;
101 
102   // If one of the targets is invalid, the check always fails, no matter what
103   // the other target is.
104   if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
105     return CFP_Never;
106 
107   // (a) Can't call global from some contexts until we support CUDA's
108   // dynamic parallelism.
109   if (CalleeTarget == CFT_Global &&
110       (CallerTarget == CFT_Global || CallerTarget == CFT_Device ||
111        (CallerTarget == CFT_HostDevice && getLangOpts().CUDAIsDevice)))
112     return CFP_Never;
113 
114   // (b) Calling HostDevice is OK for everyone.
115   if (CalleeTarget == CFT_HostDevice)
116     return CFP_HostDevice;
117 
118   // (c) Best case scenarios
119   if (CalleeTarget == CallerTarget ||
120       (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
121       (CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
122     return CFP_Native;
123 
124   // (d) HostDevice behavior depends on compilation mode.
125   if (CallerTarget == CFT_HostDevice) {
126     // It's OK to call a compilation-mode matching function from an HD one.
127     if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) ||
128         (!getLangOpts().CUDAIsDevice &&
129          (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)))
130       return CFP_SameSide;
131 
132     // Calls from HD to non-mode-matching functions (i.e., to host functions
133     // when compiling in device mode or to device functions when compiling in
134     // host mode) are allowed at the sema level, but eventually rejected if
135     // they're ever codegened.  TODO: Reject said calls earlier.
136     return CFP_WrongSide;
137   }
138 
139   // (e) Calling across device/host boundary is not something you should do.
140   if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
141       (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
142       (CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
143     return CFP_Never;
144 
145   llvm_unreachable("All cases should've been handled by now.");
146 }
147 
148 template <typename T>
149 static void EraseUnwantedCUDAMatchesImpl(
150     Sema &S, const FunctionDecl *Caller, llvm::SmallVectorImpl<T> &Matches,
151     std::function<const FunctionDecl *(const T &)> FetchDecl) {
152   if (Matches.size() <= 1)
153     return;
154 
155   // Gets the CUDA function preference for a call from Caller to Match.
156   auto GetCFP = [&](const T &Match) {
157     return S.IdentifyCUDAPreference(Caller, FetchDecl(Match));
158   };
159 
160   // Find the best call preference among the functions in Matches.
161   Sema::CUDAFunctionPreference BestCFP = GetCFP(*std::max_element(
162       Matches.begin(), Matches.end(),
163       [&](const T &M1, const T &M2) { return GetCFP(M1) < GetCFP(M2); }));
164 
165   // Erase all functions with lower priority.
166   Matches.erase(llvm::remove_if(
167       Matches, [&](const T &Match) { return GetCFP(Match) < BestCFP; }));
168 }
169 
170 void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
171                                     SmallVectorImpl<FunctionDecl *> &Matches){
172   EraseUnwantedCUDAMatchesImpl<FunctionDecl *>(
173       *this, Caller, Matches, [](const FunctionDecl *item) { return item; });
174 }
175 
176 void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
177                                     SmallVectorImpl<DeclAccessPair> &Matches) {
178   EraseUnwantedCUDAMatchesImpl<DeclAccessPair>(
179       *this, Caller, Matches, [](const DeclAccessPair &item) {
180         return dyn_cast<FunctionDecl>(item.getDecl());
181       });
182 }
183 
184 void Sema::EraseUnwantedCUDAMatches(
185     const FunctionDecl *Caller,
186     SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches){
187   EraseUnwantedCUDAMatchesImpl<std::pair<DeclAccessPair, FunctionDecl *>>(
188       *this, Caller, Matches,
189       [](const std::pair<DeclAccessPair, FunctionDecl *> &item) {
190         return dyn_cast<FunctionDecl>(item.second);
191       });
192 }
193 
194 /// When an implicitly-declared special member has to invoke more than one
195 /// base/field special member, conflicts may occur in the targets of these
196 /// members. For example, if one base's member __host__ and another's is
197 /// __device__, it's a conflict.
198 /// This function figures out if the given targets \param Target1 and
199 /// \param Target2 conflict, and if they do not it fills in
200 /// \param ResolvedTarget with a target that resolves for both calls.
201 /// \return true if there's a conflict, false otherwise.
202 static bool
203 resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1,
204                                 Sema::CUDAFunctionTarget Target2,
205                                 Sema::CUDAFunctionTarget *ResolvedTarget) {
206   // Only free functions and static member functions may be global.
207   assert(Target1 != Sema::CFT_Global);
208   assert(Target2 != Sema::CFT_Global);
209 
210   if (Target1 == Sema::CFT_HostDevice) {
211     *ResolvedTarget = Target2;
212   } else if (Target2 == Sema::CFT_HostDevice) {
213     *ResolvedTarget = Target1;
214   } else if (Target1 != Target2) {
215     return true;
216   } else {
217     *ResolvedTarget = Target1;
218   }
219 
220   return false;
221 }
222 
223 bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
224                                                    CXXSpecialMember CSM,
225                                                    CXXMethodDecl *MemberDecl,
226                                                    bool ConstRHS,
227                                                    bool Diagnose) {
228   llvm::Optional<CUDAFunctionTarget> InferredTarget;
229 
230   // We're going to invoke special member lookup; mark that these special
231   // members are called from this one, and not from its caller.
232   ContextRAII MethodContext(*this, MemberDecl);
233 
234   // Look for special members in base classes that should be invoked from here.
235   // Infer the target of this member base on the ones it should call.
236   // Skip direct and indirect virtual bases for abstract classes.
237   llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases;
238   for (const auto &B : ClassDecl->bases()) {
239     if (!B.isVirtual()) {
240       Bases.push_back(&B);
241     }
242   }
243 
244   if (!ClassDecl->isAbstract()) {
245     for (const auto &VB : ClassDecl->vbases()) {
246       Bases.push_back(&VB);
247     }
248   }
249 
250   for (const auto *B : Bases) {
251     const RecordType *BaseType = B->getType()->getAs<RecordType>();
252     if (!BaseType) {
253       continue;
254     }
255 
256     CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
257     Sema::SpecialMemberOverloadResult *SMOR =
258         LookupSpecialMember(BaseClassDecl, CSM,
259                             /* ConstArg */ ConstRHS,
260                             /* VolatileArg */ false,
261                             /* RValueThis */ false,
262                             /* ConstThis */ false,
263                             /* VolatileThis */ false);
264 
265     if (!SMOR || !SMOR->getMethod()) {
266       continue;
267     }
268 
269     CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR->getMethod());
270     if (!InferredTarget.hasValue()) {
271       InferredTarget = BaseMethodTarget;
272     } else {
273       bool ResolutionError = resolveCalleeCUDATargetConflict(
274           InferredTarget.getValue(), BaseMethodTarget,
275           InferredTarget.getPointer());
276       if (ResolutionError) {
277         if (Diagnose) {
278           Diag(ClassDecl->getLocation(),
279                diag::note_implicit_member_target_infer_collision)
280               << (unsigned)CSM << InferredTarget.getValue() << BaseMethodTarget;
281         }
282         MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
283         return true;
284       }
285     }
286   }
287 
288   // Same as for bases, but now for special members of fields.
289   for (const auto *F : ClassDecl->fields()) {
290     if (F->isInvalidDecl()) {
291       continue;
292     }
293 
294     const RecordType *FieldType =
295         Context.getBaseElementType(F->getType())->getAs<RecordType>();
296     if (!FieldType) {
297       continue;
298     }
299 
300     CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
301     Sema::SpecialMemberOverloadResult *SMOR =
302         LookupSpecialMember(FieldRecDecl, CSM,
303                             /* ConstArg */ ConstRHS && !F->isMutable(),
304                             /* VolatileArg */ false,
305                             /* RValueThis */ false,
306                             /* ConstThis */ false,
307                             /* VolatileThis */ false);
308 
309     if (!SMOR || !SMOR->getMethod()) {
310       continue;
311     }
312 
313     CUDAFunctionTarget FieldMethodTarget =
314         IdentifyCUDATarget(SMOR->getMethod());
315     if (!InferredTarget.hasValue()) {
316       InferredTarget = FieldMethodTarget;
317     } else {
318       bool ResolutionError = resolveCalleeCUDATargetConflict(
319           InferredTarget.getValue(), FieldMethodTarget,
320           InferredTarget.getPointer());
321       if (ResolutionError) {
322         if (Diagnose) {
323           Diag(ClassDecl->getLocation(),
324                diag::note_implicit_member_target_infer_collision)
325               << (unsigned)CSM << InferredTarget.getValue()
326               << FieldMethodTarget;
327         }
328         MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
329         return true;
330       }
331     }
332   }
333 
334   if (InferredTarget.hasValue()) {
335     if (InferredTarget.getValue() == CFT_Device) {
336       MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
337     } else if (InferredTarget.getValue() == CFT_Host) {
338       MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
339     } else {
340       MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
341       MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
342     }
343   } else {
344     // If no target was inferred, mark this member as __host__ __device__;
345     // it's the least restrictive option that can be invoked from any target.
346     MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
347     MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
348   }
349 
350   return false;
351 }
352 
353 bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
354   if (!CD->isDefined() && CD->isTemplateInstantiation())
355     InstantiateFunctionDefinition(Loc, CD->getFirstDecl());
356 
357   // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered
358   // empty at a point in the translation unit, if it is either a
359   // trivial constructor
360   if (CD->isTrivial())
361     return true;
362 
363   // ... or it satisfies all of the following conditions:
364   // The constructor function has been defined.
365   // The constructor function has no parameters,
366   // and the function body is an empty compound statement.
367   if (!(CD->hasTrivialBody() && CD->getNumParams() == 0))
368     return false;
369 
370   // Its class has no virtual functions and no virtual base classes.
371   if (CD->getParent()->isDynamicClass())
372     return false;
373 
374   // The only form of initializer allowed is an empty constructor.
375   // This will recursively checks all base classes and member initializers
376   if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) {
377         if (const CXXConstructExpr *CE =
378                 dyn_cast<CXXConstructExpr>(CI->getInit()))
379           return isEmptyCudaConstructor(Loc, CE->getConstructor());
380         return false;
381       }))
382     return false;
383 
384   return true;
385 }
386 
387 // With -fcuda-host-device-constexpr, an unattributed constexpr function is
388 // treated as implicitly __host__ __device__, unless:
389 //  * it is a variadic function (device-side variadic functions are not
390 //    allowed), or
391 //  * a __device__ function with this signature was already declared, in which
392 //    case in which case we output an error, unless the __device__ decl is in a
393 //    system header, in which case we leave the constexpr function unattributed.
394 void Sema::maybeAddCUDAHostDeviceAttrs(Scope *S, FunctionDecl *NewD,
395                                        const LookupResult &Previous) {
396   assert(getLangOpts().CUDA && "May be called only for CUDA compilations.");
397   if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
398       NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
399       NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
400     return;
401 
402   // Is D a __device__ function with the same signature as NewD, ignoring CUDA
403   // attributes?
404   auto IsMatchingDeviceFn = [&](NamedDecl *D) {
405     if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
406       D = Using->getTargetDecl();
407     FunctionDecl *OldD = D->getAsFunction();
408     return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
409            !OldD->hasAttr<CUDAHostAttr>() &&
410            !IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false,
411                        /* ConsiderCudaAttrs = */ false);
412   };
413   auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
414   if (It != Previous.end()) {
415     // We found a __device__ function with the same name and signature as NewD
416     // (ignoring CUDA attrs).  This is an error unless that function is defined
417     // in a system header, in which case we simply return without making NewD
418     // host+device.
419     NamedDecl *Match = *It;
420     if (!getSourceManager().isInSystemHeader(Match->getLocation())) {
421       Diag(NewD->getLocation(),
422            diag::err_cuda_unattributed_constexpr_cannot_overload_device)
423           << NewD->getName();
424       Diag(Match->getLocation(),
425            diag::note_cuda_conflicting_device_function_declared_here);
426     }
427     return;
428   }
429 
430   NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
431   NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
432 }
433