//===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements semantic analysis for CUDA constructs.
///
//===----------------------------------------------------------------------===//

#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/ExprCXX.h"
#include "clang/Basic/Cuda.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/SemaDiagnostic.h"
#include "clang/Sema/SemaInternal.h"
#include "clang/Sema/Template.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
using namespace clang;

void Sema::PushForceCUDAHostDevice() {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  ForceCUDAHostDeviceDepth++;
}

bool Sema::PopForceCUDAHostDevice() {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  if (ForceCUDAHostDeviceDepth == 0)
    return false;
  ForceCUDAHostDeviceDepth--;
  return true;
}

ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
                                         MultiExprArg ExecConfig,
                                         SourceLocation GGGLoc) {
  FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl();
  if (!ConfigDecl)
    return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
                     << getCudaConfigureFuncName());
  QualType ConfigQTy = ConfigDecl->getType();

  DeclRefExpr *ConfigDR = new (Context)
      DeclRefExpr(Context, ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
  MarkFunctionReferenced(LLLLoc, ConfigDecl);

  return BuildCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
                       /*IsExecConfig=*/true);
}
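
// Illustrative sketch (not part of the implementation): for a launch such as
//
//   __global__ void kernel(int *p);
//   kernel<<<grid, block>>>(ptr);
//
// the execution configuration in <<<...>>> is checked by building a call to
// the configuration function named by getCudaConfigureFuncName() (for example
// cudaConfigureCall), which is what ActOnCUDAExecConfigExpr does above. The
// exact runtime entry point depends on the language mode and SDK version; see
// getCudaConfigureFuncName() at the end of this file.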

Sema::CUDAFunctionTarget
Sema::IdentifyCUDATarget(const ParsedAttributesView &Attrs) {
  bool HasHostAttr = false;
  bool HasDeviceAttr = false;
  bool HasGlobalAttr = false;
  bool HasInvalidTargetAttr = false;
  for (const ParsedAttr &AL : Attrs) {
    switch (AL.getKind()) {
    case ParsedAttr::AT_CUDAGlobal:
      HasGlobalAttr = true;
      break;
    case ParsedAttr::AT_CUDAHost:
      HasHostAttr = true;
      break;
    case ParsedAttr::AT_CUDADevice:
      HasDeviceAttr = true;
      break;
    case ParsedAttr::AT_CUDAInvalidTarget:
      HasInvalidTargetAttr = true;
      break;
    default:
      break;
    }
  }

  if (HasInvalidTargetAttr)
    return CFT_InvalidTarget;

  if (HasGlobalAttr)
    return CFT_Global;

  if (HasHostAttr && HasDeviceAttr)
    return CFT_HostDevice;

  if (HasDeviceAttr)
    return CFT_Device;

  return CFT_Host;
}

template <typename A>
static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) {
  return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
           return isa<A>(Attribute) &&
                  !(IgnoreImplicitAttr && Attribute->isImplicit());
         });
}

/// IdentifyCUDATarget - Determine the CUDA compilation target for this
/// function.
Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
                                                  bool IgnoreImplicitHDAttr) {
  // Code that lives outside a function is run on the host.
  if (D == nullptr)
    return CFT_Host;

  if (D->hasAttr<CUDAInvalidTargetAttr>())
    return CFT_InvalidTarget;

  if (D->hasAttr<CUDAGlobalAttr>())
    return CFT_Global;

  if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) {
    if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr))
      return CFT_HostDevice;
    return CFT_Device;
  } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
    return CFT_Host;
  } else if (D->isImplicit() && !IgnoreImplicitHDAttr) {
    // Some implicit declarations (like intrinsic functions) are not marked.
    // Set the most lenient target on them for maximal flexibility.
    return CFT_HostDevice;
  }

  return CFT_Host;
}
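
// For illustration only (these declarations are not part of this file), the
// mapping implemented above is:
//
//   __global__ void kern();        // -> CFT_Global
//   __device__ int dev();          // -> CFT_Device
//   __host__ int host();           // -> CFT_Host (also the default)
//   __host__ __device__ int hd();  // -> CFT_HostDevice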

// * CUDA Call preference table
//
// F - from,
// T - to
// Ph - preference in host mode
// Pd - preference in device mode
// H  - handled in (x)
// Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never.
//
// | F  | T  | Ph  | Pd  |  H  |
// |----+----+-----+-----+-----+
// | d  | d  | N   | N   | (c) |
// | d  | g  | --  | --  | (a) |
// | d  | h  | --  | --  | (e) |
// | d  | hd | HD  | HD  | (b) |
// | g  | d  | N   | N   | (c) |
// | g  | g  | --  | --  | (a) |
// | g  | h  | --  | --  | (e) |
// | g  | hd | HD  | HD  | (b) |
// | h  | d  | --  | --  | (e) |
// | h  | g  | N   | N   | (c) |
// | h  | h  | N   | N   | (c) |
// | h  | hd | HD  | HD  | (b) |
// | hd | d  | WS  | SS  | (d) |
// | hd | g  | SS  | --  |(d/a)|
// | hd | h  | SS  | WS  | (d) |
// | hd | hd | HD  | HD  | (b) |

Sema::CUDAFunctionPreference
Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
                             const FunctionDecl *Callee) {
  assert(Callee && "Callee must be valid.");
  CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller);
  CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);

  // If one of the targets is invalid, the check always fails, no matter what
  // the other target is.
  if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
    return CFP_Never;

  // (a) Can't call global from some contexts until we support CUDA's
  // dynamic parallelism.
  if (CalleeTarget == CFT_Global &&
      (CallerTarget == CFT_Global || CallerTarget == CFT_Device))
    return CFP_Never;

  // (b) Calling HostDevice is OK for everyone.
  if (CalleeTarget == CFT_HostDevice)
    return CFP_HostDevice;

  // (c) Best case scenarios
  if (CalleeTarget == CallerTarget ||
      (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
    return CFP_Native;

  // (d) HostDevice behavior depends on compilation mode.
  if (CallerTarget == CFT_HostDevice) {
    // It's OK to call a compilation-mode matching function from an HD one.
    if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) ||
        (!getLangOpts().CUDAIsDevice &&
         (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)))
      return CFP_SameSide;

    // Calls from HD to non-mode-matching functions (i.e., to host functions
    // when compiling in device mode or to device functions when compiling in
    // host mode) are allowed at the sema level, but eventually rejected if
    // they're ever codegened.  TODO: Reject said calls earlier.
    return CFP_WrongSide;
  }

  // (e) Calling across the device/host boundary is not something you should do.
  if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
      (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
    return CFP_Never;

  llvm_unreachable("All cases should've been handled by now.");
}
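
// An illustrative example (not exercised by this file): with the table above,
//
//   __host__ int host_only();
//   __host__ __device__ int hd() { return host_only(); }
//
// the call inside hd() gets CFP_SameSide when compiling for the host but
// CFP_WrongSide when compiling for the device, so it is only diagnosed if hd()
// is actually emitted on the device side (see CheckCUDACall below).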

void Sema::EraseUnwantedCUDAMatches(
    const FunctionDecl *Caller,
    SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
  if (Matches.size() <= 1)
    return;

  using Pair = std::pair<DeclAccessPair, FunctionDecl *>;

  // Gets the CUDA function preference for a call from Caller to Match.
  auto GetCFP = [&](const Pair &Match) {
    return IdentifyCUDAPreference(Caller, Match.second);
  };

  // Find the best call preference among the functions in Matches.
  CUDAFunctionPreference BestCFP = GetCFP(*std::max_element(
      Matches.begin(), Matches.end(),
      [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); }));

  // Erase all functions with lower priority.
  llvm::erase_if(Matches,
                 [&](const Pair &Match) { return GetCFP(Match) < BestCFP; });
}

/// When an implicitly-declared special member has to invoke more than one
/// base/field special member, conflicts may occur in the targets of these
/// members. For example, if one base's member is __host__ and another's is
/// __device__, it's a conflict.
/// This function figures out if the given targets \param Target1 and
/// \param Target2 conflict, and if they do not it fills in
/// \param ResolvedTarget with a target that resolves for both calls.
/// \return true if there's a conflict, false otherwise.
static bool
resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1,
                                Sema::CUDAFunctionTarget Target2,
                                Sema::CUDAFunctionTarget *ResolvedTarget) {
  // Only free functions and static member functions may be global.
  assert(Target1 != Sema::CFT_Global);
  assert(Target2 != Sema::CFT_Global);

  if (Target1 == Sema::CFT_HostDevice) {
    *ResolvedTarget = Target2;
  } else if (Target2 == Sema::CFT_HostDevice) {
    *ResolvedTarget = Target1;
  } else if (Target1 != Target2) {
    return true;
  } else {
    *ResolvedTarget = Target1;
  }

  return false;
}

bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
                                                   CXXSpecialMember CSM,
                                                   CXXMethodDecl *MemberDecl,
                                                   bool ConstRHS,
                                                   bool Diagnose) {
  // If the defaulted special member is defined lexically outside of its
  // owning class, or the special member already has explicit device or host
  // attributes, do not infer.
  bool InClass = MemberDecl->getLexicalParent() == MemberDecl->getParent();
  bool HasH = MemberDecl->hasAttr<CUDAHostAttr>();
  bool HasD = MemberDecl->hasAttr<CUDADeviceAttr>();
  bool HasExplicitAttr =
      (HasD && !MemberDecl->getAttr<CUDADeviceAttr>()->isImplicit()) ||
      (HasH && !MemberDecl->getAttr<CUDAHostAttr>()->isImplicit());
  if (!InClass || HasExplicitAttr)
    return false;

  llvm::Optional<CUDAFunctionTarget> InferredTarget;

  // We're going to invoke special member lookup; mark that these special
  // members are called from this one, and not from its caller.
  ContextRAII MethodContext(*this, MemberDecl);

  // Look for special members in base classes that should be invoked from
  // here. Infer the target of this member based on the ones it should call.
  // Skip direct and indirect virtual bases for abstract classes.
  llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases;
  for (const auto &B : ClassDecl->bases()) {
    if (!B.isVirtual()) {
      Bases.push_back(&B);
    }
  }

  if (!ClassDecl->isAbstract()) {
    for (const auto &VB : ClassDecl->vbases()) {
      Bases.push_back(&VB);
    }
  }

  for (const auto *B : Bases) {
    const RecordType *BaseType = B->getType()->getAs<RecordType>();
    if (!BaseType) {
      continue;
    }

    CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        LookupSpecialMember(BaseClassDecl, CSM,
                            /* ConstArg */ ConstRHS,
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR.getMethod());
    if (!InferredTarget.hasValue()) {
      InferredTarget = BaseMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          InferredTarget.getValue(), BaseMethodTarget,
          InferredTarget.getPointer());
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << InferredTarget.getValue() << BaseMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  // Same as for bases, but now for special members of fields.
  for (const auto *F : ClassDecl->fields()) {
    if (F->isInvalidDecl()) {
      continue;
    }

    const RecordType *FieldType =
        Context.getBaseElementType(F->getType())->getAs<RecordType>();
    if (!FieldType) {
      continue;
    }

    CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        LookupSpecialMember(FieldRecDecl, CSM,
                            /* ConstArg */ ConstRHS && !F->isMutable(),
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget FieldMethodTarget =
        IdentifyCUDATarget(SMOR.getMethod());
    if (!InferredTarget.hasValue()) {
      InferredTarget = FieldMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          InferredTarget.getValue(), FieldMethodTarget,
          InferredTarget.getPointer());
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << InferredTarget.getValue()
              << FieldMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  // If no target was inferred, mark this member as __host__ __device__;
  // it's the least restrictive option that can be invoked from any target.
  bool NeedsH = true, NeedsD = true;
  if (InferredTarget.hasValue()) {
    if (InferredTarget.getValue() == CFT_Device)
      NeedsH = false;
    else if (InferredTarget.getValue() == CFT_Host)
      NeedsD = false;
  }

  // We're either setting the attributes for the first time, or the inferred
  // ones must match the previously set ones.
  if (NeedsD && !HasD)
    MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
  if (NeedsH && !HasH)
    MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));

  return false;
}
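
// A hypothetical example of the inference above (types A, B, C are not part of
// this file): for
//
//   struct A { __device__ A(const A &); };
//   struct B { __host__ B(const B &); };
//   struct C : A, B {};
//
// C's implicit copy constructor would have to call a __device__ and a __host__
// member, so it is given CUDAInvalidTargetAttr and any use of it is rejected,
// whereas a class whose bases and fields are all __host__ __device__ (or
// trivial) gets an implicit __host__ __device__ special member.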

bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
  if (!CD->isDefined() && CD->isTemplateInstantiation())
    InstantiateFunctionDefinition(Loc, CD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial constructor
  if (CD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The constructor function has been defined.
  // The constructor function has no parameters,
  // and the function body is an empty compound statement.
  if (!(CD->hasTrivialBody() && CD->getNumParams() == 0))
    return false;

  // Its class has no virtual functions and no virtual base classes.
  if (CD->getParent()->isDynamicClass())
    return false;

  // A union ctor does not call the ctors of its data members.
  if (CD->getParent()->isUnion())
    return true;

  // The only form of initializer allowed is an empty constructor.
  // This will recursively check all base classes and member initializers.
  if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) {
        if (const CXXConstructExpr *CE =
                dyn_cast<CXXConstructExpr>(CI->getInit()))
          return isEmptyCudaConstructor(Loc, CE->getConstructor());
        return false;
      }))
    return false;

  return true;
}

bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
  // No destructor -> no problem.
  if (!DD)
    return true;

  if (!DD->isDefined() && DD->isTemplateInstantiation())
    InstantiateFunctionDefinition(Loc, DD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial destructor
  if (DD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The destructor function has been defined,
  // and the function body is an empty compound statement.
  if (!DD->hasTrivialBody())
    return false;

  const CXXRecordDecl *ClassDecl = DD->getParent();

  // Its class has no virtual functions and no virtual base classes.
  if (ClassDecl->isDynamicClass())
    return false;

  // A union has no base classes, and a union dtor does not call the dtors of
  // its data members.
  if (DD->getParent()->isUnion())
    return true;

  // Only empty destructors are allowed. This will recursively check
  // destructors for all base classes...
  if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) {
        if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl())
          return isEmptyCudaDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  // ... and member fields.
  if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) {
        if (CXXRecordDecl *RD = Field->getType()
                                    ->getBaseElementTypeUnsafe()
                                    ->getAsCXXRecordDecl())
          return isEmptyCudaDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  return true;
}
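
// Illustrative only (hypothetical types, not compiled here): under the
// E.2.3.1 rules checked above,
//
//   struct Empty { Empty() {} };                        // "empty" ctor
//   struct NotEmpty { NotEmpty() { x = 1; } int x; };   // not "empty"
//
// isEmptyCudaConstructor() accepts Empty's constructor and rejects
// NotEmpty's; checkAllowedCUDAInitializer() below relies on these helpers.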

void Sema::checkAllowedCUDAInitializer(VarDecl *VD) {
  if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage())
    return;
  const Expr *Init = VD->getInit();
  if (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>() ||
      VD->hasAttr<CUDASharedAttr>()) {
    if (LangOpts.GPUAllowDeviceInit)
      return;
    assert(!VD->isStaticLocal() || VD->hasAttr<CUDASharedAttr>());
    bool AllowedInit = false;
    if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init))
      AllowedInit =
          isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor());
    // We'll allow constant initializers even if the constructor is non-empty
    // according to CUDA rules. This deviates from NVCC, but allows us to
    // handle things like constexpr constructors.
    if (!AllowedInit &&
        (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>()))
      AllowedInit = VD->getInit()->isConstantInitializer(
          Context, VD->getType()->isReferenceType());

    // Also make sure that the destructor, if there is one, is empty.
    if (AllowedInit)
      if (CXXRecordDecl *RD = VD->getType()->getAsCXXRecordDecl())
        AllowedInit =
            isEmptyCudaDestructor(VD->getLocation(), RD->getDestructor());

    if (!AllowedInit) {
      Diag(VD->getLocation(), VD->hasAttr<CUDASharedAttr>()
                                  ? diag::err_shared_var_init
                                  : diag::err_dynamic_var_init)
          << Init->getSourceRange();
      VD->setInvalidDecl();
    }
  } else {
    // This is a host-side global variable. Check that the initializer is
    // callable from the host side.
    const FunctionDecl *InitFn = nullptr;
    if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) {
      InitFn = CE->getConstructor();
    } else if (const CallExpr *CE = dyn_cast<CallExpr>(Init)) {
      InitFn = CE->getDirectCallee();
    }
    if (InitFn) {
      CUDAFunctionTarget InitFnTarget = IdentifyCUDATarget(InitFn);
      if (InitFnTarget != CFT_Host && InitFnTarget != CFT_HostDevice) {
        Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer)
            << InitFnTarget << InitFn;
        Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn;
        VD->setInvalidDecl();
      }
    }
  }
}
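
// Illustrative only: when device-side initialization is not allowed
// (LangOpts.GPUAllowDeviceInit is false), the check above behaves roughly as
//
//   __device__ int x = 42;               // OK: constant initializer
//   __device__ int z = host_function();  // error: dynamic initialization
//
// where host_function is a hypothetical host-side function used only for the
// example; class-typed variables additionally need "empty" ctors/dtors per the
// helpers above.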

// With -fcuda-host-device-constexpr, an unattributed constexpr function is
// treated as implicitly __host__ __device__, unless:
//  * it is a variadic function (device-side variadic functions are not
//    allowed), or
//  * a __device__ function with this signature was already declared, in which
//    case we output an error, unless the __device__ decl is in a system
//    header, in which case we leave the constexpr function unattributed.
//
// In addition, all function decls are treated as __host__ __device__ when
// ForceCUDAHostDeviceDepth > 0 (corresponding to code within a
//   #pragma clang force_cuda_host_device_begin/end
// pair).
void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD,
                                       const LookupResult &Previous) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");

  if (ForceCUDAHostDeviceDepth > 0) {
    if (!NewD->hasAttr<CUDAHostAttr>())
      NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
    if (!NewD->hasAttr<CUDADeviceAttr>())
      NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    return;
  }

  if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
      NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
      NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
    return;

  // Is D a __device__ function with the same signature as NewD, ignoring CUDA
  // attributes?
  auto IsMatchingDeviceFn = [&](NamedDecl *D) {
    if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
      D = Using->getTargetDecl();
    FunctionDecl *OldD = D->getAsFunction();
    return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
           !OldD->hasAttr<CUDAHostAttr>() &&
           !IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false,
                       /* ConsiderCudaAttrs = */ false);
  };
  auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
  if (It != Previous.end()) {
    // We found a __device__ function with the same name and signature as NewD
    // (ignoring CUDA attrs). This is an error unless that function is defined
    // in a system header, in which case we simply return without making NewD
    // host+device.
    NamedDecl *Match = *It;
    if (!getSourceManager().isInSystemHeader(Match->getLocation())) {
      Diag(NewD->getLocation(),
           diag::err_cuda_unattributed_constexpr_cannot_overload_device)
          << NewD;
      Diag(Match->getLocation(),
           diag::note_cuda_conflicting_device_function_declared_here);
    }
    return;
  }

  NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
  NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
}
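
// Illustrative only: with -fcuda-host-device-constexpr, a declaration such as
//
//   constexpr int twice(int x) { return 2 * x; }  // implicitly __host__ __device__
//
// picks up both attributes (unless a conflicting __device__ declaration
// exists), and inside a force-HD pragma region, as used by the CUDA wrapper
// headers, every function declaration is treated as __host__ __device__ too.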

Sema::DeviceDiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
                                                   unsigned DiagID) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  DeviceDiagBuilder::Kind DiagKind = [this] {
    switch (CurrentCUDATarget()) {
    case CFT_Global:
    case CFT_Device:
      return DeviceDiagBuilder::K_Immediate;
    case CFT_HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device. Defer any errors in device
      // mode until the function is known-emitted.
      if (getLangOpts().CUDAIsDevice) {
        return (getEmissionStatus(cast<FunctionDecl>(CurContext)) ==
                FunctionEmissionStatus::Emitted)
                   ? DeviceDiagBuilder::K_ImmediateWithCallStack
                   : DeviceDiagBuilder::K_Deferred;
      }
      return DeviceDiagBuilder::K_Nop;

    default:
      return DeviceDiagBuilder::K_Nop;
    }
  }();
  return DeviceDiagBuilder(DiagKind, Loc, DiagID,
                           dyn_cast<FunctionDecl>(CurContext), *this);
}

Sema::DeviceDiagBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc,
                                                 unsigned DiagID) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  DeviceDiagBuilder::Kind DiagKind = [this] {
    switch (CurrentCUDATarget()) {
    case CFT_Host:
      return DeviceDiagBuilder::K_Immediate;
    case CFT_HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device. Defer any errors in device
      // mode until the function is known-emitted.
      if (getLangOpts().CUDAIsDevice)
        return DeviceDiagBuilder::K_Nop;

      return (getEmissionStatus(cast<FunctionDecl>(CurContext)) ==
              FunctionEmissionStatus::Emitted)
                 ? DeviceDiagBuilder::K_ImmediateWithCallStack
                 : DeviceDiagBuilder::K_Deferred;
    default:
      return DeviceDiagBuilder::K_Nop;
    }
  }();
  return DeviceDiagBuilder(DiagKind, Loc, DiagID,
                           dyn_cast<FunctionDecl>(CurContext), *this);
}

bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  assert(Callee && "Callee may not be null.");

  auto &ExprEvalCtx = ExprEvalContexts.back();
  if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated())
    return true;

  // FIXME: Is bailing out early correct here? Should we instead assume that
  // the caller is a global initializer?
  FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
  if (!Caller)
    return true;

  // If the caller is known-emitted, mark the callee as known-emitted.
  // Otherwise, mark the call in our call graph so we can traverse it later.
  bool CallerKnownEmitted =
      getEmissionStatus(Caller) == FunctionEmissionStatus::Emitted;
  DeviceDiagBuilder::Kind DiagKind = [this, Caller, Callee,
                                      CallerKnownEmitted] {
    switch (IdentifyCUDAPreference(Caller, Callee)) {
    case CFP_Never:
      return DeviceDiagBuilder::K_Immediate;
    case CFP_WrongSide:
      assert(Caller && "WrongSide calls require a non-null caller");
      // If we know the caller will be emitted, we know this wrong-side call
      // will be emitted, so it's an immediate error. Otherwise, defer the
      // error until we know the caller is emitted.
      return CallerKnownEmitted ? DeviceDiagBuilder::K_ImmediateWithCallStack
                                : DeviceDiagBuilder::K_Deferred;
    default:
      return DeviceDiagBuilder::K_Nop;
    }
  }();

  if (DiagKind == DeviceDiagBuilder::K_Nop)
    return true;

  // Avoid emitting this error twice for the same location. Using a hashtable
  // like this is unfortunate, but because we must continue parsing as normal
  // after encountering a deferred error, it's otherwise very tricky for us to
  // ensure that we only emit this deferred error once.
  if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second)
    return true;

  DeviceDiagBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, *this)
      << IdentifyCUDATarget(Callee) << Callee << IdentifyCUDATarget(Caller);
  DeviceDiagBuilder(DiagKind, Callee->getLocation(), diag::note_previous_decl,
                    Caller, *this)
      << Callee;
  return DiagKind != DeviceDiagBuilder::K_Immediate &&
         DiagKind != DeviceDiagBuilder::K_ImmediateWithCallStack;
}

void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>())
    return;
  FunctionDecl *CurFn = dyn_cast<FunctionDecl>(CurContext);
  if (!CurFn)
    return;
  CUDAFunctionTarget Target = IdentifyCUDATarget(CurFn);
  if (Target == CFT_Global || Target == CFT_Device) {
    Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
  } else if (Target == CFT_HostDevice) {
    Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    Method->addAttr(CUDAHostAttr::CreateImplicit(Context));
  }
}
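
// Illustrative only: the lambda handling above means that in
//
//   __device__ void f() {
//     auto l = [] { return 1; };  // operator() becomes implicitly __device__
//   }
//   __host__ __device__ void g() {
//     auto m = [] { return 2; };  // operator() becomes __host__ __device__
//   }
//
// lambdas without explicit __host__/__device__ attributes inherit the target
// of their enclosing function.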

void Sema::checkCUDATargetOverload(FunctionDecl *NewFD,
                                   const LookupResult &Previous) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD);
  for (NamedDecl *OldND : Previous) {
    FunctionDecl *OldFD = OldND->getAsFunction();
    if (!OldFD)
      continue;

    CUDAFunctionTarget OldTarget = IdentifyCUDATarget(OldFD);
    // Don't allow HD and global functions to overload other functions with the
    // same signature. We allow overloading based on CUDA attributes so that
    // functions can have different implementations on the host and device, but
    // HD/global functions "exist" in some sense on both the host and device,
    // so should have the same implementation on both sides.
    if (NewTarget != OldTarget &&
        ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice) ||
         (NewTarget == CFT_Global) || (OldTarget == CFT_Global)) &&
        !IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false,
                    /* ConsiderCudaAttrs = */ false)) {
      Diag(NewFD->getLocation(), diag::err_cuda_ovl_target)
          << NewTarget << NewFD->getDeclName() << OldTarget << OldFD;
      Diag(OldFD->getLocation(), diag::note_previous_declaration);
      NewFD->setInvalidDecl();
      break;
    }
  }
}
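
// Illustrative only: the check above rejects, for example,
//
//   __host__ int pick();
//   __host__ __device__ int pick();     // error: HD overlaps a same-signature
//                                       // host function
//
// while still allowing separate host and device definitions of one signature:
//
//   __host__ int which() { return 0; }
//   __device__ int which() { return 1; }  // OK: host/device overloading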

template <typename AttrTy>
static void copyAttrIfPresent(Sema &S, FunctionDecl *FD,
                              const FunctionDecl &TemplateFD) {
  if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) {
    AttrTy *Clone = Attribute->clone(S.Context);
    Clone->setInherited(true);
    FD->addAttr(Clone);
  }
}

void Sema::inheritCUDATargetAttrs(FunctionDecl *FD,
                                  const FunctionTemplateDecl &TD) {
  const FunctionDecl &TemplateFD = *TD.getTemplatedDecl();
  copyAttrIfPresent<CUDAGlobalAttr>(*this, FD, TemplateFD);
  copyAttrIfPresent<CUDAHostAttr>(*this, FD, TemplateFD);
  copyAttrIfPresent<CUDADeviceAttr>(*this, FD, TemplateFD);
}

std::string Sema::getCudaConfigureFuncName() const {
  if (getLangOpts().HIP)
    return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration"
                                            : "hipConfigureCall";

  // New CUDA kernel launch sequence.
  if (CudaFeatureEnabled(Context.getTargetInfo().getSDKVersion(),
                         CudaFeature::CUDA_USES_NEW_LAUNCH))
    return "__cudaPushCallConfiguration";

  // Legacy CUDA kernel configuration call.
  return "cudaConfigureCall";
}