1 //===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// This file implements semantic analysis for CUDA constructs. 10 /// 11 //===----------------------------------------------------------------------===// 12 13 #include "clang/AST/ASTContext.h" 14 #include "clang/AST/Decl.h" 15 #include "clang/AST/ExprCXX.h" 16 #include "clang/Basic/Cuda.h" 17 #include "clang/Basic/TargetInfo.h" 18 #include "clang/Lex/Preprocessor.h" 19 #include "clang/Sema/Lookup.h" 20 #include "clang/Sema/Sema.h" 21 #include "clang/Sema/SemaDiagnostic.h" 22 #include "clang/Sema/SemaInternal.h" 23 #include "clang/Sema/Template.h" 24 #include "llvm/ADT/Optional.h" 25 #include "llvm/ADT/SmallVector.h" 26 using namespace clang; 27 28 void Sema::PushForceCUDAHostDevice() { 29 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 30 ForceCUDAHostDeviceDepth++; 31 } 32 33 bool Sema::PopForceCUDAHostDevice() { 34 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 35 if (ForceCUDAHostDeviceDepth == 0) 36 return false; 37 ForceCUDAHostDeviceDepth--; 38 return true; 39 } 40 41 ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc, 42 MultiExprArg ExecConfig, 43 SourceLocation GGGLoc) { 44 FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl(); 45 if (!ConfigDecl) 46 return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use) 47 << getCudaConfigureFuncName()); 48 QualType ConfigQTy = ConfigDecl->getType(); 49 50 DeclRefExpr *ConfigDR = new (Context) 51 DeclRefExpr(Context, ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc); 52 MarkFunctionReferenced(LLLLoc, ConfigDecl); 53 54 return BuildCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr, 55 /*IsExecConfig=*/true); 56 } 57 58 Sema::CUDAFunctionTarget 59 Sema::IdentifyCUDATarget(const ParsedAttributesView &Attrs) { 60 bool HasHostAttr = false; 61 bool HasDeviceAttr = false; 62 bool HasGlobalAttr = false; 63 bool HasInvalidTargetAttr = false; 64 for (const ParsedAttr &AL : Attrs) { 65 switch (AL.getKind()) { 66 case ParsedAttr::AT_CUDAGlobal: 67 HasGlobalAttr = true; 68 break; 69 case ParsedAttr::AT_CUDAHost: 70 HasHostAttr = true; 71 break; 72 case ParsedAttr::AT_CUDADevice: 73 HasDeviceAttr = true; 74 break; 75 case ParsedAttr::AT_CUDAInvalidTarget: 76 HasInvalidTargetAttr = true; 77 break; 78 default: 79 break; 80 } 81 } 82 83 if (HasInvalidTargetAttr) 84 return CFT_InvalidTarget; 85 86 if (HasGlobalAttr) 87 return CFT_Global; 88 89 if (HasHostAttr && HasDeviceAttr) 90 return CFT_HostDevice; 91 92 if (HasDeviceAttr) 93 return CFT_Device; 94 95 return CFT_Host; 96 } 97 98 template <typename A> 99 static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) { 100 return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) { 101 return isa<A>(Attribute) && 102 !(IgnoreImplicitAttr && Attribute->isImplicit()); 103 }); 104 } 105 106 /// IdentifyCUDATarget - Determine the CUDA compilation target for this function 107 Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D, 108 bool IgnoreImplicitHDAttr) { 109 // Code that lives outside a function is run on the host. 110 if (D == nullptr) 111 return CFT_Host; 112 113 if (D->hasAttr<CUDAInvalidTargetAttr>()) 114 return CFT_InvalidTarget; 115 116 if (D->hasAttr<CUDAGlobalAttr>()) 117 return CFT_Global; 118 119 if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) { 120 if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) 121 return CFT_HostDevice; 122 return CFT_Device; 123 } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) { 124 return CFT_Host; 125 } else if (D->isImplicit() && !IgnoreImplicitHDAttr) { 126 // Some implicit declarations (like intrinsic functions) are not marked. 127 // Set the most lenient target on them for maximal flexibility. 128 return CFT_HostDevice; 129 } 130 131 return CFT_Host; 132 } 133 134 // * CUDA Call preference table 135 // 136 // F - from, 137 // T - to 138 // Ph - preference in host mode 139 // Pd - preference in device mode 140 // H - handled in (x) 141 // Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never. 142 // 143 // | F | T | Ph | Pd | H | 144 // |----+----+-----+-----+-----+ 145 // | d | d | N | N | (c) | 146 // | d | g | -- | -- | (a) | 147 // | d | h | -- | -- | (e) | 148 // | d | hd | HD | HD | (b) | 149 // | g | d | N | N | (c) | 150 // | g | g | -- | -- | (a) | 151 // | g | h | -- | -- | (e) | 152 // | g | hd | HD | HD | (b) | 153 // | h | d | -- | -- | (e) | 154 // | h | g | N | N | (c) | 155 // | h | h | N | N | (c) | 156 // | h | hd | HD | HD | (b) | 157 // | hd | d | WS | SS | (d) | 158 // | hd | g | SS | -- |(d/a)| 159 // | hd | h | SS | WS | (d) | 160 // | hd | hd | HD | HD | (b) | 161 162 Sema::CUDAFunctionPreference 163 Sema::IdentifyCUDAPreference(const FunctionDecl *Caller, 164 const FunctionDecl *Callee) { 165 assert(Callee && "Callee must be valid."); 166 CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller); 167 CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee); 168 169 // If one of the targets is invalid, the check always fails, no matter what 170 // the other target is. 171 if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget) 172 return CFP_Never; 173 174 // (a) Can't call global from some contexts until we support CUDA's 175 // dynamic parallelism. 176 if (CalleeTarget == CFT_Global && 177 (CallerTarget == CFT_Global || CallerTarget == CFT_Device)) 178 return CFP_Never; 179 180 // (b) Calling HostDevice is OK for everyone. 181 if (CalleeTarget == CFT_HostDevice) 182 return CFP_HostDevice; 183 184 // (c) Best case scenarios 185 if (CalleeTarget == CallerTarget || 186 (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) || 187 (CallerTarget == CFT_Global && CalleeTarget == CFT_Device)) 188 return CFP_Native; 189 190 // (d) HostDevice behavior depends on compilation mode. 191 if (CallerTarget == CFT_HostDevice) { 192 // It's OK to call a compilation-mode matching function from an HD one. 193 if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) || 194 (!getLangOpts().CUDAIsDevice && 195 (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global))) 196 return CFP_SameSide; 197 198 // Calls from HD to non-mode-matching functions (i.e., to host functions 199 // when compiling in device mode or to device functions when compiling in 200 // host mode) are allowed at the sema level, but eventually rejected if 201 // they're ever codegened. TODO: Reject said calls earlier. 202 return CFP_WrongSide; 203 } 204 205 // (e) Calling across device/host boundary is not something you should do. 206 if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) || 207 (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) || 208 (CallerTarget == CFT_Global && CalleeTarget == CFT_Host)) 209 return CFP_Never; 210 211 llvm_unreachable("All cases should've been handled by now."); 212 } 213 214 template <typename AttrT> static bool hasImplicitAttr(const FunctionDecl *D) { 215 if (!D) 216 return false; 217 if (auto *A = D->getAttr<AttrT>()) 218 return A->isImplicit(); 219 return D->isImplicit(); 220 } 221 222 bool Sema::IsCUDAImplicitHostDeviceFunction(const FunctionDecl *D) { 223 bool IsImplicitDevAttr = hasImplicitAttr<CUDADeviceAttr>(D); 224 bool IsImplicitHostAttr = hasImplicitAttr<CUDAHostAttr>(D); 225 return IsImplicitDevAttr && IsImplicitHostAttr; 226 } 227 228 void Sema::EraseUnwantedCUDAMatches( 229 const FunctionDecl *Caller, 230 SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) { 231 if (Matches.size() <= 1) 232 return; 233 234 using Pair = std::pair<DeclAccessPair, FunctionDecl*>; 235 236 // Gets the CUDA function preference for a call from Caller to Match. 237 auto GetCFP = [&](const Pair &Match) { 238 return IdentifyCUDAPreference(Caller, Match.second); 239 }; 240 241 // Find the best call preference among the functions in Matches. 242 CUDAFunctionPreference BestCFP = GetCFP(*std::max_element( 243 Matches.begin(), Matches.end(), 244 [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); })); 245 246 // Erase all functions with lower priority. 247 llvm::erase_if(Matches, 248 [&](const Pair &Match) { return GetCFP(Match) < BestCFP; }); 249 } 250 251 /// When an implicitly-declared special member has to invoke more than one 252 /// base/field special member, conflicts may occur in the targets of these 253 /// members. For example, if one base's member __host__ and another's is 254 /// __device__, it's a conflict. 255 /// This function figures out if the given targets \param Target1 and 256 /// \param Target2 conflict, and if they do not it fills in 257 /// \param ResolvedTarget with a target that resolves for both calls. 258 /// \return true if there's a conflict, false otherwise. 259 static bool 260 resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1, 261 Sema::CUDAFunctionTarget Target2, 262 Sema::CUDAFunctionTarget *ResolvedTarget) { 263 // Only free functions and static member functions may be global. 264 assert(Target1 != Sema::CFT_Global); 265 assert(Target2 != Sema::CFT_Global); 266 267 if (Target1 == Sema::CFT_HostDevice) { 268 *ResolvedTarget = Target2; 269 } else if (Target2 == Sema::CFT_HostDevice) { 270 *ResolvedTarget = Target1; 271 } else if (Target1 != Target2) { 272 return true; 273 } else { 274 *ResolvedTarget = Target1; 275 } 276 277 return false; 278 } 279 280 bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl, 281 CXXSpecialMember CSM, 282 CXXMethodDecl *MemberDecl, 283 bool ConstRHS, 284 bool Diagnose) { 285 // If the defaulted special member is defined lexically outside of its 286 // owning class, or the special member already has explicit device or host 287 // attributes, do not infer. 288 bool InClass = MemberDecl->getLexicalParent() == MemberDecl->getParent(); 289 bool HasH = MemberDecl->hasAttr<CUDAHostAttr>(); 290 bool HasD = MemberDecl->hasAttr<CUDADeviceAttr>(); 291 bool HasExplicitAttr = 292 (HasD && !MemberDecl->getAttr<CUDADeviceAttr>()->isImplicit()) || 293 (HasH && !MemberDecl->getAttr<CUDAHostAttr>()->isImplicit()); 294 if (!InClass || HasExplicitAttr) 295 return false; 296 297 llvm::Optional<CUDAFunctionTarget> InferredTarget; 298 299 // We're going to invoke special member lookup; mark that these special 300 // members are called from this one, and not from its caller. 301 ContextRAII MethodContext(*this, MemberDecl); 302 303 // Look for special members in base classes that should be invoked from here. 304 // Infer the target of this member base on the ones it should call. 305 // Skip direct and indirect virtual bases for abstract classes. 306 llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases; 307 for (const auto &B : ClassDecl->bases()) { 308 if (!B.isVirtual()) { 309 Bases.push_back(&B); 310 } 311 } 312 313 if (!ClassDecl->isAbstract()) { 314 for (const auto &VB : ClassDecl->vbases()) { 315 Bases.push_back(&VB); 316 } 317 } 318 319 for (const auto *B : Bases) { 320 const RecordType *BaseType = B->getType()->getAs<RecordType>(); 321 if (!BaseType) { 322 continue; 323 } 324 325 CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl()); 326 Sema::SpecialMemberOverloadResult SMOR = 327 LookupSpecialMember(BaseClassDecl, CSM, 328 /* ConstArg */ ConstRHS, 329 /* VolatileArg */ false, 330 /* RValueThis */ false, 331 /* ConstThis */ false, 332 /* VolatileThis */ false); 333 334 if (!SMOR.getMethod()) 335 continue; 336 337 CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR.getMethod()); 338 if (!InferredTarget.hasValue()) { 339 InferredTarget = BaseMethodTarget; 340 } else { 341 bool ResolutionError = resolveCalleeCUDATargetConflict( 342 InferredTarget.getValue(), BaseMethodTarget, 343 InferredTarget.getPointer()); 344 if (ResolutionError) { 345 if (Diagnose) { 346 Diag(ClassDecl->getLocation(), 347 diag::note_implicit_member_target_infer_collision) 348 << (unsigned)CSM << InferredTarget.getValue() << BaseMethodTarget; 349 } 350 MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context)); 351 return true; 352 } 353 } 354 } 355 356 // Same as for bases, but now for special members of fields. 357 for (const auto *F : ClassDecl->fields()) { 358 if (F->isInvalidDecl()) { 359 continue; 360 } 361 362 const RecordType *FieldType = 363 Context.getBaseElementType(F->getType())->getAs<RecordType>(); 364 if (!FieldType) { 365 continue; 366 } 367 368 CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl()); 369 Sema::SpecialMemberOverloadResult SMOR = 370 LookupSpecialMember(FieldRecDecl, CSM, 371 /* ConstArg */ ConstRHS && !F->isMutable(), 372 /* VolatileArg */ false, 373 /* RValueThis */ false, 374 /* ConstThis */ false, 375 /* VolatileThis */ false); 376 377 if (!SMOR.getMethod()) 378 continue; 379 380 CUDAFunctionTarget FieldMethodTarget = 381 IdentifyCUDATarget(SMOR.getMethod()); 382 if (!InferredTarget.hasValue()) { 383 InferredTarget = FieldMethodTarget; 384 } else { 385 bool ResolutionError = resolveCalleeCUDATargetConflict( 386 InferredTarget.getValue(), FieldMethodTarget, 387 InferredTarget.getPointer()); 388 if (ResolutionError) { 389 if (Diagnose) { 390 Diag(ClassDecl->getLocation(), 391 diag::note_implicit_member_target_infer_collision) 392 << (unsigned)CSM << InferredTarget.getValue() 393 << FieldMethodTarget; 394 } 395 MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context)); 396 return true; 397 } 398 } 399 } 400 401 402 // If no target was inferred, mark this member as __host__ __device__; 403 // it's the least restrictive option that can be invoked from any target. 404 bool NeedsH = true, NeedsD = true; 405 if (InferredTarget.hasValue()) { 406 if (InferredTarget.getValue() == CFT_Device) 407 NeedsH = false; 408 else if (InferredTarget.getValue() == CFT_Host) 409 NeedsD = false; 410 } 411 412 // We either setting attributes first time, or the inferred ones must match 413 // previously set ones. 414 if (NeedsD && !HasD) 415 MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context)); 416 if (NeedsH && !HasH) 417 MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context)); 418 419 return false; 420 } 421 422 bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) { 423 if (!CD->isDefined() && CD->isTemplateInstantiation()) 424 InstantiateFunctionDefinition(Loc, CD->getFirstDecl()); 425 426 // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered 427 // empty at a point in the translation unit, if it is either a 428 // trivial constructor 429 if (CD->isTrivial()) 430 return true; 431 432 // ... or it satisfies all of the following conditions: 433 // The constructor function has been defined. 434 // The constructor function has no parameters, 435 // and the function body is an empty compound statement. 436 if (!(CD->hasTrivialBody() && CD->getNumParams() == 0)) 437 return false; 438 439 // Its class has no virtual functions and no virtual base classes. 440 if (CD->getParent()->isDynamicClass()) 441 return false; 442 443 // Union ctor does not call ctors of its data members. 444 if (CD->getParent()->isUnion()) 445 return true; 446 447 // The only form of initializer allowed is an empty constructor. 448 // This will recursively check all base classes and member initializers 449 if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) { 450 if (const CXXConstructExpr *CE = 451 dyn_cast<CXXConstructExpr>(CI->getInit())) 452 return isEmptyCudaConstructor(Loc, CE->getConstructor()); 453 return false; 454 })) 455 return false; 456 457 return true; 458 } 459 460 bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) { 461 // No destructor -> no problem. 462 if (!DD) 463 return true; 464 465 if (!DD->isDefined() && DD->isTemplateInstantiation()) 466 InstantiateFunctionDefinition(Loc, DD->getFirstDecl()); 467 468 // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered 469 // empty at a point in the translation unit, if it is either a 470 // trivial constructor 471 if (DD->isTrivial()) 472 return true; 473 474 // ... or it satisfies all of the following conditions: 475 // The destructor function has been defined. 476 // and the function body is an empty compound statement. 477 if (!DD->hasTrivialBody()) 478 return false; 479 480 const CXXRecordDecl *ClassDecl = DD->getParent(); 481 482 // Its class has no virtual functions and no virtual base classes. 483 if (ClassDecl->isDynamicClass()) 484 return false; 485 486 // Union does not have base class and union dtor does not call dtors of its 487 // data members. 488 if (DD->getParent()->isUnion()) 489 return true; 490 491 // Only empty destructors are allowed. This will recursively check 492 // destructors for all base classes... 493 if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) { 494 if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl()) 495 return isEmptyCudaDestructor(Loc, RD->getDestructor()); 496 return true; 497 })) 498 return false; 499 500 // ... and member fields. 501 if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) { 502 if (CXXRecordDecl *RD = Field->getType() 503 ->getBaseElementTypeUnsafe() 504 ->getAsCXXRecordDecl()) 505 return isEmptyCudaDestructor(Loc, RD->getDestructor()); 506 return true; 507 })) 508 return false; 509 510 return true; 511 } 512 513 void Sema::checkAllowedCUDAInitializer(VarDecl *VD) { 514 if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage()) 515 return; 516 const Expr *Init = VD->getInit(); 517 if (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>() || 518 VD->hasAttr<CUDASharedAttr>()) { 519 if (LangOpts.GPUAllowDeviceInit) 520 return; 521 assert(!VD->isStaticLocal() || VD->hasAttr<CUDASharedAttr>()); 522 bool AllowedInit = false; 523 if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) 524 AllowedInit = 525 isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor()); 526 // We'll allow constant initializers even if it's a non-empty 527 // constructor according to CUDA rules. This deviates from NVCC, 528 // but allows us to handle things like constexpr constructors. 529 if (!AllowedInit && 530 (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>())) 531 AllowedInit = VD->getInit()->isConstantInitializer( 532 Context, VD->getType()->isReferenceType()); 533 534 // Also make sure that destructor, if there is one, is empty. 535 if (AllowedInit) 536 if (CXXRecordDecl *RD = VD->getType()->getAsCXXRecordDecl()) 537 AllowedInit = 538 isEmptyCudaDestructor(VD->getLocation(), RD->getDestructor()); 539 540 if (!AllowedInit) { 541 Diag(VD->getLocation(), VD->hasAttr<CUDASharedAttr>() 542 ? diag::err_shared_var_init 543 : diag::err_dynamic_var_init) 544 << Init->getSourceRange(); 545 VD->setInvalidDecl(); 546 } 547 } else { 548 // This is a host-side global variable. Check that the initializer is 549 // callable from the host side. 550 const FunctionDecl *InitFn = nullptr; 551 if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) { 552 InitFn = CE->getConstructor(); 553 } else if (const CallExpr *CE = dyn_cast<CallExpr>(Init)) { 554 InitFn = CE->getDirectCallee(); 555 } 556 if (InitFn) { 557 CUDAFunctionTarget InitFnTarget = IdentifyCUDATarget(InitFn); 558 if (InitFnTarget != CFT_Host && InitFnTarget != CFT_HostDevice) { 559 Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer) 560 << InitFnTarget << InitFn; 561 Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn; 562 VD->setInvalidDecl(); 563 } 564 } 565 } 566 } 567 568 // With -fcuda-host-device-constexpr, an unattributed constexpr function is 569 // treated as implicitly __host__ __device__, unless: 570 // * it is a variadic function (device-side variadic functions are not 571 // allowed), or 572 // * a __device__ function with this signature was already declared, in which 573 // case in which case we output an error, unless the __device__ decl is in a 574 // system header, in which case we leave the constexpr function unattributed. 575 // 576 // In addition, all function decls are treated as __host__ __device__ when 577 // ForceCUDAHostDeviceDepth > 0 (corresponding to code within a 578 // #pragma clang force_cuda_host_device_begin/end 579 // pair). 580 void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD, 581 const LookupResult &Previous) { 582 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 583 584 if (ForceCUDAHostDeviceDepth > 0) { 585 if (!NewD->hasAttr<CUDAHostAttr>()) 586 NewD->addAttr(CUDAHostAttr::CreateImplicit(Context)); 587 if (!NewD->hasAttr<CUDADeviceAttr>()) 588 NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context)); 589 return; 590 } 591 592 if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() || 593 NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() || 594 NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>()) 595 return; 596 597 // Is D a __device__ function with the same signature as NewD, ignoring CUDA 598 // attributes? 599 auto IsMatchingDeviceFn = [&](NamedDecl *D) { 600 if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D)) 601 D = Using->getTargetDecl(); 602 FunctionDecl *OldD = D->getAsFunction(); 603 return OldD && OldD->hasAttr<CUDADeviceAttr>() && 604 !OldD->hasAttr<CUDAHostAttr>() && 605 !IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false, 606 /* ConsiderCudaAttrs = */ false); 607 }; 608 auto It = llvm::find_if(Previous, IsMatchingDeviceFn); 609 if (It != Previous.end()) { 610 // We found a __device__ function with the same name and signature as NewD 611 // (ignoring CUDA attrs). This is an error unless that function is defined 612 // in a system header, in which case we simply return without making NewD 613 // host+device. 614 NamedDecl *Match = *It; 615 if (!getSourceManager().isInSystemHeader(Match->getLocation())) { 616 Diag(NewD->getLocation(), 617 diag::err_cuda_unattributed_constexpr_cannot_overload_device) 618 << NewD; 619 Diag(Match->getLocation(), 620 diag::note_cuda_conflicting_device_function_declared_here); 621 } 622 return; 623 } 624 625 NewD->addAttr(CUDAHostAttr::CreateImplicit(Context)); 626 NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context)); 627 } 628 629 Sema::DeviceDiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc, 630 unsigned DiagID) { 631 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 632 DeviceDiagBuilder::Kind DiagKind = [this] { 633 switch (CurrentCUDATarget()) { 634 case CFT_Global: 635 case CFT_Device: 636 return DeviceDiagBuilder::K_Immediate; 637 case CFT_HostDevice: 638 // An HD function counts as host code if we're compiling for host, and 639 // device code if we're compiling for device. Defer any errors in device 640 // mode until the function is known-emitted. 641 if (getLangOpts().CUDAIsDevice) { 642 return (getEmissionStatus(cast<FunctionDecl>(CurContext)) == 643 FunctionEmissionStatus::Emitted) 644 ? DeviceDiagBuilder::K_ImmediateWithCallStack 645 : DeviceDiagBuilder::K_Deferred; 646 } 647 return DeviceDiagBuilder::K_Nop; 648 649 default: 650 return DeviceDiagBuilder::K_Nop; 651 } 652 }(); 653 return DeviceDiagBuilder(DiagKind, Loc, DiagID, 654 dyn_cast<FunctionDecl>(CurContext), *this); 655 } 656 657 Sema::DeviceDiagBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc, 658 unsigned DiagID) { 659 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 660 DeviceDiagBuilder::Kind DiagKind = [this] { 661 switch (CurrentCUDATarget()) { 662 case CFT_Host: 663 return DeviceDiagBuilder::K_Immediate; 664 case CFT_HostDevice: 665 // An HD function counts as host code if we're compiling for host, and 666 // device code if we're compiling for device. Defer any errors in device 667 // mode until the function is known-emitted. 668 if (getLangOpts().CUDAIsDevice) 669 return DeviceDiagBuilder::K_Nop; 670 671 return (getEmissionStatus(cast<FunctionDecl>(CurContext)) == 672 FunctionEmissionStatus::Emitted) 673 ? DeviceDiagBuilder::K_ImmediateWithCallStack 674 : DeviceDiagBuilder::K_Deferred; 675 default: 676 return DeviceDiagBuilder::K_Nop; 677 } 678 }(); 679 return DeviceDiagBuilder(DiagKind, Loc, DiagID, 680 dyn_cast<FunctionDecl>(CurContext), *this); 681 } 682 683 bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) { 684 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 685 assert(Callee && "Callee may not be null."); 686 687 auto &ExprEvalCtx = ExprEvalContexts.back(); 688 if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated()) 689 return true; 690 691 // FIXME: Is bailing out early correct here? Should we instead assume that 692 // the caller is a global initializer? 693 FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext); 694 if (!Caller) 695 return true; 696 697 // If the caller is known-emitted, mark the callee as known-emitted. 698 // Otherwise, mark the call in our call graph so we can traverse it later. 699 bool CallerKnownEmitted = 700 getEmissionStatus(Caller) == FunctionEmissionStatus::Emitted; 701 DeviceDiagBuilder::Kind DiagKind = [this, Caller, Callee, 702 CallerKnownEmitted] { 703 switch (IdentifyCUDAPreference(Caller, Callee)) { 704 case CFP_Never: 705 return DeviceDiagBuilder::K_Immediate; 706 case CFP_WrongSide: 707 assert(Caller && "WrongSide calls require a non-null caller"); 708 // If we know the caller will be emitted, we know this wrong-side call 709 // will be emitted, so it's an immediate error. Otherwise, defer the 710 // error until we know the caller is emitted. 711 return CallerKnownEmitted ? DeviceDiagBuilder::K_ImmediateWithCallStack 712 : DeviceDiagBuilder::K_Deferred; 713 default: 714 return DeviceDiagBuilder::K_Nop; 715 } 716 }(); 717 718 if (DiagKind == DeviceDiagBuilder::K_Nop) 719 return true; 720 721 // Avoid emitting this error twice for the same location. Using a hashtable 722 // like this is unfortunate, but because we must continue parsing as normal 723 // after encountering a deferred error, it's otherwise very tricky for us to 724 // ensure that we only emit this deferred error once. 725 if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second) 726 return true; 727 728 DeviceDiagBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, *this) 729 << IdentifyCUDATarget(Callee) << Callee << IdentifyCUDATarget(Caller); 730 DeviceDiagBuilder(DiagKind, Callee->getLocation(), diag::note_previous_decl, 731 Caller, *this) 732 << Callee; 733 return DiagKind != DeviceDiagBuilder::K_Immediate && 734 DiagKind != DeviceDiagBuilder::K_ImmediateWithCallStack; 735 } 736 737 void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) { 738 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 739 if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>()) 740 return; 741 FunctionDecl *CurFn = dyn_cast<FunctionDecl>(CurContext); 742 if (!CurFn) 743 return; 744 CUDAFunctionTarget Target = IdentifyCUDATarget(CurFn); 745 if (Target == CFT_Global || Target == CFT_Device) { 746 Method->addAttr(CUDADeviceAttr::CreateImplicit(Context)); 747 } else if (Target == CFT_HostDevice) { 748 Method->addAttr(CUDADeviceAttr::CreateImplicit(Context)); 749 Method->addAttr(CUDAHostAttr::CreateImplicit(Context)); 750 } 751 } 752 753 void Sema::checkCUDATargetOverload(FunctionDecl *NewFD, 754 const LookupResult &Previous) { 755 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 756 CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD); 757 for (NamedDecl *OldND : Previous) { 758 FunctionDecl *OldFD = OldND->getAsFunction(); 759 if (!OldFD) 760 continue; 761 762 CUDAFunctionTarget OldTarget = IdentifyCUDATarget(OldFD); 763 // Don't allow HD and global functions to overload other functions with the 764 // same signature. We allow overloading based on CUDA attributes so that 765 // functions can have different implementations on the host and device, but 766 // HD/global functions "exist" in some sense on both the host and device, so 767 // should have the same implementation on both sides. 768 if (NewTarget != OldTarget && 769 ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice) || 770 (NewTarget == CFT_Global) || (OldTarget == CFT_Global)) && 771 !IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false, 772 /* ConsiderCudaAttrs = */ false)) { 773 Diag(NewFD->getLocation(), diag::err_cuda_ovl_target) 774 << NewTarget << NewFD->getDeclName() << OldTarget << OldFD; 775 Diag(OldFD->getLocation(), diag::note_previous_declaration); 776 NewFD->setInvalidDecl(); 777 break; 778 } 779 } 780 } 781 782 template <typename AttrTy> 783 static void copyAttrIfPresent(Sema &S, FunctionDecl *FD, 784 const FunctionDecl &TemplateFD) { 785 if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) { 786 AttrTy *Clone = Attribute->clone(S.Context); 787 Clone->setInherited(true); 788 FD->addAttr(Clone); 789 } 790 } 791 792 void Sema::inheritCUDATargetAttrs(FunctionDecl *FD, 793 const FunctionTemplateDecl &TD) { 794 const FunctionDecl &TemplateFD = *TD.getTemplatedDecl(); 795 copyAttrIfPresent<CUDAGlobalAttr>(*this, FD, TemplateFD); 796 copyAttrIfPresent<CUDAHostAttr>(*this, FD, TemplateFD); 797 copyAttrIfPresent<CUDADeviceAttr>(*this, FD, TemplateFD); 798 } 799 800 std::string Sema::getCudaConfigureFuncName() const { 801 if (getLangOpts().HIP) 802 return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration" 803 : "hipConfigureCall"; 804 805 // New CUDA kernel launch sequence. 806 if (CudaFeatureEnabled(Context.getTargetInfo().getSDKVersion(), 807 CudaFeature::CUDA_USES_NEW_LAUNCH)) 808 return "__cudaPushCallConfiguration"; 809 810 // Legacy CUDA kernel configuration call 811 return "cudaConfigureCall"; 812 } 813