1 //===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// This file implements semantic analysis for CUDA constructs. 10 /// 11 //===----------------------------------------------------------------------===// 12 13 #include "clang/AST/ASTContext.h" 14 #include "clang/AST/Decl.h" 15 #include "clang/AST/ExprCXX.h" 16 #include "clang/Basic/Cuda.h" 17 #include "clang/Basic/TargetInfo.h" 18 #include "clang/Lex/Preprocessor.h" 19 #include "clang/Sema/Lookup.h" 20 #include "clang/Sema/ScopeInfo.h" 21 #include "clang/Sema/Sema.h" 22 #include "clang/Sema/SemaDiagnostic.h" 23 #include "clang/Sema/SemaInternal.h" 24 #include "clang/Sema/Template.h" 25 #include "llvm/ADT/Optional.h" 26 #include "llvm/ADT/SmallVector.h" 27 using namespace clang; 28 29 void Sema::PushForceCUDAHostDevice() { 30 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 31 ForceCUDAHostDeviceDepth++; 32 } 33 34 bool Sema::PopForceCUDAHostDevice() { 35 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 36 if (ForceCUDAHostDeviceDepth == 0) 37 return false; 38 ForceCUDAHostDeviceDepth--; 39 return true; 40 } 41 42 ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc, 43 MultiExprArg ExecConfig, 44 SourceLocation GGGLoc) { 45 FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl(); 46 if (!ConfigDecl) 47 return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use) 48 << getCudaConfigureFuncName()); 49 QualType ConfigQTy = ConfigDecl->getType(); 50 51 DeclRefExpr *ConfigDR = new (Context) 52 DeclRefExpr(Context, ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc); 53 MarkFunctionReferenced(LLLLoc, ConfigDecl); 54 55 return BuildCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr, 56 /*IsExecConfig=*/true); 57 } 58 59 Sema::CUDAFunctionTarget 60 Sema::IdentifyCUDATarget(const ParsedAttributesView &Attrs) { 61 bool HasHostAttr = false; 62 bool HasDeviceAttr = false; 63 bool HasGlobalAttr = false; 64 bool HasInvalidTargetAttr = false; 65 for (const ParsedAttr &AL : Attrs) { 66 switch (AL.getKind()) { 67 case ParsedAttr::AT_CUDAGlobal: 68 HasGlobalAttr = true; 69 break; 70 case ParsedAttr::AT_CUDAHost: 71 HasHostAttr = true; 72 break; 73 case ParsedAttr::AT_CUDADevice: 74 HasDeviceAttr = true; 75 break; 76 case ParsedAttr::AT_CUDAInvalidTarget: 77 HasInvalidTargetAttr = true; 78 break; 79 default: 80 break; 81 } 82 } 83 84 if (HasInvalidTargetAttr) 85 return CFT_InvalidTarget; 86 87 if (HasGlobalAttr) 88 return CFT_Global; 89 90 if (HasHostAttr && HasDeviceAttr) 91 return CFT_HostDevice; 92 93 if (HasDeviceAttr) 94 return CFT_Device; 95 96 return CFT_Host; 97 } 98 99 template <typename A> 100 static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) { 101 return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) { 102 return isa<A>(Attribute) && 103 !(IgnoreImplicitAttr && Attribute->isImplicit()); 104 }); 105 } 106 107 /// IdentifyCUDATarget - Determine the CUDA compilation target for this function 108 Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D, 109 bool IgnoreImplicitHDAttr) { 110 // Code that lives outside a function is run on the host. 111 if (D == nullptr) 112 return CFT_Host; 113 114 if (D->hasAttr<CUDAInvalidTargetAttr>()) 115 return CFT_InvalidTarget; 116 117 if (D->hasAttr<CUDAGlobalAttr>()) 118 return CFT_Global; 119 120 if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) { 121 if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) 122 return CFT_HostDevice; 123 return CFT_Device; 124 } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) { 125 return CFT_Host; 126 } else if (D->isImplicit() && !IgnoreImplicitHDAttr) { 127 // Some implicit declarations (like intrinsic functions) are not marked. 128 // Set the most lenient target on them for maximal flexibility. 129 return CFT_HostDevice; 130 } 131 132 return CFT_Host; 133 } 134 135 // * CUDA Call preference table 136 // 137 // F - from, 138 // T - to 139 // Ph - preference in host mode 140 // Pd - preference in device mode 141 // H - handled in (x) 142 // Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never. 143 // 144 // | F | T | Ph | Pd | H | 145 // |----+----+-----+-----+-----+ 146 // | d | d | N | N | (c) | 147 // | d | g | -- | -- | (a) | 148 // | d | h | -- | -- | (e) | 149 // | d | hd | HD | HD | (b) | 150 // | g | d | N | N | (c) | 151 // | g | g | -- | -- | (a) | 152 // | g | h | -- | -- | (e) | 153 // | g | hd | HD | HD | (b) | 154 // | h | d | -- | -- | (e) | 155 // | h | g | N | N | (c) | 156 // | h | h | N | N | (c) | 157 // | h | hd | HD | HD | (b) | 158 // | hd | d | WS | SS | (d) | 159 // | hd | g | SS | -- |(d/a)| 160 // | hd | h | SS | WS | (d) | 161 // | hd | hd | HD | HD | (b) | 162 163 Sema::CUDAFunctionPreference 164 Sema::IdentifyCUDAPreference(const FunctionDecl *Caller, 165 const FunctionDecl *Callee) { 166 assert(Callee && "Callee must be valid."); 167 CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller); 168 CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee); 169 170 // If one of the targets is invalid, the check always fails, no matter what 171 // the other target is. 172 if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget) 173 return CFP_Never; 174 175 // (a) Can't call global from some contexts until we support CUDA's 176 // dynamic parallelism. 177 if (CalleeTarget == CFT_Global && 178 (CallerTarget == CFT_Global || CallerTarget == CFT_Device)) 179 return CFP_Never; 180 181 // (b) Calling HostDevice is OK for everyone. 182 if (CalleeTarget == CFT_HostDevice) 183 return CFP_HostDevice; 184 185 // (c) Best case scenarios 186 if (CalleeTarget == CallerTarget || 187 (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) || 188 (CallerTarget == CFT_Global && CalleeTarget == CFT_Device)) 189 return CFP_Native; 190 191 // (d) HostDevice behavior depends on compilation mode. 192 if (CallerTarget == CFT_HostDevice) { 193 // It's OK to call a compilation-mode matching function from an HD one. 194 if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) || 195 (!getLangOpts().CUDAIsDevice && 196 (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global))) 197 return CFP_SameSide; 198 199 // Calls from HD to non-mode-matching functions (i.e., to host functions 200 // when compiling in device mode or to device functions when compiling in 201 // host mode) are allowed at the sema level, but eventually rejected if 202 // they're ever codegened. TODO: Reject said calls earlier. 203 return CFP_WrongSide; 204 } 205 206 // (e) Calling across device/host boundary is not something you should do. 207 if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) || 208 (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) || 209 (CallerTarget == CFT_Global && CalleeTarget == CFT_Host)) 210 return CFP_Never; 211 212 llvm_unreachable("All cases should've been handled by now."); 213 } 214 215 template <typename AttrT> static bool hasImplicitAttr(const FunctionDecl *D) { 216 if (!D) 217 return false; 218 if (auto *A = D->getAttr<AttrT>()) 219 return A->isImplicit(); 220 return D->isImplicit(); 221 } 222 223 bool Sema::isCUDAImplicitHostDeviceFunction(const FunctionDecl *D) { 224 bool IsImplicitDevAttr = hasImplicitAttr<CUDADeviceAttr>(D); 225 bool IsImplicitHostAttr = hasImplicitAttr<CUDAHostAttr>(D); 226 return IsImplicitDevAttr && IsImplicitHostAttr; 227 } 228 229 void Sema::EraseUnwantedCUDAMatches( 230 const FunctionDecl *Caller, 231 SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) { 232 if (Matches.size() <= 1) 233 return; 234 235 using Pair = std::pair<DeclAccessPair, FunctionDecl*>; 236 237 // Gets the CUDA function preference for a call from Caller to Match. 238 auto GetCFP = [&](const Pair &Match) { 239 return IdentifyCUDAPreference(Caller, Match.second); 240 }; 241 242 // Find the best call preference among the functions in Matches. 243 CUDAFunctionPreference BestCFP = GetCFP(*std::max_element( 244 Matches.begin(), Matches.end(), 245 [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); })); 246 247 // Erase all functions with lower priority. 248 llvm::erase_if(Matches, 249 [&](const Pair &Match) { return GetCFP(Match) < BestCFP; }); 250 } 251 252 /// When an implicitly-declared special member has to invoke more than one 253 /// base/field special member, conflicts may occur in the targets of these 254 /// members. For example, if one base's member __host__ and another's is 255 /// __device__, it's a conflict. 256 /// This function figures out if the given targets \param Target1 and 257 /// \param Target2 conflict, and if they do not it fills in 258 /// \param ResolvedTarget with a target that resolves for both calls. 259 /// \return true if there's a conflict, false otherwise. 260 static bool 261 resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1, 262 Sema::CUDAFunctionTarget Target2, 263 Sema::CUDAFunctionTarget *ResolvedTarget) { 264 // Only free functions and static member functions may be global. 265 assert(Target1 != Sema::CFT_Global); 266 assert(Target2 != Sema::CFT_Global); 267 268 if (Target1 == Sema::CFT_HostDevice) { 269 *ResolvedTarget = Target2; 270 } else if (Target2 == Sema::CFT_HostDevice) { 271 *ResolvedTarget = Target1; 272 } else if (Target1 != Target2) { 273 return true; 274 } else { 275 *ResolvedTarget = Target1; 276 } 277 278 return false; 279 } 280 281 bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl, 282 CXXSpecialMember CSM, 283 CXXMethodDecl *MemberDecl, 284 bool ConstRHS, 285 bool Diagnose) { 286 // If the defaulted special member is defined lexically outside of its 287 // owning class, or the special member already has explicit device or host 288 // attributes, do not infer. 289 bool InClass = MemberDecl->getLexicalParent() == MemberDecl->getParent(); 290 bool HasH = MemberDecl->hasAttr<CUDAHostAttr>(); 291 bool HasD = MemberDecl->hasAttr<CUDADeviceAttr>(); 292 bool HasExplicitAttr = 293 (HasD && !MemberDecl->getAttr<CUDADeviceAttr>()->isImplicit()) || 294 (HasH && !MemberDecl->getAttr<CUDAHostAttr>()->isImplicit()); 295 if (!InClass || HasExplicitAttr) 296 return false; 297 298 llvm::Optional<CUDAFunctionTarget> InferredTarget; 299 300 // We're going to invoke special member lookup; mark that these special 301 // members are called from this one, and not from its caller. 302 ContextRAII MethodContext(*this, MemberDecl); 303 304 // Look for special members in base classes that should be invoked from here. 305 // Infer the target of this member base on the ones it should call. 306 // Skip direct and indirect virtual bases for abstract classes. 307 llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases; 308 for (const auto &B : ClassDecl->bases()) { 309 if (!B.isVirtual()) { 310 Bases.push_back(&B); 311 } 312 } 313 314 if (!ClassDecl->isAbstract()) { 315 for (const auto &VB : ClassDecl->vbases()) { 316 Bases.push_back(&VB); 317 } 318 } 319 320 for (const auto *B : Bases) { 321 const RecordType *BaseType = B->getType()->getAs<RecordType>(); 322 if (!BaseType) { 323 continue; 324 } 325 326 CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl()); 327 Sema::SpecialMemberOverloadResult SMOR = 328 LookupSpecialMember(BaseClassDecl, CSM, 329 /* ConstArg */ ConstRHS, 330 /* VolatileArg */ false, 331 /* RValueThis */ false, 332 /* ConstThis */ false, 333 /* VolatileThis */ false); 334 335 if (!SMOR.getMethod()) 336 continue; 337 338 CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR.getMethod()); 339 if (!InferredTarget.hasValue()) { 340 InferredTarget = BaseMethodTarget; 341 } else { 342 bool ResolutionError = resolveCalleeCUDATargetConflict( 343 InferredTarget.getValue(), BaseMethodTarget, 344 InferredTarget.getPointer()); 345 if (ResolutionError) { 346 if (Diagnose) { 347 Diag(ClassDecl->getLocation(), 348 diag::note_implicit_member_target_infer_collision) 349 << (unsigned)CSM << InferredTarget.getValue() << BaseMethodTarget; 350 } 351 MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context)); 352 return true; 353 } 354 } 355 } 356 357 // Same as for bases, but now for special members of fields. 358 for (const auto *F : ClassDecl->fields()) { 359 if (F->isInvalidDecl()) { 360 continue; 361 } 362 363 const RecordType *FieldType = 364 Context.getBaseElementType(F->getType())->getAs<RecordType>(); 365 if (!FieldType) { 366 continue; 367 } 368 369 CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl()); 370 Sema::SpecialMemberOverloadResult SMOR = 371 LookupSpecialMember(FieldRecDecl, CSM, 372 /* ConstArg */ ConstRHS && !F->isMutable(), 373 /* VolatileArg */ false, 374 /* RValueThis */ false, 375 /* ConstThis */ false, 376 /* VolatileThis */ false); 377 378 if (!SMOR.getMethod()) 379 continue; 380 381 CUDAFunctionTarget FieldMethodTarget = 382 IdentifyCUDATarget(SMOR.getMethod()); 383 if (!InferredTarget.hasValue()) { 384 InferredTarget = FieldMethodTarget; 385 } else { 386 bool ResolutionError = resolveCalleeCUDATargetConflict( 387 InferredTarget.getValue(), FieldMethodTarget, 388 InferredTarget.getPointer()); 389 if (ResolutionError) { 390 if (Diagnose) { 391 Diag(ClassDecl->getLocation(), 392 diag::note_implicit_member_target_infer_collision) 393 << (unsigned)CSM << InferredTarget.getValue() 394 << FieldMethodTarget; 395 } 396 MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context)); 397 return true; 398 } 399 } 400 } 401 402 403 // If no target was inferred, mark this member as __host__ __device__; 404 // it's the least restrictive option that can be invoked from any target. 405 bool NeedsH = true, NeedsD = true; 406 if (InferredTarget.hasValue()) { 407 if (InferredTarget.getValue() == CFT_Device) 408 NeedsH = false; 409 else if (InferredTarget.getValue() == CFT_Host) 410 NeedsD = false; 411 } 412 413 // We either setting attributes first time, or the inferred ones must match 414 // previously set ones. 415 if (NeedsD && !HasD) 416 MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context)); 417 if (NeedsH && !HasH) 418 MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context)); 419 420 return false; 421 } 422 423 bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) { 424 if (!CD->isDefined() && CD->isTemplateInstantiation()) 425 InstantiateFunctionDefinition(Loc, CD->getFirstDecl()); 426 427 // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered 428 // empty at a point in the translation unit, if it is either a 429 // trivial constructor 430 if (CD->isTrivial()) 431 return true; 432 433 // ... or it satisfies all of the following conditions: 434 // The constructor function has been defined. 435 // The constructor function has no parameters, 436 // and the function body is an empty compound statement. 437 if (!(CD->hasTrivialBody() && CD->getNumParams() == 0)) 438 return false; 439 440 // Its class has no virtual functions and no virtual base classes. 441 if (CD->getParent()->isDynamicClass()) 442 return false; 443 444 // Union ctor does not call ctors of its data members. 445 if (CD->getParent()->isUnion()) 446 return true; 447 448 // The only form of initializer allowed is an empty constructor. 449 // This will recursively check all base classes and member initializers 450 if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) { 451 if (const CXXConstructExpr *CE = 452 dyn_cast<CXXConstructExpr>(CI->getInit())) 453 return isEmptyCudaConstructor(Loc, CE->getConstructor()); 454 return false; 455 })) 456 return false; 457 458 return true; 459 } 460 461 bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) { 462 // No destructor -> no problem. 463 if (!DD) 464 return true; 465 466 if (!DD->isDefined() && DD->isTemplateInstantiation()) 467 InstantiateFunctionDefinition(Loc, DD->getFirstDecl()); 468 469 // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered 470 // empty at a point in the translation unit, if it is either a 471 // trivial constructor 472 if (DD->isTrivial()) 473 return true; 474 475 // ... or it satisfies all of the following conditions: 476 // The destructor function has been defined. 477 // and the function body is an empty compound statement. 478 if (!DD->hasTrivialBody()) 479 return false; 480 481 const CXXRecordDecl *ClassDecl = DD->getParent(); 482 483 // Its class has no virtual functions and no virtual base classes. 484 if (ClassDecl->isDynamicClass()) 485 return false; 486 487 // Union does not have base class and union dtor does not call dtors of its 488 // data members. 489 if (DD->getParent()->isUnion()) 490 return true; 491 492 // Only empty destructors are allowed. This will recursively check 493 // destructors for all base classes... 494 if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) { 495 if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl()) 496 return isEmptyCudaDestructor(Loc, RD->getDestructor()); 497 return true; 498 })) 499 return false; 500 501 // ... and member fields. 502 if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) { 503 if (CXXRecordDecl *RD = Field->getType() 504 ->getBaseElementTypeUnsafe() 505 ->getAsCXXRecordDecl()) 506 return isEmptyCudaDestructor(Loc, RD->getDestructor()); 507 return true; 508 })) 509 return false; 510 511 return true; 512 } 513 514 void Sema::checkAllowedCUDAInitializer(VarDecl *VD) { 515 if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage()) 516 return; 517 const Expr *Init = VD->getInit(); 518 if (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>() || 519 VD->hasAttr<CUDASharedAttr>()) { 520 if (LangOpts.GPUAllowDeviceInit) 521 return; 522 bool AllowedInit = false; 523 if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) 524 AllowedInit = 525 isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor()); 526 // We'll allow constant initializers even if it's a non-empty 527 // constructor according to CUDA rules. This deviates from NVCC, 528 // but allows us to handle things like constexpr constructors. 529 if (!AllowedInit && 530 (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>())) { 531 auto *Init = VD->getInit(); 532 AllowedInit = 533 ((VD->getType()->isDependentType() || Init->isValueDependent()) && 534 VD->isConstexpr()) || 535 Init->isConstantInitializer(Context, 536 VD->getType()->isReferenceType()); 537 } 538 539 // Also make sure that destructor, if there is one, is empty. 540 if (AllowedInit) 541 if (CXXRecordDecl *RD = VD->getType()->getAsCXXRecordDecl()) 542 AllowedInit = 543 isEmptyCudaDestructor(VD->getLocation(), RD->getDestructor()); 544 545 if (!AllowedInit) { 546 Diag(VD->getLocation(), VD->hasAttr<CUDASharedAttr>() 547 ? diag::err_shared_var_init 548 : diag::err_dynamic_var_init) 549 << Init->getSourceRange(); 550 VD->setInvalidDecl(); 551 } 552 } else { 553 // This is a host-side global variable. Check that the initializer is 554 // callable from the host side. 555 const FunctionDecl *InitFn = nullptr; 556 if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) { 557 InitFn = CE->getConstructor(); 558 } else if (const CallExpr *CE = dyn_cast<CallExpr>(Init)) { 559 InitFn = CE->getDirectCallee(); 560 } 561 if (InitFn) { 562 CUDAFunctionTarget InitFnTarget = IdentifyCUDATarget(InitFn); 563 if (InitFnTarget != CFT_Host && InitFnTarget != CFT_HostDevice) { 564 Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer) 565 << InitFnTarget << InitFn; 566 Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn; 567 VD->setInvalidDecl(); 568 } 569 } 570 } 571 } 572 573 // With -fcuda-host-device-constexpr, an unattributed constexpr function is 574 // treated as implicitly __host__ __device__, unless: 575 // * it is a variadic function (device-side variadic functions are not 576 // allowed), or 577 // * a __device__ function with this signature was already declared, in which 578 // case in which case we output an error, unless the __device__ decl is in a 579 // system header, in which case we leave the constexpr function unattributed. 580 // 581 // In addition, all function decls are treated as __host__ __device__ when 582 // ForceCUDAHostDeviceDepth > 0 (corresponding to code within a 583 // #pragma clang force_cuda_host_device_begin/end 584 // pair). 585 void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD, 586 const LookupResult &Previous) { 587 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 588 589 if (ForceCUDAHostDeviceDepth > 0) { 590 if (!NewD->hasAttr<CUDAHostAttr>()) 591 NewD->addAttr(CUDAHostAttr::CreateImplicit(Context)); 592 if (!NewD->hasAttr<CUDADeviceAttr>()) 593 NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context)); 594 return; 595 } 596 597 if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() || 598 NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() || 599 NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>()) 600 return; 601 602 // Is D a __device__ function with the same signature as NewD, ignoring CUDA 603 // attributes? 604 auto IsMatchingDeviceFn = [&](NamedDecl *D) { 605 if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D)) 606 D = Using->getTargetDecl(); 607 FunctionDecl *OldD = D->getAsFunction(); 608 return OldD && OldD->hasAttr<CUDADeviceAttr>() && 609 !OldD->hasAttr<CUDAHostAttr>() && 610 !IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false, 611 /* ConsiderCudaAttrs = */ false); 612 }; 613 auto It = llvm::find_if(Previous, IsMatchingDeviceFn); 614 if (It != Previous.end()) { 615 // We found a __device__ function with the same name and signature as NewD 616 // (ignoring CUDA attrs). This is an error unless that function is defined 617 // in a system header, in which case we simply return without making NewD 618 // host+device. 619 NamedDecl *Match = *It; 620 if (!getSourceManager().isInSystemHeader(Match->getLocation())) { 621 Diag(NewD->getLocation(), 622 diag::err_cuda_unattributed_constexpr_cannot_overload_device) 623 << NewD; 624 Diag(Match->getLocation(), 625 diag::note_cuda_conflicting_device_function_declared_here); 626 } 627 return; 628 } 629 630 NewD->addAttr(CUDAHostAttr::CreateImplicit(Context)); 631 NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context)); 632 } 633 634 void Sema::MaybeAddCUDAConstantAttr(VarDecl *VD) { 635 if (getLangOpts().CUDAIsDevice && VD->isConstexpr() && 636 (VD->isFileVarDecl() || VD->isStaticDataMember())) { 637 VD->addAttr(CUDAConstantAttr::CreateImplicit(getASTContext())); 638 } 639 } 640 641 Sema::SemaDiagnosticBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc, 642 unsigned DiagID) { 643 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 644 SemaDiagnosticBuilder::Kind DiagKind = [&] { 645 if (!isa<FunctionDecl>(CurContext)) 646 return SemaDiagnosticBuilder::K_Nop; 647 switch (CurrentCUDATarget()) { 648 case CFT_Global: 649 case CFT_Device: 650 return SemaDiagnosticBuilder::K_Immediate; 651 case CFT_HostDevice: 652 // An HD function counts as host code if we're compiling for host, and 653 // device code if we're compiling for device. Defer any errors in device 654 // mode until the function is known-emitted. 655 if (!getLangOpts().CUDAIsDevice) 656 return SemaDiagnosticBuilder::K_Nop; 657 if (IsLastErrorImmediate && Diags.getDiagnosticIDs()->isBuiltinNote(DiagID)) 658 return SemaDiagnosticBuilder::K_Immediate; 659 return (getEmissionStatus(cast<FunctionDecl>(CurContext)) == 660 FunctionEmissionStatus::Emitted) 661 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack 662 : SemaDiagnosticBuilder::K_Deferred; 663 default: 664 return SemaDiagnosticBuilder::K_Nop; 665 } 666 }(); 667 return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, 668 dyn_cast<FunctionDecl>(CurContext), *this); 669 } 670 671 Sema::SemaDiagnosticBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc, 672 unsigned DiagID) { 673 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 674 SemaDiagnosticBuilder::Kind DiagKind = [&] { 675 if (!isa<FunctionDecl>(CurContext)) 676 return SemaDiagnosticBuilder::K_Nop; 677 switch (CurrentCUDATarget()) { 678 case CFT_Host: 679 return SemaDiagnosticBuilder::K_Immediate; 680 case CFT_HostDevice: 681 // An HD function counts as host code if we're compiling for host, and 682 // device code if we're compiling for device. Defer any errors in device 683 // mode until the function is known-emitted. 684 if (getLangOpts().CUDAIsDevice) 685 return SemaDiagnosticBuilder::K_Nop; 686 if (IsLastErrorImmediate && Diags.getDiagnosticIDs()->isBuiltinNote(DiagID)) 687 return SemaDiagnosticBuilder::K_Immediate; 688 return (getEmissionStatus(cast<FunctionDecl>(CurContext)) == 689 FunctionEmissionStatus::Emitted) 690 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack 691 : SemaDiagnosticBuilder::K_Deferred; 692 default: 693 return SemaDiagnosticBuilder::K_Nop; 694 } 695 }(); 696 return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, 697 dyn_cast<FunctionDecl>(CurContext), *this); 698 } 699 700 bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) { 701 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 702 assert(Callee && "Callee may not be null."); 703 704 auto &ExprEvalCtx = ExprEvalContexts.back(); 705 if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated()) 706 return true; 707 708 // FIXME: Is bailing out early correct here? Should we instead assume that 709 // the caller is a global initializer? 710 FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext); 711 if (!Caller) 712 return true; 713 714 // If the caller is known-emitted, mark the callee as known-emitted. 715 // Otherwise, mark the call in our call graph so we can traverse it later. 716 bool CallerKnownEmitted = 717 getEmissionStatus(Caller) == FunctionEmissionStatus::Emitted; 718 SemaDiagnosticBuilder::Kind DiagKind = [this, Caller, Callee, 719 CallerKnownEmitted] { 720 switch (IdentifyCUDAPreference(Caller, Callee)) { 721 case CFP_Never: 722 case CFP_WrongSide: 723 assert(Caller && "Never/wrongSide calls require a non-null caller"); 724 // If we know the caller will be emitted, we know this wrong-side call 725 // will be emitted, so it's an immediate error. Otherwise, defer the 726 // error until we know the caller is emitted. 727 return CallerKnownEmitted 728 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack 729 : SemaDiagnosticBuilder::K_Deferred; 730 default: 731 return SemaDiagnosticBuilder::K_Nop; 732 } 733 }(); 734 735 if (DiagKind == SemaDiagnosticBuilder::K_Nop) 736 return true; 737 738 // Avoid emitting this error twice for the same location. Using a hashtable 739 // like this is unfortunate, but because we must continue parsing as normal 740 // after encountering a deferred error, it's otherwise very tricky for us to 741 // ensure that we only emit this deferred error once. 742 if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second) 743 return true; 744 745 SemaDiagnosticBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, *this) 746 << IdentifyCUDATarget(Callee) << /*function*/ 0 << Callee 747 << IdentifyCUDATarget(Caller); 748 if (!Callee->getBuiltinID()) 749 SemaDiagnosticBuilder(DiagKind, Callee->getLocation(), 750 diag::note_previous_decl, Caller, *this) 751 << Callee; 752 return DiagKind != SemaDiagnosticBuilder::K_Immediate && 753 DiagKind != SemaDiagnosticBuilder::K_ImmediateWithCallStack; 754 } 755 756 // Check the wrong-sided reference capture of lambda for CUDA/HIP. 757 // A lambda function may capture a stack variable by reference when it is 758 // defined and uses the capture by reference when the lambda is called. When 759 // the capture and use happen on different sides, the capture is invalid and 760 // should be diagnosed. 761 void Sema::CUDACheckLambdaCapture(CXXMethodDecl *Callee, 762 const sema::Capture &Capture) { 763 // In host compilation we only need to check lambda functions emitted on host 764 // side. In such lambda functions, a reference capture is invalid only 765 // if the lambda structure is populated by a device function or kernel then 766 // is passed to and called by a host function. However that is impossible, 767 // since a device function or kernel can only call a device function, also a 768 // kernel cannot pass a lambda back to a host function since we cannot 769 // define a kernel argument type which can hold the lambda before the lambda 770 // itself is defined. 771 if (!LangOpts.CUDAIsDevice) 772 return; 773 774 // File-scope lambda can only do init captures for global variables, which 775 // results in passing by value for these global variables. 776 FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext); 777 if (!Caller) 778 return; 779 780 // In device compilation, we only need to check lambda functions which are 781 // emitted on device side. For such lambdas, a reference capture is invalid 782 // only if the lambda structure is populated by a host function then passed 783 // to and called in a device function or kernel. 784 bool CalleeIsDevice = Callee->hasAttr<CUDADeviceAttr>(); 785 bool CallerIsHost = 786 !Caller->hasAttr<CUDAGlobalAttr>() && !Caller->hasAttr<CUDADeviceAttr>(); 787 bool ShouldCheck = CalleeIsDevice && CallerIsHost; 788 if (!ShouldCheck || !Capture.isReferenceCapture()) 789 return; 790 auto DiagKind = SemaDiagnosticBuilder::K_Deferred; 791 if (Capture.isVariableCapture()) { 792 SemaDiagnosticBuilder(DiagKind, Capture.getLocation(), 793 diag::err_capture_bad_target, Callee, *this) 794 << Capture.getVariable(); 795 } else if (Capture.isThisCapture()) { 796 SemaDiagnosticBuilder(DiagKind, Capture.getLocation(), 797 diag::err_capture_bad_target_this_ptr, Callee, *this); 798 } 799 return; 800 } 801 802 void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) { 803 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 804 if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>()) 805 return; 806 Method->addAttr(CUDADeviceAttr::CreateImplicit(Context)); 807 Method->addAttr(CUDAHostAttr::CreateImplicit(Context)); 808 } 809 810 void Sema::checkCUDATargetOverload(FunctionDecl *NewFD, 811 const LookupResult &Previous) { 812 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 813 CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD); 814 for (NamedDecl *OldND : Previous) { 815 FunctionDecl *OldFD = OldND->getAsFunction(); 816 if (!OldFD) 817 continue; 818 819 CUDAFunctionTarget OldTarget = IdentifyCUDATarget(OldFD); 820 // Don't allow HD and global functions to overload other functions with the 821 // same signature. We allow overloading based on CUDA attributes so that 822 // functions can have different implementations on the host and device, but 823 // HD/global functions "exist" in some sense on both the host and device, so 824 // should have the same implementation on both sides. 825 if (NewTarget != OldTarget && 826 ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice) || 827 (NewTarget == CFT_Global) || (OldTarget == CFT_Global)) && 828 !IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false, 829 /* ConsiderCudaAttrs = */ false)) { 830 Diag(NewFD->getLocation(), diag::err_cuda_ovl_target) 831 << NewTarget << NewFD->getDeclName() << OldTarget << OldFD; 832 Diag(OldFD->getLocation(), diag::note_previous_declaration); 833 NewFD->setInvalidDecl(); 834 break; 835 } 836 } 837 } 838 839 template <typename AttrTy> 840 static void copyAttrIfPresent(Sema &S, FunctionDecl *FD, 841 const FunctionDecl &TemplateFD) { 842 if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) { 843 AttrTy *Clone = Attribute->clone(S.Context); 844 Clone->setInherited(true); 845 FD->addAttr(Clone); 846 } 847 } 848 849 void Sema::inheritCUDATargetAttrs(FunctionDecl *FD, 850 const FunctionTemplateDecl &TD) { 851 const FunctionDecl &TemplateFD = *TD.getTemplatedDecl(); 852 copyAttrIfPresent<CUDAGlobalAttr>(*this, FD, TemplateFD); 853 copyAttrIfPresent<CUDAHostAttr>(*this, FD, TemplateFD); 854 copyAttrIfPresent<CUDADeviceAttr>(*this, FD, TemplateFD); 855 } 856 857 std::string Sema::getCudaConfigureFuncName() const { 858 if (getLangOpts().HIP) 859 return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration" 860 : "hipConfigureCall"; 861 862 // New CUDA kernel launch sequence. 863 if (CudaFeatureEnabled(Context.getTargetInfo().getSDKVersion(), 864 CudaFeature::CUDA_USES_NEW_LAUNCH)) 865 return "__cudaPushCallConfiguration"; 866 867 // Legacy CUDA kernel configuration call 868 return "cudaConfigureCall"; 869 } 870