//===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements semantic analysis for CUDA constructs.
///
//===----------------------------------------------------------------------===//

#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/ExprCXX.h"
#include "clang/Basic/Cuda.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/ScopeInfo.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/SemaDiagnostic.h"
#include "clang/Sema/SemaInternal.h"
#include "clang/Sema/Template.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
using namespace clang;

void Sema::PushForceCUDAHostDevice() {
  assert(getLangOpts().CUDA &&
         "Should only be called during CUDA compilation");
  ForceCUDAHostDeviceDepth++;
}

bool Sema::PopForceCUDAHostDevice() {
  assert(getLangOpts().CUDA &&
         "Should only be called during CUDA compilation");
  if (ForceCUDAHostDeviceDepth == 0)
    return false;
  ForceCUDAHostDeviceDepth--;
  return true;
}

ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
                                         MultiExprArg ExecConfig,
                                         SourceLocation GGGLoc) {
  FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl();
  if (!ConfigDecl)
    return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
                     << getCudaConfigureFuncName());
  QualType ConfigQTy = ConfigDecl->getType();

  DeclRefExpr *ConfigDR = new (Context)
      DeclRefExpr(Context, ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
  MarkFunctionReferenced(LLLLoc, ConfigDecl);

  return BuildCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
                       /*IsExecConfig=*/true);
}

Sema::CUDAFunctionTarget
Sema::IdentifyCUDATarget(const ParsedAttributesView &Attrs) {
  bool HasHostAttr = false;
  bool HasDeviceAttr = false;
  bool HasGlobalAttr = false;
  bool HasInvalidTargetAttr = false;
  for (const ParsedAttr &AL : Attrs) {
    switch (AL.getKind()) {
    case ParsedAttr::AT_CUDAGlobal:
      HasGlobalAttr = true;
      break;
    case ParsedAttr::AT_CUDAHost:
      HasHostAttr = true;
      break;
    case ParsedAttr::AT_CUDADevice:
      HasDeviceAttr = true;
      break;
    case ParsedAttr::AT_CUDAInvalidTarget:
      HasInvalidTargetAttr = true;
      break;
    default:
      break;
    }
  }

  if (HasInvalidTargetAttr)
    return CFT_InvalidTarget;

  if (HasGlobalAttr)
    return CFT_Global;

  if (HasHostAttr && HasDeviceAttr)
    return CFT_HostDevice;

  if (HasDeviceAttr)
    return CFT_Device;

  return CFT_Host;
}

template <typename A>
static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) {
  return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
           return isa<A>(Attribute) &&
                  !(IgnoreImplicitAttr && Attribute->isImplicit());
         });
}

/// IdentifyCUDATarget - Determine the CUDA compilation target for this function
Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
                                                  bool IgnoreImplicitHDAttr) {
  // Code that lives outside a function is run on the host.
  if (D == nullptr)
    return CFT_Host;

  if (D->hasAttr<CUDAInvalidTargetAttr>())
    return CFT_InvalidTarget;

  if (D->hasAttr<CUDAGlobalAttr>())
    return CFT_Global;

  if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) {
    if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr))
      return CFT_HostDevice;
    return CFT_Device;
  } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
    return CFT_Host;
  } else if ((D->isImplicit() || !D->isUserProvided()) &&
             !IgnoreImplicitHDAttr) {
    // Some implicit declarations (like intrinsic functions) are not marked.
    // Set the most lenient target on them for maximal flexibility.
    return CFT_HostDevice;
  }

  return CFT_Host;
}
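// Example (hypothetical declarations): the targets IdentifyCUDATarget infers
// from the attribute combinations above:
//   __global__ void kernel();              // CFT_Global
//   __device__ int devOnly();              // CFT_Device
//   __host__ __device__ int clamp(int x);  // CFT_HostDevice
//   int plainHost();                       // CFT_Host (no attributes)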
// * CUDA Call preference table
//
// F - from,
// T - to
// Ph - preference in host mode
// Pd - preference in device mode
// H  - handled in (x)
// Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never.
//
// | F  | T  | Ph  | Pd  |  H  |
// |----+----+-----+-----+-----+
// | d  | d  | N   | N   | (c) |
// | d  | g  | --  | --  | (a) |
// | d  | h  | --  | --  | (e) |
// | d  | hd | HD  | HD  | (b) |
// | g  | d  | N   | N   | (c) |
// | g  | g  | --  | --  | (a) |
// | g  | h  | --  | --  | (e) |
// | g  | hd | HD  | HD  | (b) |
// | h  | d  | --  | --  | (e) |
// | h  | g  | N   | N   | (c) |
// | h  | h  | N   | N   | (c) |
// | h  | hd | HD  | HD  | (b) |
// | hd | d  | WS  | SS  | (d) |
// | hd | g  | SS  | --  |(d/a)|
// | hd | h  | SS  | WS  | (d) |
// | hd | hd | HD  | HD  | (b) |

Sema::CUDAFunctionPreference
Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
                             const FunctionDecl *Callee) {
  assert(Callee && "Callee must be valid.");
  CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller);
  CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);

  // If one of the targets is invalid, the check always fails, no matter what
  // the other target is.
  if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
    return CFP_Never;

  // (a) Can't call global from some contexts until we support CUDA's
  // dynamic parallelism.
  if (CalleeTarget == CFT_Global &&
      (CallerTarget == CFT_Global || CallerTarget == CFT_Device))
    return CFP_Never;

  // (b) Calling HostDevice is OK for everyone.
  if (CalleeTarget == CFT_HostDevice)
    return CFP_HostDevice;

  // (c) Best case scenarios
  if (CalleeTarget == CallerTarget ||
      (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
    return CFP_Native;

  // (d) HostDevice behavior depends on compilation mode.
  if (CallerTarget == CFT_HostDevice) {
    // It's OK to call a compilation-mode matching function from an HD one.
    if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) ||
        (!getLangOpts().CUDAIsDevice &&
         (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)))
      return CFP_SameSide;

    // Calls from HD to non-mode-matching functions (i.e., to host functions
    // when compiling in device mode or to device functions when compiling in
    // host mode) are allowed at the sema level, but eventually rejected if
    // they're ever codegened. TODO: Reject said calls earlier.
    return CFP_WrongSide;
  }

  // (e) Calling across device/host boundary is not something you should do.
  if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
      (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
    return CFP_Never;

  llvm_unreachable("All cases should've been handled by now.");
}
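// Example (hypothetical declarations), matching the (hd, d) and (hd, h) rows
// of the table above:
//   __device__ int devImpl();
//   int hostImpl();
//   __host__ __device__ int pick() {
//     return devImpl(); // CFP_SameSide in device mode, CFP_WrongSide in host
//   }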
template <typename AttrT> static bool hasImplicitAttr(const FunctionDecl *D) {
  if (!D)
    return false;
  if (auto *A = D->getAttr<AttrT>())
    return A->isImplicit();
  return D->isImplicit();
}

bool Sema::isCUDAImplicitHostDeviceFunction(const FunctionDecl *D) {
  bool IsImplicitDevAttr = hasImplicitAttr<CUDADeviceAttr>(D);
  bool IsImplicitHostAttr = hasImplicitAttr<CUDAHostAttr>(D);
  return IsImplicitDevAttr && IsImplicitHostAttr;
}

void Sema::EraseUnwantedCUDAMatches(
    const FunctionDecl *Caller,
    SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
  if (Matches.size() <= 1)
    return;

  using Pair = std::pair<DeclAccessPair, FunctionDecl *>;

  // Gets the CUDA function preference for a call from Caller to Match.
  auto GetCFP = [&](const Pair &Match) {
    return IdentifyCUDAPreference(Caller, Match.second);
  };

  // Find the best call preference among the functions in Matches.
  CUDAFunctionPreference BestCFP = GetCFP(*std::max_element(
      Matches.begin(), Matches.end(),
      [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); }));

  // Erase all functions with lower priority.
  llvm::erase_if(Matches,
                 [&](const Pair &Match) { return GetCFP(Match) < BestCFP; });
}

/// When an implicitly-declared special member has to invoke more than one
/// base/field special member, conflicts may occur in the targets of these
/// members. For example, if one base's member is __host__ and another's is
/// __device__, it's a conflict.
/// This function figures out if the given targets \param Target1 and
/// \param Target2 conflict, and if they do not it fills in
/// \param ResolvedTarget with a target that resolves for both calls.
/// \return true if there's a conflict, false otherwise.
static bool
resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1,
                                Sema::CUDAFunctionTarget Target2,
                                Sema::CUDAFunctionTarget *ResolvedTarget) {
  // Only free functions and static member functions may be global.
  assert(Target1 != Sema::CFT_Global);
  assert(Target2 != Sema::CFT_Global);

  if (Target1 == Sema::CFT_HostDevice) {
    *ResolvedTarget = Target2;
  } else if (Target2 == Sema::CFT_HostDevice) {
    *ResolvedTarget = Target1;
  } else if (Target1 != Target2) {
    return true;
  } else {
    *ResolvedTarget = Target1;
  }

  return false;
}
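// For example, resolving (HostDevice, Device) yields Device, and resolving
// (Host, Host) yields Host, while (Host, Device) is reported as a conflict.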
bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
                                                   CXXSpecialMember CSM,
                                                   CXXMethodDecl *MemberDecl,
                                                   bool ConstRHS,
                                                   bool Diagnose) {
  // If the defaulted special member is defined lexically outside of its
  // owning class, or the special member already has explicit device or host
  // attributes, do not infer.
  bool InClass = MemberDecl->getLexicalParent() == MemberDecl->getParent();
  bool HasH = MemberDecl->hasAttr<CUDAHostAttr>();
  bool HasD = MemberDecl->hasAttr<CUDADeviceAttr>();
  bool HasExplicitAttr =
      (HasD && !MemberDecl->getAttr<CUDADeviceAttr>()->isImplicit()) ||
      (HasH && !MemberDecl->getAttr<CUDAHostAttr>()->isImplicit());
  if (!InClass || HasExplicitAttr)
    return false;

  llvm::Optional<CUDAFunctionTarget> InferredTarget;

  // We're going to invoke special member lookup; mark that these special
  // members are called from this one, and not from its caller.
  ContextRAII MethodContext(*this, MemberDecl);

  // Look for special members in base classes that should be invoked from here.
  // Infer the target of this member based on the ones it should call.
  // Skip direct and indirect virtual bases for abstract classes.
  llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases;
  for (const auto &B : ClassDecl->bases()) {
    if (!B.isVirtual()) {
      Bases.push_back(&B);
    }
  }

  if (!ClassDecl->isAbstract()) {
    for (const auto &VB : ClassDecl->vbases()) {
      Bases.push_back(&VB);
    }
  }

  for (const auto *B : Bases) {
    const RecordType *BaseType = B->getType()->getAs<RecordType>();
    if (!BaseType) {
      continue;
    }

    CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        LookupSpecialMember(BaseClassDecl, CSM,
                            /* ConstArg */ ConstRHS,
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR.getMethod());
    if (!InferredTarget.hasValue()) {
      InferredTarget = BaseMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          InferredTarget.getValue(), BaseMethodTarget,
          InferredTarget.getPointer());
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << InferredTarget.getValue() << BaseMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  // Same as for bases, but now for special members of fields.
  for (const auto *F : ClassDecl->fields()) {
    if (F->isInvalidDecl()) {
      continue;
    }

    const RecordType *FieldType =
        Context.getBaseElementType(F->getType())->getAs<RecordType>();
    if (!FieldType) {
      continue;
    }

    CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        LookupSpecialMember(FieldRecDecl, CSM,
                            /* ConstArg */ ConstRHS && !F->isMutable(),
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget FieldMethodTarget = IdentifyCUDATarget(SMOR.getMethod());
    if (!InferredTarget.hasValue()) {
      InferredTarget = FieldMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          InferredTarget.getValue(), FieldMethodTarget,
          InferredTarget.getPointer());
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << InferredTarget.getValue()
              << FieldMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  // If no target was inferred, mark this member as __host__ __device__;
  // it's the least restrictive option that can be invoked from any target.
  bool NeedsH = true, NeedsD = true;
  if (InferredTarget.hasValue()) {
    if (InferredTarget.getValue() == CFT_Device)
      NeedsH = false;
    else if (InferredTarget.getValue() == CFT_Host)
      NeedsD = false;
  }

  // We are either setting attributes for the first time, or the inferred
  // ones must match previously set ones.
  if (NeedsD && !HasD)
    MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
  if (NeedsH && !HasH)
    MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));

  return false;
}
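// Illustrative (hypothetical types): given
//   struct A { __device__ A(); };
//   struct B { __host__ B(); };
// a class deriving from A alone gets an implicit __device__ default
// constructor, while a class deriving from both A and B has conflicting
// targets, so its implicit default constructor is marked CUDAInvalidTarget.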
bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
  if (!CD->isDefined() && CD->isTemplateInstantiation())
    InstantiateFunctionDefinition(Loc, CD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial constructor
  if (CD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The constructor function has been defined.
  // The constructor function has no parameters,
  // and the function body is an empty compound statement.
  if (!(CD->hasTrivialBody() && CD->getNumParams() == 0))
    return false;

  // Its class has no virtual functions and no virtual base classes.
  if (CD->getParent()->isDynamicClass())
    return false;

  // Union ctor does not call ctors of its data members.
  if (CD->getParent()->isUnion())
    return true;

  // The only form of initializer allowed is an empty constructor.
  // This will recursively check all base classes and member initializers.
  if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) {
        if (const CXXConstructExpr *CE =
                dyn_cast<CXXConstructExpr>(CI->getInit()))
          return isEmptyCudaConstructor(Loc, CE->getConstructor());
        return false;
      }))
    return false;

  return true;
}

bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
  // No destructor -> no problem.
  if (!DD)
    return true;

  if (!DD->isDefined() && DD->isTemplateInstantiation())
    InstantiateFunctionDefinition(Loc, DD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial destructor
  if (DD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The destructor function has been defined.
  // and the function body is an empty compound statement.
  if (!DD->hasTrivialBody())
    return false;

  const CXXRecordDecl *ClassDecl = DD->getParent();

  // Its class has no virtual functions and no virtual base classes.
  if (ClassDecl->isDynamicClass())
    return false;

  // Union does not have base class and union dtor does not call dtors of its
  // data members.
  if (DD->getParent()->isUnion())
    return true;

  // Only empty destructors are allowed. This will recursively check
  // destructors for all base classes...
  if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) {
        if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl())
          return isEmptyCudaDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  // ... and member fields.
  if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) {
        if (CXXRecordDecl *RD = Field->getType()
                                    ->getBaseElementTypeUnsafe()
                                    ->getAsCXXRecordDecl())
          return isEmptyCudaDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  return true;
}
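// Illustrative (hypothetical types): Empty's constructor qualifies as empty;
// NonEmpty's does not, because its body performs work:
//   struct Empty    { __device__ Empty() {} };
//   struct NonEmpty { int x; __device__ NonEmpty() { x = 1; } };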
void Sema::checkAllowedCUDAInitializer(VarDecl *VD) {
  if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage())
    return;
  const Expr *Init = VD->getInit();
  if (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>() ||
      VD->hasAttr<CUDASharedAttr>()) {
    if (LangOpts.GPUAllowDeviceInit)
      return;
    bool AllowedInit = false;
    if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init))
      AllowedInit =
          isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor());
    // We'll allow constant initializers even if it's a non-empty
    // constructor according to CUDA rules. This deviates from NVCC,
    // but allows us to handle things like constexpr constructors.
    if (!AllowedInit &&
        (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>())) {
      auto *Init = VD->getInit();
      // isConstantInitializer cannot be called with a dependent value, so we
      // skip checking dependent values here. This is OK since
      // checkAllowedCUDAInitializer is called again when the template is
      // instantiated.
      AllowedInit =
          VD->getType()->isDependentType() || Init->isValueDependent() ||
          Init->isConstantInitializer(Context,
                                      VD->getType()->isReferenceType());
    }

    // Also make sure that destructor, if there is one, is empty.
    if (AllowedInit)
      if (CXXRecordDecl *RD = VD->getType()->getAsCXXRecordDecl())
        AllowedInit =
            isEmptyCudaDestructor(VD->getLocation(), RD->getDestructor());

    if (!AllowedInit) {
      Diag(VD->getLocation(), VD->hasAttr<CUDASharedAttr>()
                                  ? diag::err_shared_var_init
                                  : diag::err_dynamic_var_init)
          << Init->getSourceRange();
      VD->setInvalidDecl();
    }
  } else {
    // This is a host-side global variable. Check that the initializer is
    // callable from the host side.
    const FunctionDecl *InitFn = nullptr;
    if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) {
      InitFn = CE->getConstructor();
    } else if (const CallExpr *CE = dyn_cast<CallExpr>(Init)) {
      InitFn = CE->getDirectCallee();
    }
    if (InitFn) {
      CUDAFunctionTarget InitFnTarget = IdentifyCUDATarget(InitFn);
      if (InitFnTarget != CFT_Host && InitFnTarget != CFT_HostDevice) {
        Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer)
            << InitFnTarget << InitFn;
        Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn;
        VD->setInvalidDecl();
      }
    }
  }
}
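// Illustrative (hypothetical variables, Empty/NonEmpty as sketched earlier):
// what the device-side check accepts and rejects when
// LangOpts.GPUAllowDeviceInit is off:
//   __device__ int a = 42;   // OK: constant initializer
//   __device__ Empty b;      // OK: empty constructor and destructor
//   __device__ NonEmpty c;   // error: err_dynamic_var_init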
// With -fcuda-host-device-constexpr, an unattributed constexpr function is
// treated as implicitly __host__ __device__, unless:
//  * it is a variadic function (device-side variadic functions are not
//    allowed), or
//  * a __device__ function with this signature was already declared, in which
//    case we output an error, unless the __device__ decl is in a system
//    header, in which case we leave the constexpr function unattributed.
//
// In addition, all function decls are treated as __host__ __device__ when
// ForceCUDAHostDeviceDepth > 0 (corresponding to code within a
//   #pragma clang force_cuda_host_device_begin/end
// pair).
void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD,
                                       const LookupResult &Previous) {
  assert(getLangOpts().CUDA &&
         "Should only be called during CUDA compilation");

  if (ForceCUDAHostDeviceDepth > 0) {
    if (!NewD->hasAttr<CUDAHostAttr>())
      NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
    if (!NewD->hasAttr<CUDADeviceAttr>())
      NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    return;
  }

  if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
      NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
      NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
    return;

  // Is D a __device__ function with the same signature as NewD, ignoring CUDA
  // attributes?
  auto IsMatchingDeviceFn = [&](NamedDecl *D) {
    if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
      D = Using->getTargetDecl();
    FunctionDecl *OldD = D->getAsFunction();
    return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
           !OldD->hasAttr<CUDAHostAttr>() &&
           !IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false,
                       /* ConsiderCudaAttrs = */ false);
  };
  auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
  if (It != Previous.end()) {
    // We found a __device__ function with the same name and signature as NewD
    // (ignoring CUDA attrs). This is an error unless that function is defined
    // in a system header, in which case we simply return without making NewD
    // host+device.
    NamedDecl *Match = *It;
    if (!getSourceManager().isInSystemHeader(Match->getLocation())) {
      Diag(NewD->getLocation(),
           diag::err_cuda_unattributed_constexpr_cannot_overload_device)
          << NewD;
      Diag(Match->getLocation(),
           diag::note_cuda_conflicting_device_function_declared_here);
    }
    return;
  }

  NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
  NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
}
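// Example (hypothetical function): under -fcuda-host-device-constexpr,
//   constexpr int twice(int x) { return 2 * x; }
// becomes callable from both host and device code, as if it had been declared
//   __host__ __device__ constexpr int twice(int x) { return 2 * x; }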
void Sema::MaybeAddCUDAConstantAttr(VarDecl *VD) {
  if (getLangOpts().CUDAIsDevice && VD->isConstexpr() &&
      (VD->isFileVarDecl() || VD->isStaticDataMember())) {
    VD->addAttr(CUDAConstantAttr::CreateImplicit(getASTContext()));
  }
}

Sema::SemaDiagnosticBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
                                                       unsigned DiagID) {
  assert(getLangOpts().CUDA &&
         "Should only be called during CUDA compilation");
  SemaDiagnosticBuilder::Kind DiagKind = [&] {
    if (!isa<FunctionDecl>(CurContext))
      return SemaDiagnosticBuilder::K_Nop;
    switch (CurrentCUDATarget()) {
    case CFT_Global:
    case CFT_Device:
      return SemaDiagnosticBuilder::K_Immediate;
    case CFT_HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device. Defer any errors in device
      // mode until the function is known-emitted.
      if (!getLangOpts().CUDAIsDevice)
        return SemaDiagnosticBuilder::K_Nop;
      if (IsLastErrorImmediate &&
          Diags.getDiagnosticIDs()->isBuiltinNote(DiagID))
        return SemaDiagnosticBuilder::K_Immediate;
      return (getEmissionStatus(cast<FunctionDecl>(CurContext)) ==
              FunctionEmissionStatus::Emitted)
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();
  return SemaDiagnosticBuilder(DiagKind, Loc, DiagID,
                               dyn_cast<FunctionDecl>(CurContext), *this);
}

Sema::SemaDiagnosticBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc,
                                                     unsigned DiagID) {
  assert(getLangOpts().CUDA &&
         "Should only be called during CUDA compilation");
  SemaDiagnosticBuilder::Kind DiagKind = [&] {
    if (!isa<FunctionDecl>(CurContext))
      return SemaDiagnosticBuilder::K_Nop;
    switch (CurrentCUDATarget()) {
    case CFT_Host:
      return SemaDiagnosticBuilder::K_Immediate;
    case CFT_HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device. Defer any errors in host
      // mode until the function is known-emitted.
      if (getLangOpts().CUDAIsDevice)
        return SemaDiagnosticBuilder::K_Nop;
      if (IsLastErrorImmediate &&
          Diags.getDiagnosticIDs()->isBuiltinNote(DiagID))
        return SemaDiagnosticBuilder::K_Immediate;
      return (getEmissionStatus(cast<FunctionDecl>(CurContext)) ==
              FunctionEmissionStatus::Emitted)
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();
  return SemaDiagnosticBuilder(DiagKind, Loc, DiagID,
                               dyn_cast<FunctionDecl>(CurContext), *this);
}
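// Typical use (illustrative; the diagnostic ID is a placeholder): emit an
// error only when the current context is, or may end up being, device code:
//   CUDADiagIfDeviceCode(Loc, diag::err_some_device_restriction) << Arg;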
bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
  assert(getLangOpts().CUDA &&
         "Should only be called during CUDA compilation");
  assert(Callee && "Callee may not be null.");

  auto &ExprEvalCtx = ExprEvalContexts.back();
  if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated())
    return true;

  // FIXME: Is bailing out early correct here? Should we instead assume that
  // the caller is a global initializer?
  FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
  if (!Caller)
    return true;

  // If the caller is known-emitted, mark the callee as known-emitted.
  // Otherwise, mark the call in our call graph so we can traverse it later.
  bool CallerKnownEmitted =
      getEmissionStatus(Caller) == FunctionEmissionStatus::Emitted;
  SemaDiagnosticBuilder::Kind DiagKind = [this, Caller, Callee,
                                          CallerKnownEmitted] {
    switch (IdentifyCUDAPreference(Caller, Callee)) {
    case CFP_Never:
    case CFP_WrongSide:
      assert(Caller && "Never/wrongSide calls require a non-null caller");
      // If we know the caller will be emitted, we know this wrong-side call
      // will be emitted, so it's an immediate error. Otherwise, defer the
      // error until we know the caller is emitted.
      return CallerKnownEmitted
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();

  if (DiagKind == SemaDiagnosticBuilder::K_Nop)
    return true;

  // Avoid emitting this error twice for the same location. Using a hashtable
  // like this is unfortunate, but because we must continue parsing as normal
  // after encountering a deferred error, it's otherwise very tricky for us to
  // ensure that we only emit this deferred error once.
  if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second)
    return true;

  SemaDiagnosticBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, *this)
      << IdentifyCUDATarget(Callee) << /*function*/ 0 << Callee
      << IdentifyCUDATarget(Caller);
  if (!Callee->getBuiltinID())
    SemaDiagnosticBuilder(DiagKind, Callee->getLocation(),
                          diag::note_previous_decl, Caller, *this)
        << Callee;
  return DiagKind != SemaDiagnosticBuilder::K_Immediate &&
         DiagKind != SemaDiagnosticBuilder::K_ImmediateWithCallStack;
}
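// Illustrative (hypothetical functions): a wrong-side call whose diagnosis is
// deferred until the caller is known to be emitted for the device:
//   int onHost();                                      // host-only
//   __host__ __device__ int f() { return onHost(); }   // deferred in device
//                                                      // mode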
// Check the wrong-sided reference capture of lambda for CUDA/HIP.
// A lambda function may capture a stack variable by reference when it is
// defined and uses the capture by reference when the lambda is called. When
// the capture and use happen on different sides, the capture is invalid and
// should be diagnosed.
void Sema::CUDACheckLambdaCapture(CXXMethodDecl *Callee,
                                  const sema::Capture &Capture) {
  // In host compilation we only need to check lambda functions emitted on host
  // side. In such lambda functions, a reference capture is invalid only
  // if the lambda structure is populated by a device function or kernel then
  // is passed to and called by a host function. However, that is impossible:
  // a device function or kernel can only call a device function, and a kernel
  // cannot pass a lambda back to a host function, since we cannot define a
  // kernel argument type which can hold the lambda before the lambda itself
  // is defined.
  if (!LangOpts.CUDAIsDevice)
    return;

  // A file-scope lambda can only do init captures of global variables, which
  // results in those global variables being passed by value.
  FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
  if (!Caller)
    return;

  // In device compilation, we only need to check lambda functions which are
  // emitted on device side. For such lambdas, a reference capture is invalid
  // only if the lambda structure is populated by a host function then passed
  // to and called in a device function or kernel.
  bool CalleeIsDevice = Callee->hasAttr<CUDADeviceAttr>();
  bool CallerIsHost =
      !Caller->hasAttr<CUDAGlobalAttr>() && !Caller->hasAttr<CUDADeviceAttr>();
  bool ShouldCheck = CalleeIsDevice && CallerIsHost;
  if (!ShouldCheck || !Capture.isReferenceCapture())
    return;
  auto DiagKind = SemaDiagnosticBuilder::K_Deferred;
  if (Capture.isVariableCapture()) {
    SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
                          diag::err_capture_bad_target, Callee, *this)
        << Capture.getVariable();
  } else if (Capture.isThisCapture()) {
    SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
                          diag::err_capture_bad_target_this_ptr, Callee,
                          *this);
  }
  return;
}

void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) {
  assert(getLangOpts().CUDA &&
         "Should only be called during CUDA compilation");
  if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>())
    return;
  Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
  Method->addAttr(CUDAHostAttr::CreateImplicit(Context));
}
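// Illustrative (hypothetical code): in device compilation, a host function
// that builds a device lambda capturing a stack variable by reference is
// diagnosed when the lambda is later called on the device:
//   void hostFn() {
//     int x = 0;
//     auto bad = [&x] __device__ () { return x; }; // err_capture_bad_target
//   }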
void Sema::checkCUDATargetOverload(FunctionDecl *NewFD,
                                   const LookupResult &Previous) {
  assert(getLangOpts().CUDA &&
         "Should only be called during CUDA compilation");
  CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD);
  for (NamedDecl *OldND : Previous) {
    FunctionDecl *OldFD = OldND->getAsFunction();
    if (!OldFD)
      continue;

    CUDAFunctionTarget OldTarget = IdentifyCUDATarget(OldFD);
    // Don't allow HD and global functions to overload other functions with the
    // same signature. We allow overloading based on CUDA attributes so that
    // functions can have different implementations on the host and device, but
    // HD/global functions "exist" in some sense on both the host and device,
    // so should have the same implementation on both sides.
    if (NewTarget != OldTarget &&
        ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice) ||
         (NewTarget == CFT_Global) || (OldTarget == CFT_Global)) &&
        !IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false,
                    /* ConsiderCudaAttrs = */ false)) {
      Diag(NewFD->getLocation(), diag::err_cuda_ovl_target)
          << NewTarget << NewFD->getDeclName() << OldTarget << OldFD;
      Diag(OldFD->getLocation(), diag::note_previous_declaration);
      NewFD->setInvalidDecl();
      break;
    }
  }
}

template <typename AttrTy>
static void copyAttrIfPresent(Sema &S, FunctionDecl *FD,
                              const FunctionDecl &TemplateFD) {
  if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) {
    AttrTy *Clone = Attribute->clone(S.Context);
    Clone->setInherited(true);
    FD->addAttr(Clone);
  }
}

void Sema::inheritCUDATargetAttrs(FunctionDecl *FD,
                                  const FunctionTemplateDecl &TD) {
  const FunctionDecl &TemplateFD = *TD.getTemplatedDecl();
  copyAttrIfPresent<CUDAGlobalAttr>(*this, FD, TemplateFD);
  copyAttrIfPresent<CUDAHostAttr>(*this, FD, TemplateFD);
  copyAttrIfPresent<CUDADeviceAttr>(*this, FD, TemplateFD);
}

std::string Sema::getCudaConfigureFuncName() const {
  if (getLangOpts().HIP)
    return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration"
                                            : "hipConfigureCall";

  // New CUDA kernel launch sequence.
  if (CudaFeatureEnabled(Context.getTargetInfo().getSDKVersion(),
                         CudaFeature::CUDA_USES_NEW_LAUNCH))
    return "__cudaPushCallConfiguration";

  // Legacy CUDA kernel configuration call
  return "cudaConfigureCall";
}
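// Illustrative: a launch such as
//   kernel<<<grid, block>>>(args);
// is checked by building a call to the function named here (for example,
// cudaConfigureCall) with the <<<...>>> arguments; see ActOnCUDAExecConfigExpr
// above, whose LLLLoc and GGGLoc parameters are the locations of "<<<" and
// ">>>".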