1 //===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// \file 10 /// This file implements semantic analysis for CUDA constructs. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "clang/AST/ASTContext.h" 15 #include "clang/AST/Decl.h" 16 #include "clang/AST/ExprCXX.h" 17 #include "clang/Lex/Preprocessor.h" 18 #include "clang/Sema/Lookup.h" 19 #include "clang/Sema/Sema.h" 20 #include "clang/Sema/SemaDiagnostic.h" 21 #include "clang/Sema/SemaInternal.h" 22 #include "clang/Sema/Template.h" 23 #include "llvm/ADT/Optional.h" 24 #include "llvm/ADT/SmallVector.h" 25 using namespace clang; 26 27 void Sema::PushForceCUDAHostDevice() { 28 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 29 ForceCUDAHostDeviceDepth++; 30 } 31 32 bool Sema::PopForceCUDAHostDevice() { 33 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 34 if (ForceCUDAHostDeviceDepth == 0) 35 return false; 36 ForceCUDAHostDeviceDepth--; 37 return true; 38 } 39 40 ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc, 41 MultiExprArg ExecConfig, 42 SourceLocation GGGLoc) { 43 FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl(); 44 if (!ConfigDecl) 45 return ExprError( 46 Diag(LLLLoc, diag::err_undeclared_var_use) 47 << (getLangOpts().HIP ? "hipConfigureCall" : "cudaConfigureCall")); 48 QualType ConfigQTy = ConfigDecl->getType(); 49 50 DeclRefExpr *ConfigDR = new (Context) 51 DeclRefExpr(ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc); 52 MarkFunctionReferenced(LLLLoc, ConfigDecl); 53 54 return ActOnCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr, 55 /*IsExecConfig=*/true); 56 } 57 58 Sema::CUDAFunctionTarget 59 Sema::IdentifyCUDATarget(const ParsedAttributesView &Attrs) { 60 bool HasHostAttr = false; 61 bool HasDeviceAttr = false; 62 bool HasGlobalAttr = false; 63 bool HasInvalidTargetAttr = false; 64 for (const ParsedAttr &AL : Attrs) { 65 switch (AL.getKind()) { 66 case ParsedAttr::AT_CUDAGlobal: 67 HasGlobalAttr = true; 68 break; 69 case ParsedAttr::AT_CUDAHost: 70 HasHostAttr = true; 71 break; 72 case ParsedAttr::AT_CUDADevice: 73 HasDeviceAttr = true; 74 break; 75 case ParsedAttr::AT_CUDAInvalidTarget: 76 HasInvalidTargetAttr = true; 77 break; 78 default: 79 break; 80 } 81 } 82 83 if (HasInvalidTargetAttr) 84 return CFT_InvalidTarget; 85 86 if (HasGlobalAttr) 87 return CFT_Global; 88 89 if (HasHostAttr && HasDeviceAttr) 90 return CFT_HostDevice; 91 92 if (HasDeviceAttr) 93 return CFT_Device; 94 95 return CFT_Host; 96 } 97 98 template <typename A> 99 static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) { 100 return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) { 101 return isa<A>(Attribute) && 102 !(IgnoreImplicitAttr && Attribute->isImplicit()); 103 }); 104 } 105 106 /// IdentifyCUDATarget - Determine the CUDA compilation target for this function 107 Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D, 108 bool IgnoreImplicitHDAttr) { 109 // Code that lives outside a function is run on the host. 110 if (D == nullptr) 111 return CFT_Host; 112 113 if (D->hasAttr<CUDAInvalidTargetAttr>()) 114 return CFT_InvalidTarget; 115 116 if (D->hasAttr<CUDAGlobalAttr>()) 117 return CFT_Global; 118 119 if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) { 120 if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) 121 return CFT_HostDevice; 122 return CFT_Device; 123 } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) { 124 return CFT_Host; 125 } else if (D->isImplicit() && !IgnoreImplicitHDAttr) { 126 // Some implicit declarations (like intrinsic functions) are not marked. 127 // Set the most lenient target on them for maximal flexibility. 128 return CFT_HostDevice; 129 } 130 131 return CFT_Host; 132 } 133 134 // * CUDA Call preference table 135 // 136 // F - from, 137 // T - to 138 // Ph - preference in host mode 139 // Pd - preference in device mode 140 // H - handled in (x) 141 // Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never. 142 // 143 // | F | T | Ph | Pd | H | 144 // |----+----+-----+-----+-----+ 145 // | d | d | N | N | (c) | 146 // | d | g | -- | -- | (a) | 147 // | d | h | -- | -- | (e) | 148 // | d | hd | HD | HD | (b) | 149 // | g | d | N | N | (c) | 150 // | g | g | -- | -- | (a) | 151 // | g | h | -- | -- | (e) | 152 // | g | hd | HD | HD | (b) | 153 // | h | d | -- | -- | (e) | 154 // | h | g | N | N | (c) | 155 // | h | h | N | N | (c) | 156 // | h | hd | HD | HD | (b) | 157 // | hd | d | WS | SS | (d) | 158 // | hd | g | SS | -- |(d/a)| 159 // | hd | h | SS | WS | (d) | 160 // | hd | hd | HD | HD | (b) | 161 162 Sema::CUDAFunctionPreference 163 Sema::IdentifyCUDAPreference(const FunctionDecl *Caller, 164 const FunctionDecl *Callee) { 165 assert(Callee && "Callee must be valid."); 166 CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller); 167 CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee); 168 169 // If one of the targets is invalid, the check always fails, no matter what 170 // the other target is. 171 if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget) 172 return CFP_Never; 173 174 // (a) Can't call global from some contexts until we support CUDA's 175 // dynamic parallelism. 176 if (CalleeTarget == CFT_Global && 177 (CallerTarget == CFT_Global || CallerTarget == CFT_Device)) 178 return CFP_Never; 179 180 // (b) Calling HostDevice is OK for everyone. 181 if (CalleeTarget == CFT_HostDevice) 182 return CFP_HostDevice; 183 184 // (c) Best case scenarios 185 if (CalleeTarget == CallerTarget || 186 (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) || 187 (CallerTarget == CFT_Global && CalleeTarget == CFT_Device)) 188 return CFP_Native; 189 190 // (d) HostDevice behavior depends on compilation mode. 191 if (CallerTarget == CFT_HostDevice) { 192 // It's OK to call a compilation-mode matching function from an HD one. 193 if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) || 194 (!getLangOpts().CUDAIsDevice && 195 (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global))) 196 return CFP_SameSide; 197 198 // Calls from HD to non-mode-matching functions (i.e., to host functions 199 // when compiling in device mode or to device functions when compiling in 200 // host mode) are allowed at the sema level, but eventually rejected if 201 // they're ever codegened. TODO: Reject said calls earlier. 202 return CFP_WrongSide; 203 } 204 205 // (e) Calling across device/host boundary is not something you should do. 206 if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) || 207 (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) || 208 (CallerTarget == CFT_Global && CalleeTarget == CFT_Host)) 209 return CFP_Never; 210 211 llvm_unreachable("All cases should've been handled by now."); 212 } 213 214 void Sema::EraseUnwantedCUDAMatches( 215 const FunctionDecl *Caller, 216 SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) { 217 if (Matches.size() <= 1) 218 return; 219 220 using Pair = std::pair<DeclAccessPair, FunctionDecl*>; 221 222 // Gets the CUDA function preference for a call from Caller to Match. 223 auto GetCFP = [&](const Pair &Match) { 224 return IdentifyCUDAPreference(Caller, Match.second); 225 }; 226 227 // Find the best call preference among the functions in Matches. 228 CUDAFunctionPreference BestCFP = GetCFP(*std::max_element( 229 Matches.begin(), Matches.end(), 230 [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); })); 231 232 // Erase all functions with lower priority. 233 llvm::erase_if(Matches, 234 [&](const Pair &Match) { return GetCFP(Match) < BestCFP; }); 235 } 236 237 /// When an implicitly-declared special member has to invoke more than one 238 /// base/field special member, conflicts may occur in the targets of these 239 /// members. For example, if one base's member __host__ and another's is 240 /// __device__, it's a conflict. 241 /// This function figures out if the given targets \param Target1 and 242 /// \param Target2 conflict, and if they do not it fills in 243 /// \param ResolvedTarget with a target that resolves for both calls. 244 /// \return true if there's a conflict, false otherwise. 245 static bool 246 resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1, 247 Sema::CUDAFunctionTarget Target2, 248 Sema::CUDAFunctionTarget *ResolvedTarget) { 249 // Only free functions and static member functions may be global. 250 assert(Target1 != Sema::CFT_Global); 251 assert(Target2 != Sema::CFT_Global); 252 253 if (Target1 == Sema::CFT_HostDevice) { 254 *ResolvedTarget = Target2; 255 } else if (Target2 == Sema::CFT_HostDevice) { 256 *ResolvedTarget = Target1; 257 } else if (Target1 != Target2) { 258 return true; 259 } else { 260 *ResolvedTarget = Target1; 261 } 262 263 return false; 264 } 265 266 bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl, 267 CXXSpecialMember CSM, 268 CXXMethodDecl *MemberDecl, 269 bool ConstRHS, 270 bool Diagnose) { 271 llvm::Optional<CUDAFunctionTarget> InferredTarget; 272 273 // We're going to invoke special member lookup; mark that these special 274 // members are called from this one, and not from its caller. 275 ContextRAII MethodContext(*this, MemberDecl); 276 277 // Look for special members in base classes that should be invoked from here. 278 // Infer the target of this member base on the ones it should call. 279 // Skip direct and indirect virtual bases for abstract classes. 280 llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases; 281 for (const auto &B : ClassDecl->bases()) { 282 if (!B.isVirtual()) { 283 Bases.push_back(&B); 284 } 285 } 286 287 if (!ClassDecl->isAbstract()) { 288 for (const auto &VB : ClassDecl->vbases()) { 289 Bases.push_back(&VB); 290 } 291 } 292 293 for (const auto *B : Bases) { 294 const RecordType *BaseType = B->getType()->getAs<RecordType>(); 295 if (!BaseType) { 296 continue; 297 } 298 299 CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl()); 300 Sema::SpecialMemberOverloadResult SMOR = 301 LookupSpecialMember(BaseClassDecl, CSM, 302 /* ConstArg */ ConstRHS, 303 /* VolatileArg */ false, 304 /* RValueThis */ false, 305 /* ConstThis */ false, 306 /* VolatileThis */ false); 307 308 if (!SMOR.getMethod()) 309 continue; 310 311 CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR.getMethod()); 312 if (!InferredTarget.hasValue()) { 313 InferredTarget = BaseMethodTarget; 314 } else { 315 bool ResolutionError = resolveCalleeCUDATargetConflict( 316 InferredTarget.getValue(), BaseMethodTarget, 317 InferredTarget.getPointer()); 318 if (ResolutionError) { 319 if (Diagnose) { 320 Diag(ClassDecl->getLocation(), 321 diag::note_implicit_member_target_infer_collision) 322 << (unsigned)CSM << InferredTarget.getValue() << BaseMethodTarget; 323 } 324 MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context)); 325 return true; 326 } 327 } 328 } 329 330 // Same as for bases, but now for special members of fields. 331 for (const auto *F : ClassDecl->fields()) { 332 if (F->isInvalidDecl()) { 333 continue; 334 } 335 336 const RecordType *FieldType = 337 Context.getBaseElementType(F->getType())->getAs<RecordType>(); 338 if (!FieldType) { 339 continue; 340 } 341 342 CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl()); 343 Sema::SpecialMemberOverloadResult SMOR = 344 LookupSpecialMember(FieldRecDecl, CSM, 345 /* ConstArg */ ConstRHS && !F->isMutable(), 346 /* VolatileArg */ false, 347 /* RValueThis */ false, 348 /* ConstThis */ false, 349 /* VolatileThis */ false); 350 351 if (!SMOR.getMethod()) 352 continue; 353 354 CUDAFunctionTarget FieldMethodTarget = 355 IdentifyCUDATarget(SMOR.getMethod()); 356 if (!InferredTarget.hasValue()) { 357 InferredTarget = FieldMethodTarget; 358 } else { 359 bool ResolutionError = resolveCalleeCUDATargetConflict( 360 InferredTarget.getValue(), FieldMethodTarget, 361 InferredTarget.getPointer()); 362 if (ResolutionError) { 363 if (Diagnose) { 364 Diag(ClassDecl->getLocation(), 365 diag::note_implicit_member_target_infer_collision) 366 << (unsigned)CSM << InferredTarget.getValue() 367 << FieldMethodTarget; 368 } 369 MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context)); 370 return true; 371 } 372 } 373 } 374 375 if (InferredTarget.hasValue()) { 376 if (InferredTarget.getValue() == CFT_Device) { 377 MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context)); 378 } else if (InferredTarget.getValue() == CFT_Host) { 379 MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context)); 380 } else { 381 MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context)); 382 MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context)); 383 } 384 } else { 385 // If no target was inferred, mark this member as __host__ __device__; 386 // it's the least restrictive option that can be invoked from any target. 387 MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context)); 388 MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context)); 389 } 390 391 return false; 392 } 393 394 bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) { 395 if (!CD->isDefined() && CD->isTemplateInstantiation()) 396 InstantiateFunctionDefinition(Loc, CD->getFirstDecl()); 397 398 // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered 399 // empty at a point in the translation unit, if it is either a 400 // trivial constructor 401 if (CD->isTrivial()) 402 return true; 403 404 // ... or it satisfies all of the following conditions: 405 // The constructor function has been defined. 406 // The constructor function has no parameters, 407 // and the function body is an empty compound statement. 408 if (!(CD->hasTrivialBody() && CD->getNumParams() == 0)) 409 return false; 410 411 // Its class has no virtual functions and no virtual base classes. 412 if (CD->getParent()->isDynamicClass()) 413 return false; 414 415 // The only form of initializer allowed is an empty constructor. 416 // This will recursively check all base classes and member initializers 417 if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) { 418 if (const CXXConstructExpr *CE = 419 dyn_cast<CXXConstructExpr>(CI->getInit())) 420 return isEmptyCudaConstructor(Loc, CE->getConstructor()); 421 return false; 422 })) 423 return false; 424 425 return true; 426 } 427 428 bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) { 429 // No destructor -> no problem. 430 if (!DD) 431 return true; 432 433 if (!DD->isDefined() && DD->isTemplateInstantiation()) 434 InstantiateFunctionDefinition(Loc, DD->getFirstDecl()); 435 436 // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered 437 // empty at a point in the translation unit, if it is either a 438 // trivial constructor 439 if (DD->isTrivial()) 440 return true; 441 442 // ... or it satisfies all of the following conditions: 443 // The destructor function has been defined. 444 // and the function body is an empty compound statement. 445 if (!DD->hasTrivialBody()) 446 return false; 447 448 const CXXRecordDecl *ClassDecl = DD->getParent(); 449 450 // Its class has no virtual functions and no virtual base classes. 451 if (ClassDecl->isDynamicClass()) 452 return false; 453 454 // Only empty destructors are allowed. This will recursively check 455 // destructors for all base classes... 456 if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) { 457 if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl()) 458 return isEmptyCudaDestructor(Loc, RD->getDestructor()); 459 return true; 460 })) 461 return false; 462 463 // ... and member fields. 464 if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) { 465 if (CXXRecordDecl *RD = Field->getType() 466 ->getBaseElementTypeUnsafe() 467 ->getAsCXXRecordDecl()) 468 return isEmptyCudaDestructor(Loc, RD->getDestructor()); 469 return true; 470 })) 471 return false; 472 473 return true; 474 } 475 476 void Sema::checkAllowedCUDAInitializer(VarDecl *VD) { 477 if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage()) 478 return; 479 const Expr *Init = VD->getInit(); 480 if (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>() || 481 VD->hasAttr<CUDASharedAttr>()) { 482 assert(!VD->isStaticLocal() || VD->hasAttr<CUDASharedAttr>()); 483 bool AllowedInit = false; 484 if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) 485 AllowedInit = 486 isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor()); 487 // We'll allow constant initializers even if it's a non-empty 488 // constructor according to CUDA rules. This deviates from NVCC, 489 // but allows us to handle things like constexpr constructors. 490 if (!AllowedInit && 491 (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>())) 492 AllowedInit = VD->getInit()->isConstantInitializer( 493 Context, VD->getType()->isReferenceType()); 494 495 // Also make sure that destructor, if there is one, is empty. 496 if (AllowedInit) 497 if (CXXRecordDecl *RD = VD->getType()->getAsCXXRecordDecl()) 498 AllowedInit = 499 isEmptyCudaDestructor(VD->getLocation(), RD->getDestructor()); 500 501 if (!AllowedInit) { 502 Diag(VD->getLocation(), VD->hasAttr<CUDASharedAttr>() 503 ? diag::err_shared_var_init 504 : diag::err_dynamic_var_init) 505 << Init->getSourceRange(); 506 VD->setInvalidDecl(); 507 } 508 } else { 509 // This is a host-side global variable. Check that the initializer is 510 // callable from the host side. 511 const FunctionDecl *InitFn = nullptr; 512 if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) { 513 InitFn = CE->getConstructor(); 514 } else if (const CallExpr *CE = dyn_cast<CallExpr>(Init)) { 515 InitFn = CE->getDirectCallee(); 516 } 517 if (InitFn) { 518 CUDAFunctionTarget InitFnTarget = IdentifyCUDATarget(InitFn); 519 if (InitFnTarget != CFT_Host && InitFnTarget != CFT_HostDevice) { 520 Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer) 521 << InitFnTarget << InitFn; 522 Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn; 523 VD->setInvalidDecl(); 524 } 525 } 526 } 527 } 528 529 // With -fcuda-host-device-constexpr, an unattributed constexpr function is 530 // treated as implicitly __host__ __device__, unless: 531 // * it is a variadic function (device-side variadic functions are not 532 // allowed), or 533 // * a __device__ function with this signature was already declared, in which 534 // case in which case we output an error, unless the __device__ decl is in a 535 // system header, in which case we leave the constexpr function unattributed. 536 // 537 // In addition, all function decls are treated as __host__ __device__ when 538 // ForceCUDAHostDeviceDepth > 0 (corresponding to code within a 539 // #pragma clang force_cuda_host_device_begin/end 540 // pair). 541 void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD, 542 const LookupResult &Previous) { 543 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 544 545 if (ForceCUDAHostDeviceDepth > 0) { 546 if (!NewD->hasAttr<CUDAHostAttr>()) 547 NewD->addAttr(CUDAHostAttr::CreateImplicit(Context)); 548 if (!NewD->hasAttr<CUDADeviceAttr>()) 549 NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context)); 550 return; 551 } 552 553 if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() || 554 NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() || 555 NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>()) 556 return; 557 558 // Is D a __device__ function with the same signature as NewD, ignoring CUDA 559 // attributes? 560 auto IsMatchingDeviceFn = [&](NamedDecl *D) { 561 if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D)) 562 D = Using->getTargetDecl(); 563 FunctionDecl *OldD = D->getAsFunction(); 564 return OldD && OldD->hasAttr<CUDADeviceAttr>() && 565 !OldD->hasAttr<CUDAHostAttr>() && 566 !IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false, 567 /* ConsiderCudaAttrs = */ false); 568 }; 569 auto It = llvm::find_if(Previous, IsMatchingDeviceFn); 570 if (It != Previous.end()) { 571 // We found a __device__ function with the same name and signature as NewD 572 // (ignoring CUDA attrs). This is an error unless that function is defined 573 // in a system header, in which case we simply return without making NewD 574 // host+device. 575 NamedDecl *Match = *It; 576 if (!getSourceManager().isInSystemHeader(Match->getLocation())) { 577 Diag(NewD->getLocation(), 578 diag::err_cuda_unattributed_constexpr_cannot_overload_device) 579 << NewD; 580 Diag(Match->getLocation(), 581 diag::note_cuda_conflicting_device_function_declared_here); 582 } 583 return; 584 } 585 586 NewD->addAttr(CUDAHostAttr::CreateImplicit(Context)); 587 NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context)); 588 } 589 590 // In CUDA, there are some constructs which may appear in semantically-valid 591 // code, but trigger errors if we ever generate code for the function in which 592 // they appear. Essentially every construct you're not allowed to use on the 593 // device falls into this category, because you are allowed to use these 594 // constructs in a __host__ __device__ function, but only if that function is 595 // never codegen'ed on the device. 596 // 597 // To handle semantic checking for these constructs, we keep track of the set of 598 // functions we know will be emitted, either because we could tell a priori that 599 // they would be emitted, or because they were transitively called by a 600 // known-emitted function. 601 // 602 // We also keep a partial call graph of which not-known-emitted functions call 603 // which other not-known-emitted functions. 604 // 605 // When we see something which is illegal if the current function is emitted 606 // (usually by way of CUDADiagIfDeviceCode, CUDADiagIfHostCode, or 607 // CheckCUDACall), we first check if the current function is known-emitted. If 608 // so, we immediately output the diagnostic. 609 // 610 // Otherwise, we "defer" the diagnostic. It sits in Sema::CUDADeferredDiags 611 // until we discover that the function is known-emitted, at which point we take 612 // it out of this map and emit the diagnostic. 613 614 Sema::CUDADiagBuilder::CUDADiagBuilder(Kind K, SourceLocation Loc, 615 unsigned DiagID, FunctionDecl *Fn, 616 Sema &S) 617 : S(S), Loc(Loc), DiagID(DiagID), Fn(Fn), 618 ShowCallStack(K == K_ImmediateWithCallStack || K == K_Deferred) { 619 switch (K) { 620 case K_Nop: 621 break; 622 case K_Immediate: 623 case K_ImmediateWithCallStack: 624 ImmediateDiag.emplace(S.Diag(Loc, DiagID)); 625 break; 626 case K_Deferred: 627 assert(Fn && "Must have a function to attach the deferred diag to."); 628 PartialDiag.emplace(S.PDiag(DiagID)); 629 break; 630 } 631 } 632 633 // Print notes showing how we can reach FD starting from an a priori 634 // known-callable function. 635 static void EmitCallStackNotes(Sema &S, FunctionDecl *FD) { 636 auto FnIt = S.CUDAKnownEmittedFns.find(FD); 637 while (FnIt != S.CUDAKnownEmittedFns.end()) { 638 DiagnosticBuilder Builder( 639 S.Diags.Report(FnIt->second.Loc, diag::note_called_by)); 640 Builder << FnIt->second.FD; 641 Builder.setForceEmit(); 642 643 FnIt = S.CUDAKnownEmittedFns.find(FnIt->second.FD); 644 } 645 } 646 647 Sema::CUDADiagBuilder::~CUDADiagBuilder() { 648 if (ImmediateDiag) { 649 // Emit our diagnostic and, if it was a warning or error, output a callstack 650 // if Fn isn't a priori known-emitted. 651 bool IsWarningOrError = S.getDiagnostics().getDiagnosticLevel( 652 DiagID, Loc) >= DiagnosticsEngine::Warning; 653 ImmediateDiag.reset(); // Emit the immediate diag. 654 if (IsWarningOrError && ShowCallStack) 655 EmitCallStackNotes(S, Fn); 656 } else if (PartialDiag) { 657 assert(ShowCallStack && "Must always show call stack for deferred diags."); 658 S.CUDADeferredDiags[Fn].push_back({Loc, std::move(*PartialDiag)}); 659 } 660 } 661 662 // Do we know that we will eventually codegen the given function? 663 static bool IsKnownEmitted(Sema &S, FunctionDecl *FD) { 664 // Templates are emitted when they're instantiated. 665 if (FD->isDependentContext()) 666 return false; 667 668 // When compiling for device, host functions are never emitted. Similarly, 669 // when compiling for host, device and global functions are never emitted. 670 // (Technically, we do emit a host-side stub for global functions, but this 671 // doesn't count for our purposes here.) 672 Sema::CUDAFunctionTarget T = S.IdentifyCUDATarget(FD); 673 if (S.getLangOpts().CUDAIsDevice && T == Sema::CFT_Host) 674 return false; 675 if (!S.getLangOpts().CUDAIsDevice && 676 (T == Sema::CFT_Device || T == Sema::CFT_Global)) 677 return false; 678 679 // Check whether this function is externally visible -- if so, it's 680 // known-emitted. 681 // 682 // We have to check the GVA linkage of the function's *definition* -- if we 683 // only have a declaration, we don't know whether or not the function will be 684 // emitted, because (say) the definition could include "inline". 685 FunctionDecl *Def = FD->getDefinition(); 686 687 if (Def && 688 !isDiscardableGVALinkage(S.getASTContext().GetGVALinkageForFunction(Def))) 689 return true; 690 691 // Otherwise, the function is known-emitted if it's in our set of 692 // known-emitted functions. 693 return S.CUDAKnownEmittedFns.count(FD) > 0; 694 } 695 696 Sema::CUDADiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc, 697 unsigned DiagID) { 698 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 699 CUDADiagBuilder::Kind DiagKind = [&] { 700 switch (CurrentCUDATarget()) { 701 case CFT_Global: 702 case CFT_Device: 703 return CUDADiagBuilder::K_Immediate; 704 case CFT_HostDevice: 705 // An HD function counts as host code if we're compiling for host, and 706 // device code if we're compiling for device. Defer any errors in device 707 // mode until the function is known-emitted. 708 if (getLangOpts().CUDAIsDevice) { 709 return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext)) 710 ? CUDADiagBuilder::K_ImmediateWithCallStack 711 : CUDADiagBuilder::K_Deferred; 712 } 713 return CUDADiagBuilder::K_Nop; 714 715 default: 716 return CUDADiagBuilder::K_Nop; 717 } 718 }(); 719 return CUDADiagBuilder(DiagKind, Loc, DiagID, 720 dyn_cast<FunctionDecl>(CurContext), *this); 721 } 722 723 Sema::CUDADiagBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc, 724 unsigned DiagID) { 725 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 726 CUDADiagBuilder::Kind DiagKind = [&] { 727 switch (CurrentCUDATarget()) { 728 case CFT_Host: 729 return CUDADiagBuilder::K_Immediate; 730 case CFT_HostDevice: 731 // An HD function counts as host code if we're compiling for host, and 732 // device code if we're compiling for device. Defer any errors in device 733 // mode until the function is known-emitted. 734 if (getLangOpts().CUDAIsDevice) 735 return CUDADiagBuilder::K_Nop; 736 737 return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext)) 738 ? CUDADiagBuilder::K_ImmediateWithCallStack 739 : CUDADiagBuilder::K_Deferred; 740 default: 741 return CUDADiagBuilder::K_Nop; 742 } 743 }(); 744 return CUDADiagBuilder(DiagKind, Loc, DiagID, 745 dyn_cast<FunctionDecl>(CurContext), *this); 746 } 747 748 // Emit any deferred diagnostics for FD and erase them from the map in which 749 // they're stored. 750 static void EmitDeferredDiags(Sema &S, FunctionDecl *FD) { 751 auto It = S.CUDADeferredDiags.find(FD); 752 if (It == S.CUDADeferredDiags.end()) 753 return; 754 bool HasWarningOrError = false; 755 for (PartialDiagnosticAt &PDAt : It->second) { 756 const SourceLocation &Loc = PDAt.first; 757 const PartialDiagnostic &PD = PDAt.second; 758 HasWarningOrError |= S.getDiagnostics().getDiagnosticLevel( 759 PD.getDiagID(), Loc) >= DiagnosticsEngine::Warning; 760 DiagnosticBuilder Builder(S.Diags.Report(Loc, PD.getDiagID())); 761 Builder.setForceEmit(); 762 PD.Emit(Builder); 763 } 764 S.CUDADeferredDiags.erase(It); 765 766 // FIXME: Should this be called after every warning/error emitted in the loop 767 // above, instead of just once per function? That would be consistent with 768 // how we handle immediate errors, but it also seems like a bit much. 769 if (HasWarningOrError) 770 EmitCallStackNotes(S, FD); 771 } 772 773 // Indicate that this function (and thus everything it transtively calls) will 774 // be codegen'ed, and emit any deferred diagnostics on this function and its 775 // (transitive) callees. 776 static void MarkKnownEmitted(Sema &S, FunctionDecl *OrigCaller, 777 FunctionDecl *OrigCallee, SourceLocation OrigLoc) { 778 // Nothing to do if we already know that FD is emitted. 779 if (IsKnownEmitted(S, OrigCallee)) { 780 assert(!S.CUDACallGraph.count(OrigCallee)); 781 return; 782 } 783 784 // We've just discovered that OrigCallee is known-emitted. Walk our call 785 // graph to see what else we can now discover also must be emitted. 786 787 struct CallInfo { 788 FunctionDecl *Caller; 789 FunctionDecl *Callee; 790 SourceLocation Loc; 791 }; 792 llvm::SmallVector<CallInfo, 4> Worklist = {{OrigCaller, OrigCallee, OrigLoc}}; 793 llvm::SmallSet<CanonicalDeclPtr<FunctionDecl>, 4> Seen; 794 Seen.insert(OrigCallee); 795 while (!Worklist.empty()) { 796 CallInfo C = Worklist.pop_back_val(); 797 assert(!IsKnownEmitted(S, C.Callee) && 798 "Worklist should not contain known-emitted functions."); 799 S.CUDAKnownEmittedFns[C.Callee] = {C.Caller, C.Loc}; 800 EmitDeferredDiags(S, C.Callee); 801 802 // If this is a template instantiation, explore its callgraph as well: 803 // Non-dependent calls are part of the template's callgraph, while dependent 804 // calls are part of to the instantiation's call graph. 805 if (auto *Templ = C.Callee->getPrimaryTemplate()) { 806 FunctionDecl *TemplFD = Templ->getAsFunction(); 807 if (!Seen.count(TemplFD) && !S.CUDAKnownEmittedFns.count(TemplFD)) { 808 Seen.insert(TemplFD); 809 Worklist.push_back( 810 {/* Caller = */ C.Caller, /* Callee = */ TemplFD, C.Loc}); 811 } 812 } 813 814 // Add all functions called by Callee to our worklist. 815 auto CGIt = S.CUDACallGraph.find(C.Callee); 816 if (CGIt == S.CUDACallGraph.end()) 817 continue; 818 819 for (std::pair<CanonicalDeclPtr<FunctionDecl>, SourceLocation> FDLoc : 820 CGIt->second) { 821 FunctionDecl *NewCallee = FDLoc.first; 822 SourceLocation CallLoc = FDLoc.second; 823 if (Seen.count(NewCallee) || IsKnownEmitted(S, NewCallee)) 824 continue; 825 Seen.insert(NewCallee); 826 Worklist.push_back( 827 {/* Caller = */ C.Callee, /* Callee = */ NewCallee, CallLoc}); 828 } 829 830 // C.Callee is now known-emitted, so we no longer need to maintain its list 831 // of callees in CUDACallGraph. 832 S.CUDACallGraph.erase(CGIt); 833 } 834 } 835 836 bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) { 837 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 838 assert(Callee && "Callee may not be null."); 839 // FIXME: Is bailing out early correct here? Should we instead assume that 840 // the caller is a global initializer? 841 FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext); 842 if (!Caller) 843 return true; 844 845 // If the caller is known-emitted, mark the callee as known-emitted. 846 // Otherwise, mark the call in our call graph so we can traverse it later. 847 bool CallerKnownEmitted = IsKnownEmitted(*this, Caller); 848 if (CallerKnownEmitted) { 849 // Host-side references to a __global__ function refer to the stub, so the 850 // function itself is never emitted and therefore should not be marked. 851 if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global) 852 MarkKnownEmitted(*this, Caller, Callee, Loc); 853 } else { 854 // If we have 855 // host fn calls kernel fn calls host+device, 856 // the HD function does not get instantiated on the host. We model this by 857 // omitting at the call to the kernel from the callgraph. This ensures 858 // that, when compiling for host, only HD functions actually called from the 859 // host get marked as known-emitted. 860 if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global) 861 CUDACallGraph[Caller].insert({Callee, Loc}); 862 } 863 864 CUDADiagBuilder::Kind DiagKind = [&] { 865 switch (IdentifyCUDAPreference(Caller, Callee)) { 866 case CFP_Never: 867 return CUDADiagBuilder::K_Immediate; 868 case CFP_WrongSide: 869 assert(Caller && "WrongSide calls require a non-null caller"); 870 // If we know the caller will be emitted, we know this wrong-side call 871 // will be emitted, so it's an immediate error. Otherwise, defer the 872 // error until we know the caller is emitted. 873 return CallerKnownEmitted ? CUDADiagBuilder::K_ImmediateWithCallStack 874 : CUDADiagBuilder::K_Deferred; 875 default: 876 return CUDADiagBuilder::K_Nop; 877 } 878 }(); 879 880 if (DiagKind == CUDADiagBuilder::K_Nop) 881 return true; 882 883 // Avoid emitting this error twice for the same location. Using a hashtable 884 // like this is unfortunate, but because we must continue parsing as normal 885 // after encountering a deferred error, it's otherwise very tricky for us to 886 // ensure that we only emit this deferred error once. 887 if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second) 888 return true; 889 890 CUDADiagBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, *this) 891 << IdentifyCUDATarget(Callee) << Callee << IdentifyCUDATarget(Caller); 892 CUDADiagBuilder(DiagKind, Callee->getLocation(), diag::note_previous_decl, 893 Caller, *this) 894 << Callee; 895 return DiagKind != CUDADiagBuilder::K_Immediate && 896 DiagKind != CUDADiagBuilder::K_ImmediateWithCallStack; 897 } 898 899 void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) { 900 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 901 if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>()) 902 return; 903 FunctionDecl *CurFn = dyn_cast<FunctionDecl>(CurContext); 904 if (!CurFn) 905 return; 906 CUDAFunctionTarget Target = IdentifyCUDATarget(CurFn); 907 if (Target == CFT_Global || Target == CFT_Device) { 908 Method->addAttr(CUDADeviceAttr::CreateImplicit(Context)); 909 } else if (Target == CFT_HostDevice) { 910 Method->addAttr(CUDADeviceAttr::CreateImplicit(Context)); 911 Method->addAttr(CUDAHostAttr::CreateImplicit(Context)); 912 } 913 } 914 915 void Sema::checkCUDATargetOverload(FunctionDecl *NewFD, 916 const LookupResult &Previous) { 917 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 918 CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD); 919 for (NamedDecl *OldND : Previous) { 920 FunctionDecl *OldFD = OldND->getAsFunction(); 921 if (!OldFD) 922 continue; 923 924 CUDAFunctionTarget OldTarget = IdentifyCUDATarget(OldFD); 925 // Don't allow HD and global functions to overload other functions with the 926 // same signature. We allow overloading based on CUDA attributes so that 927 // functions can have different implementations on the host and device, but 928 // HD/global functions "exist" in some sense on both the host and device, so 929 // should have the same implementation on both sides. 930 if (NewTarget != OldTarget && 931 ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice) || 932 (NewTarget == CFT_Global) || (OldTarget == CFT_Global)) && 933 !IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false, 934 /* ConsiderCudaAttrs = */ false)) { 935 Diag(NewFD->getLocation(), diag::err_cuda_ovl_target) 936 << NewTarget << NewFD->getDeclName() << OldTarget << OldFD; 937 Diag(OldFD->getLocation(), diag::note_previous_declaration); 938 NewFD->setInvalidDecl(); 939 break; 940 } 941 } 942 } 943 944 template <typename AttrTy> 945 static void copyAttrIfPresent(Sema &S, FunctionDecl *FD, 946 const FunctionDecl &TemplateFD) { 947 if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) { 948 AttrTy *Clone = Attribute->clone(S.Context); 949 Clone->setInherited(true); 950 FD->addAttr(Clone); 951 } 952 } 953 954 void Sema::inheritCUDATargetAttrs(FunctionDecl *FD, 955 const FunctionTemplateDecl &TD) { 956 const FunctionDecl &TemplateFD = *TD.getTemplatedDecl(); 957 copyAttrIfPresent<CUDAGlobalAttr>(*this, FD, TemplateFD); 958 copyAttrIfPresent<CUDAHostAttr>(*this, FD, TemplateFD); 959 copyAttrIfPresent<CUDADeviceAttr>(*this, FD, TemplateFD); 960 } 961