1 //===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// \file 10 /// \brief This file implements semantic analysis for CUDA constructs. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "clang/AST/ASTContext.h" 15 #include "clang/AST/Decl.h" 16 #include "clang/AST/ExprCXX.h" 17 #include "clang/Lex/Preprocessor.h" 18 #include "clang/Sema/Lookup.h" 19 #include "clang/Sema/Sema.h" 20 #include "clang/Sema/SemaDiagnostic.h" 21 #include "clang/Sema/SemaInternal.h" 22 #include "clang/Sema/Template.h" 23 #include "llvm/ADT/Optional.h" 24 #include "llvm/ADT/SmallVector.h" 25 using namespace clang; 26 27 void Sema::PushForceCUDAHostDevice() { 28 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 29 ForceCUDAHostDeviceDepth++; 30 } 31 32 bool Sema::PopForceCUDAHostDevice() { 33 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 34 if (ForceCUDAHostDeviceDepth == 0) 35 return false; 36 ForceCUDAHostDeviceDepth--; 37 return true; 38 } 39 40 ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc, 41 MultiExprArg ExecConfig, 42 SourceLocation GGGLoc) { 43 FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl(); 44 if (!ConfigDecl) 45 return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use) 46 << "cudaConfigureCall"); 47 QualType ConfigQTy = ConfigDecl->getType(); 48 49 DeclRefExpr *ConfigDR = new (Context) 50 DeclRefExpr(ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc); 51 MarkFunctionReferenced(LLLLoc, ConfigDecl); 52 53 return ActOnCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr, 54 /*IsExecConfig=*/true); 55 } 56 57 Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const AttributeList *Attr) { 58 bool HasHostAttr = false; 59 bool HasDeviceAttr = false; 60 bool HasGlobalAttr = false; 61 bool HasInvalidTargetAttr = false; 62 while (Attr) { 63 switch(Attr->getKind()){ 64 case AttributeList::AT_CUDAGlobal: 65 HasGlobalAttr = true; 66 break; 67 case AttributeList::AT_CUDAHost: 68 HasHostAttr = true; 69 break; 70 case AttributeList::AT_CUDADevice: 71 HasDeviceAttr = true; 72 break; 73 case AttributeList::AT_CUDAInvalidTarget: 74 HasInvalidTargetAttr = true; 75 break; 76 default: 77 break; 78 } 79 Attr = Attr->getNext(); 80 } 81 if (HasInvalidTargetAttr) 82 return CFT_InvalidTarget; 83 84 if (HasGlobalAttr) 85 return CFT_Global; 86 87 if (HasHostAttr && HasDeviceAttr) 88 return CFT_HostDevice; 89 90 if (HasDeviceAttr) 91 return CFT_Device; 92 93 return CFT_Host; 94 } 95 96 template <typename A> 97 static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) { 98 return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) { 99 return isa<A>(Attribute) && 100 !(IgnoreImplicitAttr && Attribute->isImplicit()); 101 }); 102 } 103 104 /// IdentifyCUDATarget - Determine the CUDA compilation target for this function 105 Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D, 106 bool IgnoreImplicitHDAttr) { 107 // Code that lives outside a function is run on the host. 108 if (D == nullptr) 109 return CFT_Host; 110 111 if (D->hasAttr<CUDAInvalidTargetAttr>()) 112 return CFT_InvalidTarget; 113 114 if (D->hasAttr<CUDAGlobalAttr>()) 115 return CFT_Global; 116 117 if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) { 118 if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) 119 return CFT_HostDevice; 120 return CFT_Device; 121 } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) { 122 return CFT_Host; 123 } else if (D->isImplicit() && !IgnoreImplicitHDAttr) { 124 // Some implicit declarations (like intrinsic functions) are not marked. 125 // Set the most lenient target on them for maximal flexibility. 126 return CFT_HostDevice; 127 } 128 129 return CFT_Host; 130 } 131 132 // * CUDA Call preference table 133 // 134 // F - from, 135 // T - to 136 // Ph - preference in host mode 137 // Pd - preference in device mode 138 // H - handled in (x) 139 // Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never. 140 // 141 // | F | T | Ph | Pd | H | 142 // |----+----+-----+-----+-----+ 143 // | d | d | N | N | (c) | 144 // | d | g | -- | -- | (a) | 145 // | d | h | -- | -- | (e) | 146 // | d | hd | HD | HD | (b) | 147 // | g | d | N | N | (c) | 148 // | g | g | -- | -- | (a) | 149 // | g | h | -- | -- | (e) | 150 // | g | hd | HD | HD | (b) | 151 // | h | d | -- | -- | (e) | 152 // | h | g | N | N | (c) | 153 // | h | h | N | N | (c) | 154 // | h | hd | HD | HD | (b) | 155 // | hd | d | WS | SS | (d) | 156 // | hd | g | SS | -- |(d/a)| 157 // | hd | h | SS | WS | (d) | 158 // | hd | hd | HD | HD | (b) | 159 160 Sema::CUDAFunctionPreference 161 Sema::IdentifyCUDAPreference(const FunctionDecl *Caller, 162 const FunctionDecl *Callee) { 163 assert(Callee && "Callee must be valid."); 164 CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller); 165 CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee); 166 167 // If one of the targets is invalid, the check always fails, no matter what 168 // the other target is. 169 if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget) 170 return CFP_Never; 171 172 // (a) Can't call global from some contexts until we support CUDA's 173 // dynamic parallelism. 174 if (CalleeTarget == CFT_Global && 175 (CallerTarget == CFT_Global || CallerTarget == CFT_Device)) 176 return CFP_Never; 177 178 // (b) Calling HostDevice is OK for everyone. 179 if (CalleeTarget == CFT_HostDevice) 180 return CFP_HostDevice; 181 182 // (c) Best case scenarios 183 if (CalleeTarget == CallerTarget || 184 (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) || 185 (CallerTarget == CFT_Global && CalleeTarget == CFT_Device)) 186 return CFP_Native; 187 188 // (d) HostDevice behavior depends on compilation mode. 189 if (CallerTarget == CFT_HostDevice) { 190 // It's OK to call a compilation-mode matching function from an HD one. 191 if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) || 192 (!getLangOpts().CUDAIsDevice && 193 (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global))) 194 return CFP_SameSide; 195 196 // Calls from HD to non-mode-matching functions (i.e., to host functions 197 // when compiling in device mode or to device functions when compiling in 198 // host mode) are allowed at the sema level, but eventually rejected if 199 // they're ever codegened. TODO: Reject said calls earlier. 200 return CFP_WrongSide; 201 } 202 203 // (e) Calling across device/host boundary is not something you should do. 204 if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) || 205 (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) || 206 (CallerTarget == CFT_Global && CalleeTarget == CFT_Host)) 207 return CFP_Never; 208 209 llvm_unreachable("All cases should've been handled by now."); 210 } 211 212 void Sema::EraseUnwantedCUDAMatches( 213 const FunctionDecl *Caller, 214 SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) { 215 if (Matches.size() <= 1) 216 return; 217 218 using Pair = std::pair<DeclAccessPair, FunctionDecl*>; 219 220 // Gets the CUDA function preference for a call from Caller to Match. 221 auto GetCFP = [&](const Pair &Match) { 222 return IdentifyCUDAPreference(Caller, Match.second); 223 }; 224 225 // Find the best call preference among the functions in Matches. 226 CUDAFunctionPreference BestCFP = GetCFP(*std::max_element( 227 Matches.begin(), Matches.end(), 228 [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); })); 229 230 // Erase all functions with lower priority. 231 llvm::erase_if(Matches, 232 [&](const Pair &Match) { return GetCFP(Match) < BestCFP; }); 233 } 234 235 /// When an implicitly-declared special member has to invoke more than one 236 /// base/field special member, conflicts may occur in the targets of these 237 /// members. For example, if one base's member __host__ and another's is 238 /// __device__, it's a conflict. 239 /// This function figures out if the given targets \param Target1 and 240 /// \param Target2 conflict, and if they do not it fills in 241 /// \param ResolvedTarget with a target that resolves for both calls. 242 /// \return true if there's a conflict, false otherwise. 243 static bool 244 resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1, 245 Sema::CUDAFunctionTarget Target2, 246 Sema::CUDAFunctionTarget *ResolvedTarget) { 247 // Only free functions and static member functions may be global. 248 assert(Target1 != Sema::CFT_Global); 249 assert(Target2 != Sema::CFT_Global); 250 251 if (Target1 == Sema::CFT_HostDevice) { 252 *ResolvedTarget = Target2; 253 } else if (Target2 == Sema::CFT_HostDevice) { 254 *ResolvedTarget = Target1; 255 } else if (Target1 != Target2) { 256 return true; 257 } else { 258 *ResolvedTarget = Target1; 259 } 260 261 return false; 262 } 263 264 bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl, 265 CXXSpecialMember CSM, 266 CXXMethodDecl *MemberDecl, 267 bool ConstRHS, 268 bool Diagnose) { 269 llvm::Optional<CUDAFunctionTarget> InferredTarget; 270 271 // We're going to invoke special member lookup; mark that these special 272 // members are called from this one, and not from its caller. 273 ContextRAII MethodContext(*this, MemberDecl); 274 275 // Look for special members in base classes that should be invoked from here. 276 // Infer the target of this member base on the ones it should call. 277 // Skip direct and indirect virtual bases for abstract classes. 278 llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases; 279 for (const auto &B : ClassDecl->bases()) { 280 if (!B.isVirtual()) { 281 Bases.push_back(&B); 282 } 283 } 284 285 if (!ClassDecl->isAbstract()) { 286 for (const auto &VB : ClassDecl->vbases()) { 287 Bases.push_back(&VB); 288 } 289 } 290 291 for (const auto *B : Bases) { 292 const RecordType *BaseType = B->getType()->getAs<RecordType>(); 293 if (!BaseType) { 294 continue; 295 } 296 297 CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl()); 298 Sema::SpecialMemberOverloadResult SMOR = 299 LookupSpecialMember(BaseClassDecl, CSM, 300 /* ConstArg */ ConstRHS, 301 /* VolatileArg */ false, 302 /* RValueThis */ false, 303 /* ConstThis */ false, 304 /* VolatileThis */ false); 305 306 if (!SMOR.getMethod()) 307 continue; 308 309 CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR.getMethod()); 310 if (!InferredTarget.hasValue()) { 311 InferredTarget = BaseMethodTarget; 312 } else { 313 bool ResolutionError = resolveCalleeCUDATargetConflict( 314 InferredTarget.getValue(), BaseMethodTarget, 315 InferredTarget.getPointer()); 316 if (ResolutionError) { 317 if (Diagnose) { 318 Diag(ClassDecl->getLocation(), 319 diag::note_implicit_member_target_infer_collision) 320 << (unsigned)CSM << InferredTarget.getValue() << BaseMethodTarget; 321 } 322 MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context)); 323 return true; 324 } 325 } 326 } 327 328 // Same as for bases, but now for special members of fields. 329 for (const auto *F : ClassDecl->fields()) { 330 if (F->isInvalidDecl()) { 331 continue; 332 } 333 334 const RecordType *FieldType = 335 Context.getBaseElementType(F->getType())->getAs<RecordType>(); 336 if (!FieldType) { 337 continue; 338 } 339 340 CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl()); 341 Sema::SpecialMemberOverloadResult SMOR = 342 LookupSpecialMember(FieldRecDecl, CSM, 343 /* ConstArg */ ConstRHS && !F->isMutable(), 344 /* VolatileArg */ false, 345 /* RValueThis */ false, 346 /* ConstThis */ false, 347 /* VolatileThis */ false); 348 349 if (!SMOR.getMethod()) 350 continue; 351 352 CUDAFunctionTarget FieldMethodTarget = 353 IdentifyCUDATarget(SMOR.getMethod()); 354 if (!InferredTarget.hasValue()) { 355 InferredTarget = FieldMethodTarget; 356 } else { 357 bool ResolutionError = resolveCalleeCUDATargetConflict( 358 InferredTarget.getValue(), FieldMethodTarget, 359 InferredTarget.getPointer()); 360 if (ResolutionError) { 361 if (Diagnose) { 362 Diag(ClassDecl->getLocation(), 363 diag::note_implicit_member_target_infer_collision) 364 << (unsigned)CSM << InferredTarget.getValue() 365 << FieldMethodTarget; 366 } 367 MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context)); 368 return true; 369 } 370 } 371 } 372 373 if (InferredTarget.hasValue()) { 374 if (InferredTarget.getValue() == CFT_Device) { 375 MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context)); 376 } else if (InferredTarget.getValue() == CFT_Host) { 377 MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context)); 378 } else { 379 MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context)); 380 MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context)); 381 } 382 } else { 383 // If no target was inferred, mark this member as __host__ __device__; 384 // it's the least restrictive option that can be invoked from any target. 385 MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context)); 386 MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context)); 387 } 388 389 return false; 390 } 391 392 bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) { 393 if (!CD->isDefined() && CD->isTemplateInstantiation()) 394 InstantiateFunctionDefinition(Loc, CD->getFirstDecl()); 395 396 // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered 397 // empty at a point in the translation unit, if it is either a 398 // trivial constructor 399 if (CD->isTrivial()) 400 return true; 401 402 // ... or it satisfies all of the following conditions: 403 // The constructor function has been defined. 404 // The constructor function has no parameters, 405 // and the function body is an empty compound statement. 406 if (!(CD->hasTrivialBody() && CD->getNumParams() == 0)) 407 return false; 408 409 // Its class has no virtual functions and no virtual base classes. 410 if (CD->getParent()->isDynamicClass()) 411 return false; 412 413 // The only form of initializer allowed is an empty constructor. 414 // This will recursively check all base classes and member initializers 415 if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) { 416 if (const CXXConstructExpr *CE = 417 dyn_cast<CXXConstructExpr>(CI->getInit())) 418 return isEmptyCudaConstructor(Loc, CE->getConstructor()); 419 return false; 420 })) 421 return false; 422 423 return true; 424 } 425 426 bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) { 427 // No destructor -> no problem. 428 if (!DD) 429 return true; 430 431 if (!DD->isDefined() && DD->isTemplateInstantiation()) 432 InstantiateFunctionDefinition(Loc, DD->getFirstDecl()); 433 434 // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered 435 // empty at a point in the translation unit, if it is either a 436 // trivial constructor 437 if (DD->isTrivial()) 438 return true; 439 440 // ... or it satisfies all of the following conditions: 441 // The destructor function has been defined. 442 // and the function body is an empty compound statement. 443 if (!DD->hasTrivialBody()) 444 return false; 445 446 const CXXRecordDecl *ClassDecl = DD->getParent(); 447 448 // Its class has no virtual functions and no virtual base classes. 449 if (ClassDecl->isDynamicClass()) 450 return false; 451 452 // Only empty destructors are allowed. This will recursively check 453 // destructors for all base classes... 454 if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) { 455 if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl()) 456 return isEmptyCudaDestructor(Loc, RD->getDestructor()); 457 return true; 458 })) 459 return false; 460 461 // ... and member fields. 462 if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) { 463 if (CXXRecordDecl *RD = Field->getType() 464 ->getBaseElementTypeUnsafe() 465 ->getAsCXXRecordDecl()) 466 return isEmptyCudaDestructor(Loc, RD->getDestructor()); 467 return true; 468 })) 469 return false; 470 471 return true; 472 } 473 474 // With -fcuda-host-device-constexpr, an unattributed constexpr function is 475 // treated as implicitly __host__ __device__, unless: 476 // * it is a variadic function (device-side variadic functions are not 477 // allowed), or 478 // * a __device__ function with this signature was already declared, in which 479 // case in which case we output an error, unless the __device__ decl is in a 480 // system header, in which case we leave the constexpr function unattributed. 481 // 482 // In addition, all function decls are treated as __host__ __device__ when 483 // ForceCUDAHostDeviceDepth > 0 (corresponding to code within a 484 // #pragma clang force_cuda_host_device_begin/end 485 // pair). 486 void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD, 487 const LookupResult &Previous) { 488 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 489 490 if (ForceCUDAHostDeviceDepth > 0) { 491 if (!NewD->hasAttr<CUDAHostAttr>()) 492 NewD->addAttr(CUDAHostAttr::CreateImplicit(Context)); 493 if (!NewD->hasAttr<CUDADeviceAttr>()) 494 NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context)); 495 return; 496 } 497 498 if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() || 499 NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() || 500 NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>()) 501 return; 502 503 // Is D a __device__ function with the same signature as NewD, ignoring CUDA 504 // attributes? 505 auto IsMatchingDeviceFn = [&](NamedDecl *D) { 506 if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D)) 507 D = Using->getTargetDecl(); 508 FunctionDecl *OldD = D->getAsFunction(); 509 return OldD && OldD->hasAttr<CUDADeviceAttr>() && 510 !OldD->hasAttr<CUDAHostAttr>() && 511 !IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false, 512 /* ConsiderCudaAttrs = */ false); 513 }; 514 auto It = llvm::find_if(Previous, IsMatchingDeviceFn); 515 if (It != Previous.end()) { 516 // We found a __device__ function with the same name and signature as NewD 517 // (ignoring CUDA attrs). This is an error unless that function is defined 518 // in a system header, in which case we simply return without making NewD 519 // host+device. 520 NamedDecl *Match = *It; 521 if (!getSourceManager().isInSystemHeader(Match->getLocation())) { 522 Diag(NewD->getLocation(), 523 diag::err_cuda_unattributed_constexpr_cannot_overload_device) 524 << NewD->getName(); 525 Diag(Match->getLocation(), 526 diag::note_cuda_conflicting_device_function_declared_here); 527 } 528 return; 529 } 530 531 NewD->addAttr(CUDAHostAttr::CreateImplicit(Context)); 532 NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context)); 533 } 534 535 // In CUDA, there are some constructs which may appear in semantically-valid 536 // code, but trigger errors if we ever generate code for the function in which 537 // they appear. Essentially every construct you're not allowed to use on the 538 // device falls into this category, because you are allowed to use these 539 // constructs in a __host__ __device__ function, but only if that function is 540 // never codegen'ed on the device. 541 // 542 // To handle semantic checking for these constructs, we keep track of the set of 543 // functions we know will be emitted, either because we could tell a priori that 544 // they would be emitted, or because they were transitively called by a 545 // known-emitted function. 546 // 547 // We also keep a partial call graph of which not-known-emitted functions call 548 // which other not-known-emitted functions. 549 // 550 // When we see something which is illegal if the current function is emitted 551 // (usually by way of CUDADiagIfDeviceCode, CUDADiagIfHostCode, or 552 // CheckCUDACall), we first check if the current function is known-emitted. If 553 // so, we immediately output the diagnostic. 554 // 555 // Otherwise, we "defer" the diagnostic. It sits in Sema::CUDADeferredDiags 556 // until we discover that the function is known-emitted, at which point we take 557 // it out of this map and emit the diagnostic. 558 559 Sema::CUDADiagBuilder::CUDADiagBuilder(Kind K, SourceLocation Loc, 560 unsigned DiagID, FunctionDecl *Fn, 561 Sema &S) 562 : S(S), Loc(Loc), DiagID(DiagID), Fn(Fn), 563 ShowCallStack(K == K_ImmediateWithCallStack || K == K_Deferred) { 564 switch (K) { 565 case K_Nop: 566 break; 567 case K_Immediate: 568 case K_ImmediateWithCallStack: 569 ImmediateDiag.emplace(S.Diag(Loc, DiagID)); 570 break; 571 case K_Deferred: 572 assert(Fn && "Must have a function to attach the deferred diag to."); 573 PartialDiag.emplace(S.PDiag(DiagID)); 574 break; 575 } 576 } 577 578 // Print notes showing how we can reach FD starting from an a priori 579 // known-callable function. 580 static void EmitCallStackNotes(Sema &S, FunctionDecl *FD) { 581 auto FnIt = S.CUDAKnownEmittedFns.find(FD); 582 while (FnIt != S.CUDAKnownEmittedFns.end()) { 583 DiagnosticBuilder Builder( 584 S.Diags.Report(FnIt->second.Loc, diag::note_called_by)); 585 Builder << FnIt->second.FD; 586 Builder.setForceEmit(); 587 588 FnIt = S.CUDAKnownEmittedFns.find(FnIt->second.FD); 589 } 590 } 591 592 Sema::CUDADiagBuilder::~CUDADiagBuilder() { 593 if (ImmediateDiag) { 594 // Emit our diagnostic and, if it was a warning or error, output a callstack 595 // if Fn isn't a priori known-emitted. 596 bool IsWarningOrError = S.getDiagnostics().getDiagnosticLevel( 597 DiagID, Loc) >= DiagnosticsEngine::Warning; 598 ImmediateDiag.reset(); // Emit the immediate diag. 599 if (IsWarningOrError && ShowCallStack) 600 EmitCallStackNotes(S, Fn); 601 } else if (PartialDiag) { 602 assert(ShowCallStack && "Must always show call stack for deferred diags."); 603 S.CUDADeferredDiags[Fn].push_back({Loc, std::move(*PartialDiag)}); 604 } 605 } 606 607 // Do we know that we will eventually codegen the given function? 608 static bool IsKnownEmitted(Sema &S, FunctionDecl *FD) { 609 // Templates are emitted when they're instantiated. 610 if (FD->isDependentContext()) 611 return false; 612 613 // When compiling for device, host functions are never emitted. Similarly, 614 // when compiling for host, device and global functions are never emitted. 615 // (Technically, we do emit a host-side stub for global functions, but this 616 // doesn't count for our purposes here.) 617 Sema::CUDAFunctionTarget T = S.IdentifyCUDATarget(FD); 618 if (S.getLangOpts().CUDAIsDevice && T == Sema::CFT_Host) 619 return false; 620 if (!S.getLangOpts().CUDAIsDevice && 621 (T == Sema::CFT_Device || T == Sema::CFT_Global)) 622 return false; 623 624 // Check whether this function is externally visible -- if so, it's 625 // known-emitted. 626 // 627 // We have to check the GVA linkage of the function's *definition* -- if we 628 // only have a declaration, we don't know whether or not the function will be 629 // emitted, because (say) the definition could include "inline". 630 FunctionDecl *Def = FD->getDefinition(); 631 632 // We may currently be parsing the body of FD, in which case 633 // FD->getDefinition() will be null, but we still want to treat FD as though 634 // it's a definition. 635 if (!Def && FD->willHaveBody()) 636 Def = FD; 637 638 if (Def && 639 !isDiscardableGVALinkage(S.getASTContext().GetGVALinkageForFunction(Def))) 640 return true; 641 642 // Otherwise, the function is known-emitted if it's in our set of 643 // known-emitted functions. 644 return S.CUDAKnownEmittedFns.count(FD) > 0; 645 } 646 647 Sema::CUDADiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc, 648 unsigned DiagID) { 649 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 650 CUDADiagBuilder::Kind DiagKind = [&] { 651 switch (CurrentCUDATarget()) { 652 case CFT_Global: 653 case CFT_Device: 654 return CUDADiagBuilder::K_Immediate; 655 case CFT_HostDevice: 656 // An HD function counts as host code if we're compiling for host, and 657 // device code if we're compiling for device. Defer any errors in device 658 // mode until the function is known-emitted. 659 if (getLangOpts().CUDAIsDevice) { 660 return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext)) 661 ? CUDADiagBuilder::K_ImmediateWithCallStack 662 : CUDADiagBuilder::K_Deferred; 663 } 664 return CUDADiagBuilder::K_Nop; 665 666 default: 667 return CUDADiagBuilder::K_Nop; 668 } 669 }(); 670 return CUDADiagBuilder(DiagKind, Loc, DiagID, 671 dyn_cast<FunctionDecl>(CurContext), *this); 672 } 673 674 Sema::CUDADiagBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc, 675 unsigned DiagID) { 676 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 677 CUDADiagBuilder::Kind DiagKind = [&] { 678 switch (CurrentCUDATarget()) { 679 case CFT_Host: 680 return CUDADiagBuilder::K_Immediate; 681 case CFT_HostDevice: 682 // An HD function counts as host code if we're compiling for host, and 683 // device code if we're compiling for device. Defer any errors in device 684 // mode until the function is known-emitted. 685 if (getLangOpts().CUDAIsDevice) 686 return CUDADiagBuilder::K_Nop; 687 688 return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext)) 689 ? CUDADiagBuilder::K_ImmediateWithCallStack 690 : CUDADiagBuilder::K_Deferred; 691 default: 692 return CUDADiagBuilder::K_Nop; 693 } 694 }(); 695 return CUDADiagBuilder(DiagKind, Loc, DiagID, 696 dyn_cast<FunctionDecl>(CurContext), *this); 697 } 698 699 // Emit any deferred diagnostics for FD and erase them from the map in which 700 // they're stored. 701 static void EmitDeferredDiags(Sema &S, FunctionDecl *FD) { 702 auto It = S.CUDADeferredDiags.find(FD); 703 if (It == S.CUDADeferredDiags.end()) 704 return; 705 bool HasWarningOrError = false; 706 for (PartialDiagnosticAt &PDAt : It->second) { 707 const SourceLocation &Loc = PDAt.first; 708 const PartialDiagnostic &PD = PDAt.second; 709 HasWarningOrError |= S.getDiagnostics().getDiagnosticLevel( 710 PD.getDiagID(), Loc) >= DiagnosticsEngine::Warning; 711 DiagnosticBuilder Builder(S.Diags.Report(Loc, PD.getDiagID())); 712 Builder.setForceEmit(); 713 PD.Emit(Builder); 714 } 715 S.CUDADeferredDiags.erase(It); 716 717 // FIXME: Should this be called after every warning/error emitted in the loop 718 // above, instead of just once per function? That would be consistent with 719 // how we handle immediate errors, but it also seems like a bit much. 720 if (HasWarningOrError) 721 EmitCallStackNotes(S, FD); 722 } 723 724 // Indicate that this function (and thus everything it transtively calls) will 725 // be codegen'ed, and emit any deferred diagnostics on this function and its 726 // (transitive) callees. 727 static void MarkKnownEmitted(Sema &S, FunctionDecl *OrigCaller, 728 FunctionDecl *OrigCallee, SourceLocation OrigLoc) { 729 // Nothing to do if we already know that FD is emitted. 730 if (IsKnownEmitted(S, OrigCallee)) { 731 assert(!S.CUDACallGraph.count(OrigCallee)); 732 return; 733 } 734 735 // We've just discovered that OrigCallee is known-emitted. Walk our call 736 // graph to see what else we can now discover also must be emitted. 737 738 struct CallInfo { 739 FunctionDecl *Caller; 740 FunctionDecl *Callee; 741 SourceLocation Loc; 742 }; 743 llvm::SmallVector<CallInfo, 4> Worklist = {{OrigCaller, OrigCallee, OrigLoc}}; 744 llvm::SmallSet<CanonicalDeclPtr<FunctionDecl>, 4> Seen; 745 Seen.insert(OrigCallee); 746 while (!Worklist.empty()) { 747 CallInfo C = Worklist.pop_back_val(); 748 assert(!IsKnownEmitted(S, C.Callee) && 749 "Worklist should not contain known-emitted functions."); 750 S.CUDAKnownEmittedFns[C.Callee] = {C.Caller, C.Loc}; 751 EmitDeferredDiags(S, C.Callee); 752 753 // If this is a template instantiation, explore its callgraph as well: 754 // Non-dependent calls are part of the template's callgraph, while dependent 755 // calls are part of to the instantiation's call graph. 756 if (auto *Templ = C.Callee->getPrimaryTemplate()) { 757 FunctionDecl *TemplFD = Templ->getAsFunction(); 758 if (!Seen.count(TemplFD) && !S.CUDAKnownEmittedFns.count(TemplFD)) { 759 Seen.insert(TemplFD); 760 Worklist.push_back( 761 {/* Caller = */ C.Caller, /* Callee = */ TemplFD, C.Loc}); 762 } 763 } 764 765 // Add all functions called by Callee to our worklist. 766 auto CGIt = S.CUDACallGraph.find(C.Callee); 767 if (CGIt == S.CUDACallGraph.end()) 768 continue; 769 770 for (std::pair<CanonicalDeclPtr<FunctionDecl>, SourceLocation> FDLoc : 771 CGIt->second) { 772 FunctionDecl *NewCallee = FDLoc.first; 773 SourceLocation CallLoc = FDLoc.second; 774 if (Seen.count(NewCallee) || IsKnownEmitted(S, NewCallee)) 775 continue; 776 Seen.insert(NewCallee); 777 Worklist.push_back( 778 {/* Caller = */ C.Callee, /* Callee = */ NewCallee, CallLoc}); 779 } 780 781 // C.Callee is now known-emitted, so we no longer need to maintain its list 782 // of callees in CUDACallGraph. 783 S.CUDACallGraph.erase(CGIt); 784 } 785 } 786 787 bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) { 788 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 789 assert(Callee && "Callee may not be null."); 790 // FIXME: Is bailing out early correct here? Should we instead assume that 791 // the caller is a global initializer? 792 FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext); 793 if (!Caller) 794 return true; 795 796 // If the caller is known-emitted, mark the callee as known-emitted. 797 // Otherwise, mark the call in our call graph so we can traverse it later. 798 bool CallerKnownEmitted = IsKnownEmitted(*this, Caller); 799 if (CallerKnownEmitted) 800 MarkKnownEmitted(*this, Caller, Callee, Loc); 801 else { 802 // If we have 803 // host fn calls kernel fn calls host+device, 804 // the HD function does not get instantiated on the host. We model this by 805 // omitting at the call to the kernel from the callgraph. This ensures 806 // that, when compiling for host, only HD functions actually called from the 807 // host get marked as known-emitted. 808 if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global) 809 CUDACallGraph[Caller].insert({Callee, Loc}); 810 } 811 812 CUDADiagBuilder::Kind DiagKind = [&] { 813 switch (IdentifyCUDAPreference(Caller, Callee)) { 814 case CFP_Never: 815 return CUDADiagBuilder::K_Immediate; 816 case CFP_WrongSide: 817 assert(Caller && "WrongSide calls require a non-null caller"); 818 // If we know the caller will be emitted, we know this wrong-side call 819 // will be emitted, so it's an immediate error. Otherwise, defer the 820 // error until we know the caller is emitted. 821 return CallerKnownEmitted ? CUDADiagBuilder::K_ImmediateWithCallStack 822 : CUDADiagBuilder::K_Deferred; 823 default: 824 return CUDADiagBuilder::K_Nop; 825 } 826 }(); 827 828 if (DiagKind == CUDADiagBuilder::K_Nop) 829 return true; 830 831 // Avoid emitting this error twice for the same location. Using a hashtable 832 // like this is unfortunate, but because we must continue parsing as normal 833 // after encountering a deferred error, it's otherwise very tricky for us to 834 // ensure that we only emit this deferred error once. 835 if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second) 836 return true; 837 838 CUDADiagBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, *this) 839 << IdentifyCUDATarget(Callee) << Callee << IdentifyCUDATarget(Caller); 840 CUDADiagBuilder(DiagKind, Callee->getLocation(), diag::note_previous_decl, 841 Caller, *this) 842 << Callee; 843 return DiagKind != CUDADiagBuilder::K_Immediate && 844 DiagKind != CUDADiagBuilder::K_ImmediateWithCallStack; 845 } 846 847 void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) { 848 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 849 if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>()) 850 return; 851 FunctionDecl *CurFn = dyn_cast<FunctionDecl>(CurContext); 852 if (!CurFn) 853 return; 854 CUDAFunctionTarget Target = IdentifyCUDATarget(CurFn); 855 if (Target == CFT_Global || Target == CFT_Device) { 856 Method->addAttr(CUDADeviceAttr::CreateImplicit(Context)); 857 } else if (Target == CFT_HostDevice) { 858 Method->addAttr(CUDADeviceAttr::CreateImplicit(Context)); 859 Method->addAttr(CUDAHostAttr::CreateImplicit(Context)); 860 } 861 } 862 863 void Sema::checkCUDATargetOverload(FunctionDecl *NewFD, 864 const LookupResult &Previous) { 865 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 866 CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD); 867 for (NamedDecl *OldND : Previous) { 868 FunctionDecl *OldFD = OldND->getAsFunction(); 869 if (!OldFD) 870 continue; 871 872 CUDAFunctionTarget OldTarget = IdentifyCUDATarget(OldFD); 873 // Don't allow HD and global functions to overload other functions with the 874 // same signature. We allow overloading based on CUDA attributes so that 875 // functions can have different implementations on the host and device, but 876 // HD/global functions "exist" in some sense on both the host and device, so 877 // should have the same implementation on both sides. 878 if (NewTarget != OldTarget && 879 ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice) || 880 (NewTarget == CFT_Global) || (OldTarget == CFT_Global)) && 881 !IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false, 882 /* ConsiderCudaAttrs = */ false)) { 883 Diag(NewFD->getLocation(), diag::err_cuda_ovl_target) 884 << NewTarget << NewFD->getDeclName() << OldTarget << OldFD; 885 Diag(OldFD->getLocation(), diag::note_previous_declaration); 886 NewFD->setInvalidDecl(); 887 break; 888 } 889 } 890 } 891 892 template <typename AttrTy> 893 static void copyAttrIfPresent(Sema &S, FunctionDecl *FD, 894 const FunctionDecl &TemplateFD) { 895 if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) { 896 AttrTy *Clone = Attribute->clone(S.Context); 897 Clone->setInherited(true); 898 FD->addAttr(Clone); 899 } 900 } 901 902 void Sema::inheritCUDATargetAttrs(FunctionDecl *FD, 903 const FunctionTemplateDecl &TD) { 904 const FunctionDecl &TemplateFD = *TD.getTemplatedDecl(); 905 copyAttrIfPresent<CUDAGlobalAttr>(*this, FD, TemplateFD); 906 copyAttrIfPresent<CUDAHostAttr>(*this, FD, TemplateFD); 907 copyAttrIfPresent<CUDADeviceAttr>(*this, FD, TemplateFD); 908 } 909