1 //===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// \file 10 /// \brief This file implements semantic analysis for CUDA constructs. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "clang/AST/ASTContext.h" 15 #include "clang/AST/Decl.h" 16 #include "clang/AST/ExprCXX.h" 17 #include "clang/Lex/Preprocessor.h" 18 #include "clang/Sema/Lookup.h" 19 #include "clang/Sema/Sema.h" 20 #include "clang/Sema/SemaDiagnostic.h" 21 #include "clang/Sema/SemaInternal.h" 22 #include "clang/Sema/Template.h" 23 #include "llvm/ADT/Optional.h" 24 #include "llvm/ADT/SmallVector.h" 25 using namespace clang; 26 27 void Sema::PushForceCUDAHostDevice() { 28 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 29 ForceCUDAHostDeviceDepth++; 30 } 31 32 bool Sema::PopForceCUDAHostDevice() { 33 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 34 if (ForceCUDAHostDeviceDepth == 0) 35 return false; 36 ForceCUDAHostDeviceDepth--; 37 return true; 38 } 39 40 ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc, 41 MultiExprArg ExecConfig, 42 SourceLocation GGGLoc) { 43 FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl(); 44 if (!ConfigDecl) 45 return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use) 46 << "cudaConfigureCall"); 47 QualType ConfigQTy = ConfigDecl->getType(); 48 49 DeclRefExpr *ConfigDR = new (Context) 50 DeclRefExpr(ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc); 51 MarkFunctionReferenced(LLLLoc, ConfigDecl); 52 53 return ActOnCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr, 54 /*IsExecConfig=*/true); 55 } 56 57 Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const AttributeList *Attr) { 58 bool HasHostAttr = false; 59 bool HasDeviceAttr = false; 60 bool HasGlobalAttr = false; 61 bool HasInvalidTargetAttr = false; 62 while (Attr) { 63 switch(Attr->getKind()){ 64 case AttributeList::AT_CUDAGlobal: 65 HasGlobalAttr = true; 66 break; 67 case AttributeList::AT_CUDAHost: 68 HasHostAttr = true; 69 break; 70 case AttributeList::AT_CUDADevice: 71 HasDeviceAttr = true; 72 break; 73 case AttributeList::AT_CUDAInvalidTarget: 74 HasInvalidTargetAttr = true; 75 break; 76 default: 77 break; 78 } 79 Attr = Attr->getNext(); 80 } 81 if (HasInvalidTargetAttr) 82 return CFT_InvalidTarget; 83 84 if (HasGlobalAttr) 85 return CFT_Global; 86 87 if (HasHostAttr && HasDeviceAttr) 88 return CFT_HostDevice; 89 90 if (HasDeviceAttr) 91 return CFT_Device; 92 93 return CFT_Host; 94 } 95 96 template <typename A> 97 static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) { 98 return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) { 99 return isa<A>(Attribute) && 100 !(IgnoreImplicitAttr && Attribute->isImplicit()); 101 }); 102 } 103 104 /// IdentifyCUDATarget - Determine the CUDA compilation target for this function 105 Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D, 106 bool IgnoreImplicitHDAttr) { 107 // Code that lives outside a function is run on the host. 108 if (D == nullptr) 109 return CFT_Host; 110 111 if (D->hasAttr<CUDAInvalidTargetAttr>()) 112 return CFT_InvalidTarget; 113 114 if (D->hasAttr<CUDAGlobalAttr>()) 115 return CFT_Global; 116 117 if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) { 118 if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) 119 return CFT_HostDevice; 120 return CFT_Device; 121 } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) { 122 return CFT_Host; 123 } else if (D->isImplicit() && !IgnoreImplicitHDAttr) { 124 // Some implicit declarations (like intrinsic functions) are not marked. 125 // Set the most lenient target on them for maximal flexibility. 126 return CFT_HostDevice; 127 } 128 129 return CFT_Host; 130 } 131 132 // * CUDA Call preference table 133 // 134 // F - from, 135 // T - to 136 // Ph - preference in host mode 137 // Pd - preference in device mode 138 // H - handled in (x) 139 // Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never. 140 // 141 // | F | T | Ph | Pd | H | 142 // |----+----+-----+-----+-----+ 143 // | d | d | N | N | (c) | 144 // | d | g | -- | -- | (a) | 145 // | d | h | -- | -- | (e) | 146 // | d | hd | HD | HD | (b) | 147 // | g | d | N | N | (c) | 148 // | g | g | -- | -- | (a) | 149 // | g | h | -- | -- | (e) | 150 // | g | hd | HD | HD | (b) | 151 // | h | d | -- | -- | (e) | 152 // | h | g | N | N | (c) | 153 // | h | h | N | N | (c) | 154 // | h | hd | HD | HD | (b) | 155 // | hd | d | WS | SS | (d) | 156 // | hd | g | SS | -- |(d/a)| 157 // | hd | h | SS | WS | (d) | 158 // | hd | hd | HD | HD | (b) | 159 160 Sema::CUDAFunctionPreference 161 Sema::IdentifyCUDAPreference(const FunctionDecl *Caller, 162 const FunctionDecl *Callee) { 163 assert(Callee && "Callee must be valid."); 164 CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller); 165 CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee); 166 167 // If one of the targets is invalid, the check always fails, no matter what 168 // the other target is. 169 if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget) 170 return CFP_Never; 171 172 // (a) Can't call global from some contexts until we support CUDA's 173 // dynamic parallelism. 174 if (CalleeTarget == CFT_Global && 175 (CallerTarget == CFT_Global || CallerTarget == CFT_Device)) 176 return CFP_Never; 177 178 // (b) Calling HostDevice is OK for everyone. 179 if (CalleeTarget == CFT_HostDevice) 180 return CFP_HostDevice; 181 182 // (c) Best case scenarios 183 if (CalleeTarget == CallerTarget || 184 (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) || 185 (CallerTarget == CFT_Global && CalleeTarget == CFT_Device)) 186 return CFP_Native; 187 188 // (d) HostDevice behavior depends on compilation mode. 189 if (CallerTarget == CFT_HostDevice) { 190 // It's OK to call a compilation-mode matching function from an HD one. 191 if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) || 192 (!getLangOpts().CUDAIsDevice && 193 (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global))) 194 return CFP_SameSide; 195 196 // Calls from HD to non-mode-matching functions (i.e., to host functions 197 // when compiling in device mode or to device functions when compiling in 198 // host mode) are allowed at the sema level, but eventually rejected if 199 // they're ever codegened. TODO: Reject said calls earlier. 200 return CFP_WrongSide; 201 } 202 203 // (e) Calling across device/host boundary is not something you should do. 204 if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) || 205 (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) || 206 (CallerTarget == CFT_Global && CalleeTarget == CFT_Host)) 207 return CFP_Never; 208 209 llvm_unreachable("All cases should've been handled by now."); 210 } 211 212 void Sema::EraseUnwantedCUDAMatches( 213 const FunctionDecl *Caller, 214 SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) { 215 if (Matches.size() <= 1) 216 return; 217 218 using Pair = std::pair<DeclAccessPair, FunctionDecl*>; 219 220 // Gets the CUDA function preference for a call from Caller to Match. 221 auto GetCFP = [&](const Pair &Match) { 222 return IdentifyCUDAPreference(Caller, Match.second); 223 }; 224 225 // Find the best call preference among the functions in Matches. 226 CUDAFunctionPreference BestCFP = GetCFP(*std::max_element( 227 Matches.begin(), Matches.end(), 228 [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); })); 229 230 // Erase all functions with lower priority. 231 Matches.erase( 232 llvm::remove_if( 233 Matches, [&](const Pair &Match) { return GetCFP(Match) < BestCFP; }), 234 Matches.end()); 235 } 236 237 /// When an implicitly-declared special member has to invoke more than one 238 /// base/field special member, conflicts may occur in the targets of these 239 /// members. For example, if one base's member __host__ and another's is 240 /// __device__, it's a conflict. 241 /// This function figures out if the given targets \param Target1 and 242 /// \param Target2 conflict, and if they do not it fills in 243 /// \param ResolvedTarget with a target that resolves for both calls. 244 /// \return true if there's a conflict, false otherwise. 245 static bool 246 resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1, 247 Sema::CUDAFunctionTarget Target2, 248 Sema::CUDAFunctionTarget *ResolvedTarget) { 249 // Only free functions and static member functions may be global. 250 assert(Target1 != Sema::CFT_Global); 251 assert(Target2 != Sema::CFT_Global); 252 253 if (Target1 == Sema::CFT_HostDevice) { 254 *ResolvedTarget = Target2; 255 } else if (Target2 == Sema::CFT_HostDevice) { 256 *ResolvedTarget = Target1; 257 } else if (Target1 != Target2) { 258 return true; 259 } else { 260 *ResolvedTarget = Target1; 261 } 262 263 return false; 264 } 265 266 bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl, 267 CXXSpecialMember CSM, 268 CXXMethodDecl *MemberDecl, 269 bool ConstRHS, 270 bool Diagnose) { 271 llvm::Optional<CUDAFunctionTarget> InferredTarget; 272 273 // We're going to invoke special member lookup; mark that these special 274 // members are called from this one, and not from its caller. 275 ContextRAII MethodContext(*this, MemberDecl); 276 277 // Look for special members in base classes that should be invoked from here. 278 // Infer the target of this member base on the ones it should call. 279 // Skip direct and indirect virtual bases for abstract classes. 280 llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases; 281 for (const auto &B : ClassDecl->bases()) { 282 if (!B.isVirtual()) { 283 Bases.push_back(&B); 284 } 285 } 286 287 if (!ClassDecl->isAbstract()) { 288 for (const auto &VB : ClassDecl->vbases()) { 289 Bases.push_back(&VB); 290 } 291 } 292 293 for (const auto *B : Bases) { 294 const RecordType *BaseType = B->getType()->getAs<RecordType>(); 295 if (!BaseType) { 296 continue; 297 } 298 299 CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl()); 300 Sema::SpecialMemberOverloadResult *SMOR = 301 LookupSpecialMember(BaseClassDecl, CSM, 302 /* ConstArg */ ConstRHS, 303 /* VolatileArg */ false, 304 /* RValueThis */ false, 305 /* ConstThis */ false, 306 /* VolatileThis */ false); 307 308 if (!SMOR || !SMOR->getMethod()) { 309 continue; 310 } 311 312 CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR->getMethod()); 313 if (!InferredTarget.hasValue()) { 314 InferredTarget = BaseMethodTarget; 315 } else { 316 bool ResolutionError = resolveCalleeCUDATargetConflict( 317 InferredTarget.getValue(), BaseMethodTarget, 318 InferredTarget.getPointer()); 319 if (ResolutionError) { 320 if (Diagnose) { 321 Diag(ClassDecl->getLocation(), 322 diag::note_implicit_member_target_infer_collision) 323 << (unsigned)CSM << InferredTarget.getValue() << BaseMethodTarget; 324 } 325 MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context)); 326 return true; 327 } 328 } 329 } 330 331 // Same as for bases, but now for special members of fields. 332 for (const auto *F : ClassDecl->fields()) { 333 if (F->isInvalidDecl()) { 334 continue; 335 } 336 337 const RecordType *FieldType = 338 Context.getBaseElementType(F->getType())->getAs<RecordType>(); 339 if (!FieldType) { 340 continue; 341 } 342 343 CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl()); 344 Sema::SpecialMemberOverloadResult *SMOR = 345 LookupSpecialMember(FieldRecDecl, CSM, 346 /* ConstArg */ ConstRHS && !F->isMutable(), 347 /* VolatileArg */ false, 348 /* RValueThis */ false, 349 /* ConstThis */ false, 350 /* VolatileThis */ false); 351 352 if (!SMOR || !SMOR->getMethod()) { 353 continue; 354 } 355 356 CUDAFunctionTarget FieldMethodTarget = 357 IdentifyCUDATarget(SMOR->getMethod()); 358 if (!InferredTarget.hasValue()) { 359 InferredTarget = FieldMethodTarget; 360 } else { 361 bool ResolutionError = resolveCalleeCUDATargetConflict( 362 InferredTarget.getValue(), FieldMethodTarget, 363 InferredTarget.getPointer()); 364 if (ResolutionError) { 365 if (Diagnose) { 366 Diag(ClassDecl->getLocation(), 367 diag::note_implicit_member_target_infer_collision) 368 << (unsigned)CSM << InferredTarget.getValue() 369 << FieldMethodTarget; 370 } 371 MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context)); 372 return true; 373 } 374 } 375 } 376 377 if (InferredTarget.hasValue()) { 378 if (InferredTarget.getValue() == CFT_Device) { 379 MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context)); 380 } else if (InferredTarget.getValue() == CFT_Host) { 381 MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context)); 382 } else { 383 MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context)); 384 MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context)); 385 } 386 } else { 387 // If no target was inferred, mark this member as __host__ __device__; 388 // it's the least restrictive option that can be invoked from any target. 389 MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context)); 390 MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context)); 391 } 392 393 return false; 394 } 395 396 bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) { 397 if (!CD->isDefined() && CD->isTemplateInstantiation()) 398 InstantiateFunctionDefinition(Loc, CD->getFirstDecl()); 399 400 // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered 401 // empty at a point in the translation unit, if it is either a 402 // trivial constructor 403 if (CD->isTrivial()) 404 return true; 405 406 // ... or it satisfies all of the following conditions: 407 // The constructor function has been defined. 408 // The constructor function has no parameters, 409 // and the function body is an empty compound statement. 410 if (!(CD->hasTrivialBody() && CD->getNumParams() == 0)) 411 return false; 412 413 // Its class has no virtual functions and no virtual base classes. 414 if (CD->getParent()->isDynamicClass()) 415 return false; 416 417 // The only form of initializer allowed is an empty constructor. 418 // This will recursively check all base classes and member initializers 419 if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) { 420 if (const CXXConstructExpr *CE = 421 dyn_cast<CXXConstructExpr>(CI->getInit())) 422 return isEmptyCudaConstructor(Loc, CE->getConstructor()); 423 return false; 424 })) 425 return false; 426 427 return true; 428 } 429 430 bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) { 431 // No destructor -> no problem. 432 if (!DD) 433 return true; 434 435 if (!DD->isDefined() && DD->isTemplateInstantiation()) 436 InstantiateFunctionDefinition(Loc, DD->getFirstDecl()); 437 438 // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered 439 // empty at a point in the translation unit, if it is either a 440 // trivial constructor 441 if (DD->isTrivial()) 442 return true; 443 444 // ... or it satisfies all of the following conditions: 445 // The destructor function has been defined. 446 // and the function body is an empty compound statement. 447 if (!DD->hasTrivialBody()) 448 return false; 449 450 const CXXRecordDecl *ClassDecl = DD->getParent(); 451 452 // Its class has no virtual functions and no virtual base classes. 453 if (ClassDecl->isDynamicClass()) 454 return false; 455 456 // Only empty destructors are allowed. This will recursively check 457 // destructors for all base classes... 458 if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) { 459 if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl()) 460 return isEmptyCudaDestructor(Loc, RD->getDestructor()); 461 return true; 462 })) 463 return false; 464 465 // ... and member fields. 466 if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) { 467 if (CXXRecordDecl *RD = Field->getType() 468 ->getBaseElementTypeUnsafe() 469 ->getAsCXXRecordDecl()) 470 return isEmptyCudaDestructor(Loc, RD->getDestructor()); 471 return true; 472 })) 473 return false; 474 475 return true; 476 } 477 478 // With -fcuda-host-device-constexpr, an unattributed constexpr function is 479 // treated as implicitly __host__ __device__, unless: 480 // * it is a variadic function (device-side variadic functions are not 481 // allowed), or 482 // * a __device__ function with this signature was already declared, in which 483 // case in which case we output an error, unless the __device__ decl is in a 484 // system header, in which case we leave the constexpr function unattributed. 485 // 486 // In addition, all function decls are treated as __host__ __device__ when 487 // ForceCUDAHostDeviceDepth > 0 (corresponding to code within a 488 // #pragma clang force_cuda_host_device_begin/end 489 // pair). 490 void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD, 491 const LookupResult &Previous) { 492 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 493 494 if (ForceCUDAHostDeviceDepth > 0) { 495 if (!NewD->hasAttr<CUDAHostAttr>()) 496 NewD->addAttr(CUDAHostAttr::CreateImplicit(Context)); 497 if (!NewD->hasAttr<CUDADeviceAttr>()) 498 NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context)); 499 return; 500 } 501 502 if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() || 503 NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() || 504 NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>()) 505 return; 506 507 // Is D a __device__ function with the same signature as NewD, ignoring CUDA 508 // attributes? 509 auto IsMatchingDeviceFn = [&](NamedDecl *D) { 510 if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D)) 511 D = Using->getTargetDecl(); 512 FunctionDecl *OldD = D->getAsFunction(); 513 return OldD && OldD->hasAttr<CUDADeviceAttr>() && 514 !OldD->hasAttr<CUDAHostAttr>() && 515 !IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false, 516 /* ConsiderCudaAttrs = */ false); 517 }; 518 auto It = llvm::find_if(Previous, IsMatchingDeviceFn); 519 if (It != Previous.end()) { 520 // We found a __device__ function with the same name and signature as NewD 521 // (ignoring CUDA attrs). This is an error unless that function is defined 522 // in a system header, in which case we simply return without making NewD 523 // host+device. 524 NamedDecl *Match = *It; 525 if (!getSourceManager().isInSystemHeader(Match->getLocation())) { 526 Diag(NewD->getLocation(), 527 diag::err_cuda_unattributed_constexpr_cannot_overload_device) 528 << NewD->getName(); 529 Diag(Match->getLocation(), 530 diag::note_cuda_conflicting_device_function_declared_here); 531 } 532 return; 533 } 534 535 NewD->addAttr(CUDAHostAttr::CreateImplicit(Context)); 536 NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context)); 537 } 538 539 // In CUDA, there are some constructs which may appear in semantically-valid 540 // code, but trigger errors if we ever generate code for the function in which 541 // they appear. Essentially every construct you're not allowed to use on the 542 // device falls into this category, because you are allowed to use these 543 // constructs in a __host__ __device__ function, but only if that function is 544 // never codegen'ed on the device. 545 // 546 // To handle semantic checking for these constructs, we keep track of the set of 547 // functions we know will be emitted, either because we could tell a priori that 548 // they would be emitted, or because they were transitively called by a 549 // known-emitted function. 550 // 551 // We also keep a partial call graph of which not-known-emitted functions call 552 // which other not-known-emitted functions. 553 // 554 // When we see something which is illegal if the current function is emitted 555 // (usually by way of CUDADiagIfDeviceCode, CUDADiagIfHostCode, or 556 // CheckCUDACall), we first check if the current function is known-emitted. If 557 // so, we immediately output the diagnostic. 558 // 559 // Otherwise, we "defer" the diagnostic. It sits in Sema::CUDADeferredDiags 560 // until we discover that the function is known-emitted, at which point we take 561 // it out of this map and emit the diagnostic. 562 563 Sema::CUDADiagBuilder::CUDADiagBuilder(Kind K, SourceLocation Loc, 564 unsigned DiagID, FunctionDecl *Fn, 565 Sema &S) 566 : S(S), Loc(Loc), DiagID(DiagID), Fn(Fn), 567 ShowCallStack(K == K_ImmediateWithCallStack || K == K_Deferred) { 568 switch (K) { 569 case K_Nop: 570 break; 571 case K_Immediate: 572 case K_ImmediateWithCallStack: 573 ImmediateDiag.emplace(S.Diag(Loc, DiagID)); 574 break; 575 case K_Deferred: 576 assert(Fn && "Must have a function to attach the deferred diag to."); 577 PartialDiag.emplace(S.PDiag(DiagID)); 578 break; 579 } 580 } 581 582 // Print notes showing how we can reach FD starting from an a priori 583 // known-callable function. 584 static void EmitCallStackNotes(Sema &S, FunctionDecl *FD) { 585 auto FnIt = S.CUDAKnownEmittedFns.find(FD); 586 while (FnIt != S.CUDAKnownEmittedFns.end()) { 587 DiagnosticBuilder Builder( 588 S.Diags.Report(FnIt->second.Loc, diag::note_called_by)); 589 Builder << FnIt->second.FD; 590 Builder.setForceEmit(); 591 592 FnIt = S.CUDAKnownEmittedFns.find(FnIt->second.FD); 593 } 594 } 595 596 Sema::CUDADiagBuilder::~CUDADiagBuilder() { 597 if (ImmediateDiag) { 598 // Emit our diagnostic and, if it was a warning or error, output a callstack 599 // if Fn isn't a priori known-emitted. 600 bool IsWarningOrError = S.getDiagnostics().getDiagnosticLevel( 601 DiagID, Loc) >= DiagnosticsEngine::Warning; 602 ImmediateDiag.reset(); // Emit the immediate diag. 603 if (IsWarningOrError && ShowCallStack) 604 EmitCallStackNotes(S, Fn); 605 } else if (PartialDiag) { 606 assert(ShowCallStack && "Must always show call stack for deferred diags."); 607 S.CUDADeferredDiags[Fn].push_back({Loc, std::move(*PartialDiag)}); 608 } 609 } 610 611 // Do we know that we will eventually codegen the given function? 612 static bool IsKnownEmitted(Sema &S, FunctionDecl *FD) { 613 // Templates are emitted when they're instantiated. 614 if (FD->isDependentContext()) 615 return false; 616 617 // When compiling for device, host functions are never emitted. Similarly, 618 // when compiling for host, device and global functions are never emitted. 619 // (Technically, we do emit a host-side stub for global functions, but this 620 // doesn't count for our purposes here.) 621 Sema::CUDAFunctionTarget T = S.IdentifyCUDATarget(FD); 622 if (S.getLangOpts().CUDAIsDevice && T == Sema::CFT_Host) 623 return false; 624 if (!S.getLangOpts().CUDAIsDevice && 625 (T == Sema::CFT_Device || T == Sema::CFT_Global)) 626 return false; 627 628 // Check whether this function is externally visible -- if so, it's 629 // known-emitted. 630 // 631 // We have to check the GVA linkage of the function's *definition* -- if we 632 // only have a declaration, we don't know whether or not the function will be 633 // emitted, because (say) the definition could include "inline". 634 FunctionDecl *Def = FD->getDefinition(); 635 636 // We may currently be parsing the body of FD, in which case 637 // FD->getDefinition() will be null, but we still want to treat FD as though 638 // it's a definition. 639 if (!Def && FD->willHaveBody()) 640 Def = FD; 641 642 if (Def && 643 !isDiscardableGVALinkage(S.getASTContext().GetGVALinkageForFunction(Def))) 644 return true; 645 646 // Otherwise, the function is known-emitted if it's in our set of 647 // known-emitted functions. 648 return S.CUDAKnownEmittedFns.count(FD) > 0; 649 } 650 651 Sema::CUDADiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc, 652 unsigned DiagID) { 653 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 654 CUDADiagBuilder::Kind DiagKind = [&] { 655 switch (CurrentCUDATarget()) { 656 case CFT_Global: 657 case CFT_Device: 658 return CUDADiagBuilder::K_Immediate; 659 case CFT_HostDevice: 660 // An HD function counts as host code if we're compiling for host, and 661 // device code if we're compiling for device. Defer any errors in device 662 // mode until the function is known-emitted. 663 if (getLangOpts().CUDAIsDevice) { 664 return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext)) 665 ? CUDADiagBuilder::K_ImmediateWithCallStack 666 : CUDADiagBuilder::K_Deferred; 667 } 668 return CUDADiagBuilder::K_Nop; 669 670 default: 671 return CUDADiagBuilder::K_Nop; 672 } 673 }(); 674 return CUDADiagBuilder(DiagKind, Loc, DiagID, 675 dyn_cast<FunctionDecl>(CurContext), *this); 676 } 677 678 Sema::CUDADiagBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc, 679 unsigned DiagID) { 680 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 681 CUDADiagBuilder::Kind DiagKind = [&] { 682 switch (CurrentCUDATarget()) { 683 case CFT_Host: 684 return CUDADiagBuilder::K_Immediate; 685 case CFT_HostDevice: 686 // An HD function counts as host code if we're compiling for host, and 687 // device code if we're compiling for device. Defer any errors in device 688 // mode until the function is known-emitted. 689 if (getLangOpts().CUDAIsDevice) 690 return CUDADiagBuilder::K_Nop; 691 692 return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext)) 693 ? CUDADiagBuilder::K_ImmediateWithCallStack 694 : CUDADiagBuilder::K_Deferred; 695 default: 696 return CUDADiagBuilder::K_Nop; 697 } 698 }(); 699 return CUDADiagBuilder(DiagKind, Loc, DiagID, 700 dyn_cast<FunctionDecl>(CurContext), *this); 701 } 702 703 // Emit any deferred diagnostics for FD and erase them from the map in which 704 // they're stored. 705 static void EmitDeferredDiags(Sema &S, FunctionDecl *FD) { 706 auto It = S.CUDADeferredDiags.find(FD); 707 if (It == S.CUDADeferredDiags.end()) 708 return; 709 bool HasWarningOrError = false; 710 for (PartialDiagnosticAt &PDAt : It->second) { 711 const SourceLocation &Loc = PDAt.first; 712 const PartialDiagnostic &PD = PDAt.second; 713 HasWarningOrError |= S.getDiagnostics().getDiagnosticLevel( 714 PD.getDiagID(), Loc) >= DiagnosticsEngine::Warning; 715 DiagnosticBuilder Builder(S.Diags.Report(Loc, PD.getDiagID())); 716 Builder.setForceEmit(); 717 PD.Emit(Builder); 718 } 719 S.CUDADeferredDiags.erase(It); 720 721 // FIXME: Should this be called after every warning/error emitted in the loop 722 // above, instead of just once per function? That would be consistent with 723 // how we handle immediate errors, but it also seems like a bit much. 724 if (HasWarningOrError) 725 EmitCallStackNotes(S, FD); 726 } 727 728 // Indicate that this function (and thus everything it transtively calls) will 729 // be codegen'ed, and emit any deferred diagnostics on this function and its 730 // (transitive) callees. 731 static void MarkKnownEmitted(Sema &S, FunctionDecl *OrigCaller, 732 FunctionDecl *OrigCallee, SourceLocation OrigLoc) { 733 // Nothing to do if we already know that FD is emitted. 734 if (IsKnownEmitted(S, OrigCallee)) { 735 assert(!S.CUDACallGraph.count(OrigCallee)); 736 return; 737 } 738 739 // We've just discovered that OrigCallee is known-emitted. Walk our call 740 // graph to see what else we can now discover also must be emitted. 741 742 struct CallInfo { 743 FunctionDecl *Caller; 744 FunctionDecl *Callee; 745 SourceLocation Loc; 746 }; 747 llvm::SmallVector<CallInfo, 4> Worklist = {{OrigCaller, OrigCallee, OrigLoc}}; 748 llvm::SmallSet<CanonicalDeclPtr<FunctionDecl>, 4> Seen; 749 Seen.insert(OrigCallee); 750 while (!Worklist.empty()) { 751 CallInfo C = Worklist.pop_back_val(); 752 assert(!IsKnownEmitted(S, C.Callee) && 753 "Worklist should not contain known-emitted functions."); 754 S.CUDAKnownEmittedFns[C.Callee] = {C.Caller, C.Loc}; 755 EmitDeferredDiags(S, C.Callee); 756 757 // If this is a template instantiation, explore its callgraph as well: 758 // Non-dependent calls are part of the template's callgraph, while dependent 759 // calls are part of to the instantiation's call graph. 760 if (auto *Templ = C.Callee->getPrimaryTemplate()) { 761 FunctionDecl *TemplFD = Templ->getAsFunction(); 762 if (!Seen.count(TemplFD) && !S.CUDAKnownEmittedFns.count(TemplFD)) { 763 Seen.insert(TemplFD); 764 Worklist.push_back( 765 {/* Caller = */ C.Caller, /* Callee = */ TemplFD, C.Loc}); 766 } 767 } 768 769 // Add all functions called by Callee to our worklist. 770 auto CGIt = S.CUDACallGraph.find(C.Callee); 771 if (CGIt == S.CUDACallGraph.end()) 772 continue; 773 774 for (std::pair<CanonicalDeclPtr<FunctionDecl>, SourceLocation> FDLoc : 775 CGIt->second) { 776 FunctionDecl *NewCallee = FDLoc.first; 777 SourceLocation CallLoc = FDLoc.second; 778 if (Seen.count(NewCallee) || IsKnownEmitted(S, NewCallee)) 779 continue; 780 Seen.insert(NewCallee); 781 Worklist.push_back( 782 {/* Caller = */ C.Callee, /* Callee = */ NewCallee, CallLoc}); 783 } 784 785 // C.Callee is now known-emitted, so we no longer need to maintain its list 786 // of callees in CUDACallGraph. 787 S.CUDACallGraph.erase(CGIt); 788 } 789 } 790 791 bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) { 792 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 793 assert(Callee && "Callee may not be null."); 794 // FIXME: Is bailing out early correct here? Should we instead assume that 795 // the caller is a global initializer? 796 FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext); 797 if (!Caller) 798 return true; 799 800 // If the caller is known-emitted, mark the callee as known-emitted. 801 // Otherwise, mark the call in our call graph so we can traverse it later. 802 bool CallerKnownEmitted = IsKnownEmitted(*this, Caller); 803 if (CallerKnownEmitted) 804 MarkKnownEmitted(*this, Caller, Callee, Loc); 805 else { 806 // If we have 807 // host fn calls kernel fn calls host+device, 808 // the HD function does not get instantiated on the host. We model this by 809 // omitting at the call to the kernel from the callgraph. This ensures 810 // that, when compiling for host, only HD functions actually called from the 811 // host get marked as known-emitted. 812 if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global) 813 CUDACallGraph[Caller].insert({Callee, Loc}); 814 } 815 816 CUDADiagBuilder::Kind DiagKind = [&] { 817 switch (IdentifyCUDAPreference(Caller, Callee)) { 818 case CFP_Never: 819 return CUDADiagBuilder::K_Immediate; 820 case CFP_WrongSide: 821 assert(Caller && "WrongSide calls require a non-null caller"); 822 // If we know the caller will be emitted, we know this wrong-side call 823 // will be emitted, so it's an immediate error. Otherwise, defer the 824 // error until we know the caller is emitted. 825 return CallerKnownEmitted ? CUDADiagBuilder::K_ImmediateWithCallStack 826 : CUDADiagBuilder::K_Deferred; 827 default: 828 return CUDADiagBuilder::K_Nop; 829 } 830 }(); 831 832 if (DiagKind == CUDADiagBuilder::K_Nop) 833 return true; 834 835 // Avoid emitting this error twice for the same location. Using a hashtable 836 // like this is unfortunate, but because we must continue parsing as normal 837 // after encountering a deferred error, it's otherwise very tricky for us to 838 // ensure that we only emit this deferred error once. 839 if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second) 840 return true; 841 842 CUDADiagBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, *this) 843 << IdentifyCUDATarget(Callee) << Callee << IdentifyCUDATarget(Caller); 844 CUDADiagBuilder(DiagKind, Callee->getLocation(), diag::note_previous_decl, 845 Caller, *this) 846 << Callee; 847 return DiagKind != CUDADiagBuilder::K_Immediate && 848 DiagKind != CUDADiagBuilder::K_ImmediateWithCallStack; 849 } 850 851 void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) { 852 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 853 if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>()) 854 return; 855 FunctionDecl *CurFn = dyn_cast<FunctionDecl>(CurContext); 856 if (!CurFn) 857 return; 858 CUDAFunctionTarget Target = IdentifyCUDATarget(CurFn); 859 if (Target == CFT_Global || Target == CFT_Device) { 860 Method->addAttr(CUDADeviceAttr::CreateImplicit(Context)); 861 } else if (Target == CFT_HostDevice) { 862 Method->addAttr(CUDADeviceAttr::CreateImplicit(Context)); 863 Method->addAttr(CUDAHostAttr::CreateImplicit(Context)); 864 } 865 } 866 867 void Sema::checkCUDATargetOverload(FunctionDecl *NewFD, 868 const LookupResult &Previous) { 869 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 870 CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD); 871 for (NamedDecl *OldND : Previous) { 872 FunctionDecl *OldFD = OldND->getAsFunction(); 873 if (!OldFD) 874 continue; 875 876 CUDAFunctionTarget OldTarget = IdentifyCUDATarget(OldFD); 877 // Don't allow HD and global functions to overload other functions with the 878 // same signature. We allow overloading based on CUDA attributes so that 879 // functions can have different implementations on the host and device, but 880 // HD/global functions "exist" in some sense on both the host and device, so 881 // should have the same implementation on both sides. 882 if (NewTarget != OldTarget && 883 ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice) || 884 (NewTarget == CFT_Global) || (OldTarget == CFT_Global)) && 885 !IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false, 886 /* ConsiderCudaAttrs = */ false)) { 887 Diag(NewFD->getLocation(), diag::err_cuda_ovl_target) 888 << NewTarget << NewFD->getDeclName() << OldTarget << OldFD; 889 Diag(OldFD->getLocation(), diag::note_previous_declaration); 890 NewFD->setInvalidDecl(); 891 break; 892 } 893 } 894 } 895 896 template <typename AttrTy> 897 static void copyAttrIfPresent(Sema &S, FunctionDecl *FD, 898 const FunctionDecl &TemplateFD) { 899 if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) { 900 AttrTy *Clone = Attribute->clone(S.Context); 901 Clone->setInherited(true); 902 FD->addAttr(Clone); 903 } 904 } 905 906 void Sema::inheritCUDATargetAttrs(FunctionDecl *FD, 907 const FunctionTemplateDecl &TD) { 908 const FunctionDecl &TemplateFD = *TD.getTemplatedDecl(); 909 copyAttrIfPresent<CUDAGlobalAttr>(*this, FD, TemplateFD); 910 copyAttrIfPresent<CUDAHostAttr>(*this, FD, TemplateFD); 911 copyAttrIfPresent<CUDADeviceAttr>(*this, FD, TemplateFD); 912 } 913