//===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// \brief This file implements semantic analysis for CUDA constructs.
///
//===----------------------------------------------------------------------===//

#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/ExprCXX.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/SemaDiagnostic.h"
#include "clang/Sema/SemaInternal.h"
#include "clang/Sema/Template.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
using namespace clang;

void Sema::PushForceCUDAHostDevice() {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  ForceCUDAHostDeviceDepth++;
}

bool Sema::PopForceCUDAHostDevice() {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  if (ForceCUDAHostDeviceDepth == 0)
    return false;
  ForceCUDAHostDeviceDepth--;
  return true;
}

ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
                                         MultiExprArg ExecConfig,
                                         SourceLocation GGGLoc) {
  FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl();
  if (!ConfigDecl)
    return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
                     << "cudaConfigureCall");
  QualType ConfigQTy = ConfigDecl->getType();

  DeclRefExpr *ConfigDR = new (Context)
      DeclRefExpr(ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
  MarkFunctionReferenced(LLLLoc, ConfigDecl);

  return ActOnCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
                       /*IsExecConfig=*/true);
}

Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const AttributeList *Attr) {
  bool HasHostAttr = false;
  bool HasDeviceAttr = false;
  bool HasGlobalAttr = false;
  bool HasInvalidTargetAttr = false;
  while (Attr) {
    switch (Attr->getKind()) {
    case AttributeList::AT_CUDAGlobal:
      HasGlobalAttr = true;
      break;
    case AttributeList::AT_CUDAHost:
      HasHostAttr = true;
      break;
    case AttributeList::AT_CUDADevice:
      HasDeviceAttr = true;
      break;
    case AttributeList::AT_CUDAInvalidTarget:
      HasInvalidTargetAttr = true;
      break;
    default:
      break;
    }
    Attr = Attr->getNext();
  }
  if (HasInvalidTargetAttr)
    return CFT_InvalidTarget;

  if (HasGlobalAttr)
    return CFT_Global;

  if (HasHostAttr && HasDeviceAttr)
    return CFT_HostDevice;

  if (HasDeviceAttr)
    return CFT_Device;

  return CFT_Host;
}

template <typename A>
static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) {
  return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
           return isa<A>(Attribute) &&
                  !(IgnoreImplicitAttr && Attribute->isImplicit());
         });
}

/// IdentifyCUDATarget - Determine the CUDA compilation target for this function
Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
                                                  bool IgnoreImplicitHDAttr) {
  // Code that lives outside a function is run on the host.
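  // (For example, a global variable's initializer at namespace scope has no
  // enclosing FunctionDecl, so D is null here and we default to CFT_Host.)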
  if (D == nullptr)
    return CFT_Host;

  if (D->hasAttr<CUDAInvalidTargetAttr>())
    return CFT_InvalidTarget;

  if (D->hasAttr<CUDAGlobalAttr>())
    return CFT_Global;

  if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) {
    if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr))
      return CFT_HostDevice;
    return CFT_Device;
  } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
    return CFT_Host;
  } else if (D->isImplicit() && !IgnoreImplicitHDAttr) {
    // Some implicit declarations (like intrinsic functions) are not marked.
    // Set the most lenient target on them for maximal flexibility.
    return CFT_HostDevice;
  }

  return CFT_Host;
}

// * CUDA Call preference table
//
// F - from,
// T - to
// Ph - preference in host mode
// Pd - preference in device mode
// H  - handled in (x)
// Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never.
//
// | F  | T  | Ph  | Pd  |  H  |
// |----+----+-----+-----+-----+
// | d  | d  | N   | N   | (c) |
// | d  | g  | --  | --  | (a) |
// | d  | h  | --  | --  | (e) |
// | d  | hd | HD  | HD  | (b) |
// | g  | d  | N   | N   | (c) |
// | g  | g  | --  | --  | (a) |
// | g  | h  | --  | --  | (e) |
// | g  | hd | HD  | HD  | (b) |
// | h  | d  | --  | --  | (e) |
// | h  | g  | N   | N   | (c) |
// | h  | h  | N   | N   | (c) |
// | h  | hd | HD  | HD  | (b) |
// | hd | d  | WS  | SS  | (d) |
// | hd | g  | SS  | --  |(d/a)|
// | hd | h  | SS  | WS  | (d) |
// | hd | hd | HD  | HD  | (b) |

Sema::CUDAFunctionPreference
Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
                             const FunctionDecl *Callee) {
  assert(Callee && "Callee must be valid.");
  CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller);
  CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);

  // If one of the targets is invalid, the check always fails, no matter what
  // the other target is.
  if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
    return CFP_Never;

  // (a) Can't call global from some contexts until we support CUDA's
  // dynamic parallelism.
  if (CalleeTarget == CFT_Global &&
      (CallerTarget == CFT_Global || CallerTarget == CFT_Device))
    return CFP_Never;

  // (b) Calling HostDevice is OK for everyone.
  if (CalleeTarget == CFT_HostDevice)
    return CFP_HostDevice;

  // (c) Best case scenarios
  if (CalleeTarget == CallerTarget ||
      (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
    return CFP_Native;

  // (d) HostDevice behavior depends on compilation mode.
  if (CallerTarget == CFT_HostDevice) {
    // It's OK to call a compilation-mode matching function from an HD one.
    if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) ||
        (!getLangOpts().CUDAIsDevice &&
         (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)))
      return CFP_SameSide;

    // Calls from HD to non-mode-matching functions (i.e., to host functions
    // when compiling in device mode or to device functions when compiling in
    // host mode) are allowed at the sema level, but eventually rejected if
    // they're ever codegened.  TODO: Reject said calls earlier.
    return CFP_WrongSide;
  }

  // (e) Calling across device/host boundary is not something you should do.
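  // For example (illustrative only; devFn/hostFn/kern are hypothetical names):
  //   __device__ void devFn();
  //   void hostFn() { devFn(); }            // h -> d: CFP_Never
  //   __global__ void kern() { hostFn(); }  // g -> h: CFP_Never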
  if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
      (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
    return CFP_Never;

  llvm_unreachable("All cases should've been handled by now.");
}

void Sema::EraseUnwantedCUDAMatches(
    const FunctionDecl *Caller,
    SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
  if (Matches.size() <= 1)
    return;

  using Pair = std::pair<DeclAccessPair, FunctionDecl *>;

  // Gets the CUDA function preference for a call from Caller to Match.
  auto GetCFP = [&](const Pair &Match) {
    return IdentifyCUDAPreference(Caller, Match.second);
  };

  // Find the best call preference among the functions in Matches.
  CUDAFunctionPreference BestCFP = GetCFP(*std::max_element(
      Matches.begin(), Matches.end(),
      [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); }));

  // Erase all functions with lower priority.
  llvm::erase_if(Matches,
                 [&](const Pair &Match) { return GetCFP(Match) < BestCFP; });
}

/// When an implicitly-declared special member has to invoke more than one
/// base/field special member, conflicts may occur in the targets of these
/// members. For example, if one base's member is __host__ and another's is
/// __device__, it's a conflict.
/// This function figures out if the given targets \param Target1 and
/// \param Target2 conflict, and if they do not it fills in
/// \param ResolvedTarget with a target that resolves for both calls.
/// \return true if there's a conflict, false otherwise.
static bool
resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1,
                                Sema::CUDAFunctionTarget Target2,
                                Sema::CUDAFunctionTarget *ResolvedTarget) {
  // Only free functions and static member functions may be global.
  assert(Target1 != Sema::CFT_Global);
  assert(Target2 != Sema::CFT_Global);

  if (Target1 == Sema::CFT_HostDevice) {
    *ResolvedTarget = Target2;
  } else if (Target2 == Sema::CFT_HostDevice) {
    *ResolvedTarget = Target1;
  } else if (Target1 != Target2) {
    return true;
  } else {
    *ResolvedTarget = Target1;
  }

  return false;
}

bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
                                                   CXXSpecialMember CSM,
                                                   CXXMethodDecl *MemberDecl,
                                                   bool ConstRHS,
                                                   bool Diagnose) {
  llvm::Optional<CUDAFunctionTarget> InferredTarget;

  // We're going to invoke special member lookup; mark that these special
  // members are called from this one, and not from its caller.
  ContextRAII MethodContext(*this, MemberDecl);

  // Look for special members in base classes that should be invoked from here.
  // Infer the target of this member based on the ones it should call.
  // Skip direct and indirect virtual bases for abstract classes.
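  // (An abstract class can never be the most-derived object, so its implicit
  // special members never need to construct or destroy virtual bases.)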
  llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases;
  for (const auto &B : ClassDecl->bases()) {
    if (!B.isVirtual()) {
      Bases.push_back(&B);
    }
  }

  if (!ClassDecl->isAbstract()) {
    for (const auto &VB : ClassDecl->vbases()) {
      Bases.push_back(&VB);
    }
  }

  for (const auto *B : Bases) {
    const RecordType *BaseType = B->getType()->getAs<RecordType>();
    if (!BaseType) {
      continue;
    }

    CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
    Sema::SpecialMemberOverloadResult *SMOR =
        LookupSpecialMember(BaseClassDecl, CSM,
                            /* ConstArg */ ConstRHS,
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR || !SMOR->getMethod()) {
      continue;
    }

    CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR->getMethod());
    if (!InferredTarget.hasValue()) {
      InferredTarget = BaseMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          InferredTarget.getValue(), BaseMethodTarget,
          InferredTarget.getPointer());
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << InferredTarget.getValue() << BaseMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  // Same as for bases, but now for special members of fields.
  for (const auto *F : ClassDecl->fields()) {
    if (F->isInvalidDecl()) {
      continue;
    }

    const RecordType *FieldType =
        Context.getBaseElementType(F->getType())->getAs<RecordType>();
    if (!FieldType) {
      continue;
    }

    CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
    Sema::SpecialMemberOverloadResult *SMOR =
        LookupSpecialMember(FieldRecDecl, CSM,
                            /* ConstArg */ ConstRHS && !F->isMutable(),
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR || !SMOR->getMethod()) {
      continue;
    }

    CUDAFunctionTarget FieldMethodTarget =
        IdentifyCUDATarget(SMOR->getMethod());
    if (!InferredTarget.hasValue()) {
      InferredTarget = FieldMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          InferredTarget.getValue(), FieldMethodTarget,
          InferredTarget.getPointer());
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << InferredTarget.getValue()
              << FieldMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  if (InferredTarget.hasValue()) {
    if (InferredTarget.getValue() == CFT_Device) {
      MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    } else if (InferredTarget.getValue() == CFT_Host) {
      MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
    } else {
      MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
      MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
    }
  } else {
    // If no target was inferred, mark this member as __host__ __device__;
    // it's the least restrictive option that can be invoked from any target.
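    // (This happens, for example, when the class has no bases and no fields of
    // class type, so there were no callee special members to infer from.)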
    MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
  }

  return false;
}

bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
  if (!CD->isDefined() && CD->isTemplateInstantiation())
    InstantiateFunctionDefinition(Loc, CD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial constructor
  if (CD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The constructor function has been defined.
  // The constructor function has no parameters,
  // and the function body is an empty compound statement.
  if (!(CD->hasTrivialBody() && CD->getNumParams() == 0))
    return false;

  // Its class has no virtual functions and no virtual base classes.
  if (CD->getParent()->isDynamicClass())
    return false;

  // The only form of initializer allowed is an empty constructor.
  // This will recursively check all base classes and member initializers.
  if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) {
        if (const CXXConstructExpr *CE =
                dyn_cast<CXXConstructExpr>(CI->getInit()))
          return isEmptyCudaConstructor(Loc, CE->getConstructor());
        return false;
      }))
    return false;

  return true;
}

bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
  // No destructor -> no problem.
  if (!DD)
    return true;

  if (!DD->isDefined() && DD->isTemplateInstantiation())
    InstantiateFunctionDefinition(Loc, DD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial destructor
  if (DD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The destructor function has been defined.
  // and the function body is an empty compound statement.
  if (!DD->hasTrivialBody())
    return false;

  const CXXRecordDecl *ClassDecl = DD->getParent();

  // Its class has no virtual functions and no virtual base classes.
  if (ClassDecl->isDynamicClass())
    return false;

  // Only empty destructors are allowed. This will recursively check
  // destructors for all base classes...
  if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) {
        if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl())
          return isEmptyCudaDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  // ... and member fields.
  if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) {
        if (CXXRecordDecl *RD = Field->getType()
                                    ->getBaseElementTypeUnsafe()
                                    ->getAsCXXRecordDecl())
          return isEmptyCudaDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  return true;
}

// With -fcuda-host-device-constexpr, an unattributed constexpr function is
// treated as implicitly __host__ __device__, unless:
//  * it is a variadic function (device-side variadic functions are not
//    allowed), or
//  * a __device__ function with this signature was already declared, in which
//    case we output an error, unless the __device__ decl is in a system
//    header, in which case we leave the constexpr function unattributed.
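//
// For example (an illustrative sketch; 'square' is a hypothetical name):
//   constexpr int square(int x) { return x * x; }  // treated as __host__ __device__
// but if a declaration like
//   __device__ int square(int x);
// already exists, the constexpr declaration is diagnosed instead (or, when the
// __device__ declaration comes from a system header, left unattributed).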
//
// In addition, all function decls are treated as __host__ __device__ when
// ForceCUDAHostDeviceDepth > 0 (corresponding to code within a
//   #pragma clang force_cuda_host_device_begin/end
// pair).
void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD,
                                       const LookupResult &Previous) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");

  if (ForceCUDAHostDeviceDepth > 0) {
    if (!NewD->hasAttr<CUDAHostAttr>())
      NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
    if (!NewD->hasAttr<CUDADeviceAttr>())
      NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    return;
  }

  if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
      NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
      NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
    return;

  // Is D a __device__ function with the same signature as NewD, ignoring CUDA
  // attributes?
  auto IsMatchingDeviceFn = [&](NamedDecl *D) {
    if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
      D = Using->getTargetDecl();
    FunctionDecl *OldD = D->getAsFunction();
    return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
           !OldD->hasAttr<CUDAHostAttr>() &&
           !IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false,
                       /* ConsiderCudaAttrs = */ false);
  };
  auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
  if (It != Previous.end()) {
    // We found a __device__ function with the same name and signature as NewD
    // (ignoring CUDA attrs). This is an error unless that function is defined
    // in a system header, in which case we simply return without making NewD
    // host+device.
    NamedDecl *Match = *It;
    if (!getSourceManager().isInSystemHeader(Match->getLocation())) {
      Diag(NewD->getLocation(),
           diag::err_cuda_unattributed_constexpr_cannot_overload_device)
          << NewD->getName();
      Diag(Match->getLocation(),
           diag::note_cuda_conflicting_device_function_declared_here);
    }
    return;
  }

  NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
  NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
}

// In CUDA, there are some constructs which may appear in semantically-valid
// code, but trigger errors if we ever generate code for the function in which
// they appear. Essentially every construct you're not allowed to use on the
// device falls into this category, because you are allowed to use these
// constructs in a __host__ __device__ function, but only if that function is
// never codegen'ed on the device.
//
// To handle semantic checking for these constructs, we keep track of the set
// of functions we know will be emitted, either because we could tell a priori
// that they would be emitted, or because they were transitively called by a
// known-emitted function.
//
// We also keep a partial call graph of which not-known-emitted functions call
// which other not-known-emitted functions.
//
// When we see something which is illegal if the current function is emitted
// (usually by way of CUDADiagIfDeviceCode, CUDADiagIfHostCode, or
// CheckCUDACall), we first check if the current function is known-emitted. If
// so, we immediately output the diagnostic.
//
// Otherwise, we "defer" the diagnostic. It sits in Sema::CUDADeferredDiags
// until we discover that the function is known-emitted, at which point we take
// it out of this map and emit the diagnostic.

Sema::CUDADiagBuilder::CUDADiagBuilder(Kind K, SourceLocation Loc,
                                       unsigned DiagID, FunctionDecl *Fn,
                                       Sema &S)
    : S(S), Loc(Loc), DiagID(DiagID), Fn(Fn),
      ShowCallStack(K == K_ImmediateWithCallStack || K == K_Deferred) {
  switch (K) {
  case K_Nop:
    break;
  case K_Immediate:
  case K_ImmediateWithCallStack:
    ImmediateDiag.emplace(S.Diag(Loc, DiagID));
    break;
  case K_Deferred:
    assert(Fn && "Must have a function to attach the deferred diag to.");
    PartialDiag.emplace(S.PDiag(DiagID));
    break;
  }
}

// Print notes showing how we can reach FD starting from an a priori
// known-emitted function.
static void EmitCallStackNotes(Sema &S, FunctionDecl *FD) {
  auto FnIt = S.CUDAKnownEmittedFns.find(FD);
  while (FnIt != S.CUDAKnownEmittedFns.end()) {
    DiagnosticBuilder Builder(
        S.Diags.Report(FnIt->second.Loc, diag::note_called_by));
    Builder << FnIt->second.FD;
    Builder.setForceEmit();

    FnIt = S.CUDAKnownEmittedFns.find(FnIt->second.FD);
  }
}

Sema::CUDADiagBuilder::~CUDADiagBuilder() {
  if (ImmediateDiag) {
    // Emit our diagnostic and, if it was a warning or error, output a call
    // stack if Fn isn't a priori known-emitted.
    bool IsWarningOrError = S.getDiagnostics().getDiagnosticLevel(
                                DiagID, Loc) >= DiagnosticsEngine::Warning;
    ImmediateDiag.reset(); // Emit the immediate diag.
    if (IsWarningOrError && ShowCallStack)
      EmitCallStackNotes(S, Fn);
  } else if (PartialDiag) {
    assert(ShowCallStack && "Must always show call stack for deferred diags.");
    S.CUDADeferredDiags[Fn].push_back({Loc, std::move(*PartialDiag)});
  }
}

// Do we know that we will eventually codegen the given function?
static bool IsKnownEmitted(Sema &S, FunctionDecl *FD) {
  // Templates are emitted when they're instantiated.
  if (FD->isDependentContext())
    return false;

  // When compiling for device, host functions are never emitted. Similarly,
  // when compiling for host, device and global functions are never emitted.
  // (Technically, we do emit a host-side stub for global functions, but this
  // doesn't count for our purposes here.)
  Sema::CUDAFunctionTarget T = S.IdentifyCUDATarget(FD);
  if (S.getLangOpts().CUDAIsDevice && T == Sema::CFT_Host)
    return false;
  if (!S.getLangOpts().CUDAIsDevice &&
      (T == Sema::CFT_Device || T == Sema::CFT_Global))
    return false;

  // Check whether this function is externally visible -- if so, it's
  // known-emitted.
  //
  // We have to check the GVA linkage of the function's *definition* -- if we
  // only have a declaration, we don't know whether or not the function will
  // be emitted, because (say) the definition could include "inline".
  FunctionDecl *Def = FD->getDefinition();

  // We may currently be parsing the body of FD, in which case
  // FD->getDefinition() will be null, but we still want to treat FD as though
  // it's a definition.
  if (!Def && FD->willHaveBody())
    Def = FD;

  if (Def &&
      !isDiscardableGVALinkage(S.getASTContext().GetGVALinkageForFunction(Def)))
    return true;

  // Otherwise, the function is known-emitted if it's in our set of
  // known-emitted functions.
  return S.CUDAKnownEmittedFns.count(FD) > 0;
}

Sema::CUDADiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
                                                 unsigned DiagID) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  CUDADiagBuilder::Kind DiagKind = [&] {
    switch (CurrentCUDATarget()) {
    case CFT_Global:
    case CFT_Device:
      return CUDADiagBuilder::K_Immediate;
    case CFT_HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device. Defer any errors in device
      // mode until the function is known-emitted.
      if (getLangOpts().CUDAIsDevice) {
        return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext))
                   ? CUDADiagBuilder::K_ImmediateWithCallStack
                   : CUDADiagBuilder::K_Deferred;
      }
      return CUDADiagBuilder::K_Nop;

    default:
      return CUDADiagBuilder::K_Nop;
    }
  }();
  return CUDADiagBuilder(DiagKind, Loc, DiagID,
                         dyn_cast<FunctionDecl>(CurContext), *this);
}

Sema::CUDADiagBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc,
                                               unsigned DiagID) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  CUDADiagBuilder::Kind DiagKind = [&] {
    switch (CurrentCUDATarget()) {
    case CFT_Host:
      return CUDADiagBuilder::K_Immediate;
    case CFT_HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device. Defer any errors in device
      // mode until the function is known-emitted.
      if (getLangOpts().CUDAIsDevice)
        return CUDADiagBuilder::K_Nop;

      return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext))
                 ? CUDADiagBuilder::K_ImmediateWithCallStack
                 : CUDADiagBuilder::K_Deferred;
    default:
      return CUDADiagBuilder::K_Nop;
    }
  }();
  return CUDADiagBuilder(DiagKind, Loc, DiagID,
                         dyn_cast<FunctionDecl>(CurContext), *this);
}

// Emit any deferred diagnostics for FD and erase them from the map in which
// they're stored.
static void EmitDeferredDiags(Sema &S, FunctionDecl *FD) {
  auto It = S.CUDADeferredDiags.find(FD);
  if (It == S.CUDADeferredDiags.end())
    return;
  bool HasWarningOrError = false;
  for (PartialDiagnosticAt &PDAt : It->second) {
    const SourceLocation &Loc = PDAt.first;
    const PartialDiagnostic &PD = PDAt.second;
    HasWarningOrError |= S.getDiagnostics().getDiagnosticLevel(
                             PD.getDiagID(), Loc) >= DiagnosticsEngine::Warning;
    DiagnosticBuilder Builder(S.Diags.Report(Loc, PD.getDiagID()));
    Builder.setForceEmit();
    PD.Emit(Builder);
  }
  S.CUDADeferredDiags.erase(It);

  // FIXME: Should this be called after every warning/error emitted in the loop
  // above, instead of just once per function? That would be consistent with
  // how we handle immediate errors, but it also seems like a bit much.
  if (HasWarningOrError)
    EmitCallStackNotes(S, FD);
}

// Indicate that this function (and thus everything it transitively calls) will
// be codegen'ed, and emit any deferred diagnostics on this function and its
// (transitive) callees.
static void MarkKnownEmitted(Sema &S, FunctionDecl *OrigCaller,
                             FunctionDecl *OrigCallee, SourceLocation OrigLoc) {
  // Nothing to do if we already know that OrigCallee is emitted.
  if (IsKnownEmitted(S, OrigCallee)) {
    assert(!S.CUDACallGraph.count(OrigCallee));
    return;
  }

  // We've just discovered that OrigCallee is known-emitted. Walk our call
  // graph to see what else we can now discover also must be emitted.

  struct CallInfo {
    FunctionDecl *Caller;
    FunctionDecl *Callee;
    SourceLocation Loc;
  };
  llvm::SmallVector<CallInfo, 4> Worklist = {{OrigCaller, OrigCallee, OrigLoc}};
  llvm::SmallSet<CanonicalDeclPtr<FunctionDecl>, 4> Seen;
  Seen.insert(OrigCallee);
  while (!Worklist.empty()) {
    CallInfo C = Worklist.pop_back_val();
    assert(!IsKnownEmitted(S, C.Callee) &&
           "Worklist should not contain known-emitted functions.");
    S.CUDAKnownEmittedFns[C.Callee] = {C.Caller, C.Loc};
    EmitDeferredDiags(S, C.Callee);

    // If this is a template instantiation, explore its callgraph as well:
    // Non-dependent calls are part of the template's callgraph, while
    // dependent calls are part of the instantiation's call graph.
    if (auto *Templ = C.Callee->getPrimaryTemplate()) {
      FunctionDecl *TemplFD = Templ->getAsFunction();
      if (!Seen.count(TemplFD) && !S.CUDAKnownEmittedFns.count(TemplFD)) {
        Seen.insert(TemplFD);
        Worklist.push_back(
            {/* Caller = */ C.Caller, /* Callee = */ TemplFD, C.Loc});
      }
    }

    // Add all functions called by Callee to our worklist.
    auto CGIt = S.CUDACallGraph.find(C.Callee);
    if (CGIt == S.CUDACallGraph.end())
      continue;

    for (std::pair<CanonicalDeclPtr<FunctionDecl>, SourceLocation> FDLoc :
         CGIt->second) {
      FunctionDecl *NewCallee = FDLoc.first;
      SourceLocation CallLoc = FDLoc.second;
      if (Seen.count(NewCallee) || IsKnownEmitted(S, NewCallee))
        continue;
      Seen.insert(NewCallee);
      Worklist.push_back(
          {/* Caller = */ C.Callee, /* Callee = */ NewCallee, CallLoc});
    }

    // C.Callee is now known-emitted, so we no longer need to maintain its list
    // of callees in CUDACallGraph.
    S.CUDACallGraph.erase(CGIt);
  }
}

bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  assert(Callee && "Callee may not be null.");
  // FIXME: Is bailing out early correct here? Should we instead assume that
  // the caller is a global initializer?
  FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
  if (!Caller)
    return true;

  // If the caller is known-emitted, mark the callee as known-emitted.
  // Otherwise, mark the call in our call graph so we can traverse it later.
  bool CallerKnownEmitted = IsKnownEmitted(*this, Caller);
  if (CallerKnownEmitted)
    MarkKnownEmitted(*this, Caller, Callee, Loc);
  else {
    // If we have
    //   host fn calls kernel fn calls host+device,
    // the HD function does not get instantiated on the host. We model this by
    // omitting the call to the kernel from the callgraph. This ensures that,
    // when compiling for host, only HD functions actually called from the
    // host get marked as known-emitted.
    if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global)
      CUDACallGraph[Caller].insert({Callee, Loc});
  }

  CUDADiagBuilder::Kind DiagKind = [&] {
    switch (IdentifyCUDAPreference(Caller, Callee)) {
    case CFP_Never:
      return CUDADiagBuilder::K_Immediate;
    case CFP_WrongSide:
      assert(Caller && "WrongSide calls require a non-null caller");
      // If we know the caller will be emitted, we know this wrong-side call
      // will be emitted, so it's an immediate error. Otherwise, defer the
      // error until we know the caller is emitted.
      return CallerKnownEmitted ? CUDADiagBuilder::K_ImmediateWithCallStack
                                : CUDADiagBuilder::K_Deferred;
    default:
      return CUDADiagBuilder::K_Nop;
    }
  }();

  if (DiagKind == CUDADiagBuilder::K_Nop)
    return true;

  // Avoid emitting this error twice for the same location. Using a hashtable
  // like this is unfortunate, but because we must continue parsing as normal
  // after encountering a deferred error, it's otherwise very tricky for us to
  // ensure that we only emit this deferred error once.
  if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second)
    return true;

  CUDADiagBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, *this)
      << IdentifyCUDATarget(Callee) << Callee << IdentifyCUDATarget(Caller);
  CUDADiagBuilder(DiagKind, Callee->getLocation(), diag::note_previous_decl,
                  Caller, *this)
      << Callee;
  return DiagKind != CUDADiagBuilder::K_Immediate &&
         DiagKind != CUDADiagBuilder::K_ImmediateWithCallStack;
}

void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>())
    return;
  FunctionDecl *CurFn = dyn_cast<FunctionDecl>(CurContext);
  if (!CurFn)
    return;
  CUDAFunctionTarget Target = IdentifyCUDATarget(CurFn);
  if (Target == CFT_Global || Target == CFT_Device) {
    Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
  } else if (Target == CFT_HostDevice) {
    Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    Method->addAttr(CUDAHostAttr::CreateImplicit(Context));
  }
}

void Sema::checkCUDATargetOverload(FunctionDecl *NewFD,
                                   const LookupResult &Previous) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD);
  for (NamedDecl *OldND : Previous) {
    FunctionDecl *OldFD = OldND->getAsFunction();
    if (!OldFD)
      continue;

    CUDAFunctionTarget OldTarget = IdentifyCUDATarget(OldFD);
    // Don't allow HD and global functions to overload other functions with the
    // same signature. We allow overloading based on CUDA attributes so that
    // functions can have different implementations on the host and device, but
    // HD/global functions "exist" in some sense on both the host and device,
    // so should have the same implementation on both sides.
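    // For example (illustrative; 'fn' is a hypothetical name), these two
    // declarations are rejected because they differ only in CUDA attributes:
    //   __host__ __device__ void fn(int);
    //   __device__ void fn(int);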
    if (NewTarget != OldTarget &&
        ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice) ||
         (NewTarget == CFT_Global) || (OldTarget == CFT_Global)) &&
        !IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false,
                    /* ConsiderCudaAttrs = */ false)) {
      Diag(NewFD->getLocation(), diag::err_cuda_ovl_target)
          << NewTarget << NewFD->getDeclName() << OldTarget << OldFD;
      Diag(OldFD->getLocation(), diag::note_previous_declaration);
      NewFD->setInvalidDecl();
      break;
    }
  }
}

template <typename AttrTy>
static void copyAttrIfPresent(Sema &S, FunctionDecl *FD,
                              const FunctionDecl &TemplateFD) {
  if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) {
    AttrTy *Clone = Attribute->clone(S.Context);
    Clone->setInherited(true);
    FD->addAttr(Clone);
  }
}

void Sema::inheritCUDATargetAttrs(FunctionDecl *FD,
                                  const FunctionTemplateDecl &TD) {
  const FunctionDecl &TemplateFD = *TD.getTemplatedDecl();
  copyAttrIfPresent<CUDAGlobalAttr>(*this, FD, TemplateFD);
  copyAttrIfPresent<CUDAHostAttr>(*this, FD, TemplateFD);
  copyAttrIfPresent<CUDADeviceAttr>(*this, FD, TemplateFD);
}