1 //===--- AMDGPU.cpp - AMDGPU ToolChain Implementations ----------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDGPU.h" 10 #include "CommonArgs.h" 11 #include "InputInfo.h" 12 #include "clang/Basic/TargetID.h" 13 #include "clang/Driver/Compilation.h" 14 #include "clang/Driver/DriverDiagnostic.h" 15 #include "clang/Driver/Options.h" 16 #include "llvm/Option/ArgList.h" 17 #include "llvm/Support/Error.h" 18 #include "llvm/Support/FileUtilities.h" 19 #include "llvm/Support/LineIterator.h" 20 #include "llvm/Support/Path.h" 21 #include "llvm/Support/VirtualFileSystem.h" 22 #include <system_error> 23 24 #define AMDGPU_ARCH_PROGRAM_NAME "amdgpu-arch" 25 26 using namespace clang::driver; 27 using namespace clang::driver::tools; 28 using namespace clang::driver::toolchains; 29 using namespace clang; 30 using namespace llvm::opt; 31 32 // Look for sub-directory starts with PackageName under ROCm candidate path. 33 // If there is one and only one matching sub-directory found, append the 34 // sub-directory to Path. If there is no matching sub-directory or there are 35 // more than one matching sub-directories, diagnose them. Returns the full 36 // path of the package if there is only one matching sub-directory, otherwise 37 // returns an empty string. 38 llvm::SmallString<0> 39 RocmInstallationDetector::findSPACKPackage(const Candidate &Cand, 40 StringRef PackageName) { 41 if (!Cand.isSPACK()) 42 return {}; 43 std::error_code EC; 44 std::string Prefix = Twine(PackageName + "-" + Cand.SPACKReleaseStr).str(); 45 llvm::SmallVector<llvm::SmallString<0>> SubDirs; 46 for (llvm::vfs::directory_iterator File = D.getVFS().dir_begin(Cand.Path, EC), 47 FileEnd; 48 File != FileEnd && !EC; File.increment(EC)) { 49 llvm::StringRef FileName = llvm::sys::path::filename(File->path()); 50 if (FileName.startswith(Prefix)) { 51 SubDirs.push_back(FileName); 52 if (SubDirs.size() > 1) 53 break; 54 } 55 } 56 if (SubDirs.size() == 1) { 57 auto PackagePath = Cand.Path; 58 llvm::sys::path::append(PackagePath, SubDirs[0]); 59 return PackagePath; 60 } 61 if (SubDirs.size() == 0) { 62 unsigned DiagID = D.getDiags().getCustomDiagID( 63 DiagnosticsEngine::Error, 64 "Expecting SPACK package %0 at %1 but not found"); 65 D.Diag(DiagID) << Prefix << Cand.Path; 66 return {}; 67 } 68 69 assert(SubDirs.size() > 1); 70 unsigned DiagID = D.getDiags().getCustomDiagID( 71 DiagnosticsEngine::Error, 72 "Expecting one SPACK package %0 at %1 but found more"); 73 D.Diag(DiagID) << Prefix << Cand.Path; 74 return {}; 75 } 76 77 void RocmInstallationDetector::scanLibDevicePath(llvm::StringRef Path) { 78 assert(!Path.empty()); 79 80 const StringRef Suffix(".bc"); 81 const StringRef Suffix2(".amdgcn.bc"); 82 83 std::error_code EC; 84 for (llvm::vfs::directory_iterator LI = D.getVFS().dir_begin(Path, EC), LE; 85 !EC && LI != LE; LI = LI.increment(EC)) { 86 StringRef FilePath = LI->path(); 87 StringRef FileName = llvm::sys::path::filename(FilePath); 88 if (!FileName.endswith(Suffix)) 89 continue; 90 91 StringRef BaseName; 92 if (FileName.endswith(Suffix2)) 93 BaseName = FileName.drop_back(Suffix2.size()); 94 else if (FileName.endswith(Suffix)) 95 BaseName = FileName.drop_back(Suffix.size()); 96 97 if (BaseName == "ocml") { 98 OCML = FilePath; 99 } else if (BaseName == "ockl") { 100 OCKL = FilePath; 101 } else if (BaseName == "opencl") { 102 OpenCL = FilePath; 103 } else if (BaseName == "hip") { 104 HIP = FilePath; 105 } else if (BaseName == "asanrtl") { 106 AsanRTL = FilePath; 107 } else if (BaseName == "oclc_finite_only_off") { 108 FiniteOnly.Off = FilePath; 109 } else if (BaseName == "oclc_finite_only_on") { 110 FiniteOnly.On = FilePath; 111 } else if (BaseName == "oclc_daz_opt_on") { 112 DenormalsAreZero.On = FilePath; 113 } else if (BaseName == "oclc_daz_opt_off") { 114 DenormalsAreZero.Off = FilePath; 115 } else if (BaseName == "oclc_correctly_rounded_sqrt_on") { 116 CorrectlyRoundedSqrt.On = FilePath; 117 } else if (BaseName == "oclc_correctly_rounded_sqrt_off") { 118 CorrectlyRoundedSqrt.Off = FilePath; 119 } else if (BaseName == "oclc_unsafe_math_on") { 120 UnsafeMath.On = FilePath; 121 } else if (BaseName == "oclc_unsafe_math_off") { 122 UnsafeMath.Off = FilePath; 123 } else if (BaseName == "oclc_wavefrontsize64_on") { 124 WavefrontSize64.On = FilePath; 125 } else if (BaseName == "oclc_wavefrontsize64_off") { 126 WavefrontSize64.Off = FilePath; 127 } else { 128 // Process all bitcode filenames that look like 129 // ocl_isa_version_XXX.amdgcn.bc 130 const StringRef DeviceLibPrefix = "oclc_isa_version_"; 131 if (!BaseName.startswith(DeviceLibPrefix)) 132 continue; 133 134 StringRef IsaVersionNumber = 135 BaseName.drop_front(DeviceLibPrefix.size()); 136 137 llvm::Twine GfxName = Twine("gfx") + IsaVersionNumber; 138 SmallString<8> Tmp; 139 LibDeviceMap.insert( 140 std::make_pair(GfxName.toStringRef(Tmp), FilePath.str())); 141 } 142 } 143 } 144 145 // Parse and extract version numbers from `.hipVersion`. Return `true` if 146 // the parsing fails. 147 bool RocmInstallationDetector::parseHIPVersionFile(llvm::StringRef V) { 148 SmallVector<StringRef, 4> VersionParts; 149 V.split(VersionParts, '\n'); 150 unsigned Major = ~0U; 151 unsigned Minor = ~0U; 152 for (auto Part : VersionParts) { 153 auto Splits = Part.rtrim().split('='); 154 if (Splits.first == "HIP_VERSION_MAJOR") { 155 if (Splits.second.getAsInteger(0, Major)) 156 return true; 157 } else if (Splits.first == "HIP_VERSION_MINOR") { 158 if (Splits.second.getAsInteger(0, Minor)) 159 return true; 160 } else if (Splits.first == "HIP_VERSION_PATCH") 161 VersionPatch = Splits.second.str(); 162 } 163 if (Major == ~0U || Minor == ~0U) 164 return true; 165 VersionMajorMinor = llvm::VersionTuple(Major, Minor); 166 DetectedVersion = 167 (Twine(Major) + "." + Twine(Minor) + "." + VersionPatch).str(); 168 return false; 169 } 170 171 /// \returns a list of candidate directories for ROCm installation, which is 172 /// cached and populated only once. 173 const SmallVectorImpl<RocmInstallationDetector::Candidate> & 174 RocmInstallationDetector::getInstallationPathCandidates() { 175 176 // Return the cached candidate list if it has already been populated. 177 if (!ROCmSearchDirs.empty()) 178 return ROCmSearchDirs; 179 180 auto DoPrintROCmSearchDirs = [&]() { 181 if (PrintROCmSearchDirs) 182 for (auto Cand : ROCmSearchDirs) { 183 llvm::errs() << "ROCm installation search path"; 184 if (Cand.isSPACK()) 185 llvm::errs() << " (Spack " << Cand.SPACKReleaseStr << ")"; 186 llvm::errs() << ": " << Cand.Path << '\n'; 187 } 188 }; 189 190 // For candidate specified by --rocm-path we do not do strict check, i.e., 191 // checking existence of HIP version file and device library files. 192 if (!RocmPathArg.empty()) { 193 ROCmSearchDirs.emplace_back(RocmPathArg.str()); 194 DoPrintROCmSearchDirs(); 195 return ROCmSearchDirs; 196 } else if (const char *RocmPathEnv = ::getenv("ROCM_PATH")) { 197 if (!StringRef(RocmPathEnv).empty()) { 198 ROCmSearchDirs.emplace_back(RocmPathEnv); 199 DoPrintROCmSearchDirs(); 200 return ROCmSearchDirs; 201 } 202 } 203 204 // Try to find relative to the compiler binary. 205 const char *InstallDir = D.getInstalledDir(); 206 207 // Check both a normal Unix prefix position of the clang binary, as well as 208 // the Windows-esque layout the ROCm packages use with the host architecture 209 // subdirectory of bin. 210 auto DeduceROCmPath = [](StringRef ClangPath) { 211 // Strip off directory (usually bin) 212 StringRef ParentDir = llvm::sys::path::parent_path(ClangPath); 213 StringRef ParentName = llvm::sys::path::filename(ParentDir); 214 215 // Some builds use bin/{host arch}, so go up again. 216 if (ParentName == "bin") { 217 ParentDir = llvm::sys::path::parent_path(ParentDir); 218 ParentName = llvm::sys::path::filename(ParentDir); 219 } 220 221 // Detect ROCm packages built with SPACK. 222 // clang is installed at 223 // <rocm_root>/llvm-amdgpu-<rocm_release_string>-<hash>/bin directory. 224 // We only consider the parent directory of llvm-amdgpu package as ROCm 225 // installation candidate for SPACK. 226 if (ParentName.startswith("llvm-amdgpu-")) { 227 auto SPACKPostfix = 228 ParentName.drop_front(strlen("llvm-amdgpu-")).split('-'); 229 auto SPACKReleaseStr = SPACKPostfix.first; 230 if (!SPACKReleaseStr.empty()) { 231 ParentDir = llvm::sys::path::parent_path(ParentDir); 232 return Candidate(ParentDir.str(), /*StrictChecking=*/true, 233 SPACKReleaseStr); 234 } 235 } 236 237 // Some versions of the rocm llvm package install to /opt/rocm/llvm/bin 238 // Some versions of the aomp package install to /opt/rocm/aomp/bin 239 if (ParentName == "llvm" || ParentName.startswith("aomp")) 240 ParentDir = llvm::sys::path::parent_path(ParentDir); 241 242 return Candidate(ParentDir.str(), /*StrictChecking=*/true); 243 }; 244 245 // Deduce ROCm path by the path used to invoke clang. Do not resolve symbolic 246 // link of clang itself. 247 ROCmSearchDirs.emplace_back(DeduceROCmPath(InstallDir)); 248 249 // Deduce ROCm path by the real path of the invoked clang, resolving symbolic 250 // link of clang itself. 251 llvm::SmallString<256> RealClangPath; 252 llvm::sys::fs::real_path(D.getClangProgramPath(), RealClangPath); 253 auto ParentPath = llvm::sys::path::parent_path(RealClangPath); 254 if (ParentPath != InstallDir) 255 ROCmSearchDirs.emplace_back(DeduceROCmPath(ParentPath)); 256 257 // Device library may be installed in clang resource directory. 258 ROCmSearchDirs.emplace_back(D.ResourceDir, 259 /*StrictChecking=*/true); 260 261 ROCmSearchDirs.emplace_back(D.SysRoot + "/opt/rocm", 262 /*StrictChecking=*/true); 263 264 // Find the latest /opt/rocm-{release} directory. 265 std::error_code EC; 266 std::string LatestROCm; 267 llvm::VersionTuple LatestVer; 268 // Get ROCm version from ROCm directory name. 269 auto GetROCmVersion = [](StringRef DirName) { 270 llvm::VersionTuple V; 271 std::string VerStr = DirName.drop_front(strlen("rocm-")).str(); 272 // The ROCm directory name follows the format of 273 // rocm-{major}.{minor}.{subMinor}[-{build}] 274 std::replace(VerStr.begin(), VerStr.end(), '-', '.'); 275 V.tryParse(VerStr); 276 return V; 277 }; 278 for (llvm::vfs::directory_iterator 279 File = D.getVFS().dir_begin(D.SysRoot + "/opt", EC), 280 FileEnd; 281 File != FileEnd && !EC; File.increment(EC)) { 282 llvm::StringRef FileName = llvm::sys::path::filename(File->path()); 283 if (!FileName.startswith("rocm-")) 284 continue; 285 if (LatestROCm.empty()) { 286 LatestROCm = FileName.str(); 287 LatestVer = GetROCmVersion(LatestROCm); 288 continue; 289 } 290 auto Ver = GetROCmVersion(FileName); 291 if (LatestVer < Ver) { 292 LatestROCm = FileName.str(); 293 LatestVer = Ver; 294 } 295 } 296 if (!LatestROCm.empty()) 297 ROCmSearchDirs.emplace_back(D.SysRoot + "/opt/" + LatestROCm, 298 /*StrictChecking=*/true); 299 300 DoPrintROCmSearchDirs(); 301 return ROCmSearchDirs; 302 } 303 304 RocmInstallationDetector::RocmInstallationDetector( 305 const Driver &D, const llvm::Triple &HostTriple, 306 const llvm::opt::ArgList &Args, bool DetectHIPRuntime, bool DetectDeviceLib) 307 : D(D) { 308 RocmPathArg = Args.getLastArgValue(clang::driver::options::OPT_rocm_path_EQ); 309 PrintROCmSearchDirs = 310 Args.hasArg(clang::driver::options::OPT_print_rocm_search_dirs); 311 RocmDeviceLibPathArg = 312 Args.getAllArgValues(clang::driver::options::OPT_rocm_device_lib_path_EQ); 313 HIPPathArg = Args.getLastArgValue(clang::driver::options::OPT_hip_path_EQ); 314 if (auto *A = Args.getLastArg(clang::driver::options::OPT_hip_version_EQ)) { 315 HIPVersionArg = A->getValue(); 316 unsigned Major = 0; 317 unsigned Minor = 0; 318 SmallVector<StringRef, 3> Parts; 319 HIPVersionArg.split(Parts, '.'); 320 if (Parts.size()) 321 Parts[0].getAsInteger(0, Major); 322 if (Parts.size() > 1) 323 Parts[1].getAsInteger(0, Minor); 324 if (Parts.size() > 2) 325 VersionPatch = Parts[2].str(); 326 if (VersionPatch.empty()) 327 VersionPatch = "0"; 328 if (Major == 0 || Minor == 0) 329 D.Diag(diag::err_drv_invalid_value) 330 << A->getAsString(Args) << HIPVersionArg; 331 332 VersionMajorMinor = llvm::VersionTuple(Major, Minor); 333 DetectedVersion = 334 (Twine(Major) + "." + Twine(Minor) + "." + VersionPatch).str(); 335 } else { 336 VersionPatch = DefaultVersionPatch; 337 VersionMajorMinor = 338 llvm::VersionTuple(DefaultVersionMajor, DefaultVersionMinor); 339 DetectedVersion = (Twine(DefaultVersionMajor) + "." + 340 Twine(DefaultVersionMinor) + "." + VersionPatch) 341 .str(); 342 } 343 344 if (DetectHIPRuntime) 345 detectHIPRuntime(); 346 if (DetectDeviceLib) 347 detectDeviceLibrary(); 348 } 349 350 void RocmInstallationDetector::detectDeviceLibrary() { 351 assert(LibDevicePath.empty()); 352 353 if (!RocmDeviceLibPathArg.empty()) 354 LibDevicePath = RocmDeviceLibPathArg[RocmDeviceLibPathArg.size() - 1]; 355 else if (const char *LibPathEnv = ::getenv("HIP_DEVICE_LIB_PATH")) 356 LibDevicePath = LibPathEnv; 357 358 auto &FS = D.getVFS(); 359 if (!LibDevicePath.empty()) { 360 // Maintain compatability with HIP flag/envvar pointing directly at the 361 // bitcode library directory. This points directly at the library path instead 362 // of the rocm root installation. 363 if (!FS.exists(LibDevicePath)) 364 return; 365 366 scanLibDevicePath(LibDevicePath); 367 HasDeviceLibrary = allGenericLibsValid() && !LibDeviceMap.empty(); 368 return; 369 } 370 371 // The install path situation in old versions of ROCm is a real mess, and 372 // use a different install layout. Multiple copies of the device libraries 373 // exist for each frontend project, and differ depending on which build 374 // system produced the packages. Standalone OpenCL builds also have a 375 // different directory structure from the ROCm OpenCL package. 376 auto &ROCmDirs = getInstallationPathCandidates(); 377 for (const auto &Candidate : ROCmDirs) { 378 auto CandidatePath = Candidate.Path; 379 380 // Check device library exists at the given path. 381 auto CheckDeviceLib = [&](StringRef Path) { 382 bool CheckLibDevice = (!NoBuiltinLibs || Candidate.StrictChecking); 383 if (CheckLibDevice && !FS.exists(Path)) 384 return false; 385 386 scanLibDevicePath(Path); 387 388 if (!NoBuiltinLibs) { 389 // Check that the required non-target libraries are all available. 390 if (!allGenericLibsValid()) 391 return false; 392 393 // Check that we have found at least one libdevice that we can link in 394 // if -nobuiltinlib hasn't been specified. 395 if (LibDeviceMap.empty()) 396 return false; 397 } 398 return true; 399 }; 400 401 // The possible structures are: 402 // - ${ROCM_ROOT}/amdgcn/bitcode/* 403 // - ${ROCM_ROOT}/lib/* 404 // - ${ROCM_ROOT}/lib/bitcode/* 405 // so try to detect these layouts. 406 static constexpr std::array<const char *, 2> SubDirsList[] = { 407 {"amdgcn", "bitcode"}, 408 {"lib", ""}, 409 {"lib", "bitcode"}, 410 }; 411 412 // Make a path by appending sub-directories to InstallPath. 413 auto MakePath = [&](const llvm::ArrayRef<const char *> &SubDirs) { 414 // Device library built by SPACK is installed to 415 // <rocm_root>/rocm-device-libs-<rocm_release_string>-<hash> directory. 416 auto SPACKPath = findSPACKPackage(Candidate, "rocm-device-libs"); 417 auto Path = SPACKPath.empty() ? CandidatePath : SPACKPath; 418 for (auto SubDir : SubDirs) 419 llvm::sys::path::append(Path, SubDir); 420 return Path; 421 }; 422 423 for (auto SubDirs : SubDirsList) { 424 LibDevicePath = MakePath(SubDirs); 425 HasDeviceLibrary = CheckDeviceLib(LibDevicePath); 426 if (HasDeviceLibrary) 427 return; 428 } 429 } 430 } 431 432 void RocmInstallationDetector::detectHIPRuntime() { 433 SmallVector<Candidate, 4> HIPSearchDirs; 434 if (!HIPPathArg.empty()) 435 HIPSearchDirs.emplace_back(HIPPathArg.str(), /*StrictChecking=*/true); 436 else 437 HIPSearchDirs.append(getInstallationPathCandidates()); 438 auto &FS = D.getVFS(); 439 440 for (const auto &Candidate : HIPSearchDirs) { 441 InstallPath = Candidate.Path; 442 if (InstallPath.empty() || !FS.exists(InstallPath)) 443 continue; 444 // HIP runtime built by SPACK is installed to 445 // <rocm_root>/hip-<rocm_release_string>-<hash> directory. 446 auto SPACKPath = findSPACKPackage(Candidate, "hip"); 447 InstallPath = SPACKPath.empty() ? InstallPath : SPACKPath; 448 449 BinPath = InstallPath; 450 llvm::sys::path::append(BinPath, "bin"); 451 IncludePath = InstallPath; 452 llvm::sys::path::append(IncludePath, "include"); 453 LibPath = InstallPath; 454 llvm::sys::path::append(LibPath, "lib"); 455 456 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile = 457 FS.getBufferForFile(BinPath + "/.hipVersion"); 458 if (!VersionFile && Candidate.StrictChecking) 459 continue; 460 461 if (HIPVersionArg.empty() && VersionFile) 462 if (parseHIPVersionFile((*VersionFile)->getBuffer())) 463 continue; 464 465 HasHIPRuntime = true; 466 return; 467 } 468 HasHIPRuntime = false; 469 } 470 471 void RocmInstallationDetector::print(raw_ostream &OS) const { 472 if (hasHIPRuntime()) 473 OS << "Found HIP installation: " << InstallPath << ", version " 474 << DetectedVersion << '\n'; 475 } 476 477 void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList &DriverArgs, 478 ArgStringList &CC1Args) const { 479 bool UsesRuntimeWrapper = VersionMajorMinor > llvm::VersionTuple(3, 5); 480 481 if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) { 482 // HIP header includes standard library wrapper headers under clang 483 // cuda_wrappers directory. Since these wrapper headers include_next 484 // standard C++ headers, whereas libc++ headers include_next other clang 485 // headers. The include paths have to follow this order: 486 // - wrapper include path 487 // - standard C++ include path 488 // - other clang include path 489 // Since standard C++ and other clang include paths are added in other 490 // places after this function, here we only need to make sure wrapper 491 // include path is added. 492 // 493 // ROCm 3.5 does not fully support the wrapper headers. Therefore it needs 494 // a workaround. 495 SmallString<128> P(D.ResourceDir); 496 if (UsesRuntimeWrapper) 497 llvm::sys::path::append(P, "include", "cuda_wrappers"); 498 CC1Args.push_back("-internal-isystem"); 499 CC1Args.push_back(DriverArgs.MakeArgString(P)); 500 } 501 502 if (DriverArgs.hasArg(options::OPT_nogpuinc)) 503 return; 504 505 if (!hasHIPRuntime()) { 506 D.Diag(diag::err_drv_no_hip_runtime); 507 return; 508 } 509 510 CC1Args.push_back("-internal-isystem"); 511 CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath())); 512 if (UsesRuntimeWrapper) 513 CC1Args.append({"-include", "__clang_hip_runtime_wrapper.h"}); 514 } 515 516 void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA, 517 const InputInfo &Output, 518 const InputInfoList &Inputs, 519 const ArgList &Args, 520 const char *LinkingOutput) const { 521 522 std::string Linker = getToolChain().GetProgramPath(getShortName()); 523 ArgStringList CmdArgs; 524 addLinkerCompressDebugSectionsOption(getToolChain(), Args, CmdArgs); 525 AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); 526 CmdArgs.push_back("-shared"); 527 CmdArgs.push_back("-o"); 528 CmdArgs.push_back(Output.getFilename()); 529 C.addCommand(std::make_unique<Command>( 530 JA, *this, ResponseFileSupport::AtFileCurCP(), Args.MakeArgString(Linker), 531 CmdArgs, Inputs, Output)); 532 } 533 534 void amdgpu::getAMDGPUTargetFeatures(const Driver &D, 535 const llvm::Triple &Triple, 536 const llvm::opt::ArgList &Args, 537 std::vector<StringRef> &Features) { 538 // Add target ID features to -target-feature options. No diagnostics should 539 // be emitted here since invalid target ID is diagnosed at other places. 540 StringRef TargetID = Args.getLastArgValue(options::OPT_mcpu_EQ); 541 if (!TargetID.empty()) { 542 llvm::StringMap<bool> FeatureMap; 543 auto OptionalGpuArch = parseTargetID(Triple, TargetID, &FeatureMap); 544 if (OptionalGpuArch) { 545 StringRef GpuArch = OptionalGpuArch.getValue(); 546 // Iterate through all possible target ID features for the given GPU. 547 // If it is mapped to true, add +feature. 548 // If it is mapped to false, add -feature. 549 // If it is not in the map (default), do not add it 550 for (auto &&Feature : getAllPossibleTargetIDFeatures(Triple, GpuArch)) { 551 auto Pos = FeatureMap.find(Feature); 552 if (Pos == FeatureMap.end()) 553 continue; 554 Features.push_back(Args.MakeArgStringRef( 555 (Twine(Pos->second ? "+" : "-") + Feature).str())); 556 } 557 } 558 } 559 560 if (Args.hasFlag(options::OPT_mwavefrontsize64, 561 options::OPT_mno_wavefrontsize64, false)) 562 Features.push_back("+wavefrontsize64"); 563 564 handleTargetFeaturesGroup( 565 Args, Features, options::OPT_m_amdgpu_Features_Group); 566 } 567 568 /// AMDGPU Toolchain 569 AMDGPUToolChain::AMDGPUToolChain(const Driver &D, const llvm::Triple &Triple, 570 const ArgList &Args) 571 : Generic_ELF(D, Triple, Args), 572 OptionsDefault( 573 {{options::OPT_O, "3"}, {options::OPT_cl_std_EQ, "CL1.2"}}) { 574 // Check code object version options. Emit warnings for legacy options 575 // and errors for the last invalid code object version options. 576 // It is done here to avoid repeated warning or error messages for 577 // each tool invocation. 578 checkAMDGPUCodeObjectVersion(D, Args); 579 } 580 581 Tool *AMDGPUToolChain::buildLinker() const { 582 return new tools::amdgpu::Linker(*this); 583 } 584 585 DerivedArgList * 586 AMDGPUToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch, 587 Action::OffloadKind DeviceOffloadKind) const { 588 589 DerivedArgList *DAL = 590 Generic_ELF::TranslateArgs(Args, BoundArch, DeviceOffloadKind); 591 592 const OptTable &Opts = getDriver().getOpts(); 593 594 if (!DAL) 595 DAL = new DerivedArgList(Args.getBaseArgs()); 596 597 for (Arg *A : Args) { 598 if (!shouldSkipArgument(A)) 599 DAL->append(A); 600 } 601 602 checkTargetID(*DAL); 603 604 if (!Args.getLastArgValue(options::OPT_x).equals("cl")) 605 return DAL; 606 607 // Phase 1 (.cl -> .bc) 608 if (Args.hasArg(options::OPT_c) && Args.hasArg(options::OPT_emit_llvm)) { 609 DAL->AddFlagArg(nullptr, Opts.getOption(getTriple().isArch64Bit() 610 ? options::OPT_m64 611 : options::OPT_m32)); 612 613 // Have to check OPT_O4, OPT_O0 & OPT_Ofast separately 614 // as they defined that way in Options.td 615 if (!Args.hasArg(options::OPT_O, options::OPT_O0, options::OPT_O4, 616 options::OPT_Ofast)) 617 DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_O), 618 getOptionDefault(options::OPT_O)); 619 } 620 621 return DAL; 622 } 623 624 bool AMDGPUToolChain::getDefaultDenormsAreZeroForTarget( 625 llvm::AMDGPU::GPUKind Kind) { 626 627 // Assume nothing without a specific target. 628 if (Kind == llvm::AMDGPU::GK_NONE) 629 return false; 630 631 const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind); 632 633 // Default to enabling f32 denormals by default on subtargets where fma is 634 // fast with denormals 635 const bool BothDenormAndFMAFast = 636 (ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) && 637 (ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32); 638 return !BothDenormAndFMAFast; 639 } 640 641 llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType( 642 const llvm::opt::ArgList &DriverArgs, const JobAction &JA, 643 const llvm::fltSemantics *FPType) const { 644 // Denormals should always be enabled for f16 and f64. 645 if (!FPType || FPType != &llvm::APFloat::IEEEsingle()) 646 return llvm::DenormalMode::getIEEE(); 647 648 if (JA.getOffloadingDeviceKind() == Action::OFK_HIP || 649 JA.getOffloadingDeviceKind() == Action::OFK_Cuda) { 650 auto Arch = getProcessorFromTargetID(getTriple(), JA.getOffloadingArch()); 651 auto Kind = llvm::AMDGPU::parseArchAMDGCN(Arch); 652 if (FPType && FPType == &llvm::APFloat::IEEEsingle() && 653 DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero, 654 options::OPT_fno_gpu_flush_denormals_to_zero, 655 getDefaultDenormsAreZeroForTarget(Kind))) 656 return llvm::DenormalMode::getPreserveSign(); 657 658 return llvm::DenormalMode::getIEEE(); 659 } 660 661 const StringRef GpuArch = getGPUArch(DriverArgs); 662 auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch); 663 664 // TODO: There are way too many flags that change this. Do we need to check 665 // them all? 666 bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) || 667 getDefaultDenormsAreZeroForTarget(Kind); 668 669 // Outputs are flushed to zero (FTZ), preserving sign. Denormal inputs are 670 // also implicit treated as zero (DAZ). 671 return DAZ ? llvm::DenormalMode::getPreserveSign() : 672 llvm::DenormalMode::getIEEE(); 673 } 674 675 bool AMDGPUToolChain::isWave64(const llvm::opt::ArgList &DriverArgs, 676 llvm::AMDGPU::GPUKind Kind) { 677 const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind); 678 bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32); 679 680 return !HasWave32 || DriverArgs.hasFlag( 681 options::OPT_mwavefrontsize64, options::OPT_mno_wavefrontsize64, false); 682 } 683 684 685 /// ROCM Toolchain 686 ROCMToolChain::ROCMToolChain(const Driver &D, const llvm::Triple &Triple, 687 const ArgList &Args) 688 : AMDGPUToolChain(D, Triple, Args) { 689 RocmInstallation.detectDeviceLibrary(); 690 } 691 692 void AMDGPUToolChain::addClangTargetOptions( 693 const llvm::opt::ArgList &DriverArgs, 694 llvm::opt::ArgStringList &CC1Args, 695 Action::OffloadKind DeviceOffloadingKind) const { 696 // Default to "hidden" visibility, as object level linking will not be 697 // supported for the foreseeable future. 698 if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ, 699 options::OPT_fvisibility_ms_compat)) { 700 CC1Args.push_back("-fvisibility"); 701 CC1Args.push_back("hidden"); 702 CC1Args.push_back("-fapply-global-visibility-to-externs"); 703 } 704 } 705 706 StringRef 707 AMDGPUToolChain::getGPUArch(const llvm::opt::ArgList &DriverArgs) const { 708 return getProcessorFromTargetID( 709 getTriple(), DriverArgs.getLastArgValue(options::OPT_mcpu_EQ)); 710 } 711 712 void AMDGPUToolChain::checkTargetID( 713 const llvm::opt::ArgList &DriverArgs) const { 714 StringRef TargetID = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ); 715 if (TargetID.empty()) 716 return; 717 718 llvm::StringMap<bool> FeatureMap; 719 auto OptionalGpuArch = parseTargetID(getTriple(), TargetID, &FeatureMap); 720 if (!OptionalGpuArch) { 721 getDriver().Diag(clang::diag::err_drv_bad_target_id) << TargetID; 722 } 723 } 724 725 llvm::Error 726 AMDGPUToolChain::detectSystemGPUs(const ArgList &Args, 727 SmallVector<std::string, 1> &GPUArchs) const { 728 std::string Program; 729 if (Arg *A = Args.getLastArg(options::OPT_amdgpu_arch_tool_EQ)) 730 Program = A->getValue(); 731 else 732 Program = GetProgramPath(AMDGPU_ARCH_PROGRAM_NAME); 733 llvm::SmallString<64> OutputFile; 734 llvm::sys::fs::createTemporaryFile("print-system-gpus", "" /* No Suffix */, 735 OutputFile); 736 llvm::FileRemover OutputRemover(OutputFile.c_str()); 737 llvm::Optional<llvm::StringRef> Redirects[] = { 738 {""}, 739 StringRef(OutputFile), 740 {""}, 741 }; 742 743 std::string ErrorMessage; 744 if (int Result = llvm::sys::ExecuteAndWait( 745 Program.c_str(), {}, {}, Redirects, /* SecondsToWait */ 0, 746 /*MemoryLimit*/ 0, &ErrorMessage)) { 747 if (Result > 0) { 748 ErrorMessage = "Exited with error code " + std::to_string(Result); 749 } else if (Result == -1) { 750 ErrorMessage = "Execute failed: " + ErrorMessage; 751 } else { 752 ErrorMessage = "Crashed: " + ErrorMessage; 753 } 754 755 return llvm::createStringError(std::error_code(), 756 Program + ": " + ErrorMessage); 757 } 758 759 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> OutputBuf = 760 llvm::MemoryBuffer::getFile(OutputFile.c_str()); 761 if (!OutputBuf) { 762 return llvm::createStringError(OutputBuf.getError(), 763 "Failed to read stdout of " + Program + 764 ": " + OutputBuf.getError().message()); 765 } 766 767 for (llvm::line_iterator LineIt(**OutputBuf); !LineIt.is_at_end(); ++LineIt) { 768 GPUArchs.push_back(LineIt->str()); 769 } 770 return llvm::Error::success(); 771 } 772 773 llvm::Error AMDGPUToolChain::getSystemGPUArch(const ArgList &Args, 774 std::string &GPUArch) const { 775 // detect the AMDGPU installed in system 776 SmallVector<std::string, 1> GPUArchs; 777 auto Err = detectSystemGPUs(Args, GPUArchs); 778 if (Err) { 779 return Err; 780 } 781 if (GPUArchs.empty()) { 782 return llvm::createStringError(std::error_code(), 783 "No AMD GPU detected in the system"); 784 } 785 GPUArch = GPUArchs[0]; 786 if (GPUArchs.size() > 1) { 787 bool AllSame = std::all_of( 788 GPUArchs.begin(), GPUArchs.end(), 789 [&](const StringRef &GPUArch) { return GPUArch == GPUArchs.front(); }); 790 if (!AllSame) 791 return llvm::createStringError( 792 std::error_code(), "Multiple AMD GPUs found with different archs"); 793 } 794 return llvm::Error::success(); 795 } 796 797 void ROCMToolChain::addClangTargetOptions( 798 const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, 799 Action::OffloadKind DeviceOffloadingKind) const { 800 AMDGPUToolChain::addClangTargetOptions(DriverArgs, CC1Args, 801 DeviceOffloadingKind); 802 803 // For the OpenCL case where there is no offload target, accept -nostdlib to 804 // disable bitcode linking. 805 if (DeviceOffloadingKind == Action::OFK_None && 806 DriverArgs.hasArg(options::OPT_nostdlib)) 807 return; 808 809 if (DriverArgs.hasArg(options::OPT_nogpulib)) 810 return; 811 812 if (!RocmInstallation.hasDeviceLibrary()) { 813 getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 0; 814 return; 815 } 816 817 // Get the device name and canonicalize it 818 const StringRef GpuArch = getGPUArch(DriverArgs); 819 auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch); 820 const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind); 821 std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch); 822 if (LibDeviceFile.empty()) { 823 getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 1 << GpuArch; 824 return; 825 } 826 827 bool Wave64 = isWave64(DriverArgs, Kind); 828 829 // TODO: There are way too many flags that change this. Do we need to check 830 // them all? 831 bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) || 832 getDefaultDenormsAreZeroForTarget(Kind); 833 bool FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only); 834 835 bool UnsafeMathOpt = 836 DriverArgs.hasArg(options::OPT_cl_unsafe_math_optimizations); 837 bool FastRelaxedMath = DriverArgs.hasArg(options::OPT_cl_fast_relaxed_math); 838 bool CorrectSqrt = 839 DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt); 840 841 // Add the OpenCL specific bitcode library. 842 llvm::SmallVector<std::string, 12> BCLibs; 843 BCLibs.push_back(RocmInstallation.getOpenCLPath().str()); 844 845 // Add the generic set of libraries. 846 BCLibs.append(RocmInstallation.getCommonBitcodeLibs( 847 DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt, 848 FastRelaxedMath, CorrectSqrt)); 849 850 llvm::for_each(BCLibs, [&](StringRef BCFile) { 851 CC1Args.push_back("-mlink-builtin-bitcode"); 852 CC1Args.push_back(DriverArgs.MakeArgString(BCFile)); 853 }); 854 } 855 856 llvm::SmallVector<std::string, 12> 857 RocmInstallationDetector::getCommonBitcodeLibs( 858 const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile, bool Wave64, 859 bool DAZ, bool FiniteOnly, bool UnsafeMathOpt, bool FastRelaxedMath, 860 bool CorrectSqrt) const { 861 862 llvm::SmallVector<std::string, 12> BCLibs; 863 864 auto AddBCLib = [&](StringRef BCFile) { BCLibs.push_back(BCFile.str()); }; 865 866 AddBCLib(getOCMLPath()); 867 AddBCLib(getOCKLPath()); 868 AddBCLib(getDenormalsAreZeroPath(DAZ)); 869 AddBCLib(getUnsafeMathPath(UnsafeMathOpt || FastRelaxedMath)); 870 AddBCLib(getFiniteOnlyPath(FiniteOnly || FastRelaxedMath)); 871 AddBCLib(getCorrectlyRoundedSqrtPath(CorrectSqrt)); 872 AddBCLib(getWavefrontSize64Path(Wave64)); 873 AddBCLib(LibDeviceFile); 874 875 return BCLibs; 876 } 877 878 bool AMDGPUToolChain::shouldSkipArgument(const llvm::opt::Arg *A) const { 879 Option O = A->getOption(); 880 if (O.matches(options::OPT_fPIE) || O.matches(options::OPT_fpie)) 881 return true; 882 return false; 883 } 884