1 //===--- AMDGPU.cpp - AMDGPU ToolChain Implementations ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "CommonArgs.h"
11 #include "InputInfo.h"
12 #include "clang/Basic/TargetID.h"
13 #include "clang/Driver/Compilation.h"
14 #include "clang/Driver/DriverDiagnostic.h"
15 #include "llvm/Option/ArgList.h"
16 #include "llvm/Support/Path.h"
17 #include "llvm/Support/VirtualFileSystem.h"
18 
19 using namespace clang::driver;
20 using namespace clang::driver::tools;
21 using namespace clang::driver::toolchains;
22 using namespace clang;
23 using namespace llvm::opt;
24 
25 void RocmInstallationDetector::scanLibDevicePath(llvm::StringRef Path) {
26   assert(!Path.empty());
27 
28   const StringRef Suffix(".bc");
29   const StringRef Suffix2(".amdgcn.bc");
30 
31   std::error_code EC;
32   for (llvm::vfs::directory_iterator LI = D.getVFS().dir_begin(Path, EC), LE;
33        !EC && LI != LE; LI = LI.increment(EC)) {
34     StringRef FilePath = LI->path();
35     StringRef FileName = llvm::sys::path::filename(FilePath);
36     if (!FileName.endswith(Suffix))
37       continue;
38 
39     StringRef BaseName;
40     if (FileName.endswith(Suffix2))
41       BaseName = FileName.drop_back(Suffix2.size());
42     else if (FileName.endswith(Suffix))
43       BaseName = FileName.drop_back(Suffix.size());
44 
45     if (BaseName == "ocml") {
46       OCML = FilePath;
47     } else if (BaseName == "ockl") {
48       OCKL = FilePath;
49     } else if (BaseName == "opencl") {
50       OpenCL = FilePath;
51     } else if (BaseName == "hip") {
52       HIP = FilePath;
53     } else if (BaseName == "oclc_finite_only_off") {
54       FiniteOnly.Off = FilePath;
55     } else if (BaseName == "oclc_finite_only_on") {
56       FiniteOnly.On = FilePath;
57     } else if (BaseName == "oclc_daz_opt_on") {
58       DenormalsAreZero.On = FilePath;
59     } else if (BaseName == "oclc_daz_opt_off") {
60       DenormalsAreZero.Off = FilePath;
61     } else if (BaseName == "oclc_correctly_rounded_sqrt_on") {
62       CorrectlyRoundedSqrt.On = FilePath;
63     } else if (BaseName == "oclc_correctly_rounded_sqrt_off") {
64       CorrectlyRoundedSqrt.Off = FilePath;
65     } else if (BaseName == "oclc_unsafe_math_on") {
66       UnsafeMath.On = FilePath;
67     } else if (BaseName == "oclc_unsafe_math_off") {
68       UnsafeMath.Off = FilePath;
69     } else if (BaseName == "oclc_wavefrontsize64_on") {
70       WavefrontSize64.On = FilePath;
71     } else if (BaseName == "oclc_wavefrontsize64_off") {
72       WavefrontSize64.Off = FilePath;
73     } else {
74       // Process all bitcode filenames that look like
75       // ocl_isa_version_XXX.amdgcn.bc
76       const StringRef DeviceLibPrefix = "oclc_isa_version_";
77       if (!BaseName.startswith(DeviceLibPrefix))
78         continue;
79 
80       StringRef IsaVersionNumber =
81         BaseName.drop_front(DeviceLibPrefix.size());
82 
83       llvm::Twine GfxName = Twine("gfx") + IsaVersionNumber;
84       SmallString<8> Tmp;
85       LibDeviceMap.insert(
86         std::make_pair(GfxName.toStringRef(Tmp), FilePath.str()));
87     }
88   }
89 }
90 
91 void RocmInstallationDetector::ParseHIPVersionFile(llvm::StringRef V) {
92   SmallVector<StringRef, 4> VersionParts;
93   V.split(VersionParts, '\n');
94   unsigned Major;
95   unsigned Minor;
96   for (auto Part : VersionParts) {
97     auto Splits = Part.split('=');
98     if (Splits.first == "HIP_VERSION_MAJOR")
99       Splits.second.getAsInteger(0, Major);
100     else if (Splits.first == "HIP_VERSION_MINOR")
101       Splits.second.getAsInteger(0, Minor);
102     else if (Splits.first == "HIP_VERSION_PATCH")
103       VersionPatch = Splits.second.str();
104   }
105   VersionMajorMinor = llvm::VersionTuple(Major, Minor);
106   DetectedVersion =
107       (Twine(Major) + "." + Twine(Minor) + "." + VersionPatch).str();
108 }
109 
110 // For candidate specified by --rocm-path we do not do strict check.
111 SmallVector<RocmInstallationDetector::Candidate, 4>
112 RocmInstallationDetector::getInstallationPathCandidates() {
113   SmallVector<Candidate, 4> Candidates;
114   if (!RocmPathArg.empty()) {
115     Candidates.emplace_back(RocmPathArg.str());
116     return Candidates;
117   }
118 
119   // Try to find relative to the compiler binary.
120   const char *InstallDir = D.getInstalledDir();
121 
122   // Check both a normal Unix prefix position of the clang binary, as well as
123   // the Windows-esque layout the ROCm packages use with the host architecture
124   // subdirectory of bin.
125 
126   // Strip off directory (usually bin)
127   StringRef ParentDir = llvm::sys::path::parent_path(InstallDir);
128   StringRef ParentName = llvm::sys::path::filename(ParentDir);
129 
130   // Some builds use bin/{host arch}, so go up again.
131   if (ParentName == "bin") {
132     ParentDir = llvm::sys::path::parent_path(ParentDir);
133     ParentName = llvm::sys::path::filename(ParentDir);
134   }
135 
136   // Some versions of the rocm llvm package install to /opt/rocm/llvm/bin
137   if (ParentName == "llvm")
138     ParentDir = llvm::sys::path::parent_path(ParentDir);
139 
140   Candidates.emplace_back(ParentDir.str(), /*StrictChecking=*/true);
141 
142   // Device library may be installed in clang resource directory.
143   Candidates.emplace_back(D.ResourceDir, /*StrictChecking=*/true);
144 
145   Candidates.emplace_back(D.SysRoot + "/opt/rocm", /*StrictChecking=*/true);
146   return Candidates;
147 }
148 
149 RocmInstallationDetector::RocmInstallationDetector(
150     const Driver &D, const llvm::Triple &HostTriple,
151     const llvm::opt::ArgList &Args, bool DetectHIPRuntime, bool DetectDeviceLib)
152     : D(D) {
153   RocmPathArg = Args.getLastArgValue(clang::driver::options::OPT_rocm_path_EQ);
154   RocmDeviceLibPathArg =
155       Args.getAllArgValues(clang::driver::options::OPT_rocm_device_lib_path_EQ);
156   if (auto *A = Args.getLastArg(clang::driver::options::OPT_hip_version_EQ)) {
157     HIPVersionArg = A->getValue();
158     unsigned Major = 0;
159     unsigned Minor = 0;
160     SmallVector<StringRef, 3> Parts;
161     HIPVersionArg.split(Parts, '.');
162     if (Parts.size())
163       Parts[0].getAsInteger(0, Major);
164     if (Parts.size() > 1)
165       Parts[1].getAsInteger(0, Minor);
166     if (Parts.size() > 2)
167       VersionPatch = Parts[2].str();
168     if (VersionPatch.empty())
169       VersionPatch = "0";
170     if (Major == 0 || Minor == 0)
171       D.Diag(diag::err_drv_invalid_value)
172           << A->getAsString(Args) << HIPVersionArg;
173 
174     VersionMajorMinor = llvm::VersionTuple(Major, Minor);
175     DetectedVersion =
176         (Twine(Major) + "." + Twine(Minor) + "." + VersionPatch).str();
177   } else {
178     VersionPatch = DefaultVersionPatch;
179     VersionMajorMinor =
180         llvm::VersionTuple(DefaultVersionMajor, DefaultVersionMinor);
181     DetectedVersion = (Twine(DefaultVersionMajor) + "." +
182                        Twine(DefaultVersionMinor) + "." + VersionPatch)
183                           .str();
184   }
185 
186   if (DetectHIPRuntime)
187     detectHIPRuntime();
188   if (DetectDeviceLib)
189     detectDeviceLibrary();
190 }
191 
192 void RocmInstallationDetector::detectDeviceLibrary() {
193   assert(LibDevicePath.empty());
194 
195   if (!RocmDeviceLibPathArg.empty())
196     LibDevicePath = RocmDeviceLibPathArg[RocmDeviceLibPathArg.size() - 1];
197   else if (const char *LibPathEnv = ::getenv("HIP_DEVICE_LIB_PATH"))
198     LibDevicePath = LibPathEnv;
199 
200   auto &FS = D.getVFS();
201   if (!LibDevicePath.empty()) {
202     // Maintain compatability with HIP flag/envvar pointing directly at the
203     // bitcode library directory. This points directly at the library path instead
204     // of the rocm root installation.
205     if (!FS.exists(LibDevicePath))
206       return;
207 
208     scanLibDevicePath(LibDevicePath);
209     HasDeviceLibrary = allGenericLibsValid() && !LibDeviceMap.empty();
210     return;
211   }
212 
213   // The install path situation in old versions of ROCm is a real mess, and
214   // use a different install layout. Multiple copies of the device libraries
215   // exist for each frontend project, and differ depending on which build
216   // system produced the packages. Standalone OpenCL builds also have a
217   // different directory structure from the ROCm OpenCL package.
218   auto Candidates = getInstallationPathCandidates();
219   for (const auto &Candidate : Candidates) {
220     auto CandidatePath = Candidate.Path;
221 
222     // Check device library exists at the given path.
223     auto CheckDeviceLib = [&](StringRef Path) {
224       bool CheckLibDevice = (!NoBuiltinLibs || Candidate.StrictChecking);
225       if (CheckLibDevice && !FS.exists(Path))
226         return false;
227 
228       scanLibDevicePath(Path);
229 
230       if (!NoBuiltinLibs) {
231         // Check that the required non-target libraries are all available.
232         if (!allGenericLibsValid())
233           return false;
234 
235         // Check that we have found at least one libdevice that we can link in
236         // if -nobuiltinlib hasn't been specified.
237         if (LibDeviceMap.empty())
238           return false;
239       }
240       return true;
241     };
242 
243     // The possible structures are:
244     // - ${ROCM_ROOT}/amdgcn/bitcode/*
245     // - ${ROCM_ROOT}/lib/*
246     // - ${ROCM_ROOT}/lib/bitcode/*
247     // so try to detect these layouts.
248     static llvm::SmallVector<const char *, 2> SubDirsList[] = {
249         {"amdgcn", "bitcode"},
250         {"lib"},
251         {"lib", "bitcode"},
252     };
253 
254     // Make a path by appending sub-directories to InstallPath.
255     auto MakePath = [&](const llvm::ArrayRef<const char *> &SubDirs) {
256       auto Path = CandidatePath;
257       for (auto SubDir : SubDirs)
258         llvm::sys::path::append(Path, SubDir);
259       return Path;
260     };
261 
262     for (auto SubDirs : SubDirsList) {
263       LibDevicePath = MakePath(SubDirs);
264       HasDeviceLibrary = CheckDeviceLib(LibDevicePath);
265       if (HasDeviceLibrary)
266         return;
267     }
268   }
269 }
270 
271 void RocmInstallationDetector::detectHIPRuntime() {
272   auto Candidates = getInstallationPathCandidates();
273   auto &FS = D.getVFS();
274 
275   for (const auto &Candidate : Candidates) {
276     InstallPath = Candidate.Path;
277     if (InstallPath.empty() || !FS.exists(InstallPath))
278       continue;
279 
280     BinPath = InstallPath;
281     llvm::sys::path::append(BinPath, "bin");
282     IncludePath = InstallPath;
283     llvm::sys::path::append(IncludePath, "include");
284     LibPath = InstallPath;
285     llvm::sys::path::append(LibPath, "lib");
286 
287     llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
288         FS.getBufferForFile(BinPath + "/.hipVersion");
289     if (!VersionFile && Candidate.StrictChecking)
290       continue;
291 
292     if (HIPVersionArg.empty() && VersionFile)
293       ParseHIPVersionFile((*VersionFile)->getBuffer());
294 
295     HasHIPRuntime = true;
296     return;
297   }
298   HasHIPRuntime = false;
299 }
300 
301 void RocmInstallationDetector::print(raw_ostream &OS) const {
302   if (hasHIPRuntime())
303     OS << "Found HIP installation: " << InstallPath << ", version "
304        << DetectedVersion << '\n';
305 }
306 
307 void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList &DriverArgs,
308                                                  ArgStringList &CC1Args) const {
309   bool UsesRuntimeWrapper = VersionMajorMinor > llvm::VersionTuple(3, 5);
310 
311   if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
312     // HIP header includes standard library wrapper headers under clang
313     // cuda_wrappers directory. Since these wrapper headers include_next
314     // standard C++ headers, whereas libc++ headers include_next other clang
315     // headers. The include paths have to follow this order:
316     // - wrapper include path
317     // - standard C++ include path
318     // - other clang include path
319     // Since standard C++ and other clang include paths are added in other
320     // places after this function, here we only need to make sure wrapper
321     // include path is added.
322     //
323     // ROCm 3.5 does not fully support the wrapper headers. Therefore it needs
324     // a workaround.
325     SmallString<128> P(D.ResourceDir);
326     if (UsesRuntimeWrapper)
327       llvm::sys::path::append(P, "include", "cuda_wrappers");
328     CC1Args.push_back("-internal-isystem");
329     CC1Args.push_back(DriverArgs.MakeArgString(P));
330   }
331 
332   if (DriverArgs.hasArg(options::OPT_nogpuinc))
333     return;
334 
335   if (!hasHIPRuntime()) {
336     D.Diag(diag::err_drv_no_hip_runtime);
337     return;
338   }
339 
340   CC1Args.push_back("-internal-isystem");
341   CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
342   if (UsesRuntimeWrapper)
343     CC1Args.append({"-include", "__clang_hip_runtime_wrapper.h"});
344 }
345 
346 void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
347                                   const InputInfo &Output,
348                                   const InputInfoList &Inputs,
349                                   const ArgList &Args,
350                                   const char *LinkingOutput) const {
351 
352   std::string Linker = getToolChain().GetProgramPath(getShortName());
353   ArgStringList CmdArgs;
354   AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
355   CmdArgs.push_back("-shared");
356   CmdArgs.push_back("-o");
357   CmdArgs.push_back(Output.getFilename());
358   C.addCommand(
359       std::make_unique<Command>(JA, *this, ResponseFileSupport::AtFileCurCP(),
360                                 Args.MakeArgString(Linker), CmdArgs, Inputs));
361 }
362 
363 void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
364                                      const llvm::Triple &Triple,
365                                      const llvm::opt::ArgList &Args,
366                                      std::vector<StringRef> &Features) {
367   if (const Arg *dAbi = Args.getLastArg(options::OPT_mamdgpu_debugger_abi))
368     D.Diag(diag::err_drv_clang_unsupported) << dAbi->getAsString(Args);
369 
370   // Add target ID features to -target-feature options. No diagnostics should
371   // be emitted here since invalid target ID is diagnosed at other places.
372   StringRef TargetID = Args.getLastArgValue(options::OPT_mcpu_EQ);
373   if (!TargetID.empty()) {
374     llvm::StringMap<bool> FeatureMap;
375     auto OptionalGpuArch = parseTargetID(Triple, TargetID, &FeatureMap);
376     if (OptionalGpuArch) {
377       StringRef GpuArch = OptionalGpuArch.getValue();
378       // Iterate through all possible target ID features for the given GPU.
379       // If it is mapped to true, add +feature.
380       // If it is mapped to false, add -feature.
381       // If it is not in the map (default), do not add it
382       for (auto &&Feature : getAllPossibleTargetIDFeatures(Triple, GpuArch)) {
383         auto Pos = FeatureMap.find(Feature);
384         if (Pos == FeatureMap.end())
385           continue;
386         Features.push_back(Args.MakeArgStringRef(
387             (Twine(Pos->second ? "+" : "-") + Feature).str()));
388       }
389     }
390   }
391 
392   if (Args.getLastArg(options::OPT_mwavefrontsize64)) {
393     Features.push_back("-wavefrontsize16");
394     Features.push_back("-wavefrontsize32");
395     Features.push_back("+wavefrontsize64");
396   }
397   if (Args.getLastArg(options::OPT_mno_wavefrontsize64)) {
398     Features.push_back("-wavefrontsize16");
399     Features.push_back("+wavefrontsize32");
400     Features.push_back("-wavefrontsize64");
401   }
402 
403   handleTargetFeaturesGroup(
404     Args, Features, options::OPT_m_amdgpu_Features_Group);
405 }
406 
407 /// AMDGPU Toolchain
408 AMDGPUToolChain::AMDGPUToolChain(const Driver &D, const llvm::Triple &Triple,
409                                  const ArgList &Args)
410     : Generic_ELF(D, Triple, Args),
411       OptionsDefault({{options::OPT_O, "3"},
412                       {options::OPT_cl_std_EQ, "CL1.2"}}) {}
413 
414 Tool *AMDGPUToolChain::buildLinker() const {
415   return new tools::amdgpu::Linker(*this);
416 }
417 
418 DerivedArgList *
419 AMDGPUToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch,
420                                Action::OffloadKind DeviceOffloadKind) const {
421 
422   DerivedArgList *DAL =
423       Generic_ELF::TranslateArgs(Args, BoundArch, DeviceOffloadKind);
424 
425   const OptTable &Opts = getDriver().getOpts();
426 
427   if (!DAL)
428     DAL = new DerivedArgList(Args.getBaseArgs());
429   for (auto *A : Args)
430     DAL->append(A);
431 
432   if (!Args.getLastArgValue(options::OPT_x).equals("cl"))
433     return DAL;
434 
435   // Phase 1 (.cl -> .bc)
436   if (Args.hasArg(options::OPT_c) && Args.hasArg(options::OPT_emit_llvm)) {
437     DAL->AddFlagArg(nullptr, Opts.getOption(getTriple().isArch64Bit()
438                                                 ? options::OPT_m64
439                                                 : options::OPT_m32));
440 
441     // Have to check OPT_O4, OPT_O0 & OPT_Ofast separately
442     // as they defined that way in Options.td
443     if (!Args.hasArg(options::OPT_O, options::OPT_O0, options::OPT_O4,
444                      options::OPT_Ofast))
445       DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_O),
446                         getOptionDefault(options::OPT_O));
447   }
448 
449   return DAL;
450 }
451 
452 bool AMDGPUToolChain::getDefaultDenormsAreZeroForTarget(
453     llvm::AMDGPU::GPUKind Kind) {
454 
455   // Assume nothing without a specific target.
456   if (Kind == llvm::AMDGPU::GK_NONE)
457     return false;
458 
459   const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
460 
461   // Default to enabling f32 denormals by default on subtargets where fma is
462   // fast with denormals
463   const bool BothDenormAndFMAFast =
464       (ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) &&
465       (ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32);
466   return !BothDenormAndFMAFast;
467 }
468 
469 llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType(
470     const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
471     const llvm::fltSemantics *FPType) const {
472   // Denormals should always be enabled for f16 and f64.
473   if (!FPType || FPType != &llvm::APFloat::IEEEsingle())
474     return llvm::DenormalMode::getIEEE();
475 
476   if (JA.getOffloadingDeviceKind() == Action::OFK_HIP ||
477       JA.getOffloadingDeviceKind() == Action::OFK_Cuda) {
478     auto Arch = getProcessorFromTargetID(getTriple(), JA.getOffloadingArch());
479     auto Kind = llvm::AMDGPU::parseArchAMDGCN(Arch);
480     if (FPType && FPType == &llvm::APFloat::IEEEsingle() &&
481         DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
482                            options::OPT_fno_cuda_flush_denormals_to_zero,
483                            getDefaultDenormsAreZeroForTarget(Kind)))
484       return llvm::DenormalMode::getPreserveSign();
485 
486     return llvm::DenormalMode::getIEEE();
487   }
488 
489   const StringRef GpuArch = getGPUArch(DriverArgs);
490   auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
491 
492   // TODO: There are way too many flags that change this. Do we need to check
493   // them all?
494   bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
495              getDefaultDenormsAreZeroForTarget(Kind);
496 
497   // Outputs are flushed to zero (FTZ), preserving sign. Denormal inputs are
498   // also implicit treated as zero (DAZ).
499   return DAZ ? llvm::DenormalMode::getPreserveSign() :
500                llvm::DenormalMode::getIEEE();
501 }
502 
503 bool AMDGPUToolChain::isWave64(const llvm::opt::ArgList &DriverArgs,
504                                llvm::AMDGPU::GPUKind Kind) {
505   const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
506   static bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32);
507 
508   return !HasWave32 || DriverArgs.hasFlag(
509     options::OPT_mwavefrontsize64, options::OPT_mno_wavefrontsize64, false);
510 }
511 
512 
513 /// ROCM Toolchain
514 ROCMToolChain::ROCMToolChain(const Driver &D, const llvm::Triple &Triple,
515                              const ArgList &Args)
516     : AMDGPUToolChain(D, Triple, Args) {
517   RocmInstallation.detectDeviceLibrary();
518 }
519 
520 void AMDGPUToolChain::addClangTargetOptions(
521     const llvm::opt::ArgList &DriverArgs,
522     llvm::opt::ArgStringList &CC1Args,
523     Action::OffloadKind DeviceOffloadingKind) const {
524   // Allow using target ID in -mcpu.
525   translateTargetID(DriverArgs, CC1Args);
526   // Default to "hidden" visibility, as object level linking will not be
527   // supported for the foreseeable future.
528   if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ,
529                          options::OPT_fvisibility_ms_compat)) {
530     CC1Args.push_back("-fvisibility");
531     CC1Args.push_back("hidden");
532     CC1Args.push_back("-fapply-global-visibility-to-externs");
533   }
534 }
535 
536 StringRef
537 AMDGPUToolChain::getGPUArch(const llvm::opt::ArgList &DriverArgs) const {
538   return getProcessorFromTargetID(
539       getTriple(), DriverArgs.getLastArgValue(options::OPT_mcpu_EQ));
540 }
541 
542 StringRef
543 AMDGPUToolChain::translateTargetID(const llvm::opt::ArgList &DriverArgs,
544                                    llvm::opt::ArgStringList &CC1Args) const {
545   StringRef TargetID = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
546   if (TargetID.empty())
547     return StringRef();
548 
549   llvm::StringMap<bool> FeatureMap;
550   auto OptionalGpuArch = parseTargetID(getTriple(), TargetID, &FeatureMap);
551   if (!OptionalGpuArch) {
552     getDriver().Diag(clang::diag::err_drv_bad_target_id) << TargetID;
553     return StringRef();
554   }
555 
556   return OptionalGpuArch.getValue();
557 }
558 
559 void ROCMToolChain::addClangTargetOptions(
560     const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
561     Action::OffloadKind DeviceOffloadingKind) const {
562   AMDGPUToolChain::addClangTargetOptions(DriverArgs, CC1Args,
563                                          DeviceOffloadingKind);
564 
565   // For the OpenCL case where there is no offload target, accept -nostdlib to
566   // disable bitcode linking.
567   if (DeviceOffloadingKind == Action::OFK_None &&
568       DriverArgs.hasArg(options::OPT_nostdlib))
569     return;
570 
571   if (DriverArgs.hasArg(options::OPT_nogpulib))
572     return;
573 
574   if (!RocmInstallation.hasDeviceLibrary()) {
575     getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 0;
576     return;
577   }
578 
579   // Get the device name and canonicalize it
580   const StringRef GpuArch = getGPUArch(DriverArgs);
581   auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
582   const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
583   std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
584   if (LibDeviceFile.empty()) {
585     getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 1 << GpuArch;
586     return;
587   }
588 
589   bool Wave64 = isWave64(DriverArgs, Kind);
590 
591   // TODO: There are way too many flags that change this. Do we need to check
592   // them all?
593   bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
594              getDefaultDenormsAreZeroForTarget(Kind);
595   bool FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only);
596 
597   bool UnsafeMathOpt =
598       DriverArgs.hasArg(options::OPT_cl_unsafe_math_optimizations);
599   bool FastRelaxedMath = DriverArgs.hasArg(options::OPT_cl_fast_relaxed_math);
600   bool CorrectSqrt =
601       DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt);
602 
603   // Add the OpenCL specific bitcode library.
604   CC1Args.push_back("-mlink-builtin-bitcode");
605   CC1Args.push_back(DriverArgs.MakeArgString(RocmInstallation.getOpenCLPath()));
606 
607   // Add the generic set of libraries.
608   RocmInstallation.addCommonBitcodeLibCC1Args(
609       DriverArgs, CC1Args, LibDeviceFile, Wave64, DAZ, FiniteOnly,
610       UnsafeMathOpt, FastRelaxedMath, CorrectSqrt);
611 }
612 
613 void RocmInstallationDetector::addCommonBitcodeLibCC1Args(
614     const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
615     StringRef LibDeviceFile, bool Wave64, bool DAZ, bool FiniteOnly,
616     bool UnsafeMathOpt, bool FastRelaxedMath, bool CorrectSqrt) const {
617   static const char LinkBitcodeFlag[] = "-mlink-builtin-bitcode";
618 
619   CC1Args.push_back(LinkBitcodeFlag);
620   CC1Args.push_back(DriverArgs.MakeArgString(getOCMLPath()));
621 
622   CC1Args.push_back(LinkBitcodeFlag);
623   CC1Args.push_back(DriverArgs.MakeArgString(getOCKLPath()));
624 
625   CC1Args.push_back(LinkBitcodeFlag);
626   CC1Args.push_back(DriverArgs.MakeArgString(getDenormalsAreZeroPath(DAZ)));
627 
628   CC1Args.push_back(LinkBitcodeFlag);
629   CC1Args.push_back(DriverArgs.MakeArgString(
630       getUnsafeMathPath(UnsafeMathOpt || FastRelaxedMath)));
631 
632   CC1Args.push_back(LinkBitcodeFlag);
633   CC1Args.push_back(DriverArgs.MakeArgString(
634       getFiniteOnlyPath(FiniteOnly || FastRelaxedMath)));
635 
636   CC1Args.push_back(LinkBitcodeFlag);
637   CC1Args.push_back(
638       DriverArgs.MakeArgString(getCorrectlyRoundedSqrtPath(CorrectSqrt)));
639 
640   CC1Args.push_back(LinkBitcodeFlag);
641   CC1Args.push_back(DriverArgs.MakeArgString(getWavefrontSize64Path(Wave64)));
642 
643   CC1Args.push_back(LinkBitcodeFlag);
644   CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
645 }
646