1 //===--- AMDGPU.cpp - AMDGPU ToolChain Implementations ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "CommonArgs.h"
11 #include "InputInfo.h"
12 #include "clang/Driver/Compilation.h"
13 #include "clang/Driver/DriverDiagnostic.h"
14 #include "llvm/Option/ArgList.h"
15 #include "llvm/Support/Path.h"
16 #include "llvm/Support/VirtualFileSystem.h"
17 
18 using namespace clang::driver;
19 using namespace clang::driver::tools;
20 using namespace clang::driver::toolchains;
21 using namespace clang;
22 using namespace llvm::opt;
23 
24 void RocmInstallationDetector::scanLibDevicePath() {
25   assert(!LibDevicePath.empty());
26 
27   const StringRef Suffix(".bc");
28 
29   std::error_code EC;
30   for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE;
31        !EC && LI != LE; LI = LI.increment(EC)) {
32     StringRef FilePath = LI->path();
33     StringRef FileName = llvm::sys::path::filename(FilePath);
34     if (!FileName.endswith(Suffix))
35       continue;
36 
37     StringRef BaseName = FileName.drop_back(Suffix.size());
38 
39     if (BaseName == "ocml") {
40       OCML = FilePath;
41     } else if (BaseName == "ockl") {
42       OCKL = FilePath;
43     } else if (BaseName == "opencl") {
44       OpenCL = FilePath;
45     } else if (BaseName == "hip") {
46       HIP = FilePath;
47     } else if (BaseName == "oclc_finite_only_off") {
48       FiniteOnly.Off = FilePath;
49     } else if (BaseName == "oclc_finite_only_on") {
50       FiniteOnly.On = FilePath;
51     } else if (BaseName == "oclc_daz_opt_on") {
52       DenormalsAreZero.On = FilePath;
53     } else if (BaseName == "oclc_daz_opt_off") {
54       DenormalsAreZero.Off = FilePath;
55     } else if (BaseName == "oclc_correctly_rounded_sqrt_on") {
56       CorrectlyRoundedSqrt.On = FilePath;
57     } else if (BaseName == "oclc_correctly_rounded_sqrt_off") {
58       CorrectlyRoundedSqrt.Off = FilePath;
59     } else if (BaseName == "oclc_unsafe_math_on") {
60       UnsafeMath.On = FilePath;
61     } else if (BaseName == "oclc_unsafe_math_off") {
62       UnsafeMath.Off = FilePath;
63     } else if (BaseName == "oclc_wavefrontsize64_on") {
64       WavefrontSize64.On = FilePath;
65     } else if (BaseName == "oclc_wavefrontsize64_off") {
66       WavefrontSize64.Off = FilePath;
67     } else {
68       // Process all bitcode filenames that look like
69       // ocl_isa_version_XXX.amdgcn.bc
70       const StringRef DeviceLibPrefix = "oclc_isa_version_";
71       if (!BaseName.startswith(DeviceLibPrefix))
72         continue;
73 
74       StringRef IsaVersionNumber =
75         BaseName.drop_front(DeviceLibPrefix.size());
76 
77       llvm::Twine GfxName = Twine("gfx") + IsaVersionNumber;
78       SmallString<8> Tmp;
79       LibDeviceMap.insert(
80         std::make_pair(GfxName.toStringRef(Tmp), FilePath.str()));
81     }
82   }
83 }
84 
85 RocmInstallationDetector::RocmInstallationDetector(
86     const Driver &D, const llvm::Triple &HostTriple,
87     const llvm::opt::ArgList &Args)
88     : D(D) {
89   struct Candidate {
90     std::string Path;
91     bool StrictChecking;
92 
93     Candidate(std::string Path, bool StrictChecking = false)
94         : Path(Path), StrictChecking(StrictChecking) {}
95   };
96 
97   SmallVector<Candidate, 4> Candidates;
98 
99   if (Args.hasArg(clang::driver::options::OPT_rocm_path_EQ)) {
100     Candidates.emplace_back(
101         Args.getLastArgValue(clang::driver::options::OPT_rocm_path_EQ).str());
102   } else {
103     // Try to find relative to the compiler binary.
104     const char *InstallDir = D.getInstalledDir();
105 
106     // Check both a normal Unix prefix position of the clang binary, as well as
107     // the Windows-esque layout the ROCm packages use with the host architecture
108     // subdirectory of bin.
109 
110     // Strip off directory (usually bin)
111     StringRef ParentDir = llvm::sys::path::parent_path(InstallDir);
112     StringRef ParentName = llvm::sys::path::filename(ParentDir);
113 
114     // Some builds use bin/{host arch}, so go up again.
115     if (ParentName == "bin") {
116       ParentDir = llvm::sys::path::parent_path(ParentDir);
117       ParentName = llvm::sys::path::filename(ParentDir);
118     }
119 
120     if (ParentName == "llvm") {
121       // Some versions of the rocm llvm package install to /opt/rocm/llvm/bin
122       Candidates.emplace_back(llvm::sys::path::parent_path(ParentDir).str(),
123                               /*StrictChecking=*/true);
124     }
125 
126     Candidates.emplace_back(D.SysRoot + "/opt/rocm");
127   }
128 
129   bool NoBuiltinLibs = Args.hasArg(options::OPT_nogpulib);
130 
131   assert(LibDevicePath.empty());
132 
133   if (Args.hasArg(clang::driver::options::OPT_hip_device_lib_path_EQ)) {
134     LibDevicePath
135       = Args.getLastArgValue(clang::driver::options::OPT_hip_device_lib_path_EQ);
136   } else if (const char *LibPathEnv = ::getenv("HIP_DEVICE_LIB_PATH")) {
137     LibDevicePath = LibPathEnv;
138   }
139 
140   if (!LibDevicePath.empty()) {
141     // Maintain compatability with HIP flag/envvar pointing directly at the
142     // bitcode library directory. This points directly at the library path instead
143     // of the rocm root installation.
144     if (!D.getVFS().exists(LibDevicePath))
145       return;
146 
147     scanLibDevicePath();
148     IsValid = allGenericLibsValid() && !LibDeviceMap.empty();
149     return;
150   }
151 
152   for (const auto &Candidate : Candidates) {
153     InstallPath = Candidate.Path;
154     if (InstallPath.empty() || !D.getVFS().exists(InstallPath))
155       continue;
156 
157     // The install path situation in old versions of ROCm is a real mess, and
158     // use a different install layout. Multiple copies of the device libraries
159     // exist for each frontend project, and differ depending on which build
160     // system produced the packages. Standalone OpenCL builds also have a
161     // different directory structure from the ROCm OpenCL package.
162     //
163     // The desired structure is (${ROCM_ROOT} or
164     // ${OPENCL_ROOT})/amdgcn/bitcode/*, so try to detect this layout.
165 
166     // BinPath = InstallPath + "/bin";
167     llvm::sys::path::append(IncludePath, InstallPath, "include");
168     llvm::sys::path::append(LibDevicePath, InstallPath, "amdgcn", "bitcode");
169 
170     auto &FS = D.getVFS();
171 
172     // We don't need the include path for OpenCL, since clang already ships with
173     // the default header.
174 
175     bool CheckLibDevice = (!NoBuiltinLibs || Candidate.StrictChecking);
176     if (CheckLibDevice && !FS.exists(LibDevicePath))
177       continue;
178 
179     scanLibDevicePath();
180 
181     if (!NoBuiltinLibs) {
182       // Check that the required non-target libraries are all available.
183       if (!allGenericLibsValid())
184         continue;
185 
186       // Check that we have found at least one libdevice that we can link in if
187       // -nobuiltinlib hasn't been specified.
188       if (LibDeviceMap.empty())
189         continue;
190     }
191 
192     IsValid = true;
193     break;
194   }
195 }
196 
197 void RocmInstallationDetector::print(raw_ostream &OS) const {
198   if (isValid())
199     OS << "Found ROCm installation: " << InstallPath << '\n';
200 }
201 
202 void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList &DriverArgs,
203                                                  ArgStringList &CC1Args) const {
204   if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
205     // HIP header includes standard library wrapper headers under clang
206     // cuda_wrappers directory. Since these wrapper headers include_next
207     // standard C++ headers, whereas libc++ headers include_next other clang
208     // headers. The include paths have to follow this order:
209     // - wrapper include path
210     // - standard C++ include path
211     // - other clang include path
212     // Since standard C++ and other clang include paths are added in other
213     // places after this function, here we only need to make sure wrapper
214     // include path is added.
215     SmallString<128> P(D.ResourceDir);
216     llvm::sys::path::append(P, "include");
217     llvm::sys::path::append(P, "cuda_wrappers");
218     CC1Args.push_back("-internal-isystem");
219     CC1Args.push_back(DriverArgs.MakeArgString(P));
220     CC1Args.push_back("-include");
221     CC1Args.push_back("__clang_hip_runtime_wrapper.h");
222   }
223 
224   if (DriverArgs.hasArg(options::OPT_nogpuinc))
225     return;
226 
227   if (!isValid()) {
228     D.Diag(diag::err_drv_no_rocm_installation);
229     return;
230   }
231 
232   CC1Args.push_back("-internal-isystem");
233   CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
234 }
235 
236 void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
237                                   const InputInfo &Output,
238                                   const InputInfoList &Inputs,
239                                   const ArgList &Args,
240                                   const char *LinkingOutput) const {
241 
242   std::string Linker = getToolChain().GetProgramPath(getShortName());
243   ArgStringList CmdArgs;
244   AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
245   CmdArgs.push_back("-shared");
246   CmdArgs.push_back("-o");
247   CmdArgs.push_back(Output.getFilename());
248   C.addCommand(std::make_unique<Command>(JA, *this, Args.MakeArgString(Linker),
249                                           CmdArgs, Inputs));
250 }
251 
252 void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
253                                      const llvm::opt::ArgList &Args,
254                                      std::vector<StringRef> &Features) {
255   if (const Arg *dAbi = Args.getLastArg(options::OPT_mamdgpu_debugger_abi))
256     D.Diag(diag::err_drv_clang_unsupported) << dAbi->getAsString(Args);
257 
258   if (Args.getLastArg(options::OPT_mwavefrontsize64)) {
259     Features.push_back("-wavefrontsize16");
260     Features.push_back("-wavefrontsize32");
261     Features.push_back("+wavefrontsize64");
262   }
263   if (Args.getLastArg(options::OPT_mno_wavefrontsize64)) {
264     Features.push_back("-wavefrontsize16");
265     Features.push_back("+wavefrontsize32");
266     Features.push_back("-wavefrontsize64");
267   }
268 
269   handleTargetFeaturesGroup(
270     Args, Features, options::OPT_m_amdgpu_Features_Group);
271 }
272 
/// AMDGPU Toolchain
///
/// Constructs the toolchain on top of Generic_ELF and registers the default
/// option values kept in OptionsDefault: "3" for options::OPT_O and "CL1.2"
/// for options::OPT_cl_std_EQ.
AMDGPUToolChain::AMDGPUToolChain(const Driver &D, const llvm::Triple &Triple,
                                 const ArgList &Args)
    : Generic_ELF(D, Triple, Args),
      OptionsDefault({{options::OPT_O, "3"},
                      {options::OPT_cl_std_EQ, "CL1.2"}}) {}
279 
280 Tool *AMDGPUToolChain::buildLinker() const {
281   return new tools::amdgpu::Linker(*this);
282 }
283 
284 DerivedArgList *
285 AMDGPUToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch,
286                                Action::OffloadKind DeviceOffloadKind) const {
287 
288   DerivedArgList *DAL =
289       Generic_ELF::TranslateArgs(Args, BoundArch, DeviceOffloadKind);
290 
291   // Do nothing if not OpenCL (-x cl)
292   if (!Args.getLastArgValue(options::OPT_x).equals("cl"))
293     return DAL;
294 
295   if (!DAL)
296     DAL = new DerivedArgList(Args.getBaseArgs());
297   for (auto *A : Args)
298     DAL->append(A);
299 
300   const OptTable &Opts = getDriver().getOpts();
301 
302   // Phase 1 (.cl -> .bc)
303   if (Args.hasArg(options::OPT_c) && Args.hasArg(options::OPT_emit_llvm)) {
304     DAL->AddFlagArg(nullptr, Opts.getOption(getTriple().isArch64Bit()
305                                                 ? options::OPT_m64
306                                                 : options::OPT_m32));
307 
308     // Have to check OPT_O4, OPT_O0 & OPT_Ofast separately
309     // as they defined that way in Options.td
310     if (!Args.hasArg(options::OPT_O, options::OPT_O0, options::OPT_O4,
311                      options::OPT_Ofast))
312       DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_O),
313                         getOptionDefault(options::OPT_O));
314   }
315 
316   return DAL;
317 }
318 
319 bool AMDGPUToolChain::getDefaultDenormsAreZeroForTarget(
320     llvm::AMDGPU::GPUKind Kind) {
321 
322   // Assume nothing without a specific target.
323   if (Kind == llvm::AMDGPU::GK_NONE)
324     return false;
325 
326   const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
327 
328   // Default to enabling f32 denormals by default on subtargets where fma is
329   // fast with denormals
330   const bool BothDenormAndFMAFast =
331       (ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) &&
332       (ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32);
333   return !BothDenormAndFMAFast;
334 }
335 
336 llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType(
337     const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
338     const llvm::fltSemantics *FPType) const {
339   // Denormals should always be enabled for f16 and f64.
340   if (!FPType || FPType != &llvm::APFloat::IEEEsingle())
341     return llvm::DenormalMode::getIEEE();
342 
343   if (JA.getOffloadingDeviceKind() == Action::OFK_HIP ||
344       JA.getOffloadingDeviceKind() == Action::OFK_Cuda) {
345     auto Kind = llvm::AMDGPU::parseArchAMDGCN(JA.getOffloadingArch());
346     if (FPType && FPType == &llvm::APFloat::IEEEsingle() &&
347         DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
348                            options::OPT_fno_cuda_flush_denormals_to_zero,
349                            getDefaultDenormsAreZeroForTarget(Kind)))
350       return llvm::DenormalMode::getPreserveSign();
351 
352     return llvm::DenormalMode::getIEEE();
353   }
354 
355   const StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
356   auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
357 
358   // TODO: There are way too many flags that change this. Do we need to check
359   // them all?
360   bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
361              getDefaultDenormsAreZeroForTarget(Kind);
362 
363   // Outputs are flushed to zero (FTZ), preserving sign. Denormal inputs are
364   // also implicit treated as zero (DAZ).
365   return DAZ ? llvm::DenormalMode::getPreserveSign() :
366                llvm::DenormalMode::getIEEE();
367 }
368 
369 bool AMDGPUToolChain::isWave64(const llvm::opt::ArgList &DriverArgs,
370                                llvm::AMDGPU::GPUKind Kind) {
371   const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
372   static bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32);
373 
374   return !HasWave32 || DriverArgs.hasFlag(
375     options::OPT_mwavefrontsize64, options::OPT_mno_wavefrontsize64, false);
376 }
377 
378 
/// ROCM Toolchain
///
/// Constructs the AMDGPUToolChain base and the RocmInstallation detector from
/// the same driver, triple and argument list.
ROCMToolChain::ROCMToolChain(const Driver &D, const llvm::Triple &Triple,
                             const ArgList &Args)
  : AMDGPUToolChain(D, Triple, Args),
    RocmInstallation(D, Triple, Args) { }
384 
385 void AMDGPUToolChain::addClangTargetOptions(
386     const llvm::opt::ArgList &DriverArgs,
387     llvm::opt::ArgStringList &CC1Args,
388     Action::OffloadKind DeviceOffloadingKind) const {
389   // Default to "hidden" visibility, as object level linking will not be
390   // supported for the foreseeable future.
391   if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ,
392                          options::OPT_fvisibility_ms_compat)) {
393     CC1Args.push_back("-fvisibility");
394     CC1Args.push_back("hidden");
395     CC1Args.push_back("-fapply-global-visibility-to-externs");
396   }
397 }
398 
399 void ROCMToolChain::addClangTargetOptions(
400     const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
401     Action::OffloadKind DeviceOffloadingKind) const {
402   AMDGPUToolChain::addClangTargetOptions(DriverArgs, CC1Args,
403                                          DeviceOffloadingKind);
404 
405   // For the OpenCL case where there is no offload target, accept -nostdlib to
406   // disable bitcode linking.
407   if (DeviceOffloadingKind == Action::OFK_None &&
408       DriverArgs.hasArg(options::OPT_nostdlib))
409     return;
410 
411   if (DriverArgs.hasArg(options::OPT_nogpulib))
412     return;
413 
414   if (!RocmInstallation.isValid()) {
415     getDriver().Diag(diag::err_drv_no_rocm_installation);
416     return;
417   }
418 
419   // Get the device name and canonicalize it
420   const StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
421   auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
422   const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
423   std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
424   if (LibDeviceFile.empty()) {
425     getDriver().Diag(diag::err_drv_no_rocm_device_lib) << GpuArch;
426     return;
427   }
428 
429   bool Wave64 = isWave64(DriverArgs, Kind);
430 
431   // TODO: There are way too many flags that change this. Do we need to check
432   // them all?
433   bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
434              getDefaultDenormsAreZeroForTarget(Kind);
435   bool FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only);
436 
437   bool UnsafeMathOpt =
438       DriverArgs.hasArg(options::OPT_cl_unsafe_math_optimizations);
439   bool FastRelaxedMath = DriverArgs.hasArg(options::OPT_cl_fast_relaxed_math);
440   bool CorrectSqrt =
441       DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt);
442 
443   // Add the OpenCL specific bitcode library.
444   CC1Args.push_back("-mlink-builtin-bitcode");
445   CC1Args.push_back(DriverArgs.MakeArgString(RocmInstallation.getOpenCLPath()));
446 
447   // Add the generic set of libraries.
448   RocmInstallation.addCommonBitcodeLibCC1Args(
449       DriverArgs, CC1Args, LibDeviceFile, Wave64, DAZ, FiniteOnly,
450       UnsafeMathOpt, FastRelaxedMath, CorrectSqrt);
451 }
452 
453 void RocmInstallationDetector::addCommonBitcodeLibCC1Args(
454     const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
455     StringRef LibDeviceFile, bool Wave64, bool DAZ, bool FiniteOnly,
456     bool UnsafeMathOpt, bool FastRelaxedMath, bool CorrectSqrt) const {
457   static const char LinkBitcodeFlag[] = "-mlink-builtin-bitcode";
458 
459   CC1Args.push_back(LinkBitcodeFlag);
460   CC1Args.push_back(DriverArgs.MakeArgString(getOCMLPath()));
461 
462   CC1Args.push_back(LinkBitcodeFlag);
463   CC1Args.push_back(DriverArgs.MakeArgString(getOCKLPath()));
464 
465   CC1Args.push_back(LinkBitcodeFlag);
466   CC1Args.push_back(DriverArgs.MakeArgString(getDenormalsAreZeroPath(DAZ)));
467 
468   CC1Args.push_back(LinkBitcodeFlag);
469   CC1Args.push_back(DriverArgs.MakeArgString(
470       getUnsafeMathPath(UnsafeMathOpt || FastRelaxedMath)));
471 
472   CC1Args.push_back(LinkBitcodeFlag);
473   CC1Args.push_back(DriverArgs.MakeArgString(
474       getFiniteOnlyPath(FiniteOnly || FastRelaxedMath)));
475 
476   CC1Args.push_back(LinkBitcodeFlag);
477   CC1Args.push_back(
478       DriverArgs.MakeArgString(getCorrectlyRoundedSqrtPath(CorrectSqrt)));
479 
480   CC1Args.push_back(LinkBitcodeFlag);
481   CC1Args.push_back(DriverArgs.MakeArgString(getWavefrontSize64Path(Wave64)));
482 
483   CC1Args.push_back(LinkBitcodeFlag);
484   CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
485 }
486