1 //===--- AMDGPU.cpp - AMDGPU ToolChain Implementations ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "CommonArgs.h"
11 #include "InputInfo.h"
12 #include "clang/Driver/Compilation.h"
13 #include "clang/Driver/DriverDiagnostic.h"
14 #include "llvm/Option/ArgList.h"
15 #include "llvm/Support/Path.h"
16 #include "llvm/Support/VirtualFileSystem.h"
17 
18 using namespace clang::driver;
19 using namespace clang::driver::tools;
20 using namespace clang::driver::toolchains;
21 using namespace clang;
22 using namespace llvm::opt;
23 
24 RocmInstallationDetector::RocmInstallationDetector(
25     const Driver &D, const llvm::Triple &HostTriple,
26     const llvm::opt::ArgList &Args)
27     : D(D) {
28   struct Candidate {
29     std::string Path;
30     bool StrictChecking;
31 
32     Candidate(std::string Path, bool StrictChecking = false)
33         : Path(Path), StrictChecking(StrictChecking) {}
34   };
35 
36   SmallVector<Candidate, 4> Candidates;
37 
38   if (Args.hasArg(clang::driver::options::OPT_rocm_path_EQ)) {
39     Candidates.emplace_back(
40         Args.getLastArgValue(clang::driver::options::OPT_rocm_path_EQ).str());
41   } else {
42     // Try to find relative to the compiler binary.
43     const char *InstallDir = D.getInstalledDir();
44 
45     // Check both a normal Unix prefix position of the clang binary, as well as
46     // the Windows-esque layout the ROCm packages use with the host architecture
47     // subdirectory of bin.
48 
49     StringRef ParentDir = llvm::sys::path::parent_path(InstallDir);
50     if (ParentDir == HostTriple.getArchName())
51       ParentDir = llvm::sys::path::parent_path(ParentDir);
52 
53     if (ParentDir == "bin") {
54       Candidates.emplace_back(llvm::sys::path::parent_path(ParentDir).str(),
55                               /*StrictChecking=*/true);
56     }
57 
58     Candidates.emplace_back(D.SysRoot + "/opt/rocm");
59   }
60 
61   bool NoBuiltinLibs = Args.hasArg(options::OPT_nogpulib);
62 
63   for (const auto &Candidate : Candidates) {
64     InstallPath = Candidate.Path;
65     if (InstallPath.empty() || !D.getVFS().exists(InstallPath))
66       continue;
67 
68     // FIXME: The install path situation is a real mess.
69 
70     // For a cmake install, these are placed directly in
71     // ${INSTALL_PREFIX}/lib
72 
73     // In the separate OpenCL builds, the bitcode libraries are placed in
74     // ${OPENCL_ROOT}/lib/x86_64/bitcode/*
75 
76     // For the rocm installed packages, these are placed at
77     // /opt/rocm/opencl/lib/x86_64/bitcode
78 
79     // An additional copy is installed, in scattered locations between
80     // /opt/rocm/hcc/rocdl/oclc
81     // /opt/rocm/hcc/rocdl/ockl
82     // /opt/rocm/hcc/rocdl/lib
83     //
84     // Yet another complete set is installed to
85     // /opt/rocm/hcc/rocdl/lib
86 
87     // For now just recognize the opencl package layout.
88 
89     // BinPath = InstallPath + "/bin";
90     llvm::sys::path::append(IncludePath, InstallPath, "include");
91     llvm::sys::path::append(LibDevicePath, InstallPath, "lib");
92 
93     auto &FS = D.getVFS();
94 
95     // We don't need the include path for OpenCL, since clang already ships with
96     // the default header.
97 
98     bool CheckLibDevice = (!NoBuiltinLibs || Candidate.StrictChecking);
99     if (CheckLibDevice && !FS.exists(LibDevicePath))
100       continue;
101 
102     const StringRef Suffix(".amdgcn.bc");
103 
104     std::error_code EC;
105     for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE;
106          !EC && LI != LE; LI = LI.increment(EC)) {
107       StringRef FilePath = LI->path();
108       StringRef FileName = llvm::sys::path::filename(FilePath);
109       if (!FileName.endswith(Suffix))
110         continue;
111 
112       StringRef BaseName = FileName.drop_back(Suffix.size());
113 
114       if (BaseName == "ocml") {
115         OCML = FilePath;
116       } else if (BaseName == "ockl") {
117         OCKL = FilePath;
118       } else if (BaseName == "opencl") {
119         OpenCL = FilePath;
120       } else if (BaseName == "hip") {
121         HIP = FilePath;
122       } else if (BaseName == "oclc_finite_only_off") {
123         FiniteOnly.Off = FilePath;
124       } else if (BaseName == "oclc_finite_only_on") {
125         FiniteOnly.On = FilePath;
126       } else if (BaseName == "oclc_daz_opt_on") {
127         DenormalsAreZero.On = FilePath;
128       } else if (BaseName == "oclc_daz_opt_off") {
129         DenormalsAreZero.Off = FilePath;
130       } else if (BaseName == "oclc_correctly_rounded_sqrt_on") {
131         CorrectlyRoundedSqrt.On = FilePath;
132       } else if (BaseName == "oclc_correctly_rounded_sqrt_off") {
133         CorrectlyRoundedSqrt.Off = FilePath;
134       } else if (BaseName == "oclc_unsafe_math_on") {
135         UnsafeMath.On = FilePath;
136       } else if (BaseName == "oclc_unsafe_math_off") {
137         UnsafeMath.Off = FilePath;
138       } else if (BaseName == "oclc_wavefrontsize64_on") {
139         WavefrontSize64.On = FilePath;
140       } else if (BaseName == "oclc_wavefrontsize64_off") {
141         WavefrontSize64.Off = FilePath;
142       } else {
143         // Process all bitcode filenames that look like
144         // ocl_isa_version_XXX.amdgcn.bc
145         const StringRef DeviceLibPrefix = "oclc_isa_version_";
146         if (!BaseName.startswith(DeviceLibPrefix))
147           continue;
148 
149         StringRef IsaVersionNumber =
150             BaseName.drop_front(DeviceLibPrefix.size());
151 
152         llvm::Twine GfxName = Twine("gfx") + IsaVersionNumber;
153         SmallString<8> Tmp;
154         LibDeviceMap.insert(
155             std::make_pair(GfxName.toStringRef(Tmp), FilePath.str()));
156       }
157     }
158 
159     if (!NoBuiltinLibs) {
160       // Check that the required non-target libraries are all available.
161       if (!allGenericLibsValid())
162         continue;
163 
164       // Check that we have found at least one libdevice that we can link in if
165       // -nobuiltinlib hasn't been specified.
166       if (LibDeviceMap.empty())
167         continue;
168     }
169 
170     IsValid = true;
171     break;
172   }
173 }
174 
175 void RocmInstallationDetector::print(raw_ostream &OS) const {
176   if (isValid())
177     OS << "Found ROCm installation: " << InstallPath << '\n';
178 }
179 
180 void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
181                                   const InputInfo &Output,
182                                   const InputInfoList &Inputs,
183                                   const ArgList &Args,
184                                   const char *LinkingOutput) const {
185 
186   std::string Linker = getToolChain().GetProgramPath(getShortName());
187   ArgStringList CmdArgs;
188   AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
189   CmdArgs.push_back("-shared");
190   CmdArgs.push_back("-o");
191   CmdArgs.push_back(Output.getFilename());
192   C.addCommand(std::make_unique<Command>(JA, *this, Args.MakeArgString(Linker),
193                                           CmdArgs, Inputs));
194 }
195 
196 void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
197                                      const llvm::opt::ArgList &Args,
198                                      std::vector<StringRef> &Features) {
199   if (const Arg *dAbi = Args.getLastArg(options::OPT_mamdgpu_debugger_abi))
200     D.Diag(diag::err_drv_clang_unsupported) << dAbi->getAsString(Args);
201 
202   if (Args.getLastArg(options::OPT_mwavefrontsize64)) {
203     Features.push_back("-wavefrontsize16");
204     Features.push_back("-wavefrontsize32");
205     Features.push_back("+wavefrontsize64");
206   }
207   if (Args.getLastArg(options::OPT_mno_wavefrontsize64)) {
208     Features.push_back("-wavefrontsize16");
209     Features.push_back("+wavefrontsize32");
210     Features.push_back("-wavefrontsize64");
211   }
212 
213   handleTargetFeaturesGroup(
214     Args, Features, options::OPT_m_amdgpu_Features_Group);
215 }
216 
217 /// AMDGPU Toolchain
218 AMDGPUToolChain::AMDGPUToolChain(const Driver &D, const llvm::Triple &Triple,
219                                  const ArgList &Args)
220     : Generic_ELF(D, Triple, Args),
221       OptionsDefault({{options::OPT_O, "3"},
222                       {options::OPT_cl_std_EQ, "CL1.2"}}) {}
223 
224 Tool *AMDGPUToolChain::buildLinker() const {
225   return new tools::amdgpu::Linker(*this);
226 }
227 
228 DerivedArgList *
229 AMDGPUToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch,
230                                Action::OffloadKind DeviceOffloadKind) const {
231 
232   DerivedArgList *DAL =
233       Generic_ELF::TranslateArgs(Args, BoundArch, DeviceOffloadKind);
234 
235   // Do nothing if not OpenCL (-x cl)
236   if (!Args.getLastArgValue(options::OPT_x).equals("cl"))
237     return DAL;
238 
239   if (!DAL)
240     DAL = new DerivedArgList(Args.getBaseArgs());
241   for (auto *A : Args)
242     DAL->append(A);
243 
244   const OptTable &Opts = getDriver().getOpts();
245 
246   // Phase 1 (.cl -> .bc)
247   if (Args.hasArg(options::OPT_c) && Args.hasArg(options::OPT_emit_llvm)) {
248     DAL->AddFlagArg(nullptr, Opts.getOption(getTriple().isArch64Bit()
249                                                 ? options::OPT_m64
250                                                 : options::OPT_m32));
251 
252     // Have to check OPT_O4, OPT_O0 & OPT_Ofast separately
253     // as they defined that way in Options.td
254     if (!Args.hasArg(options::OPT_O, options::OPT_O0, options::OPT_O4,
255                      options::OPT_Ofast))
256       DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_O),
257                         getOptionDefault(options::OPT_O));
258   }
259 
260   return DAL;
261 }
262 
263 bool AMDGPUToolChain::getDefaultDenormsAreZeroForTarget(
264     llvm::AMDGPU::GPUKind Kind) {
265 
266   // Assume nothing without a specific target.
267   if (Kind == llvm::AMDGPU::GK_NONE)
268     return false;
269 
270   const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
271 
272   // Default to enabling f32 denormals by default on subtargets where fma is
273   // fast with denormals
274   const bool BothDenormAndFMAFast =
275       (ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) &&
276       (ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32);
277   return !BothDenormAndFMAFast;
278 }
279 
280 llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType(
281     const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
282     const llvm::fltSemantics *FPType) const {
283   // Denormals should always be enabled for f16 and f64.
284   if (!FPType || FPType != &llvm::APFloat::IEEEsingle())
285     return llvm::DenormalMode::getIEEE();
286 
287   if (JA.getOffloadingDeviceKind() == Action::OFK_HIP ||
288       JA.getOffloadingDeviceKind() == Action::OFK_Cuda) {
289     auto Kind = llvm::AMDGPU::parseArchAMDGCN(JA.getOffloadingArch());
290     if (FPType && FPType == &llvm::APFloat::IEEEsingle() &&
291         DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
292                            options::OPT_fno_cuda_flush_denormals_to_zero,
293                            getDefaultDenormsAreZeroForTarget(Kind)))
294       return llvm::DenormalMode::getPreserveSign();
295 
296     return llvm::DenormalMode::getIEEE();
297   }
298 
299   const StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
300   auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
301 
302   // TODO: There are way too many flags that change this. Do we need to check
303   // them all?
304   bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
305              getDefaultDenormsAreZeroForTarget(Kind);
306 
307   // Outputs are flushed to zero (FTZ), preserving sign. Denormal inputs are
308   // also implicit treated as zero (DAZ).
309   return DAZ ? llvm::DenormalMode::getPreserveSign() :
310                llvm::DenormalMode::getIEEE();
311 }
312 
313 /// ROCM Toolchain
314 ROCMToolChain::ROCMToolChain(const Driver &D, const llvm::Triple &Triple,
315                              const ArgList &Args)
316   : AMDGPUToolChain(D, Triple, Args),
317     RocmInstallation(D, Triple, Args) { }
318 
319 void AMDGPUToolChain::addClangTargetOptions(
320     const llvm::opt::ArgList &DriverArgs,
321     llvm::opt::ArgStringList &CC1Args,
322     Action::OffloadKind DeviceOffloadingKind) const {
323   // Default to "hidden" visibility, as object level linking will not be
324   // supported for the foreseeable future.
325   if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ,
326                          options::OPT_fvisibility_ms_compat)) {
327     CC1Args.push_back("-fvisibility");
328     CC1Args.push_back("hidden");
329     CC1Args.push_back("-fapply-global-visibility-to-externs");
330   }
331 }
332 
333 void ROCMToolChain::addClangTargetOptions(
334     const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
335     Action::OffloadKind DeviceOffloadingKind) const {
336   AMDGPUToolChain::addClangTargetOptions(DriverArgs, CC1Args,
337                                          DeviceOffloadingKind);
338 
339   if (DriverArgs.hasArg(options::OPT_nogpulib))
340     return;
341 
342   if (!RocmInstallation.isValid()) {
343     getDriver().Diag(diag::err_drv_no_rocm_installation);
344     return;
345   }
346 
347   // Get the device name and canonicalize it
348   const StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
349   auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
350   const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
351   std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
352   if (LibDeviceFile.empty()) {
353     getDriver().Diag(diag::err_drv_no_rocm_device_lib) << GpuArch;
354     return;
355   }
356 
357   const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
358   static bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32);
359 
360   bool Wave64 = !HasWave32 || DriverArgs.hasFlag(
361     options::OPT_mwavefrontsize64, options::OPT_mno_wavefrontsize64, false);
362 
363   // TODO: There are way too many flags that change this. Do we need to check
364   // them all?
365   bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
366              getDefaultDenormsAreZeroForTarget(Kind);
367   bool FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only);
368 
369   bool UnsafeMathOpt =
370       DriverArgs.hasArg(options::OPT_cl_unsafe_math_optimizations);
371   bool FastRelaxedMath = DriverArgs.hasArg(options::OPT_cl_fast_relaxed_math);
372   bool CorrectSqrt =
373       DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt);
374 
375   // Add the OpenCL specific bitcode library.
376   CC1Args.push_back("-mlink-builtin-bitcode");
377   CC1Args.push_back(DriverArgs.MakeArgString(RocmInstallation.getOpenCLPath()));
378 
379   // Add the generic set of libraries.
380   RocmInstallation.addCommonBitcodeLibCC1Args(
381       DriverArgs, CC1Args, LibDeviceFile, Wave64, DAZ, FiniteOnly,
382       UnsafeMathOpt, FastRelaxedMath, CorrectSqrt);
383 }
384 
385 void RocmInstallationDetector::addCommonBitcodeLibCC1Args(
386     const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
387     StringRef LibDeviceFile, bool Wave64, bool DAZ, bool FiniteOnly,
388     bool UnsafeMathOpt, bool FastRelaxedMath, bool CorrectSqrt) const {
389   static const char LinkBitcodeFlag[] = "-mlink-builtin-bitcode";
390 
391   CC1Args.push_back(LinkBitcodeFlag);
392   CC1Args.push_back(DriverArgs.MakeArgString(getOCMLPath()));
393 
394   CC1Args.push_back(LinkBitcodeFlag);
395   CC1Args.push_back(DriverArgs.MakeArgString(getOCKLPath()));
396 
397   CC1Args.push_back(LinkBitcodeFlag);
398   CC1Args.push_back(DriverArgs.MakeArgString(getDenormalsAreZeroPath(DAZ)));
399 
400   CC1Args.push_back(LinkBitcodeFlag);
401   CC1Args.push_back(DriverArgs.MakeArgString(
402       getUnsafeMathPath(UnsafeMathOpt || FastRelaxedMath)));
403 
404   CC1Args.push_back(LinkBitcodeFlag);
405   CC1Args.push_back(DriverArgs.MakeArgString(
406       getFiniteOnlyPath(FiniteOnly || FastRelaxedMath)));
407 
408   CC1Args.push_back(LinkBitcodeFlag);
409   CC1Args.push_back(
410       DriverArgs.MakeArgString(getCorrectlyRoundedSqrtPath(CorrectSqrt)));
411 
412   CC1Args.push_back(LinkBitcodeFlag);
413   CC1Args.push_back(DriverArgs.MakeArgString(getWavefrontSize64Path(Wave64)));
414 
415   CC1Args.push_back(LinkBitcodeFlag);
416   CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
417 }
418