1 //===--- HIP.cpp - HIP Tool and ToolChain Implementations -------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "HIP.h"
11 #include "CommonArgs.h"
12 #include "InputInfo.h"
13 #include "clang/Basic/Cuda.h"
14 #include "clang/Driver/Compilation.h"
15 #include "clang/Driver/Driver.h"
16 #include "clang/Driver/DriverDiagnostic.h"
17 #include "clang/Driver/Options.h"
18 #include "llvm/Support/FileSystem.h"
19 #include "llvm/Support/Path.h"
20 
21 using namespace clang::driver;
22 using namespace clang::driver::toolchains;
23 using namespace clang::driver::tools;
24 using namespace clang;
25 using namespace llvm::opt;
26 
27 #if _WIN32 || _WIN64
28 #define NULL_FILE "nul"
29 #else
30 #define NULL_FILE "/dev/null"
31 #endif
32 
33 namespace {
34 
addBCLib(Compilation & C,const ArgList & Args,ArgStringList & CmdArgs,ArgStringList LibraryPaths,StringRef BCName)35 static void addBCLib(Compilation &C, const ArgList &Args,
36                      ArgStringList &CmdArgs, ArgStringList LibraryPaths,
37                      StringRef BCName) {
38   StringRef FullName;
39   for (std::string LibraryPath : LibraryPaths) {
40     SmallString<128> Path(LibraryPath);
41     llvm::sys::path::append(Path, BCName);
42     FullName = Path;
43     if (llvm::sys::fs::exists(FullName)) {
44       CmdArgs.push_back(Args.MakeArgString(FullName));
45       return;
46     }
47   }
48   C.getDriver().Diag(diag::err_drv_no_such_file) << BCName;
49 }
50 
51 } // namespace
52 
constructLLVMLinkCommand(Compilation & C,const JobAction & JA,const InputInfoList & Inputs,const ArgList & Args,StringRef SubArchName,StringRef OutputFilePrefix) const53 const char *AMDGCN::Linker::constructLLVMLinkCommand(
54     Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
55     const ArgList &Args, StringRef SubArchName,
56     StringRef OutputFilePrefix) const {
57   ArgStringList CmdArgs;
58   // Add the input bc's created by compile step.
59   for (const auto &II : Inputs)
60     CmdArgs.push_back(II.getFilename());
61 
62   ArgStringList LibraryPaths;
63 
64   // Find in --hip-device-lib-path and HIP_LIBRARY_PATH.
65   for (auto Path : Args.getAllArgValues(options::OPT_hip_device_lib_path_EQ))
66     LibraryPaths.push_back(Args.MakeArgString(Path));
67 
68   addDirectoryList(Args, LibraryPaths, "-L", "HIP_DEVICE_LIB_PATH");
69 
70   llvm::SmallVector<std::string, 10> BCLibs;
71 
72   // Add bitcode library in --hip-device-lib.
73   for (auto Lib : Args.getAllArgValues(options::OPT_hip_device_lib_EQ)) {
74     BCLibs.push_back(Args.MakeArgString(Lib));
75   }
76 
77   // If --hip-device-lib is not set, add the default bitcode libraries.
78   if (BCLibs.empty()) {
79     // Get the bc lib file name for ISA version. For example,
80     // gfx803 => oclc_isa_version_803.amdgcn.bc.
81     std::string ISAVerBC =
82         "oclc_isa_version_" + SubArchName.drop_front(3).str() + ".amdgcn.bc";
83 
84     llvm::StringRef FlushDenormalControlBC;
85     if (Args.hasArg(options::OPT_fcuda_flush_denormals_to_zero))
86       FlushDenormalControlBC = "oclc_daz_opt_on.amdgcn.bc";
87     else
88       FlushDenormalControlBC = "oclc_daz_opt_off.amdgcn.bc";
89 
90     BCLibs.append({"hip.amdgcn.bc", "opencl.amdgcn.bc",
91                    "ocml.amdgcn.bc", "ockl.amdgcn.bc",
92                    "oclc_finite_only_off.amdgcn.bc",
93                    FlushDenormalControlBC,
94                    "oclc_correctly_rounded_sqrt_on.amdgcn.bc",
95                    "oclc_unsafe_math_off.amdgcn.bc", ISAVerBC});
96   }
97   for (auto Lib : BCLibs)
98     addBCLib(C, Args, CmdArgs, LibraryPaths, Lib);
99 
100   // Add an intermediate output file.
101   CmdArgs.push_back("-o");
102   std::string TmpName =
103       C.getDriver().GetTemporaryPath(OutputFilePrefix.str() + "-linked", "bc");
104   const char *OutputFileName =
105       C.addTempFile(C.getArgs().MakeArgString(TmpName));
106   CmdArgs.push_back(OutputFileName);
107   SmallString<128> ExecPath(C.getDriver().Dir);
108   llvm::sys::path::append(ExecPath, "llvm-link");
109   const char *Exec = Args.MakeArgString(ExecPath);
110   C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
111   return OutputFileName;
112 }
113 
constructOptCommand(Compilation & C,const JobAction & JA,const InputInfoList & Inputs,const llvm::opt::ArgList & Args,llvm::StringRef SubArchName,llvm::StringRef OutputFilePrefix,const char * InputFileName) const114 const char *AMDGCN::Linker::constructOptCommand(
115     Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
116     const llvm::opt::ArgList &Args, llvm::StringRef SubArchName,
117     llvm::StringRef OutputFilePrefix, const char *InputFileName) const {
118   // Construct opt command.
119   ArgStringList OptArgs;
120   // The input to opt is the output from llvm-link.
121   OptArgs.push_back(InputFileName);
122   // Pass optimization arg to opt.
123   if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
124     StringRef OOpt = "3";
125     if (A->getOption().matches(options::OPT_O4) ||
126         A->getOption().matches(options::OPT_Ofast))
127       OOpt = "3";
128     else if (A->getOption().matches(options::OPT_O0))
129       OOpt = "0";
130     else if (A->getOption().matches(options::OPT_O)) {
131       // -Os, -Oz, and -O(anything else) map to -O2
132       OOpt = llvm::StringSwitch<const char *>(A->getValue())
133                  .Case("1", "1")
134                  .Case("2", "2")
135                  .Case("3", "3")
136                  .Case("s", "2")
137                  .Case("z", "2")
138                  .Default("2");
139     }
140     OptArgs.push_back(Args.MakeArgString("-O" + OOpt));
141   }
142   OptArgs.push_back("-mtriple=amdgcn-amd-amdhsa");
143   OptArgs.push_back(Args.MakeArgString("-mcpu=" + SubArchName));
144   OptArgs.push_back("-o");
145   std::string TmpFileName = C.getDriver().GetTemporaryPath(
146       OutputFilePrefix.str() + "-optimized", "bc");
147   const char *OutputFileName =
148       C.addTempFile(C.getArgs().MakeArgString(TmpFileName));
149   OptArgs.push_back(OutputFileName);
150   SmallString<128> OptPath(C.getDriver().Dir);
151   llvm::sys::path::append(OptPath, "opt");
152   const char *OptExec = Args.MakeArgString(OptPath);
153   C.addCommand(llvm::make_unique<Command>(JA, *this, OptExec, OptArgs, Inputs));
154   return OutputFileName;
155 }
156 
constructLlcCommand(Compilation & C,const JobAction & JA,const InputInfoList & Inputs,const llvm::opt::ArgList & Args,llvm::StringRef SubArchName,llvm::StringRef OutputFilePrefix,const char * InputFileName) const157 const char *AMDGCN::Linker::constructLlcCommand(
158     Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
159     const llvm::opt::ArgList &Args, llvm::StringRef SubArchName,
160     llvm::StringRef OutputFilePrefix, const char *InputFileName) const {
161   // Construct llc command.
162   ArgStringList LlcArgs{InputFileName, "-mtriple=amdgcn-amd-amdhsa",
163                         "-filetype=obj", "-mattr=-code-object-v3",
164                         Args.MakeArgString("-mcpu=" + SubArchName), "-o"};
165   std::string LlcOutputFileName =
166       C.getDriver().GetTemporaryPath(OutputFilePrefix, "o");
167   const char *LlcOutputFile =
168       C.addTempFile(C.getArgs().MakeArgString(LlcOutputFileName));
169   LlcArgs.push_back(LlcOutputFile);
170   SmallString<128> LlcPath(C.getDriver().Dir);
171   llvm::sys::path::append(LlcPath, "llc");
172   const char *Llc = Args.MakeArgString(LlcPath);
173   C.addCommand(llvm::make_unique<Command>(JA, *this, Llc, LlcArgs, Inputs));
174   return LlcOutputFile;
175 }
176 
constructLldCommand(Compilation & C,const JobAction & JA,const InputInfoList & Inputs,const InputInfo & Output,const llvm::opt::ArgList & Args,const char * InputFileName) const177 void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA,
178                                           const InputInfoList &Inputs,
179                                           const InputInfo &Output,
180                                           const llvm::opt::ArgList &Args,
181                                           const char *InputFileName) const {
182   // Construct lld command.
183   // The output from ld.lld is an HSA code object file.
184   ArgStringList LldArgs{"-flavor",    "gnu", "--no-undefined",
185                         "-shared",    "-o",  Output.getFilename(),
186                         InputFileName};
187   SmallString<128> LldPath(C.getDriver().Dir);
188   llvm::sys::path::append(LldPath, "lld");
189   const char *Lld = Args.MakeArgString(LldPath);
190   C.addCommand(llvm::make_unique<Command>(JA, *this, Lld, LldArgs, Inputs));
191 }
192 
193 // Construct a clang-offload-bundler command to bundle code objects for
194 // different GPU's into a HIP fat binary.
constructHIPFatbinCommand(Compilation & C,const JobAction & JA,StringRef OutputFileName,const InputInfoList & Inputs,const llvm::opt::ArgList & Args,const Tool & T)195 void AMDGCN::constructHIPFatbinCommand(Compilation &C, const JobAction &JA,
196                   StringRef OutputFileName, const InputInfoList &Inputs,
197                   const llvm::opt::ArgList &Args, const Tool& T) {
198   // Construct clang-offload-bundler command to bundle object files for
199   // for different GPU archs.
200   ArgStringList BundlerArgs;
201   BundlerArgs.push_back(Args.MakeArgString("-type=o"));
202 
203   // ToDo: Remove the dummy host binary entry which is required by
204   // clang-offload-bundler.
205   std::string BundlerTargetArg = "-targets=host-x86_64-unknown-linux";
206   std::string BundlerInputArg = "-inputs=" NULL_FILE;
207 
208   for (const auto &II : Inputs) {
209     const auto* A = II.getAction();
210     BundlerTargetArg = BundlerTargetArg + ",hip-amdgcn-amd-amdhsa-" +
211                        StringRef(A->getOffloadingArch()).str();
212     BundlerInputArg = BundlerInputArg + "," + II.getFilename();
213   }
214   BundlerArgs.push_back(Args.MakeArgString(BundlerTargetArg));
215   BundlerArgs.push_back(Args.MakeArgString(BundlerInputArg));
216 
217   auto BundlerOutputArg =
218       Args.MakeArgString(std::string("-outputs=").append(OutputFileName));
219   BundlerArgs.push_back(BundlerOutputArg);
220 
221   SmallString<128> BundlerPath(C.getDriver().Dir);
222   llvm::sys::path::append(BundlerPath, "clang-offload-bundler");
223   const char *Bundler = Args.MakeArgString(BundlerPath);
224   C.addCommand(llvm::make_unique<Command>(JA, T, Bundler, BundlerArgs, Inputs));
225 }
226 
227 // For amdgcn the inputs of the linker job are device bitcode and output is
228 // object file. It calls llvm-link, opt, llc, then lld steps.
ConstructJob(Compilation & C,const JobAction & JA,const InputInfo & Output,const InputInfoList & Inputs,const ArgList & Args,const char * LinkingOutput) const229 void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA,
230                                    const InputInfo &Output,
231                                    const InputInfoList &Inputs,
232                                    const ArgList &Args,
233                                    const char *LinkingOutput) const {
234 
235   if (JA.getType() == types::TY_HIP_FATBIN)
236     return constructHIPFatbinCommand(C, JA, Output.getFilename(), Inputs, Args, *this);
237 
238   assert(getToolChain().getTriple().getArch() == llvm::Triple::amdgcn &&
239          "Unsupported target");
240 
241   std::string SubArchName = JA.getOffloadingArch();
242   assert(StringRef(SubArchName).startswith("gfx") && "Unsupported sub arch");
243 
244   // Prefix for temporary file name.
245   std::string Prefix =
246       llvm::sys::path::stem(Inputs[0].getFilename()).str() + "-" + SubArchName;
247 
248   // Each command outputs different files.
249   const char *LLVMLinkCommand =
250       constructLLVMLinkCommand(C, JA, Inputs, Args, SubArchName, Prefix);
251   const char *OptCommand = constructOptCommand(C, JA, Inputs, Args, SubArchName,
252                                                Prefix, LLVMLinkCommand);
253   const char *LlcCommand =
254       constructLlcCommand(C, JA, Inputs, Args, SubArchName, Prefix, OptCommand);
255   constructLldCommand(C, JA, Inputs, Output, Args, LlcCommand);
256 }
257 
HIPToolChain(const Driver & D,const llvm::Triple & Triple,const ToolChain & HostTC,const ArgList & Args)258 HIPToolChain::HIPToolChain(const Driver &D, const llvm::Triple &Triple,
259                              const ToolChain &HostTC, const ArgList &Args)
260     : ToolChain(D, Triple, Args), HostTC(HostTC) {
261   // Lookup binaries into the driver directory, this is used to
262   // discover the clang-offload-bundler executable.
263   getProgramPaths().push_back(getDriver().Dir);
264 }
265 
addClangTargetOptions(const llvm::opt::ArgList & DriverArgs,llvm::opt::ArgStringList & CC1Args,Action::OffloadKind DeviceOffloadingKind) const266 void HIPToolChain::addClangTargetOptions(
267     const llvm::opt::ArgList &DriverArgs,
268     llvm::opt::ArgStringList &CC1Args,
269     Action::OffloadKind DeviceOffloadingKind) const {
270   HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
271 
272   StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
273   assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
274   (void) GpuArch;
275   assert(DeviceOffloadingKind == Action::OFK_HIP &&
276          "Only HIP offloading kinds are supported for GPUs.");
277 
278   CC1Args.push_back("-target-cpu");
279   CC1Args.push_back(DriverArgs.MakeArgStringRef(GpuArch));
280   CC1Args.push_back("-fcuda-is-device");
281 
282   if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
283                          options::OPT_fno_cuda_flush_denormals_to_zero, false))
284     CC1Args.push_back("-fcuda-flush-denormals-to-zero");
285 
286   if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
287                          options::OPT_fno_cuda_approx_transcendentals, false))
288     CC1Args.push_back("-fcuda-approx-transcendentals");
289 
290   if (DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
291                          false))
292     CC1Args.push_back("-fgpu-rdc");
293 
294   // Default to "hidden" visibility, as object level linking will not be
295   // supported for the foreseeable future.
296   if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ,
297                          options::OPT_fvisibility_ms_compat))
298     CC1Args.append({"-fvisibility", "hidden"});
299 }
300 
301 llvm::opt::DerivedArgList *
TranslateArgs(const llvm::opt::DerivedArgList & Args,StringRef BoundArch,Action::OffloadKind DeviceOffloadKind) const302 HIPToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
303                              StringRef BoundArch,
304                              Action::OffloadKind DeviceOffloadKind) const {
305   DerivedArgList *DAL =
306       HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
307   if (!DAL)
308     DAL = new DerivedArgList(Args.getBaseArgs());
309 
310   const OptTable &Opts = getDriver().getOpts();
311 
312   for (Arg *A : Args) {
313     if (A->getOption().matches(options::OPT_Xarch__)) {
314       // Skip this argument unless the architecture matches BoundArch.
315       if (BoundArch.empty() || A->getValue(0) != BoundArch)
316         continue;
317 
318       unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
319       unsigned Prev = Index;
320       std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index));
321 
322       // If the argument parsing failed or more than one argument was
323       // consumed, the -Xarch_ argument's parameter tried to consume
324       // extra arguments. Emit an error and ignore.
325       //
326       // We also want to disallow any options which would alter the
327       // driver behavior; that isn't going to work in our model. We
328       // use isDriverOption() as an approximation, although things
329       // like -O4 are going to slip through.
330       if (!XarchArg || Index > Prev + 1) {
331         getDriver().Diag(diag::err_drv_invalid_Xarch_argument_with_args)
332             << A->getAsString(Args);
333         continue;
334       } else if (XarchArg->getOption().hasFlag(options::DriverOption)) {
335         getDriver().Diag(diag::err_drv_invalid_Xarch_argument_isdriver)
336             << A->getAsString(Args);
337         continue;
338       }
339       XarchArg->setBaseArg(A);
340       A = XarchArg.release();
341       DAL->AddSynthesizedArg(A);
342     }
343     DAL->append(A);
344   }
345 
346   if (!BoundArch.empty()) {
347     DAL->eraseArg(options::OPT_march_EQ);
348     DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
349   }
350 
351   return DAL;
352 }
353 
buildLinker() const354 Tool *HIPToolChain::buildLinker() const {
355   assert(getTriple().getArch() == llvm::Triple::amdgcn);
356   return new tools::AMDGCN::Linker(*this);
357 }
358 
addClangWarningOptions(ArgStringList & CC1Args) const359 void HIPToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
360   HostTC.addClangWarningOptions(CC1Args);
361 }
362 
363 ToolChain::CXXStdlibType
GetCXXStdlibType(const ArgList & Args) const364 HIPToolChain::GetCXXStdlibType(const ArgList &Args) const {
365   return HostTC.GetCXXStdlibType(Args);
366 }
367 
AddClangSystemIncludeArgs(const ArgList & DriverArgs,ArgStringList & CC1Args) const368 void HIPToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
369                                               ArgStringList &CC1Args) const {
370   HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
371 }
372 
AddClangCXXStdlibIncludeArgs(const ArgList & Args,ArgStringList & CC1Args) const373 void HIPToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
374                                                  ArgStringList &CC1Args) const {
375   HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
376 }
377 
AddIAMCUIncludeArgs(const ArgList & Args,ArgStringList & CC1Args) const378 void HIPToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
379                                         ArgStringList &CC1Args) const {
380   HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
381 }
382 
getSupportedSanitizers() const383 SanitizerMask HIPToolChain::getSupportedSanitizers() const {
384   // The HIPToolChain only supports sanitizers in the sense that it allows
385   // sanitizer arguments on the command line if they are supported by the host
386   // toolchain. The HIPToolChain will actually ignore any command line
387   // arguments for any of these "supported" sanitizers. That means that no
388   // sanitization of device code is actually supported at this time.
389   //
390   // This behavior is necessary because the host and device toolchains
391   // invocations often share the command line, so the device toolchain must
392   // tolerate flags meant only for the host toolchain.
393   return HostTC.getSupportedSanitizers();
394 }
395 
computeMSVCVersion(const Driver * D,const ArgList & Args) const396 VersionTuple HIPToolChain::computeMSVCVersion(const Driver *D,
397                                                const ArgList &Args) const {
398   return HostTC.computeMSVCVersion(D, Args);
399 }
400