1 //===- AMDGPUOpenMP.cpp - AMDGPUOpenMP ToolChain Implementation -*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "AMDGPUOpenMP.h"
10 #include "AMDGPU.h"
11 #include "CommonArgs.h"
12 #include "ToolChains/ROCm.h"
13 #include "clang/Basic/DiagnosticDriver.h"
14 #include "clang/Driver/Compilation.h"
15 #include "clang/Driver/Driver.h"
16 #include "clang/Driver/DriverDiagnostic.h"
17 #include "clang/Driver/InputInfo.h"
18 #include "clang/Driver/Options.h"
19 #include "clang/Driver/Tool.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/Support/FileSystem.h"
22 #include "llvm/Support/FormatAdapters.h"
23 #include "llvm/Support/FormatVariadic.h"
24 #include "llvm/Support/Path.h"
25
26 using namespace clang::driver;
27 using namespace clang::driver::toolchains;
28 using namespace clang::driver::tools;
29 using namespace clang;
30 using namespace llvm::opt;
31
32 namespace {
33
getOutputFileName(Compilation & C,StringRef Base,const char * Postfix,const char * Extension)34 static const char *getOutputFileName(Compilation &C, StringRef Base,
35 const char *Postfix,
36 const char *Extension) {
37 const char *OutputFileName;
38 if (C.getDriver().isSaveTempsEnabled()) {
39 OutputFileName =
40 C.getArgs().MakeArgString(Base.str() + Postfix + "." + Extension);
41 } else {
42 std::string TmpName =
43 C.getDriver().GetTemporaryPath(Base.str() + Postfix, Extension);
44 OutputFileName = C.addTempFile(C.getArgs().MakeArgString(TmpName));
45 }
46 return OutputFileName;
47 }
48
addLLCOptArg(const llvm::opt::ArgList & Args,llvm::opt::ArgStringList & CmdArgs)49 static void addLLCOptArg(const llvm::opt::ArgList &Args,
50 llvm::opt::ArgStringList &CmdArgs) {
51 if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
52 StringRef OOpt = "0";
53 if (A->getOption().matches(options::OPT_O4) ||
54 A->getOption().matches(options::OPT_Ofast))
55 OOpt = "3";
56 else if (A->getOption().matches(options::OPT_O0))
57 OOpt = "0";
58 else if (A->getOption().matches(options::OPT_O)) {
59 // Clang and opt support -Os/-Oz; llc only supports -O0, -O1, -O2 and -O3
60 // so we map -Os/-Oz to -O2.
61 // Only clang supports -Og, and maps it to -O1.
62 // We map anything else to -O2.
63 OOpt = llvm::StringSwitch<const char *>(A->getValue())
64 .Case("1", "1")
65 .Case("2", "2")
66 .Case("3", "3")
67 .Case("s", "2")
68 .Case("z", "2")
69 .Case("g", "1")
70 .Default("0");
71 }
72 CmdArgs.push_back(Args.MakeArgString("-O" + OOpt));
73 }
74 }
75
checkSystemForAMDGPU(const ArgList & Args,const AMDGPUToolChain & TC,std::string & GPUArch)76 static bool checkSystemForAMDGPU(const ArgList &Args, const AMDGPUToolChain &TC,
77 std::string &GPUArch) {
78 if (auto Err = TC.getSystemGPUArch(Args, GPUArch)) {
79 std::string ErrMsg =
80 llvm::formatv("{0}", llvm::fmt_consume(std::move(Err)));
81 TC.getDriver().Diag(diag::err_drv_undetermined_amdgpu_arch) << ErrMsg;
82 return false;
83 }
84
85 return true;
86 }
87 } // namespace
88
constructLLVMLinkCommand(const toolchains::AMDGPUOpenMPToolChain & AMDGPUOpenMPTC,Compilation & C,const JobAction & JA,const InputInfoList & Inputs,const ArgList & Args,StringRef SubArchName,StringRef OutputFilePrefix) const89 const char *AMDGCN::OpenMPLinker::constructLLVMLinkCommand(
90 const toolchains::AMDGPUOpenMPToolChain &AMDGPUOpenMPTC, Compilation &C,
91 const JobAction &JA, const InputInfoList &Inputs, const ArgList &Args,
92 StringRef SubArchName, StringRef OutputFilePrefix) const {
93 ArgStringList CmdArgs;
94
95 for (const auto &II : Inputs)
96 if (II.isFilename())
97 CmdArgs.push_back(II.getFilename());
98
99 bool HasLibm = false;
100 if (Args.hasArg(options::OPT_l)) {
101 auto Lm = Args.getAllArgValues(options::OPT_l);
102 for (auto &Lib : Lm) {
103 if (Lib == "m") {
104 HasLibm = true;
105 break;
106 }
107 }
108
109 if (HasLibm) {
110 // This is not certain to work. The device libs added here, and passed to
111 // llvm-link, are missing attributes that they expect to be inserted when
112 // passed to mlink-builtin-bitcode. The amdgpu backend does not generate
113 // conservatively correct code when attributes are missing, so this may
114 // be the root cause of miscompilations. Passing via mlink-builtin-bitcode
115 // ultimately hits CodeGenModule::addDefaultFunctionDefinitionAttributes
116 // on each function, see D28538 for context.
117 // Potential workarounds:
118 // - unconditionally link all of the device libs to every translation
119 // unit in clang via mlink-builtin-bitcode
120 // - build a libm bitcode file as part of the DeviceRTL and explictly
121 // mlink-builtin-bitcode the rocm device libs components at build time
122 // - drop this llvm-link fork in favour or some calls into LLVM, chosen
123 // to do basically the same work as llvm-link but with that call first
124 // - write an opt pass that sets that on every function it sees and pipe
125 // the device-libs bitcode through that on the way to this llvm-link
126 SmallVector<std::string, 12> BCLibs =
127 AMDGPUOpenMPTC.getCommonDeviceLibNames(Args, SubArchName.str());
128 for (StringRef BCFile : BCLibs)
129 CmdArgs.push_back(Args.MakeArgString(BCFile));
130 }
131 }
132
133 AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "amdgcn",
134 SubArchName, /*isBitCodeSDL=*/true,
135 /*postClangLink=*/false);
136 // Add an intermediate output file.
137 CmdArgs.push_back("-o");
138 const char *OutputFileName =
139 getOutputFileName(C, OutputFilePrefix, "-linked", "bc");
140 CmdArgs.push_back(OutputFileName);
141 const char *Exec =
142 Args.MakeArgString(getToolChain().GetProgramPath("llvm-link"));
143 C.addCommand(std::make_unique<Command>(
144 JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs,
145 InputInfo(&JA, Args.MakeArgString(OutputFileName))));
146
147 // If we linked in libm definitions late we run another round of optimizations
148 // to inline the definitions and fold what is foldable.
149 if (HasLibm) {
150 ArgStringList OptCmdArgs;
151 const char *OptOutputFileName =
152 getOutputFileName(C, OutputFilePrefix, "-linked-opt", "bc");
153 addLLCOptArg(Args, OptCmdArgs);
154 OptCmdArgs.push_back(OutputFileName);
155 OptCmdArgs.push_back("-o");
156 OptCmdArgs.push_back(OptOutputFileName);
157 const char *OptExec =
158 Args.MakeArgString(getToolChain().GetProgramPath("opt"));
159 C.addCommand(std::make_unique<Command>(
160 JA, *this, ResponseFileSupport::AtFileCurCP(), OptExec, OptCmdArgs,
161 InputInfo(&JA, Args.MakeArgString(OutputFileName)),
162 InputInfo(&JA, Args.MakeArgString(OptOutputFileName))));
163 OutputFileName = OptOutputFileName;
164 }
165
166 return OutputFileName;
167 }
168
constructLlcCommand(Compilation & C,const JobAction & JA,const InputInfoList & Inputs,const llvm::opt::ArgList & Args,llvm::StringRef SubArchName,llvm::StringRef OutputFilePrefix,const char * InputFileName,bool OutputIsAsm) const169 const char *AMDGCN::OpenMPLinker::constructLlcCommand(
170 Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
171 const llvm::opt::ArgList &Args, llvm::StringRef SubArchName,
172 llvm::StringRef OutputFilePrefix, const char *InputFileName,
173 bool OutputIsAsm) const {
174 // Construct llc command.
175 ArgStringList LlcArgs;
176 // The input to llc is the output from opt.
177 LlcArgs.push_back(InputFileName);
178 // Pass optimization arg to llc.
179 addLLCOptArg(Args, LlcArgs);
180 LlcArgs.push_back("-mtriple=amdgcn-amd-amdhsa");
181 LlcArgs.push_back(Args.MakeArgString("-mcpu=" + SubArchName));
182 LlcArgs.push_back(
183 Args.MakeArgString(Twine("-filetype=") + (OutputIsAsm ? "asm" : "obj")));
184
185 for (const Arg *A : Args.filtered(options::OPT_mllvm)) {
186 LlcArgs.push_back(A->getValue(0));
187 }
188
189 // Add output filename
190 LlcArgs.push_back("-o");
191 const char *LlcOutputFile =
192 getOutputFileName(C, OutputFilePrefix, "", OutputIsAsm ? "s" : "o");
193 LlcArgs.push_back(LlcOutputFile);
194 const char *Llc = Args.MakeArgString(getToolChain().GetProgramPath("llc"));
195 C.addCommand(std::make_unique<Command>(
196 JA, *this, ResponseFileSupport::AtFileCurCP(), Llc, LlcArgs, Inputs,
197 InputInfo(&JA, Args.MakeArgString(LlcOutputFile))));
198 return LlcOutputFile;
199 }
200
constructLldCommand(Compilation & C,const JobAction & JA,const InputInfoList & Inputs,const InputInfo & Output,const llvm::opt::ArgList & Args,const char * InputFileName) const201 void AMDGCN::OpenMPLinker::constructLldCommand(
202 Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
203 const InputInfo &Output, const llvm::opt::ArgList &Args,
204 const char *InputFileName) const {
205 // Construct lld command.
206 // The output from ld.lld is an HSA code object file.
207 ArgStringList LldArgs{"-flavor", "gnu", "--no-undefined",
208 "-shared", "-o", Output.getFilename(),
209 InputFileName};
210
211 const char *Lld = Args.MakeArgString(getToolChain().GetProgramPath("lld"));
212 C.addCommand(std::make_unique<Command>(
213 JA, *this, ResponseFileSupport::AtFileCurCP(), Lld, LldArgs, Inputs,
214 InputInfo(&JA, Args.MakeArgString(Output.getFilename()))));
215 }
216
217 // For amdgcn the inputs of the linker job are device bitcode and output is
218 // object file. It calls llvm-link, opt, llc, then lld steps.
ConstructJob(Compilation & C,const JobAction & JA,const InputInfo & Output,const InputInfoList & Inputs,const ArgList & Args,const char * LinkingOutput) const219 void AMDGCN::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
220 const InputInfo &Output,
221 const InputInfoList &Inputs,
222 const ArgList &Args,
223 const char *LinkingOutput) const {
224 const ToolChain &TC = getToolChain();
225 assert(getToolChain().getTriple().isAMDGCN() && "Unsupported target");
226
227 const toolchains::AMDGPUOpenMPToolChain &AMDGPUOpenMPTC =
228 static_cast<const toolchains::AMDGPUOpenMPToolChain &>(TC);
229
230 std::string GPUArch = Args.getLastArgValue(options::OPT_march_EQ).str();
231 if (GPUArch.empty()) {
232 if (!checkSystemForAMDGPU(Args, AMDGPUOpenMPTC, GPUArch))
233 return;
234 }
235
236 // Prefix for temporary file name.
237 std::string Prefix;
238 for (const auto &II : Inputs)
239 if (II.isFilename())
240 Prefix = llvm::sys::path::stem(II.getFilename()).str() + "-" + GPUArch;
241 assert(Prefix.length() && "no linker inputs are files ");
242
243 // Each command outputs different files.
244 const char *LLVMLinkCommand = constructLLVMLinkCommand(
245 AMDGPUOpenMPTC, C, JA, Inputs, Args, GPUArch, Prefix);
246
247 // Produce readable assembly if save-temps is enabled.
248 if (C.getDriver().isSaveTempsEnabled())
249 constructLlcCommand(C, JA, Inputs, Args, GPUArch, Prefix, LLVMLinkCommand,
250 /*OutputIsAsm=*/true);
251 const char *LlcCommand = constructLlcCommand(C, JA, Inputs, Args, GPUArch,
252 Prefix, LLVMLinkCommand);
253 constructLldCommand(C, JA, Inputs, Output, Args, LlcCommand);
254 }
255
AMDGPUOpenMPToolChain(const Driver & D,const llvm::Triple & Triple,const ToolChain & HostTC,const ArgList & Args)256 AMDGPUOpenMPToolChain::AMDGPUOpenMPToolChain(const Driver &D,
257 const llvm::Triple &Triple,
258 const ToolChain &HostTC,
259 const ArgList &Args)
260 : ROCMToolChain(D, Triple, Args), HostTC(HostTC) {
261 // Lookup binaries into the driver directory, this is used to
262 // discover the clang-offload-bundler executable.
263 getProgramPaths().push_back(getDriver().Dir);
264 }
265
addClangTargetOptions(const llvm::opt::ArgList & DriverArgs,llvm::opt::ArgStringList & CC1Args,Action::OffloadKind DeviceOffloadingKind) const266 void AMDGPUOpenMPToolChain::addClangTargetOptions(
267 const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
268 Action::OffloadKind DeviceOffloadingKind) const {
269 HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
270
271 std::string GPUArch = DriverArgs.getLastArgValue(options::OPT_march_EQ).str();
272 if (GPUArch.empty()) {
273 if (!checkSystemForAMDGPU(DriverArgs, *this, GPUArch))
274 return;
275 }
276
277 assert(DeviceOffloadingKind == Action::OFK_OpenMP &&
278 "Only OpenMP offloading kinds are supported.");
279
280 CC1Args.push_back("-target-cpu");
281 CC1Args.push_back(DriverArgs.MakeArgStringRef(GPUArch));
282 CC1Args.push_back("-fcuda-is-device");
283
284 if (DriverArgs.hasArg(options::OPT_nogpulib))
285 return;
286
287 // Link the bitcode library late if we're using device LTO.
288 if (getDriver().isUsingLTO(/* IsOffload */ true))
289 return;
290
291 addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, GPUArch, getTriple());
292 }
293
/// Build the argument list used for this toolchain.
///
/// Starts from the host toolchain's translation (or a fresh DerivedArgList
/// when the host returns null) and appends the arguments of \p Args. For
/// OpenMP offloading, arguments already in the list are not appended again
/// and a -march= argument is added when none is present, taken from
/// \p BoundArch or, if that is empty, from the system query. For other
/// offload kinds a non-empty \p BoundArch replaces any -march= argument.
llvm::opt::DerivedArgList *AMDGPUOpenMPToolChain::TranslateArgs(
    const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
    Action::OffloadKind DeviceOffloadKind) const {
  DerivedArgList *DAL =
      HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
  if (!DAL)
    DAL = new DerivedArgList(Args.getBaseArgs());

  const OptTable &Opts = getDriver().getOpts();

  if (DeviceOffloadKind != Action::OFK_OpenMP) {
    for (Arg *A : Args)
      DAL->append(A);

    if (!BoundArch.empty()) {
      DAL->eraseArg(options::OPT_march_EQ);
      DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
                        BoundArch);
    }
    return DAL;
  }

  // OpenMP: append only the arguments the list does not already hold.
  for (Arg *A : Args) {
    if (llvm::is_contained(*DAL, A))
      continue;
    DAL->append(A);
  }

  if (DAL->hasArg(options::OPT_march_EQ))
    return DAL;

  // No -march= yet: use the bound architecture, or query the system when
  // there is none. The result of the query is not checked here.
  std::string Arch = BoundArch.str();
  if (BoundArch.empty())
    checkSystemForAMDGPU(Args, *this, Arch);
  DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), Arch);
  return DAL;
}
331
buildLinker() const332 Tool *AMDGPUOpenMPToolChain::buildLinker() const {
333 assert(getTriple().isAMDGCN());
334 return new tools::AMDGCN::OpenMPLinker(*this);
335 }
336
addClangWarningOptions(ArgStringList & CC1Args) const337 void AMDGPUOpenMPToolChain::addClangWarningOptions(
338 ArgStringList &CC1Args) const {
339 HostTC.addClangWarningOptions(CC1Args);
340 }
341
342 ToolChain::CXXStdlibType
GetCXXStdlibType(const ArgList & Args) const343 AMDGPUOpenMPToolChain::GetCXXStdlibType(const ArgList &Args) const {
344 return HostTC.GetCXXStdlibType(Args);
345 }
346
AddClangSystemIncludeArgs(const ArgList & DriverArgs,ArgStringList & CC1Args) const347 void AMDGPUOpenMPToolChain::AddClangSystemIncludeArgs(
348 const ArgList &DriverArgs, ArgStringList &CC1Args) const {
349 HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
350 }
351
AddIAMCUIncludeArgs(const ArgList & Args,ArgStringList & CC1Args) const352 void AMDGPUOpenMPToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
353 ArgStringList &CC1Args) const {
354 HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
355 }
356
getSupportedSanitizers() const357 SanitizerMask AMDGPUOpenMPToolChain::getSupportedSanitizers() const {
358 // The AMDGPUOpenMPToolChain only supports sanitizers in the sense that it
359 // allows sanitizer arguments on the command line if they are supported by the
360 // host toolchain. The AMDGPUOpenMPToolChain will actually ignore any command
361 // line arguments for any of these "supported" sanitizers. That means that no
362 // sanitization of device code is actually supported at this time.
363 //
364 // This behavior is necessary because the host and device toolchains
365 // invocations often share the command line, so the device toolchain must
366 // tolerate flags meant only for the host toolchain.
367 return HostTC.getSupportedSanitizers();
368 }
369
370 VersionTuple
computeMSVCVersion(const Driver * D,const ArgList & Args) const371 AMDGPUOpenMPToolChain::computeMSVCVersion(const Driver *D,
372 const ArgList &Args) const {
373 return HostTC.computeMSVCVersion(D, Args);
374 }
375