1 //===--- AMDGPU.cpp - AMDGPU ToolChain Implementations ----------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDGPU.h" 10 #include "CommonArgs.h" 11 #include "InputInfo.h" 12 #include "clang/Driver/Compilation.h" 13 #include "clang/Driver/DriverDiagnostic.h" 14 #include "llvm/Option/ArgList.h" 15 #include "llvm/Support/TargetParser.h" 16 17 using namespace clang::driver; 18 using namespace clang::driver::tools; 19 using namespace clang::driver::toolchains; 20 using namespace clang; 21 using namespace llvm::opt; 22 23 void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA, 24 const InputInfo &Output, 25 const InputInfoList &Inputs, 26 const ArgList &Args, 27 const char *LinkingOutput) const { 28 29 std::string Linker = getToolChain().GetProgramPath(getShortName()); 30 ArgStringList CmdArgs; 31 AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); 32 CmdArgs.push_back("-shared"); 33 CmdArgs.push_back("-o"); 34 CmdArgs.push_back(Output.getFilename()); 35 C.addCommand(std::make_unique<Command>(JA, *this, Args.MakeArgString(Linker), 36 CmdArgs, Inputs)); 37 } 38 39 void amdgpu::getAMDGPUTargetFeatures(const Driver &D, 40 const llvm::opt::ArgList &Args, 41 std::vector<StringRef> &Features) { 42 if (const Arg *dAbi = Args.getLastArg(options::OPT_mamdgpu_debugger_abi)) 43 D.Diag(diag::err_drv_clang_unsupported) << dAbi->getAsString(Args); 44 45 if (Args.getLastArg(options::OPT_mwavefrontsize64)) { 46 Features.push_back("-wavefrontsize16"); 47 Features.push_back("-wavefrontsize32"); 48 Features.push_back("+wavefrontsize64"); 49 } 50 if (Args.getLastArg(options::OPT_mno_wavefrontsize64)) { 51 Features.push_back("-wavefrontsize16"); 52 Features.push_back("+wavefrontsize32"); 53 Features.push_back("-wavefrontsize64"); 54 } 55 56 handleTargetFeaturesGroup( 57 Args, Features, options::OPT_m_amdgpu_Features_Group); 58 } 59 60 /// AMDGPU Toolchain 61 AMDGPUToolChain::AMDGPUToolChain(const Driver &D, const llvm::Triple &Triple, 62 const ArgList &Args) 63 : Generic_ELF(D, Triple, Args), 64 OptionsDefault({{options::OPT_O, "3"}, 65 {options::OPT_cl_std_EQ, "CL1.2"}}) {} 66 67 Tool *AMDGPUToolChain::buildLinker() const { 68 return new tools::amdgpu::Linker(*this); 69 } 70 71 DerivedArgList * 72 AMDGPUToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch, 73 Action::OffloadKind DeviceOffloadKind) const { 74 75 DerivedArgList *DAL = 76 Generic_ELF::TranslateArgs(Args, BoundArch, DeviceOffloadKind); 77 78 // Do nothing if not OpenCL (-x cl) 79 if (!Args.getLastArgValue(options::OPT_x).equals("cl")) 80 return DAL; 81 82 if (!DAL) 83 DAL = new DerivedArgList(Args.getBaseArgs()); 84 for (auto *A : Args) 85 DAL->append(A); 86 87 const OptTable &Opts = getDriver().getOpts(); 88 89 // Phase 1 (.cl -> .bc) 90 if (Args.hasArg(options::OPT_c) && Args.hasArg(options::OPT_emit_llvm)) { 91 DAL->AddFlagArg(nullptr, Opts.getOption(getTriple().isArch64Bit() 92 ? options::OPT_m64 93 : options::OPT_m32)); 94 95 // Have to check OPT_O4, OPT_O0 & OPT_Ofast separately 96 // as they defined that way in Options.td 97 if (!Args.hasArg(options::OPT_O, options::OPT_O0, options::OPT_O4, 98 options::OPT_Ofast)) 99 DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_O), 100 getOptionDefault(options::OPT_O)); 101 } 102 103 return DAL; 104 } 105 106 llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType( 107 const llvm::opt::ArgList &DriverArgs, Action::OffloadKind DeviceOffloadKind, 108 const llvm::fltSemantics *FPType) const { 109 // Denormals should always be enabled for f16 and f64. 110 if (!FPType || FPType != &llvm::APFloat::IEEEsingle()) 111 return llvm::DenormalMode::getIEEE(); 112 113 if (DeviceOffloadKind == Action::OFK_Cuda) { 114 if (FPType && FPType == &llvm::APFloat::IEEEsingle() && 115 DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero, 116 options::OPT_fno_cuda_flush_denormals_to_zero, 117 false)) 118 return llvm::DenormalMode::getPreserveSign(); 119 } 120 121 const StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ); 122 auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch); 123 124 // Default to enabling f32 denormals by default on subtargets where fma is 125 // fast with denormals 126 127 const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind); 128 const bool DefaultDenormsAreZeroForTarget = 129 (ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) && 130 (ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32); 131 132 // TODO: There are way too many flags that change this. Do we need to check 133 // them all? 134 bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) || 135 !DefaultDenormsAreZeroForTarget; 136 // Outputs are flushed to zero, preserving sign 137 return DAZ ? llvm::DenormalMode::getPreserveSign() : 138 llvm::DenormalMode::getIEEE(); 139 } 140 141 void AMDGPUToolChain::addClangTargetOptions( 142 const llvm::opt::ArgList &DriverArgs, 143 llvm::opt::ArgStringList &CC1Args, 144 Action::OffloadKind DeviceOffloadingKind) const { 145 // Default to "hidden" visibility, as object level linking will not be 146 // supported for the foreseeable future. 147 if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ, 148 options::OPT_fvisibility_ms_compat)) { 149 CC1Args.push_back("-fvisibility"); 150 CC1Args.push_back("hidden"); 151 CC1Args.push_back("-fapply-global-visibility-to-externs"); 152 } 153 } 154