1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// AMDGPU HSA Metadata Streamer. 12 /// 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "AMDGPUHSAMetadataStreamer.h" 17 #include "AMDGPU.h" 18 #include "AMDGPUSubtarget.h" 19 #include "SIMachineFunctionInfo.h" 20 #include "SIProgramInfo.h" 21 #include "Utils/AMDGPUBaseInfo.h" 22 #include "llvm/ADT/StringSwitch.h" 23 #include "llvm/IR/Constants.h" 24 #include "llvm/IR/Module.h" 25 #include "llvm/Support/raw_ostream.h" 26 27 namespace llvm { 28 29 static cl::opt<bool> DumpHSAMetadata( 30 "amdgpu-dump-hsa-metadata", 31 cl::desc("Dump AMDGPU HSA Metadata")); 32 static cl::opt<bool> VerifyHSAMetadata( 33 "amdgpu-verify-hsa-metadata", 34 cl::desc("Verify AMDGPU HSA Metadata")); 35 36 namespace AMDGPU { 37 namespace HSAMD { 38 39 void MetadataStreamer::dump(StringRef HSAMetadataString) const { 40 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; 41 } 42 43 void MetadataStreamer::verify(StringRef HSAMetadataString) const { 44 errs() << "AMDGPU HSA Metadata Parser Test: "; 45 46 HSAMD::Metadata FromHSAMetadataString; 47 if (fromString(HSAMetadataString, FromHSAMetadataString)) { 48 errs() << "FAIL\n"; 49 return; 50 } 51 52 std::string ToHSAMetadataString; 53 if (toString(FromHSAMetadataString, ToHSAMetadataString)) { 54 errs() << "FAIL\n"; 55 return; 56 } 57 58 errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL") 59 << '\n'; 60 if (HSAMetadataString != ToHSAMetadataString) { 61 errs() << "Original input: " << HSAMetadataString << '\n' 62 << "Produced output: " << ToHSAMetadataString << '\n'; 63 } 64 } 65 66 AccessQualifier MetadataStreamer::getAccessQualifier(StringRef AccQual) const { 67 if (AccQual.empty()) 68 return AccessQualifier::Unknown; 69 70 return StringSwitch<AccessQualifier>(AccQual) 71 .Case("read_only", AccessQualifier::ReadOnly) 72 .Case("write_only", AccessQualifier::WriteOnly) 73 .Case("read_write", AccessQualifier::ReadWrite) 74 .Default(AccessQualifier::Default); 75 } 76 77 AddressSpaceQualifier MetadataStreamer::getAddressSpaceQualifer( 78 unsigned AddressSpace) const { 79 if (AddressSpace == AMDGPUAS::PRIVATE_ADDRESS) 80 return AddressSpaceQualifier::Private; 81 if (AddressSpace == AMDGPUAS::GLOBAL_ADDRESS) 82 return AddressSpaceQualifier::Global; 83 if (AddressSpace == AMDGPUAS::CONSTANT_ADDRESS) 84 return AddressSpaceQualifier::Constant; 85 if (AddressSpace == AMDGPUAS::LOCAL_ADDRESS) 86 return AddressSpaceQualifier::Local; 87 if (AddressSpace == AMDGPUAS::FLAT_ADDRESS) 88 return AddressSpaceQualifier::Generic; 89 if (AddressSpace == AMDGPUAS::REGION_ADDRESS) 90 return AddressSpaceQualifier::Region; 91 92 llvm_unreachable("Unknown address space qualifier"); 93 } 94 95 ValueKind MetadataStreamer::getValueKind(Type *Ty, StringRef TypeQual, 96 StringRef BaseTypeName) const { 97 if (TypeQual.find("pipe") != StringRef::npos) 98 return ValueKind::Pipe; 99 100 return StringSwitch<ValueKind>(BaseTypeName) 101 .Case("image1d_t", ValueKind::Image) 102 .Case("image1d_array_t", ValueKind::Image) 103 .Case("image1d_buffer_t", ValueKind::Image) 104 .Case("image2d_t", ValueKind::Image) 105 .Case("image2d_array_t", ValueKind::Image) 106 .Case("image2d_array_depth_t", ValueKind::Image) 107 .Case("image2d_array_msaa_t", ValueKind::Image) 108 .Case("image2d_array_msaa_depth_t", ValueKind::Image) 109 .Case("image2d_depth_t", ValueKind::Image) 110 .Case("image2d_msaa_t", ValueKind::Image) 111 .Case("image2d_msaa_depth_t", ValueKind::Image) 112 .Case("image3d_t", ValueKind::Image) 113 .Case("sampler_t", ValueKind::Sampler) 114 .Case("queue_t", ValueKind::Queue) 115 .Default(isa<PointerType>(Ty) ? 116 (Ty->getPointerAddressSpace() == 117 AMDGPUAS::LOCAL_ADDRESS ? 118 ValueKind::DynamicSharedPointer : 119 ValueKind::GlobalBuffer) : 120 ValueKind::ByValue); 121 } 122 123 ValueType MetadataStreamer::getValueType(Type *Ty, StringRef TypeName) const { 124 switch (Ty->getTypeID()) { 125 case Type::IntegerTyID: { 126 auto Signed = !TypeName.startswith("u"); 127 switch (Ty->getIntegerBitWidth()) { 128 case 8: 129 return Signed ? ValueType::I8 : ValueType::U8; 130 case 16: 131 return Signed ? ValueType::I16 : ValueType::U16; 132 case 32: 133 return Signed ? ValueType::I32 : ValueType::U32; 134 case 64: 135 return Signed ? ValueType::I64 : ValueType::U64; 136 default: 137 return ValueType::Struct; 138 } 139 } 140 case Type::HalfTyID: 141 return ValueType::F16; 142 case Type::FloatTyID: 143 return ValueType::F32; 144 case Type::DoubleTyID: 145 return ValueType::F64; 146 case Type::PointerTyID: 147 return getValueType(Ty->getPointerElementType(), TypeName); 148 case Type::VectorTyID: 149 return getValueType(Ty->getVectorElementType(), TypeName); 150 default: 151 return ValueType::Struct; 152 } 153 } 154 155 std::string MetadataStreamer::getTypeName(Type *Ty, bool Signed) const { 156 switch (Ty->getTypeID()) { 157 case Type::IntegerTyID: { 158 if (!Signed) 159 return (Twine('u') + getTypeName(Ty, true)).str(); 160 161 auto BitWidth = Ty->getIntegerBitWidth(); 162 switch (BitWidth) { 163 case 8: 164 return "char"; 165 case 16: 166 return "short"; 167 case 32: 168 return "int"; 169 case 64: 170 return "long"; 171 default: 172 return (Twine('i') + Twine(BitWidth)).str(); 173 } 174 } 175 case Type::HalfTyID: 176 return "half"; 177 case Type::FloatTyID: 178 return "float"; 179 case Type::DoubleTyID: 180 return "double"; 181 case Type::VectorTyID: { 182 auto VecTy = cast<VectorType>(Ty); 183 auto ElTy = VecTy->getElementType(); 184 auto NumElements = VecTy->getVectorNumElements(); 185 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); 186 } 187 default: 188 return "unknown"; 189 } 190 } 191 192 std::vector<uint32_t> MetadataStreamer::getWorkGroupDimensions( 193 MDNode *Node) const { 194 std::vector<uint32_t> Dims; 195 if (Node->getNumOperands() != 3) 196 return Dims; 197 198 for (auto &Op : Node->operands()) 199 Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue()); 200 return Dims; 201 } 202 203 Kernel::CodeProps::Metadata MetadataStreamer::getHSACodeProps( 204 const MachineFunction &MF, 205 const SIProgramInfo &ProgramInfo) const { 206 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); 207 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); 208 HSAMD::Kernel::CodeProps::Metadata HSACodeProps; 209 const Function &F = MF.getFunction(); 210 211 assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL || 212 F.getCallingConv() == CallingConv::SPIR_KERNEL); 213 214 unsigned MaxKernArgAlign; 215 HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F, 216 MaxKernArgAlign); 217 HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize; 218 HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize; 219 HSACodeProps.mKernargSegmentAlign = std::max(MaxKernArgAlign, 4u); 220 HSACodeProps.mWavefrontSize = STM.getWavefrontSize(); 221 HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR; 222 HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR; 223 HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize(); 224 HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack; 225 HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled(); 226 HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs(); 227 HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs(); 228 229 return HSACodeProps; 230 } 231 232 Kernel::DebugProps::Metadata MetadataStreamer::getHSADebugProps( 233 const MachineFunction &MF, 234 const SIProgramInfo &ProgramInfo) const { 235 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); 236 HSAMD::Kernel::DebugProps::Metadata HSADebugProps; 237 238 if (!STM.debuggerSupported()) 239 return HSADebugProps; 240 241 HSADebugProps.mDebuggerABIVersion.push_back(1); 242 HSADebugProps.mDebuggerABIVersion.push_back(0); 243 244 if (STM.debuggerEmitPrologue()) { 245 HSADebugProps.mPrivateSegmentBufferSGPR = 246 ProgramInfo.DebuggerPrivateSegmentBufferSGPR; 247 HSADebugProps.mWavefrontPrivateSegmentOffsetSGPR = 248 ProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR; 249 } 250 251 return HSADebugProps; 252 } 253 254 void MetadataStreamer::emitVersion() { 255 auto &Version = HSAMetadata.mVersion; 256 257 Version.push_back(VersionMajor); 258 Version.push_back(VersionMinor); 259 } 260 261 void MetadataStreamer::emitPrintf(const Module &Mod) { 262 auto &Printf = HSAMetadata.mPrintf; 263 264 auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); 265 if (!Node) 266 return; 267 268 for (auto Op : Node->operands()) 269 if (Op->getNumOperands()) 270 Printf.push_back(cast<MDString>(Op->getOperand(0))->getString()); 271 } 272 273 void MetadataStreamer::emitKernelLanguage(const Function &Func) { 274 auto &Kernel = HSAMetadata.mKernels.back(); 275 276 // TODO: What about other languages? 277 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); 278 if (!Node || !Node->getNumOperands()) 279 return; 280 auto Op0 = Node->getOperand(0); 281 if (Op0->getNumOperands() <= 1) 282 return; 283 284 Kernel.mLanguage = "OpenCL C"; 285 Kernel.mLanguageVersion.push_back( 286 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()); 287 Kernel.mLanguageVersion.push_back( 288 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()); 289 } 290 291 void MetadataStreamer::emitKernelAttrs(const Function &Func) { 292 auto &Attrs = HSAMetadata.mKernels.back().mAttrs; 293 294 if (auto Node = Func.getMetadata("reqd_work_group_size")) 295 Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node); 296 if (auto Node = Func.getMetadata("work_group_size_hint")) 297 Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node); 298 if (auto Node = Func.getMetadata("vec_type_hint")) { 299 Attrs.mVecTypeHint = getTypeName( 300 cast<ValueAsMetadata>(Node->getOperand(0))->getType(), 301 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()); 302 } 303 if (Func.hasFnAttribute("runtime-handle")) { 304 Attrs.mRuntimeHandle = 305 Func.getFnAttribute("runtime-handle").getValueAsString().str(); 306 } 307 } 308 309 void MetadataStreamer::emitKernelArgs(const Function &Func) { 310 for (auto &Arg : Func.args()) 311 emitKernelArg(Arg); 312 313 emitHiddenKernelArgs(Func); 314 } 315 316 void MetadataStreamer::emitKernelArg(const Argument &Arg) { 317 auto Func = Arg.getParent(); 318 auto ArgNo = Arg.getArgNo(); 319 const MDNode *Node; 320 321 StringRef Name; 322 Node = Func->getMetadata("kernel_arg_name"); 323 if (Node && ArgNo < Node->getNumOperands()) 324 Name = cast<MDString>(Node->getOperand(ArgNo))->getString(); 325 else if (Arg.hasName()) 326 Name = Arg.getName(); 327 328 StringRef TypeName; 329 Node = Func->getMetadata("kernel_arg_type"); 330 if (Node && ArgNo < Node->getNumOperands()) 331 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 332 333 StringRef BaseTypeName; 334 Node = Func->getMetadata("kernel_arg_base_type"); 335 if (Node && ArgNo < Node->getNumOperands()) 336 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 337 338 StringRef AccQual; 339 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() && 340 Arg.hasNoAliasAttr()) { 341 AccQual = "read_only"; 342 } else { 343 Node = Func->getMetadata("kernel_arg_access_qual"); 344 if (Node && ArgNo < Node->getNumOperands()) 345 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 346 } 347 348 StringRef TypeQual; 349 Node = Func->getMetadata("kernel_arg_type_qual"); 350 if (Node && ArgNo < Node->getNumOperands()) 351 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 352 353 Type *Ty = Arg.getType(); 354 const DataLayout &DL = Func->getParent()->getDataLayout(); 355 356 unsigned PointeeAlign = 0; 357 if (auto PtrTy = dyn_cast<PointerType>(Ty)) { 358 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { 359 PointeeAlign = Arg.getParamAlignment(); 360 if (PointeeAlign == 0) 361 PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType()); 362 } 363 } 364 365 emitKernelArg(DL, Ty, getValueKind(Arg.getType(), TypeQual, BaseTypeName), 366 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual); 367 } 368 369 void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty, 370 ValueKind ValueKind, 371 unsigned PointeeAlign, 372 StringRef Name, 373 StringRef TypeName, StringRef BaseTypeName, 374 StringRef AccQual, StringRef TypeQual) { 375 HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata()); 376 auto &Arg = HSAMetadata.mKernels.back().mArgs.back(); 377 378 Arg.mName = Name; 379 Arg.mTypeName = TypeName; 380 Arg.mSize = DL.getTypeAllocSize(Ty); 381 Arg.mAlign = DL.getABITypeAlignment(Ty); 382 Arg.mValueKind = ValueKind; 383 Arg.mValueType = getValueType(Ty, BaseTypeName); 384 Arg.mPointeeAlign = PointeeAlign; 385 386 if (auto PtrTy = dyn_cast<PointerType>(Ty)) 387 Arg.mAddrSpaceQual = getAddressSpaceQualifer(PtrTy->getAddressSpace()); 388 389 Arg.mAccQual = getAccessQualifier(AccQual); 390 391 // TODO: Emit Arg.mActualAccQual. 392 393 SmallVector<StringRef, 1> SplitTypeQuals; 394 TypeQual.split(SplitTypeQuals, " ", -1, false); 395 for (StringRef Key : SplitTypeQuals) { 396 auto P = StringSwitch<bool*>(Key) 397 .Case("const", &Arg.mIsConst) 398 .Case("restrict", &Arg.mIsRestrict) 399 .Case("volatile", &Arg.mIsVolatile) 400 .Case("pipe", &Arg.mIsPipe) 401 .Default(nullptr); 402 if (P) 403 *P = true; 404 } 405 } 406 407 void MetadataStreamer::emitHiddenKernelArgs(const Function &Func) { 408 int HiddenArgNumBytes = 409 getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0); 410 411 if (!HiddenArgNumBytes) 412 return; 413 414 auto &DL = Func.getParent()->getDataLayout(); 415 auto Int64Ty = Type::getInt64Ty(Func.getContext()); 416 417 if (HiddenArgNumBytes >= 8) 418 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX); 419 if (HiddenArgNumBytes >= 16) 420 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY); 421 if (HiddenArgNumBytes >= 24) 422 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ); 423 424 auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(), 425 AMDGPUAS::GLOBAL_ADDRESS); 426 427 // Emit "printf buffer" argument if printf is used, otherwise emit dummy 428 // "none" argument. 429 if (HiddenArgNumBytes >= 32) { 430 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) 431 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer); 432 else 433 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone); 434 } 435 436 // Emit "default queue" and "completion action" arguments if enqueue kernel is 437 // used, otherwise emit dummy "none" arguments. 438 if (HiddenArgNumBytes >= 48) { 439 if (Func.hasFnAttribute("calls-enqueue-kernel")) { 440 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenDefaultQueue); 441 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenCompletionAction); 442 } else { 443 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone); 444 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone); 445 } 446 } 447 } 448 449 void MetadataStreamer::begin(const Module &Mod) { 450 emitVersion(); 451 emitPrintf(Mod); 452 } 453 454 void MetadataStreamer::end() { 455 std::string HSAMetadataString; 456 if (toString(HSAMetadata, HSAMetadataString)) 457 return; 458 459 if (DumpHSAMetadata) 460 dump(HSAMetadataString); 461 if (VerifyHSAMetadata) 462 verify(HSAMetadataString); 463 } 464 465 void MetadataStreamer::emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) { 466 auto &Func = MF.getFunction(); 467 if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL) 468 return; 469 470 auto CodeProps = getHSACodeProps(MF, ProgramInfo); 471 auto DebugProps = getHSADebugProps(MF, ProgramInfo); 472 473 HSAMetadata.mKernels.push_back(Kernel::Metadata()); 474 auto &Kernel = HSAMetadata.mKernels.back(); 475 476 Kernel.mName = Func.getName(); 477 Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str(); 478 emitKernelLanguage(Func); 479 emitKernelAttrs(Func); 480 emitKernelArgs(Func); 481 HSAMetadata.mKernels.back().mCodeProps = CodeProps; 482 HSAMetadata.mKernels.back().mDebugProps = DebugProps; 483 } 484 485 } // end namespace HSAMD 486 } // end namespace AMDGPU 487 } // end namespace llvm 488