1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// AMDGPU HSA Metadata Streamer. 12 /// 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "AMDGPUHSAMetadataStreamer.h" 17 #include "AMDGPU.h" 18 #include "AMDGPUSubtarget.h" 19 #include "SIMachineFunctionInfo.h" 20 #include "SIProgramInfo.h" 21 #include "Utils/AMDGPUBaseInfo.h" 22 #include "llvm/ADT/StringSwitch.h" 23 #include "llvm/IR/Constants.h" 24 #include "llvm/IR/Module.h" 25 #include "llvm/Support/raw_ostream.h" 26 27 namespace llvm { 28 29 static cl::opt<bool> DumpHSAMetadata( 30 "amdgpu-dump-hsa-metadata", 31 cl::desc("Dump AMDGPU HSA Metadata")); 32 static cl::opt<bool> VerifyHSAMetadata( 33 "amdgpu-verify-hsa-metadata", 34 cl::desc("Verify AMDGPU HSA Metadata")); 35 36 namespace AMDGPU { 37 namespace HSAMD { 38 39 void MetadataStreamer::dump(StringRef HSAMetadataString) const { 40 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; 41 } 42 43 void MetadataStreamer::verify(StringRef HSAMetadataString) const { 44 errs() << "AMDGPU HSA Metadata Parser Test: "; 45 46 HSAMD::Metadata FromHSAMetadataString; 47 if (fromString(HSAMetadataString, FromHSAMetadataString)) { 48 errs() << "FAIL\n"; 49 return; 50 } 51 52 std::string ToHSAMetadataString; 53 if (toString(FromHSAMetadataString, ToHSAMetadataString)) { 54 errs() << "FAIL\n"; 55 return; 56 } 57 58 errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL") 59 << '\n'; 60 if (HSAMetadataString != ToHSAMetadataString) { 61 errs() << "Original input: " << HSAMetadataString << '\n' 62 << "Produced output: " << ToHSAMetadataString << '\n'; 63 } 64 } 65 66 AccessQualifier MetadataStreamer::getAccessQualifier(StringRef AccQual) const { 67 if (AccQual.empty()) 68 return AccessQualifier::Unknown; 69 70 return StringSwitch<AccessQualifier>(AccQual) 71 .Case("read_only", AccessQualifier::ReadOnly) 72 .Case("write_only", AccessQualifier::WriteOnly) 73 .Case("read_write", AccessQualifier::ReadWrite) 74 .Default(AccessQualifier::Default); 75 } 76 77 AddressSpaceQualifier MetadataStreamer::getAddressSpaceQualifer( 78 unsigned AddressSpace) const { 79 switch (AddressSpace) { 80 case AMDGPUAS::PRIVATE_ADDRESS: 81 return AddressSpaceQualifier::Private; 82 case AMDGPUAS::GLOBAL_ADDRESS: 83 return AddressSpaceQualifier::Global; 84 case AMDGPUAS::CONSTANT_ADDRESS: 85 return AddressSpaceQualifier::Constant; 86 case AMDGPUAS::LOCAL_ADDRESS: 87 return AddressSpaceQualifier::Local; 88 case AMDGPUAS::FLAT_ADDRESS: 89 return AddressSpaceQualifier::Generic; 90 case AMDGPUAS::REGION_ADDRESS: 91 return AddressSpaceQualifier::Region; 92 default: 93 return AddressSpaceQualifier::Unknown; 94 } 95 } 96 97 ValueKind MetadataStreamer::getValueKind(Type *Ty, StringRef TypeQual, 98 StringRef BaseTypeName) const { 99 if (TypeQual.find("pipe") != StringRef::npos) 100 return ValueKind::Pipe; 101 102 return StringSwitch<ValueKind>(BaseTypeName) 103 .Case("image1d_t", ValueKind::Image) 104 .Case("image1d_array_t", ValueKind::Image) 105 .Case("image1d_buffer_t", ValueKind::Image) 106 .Case("image2d_t", ValueKind::Image) 107 .Case("image2d_array_t", ValueKind::Image) 108 .Case("image2d_array_depth_t", ValueKind::Image) 109 .Case("image2d_array_msaa_t", ValueKind::Image) 110 .Case("image2d_array_msaa_depth_t", ValueKind::Image) 111 .Case("image2d_depth_t", ValueKind::Image) 112 .Case("image2d_msaa_t", ValueKind::Image) 113 .Case("image2d_msaa_depth_t", ValueKind::Image) 114 .Case("image3d_t", ValueKind::Image) 115 .Case("sampler_t", ValueKind::Sampler) 116 .Case("queue_t", ValueKind::Queue) 117 .Default(isa<PointerType>(Ty) ? 118 (Ty->getPointerAddressSpace() == 119 AMDGPUAS::LOCAL_ADDRESS ? 120 ValueKind::DynamicSharedPointer : 121 ValueKind::GlobalBuffer) : 122 ValueKind::ByValue); 123 } 124 125 ValueType MetadataStreamer::getValueType(Type *Ty, StringRef TypeName) const { 126 switch (Ty->getTypeID()) { 127 case Type::IntegerTyID: { 128 auto Signed = !TypeName.startswith("u"); 129 switch (Ty->getIntegerBitWidth()) { 130 case 8: 131 return Signed ? ValueType::I8 : ValueType::U8; 132 case 16: 133 return Signed ? ValueType::I16 : ValueType::U16; 134 case 32: 135 return Signed ? ValueType::I32 : ValueType::U32; 136 case 64: 137 return Signed ? ValueType::I64 : ValueType::U64; 138 default: 139 return ValueType::Struct; 140 } 141 } 142 case Type::HalfTyID: 143 return ValueType::F16; 144 case Type::FloatTyID: 145 return ValueType::F32; 146 case Type::DoubleTyID: 147 return ValueType::F64; 148 case Type::PointerTyID: 149 return getValueType(Ty->getPointerElementType(), TypeName); 150 case Type::VectorTyID: 151 return getValueType(Ty->getVectorElementType(), TypeName); 152 default: 153 return ValueType::Struct; 154 } 155 } 156 157 std::string MetadataStreamer::getTypeName(Type *Ty, bool Signed) const { 158 switch (Ty->getTypeID()) { 159 case Type::IntegerTyID: { 160 if (!Signed) 161 return (Twine('u') + getTypeName(Ty, true)).str(); 162 163 auto BitWidth = Ty->getIntegerBitWidth(); 164 switch (BitWidth) { 165 case 8: 166 return "char"; 167 case 16: 168 return "short"; 169 case 32: 170 return "int"; 171 case 64: 172 return "long"; 173 default: 174 return (Twine('i') + Twine(BitWidth)).str(); 175 } 176 } 177 case Type::HalfTyID: 178 return "half"; 179 case Type::FloatTyID: 180 return "float"; 181 case Type::DoubleTyID: 182 return "double"; 183 case Type::VectorTyID: { 184 auto VecTy = cast<VectorType>(Ty); 185 auto ElTy = VecTy->getElementType(); 186 auto NumElements = VecTy->getVectorNumElements(); 187 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); 188 } 189 default: 190 return "unknown"; 191 } 192 } 193 194 std::vector<uint32_t> MetadataStreamer::getWorkGroupDimensions( 195 MDNode *Node) const { 196 std::vector<uint32_t> Dims; 197 if (Node->getNumOperands() != 3) 198 return Dims; 199 200 for (auto &Op : Node->operands()) 201 Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue()); 202 return Dims; 203 } 204 205 Kernel::CodeProps::Metadata MetadataStreamer::getHSACodeProps( 206 const MachineFunction &MF, 207 const SIProgramInfo &ProgramInfo) const { 208 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); 209 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); 210 HSAMD::Kernel::CodeProps::Metadata HSACodeProps; 211 const Function &F = MF.getFunction(); 212 213 assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL || 214 F.getCallingConv() == CallingConv::SPIR_KERNEL); 215 216 unsigned MaxKernArgAlign; 217 HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F, 218 MaxKernArgAlign); 219 HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize; 220 HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize; 221 HSACodeProps.mKernargSegmentAlign = std::max(MaxKernArgAlign, 4u); 222 HSACodeProps.mWavefrontSize = STM.getWavefrontSize(); 223 HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR; 224 HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR; 225 HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize(); 226 HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack; 227 HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled(); 228 HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs(); 229 HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs(); 230 231 return HSACodeProps; 232 } 233 234 Kernel::DebugProps::Metadata MetadataStreamer::getHSADebugProps( 235 const MachineFunction &MF, 236 const SIProgramInfo &ProgramInfo) const { 237 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); 238 HSAMD::Kernel::DebugProps::Metadata HSADebugProps; 239 240 if (!STM.debuggerSupported()) 241 return HSADebugProps; 242 243 HSADebugProps.mDebuggerABIVersion.push_back(1); 244 HSADebugProps.mDebuggerABIVersion.push_back(0); 245 246 if (STM.debuggerEmitPrologue()) { 247 HSADebugProps.mPrivateSegmentBufferSGPR = 248 ProgramInfo.DebuggerPrivateSegmentBufferSGPR; 249 HSADebugProps.mWavefrontPrivateSegmentOffsetSGPR = 250 ProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR; 251 } 252 253 return HSADebugProps; 254 } 255 256 void MetadataStreamer::emitVersion() { 257 auto &Version = HSAMetadata.mVersion; 258 259 Version.push_back(VersionMajor); 260 Version.push_back(VersionMinor); 261 } 262 263 void MetadataStreamer::emitPrintf(const Module &Mod) { 264 auto &Printf = HSAMetadata.mPrintf; 265 266 auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); 267 if (!Node) 268 return; 269 270 for (auto Op : Node->operands()) 271 if (Op->getNumOperands()) 272 Printf.push_back(cast<MDString>(Op->getOperand(0))->getString()); 273 } 274 275 void MetadataStreamer::emitKernelLanguage(const Function &Func) { 276 auto &Kernel = HSAMetadata.mKernels.back(); 277 278 // TODO: What about other languages? 279 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); 280 if (!Node || !Node->getNumOperands()) 281 return; 282 auto Op0 = Node->getOperand(0); 283 if (Op0->getNumOperands() <= 1) 284 return; 285 286 Kernel.mLanguage = "OpenCL C"; 287 Kernel.mLanguageVersion.push_back( 288 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()); 289 Kernel.mLanguageVersion.push_back( 290 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()); 291 } 292 293 void MetadataStreamer::emitKernelAttrs(const Function &Func) { 294 auto &Attrs = HSAMetadata.mKernels.back().mAttrs; 295 296 if (auto Node = Func.getMetadata("reqd_work_group_size")) 297 Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node); 298 if (auto Node = Func.getMetadata("work_group_size_hint")) 299 Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node); 300 if (auto Node = Func.getMetadata("vec_type_hint")) { 301 Attrs.mVecTypeHint = getTypeName( 302 cast<ValueAsMetadata>(Node->getOperand(0))->getType(), 303 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()); 304 } 305 if (Func.hasFnAttribute("runtime-handle")) { 306 Attrs.mRuntimeHandle = 307 Func.getFnAttribute("runtime-handle").getValueAsString().str(); 308 } 309 } 310 311 void MetadataStreamer::emitKernelArgs(const Function &Func) { 312 for (auto &Arg : Func.args()) 313 emitKernelArg(Arg); 314 315 emitHiddenKernelArgs(Func); 316 } 317 318 void MetadataStreamer::emitKernelArg(const Argument &Arg) { 319 auto Func = Arg.getParent(); 320 auto ArgNo = Arg.getArgNo(); 321 const MDNode *Node; 322 323 StringRef Name; 324 Node = Func->getMetadata("kernel_arg_name"); 325 if (Node && ArgNo < Node->getNumOperands()) 326 Name = cast<MDString>(Node->getOperand(ArgNo))->getString(); 327 else if (Arg.hasName()) 328 Name = Arg.getName(); 329 330 StringRef TypeName; 331 Node = Func->getMetadata("kernel_arg_type"); 332 if (Node && ArgNo < Node->getNumOperands()) 333 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 334 335 StringRef BaseTypeName; 336 Node = Func->getMetadata("kernel_arg_base_type"); 337 if (Node && ArgNo < Node->getNumOperands()) 338 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 339 340 StringRef AccQual; 341 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() && 342 Arg.hasNoAliasAttr()) { 343 AccQual = "read_only"; 344 } else { 345 Node = Func->getMetadata("kernel_arg_access_qual"); 346 if (Node && ArgNo < Node->getNumOperands()) 347 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 348 } 349 350 StringRef TypeQual; 351 Node = Func->getMetadata("kernel_arg_type_qual"); 352 if (Node && ArgNo < Node->getNumOperands()) 353 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 354 355 Type *Ty = Arg.getType(); 356 const DataLayout &DL = Func->getParent()->getDataLayout(); 357 358 unsigned PointeeAlign = 0; 359 if (auto PtrTy = dyn_cast<PointerType>(Ty)) { 360 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { 361 PointeeAlign = Arg.getParamAlignment(); 362 if (PointeeAlign == 0) 363 PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType()); 364 } 365 } 366 367 emitKernelArg(DL, Ty, getValueKind(Arg.getType(), TypeQual, BaseTypeName), 368 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual); 369 } 370 371 void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty, 372 ValueKind ValueKind, 373 unsigned PointeeAlign, 374 StringRef Name, 375 StringRef TypeName, StringRef BaseTypeName, 376 StringRef AccQual, StringRef TypeQual) { 377 HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata()); 378 auto &Arg = HSAMetadata.mKernels.back().mArgs.back(); 379 380 Arg.mName = Name; 381 Arg.mTypeName = TypeName; 382 Arg.mSize = DL.getTypeAllocSize(Ty); 383 Arg.mAlign = DL.getABITypeAlignment(Ty); 384 Arg.mValueKind = ValueKind; 385 Arg.mValueType = getValueType(Ty, BaseTypeName); 386 Arg.mPointeeAlign = PointeeAlign; 387 388 if (auto PtrTy = dyn_cast<PointerType>(Ty)) 389 Arg.mAddrSpaceQual = getAddressSpaceQualifer(PtrTy->getAddressSpace()); 390 391 Arg.mAccQual = getAccessQualifier(AccQual); 392 393 // TODO: Emit Arg.mActualAccQual. 394 395 SmallVector<StringRef, 1> SplitTypeQuals; 396 TypeQual.split(SplitTypeQuals, " ", -1, false); 397 for (StringRef Key : SplitTypeQuals) { 398 auto P = StringSwitch<bool*>(Key) 399 .Case("const", &Arg.mIsConst) 400 .Case("restrict", &Arg.mIsRestrict) 401 .Case("volatile", &Arg.mIsVolatile) 402 .Case("pipe", &Arg.mIsPipe) 403 .Default(nullptr); 404 if (P) 405 *P = true; 406 } 407 } 408 409 void MetadataStreamer::emitHiddenKernelArgs(const Function &Func) { 410 int HiddenArgNumBytes = 411 getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0); 412 413 if (!HiddenArgNumBytes) 414 return; 415 416 auto &DL = Func.getParent()->getDataLayout(); 417 auto Int64Ty = Type::getInt64Ty(Func.getContext()); 418 419 if (HiddenArgNumBytes >= 8) 420 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX); 421 if (HiddenArgNumBytes >= 16) 422 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY); 423 if (HiddenArgNumBytes >= 24) 424 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ); 425 426 auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(), 427 AMDGPUAS::GLOBAL_ADDRESS); 428 429 // Emit "printf buffer" argument if printf is used, otherwise emit dummy 430 // "none" argument. 431 if (HiddenArgNumBytes >= 32) { 432 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) 433 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer); 434 else 435 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone); 436 } 437 438 // Emit "default queue" and "completion action" arguments if enqueue kernel is 439 // used, otherwise emit dummy "none" arguments. 440 if (HiddenArgNumBytes >= 48) { 441 if (Func.hasFnAttribute("calls-enqueue-kernel")) { 442 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenDefaultQueue); 443 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenCompletionAction); 444 } else { 445 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone); 446 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone); 447 } 448 } 449 } 450 451 void MetadataStreamer::begin(const Module &Mod) { 452 emitVersion(); 453 emitPrintf(Mod); 454 } 455 456 void MetadataStreamer::end() { 457 std::string HSAMetadataString; 458 if (toString(HSAMetadata, HSAMetadataString)) 459 return; 460 461 if (DumpHSAMetadata) 462 dump(HSAMetadataString); 463 if (VerifyHSAMetadata) 464 verify(HSAMetadataString); 465 } 466 467 void MetadataStreamer::emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) { 468 auto &Func = MF.getFunction(); 469 if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL) 470 return; 471 472 auto CodeProps = getHSACodeProps(MF, ProgramInfo); 473 auto DebugProps = getHSADebugProps(MF, ProgramInfo); 474 475 HSAMetadata.mKernels.push_back(Kernel::Metadata()); 476 auto &Kernel = HSAMetadata.mKernels.back(); 477 478 Kernel.mName = Func.getName(); 479 Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str(); 480 emitKernelLanguage(Func); 481 emitKernelAttrs(Func); 482 emitKernelArgs(Func); 483 HSAMetadata.mKernels.back().mCodeProps = CodeProps; 484 HSAMetadata.mKernels.back().mDebugProps = DebugProps; 485 } 486 487 } // end namespace HSAMD 488 } // end namespace AMDGPU 489 } // end namespace llvm 490