1 //===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains a printer that converts from our internal representation 11 // of machine-dependent LLVM code to NVPTX assembly language. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "InstPrinter/NVPTXInstPrinter.h" 16 #include "MCTargetDesc/NVPTXBaseInfo.h" 17 #include "MCTargetDesc/NVPTXMCAsmInfo.h" 18 #include "NVPTX.h" 19 #include "NVPTXAsmPrinter.h" 20 #include "NVPTXMCExpr.h" 21 #include "NVPTXMachineFunctionInfo.h" 22 #include "NVPTXRegisterInfo.h" 23 #include "NVPTXSubtarget.h" 24 #include "NVPTXTargetMachine.h" 25 #include "NVPTXUtilities.h" 26 #include "cl_common_defines.h" 27 #include "llvm/ADT/APFloat.h" 28 #include "llvm/ADT/APInt.h" 29 #include "llvm/ADT/DenseMap.h" 30 #include "llvm/ADT/DenseSet.h" 31 #include "llvm/ADT/SmallString.h" 32 #include "llvm/ADT/SmallVector.h" 33 #include "llvm/ADT/StringExtras.h" 34 #include "llvm/ADT/StringRef.h" 35 #include "llvm/ADT/Triple.h" 36 #include "llvm/ADT/Twine.h" 37 #include "llvm/Analysis/ConstantFolding.h" 38 #include "llvm/CodeGen/Analysis.h" 39 #include "llvm/CodeGen/MachineBasicBlock.h" 40 #include "llvm/CodeGen/MachineFrameInfo.h" 41 #include "llvm/CodeGen/MachineFunction.h" 42 #include "llvm/CodeGen/MachineInstr.h" 43 #include "llvm/CodeGen/MachineLoopInfo.h" 44 #include "llvm/CodeGen/MachineModuleInfo.h" 45 #include "llvm/CodeGen/MachineOperand.h" 46 #include "llvm/CodeGen/MachineRegisterInfo.h" 47 #include "llvm/CodeGen/MachineValueType.h" 48 #include "llvm/CodeGen/ValueTypes.h" 49 #include "llvm/IR/Attributes.h" 50 #include "llvm/IR/BasicBlock.h" 51 #include "llvm/IR/Constant.h" 52 #include "llvm/IR/Constants.h" 53 #include "llvm/IR/DataLayout.h" 54 #include "llvm/IR/DebugInfo.h" 55 #include "llvm/IR/DebugInfoMetadata.h" 56 #include "llvm/IR/DebugLoc.h" 57 #include "llvm/IR/DerivedTypes.h" 58 #include "llvm/IR/Function.h" 59 #include "llvm/IR/GlobalValue.h" 60 #include "llvm/IR/GlobalVariable.h" 61 #include "llvm/IR/Instruction.h" 62 #include "llvm/IR/LLVMContext.h" 63 #include "llvm/IR/Module.h" 64 #include "llvm/IR/Operator.h" 65 #include "llvm/IR/Type.h" 66 #include "llvm/IR/User.h" 67 #include "llvm/MC/MCExpr.h" 68 #include "llvm/MC/MCInst.h" 69 #include "llvm/MC/MCInstrDesc.h" 70 #include "llvm/MC/MCStreamer.h" 71 #include "llvm/MC/MCSymbol.h" 72 #include "llvm/Support/Casting.h" 73 #include "llvm/Support/CommandLine.h" 74 #include "llvm/Support/ErrorHandling.h" 75 #include "llvm/Support/Path.h" 76 #include "llvm/Support/raw_ostream.h" 77 #include "llvm/Support/TargetRegistry.h" 78 #include "llvm/Target/TargetLowering.h" 79 #include "llvm/Target/TargetLoweringObjectFile.h" 80 #include "llvm/Target/TargetMachine.h" 81 #include "llvm/Target/TargetRegisterInfo.h" 82 #include "llvm/Transforms/Utils/UnrollLoop.h" 83 #include <cassert> 84 #include <cstdint> 85 #include <cstring> 86 #include <new> 87 #include <sstream> 88 #include <string> 89 #include <utility> 90 #include <vector> 91 92 using namespace llvm; 93 94 #define DEPOTNAME "__local_depot" 95 96 static cl::opt<bool> 97 EmitLineNumbers("nvptx-emit-line-numbers", cl::Hidden, 98 cl::desc("NVPTX Specific: Emit Line numbers even without -G"), 99 cl::init(true)); 100 101 static cl::opt<bool> 102 InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore, cl::Hidden, 103 cl::desc("NVPTX Specific: Emit source line in ptx file"), 104 cl::init(false)); 105 106 /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V 107 /// depends. 108 static void 109 DiscoverDependentGlobals(const Value *V, 110 DenseSet<const GlobalVariable *> &Globals) { 111 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) 112 Globals.insert(GV); 113 else { 114 if (const User *U = dyn_cast<User>(V)) { 115 for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) { 116 DiscoverDependentGlobals(U->getOperand(i), Globals); 117 } 118 } 119 } 120 } 121 122 /// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable 123 /// instances to be emitted, but only after any dependents have been added 124 /// first.s 125 static void 126 VisitGlobalVariableForEmission(const GlobalVariable *GV, 127 SmallVectorImpl<const GlobalVariable *> &Order, 128 DenseSet<const GlobalVariable *> &Visited, 129 DenseSet<const GlobalVariable *> &Visiting) { 130 // Have we already visited this one? 131 if (Visited.count(GV)) 132 return; 133 134 // Do we have a circular dependency? 135 if (!Visiting.insert(GV).second) 136 report_fatal_error("Circular dependency found in global variable set"); 137 138 // Make sure we visit all dependents first 139 DenseSet<const GlobalVariable *> Others; 140 for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i) 141 DiscoverDependentGlobals(GV->getOperand(i), Others); 142 143 for (DenseSet<const GlobalVariable *>::iterator I = Others.begin(), 144 E = Others.end(); 145 I != E; ++I) 146 VisitGlobalVariableForEmission(*I, Order, Visited, Visiting); 147 148 // Now we can visit ourself 149 Order.push_back(GV); 150 Visited.insert(GV); 151 Visiting.erase(GV); 152 } 153 154 void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) { 155 if (!EmitLineNumbers) 156 return; 157 if (ignoreLoc(MI)) 158 return; 159 160 const DebugLoc &curLoc = MI.getDebugLoc(); 161 162 if (!prevDebugLoc && !curLoc) 163 return; 164 165 if (prevDebugLoc == curLoc) 166 return; 167 168 prevDebugLoc = curLoc; 169 170 if (!curLoc) 171 return; 172 173 auto *Scope = cast_or_null<DIScope>(curLoc.getScope()); 174 if (!Scope) 175 return; 176 177 StringRef fileName(Scope->getFilename()); 178 StringRef dirName(Scope->getDirectory()); 179 SmallString<128> FullPathName = dirName; 180 if (!dirName.empty() && !sys::path::is_absolute(fileName)) { 181 sys::path::append(FullPathName, fileName); 182 fileName = FullPathName; 183 } 184 185 if (filenameMap.find(fileName) == filenameMap.end()) 186 return; 187 188 // Emit the line from the source file. 189 if (InterleaveSrc) 190 this->emitSrcInText(fileName, curLoc.getLine()); 191 192 std::stringstream temp; 193 temp << "\t.loc " << filenameMap[fileName] << " " << curLoc.getLine() 194 << " " << curLoc.getCol(); 195 OutStreamer->EmitRawText(temp.str()); 196 } 197 198 void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { 199 SmallString<128> Str; 200 raw_svector_ostream OS(Str); 201 if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) 202 emitLineNumberAsDotLoc(*MI); 203 204 MCInst Inst; 205 lowerToMCInst(MI, Inst); 206 EmitToStreamer(*OutStreamer, Inst); 207 } 208 209 // Handle symbol backtracking for targets that do not support image handles 210 bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI, 211 unsigned OpNo, MCOperand &MCOp) { 212 const MachineOperand &MO = MI->getOperand(OpNo); 213 const MCInstrDesc &MCID = MI->getDesc(); 214 215 if (MCID.TSFlags & NVPTXII::IsTexFlag) { 216 // This is a texture fetch, so operand 4 is a texref and operand 5 is 217 // a samplerref 218 if (OpNo == 4 && MO.isImm()) { 219 lowerImageHandleSymbol(MO.getImm(), MCOp); 220 return true; 221 } 222 if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) { 223 lowerImageHandleSymbol(MO.getImm(), MCOp); 224 return true; 225 } 226 227 return false; 228 } else if (MCID.TSFlags & NVPTXII::IsSuldMask) { 229 unsigned VecSize = 230 1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1); 231 232 // For a surface load of vector size N, the Nth operand will be the surfref 233 if (OpNo == VecSize && MO.isImm()) { 234 lowerImageHandleSymbol(MO.getImm(), MCOp); 235 return true; 236 } 237 238 return false; 239 } else if (MCID.TSFlags & NVPTXII::IsSustFlag) { 240 // This is a surface store, so operand 0 is a surfref 241 if (OpNo == 0 && MO.isImm()) { 242 lowerImageHandleSymbol(MO.getImm(), MCOp); 243 return true; 244 } 245 246 return false; 247 } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) { 248 // This is a query, so operand 1 is a surfref/texref 249 if (OpNo == 1 && MO.isImm()) { 250 lowerImageHandleSymbol(MO.getImm(), MCOp); 251 return true; 252 } 253 254 return false; 255 } 256 257 return false; 258 } 259 260 void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) { 261 // Ewwww 262 TargetMachine &TM = const_cast<TargetMachine&>(MF->getTarget()); 263 NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM); 264 const NVPTXMachineFunctionInfo *MFI = MF->getInfo<NVPTXMachineFunctionInfo>(); 265 const char *Sym = MFI->getImageHandleSymbol(Index); 266 std::string *SymNamePtr = 267 nvTM.getManagedStrPool()->getManagedString(Sym); 268 MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(StringRef(*SymNamePtr))); 269 } 270 271 void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) { 272 OutMI.setOpcode(MI->getOpcode()); 273 // Special: Do not mangle symbol operand of CALL_PROTOTYPE 274 if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) { 275 const MachineOperand &MO = MI->getOperand(0); 276 OutMI.addOperand(GetSymbolRef( 277 OutContext.getOrCreateSymbol(Twine(MO.getSymbolName())))); 278 return; 279 } 280 281 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 282 const MachineOperand &MO = MI->getOperand(i); 283 284 MCOperand MCOp; 285 if (!nvptxSubtarget->hasImageHandles()) { 286 if (lowerImageHandleOperand(MI, i, MCOp)) { 287 OutMI.addOperand(MCOp); 288 continue; 289 } 290 } 291 292 if (lowerOperand(MO, MCOp)) 293 OutMI.addOperand(MCOp); 294 } 295 } 296 297 bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO, 298 MCOperand &MCOp) { 299 switch (MO.getType()) { 300 default: llvm_unreachable("unknown operand type"); 301 case MachineOperand::MO_Register: 302 MCOp = MCOperand::createReg(encodeVirtualRegister(MO.getReg())); 303 break; 304 case MachineOperand::MO_Immediate: 305 MCOp = MCOperand::createImm(MO.getImm()); 306 break; 307 case MachineOperand::MO_MachineBasicBlock: 308 MCOp = MCOperand::createExpr(MCSymbolRefExpr::create( 309 MO.getMBB()->getSymbol(), OutContext)); 310 break; 311 case MachineOperand::MO_ExternalSymbol: 312 MCOp = GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName())); 313 break; 314 case MachineOperand::MO_GlobalAddress: 315 MCOp = GetSymbolRef(getSymbol(MO.getGlobal())); 316 break; 317 case MachineOperand::MO_FPImmediate: { 318 const ConstantFP *Cnt = MO.getFPImm(); 319 const APFloat &Val = Cnt->getValueAPF(); 320 321 switch (Cnt->getType()->getTypeID()) { 322 default: report_fatal_error("Unsupported FP type"); break; 323 case Type::FloatTyID: 324 MCOp = MCOperand::createExpr( 325 NVPTXFloatMCExpr::createConstantFPSingle(Val, OutContext)); 326 break; 327 case Type::DoubleTyID: 328 MCOp = MCOperand::createExpr( 329 NVPTXFloatMCExpr::createConstantFPDouble(Val, OutContext)); 330 break; 331 } 332 break; 333 } 334 } 335 return true; 336 } 337 338 unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) { 339 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 340 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 341 342 DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC]; 343 unsigned RegNum = RegMap[Reg]; 344 345 // Encode the register class in the upper 4 bits 346 // Must be kept in sync with NVPTXInstPrinter::printRegName 347 unsigned Ret = 0; 348 if (RC == &NVPTX::Int1RegsRegClass) { 349 Ret = (1 << 28); 350 } else if (RC == &NVPTX::Int16RegsRegClass) { 351 Ret = (2 << 28); 352 } else if (RC == &NVPTX::Int32RegsRegClass) { 353 Ret = (3 << 28); 354 } else if (RC == &NVPTX::Int64RegsRegClass) { 355 Ret = (4 << 28); 356 } else if (RC == &NVPTX::Float32RegsRegClass) { 357 Ret = (5 << 28); 358 } else if (RC == &NVPTX::Float64RegsRegClass) { 359 Ret = (6 << 28); 360 } else { 361 report_fatal_error("Bad register class"); 362 } 363 364 // Insert the vreg number 365 Ret |= (RegNum & 0x0FFFFFFF); 366 return Ret; 367 } else { 368 // Some special-use registers are actually physical registers. 369 // Encode this as the register class ID of 0 and the real register ID. 370 return Reg & 0x0FFFFFFF; 371 } 372 } 373 374 MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) { 375 const MCExpr *Expr; 376 Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, 377 OutContext); 378 return MCOperand::createExpr(Expr); 379 } 380 381 void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { 382 const DataLayout &DL = getDataLayout(); 383 const TargetLowering *TLI = nvptxSubtarget->getTargetLowering(); 384 385 Type *Ty = F->getReturnType(); 386 387 bool isABI = (nvptxSubtarget->getSmVersion() >= 20); 388 389 if (Ty->getTypeID() == Type::VoidTyID) 390 return; 391 392 O << " ("; 393 394 if (isABI) { 395 if (Ty->isFloatingPointTy() || Ty->isIntegerTy()) { 396 unsigned size = 0; 397 if (auto *ITy = dyn_cast<IntegerType>(Ty)) { 398 size = ITy->getBitWidth(); 399 if (size < 32) 400 size = 32; 401 } else { 402 assert(Ty->isFloatingPointTy() && "Floating point type expected here"); 403 size = Ty->getPrimitiveSizeInBits(); 404 } 405 406 O << ".param .b" << size << " func_retval0"; 407 } else if (isa<PointerType>(Ty)) { 408 O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits() 409 << " func_retval0"; 410 } else if (Ty->isAggregateType() || Ty->isVectorTy()) { 411 unsigned totalsz = DL.getTypeAllocSize(Ty); 412 unsigned retAlignment = 0; 413 if (!getAlign(*F, 0, retAlignment)) 414 retAlignment = DL.getABITypeAlignment(Ty); 415 O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz 416 << "]"; 417 } else 418 llvm_unreachable("Unknown return type"); 419 } else { 420 SmallVector<EVT, 16> vtparts; 421 ComputeValueVTs(*TLI, DL, Ty, vtparts); 422 unsigned idx = 0; 423 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { 424 unsigned elems = 1; 425 EVT elemtype = vtparts[i]; 426 if (vtparts[i].isVector()) { 427 elems = vtparts[i].getVectorNumElements(); 428 elemtype = vtparts[i].getVectorElementType(); 429 } 430 431 for (unsigned j = 0, je = elems; j != je; ++j) { 432 unsigned sz = elemtype.getSizeInBits(); 433 if (elemtype.isInteger() && (sz < 32)) 434 sz = 32; 435 O << ".reg .b" << sz << " func_retval" << idx; 436 if (j < je - 1) 437 O << ", "; 438 ++idx; 439 } 440 if (i < e - 1) 441 O << ", "; 442 } 443 } 444 O << ") "; 445 } 446 447 void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF, 448 raw_ostream &O) { 449 const Function *F = MF.getFunction(); 450 printReturnValStr(F, O); 451 } 452 453 // Return true if MBB is the header of a loop marked with 454 // llvm.loop.unroll.disable. 455 // TODO: consider "#pragma unroll 1" which is equivalent to "#pragma nounroll". 456 bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll( 457 const MachineBasicBlock &MBB) const { 458 MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>(); 459 // We insert .pragma "nounroll" only to the loop header. 460 if (!LI.isLoopHeader(&MBB)) 461 return false; 462 463 // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore, 464 // we iterate through each back edge of the loop with header MBB, and check 465 // whether its metadata contains llvm.loop.unroll.disable. 466 for (auto I = MBB.pred_begin(); I != MBB.pred_end(); ++I) { 467 const MachineBasicBlock *PMBB = *I; 468 if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) { 469 // Edges from other loops to MBB are not back edges. 470 continue; 471 } 472 if (const BasicBlock *PBB = PMBB->getBasicBlock()) { 473 if (MDNode *LoopID = 474 PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) { 475 if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable")) 476 return true; 477 } 478 } 479 } 480 return false; 481 } 482 483 void NVPTXAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const { 484 AsmPrinter::EmitBasicBlockStart(MBB); 485 if (isLoopHeaderOfNoUnroll(MBB)) 486 OutStreamer->EmitRawText(StringRef("\t.pragma \"nounroll\";\n")); 487 } 488 489 void NVPTXAsmPrinter::EmitFunctionEntryLabel() { 490 SmallString<128> Str; 491 raw_svector_ostream O(Str); 492 493 if (!GlobalsEmitted) { 494 emitGlobals(*MF->getFunction()->getParent()); 495 GlobalsEmitted = true; 496 } 497 498 // Set up 499 MRI = &MF->getRegInfo(); 500 F = MF->getFunction(); 501 emitLinkageDirective(F, O); 502 if (isKernelFunction(*F)) 503 O << ".entry "; 504 else { 505 O << ".func "; 506 printReturnValStr(*MF, O); 507 } 508 509 CurrentFnSym->print(O, MAI); 510 511 emitFunctionParamList(*MF, O); 512 513 if (isKernelFunction(*F)) 514 emitKernelFunctionDirectives(*F, O); 515 516 OutStreamer->EmitRawText(O.str()); 517 518 prevDebugLoc = DebugLoc(); 519 } 520 521 void NVPTXAsmPrinter::EmitFunctionBodyStart() { 522 VRegMapping.clear(); 523 OutStreamer->EmitRawText(StringRef("{\n")); 524 setAndEmitFunctionVirtualRegisters(*MF); 525 526 SmallString<128> Str; 527 raw_svector_ostream O(Str); 528 emitDemotedVars(MF->getFunction(), O); 529 OutStreamer->EmitRawText(O.str()); 530 } 531 532 void NVPTXAsmPrinter::EmitFunctionBodyEnd() { 533 OutStreamer->EmitRawText(StringRef("}\n")); 534 VRegMapping.clear(); 535 } 536 537 void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const { 538 unsigned RegNo = MI->getOperand(0).getReg(); 539 if (TargetRegisterInfo::isVirtualRegister(RegNo)) { 540 OutStreamer->AddComment(Twine("implicit-def: ") + 541 getVirtualRegisterName(RegNo)); 542 } else { 543 OutStreamer->AddComment(Twine("implicit-def: ") + 544 nvptxSubtarget->getRegisterInfo()->getName(RegNo)); 545 } 546 OutStreamer->AddBlankLine(); 547 } 548 549 void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F, 550 raw_ostream &O) const { 551 // If the NVVM IR has some of reqntid* specified, then output 552 // the reqntid directive, and set the unspecified ones to 1. 553 // If none of reqntid* is specified, don't output reqntid directive. 554 unsigned reqntidx, reqntidy, reqntidz; 555 bool specified = false; 556 if (!getReqNTIDx(F, reqntidx)) 557 reqntidx = 1; 558 else 559 specified = true; 560 if (!getReqNTIDy(F, reqntidy)) 561 reqntidy = 1; 562 else 563 specified = true; 564 if (!getReqNTIDz(F, reqntidz)) 565 reqntidz = 1; 566 else 567 specified = true; 568 569 if (specified) 570 O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz 571 << "\n"; 572 573 // If the NVVM IR has some of maxntid* specified, then output 574 // the maxntid directive, and set the unspecified ones to 1. 575 // If none of maxntid* is specified, don't output maxntid directive. 576 unsigned maxntidx, maxntidy, maxntidz; 577 specified = false; 578 if (!getMaxNTIDx(F, maxntidx)) 579 maxntidx = 1; 580 else 581 specified = true; 582 if (!getMaxNTIDy(F, maxntidy)) 583 maxntidy = 1; 584 else 585 specified = true; 586 if (!getMaxNTIDz(F, maxntidz)) 587 maxntidz = 1; 588 else 589 specified = true; 590 591 if (specified) 592 O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz 593 << "\n"; 594 595 unsigned mincta; 596 if (getMinCTASm(F, mincta)) 597 O << ".minnctapersm " << mincta << "\n"; 598 599 unsigned maxnreg; 600 if (getMaxNReg(F, maxnreg)) 601 O << ".maxnreg " << maxnreg << "\n"; 602 } 603 604 std::string 605 NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const { 606 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 607 608 std::string Name; 609 raw_string_ostream NameStr(Name); 610 611 VRegRCMap::const_iterator I = VRegMapping.find(RC); 612 assert(I != VRegMapping.end() && "Bad register class"); 613 const DenseMap<unsigned, unsigned> &RegMap = I->second; 614 615 VRegMap::const_iterator VI = RegMap.find(Reg); 616 assert(VI != RegMap.end() && "Bad virtual register"); 617 unsigned MappedVR = VI->second; 618 619 NameStr << getNVPTXRegClassStr(RC) << MappedVR; 620 621 NameStr.flush(); 622 return Name; 623 } 624 625 void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, 626 raw_ostream &O) { 627 O << getVirtualRegisterName(vr); 628 } 629 630 void NVPTXAsmPrinter::printVecModifiedImmediate( 631 const MachineOperand &MO, const char *Modifier, raw_ostream &O) { 632 static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' }; 633 int Imm = (int) MO.getImm(); 634 if (0 == strcmp(Modifier, "vecelem")) 635 O << "_" << vecelem[Imm]; 636 else if (0 == strcmp(Modifier, "vecv4comm1")) { 637 if ((Imm < 0) || (Imm > 3)) 638 O << "//"; 639 } else if (0 == strcmp(Modifier, "vecv4comm2")) { 640 if ((Imm < 4) || (Imm > 7)) 641 O << "//"; 642 } else if (0 == strcmp(Modifier, "vecv4pos")) { 643 if (Imm < 0) 644 Imm = 0; 645 O << "_" << vecelem[Imm % 4]; 646 } else if (0 == strcmp(Modifier, "vecv2comm1")) { 647 if ((Imm < 0) || (Imm > 1)) 648 O << "//"; 649 } else if (0 == strcmp(Modifier, "vecv2comm2")) { 650 if ((Imm < 2) || (Imm > 3)) 651 O << "//"; 652 } else if (0 == strcmp(Modifier, "vecv2pos")) { 653 if (Imm < 0) 654 Imm = 0; 655 O << "_" << vecelem[Imm % 2]; 656 } else 657 llvm_unreachable("Unknown Modifier on immediate operand"); 658 } 659 660 void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) { 661 emitLinkageDirective(F, O); 662 if (isKernelFunction(*F)) 663 O << ".entry "; 664 else 665 O << ".func "; 666 printReturnValStr(F, O); 667 getSymbol(F)->print(O, MAI); 668 O << "\n"; 669 emitFunctionParamList(F, O); 670 O << ";\n"; 671 } 672 673 static bool usedInGlobalVarDef(const Constant *C) { 674 if (!C) 675 return false; 676 677 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { 678 return GV->getName() != "llvm.used"; 679 } 680 681 for (const User *U : C->users()) 682 if (const Constant *C = dyn_cast<Constant>(U)) 683 if (usedInGlobalVarDef(C)) 684 return true; 685 686 return false; 687 } 688 689 static bool usedInOneFunc(const User *U, Function const *&oneFunc) { 690 if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) { 691 if (othergv->getName() == "llvm.used") 692 return true; 693 } 694 695 if (const Instruction *instr = dyn_cast<Instruction>(U)) { 696 if (instr->getParent() && instr->getParent()->getParent()) { 697 const Function *curFunc = instr->getParent()->getParent(); 698 if (oneFunc && (curFunc != oneFunc)) 699 return false; 700 oneFunc = curFunc; 701 return true; 702 } else 703 return false; 704 } 705 706 for (const User *UU : U->users()) 707 if (!usedInOneFunc(UU, oneFunc)) 708 return false; 709 710 return true; 711 } 712 713 /* Find out if a global variable can be demoted to local scope. 714 * Currently, this is valid for CUDA shared variables, which have local 715 * scope and global lifetime. So the conditions to check are : 716 * 1. Is the global variable in shared address space? 717 * 2. Does it have internal linkage? 718 * 3. Is the global variable referenced only in one function? 719 */ 720 static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) { 721 if (!gv->hasInternalLinkage()) 722 return false; 723 PointerType *Pty = gv->getType(); 724 if (Pty->getAddressSpace() != ADDRESS_SPACE_SHARED) 725 return false; 726 727 const Function *oneFunc = nullptr; 728 729 bool flag = usedInOneFunc(gv, oneFunc); 730 if (!flag) 731 return false; 732 if (!oneFunc) 733 return false; 734 f = oneFunc; 735 return true; 736 } 737 738 static bool useFuncSeen(const Constant *C, 739 DenseMap<const Function *, bool> &seenMap) { 740 for (const User *U : C->users()) { 741 if (const Constant *cu = dyn_cast<Constant>(U)) { 742 if (useFuncSeen(cu, seenMap)) 743 return true; 744 } else if (const Instruction *I = dyn_cast<Instruction>(U)) { 745 const BasicBlock *bb = I->getParent(); 746 if (!bb) 747 continue; 748 const Function *caller = bb->getParent(); 749 if (!caller) 750 continue; 751 if (seenMap.find(caller) != seenMap.end()) 752 return true; 753 } 754 } 755 return false; 756 } 757 758 void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) { 759 DenseMap<const Function *, bool> seenMap; 760 for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) { 761 const Function *F = &*FI; 762 763 if (F->isDeclaration()) { 764 if (F->use_empty()) 765 continue; 766 if (F->getIntrinsicID()) 767 continue; 768 emitDeclaration(F, O); 769 continue; 770 } 771 for (const User *U : F->users()) { 772 if (const Constant *C = dyn_cast<Constant>(U)) { 773 if (usedInGlobalVarDef(C)) { 774 // The use is in the initialization of a global variable 775 // that is a function pointer, so print a declaration 776 // for the original function 777 emitDeclaration(F, O); 778 break; 779 } 780 // Emit a declaration of this function if the function that 781 // uses this constant expr has already been seen. 782 if (useFuncSeen(C, seenMap)) { 783 emitDeclaration(F, O); 784 break; 785 } 786 } 787 788 if (!isa<Instruction>(U)) 789 continue; 790 const Instruction *instr = cast<Instruction>(U); 791 const BasicBlock *bb = instr->getParent(); 792 if (!bb) 793 continue; 794 const Function *caller = bb->getParent(); 795 if (!caller) 796 continue; 797 798 // If a caller has already been seen, then the caller is 799 // appearing in the module before the callee. so print out 800 // a declaration for the callee. 801 if (seenMap.find(caller) != seenMap.end()) { 802 emitDeclaration(F, O); 803 break; 804 } 805 } 806 seenMap[F] = true; 807 } 808 } 809 810 void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) { 811 DebugInfoFinder DbgFinder; 812 DbgFinder.processModule(M); 813 814 unsigned i = 1; 815 for (const DICompileUnit *DIUnit : DbgFinder.compile_units()) { 816 StringRef Filename = DIUnit->getFilename(); 817 StringRef Dirname = DIUnit->getDirectory(); 818 SmallString<128> FullPathName = Dirname; 819 if (!Dirname.empty() && !sys::path::is_absolute(Filename)) { 820 sys::path::append(FullPathName, Filename); 821 Filename = FullPathName; 822 } 823 if (filenameMap.find(Filename) != filenameMap.end()) 824 continue; 825 filenameMap[Filename] = i; 826 OutStreamer->EmitDwarfFileDirective(i, "", Filename); 827 ++i; 828 } 829 830 for (DISubprogram *SP : DbgFinder.subprograms()) { 831 StringRef Filename = SP->getFilename(); 832 StringRef Dirname = SP->getDirectory(); 833 SmallString<128> FullPathName = Dirname; 834 if (!Dirname.empty() && !sys::path::is_absolute(Filename)) { 835 sys::path::append(FullPathName, Filename); 836 Filename = FullPathName; 837 } 838 if (filenameMap.find(Filename) != filenameMap.end()) 839 continue; 840 filenameMap[Filename] = i; 841 OutStreamer->EmitDwarfFileDirective(i, "", Filename); 842 ++i; 843 } 844 } 845 846 static bool isEmptyXXStructor(GlobalVariable *GV) { 847 if (!GV) return true; 848 const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); 849 if (!InitList) return true; // Not an array; we don't know how to parse. 850 return InitList->getNumOperands() == 0; 851 } 852 853 bool NVPTXAsmPrinter::doInitialization(Module &M) { 854 // Construct a default subtarget off of the TargetMachine defaults. The 855 // rest of NVPTX isn't friendly to change subtargets per function and 856 // so the default TargetMachine will have all of the options. 857 const Triple &TT = TM.getTargetTriple(); 858 StringRef CPU = TM.getTargetCPU(); 859 StringRef FS = TM.getTargetFeatureString(); 860 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM); 861 const NVPTXSubtarget STI(TT, CPU, FS, NTM); 862 863 if (M.alias_size()) { 864 report_fatal_error("Module has aliases, which NVPTX does not support."); 865 return true; // error 866 } 867 if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors"))) { 868 report_fatal_error( 869 "Module has a nontrivial global ctor, which NVPTX does not support."); 870 return true; // error 871 } 872 if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors"))) { 873 report_fatal_error( 874 "Module has a nontrivial global dtor, which NVPTX does not support."); 875 return true; // error 876 } 877 878 SmallString<128> Str1; 879 raw_svector_ostream OS1(Str1); 880 881 MMI = getAnalysisIfAvailable<MachineModuleInfo>(); 882 883 // We need to call the parent's one explicitly. 884 //bool Result = AsmPrinter::doInitialization(M); 885 886 // Initialize TargetLoweringObjectFile since we didn't do in 887 // AsmPrinter::doInitialization either right above or where it's commented out 888 // below. 889 const_cast<TargetLoweringObjectFile &>(getObjFileLowering()) 890 .Initialize(OutContext, TM); 891 892 // Emit header before any dwarf directives are emitted below. 893 emitHeader(M, OS1, STI); 894 OutStreamer->EmitRawText(OS1.str()); 895 896 // Already commented out 897 //bool Result = AsmPrinter::doInitialization(M); 898 899 // Emit module-level inline asm if it exists. 900 if (!M.getModuleInlineAsm().empty()) { 901 OutStreamer->AddComment("Start of file scope inline assembly"); 902 OutStreamer->AddBlankLine(); 903 OutStreamer->EmitRawText(StringRef(M.getModuleInlineAsm())); 904 OutStreamer->AddBlankLine(); 905 OutStreamer->AddComment("End of file scope inline assembly"); 906 OutStreamer->AddBlankLine(); 907 } 908 909 // If we're not NVCL we're CUDA, go ahead and emit filenames. 910 if (TM.getTargetTriple().getOS() != Triple::NVCL) 911 recordAndEmitFilenames(M); 912 913 GlobalsEmitted = false; 914 915 return false; // success 916 } 917 918 void NVPTXAsmPrinter::emitGlobals(const Module &M) { 919 SmallString<128> Str2; 920 raw_svector_ostream OS2(Str2); 921 922 emitDeclarations(M, OS2); 923 924 // As ptxas does not support forward references of globals, we need to first 925 // sort the list of module-level globals in def-use order. We visit each 926 // global variable in order, and ensure that we emit it *after* its dependent 927 // globals. We use a little extra memory maintaining both a set and a list to 928 // have fast searches while maintaining a strict ordering. 929 SmallVector<const GlobalVariable *, 8> Globals; 930 DenseSet<const GlobalVariable *> GVVisited; 931 DenseSet<const GlobalVariable *> GVVisiting; 932 933 // Visit each global variable, in order 934 for (const GlobalVariable &I : M.globals()) 935 VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting); 936 937 assert(GVVisited.size() == M.getGlobalList().size() && 938 "Missed a global variable"); 939 assert(GVVisiting.size() == 0 && "Did not fully process a global variable"); 940 941 // Print out module-level global variables in proper order 942 for (unsigned i = 0, e = Globals.size(); i != e; ++i) 943 printModuleLevelGV(Globals[i], OS2); 944 945 OS2 << '\n'; 946 947 OutStreamer->EmitRawText(OS2.str()); 948 } 949 950 void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O, 951 const NVPTXSubtarget &STI) { 952 O << "//\n"; 953 O << "// Generated by LLVM NVPTX Back-End\n"; 954 O << "//\n"; 955 O << "\n"; 956 957 unsigned PTXVersion = STI.getPTXVersion(); 958 O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n"; 959 960 O << ".target "; 961 O << STI.getTargetName(); 962 963 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM); 964 if (NTM.getDrvInterface() == NVPTX::NVCL) 965 O << ", texmode_independent"; 966 else { 967 if (!STI.hasDouble()) 968 O << ", map_f64_to_f32"; 969 } 970 971 if (MAI->doesSupportDebugInformation()) 972 O << ", debug"; 973 974 O << "\n"; 975 976 O << ".address_size "; 977 if (NTM.is64Bit()) 978 O << "64"; 979 else 980 O << "32"; 981 O << "\n"; 982 983 O << "\n"; 984 } 985 986 bool NVPTXAsmPrinter::doFinalization(Module &M) { 987 // If we did not emit any functions, then the global declarations have not 988 // yet been emitted. 989 if (!GlobalsEmitted) { 990 emitGlobals(M); 991 GlobalsEmitted = true; 992 } 993 994 // XXX Temproarily remove global variables so that doFinalization() will not 995 // emit them again (global variables are emitted at beginning). 996 997 Module::GlobalListType &global_list = M.getGlobalList(); 998 int i, n = global_list.size(); 999 GlobalVariable **gv_array = new GlobalVariable *[n]; 1000 1001 // first, back-up GlobalVariable in gv_array 1002 i = 0; 1003 for (Module::global_iterator I = global_list.begin(), E = global_list.end(); 1004 I != E; ++I) 1005 gv_array[i++] = &*I; 1006 1007 // second, empty global_list 1008 while (!global_list.empty()) 1009 global_list.remove(global_list.begin()); 1010 1011 // call doFinalization 1012 bool ret = AsmPrinter::doFinalization(M); 1013 1014 // now we restore global variables 1015 for (i = 0; i < n; i++) 1016 global_list.insert(global_list.end(), gv_array[i]); 1017 1018 clearAnnotationCache(&M); 1019 1020 delete[] gv_array; 1021 return ret; 1022 1023 //bool Result = AsmPrinter::doFinalization(M); 1024 // Instead of calling the parents doFinalization, we may 1025 // clone parents doFinalization and customize here. 1026 // Currently, we if NVISA out the EmitGlobals() in 1027 // parent's doFinalization, which is too intrusive. 1028 // 1029 // Same for the doInitialization. 1030 //return Result; 1031 } 1032 1033 // This function emits appropriate linkage directives for 1034 // functions and global variables. 1035 // 1036 // extern function declaration -> .extern 1037 // extern function definition -> .visible 1038 // external global variable with init -> .visible 1039 // external without init -> .extern 1040 // appending -> not allowed, assert. 1041 // for any linkage other than 1042 // internal, private, linker_private, 1043 // linker_private_weak, linker_private_weak_def_auto, 1044 // we emit -> .weak. 1045 1046 void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V, 1047 raw_ostream &O) { 1048 if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) { 1049 if (V->hasExternalLinkage()) { 1050 if (isa<GlobalVariable>(V)) { 1051 const GlobalVariable *GVar = cast<GlobalVariable>(V); 1052 if (GVar) { 1053 if (GVar->hasInitializer()) 1054 O << ".visible "; 1055 else 1056 O << ".extern "; 1057 } 1058 } else if (V->isDeclaration()) 1059 O << ".extern "; 1060 else 1061 O << ".visible "; 1062 } else if (V->hasAppendingLinkage()) { 1063 std::string msg; 1064 msg.append("Error: "); 1065 msg.append("Symbol "); 1066 if (V->hasName()) 1067 msg.append(V->getName()); 1068 msg.append("has unsupported appending linkage type"); 1069 llvm_unreachable(msg.c_str()); 1070 } else if (!V->hasInternalLinkage() && 1071 !V->hasPrivateLinkage()) { 1072 O << ".weak "; 1073 } 1074 } 1075 } 1076 1077 void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, 1078 raw_ostream &O, 1079 bool processDemoted) { 1080 // Skip meta data 1081 if (GVar->hasSection()) { 1082 if (GVar->getSection() == "llvm.metadata") 1083 return; 1084 } 1085 1086 // Skip LLVM intrinsic global variables 1087 if (GVar->getName().startswith("llvm.") || 1088 GVar->getName().startswith("nvvm.")) 1089 return; 1090 1091 const DataLayout &DL = getDataLayout(); 1092 1093 // GlobalVariables are always constant pointers themselves. 1094 PointerType *PTy = GVar->getType(); 1095 Type *ETy = GVar->getValueType(); 1096 1097 if (GVar->hasExternalLinkage()) { 1098 if (GVar->hasInitializer()) 1099 O << ".visible "; 1100 else 1101 O << ".extern "; 1102 } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() || 1103 GVar->hasAvailableExternallyLinkage() || 1104 GVar->hasCommonLinkage()) { 1105 O << ".weak "; 1106 } 1107 1108 if (isTexture(*GVar)) { 1109 O << ".global .texref " << getTextureName(*GVar) << ";\n"; 1110 return; 1111 } 1112 1113 if (isSurface(*GVar)) { 1114 O << ".global .surfref " << getSurfaceName(*GVar) << ";\n"; 1115 return; 1116 } 1117 1118 if (GVar->isDeclaration()) { 1119 // (extern) declarations, no definition or initializer 1120 // Currently the only known declaration is for an automatic __local 1121 // (.shared) promoted to global. 1122 emitPTXGlobalVariable(GVar, O); 1123 O << ";\n"; 1124 return; 1125 } 1126 1127 if (isSampler(*GVar)) { 1128 O << ".global .samplerref " << getSamplerName(*GVar); 1129 1130 const Constant *Initializer = nullptr; 1131 if (GVar->hasInitializer()) 1132 Initializer = GVar->getInitializer(); 1133 const ConstantInt *CI = nullptr; 1134 if (Initializer) 1135 CI = dyn_cast<ConstantInt>(Initializer); 1136 if (CI) { 1137 unsigned sample = CI->getZExtValue(); 1138 1139 O << " = { "; 1140 1141 for (int i = 0, 1142 addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE); 1143 i < 3; i++) { 1144 O << "addr_mode_" << i << " = "; 1145 switch (addr) { 1146 case 0: 1147 O << "wrap"; 1148 break; 1149 case 1: 1150 O << "clamp_to_border"; 1151 break; 1152 case 2: 1153 O << "clamp_to_edge"; 1154 break; 1155 case 3: 1156 O << "wrap"; 1157 break; 1158 case 4: 1159 O << "mirror"; 1160 break; 1161 } 1162 O << ", "; 1163 } 1164 O << "filter_mode = "; 1165 switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) { 1166 case 0: 1167 O << "nearest"; 1168 break; 1169 case 1: 1170 O << "linear"; 1171 break; 1172 case 2: 1173 llvm_unreachable("Anisotropic filtering is not supported"); 1174 default: 1175 O << "nearest"; 1176 break; 1177 } 1178 if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) { 1179 O << ", force_unnormalized_coords = 1"; 1180 } 1181 O << " }"; 1182 } 1183 1184 O << ";\n"; 1185 return; 1186 } 1187 1188 if (GVar->hasPrivateLinkage()) { 1189 if (strncmp(GVar->getName().data(), "unrollpragma", 12) == 0) 1190 return; 1191 1192 // FIXME - need better way (e.g. Metadata) to avoid generating this global 1193 if (strncmp(GVar->getName().data(), "filename", 8) == 0) 1194 return; 1195 if (GVar->use_empty()) 1196 return; 1197 } 1198 1199 const Function *demotedFunc = nullptr; 1200 if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) { 1201 O << "// " << GVar->getName() << " has been demoted\n"; 1202 if (localDecls.find(demotedFunc) != localDecls.end()) 1203 localDecls[demotedFunc].push_back(GVar); 1204 else { 1205 std::vector<const GlobalVariable *> temp; 1206 temp.push_back(GVar); 1207 localDecls[demotedFunc] = temp; 1208 } 1209 return; 1210 } 1211 1212 O << "."; 1213 emitPTXAddressSpace(PTy->getAddressSpace(), O); 1214 1215 if (isManaged(*GVar)) { 1216 O << " .attribute(.managed)"; 1217 } 1218 1219 if (GVar->getAlignment() == 0) 1220 O << " .align " << (int)DL.getPrefTypeAlignment(ETy); 1221 else 1222 O << " .align " << GVar->getAlignment(); 1223 1224 if (ETy->isFloatingPointTy() || ETy->isIntegerTy() || ETy->isPointerTy()) { 1225 O << " ."; 1226 // Special case: ABI requires that we use .u8 for predicates 1227 if (ETy->isIntegerTy(1)) 1228 O << "u8"; 1229 else 1230 O << getPTXFundamentalTypeStr(ETy, false); 1231 O << " "; 1232 getSymbol(GVar)->print(O, MAI); 1233 1234 // Ptx allows variable initilization only for constant and global state 1235 // spaces. 1236 if (GVar->hasInitializer()) { 1237 if ((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) || 1238 (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) { 1239 const Constant *Initializer = GVar->getInitializer(); 1240 // 'undef' is treated as there is no value specified. 1241 if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) { 1242 O << " = "; 1243 printScalarConstant(Initializer, O); 1244 } 1245 } else { 1246 // The frontend adds zero-initializer to device and constant variables 1247 // that don't have an initial value, and UndefValue to shared 1248 // variables, so skip warning for this case. 1249 if (!GVar->getInitializer()->isNullValue() && 1250 !isa<UndefValue>(GVar->getInitializer())) { 1251 report_fatal_error("initial value of '" + GVar->getName() + 1252 "' is not allowed in addrspace(" + 1253 Twine(PTy->getAddressSpace()) + ")"); 1254 } 1255 } 1256 } 1257 } else { 1258 unsigned int ElementSize = 0; 1259 1260 // Although PTX has direct support for struct type and array type and 1261 // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for 1262 // targets that support these high level field accesses. Structs, arrays 1263 // and vectors are lowered into arrays of bytes. 1264 switch (ETy->getTypeID()) { 1265 case Type::StructTyID: 1266 case Type::ArrayTyID: 1267 case Type::VectorTyID: 1268 ElementSize = DL.getTypeStoreSize(ETy); 1269 // Ptx allows variable initilization only for constant and 1270 // global state spaces. 1271 if (((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) || 1272 (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) && 1273 GVar->hasInitializer()) { 1274 const Constant *Initializer = GVar->getInitializer(); 1275 if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) { 1276 AggBuffer aggBuffer(ElementSize, O, *this); 1277 bufferAggregateConstant(Initializer, &aggBuffer); 1278 if (aggBuffer.numSymbols) { 1279 if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) { 1280 O << " .u64 "; 1281 getSymbol(GVar)->print(O, MAI); 1282 O << "["; 1283 O << ElementSize / 8; 1284 } else { 1285 O << " .u32 "; 1286 getSymbol(GVar)->print(O, MAI); 1287 O << "["; 1288 O << ElementSize / 4; 1289 } 1290 O << "]"; 1291 } else { 1292 O << " .b8 "; 1293 getSymbol(GVar)->print(O, MAI); 1294 O << "["; 1295 O << ElementSize; 1296 O << "]"; 1297 } 1298 O << " = {"; 1299 aggBuffer.print(); 1300 O << "}"; 1301 } else { 1302 O << " .b8 "; 1303 getSymbol(GVar)->print(O, MAI); 1304 if (ElementSize) { 1305 O << "["; 1306 O << ElementSize; 1307 O << "]"; 1308 } 1309 } 1310 } else { 1311 O << " .b8 "; 1312 getSymbol(GVar)->print(O, MAI); 1313 if (ElementSize) { 1314 O << "["; 1315 O << ElementSize; 1316 O << "]"; 1317 } 1318 } 1319 break; 1320 default: 1321 llvm_unreachable("type not supported yet"); 1322 } 1323 } 1324 O << ";\n"; 1325 } 1326 1327 void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) { 1328 if (localDecls.find(f) == localDecls.end()) 1329 return; 1330 1331 std::vector<const GlobalVariable *> &gvars = localDecls[f]; 1332 1333 for (unsigned i = 0, e = gvars.size(); i != e; ++i) { 1334 O << "\t// demoted variable\n\t"; 1335 printModuleLevelGV(gvars[i], O, true); 1336 } 1337 } 1338 1339 void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace, 1340 raw_ostream &O) const { 1341 switch (AddressSpace) { 1342 case ADDRESS_SPACE_LOCAL: 1343 O << "local"; 1344 break; 1345 case ADDRESS_SPACE_GLOBAL: 1346 O << "global"; 1347 break; 1348 case ADDRESS_SPACE_CONST: 1349 O << "const"; 1350 break; 1351 case ADDRESS_SPACE_SHARED: 1352 O << "shared"; 1353 break; 1354 default: 1355 report_fatal_error("Bad address space found while emitting PTX"); 1356 break; 1357 } 1358 } 1359 1360 std::string 1361 NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const { 1362 switch (Ty->getTypeID()) { 1363 default: 1364 llvm_unreachable("unexpected type"); 1365 break; 1366 case Type::IntegerTyID: { 1367 unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); 1368 if (NumBits == 1) 1369 return "pred"; 1370 else if (NumBits <= 64) { 1371 std::string name = "u"; 1372 return name + utostr(NumBits); 1373 } else { 1374 llvm_unreachable("Integer too large"); 1375 break; 1376 } 1377 break; 1378 } 1379 case Type::FloatTyID: 1380 return "f32"; 1381 case Type::DoubleTyID: 1382 return "f64"; 1383 case Type::PointerTyID: 1384 if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) 1385 if (useB4PTR) 1386 return "b64"; 1387 else 1388 return "u64"; 1389 else if (useB4PTR) 1390 return "b32"; 1391 else 1392 return "u32"; 1393 } 1394 llvm_unreachable("unexpected type"); 1395 return nullptr; 1396 } 1397 1398 void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, 1399 raw_ostream &O) { 1400 const DataLayout &DL = getDataLayout(); 1401 1402 // GlobalVariables are always constant pointers themselves. 1403 Type *ETy = GVar->getValueType(); 1404 1405 O << "."; 1406 emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O); 1407 if (GVar->getAlignment() == 0) 1408 O << " .align " << (int)DL.getPrefTypeAlignment(ETy); 1409 else 1410 O << " .align " << GVar->getAlignment(); 1411 1412 if (ETy->isFloatingPointTy() || ETy->isIntegerTy() || ETy->isPointerTy()) { 1413 O << " ."; 1414 O << getPTXFundamentalTypeStr(ETy); 1415 O << " "; 1416 getSymbol(GVar)->print(O, MAI); 1417 return; 1418 } 1419 1420 int64_t ElementSize = 0; 1421 1422 // Although PTX has direct support for struct type and array type and LLVM IR 1423 // is very similar to PTX, the LLVM CodeGen does not support for targets that 1424 // support these high level field accesses. Structs and arrays are lowered 1425 // into arrays of bytes. 1426 switch (ETy->getTypeID()) { 1427 case Type::StructTyID: 1428 case Type::ArrayTyID: 1429 case Type::VectorTyID: 1430 ElementSize = DL.getTypeStoreSize(ETy); 1431 O << " .b8 "; 1432 getSymbol(GVar)->print(O, MAI); 1433 O << "["; 1434 if (ElementSize) { 1435 O << ElementSize; 1436 } 1437 O << "]"; 1438 break; 1439 default: 1440 llvm_unreachable("type not supported yet"); 1441 } 1442 } 1443 1444 static unsigned int getOpenCLAlignment(const DataLayout &DL, Type *Ty) { 1445 if (Ty->isSingleValueType()) 1446 return DL.getPrefTypeAlignment(Ty); 1447 1448 auto *ATy = dyn_cast<ArrayType>(Ty); 1449 if (ATy) 1450 return getOpenCLAlignment(DL, ATy->getElementType()); 1451 1452 auto *STy = dyn_cast<StructType>(Ty); 1453 if (STy) { 1454 unsigned int alignStruct = 1; 1455 // Go through each element of the struct and find the 1456 // largest alignment. 1457 for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) { 1458 Type *ETy = STy->getElementType(i); 1459 unsigned int align = getOpenCLAlignment(DL, ETy); 1460 if (align > alignStruct) 1461 alignStruct = align; 1462 } 1463 return alignStruct; 1464 } 1465 1466 auto *FTy = dyn_cast<FunctionType>(Ty); 1467 if (FTy) 1468 return DL.getPointerPrefAlignment(); 1469 return DL.getPrefTypeAlignment(Ty); 1470 } 1471 1472 void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I, 1473 int paramIndex, raw_ostream &O) { 1474 getSymbol(I->getParent())->print(O, MAI); 1475 O << "_param_" << paramIndex; 1476 } 1477 1478 void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { 1479 const DataLayout &DL = getDataLayout(); 1480 const AttributeSet &PAL = F->getAttributes(); 1481 const TargetLowering *TLI = nvptxSubtarget->getTargetLowering(); 1482 Function::const_arg_iterator I, E; 1483 unsigned paramIndex = 0; 1484 bool first = true; 1485 bool isKernelFunc = isKernelFunction(*F); 1486 bool isABI = (nvptxSubtarget->getSmVersion() >= 20); 1487 MVT thePointerTy = TLI->getPointerTy(DL); 1488 1489 if (F->arg_empty()) { 1490 O << "()\n"; 1491 return; 1492 } 1493 1494 O << "(\n"; 1495 1496 for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) { 1497 Type *Ty = I->getType(); 1498 1499 if (!first) 1500 O << ",\n"; 1501 1502 first = false; 1503 1504 // Handle image/sampler parameters 1505 if (isKernelFunction(*F)) { 1506 if (isSampler(*I) || isImage(*I)) { 1507 if (isImage(*I)) { 1508 std::string sname = I->getName(); 1509 if (isImageWriteOnly(*I) || isImageReadWrite(*I)) { 1510 if (nvptxSubtarget->hasImageHandles()) 1511 O << "\t.param .u64 .ptr .surfref "; 1512 else 1513 O << "\t.param .surfref "; 1514 CurrentFnSym->print(O, MAI); 1515 O << "_param_" << paramIndex; 1516 } 1517 else { // Default image is read_only 1518 if (nvptxSubtarget->hasImageHandles()) 1519 O << "\t.param .u64 .ptr .texref "; 1520 else 1521 O << "\t.param .texref "; 1522 CurrentFnSym->print(O, MAI); 1523 O << "_param_" << paramIndex; 1524 } 1525 } else { 1526 if (nvptxSubtarget->hasImageHandles()) 1527 O << "\t.param .u64 .ptr .samplerref "; 1528 else 1529 O << "\t.param .samplerref "; 1530 CurrentFnSym->print(O, MAI); 1531 O << "_param_" << paramIndex; 1532 } 1533 continue; 1534 } 1535 } 1536 1537 if (!PAL.hasAttribute(paramIndex + 1, Attribute::ByVal)) { 1538 if (Ty->isAggregateType() || Ty->isVectorTy()) { 1539 // Just print .param .align <a> .b8 .param[size]; 1540 // <a> = PAL.getparamalignment 1541 // size = typeallocsize of element type 1542 unsigned align = PAL.getParamAlignment(paramIndex + 1); 1543 if (align == 0) 1544 align = DL.getABITypeAlignment(Ty); 1545 1546 unsigned sz = DL.getTypeAllocSize(Ty); 1547 O << "\t.param .align " << align << " .b8 "; 1548 printParamName(I, paramIndex, O); 1549 O << "[" << sz << "]"; 1550 1551 continue; 1552 } 1553 // Just a scalar 1554 auto *PTy = dyn_cast<PointerType>(Ty); 1555 if (isKernelFunc) { 1556 if (PTy) { 1557 // Special handling for pointer arguments to kernel 1558 O << "\t.param .u" << thePointerTy.getSizeInBits() << " "; 1559 1560 if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() != 1561 NVPTX::CUDA) { 1562 Type *ETy = PTy->getElementType(); 1563 int addrSpace = PTy->getAddressSpace(); 1564 switch (addrSpace) { 1565 default: 1566 O << ".ptr "; 1567 break; 1568 case ADDRESS_SPACE_CONST: 1569 O << ".ptr .const "; 1570 break; 1571 case ADDRESS_SPACE_SHARED: 1572 O << ".ptr .shared "; 1573 break; 1574 case ADDRESS_SPACE_GLOBAL: 1575 O << ".ptr .global "; 1576 break; 1577 } 1578 O << ".align " << (int)getOpenCLAlignment(DL, ETy) << " "; 1579 } 1580 printParamName(I, paramIndex, O); 1581 continue; 1582 } 1583 1584 // non-pointer scalar to kernel func 1585 O << "\t.param ."; 1586 // Special case: predicate operands become .u8 types 1587 if (Ty->isIntegerTy(1)) 1588 O << "u8"; 1589 else 1590 O << getPTXFundamentalTypeStr(Ty); 1591 O << " "; 1592 printParamName(I, paramIndex, O); 1593 continue; 1594 } 1595 // Non-kernel function, just print .param .b<size> for ABI 1596 // and .reg .b<size> for non-ABI 1597 unsigned sz = 0; 1598 if (isa<IntegerType>(Ty)) { 1599 sz = cast<IntegerType>(Ty)->getBitWidth(); 1600 if (sz < 32) 1601 sz = 32; 1602 } else if (isa<PointerType>(Ty)) 1603 sz = thePointerTy.getSizeInBits(); 1604 else 1605 sz = Ty->getPrimitiveSizeInBits(); 1606 if (isABI) 1607 O << "\t.param .b" << sz << " "; 1608 else 1609 O << "\t.reg .b" << sz << " "; 1610 printParamName(I, paramIndex, O); 1611 continue; 1612 } 1613 1614 // param has byVal attribute. So should be a pointer 1615 auto *PTy = dyn_cast<PointerType>(Ty); 1616 assert(PTy && "Param with byval attribute should be a pointer type"); 1617 Type *ETy = PTy->getElementType(); 1618 1619 if (isABI || isKernelFunc) { 1620 // Just print .param .align <a> .b8 .param[size]; 1621 // <a> = PAL.getparamalignment 1622 // size = typeallocsize of element type 1623 unsigned align = PAL.getParamAlignment(paramIndex + 1); 1624 if (align == 0) 1625 align = DL.getABITypeAlignment(ETy); 1626 // Work around a bug in ptxas. When PTX code takes address of 1627 // byval parameter with alignment < 4, ptxas generates code to 1628 // spill argument into memory. Alas on sm_50+ ptxas generates 1629 // SASS code that fails with misaligned access. To work around 1630 // the problem, make sure that we align byval parameters by at 1631 // least 4. Matching change must be made in LowerCall() where we 1632 // prepare parameters for the call. 1633 // 1634 // TODO: this will need to be undone when we get to support multi-TU 1635 // device-side compilation as it breaks ABI compatibility with nvcc. 1636 // Hopefully ptxas bug is fixed by then. 1637 if (!isKernelFunc && align < 4) 1638 align = 4; 1639 unsigned sz = DL.getTypeAllocSize(ETy); 1640 O << "\t.param .align " << align << " .b8 "; 1641 printParamName(I, paramIndex, O); 1642 O << "[" << sz << "]"; 1643 continue; 1644 } else { 1645 // Split the ETy into constituent parts and 1646 // print .param .b<size> <name> for each part. 1647 // Further, if a part is vector, print the above for 1648 // each vector element. 1649 SmallVector<EVT, 16> vtparts; 1650 ComputeValueVTs(*TLI, DL, ETy, vtparts); 1651 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { 1652 unsigned elems = 1; 1653 EVT elemtype = vtparts[i]; 1654 if (vtparts[i].isVector()) { 1655 elems = vtparts[i].getVectorNumElements(); 1656 elemtype = vtparts[i].getVectorElementType(); 1657 } 1658 1659 for (unsigned j = 0, je = elems; j != je; ++j) { 1660 unsigned sz = elemtype.getSizeInBits(); 1661 if (elemtype.isInteger() && (sz < 32)) 1662 sz = 32; 1663 O << "\t.reg .b" << sz << " "; 1664 printParamName(I, paramIndex, O); 1665 if (j < je - 1) 1666 O << ",\n"; 1667 ++paramIndex; 1668 } 1669 if (i < e - 1) 1670 O << ",\n"; 1671 } 1672 --paramIndex; 1673 continue; 1674 } 1675 } 1676 1677 O << "\n)\n"; 1678 } 1679 1680 void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF, 1681 raw_ostream &O) { 1682 const Function *F = MF.getFunction(); 1683 emitFunctionParamList(F, O); 1684 } 1685 1686 void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters( 1687 const MachineFunction &MF) { 1688 SmallString<128> Str; 1689 raw_svector_ostream O(Str); 1690 1691 // Map the global virtual register number to a register class specific 1692 // virtual register number starting from 1 with that class. 1693 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 1694 //unsigned numRegClasses = TRI->getNumRegClasses(); 1695 1696 // Emit the Fake Stack Object 1697 const MachineFrameInfo &MFI = MF.getFrameInfo(); 1698 int NumBytes = (int) MFI.getStackSize(); 1699 if (NumBytes) { 1700 O << "\t.local .align " << MFI.getMaxAlignment() << " .b8 \t" << DEPOTNAME 1701 << getFunctionNumber() << "[" << NumBytes << "];\n"; 1702 if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) { 1703 O << "\t.reg .b64 \t%SP;\n"; 1704 O << "\t.reg .b64 \t%SPL;\n"; 1705 } else { 1706 O << "\t.reg .b32 \t%SP;\n"; 1707 O << "\t.reg .b32 \t%SPL;\n"; 1708 } 1709 } 1710 1711 // Go through all virtual registers to establish the mapping between the 1712 // global virtual 1713 // register number and the per class virtual register number. 1714 // We use the per class virtual register number in the ptx output. 1715 unsigned int numVRs = MRI->getNumVirtRegs(); 1716 for (unsigned i = 0; i < numVRs; i++) { 1717 unsigned int vr = TRI->index2VirtReg(i); 1718 const TargetRegisterClass *RC = MRI->getRegClass(vr); 1719 DenseMap<unsigned, unsigned> ®map = VRegMapping[RC]; 1720 int n = regmap.size(); 1721 regmap.insert(std::make_pair(vr, n + 1)); 1722 } 1723 1724 // Emit register declarations 1725 // @TODO: Extract out the real register usage 1726 // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n"; 1727 // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n"; 1728 // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n"; 1729 // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n"; 1730 // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n"; 1731 // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n"; 1732 // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n"; 1733 1734 // Emit declaration of the virtual registers or 'physical' registers for 1735 // each register class 1736 for (unsigned i=0; i< TRI->getNumRegClasses(); i++) { 1737 const TargetRegisterClass *RC = TRI->getRegClass(i); 1738 DenseMap<unsigned, unsigned> ®map = VRegMapping[RC]; 1739 std::string rcname = getNVPTXRegClassName(RC); 1740 std::string rcStr = getNVPTXRegClassStr(RC); 1741 int n = regmap.size(); 1742 1743 // Only declare those registers that may be used. 1744 if (n) { 1745 O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1) 1746 << ">;\n"; 1747 } 1748 } 1749 1750 OutStreamer->EmitRawText(O.str()); 1751 } 1752 1753 void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) { 1754 APFloat APF = APFloat(Fp->getValueAPF()); // make a copy 1755 bool ignored; 1756 unsigned int numHex; 1757 const char *lead; 1758 1759 if (Fp->getType()->getTypeID() == Type::FloatTyID) { 1760 numHex = 8; 1761 lead = "0f"; 1762 APF.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &ignored); 1763 } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) { 1764 numHex = 16; 1765 lead = "0d"; 1766 APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &ignored); 1767 } else 1768 llvm_unreachable("unsupported fp type"); 1769 1770 APInt API = APF.bitcastToAPInt(); 1771 std::string hexstr(utohexstr(API.getZExtValue())); 1772 O << lead; 1773 if (hexstr.length() < numHex) 1774 O << std::string(numHex - hexstr.length(), '0'); 1775 O << utohexstr(API.getZExtValue()); 1776 } 1777 1778 void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) { 1779 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) { 1780 O << CI->getValue(); 1781 return; 1782 } 1783 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) { 1784 printFPConstant(CFP, O); 1785 return; 1786 } 1787 if (isa<ConstantPointerNull>(CPV)) { 1788 O << "0"; 1789 return; 1790 } 1791 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { 1792 bool IsNonGenericPointer = false; 1793 if (GVar->getType()->getAddressSpace() != 0) { 1794 IsNonGenericPointer = true; 1795 } 1796 if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) { 1797 O << "generic("; 1798 getSymbol(GVar)->print(O, MAI); 1799 O << ")"; 1800 } else { 1801 getSymbol(GVar)->print(O, MAI); 1802 } 1803 return; 1804 } 1805 if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1806 const Value *v = Cexpr->stripPointerCasts(); 1807 PointerType *PTy = dyn_cast<PointerType>(Cexpr->getType()); 1808 bool IsNonGenericPointer = false; 1809 if (PTy && PTy->getAddressSpace() != 0) { 1810 IsNonGenericPointer = true; 1811 } 1812 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { 1813 if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) { 1814 O << "generic("; 1815 getSymbol(GVar)->print(O, MAI); 1816 O << ")"; 1817 } else { 1818 getSymbol(GVar)->print(O, MAI); 1819 } 1820 return; 1821 } else { 1822 lowerConstant(CPV)->print(O, MAI); 1823 return; 1824 } 1825 } 1826 llvm_unreachable("Not scalar type found in printScalarConstant()"); 1827 } 1828 1829 // These utility functions assure we get the right sequence of bytes for a given 1830 // type even for big-endian machines 1831 template <typename T> static void ConvertIntToBytes(unsigned char *p, T val) { 1832 int64_t vp = (int64_t)val; 1833 for (unsigned i = 0; i < sizeof(T); ++i) { 1834 p[i] = (unsigned char)vp; 1835 vp >>= 8; 1836 } 1837 } 1838 static void ConvertFloatToBytes(unsigned char *p, float val) { 1839 int32_t *vp = (int32_t *)&val; 1840 for (unsigned i = 0; i < sizeof(int32_t); ++i) { 1841 p[i] = (unsigned char)*vp; 1842 *vp >>= 8; 1843 } 1844 } 1845 static void ConvertDoubleToBytes(unsigned char *p, double val) { 1846 int64_t *vp = (int64_t *)&val; 1847 for (unsigned i = 0; i < sizeof(int64_t); ++i) { 1848 p[i] = (unsigned char)*vp; 1849 *vp >>= 8; 1850 } 1851 } 1852 1853 void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, 1854 AggBuffer *aggBuffer) { 1855 const DataLayout &DL = getDataLayout(); 1856 1857 if (isa<UndefValue>(CPV) || CPV->isNullValue()) { 1858 int s = DL.getTypeAllocSize(CPV->getType()); 1859 if (s < Bytes) 1860 s = Bytes; 1861 aggBuffer->addZeros(s); 1862 return; 1863 } 1864 1865 unsigned char ptr[8]; 1866 switch (CPV->getType()->getTypeID()) { 1867 1868 case Type::IntegerTyID: { 1869 Type *ETy = CPV->getType(); 1870 if (ETy == Type::getInt8Ty(CPV->getContext())) { 1871 unsigned char c = (unsigned char)cast<ConstantInt>(CPV)->getZExtValue(); 1872 ConvertIntToBytes<>(ptr, c); 1873 aggBuffer->addBytes(ptr, 1, Bytes); 1874 } else if (ETy == Type::getInt16Ty(CPV->getContext())) { 1875 short int16 = (short)cast<ConstantInt>(CPV)->getZExtValue(); 1876 ConvertIntToBytes<>(ptr, int16); 1877 aggBuffer->addBytes(ptr, 2, Bytes); 1878 } else if (ETy == Type::getInt32Ty(CPV->getContext())) { 1879 if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { 1880 int int32 = (int)(constInt->getZExtValue()); 1881 ConvertIntToBytes<>(ptr, int32); 1882 aggBuffer->addBytes(ptr, 4, Bytes); 1883 break; 1884 } else if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1885 if (const auto *constInt = dyn_cast_or_null<ConstantInt>( 1886 ConstantFoldConstant(Cexpr, DL))) { 1887 int int32 = (int)(constInt->getZExtValue()); 1888 ConvertIntToBytes<>(ptr, int32); 1889 aggBuffer->addBytes(ptr, 4, Bytes); 1890 break; 1891 } 1892 if (Cexpr->getOpcode() == Instruction::PtrToInt) { 1893 Value *v = Cexpr->getOperand(0)->stripPointerCasts(); 1894 aggBuffer->addSymbol(v, Cexpr->getOperand(0)); 1895 aggBuffer->addZeros(4); 1896 break; 1897 } 1898 } 1899 llvm_unreachable("unsupported integer const type"); 1900 } else if (ETy == Type::getInt64Ty(CPV->getContext())) { 1901 if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { 1902 long long int64 = (long long)(constInt->getZExtValue()); 1903 ConvertIntToBytes<>(ptr, int64); 1904 aggBuffer->addBytes(ptr, 8, Bytes); 1905 break; 1906 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1907 if (const auto *constInt = dyn_cast_or_null<ConstantInt>( 1908 ConstantFoldConstant(Cexpr, DL))) { 1909 long long int64 = (long long)(constInt->getZExtValue()); 1910 ConvertIntToBytes<>(ptr, int64); 1911 aggBuffer->addBytes(ptr, 8, Bytes); 1912 break; 1913 } 1914 if (Cexpr->getOpcode() == Instruction::PtrToInt) { 1915 Value *v = Cexpr->getOperand(0)->stripPointerCasts(); 1916 aggBuffer->addSymbol(v, Cexpr->getOperand(0)); 1917 aggBuffer->addZeros(8); 1918 break; 1919 } 1920 } 1921 llvm_unreachable("unsupported integer const type"); 1922 } else 1923 llvm_unreachable("unsupported integer const type"); 1924 break; 1925 } 1926 case Type::FloatTyID: 1927 case Type::DoubleTyID: { 1928 const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV); 1929 Type *Ty = CFP->getType(); 1930 if (Ty == Type::getFloatTy(CPV->getContext())) { 1931 float float32 = (float) CFP->getValueAPF().convertToFloat(); 1932 ConvertFloatToBytes(ptr, float32); 1933 aggBuffer->addBytes(ptr, 4, Bytes); 1934 } else if (Ty == Type::getDoubleTy(CPV->getContext())) { 1935 double float64 = CFP->getValueAPF().convertToDouble(); 1936 ConvertDoubleToBytes(ptr, float64); 1937 aggBuffer->addBytes(ptr, 8, Bytes); 1938 } else { 1939 llvm_unreachable("unsupported fp const type"); 1940 } 1941 break; 1942 } 1943 case Type::PointerTyID: { 1944 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { 1945 aggBuffer->addSymbol(GVar, GVar); 1946 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1947 const Value *v = Cexpr->stripPointerCasts(); 1948 aggBuffer->addSymbol(v, Cexpr); 1949 } 1950 unsigned int s = DL.getTypeAllocSize(CPV->getType()); 1951 aggBuffer->addZeros(s); 1952 break; 1953 } 1954 1955 case Type::ArrayTyID: 1956 case Type::VectorTyID: 1957 case Type::StructTyID: { 1958 if (isa<ConstantAggregate>(CPV) || isa<ConstantDataSequential>(CPV)) { 1959 int ElementSize = DL.getTypeAllocSize(CPV->getType()); 1960 bufferAggregateConstant(CPV, aggBuffer); 1961 if (Bytes > ElementSize) 1962 aggBuffer->addZeros(Bytes - ElementSize); 1963 } else if (isa<ConstantAggregateZero>(CPV)) 1964 aggBuffer->addZeros(Bytes); 1965 else 1966 llvm_unreachable("Unexpected Constant type"); 1967 break; 1968 } 1969 1970 default: 1971 llvm_unreachable("unsupported type"); 1972 } 1973 } 1974 1975 void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV, 1976 AggBuffer *aggBuffer) { 1977 const DataLayout &DL = getDataLayout(); 1978 int Bytes; 1979 1980 // Old constants 1981 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) { 1982 if (CPV->getNumOperands()) 1983 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) 1984 bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer); 1985 return; 1986 } 1987 1988 if (const ConstantDataSequential *CDS = 1989 dyn_cast<ConstantDataSequential>(CPV)) { 1990 if (CDS->getNumElements()) 1991 for (unsigned i = 0; i < CDS->getNumElements(); ++i) 1992 bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0, 1993 aggBuffer); 1994 return; 1995 } 1996 1997 if (isa<ConstantStruct>(CPV)) { 1998 if (CPV->getNumOperands()) { 1999 StructType *ST = cast<StructType>(CPV->getType()); 2000 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) { 2001 if (i == (e - 1)) 2002 Bytes = DL.getStructLayout(ST)->getElementOffset(0) + 2003 DL.getTypeAllocSize(ST) - 2004 DL.getStructLayout(ST)->getElementOffset(i); 2005 else 2006 Bytes = DL.getStructLayout(ST)->getElementOffset(i + 1) - 2007 DL.getStructLayout(ST)->getElementOffset(i); 2008 bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer); 2009 } 2010 } 2011 return; 2012 } 2013 llvm_unreachable("unsupported constant type in printAggregateConstant()"); 2014 } 2015 2016 // buildTypeNameMap - Run through symbol table looking for type names. 2017 // 2018 2019 bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) { 2020 switch (MI.getOpcode()) { 2021 default: 2022 return false; 2023 case NVPTX::CallArgBeginInst: 2024 case NVPTX::CallArgEndInst0: 2025 case NVPTX::CallArgEndInst1: 2026 case NVPTX::CallArgF32: 2027 case NVPTX::CallArgF64: 2028 case NVPTX::CallArgI16: 2029 case NVPTX::CallArgI32: 2030 case NVPTX::CallArgI32imm: 2031 case NVPTX::CallArgI64: 2032 case NVPTX::CallArgParam: 2033 case NVPTX::CallVoidInst: 2034 case NVPTX::CallVoidInstReg: 2035 case NVPTX::Callseq_End: 2036 case NVPTX::CallVoidInstReg64: 2037 case NVPTX::DeclareParamInst: 2038 case NVPTX::DeclareRetMemInst: 2039 case NVPTX::DeclareRetRegInst: 2040 case NVPTX::DeclareRetScalarInst: 2041 case NVPTX::DeclareScalarParamInst: 2042 case NVPTX::DeclareScalarRegInst: 2043 case NVPTX::StoreParamF32: 2044 case NVPTX::StoreParamF64: 2045 case NVPTX::StoreParamI16: 2046 case NVPTX::StoreParamI32: 2047 case NVPTX::StoreParamI64: 2048 case NVPTX::StoreParamI8: 2049 case NVPTX::StoreRetvalF32: 2050 case NVPTX::StoreRetvalF64: 2051 case NVPTX::StoreRetvalI16: 2052 case NVPTX::StoreRetvalI32: 2053 case NVPTX::StoreRetvalI64: 2054 case NVPTX::StoreRetvalI8: 2055 case NVPTX::LastCallArgF32: 2056 case NVPTX::LastCallArgF64: 2057 case NVPTX::LastCallArgI16: 2058 case NVPTX::LastCallArgI32: 2059 case NVPTX::LastCallArgI32imm: 2060 case NVPTX::LastCallArgI64: 2061 case NVPTX::LastCallArgParam: 2062 case NVPTX::LoadParamMemF32: 2063 case NVPTX::LoadParamMemF64: 2064 case NVPTX::LoadParamMemI16: 2065 case NVPTX::LoadParamMemI32: 2066 case NVPTX::LoadParamMemI64: 2067 case NVPTX::LoadParamMemI8: 2068 case NVPTX::PrototypeInst: 2069 case NVPTX::DBG_VALUE: 2070 return true; 2071 } 2072 return false; 2073 } 2074 2075 /// lowerConstantForGV - Return an MCExpr for the given Constant. This is mostly 2076 /// a copy from AsmPrinter::lowerConstant, except customized to only handle 2077 /// expressions that are representable in PTX and create 2078 /// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions. 2079 const MCExpr * 2080 NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) { 2081 MCContext &Ctx = OutContext; 2082 2083 if (CV->isNullValue() || isa<UndefValue>(CV)) 2084 return MCConstantExpr::create(0, Ctx); 2085 2086 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) 2087 return MCConstantExpr::create(CI->getZExtValue(), Ctx); 2088 2089 if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) { 2090 const MCSymbolRefExpr *Expr = 2091 MCSymbolRefExpr::create(getSymbol(GV), Ctx); 2092 if (ProcessingGeneric) { 2093 return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx); 2094 } else { 2095 return Expr; 2096 } 2097 } 2098 2099 const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV); 2100 if (!CE) { 2101 llvm_unreachable("Unknown constant value to lower!"); 2102 } 2103 2104 switch (CE->getOpcode()) { 2105 default: 2106 // If the code isn't optimized, there may be outstanding folding 2107 // opportunities. Attempt to fold the expression using DataLayout as a 2108 // last resort before giving up. 2109 if (Constant *C = ConstantFoldConstant(CE, getDataLayout())) 2110 if (C && C != CE) 2111 return lowerConstantForGV(C, ProcessingGeneric); 2112 2113 // Otherwise report the problem to the user. 2114 { 2115 std::string S; 2116 raw_string_ostream OS(S); 2117 OS << "Unsupported expression in static initializer: "; 2118 CE->printAsOperand(OS, /*PrintType=*/false, 2119 !MF ? nullptr : MF->getFunction()->getParent()); 2120 report_fatal_error(OS.str()); 2121 } 2122 2123 case Instruction::AddrSpaceCast: { 2124 // Strip the addrspacecast and pass along the operand 2125 PointerType *DstTy = cast<PointerType>(CE->getType()); 2126 if (DstTy->getAddressSpace() == 0) { 2127 return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true); 2128 } 2129 std::string S; 2130 raw_string_ostream OS(S); 2131 OS << "Unsupported expression in static initializer: "; 2132 CE->printAsOperand(OS, /*PrintType=*/ false, 2133 !MF ? nullptr : MF->getFunction()->getParent()); 2134 report_fatal_error(OS.str()); 2135 } 2136 2137 case Instruction::GetElementPtr: { 2138 const DataLayout &DL = getDataLayout(); 2139 2140 // Generate a symbolic expression for the byte address 2141 APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0); 2142 cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI); 2143 2144 const MCExpr *Base = lowerConstantForGV(CE->getOperand(0), 2145 ProcessingGeneric); 2146 if (!OffsetAI) 2147 return Base; 2148 2149 int64_t Offset = OffsetAI.getSExtValue(); 2150 return MCBinaryExpr::createAdd(Base, MCConstantExpr::create(Offset, Ctx), 2151 Ctx); 2152 } 2153 2154 case Instruction::Trunc: 2155 // We emit the value and depend on the assembler to truncate the generated 2156 // expression properly. This is important for differences between 2157 // blockaddress labels. Since the two labels are in the same function, it 2158 // is reasonable to treat their delta as a 32-bit value. 2159 LLVM_FALLTHROUGH; 2160 case Instruction::BitCast: 2161 return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric); 2162 2163 case Instruction::IntToPtr: { 2164 const DataLayout &DL = getDataLayout(); 2165 2166 // Handle casts to pointers by changing them into casts to the appropriate 2167 // integer type. This promotes constant folding and simplifies this code. 2168 Constant *Op = CE->getOperand(0); 2169 Op = ConstantExpr::getIntegerCast(Op, DL.getIntPtrType(CV->getType()), 2170 false/*ZExt*/); 2171 return lowerConstantForGV(Op, ProcessingGeneric); 2172 } 2173 2174 case Instruction::PtrToInt: { 2175 const DataLayout &DL = getDataLayout(); 2176 2177 // Support only foldable casts to/from pointers that can be eliminated by 2178 // changing the pointer to the appropriately sized integer type. 2179 Constant *Op = CE->getOperand(0); 2180 Type *Ty = CE->getType(); 2181 2182 const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric); 2183 2184 // We can emit the pointer value into this slot if the slot is an 2185 // integer slot equal to the size of the pointer. 2186 if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType())) 2187 return OpExpr; 2188 2189 // Otherwise the pointer is smaller than the resultant integer, mask off 2190 // the high bits so we are sure to get a proper truncation if the input is 2191 // a constant expr. 2192 unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType()); 2193 const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx); 2194 return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx); 2195 } 2196 2197 // The MC library also has a right-shift operator, but it isn't consistently 2198 // signed or unsigned between different targets. 2199 case Instruction::Add: { 2200 const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric); 2201 const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric); 2202 switch (CE->getOpcode()) { 2203 default: llvm_unreachable("Unknown binary operator constant cast expr"); 2204 case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx); 2205 } 2206 } 2207 } 2208 } 2209 2210 // Copy of MCExpr::print customized for NVPTX 2211 void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) { 2212 switch (Expr.getKind()) { 2213 case MCExpr::Target: 2214 return cast<MCTargetExpr>(&Expr)->printImpl(OS, MAI); 2215 case MCExpr::Constant: 2216 OS << cast<MCConstantExpr>(Expr).getValue(); 2217 return; 2218 2219 case MCExpr::SymbolRef: { 2220 const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Expr); 2221 const MCSymbol &Sym = SRE.getSymbol(); 2222 Sym.print(OS, MAI); 2223 return; 2224 } 2225 2226 case MCExpr::Unary: { 2227 const MCUnaryExpr &UE = cast<MCUnaryExpr>(Expr); 2228 switch (UE.getOpcode()) { 2229 case MCUnaryExpr::LNot: OS << '!'; break; 2230 case MCUnaryExpr::Minus: OS << '-'; break; 2231 case MCUnaryExpr::Not: OS << '~'; break; 2232 case MCUnaryExpr::Plus: OS << '+'; break; 2233 } 2234 printMCExpr(*UE.getSubExpr(), OS); 2235 return; 2236 } 2237 2238 case MCExpr::Binary: { 2239 const MCBinaryExpr &BE = cast<MCBinaryExpr>(Expr); 2240 2241 // Only print parens around the LHS if it is non-trivial. 2242 if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS()) || 2243 isa<NVPTXGenericMCSymbolRefExpr>(BE.getLHS())) { 2244 printMCExpr(*BE.getLHS(), OS); 2245 } else { 2246 OS << '('; 2247 printMCExpr(*BE.getLHS(), OS); 2248 OS<< ')'; 2249 } 2250 2251 switch (BE.getOpcode()) { 2252 case MCBinaryExpr::Add: 2253 // Print "X-42" instead of "X+-42". 2254 if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) { 2255 if (RHSC->getValue() < 0) { 2256 OS << RHSC->getValue(); 2257 return; 2258 } 2259 } 2260 2261 OS << '+'; 2262 break; 2263 default: llvm_unreachable("Unhandled binary operator"); 2264 } 2265 2266 // Only print parens around the LHS if it is non-trivial. 2267 if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) { 2268 printMCExpr(*BE.getRHS(), OS); 2269 } else { 2270 OS << '('; 2271 printMCExpr(*BE.getRHS(), OS); 2272 OS << ')'; 2273 } 2274 return; 2275 } 2276 } 2277 2278 llvm_unreachable("Invalid expression kind!"); 2279 } 2280 2281 /// PrintAsmOperand - Print out an operand for an inline asm expression. 2282 /// 2283 bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, 2284 unsigned AsmVariant, 2285 const char *ExtraCode, raw_ostream &O) { 2286 if (ExtraCode && ExtraCode[0]) { 2287 if (ExtraCode[1] != 0) 2288 return true; // Unknown modifier. 2289 2290 switch (ExtraCode[0]) { 2291 default: 2292 // See if this is a generic print operand 2293 return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); 2294 case 'r': 2295 break; 2296 } 2297 } 2298 2299 printOperand(MI, OpNo, O); 2300 2301 return false; 2302 } 2303 2304 bool NVPTXAsmPrinter::PrintAsmMemoryOperand( 2305 const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, 2306 const char *ExtraCode, raw_ostream &O) { 2307 if (ExtraCode && ExtraCode[0]) 2308 return true; // Unknown modifier 2309 2310 O << '['; 2311 printMemOperand(MI, OpNo, O); 2312 O << ']'; 2313 2314 return false; 2315 } 2316 2317 void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, 2318 raw_ostream &O, const char *Modifier) { 2319 const MachineOperand &MO = MI->getOperand(opNum); 2320 switch (MO.getType()) { 2321 case MachineOperand::MO_Register: 2322 if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { 2323 if (MO.getReg() == NVPTX::VRDepot) 2324 O << DEPOTNAME << getFunctionNumber(); 2325 else 2326 O << NVPTXInstPrinter::getRegisterName(MO.getReg()); 2327 } else { 2328 emitVirtualRegister(MO.getReg(), O); 2329 } 2330 return; 2331 2332 case MachineOperand::MO_Immediate: 2333 if (!Modifier) 2334 O << MO.getImm(); 2335 else if (strstr(Modifier, "vec") == Modifier) 2336 printVecModifiedImmediate(MO, Modifier, O); 2337 else 2338 llvm_unreachable( 2339 "Don't know how to handle modifier on immediate operand"); 2340 return; 2341 2342 case MachineOperand::MO_FPImmediate: 2343 printFPConstant(MO.getFPImm(), O); 2344 break; 2345 2346 case MachineOperand::MO_GlobalAddress: 2347 getSymbol(MO.getGlobal())->print(O, MAI); 2348 break; 2349 2350 case MachineOperand::MO_MachineBasicBlock: 2351 MO.getMBB()->getSymbol()->print(O, MAI); 2352 return; 2353 2354 default: 2355 llvm_unreachable("Operand type not supported."); 2356 } 2357 } 2358 2359 void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, 2360 raw_ostream &O, const char *Modifier) { 2361 printOperand(MI, opNum, O); 2362 2363 if (Modifier && strcmp(Modifier, "add") == 0) { 2364 O << ", "; 2365 printOperand(MI, opNum + 1, O); 2366 } else { 2367 if (MI->getOperand(opNum + 1).isImm() && 2368 MI->getOperand(opNum + 1).getImm() == 0) 2369 return; // don't print ',0' or '+0' 2370 O << "+"; 2371 printOperand(MI, opNum + 1, O); 2372 } 2373 } 2374 2375 void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) { 2376 std::stringstream temp; 2377 LineReader *reader = this->getReader(filename); 2378 temp << "\n//"; 2379 temp << filename.str(); 2380 temp << ":"; 2381 temp << line; 2382 temp << " "; 2383 temp << reader->readLine(line); 2384 temp << "\n"; 2385 this->OutStreamer->EmitRawText(temp.str()); 2386 } 2387 2388 LineReader *NVPTXAsmPrinter::getReader(const std::string &filename) { 2389 if (!reader) { 2390 reader = new LineReader(filename); 2391 } 2392 2393 if (reader->fileName() != filename) { 2394 delete reader; 2395 reader = new LineReader(filename); 2396 } 2397 2398 return reader; 2399 } 2400 2401 std::string LineReader::readLine(unsigned lineNum) { 2402 if (lineNum < theCurLine) { 2403 theCurLine = 0; 2404 fstr.seekg(0, std::ios::beg); 2405 } 2406 while (theCurLine < lineNum) { 2407 fstr.getline(buff, 500); 2408 theCurLine++; 2409 } 2410 return buff; 2411 } 2412 2413 // Force static initialization. 2414 extern "C" void LLVMInitializeNVPTXAsmPrinter() { 2415 RegisterAsmPrinter<NVPTXAsmPrinter> X(getTheNVPTXTarget32()); 2416 RegisterAsmPrinter<NVPTXAsmPrinter> Y(getTheNVPTXTarget64()); 2417 } 2418