1 //===- MIParser.cpp - Machine instructions parser implementation ----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the parsing of machine instructions. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "MIParser.h" 15 #include "MILexer.h" 16 #include "llvm/ADT/StringMap.h" 17 #include "llvm/AsmParser/SlotMapping.h" 18 #include "llvm/CodeGen/MachineBasicBlock.h" 19 #include "llvm/CodeGen/MachineFunction.h" 20 #include "llvm/CodeGen/MachineInstr.h" 21 #include "llvm/CodeGen/MachineInstrBuilder.h" 22 #include "llvm/IR/Module.h" 23 #include "llvm/Support/raw_ostream.h" 24 #include "llvm/Support/SourceMgr.h" 25 #include "llvm/Target/TargetSubtargetInfo.h" 26 #include "llvm/Target/TargetInstrInfo.h" 27 28 using namespace llvm; 29 30 namespace { 31 32 /// A wrapper struct around the 'MachineOperand' struct that includes a source 33 /// range. 34 struct MachineOperandWithLocation { 35 MachineOperand Operand; 36 StringRef::iterator Begin; 37 StringRef::iterator End; 38 39 MachineOperandWithLocation(const MachineOperand &Operand, 40 StringRef::iterator Begin, StringRef::iterator End) 41 : Operand(Operand), Begin(Begin), End(End) {} 42 }; 43 44 class MIParser { 45 SourceMgr &SM; 46 MachineFunction &MF; 47 SMDiagnostic &Error; 48 StringRef Source, CurrentSource; 49 MIToken Token; 50 const PerFunctionMIParsingState &PFS; 51 /// Maps from indices to unnamed global values and metadata nodes. 52 const SlotMapping &IRSlots; 53 /// Maps from instruction names to op codes. 54 StringMap<unsigned> Names2InstrOpCodes; 55 /// Maps from register names to registers. 56 StringMap<unsigned> Names2Regs; 57 /// Maps from register mask names to register masks. 58 StringMap<const uint32_t *> Names2RegMasks; 59 /// Maps from subregister names to subregister indices. 60 StringMap<unsigned> Names2SubRegIndices; 61 62 public: 63 MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error, 64 StringRef Source, const PerFunctionMIParsingState &PFS, 65 const SlotMapping &IRSlots); 66 67 void lex(); 68 69 /// Report an error at the current location with the given message. 70 /// 71 /// This function always return true. 72 bool error(const Twine &Msg); 73 74 /// Report an error at the given location with the given message. 75 /// 76 /// This function always return true. 77 bool error(StringRef::iterator Loc, const Twine &Msg); 78 79 bool parse(MachineInstr *&MI); 80 bool parseMBB(MachineBasicBlock *&MBB); 81 82 bool parseRegister(unsigned &Reg); 83 bool parseRegisterFlag(unsigned &Flags); 84 bool parseSubRegisterIndex(unsigned &SubReg); 85 bool parseRegisterOperand(MachineOperand &Dest, bool IsDef = false); 86 bool parseImmediateOperand(MachineOperand &Dest); 87 bool parseMBBReference(MachineBasicBlock *&MBB); 88 bool parseMBBOperand(MachineOperand &Dest); 89 bool parseGlobalAddressOperand(MachineOperand &Dest); 90 bool parseMachineOperand(MachineOperand &Dest); 91 92 private: 93 /// Convert the integer literal in the current token into an unsigned integer. 94 /// 95 /// Return true if an error occurred. 96 bool getUnsigned(unsigned &Result); 97 98 void initNames2InstrOpCodes(); 99 100 /// Try to convert an instruction name to an opcode. Return true if the 101 /// instruction name is invalid. 102 bool parseInstrName(StringRef InstrName, unsigned &OpCode); 103 104 bool parseInstruction(unsigned &OpCode); 105 106 bool verifyImplicitOperands(ArrayRef<MachineOperandWithLocation> Operands, 107 const MCInstrDesc &MCID); 108 109 void initNames2Regs(); 110 111 /// Try to convert a register name to a register number. Return true if the 112 /// register name is invalid. 113 bool getRegisterByName(StringRef RegName, unsigned &Reg); 114 115 void initNames2RegMasks(); 116 117 /// Check if the given identifier is a name of a register mask. 118 /// 119 /// Return null if the identifier isn't a register mask. 120 const uint32_t *getRegMask(StringRef Identifier); 121 122 void initNames2SubRegIndices(); 123 124 /// Check if the given identifier is a name of a subregister index. 125 /// 126 /// Return 0 if the name isn't a subregister index class. 127 unsigned getSubRegIndex(StringRef Name); 128 }; 129 130 } // end anonymous namespace 131 132 MIParser::MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error, 133 StringRef Source, const PerFunctionMIParsingState &PFS, 134 const SlotMapping &IRSlots) 135 : SM(SM), MF(MF), Error(Error), Source(Source), CurrentSource(Source), 136 Token(MIToken::Error, StringRef()), PFS(PFS), IRSlots(IRSlots) {} 137 138 void MIParser::lex() { 139 CurrentSource = lexMIToken( 140 CurrentSource, Token, 141 [this](StringRef::iterator Loc, const Twine &Msg) { error(Loc, Msg); }); 142 } 143 144 bool MIParser::error(const Twine &Msg) { return error(Token.location(), Msg); } 145 146 bool MIParser::error(StringRef::iterator Loc, const Twine &Msg) { 147 assert(Loc >= Source.data() && Loc <= (Source.data() + Source.size())); 148 Error = SMDiagnostic( 149 SM, SMLoc(), 150 SM.getMemoryBuffer(SM.getMainFileID())->getBufferIdentifier(), 1, 151 Loc - Source.data(), SourceMgr::DK_Error, Msg.str(), Source, None, None); 152 return true; 153 } 154 155 bool MIParser::parse(MachineInstr *&MI) { 156 lex(); 157 158 // Parse any register operands before '=' 159 // TODO: Allow parsing of multiple operands before '=' 160 MachineOperand MO = MachineOperand::CreateImm(0); 161 SmallVector<MachineOperandWithLocation, 8> Operands; 162 if (Token.isRegister() || Token.isRegisterFlag()) { 163 auto Loc = Token.location(); 164 if (parseRegisterOperand(MO, /*IsDef=*/true)) 165 return true; 166 Operands.push_back(MachineOperandWithLocation(MO, Loc, Token.location())); 167 if (Token.isNot(MIToken::equal)) 168 return error("expected '='"); 169 lex(); 170 } 171 172 unsigned OpCode; 173 if (Token.isError() || parseInstruction(OpCode)) 174 return true; 175 176 // TODO: Parse the instruction flags and memory operands. 177 178 // Parse the remaining machine operands. 179 while (Token.isNot(MIToken::Eof)) { 180 auto Loc = Token.location(); 181 if (parseMachineOperand(MO)) 182 return true; 183 Operands.push_back(MachineOperandWithLocation(MO, Loc, Token.location())); 184 if (Token.is(MIToken::Eof)) 185 break; 186 if (Token.isNot(MIToken::comma)) 187 return error("expected ',' before the next machine operand"); 188 lex(); 189 } 190 191 const auto &MCID = MF.getSubtarget().getInstrInfo()->get(OpCode); 192 if (!MCID.isVariadic()) { 193 // FIXME: Move the implicit operand verification to the machine verifier. 194 if (verifyImplicitOperands(Operands, MCID)) 195 return true; 196 } 197 198 // TODO: Check for extraneous machine operands. 199 MI = MF.CreateMachineInstr(MCID, DebugLoc(), /*NoImplicit=*/true); 200 for (const auto &Operand : Operands) 201 MI->addOperand(MF, Operand.Operand); 202 return false; 203 } 204 205 bool MIParser::parseMBB(MachineBasicBlock *&MBB) { 206 lex(); 207 if (Token.isNot(MIToken::MachineBasicBlock)) 208 return error("expected a machine basic block reference"); 209 if (parseMBBReference(MBB)) 210 return true; 211 lex(); 212 if (Token.isNot(MIToken::Eof)) 213 return error( 214 "expected end of string after the machine basic block reference"); 215 return false; 216 } 217 218 static const char *printImplicitRegisterFlag(const MachineOperand &MO) { 219 assert(MO.isImplicit()); 220 return MO.isDef() ? "implicit-def" : "implicit"; 221 } 222 223 static std::string getRegisterName(const TargetRegisterInfo *TRI, 224 unsigned Reg) { 225 assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "expected phys reg"); 226 return StringRef(TRI->getName(Reg)).lower(); 227 } 228 229 bool MIParser::verifyImplicitOperands( 230 ArrayRef<MachineOperandWithLocation> Operands, const MCInstrDesc &MCID) { 231 if (MCID.isCall()) 232 // We can't verify call instructions as they can contain arbitrary implicit 233 // register and register mask operands. 234 return false; 235 236 // Gather all the expected implicit operands. 237 SmallVector<MachineOperand, 4> ImplicitOperands; 238 if (MCID.ImplicitDefs) 239 for (const uint16_t *ImpDefs = MCID.getImplicitDefs(); *ImpDefs; ++ImpDefs) 240 ImplicitOperands.push_back( 241 MachineOperand::CreateReg(*ImpDefs, true, true)); 242 if (MCID.ImplicitUses) 243 for (const uint16_t *ImpUses = MCID.getImplicitUses(); *ImpUses; ++ImpUses) 244 ImplicitOperands.push_back( 245 MachineOperand::CreateReg(*ImpUses, false, true)); 246 247 const auto *TRI = MF.getSubtarget().getRegisterInfo(); 248 assert(TRI && "Expected target register info"); 249 size_t I = ImplicitOperands.size(), J = Operands.size(); 250 while (I) { 251 --I; 252 if (J) { 253 --J; 254 const auto &ImplicitOperand = ImplicitOperands[I]; 255 const auto &Operand = Operands[J].Operand; 256 if (ImplicitOperand.isIdenticalTo(Operand)) 257 continue; 258 if (Operand.isReg() && Operand.isImplicit()) { 259 return error(Operands[J].Begin, 260 Twine("expected an implicit register operand '") + 261 printImplicitRegisterFlag(ImplicitOperand) + " %" + 262 getRegisterName(TRI, ImplicitOperand.getReg()) + "'"); 263 } 264 } 265 // TODO: Fix source location when Operands[J].end is right before '=', i.e: 266 // insead of reporting an error at this location: 267 // %eax = MOV32r0 268 // ^ 269 // report the error at the following location: 270 // %eax = MOV32r0 271 // ^ 272 return error(J < Operands.size() ? Operands[J].End : Token.location(), 273 Twine("missing implicit register operand '") + 274 printImplicitRegisterFlag(ImplicitOperands[I]) + " %" + 275 getRegisterName(TRI, ImplicitOperands[I].getReg()) + "'"); 276 } 277 return false; 278 } 279 280 bool MIParser::parseInstruction(unsigned &OpCode) { 281 if (Token.isNot(MIToken::Identifier)) 282 return error("expected a machine instruction"); 283 StringRef InstrName = Token.stringValue(); 284 if (parseInstrName(InstrName, OpCode)) 285 return error(Twine("unknown machine instruction name '") + InstrName + "'"); 286 lex(); 287 return false; 288 } 289 290 bool MIParser::parseRegister(unsigned &Reg) { 291 switch (Token.kind()) { 292 case MIToken::underscore: 293 Reg = 0; 294 break; 295 case MIToken::NamedRegister: { 296 StringRef Name = Token.stringValue(); 297 if (getRegisterByName(Name, Reg)) 298 return error(Twine("unknown register name '") + Name + "'"); 299 break; 300 } 301 case MIToken::VirtualRegister: { 302 unsigned ID; 303 if (getUnsigned(ID)) 304 return true; 305 const auto RegInfo = PFS.VirtualRegisterSlots.find(ID); 306 if (RegInfo == PFS.VirtualRegisterSlots.end()) 307 return error(Twine("use of undefined virtual register '%") + Twine(ID) + 308 "'"); 309 Reg = RegInfo->second; 310 break; 311 } 312 // TODO: Parse other register kinds. 313 default: 314 llvm_unreachable("The current token should be a register"); 315 } 316 return false; 317 } 318 319 bool MIParser::parseRegisterFlag(unsigned &Flags) { 320 switch (Token.kind()) { 321 case MIToken::kw_implicit: 322 Flags |= RegState::Implicit; 323 break; 324 case MIToken::kw_implicit_define: 325 Flags |= RegState::ImplicitDefine; 326 break; 327 case MIToken::kw_dead: 328 Flags |= RegState::Dead; 329 break; 330 case MIToken::kw_killed: 331 Flags |= RegState::Kill; 332 break; 333 case MIToken::kw_undef: 334 Flags |= RegState::Undef; 335 break; 336 // TODO: report an error when we specify the same flag more than once. 337 // TODO: parse the other register flags. 338 default: 339 llvm_unreachable("The current token should be a register flag"); 340 } 341 lex(); 342 return false; 343 } 344 345 bool MIParser::parseSubRegisterIndex(unsigned &SubReg) { 346 assert(Token.is(MIToken::colon)); 347 lex(); 348 if (Token.isNot(MIToken::Identifier)) 349 return error("expected a subregister index after ':'"); 350 auto Name = Token.stringValue(); 351 SubReg = getSubRegIndex(Name); 352 if (!SubReg) 353 return error(Twine("use of unknown subregister index '") + Name + "'"); 354 lex(); 355 return false; 356 } 357 358 bool MIParser::parseRegisterOperand(MachineOperand &Dest, bool IsDef) { 359 unsigned Reg; 360 unsigned Flags = IsDef ? RegState::Define : 0; 361 while (Token.isRegisterFlag()) { 362 if (parseRegisterFlag(Flags)) 363 return true; 364 } 365 if (!Token.isRegister()) 366 return error("expected a register after register flags"); 367 if (parseRegister(Reg)) 368 return true; 369 lex(); 370 unsigned SubReg = 0; 371 if (Token.is(MIToken::colon)) { 372 if (parseSubRegisterIndex(SubReg)) 373 return true; 374 } 375 Dest = MachineOperand::CreateReg( 376 Reg, Flags & RegState::Define, Flags & RegState::Implicit, 377 Flags & RegState::Kill, Flags & RegState::Dead, Flags & RegState::Undef, 378 /*isEarlyClobber=*/false, SubReg); 379 return false; 380 } 381 382 bool MIParser::parseImmediateOperand(MachineOperand &Dest) { 383 assert(Token.is(MIToken::IntegerLiteral)); 384 const APSInt &Int = Token.integerValue(); 385 if (Int.getMinSignedBits() > 64) 386 // TODO: Replace this with an error when we can parse CIMM Machine Operands. 387 llvm_unreachable("Can't parse large integer literals yet!"); 388 Dest = MachineOperand::CreateImm(Int.getExtValue()); 389 lex(); 390 return false; 391 } 392 393 bool MIParser::getUnsigned(unsigned &Result) { 394 assert(Token.hasIntegerValue() && "Expected a token with an integer value"); 395 const uint64_t Limit = uint64_t(std::numeric_limits<unsigned>::max()) + 1; 396 uint64_t Val64 = Token.integerValue().getLimitedValue(Limit); 397 if (Val64 == Limit) 398 return error("expected 32-bit integer (too large)"); 399 Result = Val64; 400 return false; 401 } 402 403 bool MIParser::parseMBBReference(MachineBasicBlock *&MBB) { 404 assert(Token.is(MIToken::MachineBasicBlock)); 405 unsigned Number; 406 if (getUnsigned(Number)) 407 return true; 408 auto MBBInfo = PFS.MBBSlots.find(Number); 409 if (MBBInfo == PFS.MBBSlots.end()) 410 return error(Twine("use of undefined machine basic block #") + 411 Twine(Number)); 412 MBB = MBBInfo->second; 413 if (!Token.stringValue().empty() && Token.stringValue() != MBB->getName()) 414 return error(Twine("the name of machine basic block #") + Twine(Number) + 415 " isn't '" + Token.stringValue() + "'"); 416 return false; 417 } 418 419 bool MIParser::parseMBBOperand(MachineOperand &Dest) { 420 MachineBasicBlock *MBB; 421 if (parseMBBReference(MBB)) 422 return true; 423 Dest = MachineOperand::CreateMBB(MBB); 424 lex(); 425 return false; 426 } 427 428 bool MIParser::parseGlobalAddressOperand(MachineOperand &Dest) { 429 switch (Token.kind()) { 430 case MIToken::NamedGlobalValue: { 431 auto Name = Token.stringValue(); 432 const Module *M = MF.getFunction()->getParent(); 433 if (const auto *GV = M->getNamedValue(Name)) { 434 Dest = MachineOperand::CreateGA(GV, /*Offset=*/0); 435 break; 436 } 437 return error(Twine("use of undefined global value '@") + Name + "'"); 438 } 439 case MIToken::GlobalValue: { 440 unsigned GVIdx; 441 if (getUnsigned(GVIdx)) 442 return true; 443 if (GVIdx >= IRSlots.GlobalValues.size()) 444 return error(Twine("use of undefined global value '@") + Twine(GVIdx) + 445 "'"); 446 Dest = MachineOperand::CreateGA(IRSlots.GlobalValues[GVIdx], 447 /*Offset=*/0); 448 break; 449 } 450 default: 451 llvm_unreachable("The current token should be a global value"); 452 } 453 // TODO: Parse offset and target flags. 454 lex(); 455 return false; 456 } 457 458 bool MIParser::parseMachineOperand(MachineOperand &Dest) { 459 switch (Token.kind()) { 460 case MIToken::kw_implicit: 461 case MIToken::kw_implicit_define: 462 case MIToken::kw_dead: 463 case MIToken::kw_killed: 464 case MIToken::kw_undef: 465 case MIToken::underscore: 466 case MIToken::NamedRegister: 467 case MIToken::VirtualRegister: 468 return parseRegisterOperand(Dest); 469 case MIToken::IntegerLiteral: 470 return parseImmediateOperand(Dest); 471 case MIToken::MachineBasicBlock: 472 return parseMBBOperand(Dest); 473 case MIToken::GlobalValue: 474 case MIToken::NamedGlobalValue: 475 return parseGlobalAddressOperand(Dest); 476 case MIToken::Error: 477 return true; 478 case MIToken::Identifier: 479 if (const auto *RegMask = getRegMask(Token.stringValue())) { 480 Dest = MachineOperand::CreateRegMask(RegMask); 481 lex(); 482 break; 483 } 484 // fallthrough 485 default: 486 // TODO: parse the other machine operands. 487 return error("expected a machine operand"); 488 } 489 return false; 490 } 491 492 void MIParser::initNames2InstrOpCodes() { 493 if (!Names2InstrOpCodes.empty()) 494 return; 495 const auto *TII = MF.getSubtarget().getInstrInfo(); 496 assert(TII && "Expected target instruction info"); 497 for (unsigned I = 0, E = TII->getNumOpcodes(); I < E; ++I) 498 Names2InstrOpCodes.insert(std::make_pair(StringRef(TII->getName(I)), I)); 499 } 500 501 bool MIParser::parseInstrName(StringRef InstrName, unsigned &OpCode) { 502 initNames2InstrOpCodes(); 503 auto InstrInfo = Names2InstrOpCodes.find(InstrName); 504 if (InstrInfo == Names2InstrOpCodes.end()) 505 return true; 506 OpCode = InstrInfo->getValue(); 507 return false; 508 } 509 510 void MIParser::initNames2Regs() { 511 if (!Names2Regs.empty()) 512 return; 513 // The '%noreg' register is the register 0. 514 Names2Regs.insert(std::make_pair("noreg", 0)); 515 const auto *TRI = MF.getSubtarget().getRegisterInfo(); 516 assert(TRI && "Expected target register info"); 517 for (unsigned I = 0, E = TRI->getNumRegs(); I < E; ++I) { 518 bool WasInserted = 519 Names2Regs.insert(std::make_pair(StringRef(TRI->getName(I)).lower(), I)) 520 .second; 521 (void)WasInserted; 522 assert(WasInserted && "Expected registers to be unique case-insensitively"); 523 } 524 } 525 526 bool MIParser::getRegisterByName(StringRef RegName, unsigned &Reg) { 527 initNames2Regs(); 528 auto RegInfo = Names2Regs.find(RegName); 529 if (RegInfo == Names2Regs.end()) 530 return true; 531 Reg = RegInfo->getValue(); 532 return false; 533 } 534 535 void MIParser::initNames2RegMasks() { 536 if (!Names2RegMasks.empty()) 537 return; 538 const auto *TRI = MF.getSubtarget().getRegisterInfo(); 539 assert(TRI && "Expected target register info"); 540 ArrayRef<const uint32_t *> RegMasks = TRI->getRegMasks(); 541 ArrayRef<const char *> RegMaskNames = TRI->getRegMaskNames(); 542 assert(RegMasks.size() == RegMaskNames.size()); 543 for (size_t I = 0, E = RegMasks.size(); I < E; ++I) 544 Names2RegMasks.insert( 545 std::make_pair(StringRef(RegMaskNames[I]).lower(), RegMasks[I])); 546 } 547 548 const uint32_t *MIParser::getRegMask(StringRef Identifier) { 549 initNames2RegMasks(); 550 auto RegMaskInfo = Names2RegMasks.find(Identifier); 551 if (RegMaskInfo == Names2RegMasks.end()) 552 return nullptr; 553 return RegMaskInfo->getValue(); 554 } 555 556 void MIParser::initNames2SubRegIndices() { 557 if (!Names2SubRegIndices.empty()) 558 return; 559 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 560 for (unsigned I = 1, E = TRI->getNumSubRegIndices(); I < E; ++I) 561 Names2SubRegIndices.insert( 562 std::make_pair(StringRef(TRI->getSubRegIndexName(I)).lower(), I)); 563 } 564 565 unsigned MIParser::getSubRegIndex(StringRef Name) { 566 initNames2SubRegIndices(); 567 auto SubRegInfo = Names2SubRegIndices.find(Name); 568 if (SubRegInfo == Names2SubRegIndices.end()) 569 return 0; 570 return SubRegInfo->getValue(); 571 } 572 573 bool llvm::parseMachineInstr(MachineInstr *&MI, SourceMgr &SM, 574 MachineFunction &MF, StringRef Src, 575 const PerFunctionMIParsingState &PFS, 576 const SlotMapping &IRSlots, SMDiagnostic &Error) { 577 return MIParser(SM, MF, Error, Src, PFS, IRSlots).parse(MI); 578 } 579 580 bool llvm::parseMBBReference(MachineBasicBlock *&MBB, SourceMgr &SM, 581 MachineFunction &MF, StringRef Src, 582 const PerFunctionMIParsingState &PFS, 583 const SlotMapping &IRSlots, SMDiagnostic &Error) { 584 return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseMBB(MBB); 585 } 586