1 //===- MIParser.cpp - Machine instructions parser implementation ----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the parsing of machine instructions. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "MIParser.h" 15 #include "MILexer.h" 16 #include "llvm/ADT/StringMap.h" 17 #include "llvm/AsmParser/SlotMapping.h" 18 #include "llvm/CodeGen/MachineBasicBlock.h" 19 #include "llvm/CodeGen/MachineFunction.h" 20 #include "llvm/CodeGen/MachineInstr.h" 21 #include "llvm/CodeGen/MachineInstrBuilder.h" 22 #include "llvm/IR/Module.h" 23 #include "llvm/Support/raw_ostream.h" 24 #include "llvm/Support/SourceMgr.h" 25 #include "llvm/Target/TargetSubtargetInfo.h" 26 #include "llvm/Target/TargetInstrInfo.h" 27 28 using namespace llvm; 29 30 namespace { 31 32 /// A wrapper struct around the 'MachineOperand' struct that includes a source 33 /// range. 34 struct MachineOperandWithLocation { 35 MachineOperand Operand; 36 StringRef::iterator Begin; 37 StringRef::iterator End; 38 39 MachineOperandWithLocation(const MachineOperand &Operand, 40 StringRef::iterator Begin, StringRef::iterator End) 41 : Operand(Operand), Begin(Begin), End(End) {} 42 }; 43 44 class MIParser { 45 SourceMgr &SM; 46 MachineFunction &MF; 47 SMDiagnostic &Error; 48 StringRef Source, CurrentSource; 49 MIToken Token; 50 const PerFunctionMIParsingState &PFS; 51 /// Maps from indices to unnamed global values and metadata nodes. 52 const SlotMapping &IRSlots; 53 /// Maps from instruction names to op codes. 54 StringMap<unsigned> Names2InstrOpCodes; 55 /// Maps from register names to registers. 56 StringMap<unsigned> Names2Regs; 57 /// Maps from register mask names to register masks. 58 StringMap<const uint32_t *> Names2RegMasks; 59 /// Maps from subregister names to subregister indices. 60 StringMap<unsigned> Names2SubRegIndices; 61 62 public: 63 MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error, 64 StringRef Source, const PerFunctionMIParsingState &PFS, 65 const SlotMapping &IRSlots); 66 67 void lex(); 68 69 /// Report an error at the current location with the given message. 70 /// 71 /// This function always return true. 72 bool error(const Twine &Msg); 73 74 /// Report an error at the given location with the given message. 75 /// 76 /// This function always return true. 77 bool error(StringRef::iterator Loc, const Twine &Msg); 78 79 bool parse(MachineInstr *&MI); 80 bool parseMBB(MachineBasicBlock *&MBB); 81 bool parseNamedRegister(unsigned &Reg); 82 83 bool parseRegister(unsigned &Reg); 84 bool parseRegisterFlag(unsigned &Flags); 85 bool parseSubRegisterIndex(unsigned &SubReg); 86 bool parseRegisterOperand(MachineOperand &Dest, bool IsDef = false); 87 bool parseImmediateOperand(MachineOperand &Dest); 88 bool parseMBBReference(MachineBasicBlock *&MBB); 89 bool parseMBBOperand(MachineOperand &Dest); 90 bool parseGlobalAddressOperand(MachineOperand &Dest); 91 bool parseMachineOperand(MachineOperand &Dest); 92 93 private: 94 /// Convert the integer literal in the current token into an unsigned integer. 95 /// 96 /// Return true if an error occurred. 97 bool getUnsigned(unsigned &Result); 98 99 void initNames2InstrOpCodes(); 100 101 /// Try to convert an instruction name to an opcode. Return true if the 102 /// instruction name is invalid. 103 bool parseInstrName(StringRef InstrName, unsigned &OpCode); 104 105 bool parseInstruction(unsigned &OpCode); 106 107 bool verifyImplicitOperands(ArrayRef<MachineOperandWithLocation> Operands, 108 const MCInstrDesc &MCID); 109 110 void initNames2Regs(); 111 112 /// Try to convert a register name to a register number. Return true if the 113 /// register name is invalid. 114 bool getRegisterByName(StringRef RegName, unsigned &Reg); 115 116 void initNames2RegMasks(); 117 118 /// Check if the given identifier is a name of a register mask. 119 /// 120 /// Return null if the identifier isn't a register mask. 121 const uint32_t *getRegMask(StringRef Identifier); 122 123 void initNames2SubRegIndices(); 124 125 /// Check if the given identifier is a name of a subregister index. 126 /// 127 /// Return 0 if the name isn't a subregister index class. 128 unsigned getSubRegIndex(StringRef Name); 129 }; 130 131 } // end anonymous namespace 132 133 MIParser::MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error, 134 StringRef Source, const PerFunctionMIParsingState &PFS, 135 const SlotMapping &IRSlots) 136 : SM(SM), MF(MF), Error(Error), Source(Source), CurrentSource(Source), 137 Token(MIToken::Error, StringRef()), PFS(PFS), IRSlots(IRSlots) {} 138 139 void MIParser::lex() { 140 CurrentSource = lexMIToken( 141 CurrentSource, Token, 142 [this](StringRef::iterator Loc, const Twine &Msg) { error(Loc, Msg); }); 143 } 144 145 bool MIParser::error(const Twine &Msg) { return error(Token.location(), Msg); } 146 147 bool MIParser::error(StringRef::iterator Loc, const Twine &Msg) { 148 assert(Loc >= Source.data() && Loc <= (Source.data() + Source.size())); 149 Error = SMDiagnostic( 150 SM, SMLoc(), 151 SM.getMemoryBuffer(SM.getMainFileID())->getBufferIdentifier(), 1, 152 Loc - Source.data(), SourceMgr::DK_Error, Msg.str(), Source, None, None); 153 return true; 154 } 155 156 bool MIParser::parse(MachineInstr *&MI) { 157 lex(); 158 159 // Parse any register operands before '=' 160 // TODO: Allow parsing of multiple operands before '=' 161 MachineOperand MO = MachineOperand::CreateImm(0); 162 SmallVector<MachineOperandWithLocation, 8> Operands; 163 if (Token.isRegister() || Token.isRegisterFlag()) { 164 auto Loc = Token.location(); 165 if (parseRegisterOperand(MO, /*IsDef=*/true)) 166 return true; 167 Operands.push_back(MachineOperandWithLocation(MO, Loc, Token.location())); 168 if (Token.isNot(MIToken::equal)) 169 return error("expected '='"); 170 lex(); 171 } 172 173 unsigned OpCode; 174 if (Token.isError() || parseInstruction(OpCode)) 175 return true; 176 177 // TODO: Parse the instruction flags and memory operands. 178 179 // Parse the remaining machine operands. 180 while (Token.isNot(MIToken::Eof)) { 181 auto Loc = Token.location(); 182 if (parseMachineOperand(MO)) 183 return true; 184 Operands.push_back(MachineOperandWithLocation(MO, Loc, Token.location())); 185 if (Token.is(MIToken::Eof)) 186 break; 187 if (Token.isNot(MIToken::comma)) 188 return error("expected ',' before the next machine operand"); 189 lex(); 190 } 191 192 const auto &MCID = MF.getSubtarget().getInstrInfo()->get(OpCode); 193 if (!MCID.isVariadic()) { 194 // FIXME: Move the implicit operand verification to the machine verifier. 195 if (verifyImplicitOperands(Operands, MCID)) 196 return true; 197 } 198 199 // TODO: Check for extraneous machine operands. 200 MI = MF.CreateMachineInstr(MCID, DebugLoc(), /*NoImplicit=*/true); 201 for (const auto &Operand : Operands) 202 MI->addOperand(MF, Operand.Operand); 203 return false; 204 } 205 206 bool MIParser::parseMBB(MachineBasicBlock *&MBB) { 207 lex(); 208 if (Token.isNot(MIToken::MachineBasicBlock)) 209 return error("expected a machine basic block reference"); 210 if (parseMBBReference(MBB)) 211 return true; 212 lex(); 213 if (Token.isNot(MIToken::Eof)) 214 return error( 215 "expected end of string after the machine basic block reference"); 216 return false; 217 } 218 219 bool MIParser::parseNamedRegister(unsigned &Reg) { 220 lex(); 221 if (Token.isNot(MIToken::NamedRegister)) 222 return error("expected a named register"); 223 if (parseRegister(Reg)) 224 return 0; 225 lex(); 226 if (Token.isNot(MIToken::Eof)) 227 return error("expected end of string after the register reference"); 228 return false; 229 } 230 231 static const char *printImplicitRegisterFlag(const MachineOperand &MO) { 232 assert(MO.isImplicit()); 233 return MO.isDef() ? "implicit-def" : "implicit"; 234 } 235 236 static std::string getRegisterName(const TargetRegisterInfo *TRI, 237 unsigned Reg) { 238 assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "expected phys reg"); 239 return StringRef(TRI->getName(Reg)).lower(); 240 } 241 242 bool MIParser::verifyImplicitOperands( 243 ArrayRef<MachineOperandWithLocation> Operands, const MCInstrDesc &MCID) { 244 if (MCID.isCall()) 245 // We can't verify call instructions as they can contain arbitrary implicit 246 // register and register mask operands. 247 return false; 248 249 // Gather all the expected implicit operands. 250 SmallVector<MachineOperand, 4> ImplicitOperands; 251 if (MCID.ImplicitDefs) 252 for (const uint16_t *ImpDefs = MCID.getImplicitDefs(); *ImpDefs; ++ImpDefs) 253 ImplicitOperands.push_back( 254 MachineOperand::CreateReg(*ImpDefs, true, true)); 255 if (MCID.ImplicitUses) 256 for (const uint16_t *ImpUses = MCID.getImplicitUses(); *ImpUses; ++ImpUses) 257 ImplicitOperands.push_back( 258 MachineOperand::CreateReg(*ImpUses, false, true)); 259 260 const auto *TRI = MF.getSubtarget().getRegisterInfo(); 261 assert(TRI && "Expected target register info"); 262 size_t I = ImplicitOperands.size(), J = Operands.size(); 263 while (I) { 264 --I; 265 if (J) { 266 --J; 267 const auto &ImplicitOperand = ImplicitOperands[I]; 268 const auto &Operand = Operands[J].Operand; 269 if (ImplicitOperand.isIdenticalTo(Operand)) 270 continue; 271 if (Operand.isReg() && Operand.isImplicit()) { 272 return error(Operands[J].Begin, 273 Twine("expected an implicit register operand '") + 274 printImplicitRegisterFlag(ImplicitOperand) + " %" + 275 getRegisterName(TRI, ImplicitOperand.getReg()) + "'"); 276 } 277 } 278 // TODO: Fix source location when Operands[J].end is right before '=', i.e: 279 // insead of reporting an error at this location: 280 // %eax = MOV32r0 281 // ^ 282 // report the error at the following location: 283 // %eax = MOV32r0 284 // ^ 285 return error(J < Operands.size() ? Operands[J].End : Token.location(), 286 Twine("missing implicit register operand '") + 287 printImplicitRegisterFlag(ImplicitOperands[I]) + " %" + 288 getRegisterName(TRI, ImplicitOperands[I].getReg()) + "'"); 289 } 290 return false; 291 } 292 293 bool MIParser::parseInstruction(unsigned &OpCode) { 294 if (Token.isNot(MIToken::Identifier)) 295 return error("expected a machine instruction"); 296 StringRef InstrName = Token.stringValue(); 297 if (parseInstrName(InstrName, OpCode)) 298 return error(Twine("unknown machine instruction name '") + InstrName + "'"); 299 lex(); 300 return false; 301 } 302 303 bool MIParser::parseRegister(unsigned &Reg) { 304 switch (Token.kind()) { 305 case MIToken::underscore: 306 Reg = 0; 307 break; 308 case MIToken::NamedRegister: { 309 StringRef Name = Token.stringValue(); 310 if (getRegisterByName(Name, Reg)) 311 return error(Twine("unknown register name '") + Name + "'"); 312 break; 313 } 314 case MIToken::VirtualRegister: { 315 unsigned ID; 316 if (getUnsigned(ID)) 317 return true; 318 const auto RegInfo = PFS.VirtualRegisterSlots.find(ID); 319 if (RegInfo == PFS.VirtualRegisterSlots.end()) 320 return error(Twine("use of undefined virtual register '%") + Twine(ID) + 321 "'"); 322 Reg = RegInfo->second; 323 break; 324 } 325 // TODO: Parse other register kinds. 326 default: 327 llvm_unreachable("The current token should be a register"); 328 } 329 return false; 330 } 331 332 bool MIParser::parseRegisterFlag(unsigned &Flags) { 333 switch (Token.kind()) { 334 case MIToken::kw_implicit: 335 Flags |= RegState::Implicit; 336 break; 337 case MIToken::kw_implicit_define: 338 Flags |= RegState::ImplicitDefine; 339 break; 340 case MIToken::kw_dead: 341 Flags |= RegState::Dead; 342 break; 343 case MIToken::kw_killed: 344 Flags |= RegState::Kill; 345 break; 346 case MIToken::kw_undef: 347 Flags |= RegState::Undef; 348 break; 349 // TODO: report an error when we specify the same flag more than once. 350 // TODO: parse the other register flags. 351 default: 352 llvm_unreachable("The current token should be a register flag"); 353 } 354 lex(); 355 return false; 356 } 357 358 bool MIParser::parseSubRegisterIndex(unsigned &SubReg) { 359 assert(Token.is(MIToken::colon)); 360 lex(); 361 if (Token.isNot(MIToken::Identifier)) 362 return error("expected a subregister index after ':'"); 363 auto Name = Token.stringValue(); 364 SubReg = getSubRegIndex(Name); 365 if (!SubReg) 366 return error(Twine("use of unknown subregister index '") + Name + "'"); 367 lex(); 368 return false; 369 } 370 371 bool MIParser::parseRegisterOperand(MachineOperand &Dest, bool IsDef) { 372 unsigned Reg; 373 unsigned Flags = IsDef ? RegState::Define : 0; 374 while (Token.isRegisterFlag()) { 375 if (parseRegisterFlag(Flags)) 376 return true; 377 } 378 if (!Token.isRegister()) 379 return error("expected a register after register flags"); 380 if (parseRegister(Reg)) 381 return true; 382 lex(); 383 unsigned SubReg = 0; 384 if (Token.is(MIToken::colon)) { 385 if (parseSubRegisterIndex(SubReg)) 386 return true; 387 } 388 Dest = MachineOperand::CreateReg( 389 Reg, Flags & RegState::Define, Flags & RegState::Implicit, 390 Flags & RegState::Kill, Flags & RegState::Dead, Flags & RegState::Undef, 391 /*isEarlyClobber=*/false, SubReg); 392 return false; 393 } 394 395 bool MIParser::parseImmediateOperand(MachineOperand &Dest) { 396 assert(Token.is(MIToken::IntegerLiteral)); 397 const APSInt &Int = Token.integerValue(); 398 if (Int.getMinSignedBits() > 64) 399 // TODO: Replace this with an error when we can parse CIMM Machine Operands. 400 llvm_unreachable("Can't parse large integer literals yet!"); 401 Dest = MachineOperand::CreateImm(Int.getExtValue()); 402 lex(); 403 return false; 404 } 405 406 bool MIParser::getUnsigned(unsigned &Result) { 407 assert(Token.hasIntegerValue() && "Expected a token with an integer value"); 408 const uint64_t Limit = uint64_t(std::numeric_limits<unsigned>::max()) + 1; 409 uint64_t Val64 = Token.integerValue().getLimitedValue(Limit); 410 if (Val64 == Limit) 411 return error("expected 32-bit integer (too large)"); 412 Result = Val64; 413 return false; 414 } 415 416 bool MIParser::parseMBBReference(MachineBasicBlock *&MBB) { 417 assert(Token.is(MIToken::MachineBasicBlock)); 418 unsigned Number; 419 if (getUnsigned(Number)) 420 return true; 421 auto MBBInfo = PFS.MBBSlots.find(Number); 422 if (MBBInfo == PFS.MBBSlots.end()) 423 return error(Twine("use of undefined machine basic block #") + 424 Twine(Number)); 425 MBB = MBBInfo->second; 426 if (!Token.stringValue().empty() && Token.stringValue() != MBB->getName()) 427 return error(Twine("the name of machine basic block #") + Twine(Number) + 428 " isn't '" + Token.stringValue() + "'"); 429 return false; 430 } 431 432 bool MIParser::parseMBBOperand(MachineOperand &Dest) { 433 MachineBasicBlock *MBB; 434 if (parseMBBReference(MBB)) 435 return true; 436 Dest = MachineOperand::CreateMBB(MBB); 437 lex(); 438 return false; 439 } 440 441 bool MIParser::parseGlobalAddressOperand(MachineOperand &Dest) { 442 switch (Token.kind()) { 443 case MIToken::NamedGlobalValue: { 444 auto Name = Token.stringValue(); 445 const Module *M = MF.getFunction()->getParent(); 446 if (const auto *GV = M->getNamedValue(Name)) { 447 Dest = MachineOperand::CreateGA(GV, /*Offset=*/0); 448 break; 449 } 450 return error(Twine("use of undefined global value '@") + Name + "'"); 451 } 452 case MIToken::GlobalValue: { 453 unsigned GVIdx; 454 if (getUnsigned(GVIdx)) 455 return true; 456 if (GVIdx >= IRSlots.GlobalValues.size()) 457 return error(Twine("use of undefined global value '@") + Twine(GVIdx) + 458 "'"); 459 Dest = MachineOperand::CreateGA(IRSlots.GlobalValues[GVIdx], 460 /*Offset=*/0); 461 break; 462 } 463 default: 464 llvm_unreachable("The current token should be a global value"); 465 } 466 // TODO: Parse offset and target flags. 467 lex(); 468 return false; 469 } 470 471 bool MIParser::parseMachineOperand(MachineOperand &Dest) { 472 switch (Token.kind()) { 473 case MIToken::kw_implicit: 474 case MIToken::kw_implicit_define: 475 case MIToken::kw_dead: 476 case MIToken::kw_killed: 477 case MIToken::kw_undef: 478 case MIToken::underscore: 479 case MIToken::NamedRegister: 480 case MIToken::VirtualRegister: 481 return parseRegisterOperand(Dest); 482 case MIToken::IntegerLiteral: 483 return parseImmediateOperand(Dest); 484 case MIToken::MachineBasicBlock: 485 return parseMBBOperand(Dest); 486 case MIToken::GlobalValue: 487 case MIToken::NamedGlobalValue: 488 return parseGlobalAddressOperand(Dest); 489 case MIToken::Error: 490 return true; 491 case MIToken::Identifier: 492 if (const auto *RegMask = getRegMask(Token.stringValue())) { 493 Dest = MachineOperand::CreateRegMask(RegMask); 494 lex(); 495 break; 496 } 497 // fallthrough 498 default: 499 // TODO: parse the other machine operands. 500 return error("expected a machine operand"); 501 } 502 return false; 503 } 504 505 void MIParser::initNames2InstrOpCodes() { 506 if (!Names2InstrOpCodes.empty()) 507 return; 508 const auto *TII = MF.getSubtarget().getInstrInfo(); 509 assert(TII && "Expected target instruction info"); 510 for (unsigned I = 0, E = TII->getNumOpcodes(); I < E; ++I) 511 Names2InstrOpCodes.insert(std::make_pair(StringRef(TII->getName(I)), I)); 512 } 513 514 bool MIParser::parseInstrName(StringRef InstrName, unsigned &OpCode) { 515 initNames2InstrOpCodes(); 516 auto InstrInfo = Names2InstrOpCodes.find(InstrName); 517 if (InstrInfo == Names2InstrOpCodes.end()) 518 return true; 519 OpCode = InstrInfo->getValue(); 520 return false; 521 } 522 523 void MIParser::initNames2Regs() { 524 if (!Names2Regs.empty()) 525 return; 526 // The '%noreg' register is the register 0. 527 Names2Regs.insert(std::make_pair("noreg", 0)); 528 const auto *TRI = MF.getSubtarget().getRegisterInfo(); 529 assert(TRI && "Expected target register info"); 530 for (unsigned I = 0, E = TRI->getNumRegs(); I < E; ++I) { 531 bool WasInserted = 532 Names2Regs.insert(std::make_pair(StringRef(TRI->getName(I)).lower(), I)) 533 .second; 534 (void)WasInserted; 535 assert(WasInserted && "Expected registers to be unique case-insensitively"); 536 } 537 } 538 539 bool MIParser::getRegisterByName(StringRef RegName, unsigned &Reg) { 540 initNames2Regs(); 541 auto RegInfo = Names2Regs.find(RegName); 542 if (RegInfo == Names2Regs.end()) 543 return true; 544 Reg = RegInfo->getValue(); 545 return false; 546 } 547 548 void MIParser::initNames2RegMasks() { 549 if (!Names2RegMasks.empty()) 550 return; 551 const auto *TRI = MF.getSubtarget().getRegisterInfo(); 552 assert(TRI && "Expected target register info"); 553 ArrayRef<const uint32_t *> RegMasks = TRI->getRegMasks(); 554 ArrayRef<const char *> RegMaskNames = TRI->getRegMaskNames(); 555 assert(RegMasks.size() == RegMaskNames.size()); 556 for (size_t I = 0, E = RegMasks.size(); I < E; ++I) 557 Names2RegMasks.insert( 558 std::make_pair(StringRef(RegMaskNames[I]).lower(), RegMasks[I])); 559 } 560 561 const uint32_t *MIParser::getRegMask(StringRef Identifier) { 562 initNames2RegMasks(); 563 auto RegMaskInfo = Names2RegMasks.find(Identifier); 564 if (RegMaskInfo == Names2RegMasks.end()) 565 return nullptr; 566 return RegMaskInfo->getValue(); 567 } 568 569 void MIParser::initNames2SubRegIndices() { 570 if (!Names2SubRegIndices.empty()) 571 return; 572 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 573 for (unsigned I = 1, E = TRI->getNumSubRegIndices(); I < E; ++I) 574 Names2SubRegIndices.insert( 575 std::make_pair(StringRef(TRI->getSubRegIndexName(I)).lower(), I)); 576 } 577 578 unsigned MIParser::getSubRegIndex(StringRef Name) { 579 initNames2SubRegIndices(); 580 auto SubRegInfo = Names2SubRegIndices.find(Name); 581 if (SubRegInfo == Names2SubRegIndices.end()) 582 return 0; 583 return SubRegInfo->getValue(); 584 } 585 586 bool llvm::parseMachineInstr(MachineInstr *&MI, SourceMgr &SM, 587 MachineFunction &MF, StringRef Src, 588 const PerFunctionMIParsingState &PFS, 589 const SlotMapping &IRSlots, SMDiagnostic &Error) { 590 return MIParser(SM, MF, Error, Src, PFS, IRSlots).parse(MI); 591 } 592 593 bool llvm::parseMBBReference(MachineBasicBlock *&MBB, SourceMgr &SM, 594 MachineFunction &MF, StringRef Src, 595 const PerFunctionMIParsingState &PFS, 596 const SlotMapping &IRSlots, SMDiagnostic &Error) { 597 return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseMBB(MBB); 598 } 599 600 bool llvm::parseNamedRegisterReference(unsigned &Reg, SourceMgr &SM, 601 MachineFunction &MF, StringRef Src, 602 const PerFunctionMIParsingState &PFS, 603 const SlotMapping &IRSlots, 604 SMDiagnostic &Error) { 605 return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseNamedRegister(Reg); 606 } 607