1 //===- MIParser.cpp - Machine instructions parser implementation ----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the parsing of machine instructions. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "MIParser.h" 15 #include "MILexer.h" 16 #include "llvm/ADT/StringMap.h" 17 #include "llvm/AsmParser/SlotMapping.h" 18 #include "llvm/CodeGen/MachineBasicBlock.h" 19 #include "llvm/CodeGen/MachineFunction.h" 20 #include "llvm/CodeGen/MachineInstr.h" 21 #include "llvm/CodeGen/MachineInstrBuilder.h" 22 #include "llvm/IR/Module.h" 23 #include "llvm/Support/raw_ostream.h" 24 #include "llvm/Support/SourceMgr.h" 25 #include "llvm/Target/TargetSubtargetInfo.h" 26 #include "llvm/Target/TargetInstrInfo.h" 27 28 using namespace llvm; 29 30 namespace { 31 32 /// A wrapper struct around the 'MachineOperand' struct that includes a source 33 /// range. 34 struct MachineOperandWithLocation { 35 MachineOperand Operand; 36 StringRef::iterator Begin; 37 StringRef::iterator End; 38 39 MachineOperandWithLocation(const MachineOperand &Operand, 40 StringRef::iterator Begin, StringRef::iterator End) 41 : Operand(Operand), Begin(Begin), End(End) {} 42 }; 43 44 class MIParser { 45 SourceMgr &SM; 46 MachineFunction &MF; 47 SMDiagnostic &Error; 48 StringRef Source, CurrentSource; 49 MIToken Token; 50 const PerFunctionMIParsingState &PFS; 51 /// Maps from indices to unnamed global values and metadata nodes. 52 const SlotMapping &IRSlots; 53 /// Maps from instruction names to op codes. 54 StringMap<unsigned> Names2InstrOpCodes; 55 /// Maps from register names to registers. 56 StringMap<unsigned> Names2Regs; 57 /// Maps from register mask names to register masks. 58 StringMap<const uint32_t *> Names2RegMasks; 59 60 public: 61 MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error, 62 StringRef Source, const PerFunctionMIParsingState &PFS, 63 const SlotMapping &IRSlots); 64 65 void lex(); 66 67 /// Report an error at the current location with the given message. 68 /// 69 /// This function always return true. 70 bool error(const Twine &Msg); 71 72 /// Report an error at the given location with the given message. 73 /// 74 /// This function always return true. 75 bool error(StringRef::iterator Loc, const Twine &Msg); 76 77 bool parse(MachineInstr *&MI); 78 bool parseMBB(MachineBasicBlock *&MBB); 79 80 bool parseRegister(unsigned &Reg); 81 bool parseRegisterFlag(unsigned &Flags); 82 bool parseRegisterOperand(MachineOperand &Dest, bool IsDef = false); 83 bool parseImmediateOperand(MachineOperand &Dest); 84 bool parseMBBReference(MachineBasicBlock *&MBB); 85 bool parseMBBOperand(MachineOperand &Dest); 86 bool parseGlobalAddressOperand(MachineOperand &Dest); 87 bool parseMachineOperand(MachineOperand &Dest); 88 89 private: 90 /// Convert the integer literal in the current token into an unsigned integer. 91 /// 92 /// Return true if an error occurred. 93 bool getUnsigned(unsigned &Result); 94 95 void initNames2InstrOpCodes(); 96 97 /// Try to convert an instruction name to an opcode. Return true if the 98 /// instruction name is invalid. 99 bool parseInstrName(StringRef InstrName, unsigned &OpCode); 100 101 bool parseInstruction(unsigned &OpCode); 102 103 bool verifyImplicitOperands(ArrayRef<MachineOperandWithLocation> Operands, 104 const MCInstrDesc &MCID); 105 106 void initNames2Regs(); 107 108 /// Try to convert a register name to a register number. Return true if the 109 /// register name is invalid. 110 bool getRegisterByName(StringRef RegName, unsigned &Reg); 111 112 void initNames2RegMasks(); 113 114 /// Check if the given identifier is a name of a register mask. 115 /// 116 /// Return null if the identifier isn't a register mask. 117 const uint32_t *getRegMask(StringRef Identifier); 118 }; 119 120 } // end anonymous namespace 121 122 MIParser::MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error, 123 StringRef Source, const PerFunctionMIParsingState &PFS, 124 const SlotMapping &IRSlots) 125 : SM(SM), MF(MF), Error(Error), Source(Source), CurrentSource(Source), 126 Token(MIToken::Error, StringRef()), PFS(PFS), IRSlots(IRSlots) {} 127 128 void MIParser::lex() { 129 CurrentSource = lexMIToken( 130 CurrentSource, Token, 131 [this](StringRef::iterator Loc, const Twine &Msg) { error(Loc, Msg); }); 132 } 133 134 bool MIParser::error(const Twine &Msg) { return error(Token.location(), Msg); } 135 136 bool MIParser::error(StringRef::iterator Loc, const Twine &Msg) { 137 assert(Loc >= Source.data() && Loc <= (Source.data() + Source.size())); 138 Error = SMDiagnostic( 139 SM, SMLoc(), 140 SM.getMemoryBuffer(SM.getMainFileID())->getBufferIdentifier(), 1, 141 Loc - Source.data(), SourceMgr::DK_Error, Msg.str(), Source, None, None); 142 return true; 143 } 144 145 bool MIParser::parse(MachineInstr *&MI) { 146 lex(); 147 148 // Parse any register operands before '=' 149 // TODO: Allow parsing of multiple operands before '=' 150 MachineOperand MO = MachineOperand::CreateImm(0); 151 SmallVector<MachineOperandWithLocation, 8> Operands; 152 if (Token.isRegister() || Token.isRegisterFlag()) { 153 auto Loc = Token.location(); 154 if (parseRegisterOperand(MO, /*IsDef=*/true)) 155 return true; 156 Operands.push_back(MachineOperandWithLocation(MO, Loc, Token.location())); 157 if (Token.isNot(MIToken::equal)) 158 return error("expected '='"); 159 lex(); 160 } 161 162 unsigned OpCode; 163 if (Token.isError() || parseInstruction(OpCode)) 164 return true; 165 166 // TODO: Parse the instruction flags and memory operands. 167 168 // Parse the remaining machine operands. 169 while (Token.isNot(MIToken::Eof)) { 170 auto Loc = Token.location(); 171 if (parseMachineOperand(MO)) 172 return true; 173 Operands.push_back(MachineOperandWithLocation(MO, Loc, Token.location())); 174 if (Token.is(MIToken::Eof)) 175 break; 176 if (Token.isNot(MIToken::comma)) 177 return error("expected ',' before the next machine operand"); 178 lex(); 179 } 180 181 const auto &MCID = MF.getSubtarget().getInstrInfo()->get(OpCode); 182 if (!MCID.isVariadic()) { 183 // FIXME: Move the implicit operand verification to the machine verifier. 184 if (verifyImplicitOperands(Operands, MCID)) 185 return true; 186 } 187 188 // TODO: Check for extraneous machine operands. 189 MI = MF.CreateMachineInstr(MCID, DebugLoc(), /*NoImplicit=*/true); 190 for (const auto &Operand : Operands) 191 MI->addOperand(MF, Operand.Operand); 192 return false; 193 } 194 195 bool MIParser::parseMBB(MachineBasicBlock *&MBB) { 196 lex(); 197 if (Token.isNot(MIToken::MachineBasicBlock)) 198 return error("expected a machine basic block reference"); 199 if (parseMBBReference(MBB)) 200 return true; 201 lex(); 202 if (Token.isNot(MIToken::Eof)) 203 return error( 204 "expected end of string after the machine basic block reference"); 205 return false; 206 } 207 208 static const char *printImplicitRegisterFlag(const MachineOperand &MO) { 209 assert(MO.isImplicit()); 210 return MO.isDef() ? "implicit-def" : "implicit"; 211 } 212 213 static std::string getRegisterName(const TargetRegisterInfo *TRI, 214 unsigned Reg) { 215 assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "expected phys reg"); 216 return StringRef(TRI->getName(Reg)).lower(); 217 } 218 219 bool MIParser::verifyImplicitOperands( 220 ArrayRef<MachineOperandWithLocation> Operands, const MCInstrDesc &MCID) { 221 if (MCID.isCall()) 222 // We can't verify call instructions as they can contain arbitrary implicit 223 // register and register mask operands. 224 return false; 225 226 // Gather all the expected implicit operands. 227 SmallVector<MachineOperand, 4> ImplicitOperands; 228 if (MCID.ImplicitDefs) 229 for (const uint16_t *ImpDefs = MCID.getImplicitDefs(); *ImpDefs; ++ImpDefs) 230 ImplicitOperands.push_back( 231 MachineOperand::CreateReg(*ImpDefs, true, true)); 232 if (MCID.ImplicitUses) 233 for (const uint16_t *ImpUses = MCID.getImplicitUses(); *ImpUses; ++ImpUses) 234 ImplicitOperands.push_back( 235 MachineOperand::CreateReg(*ImpUses, false, true)); 236 237 const auto *TRI = MF.getSubtarget().getRegisterInfo(); 238 assert(TRI && "Expected target register info"); 239 size_t I = ImplicitOperands.size(), J = Operands.size(); 240 while (I) { 241 --I; 242 if (J) { 243 --J; 244 const auto &ImplicitOperand = ImplicitOperands[I]; 245 const auto &Operand = Operands[J].Operand; 246 if (ImplicitOperand.isIdenticalTo(Operand)) 247 continue; 248 if (Operand.isReg() && Operand.isImplicit()) { 249 return error(Operands[J].Begin, 250 Twine("expected an implicit register operand '") + 251 printImplicitRegisterFlag(ImplicitOperand) + " %" + 252 getRegisterName(TRI, ImplicitOperand.getReg()) + "'"); 253 } 254 } 255 // TODO: Fix source location when Operands[J].end is right before '=', i.e: 256 // insead of reporting an error at this location: 257 // %eax = MOV32r0 258 // ^ 259 // report the error at the following location: 260 // %eax = MOV32r0 261 // ^ 262 return error(J < Operands.size() ? Operands[J].End : Token.location(), 263 Twine("missing implicit register operand '") + 264 printImplicitRegisterFlag(ImplicitOperands[I]) + " %" + 265 getRegisterName(TRI, ImplicitOperands[I].getReg()) + "'"); 266 } 267 return false; 268 } 269 270 bool MIParser::parseInstruction(unsigned &OpCode) { 271 if (Token.isNot(MIToken::Identifier)) 272 return error("expected a machine instruction"); 273 StringRef InstrName = Token.stringValue(); 274 if (parseInstrName(InstrName, OpCode)) 275 return error(Twine("unknown machine instruction name '") + InstrName + "'"); 276 lex(); 277 return false; 278 } 279 280 bool MIParser::parseRegister(unsigned &Reg) { 281 switch (Token.kind()) { 282 case MIToken::underscore: 283 Reg = 0; 284 break; 285 case MIToken::NamedRegister: { 286 StringRef Name = Token.stringValue(); 287 if (getRegisterByName(Name, Reg)) 288 return error(Twine("unknown register name '") + Name + "'"); 289 break; 290 } 291 case MIToken::VirtualRegister: { 292 unsigned ID; 293 if (getUnsigned(ID)) 294 return true; 295 const auto RegInfo = PFS.VirtualRegisterSlots.find(ID); 296 if (RegInfo == PFS.VirtualRegisterSlots.end()) 297 return error(Twine("use of undefined virtual register '%") + Twine(ID) + 298 "'"); 299 Reg = RegInfo->second; 300 break; 301 } 302 // TODO: Parse other register kinds. 303 default: 304 llvm_unreachable("The current token should be a register"); 305 } 306 return false; 307 } 308 309 bool MIParser::parseRegisterFlag(unsigned &Flags) { 310 switch (Token.kind()) { 311 case MIToken::kw_implicit: 312 Flags |= RegState::Implicit; 313 break; 314 case MIToken::kw_implicit_define: 315 Flags |= RegState::ImplicitDefine; 316 break; 317 case MIToken::kw_dead: 318 Flags |= RegState::Dead; 319 break; 320 case MIToken::kw_killed: 321 Flags |= RegState::Kill; 322 break; 323 case MIToken::kw_undef: 324 Flags |= RegState::Undef; 325 break; 326 // TODO: report an error when we specify the same flag more than once. 327 // TODO: parse the other register flags. 328 default: 329 llvm_unreachable("The current token should be a register flag"); 330 } 331 lex(); 332 return false; 333 } 334 335 bool MIParser::parseRegisterOperand(MachineOperand &Dest, bool IsDef) { 336 unsigned Reg; 337 unsigned Flags = IsDef ? RegState::Define : 0; 338 while (Token.isRegisterFlag()) { 339 if (parseRegisterFlag(Flags)) 340 return true; 341 } 342 if (!Token.isRegister()) 343 return error("expected a register after register flags"); 344 if (parseRegister(Reg)) 345 return true; 346 lex(); 347 // TODO: Parse subregister. 348 Dest = MachineOperand::CreateReg( 349 Reg, Flags & RegState::Define, Flags & RegState::Implicit, 350 Flags & RegState::Kill, Flags & RegState::Dead, Flags & RegState::Undef); 351 return false; 352 } 353 354 bool MIParser::parseImmediateOperand(MachineOperand &Dest) { 355 assert(Token.is(MIToken::IntegerLiteral)); 356 const APSInt &Int = Token.integerValue(); 357 if (Int.getMinSignedBits() > 64) 358 // TODO: Replace this with an error when we can parse CIMM Machine Operands. 359 llvm_unreachable("Can't parse large integer literals yet!"); 360 Dest = MachineOperand::CreateImm(Int.getExtValue()); 361 lex(); 362 return false; 363 } 364 365 bool MIParser::getUnsigned(unsigned &Result) { 366 assert(Token.hasIntegerValue() && "Expected a token with an integer value"); 367 const uint64_t Limit = uint64_t(std::numeric_limits<unsigned>::max()) + 1; 368 uint64_t Val64 = Token.integerValue().getLimitedValue(Limit); 369 if (Val64 == Limit) 370 return error("expected 32-bit integer (too large)"); 371 Result = Val64; 372 return false; 373 } 374 375 bool MIParser::parseMBBReference(MachineBasicBlock *&MBB) { 376 assert(Token.is(MIToken::MachineBasicBlock)); 377 unsigned Number; 378 if (getUnsigned(Number)) 379 return true; 380 auto MBBInfo = PFS.MBBSlots.find(Number); 381 if (MBBInfo == PFS.MBBSlots.end()) 382 return error(Twine("use of undefined machine basic block #") + 383 Twine(Number)); 384 MBB = MBBInfo->second; 385 if (!Token.stringValue().empty() && Token.stringValue() != MBB->getName()) 386 return error(Twine("the name of machine basic block #") + Twine(Number) + 387 " isn't '" + Token.stringValue() + "'"); 388 return false; 389 } 390 391 bool MIParser::parseMBBOperand(MachineOperand &Dest) { 392 MachineBasicBlock *MBB; 393 if (parseMBBReference(MBB)) 394 return true; 395 Dest = MachineOperand::CreateMBB(MBB); 396 lex(); 397 return false; 398 } 399 400 bool MIParser::parseGlobalAddressOperand(MachineOperand &Dest) { 401 switch (Token.kind()) { 402 case MIToken::NamedGlobalValue: { 403 auto Name = Token.stringValue(); 404 const Module *M = MF.getFunction()->getParent(); 405 if (const auto *GV = M->getNamedValue(Name)) { 406 Dest = MachineOperand::CreateGA(GV, /*Offset=*/0); 407 break; 408 } 409 return error(Twine("use of undefined global value '@") + Name + "'"); 410 } 411 case MIToken::GlobalValue: { 412 unsigned GVIdx; 413 if (getUnsigned(GVIdx)) 414 return true; 415 if (GVIdx >= IRSlots.GlobalValues.size()) 416 return error(Twine("use of undefined global value '@") + Twine(GVIdx) + 417 "'"); 418 Dest = MachineOperand::CreateGA(IRSlots.GlobalValues[GVIdx], 419 /*Offset=*/0); 420 break; 421 } 422 default: 423 llvm_unreachable("The current token should be a global value"); 424 } 425 // TODO: Parse offset and target flags. 426 lex(); 427 return false; 428 } 429 430 bool MIParser::parseMachineOperand(MachineOperand &Dest) { 431 switch (Token.kind()) { 432 case MIToken::kw_implicit: 433 case MIToken::kw_implicit_define: 434 case MIToken::kw_dead: 435 case MIToken::kw_killed: 436 case MIToken::kw_undef: 437 case MIToken::underscore: 438 case MIToken::NamedRegister: 439 case MIToken::VirtualRegister: 440 return parseRegisterOperand(Dest); 441 case MIToken::IntegerLiteral: 442 return parseImmediateOperand(Dest); 443 case MIToken::MachineBasicBlock: 444 return parseMBBOperand(Dest); 445 case MIToken::GlobalValue: 446 case MIToken::NamedGlobalValue: 447 return parseGlobalAddressOperand(Dest); 448 case MIToken::Error: 449 return true; 450 case MIToken::Identifier: 451 if (const auto *RegMask = getRegMask(Token.stringValue())) { 452 Dest = MachineOperand::CreateRegMask(RegMask); 453 lex(); 454 break; 455 } 456 // fallthrough 457 default: 458 // TODO: parse the other machine operands. 459 return error("expected a machine operand"); 460 } 461 return false; 462 } 463 464 void MIParser::initNames2InstrOpCodes() { 465 if (!Names2InstrOpCodes.empty()) 466 return; 467 const auto *TII = MF.getSubtarget().getInstrInfo(); 468 assert(TII && "Expected target instruction info"); 469 for (unsigned I = 0, E = TII->getNumOpcodes(); I < E; ++I) 470 Names2InstrOpCodes.insert(std::make_pair(StringRef(TII->getName(I)), I)); 471 } 472 473 bool MIParser::parseInstrName(StringRef InstrName, unsigned &OpCode) { 474 initNames2InstrOpCodes(); 475 auto InstrInfo = Names2InstrOpCodes.find(InstrName); 476 if (InstrInfo == Names2InstrOpCodes.end()) 477 return true; 478 OpCode = InstrInfo->getValue(); 479 return false; 480 } 481 482 void MIParser::initNames2Regs() { 483 if (!Names2Regs.empty()) 484 return; 485 // The '%noreg' register is the register 0. 486 Names2Regs.insert(std::make_pair("noreg", 0)); 487 const auto *TRI = MF.getSubtarget().getRegisterInfo(); 488 assert(TRI && "Expected target register info"); 489 for (unsigned I = 0, E = TRI->getNumRegs(); I < E; ++I) { 490 bool WasInserted = 491 Names2Regs.insert(std::make_pair(StringRef(TRI->getName(I)).lower(), I)) 492 .second; 493 (void)WasInserted; 494 assert(WasInserted && "Expected registers to be unique case-insensitively"); 495 } 496 } 497 498 bool MIParser::getRegisterByName(StringRef RegName, unsigned &Reg) { 499 initNames2Regs(); 500 auto RegInfo = Names2Regs.find(RegName); 501 if (RegInfo == Names2Regs.end()) 502 return true; 503 Reg = RegInfo->getValue(); 504 return false; 505 } 506 507 void MIParser::initNames2RegMasks() { 508 if (!Names2RegMasks.empty()) 509 return; 510 const auto *TRI = MF.getSubtarget().getRegisterInfo(); 511 assert(TRI && "Expected target register info"); 512 ArrayRef<const uint32_t *> RegMasks = TRI->getRegMasks(); 513 ArrayRef<const char *> RegMaskNames = TRI->getRegMaskNames(); 514 assert(RegMasks.size() == RegMaskNames.size()); 515 for (size_t I = 0, E = RegMasks.size(); I < E; ++I) 516 Names2RegMasks.insert( 517 std::make_pair(StringRef(RegMaskNames[I]).lower(), RegMasks[I])); 518 } 519 520 const uint32_t *MIParser::getRegMask(StringRef Identifier) { 521 initNames2RegMasks(); 522 auto RegMaskInfo = Names2RegMasks.find(Identifier); 523 if (RegMaskInfo == Names2RegMasks.end()) 524 return nullptr; 525 return RegMaskInfo->getValue(); 526 } 527 528 bool llvm::parseMachineInstr(MachineInstr *&MI, SourceMgr &SM, 529 MachineFunction &MF, StringRef Src, 530 const PerFunctionMIParsingState &PFS, 531 const SlotMapping &IRSlots, SMDiagnostic &Error) { 532 return MIParser(SM, MF, Error, Src, PFS, IRSlots).parse(MI); 533 } 534 535 bool llvm::parseMBBReference(MachineBasicBlock *&MBB, SourceMgr &SM, 536 MachineFunction &MF, StringRef Src, 537 const PerFunctionMIParsingState &PFS, 538 const SlotMapping &IRSlots, SMDiagnostic &Error) { 539 return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseMBB(MBB); 540 } 541