1 //===- MIParser.cpp - Machine instructions parser implementation ----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the parsing of machine instructions. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "MIParser.h" 15 #include "MILexer.h" 16 #include "llvm/ADT/StringMap.h" 17 #include "llvm/AsmParser/SlotMapping.h" 18 #include "llvm/CodeGen/MachineBasicBlock.h" 19 #include "llvm/CodeGen/MachineFunction.h" 20 #include "llvm/CodeGen/MachineInstr.h" 21 #include "llvm/IR/Module.h" 22 #include "llvm/Support/raw_ostream.h" 23 #include "llvm/Support/SourceMgr.h" 24 #include "llvm/Target/TargetSubtargetInfo.h" 25 #include "llvm/Target/TargetInstrInfo.h" 26 27 using namespace llvm; 28 29 namespace { 30 31 class MIParser { 32 SourceMgr &SM; 33 MachineFunction &MF; 34 SMDiagnostic &Error; 35 StringRef Source, CurrentSource; 36 MIToken Token; 37 /// Maps from basic block numbers to MBBs. 38 const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots; 39 /// Maps from indices to unnamed global values and metadata nodes. 40 const SlotMapping &IRSlots; 41 /// Maps from instruction names to op codes. 42 StringMap<unsigned> Names2InstrOpCodes; 43 /// Maps from register names to registers. 44 StringMap<unsigned> Names2Regs; 45 /// Maps from register mask names to register masks. 46 StringMap<const uint32_t *> Names2RegMasks; 47 48 public: 49 MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error, 50 StringRef Source, 51 const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots, 52 const SlotMapping &IRSlots); 53 54 void lex(); 55 56 /// Report an error at the current location with the given message. 57 /// 58 /// This function always return true. 59 bool error(const Twine &Msg); 60 61 /// Report an error at the given location with the given message. 62 /// 63 /// This function always return true. 64 bool error(StringRef::iterator Loc, const Twine &Msg); 65 66 bool parse(MachineInstr *&MI); 67 bool parseMBB(MachineBasicBlock *&MBB); 68 69 bool parseRegister(unsigned &Reg); 70 bool parseRegisterOperand(MachineOperand &Dest, bool IsDef = false); 71 bool parseImmediateOperand(MachineOperand &Dest); 72 bool parseMBBReference(MachineBasicBlock *&MBB); 73 bool parseMBBOperand(MachineOperand &Dest); 74 bool parseGlobalAddressOperand(MachineOperand &Dest); 75 bool parseMachineOperand(MachineOperand &Dest); 76 77 private: 78 /// Convert the integer literal in the current token into an unsigned integer. 79 /// 80 /// Return true if an error occurred. 81 bool getUnsigned(unsigned &Result); 82 83 void initNames2InstrOpCodes(); 84 85 /// Try to convert an instruction name to an opcode. Return true if the 86 /// instruction name is invalid. 87 bool parseInstrName(StringRef InstrName, unsigned &OpCode); 88 89 bool parseInstruction(unsigned &OpCode); 90 91 void initNames2Regs(); 92 93 /// Try to convert a register name to a register number. Return true if the 94 /// register name is invalid. 95 bool getRegisterByName(StringRef RegName, unsigned &Reg); 96 97 void initNames2RegMasks(); 98 99 /// Check if the given identifier is a name of a register mask. 100 /// 101 /// Return null if the identifier isn't a register mask. 102 const uint32_t *getRegMask(StringRef Identifier); 103 }; 104 105 } // end anonymous namespace 106 107 MIParser::MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error, 108 StringRef Source, 109 const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots, 110 const SlotMapping &IRSlots) 111 : SM(SM), MF(MF), Error(Error), Source(Source), CurrentSource(Source), 112 Token(MIToken::Error, StringRef()), MBBSlots(MBBSlots), IRSlots(IRSlots) { 113 } 114 115 void MIParser::lex() { 116 CurrentSource = lexMIToken( 117 CurrentSource, Token, 118 [this](StringRef::iterator Loc, const Twine &Msg) { error(Loc, Msg); }); 119 } 120 121 bool MIParser::error(const Twine &Msg) { return error(Token.location(), Msg); } 122 123 bool MIParser::error(StringRef::iterator Loc, const Twine &Msg) { 124 // TODO: Get the proper location in the MIR file, not just a location inside 125 // the string. 126 assert(Loc >= Source.data() && Loc <= (Source.data() + Source.size())); 127 Error = SMDiagnostic( 128 SM, SMLoc(), 129 SM.getMemoryBuffer(SM.getMainFileID())->getBufferIdentifier(), 1, 130 Loc - Source.data(), SourceMgr::DK_Error, Msg.str(), Source, None, None); 131 return true; 132 } 133 134 bool MIParser::parse(MachineInstr *&MI) { 135 lex(); 136 137 // Parse any register operands before '=' 138 // TODO: Allow parsing of multiple operands before '=' 139 MachineOperand MO = MachineOperand::CreateImm(0); 140 SmallVector<MachineOperand, 8> Operands; 141 if (Token.isRegister()) { 142 if (parseRegisterOperand(MO, /*IsDef=*/true)) 143 return true; 144 Operands.push_back(MO); 145 if (Token.isNot(MIToken::equal)) 146 return error("expected '='"); 147 lex(); 148 } 149 150 unsigned OpCode; 151 if (Token.isError() || parseInstruction(OpCode)) 152 return true; 153 154 // TODO: Parse the instruction flags and memory operands. 155 156 // Parse the remaining machine operands. 157 while (Token.isNot(MIToken::Eof)) { 158 if (parseMachineOperand(MO)) 159 return true; 160 Operands.push_back(MO); 161 if (Token.is(MIToken::Eof)) 162 break; 163 if (Token.isNot(MIToken::comma)) 164 return error("expected ',' before the next machine operand"); 165 lex(); 166 } 167 168 const auto &MCID = MF.getSubtarget().getInstrInfo()->get(OpCode); 169 170 // Verify machine operands. 171 if (!MCID.isVariadic()) { 172 for (size_t I = 0, E = Operands.size(); I < E; ++I) { 173 if (I < MCID.getNumOperands()) 174 continue; 175 // Mark this register as implicit to prevent an assertion when it's added 176 // to an instruction. This is a temporary workaround until the implicit 177 // register flag can be parsed. 178 if (Operands[I].isReg()) 179 Operands[I].setImplicit(); 180 } 181 } 182 183 // TODO: Determine the implicit behaviour when implicit register flags are 184 // parsed. 185 MI = MF.CreateMachineInstr(MCID, DebugLoc(), /*NoImplicit=*/true); 186 for (const auto &Operand : Operands) 187 MI->addOperand(MF, Operand); 188 return false; 189 } 190 191 bool MIParser::parseMBB(MachineBasicBlock *&MBB) { 192 lex(); 193 if (Token.isNot(MIToken::MachineBasicBlock)) 194 return error("expected a machine basic block reference"); 195 if (parseMBBReference(MBB)) 196 return true; 197 lex(); 198 if (Token.isNot(MIToken::Eof)) 199 return error( 200 "expected end of string after the machine basic block reference"); 201 return false; 202 } 203 204 bool MIParser::parseInstruction(unsigned &OpCode) { 205 if (Token.isNot(MIToken::Identifier)) 206 return error("expected a machine instruction"); 207 StringRef InstrName = Token.stringValue(); 208 if (parseInstrName(InstrName, OpCode)) 209 return error(Twine("unknown machine instruction name '") + InstrName + "'"); 210 lex(); 211 return false; 212 } 213 214 bool MIParser::parseRegister(unsigned &Reg) { 215 switch (Token.kind()) { 216 case MIToken::underscore: 217 Reg = 0; 218 break; 219 case MIToken::NamedRegister: { 220 StringRef Name = Token.stringValue(); 221 if (getRegisterByName(Name, Reg)) 222 return error(Twine("unknown register name '") + Name + "'"); 223 break; 224 } 225 // TODO: Parse other register kinds. 226 default: 227 llvm_unreachable("The current token should be a register"); 228 } 229 return false; 230 } 231 232 bool MIParser::parseRegisterOperand(MachineOperand &Dest, bool IsDef) { 233 unsigned Reg; 234 // TODO: Parse register flags. 235 if (parseRegister(Reg)) 236 return true; 237 lex(); 238 // TODO: Parse subregister. 239 Dest = MachineOperand::CreateReg(Reg, IsDef); 240 return false; 241 } 242 243 bool MIParser::parseImmediateOperand(MachineOperand &Dest) { 244 assert(Token.is(MIToken::IntegerLiteral)); 245 const APSInt &Int = Token.integerValue(); 246 if (Int.getMinSignedBits() > 64) 247 // TODO: Replace this with an error when we can parse CIMM Machine Operands. 248 llvm_unreachable("Can't parse large integer literals yet!"); 249 Dest = MachineOperand::CreateImm(Int.getExtValue()); 250 lex(); 251 return false; 252 } 253 254 bool MIParser::getUnsigned(unsigned &Result) { 255 assert(Token.hasIntegerValue() && "Expected a token with an integer value"); 256 const uint64_t Limit = uint64_t(std::numeric_limits<unsigned>::max()) + 1; 257 uint64_t Val64 = Token.integerValue().getLimitedValue(Limit); 258 if (Val64 == Limit) 259 return error("expected 32-bit integer (too large)"); 260 Result = Val64; 261 return false; 262 } 263 264 bool MIParser::parseMBBReference(MachineBasicBlock *&MBB) { 265 assert(Token.is(MIToken::MachineBasicBlock)); 266 unsigned Number; 267 if (getUnsigned(Number)) 268 return true; 269 auto MBBInfo = MBBSlots.find(Number); 270 if (MBBInfo == MBBSlots.end()) 271 return error(Twine("use of undefined machine basic block #") + 272 Twine(Number)); 273 MBB = MBBInfo->second; 274 if (!Token.stringValue().empty() && Token.stringValue() != MBB->getName()) 275 return error(Twine("the name of machine basic block #") + Twine(Number) + 276 " isn't '" + Token.stringValue() + "'"); 277 return false; 278 } 279 280 bool MIParser::parseMBBOperand(MachineOperand &Dest) { 281 MachineBasicBlock *MBB; 282 if (parseMBBReference(MBB)) 283 return true; 284 Dest = MachineOperand::CreateMBB(MBB); 285 lex(); 286 return false; 287 } 288 289 bool MIParser::parseGlobalAddressOperand(MachineOperand &Dest) { 290 switch (Token.kind()) { 291 case MIToken::NamedGlobalValue: { 292 auto Name = Token.stringValue(); 293 const Module *M = MF.getFunction()->getParent(); 294 if (const auto *GV = M->getNamedValue(Name)) { 295 Dest = MachineOperand::CreateGA(GV, /*Offset=*/0); 296 break; 297 } 298 return error(Twine("use of undefined global value '@") + Name + "'"); 299 } 300 case MIToken::GlobalValue: { 301 unsigned GVIdx; 302 if (getUnsigned(GVIdx)) 303 return true; 304 if (GVIdx >= IRSlots.GlobalValues.size()) 305 return error(Twine("use of undefined global value '@") + Twine(GVIdx) + 306 "'"); 307 Dest = MachineOperand::CreateGA(IRSlots.GlobalValues[GVIdx], 308 /*Offset=*/0); 309 break; 310 } 311 default: 312 llvm_unreachable("The current token should be a global value"); 313 } 314 // TODO: Parse offset and target flags. 315 lex(); 316 return false; 317 } 318 319 bool MIParser::parseMachineOperand(MachineOperand &Dest) { 320 switch (Token.kind()) { 321 case MIToken::underscore: 322 case MIToken::NamedRegister: 323 return parseRegisterOperand(Dest); 324 case MIToken::IntegerLiteral: 325 return parseImmediateOperand(Dest); 326 case MIToken::MachineBasicBlock: 327 return parseMBBOperand(Dest); 328 case MIToken::GlobalValue: 329 case MIToken::NamedGlobalValue: 330 return parseGlobalAddressOperand(Dest); 331 case MIToken::Error: 332 return true; 333 case MIToken::Identifier: 334 if (const auto *RegMask = getRegMask(Token.stringValue())) { 335 Dest = MachineOperand::CreateRegMask(RegMask); 336 lex(); 337 break; 338 } 339 // fallthrough 340 default: 341 // TODO: parse the other machine operands. 342 return error("expected a machine operand"); 343 } 344 return false; 345 } 346 347 void MIParser::initNames2InstrOpCodes() { 348 if (!Names2InstrOpCodes.empty()) 349 return; 350 const auto *TII = MF.getSubtarget().getInstrInfo(); 351 assert(TII && "Expected target instruction info"); 352 for (unsigned I = 0, E = TII->getNumOpcodes(); I < E; ++I) 353 Names2InstrOpCodes.insert(std::make_pair(StringRef(TII->getName(I)), I)); 354 } 355 356 bool MIParser::parseInstrName(StringRef InstrName, unsigned &OpCode) { 357 initNames2InstrOpCodes(); 358 auto InstrInfo = Names2InstrOpCodes.find(InstrName); 359 if (InstrInfo == Names2InstrOpCodes.end()) 360 return true; 361 OpCode = InstrInfo->getValue(); 362 return false; 363 } 364 365 void MIParser::initNames2Regs() { 366 if (!Names2Regs.empty()) 367 return; 368 // The '%noreg' register is the register 0. 369 Names2Regs.insert(std::make_pair("noreg", 0)); 370 const auto *TRI = MF.getSubtarget().getRegisterInfo(); 371 assert(TRI && "Expected target register info"); 372 for (unsigned I = 0, E = TRI->getNumRegs(); I < E; ++I) { 373 bool WasInserted = 374 Names2Regs.insert(std::make_pair(StringRef(TRI->getName(I)).lower(), I)) 375 .second; 376 (void)WasInserted; 377 assert(WasInserted && "Expected registers to be unique case-insensitively"); 378 } 379 } 380 381 bool MIParser::getRegisterByName(StringRef RegName, unsigned &Reg) { 382 initNames2Regs(); 383 auto RegInfo = Names2Regs.find(RegName); 384 if (RegInfo == Names2Regs.end()) 385 return true; 386 Reg = RegInfo->getValue(); 387 return false; 388 } 389 390 void MIParser::initNames2RegMasks() { 391 if (!Names2RegMasks.empty()) 392 return; 393 const auto *TRI = MF.getSubtarget().getRegisterInfo(); 394 assert(TRI && "Expected target register info"); 395 ArrayRef<const uint32_t *> RegMasks = TRI->getRegMasks(); 396 ArrayRef<const char *> RegMaskNames = TRI->getRegMaskNames(); 397 assert(RegMasks.size() == RegMaskNames.size()); 398 for (size_t I = 0, E = RegMasks.size(); I < E; ++I) 399 Names2RegMasks.insert( 400 std::make_pair(StringRef(RegMaskNames[I]).lower(), RegMasks[I])); 401 } 402 403 const uint32_t *MIParser::getRegMask(StringRef Identifier) { 404 initNames2RegMasks(); 405 auto RegMaskInfo = Names2RegMasks.find(Identifier); 406 if (RegMaskInfo == Names2RegMasks.end()) 407 return nullptr; 408 return RegMaskInfo->getValue(); 409 } 410 411 bool llvm::parseMachineInstr( 412 MachineInstr *&MI, SourceMgr &SM, MachineFunction &MF, StringRef Src, 413 const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots, 414 const SlotMapping &IRSlots, SMDiagnostic &Error) { 415 return MIParser(SM, MF, Error, Src, MBBSlots, IRSlots).parse(MI); 416 } 417 418 bool llvm::parseMBBReference( 419 MachineBasicBlock *&MBB, SourceMgr &SM, MachineFunction &MF, StringRef Src, 420 const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots, 421 const SlotMapping &IRSlots, SMDiagnostic &Error) { 422 return MIParser(SM, MF, Error, Src, MBBSlots, IRSlots).parseMBB(MBB); 423 } 424