1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "MCTargetDesc/X86BaseInfo.h" 11 #include "X86AsmInstrumentation.h" 12 #include "X86AsmParserCommon.h" 13 #include "X86Operand.h" 14 #include "llvm/ADT/STLExtras.h" 15 #include "llvm/ADT/SmallString.h" 16 #include "llvm/ADT/SmallVector.h" 17 #include "llvm/ADT/StringSwitch.h" 18 #include "llvm/ADT/Twine.h" 19 #include "llvm/MC/MCContext.h" 20 #include "llvm/MC/MCExpr.h" 21 #include "llvm/MC/MCInst.h" 22 #include "llvm/MC/MCInstrInfo.h" 23 #include "llvm/MC/MCParser/MCAsmLexer.h" 24 #include "llvm/MC/MCParser/MCAsmParser.h" 25 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 26 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 27 #include "llvm/MC/MCRegisterInfo.h" 28 #include "llvm/MC/MCSection.h" 29 #include "llvm/MC/MCStreamer.h" 30 #include "llvm/MC/MCSubtargetInfo.h" 31 #include "llvm/MC/MCSymbol.h" 32 #include "llvm/Support/SourceMgr.h" 33 #include "llvm/Support/TargetRegistry.h" 34 #include "llvm/Support/raw_ostream.h" 35 #include <algorithm> 36 #include <memory> 37 38 using namespace llvm; 39 40 namespace { 41 42 static const char OpPrecedence[] = { 43 0, // IC_OR 44 1, // IC_XOR 45 2, // IC_AND 46 3, // IC_LSHIFT 47 3, // IC_RSHIFT 48 4, // IC_PLUS 49 4, // IC_MINUS 50 5, // IC_MULTIPLY 51 5, // IC_DIVIDE 52 6, // IC_RPAREN 53 7, // IC_LPAREN 54 0, // IC_IMM 55 0 // IC_REGISTER 56 }; 57 58 class X86AsmParser : public MCTargetAsmParser { 59 const MCInstrInfo &MII; 60 ParseInstructionInfo *InstInfo; 61 std::unique_ptr<X86AsmInstrumentation> Instrumentation; 62 bool Code16GCC; 63 64 private: 65 SMLoc consumeToken() { 66 MCAsmParser &Parser = getParser(); 67 SMLoc Result = Parser.getTok().getLoc(); 68 Parser.Lex(); 69 return Result; 70 } 71 72 unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst, 73 uint64_t &ErrorInfo, bool matchingInlineAsm, 74 unsigned VariantID = 0) { 75 // In Code16GCC mode, match as 32-bit. 76 if (Code16GCC) 77 SwitchMode(X86::Mode32Bit); 78 unsigned rv = MatchInstructionImpl(Operands, Inst, ErrorInfo, 79 matchingInlineAsm, VariantID); 80 if (Code16GCC) 81 SwitchMode(X86::Mode16Bit); 82 return rv; 83 } 84 85 enum InfixCalculatorTok { 86 IC_OR = 0, 87 IC_XOR, 88 IC_AND, 89 IC_LSHIFT, 90 IC_RSHIFT, 91 IC_PLUS, 92 IC_MINUS, 93 IC_MULTIPLY, 94 IC_DIVIDE, 95 IC_RPAREN, 96 IC_LPAREN, 97 IC_IMM, 98 IC_REGISTER 99 }; 100 101 class InfixCalculator { 102 typedef std::pair< InfixCalculatorTok, int64_t > ICToken; 103 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack; 104 SmallVector<ICToken, 4> PostfixStack; 105 106 public: 107 int64_t popOperand() { 108 assert (!PostfixStack.empty() && "Poped an empty stack!"); 109 ICToken Op = PostfixStack.pop_back_val(); 110 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER) 111 && "Expected and immediate or register!"); 112 return Op.second; 113 } 114 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) { 115 assert ((Op == IC_IMM || Op == IC_REGISTER) && 116 "Unexpected operand!"); 117 PostfixStack.push_back(std::make_pair(Op, Val)); 118 } 119 120 void popOperator() { InfixOperatorStack.pop_back(); } 121 void pushOperator(InfixCalculatorTok Op) { 122 // Push the new operator if the stack is empty. 123 if (InfixOperatorStack.empty()) { 124 InfixOperatorStack.push_back(Op); 125 return; 126 } 127 128 // Push the new operator if it has a higher precedence than the operator 129 // on the top of the stack or the operator on the top of the stack is a 130 // left parentheses. 131 unsigned Idx = InfixOperatorStack.size() - 1; 132 InfixCalculatorTok StackOp = InfixOperatorStack[Idx]; 133 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) { 134 InfixOperatorStack.push_back(Op); 135 return; 136 } 137 138 // The operator on the top of the stack has higher precedence than the 139 // new operator. 140 unsigned ParenCount = 0; 141 while (1) { 142 // Nothing to process. 143 if (InfixOperatorStack.empty()) 144 break; 145 146 Idx = InfixOperatorStack.size() - 1; 147 StackOp = InfixOperatorStack[Idx]; 148 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount)) 149 break; 150 151 // If we have an even parentheses count and we see a left parentheses, 152 // then stop processing. 153 if (!ParenCount && StackOp == IC_LPAREN) 154 break; 155 156 if (StackOp == IC_RPAREN) { 157 ++ParenCount; 158 InfixOperatorStack.pop_back(); 159 } else if (StackOp == IC_LPAREN) { 160 --ParenCount; 161 InfixOperatorStack.pop_back(); 162 } else { 163 InfixOperatorStack.pop_back(); 164 PostfixStack.push_back(std::make_pair(StackOp, 0)); 165 } 166 } 167 // Push the new operator. 168 InfixOperatorStack.push_back(Op); 169 } 170 171 int64_t execute() { 172 // Push any remaining operators onto the postfix stack. 173 while (!InfixOperatorStack.empty()) { 174 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val(); 175 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN) 176 PostfixStack.push_back(std::make_pair(StackOp, 0)); 177 } 178 179 if (PostfixStack.empty()) 180 return 0; 181 182 SmallVector<ICToken, 16> OperandStack; 183 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) { 184 ICToken Op = PostfixStack[i]; 185 if (Op.first == IC_IMM || Op.first == IC_REGISTER) { 186 OperandStack.push_back(Op); 187 } else { 188 assert (OperandStack.size() > 1 && "Too few operands."); 189 int64_t Val; 190 ICToken Op2 = OperandStack.pop_back_val(); 191 ICToken Op1 = OperandStack.pop_back_val(); 192 switch (Op.first) { 193 default: 194 report_fatal_error("Unexpected operator!"); 195 break; 196 case IC_PLUS: 197 Val = Op1.second + Op2.second; 198 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 199 break; 200 case IC_MINUS: 201 Val = Op1.second - Op2.second; 202 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 203 break; 204 case IC_MULTIPLY: 205 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 206 "Multiply operation with an immediate and a register!"); 207 Val = Op1.second * Op2.second; 208 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 209 break; 210 case IC_DIVIDE: 211 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 212 "Divide operation with an immediate and a register!"); 213 assert (Op2.second != 0 && "Division by zero!"); 214 Val = Op1.second / Op2.second; 215 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 216 break; 217 case IC_OR: 218 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 219 "Or operation with an immediate and a register!"); 220 Val = Op1.second | Op2.second; 221 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 222 break; 223 case IC_XOR: 224 assert(Op1.first == IC_IMM && Op2.first == IC_IMM && 225 "Xor operation with an immediate and a register!"); 226 Val = Op1.second ^ Op2.second; 227 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 228 break; 229 case IC_AND: 230 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 231 "And operation with an immediate and a register!"); 232 Val = Op1.second & Op2.second; 233 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 234 break; 235 case IC_LSHIFT: 236 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 237 "Left shift operation with an immediate and a register!"); 238 Val = Op1.second << Op2.second; 239 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 240 break; 241 case IC_RSHIFT: 242 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 243 "Right shift operation with an immediate and a register!"); 244 Val = Op1.second >> Op2.second; 245 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 246 break; 247 } 248 } 249 } 250 assert (OperandStack.size() == 1 && "Expected a single result."); 251 return OperandStack.pop_back_val().second; 252 } 253 }; 254 255 enum IntelExprState { 256 IES_OR, 257 IES_XOR, 258 IES_AND, 259 IES_LSHIFT, 260 IES_RSHIFT, 261 IES_PLUS, 262 IES_MINUS, 263 IES_NOT, 264 IES_MULTIPLY, 265 IES_DIVIDE, 266 IES_LBRAC, 267 IES_RBRAC, 268 IES_LPAREN, 269 IES_RPAREN, 270 IES_REGISTER, 271 IES_INTEGER, 272 IES_IDENTIFIER, 273 IES_ERROR 274 }; 275 276 class IntelExprStateMachine { 277 IntelExprState State, PrevState; 278 unsigned BaseReg, IndexReg, TmpReg, Scale; 279 int64_t Imm; 280 const MCExpr *Sym; 281 StringRef SymName; 282 bool StopOnLBrac, AddImmPrefix; 283 InfixCalculator IC; 284 InlineAsmIdentifierInfo Info; 285 286 public: 287 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) : 288 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0), 289 Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac), 290 AddImmPrefix(addimmprefix) { Info.clear(); } 291 292 unsigned getBaseReg() { return BaseReg; } 293 unsigned getIndexReg() { return IndexReg; } 294 unsigned getScale() { return Scale; } 295 const MCExpr *getSym() { return Sym; } 296 StringRef getSymName() { return SymName; } 297 int64_t getImm() { return Imm + IC.execute(); } 298 bool isValidEndState() { 299 return State == IES_RBRAC || State == IES_INTEGER; 300 } 301 bool getStopOnLBrac() { return StopOnLBrac; } 302 bool getAddImmPrefix() { return AddImmPrefix; } 303 bool hadError() { return State == IES_ERROR; } 304 305 InlineAsmIdentifierInfo &getIdentifierInfo() { 306 return Info; 307 } 308 309 void onOr() { 310 IntelExprState CurrState = State; 311 switch (State) { 312 default: 313 State = IES_ERROR; 314 break; 315 case IES_INTEGER: 316 case IES_RPAREN: 317 case IES_REGISTER: 318 State = IES_OR; 319 IC.pushOperator(IC_OR); 320 break; 321 } 322 PrevState = CurrState; 323 } 324 void onXor() { 325 IntelExprState CurrState = State; 326 switch (State) { 327 default: 328 State = IES_ERROR; 329 break; 330 case IES_INTEGER: 331 case IES_RPAREN: 332 case IES_REGISTER: 333 State = IES_XOR; 334 IC.pushOperator(IC_XOR); 335 break; 336 } 337 PrevState = CurrState; 338 } 339 void onAnd() { 340 IntelExprState CurrState = State; 341 switch (State) { 342 default: 343 State = IES_ERROR; 344 break; 345 case IES_INTEGER: 346 case IES_RPAREN: 347 case IES_REGISTER: 348 State = IES_AND; 349 IC.pushOperator(IC_AND); 350 break; 351 } 352 PrevState = CurrState; 353 } 354 void onLShift() { 355 IntelExprState CurrState = State; 356 switch (State) { 357 default: 358 State = IES_ERROR; 359 break; 360 case IES_INTEGER: 361 case IES_RPAREN: 362 case IES_REGISTER: 363 State = IES_LSHIFT; 364 IC.pushOperator(IC_LSHIFT); 365 break; 366 } 367 PrevState = CurrState; 368 } 369 void onRShift() { 370 IntelExprState CurrState = State; 371 switch (State) { 372 default: 373 State = IES_ERROR; 374 break; 375 case IES_INTEGER: 376 case IES_RPAREN: 377 case IES_REGISTER: 378 State = IES_RSHIFT; 379 IC.pushOperator(IC_RSHIFT); 380 break; 381 } 382 PrevState = CurrState; 383 } 384 void onPlus() { 385 IntelExprState CurrState = State; 386 switch (State) { 387 default: 388 State = IES_ERROR; 389 break; 390 case IES_INTEGER: 391 case IES_RPAREN: 392 case IES_REGISTER: 393 State = IES_PLUS; 394 IC.pushOperator(IC_PLUS); 395 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { 396 // If we already have a BaseReg, then assume this is the IndexReg with 397 // a scale of 1. 398 if (!BaseReg) { 399 BaseReg = TmpReg; 400 } else { 401 assert (!IndexReg && "BaseReg/IndexReg already set!"); 402 IndexReg = TmpReg; 403 Scale = 1; 404 } 405 } 406 break; 407 } 408 PrevState = CurrState; 409 } 410 void onMinus() { 411 IntelExprState CurrState = State; 412 switch (State) { 413 default: 414 State = IES_ERROR; 415 break; 416 case IES_PLUS: 417 case IES_NOT: 418 case IES_MULTIPLY: 419 case IES_DIVIDE: 420 case IES_LPAREN: 421 case IES_RPAREN: 422 case IES_LBRAC: 423 case IES_RBRAC: 424 case IES_INTEGER: 425 case IES_REGISTER: 426 State = IES_MINUS; 427 // Only push the minus operator if it is not a unary operator. 428 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS || 429 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE || 430 CurrState == IES_LPAREN || CurrState == IES_LBRAC)) 431 IC.pushOperator(IC_MINUS); 432 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { 433 // If we already have a BaseReg, then assume this is the IndexReg with 434 // a scale of 1. 435 if (!BaseReg) { 436 BaseReg = TmpReg; 437 } else { 438 assert (!IndexReg && "BaseReg/IndexReg already set!"); 439 IndexReg = TmpReg; 440 Scale = 1; 441 } 442 } 443 break; 444 } 445 PrevState = CurrState; 446 } 447 void onNot() { 448 IntelExprState CurrState = State; 449 switch (State) { 450 default: 451 State = IES_ERROR; 452 break; 453 case IES_PLUS: 454 case IES_NOT: 455 State = IES_NOT; 456 break; 457 } 458 PrevState = CurrState; 459 } 460 void onRegister(unsigned Reg) { 461 IntelExprState CurrState = State; 462 switch (State) { 463 default: 464 State = IES_ERROR; 465 break; 466 case IES_PLUS: 467 case IES_LPAREN: 468 State = IES_REGISTER; 469 TmpReg = Reg; 470 IC.pushOperand(IC_REGISTER); 471 break; 472 case IES_MULTIPLY: 473 // Index Register - Scale * Register 474 if (PrevState == IES_INTEGER) { 475 assert (!IndexReg && "IndexReg already set!"); 476 State = IES_REGISTER; 477 IndexReg = Reg; 478 // Get the scale and replace the 'Scale * Register' with '0'. 479 Scale = IC.popOperand(); 480 IC.pushOperand(IC_IMM); 481 IC.popOperator(); 482 } else { 483 State = IES_ERROR; 484 } 485 break; 486 } 487 PrevState = CurrState; 488 } 489 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) { 490 PrevState = State; 491 switch (State) { 492 default: 493 State = IES_ERROR; 494 break; 495 case IES_PLUS: 496 case IES_MINUS: 497 case IES_NOT: 498 State = IES_INTEGER; 499 Sym = SymRef; 500 SymName = SymRefName; 501 IC.pushOperand(IC_IMM); 502 break; 503 } 504 } 505 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) { 506 IntelExprState CurrState = State; 507 switch (State) { 508 default: 509 State = IES_ERROR; 510 break; 511 case IES_PLUS: 512 case IES_MINUS: 513 case IES_NOT: 514 case IES_OR: 515 case IES_XOR: 516 case IES_AND: 517 case IES_LSHIFT: 518 case IES_RSHIFT: 519 case IES_DIVIDE: 520 case IES_MULTIPLY: 521 case IES_LPAREN: 522 State = IES_INTEGER; 523 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) { 524 // Index Register - Register * Scale 525 assert (!IndexReg && "IndexReg already set!"); 526 IndexReg = TmpReg; 527 Scale = TmpInt; 528 if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) { 529 ErrMsg = "scale factor in address must be 1, 2, 4 or 8"; 530 return true; 531 } 532 // Get the scale and replace the 'Register * Scale' with '0'. 533 IC.popOperator(); 534 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS || 535 PrevState == IES_OR || PrevState == IES_AND || 536 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT || 537 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE || 538 PrevState == IES_LPAREN || PrevState == IES_LBRAC || 539 PrevState == IES_NOT || PrevState == IES_XOR) && 540 CurrState == IES_MINUS) { 541 // Unary minus. No need to pop the minus operand because it was never 542 // pushed. 543 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm. 544 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS || 545 PrevState == IES_OR || PrevState == IES_AND || 546 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT || 547 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE || 548 PrevState == IES_LPAREN || PrevState == IES_LBRAC || 549 PrevState == IES_NOT || PrevState == IES_XOR) && 550 CurrState == IES_NOT) { 551 // Unary not. No need to pop the not operand because it was never 552 // pushed. 553 IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm. 554 } else { 555 IC.pushOperand(IC_IMM, TmpInt); 556 } 557 break; 558 } 559 PrevState = CurrState; 560 return false; 561 } 562 void onStar() { 563 PrevState = State; 564 switch (State) { 565 default: 566 State = IES_ERROR; 567 break; 568 case IES_INTEGER: 569 case IES_REGISTER: 570 case IES_RPAREN: 571 State = IES_MULTIPLY; 572 IC.pushOperator(IC_MULTIPLY); 573 break; 574 } 575 } 576 void onDivide() { 577 PrevState = State; 578 switch (State) { 579 default: 580 State = IES_ERROR; 581 break; 582 case IES_INTEGER: 583 case IES_RPAREN: 584 State = IES_DIVIDE; 585 IC.pushOperator(IC_DIVIDE); 586 break; 587 } 588 } 589 void onLBrac() { 590 PrevState = State; 591 switch (State) { 592 default: 593 State = IES_ERROR; 594 break; 595 case IES_RBRAC: 596 State = IES_PLUS; 597 IC.pushOperator(IC_PLUS); 598 break; 599 } 600 } 601 void onRBrac() { 602 IntelExprState CurrState = State; 603 switch (State) { 604 default: 605 State = IES_ERROR; 606 break; 607 case IES_INTEGER: 608 case IES_REGISTER: 609 case IES_RPAREN: 610 State = IES_RBRAC; 611 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { 612 // If we already have a BaseReg, then assume this is the IndexReg with 613 // a scale of 1. 614 if (!BaseReg) { 615 BaseReg = TmpReg; 616 } else { 617 assert (!IndexReg && "BaseReg/IndexReg already set!"); 618 IndexReg = TmpReg; 619 Scale = 1; 620 } 621 } 622 break; 623 } 624 PrevState = CurrState; 625 } 626 void onLParen() { 627 IntelExprState CurrState = State; 628 switch (State) { 629 default: 630 State = IES_ERROR; 631 break; 632 case IES_PLUS: 633 case IES_MINUS: 634 case IES_NOT: 635 case IES_OR: 636 case IES_XOR: 637 case IES_AND: 638 case IES_LSHIFT: 639 case IES_RSHIFT: 640 case IES_MULTIPLY: 641 case IES_DIVIDE: 642 case IES_LPAREN: 643 // FIXME: We don't handle this type of unary minus or not, yet. 644 if ((PrevState == IES_PLUS || PrevState == IES_MINUS || 645 PrevState == IES_OR || PrevState == IES_AND || 646 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT || 647 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE || 648 PrevState == IES_LPAREN || PrevState == IES_LBRAC || 649 PrevState == IES_NOT || PrevState == IES_XOR) && 650 (CurrState == IES_MINUS || CurrState == IES_NOT)) { 651 State = IES_ERROR; 652 break; 653 } 654 State = IES_LPAREN; 655 IC.pushOperator(IC_LPAREN); 656 break; 657 } 658 PrevState = CurrState; 659 } 660 void onRParen() { 661 PrevState = State; 662 switch (State) { 663 default: 664 State = IES_ERROR; 665 break; 666 case IES_INTEGER: 667 case IES_REGISTER: 668 case IES_RPAREN: 669 State = IES_RPAREN; 670 IC.pushOperator(IC_RPAREN); 671 break; 672 } 673 } 674 }; 675 676 bool Error(SMLoc L, const Twine &Msg, SMRange Range = None, 677 bool MatchingInlineAsm = false) { 678 MCAsmParser &Parser = getParser(); 679 if (MatchingInlineAsm) { 680 if (!getLexer().isAtStartOfStatement()) 681 Parser.eatToEndOfStatement(); 682 return false; 683 } 684 return Parser.Error(L, Msg, Range); 685 } 686 687 std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) { 688 Error(Loc, Msg); 689 return nullptr; 690 } 691 692 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc); 693 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc); 694 bool IsSIReg(unsigned Reg); 695 unsigned GetSIDIForRegClass(unsigned RegClassID, unsigned Reg, bool IsSIReg); 696 void 697 AddDefaultSrcDestOperands(OperandVector &Operands, 698 std::unique_ptr<llvm::MCParsedAsmOperand> &&Src, 699 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst); 700 bool VerifyAndAdjustOperands(OperandVector &OrigOperands, 701 OperandVector &FinalOperands); 702 std::unique_ptr<X86Operand> ParseOperand(); 703 std::unique_ptr<X86Operand> ParseATTOperand(); 704 std::unique_ptr<X86Operand> ParseIntelOperand(); 705 std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator(); 706 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp); 707 std::unique_ptr<X86Operand> ParseIntelOperator(unsigned OpKind); 708 std::unique_ptr<X86Operand> 709 ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size); 710 std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start, SMLoc End); 711 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End); 712 std::unique_ptr<X86Operand> 713 ParseIntelBracExpression(unsigned SegReg, SMLoc Start, int64_t ImmDisp, 714 bool isSymbol, unsigned Size); 715 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier, 716 InlineAsmIdentifierInfo &Info, 717 bool IsUnevaluatedOperand, SMLoc &End); 718 719 std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc StartLoc); 720 721 std::unique_ptr<X86Operand> 722 CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, 723 unsigned IndexReg, unsigned Scale, SMLoc Start, 724 SMLoc End, unsigned Size, StringRef Identifier, 725 InlineAsmIdentifierInfo &Info, 726 bool AllowBetterSizeMatch = false); 727 728 bool parseDirectiveEven(SMLoc L); 729 bool ParseDirectiveWord(unsigned Size, SMLoc L); 730 bool ParseDirectiveCode(StringRef IDVal, SMLoc L); 731 732 bool processInstruction(MCInst &Inst, const OperandVector &Ops); 733 734 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds 735 /// instrumentation around Inst. 736 void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out); 737 738 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 739 OperandVector &Operands, MCStreamer &Out, 740 uint64_t &ErrorInfo, 741 bool MatchingInlineAsm) override; 742 743 void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands, 744 MCStreamer &Out, bool MatchingInlineAsm); 745 746 bool ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo, 747 bool MatchingInlineAsm); 748 749 bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode, 750 OperandVector &Operands, MCStreamer &Out, 751 uint64_t &ErrorInfo, 752 bool MatchingInlineAsm); 753 754 bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode, 755 OperandVector &Operands, MCStreamer &Out, 756 uint64_t &ErrorInfo, 757 bool MatchingInlineAsm); 758 759 bool OmitRegisterFromClobberLists(unsigned RegNo) override; 760 761 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z}) 762 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required. 763 /// return false if no parsing errors occurred, true otherwise. 764 bool HandleAVX512Operand(OperandVector &Operands, 765 const MCParsedAsmOperand &Op); 766 767 bool ParseZ(std::unique_ptr<X86Operand> &Z, const SMLoc &StartLoc); 768 769 /// MS-compatibility: 770 /// Obtain an appropriate size qualifier, when facing its absence, 771 /// upon AVX512 vector/broadcast memory operand 772 unsigned AdjustAVX512Mem(unsigned Size, X86Operand* UnsizedMemOpNext); 773 774 bool is64BitMode() const { 775 // FIXME: Can tablegen auto-generate this? 776 return getSTI().getFeatureBits()[X86::Mode64Bit]; 777 } 778 bool is32BitMode() const { 779 // FIXME: Can tablegen auto-generate this? 780 return getSTI().getFeatureBits()[X86::Mode32Bit]; 781 } 782 bool is16BitMode() const { 783 // FIXME: Can tablegen auto-generate this? 784 return getSTI().getFeatureBits()[X86::Mode16Bit]; 785 } 786 void SwitchMode(unsigned mode) { 787 MCSubtargetInfo &STI = copySTI(); 788 FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit}); 789 FeatureBitset OldMode = STI.getFeatureBits() & AllModes; 790 unsigned FB = ComputeAvailableFeatures( 791 STI.ToggleFeature(OldMode.flip(mode))); 792 setAvailableFeatures(FB); 793 794 assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes)); 795 } 796 797 unsigned getPointerWidth() { 798 if (is16BitMode()) return 16; 799 if (is32BitMode()) return 32; 800 if (is64BitMode()) return 64; 801 llvm_unreachable("invalid mode"); 802 } 803 804 bool isParsingIntelSyntax() { 805 return getParser().getAssemblerDialect(); 806 } 807 808 /// @name Auto-generated Matcher Functions 809 /// { 810 811 #define GET_ASSEMBLER_HEADER 812 #include "X86GenAsmMatcher.inc" 813 814 /// } 815 816 public: 817 X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser, 818 const MCInstrInfo &mii, const MCTargetOptions &Options) 819 : MCTargetAsmParser(Options, sti), MII(mii), InstInfo(nullptr), 820 Code16GCC(false) { 821 822 // Initialize the set of available features. 823 setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits())); 824 Instrumentation.reset( 825 CreateX86AsmInstrumentation(Options, Parser.getContext(), STI)); 826 } 827 828 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 829 830 void SetFrameRegister(unsigned RegNo) override; 831 832 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 833 SMLoc NameLoc, OperandVector &Operands) override; 834 835 bool ParseDirective(AsmToken DirectiveID) override; 836 }; 837 } // end anonymous namespace 838 839 /// @name Auto-generated Match Functions 840 /// { 841 842 static unsigned MatchRegisterName(StringRef Name); 843 844 /// } 845 846 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg, 847 StringRef &ErrMsg) { 848 // If we have both a base register and an index register make sure they are 849 // both 64-bit or 32-bit registers. 850 // To support VSIB, IndexReg can be 128-bit or 256-bit registers. 851 852 if ((BaseReg == X86::RIP && IndexReg != 0) || (IndexReg == X86::RIP)) { 853 ErrMsg = "invalid base+index expression"; 854 return true; 855 } 856 if (BaseReg != 0 && IndexReg != 0) { 857 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) && 858 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || 859 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) && 860 IndexReg != X86::RIZ) { 861 ErrMsg = "base register is 64-bit, but index register is not"; 862 return true; 863 } 864 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) && 865 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || 866 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) && 867 IndexReg != X86::EIZ){ 868 ErrMsg = "base register is 32-bit, but index register is not"; 869 return true; 870 } 871 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) { 872 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) || 873 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) { 874 ErrMsg = "base register is 16-bit, but index register is not"; 875 return true; 876 } 877 if (((BaseReg == X86::BX || BaseReg == X86::BP) && 878 IndexReg != X86::SI && IndexReg != X86::DI) || 879 ((BaseReg == X86::SI || BaseReg == X86::DI) && 880 IndexReg != X86::BX && IndexReg != X86::BP)) { 881 ErrMsg = "invalid 16-bit base/index register combination"; 882 return true; 883 } 884 } 885 } 886 return false; 887 } 888 889 bool X86AsmParser::ParseRegister(unsigned &RegNo, 890 SMLoc &StartLoc, SMLoc &EndLoc) { 891 MCAsmParser &Parser = getParser(); 892 RegNo = 0; 893 const AsmToken &PercentTok = Parser.getTok(); 894 StartLoc = PercentTok.getLoc(); 895 896 // If we encounter a %, ignore it. This code handles registers with and 897 // without the prefix, unprefixed registers can occur in cfi directives. 898 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent)) 899 Parser.Lex(); // Eat percent token. 900 901 const AsmToken &Tok = Parser.getTok(); 902 EndLoc = Tok.getEndLoc(); 903 904 if (Tok.isNot(AsmToken::Identifier)) { 905 if (isParsingIntelSyntax()) return true; 906 return Error(StartLoc, "invalid register name", 907 SMRange(StartLoc, EndLoc)); 908 } 909 910 RegNo = MatchRegisterName(Tok.getString()); 911 912 // If the match failed, try the register name as lowercase. 913 if (RegNo == 0) 914 RegNo = MatchRegisterName(Tok.getString().lower()); 915 916 // The "flags" register cannot be referenced directly. 917 // Treat it as an identifier instead. 918 if (isParsingInlineAsm() && isParsingIntelSyntax() && RegNo == X86::EFLAGS) 919 RegNo = 0; 920 921 if (!is64BitMode()) { 922 // FIXME: This should be done using Requires<Not64BitMode> and 923 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also 924 // checked. 925 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a 926 // REX prefix. 927 if (RegNo == X86::RIZ || 928 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) || 929 X86II::isX86_64NonExtLowByteReg(RegNo) || 930 X86II::isX86_64ExtendedReg(RegNo)) 931 return Error(StartLoc, "register %" 932 + Tok.getString() + " is only available in 64-bit mode", 933 SMRange(StartLoc, EndLoc)); 934 } else if (!getSTI().getFeatureBits()[X86::FeatureAVX512]) { 935 if (X86II::is32ExtendedReg(RegNo)) 936 return Error(StartLoc, "register %" 937 + Tok.getString() + " is only available with AVX512", 938 SMRange(StartLoc, EndLoc)); 939 } 940 941 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens. 942 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) { 943 RegNo = X86::ST0; 944 Parser.Lex(); // Eat 'st' 945 946 // Check to see if we have '(4)' after %st. 947 if (getLexer().isNot(AsmToken::LParen)) 948 return false; 949 // Lex the paren. 950 getParser().Lex(); 951 952 const AsmToken &IntTok = Parser.getTok(); 953 if (IntTok.isNot(AsmToken::Integer)) 954 return Error(IntTok.getLoc(), "expected stack index"); 955 switch (IntTok.getIntVal()) { 956 case 0: RegNo = X86::ST0; break; 957 case 1: RegNo = X86::ST1; break; 958 case 2: RegNo = X86::ST2; break; 959 case 3: RegNo = X86::ST3; break; 960 case 4: RegNo = X86::ST4; break; 961 case 5: RegNo = X86::ST5; break; 962 case 6: RegNo = X86::ST6; break; 963 case 7: RegNo = X86::ST7; break; 964 default: return Error(IntTok.getLoc(), "invalid stack index"); 965 } 966 967 if (getParser().Lex().isNot(AsmToken::RParen)) 968 return Error(Parser.getTok().getLoc(), "expected ')'"); 969 970 EndLoc = Parser.getTok().getEndLoc(); 971 Parser.Lex(); // Eat ')' 972 return false; 973 } 974 975 EndLoc = Parser.getTok().getEndLoc(); 976 977 // If this is "db[0-7]", match it as an alias 978 // for dr[0-7]. 979 if (RegNo == 0 && Tok.getString().size() == 3 && 980 Tok.getString().startswith("db")) { 981 switch (Tok.getString()[2]) { 982 case '0': RegNo = X86::DR0; break; 983 case '1': RegNo = X86::DR1; break; 984 case '2': RegNo = X86::DR2; break; 985 case '3': RegNo = X86::DR3; break; 986 case '4': RegNo = X86::DR4; break; 987 case '5': RegNo = X86::DR5; break; 988 case '6': RegNo = X86::DR6; break; 989 case '7': RegNo = X86::DR7; break; 990 } 991 992 if (RegNo != 0) { 993 EndLoc = Parser.getTok().getEndLoc(); 994 Parser.Lex(); // Eat it. 995 return false; 996 } 997 } 998 999 if (RegNo == 0) { 1000 if (isParsingIntelSyntax()) return true; 1001 return Error(StartLoc, "invalid register name", 1002 SMRange(StartLoc, EndLoc)); 1003 } 1004 1005 Parser.Lex(); // Eat identifier token. 1006 return false; 1007 } 1008 1009 void X86AsmParser::SetFrameRegister(unsigned RegNo) { 1010 Instrumentation->SetInitialFrameRegister(RegNo); 1011 } 1012 1013 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) { 1014 bool Parse32 = is32BitMode() || Code16GCC; 1015 unsigned Basereg = is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI); 1016 const MCExpr *Disp = MCConstantExpr::create(0, getContext()); 1017 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp, 1018 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1, 1019 Loc, Loc, 0); 1020 } 1021 1022 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) { 1023 bool Parse32 = is32BitMode() || Code16GCC; 1024 unsigned Basereg = is64BitMode() ? X86::RDI : (Parse32 ? X86::EDI : X86::DI); 1025 const MCExpr *Disp = MCConstantExpr::create(0, getContext()); 1026 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp, 1027 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1, 1028 Loc, Loc, 0); 1029 } 1030 1031 bool X86AsmParser::IsSIReg(unsigned Reg) { 1032 switch (Reg) { 1033 default: llvm_unreachable("Only (R|E)SI and (R|E)DI are expected!"); 1034 case X86::RSI: 1035 case X86::ESI: 1036 case X86::SI: 1037 return true; 1038 case X86::RDI: 1039 case X86::EDI: 1040 case X86::DI: 1041 return false; 1042 } 1043 } 1044 1045 unsigned X86AsmParser::GetSIDIForRegClass(unsigned RegClassID, unsigned Reg, 1046 bool IsSIReg) { 1047 switch (RegClassID) { 1048 default: llvm_unreachable("Unexpected register class"); 1049 case X86::GR64RegClassID: 1050 return IsSIReg ? X86::RSI : X86::RDI; 1051 case X86::GR32RegClassID: 1052 return IsSIReg ? X86::ESI : X86::EDI; 1053 case X86::GR16RegClassID: 1054 return IsSIReg ? X86::SI : X86::DI; 1055 } 1056 } 1057 1058 void X86AsmParser::AddDefaultSrcDestOperands( 1059 OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src, 1060 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) { 1061 if (isParsingIntelSyntax()) { 1062 Operands.push_back(std::move(Dst)); 1063 Operands.push_back(std::move(Src)); 1064 } 1065 else { 1066 Operands.push_back(std::move(Src)); 1067 Operands.push_back(std::move(Dst)); 1068 } 1069 } 1070 1071 bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands, 1072 OperandVector &FinalOperands) { 1073 1074 if (OrigOperands.size() > 1) { 1075 // Check if sizes match, OrigOperands also contains the instruction name 1076 assert(OrigOperands.size() == FinalOperands.size() + 1 && 1077 "Operand size mismatch"); 1078 1079 SmallVector<std::pair<SMLoc, std::string>, 2> Warnings; 1080 // Verify types match 1081 int RegClassID = -1; 1082 for (unsigned int i = 0; i < FinalOperands.size(); ++i) { 1083 X86Operand &OrigOp = static_cast<X86Operand &>(*OrigOperands[i + 1]); 1084 X86Operand &FinalOp = static_cast<X86Operand &>(*FinalOperands[i]); 1085 1086 if (FinalOp.isReg() && 1087 (!OrigOp.isReg() || FinalOp.getReg() != OrigOp.getReg())) 1088 // Return false and let a normal complaint about bogus operands happen 1089 return false; 1090 1091 if (FinalOp.isMem()) { 1092 1093 if (!OrigOp.isMem()) 1094 // Return false and let a normal complaint about bogus operands happen 1095 return false; 1096 1097 unsigned OrigReg = OrigOp.Mem.BaseReg; 1098 unsigned FinalReg = FinalOp.Mem.BaseReg; 1099 1100 // If we've already encounterd a register class, make sure all register 1101 // bases are of the same register class 1102 if (RegClassID != -1 && 1103 !X86MCRegisterClasses[RegClassID].contains(OrigReg)) { 1104 return Error(OrigOp.getStartLoc(), 1105 "mismatching source and destination index registers"); 1106 } 1107 1108 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(OrigReg)) 1109 RegClassID = X86::GR64RegClassID; 1110 else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(OrigReg)) 1111 RegClassID = X86::GR32RegClassID; 1112 else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(OrigReg)) 1113 RegClassID = X86::GR16RegClassID; 1114 else 1115 // Unexpected register class type 1116 // Return false and let a normal complaint about bogus operands happen 1117 return false; 1118 1119 bool IsSI = IsSIReg(FinalReg); 1120 FinalReg = GetSIDIForRegClass(RegClassID, FinalReg, IsSI); 1121 1122 if (FinalReg != OrigReg) { 1123 std::string RegName = IsSI ? "ES:(R|E)SI" : "ES:(R|E)DI"; 1124 Warnings.push_back(std::make_pair( 1125 OrigOp.getStartLoc(), 1126 "memory operand is only for determining the size, " + RegName + 1127 " will be used for the location")); 1128 } 1129 1130 FinalOp.Mem.Size = OrigOp.Mem.Size; 1131 FinalOp.Mem.SegReg = OrigOp.Mem.SegReg; 1132 FinalOp.Mem.BaseReg = FinalReg; 1133 } 1134 } 1135 1136 // Produce warnings only if all the operands passed the adjustment - prevent 1137 // legal cases like "movsd (%rax), %xmm0" mistakenly produce warnings 1138 for (auto &WarningMsg : Warnings) { 1139 Warning(WarningMsg.first, WarningMsg.second); 1140 } 1141 1142 // Remove old operands 1143 for (unsigned int i = 0; i < FinalOperands.size(); ++i) 1144 OrigOperands.pop_back(); 1145 } 1146 // OrigOperands.append(FinalOperands.begin(), FinalOperands.end()); 1147 for (unsigned int i = 0; i < FinalOperands.size(); ++i) 1148 OrigOperands.push_back(std::move(FinalOperands[i])); 1149 1150 return false; 1151 } 1152 1153 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() { 1154 if (isParsingIntelSyntax()) 1155 return ParseIntelOperand(); 1156 return ParseATTOperand(); 1157 } 1158 1159 /// getIntelMemOperandSize - Return intel memory operand size. 1160 static unsigned getIntelMemOperandSize(StringRef OpStr) { 1161 unsigned Size = StringSwitch<unsigned>(OpStr) 1162 .Cases("BYTE", "byte", 8) 1163 .Cases("WORD", "word", 16) 1164 .Cases("DWORD", "dword", 32) 1165 .Cases("FWORD", "fword", 48) 1166 .Cases("QWORD", "qword", 64) 1167 .Cases("MMWORD","mmword", 64) 1168 .Cases("XWORD", "xword", 80) 1169 .Cases("TBYTE", "tbyte", 80) 1170 .Cases("XMMWORD", "xmmword", 128) 1171 .Cases("YMMWORD", "ymmword", 256) 1172 .Cases("ZMMWORD", "zmmword", 512) 1173 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter 1174 .Default(0); 1175 return Size; 1176 } 1177 1178 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm( 1179 unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg, 1180 unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier, 1181 InlineAsmIdentifierInfo &Info, bool AllowBetterSizeMatch) { 1182 // If we found a decl other than a VarDecl, then assume it is a FuncDecl or 1183 // some other label reference. 1184 if (isa<MCSymbolRefExpr>(Disp) && Info.OpDecl && !Info.IsVarDecl) { 1185 // Insert an explicit size if the user didn't have one. 1186 if (!Size) { 1187 Size = getPointerWidth(); 1188 InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start, 1189 /*Len=*/0, Size); 1190 } 1191 1192 // Create an absolute memory reference in order to match against 1193 // instructions taking a PC relative operand. 1194 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size, 1195 Identifier, Info.OpDecl); 1196 } 1197 1198 // We either have a direct symbol reference, or an offset from a symbol. The 1199 // parser always puts the symbol on the LHS, so look there for size 1200 // calculation purposes. 1201 const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp); 1202 bool IsSymRef = 1203 isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp); 1204 if (IsSymRef) { 1205 if (!Size) { 1206 Size = Info.Type * 8; // Size is in terms of bits in this context. 1207 if (Size) 1208 InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start, 1209 /*Len=*/0, Size); 1210 if (AllowBetterSizeMatch) 1211 // Handle cases where size qualifier is absent, upon an indirect symbol 1212 // reference - e.g. "vaddps zmm1, zmm2, [var]" 1213 // set Size to zero to allow matching mechansim to try and find a better 1214 // size qualifier than our initial guess, based on available variants of 1215 // the given instruction 1216 Size = 0; 1217 } 1218 } 1219 1220 // When parsing inline assembly we set the base register to a non-zero value 1221 // if we don't know the actual value at this time. This is necessary to 1222 // get the matching correct in some cases. 1223 BaseReg = BaseReg ? BaseReg : 1; 1224 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg, 1225 IndexReg, Scale, Start, End, Size, Identifier, 1226 Info.OpDecl); 1227 } 1228 1229 static void 1230 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> &AsmRewrites, 1231 StringRef SymName, int64_t ImmDisp, 1232 int64_t FinalImmDisp, SMLoc &BracLoc, 1233 SMLoc &StartInBrac, SMLoc &End) { 1234 // Remove the '[' and ']' from the IR string. 1235 AsmRewrites.emplace_back(AOK_Skip, BracLoc, 1); 1236 AsmRewrites.emplace_back(AOK_Skip, End, 1); 1237 1238 // If ImmDisp is non-zero, then we parsed a displacement before the 1239 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp]) 1240 // If ImmDisp doesn't match the displacement computed by the state machine 1241 // then we have an additional displacement in the bracketed expression. 1242 if (ImmDisp != FinalImmDisp) { 1243 if (ImmDisp) { 1244 // We have an immediate displacement before the bracketed expression. 1245 // Adjust this to match the final immediate displacement. 1246 bool Found = false; 1247 for (AsmRewrite &AR : AsmRewrites) { 1248 if (AR.Loc.getPointer() > BracLoc.getPointer()) 1249 continue; 1250 if (AR.Kind == AOK_ImmPrefix || AR.Kind == AOK_Imm) { 1251 assert (!Found && "ImmDisp already rewritten."); 1252 AR.Kind = AOK_Imm; 1253 AR.Len = BracLoc.getPointer() - AR.Loc.getPointer(); 1254 AR.Val = FinalImmDisp; 1255 Found = true; 1256 break; 1257 } 1258 } 1259 assert (Found && "Unable to rewrite ImmDisp."); 1260 (void)Found; 1261 } else { 1262 // We have a symbolic and an immediate displacement, but no displacement 1263 // before the bracketed expression. Put the immediate displacement 1264 // before the bracketed expression. 1265 AsmRewrites.emplace_back(AOK_Imm, BracLoc, 0, FinalImmDisp); 1266 } 1267 } 1268 // Remove all the ImmPrefix rewrites within the brackets. 1269 for (AsmRewrite &AR : AsmRewrites) { 1270 if (AR.Loc.getPointer() < StartInBrac.getPointer()) 1271 continue; 1272 if (AR.Kind == AOK_ImmPrefix) 1273 AR.Kind = AOK_Delete; 1274 } 1275 const char *SymLocPtr = SymName.data(); 1276 // Skip everything before the symbol. 1277 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) { 1278 assert(Len > 0 && "Expected a non-negative length."); 1279 AsmRewrites.emplace_back(AOK_Skip, StartInBrac, Len); 1280 } 1281 // Skip everything after the symbol. 1282 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) { 1283 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size()); 1284 assert(Len > 0 && "Expected a non-negative length."); 1285 AsmRewrites.emplace_back(AOK_Skip, Loc, Len); 1286 } 1287 } 1288 1289 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { 1290 MCAsmParser &Parser = getParser(); 1291 const AsmToken &Tok = Parser.getTok(); 1292 1293 AsmToken::TokenKind PrevTK = AsmToken::Error; 1294 bool Done = false; 1295 while (!Done) { 1296 bool UpdateLocLex = true; 1297 1298 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an 1299 // identifier. Don't try an parse it as a register. 1300 if (PrevTK != AsmToken::Error && Tok.getString().startswith(".")) 1301 break; 1302 1303 // If we're parsing an immediate expression, we don't expect a '['. 1304 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac) 1305 break; 1306 1307 AsmToken::TokenKind TK = getLexer().getKind(); 1308 switch (TK) { 1309 default: { 1310 if (SM.isValidEndState()) { 1311 Done = true; 1312 break; 1313 } 1314 return Error(Tok.getLoc(), "unknown token in expression"); 1315 } 1316 case AsmToken::EndOfStatement: { 1317 Done = true; 1318 break; 1319 } 1320 case AsmToken::String: 1321 case AsmToken::Identifier: { 1322 // This could be a register or a symbolic displacement. 1323 unsigned TmpReg; 1324 const MCExpr *Val; 1325 SMLoc IdentLoc = Tok.getLoc(); 1326 StringRef Identifier = Tok.getString(); 1327 if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) { 1328 SM.onRegister(TmpReg); 1329 UpdateLocLex = false; 1330 break; 1331 } else { 1332 if (!isParsingInlineAsm()) { 1333 if (getParser().parsePrimaryExpr(Val, End)) 1334 return Error(Tok.getLoc(), "Unexpected identifier!"); 1335 } else { 1336 // This is a dot operator, not an adjacent identifier. 1337 if (Identifier.find('.') != StringRef::npos && 1338 PrevTK == AsmToken::RBrac) { 1339 return false; 1340 } else { 1341 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo(); 1342 if (ParseIntelIdentifier(Val, Identifier, Info, 1343 /*Unevaluated=*/false, End)) 1344 return true; 1345 } 1346 } 1347 SM.onIdentifierExpr(Val, Identifier); 1348 UpdateLocLex = false; 1349 break; 1350 } 1351 return Error(Tok.getLoc(), "Unexpected identifier!"); 1352 } 1353 case AsmToken::Integer: { 1354 StringRef ErrMsg; 1355 if (isParsingInlineAsm() && SM.getAddImmPrefix()) 1356 InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, Tok.getLoc()); 1357 // Look for 'b' or 'f' following an Integer as a directional label 1358 SMLoc Loc = getTok().getLoc(); 1359 int64_t IntVal = getTok().getIntVal(); 1360 End = consumeToken(); 1361 UpdateLocLex = false; 1362 if (getLexer().getKind() == AsmToken::Identifier) { 1363 StringRef IDVal = getTok().getString(); 1364 if (IDVal == "f" || IDVal == "b") { 1365 MCSymbol *Sym = 1366 getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b"); 1367 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; 1368 const MCExpr *Val = 1369 MCSymbolRefExpr::create(Sym, Variant, getContext()); 1370 if (IDVal == "b" && Sym->isUndefined()) 1371 return Error(Loc, "invalid reference to undefined symbol"); 1372 StringRef Identifier = Sym->getName(); 1373 SM.onIdentifierExpr(Val, Identifier); 1374 End = consumeToken(); 1375 } else { 1376 if (SM.onInteger(IntVal, ErrMsg)) 1377 return Error(Loc, ErrMsg); 1378 } 1379 } else { 1380 if (SM.onInteger(IntVal, ErrMsg)) 1381 return Error(Loc, ErrMsg); 1382 } 1383 break; 1384 } 1385 case AsmToken::Plus: SM.onPlus(); break; 1386 case AsmToken::Minus: SM.onMinus(); break; 1387 case AsmToken::Tilde: SM.onNot(); break; 1388 case AsmToken::Star: SM.onStar(); break; 1389 case AsmToken::Slash: SM.onDivide(); break; 1390 case AsmToken::Pipe: SM.onOr(); break; 1391 case AsmToken::Caret: SM.onXor(); break; 1392 case AsmToken::Amp: SM.onAnd(); break; 1393 case AsmToken::LessLess: 1394 SM.onLShift(); break; 1395 case AsmToken::GreaterGreater: 1396 SM.onRShift(); break; 1397 case AsmToken::LBrac: SM.onLBrac(); break; 1398 case AsmToken::RBrac: SM.onRBrac(); break; 1399 case AsmToken::LParen: SM.onLParen(); break; 1400 case AsmToken::RParen: SM.onRParen(); break; 1401 } 1402 if (SM.hadError()) 1403 return Error(Tok.getLoc(), "unknown token in expression"); 1404 1405 if (!Done && UpdateLocLex) 1406 End = consumeToken(); 1407 1408 PrevTK = TK; 1409 } 1410 return false; 1411 } 1412 1413 std::unique_ptr<X86Operand> 1414 X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start, 1415 int64_t ImmDisp, bool isSymbol, 1416 unsigned Size) { 1417 MCAsmParser &Parser = getParser(); 1418 const AsmToken &Tok = Parser.getTok(); 1419 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc(); 1420 if (getLexer().isNot(AsmToken::LBrac)) 1421 return ErrorOperand(BracLoc, "Expected '[' token!"); 1422 Parser.Lex(); // Eat '[' 1423 1424 SMLoc StartInBrac = Parser.getTok().getLoc(); 1425 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We 1426 // may have already parsed an immediate displacement before the bracketed 1427 // expression. 1428 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true); 1429 if (ParseIntelExpression(SM, End)) 1430 return nullptr; 1431 1432 const MCExpr *Disp = nullptr; 1433 if (const MCExpr *Sym = SM.getSym()) { 1434 // A symbolic displacement. 1435 Disp = Sym; 1436 if (isParsingInlineAsm()) 1437 RewriteIntelBracExpression(*InstInfo->AsmRewrites, SM.getSymName(), 1438 ImmDisp, SM.getImm(), BracLoc, StartInBrac, 1439 End); 1440 } 1441 1442 if (SM.getImm() || !Disp) { 1443 const MCExpr *Imm = MCConstantExpr::create(SM.getImm(), getContext()); 1444 if (Disp) 1445 Disp = MCBinaryExpr::createAdd(Disp, Imm, getContext()); 1446 else 1447 Disp = Imm; // An immediate displacement only. 1448 } 1449 1450 // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC 1451 // will in fact do global lookup the field name inside all global typedefs, 1452 // but we don't emulate that. 1453 if ((Parser.getTok().getKind() == AsmToken::Identifier || 1454 Parser.getTok().getKind() == AsmToken::Dot || 1455 Parser.getTok().getKind() == AsmToken::Real) && 1456 Parser.getTok().getString().find('.') != StringRef::npos) { 1457 const MCExpr *NewDisp; 1458 if (ParseIntelDotOperator(Disp, NewDisp)) 1459 return nullptr; 1460 1461 End = Tok.getEndLoc(); 1462 Parser.Lex(); // Eat the field. 1463 Disp = NewDisp; 1464 } 1465 1466 if (isSymbol) { 1467 if (SM.getSym()) { 1468 Error(Start, "cannot use more than one symbol in memory operand"); 1469 return nullptr; 1470 } 1471 if (SM.getBaseReg()) { 1472 Error(Start, "cannot use base register with variable reference"); 1473 return nullptr; 1474 } 1475 if (SM.getIndexReg()) { 1476 Error(Start, "cannot use index register with variable reference"); 1477 return nullptr; 1478 } 1479 } 1480 1481 int BaseReg = SM.getBaseReg(); 1482 int IndexReg = SM.getIndexReg(); 1483 int Scale = SM.getScale(); 1484 if (!isParsingInlineAsm()) { 1485 // handle [-42] 1486 if (!BaseReg && !IndexReg) { 1487 if (!SegReg) 1488 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size); 1489 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1, 1490 Start, End, Size); 1491 } 1492 StringRef ErrMsg; 1493 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) { 1494 Error(StartInBrac, ErrMsg); 1495 return nullptr; 1496 } 1497 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg, 1498 IndexReg, Scale, Start, End, Size); 1499 } 1500 1501 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo(); 1502 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start, 1503 End, Size, SM.getSymName(), Info, 1504 isParsingInlineAsm()); 1505 } 1506 1507 // Inline assembly may use variable names with namespace alias qualifiers. 1508 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val, 1509 StringRef &Identifier, 1510 InlineAsmIdentifierInfo &Info, 1511 bool IsUnevaluatedOperand, SMLoc &End) { 1512 MCAsmParser &Parser = getParser(); 1513 assert(isParsingInlineAsm() && "Expected to be parsing inline assembly."); 1514 Val = nullptr; 1515 1516 StringRef LineBuf(Identifier.data()); 1517 void *Result = 1518 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand); 1519 1520 const AsmToken &Tok = Parser.getTok(); 1521 SMLoc Loc = Tok.getLoc(); 1522 1523 // Advance the token stream until the end of the current token is 1524 // after the end of what the frontend claimed. 1525 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size(); 1526 do { 1527 End = Tok.getEndLoc(); 1528 getLexer().Lex(); 1529 } while (End.getPointer() < EndPtr); 1530 Identifier = LineBuf; 1531 1532 // The frontend should end parsing on an assembler token boundary, unless it 1533 // failed parsing. 1534 assert((End.getPointer() == EndPtr || !Result) && 1535 "frontend claimed part of a token?"); 1536 1537 // If the identifier lookup was unsuccessful, assume that we are dealing with 1538 // a label. 1539 if (!Result) { 1540 StringRef InternalName = 1541 SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(), 1542 Loc, false); 1543 assert(InternalName.size() && "We should have an internal name here."); 1544 // Push a rewrite for replacing the identifier name with the internal name. 1545 InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(), 1546 InternalName); 1547 } 1548 1549 // Create the symbol reference. 1550 MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier); 1551 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; 1552 Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext()); 1553 return false; 1554 } 1555 1556 /// \brief Parse intel style segment override. 1557 std::unique_ptr<X86Operand> 1558 X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, 1559 unsigned Size) { 1560 MCAsmParser &Parser = getParser(); 1561 assert(SegReg != 0 && "Tried to parse a segment override without a segment!"); 1562 const AsmToken &Tok = Parser.getTok(); // Eat colon. 1563 if (Tok.isNot(AsmToken::Colon)) 1564 return ErrorOperand(Tok.getLoc(), "Expected ':' token!"); 1565 Parser.Lex(); // Eat ':' 1566 1567 int64_t ImmDisp = 0; 1568 if (getLexer().is(AsmToken::Integer)) { 1569 ImmDisp = Tok.getIntVal(); 1570 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer. 1571 1572 if (isParsingInlineAsm()) 1573 InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, ImmDispToken.getLoc()); 1574 1575 if (getLexer().isNot(AsmToken::LBrac)) { 1576 // An immediate following a 'segment register', 'colon' token sequence can 1577 // be followed by a bracketed expression. If it isn't we know we have our 1578 // final segment override. 1579 const MCExpr *Disp = MCConstantExpr::create(ImmDisp, getContext()); 1580 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 1581 /*BaseReg=*/0, /*IndexReg=*/0, /*Scale=*/1, 1582 Start, ImmDispToken.getEndLoc(), Size); 1583 } 1584 } 1585 1586 if (getLexer().is(AsmToken::LBrac)) 1587 return ParseIntelBracExpression(SegReg, Start, ImmDisp, false, Size); 1588 1589 const MCExpr *Val; 1590 SMLoc End; 1591 if (!isParsingInlineAsm()) { 1592 if (getParser().parsePrimaryExpr(Val, End)) 1593 return ErrorOperand(Tok.getLoc(), "unknown token in expression"); 1594 1595 return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size); 1596 } 1597 1598 InlineAsmIdentifierInfo Info; 1599 StringRef Identifier = Tok.getString(); 1600 if (ParseIntelIdentifier(Val, Identifier, Info, 1601 /*Unevaluated=*/false, End)) 1602 return nullptr; 1603 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0, 1604 /*Scale=*/1, Start, End, Size, Identifier, Info); 1605 } 1606 1607 //ParseRoundingModeOp - Parse AVX-512 rounding mode operand 1608 std::unique_ptr<X86Operand> 1609 X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) { 1610 MCAsmParser &Parser = getParser(); 1611 const AsmToken &Tok = Parser.getTok(); 1612 // Eat "{" and mark the current place. 1613 const SMLoc consumedToken = consumeToken(); 1614 if (Tok.getIdentifier().startswith("r")){ 1615 int rndMode = StringSwitch<int>(Tok.getIdentifier()) 1616 .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT) 1617 .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF) 1618 .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF) 1619 .Case("rz", X86::STATIC_ROUNDING::TO_ZERO) 1620 .Default(-1); 1621 if (-1 == rndMode) 1622 return ErrorOperand(Tok.getLoc(), "Invalid rounding mode."); 1623 Parser.Lex(); // Eat "r*" of r*-sae 1624 if (!getLexer().is(AsmToken::Minus)) 1625 return ErrorOperand(Tok.getLoc(), "Expected - at this point"); 1626 Parser.Lex(); // Eat "-" 1627 Parser.Lex(); // Eat the sae 1628 if (!getLexer().is(AsmToken::RCurly)) 1629 return ErrorOperand(Tok.getLoc(), "Expected } at this point"); 1630 Parser.Lex(); // Eat "}" 1631 const MCExpr *RndModeOp = 1632 MCConstantExpr::create(rndMode, Parser.getContext()); 1633 return X86Operand::CreateImm(RndModeOp, Start, End); 1634 } 1635 if(Tok.getIdentifier().equals("sae")){ 1636 Parser.Lex(); // Eat the sae 1637 if (!getLexer().is(AsmToken::RCurly)) 1638 return ErrorOperand(Tok.getLoc(), "Expected } at this point"); 1639 Parser.Lex(); // Eat "}" 1640 return X86Operand::CreateToken("{sae}", consumedToken); 1641 } 1642 return ErrorOperand(Tok.getLoc(), "unknown token in expression"); 1643 } 1644 1645 /// Parse the '.' operator. 1646 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp, 1647 const MCExpr *&NewDisp) { 1648 MCAsmParser &Parser = getParser(); 1649 const AsmToken &Tok = Parser.getTok(); 1650 int64_t OrigDispVal, DotDispVal; 1651 1652 // FIXME: Handle non-constant expressions. 1653 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp)) 1654 OrigDispVal = OrigDisp->getValue(); 1655 else 1656 return Error(Tok.getLoc(), "Non-constant offsets are not supported!"); 1657 1658 // Drop the optional '.'. 1659 StringRef DotDispStr = Tok.getString(); 1660 if (DotDispStr.startswith(".")) 1661 DotDispStr = DotDispStr.drop_front(1); 1662 1663 // .Imm gets lexed as a real. 1664 if (Tok.is(AsmToken::Real)) { 1665 APInt DotDisp; 1666 DotDispStr.getAsInteger(10, DotDisp); 1667 DotDispVal = DotDisp.getZExtValue(); 1668 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) { 1669 unsigned DotDisp; 1670 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.'); 1671 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second, 1672 DotDisp)) 1673 return Error(Tok.getLoc(), "Unable to lookup field reference!"); 1674 DotDispVal = DotDisp; 1675 } else 1676 return Error(Tok.getLoc(), "Unexpected token type!"); 1677 1678 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) { 1679 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data()); 1680 unsigned Len = DotDispStr.size(); 1681 unsigned Val = OrigDispVal + DotDispVal; 1682 InstInfo->AsmRewrites->emplace_back(AOK_DotOperator, Loc, Len, Val); 1683 } 1684 1685 NewDisp = MCConstantExpr::create(OrigDispVal + DotDispVal, getContext()); 1686 return false; 1687 } 1688 1689 /// Parse the 'offset' operator. This operator is used to specify the 1690 /// location rather then the content of a variable. 1691 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() { 1692 MCAsmParser &Parser = getParser(); 1693 const AsmToken &Tok = Parser.getTok(); 1694 SMLoc OffsetOfLoc = Tok.getLoc(); 1695 Parser.Lex(); // Eat offset. 1696 1697 const MCExpr *Val; 1698 InlineAsmIdentifierInfo Info; 1699 SMLoc Start = Tok.getLoc(), End; 1700 StringRef Identifier = Tok.getString(); 1701 if (ParseIntelIdentifier(Val, Identifier, Info, 1702 /*Unevaluated=*/false, End)) 1703 return nullptr; 1704 1705 // Don't emit the offset operator. 1706 InstInfo->AsmRewrites->emplace_back(AOK_Skip, OffsetOfLoc, 7); 1707 1708 // The offset operator will have an 'r' constraint, thus we need to create 1709 // register operand to ensure proper matching. Just pick a GPR based on 1710 // the size of a pointer. 1711 bool Parse32 = is32BitMode() || Code16GCC; 1712 unsigned RegNo = is64BitMode() ? X86::RBX : (Parse32 ? X86::EBX : X86::BX); 1713 1714 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true, 1715 OffsetOfLoc, Identifier, Info.OpDecl); 1716 } 1717 1718 enum IntelOperatorKind { 1719 IOK_LENGTH, 1720 IOK_SIZE, 1721 IOK_TYPE 1722 }; 1723 1724 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator 1725 /// returns the number of elements in an array. It returns the value 1 for 1726 /// non-array variables. The SIZE operator returns the size of a C or C++ 1727 /// variable. A variable's size is the product of its LENGTH and TYPE. The 1728 /// TYPE operator returns the size of a C or C++ type or variable. If the 1729 /// variable is an array, TYPE returns the size of a single element. 1730 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) { 1731 MCAsmParser &Parser = getParser(); 1732 const AsmToken &Tok = Parser.getTok(); 1733 SMLoc TypeLoc = Tok.getLoc(); 1734 Parser.Lex(); // Eat operator. 1735 1736 const MCExpr *Val = nullptr; 1737 InlineAsmIdentifierInfo Info; 1738 SMLoc Start = Tok.getLoc(), End; 1739 StringRef Identifier = Tok.getString(); 1740 if (ParseIntelIdentifier(Val, Identifier, Info, 1741 /*Unevaluated=*/true, End)) 1742 return nullptr; 1743 1744 if (!Info.OpDecl) 1745 return ErrorOperand(Start, "unable to lookup expression"); 1746 1747 unsigned CVal = 0; 1748 switch(OpKind) { 1749 default: llvm_unreachable("Unexpected operand kind!"); 1750 case IOK_LENGTH: CVal = Info.Length; break; 1751 case IOK_SIZE: CVal = Info.Size; break; 1752 case IOK_TYPE: CVal = Info.Type; break; 1753 } 1754 1755 // Rewrite the type operator and the C or C++ type or variable in terms of an 1756 // immediate. E.g. TYPE foo -> $$4 1757 unsigned Len = End.getPointer() - TypeLoc.getPointer(); 1758 InstInfo->AsmRewrites->emplace_back(AOK_Imm, TypeLoc, Len, CVal); 1759 1760 const MCExpr *Imm = MCConstantExpr::create(CVal, getContext()); 1761 return X86Operand::CreateImm(Imm, Start, End); 1762 } 1763 1764 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() { 1765 MCAsmParser &Parser = getParser(); 1766 const AsmToken &Tok = Parser.getTok(); 1767 SMLoc Start, End; 1768 1769 // Offset, length, type and size operators. 1770 if (isParsingInlineAsm()) { 1771 StringRef AsmTokStr = Tok.getString(); 1772 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET") 1773 return ParseIntelOffsetOfOperator(); 1774 if (AsmTokStr == "length" || AsmTokStr == "LENGTH") 1775 return ParseIntelOperator(IOK_LENGTH); 1776 if (AsmTokStr == "size" || AsmTokStr == "SIZE") 1777 return ParseIntelOperator(IOK_SIZE); 1778 if (AsmTokStr == "type" || AsmTokStr == "TYPE") 1779 return ParseIntelOperator(IOK_TYPE); 1780 } 1781 1782 bool PtrInOperand = false; 1783 unsigned Size = getIntelMemOperandSize(Tok.getString()); 1784 if (Size) { 1785 Parser.Lex(); // Eat operand size (e.g., byte, word). 1786 if (Tok.getString() != "PTR" && Tok.getString() != "ptr") 1787 return ErrorOperand(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!"); 1788 Parser.Lex(); // Eat ptr. 1789 PtrInOperand = true; 1790 } 1791 1792 Start = Tok.getLoc(); 1793 1794 // rounding mode token 1795 if (getSTI().getFeatureBits()[X86::FeatureAVX512] && 1796 getLexer().is(AsmToken::LCurly)) 1797 return ParseRoundingModeOp(Start, End); 1798 1799 // Register. 1800 unsigned RegNo = 0; 1801 if (getLexer().is(AsmToken::Identifier) && 1802 !ParseRegister(RegNo, Start, End)) { 1803 // If this is a segment register followed by a ':', then this is the start 1804 // of a segment override, otherwise this is a normal register reference. 1805 // In case it is a normal register and there is ptr in the operand this 1806 // is an error 1807 if (RegNo == X86::RIP) 1808 return ErrorOperand(Start, "rip can only be used as a base register"); 1809 if (getLexer().isNot(AsmToken::Colon)) { 1810 if (PtrInOperand) { 1811 return ErrorOperand(Start, "expected memory operand after " 1812 "'ptr', found register operand instead"); 1813 } 1814 return X86Operand::CreateReg(RegNo, Start, End); 1815 } 1816 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size); 1817 } 1818 1819 // Immediates and Memory 1820 1821 // Parse [ BaseReg + Scale*IndexReg + Disp ]. 1822 if (getLexer().is(AsmToken::LBrac)) 1823 return ParseIntelBracExpression(/*SegReg=*/0, Start, /*ImmDisp=*/0, false, 1824 Size); 1825 1826 AsmToken StartTok = Tok; 1827 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true, 1828 /*AddImmPrefix=*/false); 1829 if (ParseIntelExpression(SM, End)) 1830 return nullptr; 1831 1832 bool isSymbol = SM.getSym() && SM.getSym()->getKind() != MCExpr::Constant; 1833 int64_t Imm = SM.getImm(); 1834 if (SM.getSym() && SM.getSym()->getKind() == MCExpr::Constant) 1835 SM.getSym()->evaluateAsAbsolute(Imm); 1836 1837 if (StartTok.isNot(AsmToken::Identifier) && 1838 StartTok.isNot(AsmToken::String) && isParsingInlineAsm()) { 1839 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer(); 1840 if (StartTok.getString().size() == Len) 1841 // Just add a prefix if this wasn't a complex immediate expression. 1842 InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, Start); 1843 else 1844 // Otherwise, rewrite the complex expression as a single immediate. 1845 InstInfo->AsmRewrites->emplace_back(AOK_Imm, Start, Len, Imm); 1846 } 1847 1848 if (getLexer().isNot(AsmToken::LBrac)) { 1849 // If a directional label (ie. 1f or 2b) was parsed above from 1850 // ParseIntelExpression() then SM.getSym() was set to a pointer to 1851 // to the MCExpr with the directional local symbol and this is a 1852 // memory operand not an immediate operand. 1853 if (isSymbol) { 1854 if (isParsingInlineAsm()) 1855 return CreateMemForInlineAsm(/*SegReg=*/0, SM.getSym(), /*BaseReg=*/0, 1856 /*IndexReg=*/0, 1857 /*Scale=*/1, Start, End, Size, 1858 SM.getSymName(), SM.getIdentifierInfo()); 1859 return X86Operand::CreateMem(getPointerWidth(), SM.getSym(), Start, End, 1860 Size); 1861 } 1862 1863 const MCExpr *ImmExpr = MCConstantExpr::create(Imm, getContext()); 1864 return X86Operand::CreateImm(ImmExpr, Start, End); 1865 } 1866 1867 // Only positive immediates are valid. 1868 if (Imm < 0) 1869 return ErrorOperand(Start, "expected a positive immediate displacement " 1870 "before bracketed expr."); 1871 1872 return ParseIntelBracExpression(/*SegReg=*/0, Start, Imm, isSymbol, Size); 1873 } 1874 1875 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() { 1876 MCAsmParser &Parser = getParser(); 1877 switch (getLexer().getKind()) { 1878 default: 1879 // Parse a memory operand with no segment register. 1880 return ParseMemOperand(0, Parser.getTok().getLoc()); 1881 case AsmToken::Percent: { 1882 // Read the register. 1883 unsigned RegNo; 1884 SMLoc Start, End; 1885 if (ParseRegister(RegNo, Start, End)) return nullptr; 1886 if (RegNo == X86::EIZ || RegNo == X86::RIZ) { 1887 Error(Start, "%eiz and %riz can only be used as index registers", 1888 SMRange(Start, End)); 1889 return nullptr; 1890 } 1891 if (RegNo == X86::RIP) { 1892 Error(Start, "%rip can only be used as a base register", 1893 SMRange(Start, End)); 1894 return nullptr; 1895 } 1896 1897 // If this is a segment register followed by a ':', then this is the start 1898 // of a memory reference, otherwise this is a normal register reference. 1899 if (getLexer().isNot(AsmToken::Colon)) 1900 return X86Operand::CreateReg(RegNo, Start, End); 1901 1902 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo)) 1903 return ErrorOperand(Start, "invalid segment register"); 1904 1905 getParser().Lex(); // Eat the colon. 1906 return ParseMemOperand(RegNo, Start); 1907 } 1908 case AsmToken::Dollar: { 1909 // $42 -> immediate. 1910 SMLoc Start = Parser.getTok().getLoc(), End; 1911 Parser.Lex(); 1912 const MCExpr *Val; 1913 if (getParser().parseExpression(Val, End)) 1914 return nullptr; 1915 return X86Operand::CreateImm(Val, Start, End); 1916 } 1917 case AsmToken::LCurly:{ 1918 SMLoc Start = Parser.getTok().getLoc(), End; 1919 if (getSTI().getFeatureBits()[X86::FeatureAVX512]) 1920 return ParseRoundingModeOp(Start, End); 1921 return ErrorOperand(Start, "Unexpected '{' in expression"); 1922 } 1923 } 1924 } 1925 1926 // true on failure, false otherwise 1927 // If no {z} mark was found - Parser doesn't advance 1928 bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z, 1929 const SMLoc &StartLoc) { 1930 MCAsmParser &Parser = getParser(); 1931 // Assuming we are just pass the '{' mark, quering the next token 1932 // Searched for {z}, but none was found. Return false, as no parsing error was 1933 // encountered 1934 if (!(getLexer().is(AsmToken::Identifier) && 1935 (getLexer().getTok().getIdentifier() == "z"))) 1936 return false; 1937 Parser.Lex(); // Eat z 1938 // Query and eat the '}' mark 1939 if (!getLexer().is(AsmToken::RCurly)) 1940 return Error(getLexer().getLoc(), "Expected } at this point"); 1941 Parser.Lex(); // Eat '}' 1942 // Assign Z with the {z} mark opernad 1943 Z = X86Operand::CreateToken("{z}", StartLoc); 1944 return false; 1945 } 1946 1947 // true on failure, false otherwise 1948 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands, 1949 const MCParsedAsmOperand &Op) { 1950 MCAsmParser &Parser = getParser(); 1951 if(getSTI().getFeatureBits()[X86::FeatureAVX512]) { 1952 if (getLexer().is(AsmToken::LCurly)) { 1953 // Eat "{" and mark the current place. 1954 const SMLoc consumedToken = consumeToken(); 1955 // Distinguish {1to<NUM>} from {%k<NUM>}. 1956 if(getLexer().is(AsmToken::Integer)) { 1957 // Parse memory broadcasting ({1to<NUM>}). 1958 if (getLexer().getTok().getIntVal() != 1) 1959 return TokError("Expected 1to<NUM> at this point"); 1960 Parser.Lex(); // Eat "1" of 1to8 1961 if (!getLexer().is(AsmToken::Identifier) || 1962 !getLexer().getTok().getIdentifier().startswith("to")) 1963 return TokError("Expected 1to<NUM> at this point"); 1964 // Recognize only reasonable suffixes. 1965 const char *BroadcastPrimitive = 1966 StringSwitch<const char*>(getLexer().getTok().getIdentifier()) 1967 .Case("to2", "{1to2}") 1968 .Case("to4", "{1to4}") 1969 .Case("to8", "{1to8}") 1970 .Case("to16", "{1to16}") 1971 .Default(nullptr); 1972 if (!BroadcastPrimitive) 1973 return TokError("Invalid memory broadcast primitive."); 1974 Parser.Lex(); // Eat "toN" of 1toN 1975 if (!getLexer().is(AsmToken::RCurly)) 1976 return TokError("Expected } at this point"); 1977 Parser.Lex(); // Eat "}" 1978 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive, 1979 consumedToken)); 1980 // No AVX512 specific primitives can pass 1981 // after memory broadcasting, so return. 1982 return false; 1983 } else { 1984 // Parse either {k}{z}, {z}{k}, {k} or {z} 1985 // last one have no meaning, but GCC accepts it 1986 // Currently, we're just pass a '{' mark 1987 std::unique_ptr<X86Operand> Z; 1988 if (ParseZ(Z, consumedToken)) 1989 return true; 1990 // Reaching here means that parsing of the allegadly '{z}' mark yielded 1991 // no errors. 1992 // Query for the need of further parsing for a {%k<NUM>} mark 1993 if (!Z || getLexer().is(AsmToken::LCurly)) { 1994 const SMLoc StartLoc = Z ? consumeToken() : consumedToken; 1995 // Parse an op-mask register mark ({%k<NUM>}), which is now to be 1996 // expected 1997 if (std::unique_ptr<X86Operand> Op = ParseOperand()) { 1998 if (!getLexer().is(AsmToken::RCurly)) 1999 return Error(getLexer().getLoc(), "Expected } at this point"); 2000 Operands.push_back(X86Operand::CreateToken("{", StartLoc)); 2001 Operands.push_back(std::move(Op)); 2002 Operands.push_back(X86Operand::CreateToken("}", consumeToken())); 2003 } else 2004 return Error(getLexer().getLoc(), 2005 "Expected an op-mask register at this point"); 2006 // {%k<NUM>} mark is found, inquire for {z} 2007 if (getLexer().is(AsmToken::LCurly) && !Z) { 2008 // Have we've found a parsing error, or found no (expected) {z} mark 2009 // - report an error 2010 if (ParseZ(Z, consumeToken()) || !Z) 2011 return true; 2012 2013 } 2014 // '{z}' on its own is meaningless, hence should be ignored. 2015 // on the contrary - have it been accompanied by a K register, 2016 // allow it. 2017 if (Z) 2018 Operands.push_back(std::move(Z)); 2019 } 2020 } 2021 } 2022 } 2023 return false; 2024 } 2025 2026 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix 2027 /// has already been parsed if present. 2028 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg, 2029 SMLoc MemStart) { 2030 2031 MCAsmParser &Parser = getParser(); 2032 // We have to disambiguate a parenthesized expression "(4+5)" from the start 2033 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The 2034 // only way to do this without lookahead is to eat the '(' and see what is 2035 // after it. 2036 const MCExpr *Disp = MCConstantExpr::create(0, getParser().getContext()); 2037 if (getLexer().isNot(AsmToken::LParen)) { 2038 SMLoc ExprEnd; 2039 if (getParser().parseExpression(Disp, ExprEnd)) return nullptr; 2040 2041 // After parsing the base expression we could either have a parenthesized 2042 // memory address or not. If not, return now. If so, eat the (. 2043 if (getLexer().isNot(AsmToken::LParen)) { 2044 // Unless we have a segment register, treat this as an immediate. 2045 if (SegReg == 0) 2046 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, ExprEnd); 2047 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1, 2048 MemStart, ExprEnd); 2049 } 2050 2051 // Eat the '('. 2052 Parser.Lex(); 2053 } else { 2054 // Okay, we have a '('. We don't know if this is an expression or not, but 2055 // so we have to eat the ( to see beyond it. 2056 SMLoc LParenLoc = Parser.getTok().getLoc(); 2057 Parser.Lex(); // Eat the '('. 2058 2059 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) { 2060 // Nothing to do here, fall into the code below with the '(' part of the 2061 // memory operand consumed. 2062 } else { 2063 SMLoc ExprEnd; 2064 2065 // It must be an parenthesized expression, parse it now. 2066 if (getParser().parseParenExpression(Disp, ExprEnd)) 2067 return nullptr; 2068 2069 // After parsing the base expression we could either have a parenthesized 2070 // memory address or not. If not, return now. If so, eat the (. 2071 if (getLexer().isNot(AsmToken::LParen)) { 2072 // Unless we have a segment register, treat this as an immediate. 2073 if (SegReg == 0) 2074 return X86Operand::CreateMem(getPointerWidth(), Disp, LParenLoc, 2075 ExprEnd); 2076 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1, 2077 MemStart, ExprEnd); 2078 } 2079 2080 // Eat the '('. 2081 Parser.Lex(); 2082 } 2083 } 2084 2085 // If we reached here, then we just ate the ( of the memory operand. Process 2086 // the rest of the memory operand. 2087 unsigned BaseReg = 0, IndexReg = 0, Scale = 1; 2088 SMLoc IndexLoc, BaseLoc; 2089 2090 if (getLexer().is(AsmToken::Percent)) { 2091 SMLoc StartLoc, EndLoc; 2092 BaseLoc = Parser.getTok().getLoc(); 2093 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr; 2094 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) { 2095 Error(StartLoc, "eiz and riz can only be used as index registers", 2096 SMRange(StartLoc, EndLoc)); 2097 return nullptr; 2098 } 2099 } 2100 2101 if (getLexer().is(AsmToken::Comma)) { 2102 Parser.Lex(); // Eat the comma. 2103 IndexLoc = Parser.getTok().getLoc(); 2104 2105 // Following the comma we should have either an index register, or a scale 2106 // value. We don't support the later form, but we want to parse it 2107 // correctly. 2108 // 2109 // Not that even though it would be completely consistent to support syntax 2110 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this. 2111 if (getLexer().is(AsmToken::Percent)) { 2112 SMLoc L; 2113 if (ParseRegister(IndexReg, L, L)) 2114 return nullptr; 2115 if (BaseReg == X86::RIP) { 2116 Error(IndexLoc, "%rip as base register can not have an index register"); 2117 return nullptr; 2118 } 2119 if (IndexReg == X86::RIP) { 2120 Error(IndexLoc, "%rip is not allowed as an index register"); 2121 return nullptr; 2122 } 2123 2124 if (getLexer().isNot(AsmToken::RParen)) { 2125 // Parse the scale amount: 2126 // ::= ',' [scale-expression] 2127 if (getLexer().isNot(AsmToken::Comma)) { 2128 Error(Parser.getTok().getLoc(), 2129 "expected comma in scale expression"); 2130 return nullptr; 2131 } 2132 Parser.Lex(); // Eat the comma. 2133 2134 if (getLexer().isNot(AsmToken::RParen)) { 2135 SMLoc Loc = Parser.getTok().getLoc(); 2136 2137 int64_t ScaleVal; 2138 if (getParser().parseAbsoluteExpression(ScaleVal)){ 2139 Error(Loc, "expected scale expression"); 2140 return nullptr; 2141 } 2142 2143 // Validate the scale amount. 2144 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) && 2145 ScaleVal != 1) { 2146 Error(Loc, "scale factor in 16-bit address must be 1"); 2147 return nullptr; 2148 } 2149 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && 2150 ScaleVal != 8) { 2151 Error(Loc, "scale factor in address must be 1, 2, 4 or 8"); 2152 return nullptr; 2153 } 2154 Scale = (unsigned)ScaleVal; 2155 } 2156 } 2157 } else if (getLexer().isNot(AsmToken::RParen)) { 2158 // A scale amount without an index is ignored. 2159 // index. 2160 SMLoc Loc = Parser.getTok().getLoc(); 2161 2162 int64_t Value; 2163 if (getParser().parseAbsoluteExpression(Value)) 2164 return nullptr; 2165 2166 if (Value != 1) 2167 Warning(Loc, "scale factor without index register is ignored"); 2168 Scale = 1; 2169 } 2170 } 2171 2172 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too. 2173 if (getLexer().isNot(AsmToken::RParen)) { 2174 Error(Parser.getTok().getLoc(), "unexpected token in memory operand"); 2175 return nullptr; 2176 } 2177 SMLoc MemEnd = Parser.getTok().getEndLoc(); 2178 Parser.Lex(); // Eat the ')'. 2179 2180 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed, 2181 // and then only in non-64-bit modes. Except for DX, which is a special case 2182 // because an unofficial form of in/out instructions uses it. 2183 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) && 2184 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP && 2185 BaseReg != X86::SI && BaseReg != X86::DI)) && 2186 BaseReg != X86::DX) { 2187 Error(BaseLoc, "invalid 16-bit base register"); 2188 return nullptr; 2189 } 2190 if (BaseReg == 0 && 2191 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) { 2192 Error(IndexLoc, "16-bit memory operand may not include only index register"); 2193 return nullptr; 2194 } 2195 2196 StringRef ErrMsg; 2197 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) { 2198 Error(BaseLoc, ErrMsg); 2199 return nullptr; 2200 } 2201 2202 if (SegReg || BaseReg || IndexReg) 2203 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg, 2204 IndexReg, Scale, MemStart, MemEnd); 2205 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, MemEnd); 2206 } 2207 2208 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 2209 SMLoc NameLoc, OperandVector &Operands) { 2210 MCAsmParser &Parser = getParser(); 2211 InstInfo = &Info; 2212 StringRef PatchedName = Name; 2213 2214 if (Name == "jmp" && isParsingIntelSyntax() && isParsingInlineAsm()) { 2215 StringRef NextTok = Parser.getTok().getString(); 2216 if (NextTok == "short") { 2217 SMLoc NameEndLoc = 2218 NameLoc.getFromPointer(NameLoc.getPointer() + Name.size()); 2219 // Eat the short keyword 2220 Parser.Lex(); 2221 // MS ignores the short keyword, it determines the jmp type based 2222 // on the distance of the label 2223 InstInfo->AsmRewrites->emplace_back(AOK_Skip, NameEndLoc, 2224 NextTok.size() + 1); 2225 } 2226 } 2227 2228 // FIXME: Hack to recognize setneb as setne. 2229 if (PatchedName.startswith("set") && PatchedName.endswith("b") && 2230 PatchedName != "setb" && PatchedName != "setnb") 2231 PatchedName = PatchedName.substr(0, Name.size()-1); 2232 2233 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}. 2234 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) && 2235 (PatchedName.endswith("ss") || PatchedName.endswith("sd") || 2236 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) { 2237 bool IsVCMP = PatchedName[0] == 'v'; 2238 unsigned CCIdx = IsVCMP ? 4 : 3; 2239 unsigned ComparisonCode = StringSwitch<unsigned>( 2240 PatchedName.slice(CCIdx, PatchedName.size() - 2)) 2241 .Case("eq", 0x00) 2242 .Case("eq_oq", 0x00) 2243 .Case("lt", 0x01) 2244 .Case("lt_os", 0x01) 2245 .Case("le", 0x02) 2246 .Case("le_os", 0x02) 2247 .Case("unord", 0x03) 2248 .Case("unord_q", 0x03) 2249 .Case("neq", 0x04) 2250 .Case("neq_uq", 0x04) 2251 .Case("nlt", 0x05) 2252 .Case("nlt_us", 0x05) 2253 .Case("nle", 0x06) 2254 .Case("nle_us", 0x06) 2255 .Case("ord", 0x07) 2256 .Case("ord_q", 0x07) 2257 /* AVX only from here */ 2258 .Case("eq_uq", 0x08) 2259 .Case("nge", 0x09) 2260 .Case("nge_us", 0x09) 2261 .Case("ngt", 0x0A) 2262 .Case("ngt_us", 0x0A) 2263 .Case("false", 0x0B) 2264 .Case("false_oq", 0x0B) 2265 .Case("neq_oq", 0x0C) 2266 .Case("ge", 0x0D) 2267 .Case("ge_os", 0x0D) 2268 .Case("gt", 0x0E) 2269 .Case("gt_os", 0x0E) 2270 .Case("true", 0x0F) 2271 .Case("true_uq", 0x0F) 2272 .Case("eq_os", 0x10) 2273 .Case("lt_oq", 0x11) 2274 .Case("le_oq", 0x12) 2275 .Case("unord_s", 0x13) 2276 .Case("neq_us", 0x14) 2277 .Case("nlt_uq", 0x15) 2278 .Case("nle_uq", 0x16) 2279 .Case("ord_s", 0x17) 2280 .Case("eq_us", 0x18) 2281 .Case("nge_uq", 0x19) 2282 .Case("ngt_uq", 0x1A) 2283 .Case("false_os", 0x1B) 2284 .Case("neq_os", 0x1C) 2285 .Case("ge_oq", 0x1D) 2286 .Case("gt_oq", 0x1E) 2287 .Case("true_us", 0x1F) 2288 .Default(~0U); 2289 if (ComparisonCode != ~0U && (IsVCMP || ComparisonCode < 8)) { 2290 2291 Operands.push_back(X86Operand::CreateToken(PatchedName.slice(0, CCIdx), 2292 NameLoc)); 2293 2294 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode, 2295 getParser().getContext()); 2296 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc)); 2297 2298 PatchedName = PatchedName.substr(PatchedName.size() - 2); 2299 } 2300 } 2301 2302 // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}. 2303 if (PatchedName.startswith("vpcmp") && 2304 (PatchedName.endswith("b") || PatchedName.endswith("w") || 2305 PatchedName.endswith("d") || PatchedName.endswith("q"))) { 2306 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1; 2307 unsigned ComparisonCode = StringSwitch<unsigned>( 2308 PatchedName.slice(5, PatchedName.size() - CCIdx)) 2309 .Case("eq", 0x0) // Only allowed on unsigned. Checked below. 2310 .Case("lt", 0x1) 2311 .Case("le", 0x2) 2312 //.Case("false", 0x3) // Not a documented alias. 2313 .Case("neq", 0x4) 2314 .Case("nlt", 0x5) 2315 .Case("nle", 0x6) 2316 //.Case("true", 0x7) // Not a documented alias. 2317 .Default(~0U); 2318 if (ComparisonCode != ~0U && (ComparisonCode != 0 || CCIdx == 2)) { 2319 Operands.push_back(X86Operand::CreateToken("vpcmp", NameLoc)); 2320 2321 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode, 2322 getParser().getContext()); 2323 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc)); 2324 2325 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx); 2326 } 2327 } 2328 2329 // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}. 2330 if (PatchedName.startswith("vpcom") && 2331 (PatchedName.endswith("b") || PatchedName.endswith("w") || 2332 PatchedName.endswith("d") || PatchedName.endswith("q"))) { 2333 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1; 2334 unsigned ComparisonCode = StringSwitch<unsigned>( 2335 PatchedName.slice(5, PatchedName.size() - CCIdx)) 2336 .Case("lt", 0x0) 2337 .Case("le", 0x1) 2338 .Case("gt", 0x2) 2339 .Case("ge", 0x3) 2340 .Case("eq", 0x4) 2341 .Case("neq", 0x5) 2342 .Case("false", 0x6) 2343 .Case("true", 0x7) 2344 .Default(~0U); 2345 if (ComparisonCode != ~0U) { 2346 Operands.push_back(X86Operand::CreateToken("vpcom", NameLoc)); 2347 2348 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode, 2349 getParser().getContext()); 2350 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc)); 2351 2352 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx); 2353 } 2354 } 2355 2356 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); 2357 2358 // Determine whether this is an instruction prefix. 2359 bool isPrefix = 2360 Name == "lock" || Name == "rep" || 2361 Name == "repe" || Name == "repz" || 2362 Name == "repne" || Name == "repnz" || 2363 Name == "rex64" || Name == "data16"; 2364 2365 bool CurlyAsEndOfStatement = false; 2366 // This does the actual operand parsing. Don't parse any more if we have a 2367 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we 2368 // just want to parse the "lock" as the first instruction and the "incl" as 2369 // the next one. 2370 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) { 2371 2372 // Parse '*' modifier. 2373 if (getLexer().is(AsmToken::Star)) 2374 Operands.push_back(X86Operand::CreateToken("*", consumeToken())); 2375 2376 // Read the operands. 2377 while(1) { 2378 if (std::unique_ptr<X86Operand> Op = ParseOperand()) { 2379 Operands.push_back(std::move(Op)); 2380 if (HandleAVX512Operand(Operands, *Operands.back())) 2381 return true; 2382 } else { 2383 return true; 2384 } 2385 // check for comma and eat it 2386 if (getLexer().is(AsmToken::Comma)) 2387 Parser.Lex(); 2388 else 2389 break; 2390 } 2391 2392 // In MS inline asm curly braces mark the begining/end of a block, therefore 2393 // they should be interepreted as end of statement 2394 CurlyAsEndOfStatement = 2395 isParsingIntelSyntax() && isParsingInlineAsm() && 2396 (getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly)); 2397 if (getLexer().isNot(AsmToken::EndOfStatement) && !CurlyAsEndOfStatement) 2398 return TokError("unexpected token in argument list"); 2399 } 2400 2401 // Consume the EndOfStatement or the prefix separator Slash 2402 if (getLexer().is(AsmToken::EndOfStatement) || 2403 (isPrefix && getLexer().is(AsmToken::Slash))) 2404 Parser.Lex(); 2405 else if (CurlyAsEndOfStatement) 2406 // Add an actual EndOfStatement before the curly brace 2407 Info.AsmRewrites->emplace_back(AOK_EndOfStatement, 2408 getLexer().getTok().getLoc(), 0); 2409 2410 // This is for gas compatibility and cannot be done in td. 2411 // Adding "p" for some floating point with no argument. 2412 // For example: fsub --> fsubp 2413 bool IsFp = 2414 Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr"; 2415 if (IsFp && Operands.size() == 1) { 2416 const char *Repl = StringSwitch<const char *>(Name) 2417 .Case("fsub", "fsubp") 2418 .Case("fdiv", "fdivp") 2419 .Case("fsubr", "fsubrp") 2420 .Case("fdivr", "fdivrp"); 2421 static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl); 2422 } 2423 2424 // Moving a 32 or 16 bit value into a segment register has the same 2425 // behavior. Modify such instructions to always take shorter form. 2426 if ((Name == "mov" || Name == "movw" || Name == "movl") && 2427 (Operands.size() == 3)) { 2428 X86Operand &Op1 = (X86Operand &)*Operands[1]; 2429 X86Operand &Op2 = (X86Operand &)*Operands[2]; 2430 SMLoc Loc = Op1.getEndLoc(); 2431 if (Op1.isReg() && Op2.isReg() && 2432 X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains( 2433 Op2.getReg()) && 2434 (X86MCRegisterClasses[X86::GR16RegClassID].contains(Op1.getReg()) || 2435 X86MCRegisterClasses[X86::GR32RegClassID].contains(Op1.getReg()))) { 2436 // Change instruction name to match new instruction. 2437 if (Name != "mov" && Name[3] == (is16BitMode() ? 'l' : 'w')) { 2438 Name = is16BitMode() ? "movw" : "movl"; 2439 Operands[0] = X86Operand::CreateToken(Name, NameLoc); 2440 } 2441 // Select the correct equivalent 16-/32-bit source register. 2442 unsigned Reg = 2443 getX86SubSuperRegisterOrZero(Op1.getReg(), is16BitMode() ? 16 : 32); 2444 Operands[1] = X86Operand::CreateReg(Reg, Loc, Loc); 2445 } 2446 } 2447 2448 // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" -> 2449 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely 2450 // documented form in various unofficial manuals, so a lot of code uses it. 2451 if ((Name == "outb" || Name == "outsb" || Name == "outw" || Name == "outsw" || 2452 Name == "outl" || Name == "outsl" || Name == "out" || Name == "outs") && 2453 Operands.size() == 3) { 2454 X86Operand &Op = (X86Operand &)*Operands.back(); 2455 if (Op.isMem() && Op.Mem.SegReg == 0 && 2456 isa<MCConstantExpr>(Op.Mem.Disp) && 2457 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 && 2458 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) { 2459 SMLoc Loc = Op.getEndLoc(); 2460 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc); 2461 } 2462 } 2463 // Same hack for "in[s]?[bwl]? (%dx), %al" -> "inb %dx, %al". 2464 if ((Name == "inb" || Name == "insb" || Name == "inw" || Name == "insw" || 2465 Name == "inl" || Name == "insl" || Name == "in" || Name == "ins") && 2466 Operands.size() == 3) { 2467 X86Operand &Op = (X86Operand &)*Operands[1]; 2468 if (Op.isMem() && Op.Mem.SegReg == 0 && 2469 isa<MCConstantExpr>(Op.Mem.Disp) && 2470 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 && 2471 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) { 2472 SMLoc Loc = Op.getEndLoc(); 2473 Operands[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc); 2474 } 2475 } 2476 2477 SmallVector<std::unique_ptr<MCParsedAsmOperand>, 2> TmpOperands; 2478 bool HadVerifyError = false; 2479 2480 // Append default arguments to "ins[bwld]" 2481 if (Name.startswith("ins") && 2482 (Operands.size() == 1 || Operands.size() == 3) && 2483 (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" || 2484 Name == "ins")) { 2485 2486 AddDefaultSrcDestOperands(TmpOperands, 2487 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc), 2488 DefaultMemDIOperand(NameLoc)); 2489 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands); 2490 } 2491 2492 // Append default arguments to "outs[bwld]" 2493 if (Name.startswith("outs") && 2494 (Operands.size() == 1 || Operands.size() == 3) && 2495 (Name == "outsb" || Name == "outsw" || Name == "outsl" || 2496 Name == "outsd" || Name == "outs")) { 2497 AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc), 2498 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc)); 2499 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands); 2500 } 2501 2502 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate 2503 // values of $SIREG according to the mode. It would be nice if this 2504 // could be achieved with InstAlias in the tables. 2505 if (Name.startswith("lods") && 2506 (Operands.size() == 1 || Operands.size() == 2) && 2507 (Name == "lods" || Name == "lodsb" || Name == "lodsw" || 2508 Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) { 2509 TmpOperands.push_back(DefaultMemSIOperand(NameLoc)); 2510 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands); 2511 } 2512 2513 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate 2514 // values of $DIREG according to the mode. It would be nice if this 2515 // could be achieved with InstAlias in the tables. 2516 if (Name.startswith("stos") && 2517 (Operands.size() == 1 || Operands.size() == 2) && 2518 (Name == "stos" || Name == "stosb" || Name == "stosw" || 2519 Name == "stosl" || Name == "stosd" || Name == "stosq")) { 2520 TmpOperands.push_back(DefaultMemDIOperand(NameLoc)); 2521 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands); 2522 } 2523 2524 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate 2525 // values of $DIREG according to the mode. It would be nice if this 2526 // could be achieved with InstAlias in the tables. 2527 if (Name.startswith("scas") && 2528 (Operands.size() == 1 || Operands.size() == 2) && 2529 (Name == "scas" || Name == "scasb" || Name == "scasw" || 2530 Name == "scasl" || Name == "scasd" || Name == "scasq")) { 2531 TmpOperands.push_back(DefaultMemDIOperand(NameLoc)); 2532 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands); 2533 } 2534 2535 // Add default SI and DI operands to "cmps[bwlq]". 2536 if (Name.startswith("cmps") && 2537 (Operands.size() == 1 || Operands.size() == 3) && 2538 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" || 2539 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) { 2540 AddDefaultSrcDestOperands(TmpOperands, DefaultMemDIOperand(NameLoc), 2541 DefaultMemSIOperand(NameLoc)); 2542 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands); 2543 } 2544 2545 // Add default SI and DI operands to "movs[bwlq]". 2546 if (((Name.startswith("movs") && 2547 (Name == "movs" || Name == "movsb" || Name == "movsw" || 2548 Name == "movsl" || Name == "movsd" || Name == "movsq")) || 2549 (Name.startswith("smov") && 2550 (Name == "smov" || Name == "smovb" || Name == "smovw" || 2551 Name == "smovl" || Name == "smovd" || Name == "smovq"))) && 2552 (Operands.size() == 1 || Operands.size() == 3)) { 2553 if (Name == "movsd" && Operands.size() == 1 && !isParsingIntelSyntax()) 2554 Operands.back() = X86Operand::CreateToken("movsl", NameLoc); 2555 AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc), 2556 DefaultMemDIOperand(NameLoc)); 2557 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands); 2558 } 2559 2560 // Check if we encountered an error for one the string insturctions 2561 if (HadVerifyError) { 2562 return HadVerifyError; 2563 } 2564 2565 // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to 2566 // "shift <op>". 2567 if ((Name.startswith("shr") || Name.startswith("sar") || 2568 Name.startswith("shl") || Name.startswith("sal") || 2569 Name.startswith("rcl") || Name.startswith("rcr") || 2570 Name.startswith("rol") || Name.startswith("ror")) && 2571 Operands.size() == 3) { 2572 if (isParsingIntelSyntax()) { 2573 // Intel syntax 2574 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]); 2575 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) && 2576 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1) 2577 Operands.pop_back(); 2578 } else { 2579 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]); 2580 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) && 2581 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1) 2582 Operands.erase(Operands.begin() + 1); 2583 } 2584 } 2585 2586 // Transforms "int $3" into "int3" as a size optimization. We can't write an 2587 // instalias with an immediate operand yet. 2588 if (Name == "int" && Operands.size() == 2) { 2589 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]); 2590 if (Op1.isImm()) 2591 if (auto *CE = dyn_cast<MCConstantExpr>(Op1.getImm())) 2592 if (CE->getValue() == 3) { 2593 Operands.erase(Operands.begin() + 1); 2594 static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3"); 2595 } 2596 } 2597 2598 // Transforms "xlat mem8" into "xlatb" 2599 if ((Name == "xlat" || Name == "xlatb") && Operands.size() == 2) { 2600 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]); 2601 if (Op1.isMem8()) { 2602 Warning(Op1.getStartLoc(), "memory operand is only for determining the " 2603 "size, (R|E)BX will be used for the location"); 2604 Operands.pop_back(); 2605 static_cast<X86Operand &>(*Operands[0]).setTokenValue("xlatb"); 2606 } 2607 } 2608 2609 return false; 2610 } 2611 2612 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) { 2613 return false; 2614 } 2615 2616 static const char *getSubtargetFeatureName(uint64_t Val); 2617 2618 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands, 2619 MCStreamer &Out) { 2620 Instrumentation->InstrumentAndEmitInstruction(Inst, Operands, getContext(), 2621 MII, Out); 2622 } 2623 2624 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 2625 OperandVector &Operands, 2626 MCStreamer &Out, uint64_t &ErrorInfo, 2627 bool MatchingInlineAsm) { 2628 if (isParsingIntelSyntax()) 2629 return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo, 2630 MatchingInlineAsm); 2631 return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo, 2632 MatchingInlineAsm); 2633 } 2634 2635 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, 2636 OperandVector &Operands, MCStreamer &Out, 2637 bool MatchingInlineAsm) { 2638 // FIXME: This should be replaced with a real .td file alias mechanism. 2639 // Also, MatchInstructionImpl should actually *do* the EmitInstruction 2640 // call. 2641 const char *Repl = StringSwitch<const char *>(Op.getToken()) 2642 .Case("finit", "fninit") 2643 .Case("fsave", "fnsave") 2644 .Case("fstcw", "fnstcw") 2645 .Case("fstcww", "fnstcw") 2646 .Case("fstenv", "fnstenv") 2647 .Case("fstsw", "fnstsw") 2648 .Case("fstsww", "fnstsw") 2649 .Case("fclex", "fnclex") 2650 .Default(nullptr); 2651 if (Repl) { 2652 MCInst Inst; 2653 Inst.setOpcode(X86::WAIT); 2654 Inst.setLoc(IDLoc); 2655 if (!MatchingInlineAsm) 2656 EmitInstruction(Inst, Operands, Out); 2657 Operands[0] = X86Operand::CreateToken(Repl, IDLoc); 2658 } 2659 } 2660 2661 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo, 2662 bool MatchingInlineAsm) { 2663 assert(ErrorInfo && "Unknown missing feature!"); 2664 SmallString<126> Msg; 2665 raw_svector_ostream OS(Msg); 2666 OS << "instruction requires:"; 2667 uint64_t Mask = 1; 2668 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) { 2669 if (ErrorInfo & Mask) 2670 OS << ' ' << getSubtargetFeatureName(ErrorInfo & Mask); 2671 Mask <<= 1; 2672 } 2673 return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm); 2674 } 2675 2676 bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode, 2677 OperandVector &Operands, 2678 MCStreamer &Out, 2679 uint64_t &ErrorInfo, 2680 bool MatchingInlineAsm) { 2681 assert(!Operands.empty() && "Unexpect empty operand list!"); 2682 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]); 2683 assert(Op.isToken() && "Leading operand should always be a mnemonic!"); 2684 SMRange EmptyRange = None; 2685 2686 // First, handle aliases that expand to multiple instructions. 2687 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm); 2688 2689 bool WasOriginallyInvalidOperand = false; 2690 MCInst Inst; 2691 2692 // First, try a direct match. 2693 switch (MatchInstruction(Operands, Inst, ErrorInfo, MatchingInlineAsm, 2694 isParsingIntelSyntax())) { 2695 default: llvm_unreachable("Unexpected match result!"); 2696 case Match_Success: 2697 // Some instructions need post-processing to, for example, tweak which 2698 // encoding is selected. Loop on it while changes happen so the 2699 // individual transformations can chain off each other. 2700 if (!MatchingInlineAsm) 2701 while (processInstruction(Inst, Operands)) 2702 ; 2703 2704 Inst.setLoc(IDLoc); 2705 if (!MatchingInlineAsm) 2706 EmitInstruction(Inst, Operands, Out); 2707 Opcode = Inst.getOpcode(); 2708 return false; 2709 case Match_MissingFeature: 2710 return ErrorMissingFeature(IDLoc, ErrorInfo, MatchingInlineAsm); 2711 case Match_InvalidOperand: 2712 WasOriginallyInvalidOperand = true; 2713 break; 2714 case Match_MnemonicFail: 2715 break; 2716 } 2717 2718 // FIXME: Ideally, we would only attempt suffix matches for things which are 2719 // valid prefixes, and we could just infer the right unambiguous 2720 // type. However, that requires substantially more matcher support than the 2721 // following hack. 2722 2723 // Change the operand to point to a temporary token. 2724 StringRef Base = Op.getToken(); 2725 SmallString<16> Tmp; 2726 Tmp += Base; 2727 Tmp += ' '; 2728 Op.setTokenValue(Tmp); 2729 2730 // If this instruction starts with an 'f', then it is a floating point stack 2731 // instruction. These come in up to three forms for 32-bit, 64-bit, and 2732 // 80-bit floating point, which use the suffixes s,l,t respectively. 2733 // 2734 // Otherwise, we assume that this may be an integer instruction, which comes 2735 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively. 2736 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0"; 2737 2738 // Check for the various suffix matches. 2739 uint64_t ErrorInfoIgnore; 2740 uint64_t ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings. 2741 unsigned Match[4]; 2742 2743 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) { 2744 Tmp.back() = Suffixes[I]; 2745 Match[I] = MatchInstruction(Operands, Inst, ErrorInfoIgnore, 2746 MatchingInlineAsm, isParsingIntelSyntax()); 2747 // If this returned as a missing feature failure, remember that. 2748 if (Match[I] == Match_MissingFeature) 2749 ErrorInfoMissingFeature = ErrorInfoIgnore; 2750 } 2751 2752 // Restore the old token. 2753 Op.setTokenValue(Base); 2754 2755 // If exactly one matched, then we treat that as a successful match (and the 2756 // instruction will already have been filled in correctly, since the failing 2757 // matches won't have modified it). 2758 unsigned NumSuccessfulMatches = 2759 std::count(std::begin(Match), std::end(Match), Match_Success); 2760 if (NumSuccessfulMatches == 1) { 2761 Inst.setLoc(IDLoc); 2762 if (!MatchingInlineAsm) 2763 EmitInstruction(Inst, Operands, Out); 2764 Opcode = Inst.getOpcode(); 2765 return false; 2766 } 2767 2768 // Otherwise, the match failed, try to produce a decent error message. 2769 2770 // If we had multiple suffix matches, then identify this as an ambiguous 2771 // match. 2772 if (NumSuccessfulMatches > 1) { 2773 char MatchChars[4]; 2774 unsigned NumMatches = 0; 2775 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) 2776 if (Match[I] == Match_Success) 2777 MatchChars[NumMatches++] = Suffixes[I]; 2778 2779 SmallString<126> Msg; 2780 raw_svector_ostream OS(Msg); 2781 OS << "ambiguous instructions require an explicit suffix (could be "; 2782 for (unsigned i = 0; i != NumMatches; ++i) { 2783 if (i != 0) 2784 OS << ", "; 2785 if (i + 1 == NumMatches) 2786 OS << "or "; 2787 OS << "'" << Base << MatchChars[i] << "'"; 2788 } 2789 OS << ")"; 2790 Error(IDLoc, OS.str(), EmptyRange, MatchingInlineAsm); 2791 return true; 2792 } 2793 2794 // Okay, we know that none of the variants matched successfully. 2795 2796 // If all of the instructions reported an invalid mnemonic, then the original 2797 // mnemonic was invalid. 2798 if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) { 2799 if (!WasOriginallyInvalidOperand) { 2800 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'", 2801 Op.getLocRange(), MatchingInlineAsm); 2802 } 2803 2804 // Recover location info for the operand if we know which was the problem. 2805 if (ErrorInfo != ~0ULL) { 2806 if (ErrorInfo >= Operands.size()) 2807 return Error(IDLoc, "too few operands for instruction", EmptyRange, 2808 MatchingInlineAsm); 2809 2810 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo]; 2811 if (Operand.getStartLoc().isValid()) { 2812 SMRange OperandRange = Operand.getLocRange(); 2813 return Error(Operand.getStartLoc(), "invalid operand for instruction", 2814 OperandRange, MatchingInlineAsm); 2815 } 2816 } 2817 2818 return Error(IDLoc, "invalid operand for instruction", EmptyRange, 2819 MatchingInlineAsm); 2820 } 2821 2822 // If one instruction matched with a missing feature, report this as a 2823 // missing feature. 2824 if (std::count(std::begin(Match), std::end(Match), 2825 Match_MissingFeature) == 1) { 2826 ErrorInfo = ErrorInfoMissingFeature; 2827 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature, 2828 MatchingInlineAsm); 2829 } 2830 2831 // If one instruction matched with an invalid operand, report this as an 2832 // operand failure. 2833 if (std::count(std::begin(Match), std::end(Match), 2834 Match_InvalidOperand) == 1) { 2835 return Error(IDLoc, "invalid operand for instruction", EmptyRange, 2836 MatchingInlineAsm); 2837 } 2838 2839 // If all of these were an outright failure, report it in a useless way. 2840 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix", 2841 EmptyRange, MatchingInlineAsm); 2842 return true; 2843 } 2844 2845 unsigned X86AsmParser::AdjustAVX512Mem(unsigned Size, 2846 X86Operand* UnsizedMemOpNext) { 2847 // Check for the existence of an AVX512 platform 2848 if (!getSTI().getFeatureBits()[X86::FeatureAVX512]) 2849 return 0; 2850 // Allow adjusting upon a (x|y|z)mm 2851 if (Size == 512 || Size == 256 || Size == 128) 2852 return Size; 2853 // This is an allegadly broadcasting mem op adjustment, 2854 // allow some more inquiring to validate it 2855 if (Size == 64 || Size == 32) 2856 return UnsizedMemOpNext && UnsizedMemOpNext->isToken() && 2857 UnsizedMemOpNext->getToken().substr(0, 4).equals("{1to") ? Size : 0; 2858 // Do not allow any other type of adjustments 2859 return 0; 2860 } 2861 2862 bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode, 2863 OperandVector &Operands, 2864 MCStreamer &Out, 2865 uint64_t &ErrorInfo, 2866 bool MatchingInlineAsm) { 2867 assert(!Operands.empty() && "Unexpect empty operand list!"); 2868 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]); 2869 assert(Op.isToken() && "Leading operand should always be a mnemonic!"); 2870 StringRef Mnemonic = Op.getToken(); 2871 SMRange EmptyRange = None; 2872 StringRef Base = Op.getToken(); 2873 2874 // First, handle aliases that expand to multiple instructions. 2875 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm); 2876 2877 MCInst Inst; 2878 2879 // Find one unsized memory operand, if present. 2880 X86Operand *UnsizedMemOp = nullptr; 2881 // If unsized memory operand was found - obtain following operand. 2882 // For use in AdjustAVX512Mem 2883 X86Operand *UnsizedMemOpNext = nullptr; 2884 for (const auto &Op : Operands) { 2885 X86Operand *X86Op = static_cast<X86Operand *>(Op.get()); 2886 if (UnsizedMemOp) { 2887 UnsizedMemOpNext = X86Op; 2888 // Have we found an unqualified memory operand, 2889 // break. IA allows only one memory operand. 2890 break; 2891 } 2892 if (X86Op->isMemUnsized()) 2893 UnsizedMemOp = X86Op; 2894 } 2895 2896 // Allow some instructions to have implicitly pointer-sized operands. This is 2897 // compatible with gas. 2898 if (UnsizedMemOp) { 2899 static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"}; 2900 for (const char *Instr : PtrSizedInstrs) { 2901 if (Mnemonic == Instr) { 2902 UnsizedMemOp->Mem.Size = getPointerWidth(); 2903 break; 2904 } 2905 } 2906 } 2907 2908 SmallVector<unsigned, 8> Match; 2909 uint64_t ErrorInfoMissingFeature = 0; 2910 2911 // If unsized push has immediate operand we should default the default pointer 2912 // size for the size. 2913 if (Mnemonic == "push" && Operands.size() == 2) { 2914 auto *X86Op = static_cast<X86Operand *>(Operands[1].get()); 2915 if (X86Op->isImm()) { 2916 // If it's not a constant fall through and let remainder take care of it. 2917 const auto *CE = dyn_cast<MCConstantExpr>(X86Op->getImm()); 2918 unsigned Size = getPointerWidth(); 2919 if (CE && 2920 (isIntN(Size, CE->getValue()) || isUIntN(Size, CE->getValue()))) { 2921 SmallString<16> Tmp; 2922 Tmp += Base; 2923 Tmp += (is64BitMode()) 2924 ? "q" 2925 : (is32BitMode()) ? "l" : (is16BitMode()) ? "w" : " "; 2926 Op.setTokenValue(Tmp); 2927 // Do match in ATT mode to allow explicit suffix usage. 2928 Match.push_back(MatchInstruction(Operands, Inst, ErrorInfo, 2929 MatchingInlineAsm, 2930 false /*isParsingIntelSyntax()*/)); 2931 Op.setTokenValue(Base); 2932 } 2933 } 2934 } 2935 2936 // If an unsized memory operand is present, try to match with each memory 2937 // operand size. In Intel assembly, the size is not part of the instruction 2938 // mnemonic. 2939 unsigned MatchedSize = 0; 2940 if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) { 2941 static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512}; 2942 for (unsigned Size : MopSizes) { 2943 UnsizedMemOp->Mem.Size = Size; 2944 uint64_t ErrorInfoIgnore; 2945 unsigned LastOpcode = Inst.getOpcode(); 2946 unsigned M = MatchInstruction(Operands, Inst, ErrorInfoIgnore, 2947 MatchingInlineAsm, isParsingIntelSyntax()); 2948 if (Match.empty() || LastOpcode != Inst.getOpcode()) 2949 Match.push_back(M); 2950 2951 // If this returned as a missing feature failure, remember that. 2952 if (Match.back() == Match_MissingFeature) 2953 ErrorInfoMissingFeature = ErrorInfoIgnore; 2954 if (M == Match_Success) 2955 // MS-compatability: 2956 // Adjust AVX512 vector/broadcast memory operand, 2957 // when facing the absence of a size qualifier. 2958 // Match GCC behavior on respective cases. 2959 MatchedSize = AdjustAVX512Mem(Size, UnsizedMemOpNext); 2960 } 2961 2962 // Restore the size of the unsized memory operand if we modified it. 2963 if (UnsizedMemOp) 2964 UnsizedMemOp->Mem.Size = 0; 2965 } 2966 2967 // If we haven't matched anything yet, this is not a basic integer or FPU 2968 // operation. There shouldn't be any ambiguity in our mnemonic table, so try 2969 // matching with the unsized operand. 2970 if (Match.empty()) { 2971 Match.push_back(MatchInstruction( 2972 Operands, Inst, ErrorInfo, MatchingInlineAsm, isParsingIntelSyntax())); 2973 // If this returned as a missing feature failure, remember that. 2974 if (Match.back() == Match_MissingFeature) 2975 ErrorInfoMissingFeature = ErrorInfo; 2976 } 2977 2978 // Restore the size of the unsized memory operand if we modified it. 2979 if (UnsizedMemOp) 2980 UnsizedMemOp->Mem.Size = 0; 2981 2982 // If it's a bad mnemonic, all results will be the same. 2983 if (Match.back() == Match_MnemonicFail) { 2984 return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'", 2985 Op.getLocRange(), MatchingInlineAsm); 2986 } 2987 2988 // If exactly one matched, then we treat that as a successful match (and the 2989 // instruction will already have been filled in correctly, since the failing 2990 // matches won't have modified it). 2991 unsigned NumSuccessfulMatches = 2992 std::count(std::begin(Match), std::end(Match), Match_Success); 2993 if (NumSuccessfulMatches == 1) { 2994 if (MatchedSize && isParsingInlineAsm() && isParsingIntelSyntax()) 2995 // MS compatibility - 2996 // Fix the rewrite according to the matched memory size 2997 // MS inline assembly only 2998 for (AsmRewrite &AR : *InstInfo->AsmRewrites) 2999 if ((AR.Loc.getPointer() == UnsizedMemOp->StartLoc.getPointer()) && 3000 (AR.Kind == AOK_SizeDirective)) 3001 AR.Val = MatchedSize; 3002 // Some instructions need post-processing to, for example, tweak which 3003 // encoding is selected. Loop on it while changes happen so the individual 3004 // transformations can chain off each other. 3005 if (!MatchingInlineAsm) 3006 while (processInstruction(Inst, Operands)) 3007 ; 3008 Inst.setLoc(IDLoc); 3009 if (!MatchingInlineAsm) 3010 EmitInstruction(Inst, Operands, Out); 3011 Opcode = Inst.getOpcode(); 3012 return false; 3013 } else if (NumSuccessfulMatches > 1) { 3014 assert(UnsizedMemOp && 3015 "multiple matches only possible with unsized memory operands"); 3016 return Error(UnsizedMemOp->getStartLoc(), 3017 "ambiguous operand size for instruction '" + Mnemonic + "\'", 3018 UnsizedMemOp->getLocRange(), MatchingInlineAsm); 3019 } 3020 3021 // If one instruction matched with a missing feature, report this as a 3022 // missing feature. 3023 if (std::count(std::begin(Match), std::end(Match), 3024 Match_MissingFeature) == 1) { 3025 ErrorInfo = ErrorInfoMissingFeature; 3026 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature, 3027 MatchingInlineAsm); 3028 } 3029 3030 // If one instruction matched with an invalid operand, report this as an 3031 // operand failure. 3032 if (std::count(std::begin(Match), std::end(Match), 3033 Match_InvalidOperand) == 1) { 3034 return Error(IDLoc, "invalid operand for instruction", EmptyRange, 3035 MatchingInlineAsm); 3036 } 3037 3038 // If all of these were an outright failure, report it in a useless way. 3039 return Error(IDLoc, "unknown instruction mnemonic", EmptyRange, 3040 MatchingInlineAsm); 3041 } 3042 3043 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) { 3044 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo); 3045 } 3046 3047 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) { 3048 MCAsmParser &Parser = getParser(); 3049 StringRef IDVal = DirectiveID.getIdentifier(); 3050 if (IDVal == ".word") 3051 return ParseDirectiveWord(2, DirectiveID.getLoc()); 3052 else if (IDVal.startswith(".code")) 3053 return ParseDirectiveCode(IDVal, DirectiveID.getLoc()); 3054 else if (IDVal.startswith(".att_syntax")) { 3055 if (getLexer().isNot(AsmToken::EndOfStatement)) { 3056 if (Parser.getTok().getString() == "prefix") 3057 Parser.Lex(); 3058 else if (Parser.getTok().getString() == "noprefix") 3059 return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not " 3060 "supported: registers must have a " 3061 "'%' prefix in .att_syntax"); 3062 } 3063 getParser().setAssemblerDialect(0); 3064 return false; 3065 } else if (IDVal.startswith(".intel_syntax")) { 3066 getParser().setAssemblerDialect(1); 3067 if (getLexer().isNot(AsmToken::EndOfStatement)) { 3068 if (Parser.getTok().getString() == "noprefix") 3069 Parser.Lex(); 3070 else if (Parser.getTok().getString() == "prefix") 3071 return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not " 3072 "supported: registers must not have " 3073 "a '%' prefix in .intel_syntax"); 3074 } 3075 return false; 3076 } else if (IDVal == ".even") 3077 return parseDirectiveEven(DirectiveID.getLoc()); 3078 return true; 3079 } 3080 3081 /// parseDirectiveEven 3082 /// ::= .even 3083 bool X86AsmParser::parseDirectiveEven(SMLoc L) { 3084 if (getLexer().isNot(AsmToken::EndOfStatement)) { 3085 TokError("unexpected token in directive"); 3086 return false; 3087 } 3088 const MCSection *Section = getStreamer().getCurrentSectionOnly(); 3089 if (!Section) { 3090 getStreamer().InitSections(false); 3091 Section = getStreamer().getCurrentSectionOnly(); 3092 } 3093 if (Section->UseCodeAlign()) 3094 getStreamer().EmitCodeAlignment(2, 0); 3095 else 3096 getStreamer().EmitValueToAlignment(2, 0, 1, 0); 3097 return false; 3098 } 3099 /// ParseDirectiveWord 3100 /// ::= .word [ expression (, expression)* ] 3101 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { 3102 MCAsmParser &Parser = getParser(); 3103 if (getLexer().isNot(AsmToken::EndOfStatement)) { 3104 for (;;) { 3105 const MCExpr *Value; 3106 SMLoc ExprLoc = getLexer().getLoc(); 3107 if (getParser().parseExpression(Value)) 3108 return false; 3109 3110 if (const auto *MCE = dyn_cast<MCConstantExpr>(Value)) { 3111 assert(Size <= 8 && "Invalid size"); 3112 uint64_t IntValue = MCE->getValue(); 3113 if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue)) 3114 return Error(ExprLoc, "literal value out of range for directive"); 3115 getStreamer().EmitIntValue(IntValue, Size); 3116 } else { 3117 getStreamer().EmitValue(Value, Size, ExprLoc); 3118 } 3119 3120 if (getLexer().is(AsmToken::EndOfStatement)) 3121 break; 3122 3123 // FIXME: Improve diagnostic. 3124 if (getLexer().isNot(AsmToken::Comma)) { 3125 Error(L, "unexpected token in directive"); 3126 return false; 3127 } 3128 Parser.Lex(); 3129 } 3130 } 3131 3132 Parser.Lex(); 3133 return false; 3134 } 3135 3136 /// ParseDirectiveCode 3137 /// ::= .code16 | .code32 | .code64 3138 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) { 3139 MCAsmParser &Parser = getParser(); 3140 Code16GCC = false; 3141 if (IDVal == ".code16") { 3142 Parser.Lex(); 3143 if (!is16BitMode()) { 3144 SwitchMode(X86::Mode16Bit); 3145 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16); 3146 } 3147 } else if (IDVal == ".code16gcc") { 3148 // .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode. 3149 Parser.Lex(); 3150 Code16GCC = true; 3151 if (!is16BitMode()) { 3152 SwitchMode(X86::Mode16Bit); 3153 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16); 3154 } 3155 } else if (IDVal == ".code32") { 3156 Parser.Lex(); 3157 if (!is32BitMode()) { 3158 SwitchMode(X86::Mode32Bit); 3159 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32); 3160 } 3161 } else if (IDVal == ".code64") { 3162 Parser.Lex(); 3163 if (!is64BitMode()) { 3164 SwitchMode(X86::Mode64Bit); 3165 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64); 3166 } 3167 } else { 3168 Error(L, "unknown directive " + IDVal); 3169 return false; 3170 } 3171 3172 return false; 3173 } 3174 3175 // Force static initialization. 3176 extern "C" void LLVMInitializeX86AsmParser() { 3177 RegisterMCAsmParser<X86AsmParser> X(getTheX86_32Target()); 3178 RegisterMCAsmParser<X86AsmParser> Y(getTheX86_64Target()); 3179 } 3180 3181 #define GET_REGISTER_MATCHER 3182 #define GET_MATCHER_IMPLEMENTATION 3183 #define GET_SUBTARGET_FEATURE_NAME 3184 #include "X86GenAsmMatcher.inc" 3185