1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "MCTargetDesc/X86BaseInfo.h" 10 #include "MCTargetDesc/X86IntelInstPrinter.h" 11 #include "MCTargetDesc/X86MCExpr.h" 12 #include "MCTargetDesc/X86TargetStreamer.h" 13 #include "TargetInfo/X86TargetInfo.h" 14 #include "X86AsmParserCommon.h" 15 #include "X86Operand.h" 16 #include "llvm/ADT/STLExtras.h" 17 #include "llvm/ADT/SmallString.h" 18 #include "llvm/ADT/SmallVector.h" 19 #include "llvm/ADT/StringSwitch.h" 20 #include "llvm/ADT/Twine.h" 21 #include "llvm/MC/MCContext.h" 22 #include "llvm/MC/MCExpr.h" 23 #include "llvm/MC/MCInst.h" 24 #include "llvm/MC/MCInstrInfo.h" 25 #include "llvm/MC/MCParser/MCAsmLexer.h" 26 #include "llvm/MC/MCParser/MCAsmParser.h" 27 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 28 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 29 #include "llvm/MC/MCRegisterInfo.h" 30 #include "llvm/MC/MCSection.h" 31 #include "llvm/MC/MCStreamer.h" 32 #include "llvm/MC/MCSubtargetInfo.h" 33 #include "llvm/MC/MCSymbol.h" 34 #include "llvm/Support/SourceMgr.h" 35 #include "llvm/Support/TargetRegistry.h" 36 #include "llvm/Support/raw_ostream.h" 37 #include <algorithm> 38 #include <memory> 39 40 using namespace llvm; 41 42 static bool checkScale(unsigned Scale, StringRef &ErrMsg) { 43 if (Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) { 44 ErrMsg = "scale factor in address must be 1, 2, 4 or 8"; 45 return true; 46 } 47 return false; 48 } 49 50 namespace { 51 52 static const char OpPrecedence[] = { 53 0, // IC_OR 54 1, // IC_XOR 55 2, // IC_AND 56 3, // IC_LSHIFT 57 3, // IC_RSHIFT 58 4, // IC_PLUS 59 4, // IC_MINUS 60 5, // IC_MULTIPLY 61 5, // IC_DIVIDE 62 
5, // IC_MOD 63 6, // IC_NOT 64 7, // IC_NEG 65 8, // IC_RPAREN 66 9, // IC_LPAREN 67 0, // IC_IMM 68 0 // IC_REGISTER 69 }; 70 71 class X86AsmParser : public MCTargetAsmParser { 72 ParseInstructionInfo *InstInfo; 73 bool Code16GCC; 74 75 enum VEXEncoding { 76 VEXEncoding_Default, 77 VEXEncoding_VEX2, 78 VEXEncoding_VEX3, 79 VEXEncoding_EVEX, 80 }; 81 82 VEXEncoding ForcedVEXEncoding = VEXEncoding_Default; 83 84 private: 85 SMLoc consumeToken() { 86 MCAsmParser &Parser = getParser(); 87 SMLoc Result = Parser.getTok().getLoc(); 88 Parser.Lex(); 89 return Result; 90 } 91 92 X86TargetStreamer &getTargetStreamer() { 93 assert(getParser().getStreamer().getTargetStreamer() && 94 "do not have a target streamer"); 95 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 96 return static_cast<X86TargetStreamer &>(TS); 97 } 98 99 unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst, 100 uint64_t &ErrorInfo, FeatureBitset &MissingFeatures, 101 bool matchingInlineAsm, unsigned VariantID = 0) { 102 // In Code16GCC mode, match as 32-bit. 
103 if (Code16GCC) 104 SwitchMode(X86::Mode32Bit); 105 unsigned rv = MatchInstructionImpl(Operands, Inst, ErrorInfo, 106 MissingFeatures, matchingInlineAsm, 107 VariantID); 108 if (Code16GCC) 109 SwitchMode(X86::Mode16Bit); 110 return rv; 111 } 112 113 enum InfixCalculatorTok { 114 IC_OR = 0, 115 IC_XOR, 116 IC_AND, 117 IC_LSHIFT, 118 IC_RSHIFT, 119 IC_PLUS, 120 IC_MINUS, 121 IC_MULTIPLY, 122 IC_DIVIDE, 123 IC_MOD, 124 IC_NOT, 125 IC_NEG, 126 IC_RPAREN, 127 IC_LPAREN, 128 IC_IMM, 129 IC_REGISTER 130 }; 131 132 enum IntelOperatorKind { 133 IOK_INVALID = 0, 134 IOK_LENGTH, 135 IOK_SIZE, 136 IOK_TYPE, 137 IOK_OFFSET 138 }; 139 140 class InfixCalculator { 141 typedef std::pair< InfixCalculatorTok, int64_t > ICToken; 142 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack; 143 SmallVector<ICToken, 4> PostfixStack; 144 145 bool isUnaryOperator(const InfixCalculatorTok Op) { 146 return Op == IC_NEG || Op == IC_NOT; 147 } 148 149 public: 150 int64_t popOperand() { 151 assert (!PostfixStack.empty() && "Poped an empty stack!"); 152 ICToken Op = PostfixStack.pop_back_val(); 153 if (!(Op.first == IC_IMM || Op.first == IC_REGISTER)) 154 return -1; // The invalid Scale value will be caught later by checkScale 155 return Op.second; 156 } 157 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) { 158 assert ((Op == IC_IMM || Op == IC_REGISTER) && 159 "Unexpected operand!"); 160 PostfixStack.push_back(std::make_pair(Op, Val)); 161 } 162 163 void popOperator() { InfixOperatorStack.pop_back(); } 164 void pushOperator(InfixCalculatorTok Op) { 165 // Push the new operator if the stack is empty. 166 if (InfixOperatorStack.empty()) { 167 InfixOperatorStack.push_back(Op); 168 return; 169 } 170 171 // Push the new operator if it has a higher precedence than the operator 172 // on the top of the stack or the operator on the top of the stack is a 173 // left parentheses. 
174 unsigned Idx = InfixOperatorStack.size() - 1; 175 InfixCalculatorTok StackOp = InfixOperatorStack[Idx]; 176 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) { 177 InfixOperatorStack.push_back(Op); 178 return; 179 } 180 181 // The operator on the top of the stack has higher precedence than the 182 // new operator. 183 unsigned ParenCount = 0; 184 while (1) { 185 // Nothing to process. 186 if (InfixOperatorStack.empty()) 187 break; 188 189 Idx = InfixOperatorStack.size() - 1; 190 StackOp = InfixOperatorStack[Idx]; 191 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount)) 192 break; 193 194 // If we have an even parentheses count and we see a left parentheses, 195 // then stop processing. 196 if (!ParenCount && StackOp == IC_LPAREN) 197 break; 198 199 if (StackOp == IC_RPAREN) { 200 ++ParenCount; 201 InfixOperatorStack.pop_back(); 202 } else if (StackOp == IC_LPAREN) { 203 --ParenCount; 204 InfixOperatorStack.pop_back(); 205 } else { 206 InfixOperatorStack.pop_back(); 207 PostfixStack.push_back(std::make_pair(StackOp, 0)); 208 } 209 } 210 // Push the new operator. 211 InfixOperatorStack.push_back(Op); 212 } 213 214 int64_t execute() { 215 // Push any remaining operators onto the postfix stack. 
216 while (!InfixOperatorStack.empty()) { 217 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val(); 218 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN) 219 PostfixStack.push_back(std::make_pair(StackOp, 0)); 220 } 221 222 if (PostfixStack.empty()) 223 return 0; 224 225 SmallVector<ICToken, 16> OperandStack; 226 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) { 227 ICToken Op = PostfixStack[i]; 228 if (Op.first == IC_IMM || Op.first == IC_REGISTER) { 229 OperandStack.push_back(Op); 230 } else if (isUnaryOperator(Op.first)) { 231 assert (OperandStack.size() > 0 && "Too few operands."); 232 ICToken Operand = OperandStack.pop_back_val(); 233 assert (Operand.first == IC_IMM && 234 "Unary operation with a register!"); 235 switch (Op.first) { 236 default: 237 report_fatal_error("Unexpected operator!"); 238 break; 239 case IC_NEG: 240 OperandStack.push_back(std::make_pair(IC_IMM, -Operand.second)); 241 break; 242 case IC_NOT: 243 OperandStack.push_back(std::make_pair(IC_IMM, ~Operand.second)); 244 break; 245 } 246 } else { 247 assert (OperandStack.size() > 1 && "Too few operands."); 248 int64_t Val; 249 ICToken Op2 = OperandStack.pop_back_val(); 250 ICToken Op1 = OperandStack.pop_back_val(); 251 switch (Op.first) { 252 default: 253 report_fatal_error("Unexpected operator!"); 254 break; 255 case IC_PLUS: 256 Val = Op1.second + Op2.second; 257 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 258 break; 259 case IC_MINUS: 260 Val = Op1.second - Op2.second; 261 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 262 break; 263 case IC_MULTIPLY: 264 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 265 "Multiply operation with an immediate and a register!"); 266 Val = Op1.second * Op2.second; 267 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 268 break; 269 case IC_DIVIDE: 270 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 271 "Divide operation with an immediate and a register!"); 272 assert (Op2.second != 0 && "Division by zero!"); 
273 Val = Op1.second / Op2.second; 274 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 275 break; 276 case IC_MOD: 277 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 278 "Modulo operation with an immediate and a register!"); 279 Val = Op1.second % Op2.second; 280 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 281 break; 282 case IC_OR: 283 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 284 "Or operation with an immediate and a register!"); 285 Val = Op1.second | Op2.second; 286 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 287 break; 288 case IC_XOR: 289 assert(Op1.first == IC_IMM && Op2.first == IC_IMM && 290 "Xor operation with an immediate and a register!"); 291 Val = Op1.second ^ Op2.second; 292 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 293 break; 294 case IC_AND: 295 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 296 "And operation with an immediate and a register!"); 297 Val = Op1.second & Op2.second; 298 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 299 break; 300 case IC_LSHIFT: 301 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 302 "Left shift operation with an immediate and a register!"); 303 Val = Op1.second << Op2.second; 304 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 305 break; 306 case IC_RSHIFT: 307 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 308 "Right shift operation with an immediate and a register!"); 309 Val = Op1.second >> Op2.second; 310 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 311 break; 312 } 313 } 314 } 315 assert (OperandStack.size() == 1 && "Expected a single result."); 316 return OperandStack.pop_back_val().second; 317 } 318 }; 319 320 enum IntelExprState { 321 IES_INIT, 322 IES_OR, 323 IES_XOR, 324 IES_AND, 325 IES_LSHIFT, 326 IES_RSHIFT, 327 IES_PLUS, 328 IES_MINUS, 329 IES_NOT, 330 IES_MULTIPLY, 331 IES_DIVIDE, 332 IES_MOD, 333 IES_LBRAC, 334 IES_RBRAC, 335 IES_LPAREN, 336 IES_RPAREN, 337 IES_REGISTER, 338 IES_INTEGER, 339 IES_IDENTIFIER, 
340 IES_ERROR 341 }; 342 343 class IntelExprStateMachine { 344 IntelExprState State, PrevState; 345 unsigned BaseReg, IndexReg, TmpReg, Scale; 346 int64_t Imm; 347 const MCExpr *Sym; 348 StringRef SymName; 349 InfixCalculator IC; 350 InlineAsmIdentifierInfo Info; 351 short BracCount; 352 bool MemExpr; 353 354 public: 355 IntelExprStateMachine() 356 : State(IES_INIT), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), 357 TmpReg(0), Scale(0), Imm(0), Sym(nullptr), BracCount(0), 358 MemExpr(false) {} 359 360 void addImm(int64_t imm) { Imm += imm; } 361 short getBracCount() { return BracCount; } 362 bool isMemExpr() { return MemExpr; } 363 unsigned getBaseReg() { return BaseReg; } 364 unsigned getIndexReg() { return IndexReg; } 365 unsigned getScale() { return Scale; } 366 const MCExpr *getSym() { return Sym; } 367 StringRef getSymName() { return SymName; } 368 int64_t getImm() { return Imm + IC.execute(); } 369 bool isValidEndState() { 370 return State == IES_RBRAC || State == IES_INTEGER; 371 } 372 bool hadError() { return State == IES_ERROR; } 373 InlineAsmIdentifierInfo &getIdentifierInfo() { return Info; } 374 375 void onOr() { 376 IntelExprState CurrState = State; 377 switch (State) { 378 default: 379 State = IES_ERROR; 380 break; 381 case IES_INTEGER: 382 case IES_RPAREN: 383 case IES_REGISTER: 384 State = IES_OR; 385 IC.pushOperator(IC_OR); 386 break; 387 } 388 PrevState = CurrState; 389 } 390 void onXor() { 391 IntelExprState CurrState = State; 392 switch (State) { 393 default: 394 State = IES_ERROR; 395 break; 396 case IES_INTEGER: 397 case IES_RPAREN: 398 case IES_REGISTER: 399 State = IES_XOR; 400 IC.pushOperator(IC_XOR); 401 break; 402 } 403 PrevState = CurrState; 404 } 405 void onAnd() { 406 IntelExprState CurrState = State; 407 switch (State) { 408 default: 409 State = IES_ERROR; 410 break; 411 case IES_INTEGER: 412 case IES_RPAREN: 413 case IES_REGISTER: 414 State = IES_AND; 415 IC.pushOperator(IC_AND); 416 break; 417 } 418 PrevState = CurrState; 419 } 420 
void onLShift() { 421 IntelExprState CurrState = State; 422 switch (State) { 423 default: 424 State = IES_ERROR; 425 break; 426 case IES_INTEGER: 427 case IES_RPAREN: 428 case IES_REGISTER: 429 State = IES_LSHIFT; 430 IC.pushOperator(IC_LSHIFT); 431 break; 432 } 433 PrevState = CurrState; 434 } 435 void onRShift() { 436 IntelExprState CurrState = State; 437 switch (State) { 438 default: 439 State = IES_ERROR; 440 break; 441 case IES_INTEGER: 442 case IES_RPAREN: 443 case IES_REGISTER: 444 State = IES_RSHIFT; 445 IC.pushOperator(IC_RSHIFT); 446 break; 447 } 448 PrevState = CurrState; 449 } 450 bool onPlus(StringRef &ErrMsg) { 451 IntelExprState CurrState = State; 452 switch (State) { 453 default: 454 State = IES_ERROR; 455 break; 456 case IES_INTEGER: 457 case IES_RPAREN: 458 case IES_REGISTER: 459 State = IES_PLUS; 460 IC.pushOperator(IC_PLUS); 461 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { 462 // If we already have a BaseReg, then assume this is the IndexReg with 463 // no explicit scale. 
464 if (!BaseReg) { 465 BaseReg = TmpReg; 466 } else { 467 if (IndexReg) { 468 ErrMsg = "BaseReg/IndexReg already set!"; 469 return true; 470 } 471 IndexReg = TmpReg; 472 Scale = 0; 473 } 474 } 475 break; 476 } 477 PrevState = CurrState; 478 return false; 479 } 480 bool onMinus(StringRef &ErrMsg) { 481 IntelExprState CurrState = State; 482 switch (State) { 483 default: 484 State = IES_ERROR; 485 break; 486 case IES_OR: 487 case IES_XOR: 488 case IES_AND: 489 case IES_LSHIFT: 490 case IES_RSHIFT: 491 case IES_PLUS: 492 case IES_NOT: 493 case IES_MULTIPLY: 494 case IES_DIVIDE: 495 case IES_MOD: 496 case IES_LPAREN: 497 case IES_RPAREN: 498 case IES_LBRAC: 499 case IES_RBRAC: 500 case IES_INTEGER: 501 case IES_REGISTER: 502 case IES_INIT: 503 State = IES_MINUS; 504 // push minus operator if it is not a negate operator 505 if (CurrState == IES_REGISTER || CurrState == IES_RPAREN || 506 CurrState == IES_INTEGER || CurrState == IES_RBRAC) 507 IC.pushOperator(IC_MINUS); 508 else if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) { 509 // We have negate operator for Scale: it's illegal 510 ErrMsg = "Scale can't be negative"; 511 return true; 512 } else 513 IC.pushOperator(IC_NEG); 514 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { 515 // If we already have a BaseReg, then assume this is the IndexReg with 516 // no explicit scale. 
517 if (!BaseReg) { 518 BaseReg = TmpReg; 519 } else { 520 if (IndexReg) { 521 ErrMsg = "BaseReg/IndexReg already set!"; 522 return true; 523 } 524 IndexReg = TmpReg; 525 Scale = 0; 526 } 527 } 528 break; 529 } 530 PrevState = CurrState; 531 return false; 532 } 533 void onNot() { 534 IntelExprState CurrState = State; 535 switch (State) { 536 default: 537 State = IES_ERROR; 538 break; 539 case IES_OR: 540 case IES_XOR: 541 case IES_AND: 542 case IES_LSHIFT: 543 case IES_RSHIFT: 544 case IES_PLUS: 545 case IES_MINUS: 546 case IES_NOT: 547 case IES_MULTIPLY: 548 case IES_DIVIDE: 549 case IES_MOD: 550 case IES_LPAREN: 551 case IES_LBRAC: 552 case IES_INIT: 553 State = IES_NOT; 554 IC.pushOperator(IC_NOT); 555 break; 556 } 557 PrevState = CurrState; 558 } 559 560 bool onRegister(unsigned Reg, StringRef &ErrMsg) { 561 IntelExprState CurrState = State; 562 switch (State) { 563 default: 564 State = IES_ERROR; 565 break; 566 case IES_PLUS: 567 case IES_LPAREN: 568 case IES_LBRAC: 569 State = IES_REGISTER; 570 TmpReg = Reg; 571 IC.pushOperand(IC_REGISTER); 572 break; 573 case IES_MULTIPLY: 574 // Index Register - Scale * Register 575 if (PrevState == IES_INTEGER) { 576 if (IndexReg) { 577 ErrMsg = "BaseReg/IndexReg already set!"; 578 return true; 579 } 580 State = IES_REGISTER; 581 IndexReg = Reg; 582 // Get the scale and replace the 'Scale * Register' with '0'. 
583 Scale = IC.popOperand(); 584 if (checkScale(Scale, ErrMsg)) 585 return true; 586 IC.pushOperand(IC_IMM); 587 IC.popOperator(); 588 } else { 589 State = IES_ERROR; 590 } 591 break; 592 } 593 PrevState = CurrState; 594 return false; 595 } 596 bool onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName, 597 const InlineAsmIdentifierInfo &IDInfo, 598 bool ParsingInlineAsm, StringRef &ErrMsg) { 599 // InlineAsm: Treat an enum value as an integer 600 if (ParsingInlineAsm) 601 if (IDInfo.isKind(InlineAsmIdentifierInfo::IK_EnumVal)) 602 return onInteger(IDInfo.Enum.EnumVal, ErrMsg); 603 // Treat a symbolic constant like an integer 604 if (auto *CE = dyn_cast<MCConstantExpr>(SymRef)) 605 return onInteger(CE->getValue(), ErrMsg); 606 PrevState = State; 607 bool HasSymbol = Sym != nullptr; 608 switch (State) { 609 default: 610 State = IES_ERROR; 611 break; 612 case IES_PLUS: 613 case IES_MINUS: 614 case IES_NOT: 615 case IES_INIT: 616 case IES_LBRAC: 617 MemExpr = true; 618 State = IES_INTEGER; 619 Sym = SymRef; 620 SymName = SymRefName; 621 IC.pushOperand(IC_IMM); 622 if (ParsingInlineAsm) 623 Info = IDInfo; 624 break; 625 } 626 if (HasSymbol) 627 ErrMsg = "cannot use more than one symbol in memory operand"; 628 return HasSymbol; 629 } 630 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) { 631 IntelExprState CurrState = State; 632 switch (State) { 633 default: 634 State = IES_ERROR; 635 break; 636 case IES_PLUS: 637 case IES_MINUS: 638 case IES_NOT: 639 case IES_OR: 640 case IES_XOR: 641 case IES_AND: 642 case IES_LSHIFT: 643 case IES_RSHIFT: 644 case IES_DIVIDE: 645 case IES_MOD: 646 case IES_MULTIPLY: 647 case IES_LPAREN: 648 case IES_INIT: 649 case IES_LBRAC: 650 State = IES_INTEGER; 651 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) { 652 // Index Register - Register * Scale 653 if (IndexReg) { 654 ErrMsg = "BaseReg/IndexReg already set!"; 655 return true; 656 } 657 IndexReg = TmpReg; 658 Scale = TmpInt; 659 if (checkScale(Scale, ErrMsg)) 660 
return true; 661 // Get the scale and replace the 'Register * Scale' with '0'. 662 IC.popOperator(); 663 } else { 664 IC.pushOperand(IC_IMM, TmpInt); 665 } 666 break; 667 } 668 PrevState = CurrState; 669 return false; 670 } 671 void onStar() { 672 PrevState = State; 673 switch (State) { 674 default: 675 State = IES_ERROR; 676 break; 677 case IES_INTEGER: 678 case IES_REGISTER: 679 case IES_RPAREN: 680 State = IES_MULTIPLY; 681 IC.pushOperator(IC_MULTIPLY); 682 break; 683 } 684 } 685 void onDivide() { 686 PrevState = State; 687 switch (State) { 688 default: 689 State = IES_ERROR; 690 break; 691 case IES_INTEGER: 692 case IES_RPAREN: 693 State = IES_DIVIDE; 694 IC.pushOperator(IC_DIVIDE); 695 break; 696 } 697 } 698 void onMod() { 699 PrevState = State; 700 switch (State) { 701 default: 702 State = IES_ERROR; 703 break; 704 case IES_INTEGER: 705 case IES_RPAREN: 706 State = IES_MOD; 707 IC.pushOperator(IC_MOD); 708 break; 709 } 710 } 711 bool onLBrac() { 712 if (BracCount) 713 return true; 714 PrevState = State; 715 switch (State) { 716 default: 717 State = IES_ERROR; 718 break; 719 case IES_RBRAC: 720 case IES_INTEGER: 721 case IES_RPAREN: 722 State = IES_PLUS; 723 IC.pushOperator(IC_PLUS); 724 break; 725 case IES_INIT: 726 assert(!BracCount && "BracCount should be zero on parsing's start"); 727 State = IES_LBRAC; 728 break; 729 } 730 MemExpr = true; 731 BracCount++; 732 return false; 733 } 734 bool onRBrac() { 735 IntelExprState CurrState = State; 736 switch (State) { 737 default: 738 State = IES_ERROR; 739 break; 740 case IES_INTEGER: 741 case IES_REGISTER: 742 case IES_RPAREN: 743 if (BracCount-- != 1) 744 return true; 745 State = IES_RBRAC; 746 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { 747 // If we already have a BaseReg, then assume this is the IndexReg with 748 // no explicit scale. 
749 if (!BaseReg) { 750 BaseReg = TmpReg; 751 } else { 752 assert (!IndexReg && "BaseReg/IndexReg already set!"); 753 IndexReg = TmpReg; 754 Scale = 0; 755 } 756 } 757 break; 758 } 759 PrevState = CurrState; 760 return false; 761 } 762 void onLParen() { 763 IntelExprState CurrState = State; 764 switch (State) { 765 default: 766 State = IES_ERROR; 767 break; 768 case IES_PLUS: 769 case IES_MINUS: 770 case IES_NOT: 771 case IES_OR: 772 case IES_XOR: 773 case IES_AND: 774 case IES_LSHIFT: 775 case IES_RSHIFT: 776 case IES_MULTIPLY: 777 case IES_DIVIDE: 778 case IES_MOD: 779 case IES_LPAREN: 780 case IES_INIT: 781 case IES_LBRAC: 782 State = IES_LPAREN; 783 IC.pushOperator(IC_LPAREN); 784 break; 785 } 786 PrevState = CurrState; 787 } 788 void onRParen() { 789 PrevState = State; 790 switch (State) { 791 default: 792 State = IES_ERROR; 793 break; 794 case IES_INTEGER: 795 case IES_REGISTER: 796 case IES_RPAREN: 797 State = IES_RPAREN; 798 IC.pushOperator(IC_RPAREN); 799 break; 800 } 801 } 802 }; 803 804 bool Error(SMLoc L, const Twine &Msg, SMRange Range = None, 805 bool MatchingInlineAsm = false) { 806 MCAsmParser &Parser = getParser(); 807 if (MatchingInlineAsm) { 808 if (!getLexer().isAtStartOfStatement()) 809 Parser.eatToEndOfStatement(); 810 return false; 811 } 812 return Parser.Error(L, Msg, Range); 813 } 814 815 std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg, SMRange R = SMRange()) { 816 Error(Loc, Msg, R); 817 return nullptr; 818 } 819 820 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc); 821 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc); 822 bool IsSIReg(unsigned Reg); 823 unsigned GetSIDIForRegClass(unsigned RegClassID, unsigned Reg, bool IsSIReg); 824 void 825 AddDefaultSrcDestOperands(OperandVector &Operands, 826 std::unique_ptr<llvm::MCParsedAsmOperand> &&Src, 827 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst); 828 bool VerifyAndAdjustOperands(OperandVector &OrigOperands, 829 OperandVector &FinalOperands); 830 
std::unique_ptr<X86Operand> ParseOperand(); 831 std::unique_ptr<X86Operand> ParseATTOperand(); 832 std::unique_ptr<X86Operand> ParseIntelOperand(); 833 std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator(); 834 bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End); 835 unsigned IdentifyIntelInlineAsmOperator(StringRef Name); 836 unsigned ParseIntelInlineAsmOperator(unsigned OpKind); 837 std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start); 838 bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM); 839 void RewriteIntelExpression(IntelExprStateMachine &SM, SMLoc Start, 840 SMLoc End); 841 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End); 842 bool ParseIntelInlineAsmIdentifier(const MCExpr *&Val, StringRef &Identifier, 843 InlineAsmIdentifierInfo &Info, 844 bool IsUnevaluatedOperand, SMLoc &End); 845 846 std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, 847 const MCExpr *&Disp, 848 const SMLoc &StartLoc, 849 SMLoc &EndLoc); 850 851 X86::CondCode ParseConditionCode(StringRef CCode); 852 853 bool ParseIntelMemoryOperandSize(unsigned &Size); 854 std::unique_ptr<X86Operand> 855 CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, 856 unsigned IndexReg, unsigned Scale, SMLoc Start, 857 SMLoc End, unsigned Size, StringRef Identifier, 858 const InlineAsmIdentifierInfo &Info); 859 860 bool parseDirectiveEven(SMLoc L); 861 bool ParseDirectiveCode(StringRef IDVal, SMLoc L); 862 863 /// CodeView FPO data directives. 
864 bool parseDirectiveFPOProc(SMLoc L); 865 bool parseDirectiveFPOSetFrame(SMLoc L); 866 bool parseDirectiveFPOPushReg(SMLoc L); 867 bool parseDirectiveFPOStackAlloc(SMLoc L); 868 bool parseDirectiveFPOStackAlign(SMLoc L); 869 bool parseDirectiveFPOEndPrologue(SMLoc L); 870 bool parseDirectiveFPOEndProc(SMLoc L); 871 bool parseDirectiveFPOData(SMLoc L); 872 873 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 874 875 bool validateInstruction(MCInst &Inst, const OperandVector &Ops); 876 bool processInstruction(MCInst &Inst, const OperandVector &Ops); 877 878 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds 879 /// instrumentation around Inst. 880 void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out); 881 882 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 883 OperandVector &Operands, MCStreamer &Out, 884 uint64_t &ErrorInfo, 885 bool MatchingInlineAsm) override; 886 887 void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands, 888 MCStreamer &Out, bool MatchingInlineAsm); 889 890 bool ErrorMissingFeature(SMLoc IDLoc, const FeatureBitset &MissingFeatures, 891 bool MatchingInlineAsm); 892 893 bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode, 894 OperandVector &Operands, MCStreamer &Out, 895 uint64_t &ErrorInfo, 896 bool MatchingInlineAsm); 897 898 bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode, 899 OperandVector &Operands, MCStreamer &Out, 900 uint64_t &ErrorInfo, 901 bool MatchingInlineAsm); 902 903 bool OmitRegisterFromClobberLists(unsigned RegNo) override; 904 905 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z}) 906 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required. 907 /// return false if no parsing errors occurred, true otherwise. 
908 bool HandleAVX512Operand(OperandVector &Operands, 909 const MCParsedAsmOperand &Op); 910 911 bool ParseZ(std::unique_ptr<X86Operand> &Z, const SMLoc &StartLoc); 912 913 bool is64BitMode() const { 914 // FIXME: Can tablegen auto-generate this? 915 return getSTI().getFeatureBits()[X86::Mode64Bit]; 916 } 917 bool is32BitMode() const { 918 // FIXME: Can tablegen auto-generate this? 919 return getSTI().getFeatureBits()[X86::Mode32Bit]; 920 } 921 bool is16BitMode() const { 922 // FIXME: Can tablegen auto-generate this? 923 return getSTI().getFeatureBits()[X86::Mode16Bit]; 924 } 925 void SwitchMode(unsigned mode) { 926 MCSubtargetInfo &STI = copySTI(); 927 FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit}); 928 FeatureBitset OldMode = STI.getFeatureBits() & AllModes; 929 FeatureBitset FB = ComputeAvailableFeatures( 930 STI.ToggleFeature(OldMode.flip(mode))); 931 setAvailableFeatures(FB); 932 933 assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes)); 934 } 935 936 unsigned getPointerWidth() { 937 if (is16BitMode()) return 16; 938 if (is32BitMode()) return 32; 939 if (is64BitMode()) return 64; 940 llvm_unreachable("invalid mode"); 941 } 942 943 bool isParsingIntelSyntax() { 944 return getParser().getAssemblerDialect(); 945 } 946 947 /// @name Auto-generated Matcher Functions 948 /// { 949 950 #define GET_ASSEMBLER_HEADER 951 #include "X86GenAsmMatcher.inc" 952 953 /// } 954 955 public: 956 enum X86MatchResultTy { 957 Match_Unsupported = FIRST_TARGET_MATCH_RESULT_TY, 958 #define GET_OPERAND_DIAGNOSTIC_TYPES 959 #include "X86GenAsmMatcher.inc" 960 }; 961 962 X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser, 963 const MCInstrInfo &mii, const MCTargetOptions &Options) 964 : MCTargetAsmParser(Options, sti, mii), InstInfo(nullptr), 965 Code16GCC(false) { 966 967 Parser.addAliasForDirective(".word", ".2byte"); 968 969 // Initialize the set of available features. 
970 setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits())); 971 } 972 973 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 974 975 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override; 976 977 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 978 SMLoc NameLoc, OperandVector &Operands) override; 979 980 bool ParseDirective(AsmToken DirectiveID) override; 981 }; 982 } // end anonymous namespace 983 984 /// @name Auto-generated Match Functions 985 /// { 986 987 static unsigned MatchRegisterName(StringRef Name); 988 989 /// } 990 991 static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg, 992 unsigned Scale, bool Is64BitMode, 993 StringRef &ErrMsg) { 994 // If we have both a base register and an index register make sure they are 995 // both 64-bit or 32-bit registers. 996 // To support VSIB, IndexReg can be 128-bit or 256-bit registers. 997 998 if (BaseReg != 0 && 999 !(BaseReg == X86::RIP || BaseReg == X86::EIP || 1000 X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) || 1001 X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) || 1002 X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg))) { 1003 ErrMsg = "invalid base+index expression"; 1004 return true; 1005 } 1006 1007 if (IndexReg != 0 && 1008 !(IndexReg == X86::EIZ || IndexReg == X86::RIZ || 1009 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || 1010 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) || 1011 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) || 1012 X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) || 1013 X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) || 1014 X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg))) { 1015 ErrMsg = "invalid base+index expression"; 1016 return true; 1017 } 1018 1019 if (((BaseReg == X86::RIP || BaseReg == X86::EIP) && IndexReg != 0) || 1020 IndexReg == X86::EIP || 
IndexReg == X86::RIP || 1021 IndexReg == X86::ESP || IndexReg == X86::RSP) { 1022 ErrMsg = "invalid base+index expression"; 1023 return true; 1024 } 1025 1026 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed, 1027 // and then only in non-64-bit modes. 1028 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) && 1029 (Is64BitMode || (BaseReg != X86::BX && BaseReg != X86::BP && 1030 BaseReg != X86::SI && BaseReg != X86::DI))) { 1031 ErrMsg = "invalid 16-bit base register"; 1032 return true; 1033 } 1034 1035 if (BaseReg == 0 && 1036 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) { 1037 ErrMsg = "16-bit memory operand may not include only index register"; 1038 return true; 1039 } 1040 1041 if (BaseReg != 0 && IndexReg != 0) { 1042 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) && 1043 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || 1044 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) || 1045 IndexReg == X86::EIZ)) { 1046 ErrMsg = "base register is 64-bit, but index register is not"; 1047 return true; 1048 } 1049 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) && 1050 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || 1051 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) || 1052 IndexReg == X86::RIZ)) { 1053 ErrMsg = "base register is 32-bit, but index register is not"; 1054 return true; 1055 } 1056 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) { 1057 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) || 1058 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) { 1059 ErrMsg = "base register is 16-bit, but index register is not"; 1060 return true; 1061 } 1062 if ((BaseReg != X86::BX && BaseReg != X86::BP) || 1063 (IndexReg != X86::SI && IndexReg != X86::DI)) { 1064 ErrMsg = "invalid 16-bit base/index register combination"; 1065 return true; 1066 } 1067 } 1068 } 1069 1070 // RIP/EIP-relative 
addressing is only supported in 64-bit mode. 1071 if (!Is64BitMode && BaseReg != 0 && 1072 (BaseReg == X86::RIP || BaseReg == X86::EIP)) { 1073 ErrMsg = "IP-relative addressing requires 64-bit mode"; 1074 return true; 1075 } 1076 1077 return checkScale(Scale, ErrMsg); 1078 } 1079 1080 bool X86AsmParser::ParseRegister(unsigned &RegNo, 1081 SMLoc &StartLoc, SMLoc &EndLoc) { 1082 MCAsmParser &Parser = getParser(); 1083 RegNo = 0; 1084 const AsmToken &PercentTok = Parser.getTok(); 1085 StartLoc = PercentTok.getLoc(); 1086 1087 // If we encounter a %, ignore it. This code handles registers with and 1088 // without the prefix, unprefixed registers can occur in cfi directives. 1089 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent)) 1090 Parser.Lex(); // Eat percent token. 1091 1092 const AsmToken &Tok = Parser.getTok(); 1093 EndLoc = Tok.getEndLoc(); 1094 1095 if (Tok.isNot(AsmToken::Identifier)) { 1096 if (isParsingIntelSyntax()) return true; 1097 return Error(StartLoc, "invalid register name", 1098 SMRange(StartLoc, EndLoc)); 1099 } 1100 1101 RegNo = MatchRegisterName(Tok.getString()); 1102 1103 // If the match failed, try the register name as lowercase. 1104 if (RegNo == 0) 1105 RegNo = MatchRegisterName(Tok.getString().lower()); 1106 1107 // The "flags" register cannot be referenced directly. 1108 // Treat it as an identifier instead. 1109 if (isParsingInlineAsm() && isParsingIntelSyntax() && RegNo == X86::EFLAGS) 1110 RegNo = 0; 1111 1112 if (!is64BitMode()) { 1113 // FIXME: This should be done using Requires<Not64BitMode> and 1114 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also 1115 // checked. 1116 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a 1117 // REX prefix. 
1118 if (RegNo == X86::RIZ || RegNo == X86::RIP || 1119 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) || 1120 X86II::isX86_64NonExtLowByteReg(RegNo) || 1121 X86II::isX86_64ExtendedReg(RegNo)) { 1122 StringRef RegName = Tok.getString(); 1123 Parser.Lex(); // Eat register name. 1124 return Error(StartLoc, 1125 "register %" + RegName + " is only available in 64-bit mode", 1126 SMRange(StartLoc, EndLoc)); 1127 } 1128 } 1129 1130 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens. 1131 if (RegNo == X86::ST0) { 1132 Parser.Lex(); // Eat 'st' 1133 1134 // Check to see if we have '(4)' after %st. 1135 if (getLexer().isNot(AsmToken::LParen)) 1136 return false; 1137 // Lex the paren. 1138 getParser().Lex(); 1139 1140 const AsmToken &IntTok = Parser.getTok(); 1141 if (IntTok.isNot(AsmToken::Integer)) 1142 return Error(IntTok.getLoc(), "expected stack index"); 1143 switch (IntTok.getIntVal()) { 1144 case 0: RegNo = X86::ST0; break; 1145 case 1: RegNo = X86::ST1; break; 1146 case 2: RegNo = X86::ST2; break; 1147 case 3: RegNo = X86::ST3; break; 1148 case 4: RegNo = X86::ST4; break; 1149 case 5: RegNo = X86::ST5; break; 1150 case 6: RegNo = X86::ST6; break; 1151 case 7: RegNo = X86::ST7; break; 1152 default: return Error(IntTok.getLoc(), "invalid stack index"); 1153 } 1154 1155 if (getParser().Lex().isNot(AsmToken::RParen)) 1156 return Error(Parser.getTok().getLoc(), "expected ')'"); 1157 1158 EndLoc = Parser.getTok().getEndLoc(); 1159 Parser.Lex(); // Eat ')' 1160 return false; 1161 } 1162 1163 EndLoc = Parser.getTok().getEndLoc(); 1164 1165 // If this is "db[0-15]", match it as an alias 1166 // for dr[0-15]. 
1167 if (RegNo == 0 && Tok.getString().startswith("db")) { 1168 if (Tok.getString().size() == 3) { 1169 switch (Tok.getString()[2]) { 1170 case '0': RegNo = X86::DR0; break; 1171 case '1': RegNo = X86::DR1; break; 1172 case '2': RegNo = X86::DR2; break; 1173 case '3': RegNo = X86::DR3; break; 1174 case '4': RegNo = X86::DR4; break; 1175 case '5': RegNo = X86::DR5; break; 1176 case '6': RegNo = X86::DR6; break; 1177 case '7': RegNo = X86::DR7; break; 1178 case '8': RegNo = X86::DR8; break; 1179 case '9': RegNo = X86::DR9; break; 1180 } 1181 } else if (Tok.getString().size() == 4 && Tok.getString()[2] == '1') { 1182 switch (Tok.getString()[3]) { 1183 case '0': RegNo = X86::DR10; break; 1184 case '1': RegNo = X86::DR11; break; 1185 case '2': RegNo = X86::DR12; break; 1186 case '3': RegNo = X86::DR13; break; 1187 case '4': RegNo = X86::DR14; break; 1188 case '5': RegNo = X86::DR15; break; 1189 } 1190 } 1191 1192 if (RegNo != 0) { 1193 EndLoc = Parser.getTok().getEndLoc(); 1194 Parser.Lex(); // Eat it. 1195 return false; 1196 } 1197 } 1198 1199 if (RegNo == 0) { 1200 if (isParsingIntelSyntax()) return true; 1201 return Error(StartLoc, "invalid register name", 1202 SMRange(StartLoc, EndLoc)); 1203 } 1204 1205 Parser.Lex(); // Eat identifier token. 1206 return false; 1207 } 1208 1209 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) { 1210 bool Parse32 = is32BitMode() || Code16GCC; 1211 unsigned Basereg = is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI); 1212 const MCExpr *Disp = MCConstantExpr::create(0, getContext()); 1213 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp, 1214 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1, 1215 Loc, Loc, 0); 1216 } 1217 1218 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) { 1219 bool Parse32 = is32BitMode() || Code16GCC; 1220 unsigned Basereg = is64BitMode() ? X86::RDI : (Parse32 ? 
X86::EDI : X86::DI); 1221 const MCExpr *Disp = MCConstantExpr::create(0, getContext()); 1222 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp, 1223 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1, 1224 Loc, Loc, 0); 1225 } 1226 1227 bool X86AsmParser::IsSIReg(unsigned Reg) { 1228 switch (Reg) { 1229 default: llvm_unreachable("Only (R|E)SI and (R|E)DI are expected!"); 1230 case X86::RSI: 1231 case X86::ESI: 1232 case X86::SI: 1233 return true; 1234 case X86::RDI: 1235 case X86::EDI: 1236 case X86::DI: 1237 return false; 1238 } 1239 } 1240 1241 unsigned X86AsmParser::GetSIDIForRegClass(unsigned RegClassID, unsigned Reg, 1242 bool IsSIReg) { 1243 switch (RegClassID) { 1244 default: llvm_unreachable("Unexpected register class"); 1245 case X86::GR64RegClassID: 1246 return IsSIReg ? X86::RSI : X86::RDI; 1247 case X86::GR32RegClassID: 1248 return IsSIReg ? X86::ESI : X86::EDI; 1249 case X86::GR16RegClassID: 1250 return IsSIReg ? X86::SI : X86::DI; 1251 } 1252 } 1253 1254 void X86AsmParser::AddDefaultSrcDestOperands( 1255 OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src, 1256 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) { 1257 if (isParsingIntelSyntax()) { 1258 Operands.push_back(std::move(Dst)); 1259 Operands.push_back(std::move(Src)); 1260 } 1261 else { 1262 Operands.push_back(std::move(Src)); 1263 Operands.push_back(std::move(Dst)); 1264 } 1265 } 1266 1267 bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands, 1268 OperandVector &FinalOperands) { 1269 1270 if (OrigOperands.size() > 1) { 1271 // Check if sizes match, OrigOperands also contains the instruction name 1272 assert(OrigOperands.size() == FinalOperands.size() + 1 && 1273 "Operand size mismatch"); 1274 1275 SmallVector<std::pair<SMLoc, std::string>, 2> Warnings; 1276 // Verify types match 1277 int RegClassID = -1; 1278 for (unsigned int i = 0; i < FinalOperands.size(); ++i) { 1279 X86Operand &OrigOp = static_cast<X86Operand &>(*OrigOperands[i + 1]); 
1280 X86Operand &FinalOp = static_cast<X86Operand &>(*FinalOperands[i]); 1281 1282 if (FinalOp.isReg() && 1283 (!OrigOp.isReg() || FinalOp.getReg() != OrigOp.getReg())) 1284 // Return false and let a normal complaint about bogus operands happen 1285 return false; 1286 1287 if (FinalOp.isMem()) { 1288 1289 if (!OrigOp.isMem()) 1290 // Return false and let a normal complaint about bogus operands happen 1291 return false; 1292 1293 unsigned OrigReg = OrigOp.Mem.BaseReg; 1294 unsigned FinalReg = FinalOp.Mem.BaseReg; 1295 1296 // If we've already encounterd a register class, make sure all register 1297 // bases are of the same register class 1298 if (RegClassID != -1 && 1299 !X86MCRegisterClasses[RegClassID].contains(OrigReg)) { 1300 return Error(OrigOp.getStartLoc(), 1301 "mismatching source and destination index registers"); 1302 } 1303 1304 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(OrigReg)) 1305 RegClassID = X86::GR64RegClassID; 1306 else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(OrigReg)) 1307 RegClassID = X86::GR32RegClassID; 1308 else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(OrigReg)) 1309 RegClassID = X86::GR16RegClassID; 1310 else 1311 // Unexpected register class type 1312 // Return false and let a normal complaint about bogus operands happen 1313 return false; 1314 1315 bool IsSI = IsSIReg(FinalReg); 1316 FinalReg = GetSIDIForRegClass(RegClassID, FinalReg, IsSI); 1317 1318 if (FinalReg != OrigReg) { 1319 std::string RegName = IsSI ? 
"ES:(R|E)SI" : "ES:(R|E)DI"; 1320 Warnings.push_back(std::make_pair( 1321 OrigOp.getStartLoc(), 1322 "memory operand is only for determining the size, " + RegName + 1323 " will be used for the location")); 1324 } 1325 1326 FinalOp.Mem.Size = OrigOp.Mem.Size; 1327 FinalOp.Mem.SegReg = OrigOp.Mem.SegReg; 1328 FinalOp.Mem.BaseReg = FinalReg; 1329 } 1330 } 1331 1332 // Produce warnings only if all the operands passed the adjustment - prevent 1333 // legal cases like "movsd (%rax), %xmm0" mistakenly produce warnings 1334 for (auto &WarningMsg : Warnings) { 1335 Warning(WarningMsg.first, WarningMsg.second); 1336 } 1337 1338 // Remove old operands 1339 for (unsigned int i = 0; i < FinalOperands.size(); ++i) 1340 OrigOperands.pop_back(); 1341 } 1342 // OrigOperands.append(FinalOperands.begin(), FinalOperands.end()); 1343 for (unsigned int i = 0; i < FinalOperands.size(); ++i) 1344 OrigOperands.push_back(std::move(FinalOperands[i])); 1345 1346 return false; 1347 } 1348 1349 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() { 1350 if (isParsingIntelSyntax()) 1351 return ParseIntelOperand(); 1352 return ParseATTOperand(); 1353 } 1354 1355 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm( 1356 unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg, 1357 unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier, 1358 const InlineAsmIdentifierInfo &Info) { 1359 // If we found a decl other than a VarDecl, then assume it is a FuncDecl or 1360 // some other label reference. 1361 if (Info.isKind(InlineAsmIdentifierInfo::IK_Label)) { 1362 // Insert an explicit size if the user didn't have one. 1363 if (!Size) { 1364 Size = getPointerWidth(); 1365 InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start, 1366 /*Len=*/0, Size); 1367 } 1368 // Create an absolute memory reference in order to match against 1369 // instructions taking a PC relative operand. 
1370 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size, 1371 Identifier, Info.Label.Decl); 1372 } 1373 // We either have a direct symbol reference, or an offset from a symbol. The 1374 // parser always puts the symbol on the LHS, so look there for size 1375 // calculation purposes. 1376 unsigned FrontendSize = 0; 1377 void *Decl = nullptr; 1378 bool IsGlobalLV = false; 1379 if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) { 1380 // Size is in terms of bits in this context. 1381 FrontendSize = Info.Var.Type * 8; 1382 Decl = Info.Var.Decl; 1383 IsGlobalLV = Info.Var.IsGlobalLV; 1384 } 1385 // It is widely common for MS InlineAsm to use a global variable and one/two 1386 // registers in a mmory expression, and though unaccessible via rip/eip. 1387 if (IsGlobalLV && (BaseReg || IndexReg)) { 1388 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End); 1389 // Otherwise, we set the base register to a non-zero value 1390 // if we don't know the actual value at this time. This is necessary to 1391 // get the matching correct in some cases. 1392 } else { 1393 BaseReg = BaseReg ? 
BaseReg : 1; 1394 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg, 1395 IndexReg, Scale, Start, End, Size, Identifier, 1396 Decl, FrontendSize); 1397 } 1398 } 1399 1400 // Some binary bitwise operators have a named synonymous 1401 // Query a candidate string for being such a named operator 1402 // and if so - invoke the appropriate handler 1403 bool X86AsmParser::ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM) { 1404 // A named operator should be either lower or upper case, but not a mix 1405 if (Name.compare(Name.lower()) && Name.compare(Name.upper())) 1406 return false; 1407 if (Name.equals_lower("not")) 1408 SM.onNot(); 1409 else if (Name.equals_lower("or")) 1410 SM.onOr(); 1411 else if (Name.equals_lower("shl")) 1412 SM.onLShift(); 1413 else if (Name.equals_lower("shr")) 1414 SM.onRShift(); 1415 else if (Name.equals_lower("xor")) 1416 SM.onXor(); 1417 else if (Name.equals_lower("and")) 1418 SM.onAnd(); 1419 else if (Name.equals_lower("mod")) 1420 SM.onMod(); 1421 else 1422 return false; 1423 return true; 1424 } 1425 1426 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { 1427 MCAsmParser &Parser = getParser(); 1428 const AsmToken &Tok = Parser.getTok(); 1429 StringRef ErrMsg; 1430 1431 AsmToken::TokenKind PrevTK = AsmToken::Error; 1432 bool Done = false; 1433 while (!Done) { 1434 bool UpdateLocLex = true; 1435 AsmToken::TokenKind TK = getLexer().getKind(); 1436 1437 switch (TK) { 1438 default: 1439 if ((Done = SM.isValidEndState())) 1440 break; 1441 return Error(Tok.getLoc(), "unknown token in expression"); 1442 case AsmToken::EndOfStatement: 1443 Done = true; 1444 break; 1445 case AsmToken::Real: 1446 // DotOperator: [ebx].0 1447 UpdateLocLex = false; 1448 if (ParseIntelDotOperator(SM, End)) 1449 return true; 1450 break; 1451 case AsmToken::At: 1452 case AsmToken::String: 1453 case AsmToken::Identifier: { 1454 SMLoc IdentLoc = Tok.getLoc(); 1455 StringRef Identifier = Tok.getString(); 1456 
UpdateLocLex = false; 1457 // Register 1458 unsigned Reg; 1459 if (Tok.is(AsmToken::Identifier) && !ParseRegister(Reg, IdentLoc, End)) { 1460 if (SM.onRegister(Reg, ErrMsg)) 1461 return Error(Tok.getLoc(), ErrMsg); 1462 break; 1463 } 1464 // Operator synonymous ("not", "or" etc.) 1465 if ((UpdateLocLex = ParseIntelNamedOperator(Identifier, SM))) 1466 break; 1467 // Symbol reference, when parsing assembly content 1468 InlineAsmIdentifierInfo Info; 1469 const MCExpr *Val; 1470 if (!isParsingInlineAsm()) { 1471 if (getParser().parsePrimaryExpr(Val, End)) { 1472 return Error(Tok.getLoc(), "Unexpected identifier!"); 1473 } else if (SM.onIdentifierExpr(Val, Identifier, Info, false, ErrMsg)) { 1474 return Error(IdentLoc, ErrMsg); 1475 } else 1476 break; 1477 } 1478 // MS InlineAsm operators (TYPE/LENGTH/SIZE) 1479 if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) { 1480 if (OpKind == IOK_OFFSET) 1481 return Error(IdentLoc, "Dealing OFFSET operator as part of" 1482 "a compound immediate expression is yet to be supported"); 1483 if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) { 1484 if (SM.onInteger(Val, ErrMsg)) 1485 return Error(IdentLoc, ErrMsg); 1486 } else 1487 return true; 1488 break; 1489 } 1490 // MS Dot Operator expression 1491 if (Identifier.count('.') && PrevTK == AsmToken::RBrac) { 1492 if (ParseIntelDotOperator(SM, End)) 1493 return true; 1494 break; 1495 } 1496 // MS InlineAsm identifier 1497 // Call parseIdentifier() to combine @ with the identifier behind it. 
1498 if (TK == AsmToken::At && Parser.parseIdentifier(Identifier)) 1499 return Error(IdentLoc, "expected identifier"); 1500 if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End)) 1501 return true; 1502 else if (SM.onIdentifierExpr(Val, Identifier, Info, true, ErrMsg)) 1503 return Error(IdentLoc, ErrMsg); 1504 break; 1505 } 1506 case AsmToken::Integer: { 1507 // Look for 'b' or 'f' following an Integer as a directional label 1508 SMLoc Loc = getTok().getLoc(); 1509 int64_t IntVal = getTok().getIntVal(); 1510 End = consumeToken(); 1511 UpdateLocLex = false; 1512 if (getLexer().getKind() == AsmToken::Identifier) { 1513 StringRef IDVal = getTok().getString(); 1514 if (IDVal == "f" || IDVal == "b") { 1515 MCSymbol *Sym = 1516 getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b"); 1517 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; 1518 const MCExpr *Val = 1519 MCSymbolRefExpr::create(Sym, Variant, getContext()); 1520 if (IDVal == "b" && Sym->isUndefined()) 1521 return Error(Loc, "invalid reference to undefined symbol"); 1522 StringRef Identifier = Sym->getName(); 1523 InlineAsmIdentifierInfo Info; 1524 if (SM.onIdentifierExpr(Val, Identifier, Info, 1525 isParsingInlineAsm(), ErrMsg)) 1526 return Error(Loc, ErrMsg); 1527 End = consumeToken(); 1528 } else { 1529 if (SM.onInteger(IntVal, ErrMsg)) 1530 return Error(Loc, ErrMsg); 1531 } 1532 } else { 1533 if (SM.onInteger(IntVal, ErrMsg)) 1534 return Error(Loc, ErrMsg); 1535 } 1536 break; 1537 } 1538 case AsmToken::Plus: 1539 if (SM.onPlus(ErrMsg)) 1540 return Error(getTok().getLoc(), ErrMsg); 1541 break; 1542 case AsmToken::Minus: 1543 if (SM.onMinus(ErrMsg)) 1544 return Error(getTok().getLoc(), ErrMsg); 1545 break; 1546 case AsmToken::Tilde: SM.onNot(); break; 1547 case AsmToken::Star: SM.onStar(); break; 1548 case AsmToken::Slash: SM.onDivide(); break; 1549 case AsmToken::Percent: SM.onMod(); break; 1550 case AsmToken::Pipe: SM.onOr(); break; 1551 case AsmToken::Caret: SM.onXor(); 
break; 1552 case AsmToken::Amp: SM.onAnd(); break; 1553 case AsmToken::LessLess: 1554 SM.onLShift(); break; 1555 case AsmToken::GreaterGreater: 1556 SM.onRShift(); break; 1557 case AsmToken::LBrac: 1558 if (SM.onLBrac()) 1559 return Error(Tok.getLoc(), "unexpected bracket encountered"); 1560 break; 1561 case AsmToken::RBrac: 1562 if (SM.onRBrac()) 1563 return Error(Tok.getLoc(), "unexpected bracket encountered"); 1564 break; 1565 case AsmToken::LParen: SM.onLParen(); break; 1566 case AsmToken::RParen: SM.onRParen(); break; 1567 } 1568 if (SM.hadError()) 1569 return Error(Tok.getLoc(), "unknown token in expression"); 1570 1571 if (!Done && UpdateLocLex) 1572 End = consumeToken(); 1573 1574 PrevTK = TK; 1575 } 1576 return false; 1577 } 1578 1579 void X86AsmParser::RewriteIntelExpression(IntelExprStateMachine &SM, 1580 SMLoc Start, SMLoc End) { 1581 SMLoc Loc = Start; 1582 unsigned ExprLen = End.getPointer() - Start.getPointer(); 1583 // Skip everything before a symbol displacement (if we have one) 1584 if (SM.getSym()) { 1585 StringRef SymName = SM.getSymName(); 1586 if (unsigned Len = SymName.data() - Start.getPointer()) 1587 InstInfo->AsmRewrites->emplace_back(AOK_Skip, Start, Len); 1588 Loc = SMLoc::getFromPointer(SymName.data() + SymName.size()); 1589 ExprLen = End.getPointer() - (SymName.data() + SymName.size()); 1590 // If we have only a symbol than there's no need for complex rewrite, 1591 // simply skip everything after it 1592 if (!(SM.getBaseReg() || SM.getIndexReg() || SM.getImm())) { 1593 if (ExprLen) 1594 InstInfo->AsmRewrites->emplace_back(AOK_Skip, Loc, ExprLen); 1595 return; 1596 } 1597 } 1598 // Build an Intel Expression rewrite 1599 StringRef BaseRegStr; 1600 StringRef IndexRegStr; 1601 if (SM.getBaseReg()) 1602 BaseRegStr = X86IntelInstPrinter::getRegisterName(SM.getBaseReg()); 1603 if (SM.getIndexReg()) 1604 IndexRegStr = X86IntelInstPrinter::getRegisterName(SM.getIndexReg()); 1605 // Emit it 1606 IntelExpr Expr(BaseRegStr, IndexRegStr, 
SM.getScale(), SM.getImm(), SM.isMemExpr()); 1607 InstInfo->AsmRewrites->emplace_back(Loc, ExprLen, Expr); 1608 } 1609 1610 // Inline assembly may use variable names with namespace alias qualifiers. 1611 bool X86AsmParser::ParseIntelInlineAsmIdentifier(const MCExpr *&Val, 1612 StringRef &Identifier, 1613 InlineAsmIdentifierInfo &Info, 1614 bool IsUnevaluatedOperand, 1615 SMLoc &End) { 1616 MCAsmParser &Parser = getParser(); 1617 assert(isParsingInlineAsm() && "Expected to be parsing inline assembly."); 1618 Val = nullptr; 1619 1620 StringRef LineBuf(Identifier.data()); 1621 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand); 1622 1623 const AsmToken &Tok = Parser.getTok(); 1624 SMLoc Loc = Tok.getLoc(); 1625 1626 // Advance the token stream until the end of the current token is 1627 // after the end of what the frontend claimed. 1628 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size(); 1629 do { 1630 End = Tok.getEndLoc(); 1631 getLexer().Lex(); 1632 } while (End.getPointer() < EndPtr); 1633 Identifier = LineBuf; 1634 1635 // The frontend should end parsing on an assembler token boundary, unless it 1636 // failed parsing. 1637 assert((End.getPointer() == EndPtr || 1638 Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) && 1639 "frontend claimed part of a token?"); 1640 1641 // If the identifier lookup was unsuccessful, assume that we are dealing with 1642 // a label. 1643 if (Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) { 1644 StringRef InternalName = 1645 SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(), 1646 Loc, false); 1647 assert(InternalName.size() && "We should have an internal name here."); 1648 // Push a rewrite for replacing the identifier name with the internal name. 1649 InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(), 1650 InternalName); 1651 } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal)) 1652 return false; 1653 // Create the symbol reference. 
1654 MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier); 1655 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; 1656 Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext()); 1657 return false; 1658 } 1659 1660 //ParseRoundingModeOp - Parse AVX-512 rounding mode operand 1661 std::unique_ptr<X86Operand> 1662 X86AsmParser::ParseRoundingModeOp(SMLoc Start) { 1663 MCAsmParser &Parser = getParser(); 1664 const AsmToken &Tok = Parser.getTok(); 1665 // Eat "{" and mark the current place. 1666 const SMLoc consumedToken = consumeToken(); 1667 if (Tok.isNot(AsmToken::Identifier)) 1668 return ErrorOperand(Tok.getLoc(), "Expected an identifier after {"); 1669 if (Tok.getIdentifier().startswith("r")){ 1670 int rndMode = StringSwitch<int>(Tok.getIdentifier()) 1671 .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT) 1672 .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF) 1673 .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF) 1674 .Case("rz", X86::STATIC_ROUNDING::TO_ZERO) 1675 .Default(-1); 1676 if (-1 == rndMode) 1677 return ErrorOperand(Tok.getLoc(), "Invalid rounding mode."); 1678 Parser.Lex(); // Eat "r*" of r*-sae 1679 if (!getLexer().is(AsmToken::Minus)) 1680 return ErrorOperand(Tok.getLoc(), "Expected - at this point"); 1681 Parser.Lex(); // Eat "-" 1682 Parser.Lex(); // Eat the sae 1683 if (!getLexer().is(AsmToken::RCurly)) 1684 return ErrorOperand(Tok.getLoc(), "Expected } at this point"); 1685 SMLoc End = Tok.getEndLoc(); 1686 Parser.Lex(); // Eat "}" 1687 const MCExpr *RndModeOp = 1688 MCConstantExpr::create(rndMode, Parser.getContext()); 1689 return X86Operand::CreateImm(RndModeOp, Start, End); 1690 } 1691 if(Tok.getIdentifier().equals("sae")){ 1692 Parser.Lex(); // Eat the sae 1693 if (!getLexer().is(AsmToken::RCurly)) 1694 return ErrorOperand(Tok.getLoc(), "Expected } at this point"); 1695 Parser.Lex(); // Eat "}" 1696 return X86Operand::CreateToken("{sae}", consumedToken); 1697 } 1698 return ErrorOperand(Tok.getLoc(), "unknown token in 
expression"); 1699 } 1700 1701 /// Parse the '.' operator. 1702 bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End) { 1703 const AsmToken &Tok = getTok(); 1704 unsigned Offset; 1705 1706 // Drop the optional '.'. 1707 StringRef DotDispStr = Tok.getString(); 1708 if (DotDispStr.startswith(".")) 1709 DotDispStr = DotDispStr.drop_front(1); 1710 1711 // .Imm gets lexed as a real. 1712 if (Tok.is(AsmToken::Real)) { 1713 APInt DotDisp; 1714 DotDispStr.getAsInteger(10, DotDisp); 1715 Offset = DotDisp.getZExtValue(); 1716 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) { 1717 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.'); 1718 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second, 1719 Offset)) 1720 return Error(Tok.getLoc(), "Unable to lookup field reference!"); 1721 } else 1722 return Error(Tok.getLoc(), "Unexpected token type!"); 1723 1724 // Eat the DotExpression and update End 1725 End = SMLoc::getFromPointer(DotDispStr.data()); 1726 const char *DotExprEndLoc = DotDispStr.data() + DotDispStr.size(); 1727 while (Tok.getLoc().getPointer() < DotExprEndLoc) 1728 Lex(); 1729 SM.addImm(Offset); 1730 return false; 1731 } 1732 1733 /// Parse the 'offset' operator. This operator is used to specify the 1734 /// location rather then the content of a variable. 1735 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() { 1736 MCAsmParser &Parser = getParser(); 1737 const AsmToken &Tok = Parser.getTok(); 1738 SMLoc OffsetOfLoc = Tok.getLoc(); 1739 Parser.Lex(); // Eat offset. 
1740 1741 const MCExpr *Val; 1742 InlineAsmIdentifierInfo Info; 1743 SMLoc Start = Tok.getLoc(), End; 1744 StringRef Identifier = Tok.getString(); 1745 if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, 1746 /*Unevaluated=*/false, End)) 1747 return nullptr; 1748 1749 void *Decl = nullptr; 1750 // FIXME: MS evaluates "offset <Constant>" to the underlying integral 1751 if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal)) 1752 return ErrorOperand(Start, "offset operator cannot yet handle constants"); 1753 else if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) 1754 Decl = Info.Var.Decl; 1755 // Don't emit the offset operator. 1756 InstInfo->AsmRewrites->emplace_back(AOK_Skip, OffsetOfLoc, 7); 1757 1758 // The offset operator will have an 'r' constraint, thus we need to create 1759 // register operand to ensure proper matching. Just pick a GPR based on 1760 // the size of a pointer. 1761 bool Parse32 = is32BitMode() || Code16GCC; 1762 unsigned RegNo = is64BitMode() ? X86::RBX : (Parse32 ? X86::EBX : X86::BX); 1763 1764 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true, 1765 OffsetOfLoc, Identifier, Decl); 1766 } 1767 1768 // Query a candidate string for being an Intel assembly operator 1769 // Report back its kind, or IOK_INVALID if does not evaluated as a known one 1770 unsigned X86AsmParser::IdentifyIntelInlineAsmOperator(StringRef Name) { 1771 return StringSwitch<unsigned>(Name) 1772 .Cases("TYPE","type",IOK_TYPE) 1773 .Cases("SIZE","size",IOK_SIZE) 1774 .Cases("LENGTH","length",IOK_LENGTH) 1775 .Cases("OFFSET","offset",IOK_OFFSET) 1776 .Default(IOK_INVALID); 1777 } 1778 1779 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator 1780 /// returns the number of elements in an array. It returns the value 1 for 1781 /// non-array variables. The SIZE operator returns the size of a C or C++ 1782 /// variable. A variable's size is the product of its LENGTH and TYPE. 
The 1783 /// TYPE operator returns the size of a C or C++ type or variable. If the 1784 /// variable is an array, TYPE returns the size of a single element. 1785 unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind) { 1786 MCAsmParser &Parser = getParser(); 1787 const AsmToken &Tok = Parser.getTok(); 1788 Parser.Lex(); // Eat operator. 1789 1790 const MCExpr *Val = nullptr; 1791 InlineAsmIdentifierInfo Info; 1792 SMLoc Start = Tok.getLoc(), End; 1793 StringRef Identifier = Tok.getString(); 1794 if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, 1795 /*Unevaluated=*/true, End)) 1796 return 0; 1797 1798 if (!Info.isKind(InlineAsmIdentifierInfo::IK_Var)) { 1799 Error(Start, "unable to lookup expression"); 1800 return 0; 1801 } 1802 1803 unsigned CVal = 0; 1804 switch(OpKind) { 1805 default: llvm_unreachable("Unexpected operand kind!"); 1806 case IOK_LENGTH: CVal = Info.Var.Length; break; 1807 case IOK_SIZE: CVal = Info.Var.Size; break; 1808 case IOK_TYPE: CVal = Info.Var.Type; break; 1809 } 1810 1811 return CVal; 1812 } 1813 1814 bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) { 1815 Size = StringSwitch<unsigned>(getTok().getString()) 1816 .Cases("BYTE", "byte", 8) 1817 .Cases("WORD", "word", 16) 1818 .Cases("DWORD", "dword", 32) 1819 .Cases("FLOAT", "float", 32) 1820 .Cases("LONG", "long", 32) 1821 .Cases("FWORD", "fword", 48) 1822 .Cases("DOUBLE", "double", 64) 1823 .Cases("QWORD", "qword", 64) 1824 .Cases("MMWORD","mmword", 64) 1825 .Cases("XWORD", "xword", 80) 1826 .Cases("TBYTE", "tbyte", 80) 1827 .Cases("XMMWORD", "xmmword", 128) 1828 .Cases("YMMWORD", "ymmword", 256) 1829 .Cases("ZMMWORD", "zmmword", 512) 1830 .Default(0); 1831 if (Size) { 1832 const AsmToken &Tok = Lex(); // Eat operand size (e.g., byte, word). 1833 if (!(Tok.getString().equals("PTR") || Tok.getString().equals("ptr"))) 1834 return Error(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!"); 1835 Lex(); // Eat ptr. 
1836 } 1837 return false; 1838 } 1839 1840 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() { 1841 MCAsmParser &Parser = getParser(); 1842 const AsmToken &Tok = Parser.getTok(); 1843 SMLoc Start, End; 1844 1845 // FIXME: Offset operator 1846 // Should be handled as part of immediate expression, as other operators 1847 // Currently, only supported as a stand-alone operand 1848 if (isParsingInlineAsm()) 1849 if (IdentifyIntelInlineAsmOperator(Tok.getString()) == IOK_OFFSET) 1850 return ParseIntelOffsetOfOperator(); 1851 1852 // Parse optional Size directive. 1853 unsigned Size; 1854 if (ParseIntelMemoryOperandSize(Size)) 1855 return nullptr; 1856 bool PtrInOperand = bool(Size); 1857 1858 Start = Tok.getLoc(); 1859 1860 // Rounding mode operand. 1861 if (getLexer().is(AsmToken::LCurly)) 1862 return ParseRoundingModeOp(Start); 1863 1864 // Register operand. 1865 unsigned RegNo = 0; 1866 if (Tok.is(AsmToken::Identifier) && !ParseRegister(RegNo, Start, End)) { 1867 if (RegNo == X86::RIP) 1868 return ErrorOperand(Start, "rip can only be used as a base register"); 1869 // A Register followed by ':' is considered a segment override 1870 if (Tok.isNot(AsmToken::Colon)) 1871 return !PtrInOperand ? X86Operand::CreateReg(RegNo, Start, End) : 1872 ErrorOperand(Start, "expected memory operand after 'ptr', " 1873 "found register operand instead"); 1874 // An alleged segment override. 
check if we have a valid segment register 1875 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo)) 1876 return ErrorOperand(Start, "invalid segment register"); 1877 // Eat ':' and update Start location 1878 Start = Lex().getLoc(); 1879 } 1880 1881 // Immediates and Memory 1882 IntelExprStateMachine SM; 1883 if (ParseIntelExpression(SM, End)) 1884 return nullptr; 1885 1886 if (isParsingInlineAsm()) 1887 RewriteIntelExpression(SM, Start, Tok.getLoc()); 1888 1889 int64_t Imm = SM.getImm(); 1890 const MCExpr *Disp = SM.getSym(); 1891 const MCExpr *ImmDisp = MCConstantExpr::create(Imm, getContext()); 1892 if (Disp && Imm) 1893 Disp = MCBinaryExpr::createAdd(Disp, ImmDisp, getContext()); 1894 if (!Disp) 1895 Disp = ImmDisp; 1896 1897 // RegNo != 0 specifies a valid segment register, 1898 // and we are parsing a segment override 1899 if (!SM.isMemExpr() && !RegNo) 1900 return X86Operand::CreateImm(Disp, Start, End); 1901 1902 StringRef ErrMsg; 1903 unsigned BaseReg = SM.getBaseReg(); 1904 unsigned IndexReg = SM.getIndexReg(); 1905 unsigned Scale = SM.getScale(); 1906 1907 if (Scale == 0 && BaseReg != X86::ESP && BaseReg != X86::RSP && 1908 (IndexReg == X86::ESP || IndexReg == X86::RSP)) 1909 std::swap(BaseReg, IndexReg); 1910 1911 // If BaseReg is a vector register and IndexReg is not, swap them unless 1912 // Scale was specified in which case it would be an error. 
1913 if (Scale == 0 && 1914 !(X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) || 1915 X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) || 1916 X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg)) && 1917 (X86MCRegisterClasses[X86::VR128XRegClassID].contains(BaseReg) || 1918 X86MCRegisterClasses[X86::VR256XRegClassID].contains(BaseReg) || 1919 X86MCRegisterClasses[X86::VR512RegClassID].contains(BaseReg))) 1920 std::swap(BaseReg, IndexReg); 1921 1922 if (Scale != 0 && 1923 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) 1924 return ErrorOperand(Start, "16-bit addresses cannot have a scale"); 1925 1926 // If there was no explicit scale specified, change it to 1. 1927 if (Scale == 0) 1928 Scale = 1; 1929 1930 // If this is a 16-bit addressing mode with the base and index in the wrong 1931 // order, swap them so CheckBaseRegAndIndexRegAndScale doesn't fail. It is 1932 // shared with att syntax where order matters. 1933 if ((BaseReg == X86::SI || BaseReg == X86::DI) && 1934 (IndexReg == X86::BX || IndexReg == X86::BP)) 1935 std::swap(BaseReg, IndexReg); 1936 1937 if ((BaseReg || IndexReg) && 1938 CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(), 1939 ErrMsg)) 1940 return ErrorOperand(Start, ErrMsg); 1941 if (isParsingInlineAsm()) 1942 return CreateMemForInlineAsm(RegNo, Disp, BaseReg, IndexReg, 1943 Scale, Start, End, Size, SM.getSymName(), 1944 SM.getIdentifierInfo()); 1945 if (!(BaseReg || IndexReg || RegNo)) 1946 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size); 1947 return X86Operand::CreateMem(getPointerWidth(), RegNo, Disp, 1948 BaseReg, IndexReg, Scale, Start, End, Size); 1949 } 1950 1951 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() { 1952 MCAsmParser &Parser = getParser(); 1953 switch (getLexer().getKind()) { 1954 case AsmToken::Dollar: { 1955 // $42 or $ID -> immediate. 
1956 SMLoc Start = Parser.getTok().getLoc(), End; 1957 Parser.Lex(); 1958 const MCExpr *Val; 1959 // This is an immediate, so we should not parse a register. Do a precheck 1960 // for '%' to supercede intra-register parse errors. 1961 SMLoc L = Parser.getTok().getLoc(); 1962 if (check(getLexer().is(AsmToken::Percent), L, 1963 "expected immediate expression") || 1964 getParser().parseExpression(Val, End) || 1965 check(isa<X86MCExpr>(Val), L, "expected immediate expression")) 1966 return nullptr; 1967 return X86Operand::CreateImm(Val, Start, End); 1968 } 1969 case AsmToken::LCurly: { 1970 SMLoc Start = Parser.getTok().getLoc(); 1971 return ParseRoundingModeOp(Start); 1972 } 1973 default: { 1974 // This a memory operand or a register. We have some parsing complications 1975 // as a '(' may be part of an immediate expression or the addressing mode 1976 // block. This is complicated by the fact that an assembler-level variable 1977 // may refer either to a register or an immediate expression. 1978 1979 SMLoc Loc = Parser.getTok().getLoc(), EndLoc; 1980 const MCExpr *Expr = nullptr; 1981 unsigned Reg = 0; 1982 if (getLexer().isNot(AsmToken::LParen)) { 1983 // No '(' so this is either a displacement expression or a register. 1984 if (Parser.parseExpression(Expr, EndLoc)) 1985 return nullptr; 1986 if (auto *RE = dyn_cast<X86MCExpr>(Expr)) { 1987 // Segment Register. Reset Expr and copy value to register. 1988 Expr = nullptr; 1989 Reg = RE->getRegNo(); 1990 1991 // Sanity check register. 1992 if (Reg == X86::EIZ || Reg == X86::RIZ) 1993 return ErrorOperand( 1994 Loc, "%eiz and %riz can only be used as index registers", 1995 SMRange(Loc, EndLoc)); 1996 if (Reg == X86::RIP) 1997 return ErrorOperand(Loc, "%rip can only be used as a base register", 1998 SMRange(Loc, EndLoc)); 1999 // Return register that are not segment prefixes immediately. 
2000 if (!Parser.parseOptionalToken(AsmToken::Colon)) 2001 return X86Operand::CreateReg(Reg, Loc, EndLoc); 2002 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(Reg)) 2003 return ErrorOperand(Loc, "invalid segment register"); 2004 } 2005 } 2006 // This is a Memory operand. 2007 return ParseMemOperand(Reg, Expr, Loc, EndLoc); 2008 } 2009 } 2010 } 2011 2012 // X86::COND_INVALID if not a recognized condition code or alternate mnemonic, 2013 // otherwise the EFLAGS Condition Code enumerator. 2014 X86::CondCode X86AsmParser::ParseConditionCode(StringRef CC) { 2015 return StringSwitch<X86::CondCode>(CC) 2016 .Case("o", X86::COND_O) // Overflow 2017 .Case("no", X86::COND_NO) // No Overflow 2018 .Cases("b", "nae", X86::COND_B) // Below/Neither Above nor Equal 2019 .Cases("ae", "nb", X86::COND_AE) // Above or Equal/Not Below 2020 .Cases("e", "z", X86::COND_E) // Equal/Zero 2021 .Cases("ne", "nz", X86::COND_NE) // Not Equal/Not Zero 2022 .Cases("be", "na", X86::COND_BE) // Below or Equal/Not Above 2023 .Cases("a", "nbe", X86::COND_A) // Above/Neither Below nor Equal 2024 .Case("s", X86::COND_S) // Sign 2025 .Case("ns", X86::COND_NS) // No Sign 2026 .Cases("p", "pe", X86::COND_P) // Parity/Parity Even 2027 .Cases("np", "po", X86::COND_NP) // No Parity/Parity Odd 2028 .Cases("l", "nge", X86::COND_L) // Less/Neither Greater nor Equal 2029 .Cases("ge", "nl", X86::COND_GE) // Greater or Equal/Not Less 2030 .Cases("le", "ng", X86::COND_LE) // Less or Equal/Not Greater 2031 .Cases("g", "nle", X86::COND_G) // Greater/Neither Less nor Equal 2032 .Default(X86::COND_INVALID); 2033 } 2034 2035 // true on failure, false otherwise 2036 // If no {z} mark was found - Parser doesn't advance 2037 bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z, 2038 const SMLoc &StartLoc) { 2039 MCAsmParser &Parser = getParser(); 2040 // Assuming we are just pass the '{' mark, quering the next token 2041 // Searched for {z}, but none was found. 
Return false, as no parsing error was 2042 // encountered 2043 if (!(getLexer().is(AsmToken::Identifier) && 2044 (getLexer().getTok().getIdentifier() == "z"))) 2045 return false; 2046 Parser.Lex(); // Eat z 2047 // Query and eat the '}' mark 2048 if (!getLexer().is(AsmToken::RCurly)) 2049 return Error(getLexer().getLoc(), "Expected } at this point"); 2050 Parser.Lex(); // Eat '}' 2051 // Assign Z with the {z} mark opernad 2052 Z = X86Operand::CreateToken("{z}", StartLoc); 2053 return false; 2054 } 2055 2056 // true on failure, false otherwise 2057 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands, 2058 const MCParsedAsmOperand &Op) { 2059 MCAsmParser &Parser = getParser(); 2060 if (getLexer().is(AsmToken::LCurly)) { 2061 // Eat "{" and mark the current place. 2062 const SMLoc consumedToken = consumeToken(); 2063 // Distinguish {1to<NUM>} from {%k<NUM>}. 2064 if(getLexer().is(AsmToken::Integer)) { 2065 // Parse memory broadcasting ({1to<NUM>}). 2066 if (getLexer().getTok().getIntVal() != 1) 2067 return TokError("Expected 1to<NUM> at this point"); 2068 Parser.Lex(); // Eat "1" of 1to8 2069 if (!getLexer().is(AsmToken::Identifier) || 2070 !getLexer().getTok().getIdentifier().startswith("to")) 2071 return TokError("Expected 1to<NUM> at this point"); 2072 // Recognize only reasonable suffixes. 
2073 const char *BroadcastPrimitive = 2074 StringSwitch<const char*>(getLexer().getTok().getIdentifier()) 2075 .Case("to2", "{1to2}") 2076 .Case("to4", "{1to4}") 2077 .Case("to8", "{1to8}") 2078 .Case("to16", "{1to16}") 2079 .Default(nullptr); 2080 if (!BroadcastPrimitive) 2081 return TokError("Invalid memory broadcast primitive."); 2082 Parser.Lex(); // Eat "toN" of 1toN 2083 if (!getLexer().is(AsmToken::RCurly)) 2084 return TokError("Expected } at this point"); 2085 Parser.Lex(); // Eat "}" 2086 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive, 2087 consumedToken)); 2088 // No AVX512 specific primitives can pass 2089 // after memory broadcasting, so return. 2090 return false; 2091 } else { 2092 // Parse either {k}{z}, {z}{k}, {k} or {z} 2093 // last one have no meaning, but GCC accepts it 2094 // Currently, we're just pass a '{' mark 2095 std::unique_ptr<X86Operand> Z; 2096 if (ParseZ(Z, consumedToken)) 2097 return true; 2098 // Reaching here means that parsing of the allegadly '{z}' mark yielded 2099 // no errors. 2100 // Query for the need of further parsing for a {%k<NUM>} mark 2101 if (!Z || getLexer().is(AsmToken::LCurly)) { 2102 SMLoc StartLoc = Z ? 
consumeToken() : consumedToken; 2103 // Parse an op-mask register mark ({%k<NUM>}), which is now to be 2104 // expected 2105 unsigned RegNo; 2106 SMLoc RegLoc; 2107 if (!ParseRegister(RegNo, RegLoc, StartLoc) && 2108 X86MCRegisterClasses[X86::VK1RegClassID].contains(RegNo)) { 2109 if (RegNo == X86::K0) 2110 return Error(RegLoc, "Register k0 can't be used as write mask"); 2111 if (!getLexer().is(AsmToken::RCurly)) 2112 return Error(getLexer().getLoc(), "Expected } at this point"); 2113 Operands.push_back(X86Operand::CreateToken("{", StartLoc)); 2114 Operands.push_back( 2115 X86Operand::CreateReg(RegNo, StartLoc, StartLoc)); 2116 Operands.push_back(X86Operand::CreateToken("}", consumeToken())); 2117 } else 2118 return Error(getLexer().getLoc(), 2119 "Expected an op-mask register at this point"); 2120 // {%k<NUM>} mark is found, inquire for {z} 2121 if (getLexer().is(AsmToken::LCurly) && !Z) { 2122 // Have we've found a parsing error, or found no (expected) {z} mark 2123 // - report an error 2124 if (ParseZ(Z, consumeToken()) || !Z) 2125 return Error(getLexer().getLoc(), 2126 "Expected a {z} mark at this point"); 2127 2128 } 2129 // '{z}' on its own is meaningless, hence should be ignored. 2130 // on the contrary - have it been accompanied by a K register, 2131 // allow it. 2132 if (Z) 2133 Operands.push_back(std::move(Z)); 2134 } 2135 } 2136 } 2137 return false; 2138 } 2139 2140 /// ParseMemOperand: 'seg : disp(basereg, indexreg, scale)'. The '%ds:' prefix 2141 /// has already been parsed if present. disp may be provided as well. 2142 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg, 2143 const MCExpr *&Disp, 2144 const SMLoc &StartLoc, 2145 SMLoc &EndLoc) { 2146 MCAsmParser &Parser = getParser(); 2147 SMLoc Loc; 2148 // Based on the initial passed values, we may be in any of these cases, we are 2149 // in one of these cases (with current position (*)): 2150 2151 // 1. seg : * disp (base-index-scale-expr) 2152 // 2. 
seg : *(disp) (base-index-scale-expr) 2153 // 3. seg : *(base-index-scale-expr) 2154 // 4. disp *(base-index-scale-expr) 2155 // 5. *(disp) (base-index-scale-expr) 2156 // 6. *(base-index-scale-expr) 2157 // 7. disp * 2158 // 8. *(disp) 2159 2160 // If we do not have an displacement yet, check if we're in cases 4 or 6 by 2161 // checking if the first object after the parenthesis is a register (or an 2162 // identifier referring to a register) and parse the displacement or default 2163 // to 0 as appropriate. 2164 auto isAtMemOperand = [this]() { 2165 if (this->getLexer().isNot(AsmToken::LParen)) 2166 return false; 2167 AsmToken Buf[2]; 2168 StringRef Id; 2169 auto TokCount = this->getLexer().peekTokens(Buf, true); 2170 if (TokCount == 0) 2171 return false; 2172 switch (Buf[0].getKind()) { 2173 case AsmToken::Percent: 2174 case AsmToken::Comma: 2175 return true; 2176 // These lower cases are doing a peekIdentifier. 2177 case AsmToken::At: 2178 case AsmToken::Dollar: 2179 if ((TokCount > 1) && 2180 (Buf[1].is(AsmToken::Identifier) || Buf[1].is(AsmToken::String)) && 2181 (Buf[0].getLoc().getPointer() + 1 == Buf[1].getLoc().getPointer())) 2182 Id = StringRef(Buf[0].getLoc().getPointer(), 2183 Buf[1].getIdentifier().size() + 1); 2184 break; 2185 case AsmToken::Identifier: 2186 case AsmToken::String: 2187 Id = Buf[0].getIdentifier(); 2188 break; 2189 default: 2190 return false; 2191 } 2192 // We have an ID. Check if it is bound to a register. 2193 if (!Id.empty()) { 2194 MCSymbol *Sym = this->getContext().getOrCreateSymbol(Id); 2195 if (Sym->isVariable()) { 2196 auto V = Sym->getVariableValue(/*SetUsed*/ false); 2197 return isa<X86MCExpr>(V); 2198 } 2199 } 2200 return false; 2201 }; 2202 2203 if (!Disp) { 2204 // Parse immediate if we're not at a mem operand yet. 
2205 if (!isAtMemOperand()) { 2206 if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(Disp, EndLoc)) 2207 return nullptr; 2208 assert(!isa<X86MCExpr>(Disp) && "Expected non-register here."); 2209 } else { 2210 // Disp is implicitly zero if we haven't parsed it yet. 2211 Disp = MCConstantExpr::create(0, Parser.getContext()); 2212 } 2213 } 2214 2215 // We are now either at the end of the operand or at the '(' at the start of a 2216 // base-index-scale-expr. 2217 2218 if (!parseOptionalToken(AsmToken::LParen)) { 2219 if (SegReg == 0) 2220 return X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc); 2221 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1, 2222 StartLoc, EndLoc); 2223 } 2224 2225 // If we reached here, then eat the '(' and Process 2226 // the rest of the memory operand. 2227 unsigned BaseReg = 0, IndexReg = 0, Scale = 1; 2228 SMLoc BaseLoc = getLexer().getLoc(); 2229 const MCExpr *E; 2230 StringRef ErrMsg; 2231 2232 // Parse BaseReg if one is provided. 2233 if (getLexer().isNot(AsmToken::Comma) && getLexer().isNot(AsmToken::RParen)) { 2234 if (Parser.parseExpression(E, EndLoc) || 2235 check(!isa<X86MCExpr>(E), BaseLoc, "expected register here")) 2236 return nullptr; 2237 2238 // Sanity check register. 2239 BaseReg = cast<X86MCExpr>(E)->getRegNo(); 2240 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) 2241 return ErrorOperand(BaseLoc, 2242 "eiz and riz can only be used as index registers", 2243 SMRange(BaseLoc, EndLoc)); 2244 } 2245 2246 if (parseOptionalToken(AsmToken::Comma)) { 2247 // Following the comma we should have either an index register, or a scale 2248 // value. We don't support the later form, but we want to parse it 2249 // correctly. 2250 // 2251 // Even though it would be completely consistent to support syntax like 2252 // "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this. 
2253 if (getLexer().isNot(AsmToken::RParen)) { 2254 if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(E, EndLoc)) 2255 return nullptr; 2256 2257 if (!isa<X86MCExpr>(E)) { 2258 // We've parsed an unexpected Scale Value instead of an index 2259 // register. Interpret it as an absolute. 2260 int64_t ScaleVal; 2261 if (!E->evaluateAsAbsolute(ScaleVal, getStreamer().getAssemblerPtr())) 2262 return ErrorOperand(Loc, "expected absolute expression"); 2263 if (ScaleVal != 1) 2264 Warning(Loc, "scale factor without index register is ignored"); 2265 Scale = 1; 2266 } else { // IndexReg Found. 2267 IndexReg = cast<X86MCExpr>(E)->getRegNo(); 2268 2269 if (BaseReg == X86::RIP) 2270 return ErrorOperand( 2271 Loc, "%rip as base register can not have an index register"); 2272 if (IndexReg == X86::RIP) 2273 return ErrorOperand(Loc, "%rip is not allowed as an index register"); 2274 2275 if (parseOptionalToken(AsmToken::Comma)) { 2276 // Parse the scale amount: 2277 // ::= ',' [scale-expression] 2278 2279 // A scale amount without an index is ignored. 2280 if (getLexer().isNot(AsmToken::RParen)) { 2281 int64_t ScaleVal; 2282 if (Parser.parseTokenLoc(Loc) || 2283 Parser.parseAbsoluteExpression(ScaleVal)) 2284 return ErrorOperand(Loc, "expected scale expression"); 2285 Scale = (unsigned)ScaleVal; 2286 // Validate the scale amount. 2287 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) && 2288 Scale != 1) 2289 return ErrorOperand(Loc, 2290 "scale factor in 16-bit address must be 1"); 2291 if (checkScale(Scale, ErrMsg)) 2292 return ErrorOperand(Loc, ErrMsg); 2293 } 2294 } 2295 } 2296 } 2297 } 2298 2299 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too. 2300 if (parseToken(AsmToken::RParen, "unexpected token in memory operand")) 2301 return nullptr; 2302 2303 // This is to support otherwise illegal operand (%dx) found in various 2304 // unofficial manuals examples (e.g. "out[s]?[bwl]? %al, (%dx)") and must now 2305 // be supported. 
Mark such DX variants separately fix only in special cases. 2306 if (BaseReg == X86::DX && IndexReg == 0 && Scale == 1 && SegReg == 0 && 2307 isa<MCConstantExpr>(Disp) && cast<MCConstantExpr>(Disp)->getValue() == 0) 2308 return X86Operand::CreateDXReg(BaseLoc, BaseLoc); 2309 2310 if (CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(), 2311 ErrMsg)) 2312 return ErrorOperand(BaseLoc, ErrMsg); 2313 2314 if (SegReg || BaseReg || IndexReg) 2315 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg, 2316 IndexReg, Scale, StartLoc, EndLoc); 2317 return X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc); 2318 } 2319 2320 // Parse either a standard primary expression or a register. 2321 bool X86AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { 2322 MCAsmParser &Parser = getParser(); 2323 // See if this is a register first. 2324 if (getTok().is(AsmToken::Percent) || 2325 (isParsingIntelSyntax() && getTok().is(AsmToken::Identifier) && 2326 MatchRegisterName(Parser.getTok().getString()))) { 2327 SMLoc StartLoc = Parser.getTok().getLoc(); 2328 unsigned RegNo; 2329 if (ParseRegister(RegNo, StartLoc, EndLoc)) 2330 return true; 2331 Res = X86MCExpr::create(RegNo, Parser.getContext()); 2332 return false; 2333 } 2334 return Parser.parsePrimaryExpr(Res, EndLoc); 2335 } 2336 2337 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 2338 SMLoc NameLoc, OperandVector &Operands) { 2339 MCAsmParser &Parser = getParser(); 2340 InstInfo = &Info; 2341 2342 // Reset the forced VEX encoding. 2343 ForcedVEXEncoding = VEXEncoding_Default; 2344 2345 // Parse pseudo prefixes. 2346 while (1) { 2347 if (Name == "{") { 2348 if (getLexer().isNot(AsmToken::Identifier)) 2349 return Error(Parser.getTok().getLoc(), "Unexpected token after '{'"); 2350 std::string Prefix = Parser.getTok().getString().lower(); 2351 Parser.Lex(); // Eat identifier. 
2352 if (getLexer().isNot(AsmToken::RCurly)) 2353 return Error(Parser.getTok().getLoc(), "Expected '}'"); 2354 Parser.Lex(); // Eat curly. 2355 2356 if (Prefix == "vex2") 2357 ForcedVEXEncoding = VEXEncoding_VEX2; 2358 else if (Prefix == "vex3") 2359 ForcedVEXEncoding = VEXEncoding_VEX3; 2360 else if (Prefix == "evex") 2361 ForcedVEXEncoding = VEXEncoding_EVEX; 2362 else 2363 return Error(NameLoc, "unknown prefix"); 2364 2365 NameLoc = Parser.getTok().getLoc(); 2366 if (getLexer().is(AsmToken::LCurly)) { 2367 Parser.Lex(); 2368 Name = "{"; 2369 } else { 2370 if (getLexer().isNot(AsmToken::Identifier)) 2371 return Error(Parser.getTok().getLoc(), "Expected identifier"); 2372 // FIXME: The mnemonic won't match correctly if its not in lower case. 2373 Name = Parser.getTok().getString(); 2374 Parser.Lex(); 2375 } 2376 continue; 2377 } 2378 2379 break; 2380 } 2381 2382 StringRef PatchedName = Name; 2383 2384 // Hack to skip "short" following Jcc. 2385 if (isParsingIntelSyntax() && 2386 (PatchedName == "jmp" || PatchedName == "jc" || PatchedName == "jnc" || 2387 PatchedName == "jcxz" || PatchedName == "jexcz" || 2388 (PatchedName.startswith("j") && 2389 ParseConditionCode(PatchedName.substr(1)) != X86::COND_INVALID))) { 2390 StringRef NextTok = Parser.getTok().getString(); 2391 if (NextTok == "short") { 2392 SMLoc NameEndLoc = 2393 NameLoc.getFromPointer(NameLoc.getPointer() + Name.size()); 2394 // Eat the short keyword. 2395 Parser.Lex(); 2396 // MS and GAS ignore the short keyword; they both determine the jmp type 2397 // based on the distance of the label. (NASM does emit different code with 2398 // and without "short," though.) 2399 InstInfo->AsmRewrites->emplace_back(AOK_Skip, NameEndLoc, 2400 NextTok.size() + 1); 2401 } 2402 } 2403 2404 // FIXME: Hack to recognize setneb as setne. 
2405 if (PatchedName.startswith("set") && PatchedName.endswith("b") && 2406 PatchedName != "setb" && PatchedName != "setnb") 2407 PatchedName = PatchedName.substr(0, Name.size()-1); 2408 2409 unsigned ComparisonPredicate = ~0U; 2410 2411 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}. 2412 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) && 2413 (PatchedName.endswith("ss") || PatchedName.endswith("sd") || 2414 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) { 2415 bool IsVCMP = PatchedName[0] == 'v'; 2416 unsigned CCIdx = IsVCMP ? 4 : 3; 2417 unsigned CC = StringSwitch<unsigned>( 2418 PatchedName.slice(CCIdx, PatchedName.size() - 2)) 2419 .Case("eq", 0x00) 2420 .Case("eq_oq", 0x00) 2421 .Case("lt", 0x01) 2422 .Case("lt_os", 0x01) 2423 .Case("le", 0x02) 2424 .Case("le_os", 0x02) 2425 .Case("unord", 0x03) 2426 .Case("unord_q", 0x03) 2427 .Case("neq", 0x04) 2428 .Case("neq_uq", 0x04) 2429 .Case("nlt", 0x05) 2430 .Case("nlt_us", 0x05) 2431 .Case("nle", 0x06) 2432 .Case("nle_us", 0x06) 2433 .Case("ord", 0x07) 2434 .Case("ord_q", 0x07) 2435 /* AVX only from here */ 2436 .Case("eq_uq", 0x08) 2437 .Case("nge", 0x09) 2438 .Case("nge_us", 0x09) 2439 .Case("ngt", 0x0A) 2440 .Case("ngt_us", 0x0A) 2441 .Case("false", 0x0B) 2442 .Case("false_oq", 0x0B) 2443 .Case("neq_oq", 0x0C) 2444 .Case("ge", 0x0D) 2445 .Case("ge_os", 0x0D) 2446 .Case("gt", 0x0E) 2447 .Case("gt_os", 0x0E) 2448 .Case("true", 0x0F) 2449 .Case("true_uq", 0x0F) 2450 .Case("eq_os", 0x10) 2451 .Case("lt_oq", 0x11) 2452 .Case("le_oq", 0x12) 2453 .Case("unord_s", 0x13) 2454 .Case("neq_us", 0x14) 2455 .Case("nlt_uq", 0x15) 2456 .Case("nle_uq", 0x16) 2457 .Case("ord_s", 0x17) 2458 .Case("eq_us", 0x18) 2459 .Case("nge_uq", 0x19) 2460 .Case("ngt_uq", 0x1A) 2461 .Case("false_os", 0x1B) 2462 .Case("neq_os", 0x1C) 2463 .Case("ge_oq", 0x1D) 2464 .Case("gt_oq", 0x1E) 2465 .Case("true_us", 0x1F) 2466 .Default(~0U); 2467 if (CC != ~0U && (IsVCMP || CC < 8)) { 2468 if 
(PatchedName.endswith("ss")) 2469 PatchedName = IsVCMP ? "vcmpss" : "cmpss"; 2470 else if (PatchedName.endswith("sd")) 2471 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd"; 2472 else if (PatchedName.endswith("ps")) 2473 PatchedName = IsVCMP ? "vcmpps" : "cmpps"; 2474 else if (PatchedName.endswith("pd")) 2475 PatchedName = IsVCMP ? "vcmppd" : "cmppd"; 2476 else 2477 llvm_unreachable("Unexpected suffix!"); 2478 2479 ComparisonPredicate = CC; 2480 } 2481 } 2482 2483 // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}. 2484 if (PatchedName.startswith("vpcmp") && 2485 (PatchedName.back() == 'b' || PatchedName.back() == 'w' || 2486 PatchedName.back() == 'd' || PatchedName.back() == 'q')) { 2487 unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1; 2488 unsigned CC = StringSwitch<unsigned>( 2489 PatchedName.slice(5, PatchedName.size() - SuffixSize)) 2490 .Case("eq", 0x0) // Only allowed on unsigned. Checked below. 2491 .Case("lt", 0x1) 2492 .Case("le", 0x2) 2493 //.Case("false", 0x3) // Not a documented alias. 2494 .Case("neq", 0x4) 2495 .Case("nlt", 0x5) 2496 .Case("nle", 0x6) 2497 //.Case("true", 0x7) // Not a documented alias. 2498 .Default(~0U); 2499 if (CC != ~0U && (CC != 0 || SuffixSize == 2)) { 2500 switch (PatchedName.back()) { 2501 default: llvm_unreachable("Unexpected character!"); 2502 case 'b': PatchedName = SuffixSize == 2 ? "vpcmpub" : "vpcmpb"; break; 2503 case 'w': PatchedName = SuffixSize == 2 ? "vpcmpuw" : "vpcmpw"; break; 2504 case 'd': PatchedName = SuffixSize == 2 ? "vpcmpud" : "vpcmpd"; break; 2505 case 'q': PatchedName = SuffixSize == 2 ? "vpcmpuq" : "vpcmpq"; break; 2506 } 2507 // Set up the immediate to push into the operands later. 2508 ComparisonPredicate = CC; 2509 } 2510 } 2511 2512 // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}. 
2513 if (PatchedName.startswith("vpcom") && 2514 (PatchedName.back() == 'b' || PatchedName.back() == 'w' || 2515 PatchedName.back() == 'd' || PatchedName.back() == 'q')) { 2516 unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1; 2517 unsigned CC = StringSwitch<unsigned>( 2518 PatchedName.slice(5, PatchedName.size() - SuffixSize)) 2519 .Case("lt", 0x0) 2520 .Case("le", 0x1) 2521 .Case("gt", 0x2) 2522 .Case("ge", 0x3) 2523 .Case("eq", 0x4) 2524 .Case("neq", 0x5) 2525 .Case("false", 0x6) 2526 .Case("true", 0x7) 2527 .Default(~0U); 2528 if (CC != ~0U) { 2529 switch (PatchedName.back()) { 2530 default: llvm_unreachable("Unexpected character!"); 2531 case 'b': PatchedName = SuffixSize == 2 ? "vpcomub" : "vpcomb"; break; 2532 case 'w': PatchedName = SuffixSize == 2 ? "vpcomuw" : "vpcomw"; break; 2533 case 'd': PatchedName = SuffixSize == 2 ? "vpcomud" : "vpcomd"; break; 2534 case 'q': PatchedName = SuffixSize == 2 ? "vpcomuq" : "vpcomq"; break; 2535 } 2536 // Set up the immediate to push into the operands later. 2537 ComparisonPredicate = CC; 2538 } 2539 } 2540 2541 2542 // Determine whether this is an instruction prefix. 2543 // FIXME: 2544 // Enhance prefixes integrity robustness. 
for example, following forms 2545 // are currently tolerated: 2546 // repz repnz <insn> ; GAS errors for the use of two similar prefixes 2547 // lock addq %rax, %rbx ; Destination operand must be of memory type 2548 // xacquire <insn> ; xacquire must be accompanied by 'lock' 2549 bool isPrefix = StringSwitch<bool>(Name) 2550 .Cases("rex64", "data32", "data16", true) 2551 .Cases("xacquire", "xrelease", true) 2552 .Cases("acquire", "release", isParsingIntelSyntax()) 2553 .Default(false); 2554 2555 auto isLockRepeatNtPrefix = [](StringRef N) { 2556 return StringSwitch<bool>(N) 2557 .Cases("lock", "rep", "repe", "repz", "repne", "repnz", "notrack", true) 2558 .Default(false); 2559 }; 2560 2561 bool CurlyAsEndOfStatement = false; 2562 2563 unsigned Flags = X86::IP_NO_PREFIX; 2564 while (isLockRepeatNtPrefix(Name.lower())) { 2565 unsigned Prefix = 2566 StringSwitch<unsigned>(Name) 2567 .Cases("lock", "lock", X86::IP_HAS_LOCK) 2568 .Cases("rep", "repe", "repz", X86::IP_HAS_REPEAT) 2569 .Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE) 2570 .Cases("notrack", "notrack", X86::IP_HAS_NOTRACK) 2571 .Default(X86::IP_NO_PREFIX); // Invalid prefix (impossible) 2572 Flags |= Prefix; 2573 if (getLexer().is(AsmToken::EndOfStatement)) { 2574 // We don't have real instr with the given prefix 2575 // let's use the prefix as the instr. 2576 // TODO: there could be several prefixes one after another 2577 Flags = X86::IP_NO_PREFIX; 2578 break; 2579 } 2580 // FIXME: The mnemonic won't match correctly if its not in lower case. 2581 Name = Parser.getTok().getString(); 2582 Parser.Lex(); // eat the prefix 2583 // Hack: we could have something like "rep # some comment" or 2584 // "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl" 2585 while (Name.startswith(";") || Name.startswith("\n") || 2586 Name.startswith("#") || Name.startswith("\t") || 2587 Name.startswith("/")) { 2588 // FIXME: The mnemonic won't match correctly if its not in lower case. 
2589 Name = Parser.getTok().getString(); 2590 Parser.Lex(); // go to next prefix or instr 2591 } 2592 } 2593 2594 if (Flags) 2595 PatchedName = Name; 2596 2597 // Hacks to handle 'data16' and 'data32' 2598 if (PatchedName == "data16" && is16BitMode()) { 2599 return Error(NameLoc, "redundant data16 prefix"); 2600 } 2601 if (PatchedName == "data32") { 2602 if (is32BitMode()) 2603 return Error(NameLoc, "redundant data32 prefix"); 2604 if (is64BitMode()) 2605 return Error(NameLoc, "'data32' is not supported in 64-bit mode"); 2606 // Hack to 'data16' for the table lookup. 2607 PatchedName = "data16"; 2608 } 2609 2610 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); 2611 2612 // Push the immediate if we extracted one from the mnemonic. 2613 if (ComparisonPredicate != ~0U && !isParsingIntelSyntax()) { 2614 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate, 2615 getParser().getContext()); 2616 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc)); 2617 } 2618 2619 // This does the actual operand parsing. Don't parse any more if we have a 2620 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we 2621 // just want to parse the "lock" as the first instruction and the "incl" as 2622 // the next one. 2623 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) { 2624 // Parse '*' modifier. 2625 if (getLexer().is(AsmToken::Star)) 2626 Operands.push_back(X86Operand::CreateToken("*", consumeToken())); 2627 2628 // Read the operands. 
2629 while(1) { 2630 if (std::unique_ptr<X86Operand> Op = ParseOperand()) { 2631 Operands.push_back(std::move(Op)); 2632 if (HandleAVX512Operand(Operands, *Operands.back())) 2633 return true; 2634 } else { 2635 return true; 2636 } 2637 // check for comma and eat it 2638 if (getLexer().is(AsmToken::Comma)) 2639 Parser.Lex(); 2640 else 2641 break; 2642 } 2643 2644 // In MS inline asm curly braces mark the beginning/end of a block, 2645 // therefore they should be interepreted as end of statement 2646 CurlyAsEndOfStatement = 2647 isParsingIntelSyntax() && isParsingInlineAsm() && 2648 (getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly)); 2649 if (getLexer().isNot(AsmToken::EndOfStatement) && !CurlyAsEndOfStatement) 2650 return TokError("unexpected token in argument list"); 2651 } 2652 2653 // Push the immediate if we extracted one from the mnemonic. 2654 if (ComparisonPredicate != ~0U && isParsingIntelSyntax()) { 2655 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate, 2656 getParser().getContext()); 2657 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc)); 2658 } 2659 2660 // Consume the EndOfStatement or the prefix separator Slash 2661 if (getLexer().is(AsmToken::EndOfStatement) || 2662 (isPrefix && getLexer().is(AsmToken::Slash))) 2663 Parser.Lex(); 2664 else if (CurlyAsEndOfStatement) 2665 // Add an actual EndOfStatement before the curly brace 2666 Info.AsmRewrites->emplace_back(AOK_EndOfStatement, 2667 getLexer().getTok().getLoc(), 0); 2668 2669 // This is for gas compatibility and cannot be done in td. 2670 // Adding "p" for some floating point with no argument. 
2671 // For example: fsub --> fsubp 2672 bool IsFp = 2673 Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr"; 2674 if (IsFp && Operands.size() == 1) { 2675 const char *Repl = StringSwitch<const char *>(Name) 2676 .Case("fsub", "fsubp") 2677 .Case("fdiv", "fdivp") 2678 .Case("fsubr", "fsubrp") 2679 .Case("fdivr", "fdivrp"); 2680 static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl); 2681 } 2682 2683 if ((Name == "mov" || Name == "movw" || Name == "movl") && 2684 (Operands.size() == 3)) { 2685 X86Operand &Op1 = (X86Operand &)*Operands[1]; 2686 X86Operand &Op2 = (X86Operand &)*Operands[2]; 2687 SMLoc Loc = Op1.getEndLoc(); 2688 // Moving a 32 or 16 bit value into a segment register has the same 2689 // behavior. Modify such instructions to always take shorter form. 2690 if (Op1.isReg() && Op2.isReg() && 2691 X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains( 2692 Op2.getReg()) && 2693 (X86MCRegisterClasses[X86::GR16RegClassID].contains(Op1.getReg()) || 2694 X86MCRegisterClasses[X86::GR32RegClassID].contains(Op1.getReg()))) { 2695 // Change instruction name to match new instruction. 2696 if (Name != "mov" && Name[3] == (is16BitMode() ? 'l' : 'w')) { 2697 Name = is16BitMode() ? "movw" : "movl"; 2698 Operands[0] = X86Operand::CreateToken(Name, NameLoc); 2699 } 2700 // Select the correct equivalent 16-/32-bit source register. 2701 unsigned Reg = 2702 getX86SubSuperRegisterOrZero(Op1.getReg(), is16BitMode() ? 16 : 32); 2703 Operands[1] = X86Operand::CreateReg(Reg, Loc, Loc); 2704 } 2705 } 2706 2707 // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" -> 2708 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely 2709 // documented form in various unofficial manuals, so a lot of code uses it. 
2710 if ((Name == "outb" || Name == "outsb" || Name == "outw" || Name == "outsw" || 2711 Name == "outl" || Name == "outsl" || Name == "out" || Name == "outs") && 2712 Operands.size() == 3) { 2713 X86Operand &Op = (X86Operand &)*Operands.back(); 2714 if (Op.isDXReg()) 2715 Operands.back() = X86Operand::CreateReg(X86::DX, Op.getStartLoc(), 2716 Op.getEndLoc()); 2717 } 2718 // Same hack for "in[s]?[bwl]? (%dx), %al" -> "inb %dx, %al". 2719 if ((Name == "inb" || Name == "insb" || Name == "inw" || Name == "insw" || 2720 Name == "inl" || Name == "insl" || Name == "in" || Name == "ins") && 2721 Operands.size() == 3) { 2722 X86Operand &Op = (X86Operand &)*Operands[1]; 2723 if (Op.isDXReg()) 2724 Operands[1] = X86Operand::CreateReg(X86::DX, Op.getStartLoc(), 2725 Op.getEndLoc()); 2726 } 2727 2728 SmallVector<std::unique_ptr<MCParsedAsmOperand>, 2> TmpOperands; 2729 bool HadVerifyError = false; 2730 2731 // Append default arguments to "ins[bwld]" 2732 if (Name.startswith("ins") && 2733 (Operands.size() == 1 || Operands.size() == 3) && 2734 (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" || 2735 Name == "ins")) { 2736 2737 AddDefaultSrcDestOperands(TmpOperands, 2738 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc), 2739 DefaultMemDIOperand(NameLoc)); 2740 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands); 2741 } 2742 2743 // Append default arguments to "outs[bwld]" 2744 if (Name.startswith("outs") && 2745 (Operands.size() == 1 || Operands.size() == 3) && 2746 (Name == "outsb" || Name == "outsw" || Name == "outsl" || 2747 Name == "outsd" || Name == "outs")) { 2748 AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc), 2749 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc)); 2750 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands); 2751 } 2752 2753 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate 2754 // values of $SIREG according to the mode. 
It would be nice if this 2755 // could be achieved with InstAlias in the tables. 2756 if (Name.startswith("lods") && 2757 (Operands.size() == 1 || Operands.size() == 2) && 2758 (Name == "lods" || Name == "lodsb" || Name == "lodsw" || 2759 Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) { 2760 TmpOperands.push_back(DefaultMemSIOperand(NameLoc)); 2761 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands); 2762 } 2763 2764 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate 2765 // values of $DIREG according to the mode. It would be nice if this 2766 // could be achieved with InstAlias in the tables. 2767 if (Name.startswith("stos") && 2768 (Operands.size() == 1 || Operands.size() == 2) && 2769 (Name == "stos" || Name == "stosb" || Name == "stosw" || 2770 Name == "stosl" || Name == "stosd" || Name == "stosq")) { 2771 TmpOperands.push_back(DefaultMemDIOperand(NameLoc)); 2772 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands); 2773 } 2774 2775 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate 2776 // values of $DIREG according to the mode. It would be nice if this 2777 // could be achieved with InstAlias in the tables. 2778 if (Name.startswith("scas") && 2779 (Operands.size() == 1 || Operands.size() == 2) && 2780 (Name == "scas" || Name == "scasb" || Name == "scasw" || 2781 Name == "scasl" || Name == "scasd" || Name == "scasq")) { 2782 TmpOperands.push_back(DefaultMemDIOperand(NameLoc)); 2783 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands); 2784 } 2785 2786 // Add default SI and DI operands to "cmps[bwlq]". 
2787 if (Name.startswith("cmps") && 2788 (Operands.size() == 1 || Operands.size() == 3) && 2789 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" || 2790 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) { 2791 AddDefaultSrcDestOperands(TmpOperands, DefaultMemDIOperand(NameLoc), 2792 DefaultMemSIOperand(NameLoc)); 2793 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands); 2794 } 2795 2796 // Add default SI and DI operands to "movs[bwlq]". 2797 if (((Name.startswith("movs") && 2798 (Name == "movs" || Name == "movsb" || Name == "movsw" || 2799 Name == "movsl" || Name == "movsd" || Name == "movsq")) || 2800 (Name.startswith("smov") && 2801 (Name == "smov" || Name == "smovb" || Name == "smovw" || 2802 Name == "smovl" || Name == "smovd" || Name == "smovq"))) && 2803 (Operands.size() == 1 || Operands.size() == 3)) { 2804 if (Name == "movsd" && Operands.size() == 1 && !isParsingIntelSyntax()) 2805 Operands.back() = X86Operand::CreateToken("movsl", NameLoc); 2806 AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc), 2807 DefaultMemDIOperand(NameLoc)); 2808 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands); 2809 } 2810 2811 // Check if we encountered an error for one the string insturctions 2812 if (HadVerifyError) { 2813 return HadVerifyError; 2814 } 2815 2816 // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to 2817 // "shift <op>". 
// An explicit constant count of 1 is dropped, leaving just "shift <op>".
  if ((Name.startswith("shr") || Name.startswith("sar") ||
       Name.startswith("shl") || Name.startswith("sal") ||
       Name.startswith("rcl") || Name.startswith("rcr") ||
       Name.startswith("rol") || Name.startswith("ror")) &&
      Operands.size() == 3) {
    if (isParsingIntelSyntax()) {
      // Intel syntax: the count is checked in the last operand slot.
      X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
      if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
          cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
        Operands.pop_back();
    } else {
      // Otherwise the count is checked in the slot right after the mnemonic.
      X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
      if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
          cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
        Operands.erase(Operands.begin() + 1);
    }
  }

  // Transforms "int $3" into "int3" as a size optimization.  We can't write an
  // instalias with an immediate operand yet.
  if (Name == "int" && Operands.size() == 2) {
    X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
    if (Op1.isImm())
      if (auto *CE = dyn_cast<MCConstantExpr>(Op1.getImm()))
        if (CE->getValue() == 3) {
          // Remove the immediate and rename the mnemonic token in place.
          Operands.erase(Operands.begin() + 1);
          static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
        }
  }

  // Transforms "xlat mem8" into "xlatb"
  if ((Name == "xlat" || Name == "xlatb") && Operands.size() == 2) {
    X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
    if (Op1.isMem8()) {
      // The memory operand is discarded after warning the user about it.
      Warning(Op1.getStartLoc(), "memory operand is only for determining the "
                                 "size, (R|E)BX will be used for the location");
      Operands.pop_back();
      static_cast<X86Operand &>(*Operands[0]).setTokenValue("xlatb");
    }
  }

  // Any collected prefix flags travel as a trailing prefix operand; the
  // matchers below strip it again via getPrefixes().
  if (Flags)
    Operands.push_back(X86Operand::CreatePrefix(Flags, NameLoc, NameLoc));
  return false;
}

/// Post-match rewrite hook: switches selected VEX register-to-register moves
/// to their "_REV" opcode (VMOVPQI2QIrr for VMOVZPQILo2PQIrr) when that gives
/// a smaller encoding.
///
/// \param Inst the matched instruction; only its opcode is modified.
/// \param Ops  the parsed operands (not used by this function).
/// \returns true if the opcode was changed, false otherwise. Callers loop on
///          this function until it returns false.
bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
  const MCRegisterInfo *MRI = getContext().getRegisterInfo();

  switch (Inst.getOpcode()) {
  default: return false;
  case X86::VMOVZPQILo2PQIrr:
  case X86::VMOVAPDrr:
  case X86::VMOVAPDYrr:
  case X86::VMOVAPSrr:
  case X86::VMOVAPSYrr:
  case X86::VMOVDQArr:
  case X86::VMOVDQAYrr:
  case X86::VMOVDQUrr:
  case X86::VMOVDQUYrr:
  case X86::VMOVUPDrr:
  case X86::VMOVUPDYrr:
  case X86::VMOVUPSrr:
  case X86::VMOVUPSYrr: {
    // We can get a smaller encoding by using VEX.R instead of VEX.B if one of
    // the registers is extended, but other isn't.
    // Only rewrite when operand 0 encodes below 8 and operand 1 encodes at 8
    // or above, and VEX3 has not been forced.
    if (ForcedVEXEncoding == VEXEncoding_VEX3 ||
        MRI->getEncodingValue(Inst.getOperand(0).getReg()) >= 8 ||
        MRI->getEncodingValue(Inst.getOperand(1).getReg()) < 8)
      return false;

    unsigned NewOpc;
    switch (Inst.getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr;   break;
    case X86::VMOVAPDrr:        NewOpc = X86::VMOVAPDrr_REV;  break;
    case X86::VMOVAPDYrr:       NewOpc = X86::VMOVAPDYrr_REV; break;
    case X86::VMOVAPSrr:        NewOpc = X86::VMOVAPSrr_REV;  break;
    case X86::VMOVAPSYrr:       NewOpc = X86::VMOVAPSYrr_REV; break;
    case X86::VMOVDQArr:        NewOpc = X86::VMOVDQArr_REV;  break;
    case X86::VMOVDQAYrr:       NewOpc = X86::VMOVDQAYrr_REV; break;
    case X86::VMOVDQUrr:        NewOpc = X86::VMOVDQUrr_REV;  break;
    case X86::VMOVDQUYrr:       NewOpc = X86::VMOVDQUYrr_REV; break;
    case X86::VMOVUPDrr:        NewOpc = X86::VMOVUPDrr_REV;  break;
    case X86::VMOVUPDYrr:       NewOpc = X86::VMOVUPDYrr_REV; break;
    case X86::VMOVUPSrr:        NewOpc = X86::VMOVUPSrr_REV;  break;
    case X86::VMOVUPSYrr:       NewOpc = X86::VMOVUPSYrr_REV; break;
    }
    Inst.setOpcode(NewOpc);
    return true;
  }
  case X86::VMOVSDrr:
  case X86::VMOVSSrr: {
    // We can get a smaller encoding by using VEX.R instead of VEX.B if one of
    // the registers is extended, but other isn't.
    // Same test as above, but the second register inspected is operand 2.
    if (ForcedVEXEncoding == VEXEncoding_VEX3 ||
        MRI->getEncodingValue(Inst.getOperand(0).getReg()) >= 8 ||
        MRI->getEncodingValue(Inst.getOperand(2).getReg()) < 8)
      return false;

    unsigned NewOpc;
    switch (Inst.getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
    case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
    }
    Inst.setOpcode(NewOpc);
    return true;
  }
  }
}

/// Checks a matched instruction for suspicious register usage and reports it
/// through Warning().
///
/// \param Inst the matched instruction to inspect.
/// \param Ops  the parsed operands; Ops[0] supplies the diagnostic location.
/// \returns the result of Warning() when a diagnostic is issued, false
///          otherwise. Callers treat a true result as a failed match.
bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
  const MCRegisterInfo *MRI = getContext().getRegisterInfo();

  switch (Inst.getOpcode()) {
  case X86::VGATHERDPDYrm:
  case X86::VGATHERDPDrm:
  case X86::VGATHERDPSYrm:
  case X86::VGATHERDPSrm:
  case X86::VGATHERQPDYrm:
  case X86::VGATHERQPDrm:
  case X86::VGATHERQPSYrm:
  case X86::VGATHERQPSrm:
  case X86::VPGATHERDDYrm:
  case X86::VPGATHERDDrm:
  case X86::VPGATHERDQYrm:
  case X86::VPGATHERDQrm:
  case X86::VPGATHERQDYrm:
  case X86::VPGATHERQDrm:
  case X86::VPGATHERQQYrm:
  case X86::VPGATHERQQrm: {
    // Operand 0 is read as the destination, operand 1 as the mask, and the
    // index register is taken from the address operands that start at 3.
    unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
    unsigned Mask = MRI->getEncodingValue(Inst.getOperand(1).getReg());
    unsigned Index =
      MRI->getEncodingValue(Inst.getOperand(3 + X86::AddrIndexReg).getReg());
    if (Dest == Mask || Dest == Index || Mask == Index)
      return Warning(Ops[0]->getStartLoc(), "mask, index, and destination "
                                            "registers should be distinct");
    break;
  }
  case X86::VGATHERDPDZ128rm:
  case X86::VGATHERDPDZ256rm:
  case X86::VGATHERDPDZrm:
  case X86::VGATHERDPSZ128rm:
  case X86::VGATHERDPSZ256rm:
  case X86::VGATHERDPSZrm:
  case X86::VGATHERQPDZ128rm:
  case X86::VGATHERQPDZ256rm:
  case X86::VGATHERQPDZrm:
  case X86::VGATHERQPSZ128rm:
  case X86::VGATHERQPSZ256rm:
  case X86::VGATHERQPSZrm:
  case X86::VPGATHERDDZ128rm:
  case X86::VPGATHERDDZ256rm:
  case X86::VPGATHERDDZrm:
  case X86::VPGATHERDQZ128rm:
  case X86::VPGATHERDQZ256rm:
  case X86::VPGATHERDQZrm:
  case X86::VPGATHERQDZ128rm:
  case X86::VPGATHERQDZ256rm:
  case X86::VPGATHERQDZrm:
  case X86::VPGATHERQQZ128rm:
  case X86::VPGATHERQQZ256rm:
  case X86::VPGATHERQQZrm: {
    // For these opcodes the address operands start at 4, and only the
    // destination and index registers are compared.
    unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
    unsigned Index =
      MRI->getEncodingValue(Inst.getOperand(4 + X86::AddrIndexReg).getReg());
    if (Dest == Index)
      return Warning(Ops[0]->getStartLoc(), "index and destination registers "
                                            "should be distinct");
    break;
  }
  case X86::V4FMADDPSrm:
  case X86::V4FMADDPSrmk:
  case X86::V4FMADDPSrmkz:
  case X86::V4FMADDSSrm:
  case X86::V4FMADDSSrmk:
  case X86::V4FMADDSSrmkz:
  case X86::V4FNMADDPSrm:
  case X86::V4FNMADDPSrmk:
  case X86::V4FNMADDPSrmkz:
  case X86::V4FNMADDSSrm:
  case X86::V4FNMADDSSrmk:
  case X86::V4FNMADDSSrmkz:
  case X86::VP4DPWSSDSrm:
  case X86::VP4DPWSSDSrmk:
  case X86::VP4DPWSSDSrmkz:
  case X86::VP4DPWSSDrm:
  case X86::VP4DPWSSDrmk:
  case X86::VP4DPWSSDrmkz: {
    // The register examined is the operand immediately before the trailing
    // X86::AddrNumOperands address operands.
    unsigned Src2 = Inst.getOperand(Inst.getNumOperands() -
                                    X86::AddrNumOperands - 1).getReg();
    unsigned Src2Enc = MRI->getEncodingValue(Src2);
    if (Src2Enc % 4 != 0) {
      // Warn when the register's encoding is not a multiple of four, naming
      // the aligned group of four registers it falls into.
      StringRef RegName = X86IntelInstPrinter::getRegisterName(Src2);
      unsigned GroupStart = (Src2Enc / 4) * 4;
      unsigned GroupEnd = GroupStart + 3;
      // take_front(3) keeps the first three characters of the register name;
      // the group bounds are appended as numbers.
      return Warning(Ops[0]->getStartLoc(),
                     "source register '" + RegName + "' implicitly denotes '" +
                     RegName.take_front(3) + Twine(GroupStart) + "' to '" +
                     RegName.take_front(3) + Twine(GroupEnd) +
                     "' source group");
    }
    break;
  }
  }

  return false;
}

// Forward declaration; GET_SUBTARGET_FEATURE_NAME is defined before the
// generated matcher include at the end of this file, which presumably
// provides the definition - confirm.
static const char *getSubtargetFeatureName(uint64_t Val);

/// Forwards \p Inst to \p Out together with the current subtarget info.
/// \p Operands is accepted but not used here.
void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
                                   MCStreamer &Out) {
  Out.EmitInstruction(Inst, getSTI());
}

/// Entry point for matching: dispatches to the Intel or the AT&T matcher
/// depending on isParsingIntelSyntax(), passing all arguments through.
bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                           OperandVector &Operands,
                                           MCStreamer &Out, uint64_t &ErrorInfo,
                                           bool MatchingInlineAsm) {
  if (isParsingIntelSyntax())
    return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
                                        MatchingInlineAsm);
  return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
                                    MatchingInlineAsm);
}

/// Expands the listed x87 mnemonics into a WAIT instruction followed by the
/// corresponding "fn*" mnemonic.
///
/// When the token of \p Op is one of the listed names, a WAIT instruction is
/// emitted (skipped when \p MatchingInlineAsm is set) and Operands[0] is
/// replaced by a new token carrying the replacement mnemonic. Other mnemonics
/// are left untouched.
void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
                                     OperandVector &Operands, MCStreamer &Out,
                                     bool MatchingInlineAsm) {
  // FIXME: This should be replaced with a real .td file alias mechanism.
  // Also, MatchInstructionImpl should actually *do* the EmitInstruction
  // call.
  const char *Repl = StringSwitch<const char *>(Op.getToken())
                         .Case("finit", "fninit")
                         .Case("fsave", "fnsave")
                         .Case("fstcw", "fnstcw")
                         .Case("fstcww", "fnstcw")
                         .Case("fstenv", "fnstenv")
                         .Case("fstsw", "fnstsw")
                         .Case("fstsww", "fnstsw")
                         .Case("fclex", "fnclex")
                         .Default(nullptr);
  if (Repl) {
    MCInst Inst;
    Inst.setOpcode(X86::WAIT);
    Inst.setLoc(IDLoc);
    if (!MatchingInlineAsm)
      EmitInstruction(Inst, Operands, Out);
    // Replacing Operands[0] destroys the previous mnemonic operand, so
    // callers re-read Operands[0] after this call.
    Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
  }
}

/// Reports an "instruction requires: <features>" error at \p IDLoc, listing
/// the name of every bit set in \p MissingFeatures.
///
/// \returns the result of Error().
bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc,
                                       const FeatureBitset &MissingFeatures,
                                       bool MatchingInlineAsm) {
  assert(MissingFeatures.any() && "Unknown missing feature!");
  SmallString<126> Msg;
  raw_svector_ostream OS(Msg);
  OS << "instruction requires:";
  for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i) {
    if (MissingFeatures[i])
      OS << ' ' << getSubtargetFeatureName(i);
  }
  return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm);
}

/// If the last operand is a prefix operand, removes it from \p Operands and
/// returns its prefix flags; otherwise returns 0 and leaves \p Operands
/// unchanged. \p Operands must not be empty (Operands.back() is read
/// unconditionally).
static unsigned getPrefixes(OperandVector &Operands) {
  unsigned Result = 0;
  X86Operand &Prefix = static_cast<X86Operand &>(*Operands.back());
  if (Prefix.isPrefix()) {
    Result = Prefix.getPrefix();
    Operands.pop_back();
  }
  return Result;
}

/// Target-specific filter applied to a candidate match.
///
/// Rejects (Match_Unsupported) candidates whose encoding conflicts with a
/// forced VEX/EVEX encoding, and a fixed list of EVEX conversion opcodes
/// unless EVEX encoding is being forced. Returns Match_Success otherwise.
unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &MCID = MII.get(Opc);

  // Forced EVEX: only EVEX-encoded candidates are acceptable.
  if (ForcedVEXEncoding == VEXEncoding_EVEX &&
      (MCID.TSFlags & X86II::EncodingMask) != X86II::EVEX)
    return Match_Unsupported;

  // Forced VEX2/VEX3: only VEX-encoded candidates are acceptable.
  if ((ForcedVEXEncoding == VEXEncoding_VEX2 ||
       ForcedVEXEncoding == VEXEncoding_VEX3) &&
      (MCID.TSFlags & X86II::EncodingMask) != X86II::VEX)
    return Match_Unsupported;

  // These instructions match ambiguously with their VEX encoded counterparts
  // and appear first in the matching table. Reject them unless we're forcing
  // EVEX encoding.
  // FIXME: We really need a way to break the ambiguity.
  switch (Opc) {
  case X86::VCVTSD2SIZrm_Int:
  case X86::VCVTSD2SI64Zrm_Int:
  case X86::VCVTSS2SIZrm_Int:
  case X86::VCVTSS2SI64Zrm_Int:
  case X86::VCVTTSD2SIZrm:   case X86::VCVTTSD2SIZrm_Int:
  case X86::VCVTTSD2SI64Zrm: case X86::VCVTTSD2SI64Zrm_Int:
  case X86::VCVTTSS2SIZrm:   case X86::VCVTTSS2SIZrm_Int:
  case X86::VCVTTSS2SI64Zrm: case X86::VCVTTSS2SI64Zrm_Int:
    if (ForcedVEXEncoding != VEXEncoding_EVEX)
      return Match_Unsupported;
  }

  return Match_Success;
}

/// Matches the parsed operands against the instruction tables and emits the
/// result.
///
/// A direct match is tried first. If that fails, the mnemonic is retried with
/// each of four size suffixes appended, and the instruction is accepted when
/// exactly one suffixed form matches.
///
/// \param IDLoc      location used for the emitted instruction and for most
///                   diagnostics.
/// \param Opcode     set to the matched opcode on success.
/// \param Operands   parsed operands; Operands[0] must be the mnemonic token.
///                   A trailing prefix operand, if any, is removed.
/// \param Out        streamer that receives the instruction.
/// \param ErrorInfo  written by the direct match and then used as an index
///                   into Operands for diagnostics (~0ULL is treated as
///                   "no operand known").
/// \param MatchingInlineAsm when true, nothing is emitted and the
///                   validate/process hooks are skipped.
/// \returns false on success, true when an error was reported.
bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  assert(!Operands.empty() && "Unexpect empty operand list!");
  assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!");
  SMRange EmptyRange = None;

  // First, handle aliases that expand to multiple instructions.
  MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands,
                    Out, MatchingInlineAsm);
  // Read Operands[0] only now: the alias handling above may have replaced it.
  X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
  unsigned Prefixes = getPrefixes(Operands);

  MCInst Inst;

  // If VEX3 encoding is forced, we need to pass the USE_VEX3 flag to the
  // encoder.
  if (ForcedVEXEncoding == VEXEncoding_VEX3)
    Prefixes |= X86::IP_USE_VEX3;

  if (Prefixes)
    Inst.setFlags(Prefixes);

  // First, try a direct match.
  FeatureBitset MissingFeatures;
  unsigned OriginalError = MatchInstruction(Operands, Inst, ErrorInfo,
                                            MissingFeatures, MatchingInlineAsm,
                                            isParsingIntelSyntax());
  switch (OriginalError) {
  default: llvm_unreachable("Unexpected match result!");
  case Match_Success:
    if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
      return true;
    // Some instructions need post-processing to, for example, tweak which
    // encoding is selected. Loop on it while changes happen so the
    // individual transformations can chain off each other.
    if (!MatchingInlineAsm)
      while (processInstruction(Inst, Operands))
        ;

    Inst.setLoc(IDLoc);
    if (!MatchingInlineAsm)
      EmitInstruction(Inst, Operands, Out);
    Opcode = Inst.getOpcode();
    return false;
  case Match_InvalidImmUnsignedi4: {
    // Point at the offending operand, falling back to the instruction
    // location when the operand has no location of its own.
    SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
    if (ErrorLoc == SMLoc())
      ErrorLoc = IDLoc;
    return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
                 EmptyRange, MatchingInlineAsm);
  }
  case Match_MissingFeature:
    return ErrorMissingFeature(IDLoc, MissingFeatures, MatchingInlineAsm);
  case Match_InvalidOperand:
  case Match_MnemonicFail:
  case Match_Unsupported:
    // These three fall through to the suffix-guessing logic below.
    break;
  }
  // An empty mnemonic cannot be suffix-matched (Base[0] is read below).
  if (Op.getToken().empty()) {
    Error(IDLoc, "instruction must have size higher than 0", EmptyRange,
          MatchingInlineAsm);
    return true;
  }

  // FIXME: Ideally, we would only attempt suffix matches for things which are
  // valid prefixes, and we could just infer the right unambiguous
  // type. However, that requires substantially more matcher support than the
  // following hack.

  // Change the operand to point to a temporary token.
  // The trailing space is a placeholder that each loop iteration below
  // overwrites with the suffix character being tried.
  StringRef Base = Op.getToken();
  SmallString<16> Tmp;
  Tmp += Base;
  Tmp += ' ';
  Op.setTokenValue(Tmp);

  // If this instruction starts with an 'f', then it is a floating point stack
  // instruction.  These come in up to three forms for 32-bit, 64-bit, and
  // 80-bit floating point, which use the suffixes s,l,t respectively.
  //
  // Otherwise, we assume that this may be an integer instruction, which comes
  // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
  //
  // The floating point list is padded with an explicit NUL so that both lists
  // have four entries, matching the four-iteration loop below.
  const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";

  // Check for the various suffix matches.
  uint64_t ErrorInfoIgnore;
  FeatureBitset ErrorInfoMissingFeatures; // Init suppresses compiler warnings.
  unsigned Match[4];

  for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
    Tmp.back() = Suffixes[I];
    Match[I] = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
                                MissingFeatures, MatchingInlineAsm,
                                isParsingIntelSyntax());
    // If this returned as a missing feature failure, remember that.
    if (Match[I] == Match_MissingFeature)
      ErrorInfoMissingFeatures = MissingFeatures;
  }

  // Restore the old token.
  Op.setTokenValue(Base);

  // If exactly one matched, then we treat that as a successful match (and the
  // instruction will already have been filled in correctly, since the failing
  // matches won't have modified it).
  // NOTE(review): unlike the direct-match path above, this path does not call
  // validateInstruction() or processInstruction() before emitting - confirm
  // whether that difference is intended.
  unsigned NumSuccessfulMatches =
      std::count(std::begin(Match), std::end(Match), Match_Success);
  if (NumSuccessfulMatches == 1) {
    Inst.setLoc(IDLoc);
    if (!MatchingInlineAsm)
      EmitInstruction(Inst, Operands, Out);
    Opcode = Inst.getOpcode();
    return false;
  }

  // Otherwise, the match failed, try to produce a decent error message.

  // If we had multiple suffix matches, then identify this as an ambiguous
  // match.
  if (NumSuccessfulMatches > 1) {
    char MatchChars[4];
    unsigned NumMatches = 0;
    for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
      if (Match[I] == Match_Success)
        MatchChars[NumMatches++] = Suffixes[I];

    // Build a list of the form "'xb', 'xw', or 'xl'".
    SmallString<126> Msg;
    raw_svector_ostream OS(Msg);
    OS << "ambiguous instructions require an explicit suffix (could be ";
    for (unsigned i = 0; i != NumMatches; ++i) {
      if (i != 0)
        OS << ", ";
      if (i + 1 == NumMatches)
        OS << "or ";
      OS << "'" << Base << MatchChars[i] << "'";
    }
    OS << ")";
    Error(IDLoc, OS.str(), EmptyRange, MatchingInlineAsm);
    return true;
  }

  // Okay, we know that none of the variants matched successfully.

  // If all of the instructions reported an invalid mnemonic, then the original
  // mnemonic was invalid.
  if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
    // No suffixed form exists, so report the result of the direct match.
    if (OriginalError == Match_MnemonicFail)
      return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
                   Op.getLocRange(), MatchingInlineAsm);

    if (OriginalError == Match_Unsupported)
      return Error(IDLoc, "unsupported instruction", EmptyRange,
                   MatchingInlineAsm);

    assert(OriginalError == Match_InvalidOperand && "Unexpected error");
    // Recover location info for the operand if we know which was the problem.
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size())
        return Error(IDLoc, "too few operands for instruction", EmptyRange,
                     MatchingInlineAsm);

      X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
      if (Operand.getStartLoc().isValid()) {
        SMRange OperandRange = Operand.getLocRange();
        return Error(Operand.getStartLoc(), "invalid operand for instruction",
                     OperandRange, MatchingInlineAsm);
      }
    }

    return Error(IDLoc, "invalid operand for instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  // If one instruction matched as unsupported, report this as unsupported.
  if (std::count(std::begin(Match), std::end(Match),
                 Match_Unsupported) == 1) {
    return Error(IDLoc, "unsupported instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  // If one instruction matched with a missing feature, report this as a
  // missing feature.
  if (std::count(std::begin(Match), std::end(Match),
                 Match_MissingFeature) == 1) {
    ErrorInfo = Match_MissingFeature;
    return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
                               MatchingInlineAsm);
  }

  // If one instruction matched with an invalid operand, report this as an
  // operand failure.
  if (std::count(std::begin(Match), std::end(Match),
                 Match_InvalidOperand) == 1) {
    return Error(IDLoc, "invalid operand for instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  // If all of these were an outright failure, report it in a useless way.
  Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
        EmptyRange, MatchingInlineAsm);
  return true;
}

/// Intel-syntax counterpart of MatchAndEmitATTInstruction.
///
/// Operand size is not part of the mnemonic here, so when an unsized memory
/// operand is present the match is retried with each candidate memory size,
/// and the instruction is accepted when exactly one attempt succeeds.
///
/// \param IDLoc      location used for the emitted instruction and for most
///                   diagnostics.
/// \param Opcode     set to the matched opcode on success.
/// \param Operands   parsed operands; Operands[0] must be the mnemonic token.
///                   A trailing prefix operand, if any, is removed.
/// \param Out        streamer that receives the instruction.
/// \param ErrorInfo  written by some of the match attempts and used as an
///                   operand index for the immediate-range diagnostic.
/// \param MatchingInlineAsm when true, nothing is emitted and the
///                   validate/process hooks are skipped.
/// \returns false on success; otherwise the result of Error() or of a failed
///          validateInstruction().
bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
                                                OperandVector &Operands,
                                                MCStreamer &Out,
                                                uint64_t &ErrorInfo,
                                                bool MatchingInlineAsm) {
  assert(!Operands.empty() && "Unexpect empty operand list!");
  assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!");
  // Mnemonic and Base both hold the mnemonic text as it was before any FPU
  // wait-alias replacement below.
  StringRef Mnemonic = (static_cast<X86Operand &>(*Operands[0])).getToken();
  SMRange EmptyRange = None;
  StringRef Base = (static_cast<X86Operand &>(*Operands[0])).getToken();
  unsigned Prefixes = getPrefixes(Operands);

  // First, handle aliases that expand to multiple instructions.
  MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands,
                    Out, MatchingInlineAsm);
  // Read Operands[0] only now: the alias handling above may have replaced it.
  X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);

  MCInst Inst;

  // If VEX3 encoding is forced, we need to pass the USE_VEX3 flag to the
  // encoder.
  if (ForcedVEXEncoding == VEXEncoding_VEX3)
    Prefixes |= X86::IP_USE_VEX3;

  if (Prefixes)
    Inst.setFlags(Prefixes);

  // Find one unsized memory operand, if present.
  X86Operand *UnsizedMemOp = nullptr;
  // Note: the loop variable below shadows the mnemonic operand 'Op' above.
  for (const auto &Op : Operands) {
    X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
    if (X86Op->isMemUnsized()) {
      UnsizedMemOp = X86Op;
      // Have we found an unqualified memory operand,
      // break. IA allows only one memory operand.
      break;
    }
  }

  // Allow some instructions to have implicitly pointer-sized operands.  This is
  // compatible with gas.
  if (UnsizedMemOp) {
    static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
    for (const char *Instr : PtrSizedInstrs) {
      if (Mnemonic == Instr) {
        UnsizedMemOp->Mem.Size = getPointerWidth();
        break;
      }
    }
  }

  // One entry per recorded match attempt.
  SmallVector<unsigned, 8> Match;
  FeatureBitset ErrorInfoMissingFeatures;
  FeatureBitset MissingFeatures;

  // If unsized push has immediate operand we should default the default pointer
  // size for the size.
  if (Mnemonic == "push" && Operands.size() == 2) {
    auto *X86Op = static_cast<X86Operand *>(Operands[1].get());
    if (X86Op->isImm()) {
      // If it's not a constant fall through and let remainder take care of it.
      const auto *CE = dyn_cast<MCConstantExpr>(X86Op->getImm());
      unsigned Size = getPointerWidth();
      if (CE &&
          (isIntN(Size, CE->getValue()) || isUIntN(Size, CE->getValue()))) {
        // Temporarily append the suffix for the current mode to the mnemonic.
        SmallString<16> Tmp;
        Tmp += Base;
        Tmp += (is64BitMode())
                   ? "q"
                   : (is32BitMode()) ? "l" : (is16BitMode()) ? "w" : " ";
        Op.setTokenValue(Tmp);
        // Do match in ATT mode to allow explicit suffix usage.
        Match.push_back(MatchInstruction(Operands, Inst, ErrorInfo,
                                         MissingFeatures, MatchingInlineAsm,
                                         false /*isParsingIntelSyntax()*/));
        // Restore the original mnemonic before Tmp goes out of scope.
        Op.setTokenValue(Base);
      }
    }
  }

  // If an unsized memory operand is present, try to match with each memory
  // operand size. In Intel assembly, the size is not part of the instruction
  // mnemonic.
  // The isMemUnsized() re-check skips this when the pointer-size defaulting
  // above has already given the operand a non-zero size.
  if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
    static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
    for (unsigned Size : MopSizes) {
      UnsizedMemOp->Mem.Size = Size;
      uint64_t ErrorInfoIgnore;
      unsigned LastOpcode = Inst.getOpcode();
      unsigned M = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
                                    MissingFeatures, MatchingInlineAsm,
                                    isParsingIntelSyntax());
      // Record the result only for the first attempt or when this attempt
      // changed the opcode held in Inst.
      if (Match.empty() || LastOpcode != Inst.getOpcode())
        Match.push_back(M);

      // If this returned as a missing feature failure, remember that.
      if (Match.back() == Match_MissingFeature)
        ErrorInfoMissingFeatures = MissingFeatures;
    }

    // Restore the size of the unsized memory operand if we modified it.
    UnsizedMemOp->Mem.Size = 0;
  }

  // If we haven't matched anything yet, this is not a basic integer or FPU
  // operation.  There shouldn't be any ambiguity in our mnemonic table, so try
  // matching with the unsized operand.
  if (Match.empty()) {
    Match.push_back(MatchInstruction(
        Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
        isParsingIntelSyntax()));
    // If this returned as a missing feature failure, remember that.
    if (Match.back() == Match_MissingFeature)
      ErrorInfoMissingFeatures = MissingFeatures;
  }

  // Restore the size of the unsized memory operand if we modified it.
  if (UnsizedMemOp)
    UnsizedMemOp->Mem.Size = 0;

  // If it's a bad mnemonic, all results will be the same.
  if (Match.back() == Match_MnemonicFail) {
    return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
                 Op.getLocRange(), MatchingInlineAsm);
  }

  unsigned NumSuccessfulMatches =
      std::count(std::begin(Match), std::end(Match), Match_Success);

  // If matching was ambiguous and we had size information from the frontend,
  // try again with that. This handles cases like "movxz eax, m8/m16".
  if (UnsizedMemOp && NumSuccessfulMatches > 1 &&
      UnsizedMemOp->getMemFrontendSize()) {
    UnsizedMemOp->Mem.Size = UnsizedMemOp->getMemFrontendSize();
    unsigned M = MatchInstruction(
        Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
        isParsingIntelSyntax());
    if (M == Match_Success)
      NumSuccessfulMatches = 1;

    // Add a rewrite that encodes the size information we used from the
    // frontend.
    // (The rewrite is recorded whether or not the retry above succeeded.)
    InstInfo->AsmRewrites->emplace_back(
        AOK_SizeDirective, UnsizedMemOp->getStartLoc(),
        /*Len=*/0, UnsizedMemOp->getMemFrontendSize());
  }

  // If exactly one matched, then we treat that as a successful match (and the
  // instruction will already have been filled in correctly, since the failing
  // matches won't have modified it).
  if (NumSuccessfulMatches == 1) {
    if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
      return true;
    // Some instructions need post-processing to, for example, tweak which
    // encoding is selected. Loop on it while changes happen so the individual
    // transformations can chain off each other.
    if (!MatchingInlineAsm)
      while (processInstruction(Inst, Operands))
        ;
    Inst.setLoc(IDLoc);
    if (!MatchingInlineAsm)
      EmitInstruction(Inst, Operands, Out);
    Opcode = Inst.getOpcode();
    return false;
  } else if (NumSuccessfulMatches > 1) {
    assert(UnsizedMemOp &&
           "multiple matches only possible with unsized memory operands");
    // NOTE(review): unlike the other Error() calls in this function, this one
    // does not pass MatchingInlineAsm - confirm whether that is intended.
    return Error(UnsizedMemOp->getStartLoc(),
                 "ambiguous operand size for instruction '" + Mnemonic + "\'",
                 UnsizedMemOp->getLocRange());
  }

  // If one instruction matched as unsupported, report this as unsupported.
  if (std::count(std::begin(Match), std::end(Match),
                 Match_Unsupported) == 1) {
    return Error(IDLoc, "unsupported instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  // If one instruction matched with a missing feature, report this as a
  // missing feature.
  if (std::count(std::begin(Match), std::end(Match),
                 Match_MissingFeature) == 1) {
    ErrorInfo = Match_MissingFeature;
    return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
                               MatchingInlineAsm);
  }

  // If one instruction matched with an invalid operand, report this as an
  // operand failure.
  if (std::count(std::begin(Match), std::end(Match),
                 Match_InvalidOperand) == 1) {
    return Error(IDLoc, "invalid operand for instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  // If exactly one attempt rejected a 4-bit unsigned immediate, point at the
  // operand identified by ErrorInfo (or at the instruction if it has no
  // location).
  if (std::count(std::begin(Match), std::end(Match),
                 Match_InvalidImmUnsignedi4) == 1) {
    SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
    if (ErrorLoc == SMLoc())
      ErrorLoc = IDLoc;
    return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
                 EmptyRange, MatchingInlineAsm);
  }

  // If all of these were an outright failure, report it in a useless way.
  return Error(IDLoc, "unknown instruction mnemonic", EmptyRange,
               MatchingInlineAsm);
}

/// Returns true if \p RegNo belongs to the X86 segment register class.
bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
  return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
}

/// Dispatches the X86-specific assembler directives handled by this parser:
/// .code*, .att_syntax, .intel_syntax, .even and the .cv_fpo_* family.
///
/// \returns the handler's result for a recognized directive, and true when
///          the directive is not one of the above (presumably so the caller
///          can fall back to generic handling - confirm against the caller).
bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
  MCAsmParser &Parser = getParser();
  StringRef IDVal = DirectiveID.getIdentifier();
  if (IDVal.startswith(".code"))
    return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
  else if (IDVal.startswith(".att_syntax")) {
    // An optional "prefix" argument is consumed; "noprefix" is rejected.
    if (getLexer().isNot(AsmToken::EndOfStatement)) {
      if (Parser.getTok().getString() == "prefix")
        Parser.Lex();
      else if (Parser.getTok().getString() == "noprefix")
        return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
                                           "supported: registers must have a "
                                           "'%' prefix in .att_syntax");
    }
    getParser().setAssemblerDialect(0);
    return false;
  } else if (IDVal.startswith(".intel_syntax")) {
    // Note that the dialect is switched before the argument is inspected, so
    // it stays switched even if the "prefix" error below is reported.
    getParser().setAssemblerDialect(1);
    if (getLexer().isNot(AsmToken::EndOfStatement)) {
      if (Parser.getTok().getString() == "noprefix")
        Parser.Lex();
      else if (Parser.getTok().getString() == "prefix")
        return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
                                           "supported: registers must not have "
                                           "a '%' prefix in .intel_syntax");
    }
    return false;
  } else if (IDVal == ".even")
    return parseDirectiveEven(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_proc")
    return parseDirectiveFPOProc(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_setframe")
    return parseDirectiveFPOSetFrame(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_pushreg")
    return parseDirectiveFPOPushReg(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_stackalloc")
    return parseDirectiveFPOStackAlloc(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_stackalign")
    return parseDirectiveFPOStackAlign(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_endprologue")
    return parseDirectiveFPOEndPrologue(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_endproc")
    return parseDirectiveFPOEndProc(DirectiveID.getLoc());

  return true;
}

/// parseDirectiveEven
///  ::= .even
///
/// Emits an alignment to 2 in the current section, using code alignment when
/// the section reports UseCodeAlign() and value alignment otherwise. Sections
/// are initialized first if there is no current section.
bool X86AsmParser::parseDirectiveEven(SMLoc L) {
  // NOTE(review): this returns false (not true) when parseToken() reports a
  // problem - confirm that treating the directive as handled here is intended.
  if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
    return false;

  const MCSection *Section = getStreamer().getCurrentSectionOnly();
  if (!Section) {
    getStreamer().InitSections(false);
    Section = getStreamer().getCurrentSectionOnly();
  }
  if (Section->UseCodeAlign())
    getStreamer().EmitCodeAlignment(2, 0);
  else
    getStreamer().EmitValueToAlignment(2, 0, 1, 0);
  return false;
}

/// ParseDirectiveCode
///  ::= .code16 | .code32 | .code64
///
/// Also accepts .code16gcc. Switches the parser mode and emits the matching
/// assembler flag when the requested mode differs from the current one.
/// Code16GCC is reset on every call and set again only for .code16gcc.
bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
  MCAsmParser &Parser = getParser();
  Code16GCC = false;
  if (IDVal == ".code16") {
    Parser.Lex();
    if (!is16BitMode()) {
      SwitchMode(X86::Mode16Bit);
      getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
    }
  } else if (IDVal == ".code16gcc") {
    // .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode.
    Parser.Lex();
    Code16GCC = true;
    if (!is16BitMode()) {
      SwitchMode(X86::Mode16Bit);
      getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
    }
  } else if (IDVal == ".code32") {
    Parser.Lex();
    if (!is32BitMode()) {
      SwitchMode(X86::Mode32Bit);
      getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
    }
  } else if (IDVal == ".code64") {
    Parser.Lex();
    if (!is64BitMode()) {
      SwitchMode(X86::Mode64Bit);
      getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
    }
  } else {
    // NOTE(review): the error is reported but false is still returned, and
    // the token is not consumed - confirm that this is intended.
    Error(L, "unknown directive " + IDVal);
    return false;
  }

  return false;
}

// .cv_fpo_proc foo
// Parses a symbol name followed by an integer parameter byte count that must
// fit in 32 unsigned bits, then forwards both to the target streamer.
bool X86AsmParser::parseDirectiveFPOProc(SMLoc L) {
  MCAsmParser &Parser = getParser();
  StringRef ProcName;
  int64_t ParamsSize;
  if (Parser.parseIdentifier(ProcName))
    return Parser.TokError("expected symbol name");
  if (Parser.parseIntToken(ParamsSize, "expected parameter byte count"))
    return true;
  if (!isUIntN(32, ParamsSize))
    return Parser.TokError("parameters size out of range");
  if (Parser.parseEOL("unexpected tokens"))
    return addErrorSuffix(" in '.cv_fpo_proc' directive");
  MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
  return getTargetStreamer().emitFPOProc(ProcSym, ParamsSize, L);
}

// .cv_fpo_setframe ebp
// Parses one register and forwards it to the target streamer.
bool X86AsmParser::parseDirectiveFPOSetFrame(SMLoc L) {
  MCAsmParser &Parser = getParser();
  unsigned Reg;
  SMLoc DummyLoc;
  if (ParseRegister(Reg, DummyLoc, DummyLoc) ||
      Parser.parseEOL("unexpected tokens"))
    return addErrorSuffix(" in '.cv_fpo_setframe' directive");
  return getTargetStreamer().emitFPOSetFrame(Reg, L);
}

// .cv_fpo_pushreg ebx
// Parses one register and forwards it to the target streamer.
bool X86AsmParser::parseDirectiveFPOPushReg(SMLoc L) {
  MCAsmParser &Parser = getParser();
  unsigned Reg;
  SMLoc DummyLoc;
  if (ParseRegister(Reg, DummyLoc, DummyLoc) ||
      Parser.parseEOL("unexpected tokens"))
    return addErrorSuffix(" in '.cv_fpo_pushreg' directive");
  return getTargetStreamer().emitFPOPushReg(Reg, L);
}

// .cv_fpo_stackalloc 20
// Parses one integer and forwards it to the target streamer.
bool X86AsmParser::parseDirectiveFPOStackAlloc(SMLoc L) {
  MCAsmParser &Parser = getParser();
  int64_t Offset;
  if (Parser.parseIntToken(Offset, "expected offset") ||
      Parser.parseEOL("unexpected tokens"))
    return addErrorSuffix(" in '.cv_fpo_stackalloc' directive");
  return getTargetStreamer().emitFPOStackAlloc(Offset, L);
}

// .cv_fpo_stackalign 8
// Parses one integer and forwards it to the target streamer.
bool X86AsmParser::parseDirectiveFPOStackAlign(SMLoc L) {
  MCAsmParser &Parser = getParser();
  int64_t Offset;
  if (Parser.parseIntToken(Offset, "expected offset") ||
      Parser.parseEOL("unexpected tokens"))
    return addErrorSuffix(" in '.cv_fpo_stackalign' directive");
  return getTargetStreamer().emitFPOStackAlign(Offset, L);
}

// .cv_fpo_endprologue
// Takes no arguments; only the end of the statement is expected.
bool X86AsmParser::parseDirectiveFPOEndPrologue(SMLoc L) {
  MCAsmParser &Parser = getParser();
  if (Parser.parseEOL("unexpected tokens"))
    return addErrorSuffix(" in '.cv_fpo_endprologue' directive");
  return getTargetStreamer().emitFPOEndPrologue(L);
}

// .cv_fpo_endproc
// Takes no arguments; only the end of the statement is expected.
bool X86AsmParser::parseDirectiveFPOEndProc(SMLoc L) {
  MCAsmParser &Parser = getParser();
  if (Parser.parseEOL("unexpected tokens"))
    return addErrorSuffix(" in '.cv_fpo_endproc' directive");
  return getTargetStreamer().emitFPOEndProc(L);
}

// Force static initialization.
// Registers this parser for both the 32-bit and the 64-bit X86 targets.
extern "C" void LLVMInitializeX86AsmParser() {
  RegisterMCAsmParser<X86AsmParser> X(getTheX86_32Target());
  RegisterMCAsmParser<X86AsmParser> Y(getTheX86_64Target());
}

// Select which sections of the generated matcher file are compiled in here.
#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_SUBTARGET_FEATURE_NAME
#include "X86GenAsmMatcher.inc"