1 //===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains code to lower X86 MachineInstrs to their corresponding
10 // MCInst records.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "MCTargetDesc/X86ATTInstPrinter.h"
15 #include "MCTargetDesc/X86BaseInfo.h"
16 #include "MCTargetDesc/X86InstComments.h"
17 #include "MCTargetDesc/X86ShuffleDecode.h"
18 #include "MCTargetDesc/X86TargetStreamer.h"
19 #include "X86AsmPrinter.h"
20 #include "X86RegisterInfo.h"
21 #include "X86ShuffleDecodeConstantPool.h"
22 #include "X86Subtarget.h"
23 #include "llvm/ADT/Optional.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/iterator_range.h"
26 #include "llvm/CodeGen/MachineConstantPool.h"
27 #include "llvm/CodeGen/MachineFunction.h"
28 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
29 #include "llvm/CodeGen/MachineOperand.h"
30 #include "llvm/CodeGen/StackMaps.h"
31 #include "llvm/IR/DataLayout.h"
32 #include "llvm/IR/GlobalValue.h"
33 #include "llvm/IR/Mangler.h"
34 #include "llvm/MC/MCAsmInfo.h"
35 #include "llvm/MC/MCCodeEmitter.h"
36 #include "llvm/MC/MCContext.h"
37 #include "llvm/MC/MCExpr.h"
38 #include "llvm/MC/MCFixup.h"
39 #include "llvm/MC/MCInst.h"
40 #include "llvm/MC/MCInstBuilder.h"
41 #include "llvm/MC/MCSection.h"
42 #include "llvm/MC/MCSectionELF.h"
43 #include "llvm/MC/MCStreamer.h"
44 #include "llvm/MC/MCSymbol.h"
45 #include "llvm/MC/MCSymbolELF.h"
46 #include "llvm/MC/TargetRegistry.h"
47 #include "llvm/Target/TargetLoweringObjectFile.h"
48 #include "llvm/Target/TargetMachine.h"
49 #include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
50 #include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
51 #include <string>
52 
53 using namespace llvm;
54 
55 namespace {
56 
57 /// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst.
58 class X86MCInstLower {
59   MCContext &Ctx;
60   const MachineFunction &MF;
61   const TargetMachine &TM;
62   const MCAsmInfo &MAI;
63   X86AsmPrinter &AsmPrinter;
64 
65 public:
66   X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter);
67 
68   Optional<MCOperand> LowerMachineOperand(const MachineInstr *MI,
69                                           const MachineOperand &MO) const;
70   void Lower(const MachineInstr *MI, MCInst &OutMI) const;
71 
72   MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
73   MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
74 
75 private:
76   MachineModuleInfoMachO &getMachOMMI() const;
77 };
78 
79 } // end anonymous namespace
80 
81 /// A RAII helper which defines a region of instructions which can't have
82 /// padding added between them for correctness.
83 struct NoAutoPaddingScope {
84   MCStreamer &OS;
85   const bool OldAllowAutoPadding;
86   NoAutoPaddingScope(MCStreamer &OS)
87       : OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) {
88     changeAndComment(false);
89   }
90   ~NoAutoPaddingScope() { changeAndComment(OldAllowAutoPadding); }
91   void changeAndComment(bool b) {
92     if (b == OS.getAllowAutoPadding())
93       return;
94     OS.setAllowAutoPadding(b);
95     if (b)
96       OS.emitRawComment("autopadding");
97     else
98       OS.emitRawComment("noautopadding");
99   }
100 };
101 
// Emit a minimal sequence of nops spanning NumBytes bytes.
//
// Forward declaration only: the function has internal linkage, so its
// definition has to live elsewhere in this translation unit. It is declared
// here because StackMapShadowTracker::emitShadowPadding calls it.
static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
                        const X86Subtarget *Subtarget);
105 
106 void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst,
107                                                  const MCSubtargetInfo &STI,
108                                                  MCCodeEmitter *CodeEmitter) {
109   if (InShadow) {
110     SmallString<256> Code;
111     SmallVector<MCFixup, 4> Fixups;
112     raw_svector_ostream VecOS(Code);
113     CodeEmitter->encodeInstruction(Inst, VecOS, Fixups, STI);
114     CurrentShadowSize += Code.size();
115     if (CurrentShadowSize >= RequiredShadowSize)
116       InShadow = false; // The shadow is big enough. Stop counting.
117   }
118 }
119 
120 void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
121     MCStreamer &OutStreamer, const MCSubtargetInfo &STI) {
122   if (InShadow && CurrentShadowSize < RequiredShadowSize) {
123     InShadow = false;
124     emitX86Nops(OutStreamer, RequiredShadowSize - CurrentShadowSize,
125                 &MF->getSubtarget<X86Subtarget>());
126   }
127 }
128 
129 void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
130   OutStreamer->emitInstruction(Inst, getSubtargetInfo());
131   SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get());
132 }
133 
134 X86MCInstLower::X86MCInstLower(const MachineFunction &mf,
135                                X86AsmPrinter &asmprinter)
136     : Ctx(mf.getContext()), MF(mf), TM(mf.getTarget()), MAI(*TM.getMCAsmInfo()),
137       AsmPrinter(asmprinter) {}
138 
139 MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
140   return MF.getMMI().getObjFileInfo<MachineModuleInfoMachO>();
141 }
142 
143 /// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol
144 /// operand to an MCSymbol.
145 MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const {
146   const Triple &TT = TM.getTargetTriple();
147   if (MO.isGlobal() && TT.isOSBinFormatELF())
148     return AsmPrinter.getSymbolPreferLocal(*MO.getGlobal());
149 
150   const DataLayout &DL = MF.getDataLayout();
151   assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) &&
152          "Isn't a symbol reference");
153 
154   MCSymbol *Sym = nullptr;
155   SmallString<128> Name;
156   StringRef Suffix;
157 
158   switch (MO.getTargetFlags()) {
159   case X86II::MO_DLLIMPORT:
160     // Handle dllimport linkage.
161     Name += "__imp_";
162     break;
163   case X86II::MO_COFFSTUB:
164     Name += ".refptr.";
165     break;
166   case X86II::MO_DARWIN_NONLAZY:
167   case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
168     Suffix = "$non_lazy_ptr";
169     break;
170   }
171 
172   if (!Suffix.empty())
173     Name += DL.getPrivateGlobalPrefix();
174 
175   if (MO.isGlobal()) {
176     const GlobalValue *GV = MO.getGlobal();
177     AsmPrinter.getNameWithPrefix(Name, GV);
178   } else if (MO.isSymbol()) {
179     Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
180   } else if (MO.isMBB()) {
181     assert(Suffix.empty());
182     Sym = MO.getMBB()->getSymbol();
183   }
184 
185   Name += Suffix;
186   if (!Sym)
187     Sym = Ctx.getOrCreateSymbol(Name);
188 
189   // If the target flags on the operand changes the name of the symbol, do that
190   // before we return the symbol.
191   switch (MO.getTargetFlags()) {
192   default:
193     break;
194   case X86II::MO_COFFSTUB: {
195     MachineModuleInfoCOFF &MMICOFF =
196         MF.getMMI().getObjFileInfo<MachineModuleInfoCOFF>();
197     MachineModuleInfoImpl::StubValueTy &StubSym = MMICOFF.getGVStubEntry(Sym);
198     if (!StubSym.getPointer()) {
199       assert(MO.isGlobal() && "Extern symbol not handled yet");
200       StubSym = MachineModuleInfoImpl::StubValueTy(
201           AsmPrinter.getSymbol(MO.getGlobal()), true);
202     }
203     break;
204   }
205   case X86II::MO_DARWIN_NONLAZY:
206   case X86II::MO_DARWIN_NONLAZY_PIC_BASE: {
207     MachineModuleInfoImpl::StubValueTy &StubSym =
208         getMachOMMI().getGVStubEntry(Sym);
209     if (!StubSym.getPointer()) {
210       assert(MO.isGlobal() && "Extern symbol not handled yet");
211       StubSym = MachineModuleInfoImpl::StubValueTy(
212           AsmPrinter.getSymbol(MO.getGlobal()),
213           !MO.getGlobal()->hasInternalLinkage());
214     }
215     break;
216   }
217   }
218 
219   return Sym;
220 }
221 
222 MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
223                                              MCSymbol *Sym) const {
224   // FIXME: We would like an efficient form for this, so we don't have to do a
225   // lot of extra uniquing.
226   const MCExpr *Expr = nullptr;
227   MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
228 
229   switch (MO.getTargetFlags()) {
230   default:
231     llvm_unreachable("Unknown target flag on GV operand");
232   case X86II::MO_NO_FLAG: // No flag.
233   // These affect the name of the symbol, not any suffix.
234   case X86II::MO_DARWIN_NONLAZY:
235   case X86II::MO_DLLIMPORT:
236   case X86II::MO_COFFSTUB:
237     break;
238 
239   case X86II::MO_TLVP:
240     RefKind = MCSymbolRefExpr::VK_TLVP;
241     break;
242   case X86II::MO_TLVP_PIC_BASE:
243     Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
244     // Subtract the pic base.
245     Expr = MCBinaryExpr::createSub(
246         Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
247     break;
248   case X86II::MO_SECREL:
249     RefKind = MCSymbolRefExpr::VK_SECREL;
250     break;
251   case X86II::MO_TLSGD:
252     RefKind = MCSymbolRefExpr::VK_TLSGD;
253     break;
254   case X86II::MO_TLSLD:
255     RefKind = MCSymbolRefExpr::VK_TLSLD;
256     break;
257   case X86II::MO_TLSLDM:
258     RefKind = MCSymbolRefExpr::VK_TLSLDM;
259     break;
260   case X86II::MO_GOTTPOFF:
261     RefKind = MCSymbolRefExpr::VK_GOTTPOFF;
262     break;
263   case X86II::MO_INDNTPOFF:
264     RefKind = MCSymbolRefExpr::VK_INDNTPOFF;
265     break;
266   case X86II::MO_TPOFF:
267     RefKind = MCSymbolRefExpr::VK_TPOFF;
268     break;
269   case X86II::MO_DTPOFF:
270     RefKind = MCSymbolRefExpr::VK_DTPOFF;
271     break;
272   case X86II::MO_NTPOFF:
273     RefKind = MCSymbolRefExpr::VK_NTPOFF;
274     break;
275   case X86II::MO_GOTNTPOFF:
276     RefKind = MCSymbolRefExpr::VK_GOTNTPOFF;
277     break;
278   case X86II::MO_GOTPCREL:
279     RefKind = MCSymbolRefExpr::VK_GOTPCREL;
280     break;
281   case X86II::MO_GOTPCREL_NORELAX:
282     RefKind = MCSymbolRefExpr::VK_GOTPCREL_NORELAX;
283     break;
284   case X86II::MO_GOT:
285     RefKind = MCSymbolRefExpr::VK_GOT;
286     break;
287   case X86II::MO_GOTOFF:
288     RefKind = MCSymbolRefExpr::VK_GOTOFF;
289     break;
290   case X86II::MO_PLT:
291     RefKind = MCSymbolRefExpr::VK_PLT;
292     break;
293   case X86II::MO_ABS8:
294     RefKind = MCSymbolRefExpr::VK_X86_ABS8;
295     break;
296   case X86II::MO_PIC_BASE_OFFSET:
297   case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
298     Expr = MCSymbolRefExpr::create(Sym, Ctx);
299     // Subtract the pic base.
300     Expr = MCBinaryExpr::createSub(
301         Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
302     if (MO.isJTI()) {
303       assert(MAI.doesSetDirectiveSuppressReloc());
304       // If .set directive is supported, use it to reduce the number of
305       // relocations the assembler will generate for differences between
306       // local labels. This is only safe when the symbols are in the same
307       // section so we are restricting it to jumptable references.
308       MCSymbol *Label = Ctx.createTempSymbol();
309       AsmPrinter.OutStreamer->emitAssignment(Label, Expr);
310       Expr = MCSymbolRefExpr::create(Label, Ctx);
311     }
312     break;
313   }
314 
315   if (!Expr)
316     Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx);
317 
318   if (!MO.isJTI() && !MO.isMBB() && MO.getOffset())
319     Expr = MCBinaryExpr::createAdd(
320         Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
321   return MCOperand::createExpr(Expr);
322 }
323 
324 /// Simplify FOO $imm, %{al,ax,eax,rax} to FOO $imm, for instruction with
325 /// a short fixed-register form.
326 static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) {
327   unsigned ImmOp = Inst.getNumOperands() - 1;
328   assert(Inst.getOperand(0).isReg() &&
329          (Inst.getOperand(ImmOp).isImm() || Inst.getOperand(ImmOp).isExpr()) &&
330          ((Inst.getNumOperands() == 3 && Inst.getOperand(1).isReg() &&
331            Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) ||
332           Inst.getNumOperands() == 2) &&
333          "Unexpected instruction!");
334 
335   // Check whether the destination register can be fixed.
336   unsigned Reg = Inst.getOperand(0).getReg();
337   if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
338     return;
339 
340   // If so, rewrite the instruction.
341   MCOperand Saved = Inst.getOperand(ImmOp);
342   Inst = MCInst();
343   Inst.setOpcode(Opcode);
344   Inst.addOperand(Saved);
345 }
346 
347 /// If a movsx instruction has a shorter encoding for the used register
348 /// simplify the instruction to use it instead.
349 static void SimplifyMOVSX(MCInst &Inst) {
350   unsigned NewOpcode = 0;
351   unsigned Op0 = Inst.getOperand(0).getReg(), Op1 = Inst.getOperand(1).getReg();
352   switch (Inst.getOpcode()) {
353   default:
354     llvm_unreachable("Unexpected instruction!");
355   case X86::MOVSX16rr8: // movsbw %al, %ax   --> cbtw
356     if (Op0 == X86::AX && Op1 == X86::AL)
357       NewOpcode = X86::CBW;
358     break;
359   case X86::MOVSX32rr16: // movswl %ax, %eax  --> cwtl
360     if (Op0 == X86::EAX && Op1 == X86::AX)
361       NewOpcode = X86::CWDE;
362     break;
363   case X86::MOVSX64rr32: // movslq %eax, %rax --> cltq
364     if (Op0 == X86::RAX && Op1 == X86::EAX)
365       NewOpcode = X86::CDQE;
366     break;
367   }
368 
369   if (NewOpcode != 0) {
370     Inst = MCInst();
371     Inst.setOpcode(NewOpcode);
372   }
373 }
374 
375 /// Simplify things like MOV32rm to MOV32o32a.
376 static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst,
377                                   unsigned Opcode) {
378   // Don't make these simplifications in 64-bit mode; other assemblers don't
379   // perform them because they make the code larger.
380   if (Printer.getSubtarget().is64Bit())
381     return;
382 
383   bool IsStore = Inst.getOperand(0).isReg() && Inst.getOperand(1).isReg();
384   unsigned AddrBase = IsStore;
385   unsigned RegOp = IsStore ? 0 : 5;
386   unsigned AddrOp = AddrBase + 3;
387   assert(
388       Inst.getNumOperands() == 6 && Inst.getOperand(RegOp).isReg() &&
389       Inst.getOperand(AddrBase + X86::AddrBaseReg).isReg() &&
390       Inst.getOperand(AddrBase + X86::AddrScaleAmt).isImm() &&
391       Inst.getOperand(AddrBase + X86::AddrIndexReg).isReg() &&
392       Inst.getOperand(AddrBase + X86::AddrSegmentReg).isReg() &&
393       (Inst.getOperand(AddrOp).isExpr() || Inst.getOperand(AddrOp).isImm()) &&
394       "Unexpected instruction!");
395 
396   // Check whether the destination register can be fixed.
397   unsigned Reg = Inst.getOperand(RegOp).getReg();
398   if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
399     return;
400 
401   // Check whether this is an absolute address.
402   // FIXME: We know TLVP symbol refs aren't, but there should be a better way
403   // to do this here.
404   bool Absolute = true;
405   if (Inst.getOperand(AddrOp).isExpr()) {
406     const MCExpr *MCE = Inst.getOperand(AddrOp).getExpr();
407     if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(MCE))
408       if (SRE->getKind() == MCSymbolRefExpr::VK_TLVP)
409         Absolute = false;
410   }
411 
412   if (Absolute &&
413       (Inst.getOperand(AddrBase + X86::AddrBaseReg).getReg() != 0 ||
414        Inst.getOperand(AddrBase + X86::AddrScaleAmt).getImm() != 1 ||
415        Inst.getOperand(AddrBase + X86::AddrIndexReg).getReg() != 0))
416     return;
417 
418   // If so, rewrite the instruction.
419   MCOperand Saved = Inst.getOperand(AddrOp);
420   MCOperand Seg = Inst.getOperand(AddrBase + X86::AddrSegmentReg);
421   Inst = MCInst();
422   Inst.setOpcode(Opcode);
423   Inst.addOperand(Saved);
424   Inst.addOperand(Seg);
425 }
426 
427 static unsigned getRetOpcode(const X86Subtarget &Subtarget) {
428   return Subtarget.is64Bit() ? X86::RET64 : X86::RET32;
429 }
430 
431 Optional<MCOperand>
432 X86MCInstLower::LowerMachineOperand(const MachineInstr *MI,
433                                     const MachineOperand &MO) const {
434   switch (MO.getType()) {
435   default:
436     MI->print(errs());
437     llvm_unreachable("unknown operand type");
438   case MachineOperand::MO_Register:
439     // Ignore all implicit register operands.
440     if (MO.isImplicit())
441       return None;
442     return MCOperand::createReg(MO.getReg());
443   case MachineOperand::MO_Immediate:
444     return MCOperand::createImm(MO.getImm());
445   case MachineOperand::MO_MachineBasicBlock:
446   case MachineOperand::MO_GlobalAddress:
447   case MachineOperand::MO_ExternalSymbol:
448     return LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
449   case MachineOperand::MO_MCSymbol:
450     return LowerSymbolOperand(MO, MO.getMCSymbol());
451   case MachineOperand::MO_JumpTableIndex:
452     return LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()));
453   case MachineOperand::MO_ConstantPoolIndex:
454     return LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()));
455   case MachineOperand::MO_BlockAddress:
456     return LowerSymbolOperand(
457         MO, AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
458   case MachineOperand::MO_RegisterMask:
459     // Ignore call clobbers.
460     return None;
461   }
462 }
463 
464 // Replace TAILJMP opcodes with their equivalent opcodes that have encoding
465 // information.
466 static unsigned convertTailJumpOpcode(unsigned Opcode) {
467   switch (Opcode) {
468   case X86::TAILJMPr:
469     Opcode = X86::JMP32r;
470     break;
471   case X86::TAILJMPm:
472     Opcode = X86::JMP32m;
473     break;
474   case X86::TAILJMPr64:
475     Opcode = X86::JMP64r;
476     break;
477   case X86::TAILJMPm64:
478     Opcode = X86::JMP64m;
479     break;
480   case X86::TAILJMPr64_REX:
481     Opcode = X86::JMP64r_REX;
482     break;
483   case X86::TAILJMPm64_REX:
484     Opcode = X86::JMP64m_REX;
485     break;
486   case X86::TAILJMPd:
487   case X86::TAILJMPd64:
488     Opcode = X86::JMP_1;
489     break;
490   case X86::TAILJMPd_CC:
491   case X86::TAILJMPd64_CC:
492     Opcode = X86::JCC_1;
493     break;
494   }
495 
496   return Opcode;
497 }
498 
499 void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
500   OutMI.setOpcode(MI->getOpcode());
501 
502   for (const MachineOperand &MO : MI->operands())
503     if (auto MaybeMCOp = LowerMachineOperand(MI, MO))
504       OutMI.addOperand(MaybeMCOp.getValue());
505 
506   // Handle a few special cases to eliminate operand modifiers.
507   switch (OutMI.getOpcode()) {
508   case X86::LEA64_32r:
509   case X86::LEA64r:
510   case X86::LEA16r:
511   case X86::LEA32r:
512     // LEA should have a segment register, but it must be empty.
513     assert(OutMI.getNumOperands() == 1 + X86::AddrNumOperands &&
514            "Unexpected # of LEA operands");
515     assert(OutMI.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 &&
516            "LEA has segment specified!");
517     break;
518 
519   case X86::MULX32Hrr:
520   case X86::MULX32Hrm:
521   case X86::MULX64Hrr:
522   case X86::MULX64Hrm: {
523     // Turn into regular MULX by duplicating the destination.
524     unsigned NewOpc;
525     switch (OutMI.getOpcode()) {
526     default: llvm_unreachable("Invalid opcode");
527     case X86::MULX32Hrr: NewOpc = X86::MULX32rr; break;
528     case X86::MULX32Hrm: NewOpc = X86::MULX32rm; break;
529     case X86::MULX64Hrr: NewOpc = X86::MULX64rr; break;
530     case X86::MULX64Hrm: NewOpc = X86::MULX64rm; break;
531     }
532     OutMI.setOpcode(NewOpc);
533     // Duplicate the destination.
534     unsigned DestReg = OutMI.getOperand(0).getReg();
535     OutMI.insert(OutMI.begin(), MCOperand::createReg(DestReg));
536     break;
537   }
538 
539   // Commute operands to get a smaller encoding by using VEX.R instead of VEX.B
540   // if one of the registers is extended, but other isn't.
541   case X86::VMOVZPQILo2PQIrr:
542   case X86::VMOVAPDrr:
543   case X86::VMOVAPDYrr:
544   case X86::VMOVAPSrr:
545   case X86::VMOVAPSYrr:
546   case X86::VMOVDQArr:
547   case X86::VMOVDQAYrr:
548   case X86::VMOVDQUrr:
549   case X86::VMOVDQUYrr:
550   case X86::VMOVUPDrr:
551   case X86::VMOVUPDYrr:
552   case X86::VMOVUPSrr:
553   case X86::VMOVUPSYrr: {
554     if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
555         X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg())) {
556       unsigned NewOpc;
557       switch (OutMI.getOpcode()) {
558       default: llvm_unreachable("Invalid opcode");
559       case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr;   break;
560       case X86::VMOVAPDrr:        NewOpc = X86::VMOVAPDrr_REV;  break;
561       case X86::VMOVAPDYrr:       NewOpc = X86::VMOVAPDYrr_REV; break;
562       case X86::VMOVAPSrr:        NewOpc = X86::VMOVAPSrr_REV;  break;
563       case X86::VMOVAPSYrr:       NewOpc = X86::VMOVAPSYrr_REV; break;
564       case X86::VMOVDQArr:        NewOpc = X86::VMOVDQArr_REV;  break;
565       case X86::VMOVDQAYrr:       NewOpc = X86::VMOVDQAYrr_REV; break;
566       case X86::VMOVDQUrr:        NewOpc = X86::VMOVDQUrr_REV;  break;
567       case X86::VMOVDQUYrr:       NewOpc = X86::VMOVDQUYrr_REV; break;
568       case X86::VMOVUPDrr:        NewOpc = X86::VMOVUPDrr_REV;  break;
569       case X86::VMOVUPDYrr:       NewOpc = X86::VMOVUPDYrr_REV; break;
570       case X86::VMOVUPSrr:        NewOpc = X86::VMOVUPSrr_REV;  break;
571       case X86::VMOVUPSYrr:       NewOpc = X86::VMOVUPSYrr_REV; break;
572       }
573       OutMI.setOpcode(NewOpc);
574     }
575     break;
576   }
577   case X86::VMOVSDrr:
578   case X86::VMOVSSrr: {
579     if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
580         X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
581       unsigned NewOpc;
582       switch (OutMI.getOpcode()) {
583       default: llvm_unreachable("Invalid opcode");
584       case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
585       case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
586       }
587       OutMI.setOpcode(NewOpc);
588     }
589     break;
590   }
591 
592   case X86::VPCMPBZ128rmi:  case X86::VPCMPBZ128rmik:
593   case X86::VPCMPBZ128rri:  case X86::VPCMPBZ128rrik:
594   case X86::VPCMPBZ256rmi:  case X86::VPCMPBZ256rmik:
595   case X86::VPCMPBZ256rri:  case X86::VPCMPBZ256rrik:
596   case X86::VPCMPBZrmi:     case X86::VPCMPBZrmik:
597   case X86::VPCMPBZrri:     case X86::VPCMPBZrrik:
598   case X86::VPCMPDZ128rmi:  case X86::VPCMPDZ128rmik:
599   case X86::VPCMPDZ128rmib: case X86::VPCMPDZ128rmibk:
600   case X86::VPCMPDZ128rri:  case X86::VPCMPDZ128rrik:
601   case X86::VPCMPDZ256rmi:  case X86::VPCMPDZ256rmik:
602   case X86::VPCMPDZ256rmib: case X86::VPCMPDZ256rmibk:
603   case X86::VPCMPDZ256rri:  case X86::VPCMPDZ256rrik:
604   case X86::VPCMPDZrmi:     case X86::VPCMPDZrmik:
605   case X86::VPCMPDZrmib:    case X86::VPCMPDZrmibk:
606   case X86::VPCMPDZrri:     case X86::VPCMPDZrrik:
607   case X86::VPCMPQZ128rmi:  case X86::VPCMPQZ128rmik:
608   case X86::VPCMPQZ128rmib: case X86::VPCMPQZ128rmibk:
609   case X86::VPCMPQZ128rri:  case X86::VPCMPQZ128rrik:
610   case X86::VPCMPQZ256rmi:  case X86::VPCMPQZ256rmik:
611   case X86::VPCMPQZ256rmib: case X86::VPCMPQZ256rmibk:
612   case X86::VPCMPQZ256rri:  case X86::VPCMPQZ256rrik:
613   case X86::VPCMPQZrmi:     case X86::VPCMPQZrmik:
614   case X86::VPCMPQZrmib:    case X86::VPCMPQZrmibk:
615   case X86::VPCMPQZrri:     case X86::VPCMPQZrrik:
616   case X86::VPCMPWZ128rmi:  case X86::VPCMPWZ128rmik:
617   case X86::VPCMPWZ128rri:  case X86::VPCMPWZ128rrik:
618   case X86::VPCMPWZ256rmi:  case X86::VPCMPWZ256rmik:
619   case X86::VPCMPWZ256rri:  case X86::VPCMPWZ256rrik:
620   case X86::VPCMPWZrmi:     case X86::VPCMPWZrmik:
621   case X86::VPCMPWZrri:     case X86::VPCMPWZrrik: {
622     // Turn immediate 0 into the VPCMPEQ instruction.
623     if (OutMI.getOperand(OutMI.getNumOperands() - 1).getImm() == 0) {
624       unsigned NewOpc;
625       switch (OutMI.getOpcode()) {
626       default: llvm_unreachable("Invalid opcode");
627       case X86::VPCMPBZ128rmi:   NewOpc = X86::VPCMPEQBZ128rm;   break;
628       case X86::VPCMPBZ128rmik:  NewOpc = X86::VPCMPEQBZ128rmk;  break;
629       case X86::VPCMPBZ128rri:   NewOpc = X86::VPCMPEQBZ128rr;   break;
630       case X86::VPCMPBZ128rrik:  NewOpc = X86::VPCMPEQBZ128rrk;  break;
631       case X86::VPCMPBZ256rmi:   NewOpc = X86::VPCMPEQBZ256rm;   break;
632       case X86::VPCMPBZ256rmik:  NewOpc = X86::VPCMPEQBZ256rmk;  break;
633       case X86::VPCMPBZ256rri:   NewOpc = X86::VPCMPEQBZ256rr;   break;
634       case X86::VPCMPBZ256rrik:  NewOpc = X86::VPCMPEQBZ256rrk;  break;
635       case X86::VPCMPBZrmi:      NewOpc = X86::VPCMPEQBZrm;      break;
636       case X86::VPCMPBZrmik:     NewOpc = X86::VPCMPEQBZrmk;     break;
637       case X86::VPCMPBZrri:      NewOpc = X86::VPCMPEQBZrr;      break;
638       case X86::VPCMPBZrrik:     NewOpc = X86::VPCMPEQBZrrk;     break;
639       case X86::VPCMPDZ128rmi:   NewOpc = X86::VPCMPEQDZ128rm;   break;
640       case X86::VPCMPDZ128rmib:  NewOpc = X86::VPCMPEQDZ128rmb;  break;
641       case X86::VPCMPDZ128rmibk: NewOpc = X86::VPCMPEQDZ128rmbk; break;
642       case X86::VPCMPDZ128rmik:  NewOpc = X86::VPCMPEQDZ128rmk;  break;
643       case X86::VPCMPDZ128rri:   NewOpc = X86::VPCMPEQDZ128rr;   break;
644       case X86::VPCMPDZ128rrik:  NewOpc = X86::VPCMPEQDZ128rrk;  break;
645       case X86::VPCMPDZ256rmi:   NewOpc = X86::VPCMPEQDZ256rm;   break;
646       case X86::VPCMPDZ256rmib:  NewOpc = X86::VPCMPEQDZ256rmb;  break;
647       case X86::VPCMPDZ256rmibk: NewOpc = X86::VPCMPEQDZ256rmbk; break;
648       case X86::VPCMPDZ256rmik:  NewOpc = X86::VPCMPEQDZ256rmk;  break;
649       case X86::VPCMPDZ256rri:   NewOpc = X86::VPCMPEQDZ256rr;   break;
650       case X86::VPCMPDZ256rrik:  NewOpc = X86::VPCMPEQDZ256rrk;  break;
651       case X86::VPCMPDZrmi:      NewOpc = X86::VPCMPEQDZrm;      break;
652       case X86::VPCMPDZrmib:     NewOpc = X86::VPCMPEQDZrmb;     break;
653       case X86::VPCMPDZrmibk:    NewOpc = X86::VPCMPEQDZrmbk;    break;
654       case X86::VPCMPDZrmik:     NewOpc = X86::VPCMPEQDZrmk;     break;
655       case X86::VPCMPDZrri:      NewOpc = X86::VPCMPEQDZrr;      break;
656       case X86::VPCMPDZrrik:     NewOpc = X86::VPCMPEQDZrrk;     break;
657       case X86::VPCMPQZ128rmi:   NewOpc = X86::VPCMPEQQZ128rm;   break;
658       case X86::VPCMPQZ128rmib:  NewOpc = X86::VPCMPEQQZ128rmb;  break;
659       case X86::VPCMPQZ128rmibk: NewOpc = X86::VPCMPEQQZ128rmbk; break;
660       case X86::VPCMPQZ128rmik:  NewOpc = X86::VPCMPEQQZ128rmk;  break;
661       case X86::VPCMPQZ128rri:   NewOpc = X86::VPCMPEQQZ128rr;   break;
662       case X86::VPCMPQZ128rrik:  NewOpc = X86::VPCMPEQQZ128rrk;  break;
663       case X86::VPCMPQZ256rmi:   NewOpc = X86::VPCMPEQQZ256rm;   break;
664       case X86::VPCMPQZ256rmib:  NewOpc = X86::VPCMPEQQZ256rmb;  break;
665       case X86::VPCMPQZ256rmibk: NewOpc = X86::VPCMPEQQZ256rmbk; break;
666       case X86::VPCMPQZ256rmik:  NewOpc = X86::VPCMPEQQZ256rmk;  break;
667       case X86::VPCMPQZ256rri:   NewOpc = X86::VPCMPEQQZ256rr;   break;
668       case X86::VPCMPQZ256rrik:  NewOpc = X86::VPCMPEQQZ256rrk;  break;
669       case X86::VPCMPQZrmi:      NewOpc = X86::VPCMPEQQZrm;      break;
670       case X86::VPCMPQZrmib:     NewOpc = X86::VPCMPEQQZrmb;     break;
671       case X86::VPCMPQZrmibk:    NewOpc = X86::VPCMPEQQZrmbk;    break;
672       case X86::VPCMPQZrmik:     NewOpc = X86::VPCMPEQQZrmk;     break;
673       case X86::VPCMPQZrri:      NewOpc = X86::VPCMPEQQZrr;      break;
674       case X86::VPCMPQZrrik:     NewOpc = X86::VPCMPEQQZrrk;     break;
675       case X86::VPCMPWZ128rmi:   NewOpc = X86::VPCMPEQWZ128rm;   break;
676       case X86::VPCMPWZ128rmik:  NewOpc = X86::VPCMPEQWZ128rmk;  break;
677       case X86::VPCMPWZ128rri:   NewOpc = X86::VPCMPEQWZ128rr;   break;
678       case X86::VPCMPWZ128rrik:  NewOpc = X86::VPCMPEQWZ128rrk;  break;
679       case X86::VPCMPWZ256rmi:   NewOpc = X86::VPCMPEQWZ256rm;   break;
680       case X86::VPCMPWZ256rmik:  NewOpc = X86::VPCMPEQWZ256rmk;  break;
681       case X86::VPCMPWZ256rri:   NewOpc = X86::VPCMPEQWZ256rr;   break;
682       case X86::VPCMPWZ256rrik:  NewOpc = X86::VPCMPEQWZ256rrk;  break;
683       case X86::VPCMPWZrmi:      NewOpc = X86::VPCMPEQWZrm;      break;
684       case X86::VPCMPWZrmik:     NewOpc = X86::VPCMPEQWZrmk;     break;
685       case X86::VPCMPWZrri:      NewOpc = X86::VPCMPEQWZrr;      break;
686       case X86::VPCMPWZrrik:     NewOpc = X86::VPCMPEQWZrrk;     break;
687       }
688 
689       OutMI.setOpcode(NewOpc);
690       OutMI.erase(&OutMI.getOperand(OutMI.getNumOperands() - 1));
691       break;
692     }
693 
694     // Turn immediate 6 into the VPCMPGT instruction.
695     if (OutMI.getOperand(OutMI.getNumOperands() - 1).getImm() == 6) {
696       unsigned NewOpc;
697       switch (OutMI.getOpcode()) {
698       default: llvm_unreachable("Invalid opcode");
699       case X86::VPCMPBZ128rmi:   NewOpc = X86::VPCMPGTBZ128rm;   break;
700       case X86::VPCMPBZ128rmik:  NewOpc = X86::VPCMPGTBZ128rmk;  break;
701       case X86::VPCMPBZ128rri:   NewOpc = X86::VPCMPGTBZ128rr;   break;
702       case X86::VPCMPBZ128rrik:  NewOpc = X86::VPCMPGTBZ128rrk;  break;
703       case X86::VPCMPBZ256rmi:   NewOpc = X86::VPCMPGTBZ256rm;   break;
704       case X86::VPCMPBZ256rmik:  NewOpc = X86::VPCMPGTBZ256rmk;  break;
705       case X86::VPCMPBZ256rri:   NewOpc = X86::VPCMPGTBZ256rr;   break;
706       case X86::VPCMPBZ256rrik:  NewOpc = X86::VPCMPGTBZ256rrk;  break;
707       case X86::VPCMPBZrmi:      NewOpc = X86::VPCMPGTBZrm;      break;
708       case X86::VPCMPBZrmik:     NewOpc = X86::VPCMPGTBZrmk;     break;
709       case X86::VPCMPBZrri:      NewOpc = X86::VPCMPGTBZrr;      break;
710       case X86::VPCMPBZrrik:     NewOpc = X86::VPCMPGTBZrrk;     break;
711       case X86::VPCMPDZ128rmi:   NewOpc = X86::VPCMPGTDZ128rm;   break;
712       case X86::VPCMPDZ128rmib:  NewOpc = X86::VPCMPGTDZ128rmb;  break;
713       case X86::VPCMPDZ128rmibk: NewOpc = X86::VPCMPGTDZ128rmbk; break;
714       case X86::VPCMPDZ128rmik:  NewOpc = X86::VPCMPGTDZ128rmk;  break;
715       case X86::VPCMPDZ128rri:   NewOpc = X86::VPCMPGTDZ128rr;   break;
716       case X86::VPCMPDZ128rrik:  NewOpc = X86::VPCMPGTDZ128rrk;  break;
717       case X86::VPCMPDZ256rmi:   NewOpc = X86::VPCMPGTDZ256rm;   break;
718       case X86::VPCMPDZ256rmib:  NewOpc = X86::VPCMPGTDZ256rmb;  break;
719       case X86::VPCMPDZ256rmibk: NewOpc = X86::VPCMPGTDZ256rmbk; break;
720       case X86::VPCMPDZ256rmik:  NewOpc = X86::VPCMPGTDZ256rmk;  break;
721       case X86::VPCMPDZ256rri:   NewOpc = X86::VPCMPGTDZ256rr;   break;
722       case X86::VPCMPDZ256rrik:  NewOpc = X86::VPCMPGTDZ256rrk;  break;
723       case X86::VPCMPDZrmi:      NewOpc = X86::VPCMPGTDZrm;      break;
724       case X86::VPCMPDZrmib:     NewOpc = X86::VPCMPGTDZrmb;     break;
725       case X86::VPCMPDZrmibk:    NewOpc = X86::VPCMPGTDZrmbk;    break;
726       case X86::VPCMPDZrmik:     NewOpc = X86::VPCMPGTDZrmk;     break;
727       case X86::VPCMPDZrri:      NewOpc = X86::VPCMPGTDZrr;      break;
728       case X86::VPCMPDZrrik:     NewOpc = X86::VPCMPGTDZrrk;     break;
729       case X86::VPCMPQZ128rmi:   NewOpc = X86::VPCMPGTQZ128rm;   break;
730       case X86::VPCMPQZ128rmib:  NewOpc = X86::VPCMPGTQZ128rmb;  break;
731       case X86::VPCMPQZ128rmibk: NewOpc = X86::VPCMPGTQZ128rmbk; break;
732       case X86::VPCMPQZ128rmik:  NewOpc = X86::VPCMPGTQZ128rmk;  break;
733       case X86::VPCMPQZ128rri:   NewOpc = X86::VPCMPGTQZ128rr;   break;
734       case X86::VPCMPQZ128rrik:  NewOpc = X86::VPCMPGTQZ128rrk;  break;
735       case X86::VPCMPQZ256rmi:   NewOpc = X86::VPCMPGTQZ256rm;   break;
736       case X86::VPCMPQZ256rmib:  NewOpc = X86::VPCMPGTQZ256rmb;  break;
737       case X86::VPCMPQZ256rmibk: NewOpc = X86::VPCMPGTQZ256rmbk; break;
738       case X86::VPCMPQZ256rmik:  NewOpc = X86::VPCMPGTQZ256rmk;  break;
739       case X86::VPCMPQZ256rri:   NewOpc = X86::VPCMPGTQZ256rr;   break;
740       case X86::VPCMPQZ256rrik:  NewOpc = X86::VPCMPGTQZ256rrk;  break;
741       case X86::VPCMPQZrmi:      NewOpc = X86::VPCMPGTQZrm;      break;
742       case X86::VPCMPQZrmib:     NewOpc = X86::VPCMPGTQZrmb;     break;
743       case X86::VPCMPQZrmibk:    NewOpc = X86::VPCMPGTQZrmbk;    break;
744       case X86::VPCMPQZrmik:     NewOpc = X86::VPCMPGTQZrmk;     break;
745       case X86::VPCMPQZrri:      NewOpc = X86::VPCMPGTQZrr;      break;
746       case X86::VPCMPQZrrik:     NewOpc = X86::VPCMPGTQZrrk;     break;
747       case X86::VPCMPWZ128rmi:   NewOpc = X86::VPCMPGTWZ128rm;   break;
748       case X86::VPCMPWZ128rmik:  NewOpc = X86::VPCMPGTWZ128rmk;  break;
749       case X86::VPCMPWZ128rri:   NewOpc = X86::VPCMPGTWZ128rr;   break;
750       case X86::VPCMPWZ128rrik:  NewOpc = X86::VPCMPGTWZ128rrk;  break;
751       case X86::VPCMPWZ256rmi:   NewOpc = X86::VPCMPGTWZ256rm;   break;
752       case X86::VPCMPWZ256rmik:  NewOpc = X86::VPCMPGTWZ256rmk;  break;
753       case X86::VPCMPWZ256rri:   NewOpc = X86::VPCMPGTWZ256rr;   break;
754       case X86::VPCMPWZ256rrik:  NewOpc = X86::VPCMPGTWZ256rrk;  break;
755       case X86::VPCMPWZrmi:      NewOpc = X86::VPCMPGTWZrm;      break;
756       case X86::VPCMPWZrmik:     NewOpc = X86::VPCMPGTWZrmk;     break;
757       case X86::VPCMPWZrri:      NewOpc = X86::VPCMPGTWZrr;      break;
758       case X86::VPCMPWZrrik:     NewOpc = X86::VPCMPGTWZrrk;     break;
759       }
760 
761       OutMI.setOpcode(NewOpc);
762       OutMI.erase(&OutMI.getOperand(OutMI.getNumOperands() - 1));
763       break;
764     }
765 
766     break;
767   }
768 
  // CALL64r, CALL64pcrel32 - These instructions used to have
  // register inputs modeled as normal uses instead of implicit uses.  As such,
  // we used to truncate off all but the first operand (the callee). This
  // issue seems to have been fixed at some point. This assert verifies that.
773   case X86::CALL64r:
774   case X86::CALL64pcrel32:
775     assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
776     break;
777 
778   case X86::EH_RETURN:
779   case X86::EH_RETURN64: {
780     OutMI = MCInst();
781     OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
782     break;
783   }
784 
785   case X86::CLEANUPRET: {
786     // Replace CLEANUPRET with the appropriate RET.
787     OutMI = MCInst();
788     OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
789     break;
790   }
791 
792   case X86::CATCHRET: {
793     // Replace CATCHRET with the appropriate RET.
794     const X86Subtarget &Subtarget = AsmPrinter.getSubtarget();
795     unsigned ReturnReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
796     OutMI = MCInst();
797     OutMI.setOpcode(getRetOpcode(Subtarget));
798     OutMI.addOperand(MCOperand::createReg(ReturnReg));
799     break;
800   }
801 
802   // TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump
803   // instruction.
804   case X86::TAILJMPr:
805   case X86::TAILJMPr64:
806   case X86::TAILJMPr64_REX:
807   case X86::TAILJMPd:
808   case X86::TAILJMPd64:
809     assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
810     OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
811     break;
812 
813   case X86::TAILJMPd_CC:
814   case X86::TAILJMPd64_CC:
815     assert(OutMI.getNumOperands() == 2 && "Unexpected number of operands!");
816     OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
817     break;
818 
819   case X86::TAILJMPm:
820   case X86::TAILJMPm64:
821   case X86::TAILJMPm64_REX:
822     assert(OutMI.getNumOperands() == X86::AddrNumOperands &&
823            "Unexpected number of operands!");
824     OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
825     break;
826 
827   case X86::DEC16r:
828   case X86::DEC32r:
829   case X86::INC16r:
830   case X86::INC32r:
831     // If we aren't in 64-bit mode we can use the 1-byte inc/dec instructions.
832     if (!AsmPrinter.getSubtarget().is64Bit()) {
833       unsigned Opcode;
834       switch (OutMI.getOpcode()) {
835       default: llvm_unreachable("Invalid opcode");
836       case X86::DEC16r: Opcode = X86::DEC16r_alt; break;
837       case X86::DEC32r: Opcode = X86::DEC32r_alt; break;
838       case X86::INC16r: Opcode = X86::INC16r_alt; break;
839       case X86::INC32r: Opcode = X86::INC32r_alt; break;
840       }
841       OutMI.setOpcode(Opcode);
842     }
843     break;
844 
845   // We don't currently select the correct instruction form for instructions
846   // which have a short %eax, etc. form. Handle this by custom lowering, for
847   // now.
848   //
849   // Note, we are currently not handling the following instructions:
850   // MOV64ao8, MOV64o8a
851   // XCHG16ar, XCHG32ar, XCHG64ar
852   case X86::MOV8mr_NOREX:
853   case X86::MOV8mr:
854   case X86::MOV8rm_NOREX:
855   case X86::MOV8rm:
856   case X86::MOV16mr:
857   case X86::MOV16rm:
858   case X86::MOV32mr:
859   case X86::MOV32rm: {
860     unsigned NewOpc;
861     switch (OutMI.getOpcode()) {
862     default: llvm_unreachable("Invalid opcode");
863     case X86::MOV8mr_NOREX:
864     case X86::MOV8mr:  NewOpc = X86::MOV8o32a; break;
865     case X86::MOV8rm_NOREX:
866     case X86::MOV8rm:  NewOpc = X86::MOV8ao32; break;
867     case X86::MOV16mr: NewOpc = X86::MOV16o32a; break;
868     case X86::MOV16rm: NewOpc = X86::MOV16ao32; break;
869     case X86::MOV32mr: NewOpc = X86::MOV32o32a; break;
870     case X86::MOV32rm: NewOpc = X86::MOV32ao32; break;
871     }
872     SimplifyShortMoveForm(AsmPrinter, OutMI, NewOpc);
873     break;
874   }
875 
876   case X86::ADC8ri: case X86::ADC16ri: case X86::ADC32ri: case X86::ADC64ri32:
877   case X86::ADD8ri: case X86::ADD16ri: case X86::ADD32ri: case X86::ADD64ri32:
878   case X86::AND8ri: case X86::AND16ri: case X86::AND32ri: case X86::AND64ri32:
879   case X86::CMP8ri: case X86::CMP16ri: case X86::CMP32ri: case X86::CMP64ri32:
880   case X86::OR8ri:  case X86::OR16ri:  case X86::OR32ri:  case X86::OR64ri32:
881   case X86::SBB8ri: case X86::SBB16ri: case X86::SBB32ri: case X86::SBB64ri32:
882   case X86::SUB8ri: case X86::SUB16ri: case X86::SUB32ri: case X86::SUB64ri32:
883   case X86::TEST8ri:case X86::TEST16ri:case X86::TEST32ri:case X86::TEST64ri32:
884   case X86::XOR8ri: case X86::XOR16ri: case X86::XOR32ri: case X86::XOR64ri32: {
885     unsigned NewOpc;
886     switch (OutMI.getOpcode()) {
887     default: llvm_unreachable("Invalid opcode");
888     case X86::ADC8ri:     NewOpc = X86::ADC8i8;    break;
889     case X86::ADC16ri:    NewOpc = X86::ADC16i16;  break;
890     case X86::ADC32ri:    NewOpc = X86::ADC32i32;  break;
891     case X86::ADC64ri32:  NewOpc = X86::ADC64i32;  break;
892     case X86::ADD8ri:     NewOpc = X86::ADD8i8;    break;
893     case X86::ADD16ri:    NewOpc = X86::ADD16i16;  break;
894     case X86::ADD32ri:    NewOpc = X86::ADD32i32;  break;
895     case X86::ADD64ri32:  NewOpc = X86::ADD64i32;  break;
896     case X86::AND8ri:     NewOpc = X86::AND8i8;    break;
897     case X86::AND16ri:    NewOpc = X86::AND16i16;  break;
898     case X86::AND32ri:    NewOpc = X86::AND32i32;  break;
899     case X86::AND64ri32:  NewOpc = X86::AND64i32;  break;
900     case X86::CMP8ri:     NewOpc = X86::CMP8i8;    break;
901     case X86::CMP16ri:    NewOpc = X86::CMP16i16;  break;
902     case X86::CMP32ri:    NewOpc = X86::CMP32i32;  break;
903     case X86::CMP64ri32:  NewOpc = X86::CMP64i32;  break;
904     case X86::OR8ri:      NewOpc = X86::OR8i8;     break;
905     case X86::OR16ri:     NewOpc = X86::OR16i16;   break;
906     case X86::OR32ri:     NewOpc = X86::OR32i32;   break;
907     case X86::OR64ri32:   NewOpc = X86::OR64i32;   break;
908     case X86::SBB8ri:     NewOpc = X86::SBB8i8;    break;
909     case X86::SBB16ri:    NewOpc = X86::SBB16i16;  break;
910     case X86::SBB32ri:    NewOpc = X86::SBB32i32;  break;
911     case X86::SBB64ri32:  NewOpc = X86::SBB64i32;  break;
912     case X86::SUB8ri:     NewOpc = X86::SUB8i8;    break;
913     case X86::SUB16ri:    NewOpc = X86::SUB16i16;  break;
914     case X86::SUB32ri:    NewOpc = X86::SUB32i32;  break;
915     case X86::SUB64ri32:  NewOpc = X86::SUB64i32;  break;
916     case X86::TEST8ri:    NewOpc = X86::TEST8i8;   break;
917     case X86::TEST16ri:   NewOpc = X86::TEST16i16; break;
918     case X86::TEST32ri:   NewOpc = X86::TEST32i32; break;
919     case X86::TEST64ri32: NewOpc = X86::TEST64i32; break;
920     case X86::XOR8ri:     NewOpc = X86::XOR8i8;    break;
921     case X86::XOR16ri:    NewOpc = X86::XOR16i16;  break;
922     case X86::XOR32ri:    NewOpc = X86::XOR32i32;  break;
923     case X86::XOR64ri32:  NewOpc = X86::XOR64i32;  break;
924     }
925     SimplifyShortImmForm(OutMI, NewOpc);
926     break;
927   }
928 
929   // Try to shrink some forms of movsx.
930   case X86::MOVSX16rr8:
931   case X86::MOVSX32rr16:
932   case X86::MOVSX64rr32:
933     SimplifyMOVSX(OutMI);
934     break;
935 
936   case X86::VCMPPDrri:
937   case X86::VCMPPDYrri:
938   case X86::VCMPPSrri:
939   case X86::VCMPPSYrri:
940   case X86::VCMPSDrr:
941   case X86::VCMPSSrr: {
942     // Swap the operands if it will enable a 2 byte VEX encoding.
943     // FIXME: Change the immediate to improve opportunities?
944     if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg()) &&
945         X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
946       unsigned Imm = MI->getOperand(3).getImm() & 0x7;
947       switch (Imm) {
948       default: break;
949       case 0x00: // EQUAL
950       case 0x03: // UNORDERED
951       case 0x04: // NOT EQUAL
952       case 0x07: // ORDERED
953         std::swap(OutMI.getOperand(1), OutMI.getOperand(2));
954         break;
955       }
956     }
957     break;
958   }
959 
960   case X86::VMOVHLPSrr:
961   case X86::VUNPCKHPDrr:
962     // These are not truly commutable so hide them from the default case.
963     break;
964 
965   default: {
966     // If the instruction is a commutable arithmetic instruction we might be
967     // able to commute the operands to get a 2 byte VEX prefix.
968     uint64_t TSFlags = MI->getDesc().TSFlags;
969     if (MI->getDesc().isCommutable() &&
970         (TSFlags & X86II::EncodingMask) == X86II::VEX &&
971         (TSFlags & X86II::OpMapMask) == X86II::TB &&
972         (TSFlags & X86II::FormMask) == X86II::MRMSrcReg &&
973         !(TSFlags & X86II::VEX_W) && (TSFlags & X86II::VEX_4V) &&
974         OutMI.getNumOperands() == 3) {
975       if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg()) &&
976           X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg()))
977         std::swap(OutMI.getOperand(1), OutMI.getOperand(2));
978     }
979     break;
980   }
981   }
982 }
983 
984 void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
985                                  const MachineInstr &MI) {
986   NoAutoPaddingScope NoPadScope(*OutStreamer);
987   bool Is64Bits = MI.getOpcode() != X86::TLS_addr32 &&
988                   MI.getOpcode() != X86::TLS_base_addr32;
989   bool Is64BitsLP64 = MI.getOpcode() == X86::TLS_addr64 ||
990                       MI.getOpcode() == X86::TLS_base_addr64;
991   MCContext &Ctx = OutStreamer->getContext();
992 
993   MCSymbolRefExpr::VariantKind SRVK;
994   switch (MI.getOpcode()) {
995   case X86::TLS_addr32:
996   case X86::TLS_addr64:
997   case X86::TLS_addrX32:
998     SRVK = MCSymbolRefExpr::VK_TLSGD;
999     break;
1000   case X86::TLS_base_addr32:
1001     SRVK = MCSymbolRefExpr::VK_TLSLDM;
1002     break;
1003   case X86::TLS_base_addr64:
1004   case X86::TLS_base_addrX32:
1005     SRVK = MCSymbolRefExpr::VK_TLSLD;
1006     break;
1007   default:
1008     llvm_unreachable("unexpected opcode");
1009   }
1010 
1011   const MCSymbolRefExpr *Sym = MCSymbolRefExpr::create(
1012       MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)), SRVK, Ctx);
1013 
1014   // As of binutils 2.32, ld has a bogus TLS relaxation error when the GD/LD
1015   // code sequence using R_X86_64_GOTPCREL (instead of R_X86_64_GOTPCRELX) is
1016   // attempted to be relaxed to IE/LE (binutils PR24784). Work around the bug by
1017   // only using GOT when GOTPCRELX is enabled.
1018   // TODO Delete the workaround when GOTPCRELX becomes commonplace.
1019   bool UseGot = MMI->getModule()->getRtLibUseGOT() &&
1020                 Ctx.getAsmInfo()->canRelaxRelocations();
1021 
1022   if (Is64Bits) {
1023     bool NeedsPadding = SRVK == MCSymbolRefExpr::VK_TLSGD;
1024     if (NeedsPadding && Is64BitsLP64)
1025       EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
1026     EmitAndCountInstruction(MCInstBuilder(X86::LEA64r)
1027                                 .addReg(X86::RDI)
1028                                 .addReg(X86::RIP)
1029                                 .addImm(1)
1030                                 .addReg(0)
1031                                 .addExpr(Sym)
1032                                 .addReg(0));
1033     const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("__tls_get_addr");
1034     if (NeedsPadding) {
1035       if (!UseGot)
1036         EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
1037       EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
1038       EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX));
1039     }
1040     if (UseGot) {
1041       const MCExpr *Expr = MCSymbolRefExpr::create(
1042           TlsGetAddr, MCSymbolRefExpr::VK_GOTPCREL, Ctx);
1043       EmitAndCountInstruction(MCInstBuilder(X86::CALL64m)
1044                                   .addReg(X86::RIP)
1045                                   .addImm(1)
1046                                   .addReg(0)
1047                                   .addExpr(Expr)
1048                                   .addReg(0));
1049     } else {
1050       EmitAndCountInstruction(
1051           MCInstBuilder(X86::CALL64pcrel32)
1052               .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
1053                                                MCSymbolRefExpr::VK_PLT, Ctx)));
1054     }
1055   } else {
1056     if (SRVK == MCSymbolRefExpr::VK_TLSGD && !UseGot) {
1057       EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
1058                                   .addReg(X86::EAX)
1059                                   .addReg(0)
1060                                   .addImm(1)
1061                                   .addReg(X86::EBX)
1062                                   .addExpr(Sym)
1063                                   .addReg(0));
1064     } else {
1065       EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
1066                                   .addReg(X86::EAX)
1067                                   .addReg(X86::EBX)
1068                                   .addImm(1)
1069                                   .addReg(0)
1070                                   .addExpr(Sym)
1071                                   .addReg(0));
1072     }
1073 
1074     const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("___tls_get_addr");
1075     if (UseGot) {
1076       const MCExpr *Expr =
1077           MCSymbolRefExpr::create(TlsGetAddr, MCSymbolRefExpr::VK_GOT, Ctx);
1078       EmitAndCountInstruction(MCInstBuilder(X86::CALL32m)
1079                                   .addReg(X86::EBX)
1080                                   .addImm(1)
1081                                   .addReg(0)
1082                                   .addExpr(Expr)
1083                                   .addReg(0));
1084     } else {
1085       EmitAndCountInstruction(
1086           MCInstBuilder(X86::CALLpcrel32)
1087               .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
1088                                                MCSymbolRefExpr::VK_PLT, Ctx)));
1089     }
1090   }
1091 }
1092 
1093 /// Emit the largest nop instruction smaller than or equal to \p NumBytes
1094 /// bytes.  Return the size of nop emitted.
1095 static unsigned emitNop(MCStreamer &OS, unsigned NumBytes,
1096                         const X86Subtarget *Subtarget) {
1097   // Determine the longest nop which can be efficiently decoded for the given
1098   // target cpu.  15-bytes is the longest single NOP instruction, but some
1099   // platforms can't decode the longest forms efficiently.
1100   unsigned MaxNopLength = 1;
1101   if (Subtarget->is64Bit()) {
1102     // FIXME: We can use NOOPL on 32-bit targets with FeatureNOPL, but the
1103     // IndexReg/BaseReg below need to be updated.
1104     if (Subtarget->hasFeature(X86::TuningFast7ByteNOP))
1105       MaxNopLength = 7;
1106     else if (Subtarget->hasFeature(X86::TuningFast15ByteNOP))
1107       MaxNopLength = 15;
1108     else if (Subtarget->hasFeature(X86::TuningFast11ByteNOP))
1109       MaxNopLength = 11;
1110     else
1111       MaxNopLength = 10;
1112   } if (Subtarget->is32Bit())
1113     MaxNopLength = 2;
1114 
1115   // Cap a single nop emission at the profitable value for the target
1116   NumBytes = std::min(NumBytes, MaxNopLength);
1117 
1118   unsigned NopSize;
1119   unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg;
1120   IndexReg = Displacement = SegmentReg = 0;
1121   BaseReg = X86::RAX;
1122   ScaleVal = 1;
1123   switch (NumBytes) {
1124   case 0:
1125     llvm_unreachable("Zero nops?");
1126     break;
1127   case 1:
1128     NopSize = 1;
1129     Opc = X86::NOOP;
1130     break;
1131   case 2:
1132     NopSize = 2;
1133     Opc = X86::XCHG16ar;
1134     break;
1135   case 3:
1136     NopSize = 3;
1137     Opc = X86::NOOPL;
1138     break;
1139   case 4:
1140     NopSize = 4;
1141     Opc = X86::NOOPL;
1142     Displacement = 8;
1143     break;
1144   case 5:
1145     NopSize = 5;
1146     Opc = X86::NOOPL;
1147     Displacement = 8;
1148     IndexReg = X86::RAX;
1149     break;
1150   case 6:
1151     NopSize = 6;
1152     Opc = X86::NOOPW;
1153     Displacement = 8;
1154     IndexReg = X86::RAX;
1155     break;
1156   case 7:
1157     NopSize = 7;
1158     Opc = X86::NOOPL;
1159     Displacement = 512;
1160     break;
1161   case 8:
1162     NopSize = 8;
1163     Opc = X86::NOOPL;
1164     Displacement = 512;
1165     IndexReg = X86::RAX;
1166     break;
1167   case 9:
1168     NopSize = 9;
1169     Opc = X86::NOOPW;
1170     Displacement = 512;
1171     IndexReg = X86::RAX;
1172     break;
1173   default:
1174     NopSize = 10;
1175     Opc = X86::NOOPW;
1176     Displacement = 512;
1177     IndexReg = X86::RAX;
1178     SegmentReg = X86::CS;
1179     break;
1180   }
1181 
1182   unsigned NumPrefixes = std::min(NumBytes - NopSize, 5U);
1183   NopSize += NumPrefixes;
1184   for (unsigned i = 0; i != NumPrefixes; ++i)
1185     OS.emitBytes("\x66");
1186 
1187   switch (Opc) {
1188   default: llvm_unreachable("Unexpected opcode");
1189   case X86::NOOP:
1190     OS.emitInstruction(MCInstBuilder(Opc), *Subtarget);
1191     break;
1192   case X86::XCHG16ar:
1193     OS.emitInstruction(MCInstBuilder(Opc).addReg(X86::AX).addReg(X86::AX),
1194                        *Subtarget);
1195     break;
1196   case X86::NOOPL:
1197   case X86::NOOPW:
1198     OS.emitInstruction(MCInstBuilder(Opc)
1199                            .addReg(BaseReg)
1200                            .addImm(ScaleVal)
1201                            .addReg(IndexReg)
1202                            .addImm(Displacement)
1203                            .addReg(SegmentReg),
1204                        *Subtarget);
1205     break;
1206   }
1207   assert(NopSize <= NumBytes && "We overemitted?");
1208   return NopSize;
1209 }
1210 
1211 /// Emit the optimal amount of multi-byte nops on X86.
1212 static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
1213                         const X86Subtarget *Subtarget) {
1214   unsigned NopsToEmit = NumBytes;
1215   (void)NopsToEmit;
1216   while (NumBytes) {
1217     NumBytes -= emitNop(OS, NumBytes, Subtarget);
1218     assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!");
1219   }
1220 }
1221 
1222 void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
1223                                     X86MCInstLower &MCIL) {
1224   assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64");
1225 
1226   NoAutoPaddingScope NoPadScope(*OutStreamer);
1227 
1228   StatepointOpers SOpers(&MI);
1229   if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
1230     emitX86Nops(*OutStreamer, PatchBytes, Subtarget);
1231   } else {
1232     // Lower call target and choose correct opcode
1233     const MachineOperand &CallTarget = SOpers.getCallTarget();
1234     MCOperand CallTargetMCOp;
1235     unsigned CallOpcode;
1236     switch (CallTarget.getType()) {
1237     case MachineOperand::MO_GlobalAddress:
1238     case MachineOperand::MO_ExternalSymbol:
1239       CallTargetMCOp = MCIL.LowerSymbolOperand(
1240           CallTarget, MCIL.GetSymbolFromOperand(CallTarget));
1241       CallOpcode = X86::CALL64pcrel32;
1242       // Currently, we only support relative addressing with statepoints.
1243       // Otherwise, we'll need a scratch register to hold the target
1244       // address.  You'll fail asserts during load & relocation if this
1245       // symbol is to far away. (TODO: support non-relative addressing)
1246       break;
1247     case MachineOperand::MO_Immediate:
1248       CallTargetMCOp = MCOperand::createImm(CallTarget.getImm());
1249       CallOpcode = X86::CALL64pcrel32;
1250       // Currently, we only support relative addressing with statepoints.
1251       // Otherwise, we'll need a scratch register to hold the target
1252       // immediate.  You'll fail asserts during load & relocation if this
1253       // address is to far away. (TODO: support non-relative addressing)
1254       break;
1255     case MachineOperand::MO_Register:
1256       // FIXME: Add retpoline support and remove this.
1257       if (Subtarget->useIndirectThunkCalls())
1258         report_fatal_error("Lowering register statepoints with thunks not "
1259                            "yet implemented.");
1260       CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
1261       CallOpcode = X86::CALL64r;
1262       break;
1263     default:
1264       llvm_unreachable("Unsupported operand type in statepoint call target");
1265       break;
1266     }
1267 
1268     // Emit call
1269     MCInst CallInst;
1270     CallInst.setOpcode(CallOpcode);
1271     CallInst.addOperand(CallTargetMCOp);
1272     OutStreamer->emitInstruction(CallInst, getSubtargetInfo());
1273   }
1274 
1275   // Record our statepoint node in the same section used by STACKMAP
1276   // and PATCHPOINT
1277   auto &Ctx = OutStreamer->getContext();
1278   MCSymbol *MILabel = Ctx.createTempSymbol();
1279   OutStreamer->emitLabel(MILabel);
1280   SM.recordStatepoint(*MILabel, MI);
1281 }
1282 
1283 void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
1284                                      X86MCInstLower &MCIL) {
1285   // FAULTING_LOAD_OP <def>, <faltinf type>, <MBB handler>,
1286   //                  <opcode>, <operands>
1287 
1288   NoAutoPaddingScope NoPadScope(*OutStreamer);
1289 
1290   Register DefRegister = FaultingMI.getOperand(0).getReg();
1291   FaultMaps::FaultKind FK =
1292       static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm());
1293   MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol();
1294   unsigned Opcode = FaultingMI.getOperand(3).getImm();
1295   unsigned OperandsBeginIdx = 4;
1296 
1297   auto &Ctx = OutStreamer->getContext();
1298   MCSymbol *FaultingLabel = Ctx.createTempSymbol();
1299   OutStreamer->emitLabel(FaultingLabel);
1300 
1301   assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!");
1302   FM.recordFaultingOp(FK, FaultingLabel, HandlerLabel);
1303 
1304   MCInst MI;
1305   MI.setOpcode(Opcode);
1306 
1307   if (DefRegister != X86::NoRegister)
1308     MI.addOperand(MCOperand::createReg(DefRegister));
1309 
1310   for (auto I = FaultingMI.operands_begin() + OperandsBeginIdx,
1311             E = FaultingMI.operands_end();
1312        I != E; ++I)
1313     if (auto MaybeOperand = MCIL.LowerMachineOperand(&FaultingMI, *I))
1314       MI.addOperand(MaybeOperand.getValue());
1315 
1316   OutStreamer->AddComment("on-fault: " + HandlerLabel->getName());
1317   OutStreamer->emitInstruction(MI, getSubtargetInfo());
1318 }
1319 
1320 void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
1321                                      X86MCInstLower &MCIL) {
1322   bool Is64Bits = Subtarget->is64Bit();
1323   MCContext &Ctx = OutStreamer->getContext();
1324   MCSymbol *fentry = Ctx.getOrCreateSymbol("__fentry__");
1325   const MCSymbolRefExpr *Op =
1326       MCSymbolRefExpr::create(fentry, MCSymbolRefExpr::VK_None, Ctx);
1327 
1328   EmitAndCountInstruction(
1329       MCInstBuilder(Is64Bits ? X86::CALL64pcrel32 : X86::CALLpcrel32)
1330           .addExpr(Op));
1331 }
1332 
1333 void X86AsmPrinter::LowerASAN_CHECK_MEMACCESS(const MachineInstr &MI) {
1334   // FIXME: Make this work on non-ELF.
1335   if (!TM.getTargetTriple().isOSBinFormatELF()) {
1336     report_fatal_error("llvm.asan.check.memaccess only supported on ELF");
1337     return;
1338   }
1339 
1340   const auto &Reg = MI.getOperand(0).getReg();
1341   ASanAccessInfo AccessInfo(MI.getOperand(1).getImm());
1342 
1343   uint64_t ShadowBase;
1344   int MappingScale;
1345   bool OrShadowOffset;
1346   getAddressSanitizerParams(Triple(TM.getTargetTriple()), 64,
1347                             AccessInfo.CompileKernel, &ShadowBase,
1348                             &MappingScale, &OrShadowOffset);
1349 
1350   StringRef Name = AccessInfo.IsWrite ? "store" : "load";
1351   StringRef Op = OrShadowOffset ? "or" : "add";
1352   std::string SymName = ("__asan_check_" + Name + "_" + Op + "_" +
1353                          Twine(1ULL << AccessInfo.AccessSizeIndex) + "_" +
1354                          TM.getMCRegisterInfo()->getName(Reg.asMCReg()))
1355                             .str();
1356   if (OrShadowOffset)
1357     report_fatal_error(
1358         "OrShadowOffset is not supported with optimized callbacks");
1359 
1360   EmitAndCountInstruction(
1361       MCInstBuilder(X86::CALL64pcrel32)
1362           .addExpr(MCSymbolRefExpr::create(
1363               OutContext.getOrCreateSymbol(SymName), OutContext)));
1364 }
1365 
1366 void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
1367                                       X86MCInstLower &MCIL) {
1368   // PATCHABLE_OP minsize, opcode, operands
1369 
1370   NoAutoPaddingScope NoPadScope(*OutStreamer);
1371 
1372   unsigned MinSize = MI.getOperand(0).getImm();
1373   unsigned Opcode = MI.getOperand(1).getImm();
1374 
1375   MCInst MCI;
1376   MCI.setOpcode(Opcode);
1377   for (auto &MO : drop_begin(MI.operands(), 2))
1378     if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
1379       MCI.addOperand(MaybeOperand.getValue());
1380 
1381   SmallString<256> Code;
1382   SmallVector<MCFixup, 4> Fixups;
1383   raw_svector_ostream VecOS(Code);
1384   CodeEmitter->encodeInstruction(MCI, VecOS, Fixups, getSubtargetInfo());
1385 
1386   if (Code.size() < MinSize) {
1387     if (MinSize == 2 && Subtarget->is32Bit() &&
1388         Subtarget->isTargetWindowsMSVC() &&
1389         (Subtarget->getCPU().empty() || Subtarget->getCPU() == "pentium3")) {
1390       // For compatibilty reasons, when targetting MSVC, is is important to
1391       // generate a 'legacy' NOP in the form of a 8B FF MOV EDI, EDI. Some tools
1392       // rely specifically on this pattern to be able to patch a function.
1393       // This is only for 32-bit targets, when using /arch:IA32 or /arch:SSE.
1394       OutStreamer->emitInstruction(
1395           MCInstBuilder(X86::MOV32rr_REV).addReg(X86::EDI).addReg(X86::EDI),
1396           *Subtarget);
1397     } else if (MinSize == 2 && Opcode == X86::PUSH64r) {
1398       // This is an optimization that lets us get away without emitting a nop in
1399       // many cases.
1400       //
1401       // NB! In some cases the encoding for PUSH64r (e.g. PUSH64r %r9) takes two
1402       // bytes too, so the check on MinSize is important.
1403       MCI.setOpcode(X86::PUSH64rmr);
1404     } else {
1405       unsigned NopSize = emitNop(*OutStreamer, MinSize, Subtarget);
1406       assert(NopSize == MinSize && "Could not implement MinSize!");
1407       (void)NopSize;
1408     }
1409   }
1410 
1411   OutStreamer->emitInstruction(MCI, getSubtargetInfo());
1412 }
1413 
1414 // Lower a stackmap of the form:
1415 // <id>, <shadowBytes>, ...
1416 void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
1417   SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
1418 
1419   auto &Ctx = OutStreamer->getContext();
1420   MCSymbol *MILabel = Ctx.createTempSymbol();
1421   OutStreamer->emitLabel(MILabel);
1422 
1423   SM.recordStackMap(*MILabel, MI);
1424   unsigned NumShadowBytes = MI.getOperand(1).getImm();
1425   SMShadowTracker.reset(NumShadowBytes);
1426 }
1427 
1428 // Lower a patchpoint of the form:
1429 // [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ...
1430 void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
1431                                     X86MCInstLower &MCIL) {
1432   assert(Subtarget->is64Bit() && "Patchpoint currently only supports X86-64");
1433 
1434   SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
1435 
1436   NoAutoPaddingScope NoPadScope(*OutStreamer);
1437 
1438   auto &Ctx = OutStreamer->getContext();
1439   MCSymbol *MILabel = Ctx.createTempSymbol();
1440   OutStreamer->emitLabel(MILabel);
1441   SM.recordPatchPoint(*MILabel, MI);
1442 
1443   PatchPointOpers opers(&MI);
1444   unsigned ScratchIdx = opers.getNextScratchIdx();
1445   unsigned EncodedBytes = 0;
1446   const MachineOperand &CalleeMO = opers.getCallTarget();
1447 
1448   // Check for null target. If target is non-null (i.e. is non-zero or is
1449   // symbolic) then emit a call.
1450   if (!(CalleeMO.isImm() && !CalleeMO.getImm())) {
1451     MCOperand CalleeMCOp;
1452     switch (CalleeMO.getType()) {
1453     default:
1454       /// FIXME: Add a verifier check for bad callee types.
1455       llvm_unreachable("Unrecognized callee operand type.");
1456     case MachineOperand::MO_Immediate:
1457       if (CalleeMO.getImm())
1458         CalleeMCOp = MCOperand::createImm(CalleeMO.getImm());
1459       break;
1460     case MachineOperand::MO_ExternalSymbol:
1461     case MachineOperand::MO_GlobalAddress:
1462       CalleeMCOp = MCIL.LowerSymbolOperand(CalleeMO,
1463                                            MCIL.GetSymbolFromOperand(CalleeMO));
1464       break;
1465     }
1466 
1467     // Emit MOV to materialize the target address and the CALL to target.
1468     // This is encoded with 12-13 bytes, depending on which register is used.
1469     Register ScratchReg = MI.getOperand(ScratchIdx).getReg();
1470     if (X86II::isX86_64ExtendedReg(ScratchReg))
1471       EncodedBytes = 13;
1472     else
1473       EncodedBytes = 12;
1474 
1475     EmitAndCountInstruction(
1476         MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));
1477     // FIXME: Add retpoline support and remove this.
1478     if (Subtarget->useIndirectThunkCalls())
1479       report_fatal_error(
1480           "Lowering patchpoint with thunks not yet implemented.");
1481     EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
1482   }
1483 
1484   // Emit padding.
1485   unsigned NumBytes = opers.getNumPatchBytes();
1486   assert(NumBytes >= EncodedBytes &&
1487          "Patchpoint can't request size less than the length of a call.");
1488 
1489   emitX86Nops(*OutStreamer, NumBytes - EncodedBytes, Subtarget);
1490 }
1491 
1492 void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
1493                                               X86MCInstLower &MCIL) {
1494   assert(Subtarget->is64Bit() && "XRay custom events only supports X86-64");
1495 
1496   NoAutoPaddingScope NoPadScope(*OutStreamer);
1497 
1498   // We want to emit the following pattern, which follows the x86 calling
1499   // convention to prepare for the trampoline call to be patched in.
1500   //
1501   //   .p2align 1, ...
1502   // .Lxray_event_sled_N:
1503   //   jmp +N                        // jump across the instrumentation sled
1504   //   ...                           // set up arguments in register
1505   //   callq __xray_CustomEvent@plt  // force dependency to symbol
1506   //   ...
1507   //   <jump here>
1508   //
1509   // After patching, it would look something like:
1510   //
1511   //   nopw (2-byte nop)
1512   //   ...
1513   //   callq __xrayCustomEvent  // already lowered
1514   //   ...
1515   //
1516   // ---
1517   // First we emit the label and the jump.
1518   auto CurSled = OutContext.createTempSymbol("xray_event_sled_", true);
1519   OutStreamer->AddComment("# XRay Custom Event Log");
1520   OutStreamer->emitCodeAlignment(2, &getSubtargetInfo());
1521   OutStreamer->emitLabel(CurSled);
1522 
1523   // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1524   // an operand (computed as an offset from the jmp instruction).
1525   // FIXME: Find another less hacky way do force the relative jump.
1526   OutStreamer->emitBinaryData("\xeb\x0f");
1527 
1528   // The default C calling convention will place two arguments into %rcx and
1529   // %rdx -- so we only work with those.
1530   const Register DestRegs[] = {X86::RDI, X86::RSI};
1531   bool UsedMask[] = {false, false};
1532   // Filled out in loop.
1533   Register SrcRegs[] = {0, 0};
1534 
1535   // Then we put the operands in the %rdi and %rsi registers. We spill the
1536   // values in the register before we clobber them, and mark them as used in
1537   // UsedMask. In case the arguments are already in the correct register, we use
1538   // emit nops appropriately sized to keep the sled the same size in every
1539   // situation.
1540   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1541     if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
1542       assert(Op->isReg() && "Only support arguments in registers");
1543       SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64);
1544       if (SrcRegs[I] != DestRegs[I]) {
1545         UsedMask[I] = true;
1546         EmitAndCountInstruction(
1547             MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
1548       } else {
1549         emitX86Nops(*OutStreamer, 4, Subtarget);
1550       }
1551     }
1552 
1553   // Now that the register values are stashed, mov arguments into place.
1554   // FIXME: This doesn't work if one of the later SrcRegs is equal to an
1555   // earlier DestReg. We will have already overwritten over the register before
1556   // we can copy from it.
1557   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1558     if (SrcRegs[I] != DestRegs[I])
1559       EmitAndCountInstruction(
1560           MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));
1561 
1562   // We emit a hard dependency on the __xray_CustomEvent symbol, which is the
1563   // name of the trampoline to be implemented by the XRay runtime.
1564   auto TSym = OutContext.getOrCreateSymbol("__xray_CustomEvent");
1565   MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
1566   if (isPositionIndependent())
1567     TOp.setTargetFlags(X86II::MO_PLT);
1568 
1569   // Emit the call instruction.
1570   EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
1571                               .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));
1572 
1573   // Restore caller-saved and used registers.
1574   for (unsigned I = sizeof UsedMask; I-- > 0;)
1575     if (UsedMask[I])
1576       EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
1577     else
1578       emitX86Nops(*OutStreamer, 1, Subtarget);
1579 
1580   OutStreamer->AddComment("xray custom event end.");
1581 
1582   // Record the sled version. Version 0 of this sled was spelled differently, so
1583   // we let the runtime handle the different offsets we're using. Version 2
1584   // changed the absolute address to a PC-relative address.
1585   recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 2);
1586 }
1587 
1588 void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
1589                                                     X86MCInstLower &MCIL) {
1590   assert(Subtarget->is64Bit() && "XRay typed events only supports X86-64");
1591 
1592   NoAutoPaddingScope NoPadScope(*OutStreamer);
1593 
1594   // We want to emit the following pattern, which follows the x86 calling
1595   // convention to prepare for the trampoline call to be patched in.
1596   //
1597   //   .p2align 1, ...
1598   // .Lxray_event_sled_N:
1599   //   jmp +N                        // jump across the instrumentation sled
1600   //   ...                           // set up arguments in register
1601   //   callq __xray_TypedEvent@plt  // force dependency to symbol
1602   //   ...
1603   //   <jump here>
1604   //
1605   // After patching, it would look something like:
1606   //
1607   //   nopw (2-byte nop)
1608   //   ...
1609   //   callq __xrayTypedEvent  // already lowered
1610   //   ...
1611   //
1612   // ---
1613   // First we emit the label and the jump.
1614   auto CurSled = OutContext.createTempSymbol("xray_typed_event_sled_", true);
1615   OutStreamer->AddComment("# XRay Typed Event Log");
1616   OutStreamer->emitCodeAlignment(2, &getSubtargetInfo());
1617   OutStreamer->emitLabel(CurSled);
1618 
1619   // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1620   // an operand (computed as an offset from the jmp instruction).
1621   // FIXME: Find another less hacky way do force the relative jump.
1622   OutStreamer->emitBinaryData("\xeb\x14");
1623 
1624   // An x86-64 convention may place three arguments into %rcx, %rdx, and R8,
1625   // so we'll work with those. Or we may be called via SystemV, in which case
1626   // we don't have to do any translation.
1627   const Register DestRegs[] = {X86::RDI, X86::RSI, X86::RDX};
1628   bool UsedMask[] = {false, false, false};
1629 
1630   // Will fill out src regs in the loop.
1631   Register SrcRegs[] = {0, 0, 0};
1632 
1633   // Then we put the operands in the SystemV registers. We spill the values in
1634   // the registers before we clobber them, and mark them as used in UsedMask.
1635   // In case the arguments are already in the correct register, we emit nops
1636   // appropriately sized to keep the sled the same size in every situation.
1637   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1638     if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
1639       // TODO: Is register only support adequate?
1640       assert(Op->isReg() && "Only supports arguments in registers");
1641       SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64);
1642       if (SrcRegs[I] != DestRegs[I]) {
1643         UsedMask[I] = true;
1644         EmitAndCountInstruction(
1645             MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
1646       } else {
1647         emitX86Nops(*OutStreamer, 4, Subtarget);
1648       }
1649     }
1650 
1651   // In the above loop we only stash all of the destination registers or emit
1652   // nops if the arguments are already in the right place. Doing the actually
1653   // moving is postponed until after all the registers are stashed so nothing
1654   // is clobbers. We've already added nops to account for the size of mov and
1655   // push if the register is in the right place, so we only have to worry about
1656   // emitting movs.
1657   // FIXME: This doesn't work if one of the later SrcRegs is equal to an
1658   // earlier DestReg. We will have already overwritten over the register before
1659   // we can copy from it.
1660   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1661     if (UsedMask[I])
1662       EmitAndCountInstruction(
1663           MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));
1664 
1665   // We emit a hard dependency on the __xray_TypedEvent symbol, which is the
1666   // name of the trampoline to be implemented by the XRay runtime.
1667   auto TSym = OutContext.getOrCreateSymbol("__xray_TypedEvent");
1668   MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
1669   if (isPositionIndependent())
1670     TOp.setTargetFlags(X86II::MO_PLT);
1671 
1672   // Emit the call instruction.
1673   EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
1674                               .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));
1675 
1676   // Restore caller-saved and used registers.
1677   for (unsigned I = sizeof UsedMask; I-- > 0;)
1678     if (UsedMask[I])
1679       EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
1680     else
1681       emitX86Nops(*OutStreamer, 1, Subtarget);
1682 
1683   OutStreamer->AddComment("xray typed event end.");
1684 
1685   // Record the sled version.
1686   recordSled(CurSled, MI, SledKind::TYPED_EVENT, 2);
1687 }
1688 
1689 void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
1690                                                   X86MCInstLower &MCIL) {
1691 
1692   NoAutoPaddingScope NoPadScope(*OutStreamer);
1693 
1694   const Function &F = MF->getFunction();
1695   if (F.hasFnAttribute("patchable-function-entry")) {
1696     unsigned Num;
1697     if (F.getFnAttribute("patchable-function-entry")
1698             .getValueAsString()
1699             .getAsInteger(10, Num))
1700       return;
1701     emitX86Nops(*OutStreamer, Num, Subtarget);
1702     return;
1703   }
1704   // We want to emit the following pattern:
1705   //
1706   //   .p2align 1, ...
1707   // .Lxray_sled_N:
1708   //   jmp .tmpN
1709   //   # 9 bytes worth of noops
1710   //
1711   // We need the 9 bytes because at runtime, we'd be patching over the full 11
1712   // bytes with the following pattern:
1713   //
1714   //   mov %r10, <function id, 32-bit>   // 6 bytes
1715   //   call <relative offset, 32-bits>   // 5 bytes
1716   //
1717   auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1718   OutStreamer->emitCodeAlignment(2, &getSubtargetInfo());
1719   OutStreamer->emitLabel(CurSled);
1720 
1721   // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1722   // an operand (computed as an offset from the jmp instruction).
1723   // FIXME: Find another less hacky way do force the relative jump.
1724   OutStreamer->emitBytes("\xeb\x09");
1725   emitX86Nops(*OutStreamer, 9, Subtarget);
1726   recordSled(CurSled, MI, SledKind::FUNCTION_ENTER, 2);
1727 }
1728 
1729 void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
1730                                        X86MCInstLower &MCIL) {
1731   NoAutoPaddingScope NoPadScope(*OutStreamer);
1732 
1733   // Since PATCHABLE_RET takes the opcode of the return statement as an
1734   // argument, we use that to emit the correct form of the RET that we want.
1735   // i.e. when we see this:
1736   //
1737   //   PATCHABLE_RET X86::RET ...
1738   //
1739   // We should emit the RET followed by sleds.
1740   //
1741   //   .p2align 1, ...
1742   // .Lxray_sled_N:
1743   //   ret  # or equivalent instruction
1744   //   # 10 bytes worth of noops
1745   //
1746   // This just makes sure that the alignment for the next instruction is 2.
1747   auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1748   OutStreamer->emitCodeAlignment(2, &getSubtargetInfo());
1749   OutStreamer->emitLabel(CurSled);
1750   unsigned OpCode = MI.getOperand(0).getImm();
1751   MCInst Ret;
1752   Ret.setOpcode(OpCode);
1753   for (auto &MO : drop_begin(MI.operands()))
1754     if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
1755       Ret.addOperand(MaybeOperand.getValue());
1756   OutStreamer->emitInstruction(Ret, getSubtargetInfo());
1757   emitX86Nops(*OutStreamer, 10, Subtarget);
1758   recordSled(CurSled, MI, SledKind::FUNCTION_EXIT, 2);
1759 }
1760 
1761 void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
1762                                              X86MCInstLower &MCIL) {
1763   NoAutoPaddingScope NoPadScope(*OutStreamer);
1764 
1765   // Like PATCHABLE_RET, we have the actual instruction in the operands to this
1766   // instruction so we lower that particular instruction and its operands.
1767   // Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how
1768   // we do it for PATCHABLE_FUNCTION_ENTER. The sled should be very similar to
1769   // the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual
1770   // tail call much like how we have it in PATCHABLE_RET.
1771   auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1772   OutStreamer->emitCodeAlignment(2, &getSubtargetInfo());
1773   OutStreamer->emitLabel(CurSled);
1774   auto Target = OutContext.createTempSymbol();
1775 
1776   // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1777   // an operand (computed as an offset from the jmp instruction).
1778   // FIXME: Find another less hacky way do force the relative jump.
1779   OutStreamer->emitBytes("\xeb\x09");
1780   emitX86Nops(*OutStreamer, 9, Subtarget);
1781   OutStreamer->emitLabel(Target);
1782   recordSled(CurSled, MI, SledKind::TAIL_CALL, 2);
1783 
1784   unsigned OpCode = MI.getOperand(0).getImm();
1785   OpCode = convertTailJumpOpcode(OpCode);
1786   MCInst TC;
1787   TC.setOpcode(OpCode);
1788 
1789   // Before emitting the instruction, add a comment to indicate that this is
1790   // indeed a tail call.
1791   OutStreamer->AddComment("TAILCALL");
1792   for (auto &MO : drop_begin(MI.operands()))
1793     if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
1794       TC.addOperand(MaybeOperand.getValue());
1795   OutStreamer->emitInstruction(TC, getSubtargetInfo());
1796 }
1797 
1798 // Returns instruction preceding MBBI in MachineFunction.
1799 // If MBBI is the first instruction of the first basic block, returns null.
1800 static MachineBasicBlock::const_iterator
1801 PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) {
1802   const MachineBasicBlock *MBB = MBBI->getParent();
1803   while (MBBI == MBB->begin()) {
1804     if (MBB == &MBB->getParent()->front())
1805       return MachineBasicBlock::const_iterator();
1806     MBB = MBB->getPrevNode();
1807     MBBI = MBB->end();
1808   }
1809   --MBBI;
1810   return MBBI;
1811 }
1812 
1813 static const Constant *getConstantFromPool(const MachineInstr &MI,
1814                                            const MachineOperand &Op) {
1815   if (!Op.isCPI() || Op.getOffset() != 0)
1816     return nullptr;
1817 
1818   ArrayRef<MachineConstantPoolEntry> Constants =
1819       MI.getParent()->getParent()->getConstantPool()->getConstants();
1820   const MachineConstantPoolEntry &ConstantEntry = Constants[Op.getIndex()];
1821 
1822   // Bail if this is a machine constant pool entry, we won't be able to dig out
1823   // anything useful.
1824   if (ConstantEntry.isMachineConstantPoolEntry())
1825     return nullptr;
1826 
1827   return ConstantEntry.Val.ConstVal;
1828 }
1829 
1830 static std::string getShuffleComment(const MachineInstr *MI, unsigned SrcOp1Idx,
1831                                      unsigned SrcOp2Idx, ArrayRef<int> Mask) {
1832   std::string Comment;
1833 
1834   // Compute the name for a register. This is really goofy because we have
1835   // multiple instruction printers that could (in theory) use different
1836   // names. Fortunately most people use the ATT style (outside of Windows)
1837   // and they actually agree on register naming here. Ultimately, this is
1838   // a comment, and so its OK if it isn't perfect.
1839   auto GetRegisterName = [](unsigned RegNum) -> StringRef {
1840     return X86ATTInstPrinter::getRegisterName(RegNum);
1841   };
1842 
1843   const MachineOperand &DstOp = MI->getOperand(0);
1844   const MachineOperand &SrcOp1 = MI->getOperand(SrcOp1Idx);
1845   const MachineOperand &SrcOp2 = MI->getOperand(SrcOp2Idx);
1846 
1847   StringRef DstName = DstOp.isReg() ? GetRegisterName(DstOp.getReg()) : "mem";
1848   StringRef Src1Name =
1849       SrcOp1.isReg() ? GetRegisterName(SrcOp1.getReg()) : "mem";
1850   StringRef Src2Name =
1851       SrcOp2.isReg() ? GetRegisterName(SrcOp2.getReg()) : "mem";
1852 
1853   // One source operand, fix the mask to print all elements in one span.
1854   SmallVector<int, 8> ShuffleMask(Mask.begin(), Mask.end());
1855   if (Src1Name == Src2Name)
1856     for (int i = 0, e = ShuffleMask.size(); i != e; ++i)
1857       if (ShuffleMask[i] >= e)
1858         ShuffleMask[i] -= e;
1859 
1860   raw_string_ostream CS(Comment);
1861   CS << DstName;
1862 
1863   // Handle AVX512 MASK/MASXZ write mask comments.
1864   // MASK: zmmX {%kY}
1865   // MASKZ: zmmX {%kY} {z}
1866   if (SrcOp1Idx > 1) {
1867     assert((SrcOp1Idx == 2 || SrcOp1Idx == 3) && "Unexpected writemask");
1868 
1869     const MachineOperand &WriteMaskOp = MI->getOperand(SrcOp1Idx - 1);
1870     if (WriteMaskOp.isReg()) {
1871       CS << " {%" << GetRegisterName(WriteMaskOp.getReg()) << "}";
1872 
1873       if (SrcOp1Idx == 2) {
1874         CS << " {z}";
1875       }
1876     }
1877   }
1878 
1879   CS << " = ";
1880 
1881   for (int i = 0, e = ShuffleMask.size(); i != e; ++i) {
1882     if (i != 0)
1883       CS << ",";
1884     if (ShuffleMask[i] == SM_SentinelZero) {
1885       CS << "zero";
1886       continue;
1887     }
1888 
1889     // Otherwise, it must come from src1 or src2.  Print the span of elements
1890     // that comes from this src.
1891     bool isSrc1 = ShuffleMask[i] < (int)e;
1892     CS << (isSrc1 ? Src1Name : Src2Name) << '[';
1893 
1894     bool IsFirst = true;
1895     while (i != e && ShuffleMask[i] != SM_SentinelZero &&
1896            (ShuffleMask[i] < (int)e) == isSrc1) {
1897       if (!IsFirst)
1898         CS << ',';
1899       else
1900         IsFirst = false;
1901       if (ShuffleMask[i] == SM_SentinelUndef)
1902         CS << "u";
1903       else
1904         CS << ShuffleMask[i] % (int)e;
1905       ++i;
1906     }
1907     CS << ']';
1908     --i; // For loop increments element #.
1909   }
1910   CS.flush();
1911 
1912   return Comment;
1913 }
1914 
1915 static void printConstant(const APInt &Val, raw_ostream &CS) {
1916   if (Val.getBitWidth() <= 64) {
1917     CS << Val.getZExtValue();
1918   } else {
1919     // print multi-word constant as (w0,w1)
1920     CS << "(";
1921     for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
1922       if (i > 0)
1923         CS << ",";
1924       CS << Val.getRawData()[i];
1925     }
1926     CS << ")";
1927   }
1928 }
1929 
1930 static void printConstant(const APFloat &Flt, raw_ostream &CS) {
1931   SmallString<32> Str;
1932   // Force scientific notation to distinquish from integers.
1933   Flt.toString(Str, 0, 0);
1934   CS << Str;
1935 }
1936 
1937 static void printConstant(const Constant *COp, raw_ostream &CS) {
1938   if (isa<UndefValue>(COp)) {
1939     CS << "u";
1940   } else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
1941     printConstant(CI->getValue(), CS);
1942   } else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
1943     printConstant(CF->getValueAPF(), CS);
1944   } else {
1945     CS << "?";
1946   }
1947 }
1948 
1949 void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
1950   assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
1951   assert(getSubtarget().isOSWindows() && "SEH_ instruction Windows only");
1952 
1953   // Use the .cv_fpo directives if we're emitting CodeView on 32-bit x86.
1954   if (EmitFPOData) {
1955     X86TargetStreamer *XTS =
1956         static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer());
1957     switch (MI->getOpcode()) {
1958     case X86::SEH_PushReg:
1959       XTS->emitFPOPushReg(MI->getOperand(0).getImm());
1960       break;
1961     case X86::SEH_StackAlloc:
1962       XTS->emitFPOStackAlloc(MI->getOperand(0).getImm());
1963       break;
1964     case X86::SEH_StackAlign:
1965       XTS->emitFPOStackAlign(MI->getOperand(0).getImm());
1966       break;
1967     case X86::SEH_SetFrame:
1968       assert(MI->getOperand(1).getImm() == 0 &&
1969              ".cv_fpo_setframe takes no offset");
1970       XTS->emitFPOSetFrame(MI->getOperand(0).getImm());
1971       break;
1972     case X86::SEH_EndPrologue:
1973       XTS->emitFPOEndPrologue();
1974       break;
1975     case X86::SEH_SaveReg:
1976     case X86::SEH_SaveXMM:
1977     case X86::SEH_PushFrame:
1978       llvm_unreachable("SEH_ directive incompatible with FPO");
1979       break;
1980     default:
1981       llvm_unreachable("expected SEH_ instruction");
1982     }
1983     return;
1984   }
1985 
1986   // Otherwise, use the .seh_ directives for all other Windows platforms.
1987   switch (MI->getOpcode()) {
1988   case X86::SEH_PushReg:
1989     OutStreamer->EmitWinCFIPushReg(MI->getOperand(0).getImm());
1990     break;
1991 
1992   case X86::SEH_SaveReg:
1993     OutStreamer->EmitWinCFISaveReg(MI->getOperand(0).getImm(),
1994                                    MI->getOperand(1).getImm());
1995     break;
1996 
1997   case X86::SEH_SaveXMM:
1998     OutStreamer->EmitWinCFISaveXMM(MI->getOperand(0).getImm(),
1999                                    MI->getOperand(1).getImm());
2000     break;
2001 
2002   case X86::SEH_StackAlloc:
2003     OutStreamer->EmitWinCFIAllocStack(MI->getOperand(0).getImm());
2004     break;
2005 
2006   case X86::SEH_SetFrame:
2007     OutStreamer->EmitWinCFISetFrame(MI->getOperand(0).getImm(),
2008                                     MI->getOperand(1).getImm());
2009     break;
2010 
2011   case X86::SEH_PushFrame:
2012     OutStreamer->EmitWinCFIPushFrame(MI->getOperand(0).getImm());
2013     break;
2014 
2015   case X86::SEH_EndPrologue:
2016     OutStreamer->EmitWinCFIEndProlog();
2017     break;
2018 
2019   default:
2020     llvm_unreachable("expected SEH_ instruction");
2021   }
2022 }
2023 
2024 static unsigned getRegisterWidth(const MCOperandInfo &Info) {
2025   if (Info.RegClass == X86::VR128RegClassID ||
2026       Info.RegClass == X86::VR128XRegClassID)
2027     return 128;
2028   if (Info.RegClass == X86::VR256RegClassID ||
2029       Info.RegClass == X86::VR256XRegClassID)
2030     return 256;
2031   if (Info.RegClass == X86::VR512RegClassID)
2032     return 512;
2033   llvm_unreachable("Unknown register class!");
2034 }
2035 
2036 static void addConstantComments(const MachineInstr *MI,
2037                                 MCStreamer &OutStreamer) {
2038   switch (MI->getOpcode()) {
2039   // Lower PSHUFB and VPERMILP normally but add a comment if we can find
2040   // a constant shuffle mask. We won't be able to do this at the MC layer
2041   // because the mask isn't an immediate.
2042   case X86::PSHUFBrm:
2043   case X86::VPSHUFBrm:
2044   case X86::VPSHUFBYrm:
2045   case X86::VPSHUFBZ128rm:
2046   case X86::VPSHUFBZ128rmk:
2047   case X86::VPSHUFBZ128rmkz:
2048   case X86::VPSHUFBZ256rm:
2049   case X86::VPSHUFBZ256rmk:
2050   case X86::VPSHUFBZ256rmkz:
2051   case X86::VPSHUFBZrm:
2052   case X86::VPSHUFBZrmk:
2053   case X86::VPSHUFBZrmkz: {
2054     unsigned SrcIdx = 1;
2055     if (X86II::isKMasked(MI->getDesc().TSFlags)) {
2056       // Skip mask operand.
2057       ++SrcIdx;
2058       if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) {
2059         // Skip passthru operand.
2060         ++SrcIdx;
2061       }
2062     }
2063     unsigned MaskIdx = SrcIdx + 1 + X86::AddrDisp;
2064 
2065     assert(MI->getNumOperands() >= (SrcIdx + 1 + X86::AddrNumOperands) &&
2066            "Unexpected number of operands!");
2067 
2068     const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
2069     if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2070       unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2071       SmallVector<int, 64> Mask;
2072       DecodePSHUFBMask(C, Width, Mask);
2073       if (!Mask.empty())
2074         OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
2075     }
2076     break;
2077   }
2078 
2079   case X86::VPERMILPSrm:
2080   case X86::VPERMILPSYrm:
2081   case X86::VPERMILPSZ128rm:
2082   case X86::VPERMILPSZ128rmk:
2083   case X86::VPERMILPSZ128rmkz:
2084   case X86::VPERMILPSZ256rm:
2085   case X86::VPERMILPSZ256rmk:
2086   case X86::VPERMILPSZ256rmkz:
2087   case X86::VPERMILPSZrm:
2088   case X86::VPERMILPSZrmk:
2089   case X86::VPERMILPSZrmkz:
2090   case X86::VPERMILPDrm:
2091   case X86::VPERMILPDYrm:
2092   case X86::VPERMILPDZ128rm:
2093   case X86::VPERMILPDZ128rmk:
2094   case X86::VPERMILPDZ128rmkz:
2095   case X86::VPERMILPDZ256rm:
2096   case X86::VPERMILPDZ256rmk:
2097   case X86::VPERMILPDZ256rmkz:
2098   case X86::VPERMILPDZrm:
2099   case X86::VPERMILPDZrmk:
2100   case X86::VPERMILPDZrmkz: {
2101     unsigned ElSize;
2102     switch (MI->getOpcode()) {
2103     default: llvm_unreachable("Invalid opcode");
2104     case X86::VPERMILPSrm:
2105     case X86::VPERMILPSYrm:
2106     case X86::VPERMILPSZ128rm:
2107     case X86::VPERMILPSZ256rm:
2108     case X86::VPERMILPSZrm:
2109     case X86::VPERMILPSZ128rmkz:
2110     case X86::VPERMILPSZ256rmkz:
2111     case X86::VPERMILPSZrmkz:
2112     case X86::VPERMILPSZ128rmk:
2113     case X86::VPERMILPSZ256rmk:
2114     case X86::VPERMILPSZrmk:
2115       ElSize = 32;
2116       break;
2117     case X86::VPERMILPDrm:
2118     case X86::VPERMILPDYrm:
2119     case X86::VPERMILPDZ128rm:
2120     case X86::VPERMILPDZ256rm:
2121     case X86::VPERMILPDZrm:
2122     case X86::VPERMILPDZ128rmkz:
2123     case X86::VPERMILPDZ256rmkz:
2124     case X86::VPERMILPDZrmkz:
2125     case X86::VPERMILPDZ128rmk:
2126     case X86::VPERMILPDZ256rmk:
2127     case X86::VPERMILPDZrmk:
2128       ElSize = 64;
2129       break;
2130     }
2131 
2132     unsigned SrcIdx = 1;
2133     if (X86II::isKMasked(MI->getDesc().TSFlags)) {
2134       // Skip mask operand.
2135       ++SrcIdx;
2136       if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) {
2137         // Skip passthru operand.
2138         ++SrcIdx;
2139       }
2140     }
2141     unsigned MaskIdx = SrcIdx + 1 + X86::AddrDisp;
2142 
2143     assert(MI->getNumOperands() >= (SrcIdx + 1 + X86::AddrNumOperands) &&
2144            "Unexpected number of operands!");
2145 
2146     const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
2147     if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2148       unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2149       SmallVector<int, 16> Mask;
2150       DecodeVPERMILPMask(C, ElSize, Width, Mask);
2151       if (!Mask.empty())
2152         OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
2153     }
2154     break;
2155   }
2156 
2157   case X86::VPERMIL2PDrm:
2158   case X86::VPERMIL2PSrm:
2159   case X86::VPERMIL2PDYrm:
2160   case X86::VPERMIL2PSYrm: {
2161     assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands + 1) &&
2162            "Unexpected number of operands!");
2163 
2164     const MachineOperand &CtrlOp = MI->getOperand(MI->getNumOperands() - 1);
2165     if (!CtrlOp.isImm())
2166       break;
2167 
2168     unsigned ElSize;
2169     switch (MI->getOpcode()) {
2170     default: llvm_unreachable("Invalid opcode");
2171     case X86::VPERMIL2PSrm: case X86::VPERMIL2PSYrm: ElSize = 32; break;
2172     case X86::VPERMIL2PDrm: case X86::VPERMIL2PDYrm: ElSize = 64; break;
2173     }
2174 
2175     const MachineOperand &MaskOp = MI->getOperand(3 + X86::AddrDisp);
2176     if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2177       unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2178       SmallVector<int, 16> Mask;
2179       DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Width, Mask);
2180       if (!Mask.empty())
2181         OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));
2182     }
2183     break;
2184   }
2185 
2186   case X86::VPPERMrrm: {
2187     assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands) &&
2188            "Unexpected number of operands!");
2189 
2190     const MachineOperand &MaskOp = MI->getOperand(3 + X86::AddrDisp);
2191     if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2192       unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2193       SmallVector<int, 16> Mask;
2194       DecodeVPPERMMask(C, Width, Mask);
2195       if (!Mask.empty())
2196         OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));
2197     }
2198     break;
2199   }
2200 
2201   case X86::MMX_MOVQ64rm: {
2202     assert(MI->getNumOperands() == (1 + X86::AddrNumOperands) &&
2203            "Unexpected number of operands!");
2204     if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
2205       std::string Comment;
2206       raw_string_ostream CS(Comment);
2207       const MachineOperand &DstOp = MI->getOperand(0);
2208       CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
2209       if (auto *CF = dyn_cast<ConstantFP>(C)) {
2210         CS << "0x" << toString(CF->getValueAPF().bitcastToAPInt(), 16, false);
2211         OutStreamer.AddComment(CS.str());
2212       }
2213     }
2214     break;
2215   }
2216 
2217 #define MOV_CASE(Prefix, Suffix)                                               \
2218   case X86::Prefix##MOVAPD##Suffix##rm:                                        \
2219   case X86::Prefix##MOVAPS##Suffix##rm:                                        \
2220   case X86::Prefix##MOVUPD##Suffix##rm:                                        \
2221   case X86::Prefix##MOVUPS##Suffix##rm:                                        \
2222   case X86::Prefix##MOVDQA##Suffix##rm:                                        \
2223   case X86::Prefix##MOVDQU##Suffix##rm:
2224 
2225 #define MOV_AVX512_CASE(Suffix)                                                \
2226   case X86::VMOVDQA64##Suffix##rm:                                             \
2227   case X86::VMOVDQA32##Suffix##rm:                                             \
2228   case X86::VMOVDQU64##Suffix##rm:                                             \
2229   case X86::VMOVDQU32##Suffix##rm:                                             \
2230   case X86::VMOVDQU16##Suffix##rm:                                             \
2231   case X86::VMOVDQU8##Suffix##rm:                                              \
2232   case X86::VMOVAPS##Suffix##rm:                                               \
2233   case X86::VMOVAPD##Suffix##rm:                                               \
2234   case X86::VMOVUPS##Suffix##rm:                                               \
2235   case X86::VMOVUPD##Suffix##rm:
2236 
2237 #define CASE_ALL_MOV_RM()                                                      \
2238   MOV_CASE(, )   /* SSE */                                                     \
2239   MOV_CASE(V, )  /* AVX-128 */                                                 \
2240   MOV_CASE(V, Y) /* AVX-256 */                                                 \
2241   MOV_AVX512_CASE(Z)                                                           \
2242   MOV_AVX512_CASE(Z256)                                                        \
2243   MOV_AVX512_CASE(Z128)
2244 
2245     // For loads from a constant pool to a vector register, print the constant
2246     // loaded.
2247     CASE_ALL_MOV_RM()
2248   case X86::VBROADCASTF128:
2249   case X86::VBROADCASTI128:
2250   case X86::VBROADCASTF32X4Z256rm:
2251   case X86::VBROADCASTF32X4rm:
2252   case X86::VBROADCASTF32X8rm:
2253   case X86::VBROADCASTF64X2Z128rm:
2254   case X86::VBROADCASTF64X2rm:
2255   case X86::VBROADCASTF64X4rm:
2256   case X86::VBROADCASTI32X4Z256rm:
2257   case X86::VBROADCASTI32X4rm:
2258   case X86::VBROADCASTI32X8rm:
2259   case X86::VBROADCASTI64X2Z128rm:
2260   case X86::VBROADCASTI64X2rm:
2261   case X86::VBROADCASTI64X4rm:
2262     assert(MI->getNumOperands() >= (1 + X86::AddrNumOperands) &&
2263            "Unexpected number of operands!");
2264     if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
2265       int NumLanes = 1;
2266       // Override NumLanes for the broadcast instructions.
2267       switch (MI->getOpcode()) {
2268       case X86::VBROADCASTF128:        NumLanes = 2; break;
2269       case X86::VBROADCASTI128:        NumLanes = 2; break;
2270       case X86::VBROADCASTF32X4Z256rm: NumLanes = 2; break;
2271       case X86::VBROADCASTF32X4rm:     NumLanes = 4; break;
2272       case X86::VBROADCASTF32X8rm:     NumLanes = 2; break;
2273       case X86::VBROADCASTF64X2Z128rm: NumLanes = 2; break;
2274       case X86::VBROADCASTF64X2rm:     NumLanes = 4; break;
2275       case X86::VBROADCASTF64X4rm:     NumLanes = 2; break;
2276       case X86::VBROADCASTI32X4Z256rm: NumLanes = 2; break;
2277       case X86::VBROADCASTI32X4rm:     NumLanes = 4; break;
2278       case X86::VBROADCASTI32X8rm:     NumLanes = 2; break;
2279       case X86::VBROADCASTI64X2Z128rm: NumLanes = 2; break;
2280       case X86::VBROADCASTI64X2rm:     NumLanes = 4; break;
2281       case X86::VBROADCASTI64X4rm:     NumLanes = 2; break;
2282       }
2283 
2284       std::string Comment;
2285       raw_string_ostream CS(Comment);
2286       const MachineOperand &DstOp = MI->getOperand(0);
2287       CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
2288       if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
2289         CS << "[";
2290         for (int l = 0; l != NumLanes; ++l) {
2291           for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements;
2292                ++i) {
2293             if (i != 0 || l != 0)
2294               CS << ",";
2295             if (CDS->getElementType()->isIntegerTy())
2296               printConstant(CDS->getElementAsAPInt(i), CS);
2297             else if (CDS->getElementType()->isHalfTy() ||
2298                      CDS->getElementType()->isFloatTy() ||
2299                      CDS->getElementType()->isDoubleTy())
2300               printConstant(CDS->getElementAsAPFloat(i), CS);
2301             else
2302               CS << "?";
2303           }
2304         }
2305         CS << "]";
2306         OutStreamer.AddComment(CS.str());
2307       } else if (auto *CV = dyn_cast<ConstantVector>(C)) {
2308         CS << "<";
2309         for (int l = 0; l != NumLanes; ++l) {
2310           for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands;
2311                ++i) {
2312             if (i != 0 || l != 0)
2313               CS << ",";
2314             printConstant(CV->getOperand(i), CS);
2315           }
2316         }
2317         CS << ">";
2318         OutStreamer.AddComment(CS.str());
2319       }
2320     }
2321     break;
2322 
2323   case X86::MOVDDUPrm:
2324   case X86::VMOVDDUPrm:
2325   case X86::VMOVDDUPZ128rm:
2326   case X86::VBROADCASTSSrm:
2327   case X86::VBROADCASTSSYrm:
2328   case X86::VBROADCASTSSZ128rm:
2329   case X86::VBROADCASTSSZ256rm:
2330   case X86::VBROADCASTSSZrm:
2331   case X86::VBROADCASTSDYrm:
2332   case X86::VBROADCASTSDZ256rm:
2333   case X86::VBROADCASTSDZrm:
2334   case X86::VPBROADCASTBrm:
2335   case X86::VPBROADCASTBYrm:
2336   case X86::VPBROADCASTBZ128rm:
2337   case X86::VPBROADCASTBZ256rm:
2338   case X86::VPBROADCASTBZrm:
2339   case X86::VPBROADCASTDrm:
2340   case X86::VPBROADCASTDYrm:
2341   case X86::VPBROADCASTDZ128rm:
2342   case X86::VPBROADCASTDZ256rm:
2343   case X86::VPBROADCASTDZrm:
2344   case X86::VPBROADCASTQrm:
2345   case X86::VPBROADCASTQYrm:
2346   case X86::VPBROADCASTQZ128rm:
2347   case X86::VPBROADCASTQZ256rm:
2348   case X86::VPBROADCASTQZrm:
2349   case X86::VPBROADCASTWrm:
2350   case X86::VPBROADCASTWYrm:
2351   case X86::VPBROADCASTWZ128rm:
2352   case X86::VPBROADCASTWZ256rm:
2353   case X86::VPBROADCASTWZrm:
2354     assert(MI->getNumOperands() >= (1 + X86::AddrNumOperands) &&
2355            "Unexpected number of operands!");
2356     if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
2357       int NumElts;
2358       switch (MI->getOpcode()) {
2359       default: llvm_unreachable("Invalid opcode");
2360       case X86::MOVDDUPrm:          NumElts = 2;  break;
2361       case X86::VMOVDDUPrm:         NumElts = 2;  break;
2362       case X86::VMOVDDUPZ128rm:     NumElts = 2;  break;
2363       case X86::VBROADCASTSSrm:     NumElts = 4;  break;
2364       case X86::VBROADCASTSSYrm:    NumElts = 8;  break;
2365       case X86::VBROADCASTSSZ128rm: NumElts = 4;  break;
2366       case X86::VBROADCASTSSZ256rm: NumElts = 8;  break;
2367       case X86::VBROADCASTSSZrm:    NumElts = 16; break;
2368       case X86::VBROADCASTSDYrm:    NumElts = 4;  break;
2369       case X86::VBROADCASTSDZ256rm: NumElts = 4;  break;
2370       case X86::VBROADCASTSDZrm:    NumElts = 8;  break;
2371       case X86::VPBROADCASTBrm:     NumElts = 16; break;
2372       case X86::VPBROADCASTBYrm:    NumElts = 32; break;
2373       case X86::VPBROADCASTBZ128rm: NumElts = 16; break;
2374       case X86::VPBROADCASTBZ256rm: NumElts = 32; break;
2375       case X86::VPBROADCASTBZrm:    NumElts = 64; break;
2376       case X86::VPBROADCASTDrm:     NumElts = 4;  break;
2377       case X86::VPBROADCASTDYrm:    NumElts = 8;  break;
2378       case X86::VPBROADCASTDZ128rm: NumElts = 4;  break;
2379       case X86::VPBROADCASTDZ256rm: NumElts = 8;  break;
2380       case X86::VPBROADCASTDZrm:    NumElts = 16; break;
2381       case X86::VPBROADCASTQrm:     NumElts = 2;  break;
2382       case X86::VPBROADCASTQYrm:    NumElts = 4;  break;
2383       case X86::VPBROADCASTQZ128rm: NumElts = 2;  break;
2384       case X86::VPBROADCASTQZ256rm: NumElts = 4;  break;
2385       case X86::VPBROADCASTQZrm:    NumElts = 8;  break;
2386       case X86::VPBROADCASTWrm:     NumElts = 8;  break;
2387       case X86::VPBROADCASTWYrm:    NumElts = 16; break;
2388       case X86::VPBROADCASTWZ128rm: NumElts = 8;  break;
2389       case X86::VPBROADCASTWZ256rm: NumElts = 16; break;
2390       case X86::VPBROADCASTWZrm:    NumElts = 32; break;
2391       }
2392 
2393       std::string Comment;
2394       raw_string_ostream CS(Comment);
2395       const MachineOperand &DstOp = MI->getOperand(0);
2396       CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
2397       CS << "[";
2398       for (int i = 0; i != NumElts; ++i) {
2399         if (i != 0)
2400           CS << ",";
2401         printConstant(C, CS);
2402       }
2403       CS << "]";
2404       OutStreamer.AddComment(CS.str());
2405     }
2406   }
2407 }
2408 
2409 void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
2410   X86MCInstLower MCInstLowering(*MF, *this);
2411   const X86RegisterInfo *RI =
2412       MF->getSubtarget<X86Subtarget>().getRegisterInfo();
2413 
2414   if (MI->getOpcode() == X86::OR64rm) {
2415     for (auto &Opd : MI->operands()) {
2416       if (Opd.isSymbol() && StringRef(Opd.getSymbolName()) ==
2417                                 "swift_async_extendedFramePointerFlags") {
2418         ShouldEmitWeakSwiftAsyncExtendedFramePointerFlags = true;
2419       }
2420     }
2421   }
2422 
2423   // Add a comment about EVEX-2-VEX compression for AVX-512 instrs that
2424   // are compressed from EVEX encoding to VEX encoding.
2425   if (TM.Options.MCOptions.ShowMCEncoding) {
2426     if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX)
2427       OutStreamer->AddComment("EVEX TO VEX Compression ", false);
2428   }
2429 
2430   // Add comments for values loaded from constant pool.
2431   if (OutStreamer->isVerboseAsm())
2432     addConstantComments(MI, *OutStreamer);
2433 
2434   switch (MI->getOpcode()) {
2435   case TargetOpcode::DBG_VALUE:
2436     llvm_unreachable("Should be handled target independently");
2437 
2438   // Emit nothing here but a comment if we can.
2439   case X86::Int_MemBarrier:
2440     OutStreamer->emitRawComment("MEMBARRIER");
2441     return;
2442 
2443   case X86::EH_RETURN:
2444   case X86::EH_RETURN64: {
2445     // Lower these as normal, but add some comments.
2446     Register Reg = MI->getOperand(0).getReg();
2447     OutStreamer->AddComment(StringRef("eh_return, addr: %") +
2448                             X86ATTInstPrinter::getRegisterName(Reg));
2449     break;
2450   }
2451   case X86::CLEANUPRET: {
2452     // Lower these as normal, but add some comments.
2453     OutStreamer->AddComment("CLEANUPRET");
2454     break;
2455   }
2456 
2457   case X86::CATCHRET: {
2458     // Lower these as normal, but add some comments.
2459     OutStreamer->AddComment("CATCHRET");
2460     break;
2461   }
2462 
2463   case X86::ENDBR32:
2464   case X86::ENDBR64: {
2465     // CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for
2466     // -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be
2467     // non-empty. If MI is the initial ENDBR, place the
2468     // __patchable_function_entries label after ENDBR.
2469     if (CurrentPatchableFunctionEntrySym &&
2470         CurrentPatchableFunctionEntrySym == CurrentFnBegin &&
2471         MI == &MF->front().front()) {
2472       MCInst Inst;
2473       MCInstLowering.Lower(MI, Inst);
2474       EmitAndCountInstruction(Inst);
2475       CurrentPatchableFunctionEntrySym = createTempSymbol("patch");
2476       OutStreamer->emitLabel(CurrentPatchableFunctionEntrySym);
2477       return;
2478     }
2479     break;
2480   }
2481 
2482   case X86::TAILJMPr:
2483   case X86::TAILJMPm:
2484   case X86::TAILJMPd:
2485   case X86::TAILJMPd_CC:
2486   case X86::TAILJMPr64:
2487   case X86::TAILJMPm64:
2488   case X86::TAILJMPd64:
2489   case X86::TAILJMPd64_CC:
2490   case X86::TAILJMPr64_REX:
2491   case X86::TAILJMPm64_REX:
2492     // Lower these as normal, but add some comments.
2493     OutStreamer->AddComment("TAILCALL");
2494     break;
2495 
2496   case X86::TLS_addr32:
2497   case X86::TLS_addr64:
2498   case X86::TLS_addrX32:
2499   case X86::TLS_base_addr32:
2500   case X86::TLS_base_addr64:
2501   case X86::TLS_base_addrX32:
2502     return LowerTlsAddr(MCInstLowering, *MI);
2503 
2504   case X86::MOVPC32r: {
2505     // This is a pseudo op for a two instruction sequence with a label, which
2506     // looks like:
2507     //     call "L1$pb"
2508     // "L1$pb":
2509     //     popl %esi
2510 
2511     // Emit the call.
2512     MCSymbol *PICBase = MF->getPICBaseSymbol();
2513     // FIXME: We would like an efficient form for this, so we don't have to do a
2514     // lot of extra uniquing.
2515     EmitAndCountInstruction(
2516         MCInstBuilder(X86::CALLpcrel32)
2517             .addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));
2518 
2519     const X86FrameLowering *FrameLowering =
2520         MF->getSubtarget<X86Subtarget>().getFrameLowering();
2521     bool hasFP = FrameLowering->hasFP(*MF);
2522 
2523     // TODO: This is needed only if we require precise CFA.
2524     bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() &&
2525                                !OutStreamer->getDwarfFrameInfos().back().End;
2526 
2527     int stackGrowth = -RI->getSlotSize();
2528 
2529     if (HasActiveDwarfFrame && !hasFP) {
2530       OutStreamer->emitCFIAdjustCfaOffset(-stackGrowth);
2531     }
2532 
2533     // Emit the label.
2534     OutStreamer->emitLabel(PICBase);
2535 
2536     // popl $reg
2537     EmitAndCountInstruction(
2538         MCInstBuilder(X86::POP32r).addReg(MI->getOperand(0).getReg()));
2539 
2540     if (HasActiveDwarfFrame && !hasFP) {
2541       OutStreamer->emitCFIAdjustCfaOffset(stackGrowth);
2542     }
2543     return;
2544   }
2545 
2546   case X86::ADD32ri: {
2547     // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
2548     if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
2549       break;
2550 
2551     // Okay, we have something like:
2552     //  EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL)
2553 
2554     // For this, we want to print something like:
2555     //   MYGLOBAL + (. - PICBASE)
2556     // However, we can't generate a ".", so just emit a new label here and refer
2557     // to it.
2558     MCSymbol *DotSym = OutContext.createTempSymbol();
2559     OutStreamer->emitLabel(DotSym);
2560 
2561     // Now that we have emitted the label, lower the complex operand expression.
2562     MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));
2563 
2564     const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext);
2565     const MCExpr *PICBase =
2566         MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext);
2567     DotExpr = MCBinaryExpr::createSub(DotExpr, PICBase, OutContext);
2568 
2569     DotExpr = MCBinaryExpr::createAdd(
2570         MCSymbolRefExpr::create(OpSym, OutContext), DotExpr, OutContext);
2571 
2572     EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri)
2573                                 .addReg(MI->getOperand(0).getReg())
2574                                 .addReg(MI->getOperand(1).getReg())
2575                                 .addExpr(DotExpr));
2576     return;
2577   }
2578   case TargetOpcode::STATEPOINT:
2579     return LowerSTATEPOINT(*MI, MCInstLowering);
2580 
2581   case TargetOpcode::FAULTING_OP:
2582     return LowerFAULTING_OP(*MI, MCInstLowering);
2583 
2584   case TargetOpcode::FENTRY_CALL:
2585     return LowerFENTRY_CALL(*MI, MCInstLowering);
2586 
2587   case TargetOpcode::PATCHABLE_OP:
2588     return LowerPATCHABLE_OP(*MI, MCInstLowering);
2589 
2590   case TargetOpcode::STACKMAP:
2591     return LowerSTACKMAP(*MI);
2592 
2593   case TargetOpcode::PATCHPOINT:
2594     return LowerPATCHPOINT(*MI, MCInstLowering);
2595 
2596   case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
2597     return LowerPATCHABLE_FUNCTION_ENTER(*MI, MCInstLowering);
2598 
2599   case TargetOpcode::PATCHABLE_RET:
2600     return LowerPATCHABLE_RET(*MI, MCInstLowering);
2601 
2602   case TargetOpcode::PATCHABLE_TAIL_CALL:
2603     return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering);
2604 
2605   case TargetOpcode::PATCHABLE_EVENT_CALL:
2606     return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering);
2607 
2608   case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
2609     return LowerPATCHABLE_TYPED_EVENT_CALL(*MI, MCInstLowering);
2610 
2611   case X86::MORESTACK_RET:
2612     EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
2613     return;
2614 
2615   case X86::ASAN_CHECK_MEMACCESS:
2616     return LowerASAN_CHECK_MEMACCESS(*MI);
2617 
2618   case X86::MORESTACK_RET_RESTORE_R10:
2619     // Return, then restore R10.
2620     EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
2621     EmitAndCountInstruction(
2622         MCInstBuilder(X86::MOV64rr).addReg(X86::R10).addReg(X86::RAX));
2623     return;
2624 
2625   case X86::SEH_PushReg:
2626   case X86::SEH_SaveReg:
2627   case X86::SEH_SaveXMM:
2628   case X86::SEH_StackAlloc:
2629   case X86::SEH_StackAlign:
2630   case X86::SEH_SetFrame:
2631   case X86::SEH_PushFrame:
2632   case X86::SEH_EndPrologue:
2633     EmitSEHInstruction(MI);
2634     return;
2635 
2636   case X86::SEH_Epilogue: {
2637     assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
2638     MachineBasicBlock::const_iterator MBBI(MI);
2639     // Check if preceded by a call and emit nop if so.
2640     for (MBBI = PrevCrossBBInst(MBBI);
2641          MBBI != MachineBasicBlock::const_iterator();
2642          MBBI = PrevCrossBBInst(MBBI)) {
2643       // Conservatively assume that pseudo instructions don't emit code and keep
2644       // looking for a call. We may emit an unnecessary nop in some cases.
2645       if (!MBBI->isPseudo()) {
2646         if (MBBI->isCall())
2647           EmitAndCountInstruction(MCInstBuilder(X86::NOOP));
2648         break;
2649       }
2650     }
2651     return;
2652   }
2653   case X86::UBSAN_UD1:
2654     EmitAndCountInstruction(MCInstBuilder(X86::UD1Lm)
2655                                 .addReg(X86::EAX)
2656                                 .addReg(X86::EAX)
2657                                 .addImm(1)
2658                                 .addReg(X86::NoRegister)
2659                                 .addImm(MI->getOperand(0).getImm())
2660                                 .addReg(X86::NoRegister));
2661     return;
2662   }
2663 
2664   MCInst TmpInst;
2665   MCInstLowering.Lower(MI, TmpInst);
2666 
2667   // Stackmap shadows cannot include branch targets, so we can count the bytes
2668   // in a call towards the shadow, but must ensure that the no thread returns
2669   // in to the stackmap shadow.  The only way to achieve this is if the call
2670   // is at the end of the shadow.
2671   if (MI->isCall()) {
2672     // Count then size of the call towards the shadow
2673     SMShadowTracker.count(TmpInst, getSubtargetInfo(), CodeEmitter.get());
2674     // Then flush the shadow so that we fill with nops before the call, not
2675     // after it.
2676     SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
2677     // Then emit the call
2678     OutStreamer->emitInstruction(TmpInst, getSubtargetInfo());
2679     return;
2680   }
2681 
2682   EmitAndCountInstruction(TmpInst);
2683 }
2684