//===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains code to lower X86 MachineInstrs to their corresponding
// MCInst records.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86ATTInstPrinter.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86InstComments.h"
#include "MCTargetDesc/X86ShuffleDecode.h"
#include "MCTargetDesc/X86TargetStreamer.h"
#include "X86AsmPrinter.h"
#include "X86RegisterInfo.h"
#include "X86ShuffleDecodeConstantPool.h"
#include "X86Subtarget.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
#include <string>

using namespace llvm;

namespace {

/// X86MCInstLower - This class is used to lower a MachineInstr into an MCInst.
class X86MCInstLower {
  MCContext &Ctx;
  const MachineFunction &MF;
  const TargetMachine &TM;
  const MCAsmInfo &MAI;
  X86AsmPrinter &AsmPrinter;

public:
  X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter);

  Optional<MCOperand> LowerMachineOperand(const MachineInstr *MI,
                                          const MachineOperand &MO) const;
  void Lower(const MachineInstr *MI, MCInst &OutMI) const;

  MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
  MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;

private:
  MachineModuleInfoMachO &getMachOMMI() const;
};

} // end anonymous namespace

/// A RAII helper which defines a region of instructions which can't have
/// padding added between them for correctness.
struct NoAutoPaddingScope {
  MCStreamer &OS;
  const bool OldAllowAutoPadding;
  NoAutoPaddingScope(MCStreamer &OS)
      : OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) {
    changeAndComment(false);
  }
  ~NoAutoPaddingScope() { changeAndComment(OldAllowAutoPadding); }
  void changeAndComment(bool b) {
    if (b == OS.getAllowAutoPadding())
      return;
    OS.setAllowAutoPadding(b);
    if (b)
      OS.emitRawComment("autopadding");
    else
      OS.emitRawComment("noautopadding");
  }
};
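
// Illustrative usage sketch (not part of the original file): the scope
// brackets a fixed-size sled so the assembler cannot insert alignment
// padding between its instructions.
//
//   {
//     NoAutoPaddingScope NoPadScope(*OutStreamer);
//     // ... emit instructions whose byte-for-byte layout must not change ...
//   } // The previous padding setting is restored here.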

// Emit a minimal sequence of nops spanning NumBytes bytes.
static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
                        const X86Subtarget *Subtarget);

void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst,
                                                 const MCSubtargetInfo &STI,
                                                 MCCodeEmitter *CodeEmitter) {
  if (InShadow) {
    SmallString<256> Code;
    SmallVector<MCFixup, 4> Fixups;
    raw_svector_ostream VecOS(Code);
    CodeEmitter->encodeInstruction(Inst, VecOS, Fixups, STI);
    CurrentShadowSize += Code.size();
    if (CurrentShadowSize >= RequiredShadowSize)
      InShadow = false; // The shadow is big enough. Stop counting.
  }
}

void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
    MCStreamer &OutStreamer, const MCSubtargetInfo &STI) {
  if (InShadow && CurrentShadowSize < RequiredShadowSize) {
    InShadow = false;
    emitX86Nops(OutStreamer, RequiredShadowSize - CurrentShadowSize,
                &MF->getSubtarget<X86Subtarget>());
  }
}
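
// Worked example (illustrative): if a stackmap requests an 8-byte shadow and
// the instructions counted after it encode to only 5 bytes before the next
// stackmap (or the end of the function), emitShadowPadding() tops the region
// up with 8 - 5 = 3 bytes of nops, so a runtime patch of the full shadow
// size cannot overwrite an unrelated instruction.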

void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
  OutStreamer->emitInstruction(Inst, getSubtargetInfo());
  SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get());
}

X86MCInstLower::X86MCInstLower(const MachineFunction &mf,
                               X86AsmPrinter &asmprinter)
    : Ctx(mf.getContext()), MF(mf), TM(mf.getTarget()), MAI(*TM.getMCAsmInfo()),
      AsmPrinter(asmprinter) {}

MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
  return MF.getMMI().getObjFileInfo<MachineModuleInfoMachO>();
}

/// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol
/// operand to an MCSymbol.
MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const {
  const Triple &TT = TM.getTargetTriple();
  if (MO.isGlobal() && TT.isOSBinFormatELF())
    return AsmPrinter.getSymbolPreferLocal(*MO.getGlobal());

  const DataLayout &DL = MF.getDataLayout();
  assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) &&
         "Isn't a symbol reference");

  MCSymbol *Sym = nullptr;
  SmallString<128> Name;
  StringRef Suffix;

  switch (MO.getTargetFlags()) {
  case X86II::MO_DLLIMPORT:
    // Handle dllimport linkage.
    Name += "__imp_";
    break;
  case X86II::MO_COFFSTUB:
    Name += ".refptr.";
    break;
  case X86II::MO_DARWIN_NONLAZY:
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
    Suffix = "$non_lazy_ptr";
    break;
  }

  if (!Suffix.empty())
    Name += DL.getPrivateGlobalPrefix();

  if (MO.isGlobal()) {
    const GlobalValue *GV = MO.getGlobal();
    AsmPrinter.getNameWithPrefix(Name, GV);
  } else if (MO.isSymbol()) {
    Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
  } else if (MO.isMBB()) {
    assert(Suffix.empty());
    Sym = MO.getMBB()->getSymbol();
  }

  Name += Suffix;
  if (!Sym)
    Sym = Ctx.getOrCreateSymbol(Name);

  // If the target flags on the operand change the name of the symbol, do that
  // before we return the symbol.
  switch (MO.getTargetFlags()) {
  default:
    break;
  case X86II::MO_COFFSTUB: {
    MachineModuleInfoCOFF &MMICOFF =
        MF.getMMI().getObjFileInfo<MachineModuleInfoCOFF>();
    MachineModuleInfoImpl::StubValueTy &StubSym = MMICOFF.getGVStubEntry(Sym);
    if (!StubSym.getPointer()) {
      assert(MO.isGlobal() && "Extern symbol not handled yet");
      StubSym = MachineModuleInfoImpl::StubValueTy(
          AsmPrinter.getSymbol(MO.getGlobal()), true);
    }
    break;
  }
  case X86II::MO_DARWIN_NONLAZY:
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE: {
    MachineModuleInfoImpl::StubValueTy &StubSym =
        getMachOMMI().getGVStubEntry(Sym);
    if (!StubSym.getPointer()) {
      assert(MO.isGlobal() && "Extern symbol not handled yet");
      StubSym = MachineModuleInfoImpl::StubValueTy(
          AsmPrinter.getSymbol(MO.getGlobal()),
          !MO.getGlobal()->hasInternalLinkage());
    }
    break;
  }
  }

  return Sym;
}

MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
                                             MCSymbol *Sym) const {
  // FIXME: We would like an efficient form for this, so we don't have to do a
  // lot of extra uniquing.
  const MCExpr *Expr = nullptr;
  MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;

  switch (MO.getTargetFlags()) {
  default:
    llvm_unreachable("Unknown target flag on GV operand");
  case X86II::MO_NO_FLAG: // No flag.
  // These affect the name of the symbol, not any suffix.
  case X86II::MO_DARWIN_NONLAZY:
  case X86II::MO_DLLIMPORT:
  case X86II::MO_COFFSTUB:
    break;

  case X86II::MO_TLVP:
    RefKind = MCSymbolRefExpr::VK_TLVP;
    break;
  case X86II::MO_TLVP_PIC_BASE:
    Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
    // Subtract the pic base.
    Expr = MCBinaryExpr::createSub(
        Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
    break;
  case X86II::MO_SECREL:
    RefKind = MCSymbolRefExpr::VK_SECREL;
    break;
  case X86II::MO_TLSGD:
    RefKind = MCSymbolRefExpr::VK_TLSGD;
    break;
  case X86II::MO_TLSLD:
    RefKind = MCSymbolRefExpr::VK_TLSLD;
    break;
  case X86II::MO_TLSLDM:
    RefKind = MCSymbolRefExpr::VK_TLSLDM;
    break;
  case X86II::MO_GOTTPOFF:
    RefKind = MCSymbolRefExpr::VK_GOTTPOFF;
    break;
  case X86II::MO_INDNTPOFF:
    RefKind = MCSymbolRefExpr::VK_INDNTPOFF;
    break;
  case X86II::MO_TPOFF:
    RefKind = MCSymbolRefExpr::VK_TPOFF;
    break;
  case X86II::MO_DTPOFF:
    RefKind = MCSymbolRefExpr::VK_DTPOFF;
    break;
  case X86II::MO_NTPOFF:
    RefKind = MCSymbolRefExpr::VK_NTPOFF;
    break;
  case X86II::MO_GOTNTPOFF:
    RefKind = MCSymbolRefExpr::VK_GOTNTPOFF;
    break;
  case X86II::MO_GOTPCREL:
    RefKind = MCSymbolRefExpr::VK_GOTPCREL;
    break;
  case X86II::MO_GOTPCREL_NORELAX:
    RefKind = MCSymbolRefExpr::VK_GOTPCREL_NORELAX;
    break;
  case X86II::MO_GOT:
    RefKind = MCSymbolRefExpr::VK_GOT;
    break;
  case X86II::MO_GOTOFF:
    RefKind = MCSymbolRefExpr::VK_GOTOFF;
    break;
  case X86II::MO_PLT:
    RefKind = MCSymbolRefExpr::VK_PLT;
    break;
  case X86II::MO_ABS8:
    RefKind = MCSymbolRefExpr::VK_X86_ABS8;
    break;
  case X86II::MO_PIC_BASE_OFFSET:
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
    Expr = MCSymbolRefExpr::create(Sym, Ctx);
    // Subtract the pic base.
    Expr = MCBinaryExpr::createSub(
        Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
    if (MO.isJTI()) {
      assert(MAI.doesSetDirectiveSuppressReloc());
      // If .set directive is supported, use it to reduce the number of
      // relocations the assembler will generate for differences between
      // local labels. This is only safe when the symbols are in the same
      // section so we are restricting it to jumptable references.
      MCSymbol *Label = Ctx.createTempSymbol();
      AsmPrinter.OutStreamer->emitAssignment(Label, Expr);
      Expr = MCSymbolRefExpr::create(Label, Ctx);
    }
    break;
  }

  if (!Expr)
    Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx);

  if (!MO.isJTI() && !MO.isMBB() && MO.getOffset())
    Expr = MCBinaryExpr::createAdd(
        Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
  return MCOperand::createExpr(Expr);
}
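
// For example (illustrative): a jump-table reference lowered with
// MO_PIC_BASE_OFFSET yields the expression (JTI_symbol - picbase), folded
// through a temporary label when the .set directive is available; a global
// with a nonzero offset additionally gains a "+ offset" term.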

/// Simplify FOO $imm, %{al,ax,eax,rax} to FOO $imm, for instructions with
/// a short fixed-register form.
static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) {
  unsigned ImmOp = Inst.getNumOperands() - 1;
  assert(Inst.getOperand(0).isReg() &&
         (Inst.getOperand(ImmOp).isImm() || Inst.getOperand(ImmOp).isExpr()) &&
         ((Inst.getNumOperands() == 3 && Inst.getOperand(1).isReg() &&
           Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) ||
          Inst.getNumOperands() == 2) &&
         "Unexpected instruction!");

  // Check whether the destination register can be fixed.
  unsigned Reg = Inst.getOperand(0).getReg();
  if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
    return;

  // If so, rewrite the instruction.
  MCOperand Saved = Inst.getOperand(ImmOp);
  Inst = MCInst();
  Inst.setOpcode(Opcode);
  Inst.addOperand(Saved);
}
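
// For example (illustrative): "addl $42, %eax" (ADD32ri) becomes the
// ADD32i32 form, whose opcode hardwires %eax as the destination, so only
// the immediate operand survives and no ModR/M byte is needed.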

/// If a movsx instruction has a shorter encoding for the used register,
/// simplify the instruction to use it instead.
static void SimplifyMOVSX(MCInst &Inst) {
  unsigned NewOpcode = 0;
  unsigned Op0 = Inst.getOperand(0).getReg(), Op1 = Inst.getOperand(1).getReg();
  switch (Inst.getOpcode()) {
  default:
    llvm_unreachable("Unexpected instruction!");
  case X86::MOVSX16rr8: // movsbw %al, %ax --> cbtw
    if (Op0 == X86::AX && Op1 == X86::AL)
      NewOpcode = X86::CBW;
    break;
  case X86::MOVSX32rr16: // movswl %ax, %eax --> cwtl
    if (Op0 == X86::EAX && Op1 == X86::AX)
      NewOpcode = X86::CWDE;
    break;
  case X86::MOVSX64rr32: // movslq %eax, %rax --> cltq
    if (Op0 == X86::RAX && Op1 == X86::EAX)
      NewOpcode = X86::CDQE;
    break;
  }

  if (NewOpcode != 0) {
    Inst = MCInst();
    Inst.setOpcode(NewOpcode);
  }
}

/// Simplify things like MOV32rm to MOV32o32a.
static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst,
                                  unsigned Opcode) {
  // Don't make these simplifications in 64-bit mode; other assemblers don't
  // perform them because they make the code larger.
  if (Printer.getSubtarget().is64Bit())
    return;

  bool IsStore = Inst.getOperand(0).isReg() && Inst.getOperand(1).isReg();
  unsigned AddrBase = IsStore;
  unsigned RegOp = IsStore ? 0 : 5;
  unsigned AddrOp = AddrBase + 3;
  assert(
      Inst.getNumOperands() == 6 && Inst.getOperand(RegOp).isReg() &&
      Inst.getOperand(AddrBase + X86::AddrBaseReg).isReg() &&
      Inst.getOperand(AddrBase + X86::AddrScaleAmt).isImm() &&
      Inst.getOperand(AddrBase + X86::AddrIndexReg).isReg() &&
      Inst.getOperand(AddrBase + X86::AddrSegmentReg).isReg() &&
      (Inst.getOperand(AddrOp).isExpr() || Inst.getOperand(AddrOp).isImm()) &&
      "Unexpected instruction!");

  // Check whether the destination register can be fixed.
  unsigned Reg = Inst.getOperand(RegOp).getReg();
  if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
    return;

  // Check whether this is an absolute address.
  // FIXME: We know TLVP symbol refs aren't, but there should be a better way
  // to do this here.
  bool Absolute = true;
  if (Inst.getOperand(AddrOp).isExpr()) {
    const MCExpr *MCE = Inst.getOperand(AddrOp).getExpr();
    if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(MCE))
      if (SRE->getKind() == MCSymbolRefExpr::VK_TLVP)
        Absolute = false;
  }

  if (Absolute &&
      (Inst.getOperand(AddrBase + X86::AddrBaseReg).getReg() != 0 ||
       Inst.getOperand(AddrBase + X86::AddrScaleAmt).getImm() != 1 ||
       Inst.getOperand(AddrBase + X86::AddrIndexReg).getReg() != 0))
    return;

  // If so, rewrite the instruction.
  MCOperand Saved = Inst.getOperand(AddrOp);
  MCOperand Seg = Inst.getOperand(AddrBase + X86::AddrSegmentReg);
  Inst = MCInst();
  Inst.setOpcode(Opcode);
  Inst.addOperand(Saved);
  Inst.addOperand(Seg);
}
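
// For example (illustrative, 32-bit mode only): "movl 0x1000, %eax"
// (MOV32rm with zero base and index registers) becomes MOV32o32a, the short
// moffs form that encodes the absolute address directly after the opcode
// instead of through a ModR/M byte.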

static unsigned getRetOpcode(const X86Subtarget &Subtarget) {
  return Subtarget.is64Bit() ? X86::RET64 : X86::RET32;
}

Optional<MCOperand>
X86MCInstLower::LowerMachineOperand(const MachineInstr *MI,
                                    const MachineOperand &MO) const {
  switch (MO.getType()) {
  default:
    MI->print(errs());
    llvm_unreachable("unknown operand type");
  case MachineOperand::MO_Register:
    // Ignore all implicit register operands.
    if (MO.isImplicit())
      return None;
    return MCOperand::createReg(MO.getReg());
  case MachineOperand::MO_Immediate:
    return MCOperand::createImm(MO.getImm());
  case MachineOperand::MO_MachineBasicBlock:
  case MachineOperand::MO_GlobalAddress:
  case MachineOperand::MO_ExternalSymbol:
    return LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
  case MachineOperand::MO_MCSymbol:
    return LowerSymbolOperand(MO, MO.getMCSymbol());
  case MachineOperand::MO_JumpTableIndex:
    return LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()));
  case MachineOperand::MO_ConstantPoolIndex:
    return LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()));
  case MachineOperand::MO_BlockAddress:
    return LowerSymbolOperand(
        MO, AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
  case MachineOperand::MO_RegisterMask:
    // Ignore call clobbers.
    return None;
  }
}

// Replace TAILJMP opcodes with their equivalent opcodes that have encoding
// information.
static unsigned convertTailJumpOpcode(unsigned Opcode) {
  switch (Opcode) {
  case X86::TAILJMPr:
    Opcode = X86::JMP32r;
    break;
  case X86::TAILJMPm:
    Opcode = X86::JMP32m;
    break;
  case X86::TAILJMPr64:
    Opcode = X86::JMP64r;
    break;
  case X86::TAILJMPm64:
    Opcode = X86::JMP64m;
    break;
  case X86::TAILJMPr64_REX:
    Opcode = X86::JMP64r_REX;
    break;
  case X86::TAILJMPm64_REX:
    Opcode = X86::JMP64m_REX;
    break;
  case X86::TAILJMPd:
  case X86::TAILJMPd64:
    Opcode = X86::JMP_1;
    break;
  case X86::TAILJMPd_CC:
  case X86::TAILJMPd64_CC:
    Opcode = X86::JCC_1;
    break;
  }

  return Opcode;
}

void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
  OutMI.setOpcode(MI->getOpcode());

  for (const MachineOperand &MO : MI->operands())
    if (auto MaybeMCOp = LowerMachineOperand(MI, MO))
      OutMI.addOperand(*MaybeMCOp);

  // Handle a few special cases to eliminate operand modifiers.
  switch (OutMI.getOpcode()) {
  case X86::LEA64_32r:
  case X86::LEA64r:
  case X86::LEA16r:
  case X86::LEA32r:
    // LEA should have a segment register, but it must be empty.
    assert(OutMI.getNumOperands() == 1 + X86::AddrNumOperands &&
           "Unexpected # of LEA operands");
    assert(OutMI.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 &&
           "LEA has segment specified!");
    break;

  case X86::MULX32Hrr:
  case X86::MULX32Hrm:
  case X86::MULX64Hrr:
  case X86::MULX64Hrm: {
    // Turn into regular MULX by duplicating the destination.
    unsigned NewOpc;
    switch (OutMI.getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::MULX32Hrr: NewOpc = X86::MULX32rr; break;
    case X86::MULX32Hrm: NewOpc = X86::MULX32rm; break;
    case X86::MULX64Hrr: NewOpc = X86::MULX64rr; break;
    case X86::MULX64Hrm: NewOpc = X86::MULX64rm; break;
    }
    OutMI.setOpcode(NewOpc);
    // Duplicate the destination.
    unsigned DestReg = OutMI.getOperand(0).getReg();
    OutMI.insert(OutMI.begin(), MCOperand::createReg(DestReg));
    break;
  }

  // Commute operands to get a smaller encoding by using VEX.R instead of
  // VEX.B if one of the registers is extended, but the other isn't.
  case X86::VMOVZPQILo2PQIrr:
  case X86::VMOVAPDrr:
  case X86::VMOVAPDYrr:
  case X86::VMOVAPSrr:
  case X86::VMOVAPSYrr:
  case X86::VMOVDQArr:
  case X86::VMOVDQAYrr:
  case X86::VMOVDQUrr:
  case X86::VMOVDQUYrr:
  case X86::VMOVUPDrr:
  case X86::VMOVUPDYrr:
  case X86::VMOVUPSrr:
  case X86::VMOVUPSYrr: {
    if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
        X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg())) {
      unsigned NewOpc;
      switch (OutMI.getOpcode()) {
      default: llvm_unreachable("Invalid opcode");
      case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr; break;
      case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
      case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
      case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
      case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
      case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
      case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
      case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
      case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
      case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
      case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
      case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
      case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
      }
      OutMI.setOpcode(NewOpc);
    }
    break;
  }
  case X86::VMOVSDrr:
  case X86::VMOVSSrr: {
    if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
        X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
      unsigned NewOpc;
      switch (OutMI.getOpcode()) {
      default: llvm_unreachable("Invalid opcode");
      case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
      case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
      }
      OutMI.setOpcode(NewOpc);
    }
    break;
  }

  case X86::VPCMPBZ128rmi: case X86::VPCMPBZ128rmik:
  case X86::VPCMPBZ128rri: case X86::VPCMPBZ128rrik:
  case X86::VPCMPBZ256rmi: case X86::VPCMPBZ256rmik:
  case X86::VPCMPBZ256rri: case X86::VPCMPBZ256rrik:
  case X86::VPCMPBZrmi: case X86::VPCMPBZrmik:
  case X86::VPCMPBZrri: case X86::VPCMPBZrrik:
  case X86::VPCMPDZ128rmi: case X86::VPCMPDZ128rmik:
  case X86::VPCMPDZ128rmib: case X86::VPCMPDZ128rmibk:
  case X86::VPCMPDZ128rri: case X86::VPCMPDZ128rrik:
  case X86::VPCMPDZ256rmi: case X86::VPCMPDZ256rmik:
  case X86::VPCMPDZ256rmib: case X86::VPCMPDZ256rmibk:
  case X86::VPCMPDZ256rri: case X86::VPCMPDZ256rrik:
  case X86::VPCMPDZrmi: case X86::VPCMPDZrmik:
  case X86::VPCMPDZrmib: case X86::VPCMPDZrmibk:
  case X86::VPCMPDZrri: case X86::VPCMPDZrrik:
  case X86::VPCMPQZ128rmi: case X86::VPCMPQZ128rmik:
  case X86::VPCMPQZ128rmib: case X86::VPCMPQZ128rmibk:
  case X86::VPCMPQZ128rri: case X86::VPCMPQZ128rrik:
  case X86::VPCMPQZ256rmi: case X86::VPCMPQZ256rmik:
  case X86::VPCMPQZ256rmib: case X86::VPCMPQZ256rmibk:
  case X86::VPCMPQZ256rri: case X86::VPCMPQZ256rrik:
  case X86::VPCMPQZrmi: case X86::VPCMPQZrmik:
  case X86::VPCMPQZrmib: case X86::VPCMPQZrmibk:
  case X86::VPCMPQZrri: case X86::VPCMPQZrrik:
  case X86::VPCMPWZ128rmi: case X86::VPCMPWZ128rmik:
  case X86::VPCMPWZ128rri: case X86::VPCMPWZ128rrik:
  case X86::VPCMPWZ256rmi: case X86::VPCMPWZ256rmik:
  case X86::VPCMPWZ256rri: case X86::VPCMPWZ256rrik:
  case X86::VPCMPWZrmi: case X86::VPCMPWZrmik:
  case X86::VPCMPWZrri: case X86::VPCMPWZrrik: {
    // Turn immediate 0 into the VPCMPEQ instruction.
    if (OutMI.getOperand(OutMI.getNumOperands() - 1).getImm() == 0) {
      unsigned NewOpc;
      switch (OutMI.getOpcode()) {
      default: llvm_unreachable("Invalid opcode");
      case X86::VPCMPBZ128rmi: NewOpc = X86::VPCMPEQBZ128rm; break;
      case X86::VPCMPBZ128rmik: NewOpc = X86::VPCMPEQBZ128rmk; break;
      case X86::VPCMPBZ128rri: NewOpc = X86::VPCMPEQBZ128rr; break;
      case X86::VPCMPBZ128rrik: NewOpc = X86::VPCMPEQBZ128rrk; break;
      case X86::VPCMPBZ256rmi: NewOpc = X86::VPCMPEQBZ256rm; break;
      case X86::VPCMPBZ256rmik: NewOpc = X86::VPCMPEQBZ256rmk; break;
      case X86::VPCMPBZ256rri: NewOpc = X86::VPCMPEQBZ256rr; break;
      case X86::VPCMPBZ256rrik: NewOpc = X86::VPCMPEQBZ256rrk; break;
      case X86::VPCMPBZrmi: NewOpc = X86::VPCMPEQBZrm; break;
      case X86::VPCMPBZrmik: NewOpc = X86::VPCMPEQBZrmk; break;
      case X86::VPCMPBZrri: NewOpc = X86::VPCMPEQBZrr; break;
      case X86::VPCMPBZrrik: NewOpc = X86::VPCMPEQBZrrk; break;
      case X86::VPCMPDZ128rmi: NewOpc = X86::VPCMPEQDZ128rm; break;
      case X86::VPCMPDZ128rmib: NewOpc = X86::VPCMPEQDZ128rmb; break;
      case X86::VPCMPDZ128rmibk: NewOpc = X86::VPCMPEQDZ128rmbk; break;
      case X86::VPCMPDZ128rmik: NewOpc = X86::VPCMPEQDZ128rmk; break;
      case X86::VPCMPDZ128rri: NewOpc = X86::VPCMPEQDZ128rr; break;
      case X86::VPCMPDZ128rrik: NewOpc = X86::VPCMPEQDZ128rrk; break;
      case X86::VPCMPDZ256rmi: NewOpc = X86::VPCMPEQDZ256rm; break;
      case X86::VPCMPDZ256rmib: NewOpc = X86::VPCMPEQDZ256rmb; break;
      case X86::VPCMPDZ256rmibk: NewOpc = X86::VPCMPEQDZ256rmbk; break;
      case X86::VPCMPDZ256rmik: NewOpc = X86::VPCMPEQDZ256rmk; break;
      case X86::VPCMPDZ256rri: NewOpc = X86::VPCMPEQDZ256rr; break;
      case X86::VPCMPDZ256rrik: NewOpc = X86::VPCMPEQDZ256rrk; break;
      case X86::VPCMPDZrmi: NewOpc = X86::VPCMPEQDZrm; break;
      case X86::VPCMPDZrmib: NewOpc = X86::VPCMPEQDZrmb; break;
      case X86::VPCMPDZrmibk: NewOpc = X86::VPCMPEQDZrmbk; break;
      case X86::VPCMPDZrmik: NewOpc = X86::VPCMPEQDZrmk; break;
      case X86::VPCMPDZrri: NewOpc = X86::VPCMPEQDZrr; break;
      case X86::VPCMPDZrrik: NewOpc = X86::VPCMPEQDZrrk; break;
      case X86::VPCMPQZ128rmi: NewOpc = X86::VPCMPEQQZ128rm; break;
      case X86::VPCMPQZ128rmib: NewOpc = X86::VPCMPEQQZ128rmb; break;
      case X86::VPCMPQZ128rmibk: NewOpc = X86::VPCMPEQQZ128rmbk; break;
      case X86::VPCMPQZ128rmik: NewOpc = X86::VPCMPEQQZ128rmk; break;
      case X86::VPCMPQZ128rri: NewOpc = X86::VPCMPEQQZ128rr; break;
      case X86::VPCMPQZ128rrik: NewOpc = X86::VPCMPEQQZ128rrk; break;
      case X86::VPCMPQZ256rmi: NewOpc = X86::VPCMPEQQZ256rm; break;
      case X86::VPCMPQZ256rmib: NewOpc = X86::VPCMPEQQZ256rmb; break;
      case X86::VPCMPQZ256rmibk: NewOpc = X86::VPCMPEQQZ256rmbk; break;
      case X86::VPCMPQZ256rmik: NewOpc = X86::VPCMPEQQZ256rmk; break;
      case X86::VPCMPQZ256rri: NewOpc = X86::VPCMPEQQZ256rr; break;
      case X86::VPCMPQZ256rrik: NewOpc = X86::VPCMPEQQZ256rrk; break;
      case X86::VPCMPQZrmi: NewOpc = X86::VPCMPEQQZrm; break;
      case X86::VPCMPQZrmib: NewOpc = X86::VPCMPEQQZrmb; break;
      case X86::VPCMPQZrmibk: NewOpc = X86::VPCMPEQQZrmbk; break;
      case X86::VPCMPQZrmik: NewOpc = X86::VPCMPEQQZrmk; break;
      case X86::VPCMPQZrri: NewOpc = X86::VPCMPEQQZrr; break;
      case X86::VPCMPQZrrik: NewOpc = X86::VPCMPEQQZrrk; break;
      case X86::VPCMPWZ128rmi: NewOpc = X86::VPCMPEQWZ128rm; break;
      case X86::VPCMPWZ128rmik: NewOpc = X86::VPCMPEQWZ128rmk; break;
      case X86::VPCMPWZ128rri: NewOpc = X86::VPCMPEQWZ128rr; break;
      case X86::VPCMPWZ128rrik: NewOpc = X86::VPCMPEQWZ128rrk; break;
      case X86::VPCMPWZ256rmi: NewOpc = X86::VPCMPEQWZ256rm; break;
      case X86::VPCMPWZ256rmik: NewOpc = X86::VPCMPEQWZ256rmk; break;
      case X86::VPCMPWZ256rri: NewOpc = X86::VPCMPEQWZ256rr; break;
      case X86::VPCMPWZ256rrik: NewOpc = X86::VPCMPEQWZ256rrk; break;
      case X86::VPCMPWZrmi: NewOpc = X86::VPCMPEQWZrm; break;
      case X86::VPCMPWZrmik: NewOpc = X86::VPCMPEQWZrmk; break;
      case X86::VPCMPWZrri: NewOpc = X86::VPCMPEQWZrr; break;
      case X86::VPCMPWZrrik: NewOpc = X86::VPCMPEQWZrrk; break;
      }

      OutMI.setOpcode(NewOpc);
      OutMI.erase(&OutMI.getOperand(OutMI.getNumOperands() - 1));
      break;
    }

    // Turn immediate 6 into the VPCMPGT instruction.
    if (OutMI.getOperand(OutMI.getNumOperands() - 1).getImm() == 6) {
      unsigned NewOpc;
      switch (OutMI.getOpcode()) {
      default: llvm_unreachable("Invalid opcode");
      case X86::VPCMPBZ128rmi: NewOpc = X86::VPCMPGTBZ128rm; break;
      case X86::VPCMPBZ128rmik: NewOpc = X86::VPCMPGTBZ128rmk; break;
      case X86::VPCMPBZ128rri: NewOpc = X86::VPCMPGTBZ128rr; break;
      case X86::VPCMPBZ128rrik: NewOpc = X86::VPCMPGTBZ128rrk; break;
      case X86::VPCMPBZ256rmi: NewOpc = X86::VPCMPGTBZ256rm; break;
      case X86::VPCMPBZ256rmik: NewOpc = X86::VPCMPGTBZ256rmk; break;
      case X86::VPCMPBZ256rri: NewOpc = X86::VPCMPGTBZ256rr; break;
      case X86::VPCMPBZ256rrik: NewOpc = X86::VPCMPGTBZ256rrk; break;
      case X86::VPCMPBZrmi: NewOpc = X86::VPCMPGTBZrm; break;
      case X86::VPCMPBZrmik: NewOpc = X86::VPCMPGTBZrmk; break;
      case X86::VPCMPBZrri: NewOpc = X86::VPCMPGTBZrr; break;
      case X86::VPCMPBZrrik: NewOpc = X86::VPCMPGTBZrrk; break;
      case X86::VPCMPDZ128rmi: NewOpc = X86::VPCMPGTDZ128rm; break;
      case X86::VPCMPDZ128rmib: NewOpc = X86::VPCMPGTDZ128rmb; break;
      case X86::VPCMPDZ128rmibk: NewOpc = X86::VPCMPGTDZ128rmbk; break;
      case X86::VPCMPDZ128rmik: NewOpc = X86::VPCMPGTDZ128rmk; break;
      case X86::VPCMPDZ128rri: NewOpc = X86::VPCMPGTDZ128rr; break;
      case X86::VPCMPDZ128rrik: NewOpc = X86::VPCMPGTDZ128rrk; break;
      case X86::VPCMPDZ256rmi: NewOpc = X86::VPCMPGTDZ256rm; break;
      case X86::VPCMPDZ256rmib: NewOpc = X86::VPCMPGTDZ256rmb; break;
      case X86::VPCMPDZ256rmibk: NewOpc = X86::VPCMPGTDZ256rmbk; break;
      case X86::VPCMPDZ256rmik: NewOpc = X86::VPCMPGTDZ256rmk; break;
      case X86::VPCMPDZ256rri: NewOpc = X86::VPCMPGTDZ256rr; break;
      case X86::VPCMPDZ256rrik: NewOpc = X86::VPCMPGTDZ256rrk; break;
      case X86::VPCMPDZrmi: NewOpc = X86::VPCMPGTDZrm; break;
      case X86::VPCMPDZrmib: NewOpc = X86::VPCMPGTDZrmb; break;
      case X86::VPCMPDZrmibk: NewOpc = X86::VPCMPGTDZrmbk; break;
      case X86::VPCMPDZrmik: NewOpc = X86::VPCMPGTDZrmk; break;
      case X86::VPCMPDZrri: NewOpc = X86::VPCMPGTDZrr; break;
      case X86::VPCMPDZrrik: NewOpc = X86::VPCMPGTDZrrk; break;
      case X86::VPCMPQZ128rmi: NewOpc = X86::VPCMPGTQZ128rm; break;
      case X86::VPCMPQZ128rmib: NewOpc = X86::VPCMPGTQZ128rmb; break;
      case X86::VPCMPQZ128rmibk: NewOpc = X86::VPCMPGTQZ128rmbk; break;
      case X86::VPCMPQZ128rmik: NewOpc = X86::VPCMPGTQZ128rmk; break;
      case X86::VPCMPQZ128rri: NewOpc = X86::VPCMPGTQZ128rr; break;
      case X86::VPCMPQZ128rrik: NewOpc = X86::VPCMPGTQZ128rrk; break;
      case X86::VPCMPQZ256rmi: NewOpc = X86::VPCMPGTQZ256rm; break;
      case X86::VPCMPQZ256rmib: NewOpc = X86::VPCMPGTQZ256rmb; break;
      case X86::VPCMPQZ256rmibk: NewOpc = X86::VPCMPGTQZ256rmbk; break;
      case X86::VPCMPQZ256rmik: NewOpc = X86::VPCMPGTQZ256rmk; break;
      case X86::VPCMPQZ256rri: NewOpc = X86::VPCMPGTQZ256rr; break;
      case X86::VPCMPQZ256rrik: NewOpc = X86::VPCMPGTQZ256rrk; break;
      case X86::VPCMPQZrmi: NewOpc = X86::VPCMPGTQZrm; break;
      case X86::VPCMPQZrmib: NewOpc = X86::VPCMPGTQZrmb; break;
      case X86::VPCMPQZrmibk: NewOpc = X86::VPCMPGTQZrmbk; break;
      case X86::VPCMPQZrmik: NewOpc = X86::VPCMPGTQZrmk; break;
      case X86::VPCMPQZrri: NewOpc = X86::VPCMPGTQZrr; break;
      case X86::VPCMPQZrrik: NewOpc = X86::VPCMPGTQZrrk; break;
      case X86::VPCMPWZ128rmi: NewOpc = X86::VPCMPGTWZ128rm; break;
      case X86::VPCMPWZ128rmik: NewOpc = X86::VPCMPGTWZ128rmk; break;
      case X86::VPCMPWZ128rri: NewOpc = X86::VPCMPGTWZ128rr; break;
      case X86::VPCMPWZ128rrik: NewOpc = X86::VPCMPGTWZ128rrk; break;
      case X86::VPCMPWZ256rmi: NewOpc = X86::VPCMPGTWZ256rm; break;
      case X86::VPCMPWZ256rmik: NewOpc = X86::VPCMPGTWZ256rmk; break;
      case X86::VPCMPWZ256rri: NewOpc = X86::VPCMPGTWZ256rr; break;
      case X86::VPCMPWZ256rrik: NewOpc = X86::VPCMPGTWZ256rrk; break;
      case X86::VPCMPWZrmi: NewOpc = X86::VPCMPGTWZrm; break;
      case X86::VPCMPWZrmik: NewOpc = X86::VPCMPGTWZrmk; break;
      case X86::VPCMPWZrri: NewOpc = X86::VPCMPGTWZrr; break;
      case X86::VPCMPWZrrik: NewOpc = X86::VPCMPGTWZrrk; break;
      }

      OutMI.setOpcode(NewOpc);
      OutMI.erase(&OutMI.getOperand(OutMI.getNumOperands() - 1));
      break;
    }

    break;
  }

  // CALL64r, CALL64pcrel32 - These instructions used to have
  // register inputs modeled as normal uses instead of implicit uses. As such,
  // we used to truncate off all but the first operand (the callee). This
  // issue seems to have been fixed at some point. This assert verifies that.
  case X86::CALL64r:
  case X86::CALL64pcrel32:
    assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
    break;

  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    OutMI = MCInst();
    OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
    break;
  }

  case X86::CLEANUPRET: {
    // Replace CLEANUPRET with the appropriate RET.
    OutMI = MCInst();
    OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
    break;
  }

  case X86::CATCHRET: {
    // Replace CATCHRET with the appropriate RET.
    const X86Subtarget &Subtarget = AsmPrinter.getSubtarget();
    unsigned ReturnReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
    OutMI = MCInst();
    OutMI.setOpcode(getRetOpcode(Subtarget));
    OutMI.addOperand(MCOperand::createReg(ReturnReg));
    break;
  }

  // TAILJMPd, TAILJMPd64, TAILJMPd_CC - Lower to the correct jump
  // instruction.
  case X86::TAILJMPr:
  case X86::TAILJMPr64:
  case X86::TAILJMPr64_REX:
  case X86::TAILJMPd:
  case X86::TAILJMPd64:
    assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
    OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
    break;

  case X86::TAILJMPd_CC:
  case X86::TAILJMPd64_CC:
    assert(OutMI.getNumOperands() == 2 && "Unexpected number of operands!");
    OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
    break;

  case X86::TAILJMPm:
  case X86::TAILJMPm64:
  case X86::TAILJMPm64_REX:
    assert(OutMI.getNumOperands() == X86::AddrNumOperands &&
           "Unexpected number of operands!");
    OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
    break;

  case X86::DEC16r:
  case X86::DEC32r:
  case X86::INC16r:
  case X86::INC32r:
    // If we aren't in 64-bit mode we can use the 1-byte inc/dec instructions.
    if (!AsmPrinter.getSubtarget().is64Bit()) {
      unsigned Opcode;
      switch (OutMI.getOpcode()) {
      default: llvm_unreachable("Invalid opcode");
      case X86::DEC16r: Opcode = X86::DEC16r_alt; break;
      case X86::DEC32r: Opcode = X86::DEC32r_alt; break;
      case X86::INC16r: Opcode = X86::INC16r_alt; break;
      case X86::INC32r: Opcode = X86::INC32r_alt; break;
      }
      OutMI.setOpcode(Opcode);
    }
    break;

  // We don't currently select the correct instruction form for instructions
  // which have a short %eax, etc. form. Handle this by custom lowering, for
  // now.
  //
  // Note, we are currently not handling the following instructions:
  // MOV64ao8, MOV64o8a
  // XCHG16ar, XCHG32ar, XCHG64ar
  case X86::MOV8mr_NOREX:
  case X86::MOV8mr:
  case X86::MOV8rm_NOREX:
  case X86::MOV8rm:
  case X86::MOV16mr:
  case X86::MOV16rm:
  case X86::MOV32mr:
  case X86::MOV32rm: {
    unsigned NewOpc;
    switch (OutMI.getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::MOV8mr_NOREX:
    case X86::MOV8mr: NewOpc = X86::MOV8o32a; break;
    case X86::MOV8rm_NOREX:
    case X86::MOV8rm: NewOpc = X86::MOV8ao32; break;
    case X86::MOV16mr: NewOpc = X86::MOV16o32a; break;
    case X86::MOV16rm: NewOpc = X86::MOV16ao32; break;
    case X86::MOV32mr: NewOpc = X86::MOV32o32a; break;
    case X86::MOV32rm: NewOpc = X86::MOV32ao32; break;
    }
    SimplifyShortMoveForm(AsmPrinter, OutMI, NewOpc);
    break;
  }

  case X86::ADC8ri: case X86::ADC16ri: case X86::ADC32ri: case X86::ADC64ri32:
  case X86::ADD8ri: case X86::ADD16ri: case X86::ADD32ri: case X86::ADD64ri32:
  case X86::AND8ri: case X86::AND16ri: case X86::AND32ri: case X86::AND64ri32:
  case X86::CMP8ri: case X86::CMP16ri: case X86::CMP32ri: case X86::CMP64ri32:
  case X86::OR8ri: case X86::OR16ri: case X86::OR32ri: case X86::OR64ri32:
  case X86::SBB8ri: case X86::SBB16ri: case X86::SBB32ri: case X86::SBB64ri32:
  case X86::SUB8ri: case X86::SUB16ri: case X86::SUB32ri: case X86::SUB64ri32:
  case X86::TEST8ri: case X86::TEST16ri: case X86::TEST32ri: case X86::TEST64ri32:
  case X86::XOR8ri: case X86::XOR16ri: case X86::XOR32ri: case X86::XOR64ri32: {
    unsigned NewOpc;
    switch (OutMI.getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::ADC8ri: NewOpc = X86::ADC8i8; break;
    case X86::ADC16ri: NewOpc = X86::ADC16i16; break;
    case X86::ADC32ri: NewOpc = X86::ADC32i32; break;
    case X86::ADC64ri32: NewOpc = X86::ADC64i32; break;
    case X86::ADD8ri: NewOpc = X86::ADD8i8; break;
    case X86::ADD16ri: NewOpc = X86::ADD16i16; break;
    case X86::ADD32ri: NewOpc = X86::ADD32i32; break;
    case X86::ADD64ri32: NewOpc = X86::ADD64i32; break;
    case X86::AND8ri: NewOpc = X86::AND8i8; break;
    case X86::AND16ri: NewOpc = X86::AND16i16; break;
    case X86::AND32ri: NewOpc = X86::AND32i32; break;
    case X86::AND64ri32: NewOpc = X86::AND64i32; break;
    case X86::CMP8ri: NewOpc = X86::CMP8i8; break;
    case X86::CMP16ri: NewOpc = X86::CMP16i16; break;
    case X86::CMP32ri: NewOpc = X86::CMP32i32; break;
    case X86::CMP64ri32: NewOpc = X86::CMP64i32; break;
    case X86::OR8ri: NewOpc = X86::OR8i8; break;
    case X86::OR16ri: NewOpc = X86::OR16i16; break;
    case X86::OR32ri: NewOpc = X86::OR32i32; break;
    case X86::OR64ri32: NewOpc = X86::OR64i32; break;
    case X86::SBB8ri: NewOpc = X86::SBB8i8; break;
    case X86::SBB16ri: NewOpc = X86::SBB16i16; break;
    case X86::SBB32ri: NewOpc = X86::SBB32i32; break;
    case X86::SBB64ri32: NewOpc = X86::SBB64i32; break;
    case X86::SUB8ri: NewOpc = X86::SUB8i8; break;
    case X86::SUB16ri: NewOpc = X86::SUB16i16; break;
    case X86::SUB32ri: NewOpc = X86::SUB32i32; break;
    case X86::SUB64ri32: NewOpc = X86::SUB64i32; break;
    case X86::TEST8ri: NewOpc = X86::TEST8i8; break;
    case X86::TEST16ri: NewOpc = X86::TEST16i16; break;
    case X86::TEST32ri: NewOpc = X86::TEST32i32; break;
    case X86::TEST64ri32: NewOpc = X86::TEST64i32; break;
    case X86::XOR8ri: NewOpc = X86::XOR8i8; break;
    case X86::XOR16ri: NewOpc = X86::XOR16i16; break;
    case X86::XOR32ri: NewOpc = X86::XOR32i32; break;
    case X86::XOR64ri32: NewOpc = X86::XOR64i32; break;
    }
    SimplifyShortImmForm(OutMI, NewOpc);
    break;
  }

  // Try to shrink some forms of movsx.
  case X86::MOVSX16rr8:
  case X86::MOVSX32rr16:
  case X86::MOVSX64rr32:
    SimplifyMOVSX(OutMI);
    break;

  case X86::VCMPPDrri:
  case X86::VCMPPDYrri:
  case X86::VCMPPSrri:
  case X86::VCMPPSYrri:
  case X86::VCMPSDrr:
  case X86::VCMPSSrr: {
    // Swap the operands if it will enable a 2 byte VEX encoding.
    // FIXME: Change the immediate to improve opportunities?
    if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg()) &&
        X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
      unsigned Imm = MI->getOperand(3).getImm() & 0x7;
      switch (Imm) {
      default: break;
      case 0x00: // EQUAL
      case 0x03: // UNORDERED
      case 0x04: // NOT EQUAL
      case 0x07: // ORDERED
        std::swap(OutMI.getOperand(1), OutMI.getOperand(2));
        break;
      }
    }
    break;
  }

  case X86::VMOVHLPSrr:
  case X86::VUNPCKHPDrr:
    // These are not truly commutable so hide them from the default case.
    break;

  case X86::MASKMOVDQU:
  case X86::VMASKMOVDQU:
    if (AsmPrinter.getSubtarget().is64Bit())
      OutMI.setFlags(X86::IP_HAS_AD_SIZE);
    break;

  default: {
    // If the instruction is a commutable arithmetic instruction we might be
    // able to commute the operands to get a 2 byte VEX prefix.
    uint64_t TSFlags = MI->getDesc().TSFlags;
    if (MI->getDesc().isCommutable() &&
        (TSFlags & X86II::EncodingMask) == X86II::VEX &&
        (TSFlags & X86II::OpMapMask) == X86II::TB &&
        (TSFlags & X86II::FormMask) == X86II::MRMSrcReg &&
        !(TSFlags & X86II::VEX_W) && (TSFlags & X86II::VEX_4V) &&
        OutMI.getNumOperands() == 3) {
      if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg()) &&
          X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg()))
        std::swap(OutMI.getOperand(1), OutMI.getOperand(2));
    }
    break;
  }
  }
}

void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
                                 const MachineInstr &MI) {
  NoAutoPaddingScope NoPadScope(*OutStreamer);
  bool Is64Bits = MI.getOpcode() != X86::TLS_addr32 &&
                  MI.getOpcode() != X86::TLS_base_addr32;
  bool Is64BitsLP64 = MI.getOpcode() == X86::TLS_addr64 ||
                      MI.getOpcode() == X86::TLS_base_addr64;
  MCContext &Ctx = OutStreamer->getContext();

  MCSymbolRefExpr::VariantKind SRVK;
  switch (MI.getOpcode()) {
  case X86::TLS_addr32:
  case X86::TLS_addr64:
  case X86::TLS_addrX32:
    SRVK = MCSymbolRefExpr::VK_TLSGD;
    break;
  case X86::TLS_base_addr32:
    SRVK = MCSymbolRefExpr::VK_TLSLDM;
    break;
  case X86::TLS_base_addr64:
  case X86::TLS_base_addrX32:
    SRVK = MCSymbolRefExpr::VK_TLSLD;
    break;
  default:
    llvm_unreachable("unexpected opcode");
  }

  const MCSymbolRefExpr *Sym = MCSymbolRefExpr::create(
      MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)), SRVK, Ctx);

  // As of binutils 2.32, ld has a bogus TLS relaxation error when the GD/LD
  // code sequence using R_X86_64_GOTPCREL (instead of R_X86_64_GOTPCRELX) is
  // attempted to be relaxed to IE/LE (binutils PR24784). Work around the bug by
  // only using GOT when GOTPCRELX is enabled.
  // TODO Delete the workaround when GOTPCRELX becomes commonplace.
  bool UseGot = MMI->getModule()->getRtLibUseGOT() &&
                Ctx.getAsmInfo()->canRelaxRelocations();

  if (Is64Bits) {
    bool NeedsPadding = SRVK == MCSymbolRefExpr::VK_TLSGD;
    if (NeedsPadding && Is64BitsLP64)
      EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
    EmitAndCountInstruction(MCInstBuilder(X86::LEA64r)
                                .addReg(X86::RDI)
                                .addReg(X86::RIP)
                                .addImm(1)
                                .addReg(0)
                                .addExpr(Sym)
                                .addReg(0));
    const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("__tls_get_addr");
    if (NeedsPadding) {
      if (!UseGot)
        EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
      EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
      EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX));
    }
    if (UseGot) {
      const MCExpr *Expr = MCSymbolRefExpr::create(
          TlsGetAddr, MCSymbolRefExpr::VK_GOTPCREL, Ctx);
      EmitAndCountInstruction(MCInstBuilder(X86::CALL64m)
                                  .addReg(X86::RIP)
                                  .addImm(1)
                                  .addReg(0)
                                  .addExpr(Expr)
                                  .addReg(0));
    } else {
      EmitAndCountInstruction(
          MCInstBuilder(X86::CALL64pcrel32)
              .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
                                               MCSymbolRefExpr::VK_PLT, Ctx)));
    }
  } else {
    if (SRVK == MCSymbolRefExpr::VK_TLSGD && !UseGot) {
      EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
                                  .addReg(X86::EAX)
                                  .addReg(0)
                                  .addImm(1)
                                  .addReg(X86::EBX)
                                  .addExpr(Sym)
                                  .addReg(0));
    } else {
      EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
                                  .addReg(X86::EAX)
                                  .addReg(X86::EBX)
                                  .addImm(1)
                                  .addReg(0)
                                  .addExpr(Sym)
                                  .addReg(0));
    }

    const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("___tls_get_addr");
    if (UseGot) {
      const MCExpr *Expr =
          MCSymbolRefExpr::create(TlsGetAddr, MCSymbolRefExpr::VK_GOT, Ctx);
      EmitAndCountInstruction(MCInstBuilder(X86::CALL32m)
                                  .addReg(X86::EBX)
                                  .addImm(1)
                                  .addReg(0)
                                  .addExpr(Expr)
                                  .addReg(0));
    } else {
      EmitAndCountInstruction(
          MCInstBuilder(X86::CALLpcrel32)
              .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
                                               MCSymbolRefExpr::VK_PLT, Ctx)));
    }
  }
}
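
// For reference (illustrative): in the 64-bit LP64 general-dynamic case the
// code above emits the canonical 16-byte sequence
//   .byte 0x66
//   leaq  sym@tlsgd(%rip), %rdi
//   .word 0x6666
//   rex64
//   call  __tls_get_addr@PLT
// whose fixed length is what allows the linker to relax it in place to the
// initial-exec or local-exec model.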

/// Emit the largest nop instruction smaller than or equal to \p NumBytes
/// bytes. Return the size of nop emitted.
static unsigned emitNop(MCStreamer &OS, unsigned NumBytes,
                        const X86Subtarget *Subtarget) {
  // Determine the longest nop which can be efficiently decoded for the given
  // target cpu. 15 bytes is the longest single NOP instruction, but some
  // platforms can't decode the longest forms efficiently.
  unsigned MaxNopLength = 1;
  if (Subtarget->is64Bit()) {
    // FIXME: We can use NOOPL on 32-bit targets with FeatureNOPL, but the
    // IndexReg/BaseReg below need to be updated.
    if (Subtarget->hasFeature(X86::TuningFast7ByteNOP))
      MaxNopLength = 7;
    else if (Subtarget->hasFeature(X86::TuningFast15ByteNOP))
      MaxNopLength = 15;
    else if (Subtarget->hasFeature(X86::TuningFast11ByteNOP))
      MaxNopLength = 11;
    else
      MaxNopLength = 10;
  } else if (Subtarget->is32Bit())
    MaxNopLength = 2;

  // Cap a single nop emission at the profitable value for the target
  NumBytes = std::min(NumBytes, MaxNopLength);

  unsigned NopSize;
  unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg;
  IndexReg = Displacement = SegmentReg = 0;
  BaseReg = X86::RAX;
  ScaleVal = 1;
  switch (NumBytes) {
  case 0:
    llvm_unreachable("Zero nops?");
    break;
  case 1:
    NopSize = 1;
    Opc = X86::NOOP;
    break;
  case 2:
    NopSize = 2;
    Opc = X86::XCHG16ar;
    break;
  case 3:
    NopSize = 3;
    Opc = X86::NOOPL;
    break;
  case 4:
    NopSize = 4;
    Opc = X86::NOOPL;
    Displacement = 8;
    break;
  case 5:
    NopSize = 5;
    Opc = X86::NOOPL;
    Displacement = 8;
    IndexReg = X86::RAX;
    break;
  case 6:
    NopSize = 6;
    Opc = X86::NOOPW;
    Displacement = 8;
    IndexReg = X86::RAX;
    break;
  case 7:
    NopSize = 7;
    Opc = X86::NOOPL;
    Displacement = 512;
    break;
  case 8:
    NopSize = 8;
    Opc = X86::NOOPL;
    Displacement = 512;
    IndexReg = X86::RAX;
    break;
  case 9:
    NopSize = 9;
    Opc = X86::NOOPW;
    Displacement = 512;
    IndexReg = X86::RAX;
    break;
  default:
    NopSize = 10;
    Opc = X86::NOOPW;
    Displacement = 512;
    IndexReg = X86::RAX;
    SegmentReg = X86::CS;
    break;
  }

  unsigned NumPrefixes = std::min(NumBytes - NopSize, 5U);
  NopSize += NumPrefixes;
  for (unsigned i = 0; i != NumPrefixes; ++i)
    OS.emitBytes("\x66");

  switch (Opc) {
  default: llvm_unreachable("Unexpected opcode");
  case X86::NOOP:
    OS.emitInstruction(MCInstBuilder(Opc), *Subtarget);
    break;
  case X86::XCHG16ar:
    OS.emitInstruction(MCInstBuilder(Opc).addReg(X86::AX).addReg(X86::AX),
                       *Subtarget);
    break;
  case X86::NOOPL:
  case X86::NOOPW:
    OS.emitInstruction(MCInstBuilder(Opc)
                           .addReg(BaseReg)
                           .addImm(ScaleVal)
                           .addReg(IndexReg)
                           .addImm(Displacement)
                           .addReg(SegmentReg),
                       *Subtarget);
    break;
  }
  assert(NopSize <= NumBytes && "We overemitted?");
  return NopSize;
}

/// Emit the optimal amount of multi-byte nops on X86.
static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
                        const X86Subtarget *Subtarget) {
  unsigned NopsToEmit = NumBytes;
  (void)NopsToEmit;
  while (NumBytes) {
    NumBytes -= emitNop(OS, NumBytes, Subtarget);
    assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!");
  }
}
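
// Worked example (illustrative): on a 64-bit target with MaxNopLength = 10,
// a request for 13 bytes of padding is emitted as one 10-byte nop followed
// by one 3-byte nop; the loop above simply calls emitNop() until the
// remaining byte count reaches zero.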

void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
                                    X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64");

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  StatepointOpers SOpers(&MI);
  if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
    emitX86Nops(*OutStreamer, PatchBytes, Subtarget);
  } else {
    // Lower call target and choose correct opcode
    const MachineOperand &CallTarget = SOpers.getCallTarget();
    MCOperand CallTargetMCOp;
    unsigned CallOpcode;
    switch (CallTarget.getType()) {
    case MachineOperand::MO_GlobalAddress:
    case MachineOperand::MO_ExternalSymbol:
      CallTargetMCOp = MCIL.LowerSymbolOperand(
          CallTarget, MCIL.GetSymbolFromOperand(CallTarget));
      CallOpcode = X86::CALL64pcrel32;
      // Currently, we only support relative addressing with statepoints.
      // Otherwise, we'll need a scratch register to hold the target
      // address. You'll fail asserts during load & relocation if this
      // symbol is too far away. (TODO: support non-relative addressing)
      break;
    case MachineOperand::MO_Immediate:
      CallTargetMCOp = MCOperand::createImm(CallTarget.getImm());
      CallOpcode = X86::CALL64pcrel32;
      // Currently, we only support relative addressing with statepoints.
      // Otherwise, we'll need a scratch register to hold the target
      // immediate. You'll fail asserts during load & relocation if this
      // address is too far away. (TODO: support non-relative addressing)
      break;
    case MachineOperand::MO_Register:
      // FIXME: Add retpoline support and remove this.
      if (Subtarget->useIndirectThunkCalls())
        report_fatal_error("Lowering register statepoints with thunks not "
                           "yet implemented.");
      CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
      CallOpcode = X86::CALL64r;
      break;
    default:
      llvm_unreachable("Unsupported operand type in statepoint call target");
      break;
    }

    // Emit call
    MCInst CallInst;
    CallInst.setOpcode(CallOpcode);
    CallInst.addOperand(CallTargetMCOp);
    OutStreamer->emitInstruction(CallInst, getSubtargetInfo());
  }

  // Record our statepoint node in the same section used by STACKMAP
  // and PATCHPOINT
  auto &Ctx = OutStreamer->getContext();
  MCSymbol *MILabel = Ctx.createTempSymbol();
  OutStreamer->emitLabel(MILabel);
  SM.recordStatepoint(*MILabel, MI);
}

void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
                                     X86MCInstLower &MCIL) {
  // FAULTING_LOAD_OP <def>, <faulting type>, <MBB handler>,
  //                  <opcode>, <operands>

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  Register DefRegister = FaultingMI.getOperand(0).getReg();
  FaultMaps::FaultKind FK =
      static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm());
  MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol();
  unsigned Opcode = FaultingMI.getOperand(3).getImm();
  unsigned OperandsBeginIdx = 4;

  auto &Ctx = OutStreamer->getContext();
  MCSymbol *FaultingLabel = Ctx.createTempSymbol();
  OutStreamer->emitLabel(FaultingLabel);

  assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!");
  FM.recordFaultingOp(FK, FaultingLabel, HandlerLabel);

  MCInst MI;
  MI.setOpcode(Opcode);

  if (DefRegister != X86::NoRegister)
    MI.addOperand(MCOperand::createReg(DefRegister));

  for (auto I = FaultingMI.operands_begin() + OperandsBeginIdx,
            E = FaultingMI.operands_end();
       I != E; ++I)
    if (auto MaybeOperand = MCIL.LowerMachineOperand(&FaultingMI, *I))
      MI.addOperand(*MaybeOperand);

  OutStreamer->AddComment("on-fault: " + HandlerLabel->getName());
  OutStreamer->emitInstruction(MI, getSubtargetInfo());
}

void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
                                     X86MCInstLower &MCIL) {
  bool Is64Bits = Subtarget->is64Bit();
  MCContext &Ctx = OutStreamer->getContext();
  MCSymbol *fentry = Ctx.getOrCreateSymbol("__fentry__");
  const MCSymbolRefExpr *Op =
      MCSymbolRefExpr::create(fentry, MCSymbolRefExpr::VK_None, Ctx);

  EmitAndCountInstruction(
      MCInstBuilder(Is64Bits ? X86::CALL64pcrel32 : X86::CALLpcrel32)
          .addExpr(Op));
}

void X86AsmPrinter::LowerASAN_CHECK_MEMACCESS(const MachineInstr &MI) {
  // FIXME: Make this work on non-ELF.
  if (!TM.getTargetTriple().isOSBinFormatELF()) {
    report_fatal_error("llvm.asan.check.memaccess only supported on ELF");
    return;
  }

  const auto &Reg = MI.getOperand(0).getReg();
  ASanAccessInfo AccessInfo(MI.getOperand(1).getImm());

  uint64_t ShadowBase;
  int MappingScale;
  bool OrShadowOffset;
  getAddressSanitizerParams(Triple(TM.getTargetTriple()), 64,
                            AccessInfo.CompileKernel, &ShadowBase,
                            &MappingScale, &OrShadowOffset);

  StringRef Name = AccessInfo.IsWrite ? "store" : "load";
  StringRef Op = OrShadowOffset ? "or" : "add";
  std::string SymName = ("__asan_check_" + Name + "_" + Op + "_" +
                         Twine(1ULL << AccessInfo.AccessSizeIndex) + "_" +
                         TM.getMCRegisterInfo()->getName(Reg.asMCReg()))
                            .str();
  if (OrShadowOffset)
    report_fatal_error(
        "OrShadowOffset is not supported with optimized callbacks");

  EmitAndCountInstruction(
      MCInstBuilder(X86::CALL64pcrel32)
          .addExpr(MCSymbolRefExpr::create(
              OutContext.getOrCreateSymbol(SymName), OutContext)));
}

void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
                                      X86MCInstLower &MCIL) {
  // PATCHABLE_OP minsize, opcode, operands

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  unsigned MinSize = MI.getOperand(0).getImm();
  unsigned Opcode = MI.getOperand(1).getImm();

  MCInst MCI;
  MCI.setOpcode(Opcode);
  for (auto &MO : drop_begin(MI.operands(), 2))
    if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
      MCI.addOperand(*MaybeOperand);

  SmallString<256> Code;
  SmallVector<MCFixup, 4> Fixups;
  raw_svector_ostream VecOS(Code);
  CodeEmitter->encodeInstruction(MCI, VecOS, Fixups, getSubtargetInfo());

  if (Code.size() < MinSize) {
    if (MinSize == 2 && Subtarget->is32Bit() &&
        Subtarget->isTargetWindowsMSVC() &&
        (Subtarget->getCPU().empty() || Subtarget->getCPU() == "pentium3")) {
      // For compatibility reasons, when targeting MSVC, it is important to
      // generate a 'legacy' NOP in the form of an 8B FF MOV EDI, EDI. Some
      // tools rely specifically on this pattern to be able to patch a
      // function. This is only for 32-bit targets, when using /arch:IA32 or
      // /arch:SSE.
1400 OutStreamer->emitInstruction(
1401 MCInstBuilder(X86::MOV32rr_REV).addReg(X86::EDI).addReg(X86::EDI),
1402 *Subtarget);
1403 } else if (MinSize == 2 && Opcode == X86::PUSH64r) {
1404 // This is an optimization that lets us get away without emitting a nop in
1405 // many cases.
1406 //
1407 // NB! In some cases the encoding for PUSH64r (e.g. PUSH64r %r9) takes two
1408 // bytes too, so the check on MinSize is important.
1409 MCI.setOpcode(X86::PUSH64rmr);
1410 } else {
1411 unsigned NopSize = emitNop(*OutStreamer, MinSize, Subtarget);
1412 assert(NopSize == MinSize && "Could not implement MinSize!");
1413 (void)NopSize;
1414 }
1415 }
1416
1417 OutStreamer->emitInstruction(MCI, getSubtargetInfo());
1418 }
1419
1420 // Lower a stackmap of the form:
1421 // <id>, <shadowBytes>, ...
LowerSTACKMAP(const MachineInstr & MI)1422 void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
1423 SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
1424
1425 auto &Ctx = OutStreamer->getContext();
1426 MCSymbol *MILabel = Ctx.createTempSymbol();
1427 OutStreamer->emitLabel(MILabel);
1428
1429 SM.recordStackMap(*MILabel, MI);
1430 unsigned NumShadowBytes = MI.getOperand(1).getImm();
1431 SMShadowTracker.reset(NumShadowBytes);
1432 }
1433
1434 // Lower a patchpoint of the form:
1435 // [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ...
LowerPATCHPOINT(const MachineInstr & MI,X86MCInstLower & MCIL)1436 void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
1437 X86MCInstLower &MCIL) {
1438 assert(Subtarget->is64Bit() && "Patchpoint currently only supports X86-64");
1439
1440 SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
1441
1442 NoAutoPaddingScope NoPadScope(*OutStreamer);
1443
1444 auto &Ctx = OutStreamer->getContext();
1445 MCSymbol *MILabel = Ctx.createTempSymbol();
1446 OutStreamer->emitLabel(MILabel);
1447 SM.recordPatchPoint(*MILabel, MI);
1448
1449 PatchPointOpers opers(&MI);
1450 unsigned ScratchIdx = opers.getNextScratchIdx();
1451 unsigned EncodedBytes = 0;
1452 const MachineOperand &CalleeMO = opers.getCallTarget();
1453
1454 // Check for null target. If target is non-null (i.e. is non-zero or is
1455 // symbolic) then emit a call.
1456 if (!(CalleeMO.isImm() && !CalleeMO.getImm())) {
1457 MCOperand CalleeMCOp;
1458 switch (CalleeMO.getType()) {
1459 default:
1460 /// FIXME: Add a verifier check for bad callee types.
1461 llvm_unreachable("Unrecognized callee operand type.");
1462 case MachineOperand::MO_Immediate:
1463 if (CalleeMO.getImm())
1464 CalleeMCOp = MCOperand::createImm(CalleeMO.getImm());
1465 break;
1466 case MachineOperand::MO_ExternalSymbol:
1467 case MachineOperand::MO_GlobalAddress:
1468 CalleeMCOp = MCIL.LowerSymbolOperand(CalleeMO,
1469 MCIL.GetSymbolFromOperand(CalleeMO));
1470 break;
1471 }
1472
1473 // Emit MOV to materialize the target address and the CALL to target.
1474 // This is encoded with 12-13 bytes, depending on which register is used.
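    // (movabsq $imm64, %reg is REX.W B8+rd followed by an 8-byte immediate,
    // i.e. 10 bytes; callq *%reg is FF /2, 2 bytes, plus a REX.B prefix when
    // the scratch register is one of r8-r15.)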
1475 Register ScratchReg = MI.getOperand(ScratchIdx).getReg();
1476 if (X86II::isX86_64ExtendedReg(ScratchReg))
1477 EncodedBytes = 13;
1478 else
1479 EncodedBytes = 12;
1480
1481 EmitAndCountInstruction(
1482 MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));
1483 // FIXME: Add retpoline support and remove this.
1484 if (Subtarget->useIndirectThunkCalls())
1485 report_fatal_error(
1486 "Lowering patchpoint with thunks not yet implemented.");
1487 EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
1488 }
1489
1490 // Emit padding.
1491 unsigned NumBytes = opers.getNumPatchBytes();
1492 assert(NumBytes >= EncodedBytes &&
1493 "Patchpoint can't request size less than the length of a call.");
1494
1495 emitX86Nops(*OutStreamer, NumBytes - EncodedBytes, Subtarget);
1496 }
1497
1498 void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
1499 X86MCInstLower &MCIL) {
1500   assert(Subtarget->is64Bit() && "XRay custom events only support X86-64");
1501
1502 NoAutoPaddingScope NoPadScope(*OutStreamer);
1503
1504 // We want to emit the following pattern, which follows the x86 calling
1505 // convention to prepare for the trampoline call to be patched in.
1506 //
1507 // .p2align 1, ...
1508 // .Lxray_event_sled_N:
1509 // jmp +N // jump across the instrumentation sled
1510 // ... // set up arguments in register
1511 // callq __xray_CustomEvent@plt // force dependency to symbol
1512 // ...
1513 // <jump here>
1514 //
1515 // After patching, it would look something like:
1516 //
1517 // nopw (2-byte nop)
1518 // ...
1519 // callq __xrayCustomEvent // already lowered
1520 // ...
1521 //
1522 // ---
1523 // First we emit the label and the jump.
1524 auto CurSled = OutContext.createTempSymbol("xray_event_sled_", true);
1525 OutStreamer->AddComment("# XRay Custom Event Log");
1526 OutStreamer->emitCodeAlignment(2, &getSubtargetInfo());
1527 OutStreamer->emitLabel(CurSled);
1528
1529 // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1530 // an operand (computed as an offset from the jmp instruction).
1531   // FIXME: Find another less hacky way to force the relative jump.
1532 OutStreamer->emitBinaryData("\xeb\x0f");
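  // (The 0x0f offset skips the 15 sled bytes that follow: two 4-byte argument
  // slots (a 1-byte push plus a 3-byte mov, or 4 nops), the 5-byte call, and
  // two 1-byte pop/nop slots for the restores.)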
1533
1534   // The SystemV calling convention places the two event arguments in %rdi and
1535   // %rsi -- so those are the destination registers we work with.
1536 const Register DestRegs[] = {X86::RDI, X86::RSI};
1537 bool UsedMask[] = {false, false};
1538 // Filled out in loop.
1539 Register SrcRegs[] = {0, 0};
1540
1541   // Then we put the operands in the %rdi and %rsi registers. We save the
1542   // values already in those registers before we clobber them, and mark them as
1543   // used in UsedMask. If an argument is already in the correct register, we
1544   // emit nops appropriately sized to keep the sled the same size in every
1545   // situation.
1546 for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1547 if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
1548 assert(Op->isReg() && "Only support arguments in registers");
1549 SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64);
1550 if (SrcRegs[I] != DestRegs[I]) {
1551 UsedMask[I] = true;
1552 EmitAndCountInstruction(
1553 MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
1554 } else {
1555 emitX86Nops(*OutStreamer, 4, Subtarget);
1556 }
1557 }
1558
1559 // Now that the register values are stashed, mov arguments into place.
1560   // FIXME: This doesn't work if one of the later SrcRegs is equal to an
1561   // earlier DestReg. We will have already overwritten the register before
1562   // we can copy from it.
1563 for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1564 if (SrcRegs[I] != DestRegs[I])
1565 EmitAndCountInstruction(
1566 MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));
1567
1568 // We emit a hard dependency on the __xray_CustomEvent symbol, which is the
1569 // name of the trampoline to be implemented by the XRay runtime.
1570 auto TSym = OutContext.getOrCreateSymbol("__xray_CustomEvent");
1571 MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
1572 if (isPositionIndependent())
1573 TOp.setTargetFlags(X86II::MO_PLT);
1574
1575 // Emit the call instruction.
1576 EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
1577 .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));
1578
1579 // Restore caller-saved and used registers.
1580 for (unsigned I = sizeof UsedMask; I-- > 0;)
1581 if (UsedMask[I])
1582 EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
1583 else
1584 emitX86Nops(*OutStreamer, 1, Subtarget);
1585
1586 OutStreamer->AddComment("xray custom event end.");
1587
1588 // Record the sled version. Version 0 of this sled was spelled differently, so
1589 // we let the runtime handle the different offsets we're using. Version 2
1590 // changed the absolute address to a PC-relative address.
1591 recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 2);
1592 }
1593
1594 void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
1595 X86MCInstLower &MCIL) {
1596   assert(Subtarget->is64Bit() && "XRay typed events only support X86-64");
1597
1598 NoAutoPaddingScope NoPadScope(*OutStreamer);
1599
1600 // We want to emit the following pattern, which follows the x86 calling
1601 // convention to prepare for the trampoline call to be patched in.
1602 //
1603 // .p2align 1, ...
1604 // .Lxray_event_sled_N:
1605 // jmp +N // jump across the instrumentation sled
1606 // ... // set up arguments in register
1607 // callq __xray_TypedEvent@plt // force dependency to symbol
1608 // ...
1609 // <jump here>
1610 //
1611 // After patching, it would look something like:
1612 //
1613 // nopw (2-byte nop)
1614 // ...
1615 // callq __xrayTypedEvent // already lowered
1616 // ...
1617 //
1618 // ---
1619 // First we emit the label and the jump.
1620 auto CurSled = OutContext.createTempSymbol("xray_typed_event_sled_", true);
1621 OutStreamer->AddComment("# XRay Typed Event Log");
1622 OutStreamer->emitCodeAlignment(2, &getSubtargetInfo());
1623 OutStreamer->emitLabel(CurSled);
1624
1625 // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1626 // an operand (computed as an offset from the jmp instruction).
1627   // FIXME: Find another less hacky way to force the relative jump.
1628 OutStreamer->emitBinaryData("\xeb\x14");
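  // (The 0x14 offset skips the 20 sled bytes that follow: three 4-byte
  // argument slots, the 5-byte call, and three 1-byte pop/nop slots.)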
1629
1630   // A Win64-style convention may place the three arguments in %rcx, %rdx, and
1631   // %r8, while SystemV already passes them in %rdi, %rsi, and %rdx -- the
1632   // registers the trampoline expects -- in which case no moves are needed.
1633 const Register DestRegs[] = {X86::RDI, X86::RSI, X86::RDX};
1634 bool UsedMask[] = {false, false, false};
1635
1636 // Will fill out src regs in the loop.
1637 Register SrcRegs[] = {0, 0, 0};
1638
1639 // Then we put the operands in the SystemV registers. We spill the values in
1640 // the registers before we clobber them, and mark them as used in UsedMask.
1641 // In case the arguments are already in the correct register, we emit nops
1642 // appropriately sized to keep the sled the same size in every situation.
1643 for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1644 if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
1645       // TODO: Is register-only support adequate?
1646 assert(Op->isReg() && "Only supports arguments in registers");
1647 SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64);
1648 if (SrcRegs[I] != DestRegs[I]) {
1649 UsedMask[I] = true;
1650 EmitAndCountInstruction(
1651 MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
1652 } else {
1653 emitX86Nops(*OutStreamer, 4, Subtarget);
1654 }
1655 }
1656
1657   // In the above loop we only stash the destination registers or emit nops if
1658   // the arguments are already in the right place. The actual moving is
1659   // postponed until after all the registers are stashed so nothing is
1660   // clobbered prematurely. We've already added nops to account for the size of
1661   // mov and push if the register is in the right place, so we only have to
1662   // worry about emitting movs.
1663   // FIXME: This doesn't work if one of the later SrcRegs is equal to an
1664   // earlier DestReg. We will have already overwritten the register before
1665   // we can copy from it.
1666 for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1667 if (UsedMask[I])
1668 EmitAndCountInstruction(
1669 MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));
1670
1671 // We emit a hard dependency on the __xray_TypedEvent symbol, which is the
1672 // name of the trampoline to be implemented by the XRay runtime.
1673 auto TSym = OutContext.getOrCreateSymbol("__xray_TypedEvent");
1674 MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
1675 if (isPositionIndependent())
1676 TOp.setTargetFlags(X86II::MO_PLT);
1677
1678 // Emit the call instruction.
1679 EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
1680 .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));
1681
1682 // Restore caller-saved and used registers.
1683 for (unsigned I = sizeof UsedMask; I-- > 0;)
1684 if (UsedMask[I])
1685 EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
1686 else
1687 emitX86Nops(*OutStreamer, 1, Subtarget);
1688
1689 OutStreamer->AddComment("xray typed event end.");
1690
1691 // Record the sled version.
1692 recordSled(CurSled, MI, SledKind::TYPED_EVENT, 2);
1693 }
1694
1695 void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
1696 X86MCInstLower &MCIL) {
1697
1698 NoAutoPaddingScope NoPadScope(*OutStreamer);
1699
1700 const Function &F = MF->getFunction();
1701 if (F.hasFnAttribute("patchable-function-entry")) {
1702 unsigned Num;
1703 if (F.getFnAttribute("patchable-function-entry")
1704 .getValueAsString()
1705 .getAsInteger(10, Num))
1706 return;
1707 emitX86Nops(*OutStreamer, Num, Subtarget);
1708 return;
1709 }
1710 // We want to emit the following pattern:
1711 //
1712 // .p2align 1, ...
1713 // .Lxray_sled_N:
1714 // jmp .tmpN
1715 // # 9 bytes worth of noops
1716 //
1717 // We need the 9 bytes because at runtime, we'd be patching over the full 11
1718 // bytes with the following pattern:
1719 //
1720 // mov %r10, <function id, 32-bit> // 6 bytes
1721 // call <relative offset, 32-bits> // 5 bytes
1722 //
1723 auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1724 OutStreamer->emitCodeAlignment(2, &getSubtargetInfo());
1725 OutStreamer->emitLabel(CurSled);
1726
1727 // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1728 // an operand (computed as an offset from the jmp instruction).
1729   // FIXME: Find another less hacky way to force the relative jump.
1730 OutStreamer->emitBytes("\xeb\x09");
1731 emitX86Nops(*OutStreamer, 9, Subtarget);
1732 recordSled(CurSled, MI, SledKind::FUNCTION_ENTER, 2);
1733 }
1734
1735 void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
1736 X86MCInstLower &MCIL) {
1737 NoAutoPaddingScope NoPadScope(*OutStreamer);
1738
1739 // Since PATCHABLE_RET takes the opcode of the return statement as an
1740 // argument, we use that to emit the correct form of the RET that we want.
1741 // i.e. when we see this:
1742 //
1743 // PATCHABLE_RET X86::RET ...
1744 //
1745 // We should emit the RET followed by sleds.
1746 //
1747 // .p2align 1, ...
1748 // .Lxray_sled_N:
1749 // ret # or equivalent instruction
1750 // # 10 bytes worth of noops
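  // (A 1-byte ret plus 10 bytes of nops yields the same 11 patchable bytes as
  // the function-entry sled's 6-byte mov + 5-byte call.)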
1751 //
1752 // This just makes sure that the alignment for the next instruction is 2.
1753 auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1754 OutStreamer->emitCodeAlignment(2, &getSubtargetInfo());
1755 OutStreamer->emitLabel(CurSled);
1756 unsigned OpCode = MI.getOperand(0).getImm();
1757 MCInst Ret;
1758 Ret.setOpcode(OpCode);
1759 for (auto &MO : drop_begin(MI.operands()))
1760 if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
1761 Ret.addOperand(*MaybeOperand);
1762 OutStreamer->emitInstruction(Ret, getSubtargetInfo());
1763 emitX86Nops(*OutStreamer, 10, Subtarget);
1764 recordSled(CurSled, MI, SledKind::FUNCTION_EXIT, 2);
1765 }
1766
1767 void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
1768 X86MCInstLower &MCIL) {
1769 NoAutoPaddingScope NoPadScope(*OutStreamer);
1770
1771 // Like PATCHABLE_RET, we have the actual instruction in the operands to this
1772 // instruction so we lower that particular instruction and its operands.
1773 // Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how
1774 // we do it for PATCHABLE_FUNCTION_ENTER. The sled should be very similar to
1775 // the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual
1776 // tail call much like how we have it in PATCHABLE_RET.
1777 auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1778 OutStreamer->emitCodeAlignment(2, &getSubtargetInfo());
1779 OutStreamer->emitLabel(CurSled);
1780 auto Target = OutContext.createTempSymbol();
1781
1782 // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1783 // an operand (computed as an offset from the jmp instruction).
1784   // FIXME: Find another less hacky way to force the relative jump.
1785 OutStreamer->emitBytes("\xeb\x09");
1786 emitX86Nops(*OutStreamer, 9, Subtarget);
1787 OutStreamer->emitLabel(Target);
1788 recordSled(CurSled, MI, SledKind::TAIL_CALL, 2);
1789
1790 unsigned OpCode = MI.getOperand(0).getImm();
1791 OpCode = convertTailJumpOpcode(OpCode);
1792 MCInst TC;
1793 TC.setOpcode(OpCode);
1794
1795 // Before emitting the instruction, add a comment to indicate that this is
1796 // indeed a tail call.
1797 OutStreamer->AddComment("TAILCALL");
1798 for (auto &MO : drop_begin(MI.operands()))
1799 if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
1800 TC.addOperand(*MaybeOperand);
1801 OutStreamer->emitInstruction(TC, getSubtargetInfo());
1802 }
1803
1804 // Returns the instruction preceding MBBI in its MachineFunction.
1805 // If MBBI is the first instruction of the first basic block, returns null.
1806 static MachineBasicBlock::const_iterator
1807 PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) {
1808 const MachineBasicBlock *MBB = MBBI->getParent();
1809 while (MBBI == MBB->begin()) {
1810 if (MBB == &MBB->getParent()->front())
1811 return MachineBasicBlock::const_iterator();
1812 MBB = MBB->getPrevNode();
1813 MBBI = MBB->end();
1814 }
1815 --MBBI;
1816 return MBBI;
1817 }
1818
1819 static const Constant *getConstantFromPool(const MachineInstr &MI,
1820 const MachineOperand &Op) {
1821 if (!Op.isCPI() || Op.getOffset() != 0)
1822 return nullptr;
1823
1824 ArrayRef<MachineConstantPoolEntry> Constants =
1825 MI.getParent()->getParent()->getConstantPool()->getConstants();
1826 const MachineConstantPoolEntry &ConstantEntry = Constants[Op.getIndex()];
1827
1828   // Bail if this is a machine constant pool entry; we won't be able to dig out
1829   // anything useful.
1830 if (ConstantEntry.isMachineConstantPoolEntry())
1831 return nullptr;
1832
1833 return ConstantEntry.Val.ConstVal;
1834 }
1835
1836 static std::string getShuffleComment(const MachineInstr *MI, unsigned SrcOp1Idx,
1837 unsigned SrcOp2Idx, ArrayRef<int> Mask) {
1838 std::string Comment;
1839
1840   // Compute the name for a register. This is really goofy because we have
1841   // multiple instruction printers that could (in theory) use different
1842   // names. Fortunately most people use the ATT style (outside of Windows)
1843   // and they actually agree on register naming here. Ultimately, this is
1844   // a comment, and so it's OK if it isn't perfect.
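  // (For a one-source PSHUFB, for example, the resulting comment reads like
  //  "xmm0 = xmm1[0,1],zero,zero,...".)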
1845 auto GetRegisterName = [](unsigned RegNum) -> StringRef {
1846 return X86ATTInstPrinter::getRegisterName(RegNum);
1847 };
1848
1849 const MachineOperand &DstOp = MI->getOperand(0);
1850 const MachineOperand &SrcOp1 = MI->getOperand(SrcOp1Idx);
1851 const MachineOperand &SrcOp2 = MI->getOperand(SrcOp2Idx);
1852
1853 StringRef DstName = DstOp.isReg() ? GetRegisterName(DstOp.getReg()) : "mem";
1854 StringRef Src1Name =
1855 SrcOp1.isReg() ? GetRegisterName(SrcOp1.getReg()) : "mem";
1856 StringRef Src2Name =
1857 SrcOp2.isReg() ? GetRegisterName(SrcOp2.getReg()) : "mem";
1858
1859 // One source operand, fix the mask to print all elements in one span.
1860 SmallVector<int, 8> ShuffleMask(Mask.begin(), Mask.end());
1861 if (Src1Name == Src2Name)
1862 for (int i = 0, e = ShuffleMask.size(); i != e; ++i)
1863 if (ShuffleMask[i] >= e)
1864 ShuffleMask[i] -= e;
1865
1866 raw_string_ostream CS(Comment);
1867 CS << DstName;
1868
1869   // Handle AVX512 MASK/MASKZ write mask comments.
1870 // MASK: zmmX {%kY}
1871 // MASKZ: zmmX {%kY} {z}
1872 if (SrcOp1Idx > 1) {
1873 assert((SrcOp1Idx == 2 || SrcOp1Idx == 3) && "Unexpected writemask");
1874
1875 const MachineOperand &WriteMaskOp = MI->getOperand(SrcOp1Idx - 1);
1876 if (WriteMaskOp.isReg()) {
1877 CS << " {%" << GetRegisterName(WriteMaskOp.getReg()) << "}";
1878
1879 if (SrcOp1Idx == 2) {
1880 CS << " {z}";
1881 }
1882 }
1883 }
1884
1885 CS << " = ";
1886
1887 for (int i = 0, e = ShuffleMask.size(); i != e; ++i) {
1888 if (i != 0)
1889 CS << ",";
1890 if (ShuffleMask[i] == SM_SentinelZero) {
1891 CS << "zero";
1892 continue;
1893 }
1894
1895 // Otherwise, it must come from src1 or src2. Print the span of elements
1896 // that comes from this src.
1897 bool isSrc1 = ShuffleMask[i] < (int)e;
1898 CS << (isSrc1 ? Src1Name : Src2Name) << '[';
1899
1900 bool IsFirst = true;
1901 while (i != e && ShuffleMask[i] != SM_SentinelZero &&
1902 (ShuffleMask[i] < (int)e) == isSrc1) {
1903 if (!IsFirst)
1904 CS << ',';
1905 else
1906 IsFirst = false;
1907 if (ShuffleMask[i] == SM_SentinelUndef)
1908 CS << "u";
1909 else
1910 CS << ShuffleMask[i] % (int)e;
1911 ++i;
1912 }
1913 CS << ']';
1914 --i; // For loop increments element #.
1915 }
1916 CS.flush();
1917
1918 return Comment;
1919 }
1920
1921 static void printConstant(const APInt &Val, raw_ostream &CS) {
1922 if (Val.getBitWidth() <= 64) {
1923 CS << Val.getZExtValue();
1924 } else {
1925 // print multi-word constant as (w0,w1)
1926 CS << "(";
1927 for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
1928 if (i > 0)
1929 CS << ",";
1930 CS << Val.getRawData()[i];
1931 }
1932 CS << ")";
1933 }
1934 }
1935
1936 static void printConstant(const APFloat &Flt, raw_ostream &CS) {
1937 SmallString<32> Str;
1938   // Force scientific notation to distinguish from integers.
1939 Flt.toString(Str, 0, 0);
1940 CS << Str;
1941 }
1942
1943 static void printConstant(const Constant *COp, raw_ostream &CS) {
1944 if (isa<UndefValue>(COp)) {
1945 CS << "u";
1946 } else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
1947 printConstant(CI->getValue(), CS);
1948 } else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
1949 printConstant(CF->getValueAPF(), CS);
1950 } else {
1951 CS << "?";
1952 }
1953 }
1954
1955 void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
1956 assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
1957 assert(getSubtarget().isOSWindows() && "SEH_ instruction Windows only");
1958
1959 // Use the .cv_fpo directives if we're emitting CodeView on 32-bit x86.
1960 if (EmitFPOData) {
1961 X86TargetStreamer *XTS =
1962 static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer());
1963 switch (MI->getOpcode()) {
1964 case X86::SEH_PushReg:
1965 XTS->emitFPOPushReg(MI->getOperand(0).getImm());
1966 break;
1967 case X86::SEH_StackAlloc:
1968 XTS->emitFPOStackAlloc(MI->getOperand(0).getImm());
1969 break;
1970 case X86::SEH_StackAlign:
1971 XTS->emitFPOStackAlign(MI->getOperand(0).getImm());
1972 break;
1973 case X86::SEH_SetFrame:
1974 assert(MI->getOperand(1).getImm() == 0 &&
1975 ".cv_fpo_setframe takes no offset");
1976 XTS->emitFPOSetFrame(MI->getOperand(0).getImm());
1977 break;
1978 case X86::SEH_EndPrologue:
1979 XTS->emitFPOEndPrologue();
1980 break;
1981 case X86::SEH_SaveReg:
1982 case X86::SEH_SaveXMM:
1983 case X86::SEH_PushFrame:
1984 llvm_unreachable("SEH_ directive incompatible with FPO");
1985 break;
1986 default:
1987 llvm_unreachable("expected SEH_ instruction");
1988 }
1989 return;
1990 }
1991
1992 // Otherwise, use the .seh_ directives for all other Windows platforms.
1993 switch (MI->getOpcode()) {
1994 case X86::SEH_PushReg:
1995 OutStreamer->emitWinCFIPushReg(MI->getOperand(0).getImm());
1996 break;
1997
1998 case X86::SEH_SaveReg:
1999 OutStreamer->emitWinCFISaveReg(MI->getOperand(0).getImm(),
2000 MI->getOperand(1).getImm());
2001 break;
2002
2003 case X86::SEH_SaveXMM:
2004 OutStreamer->emitWinCFISaveXMM(MI->getOperand(0).getImm(),
2005 MI->getOperand(1).getImm());
2006 break;
2007
2008 case X86::SEH_StackAlloc:
2009 OutStreamer->emitWinCFIAllocStack(MI->getOperand(0).getImm());
2010 break;
2011
2012 case X86::SEH_SetFrame:
2013 OutStreamer->emitWinCFISetFrame(MI->getOperand(0).getImm(),
2014 MI->getOperand(1).getImm());
2015 break;
2016
2017 case X86::SEH_PushFrame:
2018 OutStreamer->emitWinCFIPushFrame(MI->getOperand(0).getImm());
2019 break;
2020
2021 case X86::SEH_EndPrologue:
2022 OutStreamer->emitWinCFIEndProlog();
2023 break;
2024
2025 default:
2026 llvm_unreachable("expected SEH_ instruction");
2027 }
2028 }
2029
2030 static unsigned getRegisterWidth(const MCOperandInfo &Info) {
2031 if (Info.RegClass == X86::VR128RegClassID ||
2032 Info.RegClass == X86::VR128XRegClassID)
2033 return 128;
2034 if (Info.RegClass == X86::VR256RegClassID ||
2035 Info.RegClass == X86::VR256XRegClassID)
2036 return 256;
2037 if (Info.RegClass == X86::VR512RegClassID)
2038 return 512;
2039 llvm_unreachable("Unknown register class!");
2040 }
2041
2042 static void addConstantComments(const MachineInstr *MI,
2043 MCStreamer &OutStreamer) {
2044 switch (MI->getOpcode()) {
2045 // Lower PSHUFB and VPERMILP normally but add a comment if we can find
2046 // a constant shuffle mask. We won't be able to do this at the MC layer
2047 // because the mask isn't an immediate.
2048 case X86::PSHUFBrm:
2049 case X86::VPSHUFBrm:
2050 case X86::VPSHUFBYrm:
2051 case X86::VPSHUFBZ128rm:
2052 case X86::VPSHUFBZ128rmk:
2053 case X86::VPSHUFBZ128rmkz:
2054 case X86::VPSHUFBZ256rm:
2055 case X86::VPSHUFBZ256rmk:
2056 case X86::VPSHUFBZ256rmkz:
2057 case X86::VPSHUFBZrm:
2058 case X86::VPSHUFBZrmk:
2059 case X86::VPSHUFBZrmkz: {
2060 unsigned SrcIdx = 1;
2061 if (X86II::isKMasked(MI->getDesc().TSFlags)) {
2062 // Skip mask operand.
2063 ++SrcIdx;
2064 if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) {
2065 // Skip passthru operand.
2066 ++SrcIdx;
2067 }
2068 }
2069 unsigned MaskIdx = SrcIdx + 1 + X86::AddrDisp;
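    // (The memory reference begins right after the source register, so the
    // constant-pool reference lives in its displacement slot at
    // SrcIdx + 1 + X86::AddrDisp.)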
2070
2071 assert(MI->getNumOperands() >= (SrcIdx + 1 + X86::AddrNumOperands) &&
2072 "Unexpected number of operands!");
2073
2074 const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
2075 if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2076 unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2077 SmallVector<int, 64> Mask;
2078 DecodePSHUFBMask(C, Width, Mask);
2079 if (!Mask.empty())
2080 OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
2081 }
2082 break;
2083 }
2084
2085 case X86::VPERMILPSrm:
2086 case X86::VPERMILPSYrm:
2087 case X86::VPERMILPSZ128rm:
2088 case X86::VPERMILPSZ128rmk:
2089 case X86::VPERMILPSZ128rmkz:
2090 case X86::VPERMILPSZ256rm:
2091 case X86::VPERMILPSZ256rmk:
2092 case X86::VPERMILPSZ256rmkz:
2093 case X86::VPERMILPSZrm:
2094 case X86::VPERMILPSZrmk:
2095 case X86::VPERMILPSZrmkz:
2096 case X86::VPERMILPDrm:
2097 case X86::VPERMILPDYrm:
2098 case X86::VPERMILPDZ128rm:
2099 case X86::VPERMILPDZ128rmk:
2100 case X86::VPERMILPDZ128rmkz:
2101 case X86::VPERMILPDZ256rm:
2102 case X86::VPERMILPDZ256rmk:
2103 case X86::VPERMILPDZ256rmkz:
2104 case X86::VPERMILPDZrm:
2105 case X86::VPERMILPDZrmk:
2106 case X86::VPERMILPDZrmkz: {
2107 unsigned ElSize;
2108 switch (MI->getOpcode()) {
2109 default: llvm_unreachable("Invalid opcode");
2110 case X86::VPERMILPSrm:
2111 case X86::VPERMILPSYrm:
2112 case X86::VPERMILPSZ128rm:
2113 case X86::VPERMILPSZ256rm:
2114 case X86::VPERMILPSZrm:
2115 case X86::VPERMILPSZ128rmkz:
2116 case X86::VPERMILPSZ256rmkz:
2117 case X86::VPERMILPSZrmkz:
2118 case X86::VPERMILPSZ128rmk:
2119 case X86::VPERMILPSZ256rmk:
2120 case X86::VPERMILPSZrmk:
2121 ElSize = 32;
2122 break;
2123 case X86::VPERMILPDrm:
2124 case X86::VPERMILPDYrm:
2125 case X86::VPERMILPDZ128rm:
2126 case X86::VPERMILPDZ256rm:
2127 case X86::VPERMILPDZrm:
2128 case X86::VPERMILPDZ128rmkz:
2129 case X86::VPERMILPDZ256rmkz:
2130 case X86::VPERMILPDZrmkz:
2131 case X86::VPERMILPDZ128rmk:
2132 case X86::VPERMILPDZ256rmk:
2133 case X86::VPERMILPDZrmk:
2134 ElSize = 64;
2135 break;
2136 }
2137
2138 unsigned SrcIdx = 1;
2139 if (X86II::isKMasked(MI->getDesc().TSFlags)) {
2140 // Skip mask operand.
2141 ++SrcIdx;
2142 if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) {
2143 // Skip passthru operand.
2144 ++SrcIdx;
2145 }
2146 }
2147 unsigned MaskIdx = SrcIdx + 1 + X86::AddrDisp;
2148
2149 assert(MI->getNumOperands() >= (SrcIdx + 1 + X86::AddrNumOperands) &&
2150 "Unexpected number of operands!");
2151
2152 const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
2153 if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2154 unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2155 SmallVector<int, 16> Mask;
2156 DecodeVPERMILPMask(C, ElSize, Width, Mask);
2157 if (!Mask.empty())
2158 OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
2159 }
2160 break;
2161 }
2162
2163 case X86::VPERMIL2PDrm:
2164 case X86::VPERMIL2PSrm:
2165 case X86::VPERMIL2PDYrm:
2166 case X86::VPERMIL2PSYrm: {
2167 assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands + 1) &&
2168 "Unexpected number of operands!");
2169
2170 const MachineOperand &CtrlOp = MI->getOperand(MI->getNumOperands() - 1);
2171 if (!CtrlOp.isImm())
2172 break;
2173
2174 unsigned ElSize;
2175 switch (MI->getOpcode()) {
2176 default: llvm_unreachable("Invalid opcode");
2177 case X86::VPERMIL2PSrm: case X86::VPERMIL2PSYrm: ElSize = 32; break;
2178 case X86::VPERMIL2PDrm: case X86::VPERMIL2PDYrm: ElSize = 64; break;
2179 }
2180
2181 const MachineOperand &MaskOp = MI->getOperand(3 + X86::AddrDisp);
2182 if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2183 unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2184 SmallVector<int, 16> Mask;
2185 DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Width, Mask);
2186 if (!Mask.empty())
2187 OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));
2188 }
2189 break;
2190 }
2191
2192 case X86::VPPERMrrm: {
2193 assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands) &&
2194 "Unexpected number of operands!");
2195
2196 const MachineOperand &MaskOp = MI->getOperand(3 + X86::AddrDisp);
2197 if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2198 unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2199 SmallVector<int, 16> Mask;
2200 DecodeVPPERMMask(C, Width, Mask);
2201 if (!Mask.empty())
2202 OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));
2203 }
2204 break;
2205 }
2206
2207 case X86::MMX_MOVQ64rm: {
2208 assert(MI->getNumOperands() == (1 + X86::AddrNumOperands) &&
2209 "Unexpected number of operands!");
2210 if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
2211 std::string Comment;
2212 raw_string_ostream CS(Comment);
2213 const MachineOperand &DstOp = MI->getOperand(0);
2214 CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
2215 if (auto *CF = dyn_cast<ConstantFP>(C)) {
2216 CS << "0x" << toString(CF->getValueAPF().bitcastToAPInt(), 16, false);
2217 OutStreamer.AddComment(CS.str());
2218 }
2219 }
2220 break;
2221 }
2222
2223 #define MOV_CASE(Prefix, Suffix) \
2224 case X86::Prefix##MOVAPD##Suffix##rm: \
2225 case X86::Prefix##MOVAPS##Suffix##rm: \
2226 case X86::Prefix##MOVUPD##Suffix##rm: \
2227 case X86::Prefix##MOVUPS##Suffix##rm: \
2228 case X86::Prefix##MOVDQA##Suffix##rm: \
2229 case X86::Prefix##MOVDQU##Suffix##rm:
2230
2231 #define MOV_AVX512_CASE(Suffix) \
2232 case X86::VMOVDQA64##Suffix##rm: \
2233 case X86::VMOVDQA32##Suffix##rm: \
2234 case X86::VMOVDQU64##Suffix##rm: \
2235 case X86::VMOVDQU32##Suffix##rm: \
2236 case X86::VMOVDQU16##Suffix##rm: \
2237 case X86::VMOVDQU8##Suffix##rm: \
2238 case X86::VMOVAPS##Suffix##rm: \
2239 case X86::VMOVAPD##Suffix##rm: \
2240 case X86::VMOVUPS##Suffix##rm: \
2241 case X86::VMOVUPD##Suffix##rm:
2242
2243 #define CASE_ALL_MOV_RM() \
2244 MOV_CASE(, ) /* SSE */ \
2245 MOV_CASE(V, ) /* AVX-128 */ \
2246 MOV_CASE(V, Y) /* AVX-256 */ \
2247 MOV_AVX512_CASE(Z) \
2248 MOV_AVX512_CASE(Z256) \
2249 MOV_AVX512_CASE(Z128)
2250
2251 // For loads from a constant pool to a vector register, print the constant
2252 // loaded.
2253 CASE_ALL_MOV_RM()
2254 case X86::VBROADCASTF128:
2255 case X86::VBROADCASTI128:
2256 case X86::VBROADCASTF32X4Z256rm:
2257 case X86::VBROADCASTF32X4rm:
2258 case X86::VBROADCASTF32X8rm:
2259 case X86::VBROADCASTF64X2Z128rm:
2260 case X86::VBROADCASTF64X2rm:
2261 case X86::VBROADCASTF64X4rm:
2262 case X86::VBROADCASTI32X4Z256rm:
2263 case X86::VBROADCASTI32X4rm:
2264 case X86::VBROADCASTI32X8rm:
2265 case X86::VBROADCASTI64X2Z128rm:
2266 case X86::VBROADCASTI64X2rm:
2267 case X86::VBROADCASTI64X4rm:
2268 assert(MI->getNumOperands() >= (1 + X86::AddrNumOperands) &&
2269 "Unexpected number of operands!");
2270 if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
2271 int NumLanes = 1;
2272 // Override NumLanes for the broadcast instructions.
2273 switch (MI->getOpcode()) {
2274 case X86::VBROADCASTF128: NumLanes = 2; break;
2275 case X86::VBROADCASTI128: NumLanes = 2; break;
2276 case X86::VBROADCASTF32X4Z256rm: NumLanes = 2; break;
2277 case X86::VBROADCASTF32X4rm: NumLanes = 4; break;
2278 case X86::VBROADCASTF32X8rm: NumLanes = 2; break;
2279 case X86::VBROADCASTF64X2Z128rm: NumLanes = 2; break;
2280 case X86::VBROADCASTF64X2rm: NumLanes = 4; break;
2281 case X86::VBROADCASTF64X4rm: NumLanes = 2; break;
2282 case X86::VBROADCASTI32X4Z256rm: NumLanes = 2; break;
2283 case X86::VBROADCASTI32X4rm: NumLanes = 4; break;
2284 case X86::VBROADCASTI32X8rm: NumLanes = 2; break;
2285 case X86::VBROADCASTI64X2Z128rm: NumLanes = 2; break;
2286 case X86::VBROADCASTI64X2rm: NumLanes = 4; break;
2287 case X86::VBROADCASTI64X4rm: NumLanes = 2; break;
2288 }
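      // (NumLanes is destination width / memory width; e.g. VBROADCASTF32X4rm
      // repeats a 128-bit constant pool entry four times to fill a 512-bit
      // register, so the constant is printed once per lane.)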
2289
2290 std::string Comment;
2291 raw_string_ostream CS(Comment);
2292 const MachineOperand &DstOp = MI->getOperand(0);
2293 CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
2294 if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
2295 CS << "[";
2296 for (int l = 0; l != NumLanes; ++l) {
2297 for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements;
2298 ++i) {
2299 if (i != 0 || l != 0)
2300 CS << ",";
2301 if (CDS->getElementType()->isIntegerTy())
2302 printConstant(CDS->getElementAsAPInt(i), CS);
2303 else if (CDS->getElementType()->isHalfTy() ||
2304 CDS->getElementType()->isFloatTy() ||
2305 CDS->getElementType()->isDoubleTy())
2306 printConstant(CDS->getElementAsAPFloat(i), CS);
2307 else
2308 CS << "?";
2309 }
2310 }
2311 CS << "]";
2312 OutStreamer.AddComment(CS.str());
2313 } else if (auto *CV = dyn_cast<ConstantVector>(C)) {
2314 CS << "<";
2315 for (int l = 0; l != NumLanes; ++l) {
2316 for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands;
2317 ++i) {
2318 if (i != 0 || l != 0)
2319 CS << ",";
2320 printConstant(CV->getOperand(i), CS);
2321 }
2322 }
2323 CS << ">";
2324 OutStreamer.AddComment(CS.str());
2325 }
2326 }
2327 break;
2328
2329 case X86::MOVDDUPrm:
2330 case X86::VMOVDDUPrm:
2331 case X86::VMOVDDUPZ128rm:
2332 case X86::VBROADCASTSSrm:
2333 case X86::VBROADCASTSSYrm:
2334 case X86::VBROADCASTSSZ128rm:
2335 case X86::VBROADCASTSSZ256rm:
2336 case X86::VBROADCASTSSZrm:
2337 case X86::VBROADCASTSDYrm:
2338 case X86::VBROADCASTSDZ256rm:
2339 case X86::VBROADCASTSDZrm:
2340 case X86::VPBROADCASTBrm:
2341 case X86::VPBROADCASTBYrm:
2342 case X86::VPBROADCASTBZ128rm:
2343 case X86::VPBROADCASTBZ256rm:
2344 case X86::VPBROADCASTBZrm:
2345 case X86::VPBROADCASTDrm:
2346 case X86::VPBROADCASTDYrm:
2347 case X86::VPBROADCASTDZ128rm:
2348 case X86::VPBROADCASTDZ256rm:
2349 case X86::VPBROADCASTDZrm:
2350 case X86::VPBROADCASTQrm:
2351 case X86::VPBROADCASTQYrm:
2352 case X86::VPBROADCASTQZ128rm:
2353 case X86::VPBROADCASTQZ256rm:
2354 case X86::VPBROADCASTQZrm:
2355 case X86::VPBROADCASTWrm:
2356 case X86::VPBROADCASTWYrm:
2357 case X86::VPBROADCASTWZ128rm:
2358 case X86::VPBROADCASTWZ256rm:
2359 case X86::VPBROADCASTWZrm:
2360 assert(MI->getNumOperands() >= (1 + X86::AddrNumOperands) &&
2361 "Unexpected number of operands!");
2362 if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
2363 int NumElts;
2364 switch (MI->getOpcode()) {
2365 default: llvm_unreachable("Invalid opcode");
2366 case X86::MOVDDUPrm: NumElts = 2; break;
2367 case X86::VMOVDDUPrm: NumElts = 2; break;
2368 case X86::VMOVDDUPZ128rm: NumElts = 2; break;
2369 case X86::VBROADCASTSSrm: NumElts = 4; break;
2370 case X86::VBROADCASTSSYrm: NumElts = 8; break;
2371 case X86::VBROADCASTSSZ128rm: NumElts = 4; break;
2372 case X86::VBROADCASTSSZ256rm: NumElts = 8; break;
2373 case X86::VBROADCASTSSZrm: NumElts = 16; break;
2374 case X86::VBROADCASTSDYrm: NumElts = 4; break;
2375 case X86::VBROADCASTSDZ256rm: NumElts = 4; break;
2376 case X86::VBROADCASTSDZrm: NumElts = 8; break;
2377 case X86::VPBROADCASTBrm: NumElts = 16; break;
2378 case X86::VPBROADCASTBYrm: NumElts = 32; break;
2379 case X86::VPBROADCASTBZ128rm: NumElts = 16; break;
2380 case X86::VPBROADCASTBZ256rm: NumElts = 32; break;
2381 case X86::VPBROADCASTBZrm: NumElts = 64; break;
2382 case X86::VPBROADCASTDrm: NumElts = 4; break;
2383 case X86::VPBROADCASTDYrm: NumElts = 8; break;
2384 case X86::VPBROADCASTDZ128rm: NumElts = 4; break;
2385 case X86::VPBROADCASTDZ256rm: NumElts = 8; break;
2386 case X86::VPBROADCASTDZrm: NumElts = 16; break;
2387 case X86::VPBROADCASTQrm: NumElts = 2; break;
2388 case X86::VPBROADCASTQYrm: NumElts = 4; break;
2389 case X86::VPBROADCASTQZ128rm: NumElts = 2; break;
2390 case X86::VPBROADCASTQZ256rm: NumElts = 4; break;
2391 case X86::VPBROADCASTQZrm: NumElts = 8; break;
2392 case X86::VPBROADCASTWrm: NumElts = 8; break;
2393 case X86::VPBROADCASTWYrm: NumElts = 16; break;
2394 case X86::VPBROADCASTWZ128rm: NumElts = 8; break;
2395 case X86::VPBROADCASTWZ256rm: NumElts = 16; break;
2396 case X86::VPBROADCASTWZrm: NumElts = 32; break;
2397 }
2398
2399 std::string Comment;
2400 raw_string_ostream CS(Comment);
2401 const MachineOperand &DstOp = MI->getOperand(0);
2402 CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
2403 CS << "[";
2404 for (int i = 0; i != NumElts; ++i) {
2405 if (i != 0)
2406 CS << ",";
2407 printConstant(C, CS);
2408 }
2409 CS << "]";
2410 OutStreamer.AddComment(CS.str());
2411 }
2412 }
2413 }
2414
2415 void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
2416   // FIXME: Enable feature predicate checks once all the tests pass.
2417 // X86_MC::verifyInstructionPredicates(MI->getOpcode(),
2418 // Subtarget->getFeatureBits());
2419
2420 X86MCInstLower MCInstLowering(*MF, *this);
2421 const X86RegisterInfo *RI =
2422 MF->getSubtarget<X86Subtarget>().getRegisterInfo();
2423
2424 if (MI->getOpcode() == X86::OR64rm) {
2425 for (auto &Opd : MI->operands()) {
2426 if (Opd.isSymbol() && StringRef(Opd.getSymbolName()) ==
2427 "swift_async_extendedFramePointerFlags") {
2428 ShouldEmitWeakSwiftAsyncExtendedFramePointerFlags = true;
2429 }
2430 }
2431 }
2432
2433   // Add a comment for AVX-512 instructions that were compressed from an EVEX
2434   // encoding to a VEX encoding.
2435 if (TM.Options.MCOptions.ShowMCEncoding) {
2436 if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX)
2437 OutStreamer->AddComment("EVEX TO VEX Compression ", false);
2438 }
2439
2440 // Add comments for values loaded from constant pool.
2441 if (OutStreamer->isVerboseAsm())
2442 addConstantComments(MI, *OutStreamer);
2443
2444 switch (MI->getOpcode()) {
2445 case TargetOpcode::DBG_VALUE:
2446 llvm_unreachable("Should be handled target independently");
2447
2448 // Emit nothing here but a comment if we can.
2449 case X86::Int_MemBarrier:
2450 OutStreamer->emitRawComment("MEMBARRIER");
2451 return;
2452
2453 case X86::EH_RETURN:
2454 case X86::EH_RETURN64: {
2455 // Lower these as normal, but add some comments.
2456 Register Reg = MI->getOperand(0).getReg();
2457 OutStreamer->AddComment(StringRef("eh_return, addr: %") +
2458 X86ATTInstPrinter::getRegisterName(Reg));
2459 break;
2460 }
2461 case X86::CLEANUPRET: {
2462 // Lower these as normal, but add some comments.
2463 OutStreamer->AddComment("CLEANUPRET");
2464 break;
2465 }
2466
2467 case X86::CATCHRET: {
2468 // Lower these as normal, but add some comments.
2469 OutStreamer->AddComment("CATCHRET");
2470 break;
2471 }
2472
2473 case X86::ENDBR32:
2474 case X86::ENDBR64: {
2475 // CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for
2476 // -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be
2477 // non-empty. If MI is the initial ENDBR, place the
2478 // __patchable_function_entries label after ENDBR.
2479 if (CurrentPatchableFunctionEntrySym &&
2480 CurrentPatchableFunctionEntrySym == CurrentFnBegin &&
2481 MI == &MF->front().front()) {
2482 MCInst Inst;
2483 MCInstLowering.Lower(MI, Inst);
2484 EmitAndCountInstruction(Inst);
2485 CurrentPatchableFunctionEntrySym = createTempSymbol("patch");
2486 OutStreamer->emitLabel(CurrentPatchableFunctionEntrySym);
2487 return;
2488 }
2489 break;
2490 }
2491
2492 case X86::TAILJMPr:
2493 case X86::TAILJMPm:
2494 case X86::TAILJMPd:
2495 case X86::TAILJMPd_CC:
2496 case X86::TAILJMPr64:
2497 case X86::TAILJMPm64:
2498 case X86::TAILJMPd64:
2499 case X86::TAILJMPd64_CC:
2500 case X86::TAILJMPr64_REX:
2501 case X86::TAILJMPm64_REX:
2502 // Lower these as normal, but add some comments.
2503 OutStreamer->AddComment("TAILCALL");
2504 break;
2505
2506 case X86::TLS_addr32:
2507 case X86::TLS_addr64:
2508 case X86::TLS_addrX32:
2509 case X86::TLS_base_addr32:
2510 case X86::TLS_base_addr64:
2511 case X86::TLS_base_addrX32:
2512 return LowerTlsAddr(MCInstLowering, *MI);
2513
2514 case X86::MOVPC32r: {
2515     // This is a pseudo-op for a two-instruction sequence with a label, which
2516 // looks like:
2517 // call "L1$pb"
2518 // "L1$pb":
2519 // popl %esi
2520
2521 // Emit the call.
2522 MCSymbol *PICBase = MF->getPICBaseSymbol();
2523 // FIXME: We would like an efficient form for this, so we don't have to do a
2524 // lot of extra uniquing.
2525 EmitAndCountInstruction(
2526 MCInstBuilder(X86::CALLpcrel32)
2527 .addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));
2528
2529 const X86FrameLowering *FrameLowering =
2530 MF->getSubtarget<X86Subtarget>().getFrameLowering();
2531 bool hasFP = FrameLowering->hasFP(*MF);
2532
2533 // TODO: This is needed only if we require precise CFA.
2534 bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() &&
2535 !OutStreamer->getDwarfFrameInfos().back().End;
2536
2537 int stackGrowth = -RI->getSlotSize();
2538
2539 if (HasActiveDwarfFrame && !hasFP) {
2540 OutStreamer->emitCFIAdjustCfaOffset(-stackGrowth);
2541 }
2542
2543 // Emit the label.
2544 OutStreamer->emitLabel(PICBase);
2545
2546 // popl $reg
2547 EmitAndCountInstruction(
2548 MCInstBuilder(X86::POP32r).addReg(MI->getOperand(0).getReg()));
2549
2550 if (HasActiveDwarfFrame && !hasFP) {
2551 OutStreamer->emitCFIAdjustCfaOffset(stackGrowth);
2552 }
2553 return;
2554 }
2555
2556 case X86::ADD32ri: {
2557 // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
2558 if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
2559 break;
2560
2561 // Okay, we have something like:
2562 // EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL)
2563
2564 // For this, we want to print something like:
2565 // MYGLOBAL + (. - PICBASE)
2566 // However, we can't generate a ".", so just emit a new label here and refer
2567 // to it.
2568 MCSymbol *DotSym = OutContext.createTempSymbol();
2569 OutStreamer->emitLabel(DotSym);
2570
2571 // Now that we have emitted the label, lower the complex operand expression.
2572 MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));
2573
2574 const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext);
2575 const MCExpr *PICBase =
2576 MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext);
2577 DotExpr = MCBinaryExpr::createSub(DotExpr, PICBase, OutContext);
2578
2579 DotExpr = MCBinaryExpr::createAdd(
2580 MCSymbolRefExpr::create(OpSym, OutContext), DotExpr, OutContext);
2581
2582 EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri)
2583 .addReg(MI->getOperand(0).getReg())
2584 .addReg(MI->getOperand(1).getReg())
2585 .addExpr(DotExpr));
2586 return;
2587 }
2588 case TargetOpcode::STATEPOINT:
2589 return LowerSTATEPOINT(*MI, MCInstLowering);
2590
2591 case TargetOpcode::FAULTING_OP:
2592 return LowerFAULTING_OP(*MI, MCInstLowering);
2593
2594 case TargetOpcode::FENTRY_CALL:
2595 return LowerFENTRY_CALL(*MI, MCInstLowering);
2596
2597 case TargetOpcode::PATCHABLE_OP:
2598 return LowerPATCHABLE_OP(*MI, MCInstLowering);
2599
2600 case TargetOpcode::STACKMAP:
2601 return LowerSTACKMAP(*MI);
2602
2603 case TargetOpcode::PATCHPOINT:
2604 return LowerPATCHPOINT(*MI, MCInstLowering);
2605
2606 case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
2607 return LowerPATCHABLE_FUNCTION_ENTER(*MI, MCInstLowering);
2608
2609 case TargetOpcode::PATCHABLE_RET:
2610 return LowerPATCHABLE_RET(*MI, MCInstLowering);
2611
2612 case TargetOpcode::PATCHABLE_TAIL_CALL:
2613 return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering);
2614
2615 case TargetOpcode::PATCHABLE_EVENT_CALL:
2616 return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering);
2617
2618 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
2619 return LowerPATCHABLE_TYPED_EVENT_CALL(*MI, MCInstLowering);
2620
2621 case X86::MORESTACK_RET:
2622 EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
2623 return;
2624
2625 case X86::ASAN_CHECK_MEMACCESS:
2626 return LowerASAN_CHECK_MEMACCESS(*MI);
2627
2628 case X86::MORESTACK_RET_RESTORE_R10:
2629 // Return, then restore R10.
2630 EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
2631 EmitAndCountInstruction(
2632 MCInstBuilder(X86::MOV64rr).addReg(X86::R10).addReg(X86::RAX));
2633 return;
2634
2635 case X86::SEH_PushReg:
2636 case X86::SEH_SaveReg:
2637 case X86::SEH_SaveXMM:
2638 case X86::SEH_StackAlloc:
2639 case X86::SEH_StackAlign:
2640 case X86::SEH_SetFrame:
2641 case X86::SEH_PushFrame:
2642 case X86::SEH_EndPrologue:
2643 EmitSEHInstruction(MI);
2644 return;
2645
2646 case X86::SEH_Epilogue: {
2647 assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
2648 MachineBasicBlock::const_iterator MBBI(MI);
2649 // Check if preceded by a call and emit nop if so.
2650 for (MBBI = PrevCrossBBInst(MBBI);
2651 MBBI != MachineBasicBlock::const_iterator();
2652 MBBI = PrevCrossBBInst(MBBI)) {
2653 // Conservatively assume that pseudo instructions don't emit code and keep
2654 // looking for a call. We may emit an unnecessary nop in some cases.
2655 if (!MBBI->isPseudo()) {
2656 if (MBBI->isCall())
2657 EmitAndCountInstruction(MCInstBuilder(X86::NOOP));
2658 break;
2659 }
2660 }
2661 return;
2662 }
2663 case X86::UBSAN_UD1:
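    // This emits "ud1l <kind>(%eax), %eax"; the operand-0 immediate rides in
    // the displacement so the faulting instruction itself identifies which
    // UBSan check fired.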
2664 EmitAndCountInstruction(MCInstBuilder(X86::UD1Lm)
2665 .addReg(X86::EAX)
2666 .addReg(X86::EAX)
2667 .addImm(1)
2668 .addReg(X86::NoRegister)
2669 .addImm(MI->getOperand(0).getImm())
2670 .addReg(X86::NoRegister));
2671 return;
2672 }
2673
2674 MCInst TmpInst;
2675 MCInstLowering.Lower(MI, TmpInst);
2676
2677   // Stackmap shadows cannot include branch targets, so we can count the bytes
2678   // in a call towards the shadow, but must ensure that no thread returns into
2679   // the stackmap shadow. The only way to achieve this is if the call is at the
2680   // end of the shadow.
2681 if (MI->isCall()) {
2682     // Count the size of the call towards the shadow.
2683 SMShadowTracker.count(TmpInst, getSubtargetInfo(), CodeEmitter.get());
2684 // Then flush the shadow so that we fill with nops before the call, not
2685 // after it.
2686 SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
2687 // Then emit the call
2688 OutStreamer->emitInstruction(TmpInst, getSubtargetInfo());
2689 return;
2690 }
2691
2692 EmitAndCountInstruction(TmpInst);
2693 }
2694