1 //===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "MCTargetDesc/X86BaseInfo.h"
10 #include "MCTargetDesc/X86FixupKinds.h"
11 #include "MCTargetDesc/X86InstrRelaxTables.h"
12 #include "llvm/ADT/StringSwitch.h"
13 #include "llvm/BinaryFormat/ELF.h"
14 #include "llvm/BinaryFormat/MachO.h"
15 #include "llvm/MC/MCAsmBackend.h"
16 #include "llvm/MC/MCAsmLayout.h"
17 #include "llvm/MC/MCAssembler.h"
18 #include "llvm/MC/MCCodeEmitter.h"
19 #include "llvm/MC/MCContext.h"
20 #include "llvm/MC/MCDwarf.h"
21 #include "llvm/MC/MCELFObjectWriter.h"
22 #include "llvm/MC/MCExpr.h"
23 #include "llvm/MC/MCFixupKindInfo.h"
24 #include "llvm/MC/MCInst.h"
25 #include "llvm/MC/MCInstrInfo.h"
26 #include "llvm/MC/MCMachObjectWriter.h"
27 #include "llvm/MC/MCObjectStreamer.h"
28 #include "llvm/MC/MCObjectWriter.h"
29 #include "llvm/MC/MCRegisterInfo.h"
30 #include "llvm/MC/MCSectionMachO.h"
31 #include "llvm/MC/MCSubtargetInfo.h"
32 #include "llvm/MC/MCValue.h"
33 #include "llvm/MC/TargetRegistry.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/ErrorHandling.h"
36 #include "llvm/Support/raw_ostream.h"
37
38 using namespace llvm;
39
40 namespace {
41 /// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind
42 class X86AlignBranchKind {
43 private:
44 uint8_t AlignBranchKind = 0;
45
46 public:
47   void operator=(const std::string &Val) {
48 if (Val.empty())
49 return;
50 SmallVector<StringRef, 6> BranchTypes;
51 StringRef(Val).split(BranchTypes, '+', -1, false);
52 for (auto BranchType : BranchTypes) {
53 if (BranchType == "fused")
54 addKind(X86::AlignBranchFused);
55 else if (BranchType == "jcc")
56 addKind(X86::AlignBranchJcc);
57 else if (BranchType == "jmp")
58 addKind(X86::AlignBranchJmp);
59 else if (BranchType == "call")
60 addKind(X86::AlignBranchCall);
61 else if (BranchType == "ret")
62 addKind(X86::AlignBranchRet);
63 else if (BranchType == "indirect")
64 addKind(X86::AlignBranchIndirect);
65 else {
66 errs() << "invalid argument " << BranchType.str()
67 << " to -x86-align-branch=; each element must be one of: fused, "
68                      "jcc, jmp, call, ret, indirect (plus-separated).\n";
69 }
70 }
71 }
72
73   operator uint8_t() const { return AlignBranchKind; }
74   void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; }
75 };
76
77 X86AlignBranchKind X86AlignBranchKindLoc;
78
79 cl::opt<unsigned> X86AlignBranchBoundary(
80 "x86-align-branch-boundary", cl::init(0),
81 cl::desc(
82 "Control how the assembler should align branches with NOP. If the "
83 "boundary's size is not 0, it should be a power of 2 and no less "
84         "than 32. Branches will be aligned to prevent them from crossing or "
85         "ending at a boundary of the specified size. The default value 0 does not "
86 "align branches."));
87
88 cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
89 "x86-align-branch",
90 cl::desc(
91 "Specify types of branches to align (plus separated list of types):"
92 "\njcc indicates conditional jumps"
93 "\nfused indicates fused conditional jumps"
94 "\njmp indicates direct unconditional jumps"
95 "\ncall indicates direct and indirect calls"
96 "\nret indicates rets"
97 "\nindirect indicates indirect unconditional jumps"),
98 cl::location(X86AlignBranchKindLoc));
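// For illustration, the two options above are meant to be combined, e.g. an
// invocation along the lines of
//
//   llc -x86-align-branch-boundary=32 -x86-align-branch=fused+jcc+jmp foo.ll
//
// pads with NOPs so that no fused pair, Jcc or direct JMP crosses or ends at
// a 32-byte boundary.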
99
100 cl::opt<bool> X86AlignBranchWithin32BBoundaries(
101 "x86-branches-within-32B-boundaries", cl::init(false),
102 cl::desc(
103 "Align selected instructions to mitigate negative performance impact "
104         "of Intel's microcode update for erratum SKX102. May break "
105 "assumptions about labels corresponding to particular instructions, "
106 "and should be used with caution."));
107
108 cl::opt<unsigned> X86PadMaxPrefixSize(
109 "x86-pad-max-prefix-size", cl::init(0),
110 cl::desc("Maximum number of prefixes to use for padding"));
111
112 cl::opt<bool> X86PadForAlign(
113 "x86-pad-for-align", cl::init(false), cl::Hidden,
114 cl::desc("Pad previous instructions to implement align directives"));
115
116 cl::opt<bool> X86PadForBranchAlign(
117 "x86-pad-for-branch-align", cl::init(true), cl::Hidden,
118 cl::desc("Pad previous instructions to implement branch alignment"));
119
120 class X86AsmBackend : public MCAsmBackend {
121 const MCSubtargetInfo &STI;
122 std::unique_ptr<const MCInstrInfo> MCII;
123 X86AlignBranchKind AlignBranchType;
124 Align AlignBoundary;
125 unsigned TargetPrefixMax = 0;
126
127 MCInst PrevInst;
128 MCBoundaryAlignFragment *PendingBA = nullptr;
129 std::pair<MCFragment *, size_t> PrevInstPosition;
130 bool CanPadInst;
131
132 uint8_t determinePaddingPrefix(const MCInst &Inst) const;
133 bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
134 bool needAlign(const MCInst &Inst) const;
135 bool canPadBranches(MCObjectStreamer &OS) const;
136 bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const;
137
138 public:
139   X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
140 : MCAsmBackend(support::little), STI(STI),
141 MCII(T.createMCInstrInfo()) {
142 if (X86AlignBranchWithin32BBoundaries) {
143 // At the moment, this defaults to aligning fused branches, unconditional
144 // jumps, and (unfused) conditional jumps with nops. Both the
145 // instructions aligned and the alignment method (nop vs prefix) may
146 // change in the future.
147       AlignBoundary = assumeAligned(32);
148 AlignBranchType.addKind(X86::AlignBranchFused);
149 AlignBranchType.addKind(X86::AlignBranchJcc);
150 AlignBranchType.addKind(X86::AlignBranchJmp);
151 }
152 // Allow overriding defaults set by main flag
153 if (X86AlignBranchBoundary.getNumOccurrences())
154 AlignBoundary = assumeAligned(X86AlignBranchBoundary);
155 if (X86AlignBranch.getNumOccurrences())
156 AlignBranchType = X86AlignBranchKindLoc;
157 if (X86PadMaxPrefixSize.getNumOccurrences())
158 TargetPrefixMax = X86PadMaxPrefixSize;
159 }
160
161 bool allowAutoPadding() const override;
162 bool allowEnhancedRelaxation() const override;
163 void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst,
164 const MCSubtargetInfo &STI) override;
165 void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) override;
166
167   unsigned getNumFixupKinds() const override {
168 return X86::NumTargetFixupKinds;
169 }
170
171 Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
172
173 const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
174
175 bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
176 const MCValue &Target) override;
177
178 void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
179 const MCValue &Target, MutableArrayRef<char> Data,
180 uint64_t Value, bool IsResolved,
181 const MCSubtargetInfo *STI) const override;
182
183 bool mayNeedRelaxation(const MCInst &Inst,
184 const MCSubtargetInfo &STI) const override;
185
186 bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
187 const MCRelaxableFragment *DF,
188 const MCAsmLayout &Layout) const override;
189
190 void relaxInstruction(MCInst &Inst,
191 const MCSubtargetInfo &STI) const override;
192
193 bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
194 MCCodeEmitter &Emitter,
195 unsigned &RemainingSize) const;
196
197 bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
198 unsigned &RemainingSize) const;
199
200 bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
201 unsigned &RemainingSize) const;
202
203 void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override;
204
205 unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const override;
206
207 bool writeNopData(raw_ostream &OS, uint64_t Count,
208 const MCSubtargetInfo *STI) const override;
209 };
210 } // end anonymous namespace
211
212 static unsigned getRelaxedOpcodeBranch(const MCInst &Inst, bool Is16BitMode) {
213 unsigned Op = Inst.getOpcode();
214 switch (Op) {
215 default:
216 return Op;
217 case X86::JCC_1:
218 return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
219 case X86::JMP_1:
220 return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4;
221 }
222 }
223
224 static unsigned getRelaxedOpcodeArith(const MCInst &Inst) {
225 unsigned Op = Inst.getOpcode();
226 return X86::getRelaxedOpcodeArith(Op);
227 }
228
229 static unsigned getRelaxedOpcode(const MCInst &Inst, bool Is16BitMode) {
230 unsigned R = getRelaxedOpcodeArith(Inst);
231 if (R != Inst.getOpcode())
232 return R;
233 return getRelaxedOpcodeBranch(Inst, Is16BitMode);
234 }
235
236 static X86::CondCode getCondFromBranch(const MCInst &MI,
237 const MCInstrInfo &MCII) {
238 unsigned Opcode = MI.getOpcode();
239 switch (Opcode) {
240 default:
241 return X86::COND_INVALID;
242 case X86::JCC_1: {
243 const MCInstrDesc &Desc = MCII.get(Opcode);
244 return static_cast<X86::CondCode>(
245 MI.getOperand(Desc.getNumOperands() - 1).getImm());
246 }
247 }
248 }
249
250 static X86::SecondMacroFusionInstKind
251 classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) {
252 X86::CondCode CC = getCondFromBranch(MI, MCII);
253 return classifySecondCondCodeInMacroFusion(CC);
254 }
255
256 /// Check if the instruction uses RIP relative addressing.
257 static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
258 unsigned Opcode = MI.getOpcode();
259 const MCInstrDesc &Desc = MCII.get(Opcode);
260 uint64_t TSFlags = Desc.TSFlags;
261 unsigned CurOp = X86II::getOperandBias(Desc);
262 int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
263 if (MemoryOperand < 0)
264 return false;
265 unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg;
266 unsigned BaseReg = MI.getOperand(BaseRegNum).getReg();
267 return (BaseReg == X86::RIP);
268 }
269
270 /// Check if the instruction is a prefix.
271 static bool isPrefix(const MCInst &MI, const MCInstrInfo &MCII) {
272 return X86II::isPrefix(MCII.get(MI.getOpcode()).TSFlags);
273 }
274
275 /// Check if the instruction is valid as the first instruction in macro fusion.
276 static bool isFirstMacroFusibleInst(const MCInst &Inst,
277 const MCInstrInfo &MCII) {
278 // An Intel instruction with RIP relative addressing is not macro fusible.
279 if (isRIPRelative(Inst, MCII))
280 return false;
281 X86::FirstMacroFusionInstKind FIK =
282 X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode());
283 return FIK != X86::FirstMacroFusionInstKind::Invalid;
284 }
285
286 /// X86 can reduce the bytes of NOP by padding instructions with prefixes to
287 /// get better performance in some cases. Here, we determine which prefix is
288 /// the most suitable.
289 ///
290 /// If the instruction has a segment override prefix, use the existing one.
291 /// If the target is 64-bit, use the CS.
292 /// If the target is 32-bit,
293 /// - If the instruction has a ESP/EBP base register, use SS.
294 /// - Otherwise use DS.
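///
/// Illustrative sketch of the rules above (byte values for reference only):
/// in 64-bit mode the padding prefix is 0x2E (CS override); in 32-bit mode an
/// instruction whose base register is ESP or EBP gets 0x36 (SS), and anything
/// else gets 0x3E (DS). An explicit segment override on the memory operand is
/// simply repeated.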
295 uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
296 assert((STI.hasFeature(X86::Is32Bit) || STI.hasFeature(X86::Is64Bit)) &&
297 "Prefixes can be added only in 32-bit or 64-bit mode.");
298 const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
299 uint64_t TSFlags = Desc.TSFlags;
300
301 // Determine where the memory operand starts, if present.
302 int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
303 if (MemoryOperand != -1)
304 MemoryOperand += X86II::getOperandBias(Desc);
305
306 unsigned SegmentReg = 0;
307 if (MemoryOperand >= 0) {
308 // Check for explicit segment override on memory operand.
309 SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg();
310 }
311
312 switch (TSFlags & X86II::FormMask) {
313 default:
314 break;
315 case X86II::RawFrmDstSrc: {
316 // Check segment override opcode prefix as needed (not for %ds).
317 if (Inst.getOperand(2).getReg() != X86::DS)
318 SegmentReg = Inst.getOperand(2).getReg();
319 break;
320 }
321 case X86II::RawFrmSrc: {
322 // Check segment override opcode prefix as needed (not for %ds).
323 if (Inst.getOperand(1).getReg() != X86::DS)
324 SegmentReg = Inst.getOperand(1).getReg();
325 break;
326 }
327 case X86II::RawFrmMemOffs: {
328 // Check segment override opcode prefix as needed.
329 SegmentReg = Inst.getOperand(1).getReg();
330 break;
331 }
332 }
333
334 if (SegmentReg != 0)
335 return X86::getSegmentOverridePrefixForReg(SegmentReg);
336
337 if (STI.hasFeature(X86::Is64Bit))
338 return X86::CS_Encoding;
339
340 if (MemoryOperand >= 0) {
341 unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg;
342 unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg();
343 if (BaseReg == X86::ESP || BaseReg == X86::EBP)
344 return X86::SS_Encoding;
345 }
346 return X86::DS_Encoding;
347 }
348
349 /// Check if the two instructions will be macro-fused on the target cpu.
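/// For example, a CMP or TEST immediately followed by a conditional branch
/// (cmp %rsi, %rdi; je .L) can decode as a single macro-fused uop on many
/// Intel cores, so padding must not be inserted between the two instructions.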
350 bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const {
351 const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode());
352 if (!InstDesc.isConditionalBranch())
353 return false;
354 if (!isFirstMacroFusibleInst(Cmp, *MCII))
355 return false;
356 const X86::FirstMacroFusionInstKind CmpKind =
357 X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode());
358 const X86::SecondMacroFusionInstKind BranchKind =
359 classifySecondInstInMacroFusion(Jcc, *MCII);
360 return X86::isMacroFused(CmpKind, BranchKind);
361 }
362
363 /// Check if the instruction has a variant symbol operand.
364 static bool hasVariantSymbol(const MCInst &MI) {
365 for (auto &Operand : MI) {
366 if (!Operand.isExpr())
367 continue;
368 const MCExpr &Expr = *Operand.getExpr();
369 if (Expr.getKind() == MCExpr::SymbolRef &&
370 cast<MCSymbolRefExpr>(Expr).getKind() != MCSymbolRefExpr::VK_None)
371 return true;
372 }
373 return false;
374 }
375
376 bool X86AsmBackend::allowAutoPadding() const {
377 return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone);
378 }
379
380 bool X86AsmBackend::allowEnhancedRelaxation() const {
381 return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign;
382 }
383
384 /// X86 has certain instructions which enable interrupts exactly one
385 /// instruction *after* the instruction which stores to SS. Return true if the
386 /// given instruction has such an interrupt delay slot.
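/// For illustration: after "mov %eax, %ss" the very next instruction executes
/// with interrupts inhibited, so inserting a NOP right after the MOV would
/// change which instruction is protected.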
387 static bool hasInterruptDelaySlot(const MCInst &Inst) {
388 switch (Inst.getOpcode()) {
389 case X86::POPSS16:
390 case X86::POPSS32:
391 case X86::STI:
392 return true;
393
394 case X86::MOV16sr:
395 case X86::MOV32sr:
396 case X86::MOV64sr:
397 case X86::MOV16sm:
398 if (Inst.getOperand(0).getReg() == X86::SS)
399 return true;
400 break;
401 }
402 return false;
403 }
404
405 /// Check if the instruction to be emitted is right after any data.
406 static bool
407 isRightAfterData(MCFragment *CurrentFragment,
408 const std::pair<MCFragment *, size_t> &PrevInstPosition) {
409 MCFragment *F = CurrentFragment;
410 // Empty data fragments may be created to prevent further data being
411   // added into the previous fragment; we need to skip them since they
412 // have no contents.
413 for (; isa_and_nonnull<MCDataFragment>(F); F = F->getPrevNode())
414 if (cast<MCDataFragment>(F)->getContents().size() != 0)
415 break;
416
417 // Since data is always emitted into a DataFragment, our check strategy is
418 // simple here.
419 // - If the fragment is a DataFragment
420 // - If it's not the fragment where the previous instruction is,
421 // returns true.
422 // - If it's the fragment holding the previous instruction but its
423   //       size changed since the previous instruction was emitted into
424 // it, returns true.
425 // - Otherwise returns false.
426 // - If the fragment is not a DataFragment, returns false.
427 if (auto *DF = dyn_cast_or_null<MCDataFragment>(F))
428 return DF != PrevInstPosition.first ||
429 DF->getContents().size() != PrevInstPosition.second;
430
431 return false;
432 }
433
434 /// \returns the fragment size if it has instructions, otherwise returns 0.
435 static size_t getSizeForInstFragment(const MCFragment *F) {
436 if (!F || !F->hasInstructions())
437 return 0;
438 // MCEncodedFragmentWithContents being templated makes this tricky.
439 switch (F->getKind()) {
440 default:
441 llvm_unreachable("Unknown fragment with instructions!");
442 case MCFragment::FT_Data:
443 return cast<MCDataFragment>(*F).getContents().size();
444 case MCFragment::FT_Relaxable:
445 return cast<MCRelaxableFragment>(*F).getContents().size();
446 case MCFragment::FT_CompactEncodedInst:
447 return cast<MCCompactEncodedInstFragment>(*F).getContents().size();
448 }
449 }
450
451 /// Return true if we can insert NOP or prefixes automatically before the
452 /// instruction to be emitted.
453 bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
454 if (hasVariantSymbol(Inst))
455     // Linker may rewrite the instruction with variant symbol operand (e.g.
456 // TLSCALL).
457 return false;
458
459 if (hasInterruptDelaySlot(PrevInst))
460 // If this instruction follows an interrupt enabling instruction with a one
461 // instruction delay, inserting a nop would change behavior.
462 return false;
463
464 if (isPrefix(PrevInst, *MCII))
465 // If this instruction follows a prefix, inserting a nop/prefix would change
466     // semantics.
467 return false;
468
469 if (isPrefix(Inst, *MCII))
470 // If this instruction is a prefix, inserting a prefix would change
471     // semantics.
472 return false;
473
474 if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition))
475 // If this instruction follows any data, there is no clear
476     // instruction boundary, inserting a nop/prefix would change semantics.
477 return false;
478
479 return true;
480 }
481
482 bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
483 if (!OS.getAllowAutoPadding())
484 return false;
485 assert(allowAutoPadding() && "incorrect initialization!");
486
487 // We only pad in text section.
488 if (!OS.getCurrentSectionOnly()->getKind().isText())
489 return false;
490
491   // TODO: Currently we don't deal with bundle cases.
492 if (OS.getAssembler().isBundlingEnabled())
493 return false;
494
495 // Branches only need to be aligned in 32-bit or 64-bit mode.
496 if (!(STI.hasFeature(X86::Is64Bit) || STI.hasFeature(X86::Is32Bit)))
497 return false;
498
499 return true;
500 }
501
502 /// Check if the instruction needs to be aligned.
503 bool X86AsmBackend::needAlign(const MCInst &Inst) const {
504 const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
505 return (Desc.isConditionalBranch() &&
506 (AlignBranchType & X86::AlignBranchJcc)) ||
507 (Desc.isUnconditionalBranch() &&
508 (AlignBranchType & X86::AlignBranchJmp)) ||
509 (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) ||
510 (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) ||
511 (Desc.isIndirectBranch() &&
512 (AlignBranchType & X86::AlignBranchIndirect));
513 }
514
515 /// Insert BoundaryAlignFragment before instructions to align branches.
516 void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
517 const MCInst &Inst, const MCSubtargetInfo &STI) {
518 CanPadInst = canPadInst(Inst, OS);
519
520 if (!canPadBranches(OS))
521 return;
522
523 if (!isMacroFused(PrevInst, Inst))
524     // Macro fusion doesn't actually happen, so clear the pending fragment.
525 PendingBA = nullptr;
526
527 if (!CanPadInst)
528 return;
529
530 if (PendingBA && OS.getCurrentFragment()->getPrevNode() == PendingBA) {
531 // Macro fusion actually happens and there is no other fragment inserted
532 // after the previous instruction.
533 //
534     // Do nothing here since we already inserted a BoundaryAlign fragment when
535 // we met the first instruction in the fused pair and we'll tie them
536 // together in emitInstructionEnd.
537 //
538 // Note: When there is at least one fragment, such as MCAlignFragment,
539 // inserted after the previous instruction, e.g.
540 //
541 // \code
542 // cmp %rax %rcx
543 // .align 16
544 // je .Label0
545     // \endcode
546 //
547     // We will treat the JCC as an unfused branch although it may be fused
548 // with the CMP.
549 return;
550 }
551
552 if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
553 isFirstMacroFusibleInst(Inst, *MCII))) {
554     // If we meet an unfused branch or the first instruction in a fusible pair,
555 // insert a BoundaryAlign fragment.
556 OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary, STI));
557 }
558 }
559
560 /// Set the last fragment to be aligned for the BoundaryAlignFragment.
561 void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) {
562 PrevInst = Inst;
563 MCFragment *CF = OS.getCurrentFragment();
564 PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));
565 if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF))
566 F->setAllowAutoPadding(CanPadInst);
567
568 if (!canPadBranches(OS))
569 return;
570
571 if (!needAlign(Inst) || !PendingBA)
572 return;
573
574   // Tie the aligned instructions into the pending BoundaryAlign.
575 PendingBA->setLastFragment(CF);
576 PendingBA = nullptr;
577
578 // We need to ensure that further data isn't added to the current
579 // DataFragment, so that we can get the size of instructions later in
580 // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
581 // DataFragment.
582 if (isa_and_nonnull<MCDataFragment>(CF))
583 OS.insert(new MCDataFragment());
584
585 // Update the maximum alignment on the current section if necessary.
586 MCSection *Sec = OS.getCurrentSectionOnly();
587 if (AlignBoundary.value() > Sec->getAlignment())
588 Sec->setAlignment(AlignBoundary);
589 }
590
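// Map fixup names requested by name (e.g. from an assembly-level .reloc
// directive) to literal relocation types. Illustrative usage, assuming the
// usual .reloc syntax:
//
//   .reloc ., R_X86_64_NONE, payload
//   .reloc ., BFD_RELOC_32, sym
//
// Both spellings resolve to a relocation that is emitted as-is.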
591 Optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
592 if (STI.getTargetTriple().isOSBinFormatELF()) {
593 unsigned Type;
594 if (STI.getTargetTriple().getArch() == Triple::x86_64) {
595 Type = llvm::StringSwitch<unsigned>(Name)
596 #define ELF_RELOC(X, Y) .Case(#X, Y)
597 #include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
598 #undef ELF_RELOC
599 .Case("BFD_RELOC_NONE", ELF::R_X86_64_NONE)
600 .Case("BFD_RELOC_8", ELF::R_X86_64_8)
601 .Case("BFD_RELOC_16", ELF::R_X86_64_16)
602 .Case("BFD_RELOC_32", ELF::R_X86_64_32)
603 .Case("BFD_RELOC_64", ELF::R_X86_64_64)
604 .Default(-1u);
605 } else {
606 Type = llvm::StringSwitch<unsigned>(Name)
607 #define ELF_RELOC(X, Y) .Case(#X, Y)
608 #include "llvm/BinaryFormat/ELFRelocs/i386.def"
609 #undef ELF_RELOC
610 .Case("BFD_RELOC_NONE", ELF::R_386_NONE)
611 .Case("BFD_RELOC_8", ELF::R_386_8)
612 .Case("BFD_RELOC_16", ELF::R_386_16)
613 .Case("BFD_RELOC_32", ELF::R_386_32)
614 .Default(-1u);
615 }
616 if (Type == -1u)
617 return None;
618 return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
619 }
620 return MCAsmBackend::getFixupKind(Name);
621 }
622
623 const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
624 const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
625 {"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
626 {"reloc_riprel_4byte_movq_load", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
627 {"reloc_riprel_4byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
628 {"reloc_riprel_4byte_relax_rex", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
629 {"reloc_signed_4byte", 0, 32, 0},
630 {"reloc_signed_4byte_relax", 0, 32, 0},
631 {"reloc_global_offset_table", 0, 32, 0},
632 {"reloc_global_offset_table8", 0, 64, 0},
633 {"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
634 };
635
636 // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They
637 // do not require any extra processing.
638 if (Kind >= FirstLiteralRelocationKind)
639 return MCAsmBackend::getFixupKindInfo(FK_NONE);
640
641 if (Kind < FirstTargetFixupKind)
642 return MCAsmBackend::getFixupKindInfo(Kind);
643
644 assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
645 "Invalid kind!");
646 assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!");
647 return Infos[Kind - FirstTargetFixupKind];
648 }
649
650 bool X86AsmBackend::shouldForceRelocation(const MCAssembler &,
651 const MCFixup &Fixup,
652 const MCValue &) {
653 return Fixup.getKind() >= FirstLiteralRelocationKind;
654 }
655
656 static unsigned getFixupKindSize(unsigned Kind) {
657 switch (Kind) {
658 default:
659 llvm_unreachable("invalid fixup kind!");
660 case FK_NONE:
661 return 0;
662 case FK_PCRel_1:
663 case FK_SecRel_1:
664 case FK_Data_1:
665 return 1;
666 case FK_PCRel_2:
667 case FK_SecRel_2:
668 case FK_Data_2:
669 return 2;
670 case FK_PCRel_4:
671 case X86::reloc_riprel_4byte:
672 case X86::reloc_riprel_4byte_relax:
673 case X86::reloc_riprel_4byte_relax_rex:
674 case X86::reloc_riprel_4byte_movq_load:
675 case X86::reloc_signed_4byte:
676 case X86::reloc_signed_4byte_relax:
677 case X86::reloc_global_offset_table:
678 case X86::reloc_branch_4byte_pcrel:
679 case FK_SecRel_4:
680 case FK_Data_4:
681 return 4;
682 case FK_PCRel_8:
683 case FK_SecRel_8:
684 case FK_Data_8:
685 case X86::reloc_global_offset_table8:
686 return 8;
687 }
688 }
689
690 void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
691 const MCValue &Target,
692 MutableArrayRef<char> Data,
693 uint64_t Value, bool IsResolved,
694 const MCSubtargetInfo *STI) const {
695 unsigned Kind = Fixup.getKind();
696 if (Kind >= FirstLiteralRelocationKind)
697 return;
698 unsigned Size = getFixupKindSize(Kind);
699
700 assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");
701
702 int64_t SignedValue = static_cast<int64_t>(Value);
703 if ((Target.isAbsolute() || IsResolved) &&
704 getFixupKindInfo(Fixup.getKind()).Flags &
705 MCFixupKindInfo::FKF_IsPCRel) {
706 // check that PC relative fixup fits into the fixup size.
707 if (Size > 0 && !isIntN(Size * 8, SignedValue))
708 Asm.getContext().reportError(
709 Fixup.getLoc(), "value of " + Twine(SignedValue) +
710 " is too large for field of " + Twine(Size) +
711 ((Size == 1) ? " byte." : " bytes."));
712 } else {
713     // Check that upper bits are either all zeros or all ones.
714 // Specifically ignore overflow/underflow as long as the leakage is
715 // limited to the lower bits. This is to remain compatible with
716 // other assemblers.
717 assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) &&
718 "Value does not fit in the Fixup field");
719 }
720
721 for (unsigned i = 0; i != Size; ++i)
722 Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
723 }
724
725 bool X86AsmBackend::mayNeedRelaxation(const MCInst &Inst,
726 const MCSubtargetInfo &STI) const {
727 // Branches can always be relaxed in either mode.
728 if (getRelaxedOpcodeBranch(Inst, false) != Inst.getOpcode())
729 return true;
730
731 // Check if this instruction is ever relaxable.
732 if (getRelaxedOpcodeArith(Inst) == Inst.getOpcode())
733 return false;
734
735
736 // Check if the relaxable operand has an expression. For the current set of
737 // relaxable instructions, the relaxable operand is always the last operand.
738 unsigned RelaxableOp = Inst.getNumOperands() - 1;
739 if (Inst.getOperand(RelaxableOp).isExpr())
740 return true;
741
742 return false;
743 }
744
745 bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
746 uint64_t Value,
747 const MCRelaxableFragment *DF,
748 const MCAsmLayout &Layout) const {
749 // Relax if the value is too big for a (signed) i8.
750 return !isInt<8>(Value);
751 }
752
753 // FIXME: Can tblgen help at all here to verify there aren't other instructions
754 // we can relax?
755 void X86AsmBackend::relaxInstruction(MCInst &Inst,
756 const MCSubtargetInfo &STI) const {
757   // The only relaxation X86 does is from a 1-byte pcrel to a 4-byte pcrel.
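  // For example (encodings shown for illustration): JMP_1 (EB rel8) relaxes
  // to JMP_4 (E9 rel32) and JCC_1 (70+cc rel8) relaxes to JCC_4 (0F 80+cc
  // rel32); in 16-bit mode the rel16 forms are used instead.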
758 bool Is16BitMode = STI.getFeatureBits()[X86::Is16Bit];
759 unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode);
760
761 if (RelaxedOp == Inst.getOpcode()) {
762 SmallString<256> Tmp;
763 raw_svector_ostream OS(Tmp);
764 Inst.dump_pretty(OS);
765 OS << "\n";
766 report_fatal_error("unexpected instruction to relax: " + OS.str());
767 }
768
769 Inst.setOpcode(RelaxedOp);
770 }
771
772 /// Return true if this instruction has been fully relaxed into its most
773 /// general available form.
774 static bool isFullyRelaxed(const MCRelaxableFragment &RF) {
775 auto &Inst = RF.getInst();
776 auto &STI = *RF.getSubtargetInfo();
777 bool Is16BitMode = STI.getFeatureBits()[X86::Is16Bit];
778 return getRelaxedOpcode(Inst, Is16BitMode) == Inst.getOpcode();
779 }
780
781 bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
782 MCCodeEmitter &Emitter,
783 unsigned &RemainingSize) const {
784 if (!RF.getAllowAutoPadding())
785 return false;
786 // If the instruction isn't fully relaxed, shifting it around might require a
787   // larger value for one of the fixups than can be encoded. The outer loop
788 // will also catch this before moving to the next instruction, but we need to
789 // prevent padding this single instruction as well.
790 if (!isFullyRelaxed(RF))
791 return false;
792
793 const unsigned OldSize = RF.getContents().size();
794 if (OldSize == 15)
795 return false;
796
797 const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize);
798 const unsigned RemainingPrefixSize = [&]() -> unsigned {
799 SmallString<15> Code;
800 raw_svector_ostream VecOS(Code);
801 Emitter.emitPrefix(RF.getInst(), VecOS, STI);
802 assert(Code.size() < 15 && "The number of prefixes must be less than 15.");
803
804 // TODO: It turns out we need a decent amount of plumbing for the target
805     // specific bits to determine number of prefixes it's safe to add. Various
806 // targets (older chips mostly, but also Atom family) encounter decoder
807 // stalls with too many prefixes. For testing purposes, we set the value
808 // externally for the moment.
809 unsigned ExistingPrefixSize = Code.size();
810 if (TargetPrefixMax <= ExistingPrefixSize)
811 return 0;
812 return TargetPrefixMax - ExistingPrefixSize;
813 }();
814 const unsigned PrefixBytesToAdd =
815 std::min(MaxPossiblePad, RemainingPrefixSize);
816 if (PrefixBytesToAdd == 0)
817 return false;
818
819 const uint8_t Prefix = determinePaddingPrefix(RF.getInst());
820
821 SmallString<256> Code;
822 Code.append(PrefixBytesToAdd, Prefix);
823 Code.append(RF.getContents().begin(), RF.getContents().end());
824 RF.getContents() = Code;
825
826 // Adjust the fixups for the change in offsets
827 for (auto &F : RF.getFixups()) {
828 F.setOffset(F.getOffset() + PrefixBytesToAdd);
829 }
830
831 RemainingSize -= PrefixBytesToAdd;
832 return true;
833 }
834
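// Illustrative sketch of what the next function does (exact encodings depend
// on the instruction): a sign-extended-imm8 form such as "addl $1, %eax"
// (83 C0 01) is re-encoded as the imm32 form (81 C0 01 00 00 00), consuming
// three bytes of the padding budget when RemainingSize allows it.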
835 bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
836 MCCodeEmitter &Emitter,
837 unsigned &RemainingSize) const {
838 if (isFullyRelaxed(RF))
839 // TODO: There are lots of other tricks we could apply for increasing
840 // encoding size without impacting performance.
841 return false;
842
843 MCInst Relaxed = RF.getInst();
844 relaxInstruction(Relaxed, *RF.getSubtargetInfo());
845
846 SmallVector<MCFixup, 4> Fixups;
847 SmallString<15> Code;
848 raw_svector_ostream VecOS(Code);
849 Emitter.encodeInstruction(Relaxed, VecOS, Fixups, *RF.getSubtargetInfo());
850 const unsigned OldSize = RF.getContents().size();
851 const unsigned NewSize = Code.size();
852 assert(NewSize >= OldSize && "size decrease during relaxation?");
853 unsigned Delta = NewSize - OldSize;
854 if (Delta > RemainingSize)
855 return false;
856 RF.setInst(Relaxed);
857 RF.getContents() = Code;
858 RF.getFixups() = Fixups;
859 RemainingSize -= Delta;
860 return true;
861 }
862
863 bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
864 MCCodeEmitter &Emitter,
865 unsigned &RemainingSize) const {
866 bool Changed = false;
867 if (RemainingSize != 0)
868 Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
869 if (RemainingSize != 0)
870 Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
871 return Changed;
872 }
873
874 void X86AsmBackend::finishLayout(MCAssembler const &Asm,
875 MCAsmLayout &Layout) const {
876 // See if we can further relax some instructions to cut down on the number of
877 // nop bytes required for code alignment. The actual win is in reducing
878 // instruction count, not number of bytes. Modern X86-64 can easily end up
879 // decode limited. It is often better to reduce the number of instructions
880 // (i.e. eliminate nops) even at the cost of increasing the size and
881 // complexity of others.
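  //
  // Illustrative example: if a following .p2align 4 would otherwise need
  // three NOP bytes, growing a preceding relaxable instruction by those three
  // bytes (via prefixes or a wider immediate) removes the NOPs entirely.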
882 if (!X86PadForAlign && !X86PadForBranchAlign)
883 return;
884
885   // The processed regions are delimited by LabeledFragments. -g may have more
886 // MCSymbols and therefore different relaxation results. X86PadForAlign is
887 // disabled by default to eliminate the -g vs non -g difference.
888 DenseSet<MCFragment *> LabeledFragments;
889 for (const MCSymbol &S : Asm.symbols())
890 LabeledFragments.insert(S.getFragment(false));
891
892 for (MCSection &Sec : Asm) {
893 if (!Sec.getKind().isText())
894 continue;
895
896 SmallVector<MCRelaxableFragment *, 4> Relaxable;
897 for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
898 MCFragment &F = *I;
899
900 if (LabeledFragments.count(&F))
901 Relaxable.clear();
902
903 if (F.getKind() == MCFragment::FT_Data ||
904 F.getKind() == MCFragment::FT_CompactEncodedInst)
905 // Skip and ignore
906 continue;
907
908 if (F.getKind() == MCFragment::FT_Relaxable) {
909 auto &RF = cast<MCRelaxableFragment>(*I);
910 Relaxable.push_back(&RF);
911 continue;
912 }
913
914 auto canHandle = [](MCFragment &F) -> bool {
915 switch (F.getKind()) {
916 default:
917 return false;
918 case MCFragment::FT_Align:
919 return X86PadForAlign;
920 case MCFragment::FT_BoundaryAlign:
921 return X86PadForBranchAlign;
922 }
923 };
924 // For any unhandled kind, assume we can't change layout.
925 if (!canHandle(F)) {
926 Relaxable.clear();
927 continue;
928 }
929
930 #ifndef NDEBUG
931 const uint64_t OrigOffset = Layout.getFragmentOffset(&F);
932 #endif
933 const uint64_t OrigSize = Asm.computeFragmentSize(Layout, F);
934
935 // To keep the effects local, prefer to relax instructions closest to
936 // the align directive. This is purely about human understandability
937 // of the resulting code. If we later find a reason to expand
938 // particular instructions over others, we can adjust.
939 MCFragment *FirstChangedFragment = nullptr;
940 unsigned RemainingSize = OrigSize;
941 while (!Relaxable.empty() && RemainingSize != 0) {
942 auto &RF = *Relaxable.pop_back_val();
943 // Give the backend a chance to play any tricks it wishes to increase
944 // the encoding size of the given instruction. Target independent code
945         // will try further relaxation, but targets may play further tricks.
946 if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize))
947 FirstChangedFragment = &RF;
948
949 // If we have an instruction which hasn't been fully relaxed, we can't
950 // skip past it and insert bytes before it. Changing its starting
951 // offset might require a larger negative offset than it can encode.
952 // We don't need to worry about larger positive offsets as none of the
953 // possible offsets between this and our align are visible, and the
954 // ones afterwards aren't changing.
955 if (!isFullyRelaxed(RF))
956 break;
957 }
958 Relaxable.clear();
959
960 if (FirstChangedFragment) {
961         // Make sure the offsets for any fragments in the affected range get
962 // updated. Note that this (conservatively) invalidates the offsets of
963 // those following, but this is not required.
964 Layout.invalidateFragmentsFrom(FirstChangedFragment);
965 }
966
967       // BoundaryAlign explicitly tracks its size (unlike align)
968 if (F.getKind() == MCFragment::FT_BoundaryAlign)
969 cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize);
970
971 #ifndef NDEBUG
972 const uint64_t FinalOffset = Layout.getFragmentOffset(&F);
973 const uint64_t FinalSize = Asm.computeFragmentSize(Layout, F);
974 assert(OrigOffset + OrigSize == FinalOffset + FinalSize &&
975 "can't move start of next fragment!");
976 assert(FinalSize == RemainingSize && "inconsistent size computation?");
977 #endif
978
979 // If we're looking at a boundary align, make sure we don't try to pad
980 // its target instructions for some following directive. Doing so would
981 // break the alignment of the current boundary align.
982 if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) {
983 const MCFragment *LastFragment = BF->getLastFragment();
984 if (!LastFragment)
985 continue;
986 while (&*I != LastFragment)
987 ++I;
988 }
989 }
990 }
991
992 // The layout is done. Mark every fragment as valid.
993 for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
994 MCSection &Section = *Layout.getSectionOrder()[i];
995 Layout.getFragmentOffset(&*Section.getFragmentList().rbegin());
996 Asm.computeFragmentSize(Layout, *Section.getFragmentList().rbegin());
997 }
998 }
999
1000 unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const {
1001 if (STI.hasFeature(X86::Is16Bit))
1002 return 4;
1003 if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Is64Bit))
1004 return 1;
1005 if (STI.getFeatureBits()[X86::TuningFast7ByteNOP])
1006 return 7;
1007 if (STI.getFeatureBits()[X86::TuningFast15ByteNOP])
1008 return 15;
1009 if (STI.getFeatureBits()[X86::TuningFast11ByteNOP])
1010 return 11;
1011 // FIXME: handle 32-bit mode
1012   // 15 bytes is the longest single NOP instruction, but 10 bytes is
1013   // commonly the longest that can be efficiently decoded.
1014 return 10;
1015 }
1016
1017 /// Write a sequence of optimal nops to the output, covering \p Count
1018 /// bytes.
1019 /// \return - true on success, false on failure
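///
/// Worked example (assuming getMaximumNopSize(*STI) returns 15): Count == 18
/// is emitted as one 15-byte NOP (five 0x66 prefixes followed by the 10-byte
/// "nopw %cs:0L(...)" pattern) and then one 3-byte "nopl (%[re]ax)".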
1020 bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
1021 const MCSubtargetInfo *STI) const {
1022 static const char Nops32Bit[10][11] = {
1023 // nop
1024 "\x90",
1025 // xchg %ax,%ax
1026 "\x66\x90",
1027 // nopl (%[re]ax)
1028 "\x0f\x1f\x00",
1029 // nopl 0(%[re]ax)
1030 "\x0f\x1f\x40\x00",
1031 // nopl 0(%[re]ax,%[re]ax,1)
1032 "\x0f\x1f\x44\x00\x00",
1033 // nopw 0(%[re]ax,%[re]ax,1)
1034 "\x66\x0f\x1f\x44\x00\x00",
1035 // nopl 0L(%[re]ax)
1036 "\x0f\x1f\x80\x00\x00\x00\x00",
1037 // nopl 0L(%[re]ax,%[re]ax,1)
1038 "\x0f\x1f\x84\x00\x00\x00\x00\x00",
1039 // nopw 0L(%[re]ax,%[re]ax,1)
1040 "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00",
1041 // nopw %cs:0L(%[re]ax,%[re]ax,1)
1042 "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00",
1043 };
1044
1045 // 16-bit mode uses different nop patterns than 32-bit.
1046 static const char Nops16Bit[4][11] = {
1047 // nop
1048 "\x90",
1049 // xchg %eax,%eax
1050 "\x66\x90",
1051 // lea 0(%si),%si
1052 "\x8d\x74\x00",
1053 // lea 0w(%si),%si
1054 "\x8d\xb4\x00\x00",
1055 };
1056
1057 const char(*Nops)[11] =
1058 STI->getFeatureBits()[X86::Is16Bit] ? Nops16Bit : Nops32Bit;
1059
1060 uint64_t MaxNopLength = (uint64_t)getMaximumNopSize(*STI);
1061
1062 // Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining
1063 // length.
1064 do {
1065 const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength);
1066 const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
1067 for (uint8_t i = 0; i < Prefixes; i++)
1068 OS << '\x66';
1069 const uint8_t Rest = ThisNopLength - Prefixes;
1070 if (Rest != 0)
1071 OS.write(Nops[Rest - 1], Rest);
1072 Count -= ThisNopLength;
1073 } while (Count != 0);
1074
1075 return true;
1076 }
1077
1078 /* *** */
1079
1080 namespace {
1081
1082 class ELFX86AsmBackend : public X86AsmBackend {
1083 public:
1084 uint8_t OSABI;
1085   ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI)
1086 : X86AsmBackend(T, STI), OSABI(OSABI) {}
1087 };
1088
1089 class ELFX86_32AsmBackend : public ELFX86AsmBackend {
1090 public:
1091   ELFX86_32AsmBackend(const Target &T, uint8_t OSABI,
1092 const MCSubtargetInfo &STI)
1093 : ELFX86AsmBackend(T, OSABI, STI) {}
1094
1095 std::unique_ptr<MCObjectTargetWriter>
1096   createObjectTargetWriter() const override {
1097 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, ELF::EM_386);
1098 }
1099 };
1100
1101 class ELFX86_X32AsmBackend : public ELFX86AsmBackend {
1102 public:
1103   ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI,
1104 const MCSubtargetInfo &STI)
1105 : ELFX86AsmBackend(T, OSABI, STI) {}
1106
1107 std::unique_ptr<MCObjectTargetWriter>
1108   createObjectTargetWriter() const override {
1109 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
1110 ELF::EM_X86_64);
1111 }
1112 };
1113
1114 class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend {
1115 public:
1116   ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI,
1117 const MCSubtargetInfo &STI)
1118 : ELFX86AsmBackend(T, OSABI, STI) {}
1119
1120 std::unique_ptr<MCObjectTargetWriter>
1121   createObjectTargetWriter() const override {
1122 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
1123 ELF::EM_IAMCU);
1124 }
1125 };
1126
1127 class ELFX86_64AsmBackend : public ELFX86AsmBackend {
1128 public:
1129   ELFX86_64AsmBackend(const Target &T, uint8_t OSABI,
1130 const MCSubtargetInfo &STI)
1131 : ELFX86AsmBackend(T, OSABI, STI) {}
1132
1133 std::unique_ptr<MCObjectTargetWriter>
1134   createObjectTargetWriter() const override {
1135 return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI, ELF::EM_X86_64);
1136 }
1137 };
1138
1139 class WindowsX86AsmBackend : public X86AsmBackend {
1140 bool Is64Bit;
1141
1142 public:
1143   WindowsX86AsmBackend(const Target &T, bool is64Bit,
1144 const MCSubtargetInfo &STI)
1145 : X86AsmBackend(T, STI)
1146 , Is64Bit(is64Bit) {
1147 }
1148
1149   Optional<MCFixupKind> getFixupKind(StringRef Name) const override {
1150 return StringSwitch<Optional<MCFixupKind>>(Name)
1151 .Case("dir32", FK_Data_4)
1152 .Case("secrel32", FK_SecRel_4)
1153 .Case("secidx", FK_SecRel_2)
1154 .Default(MCAsmBackend::getFixupKind(Name));
1155 }
1156
1157 std::unique_ptr<MCObjectTargetWriter>
1158   createObjectTargetWriter() const override {
1159 return createX86WinCOFFObjectWriter(Is64Bit);
1160 }
1161 };
1162
1163 namespace CU {
1164
1165 /// Compact unwind encoding values.
1166 enum CompactUnwindEncodings {
1167   /// [RE]BP based frame where [RE]BP is pushed on the stack immediately after
1168 /// the return address, then [RE]SP is moved to [RE]BP.
1169 UNWIND_MODE_BP_FRAME = 0x01000000,
1170
1171 /// A frameless function with a small constant stack size.
1172 UNWIND_MODE_STACK_IMMD = 0x02000000,
1173
1174 /// A frameless function with a large constant stack size.
1175 UNWIND_MODE_STACK_IND = 0x03000000,
1176
1177 /// No compact unwind encoding is available.
1178 UNWIND_MODE_DWARF = 0x04000000,
1179
1180 /// Mask for encoding the frame registers.
1181 UNWIND_BP_FRAME_REGISTERS = 0x00007FFF,
1182
1183 /// Mask for encoding the frameless registers.
1184 UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
1185 };
1186
1187 } // namespace CU
1188
1189 class DarwinX86AsmBackend : public X86AsmBackend {
1190 const MCRegisterInfo &MRI;
1191
1192 /// Number of registers that can be saved in a compact unwind encoding.
1193 enum { CU_NUM_SAVED_REGS = 6 };
1194
1195 mutable unsigned SavedRegs[CU_NUM_SAVED_REGS];
1196 Triple TT;
1197 bool Is64Bit;
1198
1199 unsigned OffsetSize; ///< Offset of a "push" instruction.
1200 unsigned MoveInstrSize; ///< Size of a "move" instruction.
1201 unsigned StackDivide; ///< Amount to adjust stack size by.
1202 protected:
1203 /// Size of a "push" instruction for the given register.
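  /// (Pushes of R12-R15 need a REX.B prefix and therefore encode in two
  /// bytes; the classic registers encode as a single 50+rd byte.)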
1204   unsigned PushInstrSize(unsigned Reg) const {
1205 switch (Reg) {
1206 case X86::EBX:
1207 case X86::ECX:
1208 case X86::EDX:
1209 case X86::EDI:
1210 case X86::ESI:
1211 case X86::EBP:
1212 case X86::RBX:
1213 case X86::RBP:
1214 return 1;
1215 case X86::R12:
1216 case X86::R13:
1217 case X86::R14:
1218 case X86::R15:
1219 return 2;
1220 }
1221 return 1;
1222 }
1223
1224 private:
1225 /// Get the compact unwind number for a given register. The number
1226 /// corresponds to the enum lists in compact_unwind_encoding.h.
1227   int getCompactUnwindRegNum(unsigned Reg) const {
1228 static const MCPhysReg CU32BitRegs[7] = {
1229 X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
1230 };
1231 static const MCPhysReg CU64BitRegs[] = {
1232 X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
1233 };
1234 const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
1235 for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
1236 if (*CURegs == Reg)
1237 return Idx;
1238
1239 return -1;
1240 }
1241
1242 /// Return the registers encoded for a compact encoding with a frame
1243 /// pointer.
1244   uint32_t encodeCompactUnwindRegistersWithFrame() const {
1245     // Encode the registers in the order they were saved --- 3 bits per
1246 // register. The list of saved registers is assumed to be in reverse
1247 // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
1248 uint32_t RegEnc = 0;
1249 for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
1250 unsigned Reg = SavedRegs[i];
1251 if (Reg == 0) break;
1252
1253 int CURegNum = getCompactUnwindRegNum(Reg);
1254 if (CURegNum == -1) return ~0U;
1255
1256 // Encode the 3-bit register number in order, skipping over 3-bits for
1257 // each register.
1258 RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
1259 }
1260
1261 assert((RegEnc & 0x3FFFF) == RegEnc &&
1262 "Invalid compact register encoding!");
1263 return RegEnc;
1264 }
1265
1266 /// Create the permutation encoding used with frameless stacks. It is
1267 /// passed the number of registers to be saved and an array of the registers
1268 /// saved.
1269   uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
1270 // The saved registers are numbered from 1 to 6. In order to encode the
1271 // order in which they were saved, we re-number them according to their
1272 // place in the register order. The re-numbering is relative to the last
1273 // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
1274 // that order:
1275 //
1276 // Orig Re-Num
1277 // ---- ------
1278 // 6 6
1279 // 2 2
1280 // 4 3
1281 // 5 3
1282 //
1283 for (unsigned i = 0; i < RegCount; ++i) {
1284 int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
1285 if (CUReg == -1) return ~0U;
1286 SavedRegs[i] = CUReg;
1287 }
1288
1289 // Reverse the list.
1290 std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]);
1291
1292 uint32_t RenumRegs[CU_NUM_SAVED_REGS];
1293 for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){
1294 unsigned Countless = 0;
1295 for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
1296 if (SavedRegs[j] < SavedRegs[i])
1297 ++Countless;
1298
1299 RenumRegs[i] = SavedRegs[i] - Countless - 1;
1300 }
1301
1302 // Take the renumbered values and encode them into a 10-bit number.
1303 uint32_t permutationEncoding = 0;
1304 switch (RegCount) {
1305 case 6:
1306 permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
1307 + 6 * RenumRegs[2] + 2 * RenumRegs[3]
1308 + RenumRegs[4];
1309 break;
1310 case 5:
1311 permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
1312 + 6 * RenumRegs[3] + 2 * RenumRegs[4]
1313 + RenumRegs[5];
1314 break;
1315 case 4:
1316 permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3]
1317 + 3 * RenumRegs[4] + RenumRegs[5];
1318 break;
1319 case 3:
1320 permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4]
1321 + RenumRegs[5];
1322 break;
1323 case 2:
1324 permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5];
1325 break;
1326 case 1:
1327 permutationEncoding |= RenumRegs[5];
1328 break;
1329 }
1330
1331 assert((permutationEncoding & 0x3FF) == permutationEncoding &&
1332 "Invalid compact register encoding!");
1333 return permutationEncoding;
1334 }
1335
1336 public:
1337   DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
1338 const MCSubtargetInfo &STI)
1339 : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()),
1340 Is64Bit(TT.isArch64Bit()) {
1341 memset(SavedRegs, 0, sizeof(SavedRegs));
1342 OffsetSize = Is64Bit ? 8 : 4;
1343 MoveInstrSize = Is64Bit ? 3 : 2;
1344 StackDivide = Is64Bit ? 8 : 4;
1345 }
1346
1347 std::unique_ptr<MCObjectTargetWriter>
1348   createObjectTargetWriter() const override {
1349 uint32_t CPUType = cantFail(MachO::getCPUType(TT));
1350 uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT));
1351 return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType);
1352 }
1353
1354 /// Implementation of algorithm to generate the compact unwind encoding
1355 /// for the CFI instructions.
1356 uint32_t
1357   generateCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs) const override {
1358 if (Instrs.empty()) return 0;
1359
1360 // Reset the saved registers.
1361 unsigned SavedRegIdx = 0;
1362 memset(SavedRegs, 0, sizeof(SavedRegs));
1363
1364 bool HasFP = false;
1365
1366 // Encode that we are using EBP/RBP as the frame pointer.
1367 uint32_t CompactUnwindEncoding = 0;
1368
1369 unsigned SubtractInstrIdx = Is64Bit ? 3 : 2;
1370 unsigned InstrOffset = 0;
1371 unsigned StackAdjust = 0;
1372 unsigned StackSize = 0;
1373 int MinAbsOffset = std::numeric_limits<int>::max();
1374
1375 for (const MCCFIInstruction &Inst : Instrs) {
1376 switch (Inst.getOperation()) {
1377 default:
1378 // Any other CFI directives indicate a frame that we aren't prepared
1379 // to represent via compact unwind, so just bail out.
1380 return CU::UNWIND_MODE_DWARF;
1381 case MCCFIInstruction::OpDefCfaRegister: {
1382 // Defines a frame pointer. E.g.
1383 //
1384 // movq %rsp, %rbp
1385 // L0:
1386 // .cfi_def_cfa_register %rbp
1387 //
1388 HasFP = true;
1389
1390 // If the frame pointer is other than esp/rsp, we do not have a way to
1391 // generate a compact unwinding representation, so bail out.
1392 if (*MRI.getLLVMRegNum(Inst.getRegister(), true) !=
1393 (Is64Bit ? X86::RBP : X86::EBP))
1394 return CU::UNWIND_MODE_DWARF;
1395
1396 // Reset the counts.
1397 memset(SavedRegs, 0, sizeof(SavedRegs));
1398 StackAdjust = 0;
1399 SavedRegIdx = 0;
1400 MinAbsOffset = std::numeric_limits<int>::max();
1401 InstrOffset += MoveInstrSize;
1402 break;
1403 }
1404 case MCCFIInstruction::OpDefCfaOffset: {
1405 // Defines a new offset for the CFA. E.g.
1406 //
1407 // With frame:
1408 //
1409 // pushq %rbp
1410 // L0:
1411 // .cfi_def_cfa_offset 16
1412 //
1413 // Without frame:
1414 //
1415 // subq $72, %rsp
1416 // L0:
1417 // .cfi_def_cfa_offset 80
1418 //
1419 StackSize = Inst.getOffset() / StackDivide;
1420 break;
1421 }
1422 case MCCFIInstruction::OpOffset: {
1423 // Defines a "push" of a callee-saved register. E.g.
1424 //
1425 // pushq %r15
1426 // pushq %r14
1427 // pushq %rbx
1428 // L0:
1429 // subq $120, %rsp
1430 // L1:
1431 // .cfi_offset %rbx, -40
1432 // .cfi_offset %r14, -32
1433 // .cfi_offset %r15, -24
1434 //
1435 if (SavedRegIdx == CU_NUM_SAVED_REGS)
1436 // If there are too many saved registers, we cannot use a compact
1437 // unwind encoding.
1438 return CU::UNWIND_MODE_DWARF;
1439
1440 unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true);
1441 SavedRegs[SavedRegIdx++] = Reg;
1442 StackAdjust += OffsetSize;
1443 MinAbsOffset = std::min(MinAbsOffset, abs(Inst.getOffset()));
1444 InstrOffset += PushInstrSize(Reg);
1445 break;
1446 }
1447 }
1448 }
1449
1450 StackAdjust /= StackDivide;
1451
1452 if (HasFP) {
1453 if ((StackAdjust & 0xFF) != StackAdjust)
1454 // Offset was too big for a compact unwind encoding.
1455 return CU::UNWIND_MODE_DWARF;
1456
1457 // We don't attempt to track a real StackAdjust, so if the saved registers
1458 // aren't adjacent to rbp we can't cope.
1459 if (SavedRegIdx != 0 && MinAbsOffset != 3 * (int)OffsetSize)
1460 return CU::UNWIND_MODE_DWARF;
1461
1462 // Get the encoding of the saved registers when we have a frame pointer.
1463 uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
1464 if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
1465
1466 CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
1467 CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
1468 CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
1469 } else {
1470 SubtractInstrIdx += InstrOffset;
1471 ++StackAdjust;
1472
1473 if ((StackSize & 0xFF) == StackSize) {
1474 // Frameless stack with a small stack size.
1475 CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;
1476
1477 // Encode the stack size.
1478 CompactUnwindEncoding |= (StackSize & 0xFF) << 16;
1479 } else {
1480 if ((StackAdjust & 0x7) != StackAdjust)
1481 // The extra stack adjustments are too big for us to handle.
1482 return CU::UNWIND_MODE_DWARF;
1483
1484 // Frameless stack with an offset too large for us to encode compactly.
1485 CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;
1486
1487 // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
1488 // instruction.
1489 CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;
1490
1491 // Encode any extra stack adjustments (done via push instructions).
1492 CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
1493 }
1494
1495 // Encode the number of registers saved. (Reverse the list first.)
1496 std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]);
1497 CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;
1498
1499 // Get the encoding of the saved registers when we don't have a frame
1500 // pointer.
1501 uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx);
1502 if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
1503
1504 // Encode the register encoding.
1505 CompactUnwindEncoding |=
1506 RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
1507 }
1508
1509 return CompactUnwindEncoding;
1510 }
1511 };
1512
1513 } // end anonymous namespace
1514
1515 MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
1516 const MCSubtargetInfo &STI,
1517 const MCRegisterInfo &MRI,
1518 const MCTargetOptions &Options) {
1519 const Triple &TheTriple = STI.getTargetTriple();
1520 if (TheTriple.isOSBinFormatMachO())
1521 return new DarwinX86AsmBackend(T, MRI, STI);
1522
1523 if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1524 return new WindowsX86AsmBackend(T, false, STI);
1525
1526 uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
1527
1528 if (TheTriple.isOSIAMCU())
1529 return new ELFX86_IAMCUAsmBackend(T, OSABI, STI);
1530
1531 return new ELFX86_32AsmBackend(T, OSABI, STI);
1532 }
1533
1534 MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
1535 const MCSubtargetInfo &STI,
1536 const MCRegisterInfo &MRI,
1537 const MCTargetOptions &Options) {
1538 const Triple &TheTriple = STI.getTargetTriple();
1539 if (TheTriple.isOSBinFormatMachO())
1540 return new DarwinX86AsmBackend(T, MRI, STI);
1541
1542 if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1543 return new WindowsX86AsmBackend(T, true, STI);
1544
1545 uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
1546
1547 if (TheTriple.isX32())
1548 return new ELFX86_X32AsmBackend(T, OSABI, STI);
1549 return new ELFX86_64AsmBackend(T, OSABI, STI);
1550 }
1551