//===- bolt/Target/AArch64/AArch64MCPlusBuilder.cpp -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides AArch64-specific MCPlus builder.
//
//===----------------------------------------------------------------------===//
12 
13 #include "MCTargetDesc/AArch64AddressingModes.h"
14 #include "MCTargetDesc/AArch64MCExpr.h"
15 #include "MCTargetDesc/AArch64MCTargetDesc.h"
16 #include "Utils/AArch64BaseInfo.h"
17 #include "bolt/Core/MCPlusBuilder.h"
18 #include "llvm/BinaryFormat/ELF.h"
19 #include "llvm/MC/MCInstrInfo.h"
20 #include "llvm/MC/MCRegisterInfo.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Support/ErrorHandling.h"
23 
24 #define DEBUG_TYPE "mcplus"
25 
26 using namespace llvm;
27 using namespace bolt;
28 
29 namespace {
30 
31 class AArch64MCPlusBuilder : public MCPlusBuilder {
32 public:
AArch64MCPlusBuilder(const MCInstrAnalysis * Analysis,const MCInstrInfo * Info,const MCRegisterInfo * RegInfo)33   AArch64MCPlusBuilder(const MCInstrAnalysis *Analysis, const MCInstrInfo *Info,
34                        const MCRegisterInfo *RegInfo)
35       : MCPlusBuilder(Analysis, Info, RegInfo) {}
36 
equals(const MCTargetExpr & A,const MCTargetExpr & B,CompFuncTy Comp) const37   bool equals(const MCTargetExpr &A, const MCTargetExpr &B,
38               CompFuncTy Comp) const override {
39     const auto &AArch64ExprA = cast<AArch64MCExpr>(A);
40     const auto &AArch64ExprB = cast<AArch64MCExpr>(B);
41     if (AArch64ExprA.getKind() != AArch64ExprB.getKind())
42       return false;
43 
44     return MCPlusBuilder::equals(*AArch64ExprA.getSubExpr(),
45                                  *AArch64ExprB.getSubExpr(), Comp);
46   }
47 
hasEVEXEncoding(const MCInst &) const48   bool hasEVEXEncoding(const MCInst &) const override { return false; }
49 
isMacroOpFusionPair(ArrayRef<MCInst> Insts) const50   bool isMacroOpFusionPair(ArrayRef<MCInst> Insts) const override {
51     return false;
52   }
53 
shortenInstruction(MCInst &,const MCSubtargetInfo &) const54   bool shortenInstruction(MCInst &, const MCSubtargetInfo &) const override {
55     return false;
56   }
57 
isADRP(const MCInst & Inst) const58   bool isADRP(const MCInst &Inst) const override {
59     return Inst.getOpcode() == AArch64::ADRP;
60   }
61 
isADR(const MCInst & Inst) const62   bool isADR(const MCInst &Inst) const override {
63     return Inst.getOpcode() == AArch64::ADR;
64   }
65 
getADRReg(const MCInst & Inst,MCPhysReg & RegName) const66   void getADRReg(const MCInst &Inst, MCPhysReg &RegName) const override {
67     assert((isADR(Inst) || isADRP(Inst)) && "Not an ADR instruction");
68     assert(MCPlus::getNumPrimeOperands(Inst) != 0 &&
69            "No operands for ADR instruction");
70     assert(Inst.getOperand(0).isReg() &&
71            "Unexpected operand in ADR instruction");
72     RegName = Inst.getOperand(0).getReg();
73   }
74 
isTB(const MCInst & Inst) const75   bool isTB(const MCInst &Inst) const {
76     return (Inst.getOpcode() == AArch64::TBNZW ||
77             Inst.getOpcode() == AArch64::TBNZX ||
78             Inst.getOpcode() == AArch64::TBZW ||
79             Inst.getOpcode() == AArch64::TBZX);
80   }
81 
isCB(const MCInst & Inst) const82   bool isCB(const MCInst &Inst) const {
83     return (Inst.getOpcode() == AArch64::CBNZW ||
84             Inst.getOpcode() == AArch64::CBNZX ||
85             Inst.getOpcode() == AArch64::CBZW ||
86             Inst.getOpcode() == AArch64::CBZX);
87   }
88 
isMOVW(const MCInst & Inst) const89   bool isMOVW(const MCInst &Inst) const {
90     return (Inst.getOpcode() == AArch64::MOVKWi ||
91             Inst.getOpcode() == AArch64::MOVKXi ||
92             Inst.getOpcode() == AArch64::MOVNWi ||
93             Inst.getOpcode() == AArch64::MOVNXi ||
94             Inst.getOpcode() == AArch64::MOVZXi ||
95             Inst.getOpcode() == AArch64::MOVZWi);
96   }
97 
isADD(const MCInst & Inst) const98   bool isADD(const MCInst &Inst) const {
99     return (Inst.getOpcode() == AArch64::ADDSWri ||
100             Inst.getOpcode() == AArch64::ADDSWrr ||
101             Inst.getOpcode() == AArch64::ADDSWrs ||
102             Inst.getOpcode() == AArch64::ADDSWrx ||
103             Inst.getOpcode() == AArch64::ADDSXri ||
104             Inst.getOpcode() == AArch64::ADDSXrr ||
105             Inst.getOpcode() == AArch64::ADDSXrs ||
106             Inst.getOpcode() == AArch64::ADDSXrx ||
107             Inst.getOpcode() == AArch64::ADDSXrx64 ||
108             Inst.getOpcode() == AArch64::ADDWri ||
109             Inst.getOpcode() == AArch64::ADDWrr ||
110             Inst.getOpcode() == AArch64::ADDWrs ||
111             Inst.getOpcode() == AArch64::ADDWrx ||
112             Inst.getOpcode() == AArch64::ADDXri ||
113             Inst.getOpcode() == AArch64::ADDXrr ||
114             Inst.getOpcode() == AArch64::ADDXrs ||
115             Inst.getOpcode() == AArch64::ADDXrx ||
116             Inst.getOpcode() == AArch64::ADDXrx64);
117   }
118 
isLDRB(const MCInst & Inst) const119   bool isLDRB(const MCInst &Inst) const {
120     return (Inst.getOpcode() == AArch64::LDRBBpost ||
121             Inst.getOpcode() == AArch64::LDRBBpre ||
122             Inst.getOpcode() == AArch64::LDRBBroW ||
123             Inst.getOpcode() == AArch64::LDRBBroX ||
124             Inst.getOpcode() == AArch64::LDRBBui ||
125             Inst.getOpcode() == AArch64::LDRSBWpost ||
126             Inst.getOpcode() == AArch64::LDRSBWpre ||
127             Inst.getOpcode() == AArch64::LDRSBWroW ||
128             Inst.getOpcode() == AArch64::LDRSBWroX ||
129             Inst.getOpcode() == AArch64::LDRSBWui ||
130             Inst.getOpcode() == AArch64::LDRSBXpost ||
131             Inst.getOpcode() == AArch64::LDRSBXpre ||
132             Inst.getOpcode() == AArch64::LDRSBXroW ||
133             Inst.getOpcode() == AArch64::LDRSBXroX ||
134             Inst.getOpcode() == AArch64::LDRSBXui);
135   }
136 
isLDRH(const MCInst & Inst) const137   bool isLDRH(const MCInst &Inst) const {
138     return (Inst.getOpcode() == AArch64::LDRHHpost ||
139             Inst.getOpcode() == AArch64::LDRHHpre ||
140             Inst.getOpcode() == AArch64::LDRHHroW ||
141             Inst.getOpcode() == AArch64::LDRHHroX ||
142             Inst.getOpcode() == AArch64::LDRHHui ||
143             Inst.getOpcode() == AArch64::LDRSHWpost ||
144             Inst.getOpcode() == AArch64::LDRSHWpre ||
145             Inst.getOpcode() == AArch64::LDRSHWroW ||
146             Inst.getOpcode() == AArch64::LDRSHWroX ||
147             Inst.getOpcode() == AArch64::LDRSHWui ||
148             Inst.getOpcode() == AArch64::LDRSHXpost ||
149             Inst.getOpcode() == AArch64::LDRSHXpre ||
150             Inst.getOpcode() == AArch64::LDRSHXroW ||
151             Inst.getOpcode() == AArch64::LDRSHXroX ||
152             Inst.getOpcode() == AArch64::LDRSHXui);
153   }
154 
isLDRW(const MCInst & Inst) const155   bool isLDRW(const MCInst &Inst) const {
156     return (Inst.getOpcode() == AArch64::LDRWpost ||
157             Inst.getOpcode() == AArch64::LDRWpre ||
158             Inst.getOpcode() == AArch64::LDRWroW ||
159             Inst.getOpcode() == AArch64::LDRWroX ||
160             Inst.getOpcode() == AArch64::LDRWui);
161   }
162 
isLDRX(const MCInst & Inst) const163   bool isLDRX(const MCInst &Inst) const {
164     return (Inst.getOpcode() == AArch64::LDRXpost ||
165             Inst.getOpcode() == AArch64::LDRXpre ||
166             Inst.getOpcode() == AArch64::LDRXroW ||
167             Inst.getOpcode() == AArch64::LDRXroX ||
168             Inst.getOpcode() == AArch64::LDRXui);
169   }
170 
isLoad(const MCInst & Inst) const171   bool isLoad(const MCInst &Inst) const override {
172     return isLDRB(Inst) || isLDRH(Inst) || isLDRW(Inst) || isLDRX(Inst);
173   }
174 
isLoadFromStack(const MCInst & Inst) const175   bool isLoadFromStack(const MCInst &Inst) const {
176     if (!isLoad(Inst))
177       return false;
178     const MCInstrDesc &InstInfo = Info->get(Inst.getOpcode());
179     unsigned NumDefs = InstInfo.getNumDefs();
180     for (unsigned I = NumDefs, E = InstInfo.getNumOperands(); I < E; ++I) {
181       const MCOperand &Operand = Inst.getOperand(I);
182       if (!Operand.isReg())
183         continue;
184       unsigned Reg = Operand.getReg();
185       if (Reg == AArch64::SP || Reg == AArch64::WSP || Reg == AArch64::FP ||
186           Reg == AArch64::W29)
187         return true;
188     }
189     return false;
190   }
191 
isRegToRegMove(const MCInst & Inst,MCPhysReg & From,MCPhysReg & To) const192   bool isRegToRegMove(const MCInst &Inst, MCPhysReg &From,
193                       MCPhysReg &To) const override {
194     if (Inst.getOpcode() != AArch64::ORRXrs)
195       return false;
196     if (Inst.getOperand(1).getReg() != AArch64::XZR)
197       return false;
198     if (Inst.getOperand(3).getImm() != 0)
199       return false;
200     From = Inst.getOperand(2).getReg();
201     To = Inst.getOperand(0).getReg();
202     return true;
203   }
204 
isIndirectCall(const MCInst & Inst) const205   bool isIndirectCall(const MCInst &Inst) const override {
206     return Inst.getOpcode() == AArch64::BLR;
207   }
208 
hasPCRelOperand(const MCInst & Inst) const209   bool hasPCRelOperand(const MCInst &Inst) const override {
210     // ADRP is blacklisted and is an exception. Even though it has a
211     // PC-relative operand, this operand is not a complete symbol reference
212     // and BOLT shouldn't try to process it in isolation.
213     if (isADRP(Inst))
214       return false;
215 
216     if (isADR(Inst))
217       return true;
218 
219     // Look for literal addressing mode (see C1-143 ARM DDI 0487B.a)
220     const MCInstrDesc &MCII = Info->get(Inst.getOpcode());
221     for (unsigned I = 0, E = MCII.getNumOperands(); I != E; ++I)
222       if (MCII.OpInfo[I].OperandType == MCOI::OPERAND_PCREL)
223         return true;
224 
225     return false;
226   }
227 
evaluateADR(const MCInst & Inst,int64_t & Imm,const MCExpr ** DispExpr) const228   bool evaluateADR(const MCInst &Inst, int64_t &Imm,
229                    const MCExpr **DispExpr) const {
230     assert((isADR(Inst) || isADRP(Inst)) && "Not an ADR instruction");
231 
232     const MCOperand &Label = Inst.getOperand(1);
233     if (!Label.isImm()) {
234       assert(Label.isExpr() && "Unexpected ADR operand");
235       assert(DispExpr && "DispExpr must be set");
236       *DispExpr = Label.getExpr();
237       return false;
238     }
239 
240     if (Inst.getOpcode() == AArch64::ADR) {
241       Imm = Label.getImm();
242       return true;
243     }
244     Imm = Label.getImm() << 12;
245     return true;
246   }
247 
evaluateAArch64MemoryOperand(const MCInst & Inst,int64_t & DispImm,const MCExpr ** DispExpr=nullptr) const248   bool evaluateAArch64MemoryOperand(const MCInst &Inst, int64_t &DispImm,
249                                     const MCExpr **DispExpr = nullptr) const {
250     if (isADR(Inst) || isADRP(Inst))
251       return evaluateADR(Inst, DispImm, DispExpr);
252 
253     // Literal addressing mode
254     const MCInstrDesc &MCII = Info->get(Inst.getOpcode());
255     for (unsigned I = 0, E = MCII.getNumOperands(); I != E; ++I) {
256       if (MCII.OpInfo[I].OperandType != MCOI::OPERAND_PCREL)
257         continue;
258 
259       if (!Inst.getOperand(I).isImm()) {
260         assert(Inst.getOperand(I).isExpr() && "Unexpected PCREL operand");
261         assert(DispExpr && "DispExpr must be set");
262         *DispExpr = Inst.getOperand(I).getExpr();
263         return true;
264       }
265 
266       DispImm = Inst.getOperand(I).getImm() << 2;
267       return true;
268     }
269     return false;
270   }
271 
evaluateMemOperandTarget(const MCInst & Inst,uint64_t & Target,uint64_t Address,uint64_t Size) const272   bool evaluateMemOperandTarget(const MCInst &Inst, uint64_t &Target,
273                                 uint64_t Address,
274                                 uint64_t Size) const override {
275     int64_t DispValue;
276     const MCExpr *DispExpr = nullptr;
277     if (!evaluateAArch64MemoryOperand(Inst, DispValue, &DispExpr))
278       return false;
279 
280     // Make sure it's a well-formed addressing we can statically evaluate.
281     if (DispExpr)
282       return false;
283 
284     Target = DispValue;
285     if (Inst.getOpcode() == AArch64::ADRP)
286       Target += Address & ~0xFFFULL;
287     else
288       Target += Address;
289     return true;
290   }
291 
replaceMemOperandDisp(MCInst & Inst,MCOperand Operand) const292   bool replaceMemOperandDisp(MCInst &Inst, MCOperand Operand) const override {
293     MCInst::iterator OI = Inst.begin();
294     if (isADR(Inst) || isADRP(Inst)) {
295       assert(MCPlus::getNumPrimeOperands(Inst) >= 2 &&
296              "Unexpected number of operands");
297       ++OI;
298     } else {
299       const MCInstrDesc &MCII = Info->get(Inst.getOpcode());
300       for (unsigned I = 0, E = MCII.getNumOperands(); I != E; ++I) {
301         if (MCII.OpInfo[I].OperandType == MCOI::OPERAND_PCREL)
302           break;
303         ++OI;
304       }
305       assert(OI != Inst.end() && "Literal operand not found");
306     }
307     *OI = Operand;
308     return true;
309   }
310 
getTargetExprFor(MCInst & Inst,const MCExpr * Expr,MCContext & Ctx,uint64_t RelType) const311   const MCExpr *getTargetExprFor(MCInst &Inst, const MCExpr *Expr,
312                                  MCContext &Ctx,
313                                  uint64_t RelType) const override {
314 
315     if (isADR(Inst) || RelType == ELF::R_AARCH64_ADR_PREL_LO21 ||
316         RelType == ELF::R_AARCH64_TLSDESC_ADR_PREL21) {
317       return AArch64MCExpr::create(Expr, AArch64MCExpr::VK_ABS, Ctx);
318     } else if (isADRP(Inst) || RelType == ELF::R_AARCH64_ADR_PREL_PG_HI21 ||
319                RelType == ELF::R_AARCH64_ADR_PREL_PG_HI21_NC ||
320                RelType == ELF::R_AARCH64_TLSDESC_ADR_PAGE21 ||
321                RelType == ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 ||
322                RelType == ELF::R_AARCH64_ADR_GOT_PAGE) {
323       // Never emit a GOT reloc, we handled this in
324       // RewriteInstance::readRelocations().
325       return AArch64MCExpr::create(Expr, AArch64MCExpr::VK_ABS_PAGE, Ctx);
326     } else {
327       switch (RelType) {
328       case ELF::R_AARCH64_ADD_ABS_LO12_NC:
329       case ELF::R_AARCH64_LD64_GOT_LO12_NC:
330       case ELF::R_AARCH64_LDST8_ABS_LO12_NC:
331       case ELF::R_AARCH64_LDST16_ABS_LO12_NC:
332       case ELF::R_AARCH64_LDST32_ABS_LO12_NC:
333       case ELF::R_AARCH64_LDST64_ABS_LO12_NC:
334       case ELF::R_AARCH64_LDST128_ABS_LO12_NC:
335       case ELF::R_AARCH64_TLSDESC_ADD_LO12:
336       case ELF::R_AARCH64_TLSDESC_LD64_LO12:
337       case ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
338       case ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
339         return AArch64MCExpr::create(Expr, AArch64MCExpr::VK_LO12, Ctx);
340       case ELF::R_AARCH64_MOVW_UABS_G3:
341         return AArch64MCExpr::create(Expr, AArch64MCExpr::VK_ABS_G3, Ctx);
342       case ELF::R_AARCH64_MOVW_UABS_G2:
343       case ELF::R_AARCH64_MOVW_UABS_G2_NC:
344         return AArch64MCExpr::create(Expr, AArch64MCExpr::VK_ABS_G2_NC, Ctx);
345       case ELF::R_AARCH64_MOVW_UABS_G1:
346       case ELF::R_AARCH64_MOVW_UABS_G1_NC:
347         return AArch64MCExpr::create(Expr, AArch64MCExpr::VK_ABS_G1_NC, Ctx);
348       case ELF::R_AARCH64_MOVW_UABS_G0:
349       case ELF::R_AARCH64_MOVW_UABS_G0_NC:
350         return AArch64MCExpr::create(Expr, AArch64MCExpr::VK_ABS_G0_NC, Ctx);
351       default:
352         break;
353       }
354     }
355     return Expr;
356   }
357 
getSymbolRefOperandNum(const MCInst & Inst,unsigned & OpNum) const358   bool getSymbolRefOperandNum(const MCInst &Inst, unsigned &OpNum) const {
359     if (OpNum >= MCPlus::getNumPrimeOperands(Inst))
360       return false;
361 
362     // Auto-select correct operand number
363     if (OpNum == 0) {
364       if (isConditionalBranch(Inst) || isADR(Inst) || isADRP(Inst))
365         OpNum = 1;
366       if (isTB(Inst))
367         OpNum = 2;
368       if (isMOVW(Inst))
369         OpNum = 1;
370     }
371 
372     return true;
373   }
374 
getTargetSymbol(const MCExpr * Expr) const375   const MCSymbol *getTargetSymbol(const MCExpr *Expr) const override {
376     auto *AArchExpr = dyn_cast<AArch64MCExpr>(Expr);
377     if (AArchExpr && AArchExpr->getSubExpr())
378       return getTargetSymbol(AArchExpr->getSubExpr());
379 
380     auto *BinExpr = dyn_cast<MCBinaryExpr>(Expr);
381     if (BinExpr)
382       return getTargetSymbol(BinExpr->getLHS());
383 
384     auto *SymExpr = dyn_cast<MCSymbolRefExpr>(Expr);
385     if (SymExpr && SymExpr->getKind() == MCSymbolRefExpr::VK_None)
386       return &SymExpr->getSymbol();
387 
388     return nullptr;
389   }
390 
getTargetSymbol(const MCInst & Inst,unsigned OpNum=0) const391   const MCSymbol *getTargetSymbol(const MCInst &Inst,
392                                   unsigned OpNum = 0) const override {
393     if (!getSymbolRefOperandNum(Inst, OpNum))
394       return nullptr;
395 
396     const MCOperand &Op = Inst.getOperand(OpNum);
397     if (!Op.isExpr())
398       return nullptr;
399 
400     return getTargetSymbol(Op.getExpr());
401   }
402 
getTargetAddend(const MCExpr * Expr) const403   int64_t getTargetAddend(const MCExpr *Expr) const override {
404     auto *AArchExpr = dyn_cast<AArch64MCExpr>(Expr);
405     if (AArchExpr && AArchExpr->getSubExpr())
406       return getTargetAddend(AArchExpr->getSubExpr());
407 
408     auto *BinExpr = dyn_cast<MCBinaryExpr>(Expr);
409     if (BinExpr && BinExpr->getOpcode() == MCBinaryExpr::Add)
410       return getTargetAddend(BinExpr->getRHS());
411 
412     auto *ConstExpr = dyn_cast<MCConstantExpr>(Expr);
413     if (ConstExpr)
414       return ConstExpr->getValue();
415 
416     return 0;
417   }
418 
getTargetAddend(const MCInst & Inst,unsigned OpNum=0) const419   int64_t getTargetAddend(const MCInst &Inst,
420                           unsigned OpNum = 0) const override {
421     if (!getSymbolRefOperandNum(Inst, OpNum))
422       return 0;
423 
424     const MCOperand &Op = Inst.getOperand(OpNum);
425     if (!Op.isExpr())
426       return 0;
427 
428     return getTargetAddend(Op.getExpr());
429   }
430 
evaluateBranch(const MCInst & Inst,uint64_t Addr,uint64_t Size,uint64_t & Target) const431   bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
432                       uint64_t &Target) const override {
433     size_t OpNum = 0;
434 
435     if (isConditionalBranch(Inst)) {
436       assert(MCPlus::getNumPrimeOperands(Inst) >= 2 &&
437              "Invalid number of operands");
438       OpNum = 1;
439     }
440 
441     if (isTB(Inst)) {
442       assert(MCPlus::getNumPrimeOperands(Inst) >= 3 &&
443              "Invalid number of operands");
444       OpNum = 2;
445     }
446 
447     if (Info->get(Inst.getOpcode()).OpInfo[OpNum].OperandType !=
448         MCOI::OPERAND_PCREL) {
449       assert((isIndirectBranch(Inst) || isIndirectCall(Inst)) &&
450              "FAILED evaluateBranch");
451       return false;
452     }
453 
454     int64_t Imm = Inst.getOperand(OpNum).getImm() << 2;
455     Target = Addr + Imm;
456     return true;
457   }
458 
replaceBranchTarget(MCInst & Inst,const MCSymbol * TBB,MCContext * Ctx) const459   bool replaceBranchTarget(MCInst &Inst, const MCSymbol *TBB,
460                            MCContext *Ctx) const override {
461     assert((isCall(Inst) || isBranch(Inst)) && !isIndirectBranch(Inst) &&
462            "Invalid instruction");
463     assert(MCPlus::getNumPrimeOperands(Inst) >= 1 &&
464            "Invalid number of operands");
465     MCInst::iterator OI = Inst.begin();
466 
467     if (isConditionalBranch(Inst)) {
468       assert(MCPlus::getNumPrimeOperands(Inst) >= 2 &&
469              "Invalid number of operands");
470       ++OI;
471     }
472 
473     if (isTB(Inst)) {
474       assert(MCPlus::getNumPrimeOperands(Inst) >= 3 &&
475              "Invalid number of operands");
476       OI = Inst.begin() + 2;
477     }
478 
479     *OI = MCOperand::createExpr(
480         MCSymbolRefExpr::create(TBB, MCSymbolRefExpr::VK_None, *Ctx));
481     return true;
482   }
483 
484   /// Matches indirect branch patterns in AArch64 related to a jump table (JT),
485   /// helping us to build the complete CFG. A typical indirect branch to
486   /// a jump table entry in AArch64 looks like the following:
487   ///
488   ///   adrp    x1, #-7585792           # Get JT Page location
489   ///   add     x1, x1, #692            # Complement with JT Page offset
490   ///   ldrh    w0, [x1, w0, uxtw #1]   # Loads JT entry
491   ///   adr     x1, #12                 # Get PC + 12 (end of this BB) used next
492   ///   add     x0, x1, w0, sxth #2     # Finish building branch target
493   ///                                   # (entries in JT are relative to the end
494   ///                                   #  of this BB)
495   ///   br      x0                      # Indirect jump instruction
496   ///
analyzeIndirectBranchFragment(const MCInst & Inst,DenseMap<const MCInst *,SmallVector<MCInst *,4>> & UDChain,const MCExpr * & JumpTable,int64_t & Offset,int64_t & ScaleValue,MCInst * & PCRelBase) const497   bool analyzeIndirectBranchFragment(
498       const MCInst &Inst,
499       DenseMap<const MCInst *, SmallVector<MCInst *, 4>> &UDChain,
500       const MCExpr *&JumpTable, int64_t &Offset, int64_t &ScaleValue,
501       MCInst *&PCRelBase) const {
502     // Expect AArch64 BR
503     assert(Inst.getOpcode() == AArch64::BR && "Unexpected opcode");
504 
505     // Match the indirect branch pattern for aarch64
506     SmallVector<MCInst *, 4> &UsesRoot = UDChain[&Inst];
507     if (UsesRoot.size() == 0 || UsesRoot[0] == nullptr)
508       return false;
509 
510     const MCInst *DefAdd = UsesRoot[0];
511 
512     // Now we match an ADD
513     if (!isADD(*DefAdd)) {
514       // If the address is not broken up in two parts, this is not branching
515       // according to a jump table entry. Fail.
516       return false;
517     }
518     if (DefAdd->getOpcode() == AArch64::ADDXri) {
519       // This can happen when there is no offset, but a direct jump that was
520       // transformed into an indirect one  (indirect tail call) :
521       //   ADRP   x2, Perl_re_compiler
522       //   ADD    x2, x2, :lo12:Perl_re_compiler
523       //   BR     x2
524       return false;
525     }
526     if (DefAdd->getOpcode() == AArch64::ADDXrs) {
527       // Covers the less common pattern where JT entries are relative to
528       // the JT itself (like x86). Seems less efficient since we can't
529       // assume the JT is aligned at 4B boundary and thus drop 2 bits from
530       // JT values.
531       // cde264:
532       //    adrp    x12, #21544960  ; 216a000
533       //    add     x12, x12, #1696 ; 216a6a0  (JT object in .rodata)
534       //    ldrsw   x8, [x12, x8, lsl #2]   --> loads e.g. 0xfeb73bd8
535       //  * add     x8, x8, x12   --> = cde278, next block
536       //    br      x8
537       // cde278:
538       //
539       // Parsed as ADDXrs reg:x8 reg:x8 reg:x12 imm:0
540       return false;
541     }
542     assert(DefAdd->getOpcode() == AArch64::ADDXrx &&
543            "Failed to match indirect branch!");
544 
545     // Validate ADD operands
546     int64_t OperandExtension = DefAdd->getOperand(3).getImm();
547     unsigned ShiftVal = AArch64_AM::getArithShiftValue(OperandExtension);
548     AArch64_AM::ShiftExtendType ExtendType =
549         AArch64_AM::getArithExtendType(OperandExtension);
550     if (ShiftVal != 2)
551       llvm_unreachable("Failed to match indirect branch! (fragment 2)");
552 
553     if (ExtendType == AArch64_AM::SXTB)
554       ScaleValue = 1LL;
555     else if (ExtendType == AArch64_AM::SXTH)
556       ScaleValue = 2LL;
557     else if (ExtendType == AArch64_AM::SXTW)
558       ScaleValue = 4LL;
559     else
560       llvm_unreachable("Failed to match indirect branch! (fragment 3)");
561 
562     // Match an ADR to load base address to be used when addressing JT targets
563     SmallVector<MCInst *, 4> &UsesAdd = UDChain[DefAdd];
564     if (UsesAdd.size() <= 1 || UsesAdd[1] == nullptr || UsesAdd[2] == nullptr) {
565       // This happens when we don't have enough context about this jump table
566       // because the jumping code sequence was split in multiple basic blocks.
567       // This was observed in the wild in HHVM code (dispatchImpl).
568       return false;
569     }
570     MCInst *DefBaseAddr = UsesAdd[1];
571     assert(DefBaseAddr->getOpcode() == AArch64::ADR &&
572            "Failed to match indirect branch pattern! (fragment 3)");
573 
574     PCRelBase = DefBaseAddr;
575     // Match LOAD to load the jump table (relative) target
576     const MCInst *DefLoad = UsesAdd[2];
577     assert(isLoad(*DefLoad) &&
578            "Failed to match indirect branch load pattern! (1)");
579     assert((ScaleValue != 1LL || isLDRB(*DefLoad)) &&
580            "Failed to match indirect branch load pattern! (2)");
581     assert((ScaleValue != 2LL || isLDRH(*DefLoad)) &&
582            "Failed to match indirect branch load pattern! (3)");
583 
584     // Match ADD that calculates the JumpTable Base Address (not the offset)
585     SmallVector<MCInst *, 4> &UsesLoad = UDChain[DefLoad];
586     const MCInst *DefJTBaseAdd = UsesLoad[1];
587     MCPhysReg From, To;
588     if (DefJTBaseAdd == nullptr || isLoadFromStack(*DefJTBaseAdd) ||
589         isRegToRegMove(*DefJTBaseAdd, From, To)) {
590       // Sometimes base address may have been defined in another basic block
591       // (hoisted). Return with no jump table info.
592       JumpTable = nullptr;
593       return true;
594     }
595 
596     assert(DefJTBaseAdd->getOpcode() == AArch64::ADDXri &&
597            "Failed to match jump table base address pattern! (1)");
598 
599     if (DefJTBaseAdd->getOperand(2).isImm())
600       Offset = DefJTBaseAdd->getOperand(2).getImm();
601     SmallVector<MCInst *, 4> &UsesJTBaseAdd = UDChain[DefJTBaseAdd];
602     const MCInst *DefJTBasePage = UsesJTBaseAdd[1];
603     if (DefJTBasePage == nullptr || isLoadFromStack(*DefJTBasePage)) {
604       JumpTable = nullptr;
605       return true;
606     }
607     assert(DefJTBasePage->getOpcode() == AArch64::ADRP &&
608            "Failed to match jump table base page pattern! (2)");
609     if (DefJTBasePage->getOperand(1).isExpr())
610       JumpTable = DefJTBasePage->getOperand(1).getExpr();
611     return true;
612   }
613 
614   DenseMap<const MCInst *, SmallVector<MCInst *, 4>>
computeLocalUDChain(const MCInst * CurInstr,InstructionIterator Begin,InstructionIterator End) const615   computeLocalUDChain(const MCInst *CurInstr, InstructionIterator Begin,
616                       InstructionIterator End) const {
617     DenseMap<int, MCInst *> RegAliasTable;
618     DenseMap<const MCInst *, SmallVector<MCInst *, 4>> Uses;
619 
620     auto addInstrOperands = [&](const MCInst &Instr) {
621       // Update Uses table
622       for (const MCOperand &Operand : MCPlus::primeOperands(Instr)) {
623         if (!Operand.isReg())
624           continue;
625         unsigned Reg = Operand.getReg();
626         MCInst *AliasInst = RegAliasTable[Reg];
627         Uses[&Instr].push_back(AliasInst);
628         LLVM_DEBUG({
629           dbgs() << "Adding reg operand " << Reg << " refs ";
630           if (AliasInst != nullptr)
631             AliasInst->dump();
632           else
633             dbgs() << "\n";
634         });
635       }
636     };
637 
638     LLVM_DEBUG(dbgs() << "computeLocalUDChain\n");
639     bool TerminatorSeen = false;
640     for (auto II = Begin; II != End; ++II) {
641       MCInst &Instr = *II;
642       // Ignore nops and CFIs
643       if (isPseudo(Instr) || isNoop(Instr))
644         continue;
645       if (TerminatorSeen) {
646         RegAliasTable.clear();
647         Uses.clear();
648       }
649 
650       LLVM_DEBUG(dbgs() << "Now updating for:\n ");
651       LLVM_DEBUG(Instr.dump());
652       addInstrOperands(Instr);
653 
654       BitVector Regs = BitVector(RegInfo->getNumRegs(), false);
655       getWrittenRegs(Instr, Regs);
656 
657       // Update register definitions after this point
658       for (int Idx : Regs.set_bits()) {
659         RegAliasTable[Idx] = &Instr;
660         LLVM_DEBUG(dbgs() << "Setting reg " << Idx
661                           << " def to current instr.\n");
662       }
663 
664       TerminatorSeen = isTerminator(Instr);
665     }
666 
667     // Process the last instruction, which is not currently added into the
668     // instruction stream
669     if (CurInstr)
670       addInstrOperands(*CurInstr);
671 
672     return Uses;
673   }
674 
analyzeIndirectBranch(MCInst & Instruction,InstructionIterator Begin,InstructionIterator End,const unsigned PtrSize,MCInst * & MemLocInstrOut,unsigned & BaseRegNumOut,unsigned & IndexRegNumOut,int64_t & DispValueOut,const MCExpr * & DispExprOut,MCInst * & PCRelBaseOut) const675   IndirectBranchType analyzeIndirectBranch(
676       MCInst &Instruction, InstructionIterator Begin, InstructionIterator End,
677       const unsigned PtrSize, MCInst *&MemLocInstrOut, unsigned &BaseRegNumOut,
678       unsigned &IndexRegNumOut, int64_t &DispValueOut,
679       const MCExpr *&DispExprOut, MCInst *&PCRelBaseOut) const override {
680     MemLocInstrOut = nullptr;
681     BaseRegNumOut = AArch64::NoRegister;
682     IndexRegNumOut = AArch64::NoRegister;
683     DispValueOut = 0;
684     DispExprOut = nullptr;
685 
686     // An instruction referencing memory used by jump instruction (directly or
687     // via register). This location could be an array of function pointers
688     // in case of indirect tail call, or a jump table.
689     MCInst *MemLocInstr = nullptr;
690 
691     // Analyze the memory location.
692     int64_t ScaleValue, DispValue;
693     const MCExpr *DispExpr;
694 
695     DenseMap<const MCInst *, SmallVector<llvm::MCInst *, 4>> UDChain =
696         computeLocalUDChain(&Instruction, Begin, End);
697     MCInst *PCRelBase;
698     if (!analyzeIndirectBranchFragment(Instruction, UDChain, DispExpr,
699                                        DispValue, ScaleValue, PCRelBase))
700       return IndirectBranchType::UNKNOWN;
701 
702     MemLocInstrOut = MemLocInstr;
703     DispValueOut = DispValue;
704     DispExprOut = DispExpr;
705     PCRelBaseOut = PCRelBase;
706     return IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE;
707   }
708 
709   ///  Matches PLT entry pattern and returns the associated GOT entry address.
710   ///  Typical PLT entry looks like the following:
711   ///
712   ///    adrp    x16, 230000
713   ///    ldr     x17, [x16, #3040]
714   ///    add     x16, x16, #0xbe0
715   ///    br      x17
716   ///
analyzePLTEntry(MCInst & Instruction,InstructionIterator Begin,InstructionIterator End,uint64_t BeginPC) const717   uint64_t analyzePLTEntry(MCInst &Instruction, InstructionIterator Begin,
718                            InstructionIterator End,
719                            uint64_t BeginPC) const override {
720     // Check branch instruction
721     MCInst *Branch = &Instruction;
722     assert(Branch->getOpcode() == AArch64::BR && "Unexpected opcode");
723 
724     DenseMap<const MCInst *, SmallVector<llvm::MCInst *, 4>> UDChain =
725         computeLocalUDChain(Branch, Begin, End);
726 
727     // Match ldr instruction
728     SmallVector<MCInst *, 4> &BranchUses = UDChain[Branch];
729     if (BranchUses.size() < 1 || BranchUses[0] == nullptr)
730       return 0;
731 
732     // Check ldr instruction
733     const MCInst *Ldr = BranchUses[0];
734     if (Ldr->getOpcode() != AArch64::LDRXui)
735       return 0;
736 
737     // Get ldr value
738     const unsigned ScaleLdr = 8; // LDRX operates on 8 bytes segments
739     assert(Ldr->getOperand(2).isImm() && "Unexpected ldr operand");
740     const uint64_t Offset = Ldr->getOperand(2).getImm() * ScaleLdr;
741 
742     // Match adrp instruction
743     SmallVector<MCInst *, 4> &LdrUses = UDChain[Ldr];
744     if (LdrUses.size() < 2 || LdrUses[1] == nullptr)
745       return 0;
746 
747     // Check adrp instruction
748     MCInst *Adrp = LdrUses[1];
749     if (Adrp->getOpcode() != AArch64::ADRP)
750       return 0;
751 
752     // Get adrp instruction PC
753     const unsigned InstSize = 4;
754     uint64_t AdrpPC = BeginPC;
755     for (InstructionIterator It = Begin; It != End; ++It) {
756       if (&(*It) == Adrp)
757         break;
758       AdrpPC += InstSize;
759     }
760 
761     // Get adrp value
762     uint64_t Base;
763     assert(Adrp->getOperand(1).isImm() && "Unexpected adrp operand");
764     bool Ret = evaluateMemOperandTarget(*Adrp, Base, AdrpPC, InstSize);
765     assert(Ret && "Failed to evaluate adrp");
766     (void)Ret;
767 
768     return Base + Offset;
769   }
770 
getInvertedBranchOpcode(unsigned Opcode) const771   unsigned getInvertedBranchOpcode(unsigned Opcode) const {
772     switch (Opcode) {
773     default:
774       llvm_unreachable("Failed to invert branch opcode");
775       return Opcode;
776     case AArch64::TBZW:     return AArch64::TBNZW;
777     case AArch64::TBZX:     return AArch64::TBNZX;
778     case AArch64::TBNZW:    return AArch64::TBZW;
779     case AArch64::TBNZX:    return AArch64::TBZX;
780     case AArch64::CBZW:     return AArch64::CBNZW;
781     case AArch64::CBZX:     return AArch64::CBNZX;
782     case AArch64::CBNZW:    return AArch64::CBZW;
783     case AArch64::CBNZX:    return AArch64::CBZX;
784     }
785   }
786 
getCondCode(const MCInst & Inst) const787   unsigned getCondCode(const MCInst &Inst) const override {
788     // AArch64 does not use conditional codes, so we just return the opcode
789     // of the conditional branch here.
790     return Inst.getOpcode();
791   }
792 
getCanonicalBranchCondCode(unsigned Opcode) const793   unsigned getCanonicalBranchCondCode(unsigned Opcode) const override {
794     switch (Opcode) {
795     default:
796       return Opcode;
797     case AArch64::TBNZW:    return AArch64::TBZW;
798     case AArch64::TBNZX:    return AArch64::TBZX;
799     case AArch64::CBNZW:    return AArch64::CBZW;
800     case AArch64::CBNZX:    return AArch64::CBZX;
801     }
802   }
803 
reverseBranchCondition(MCInst & Inst,const MCSymbol * TBB,MCContext * Ctx) const804   bool reverseBranchCondition(MCInst &Inst, const MCSymbol *TBB,
805                               MCContext *Ctx) const override {
806     if (isTB(Inst) || isCB(Inst)) {
807       Inst.setOpcode(getInvertedBranchOpcode(Inst.getOpcode()));
808       assert(Inst.getOpcode() != 0 && "Invalid branch instruction");
809     } else if (Inst.getOpcode() == AArch64::Bcc) {
810       Inst.getOperand(0).setImm(AArch64CC::getInvertedCondCode(
811           static_cast<AArch64CC::CondCode>(Inst.getOperand(0).getImm())));
812       assert(Inst.getOperand(0).getImm() != AArch64CC::AL &&
813              Inst.getOperand(0).getImm() != AArch64CC::NV &&
814              "Can't reverse ALWAYS cond code");
815     } else {
816       LLVM_DEBUG(Inst.dump());
817       llvm_unreachable("Unrecognized branch instruction");
818     }
819     return replaceBranchTarget(Inst, TBB, Ctx);
820   }
821 
getPCRelEncodingSize(const MCInst & Inst) const822   int getPCRelEncodingSize(const MCInst &Inst) const override {
823     switch (Inst.getOpcode()) {
824     default:
825       llvm_unreachable("Failed to get pcrel encoding size");
826       return 0;
827     case AArch64::TBZW:     return 16;
828     case AArch64::TBZX:     return 16;
829     case AArch64::TBNZW:    return 16;
830     case AArch64::TBNZX:    return 16;
831     case AArch64::CBZW:     return 21;
832     case AArch64::CBZX:     return 21;
833     case AArch64::CBNZW:    return 21;
834     case AArch64::CBNZX:    return 21;
835     case AArch64::B:        return 28;
836     case AArch64::BL:       return 28;
837     case AArch64::Bcc:      return 21;
838     }
839   }
840 
getShortJmpEncodingSize() const841   int getShortJmpEncodingSize() const override { return 33; }
842 
getUncondBranchEncodingSize() const843   int getUncondBranchEncodingSize() const override { return 28; }
844 
createTailCall(MCInst & Inst,const MCSymbol * Target,MCContext * Ctx)845   bool createTailCall(MCInst &Inst, const MCSymbol *Target,
846                       MCContext *Ctx) override {
847     Inst.setOpcode(AArch64::B);
848     Inst.addOperand(MCOperand::createExpr(getTargetExprFor(
849         Inst, MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx),
850         *Ctx, 0)));
851     setTailCall(Inst);
852     return true;
853   }
854 
createLongTailCall(InstructionListType & Seq,const MCSymbol * Target,MCContext * Ctx)855   void createLongTailCall(InstructionListType &Seq, const MCSymbol *Target,
856                           MCContext *Ctx) override {
857     createShortJmp(Seq, Target, Ctx, /*IsTailCall*/ true);
858   }
859 
createTrap(MCInst & Inst) const860   bool createTrap(MCInst &Inst) const override {
861     Inst.clear();
862     Inst.setOpcode(AArch64::BRK);
863     Inst.addOperand(MCOperand::createImm(1));
864     return true;
865   }
866 
convertJmpToTailCall(MCInst & Inst)867   bool convertJmpToTailCall(MCInst &Inst) override {
868     setTailCall(Inst);
869     return true;
870   }
871 
convertTailCallToJmp(MCInst & Inst)872   bool convertTailCallToJmp(MCInst &Inst) override {
873     removeAnnotation(Inst, MCPlus::MCAnnotation::kTailCall);
874     clearOffset(Inst);
875     if (getConditionalTailCall(Inst))
876       unsetConditionalTailCall(Inst);
877     return true;
878   }
879 
lowerTailCall(MCInst & Inst)880   bool lowerTailCall(MCInst &Inst) override {
881     removeAnnotation(Inst, MCPlus::MCAnnotation::kTailCall);
882     if (getConditionalTailCall(Inst))
883       unsetConditionalTailCall(Inst);
884     return true;
885   }
886 
isNoop(const MCInst & Inst) const887   bool isNoop(const MCInst &Inst) const override {
888     return Inst.getOpcode() == AArch64::HINT &&
889            Inst.getOperand(0).getImm() == 0;
890   }
891 
createNoop(MCInst & Inst) const892   bool createNoop(MCInst &Inst) const override {
893     Inst.setOpcode(AArch64::HINT);
894     Inst.clear();
895     Inst.addOperand(MCOperand::createImm(0));
896     return true;
897   }
898 
isStore(const MCInst & Inst) const899   bool isStore(const MCInst &Inst) const override { return false; }
900 
analyzeBranch(InstructionIterator Begin,InstructionIterator End,const MCSymbol * & TBB,const MCSymbol * & FBB,MCInst * & CondBranch,MCInst * & UncondBranch) const901   bool analyzeBranch(InstructionIterator Begin, InstructionIterator End,
902                      const MCSymbol *&TBB, const MCSymbol *&FBB,
903                      MCInst *&CondBranch,
904                      MCInst *&UncondBranch) const override {
905     auto I = End;
906 
907     while (I != Begin) {
908       --I;
909 
910       // Ignore nops and CFIs
911       if (isPseudo(*I) || isNoop(*I))
912         continue;
913 
914       // Stop when we find the first non-terminator
915       if (!isTerminator(*I) || isTailCall(*I) || !isBranch(*I))
916         break;
917 
918       // Handle unconditional branches.
919       if (isUnconditionalBranch(*I)) {
920         // If any code was seen after this unconditional branch, we've seen
921         // unreachable code. Ignore them.
922         CondBranch = nullptr;
923         UncondBranch = &*I;
924         const MCSymbol *Sym = getTargetSymbol(*I);
925         assert(Sym != nullptr &&
926                "Couldn't extract BB symbol from jump operand");
927         TBB = Sym;
928         continue;
929       }
930 
931       // Handle conditional branches and ignore indirect branches
932       if (isIndirectBranch(*I))
933         return false;
934 
935       if (CondBranch == nullptr) {
936         const MCSymbol *TargetBB = getTargetSymbol(*I);
937         if (TargetBB == nullptr) {
938           // Unrecognized branch target
939           return false;
940         }
941         FBB = TBB;
942         TBB = TargetBB;
943         CondBranch = &*I;
944         continue;
945       }
946 
947       llvm_unreachable("multiple conditional branches in one BB");
948     }
949     return true;
950   }
951 
createLongJmp(InstructionListType & Seq,const MCSymbol * Target,MCContext * Ctx,bool IsTailCall)952   void createLongJmp(InstructionListType &Seq, const MCSymbol *Target,
953                      MCContext *Ctx, bool IsTailCall) override {
954     // ip0 (r16) is reserved to the linker (refer to 5.3.1.1 of "Procedure Call
955     //   Standard for the ARM 64-bit Architecture (AArch64)".
956     // The sequence of instructions we create here is the following:
957     //  movz ip0, #:abs_g3:<addr>
958     //  movk ip0, #:abs_g2_nc:<addr>
959     //  movk ip0, #:abs_g1_nc:<addr>
960     //  movk ip0, #:abs_g0_nc:<addr>
961     //  br ip0
962     MCInst Inst;
963     Inst.setOpcode(AArch64::MOVZXi);
964     Inst.addOperand(MCOperand::createReg(AArch64::X16));
965     Inst.addOperand(MCOperand::createExpr(AArch64MCExpr::create(
966         MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx),
967         AArch64MCExpr::VK_ABS_G3, *Ctx)));
968     Inst.addOperand(MCOperand::createImm(0x30));
969     Seq.emplace_back(Inst);
970 
971     Inst.clear();
972     Inst.setOpcode(AArch64::MOVKXi);
973     Inst.addOperand(MCOperand::createReg(AArch64::X16));
974     Inst.addOperand(MCOperand::createReg(AArch64::X16));
975     Inst.addOperand(MCOperand::createExpr(AArch64MCExpr::create(
976         MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx),
977         AArch64MCExpr::VK_ABS_G2_NC, *Ctx)));
978     Inst.addOperand(MCOperand::createImm(0x20));
979     Seq.emplace_back(Inst);
980 
981     Inst.clear();
982     Inst.setOpcode(AArch64::MOVKXi);
983     Inst.addOperand(MCOperand::createReg(AArch64::X16));
984     Inst.addOperand(MCOperand::createReg(AArch64::X16));
985     Inst.addOperand(MCOperand::createExpr(AArch64MCExpr::create(
986         MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx),
987         AArch64MCExpr::VK_ABS_G1_NC, *Ctx)));
988     Inst.addOperand(MCOperand::createImm(0x10));
989     Seq.emplace_back(Inst);
990 
991     Inst.clear();
992     Inst.setOpcode(AArch64::MOVKXi);
993     Inst.addOperand(MCOperand::createReg(AArch64::X16));
994     Inst.addOperand(MCOperand::createReg(AArch64::X16));
995     Inst.addOperand(MCOperand::createExpr(AArch64MCExpr::create(
996         MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx),
997         AArch64MCExpr::VK_ABS_G0_NC, *Ctx)));
998     Inst.addOperand(MCOperand::createImm(0));
999     Seq.emplace_back(Inst);
1000 
1001     Inst.clear();
1002     Inst.setOpcode(AArch64::BR);
1003     Inst.addOperand(MCOperand::createReg(AArch64::X16));
1004     if (IsTailCall)
1005       setTailCall(Inst);
1006     Seq.emplace_back(Inst);
1007   }
1008 
createShortJmp(InstructionListType & Seq,const MCSymbol * Target,MCContext * Ctx,bool IsTailCall)1009   void createShortJmp(InstructionListType &Seq, const MCSymbol *Target,
1010                       MCContext *Ctx, bool IsTailCall) override {
1011     // ip0 (r16) is reserved to the linker (refer to 5.3.1.1 of "Procedure Call
1012     //   Standard for the ARM 64-bit Architecture (AArch64)".
1013     // The sequence of instructions we create here is the following:
1014     //  adrp ip0, imm
1015     //  add ip0, ip0, imm
1016     //  br ip0
1017     MCPhysReg Reg = AArch64::X16;
1018     InstructionListType Insts = materializeAddress(Target, Ctx, Reg);
1019     Insts.emplace_back();
1020     MCInst &Inst = Insts.back();
1021     Inst.clear();
1022     Inst.setOpcode(AArch64::BR);
1023     Inst.addOperand(MCOperand::createReg(Reg));
1024     if (IsTailCall)
1025       setTailCall(Inst);
1026     Seq.swap(Insts);
1027   }
1028 
  /// Matching pattern here is
  ///
  ///    ADRP  x16, imm
  ///    ADD   x16, x16, imm
  ///    BR    x16
  ///
  /// where \p CurInst is the BR at \p Address and the preceding
  /// instructions are reached via [Begin, End). On a match, reports the
  /// ADRP (\p TargetHiBits), the ADD (\p TargetLowBits), the computed
  /// destination (\p Target), and returns 3 (instructions matched);
  /// returns 0 otherwise.
  uint64_t matchLinkerVeneer(InstructionIterator Begin, InstructionIterator End,
                             uint64_t Address, const MCInst &CurInst,
                             MCInst *&TargetHiBits, MCInst *&TargetLowBits,
                             uint64_t &Target) const override {
    // Must be an indirect branch through x16, the linker scratch register.
    if (CurInst.getOpcode() != AArch64::BR || !CurInst.getOperand(0).isReg() ||
        CurInst.getOperand(0).getReg() != AArch64::X16)
      return 0;

    auto I = End;
    if (I == Begin)
      return 0;

    // Previous instruction (4 bytes back): expect ADD x16, x16, #imm.
    --I;
    Address -= 4;
    if (I == Begin || I->getOpcode() != AArch64::ADDXri ||
        MCPlus::getNumPrimeOperands(*I) < 3 || !I->getOperand(0).isReg() ||
        !I->getOperand(1).isReg() ||
        I->getOperand(0).getReg() != AArch64::X16 ||
        I->getOperand(1).getReg() != AArch64::X16 || !I->getOperand(2).isImm())
      return 0;
    TargetLowBits = &*I;
    // Low 12 bits of the destination come from the ADD immediate.
    uint64_t Addr = I->getOperand(2).getImm() & 0xFFF;

    // One more back: expect ADRP x16, #imm forming the page address.
    --I;
    Address -= 4;
    if (I->getOpcode() != AArch64::ADRP ||
        MCPlus::getNumPrimeOperands(*I) < 2 || !I->getOperand(0).isReg() ||
        !I->getOperand(1).isImm() || I->getOperand(0).getReg() != AArch64::X16)
      return 0;
    TargetHiBits = &*I;
    // High bits: ADRP's immediate is a page delta (imm << 12) relative to
    // the ADRP's own PC, masked to a 4KB page boundary.
    Addr |= (Address + ((int64_t)I->getOperand(1).getImm() << 12)) &
            0xFFFFFFFFFFFFF000ULL;
    Target = Addr;
    return 3;
  }
1070 
replaceImmWithSymbolRef(MCInst & Inst,const MCSymbol * Symbol,int64_t Addend,MCContext * Ctx,int64_t & Value,uint64_t RelType) const1071   bool replaceImmWithSymbolRef(MCInst &Inst, const MCSymbol *Symbol,
1072                                int64_t Addend, MCContext *Ctx, int64_t &Value,
1073                                uint64_t RelType) const override {
1074     unsigned ImmOpNo = -1U;
1075     for (unsigned Index = 0; Index < MCPlus::getNumPrimeOperands(Inst);
1076          ++Index) {
1077       if (Inst.getOperand(Index).isImm()) {
1078         ImmOpNo = Index;
1079         break;
1080       }
1081     }
1082     if (ImmOpNo == -1U)
1083       return false;
1084 
1085     Value = Inst.getOperand(ImmOpNo).getImm();
1086 
1087     setOperandToSymbolRef(Inst, ImmOpNo, Symbol, Addend, Ctx, RelType);
1088 
1089     return true;
1090   }
1091 
createUncondBranch(MCInst & Inst,const MCSymbol * TBB,MCContext * Ctx) const1092   bool createUncondBranch(MCInst &Inst, const MCSymbol *TBB,
1093                           MCContext *Ctx) const override {
1094     Inst.setOpcode(AArch64::B);
1095     Inst.clear();
1096     Inst.addOperand(MCOperand::createExpr(getTargetExprFor(
1097         Inst, MCSymbolRefExpr::create(TBB, MCSymbolRefExpr::VK_None, *Ctx),
1098         *Ctx, 0)));
1099     return true;
1100   }
1101 
isMoveMem2Reg(const MCInst & Inst) const1102   bool isMoveMem2Reg(const MCInst &Inst) const override { return false; }
1103 
isLeave(const MCInst & Inst) const1104   bool isLeave(const MCInst &Inst) const override { return false; }
1105 
isPop(const MCInst & Inst) const1106   bool isPop(const MCInst &Inst) const override { return false; }
1107 
isPrefix(const MCInst & Inst) const1108   bool isPrefix(const MCInst &Inst) const override { return false; }
1109 
createReturn(MCInst & Inst) const1110   bool createReturn(MCInst &Inst) const override {
1111     Inst.setOpcode(AArch64::RET);
1112     Inst.clear();
1113     Inst.addOperand(MCOperand::createReg(AArch64::LR));
1114     return true;
1115   }
1116 
materializeAddress(const MCSymbol * Target,MCContext * Ctx,MCPhysReg RegName,int64_t Addend=0) const1117   InstructionListType materializeAddress(const MCSymbol *Target, MCContext *Ctx,
1118                                          MCPhysReg RegName,
1119                                          int64_t Addend = 0) const override {
1120     // Get page-aligned address and add page offset
1121     InstructionListType Insts(2);
1122     Insts[0].setOpcode(AArch64::ADRP);
1123     Insts[0].clear();
1124     Insts[0].addOperand(MCOperand::createReg(RegName));
1125     Insts[0].addOperand(MCOperand::createImm(0));
1126     setOperandToSymbolRef(Insts[0], /* OpNum */ 1, Target, Addend, Ctx,
1127                           ELF::R_AARCH64_NONE);
1128     Insts[1].setOpcode(AArch64::ADDXri);
1129     Insts[1].clear();
1130     Insts[1].addOperand(MCOperand::createReg(RegName));
1131     Insts[1].addOperand(MCOperand::createReg(RegName));
1132     Insts[1].addOperand(MCOperand::createImm(0));
1133     Insts[1].addOperand(MCOperand::createImm(0));
1134     setOperandToSymbolRef(Insts[1], /* OpNum */ 2, Target, Addend, Ctx,
1135                           ELF::R_AARCH64_ADD_ABS_LO12_NC);
1136     return Insts;
1137   }
1138 };
1139 
1140 } // end anonymous namespace
1141 
1142 namespace llvm {
1143 namespace bolt {
1144 
createAArch64MCPlusBuilder(const MCInstrAnalysis * Analysis,const MCInstrInfo * Info,const MCRegisterInfo * RegInfo)1145 MCPlusBuilder *createAArch64MCPlusBuilder(const MCInstrAnalysis *Analysis,
1146                                           const MCInstrInfo *Info,
1147                                           const MCRegisterInfo *RegInfo) {
1148   return new AArch64MCPlusBuilder(Analysis, Info, RegInfo);
1149 }
1150 
1151 } // namespace bolt
1152 } // namespace llvm
1153