1 //===- bolt/Target/AArch64/AArch64MCPlusBuilder.cpp -----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file provides AArch64-specific MCPlus builder. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "MCTargetDesc/AArch64AddressingModes.h" 14 #include "MCTargetDesc/AArch64MCExpr.h" 15 #include "MCTargetDesc/AArch64MCTargetDesc.h" 16 #include "Utils/AArch64BaseInfo.h" 17 #include "bolt/Core/MCPlusBuilder.h" 18 #include "llvm/BinaryFormat/ELF.h" 19 #include "llvm/MC/MCInstrInfo.h" 20 #include "llvm/MC/MCRegisterInfo.h" 21 #include "llvm/Support/Debug.h" 22 #include "llvm/Support/ErrorHandling.h" 23 24 #define DEBUG_TYPE "mcplus" 25 26 using namespace llvm; 27 using namespace bolt; 28 29 namespace { 30 31 class AArch64MCPlusBuilder : public MCPlusBuilder { 32 public: 33 AArch64MCPlusBuilder(const MCInstrAnalysis *Analysis, const MCInstrInfo *Info, 34 const MCRegisterInfo *RegInfo) 35 : MCPlusBuilder(Analysis, Info, RegInfo) {} 36 37 bool equals(const MCTargetExpr &A, const MCTargetExpr &B, 38 CompFuncTy Comp) const override { 39 const auto &AArch64ExprA = cast<AArch64MCExpr>(A); 40 const auto &AArch64ExprB = cast<AArch64MCExpr>(B); 41 if (AArch64ExprA.getKind() != AArch64ExprB.getKind()) 42 return false; 43 44 return MCPlusBuilder::equals(*AArch64ExprA.getSubExpr(), 45 *AArch64ExprB.getSubExpr(), Comp); 46 } 47 48 bool hasEVEXEncoding(const MCInst &) const override { return false; } 49 50 bool isMacroOpFusionPair(ArrayRef<MCInst> Insts) const override { 51 return false; 52 } 53 54 bool shortenInstruction(MCInst &) const override { return false; } 55 56 bool isADRP(const MCInst &Inst) const override { 57 return Inst.getOpcode() == AArch64::ADRP; 58 } 59 60 bool isADR(const MCInst &Inst) const override { 61 return Inst.getOpcode() == AArch64::ADR; 62 } 63 64 void getADRReg(const MCInst &Inst, MCPhysReg &RegName) const override { 65 assert((isADR(Inst) || isADRP(Inst)) && "Not an ADR instruction"); 66 assert(MCPlus::getNumPrimeOperands(Inst) != 0 && 67 "No operands for ADR instruction"); 68 assert(Inst.getOperand(0).isReg() && 69 "Unexpected operand in ADR instruction"); 70 RegName = Inst.getOperand(0).getReg(); 71 } 72 73 bool isTB(const MCInst &Inst) const { 74 return (Inst.getOpcode() == AArch64::TBNZW || 75 Inst.getOpcode() == AArch64::TBNZX || 76 Inst.getOpcode() == AArch64::TBZW || 77 Inst.getOpcode() == AArch64::TBZX); 78 } 79 80 bool isCB(const MCInst &Inst) const { 81 return (Inst.getOpcode() == AArch64::CBNZW || 82 Inst.getOpcode() == AArch64::CBNZX || 83 Inst.getOpcode() == AArch64::CBZW || 84 Inst.getOpcode() == AArch64::CBZX); 85 } 86 87 bool isMOVW(const MCInst &Inst) const { 88 return (Inst.getOpcode() == AArch64::MOVKWi || 89 Inst.getOpcode() == AArch64::MOVKXi || 90 Inst.getOpcode() == AArch64::MOVNWi || 91 Inst.getOpcode() == AArch64::MOVNXi || 92 Inst.getOpcode() == AArch64::MOVZXi || 93 Inst.getOpcode() == AArch64::MOVZWi); 94 } 95 96 bool isADD(const MCInst &Inst) const { 97 return (Inst.getOpcode() == AArch64::ADDSWri || 98 Inst.getOpcode() == AArch64::ADDSWrr || 99 Inst.getOpcode() == AArch64::ADDSWrs || 100 Inst.getOpcode() == AArch64::ADDSWrx || 101 Inst.getOpcode() == AArch64::ADDSXri || 102 Inst.getOpcode() == AArch64::ADDSXrr || 103 Inst.getOpcode() == AArch64::ADDSXrs || 104 Inst.getOpcode() == AArch64::ADDSXrx || 105 Inst.getOpcode() == AArch64::ADDSXrx64 || 106 Inst.getOpcode() == AArch64::ADDWri || 107 Inst.getOpcode() == AArch64::ADDWrr || 108 Inst.getOpcode() == AArch64::ADDWrs || 109 Inst.getOpcode() == AArch64::ADDWrx || 110 Inst.getOpcode() == AArch64::ADDXri || 111 Inst.getOpcode() == AArch64::ADDXrr || 112 Inst.getOpcode() == AArch64::ADDXrs || 113 Inst.getOpcode() == AArch64::ADDXrx || 114 Inst.getOpcode() == AArch64::ADDXrx64); 115 } 116 117 bool isLDRB(const MCInst &Inst) const { 118 return (Inst.getOpcode() == AArch64::LDRBBpost || 119 Inst.getOpcode() == AArch64::LDRBBpre || 120 Inst.getOpcode() == AArch64::LDRBBroW || 121 Inst.getOpcode() == AArch64::LDRBBroX || 122 Inst.getOpcode() == AArch64::LDRBBui || 123 Inst.getOpcode() == AArch64::LDRSBWpost || 124 Inst.getOpcode() == AArch64::LDRSBWpre || 125 Inst.getOpcode() == AArch64::LDRSBWroW || 126 Inst.getOpcode() == AArch64::LDRSBWroX || 127 Inst.getOpcode() == AArch64::LDRSBWui || 128 Inst.getOpcode() == AArch64::LDRSBXpost || 129 Inst.getOpcode() == AArch64::LDRSBXpre || 130 Inst.getOpcode() == AArch64::LDRSBXroW || 131 Inst.getOpcode() == AArch64::LDRSBXroX || 132 Inst.getOpcode() == AArch64::LDRSBXui); 133 } 134 135 bool isLDRH(const MCInst &Inst) const { 136 return (Inst.getOpcode() == AArch64::LDRHHpost || 137 Inst.getOpcode() == AArch64::LDRHHpre || 138 Inst.getOpcode() == AArch64::LDRHHroW || 139 Inst.getOpcode() == AArch64::LDRHHroX || 140 Inst.getOpcode() == AArch64::LDRHHui || 141 Inst.getOpcode() == AArch64::LDRSHWpost || 142 Inst.getOpcode() == AArch64::LDRSHWpre || 143 Inst.getOpcode() == AArch64::LDRSHWroW || 144 Inst.getOpcode() == AArch64::LDRSHWroX || 145 Inst.getOpcode() == AArch64::LDRSHWui || 146 Inst.getOpcode() == AArch64::LDRSHXpost || 147 Inst.getOpcode() == AArch64::LDRSHXpre || 148 Inst.getOpcode() == AArch64::LDRSHXroW || 149 Inst.getOpcode() == AArch64::LDRSHXroX || 150 Inst.getOpcode() == AArch64::LDRSHXui); 151 } 152 153 bool isLDRW(const MCInst &Inst) const { 154 return (Inst.getOpcode() == AArch64::LDRWpost || 155 Inst.getOpcode() == AArch64::LDRWpre || 156 Inst.getOpcode() == AArch64::LDRWroW || 157 Inst.getOpcode() == AArch64::LDRWroX || 158 Inst.getOpcode() == AArch64::LDRWui); 159 } 160 161 bool isLDRX(const MCInst &Inst) const { 162 return (Inst.getOpcode() == AArch64::LDRXpost || 163 Inst.getOpcode() == AArch64::LDRXpre || 164 Inst.getOpcode() == AArch64::LDRXroW || 165 Inst.getOpcode() == AArch64::LDRXroX || 166 Inst.getOpcode() == AArch64::LDRXui); 167 } 168 169 bool isLoad(const MCInst &Inst) const override { 170 return isLDRB(Inst) || isLDRH(Inst) || isLDRW(Inst) || isLDRX(Inst); 171 } 172 173 bool isLoadFromStack(const MCInst &Inst) const { 174 if (!isLoad(Inst)) 175 return false; 176 const MCInstrDesc &InstInfo = Info->get(Inst.getOpcode()); 177 unsigned NumDefs = InstInfo.getNumDefs(); 178 for (unsigned I = NumDefs, E = InstInfo.getNumOperands(); I < E; ++I) { 179 const MCOperand &Operand = Inst.getOperand(I); 180 if (!Operand.isReg()) 181 continue; 182 unsigned Reg = Operand.getReg(); 183 if (Reg == AArch64::SP || Reg == AArch64::WSP || Reg == AArch64::FP || 184 Reg == AArch64::W29) 185 return true; 186 } 187 return false; 188 } 189 190 bool isRegToRegMove(const MCInst &Inst, MCPhysReg &From, 191 MCPhysReg &To) const override { 192 if (Inst.getOpcode() != AArch64::ORRXrs) 193 return false; 194 if (Inst.getOperand(1).getReg() != AArch64::XZR) 195 return false; 196 if (Inst.getOperand(3).getImm() != 0) 197 return false; 198 From = Inst.getOperand(2).getReg(); 199 To = Inst.getOperand(0).getReg(); 200 return true; 201 } 202 203 bool isIndirectCall(const MCInst &Inst) const override { 204 return Inst.getOpcode() == AArch64::BLR; 205 } 206 207 MCPhysReg getNoRegister() const override { return AArch64::NoRegister; } 208 209 bool hasPCRelOperand(const MCInst &Inst) const override { 210 // ADRP is blacklisted and is an exception. Even though it has a 211 // PC-relative operand, this operand is not a complete symbol reference 212 // and BOLT shouldn't try to process it in isolation. 213 if (isADRP(Inst)) 214 return false; 215 216 if (isADR(Inst)) 217 return true; 218 219 // Look for literal addressing mode (see C1-143 ARM DDI 0487B.a) 220 const MCInstrDesc &MCII = Info->get(Inst.getOpcode()); 221 for (unsigned I = 0, E = MCII.getNumOperands(); I != E; ++I) 222 if (MCII.OpInfo[I].OperandType == MCOI::OPERAND_PCREL) 223 return true; 224 225 return false; 226 } 227 228 bool evaluateADR(const MCInst &Inst, int64_t &Imm, 229 const MCExpr **DispExpr) const { 230 assert((isADR(Inst) || isADRP(Inst)) && "Not an ADR instruction"); 231 232 const MCOperand &Label = Inst.getOperand(1); 233 if (!Label.isImm()) { 234 assert(Label.isExpr() && "Unexpected ADR operand"); 235 assert(DispExpr && "DispExpr must be set"); 236 *DispExpr = Label.getExpr(); 237 return false; 238 } 239 240 if (Inst.getOpcode() == AArch64::ADR) { 241 Imm = Label.getImm(); 242 return true; 243 } 244 Imm = Label.getImm() << 12; 245 return true; 246 } 247 248 bool evaluateAArch64MemoryOperand(const MCInst &Inst, int64_t &DispImm, 249 const MCExpr **DispExpr = nullptr) const { 250 if (isADR(Inst) || isADRP(Inst)) 251 return evaluateADR(Inst, DispImm, DispExpr); 252 253 // Literal addressing mode 254 const MCInstrDesc &MCII = Info->get(Inst.getOpcode()); 255 for (unsigned I = 0, E = MCII.getNumOperands(); I != E; ++I) { 256 if (MCII.OpInfo[I].OperandType != MCOI::OPERAND_PCREL) 257 continue; 258 259 if (!Inst.getOperand(I).isImm()) { 260 assert(Inst.getOperand(I).isExpr() && "Unexpected PCREL operand"); 261 assert(DispExpr && "DispExpr must be set"); 262 *DispExpr = Inst.getOperand(I).getExpr(); 263 return true; 264 } 265 266 DispImm = Inst.getOperand(I).getImm() << 2; 267 return true; 268 } 269 return false; 270 } 271 272 bool evaluateMemOperandTarget(const MCInst &Inst, uint64_t &Target, 273 uint64_t Address, 274 uint64_t Size) const override { 275 int64_t DispValue; 276 const MCExpr *DispExpr = nullptr; 277 if (!evaluateAArch64MemoryOperand(Inst, DispValue, &DispExpr)) 278 return false; 279 280 // Make sure it's a well-formed addressing we can statically evaluate. 281 if (DispExpr) 282 return false; 283 284 Target = DispValue; 285 if (Inst.getOpcode() == AArch64::ADRP) 286 Target += Address & ~0xFFFULL; 287 else 288 Target += Address; 289 return true; 290 } 291 292 bool replaceMemOperandDisp(MCInst &Inst, MCOperand Operand) const override { 293 MCInst::iterator OI = Inst.begin(); 294 if (isADR(Inst) || isADRP(Inst)) { 295 assert(MCPlus::getNumPrimeOperands(Inst) >= 2 && 296 "Unexpected number of operands"); 297 ++OI; 298 } else { 299 const MCInstrDesc &MCII = Info->get(Inst.getOpcode()); 300 for (unsigned I = 0, E = MCII.getNumOperands(); I != E; ++I) { 301 if (MCII.OpInfo[I].OperandType == MCOI::OPERAND_PCREL) 302 break; 303 ++OI; 304 } 305 assert(OI != Inst.end() && "Literal operand not found"); 306 } 307 *OI = Operand; 308 return true; 309 } 310 311 const MCExpr *getTargetExprFor(MCInst &Inst, const MCExpr *Expr, 312 MCContext &Ctx, 313 uint64_t RelType) const override { 314 315 if (isADR(Inst) || RelType == ELF::R_AARCH64_ADR_PREL_LO21 || 316 RelType == ELF::R_AARCH64_TLSDESC_ADR_PREL21) { 317 return AArch64MCExpr::create(Expr, AArch64MCExpr::VK_ABS, Ctx); 318 } else if (isADRP(Inst) || RelType == ELF::R_AARCH64_ADR_PREL_PG_HI21 || 319 RelType == ELF::R_AARCH64_ADR_PREL_PG_HI21_NC || 320 RelType == ELF::R_AARCH64_TLSDESC_ADR_PAGE21 || 321 RelType == ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 || 322 RelType == ELF::R_AARCH64_ADR_GOT_PAGE) { 323 // Never emit a GOT reloc, we handled this in 324 // RewriteInstance::readRelocations(). 325 return AArch64MCExpr::create(Expr, AArch64MCExpr::VK_ABS_PAGE, Ctx); 326 } else { 327 switch (RelType) { 328 case ELF::R_AARCH64_ADD_ABS_LO12_NC: 329 case ELF::R_AARCH64_LD64_GOT_LO12_NC: 330 case ELF::R_AARCH64_LDST8_ABS_LO12_NC: 331 case ELF::R_AARCH64_LDST16_ABS_LO12_NC: 332 case ELF::R_AARCH64_LDST32_ABS_LO12_NC: 333 case ELF::R_AARCH64_LDST64_ABS_LO12_NC: 334 case ELF::R_AARCH64_LDST128_ABS_LO12_NC: 335 case ELF::R_AARCH64_TLSDESC_ADD_LO12: 336 case ELF::R_AARCH64_TLSDESC_LD64_LO12: 337 case ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: 338 case ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: 339 return AArch64MCExpr::create(Expr, AArch64MCExpr::VK_LO12, Ctx); 340 case ELF::R_AARCH64_MOVW_UABS_G3: 341 return AArch64MCExpr::create(Expr, AArch64MCExpr::VK_ABS_G3, Ctx); 342 case ELF::R_AARCH64_MOVW_UABS_G2: 343 case ELF::R_AARCH64_MOVW_UABS_G2_NC: 344 return AArch64MCExpr::create(Expr, AArch64MCExpr::VK_ABS_G2_NC, Ctx); 345 case ELF::R_AARCH64_MOVW_UABS_G1: 346 case ELF::R_AARCH64_MOVW_UABS_G1_NC: 347 return AArch64MCExpr::create(Expr, AArch64MCExpr::VK_ABS_G1_NC, Ctx); 348 case ELF::R_AARCH64_MOVW_UABS_G0: 349 case ELF::R_AARCH64_MOVW_UABS_G0_NC: 350 return AArch64MCExpr::create(Expr, AArch64MCExpr::VK_ABS_G0_NC, Ctx); 351 default: 352 break; 353 } 354 } 355 return Expr; 356 } 357 358 bool getSymbolRefOperandNum(const MCInst &Inst, unsigned &OpNum) const { 359 if (OpNum >= MCPlus::getNumPrimeOperands(Inst)) 360 return false; 361 362 // Auto-select correct operand number 363 if (OpNum == 0) { 364 if (isConditionalBranch(Inst) || isADR(Inst) || isADRP(Inst)) 365 OpNum = 1; 366 if (isTB(Inst)) 367 OpNum = 2; 368 if (isMOVW(Inst)) 369 OpNum = 1; 370 } 371 372 return true; 373 } 374 375 const MCSymbol *getTargetSymbol(const MCExpr *Expr) const override { 376 auto *AArchExpr = dyn_cast<AArch64MCExpr>(Expr); 377 if (AArchExpr && AArchExpr->getSubExpr()) 378 return getTargetSymbol(AArchExpr->getSubExpr()); 379 380 auto *BinExpr = dyn_cast<MCBinaryExpr>(Expr); 381 if (BinExpr) 382 return getTargetSymbol(BinExpr->getLHS()); 383 384 auto *SymExpr = dyn_cast<MCSymbolRefExpr>(Expr); 385 if (SymExpr && SymExpr->getKind() == MCSymbolRefExpr::VK_None) 386 return &SymExpr->getSymbol(); 387 388 return nullptr; 389 } 390 391 const MCSymbol *getTargetSymbol(const MCInst &Inst, 392 unsigned OpNum = 0) const override { 393 if (!getSymbolRefOperandNum(Inst, OpNum)) 394 return nullptr; 395 396 const MCOperand &Op = Inst.getOperand(OpNum); 397 if (!Op.isExpr()) 398 return nullptr; 399 400 return getTargetSymbol(Op.getExpr()); 401 } 402 403 int64_t getTargetAddend(const MCExpr *Expr) const override { 404 auto *AArchExpr = dyn_cast<AArch64MCExpr>(Expr); 405 if (AArchExpr && AArchExpr->getSubExpr()) 406 return getTargetAddend(AArchExpr->getSubExpr()); 407 408 auto *BinExpr = dyn_cast<MCBinaryExpr>(Expr); 409 if (BinExpr && BinExpr->getOpcode() == MCBinaryExpr::Add) 410 return getTargetAddend(BinExpr->getRHS()); 411 412 auto *ConstExpr = dyn_cast<MCConstantExpr>(Expr); 413 if (ConstExpr) 414 return ConstExpr->getValue(); 415 416 return 0; 417 } 418 419 int64_t getTargetAddend(const MCInst &Inst, 420 unsigned OpNum = 0) const override { 421 if (!getSymbolRefOperandNum(Inst, OpNum)) 422 return 0; 423 424 const MCOperand &Op = Inst.getOperand(OpNum); 425 if (!Op.isExpr()) 426 return 0; 427 428 return getTargetAddend(Op.getExpr()); 429 } 430 431 bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, 432 uint64_t &Target) const override { 433 size_t OpNum = 0; 434 435 if (isConditionalBranch(Inst)) { 436 assert(MCPlus::getNumPrimeOperands(Inst) >= 2 && 437 "Invalid number of operands"); 438 OpNum = 1; 439 } 440 441 if (isTB(Inst)) { 442 assert(MCPlus::getNumPrimeOperands(Inst) >= 3 && 443 "Invalid number of operands"); 444 OpNum = 2; 445 } 446 447 if (Info->get(Inst.getOpcode()).OpInfo[OpNum].OperandType != 448 MCOI::OPERAND_PCREL) { 449 assert((isIndirectBranch(Inst) || isIndirectCall(Inst)) && 450 "FAILED evaluateBranch"); 451 return false; 452 } 453 454 int64_t Imm = Inst.getOperand(OpNum).getImm() << 2; 455 Target = Addr + Imm; 456 return true; 457 } 458 459 bool replaceBranchTarget(MCInst &Inst, const MCSymbol *TBB, 460 MCContext *Ctx) const override { 461 assert((isCall(Inst) || isBranch(Inst)) && !isIndirectBranch(Inst) && 462 "Invalid instruction"); 463 assert(MCPlus::getNumPrimeOperands(Inst) >= 1 && 464 "Invalid number of operands"); 465 MCInst::iterator OI = Inst.begin(); 466 467 if (isConditionalBranch(Inst)) { 468 assert(MCPlus::getNumPrimeOperands(Inst) >= 2 && 469 "Invalid number of operands"); 470 ++OI; 471 } 472 473 if (isTB(Inst)) { 474 assert(MCPlus::getNumPrimeOperands(Inst) >= 3 && 475 "Invalid number of operands"); 476 OI = Inst.begin() + 2; 477 } 478 479 *OI = MCOperand::createExpr( 480 MCSymbolRefExpr::create(TBB, MCSymbolRefExpr::VK_None, *Ctx)); 481 return true; 482 } 483 484 /// Matches indirect branch patterns in AArch64 related to a jump table (JT), 485 /// helping us to build the complete CFG. A typical indirect branch to 486 /// a jump table entry in AArch64 looks like the following: 487 /// 488 /// adrp x1, #-7585792 # Get JT Page location 489 /// add x1, x1, #692 # Complement with JT Page offset 490 /// ldrh w0, [x1, w0, uxtw #1] # Loads JT entry 491 /// adr x1, #12 # Get PC + 12 (end of this BB) used next 492 /// add x0, x1, w0, sxth #2 # Finish building branch target 493 /// # (entries in JT are relative to the end 494 /// # of this BB) 495 /// br x0 # Indirect jump instruction 496 /// 497 bool analyzeIndirectBranchFragment( 498 const MCInst &Inst, 499 DenseMap<const MCInst *, SmallVector<MCInst *, 4>> &UDChain, 500 const MCExpr *&JumpTable, int64_t &Offset, int64_t &ScaleValue, 501 MCInst *&PCRelBase) const { 502 // Expect AArch64 BR 503 assert(Inst.getOpcode() == AArch64::BR && "Unexpected opcode"); 504 505 // Match the indirect branch pattern for aarch64 506 SmallVector<MCInst *, 4> &UsesRoot = UDChain[&Inst]; 507 if (UsesRoot.size() == 0 || UsesRoot[0] == nullptr) 508 return false; 509 510 const MCInst *DefAdd = UsesRoot[0]; 511 512 // Now we match an ADD 513 if (!isADD(*DefAdd)) { 514 // If the address is not broken up in two parts, this is not branching 515 // according to a jump table entry. Fail. 516 return false; 517 } 518 if (DefAdd->getOpcode() == AArch64::ADDXri) { 519 // This can happen when there is no offset, but a direct jump that was 520 // transformed into an indirect one (indirect tail call) : 521 // ADRP x2, Perl_re_compiler 522 // ADD x2, x2, :lo12:Perl_re_compiler 523 // BR x2 524 return false; 525 } 526 if (DefAdd->getOpcode() == AArch64::ADDXrs) { 527 // Covers the less common pattern where JT entries are relative to 528 // the JT itself (like x86). Seems less efficient since we can't 529 // assume the JT is aligned at 4B boundary and thus drop 2 bits from 530 // JT values. 531 // cde264: 532 // adrp x12, #21544960 ; 216a000 533 // add x12, x12, #1696 ; 216a6a0 (JT object in .rodata) 534 // ldrsw x8, [x12, x8, lsl #2] --> loads e.g. 0xfeb73bd8 535 // * add x8, x8, x12 --> = cde278, next block 536 // br x8 537 // cde278: 538 // 539 // Parsed as ADDXrs reg:x8 reg:x8 reg:x12 imm:0 540 return false; 541 } 542 assert(DefAdd->getOpcode() == AArch64::ADDXrx && 543 "Failed to match indirect branch!"); 544 545 // Validate ADD operands 546 int64_t OperandExtension = DefAdd->getOperand(3).getImm(); 547 unsigned ShiftVal = AArch64_AM::getArithShiftValue(OperandExtension); 548 AArch64_AM::ShiftExtendType ExtendType = 549 AArch64_AM::getArithExtendType(OperandExtension); 550 if (ShiftVal != 2) 551 llvm_unreachable("Failed to match indirect branch! (fragment 2)"); 552 553 if (ExtendType == AArch64_AM::SXTB) 554 ScaleValue = 1LL; 555 else if (ExtendType == AArch64_AM::SXTH) 556 ScaleValue = 2LL; 557 else if (ExtendType == AArch64_AM::SXTW) 558 ScaleValue = 4LL; 559 else 560 llvm_unreachable("Failed to match indirect branch! (fragment 3)"); 561 562 // Match an ADR to load base address to be used when addressing JT targets 563 SmallVector<MCInst *, 4> &UsesAdd = UDChain[DefAdd]; 564 if (UsesAdd.size() <= 1 || UsesAdd[1] == nullptr || UsesAdd[2] == nullptr) { 565 // This happens when we don't have enough context about this jump table 566 // because the jumping code sequence was split in multiple basic blocks. 567 // This was observed in the wild in HHVM code (dispatchImpl). 568 return false; 569 } 570 MCInst *DefBaseAddr = UsesAdd[1]; 571 assert(DefBaseAddr->getOpcode() == AArch64::ADR && 572 "Failed to match indirect branch pattern! (fragment 3)"); 573 574 PCRelBase = DefBaseAddr; 575 // Match LOAD to load the jump table (relative) target 576 const MCInst *DefLoad = UsesAdd[2]; 577 assert(isLoad(*DefLoad) && 578 "Failed to match indirect branch load pattern! (1)"); 579 assert((ScaleValue != 1LL || isLDRB(*DefLoad)) && 580 "Failed to match indirect branch load pattern! (2)"); 581 assert((ScaleValue != 2LL || isLDRH(*DefLoad)) && 582 "Failed to match indirect branch load pattern! (3)"); 583 584 // Match ADD that calculates the JumpTable Base Address (not the offset) 585 SmallVector<MCInst *, 4> &UsesLoad = UDChain[DefLoad]; 586 const MCInst *DefJTBaseAdd = UsesLoad[1]; 587 MCPhysReg From, To; 588 if (DefJTBaseAdd == nullptr || isLoadFromStack(*DefJTBaseAdd) || 589 isRegToRegMove(*DefJTBaseAdd, From, To)) { 590 // Sometimes base address may have been defined in another basic block 591 // (hoisted). Return with no jump table info. 592 JumpTable = nullptr; 593 return true; 594 } 595 596 assert(DefJTBaseAdd->getOpcode() == AArch64::ADDXri && 597 "Failed to match jump table base address pattern! (1)"); 598 599 if (DefJTBaseAdd->getOperand(2).isImm()) 600 Offset = DefJTBaseAdd->getOperand(2).getImm(); 601 SmallVector<MCInst *, 4> &UsesJTBaseAdd = UDChain[DefJTBaseAdd]; 602 const MCInst *DefJTBasePage = UsesJTBaseAdd[1]; 603 if (DefJTBasePage == nullptr || isLoadFromStack(*DefJTBasePage)) { 604 JumpTable = nullptr; 605 return true; 606 } 607 assert(DefJTBasePage->getOpcode() == AArch64::ADRP && 608 "Failed to match jump table base page pattern! (2)"); 609 if (DefJTBasePage->getOperand(1).isExpr()) 610 JumpTable = DefJTBasePage->getOperand(1).getExpr(); 611 return true; 612 } 613 614 DenseMap<const MCInst *, SmallVector<MCInst *, 4>> 615 computeLocalUDChain(const MCInst *CurInstr, InstructionIterator Begin, 616 InstructionIterator End) const { 617 DenseMap<int, MCInst *> RegAliasTable; 618 DenseMap<const MCInst *, SmallVector<MCInst *, 4>> Uses; 619 620 auto addInstrOperands = [&](const MCInst &Instr) { 621 // Update Uses table 622 for (unsigned OpNum = 0, OpEnd = MCPlus::getNumPrimeOperands(Instr); 623 OpNum != OpEnd; ++OpNum) { 624 if (!Instr.getOperand(OpNum).isReg()) 625 continue; 626 unsigned Reg = Instr.getOperand(OpNum).getReg(); 627 MCInst *AliasInst = RegAliasTable[Reg]; 628 Uses[&Instr].push_back(AliasInst); 629 LLVM_DEBUG({ 630 dbgs() << "Adding reg operand " << Reg << " refs "; 631 if (AliasInst != nullptr) 632 AliasInst->dump(); 633 else 634 dbgs() << "\n"; 635 }); 636 } 637 }; 638 639 LLVM_DEBUG(dbgs() << "computeLocalUDChain\n"); 640 bool TerminatorSeen = false; 641 for (auto II = Begin; II != End; ++II) { 642 MCInst &Instr = *II; 643 // Ignore nops and CFIs 644 if (isPseudo(Instr) || isNoop(Instr)) 645 continue; 646 if (TerminatorSeen) { 647 RegAliasTable.clear(); 648 Uses.clear(); 649 } 650 651 LLVM_DEBUG(dbgs() << "Now updating for:\n "); 652 LLVM_DEBUG(Instr.dump()); 653 addInstrOperands(Instr); 654 655 BitVector Regs = BitVector(RegInfo->getNumRegs(), false); 656 getWrittenRegs(Instr, Regs); 657 658 // Update register definitions after this point 659 int Idx = Regs.find_first(); 660 while (Idx != -1) { 661 RegAliasTable[Idx] = &Instr; 662 LLVM_DEBUG(dbgs() << "Setting reg " << Idx 663 << " def to current instr.\n"); 664 Idx = Regs.find_next(Idx); 665 } 666 667 TerminatorSeen = isTerminator(Instr); 668 } 669 670 // Process the last instruction, which is not currently added into the 671 // instruction stream 672 if (CurInstr) 673 addInstrOperands(*CurInstr); 674 675 return Uses; 676 } 677 678 IndirectBranchType analyzeIndirectBranch( 679 MCInst &Instruction, InstructionIterator Begin, InstructionIterator End, 680 const unsigned PtrSize, MCInst *&MemLocInstrOut, unsigned &BaseRegNumOut, 681 unsigned &IndexRegNumOut, int64_t &DispValueOut, 682 const MCExpr *&DispExprOut, MCInst *&PCRelBaseOut) const override { 683 MemLocInstrOut = nullptr; 684 BaseRegNumOut = AArch64::NoRegister; 685 IndexRegNumOut = AArch64::NoRegister; 686 DispValueOut = 0; 687 DispExprOut = nullptr; 688 689 // An instruction referencing memory used by jump instruction (directly or 690 // via register). This location could be an array of function pointers 691 // in case of indirect tail call, or a jump table. 692 MCInst *MemLocInstr = nullptr; 693 694 // Analyze the memory location. 695 int64_t ScaleValue, DispValue; 696 const MCExpr *DispExpr; 697 698 DenseMap<const MCInst *, SmallVector<llvm::MCInst *, 4>> UDChain = 699 computeLocalUDChain(&Instruction, Begin, End); 700 MCInst *PCRelBase; 701 if (!analyzeIndirectBranchFragment(Instruction, UDChain, DispExpr, 702 DispValue, ScaleValue, PCRelBase)) 703 return IndirectBranchType::UNKNOWN; 704 705 MemLocInstrOut = MemLocInstr; 706 DispValueOut = DispValue; 707 DispExprOut = DispExpr; 708 PCRelBaseOut = PCRelBase; 709 return IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE; 710 } 711 712 unsigned getInvertedBranchOpcode(unsigned Opcode) const { 713 switch (Opcode) { 714 default: 715 llvm_unreachable("Failed to invert branch opcode"); 716 return Opcode; 717 case AArch64::TBZW: return AArch64::TBNZW; 718 case AArch64::TBZX: return AArch64::TBNZX; 719 case AArch64::TBNZW: return AArch64::TBZW; 720 case AArch64::TBNZX: return AArch64::TBZX; 721 case AArch64::CBZW: return AArch64::CBNZW; 722 case AArch64::CBZX: return AArch64::CBNZX; 723 case AArch64::CBNZW: return AArch64::CBZW; 724 case AArch64::CBNZX: return AArch64::CBZX; 725 } 726 } 727 728 unsigned getCondCode(const MCInst &Inst) const override { 729 // AArch64 does not use conditional codes, so we just return the opcode 730 // of the conditional branch here. 731 return Inst.getOpcode(); 732 } 733 734 unsigned getCanonicalBranchCondCode(unsigned Opcode) const override { 735 switch (Opcode) { 736 default: 737 return Opcode; 738 case AArch64::TBNZW: return AArch64::TBZW; 739 case AArch64::TBNZX: return AArch64::TBZX; 740 case AArch64::CBNZW: return AArch64::CBZW; 741 case AArch64::CBNZX: return AArch64::CBZX; 742 } 743 } 744 745 bool reverseBranchCondition(MCInst &Inst, const MCSymbol *TBB, 746 MCContext *Ctx) const override { 747 if (isTB(Inst) || isCB(Inst)) { 748 Inst.setOpcode(getInvertedBranchOpcode(Inst.getOpcode())); 749 assert(Inst.getOpcode() != 0 && "Invalid branch instruction"); 750 } else if (Inst.getOpcode() == AArch64::Bcc) { 751 Inst.getOperand(0).setImm(AArch64CC::getInvertedCondCode( 752 static_cast<AArch64CC::CondCode>(Inst.getOperand(0).getImm()))); 753 assert(Inst.getOperand(0).getImm() != AArch64CC::AL && 754 Inst.getOperand(0).getImm() != AArch64CC::NV && 755 "Can't reverse ALWAYS cond code"); 756 } else { 757 LLVM_DEBUG(Inst.dump()); 758 llvm_unreachable("Unrecognized branch instruction"); 759 } 760 return replaceBranchTarget(Inst, TBB, Ctx); 761 } 762 763 int getPCRelEncodingSize(const MCInst &Inst) const override { 764 switch (Inst.getOpcode()) { 765 default: 766 llvm_unreachable("Failed to get pcrel encoding size"); 767 return 0; 768 case AArch64::TBZW: return 16; 769 case AArch64::TBZX: return 16; 770 case AArch64::TBNZW: return 16; 771 case AArch64::TBNZX: return 16; 772 case AArch64::CBZW: return 21; 773 case AArch64::CBZX: return 21; 774 case AArch64::CBNZW: return 21; 775 case AArch64::CBNZX: return 21; 776 case AArch64::B: return 28; 777 case AArch64::BL: return 28; 778 case AArch64::Bcc: return 21; 779 } 780 } 781 782 int getShortJmpEncodingSize() const override { return 33; } 783 784 int getUncondBranchEncodingSize() const override { return 28; } 785 786 bool createTailCall(MCInst &Inst, const MCSymbol *Target, 787 MCContext *Ctx) override { 788 Inst.setOpcode(AArch64::B); 789 Inst.addOperand(MCOperand::createExpr(getTargetExprFor( 790 Inst, MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx), 791 *Ctx, 0))); 792 setTailCall(Inst); 793 return true; 794 } 795 796 void createLongTailCall(InstructionListType &Seq, const MCSymbol *Target, 797 MCContext *Ctx) override { 798 createShortJmp(Seq, Target, Ctx, /*IsTailCall*/ true); 799 } 800 801 bool convertJmpToTailCall(MCInst &Inst) override { 802 setTailCall(Inst); 803 return true; 804 } 805 806 bool convertTailCallToJmp(MCInst &Inst) override { 807 removeAnnotation(Inst, MCPlus::MCAnnotation::kTailCall); 808 removeAnnotation(Inst, "Offset"); 809 if (getConditionalTailCall(Inst)) 810 unsetConditionalTailCall(Inst); 811 return true; 812 } 813 814 bool lowerTailCall(MCInst &Inst) override { 815 removeAnnotation(Inst, MCPlus::MCAnnotation::kTailCall); 816 if (getConditionalTailCall(Inst)) 817 unsetConditionalTailCall(Inst); 818 return true; 819 } 820 821 bool isNoop(const MCInst &Inst) const override { 822 return Inst.getOpcode() == AArch64::HINT && 823 Inst.getOperand(0).getImm() == 0; 824 } 825 826 bool createNoop(MCInst &Inst) const override { 827 Inst.setOpcode(AArch64::HINT); 828 Inst.clear(); 829 Inst.addOperand(MCOperand::createImm(0)); 830 return true; 831 } 832 833 bool isStore(const MCInst &Inst) const override { return false; } 834 835 bool analyzeBranch(InstructionIterator Begin, InstructionIterator End, 836 const MCSymbol *&TBB, const MCSymbol *&FBB, 837 MCInst *&CondBranch, 838 MCInst *&UncondBranch) const override { 839 auto I = End; 840 841 while (I != Begin) { 842 --I; 843 844 // Ignore nops and CFIs 845 if (isPseudo(*I) || isNoop(*I)) 846 continue; 847 848 // Stop when we find the first non-terminator 849 if (!isTerminator(*I) || isTailCall(*I) || !isBranch(*I)) 850 break; 851 852 // Handle unconditional branches. 853 if (isUnconditionalBranch(*I)) { 854 // If any code was seen after this unconditional branch, we've seen 855 // unreachable code. Ignore them. 856 CondBranch = nullptr; 857 UncondBranch = &*I; 858 const MCSymbol *Sym = getTargetSymbol(*I); 859 assert(Sym != nullptr && 860 "Couldn't extract BB symbol from jump operand"); 861 TBB = Sym; 862 continue; 863 } 864 865 // Handle conditional branches and ignore indirect branches 866 if (isIndirectBranch(*I)) 867 return false; 868 869 if (CondBranch == nullptr) { 870 const MCSymbol *TargetBB = getTargetSymbol(*I); 871 if (TargetBB == nullptr) { 872 // Unrecognized branch target 873 return false; 874 } 875 FBB = TBB; 876 TBB = TargetBB; 877 CondBranch = &*I; 878 continue; 879 } 880 881 llvm_unreachable("multiple conditional branches in one BB"); 882 } 883 return true; 884 } 885 886 void createLongJmp(InstructionListType &Seq, const MCSymbol *Target, 887 MCContext *Ctx, bool IsTailCall) override { 888 // ip0 (r16) is reserved to the linker (refer to 5.3.1.1 of "Procedure Call 889 // Standard for the ARM 64-bit Architecture (AArch64)". 890 // The sequence of instructions we create here is the following: 891 // movz ip0, #:abs_g3:<addr> 892 // movk ip0, #:abs_g2_nc:<addr> 893 // movk ip0, #:abs_g1_nc:<addr> 894 // movk ip0, #:abs_g0_nc:<addr> 895 // br ip0 896 MCInst Inst; 897 Inst.setOpcode(AArch64::MOVZXi); 898 Inst.addOperand(MCOperand::createReg(AArch64::X16)); 899 Inst.addOperand(MCOperand::createExpr(AArch64MCExpr::create( 900 MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx), 901 AArch64MCExpr::VK_ABS_G3, *Ctx))); 902 Inst.addOperand(MCOperand::createImm(0x30)); 903 Seq.emplace_back(Inst); 904 905 Inst.clear(); 906 Inst.setOpcode(AArch64::MOVKXi); 907 Inst.addOperand(MCOperand::createReg(AArch64::X16)); 908 Inst.addOperand(MCOperand::createReg(AArch64::X16)); 909 Inst.addOperand(MCOperand::createExpr(AArch64MCExpr::create( 910 MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx), 911 AArch64MCExpr::VK_ABS_G2_NC, *Ctx))); 912 Inst.addOperand(MCOperand::createImm(0x20)); 913 Seq.emplace_back(Inst); 914 915 Inst.clear(); 916 Inst.setOpcode(AArch64::MOVKXi); 917 Inst.addOperand(MCOperand::createReg(AArch64::X16)); 918 Inst.addOperand(MCOperand::createReg(AArch64::X16)); 919 Inst.addOperand(MCOperand::createExpr(AArch64MCExpr::create( 920 MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx), 921 AArch64MCExpr::VK_ABS_G1_NC, *Ctx))); 922 Inst.addOperand(MCOperand::createImm(0x10)); 923 Seq.emplace_back(Inst); 924 925 Inst.clear(); 926 Inst.setOpcode(AArch64::MOVKXi); 927 Inst.addOperand(MCOperand::createReg(AArch64::X16)); 928 Inst.addOperand(MCOperand::createReg(AArch64::X16)); 929 Inst.addOperand(MCOperand::createExpr(AArch64MCExpr::create( 930 MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx), 931 AArch64MCExpr::VK_ABS_G0_NC, *Ctx))); 932 Inst.addOperand(MCOperand::createImm(0)); 933 Seq.emplace_back(Inst); 934 935 Inst.clear(); 936 Inst.setOpcode(AArch64::BR); 937 Inst.addOperand(MCOperand::createReg(AArch64::X16)); 938 if (IsTailCall) 939 setTailCall(Inst); 940 Seq.emplace_back(Inst); 941 } 942 943 void createShortJmp(InstructionListType &Seq, const MCSymbol *Target, 944 MCContext *Ctx, bool IsTailCall) override { 945 // ip0 (r16) is reserved to the linker (refer to 5.3.1.1 of "Procedure Call 946 // Standard for the ARM 64-bit Architecture (AArch64)". 947 // The sequence of instructions we create here is the following: 948 // adrp ip0, imm 949 // add ip0, ip0, imm 950 // br ip0 951 MCPhysReg Reg = AArch64::X16; 952 InstructionListType Insts = materializeAddress(Target, Ctx, Reg); 953 Insts.emplace_back(); 954 MCInst &Inst = Insts.back(); 955 Inst.clear(); 956 Inst.setOpcode(AArch64::BR); 957 Inst.addOperand(MCOperand::createReg(Reg)); 958 if (IsTailCall) 959 setTailCall(Inst); 960 Seq.swap(Insts); 961 } 962 963 /// Matching pattern here is 964 /// 965 /// ADRP x16, imm 966 /// ADD x16, x16, imm 967 /// BR x16 968 /// 969 bool matchLinkerVeneer(InstructionIterator Begin, InstructionIterator End, 970 uint64_t Address, const MCInst &CurInst, 971 MCInst *&TargetHiBits, MCInst *&TargetLowBits, 972 uint64_t &Target) const override { 973 if (CurInst.getOpcode() != AArch64::BR || !CurInst.getOperand(0).isReg() || 974 CurInst.getOperand(0).getReg() != AArch64::X16) 975 return false; 976 977 auto I = End; 978 if (I == Begin) 979 return false; 980 981 --I; 982 Address -= 4; 983 if (I == Begin || I->getOpcode() != AArch64::ADDXri || 984 MCPlus::getNumPrimeOperands(*I) < 3 || !I->getOperand(0).isReg() || 985 !I->getOperand(1).isReg() || 986 I->getOperand(0).getReg() != AArch64::X16 || 987 I->getOperand(1).getReg() != AArch64::X16 || !I->getOperand(2).isImm()) 988 return false; 989 TargetLowBits = &*I; 990 uint64_t Addr = I->getOperand(2).getImm() & 0xFFF; 991 992 --I; 993 Address -= 4; 994 if (I->getOpcode() != AArch64::ADRP || 995 MCPlus::getNumPrimeOperands(*I) < 2 || !I->getOperand(0).isReg() || 996 !I->getOperand(1).isImm() || I->getOperand(0).getReg() != AArch64::X16) 997 return false; 998 TargetHiBits = &*I; 999 Addr |= (Address + ((int64_t)I->getOperand(1).getImm() << 12)) & 1000 0xFFFFFFFFFFFFF000ULL; 1001 Target = Addr; 1002 return true; 1003 } 1004 1005 bool replaceImmWithSymbolRef(MCInst &Inst, const MCSymbol *Symbol, 1006 int64_t Addend, MCContext *Ctx, int64_t &Value, 1007 uint64_t RelType) const override { 1008 unsigned ImmOpNo = -1U; 1009 for (unsigned Index = 0; Index < MCPlus::getNumPrimeOperands(Inst); 1010 ++Index) { 1011 if (Inst.getOperand(Index).isImm()) { 1012 ImmOpNo = Index; 1013 break; 1014 } 1015 } 1016 if (ImmOpNo == -1U) 1017 return false; 1018 1019 Value = Inst.getOperand(ImmOpNo).getImm(); 1020 1021 setOperandToSymbolRef(Inst, ImmOpNo, Symbol, Addend, Ctx, RelType); 1022 1023 return true; 1024 } 1025 1026 bool createUncondBranch(MCInst &Inst, const MCSymbol *TBB, 1027 MCContext *Ctx) const override { 1028 Inst.setOpcode(AArch64::B); 1029 Inst.clear(); 1030 Inst.addOperand(MCOperand::createExpr(getTargetExprFor( 1031 Inst, MCSymbolRefExpr::create(TBB, MCSymbolRefExpr::VK_None, *Ctx), 1032 *Ctx, 0))); 1033 return true; 1034 } 1035 1036 bool isMoveMem2Reg(const MCInst &Inst) const override { return false; } 1037 1038 bool isADD64rr(const MCInst &Inst) const override { return false; } 1039 1040 bool isLeave(const MCInst &Inst) const override { return false; } 1041 1042 bool isPop(const MCInst &Inst) const override { return false; } 1043 1044 bool isPrefix(const MCInst &Inst) const override { return false; } 1045 1046 bool deleteREPPrefix(MCInst &Inst) const override { return false; } 1047 1048 bool createReturn(MCInst &Inst) const override { 1049 Inst.setOpcode(AArch64::RET); 1050 Inst.clear(); 1051 Inst.addOperand(MCOperand::createReg(AArch64::LR)); 1052 return true; 1053 } 1054 1055 InstructionListType materializeAddress(const MCSymbol *Target, MCContext *Ctx, 1056 MCPhysReg RegName, 1057 int64_t Addend = 0) const override { 1058 // Get page-aligned address and add page offset 1059 InstructionListType Insts(2); 1060 Insts[0].setOpcode(AArch64::ADRP); 1061 Insts[0].clear(); 1062 Insts[0].addOperand(MCOperand::createReg(RegName)); 1063 Insts[0].addOperand(MCOperand::createImm(0)); 1064 setOperandToSymbolRef(Insts[0], /* OpNum */ 1, Target, Addend, Ctx, 1065 ELF::R_AARCH64_NONE); 1066 Insts[1].setOpcode(AArch64::ADDXri); 1067 Insts[1].clear(); 1068 Insts[1].addOperand(MCOperand::createReg(RegName)); 1069 Insts[1].addOperand(MCOperand::createReg(RegName)); 1070 Insts[1].addOperand(MCOperand::createImm(0)); 1071 Insts[1].addOperand(MCOperand::createImm(0)); 1072 setOperandToSymbolRef(Insts[1], /* OpNum */ 2, Target, Addend, Ctx, 1073 ELF::R_AARCH64_ADD_ABS_LO12_NC); 1074 return Insts; 1075 } 1076 }; 1077 1078 } // end anonymous namespace 1079 1080 namespace llvm { 1081 namespace bolt { 1082 1083 MCPlusBuilder *createAArch64MCPlusBuilder(const MCInstrAnalysis *Analysis, 1084 const MCInstrInfo *Info, 1085 const MCRegisterInfo *RegInfo) { 1086 return new AArch64MCPlusBuilder(Analysis, Info, RegInfo); 1087 } 1088 1089 } // namespace bolt 1090 } // namespace llvm 1091