1 //===- bolt/Target/AArch64/AArch64MCPlusBuilder.cpp -----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file provides AArch64-specific MCPlus builder. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "MCTargetDesc/AArch64AddressingModes.h" 14 #include "MCTargetDesc/AArch64MCExpr.h" 15 #include "MCTargetDesc/AArch64MCTargetDesc.h" 16 #include "Utils/AArch64BaseInfo.h" 17 #include "bolt/Core/MCPlusBuilder.h" 18 #include "llvm/BinaryFormat/ELF.h" 19 #include "llvm/MC/MCInstrInfo.h" 20 #include "llvm/MC/MCRegisterInfo.h" 21 #include "llvm/Support/Debug.h" 22 #include "llvm/Support/ErrorHandling.h" 23 24 #define DEBUG_TYPE "mcplus" 25 26 using namespace llvm; 27 using namespace bolt; 28 29 namespace { 30 31 class AArch64MCPlusBuilder : public MCPlusBuilder { 32 public: 33 AArch64MCPlusBuilder(const MCInstrAnalysis *Analysis, const MCInstrInfo *Info, 34 const MCRegisterInfo *RegInfo) 35 : MCPlusBuilder(Analysis, Info, RegInfo) {} 36 37 bool equals(const MCTargetExpr &A, const MCTargetExpr &B, 38 CompFuncTy Comp) const override { 39 const auto &AArch64ExprA = cast<AArch64MCExpr>(A); 40 const auto &AArch64ExprB = cast<AArch64MCExpr>(B); 41 if (AArch64ExprA.getKind() != AArch64ExprB.getKind()) 42 return false; 43 44 return MCPlusBuilder::equals(*AArch64ExprA.getSubExpr(), 45 *AArch64ExprB.getSubExpr(), Comp); 46 } 47 48 bool hasEVEXEncoding(const MCInst &) const override { return false; } 49 50 bool isMacroOpFusionPair(ArrayRef<MCInst> Insts) const override { 51 return false; 52 } 53 54 bool shortenInstruction(MCInst &) const override { return false; } 55 56 bool isADRP(const MCInst &Inst) const override { 57 return Inst.getOpcode() == AArch64::ADRP; 58 } 59 60 bool isADR(const MCInst &Inst) const override { 61 return Inst.getOpcode() == AArch64::ADR; 62 } 63 64 void getADRReg(const MCInst &Inst, MCPhysReg &RegName) const override { 65 assert((isADR(Inst) || isADRP(Inst)) && "Not an ADR instruction"); 66 assert(MCPlus::getNumPrimeOperands(Inst) != 0 && 67 "No operands for ADR instruction"); 68 assert(Inst.getOperand(0).isReg() && 69 "Unexpected operand in ADR instruction"); 70 RegName = Inst.getOperand(0).getReg(); 71 } 72 73 bool isTB(const MCInst &Inst) const { 74 return (Inst.getOpcode() == AArch64::TBNZW || 75 Inst.getOpcode() == AArch64::TBNZX || 76 Inst.getOpcode() == AArch64::TBZW || 77 Inst.getOpcode() == AArch64::TBZX); 78 } 79 80 bool isCB(const MCInst &Inst) const { 81 return (Inst.getOpcode() == AArch64::CBNZW || 82 Inst.getOpcode() == AArch64::CBNZX || 83 Inst.getOpcode() == AArch64::CBZW || 84 Inst.getOpcode() == AArch64::CBZX); 85 } 86 87 bool isMOVW(const MCInst &Inst) const { 88 return (Inst.getOpcode() == AArch64::MOVKWi || 89 Inst.getOpcode() == AArch64::MOVKXi || 90 Inst.getOpcode() == AArch64::MOVNWi || 91 Inst.getOpcode() == AArch64::MOVNXi || 92 Inst.getOpcode() == AArch64::MOVZXi || 93 Inst.getOpcode() == AArch64::MOVZWi); 94 } 95 96 bool isADD(const MCInst &Inst) const { 97 return (Inst.getOpcode() == AArch64::ADDSWri || 98 Inst.getOpcode() == AArch64::ADDSWrr || 99 Inst.getOpcode() == AArch64::ADDSWrs || 100 Inst.getOpcode() == AArch64::ADDSWrx || 101 Inst.getOpcode() == AArch64::ADDSXri || 102 Inst.getOpcode() == AArch64::ADDSXrr || 103 Inst.getOpcode() == AArch64::ADDSXrs || 104 Inst.getOpcode() == AArch64::ADDSXrx || 105 Inst.getOpcode() == AArch64::ADDSXrx64 || 106 Inst.getOpcode() == AArch64::ADDWri || 107 Inst.getOpcode() == AArch64::ADDWrr || 108 Inst.getOpcode() == AArch64::ADDWrs || 109 Inst.getOpcode() == AArch64::ADDWrx || 110 Inst.getOpcode() == AArch64::ADDXri || 111 Inst.getOpcode() == AArch64::ADDXrr || 112 Inst.getOpcode() == AArch64::ADDXrs || 113 Inst.getOpcode() == AArch64::ADDXrx || 114 Inst.getOpcode() == AArch64::ADDXrx64); 115 } 116 117 bool isLDRB(const MCInst &Inst) const { 118 return (Inst.getOpcode() == AArch64::LDRBBpost || 119 Inst.getOpcode() == AArch64::LDRBBpre || 120 Inst.getOpcode() == AArch64::LDRBBroW || 121 Inst.getOpcode() == AArch64::LDRBBroX || 122 Inst.getOpcode() == AArch64::LDRBBui || 123 Inst.getOpcode() == AArch64::LDRSBWpost || 124 Inst.getOpcode() == AArch64::LDRSBWpre || 125 Inst.getOpcode() == AArch64::LDRSBWroW || 126 Inst.getOpcode() == AArch64::LDRSBWroX || 127 Inst.getOpcode() == AArch64::LDRSBWui || 128 Inst.getOpcode() == AArch64::LDRSBXpost || 129 Inst.getOpcode() == AArch64::LDRSBXpre || 130 Inst.getOpcode() == AArch64::LDRSBXroW || 131 Inst.getOpcode() == AArch64::LDRSBXroX || 132 Inst.getOpcode() == AArch64::LDRSBXui); 133 } 134 135 bool isLDRH(const MCInst &Inst) const { 136 return (Inst.getOpcode() == AArch64::LDRHHpost || 137 Inst.getOpcode() == AArch64::LDRHHpre || 138 Inst.getOpcode() == AArch64::LDRHHroW || 139 Inst.getOpcode() == AArch64::LDRHHroX || 140 Inst.getOpcode() == AArch64::LDRHHui || 141 Inst.getOpcode() == AArch64::LDRSHWpost || 142 Inst.getOpcode() == AArch64::LDRSHWpre || 143 Inst.getOpcode() == AArch64::LDRSHWroW || 144 Inst.getOpcode() == AArch64::LDRSHWroX || 145 Inst.getOpcode() == AArch64::LDRSHWui || 146 Inst.getOpcode() == AArch64::LDRSHXpost || 147 Inst.getOpcode() == AArch64::LDRSHXpre || 148 Inst.getOpcode() == AArch64::LDRSHXroW || 149 Inst.getOpcode() == AArch64::LDRSHXroX || 150 Inst.getOpcode() == AArch64::LDRSHXui); 151 } 152 153 bool isLDRW(const MCInst &Inst) const { 154 return (Inst.getOpcode() == AArch64::LDRWpost || 155 Inst.getOpcode() == AArch64::LDRWpre || 156 Inst.getOpcode() == AArch64::LDRWroW || 157 Inst.getOpcode() == AArch64::LDRWroX || 158 Inst.getOpcode() == AArch64::LDRWui); 159 } 160 161 bool isLDRX(const MCInst &Inst) const { 162 return (Inst.getOpcode() == AArch64::LDRXpost || 163 Inst.getOpcode() == AArch64::LDRXpre || 164 Inst.getOpcode() == AArch64::LDRXroW || 165 Inst.getOpcode() == AArch64::LDRXroX || 166 Inst.getOpcode() == AArch64::LDRXui); 167 } 168 169 bool isLoad(const MCInst &Inst) const override { 170 return isLDRB(Inst) || isLDRH(Inst) || isLDRW(Inst) || isLDRX(Inst); 171 } 172 173 bool isLoadFromStack(const MCInst &Inst) const { 174 if (!isLoad(Inst)) 175 return false; 176 const MCInstrDesc &InstInfo = Info->get(Inst.getOpcode()); 177 unsigned NumDefs = InstInfo.getNumDefs(); 178 for (unsigned I = NumDefs, E = InstInfo.getNumOperands(); I < E; ++I) { 179 const MCOperand &Operand = Inst.getOperand(I); 180 if (!Operand.isReg()) 181 continue; 182 unsigned Reg = Operand.getReg(); 183 if (Reg == AArch64::SP || Reg == AArch64::WSP || Reg == AArch64::FP || 184 Reg == AArch64::W29) 185 return true; 186 } 187 return false; 188 } 189 190 bool isRegToRegMove(const MCInst &Inst, MCPhysReg &From, 191 MCPhysReg &To) const override { 192 if (Inst.getOpcode() != AArch64::ORRXrs) 193 return false; 194 if (Inst.getOperand(1).getReg() != AArch64::XZR) 195 return false; 196 if (Inst.getOperand(3).getImm() != 0) 197 return false; 198 From = Inst.getOperand(2).getReg(); 199 To = Inst.getOperand(0).getReg(); 200 return true; 201 } 202 203 bool isIndirectCall(const MCInst &Inst) const override { 204 return Inst.getOpcode() == AArch64::BLR; 205 } 206 207 MCPhysReg getNoRegister() const override { return AArch64::NoRegister; } 208 209 bool hasPCRelOperand(const MCInst &Inst) const override { 210 // ADRP is blacklisted and is an exception. Even though it has a 211 // PC-relative operand, this operand is not a complete symbol reference 212 // and BOLT shouldn't try to process it in isolation. 213 if (isADRP(Inst)) 214 return false; 215 216 if (isADR(Inst)) 217 return true; 218 219 // Look for literal addressing mode (see C1-143 ARM DDI 0487B.a) 220 const MCInstrDesc &MCII = Info->get(Inst.getOpcode()); 221 for (unsigned I = 0, E = MCII.getNumOperands(); I != E; ++I) 222 if (MCII.OpInfo[I].OperandType == MCOI::OPERAND_PCREL) 223 return true; 224 225 return false; 226 } 227 228 bool evaluateADR(const MCInst &Inst, int64_t &Imm, 229 const MCExpr **DispExpr) const { 230 assert((isADR(Inst) || isADRP(Inst)) && "Not an ADR instruction"); 231 232 const MCOperand &Label = Inst.getOperand(1); 233 if (!Label.isImm()) { 234 assert(Label.isExpr() && "Unexpected ADR operand"); 235 assert(DispExpr && "DispExpr must be set"); 236 *DispExpr = Label.getExpr(); 237 return false; 238 } 239 240 if (Inst.getOpcode() == AArch64::ADR) { 241 Imm = Label.getImm(); 242 return true; 243 } 244 Imm = Label.getImm() << 12; 245 return true; 246 } 247 248 bool evaluateAArch64MemoryOperand(const MCInst &Inst, int64_t &DispImm, 249 const MCExpr **DispExpr = nullptr) const { 250 if (isADR(Inst) || isADRP(Inst)) 251 return evaluateADR(Inst, DispImm, DispExpr); 252 253 // Literal addressing mode 254 const MCInstrDesc &MCII = Info->get(Inst.getOpcode()); 255 for (unsigned I = 0, E = MCII.getNumOperands(); I != E; ++I) { 256 if (MCII.OpInfo[I].OperandType != MCOI::OPERAND_PCREL) 257 continue; 258 259 if (!Inst.getOperand(I).isImm()) { 260 assert(Inst.getOperand(I).isExpr() && "Unexpected PCREL operand"); 261 assert(DispExpr && "DispExpr must be set"); 262 *DispExpr = Inst.getOperand(I).getExpr(); 263 return true; 264 } 265 266 DispImm = Inst.getOperand(I).getImm() << 2; 267 return true; 268 } 269 return false; 270 } 271 272 bool evaluateMemOperandTarget(const MCInst &Inst, uint64_t &Target, 273 uint64_t Address, 274 uint64_t Size) const override { 275 int64_t DispValue; 276 const MCExpr *DispExpr = nullptr; 277 if (!evaluateAArch64MemoryOperand(Inst, DispValue, &DispExpr)) 278 return false; 279 280 // Make sure it's a well-formed addressing we can statically evaluate. 281 if (DispExpr) 282 return false; 283 284 Target = DispValue; 285 if (Inst.getOpcode() == AArch64::ADRP) 286 Target += Address & ~0xFFFULL; 287 else 288 Target += Address; 289 return true; 290 } 291 292 bool replaceMemOperandDisp(MCInst &Inst, MCOperand Operand) const override { 293 MCInst::iterator OI = Inst.begin(); 294 if (isADR(Inst) || isADRP(Inst)) { 295 assert(MCPlus::getNumPrimeOperands(Inst) >= 2 && 296 "Unexpected number of operands"); 297 ++OI; 298 } else { 299 const MCInstrDesc &MCII = Info->get(Inst.getOpcode()); 300 for (unsigned I = 0, E = MCII.getNumOperands(); I != E; ++I) { 301 if (MCII.OpInfo[I].OperandType == MCOI::OPERAND_PCREL) 302 break; 303 ++OI; 304 } 305 assert(OI != Inst.end() && "Literal operand not found"); 306 } 307 *OI = Operand; 308 return true; 309 } 310 311 const MCExpr *getTargetExprFor(MCInst &Inst, const MCExpr *Expr, 312 MCContext &Ctx, 313 uint64_t RelType) const override { 314 315 if (isADR(Inst) || RelType == ELF::R_AARCH64_ADR_PREL_LO21 || 316 RelType == ELF::R_AARCH64_TLSDESC_ADR_PREL21) { 317 return AArch64MCExpr::create(Expr, AArch64MCExpr::VK_ABS, Ctx); 318 } else if (isADRP(Inst) || RelType == ELF::R_AARCH64_ADR_PREL_PG_HI21 || 319 RelType == ELF::R_AARCH64_ADR_PREL_PG_HI21_NC || 320 RelType == ELF::R_AARCH64_TLSDESC_ADR_PAGE21 || 321 RelType == ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 || 322 RelType == ELF::R_AARCH64_ADR_GOT_PAGE) { 323 // Never emit a GOT reloc, we handled this in 324 // RewriteInstance::readRelocations(). 325 return AArch64MCExpr::create(Expr, AArch64MCExpr::VK_ABS_PAGE, Ctx); 326 } else { 327 switch (RelType) { 328 case ELF::R_AARCH64_ADD_ABS_LO12_NC: 329 case ELF::R_AARCH64_LD64_GOT_LO12_NC: 330 case ELF::R_AARCH64_LDST8_ABS_LO12_NC: 331 case ELF::R_AARCH64_LDST16_ABS_LO12_NC: 332 case ELF::R_AARCH64_LDST32_ABS_LO12_NC: 333 case ELF::R_AARCH64_LDST64_ABS_LO12_NC: 334 case ELF::R_AARCH64_LDST128_ABS_LO12_NC: 335 case ELF::R_AARCH64_TLSDESC_ADD_LO12: 336 case ELF::R_AARCH64_TLSDESC_LD64_LO12: 337 case ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: 338 case ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: 339 return AArch64MCExpr::create(Expr, AArch64MCExpr::VK_LO12, Ctx); 340 case ELF::R_AARCH64_MOVW_UABS_G3: 341 return AArch64MCExpr::create(Expr, AArch64MCExpr::VK_ABS_G3, Ctx); 342 case ELF::R_AARCH64_MOVW_UABS_G2: 343 case ELF::R_AARCH64_MOVW_UABS_G2_NC: 344 return AArch64MCExpr::create(Expr, AArch64MCExpr::VK_ABS_G2_NC, Ctx); 345 case ELF::R_AARCH64_MOVW_UABS_G1: 346 case ELF::R_AARCH64_MOVW_UABS_G1_NC: 347 return AArch64MCExpr::create(Expr, AArch64MCExpr::VK_ABS_G1_NC, Ctx); 348 case ELF::R_AARCH64_MOVW_UABS_G0: 349 case ELF::R_AARCH64_MOVW_UABS_G0_NC: 350 return AArch64MCExpr::create(Expr, AArch64MCExpr::VK_ABS_G0_NC, Ctx); 351 default: 352 break; 353 } 354 } 355 return Expr; 356 } 357 358 bool getSymbolRefOperandNum(const MCInst &Inst, unsigned &OpNum) const { 359 if (OpNum >= MCPlus::getNumPrimeOperands(Inst)) 360 return false; 361 362 // Auto-select correct operand number 363 if (OpNum == 0) { 364 if (isConditionalBranch(Inst) || isADR(Inst) || isADRP(Inst)) 365 OpNum = 1; 366 if (isTB(Inst)) 367 OpNum = 2; 368 if (isMOVW(Inst)) 369 OpNum = 1; 370 } 371 372 return true; 373 } 374 375 const MCSymbol *getTargetSymbol(const MCExpr *Expr) const override { 376 auto *AArchExpr = dyn_cast<AArch64MCExpr>(Expr); 377 if (AArchExpr && AArchExpr->getSubExpr()) 378 return getTargetSymbol(AArchExpr->getSubExpr()); 379 380 auto *BinExpr = dyn_cast<MCBinaryExpr>(Expr); 381 if (BinExpr) 382 return getTargetSymbol(BinExpr->getLHS()); 383 384 auto *SymExpr = dyn_cast<MCSymbolRefExpr>(Expr); 385 if (SymExpr && SymExpr->getKind() == MCSymbolRefExpr::VK_None) 386 return &SymExpr->getSymbol(); 387 388 return nullptr; 389 } 390 391 const MCSymbol *getTargetSymbol(const MCInst &Inst, 392 unsigned OpNum = 0) const override { 393 if (!getSymbolRefOperandNum(Inst, OpNum)) 394 return nullptr; 395 396 const MCOperand &Op = Inst.getOperand(OpNum); 397 if (!Op.isExpr()) 398 return nullptr; 399 400 return getTargetSymbol(Op.getExpr()); 401 } 402 403 int64_t getTargetAddend(const MCExpr *Expr) const override { 404 auto *AArchExpr = dyn_cast<AArch64MCExpr>(Expr); 405 if (AArchExpr && AArchExpr->getSubExpr()) 406 return getTargetAddend(AArchExpr->getSubExpr()); 407 408 auto *BinExpr = dyn_cast<MCBinaryExpr>(Expr); 409 if (BinExpr && BinExpr->getOpcode() == MCBinaryExpr::Add) 410 return getTargetAddend(BinExpr->getRHS()); 411 412 auto *ConstExpr = dyn_cast<MCConstantExpr>(Expr); 413 if (ConstExpr) 414 return ConstExpr->getValue(); 415 416 return 0; 417 } 418 419 int64_t getTargetAddend(const MCInst &Inst, 420 unsigned OpNum = 0) const override { 421 if (!getSymbolRefOperandNum(Inst, OpNum)) 422 return 0; 423 424 const MCOperand &Op = Inst.getOperand(OpNum); 425 if (!Op.isExpr()) 426 return 0; 427 428 return getTargetAddend(Op.getExpr()); 429 } 430 431 bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, 432 uint64_t &Target) const override { 433 size_t OpNum = 0; 434 435 if (isConditionalBranch(Inst)) { 436 assert(MCPlus::getNumPrimeOperands(Inst) >= 2 && 437 "Invalid number of operands"); 438 OpNum = 1; 439 } 440 441 if (isTB(Inst)) { 442 assert(MCPlus::getNumPrimeOperands(Inst) >= 3 && 443 "Invalid number of operands"); 444 OpNum = 2; 445 } 446 447 if (Info->get(Inst.getOpcode()).OpInfo[OpNum].OperandType != 448 MCOI::OPERAND_PCREL) { 449 assert((isIndirectBranch(Inst) || isIndirectCall(Inst)) && 450 "FAILED evaluateBranch"); 451 return false; 452 } 453 454 int64_t Imm = Inst.getOperand(OpNum).getImm() << 2; 455 Target = Addr + Imm; 456 return true; 457 } 458 459 bool replaceBranchTarget(MCInst &Inst, const MCSymbol *TBB, 460 MCContext *Ctx) const override { 461 assert((isCall(Inst) || isBranch(Inst)) && !isIndirectBranch(Inst) && 462 "Invalid instruction"); 463 assert(MCPlus::getNumPrimeOperands(Inst) >= 1 && 464 "Invalid number of operands"); 465 MCInst::iterator OI = Inst.begin(); 466 467 if (isConditionalBranch(Inst)) { 468 assert(MCPlus::getNumPrimeOperands(Inst) >= 2 && 469 "Invalid number of operands"); 470 ++OI; 471 } 472 473 if (isTB(Inst)) { 474 assert(MCPlus::getNumPrimeOperands(Inst) >= 3 && 475 "Invalid number of operands"); 476 OI = Inst.begin() + 2; 477 } 478 479 *OI = MCOperand::createExpr( 480 MCSymbolRefExpr::create(TBB, MCSymbolRefExpr::VK_None, *Ctx)); 481 return true; 482 } 483 484 /// Matches indirect branch patterns in AArch64 related to a jump table (JT), 485 /// helping us to build the complete CFG. A typical indirect branch to 486 /// a jump table entry in AArch64 looks like the following: 487 /// 488 /// adrp x1, #-7585792 # Get JT Page location 489 /// add x1, x1, #692 # Complement with JT Page offset 490 /// ldrh w0, [x1, w0, uxtw #1] # Loads JT entry 491 /// adr x1, #12 # Get PC + 12 (end of this BB) used next 492 /// add x0, x1, w0, sxth #2 # Finish building branch target 493 /// # (entries in JT are relative to the end 494 /// # of this BB) 495 /// br x0 # Indirect jump instruction 496 /// 497 bool analyzeIndirectBranchFragment( 498 const MCInst &Inst, 499 DenseMap<const MCInst *, SmallVector<MCInst *, 4>> &UDChain, 500 const MCExpr *&JumpTable, int64_t &Offset, int64_t &ScaleValue, 501 MCInst *&PCRelBase) const { 502 // Expect AArch64 BR 503 assert(Inst.getOpcode() == AArch64::BR && "Unexpected opcode"); 504 505 // Match the indirect branch pattern for aarch64 506 SmallVector<MCInst *, 4> &UsesRoot = UDChain[&Inst]; 507 if (UsesRoot.size() == 0 || UsesRoot[0] == nullptr) 508 return false; 509 510 const MCInst *DefAdd = UsesRoot[0]; 511 512 // Now we match an ADD 513 if (!isADD(*DefAdd)) { 514 // If the address is not broken up in two parts, this is not branching 515 // according to a jump table entry. Fail. 516 return false; 517 } 518 if (DefAdd->getOpcode() == AArch64::ADDXri) { 519 // This can happen when there is no offset, but a direct jump that was 520 // transformed into an indirect one (indirect tail call) : 521 // ADRP x2, Perl_re_compiler 522 // ADD x2, x2, :lo12:Perl_re_compiler 523 // BR x2 524 return false; 525 } 526 if (DefAdd->getOpcode() == AArch64::ADDXrs) { 527 // Covers the less common pattern where JT entries are relative to 528 // the JT itself (like x86). Seems less efficient since we can't 529 // assume the JT is aligned at 4B boundary and thus drop 2 bits from 530 // JT values. 531 // cde264: 532 // adrp x12, #21544960 ; 216a000 533 // add x12, x12, #1696 ; 216a6a0 (JT object in .rodata) 534 // ldrsw x8, [x12, x8, lsl #2] --> loads e.g. 0xfeb73bd8 535 // * add x8, x8, x12 --> = cde278, next block 536 // br x8 537 // cde278: 538 // 539 // Parsed as ADDXrs reg:x8 reg:x8 reg:x12 imm:0 540 return false; 541 } 542 assert(DefAdd->getOpcode() == AArch64::ADDXrx && 543 "Failed to match indirect branch!"); 544 545 // Validate ADD operands 546 int64_t OperandExtension = DefAdd->getOperand(3).getImm(); 547 unsigned ShiftVal = AArch64_AM::getArithShiftValue(OperandExtension); 548 AArch64_AM::ShiftExtendType ExtendType = 549 AArch64_AM::getArithExtendType(OperandExtension); 550 if (ShiftVal != 2) 551 llvm_unreachable("Failed to match indirect branch! (fragment 2)"); 552 553 if (ExtendType == AArch64_AM::SXTB) 554 ScaleValue = 1LL; 555 else if (ExtendType == AArch64_AM::SXTH) 556 ScaleValue = 2LL; 557 else if (ExtendType == AArch64_AM::SXTW) 558 ScaleValue = 4LL; 559 else 560 llvm_unreachable("Failed to match indirect branch! (fragment 3)"); 561 562 // Match an ADR to load base address to be used when addressing JT targets 563 SmallVector<MCInst *, 4> &UsesAdd = UDChain[DefAdd]; 564 if (UsesAdd.size() <= 1 || UsesAdd[1] == nullptr || UsesAdd[2] == nullptr) { 565 // This happens when we don't have enough context about this jump table 566 // because the jumping code sequence was split in multiple basic blocks. 567 // This was observed in the wild in HHVM code (dispatchImpl). 568 return false; 569 } 570 MCInst *DefBaseAddr = UsesAdd[1]; 571 assert(DefBaseAddr->getOpcode() == AArch64::ADR && 572 "Failed to match indirect branch pattern! (fragment 3)"); 573 574 PCRelBase = DefBaseAddr; 575 // Match LOAD to load the jump table (relative) target 576 const MCInst *DefLoad = UsesAdd[2]; 577 assert(isLoad(*DefLoad) && 578 "Failed to match indirect branch load pattern! (1)"); 579 assert((ScaleValue != 1LL || isLDRB(*DefLoad)) && 580 "Failed to match indirect branch load pattern! (2)"); 581 assert((ScaleValue != 2LL || isLDRH(*DefLoad)) && 582 "Failed to match indirect branch load pattern! (3)"); 583 584 // Match ADD that calculates the JumpTable Base Address (not the offset) 585 SmallVector<MCInst *, 4> &UsesLoad = UDChain[DefLoad]; 586 const MCInst *DefJTBaseAdd = UsesLoad[1]; 587 MCPhysReg From, To; 588 if (DefJTBaseAdd == nullptr || isLoadFromStack(*DefJTBaseAdd) || 589 isRegToRegMove(*DefJTBaseAdd, From, To)) { 590 // Sometimes base address may have been defined in another basic block 591 // (hoisted). Return with no jump table info. 592 JumpTable = nullptr; 593 return true; 594 } 595 596 assert(DefJTBaseAdd->getOpcode() == AArch64::ADDXri && 597 "Failed to match jump table base address pattern! (1)"); 598 599 if (DefJTBaseAdd->getOperand(2).isImm()) 600 Offset = DefJTBaseAdd->getOperand(2).getImm(); 601 SmallVector<MCInst *, 4> &UsesJTBaseAdd = UDChain[DefJTBaseAdd]; 602 const MCInst *DefJTBasePage = UsesJTBaseAdd[1]; 603 if (DefJTBasePage == nullptr || isLoadFromStack(*DefJTBasePage)) { 604 JumpTable = nullptr; 605 return true; 606 } 607 assert(DefJTBasePage->getOpcode() == AArch64::ADRP && 608 "Failed to match jump table base page pattern! (2)"); 609 if (DefJTBasePage->getOperand(1).isExpr()) 610 JumpTable = DefJTBasePage->getOperand(1).getExpr(); 611 return true; 612 } 613 614 DenseMap<const MCInst *, SmallVector<MCInst *, 4>> 615 computeLocalUDChain(const MCInst *CurInstr, InstructionIterator Begin, 616 InstructionIterator End) const { 617 DenseMap<int, MCInst *> RegAliasTable; 618 DenseMap<const MCInst *, SmallVector<MCInst *, 4>> Uses; 619 620 auto addInstrOperands = [&](const MCInst &Instr) { 621 // Update Uses table 622 for (unsigned OpNum = 0, OpEnd = MCPlus::getNumPrimeOperands(Instr); 623 OpNum != OpEnd; ++OpNum) { 624 if (!Instr.getOperand(OpNum).isReg()) 625 continue; 626 unsigned Reg = Instr.getOperand(OpNum).getReg(); 627 MCInst *AliasInst = RegAliasTable[Reg]; 628 Uses[&Instr].push_back(AliasInst); 629 LLVM_DEBUG({ 630 dbgs() << "Adding reg operand " << Reg << " refs "; 631 if (AliasInst != nullptr) 632 AliasInst->dump(); 633 else 634 dbgs() << "\n"; 635 }); 636 } 637 }; 638 639 LLVM_DEBUG(dbgs() << "computeLocalUDChain\n"); 640 bool TerminatorSeen = false; 641 for (auto II = Begin; II != End; ++II) { 642 MCInst &Instr = *II; 643 // Ignore nops and CFIs 644 if (isPseudo(Instr) || isNoop(Instr)) 645 continue; 646 if (TerminatorSeen) { 647 RegAliasTable.clear(); 648 Uses.clear(); 649 } 650 651 LLVM_DEBUG(dbgs() << "Now updating for:\n "); 652 LLVM_DEBUG(Instr.dump()); 653 addInstrOperands(Instr); 654 655 BitVector Regs = BitVector(RegInfo->getNumRegs(), false); 656 getWrittenRegs(Instr, Regs); 657 658 // Update register definitions after this point 659 int Idx = Regs.find_first(); 660 while (Idx != -1) { 661 RegAliasTable[Idx] = &Instr; 662 LLVM_DEBUG(dbgs() << "Setting reg " << Idx 663 << " def to current instr.\n"); 664 Idx = Regs.find_next(Idx); 665 } 666 667 TerminatorSeen = isTerminator(Instr); 668 } 669 670 // Process the last instruction, which is not currently added into the 671 // instruction stream 672 if (CurInstr) 673 addInstrOperands(*CurInstr); 674 675 return Uses; 676 } 677 678 IndirectBranchType analyzeIndirectBranch( 679 MCInst &Instruction, InstructionIterator Begin, InstructionIterator End, 680 const unsigned PtrSize, MCInst *&MemLocInstrOut, unsigned &BaseRegNumOut, 681 unsigned &IndexRegNumOut, int64_t &DispValueOut, 682 const MCExpr *&DispExprOut, MCInst *&PCRelBaseOut) const override { 683 MemLocInstrOut = nullptr; 684 BaseRegNumOut = AArch64::NoRegister; 685 IndexRegNumOut = AArch64::NoRegister; 686 DispValueOut = 0; 687 DispExprOut = nullptr; 688 689 // An instruction referencing memory used by jump instruction (directly or 690 // via register). This location could be an array of function pointers 691 // in case of indirect tail call, or a jump table. 692 MCInst *MemLocInstr = nullptr; 693 694 // Analyze the memory location. 695 int64_t ScaleValue, DispValue; 696 const MCExpr *DispExpr; 697 698 DenseMap<const MCInst *, SmallVector<llvm::MCInst *, 4>> UDChain = 699 computeLocalUDChain(&Instruction, Begin, End); 700 MCInst *PCRelBase; 701 if (!analyzeIndirectBranchFragment(Instruction, UDChain, DispExpr, 702 DispValue, ScaleValue, PCRelBase)) 703 return IndirectBranchType::UNKNOWN; 704 705 MemLocInstrOut = MemLocInstr; 706 DispValueOut = DispValue; 707 DispExprOut = DispExpr; 708 PCRelBaseOut = PCRelBase; 709 return IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE; 710 } 711 712 unsigned getInvertedBranchOpcode(unsigned Opcode) const { 713 switch (Opcode) { 714 default: 715 llvm_unreachable("Failed to invert branch opcode"); 716 return Opcode; 717 case AArch64::TBZW: return AArch64::TBNZW; 718 case AArch64::TBZX: return AArch64::TBNZX; 719 case AArch64::TBNZW: return AArch64::TBZW; 720 case AArch64::TBNZX: return AArch64::TBZX; 721 case AArch64::CBZW: return AArch64::CBNZW; 722 case AArch64::CBZX: return AArch64::CBNZX; 723 case AArch64::CBNZW: return AArch64::CBZW; 724 case AArch64::CBNZX: return AArch64::CBZX; 725 } 726 } 727 728 unsigned getCondCode(const MCInst &Inst) const override { 729 // AArch64 does not use conditional codes, so we just return the opcode 730 // of the conditional branch here. 731 return Inst.getOpcode(); 732 } 733 734 unsigned getCanonicalBranchCondCode(unsigned Opcode) const override { 735 switch (Opcode) { 736 default: 737 return Opcode; 738 case AArch64::TBNZW: return AArch64::TBZW; 739 case AArch64::TBNZX: return AArch64::TBZX; 740 case AArch64::CBNZW: return AArch64::CBZW; 741 case AArch64::CBNZX: return AArch64::CBZX; 742 } 743 } 744 745 bool reverseBranchCondition(MCInst &Inst, const MCSymbol *TBB, 746 MCContext *Ctx) const override { 747 if (isTB(Inst) || isCB(Inst)) { 748 Inst.setOpcode(getInvertedBranchOpcode(Inst.getOpcode())); 749 assert(Inst.getOpcode() != 0 && "Invalid branch instruction"); 750 } else if (Inst.getOpcode() == AArch64::Bcc) { 751 Inst.getOperand(0).setImm(AArch64CC::getInvertedCondCode( 752 static_cast<AArch64CC::CondCode>(Inst.getOperand(0).getImm()))); 753 assert(Inst.getOperand(0).getImm() != AArch64CC::AL && 754 Inst.getOperand(0).getImm() != AArch64CC::NV && 755 "Can't reverse ALWAYS cond code"); 756 } else { 757 LLVM_DEBUG(Inst.dump()); 758 llvm_unreachable("Unrecognized branch instruction"); 759 } 760 return replaceBranchTarget(Inst, TBB, Ctx); 761 } 762 763 int getPCRelEncodingSize(const MCInst &Inst) const override { 764 switch (Inst.getOpcode()) { 765 default: 766 llvm_unreachable("Failed to get pcrel encoding size"); 767 return 0; 768 case AArch64::TBZW: return 16; 769 case AArch64::TBZX: return 16; 770 case AArch64::TBNZW: return 16; 771 case AArch64::TBNZX: return 16; 772 case AArch64::CBZW: return 21; 773 case AArch64::CBZX: return 21; 774 case AArch64::CBNZW: return 21; 775 case AArch64::CBNZX: return 21; 776 case AArch64::B: return 28; 777 case AArch64::BL: return 28; 778 case AArch64::Bcc: return 21; 779 } 780 } 781 782 int getShortJmpEncodingSize() const override { return 33; } 783 784 int getUncondBranchEncodingSize() const override { return 28; } 785 786 bool createTailCall(MCInst &Inst, const MCSymbol *Target, 787 MCContext *Ctx) override { 788 Inst.setOpcode(AArch64::B); 789 Inst.addOperand(MCOperand::createExpr(getTargetExprFor( 790 Inst, MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx), 791 *Ctx, 0))); 792 setTailCall(Inst); 793 return true; 794 } 795 796 void createLongTailCall(InstructionListType &Seq, const MCSymbol *Target, 797 MCContext *Ctx) override { 798 createShortJmp(Seq, Target, Ctx, /*IsTailCall*/ true); 799 } 800 801 bool createTrap(MCInst &Inst) const override { 802 Inst.clear(); 803 Inst.setOpcode(AArch64::BRK); 804 Inst.addOperand(MCOperand::createImm(1)); 805 return true; 806 } 807 808 bool convertJmpToTailCall(MCInst &Inst) override { 809 setTailCall(Inst); 810 return true; 811 } 812 813 bool convertTailCallToJmp(MCInst &Inst) override { 814 removeAnnotation(Inst, MCPlus::MCAnnotation::kTailCall); 815 clearOffset(Inst); 816 if (getConditionalTailCall(Inst)) 817 unsetConditionalTailCall(Inst); 818 return true; 819 } 820 821 bool lowerTailCall(MCInst &Inst) override { 822 removeAnnotation(Inst, MCPlus::MCAnnotation::kTailCall); 823 if (getConditionalTailCall(Inst)) 824 unsetConditionalTailCall(Inst); 825 return true; 826 } 827 828 bool isNoop(const MCInst &Inst) const override { 829 return Inst.getOpcode() == AArch64::HINT && 830 Inst.getOperand(0).getImm() == 0; 831 } 832 833 bool createNoop(MCInst &Inst) const override { 834 Inst.setOpcode(AArch64::HINT); 835 Inst.clear(); 836 Inst.addOperand(MCOperand::createImm(0)); 837 return true; 838 } 839 840 bool isStore(const MCInst &Inst) const override { return false; } 841 842 bool analyzeBranch(InstructionIterator Begin, InstructionIterator End, 843 const MCSymbol *&TBB, const MCSymbol *&FBB, 844 MCInst *&CondBranch, 845 MCInst *&UncondBranch) const override { 846 auto I = End; 847 848 while (I != Begin) { 849 --I; 850 851 // Ignore nops and CFIs 852 if (isPseudo(*I) || isNoop(*I)) 853 continue; 854 855 // Stop when we find the first non-terminator 856 if (!isTerminator(*I) || isTailCall(*I) || !isBranch(*I)) 857 break; 858 859 // Handle unconditional branches. 860 if (isUnconditionalBranch(*I)) { 861 // If any code was seen after this unconditional branch, we've seen 862 // unreachable code. Ignore them. 863 CondBranch = nullptr; 864 UncondBranch = &*I; 865 const MCSymbol *Sym = getTargetSymbol(*I); 866 assert(Sym != nullptr && 867 "Couldn't extract BB symbol from jump operand"); 868 TBB = Sym; 869 continue; 870 } 871 872 // Handle conditional branches and ignore indirect branches 873 if (isIndirectBranch(*I)) 874 return false; 875 876 if (CondBranch == nullptr) { 877 const MCSymbol *TargetBB = getTargetSymbol(*I); 878 if (TargetBB == nullptr) { 879 // Unrecognized branch target 880 return false; 881 } 882 FBB = TBB; 883 TBB = TargetBB; 884 CondBranch = &*I; 885 continue; 886 } 887 888 llvm_unreachable("multiple conditional branches in one BB"); 889 } 890 return true; 891 } 892 893 void createLongJmp(InstructionListType &Seq, const MCSymbol *Target, 894 MCContext *Ctx, bool IsTailCall) override { 895 // ip0 (r16) is reserved to the linker (refer to 5.3.1.1 of "Procedure Call 896 // Standard for the ARM 64-bit Architecture (AArch64)". 897 // The sequence of instructions we create here is the following: 898 // movz ip0, #:abs_g3:<addr> 899 // movk ip0, #:abs_g2_nc:<addr> 900 // movk ip0, #:abs_g1_nc:<addr> 901 // movk ip0, #:abs_g0_nc:<addr> 902 // br ip0 903 MCInst Inst; 904 Inst.setOpcode(AArch64::MOVZXi); 905 Inst.addOperand(MCOperand::createReg(AArch64::X16)); 906 Inst.addOperand(MCOperand::createExpr(AArch64MCExpr::create( 907 MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx), 908 AArch64MCExpr::VK_ABS_G3, *Ctx))); 909 Inst.addOperand(MCOperand::createImm(0x30)); 910 Seq.emplace_back(Inst); 911 912 Inst.clear(); 913 Inst.setOpcode(AArch64::MOVKXi); 914 Inst.addOperand(MCOperand::createReg(AArch64::X16)); 915 Inst.addOperand(MCOperand::createReg(AArch64::X16)); 916 Inst.addOperand(MCOperand::createExpr(AArch64MCExpr::create( 917 MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx), 918 AArch64MCExpr::VK_ABS_G2_NC, *Ctx))); 919 Inst.addOperand(MCOperand::createImm(0x20)); 920 Seq.emplace_back(Inst); 921 922 Inst.clear(); 923 Inst.setOpcode(AArch64::MOVKXi); 924 Inst.addOperand(MCOperand::createReg(AArch64::X16)); 925 Inst.addOperand(MCOperand::createReg(AArch64::X16)); 926 Inst.addOperand(MCOperand::createExpr(AArch64MCExpr::create( 927 MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx), 928 AArch64MCExpr::VK_ABS_G1_NC, *Ctx))); 929 Inst.addOperand(MCOperand::createImm(0x10)); 930 Seq.emplace_back(Inst); 931 932 Inst.clear(); 933 Inst.setOpcode(AArch64::MOVKXi); 934 Inst.addOperand(MCOperand::createReg(AArch64::X16)); 935 Inst.addOperand(MCOperand::createReg(AArch64::X16)); 936 Inst.addOperand(MCOperand::createExpr(AArch64MCExpr::create( 937 MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx), 938 AArch64MCExpr::VK_ABS_G0_NC, *Ctx))); 939 Inst.addOperand(MCOperand::createImm(0)); 940 Seq.emplace_back(Inst); 941 942 Inst.clear(); 943 Inst.setOpcode(AArch64::BR); 944 Inst.addOperand(MCOperand::createReg(AArch64::X16)); 945 if (IsTailCall) 946 setTailCall(Inst); 947 Seq.emplace_back(Inst); 948 } 949 950 void createShortJmp(InstructionListType &Seq, const MCSymbol *Target, 951 MCContext *Ctx, bool IsTailCall) override { 952 // ip0 (r16) is reserved to the linker (refer to 5.3.1.1 of "Procedure Call 953 // Standard for the ARM 64-bit Architecture (AArch64)". 954 // The sequence of instructions we create here is the following: 955 // adrp ip0, imm 956 // add ip0, ip0, imm 957 // br ip0 958 MCPhysReg Reg = AArch64::X16; 959 InstructionListType Insts = materializeAddress(Target, Ctx, Reg); 960 Insts.emplace_back(); 961 MCInst &Inst = Insts.back(); 962 Inst.clear(); 963 Inst.setOpcode(AArch64::BR); 964 Inst.addOperand(MCOperand::createReg(Reg)); 965 if (IsTailCall) 966 setTailCall(Inst); 967 Seq.swap(Insts); 968 } 969 970 /// Matching pattern here is 971 /// 972 /// ADRP x16, imm 973 /// ADD x16, x16, imm 974 /// BR x16 975 /// 976 bool matchLinkerVeneer(InstructionIterator Begin, InstructionIterator End, 977 uint64_t Address, const MCInst &CurInst, 978 MCInst *&TargetHiBits, MCInst *&TargetLowBits, 979 uint64_t &Target) const override { 980 if (CurInst.getOpcode() != AArch64::BR || !CurInst.getOperand(0).isReg() || 981 CurInst.getOperand(0).getReg() != AArch64::X16) 982 return false; 983 984 auto I = End; 985 if (I == Begin) 986 return false; 987 988 --I; 989 Address -= 4; 990 if (I == Begin || I->getOpcode() != AArch64::ADDXri || 991 MCPlus::getNumPrimeOperands(*I) < 3 || !I->getOperand(0).isReg() || 992 !I->getOperand(1).isReg() || 993 I->getOperand(0).getReg() != AArch64::X16 || 994 I->getOperand(1).getReg() != AArch64::X16 || !I->getOperand(2).isImm()) 995 return false; 996 TargetLowBits = &*I; 997 uint64_t Addr = I->getOperand(2).getImm() & 0xFFF; 998 999 --I; 1000 Address -= 4; 1001 if (I->getOpcode() != AArch64::ADRP || 1002 MCPlus::getNumPrimeOperands(*I) < 2 || !I->getOperand(0).isReg() || 1003 !I->getOperand(1).isImm() || I->getOperand(0).getReg() != AArch64::X16) 1004 return false; 1005 TargetHiBits = &*I; 1006 Addr |= (Address + ((int64_t)I->getOperand(1).getImm() << 12)) & 1007 0xFFFFFFFFFFFFF000ULL; 1008 Target = Addr; 1009 return true; 1010 } 1011 1012 bool replaceImmWithSymbolRef(MCInst &Inst, const MCSymbol *Symbol, 1013 int64_t Addend, MCContext *Ctx, int64_t &Value, 1014 uint64_t RelType) const override { 1015 unsigned ImmOpNo = -1U; 1016 for (unsigned Index = 0; Index < MCPlus::getNumPrimeOperands(Inst); 1017 ++Index) { 1018 if (Inst.getOperand(Index).isImm()) { 1019 ImmOpNo = Index; 1020 break; 1021 } 1022 } 1023 if (ImmOpNo == -1U) 1024 return false; 1025 1026 Value = Inst.getOperand(ImmOpNo).getImm(); 1027 1028 setOperandToSymbolRef(Inst, ImmOpNo, Symbol, Addend, Ctx, RelType); 1029 1030 return true; 1031 } 1032 1033 bool createUncondBranch(MCInst &Inst, const MCSymbol *TBB, 1034 MCContext *Ctx) const override { 1035 Inst.setOpcode(AArch64::B); 1036 Inst.clear(); 1037 Inst.addOperand(MCOperand::createExpr(getTargetExprFor( 1038 Inst, MCSymbolRefExpr::create(TBB, MCSymbolRefExpr::VK_None, *Ctx), 1039 *Ctx, 0))); 1040 return true; 1041 } 1042 1043 bool isMoveMem2Reg(const MCInst &Inst) const override { return false; } 1044 1045 bool isADD64rr(const MCInst &Inst) const override { return false; } 1046 1047 bool isLeave(const MCInst &Inst) const override { return false; } 1048 1049 bool isPop(const MCInst &Inst) const override { return false; } 1050 1051 bool isPrefix(const MCInst &Inst) const override { return false; } 1052 1053 bool deleteREPPrefix(MCInst &Inst) const override { return false; } 1054 1055 bool createReturn(MCInst &Inst) const override { 1056 Inst.setOpcode(AArch64::RET); 1057 Inst.clear(); 1058 Inst.addOperand(MCOperand::createReg(AArch64::LR)); 1059 return true; 1060 } 1061 1062 InstructionListType materializeAddress(const MCSymbol *Target, MCContext *Ctx, 1063 MCPhysReg RegName, 1064 int64_t Addend = 0) const override { 1065 // Get page-aligned address and add page offset 1066 InstructionListType Insts(2); 1067 Insts[0].setOpcode(AArch64::ADRP); 1068 Insts[0].clear(); 1069 Insts[0].addOperand(MCOperand::createReg(RegName)); 1070 Insts[0].addOperand(MCOperand::createImm(0)); 1071 setOperandToSymbolRef(Insts[0], /* OpNum */ 1, Target, Addend, Ctx, 1072 ELF::R_AARCH64_NONE); 1073 Insts[1].setOpcode(AArch64::ADDXri); 1074 Insts[1].clear(); 1075 Insts[1].addOperand(MCOperand::createReg(RegName)); 1076 Insts[1].addOperand(MCOperand::createReg(RegName)); 1077 Insts[1].addOperand(MCOperand::createImm(0)); 1078 Insts[1].addOperand(MCOperand::createImm(0)); 1079 setOperandToSymbolRef(Insts[1], /* OpNum */ 2, Target, Addend, Ctx, 1080 ELF::R_AARCH64_ADD_ABS_LO12_NC); 1081 return Insts; 1082 } 1083 }; 1084 1085 } // end anonymous namespace 1086 1087 namespace llvm { 1088 namespace bolt { 1089 1090 MCPlusBuilder *createAArch64MCPlusBuilder(const MCInstrAnalysis *Analysis, 1091 const MCInstrInfo *Info, 1092 const MCRegisterInfo *RegInfo) { 1093 return new AArch64MCPlusBuilder(Analysis, Info, RegInfo); 1094 } 1095 1096 } // namespace bolt 1097 } // namespace llvm 1098