//===- AArch64.cpp --------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "Thunks.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/Object/ELF.h"
#include "llvm/Support/Endian.h"

using namespace llvm;
using namespace llvm::support::endian;
using namespace llvm::ELF;
using namespace lld;
using namespace lld::elf;

// Page(Expr) is the page address of the expression Expr, defined
// as (Expr & ~0xFFF). (This applies even if the machine page size
// supported by the platform has a different value.)
uint64_t elf::getAArch64Page(uint64_t expr) {
  return expr & ~static_cast<uint64_t>(0xFFF);
}

namespace {
class AArch64 : public TargetInfo {
public:
  AArch64();
  RelExpr getRelExpr(RelType type, const Symbol &s,
                     const uint8_t *loc) const override;
  RelType getDynRel(RelType type) const override;
  int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
  bool needsThunk(RelExpr expr, RelType type, const InputFile *file,
                  uint64_t branchAddr, const Symbol &s,
                  int64_t a) const override;
  uint32_t getThunkSectionSpacing() const override;
  bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override;
  bool usesOnlyLowPageBits(RelType type) const override;
  void relocate(uint8_t *loc, const Relocation &rel,
                uint64_t val) const override;
  RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
  void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
                      uint64_t val) const override;
  void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
                      uint64_t val) const override;
  void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
                      uint64_t val) const override;
};
} // namespace

AArch64::AArch64() {
  copyRel = R_AARCH64_COPY;
  relativeRel = R_AARCH64_RELATIVE;
  iRelativeRel = R_AARCH64_IRELATIVE;
  gotRel = R_AARCH64_GLOB_DAT;
  pltRel = R_AARCH64_JUMP_SLOT;
  symbolicRel = R_AARCH64_ABS64;
  tlsDescRel = R_AARCH64_TLSDESC;
  tlsGotRel = R_AARCH64_TLS_TPREL64;
  pltHeaderSize = 32;
  pltEntrySize = 16;
  ipltEntrySize = 16;
  defaultMaxPageSize = 65536;

  // Align to the 2 MiB page size (known as a superpage or huge page).
  // FreeBSD automatically promotes 2 MiB-aligned allocations.
  defaultImageBase = 0x200000;

  needsThunks = true;
}
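
// For illustration: getAArch64Page(0x12345678) == 0x12345000. The ADRP/ADD
// and ADRP/LDR sequences handled below rely on this split of an address into
// a 4 KiB page base (materialized by ADRP) and a 12-bit page offset (carried
// by the following ADD or load/store immediate).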

RelExpr AArch64::getRelExpr(RelType type, const Symbol &s,
                            const uint8_t *loc) const {
  switch (type) {
  case R_AARCH64_ABS16:
  case R_AARCH64_ABS32:
  case R_AARCH64_ABS64:
  case R_AARCH64_ADD_ABS_LO12_NC:
  case R_AARCH64_LDST128_ABS_LO12_NC:
  case R_AARCH64_LDST16_ABS_LO12_NC:
  case R_AARCH64_LDST32_ABS_LO12_NC:
  case R_AARCH64_LDST64_ABS_LO12_NC:
  case R_AARCH64_LDST8_ABS_LO12_NC:
  case R_AARCH64_MOVW_SABS_G0:
  case R_AARCH64_MOVW_SABS_G1:
  case R_AARCH64_MOVW_SABS_G2:
  case R_AARCH64_MOVW_UABS_G0:
  case R_AARCH64_MOVW_UABS_G0_NC:
  case R_AARCH64_MOVW_UABS_G1:
  case R_AARCH64_MOVW_UABS_G1_NC:
  case R_AARCH64_MOVW_UABS_G2:
  case R_AARCH64_MOVW_UABS_G2_NC:
  case R_AARCH64_MOVW_UABS_G3:
    return R_ABS;
  case R_AARCH64_TLSDESC_ADR_PAGE21:
    return R_AARCH64_TLSDESC_PAGE;
  case R_AARCH64_TLSDESC_LD64_LO12:
  case R_AARCH64_TLSDESC_ADD_LO12:
    return R_TLSDESC;
  case R_AARCH64_TLSDESC_CALL:
    return R_TLSDESC_CALL;
  case R_AARCH64_TLSLE_ADD_TPREL_HI12:
  case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
  case R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC:
  case R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC:
  case R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC:
  case R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC:
  case R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC:
  case R_AARCH64_TLSLE_MOVW_TPREL_G0:
  case R_AARCH64_TLSLE_MOVW_TPREL_G0_NC:
  case R_AARCH64_TLSLE_MOVW_TPREL_G1:
  case R_AARCH64_TLSLE_MOVW_TPREL_G1_NC:
  case R_AARCH64_TLSLE_MOVW_TPREL_G2:
    return R_TPREL;
  case R_AARCH64_CALL26:
  case R_AARCH64_CONDBR19:
  case R_AARCH64_JUMP26:
  case R_AARCH64_TSTBR14:
  case R_AARCH64_PLT32:
    return R_PLT_PC;
  case R_AARCH64_PREL16:
  case R_AARCH64_PREL32:
  case R_AARCH64_PREL64:
  case R_AARCH64_ADR_PREL_LO21:
  case R_AARCH64_LD_PREL_LO19:
  case R_AARCH64_MOVW_PREL_G0:
  case R_AARCH64_MOVW_PREL_G0_NC:
  case R_AARCH64_MOVW_PREL_G1:
  case R_AARCH64_MOVW_PREL_G1_NC:
  case R_AARCH64_MOVW_PREL_G2:
  case R_AARCH64_MOVW_PREL_G2_NC:
  case R_AARCH64_MOVW_PREL_G3:
    return R_PC;
  case R_AARCH64_ADR_PREL_PG_HI21:
  case R_AARCH64_ADR_PREL_PG_HI21_NC:
    return R_AARCH64_PAGE_PC;
  case R_AARCH64_LD64_GOT_LO12_NC:
  case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
    return R_GOT;
  case R_AARCH64_LD64_GOTPAGE_LO15:
    return R_AARCH64_GOT_PAGE;
  case R_AARCH64_ADR_GOT_PAGE:
  case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
    return R_AARCH64_GOT_PAGE_PC;
  case R_AARCH64_NONE:
    return R_NONE;
  default:
    error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
          ") against symbol " + toString(s));
    return R_NONE;
  }
}
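
// For example, R_AARCH64_ADR_PREL_PG_HI21 maps to R_AARCH64_PAGE_PC, so the
// relocation value computed later is Page(S + A) - Page(P); relocate() then
// encodes val >> 12 into the ADRP immediate fields.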

RelExpr AArch64::adjustTlsExpr(RelType type, RelExpr expr) const {
  if (expr == R_RELAX_TLS_GD_TO_IE) {
    if (type == R_AARCH64_TLSDESC_ADR_PAGE21)
      return R_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC;
    return R_RELAX_TLS_GD_TO_IE_ABS;
  }
  return expr;
}

bool AArch64::usesOnlyLowPageBits(RelType type) const {
  switch (type) {
  default:
    return false;
  case R_AARCH64_ADD_ABS_LO12_NC:
  case R_AARCH64_LD64_GOT_LO12_NC:
  case R_AARCH64_LDST128_ABS_LO12_NC:
  case R_AARCH64_LDST16_ABS_LO12_NC:
  case R_AARCH64_LDST32_ABS_LO12_NC:
  case R_AARCH64_LDST64_ABS_LO12_NC:
  case R_AARCH64_LDST8_ABS_LO12_NC:
  case R_AARCH64_TLSDESC_ADD_LO12:
  case R_AARCH64_TLSDESC_LD64_LO12:
  case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
    return true;
  }
}

RelType AArch64::getDynRel(RelType type) const {
  if (type == R_AARCH64_ABS64)
    return type;
  return R_AARCH64_NONE;
}

int64_t AArch64::getImplicitAddend(const uint8_t *buf, RelType type) const {
  switch (type) {
  case R_AARCH64_TLSDESC:
    return read64(buf + 8);
  default:
    internalLinkerError(getErrorLocation(buf),
                        "cannot read addend for relocation " + toString(type));
    return 0;
  }
}

void AArch64::writeGotPlt(uint8_t *buf, const Symbol &) const {
  write64(buf, in.plt->getVA());
}

void AArch64::writePltHeader(uint8_t *buf) const {
  const uint8_t pltData[] = {
      0xf0, 0x7b, 0xbf, 0xa9, // stp  x16, x30, [sp,#-16]!
      0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.got.plt[2]))
      0x11, 0x02, 0x40, 0xf9, // ldr  x17, [x16, Offset(&(.got.plt[2]))]
      0x10, 0x02, 0x00, 0x91, // add  x16, x16, Offset(&(.got.plt[2]))
      0x20, 0x02, 0x1f, 0xd6, // br   x17
      0x1f, 0x20, 0x03, 0xd5, // nop
      0x1f, 0x20, 0x03, 0xd5, // nop
      0x1f, 0x20, 0x03, 0xd5  // nop
  };
  memcpy(buf, pltData, sizeof(pltData));

  uint64_t got = in.gotPlt->getVA();
  uint64_t plt = in.plt->getVA();
  relocateNoSym(buf + 4, R_AARCH64_ADR_PREL_PG_HI21,
                getAArch64Page(got + 16) - getAArch64Page(plt + 4));
  relocateNoSym(buf + 8, R_AARCH64_LDST64_ABS_LO12_NC, got + 16);
  relocateNoSym(buf + 12, R_AARCH64_ADD_ABS_LO12_NC, got + 16);
}
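
// Note: got + 16 is the address of .got.plt[2]. Conventionally the first
// three .got.plt slots are reserved: slot 0 holds the address of _DYNAMIC,
// and slots 1 and 2 are filled in by the dynamic loader, slot 2 with the
// entry point of the lazy resolver that the header loads into x17.
// writeGotPlt() above points each ordinary .got.plt slot back at the PLT
// header, so the first call through plt[n] funnels into that resolver.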

void AArch64::writePlt(uint8_t *buf, const Symbol &sym,
                       uint64_t pltEntryAddr) const {
  const uint8_t inst[] = {
      0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.got.plt[n]))
      0x11, 0x02, 0x40, 0xf9, // ldr  x17, [x16, Offset(&(.got.plt[n]))]
      0x10, 0x02, 0x00, 0x91, // add  x16, x16, Offset(&(.got.plt[n]))
      0x20, 0x02, 0x1f, 0xd6  // br   x17
  };
  memcpy(buf, inst, sizeof(inst));

  uint64_t gotPltEntryAddr = sym.getGotPltVA();
  relocateNoSym(buf, R_AARCH64_ADR_PREL_PG_HI21,
                getAArch64Page(gotPltEntryAddr) - getAArch64Page(pltEntryAddr));
  relocateNoSym(buf + 4, R_AARCH64_LDST64_ABS_LO12_NC, gotPltEntryAddr);
  relocateNoSym(buf + 8, R_AARCH64_ADD_ABS_LO12_NC, gotPltEntryAddr);
}

bool AArch64::needsThunk(RelExpr expr, RelType type, const InputFile *file,
                         uint64_t branchAddr, const Symbol &s,
                         int64_t a) const {
  // If s is an undefined weak symbol and does not have a PLT entry then it
  // will be resolved as a branch to the next instruction.
  if (s.isUndefWeak() && !s.isInPlt())
    return false;
  // ELF for the ARM 64-bit Architecture, section "Call and Jump relocations",
  // only permits range extension thunks for the R_AARCH64_CALL26 and
  // R_AARCH64_JUMP26 relocation types.
  if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26 &&
      type != R_AARCH64_PLT32)
    return false;
  uint64_t dst = expr == R_PLT_PC ? s.getPltVA() : s.getVA(a);
  return !inBranchRange(type, branchAddr, dst);
}

uint32_t AArch64::getThunkSectionSpacing() const {
  // See comment in Arch/ARM.cpp for a more detailed explanation of
  // getThunkSectionSpacing(). For AArch64 the only branches we are permitted
  // to thunk have a range of +/- 128 MiB.
  return (128 * 1024 * 1024) - 0x30000;
}

bool AArch64::inBranchRange(RelType type, uint64_t src, uint64_t dst) const {
  if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26 &&
      type != R_AARCH64_PLT32)
    return true;
  // The AArch64 call and unconditional branch instructions have a range of
  // +/- 128 MiB. The PLT32 relocation supports a range up to +/- 2 GiB.
  uint64_t range =
      type == R_AARCH64_PLT32 ? (UINT64_C(1) << 31) : (128 * 1024 * 1024);
  if (dst > src) {
    // Immediate of branch is signed.
    range -= 4;
    return dst - src <= range;
  }
  return src - dst <= range;
}
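
// For illustration: the 26-bit branch immediate encodes a word offset in
// [-2^25, 2^25 - 1], i.e. byte offsets in [-0x8000000, +0x7fffffc], so a
// forward branch reaches 4 bytes less than a backward one. That asymmetry is
// what the range -= 4 adjustment above accounts for.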

static void write32AArch64Addr(uint8_t *l, uint64_t imm) {
  uint32_t immLo = (imm & 0x3) << 29;
  uint32_t immHi = (imm & 0x1FFFFC) << 3;
  uint64_t mask = (0x3 << 29) | (0x1FFFFC << 3);
  write32le(l, (read32le(l) & ~mask) | immLo | immHi);
}
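
// In the ADR/ADRP encoding, the low two bits of the immediate (immlo) occupy
// instruction bits [30:29] and the remaining 19 bits (immhi) occupy bits
// [23:5]; write32AArch64Addr scatters a 21-bit value accordingly while
// preserving the opcode and destination register bits.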

// Return the bits [Start, End] from Val shifted Start bits.
// For instance, getBits(0xF0, 4, 8) returns 0xF.
static uint64_t getBits(uint64_t val, int start, int end) {
  uint64_t mask = ((uint64_t)1 << (end + 1 - start)) - 1;
  return (val >> start) & mask;
}

static void or32le(uint8_t *p, int32_t v) { write32le(p, read32le(p) | v); }

// Update the immediate field in an AArch64 ldr, str, or add instruction.
static void or32AArch64Imm(uint8_t *l, uint64_t imm) {
  or32le(l, (imm & 0xFFF) << 10);
}

// Update the immediate field in an AArch64 movk, movn or movz instruction
// for a signed relocation, and update the opcode of a movn or movz
// instruction to match the sign of the operand.
static void writeSMovWImm(uint8_t *loc, uint32_t imm) {
  uint32_t inst = read32le(loc);
  // Opcode field is bits 30, 29, with 10 = movz, 00 = movn and 11 = movk.
  if (!(inst & (1 << 29))) {
    // movn or movz.
    if (imm & 0x10000) {
      // Change opcode to movn, which takes an inverted operand.
      imm ^= 0xFFFF;
      inst &= ~(1 << 30);
    } else {
      // Change opcode to movz.
      inst |= 1 << 30;
    }
  }
  write32le(loc, inst | ((imm & 0xFFFF) << 5));
}

void AArch64::relocate(uint8_t *loc, const Relocation &rel,
                       uint64_t val) const {
  switch (rel.type) {
  case R_AARCH64_ABS16:
  case R_AARCH64_PREL16:
    checkIntUInt(loc, val, 16, rel);
    write16(loc, val);
    break;
  case R_AARCH64_ABS32:
  case R_AARCH64_PREL32:
    checkIntUInt(loc, val, 32, rel);
    write32(loc, val);
    break;
  case R_AARCH64_PLT32:
    checkInt(loc, val, 32, rel);
    write32(loc, val);
    break;
  case R_AARCH64_ABS64:
  case R_AARCH64_PREL64:
    write64(loc, val);
    break;
  case R_AARCH64_ADD_ABS_LO12_NC:
    or32AArch64Imm(loc, val);
    break;
  case R_AARCH64_ADR_GOT_PAGE:
  case R_AARCH64_ADR_PREL_PG_HI21:
  case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
  case R_AARCH64_TLSDESC_ADR_PAGE21:
    checkInt(loc, val, 33, rel);
    LLVM_FALLTHROUGH;
  case R_AARCH64_ADR_PREL_PG_HI21_NC:
    write32AArch64Addr(loc, val >> 12);
    break;
  case R_AARCH64_ADR_PREL_LO21:
    checkInt(loc, val, 21, rel);
    write32AArch64Addr(loc, val);
    break;
  case R_AARCH64_JUMP26:
    // Normally we would just write the bits of the immediate field, however
    // when patching instructions for the CPU errata fix
    // --fix-cortex-a53-843419 we want to replace a non-branch instruction
    // with a branch immediate instruction. By writing all the bits of the
    // instruction including the opcode and the immediate (0 001 | 01 imm26)
    // we can do this transformation by placing an R_AARCH64_JUMP26
    // relocation at the offset of the instruction we want to patch.
    write32le(loc, 0x14000000);
    LLVM_FALLTHROUGH;
  case R_AARCH64_CALL26:
    checkInt(loc, val, 28, rel);
    or32le(loc, (val & 0x0FFFFFFC) >> 2);
    break;
  case R_AARCH64_CONDBR19:
  case R_AARCH64_LD_PREL_LO19:
    checkAlignment(loc, val, 4, rel);
    checkInt(loc, val, 21, rel);
    or32le(loc, (val & 0x1FFFFC) << 3);
    break;
  case R_AARCH64_LDST8_ABS_LO12_NC:
  case R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC:
    or32AArch64Imm(loc, getBits(val, 0, 11));
    break;
  case R_AARCH64_LDST16_ABS_LO12_NC:
  case R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC:
    checkAlignment(loc, val, 2, rel);
    or32AArch64Imm(loc, getBits(val, 1, 11));
    break;
  case R_AARCH64_LDST32_ABS_LO12_NC:
  case R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC:
    checkAlignment(loc, val, 4, rel);
    or32AArch64Imm(loc, getBits(val, 2, 11));
    break;
  case R_AARCH64_LDST64_ABS_LO12_NC:
  case R_AARCH64_LD64_GOT_LO12_NC:
  case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
  case R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC:
  case R_AARCH64_TLSDESC_LD64_LO12:
    checkAlignment(loc, val, 8, rel);
    or32AArch64Imm(loc, getBits(val, 3, 11));
    break;
  case R_AARCH64_LDST128_ABS_LO12_NC:
  case R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC:
    checkAlignment(loc, val, 16, rel);
    or32AArch64Imm(loc, getBits(val, 4, 11));
    break;
  case R_AARCH64_LD64_GOTPAGE_LO15:
    checkAlignment(loc, val, 8, rel);
    or32AArch64Imm(loc, getBits(val, 3, 14));
    break;
  case R_AARCH64_MOVW_UABS_G0:
    checkUInt(loc, val, 16, rel);
    LLVM_FALLTHROUGH;
  case R_AARCH64_MOVW_UABS_G0_NC:
    or32le(loc, (val & 0xFFFF) << 5);
    break;
  case R_AARCH64_MOVW_UABS_G1:
    checkUInt(loc, val, 32, rel);
    LLVM_FALLTHROUGH;
  case R_AARCH64_MOVW_UABS_G1_NC:
    or32le(loc, (val & 0xFFFF0000) >> 11);
    break;
  case R_AARCH64_MOVW_UABS_G2:
    checkUInt(loc, val, 48, rel);
    LLVM_FALLTHROUGH;
  case R_AARCH64_MOVW_UABS_G2_NC:
    or32le(loc, (val & 0xFFFF00000000) >> 27);
    break;
  case R_AARCH64_MOVW_UABS_G3:
    or32le(loc, (val & 0xFFFF000000000000) >> 43);
    break;
  case R_AARCH64_MOVW_PREL_G0:
  case R_AARCH64_MOVW_SABS_G0:
  case R_AARCH64_TLSLE_MOVW_TPREL_G0:
    checkInt(loc, val, 17, rel);
    LLVM_FALLTHROUGH;
  case R_AARCH64_MOVW_PREL_G0_NC:
  case R_AARCH64_TLSLE_MOVW_TPREL_G0_NC:
    writeSMovWImm(loc, val);
    break;
  case R_AARCH64_MOVW_PREL_G1:
  case R_AARCH64_MOVW_SABS_G1:
  case R_AARCH64_TLSLE_MOVW_TPREL_G1:
    checkInt(loc, val, 33, rel);
    LLVM_FALLTHROUGH;
  case R_AARCH64_MOVW_PREL_G1_NC:
  case R_AARCH64_TLSLE_MOVW_TPREL_G1_NC:
    writeSMovWImm(loc, val >> 16);
    break;
  case R_AARCH64_MOVW_PREL_G2:
  case R_AARCH64_MOVW_SABS_G2:
  case R_AARCH64_TLSLE_MOVW_TPREL_G2:
    checkInt(loc, val, 49, rel);
    LLVM_FALLTHROUGH;
  case R_AARCH64_MOVW_PREL_G2_NC:
    writeSMovWImm(loc, val >> 32);
    break;
  case R_AARCH64_MOVW_PREL_G3:
    writeSMovWImm(loc, val >> 48);
    break;
  case R_AARCH64_TSTBR14:
    checkInt(loc, val, 16, rel);
    or32le(loc, (val & 0xFFFC) << 3);
    break;
  case R_AARCH64_TLSLE_ADD_TPREL_HI12:
    checkUInt(loc, val, 24, rel);
    or32AArch64Imm(loc, val >> 12);
    break;
  case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
  case R_AARCH64_TLSDESC_ADD_LO12:
    or32AArch64Imm(loc, val);
    break;
  case R_AARCH64_TLSDESC:
    // For R_AARCH64_TLSDESC the addend is stored in the second 64-bit word.
    write64(loc + 8, val);
    break;
  default:
    llvm_unreachable("unknown relocation");
  }
}
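
// A note on the TLS relaxations below: the movz/movk pair they emit can
// materialize at most a 32-bit value (16 bits per instruction, with the movz
// shifted left by 16), which is why relaxTlsGdToLe and relaxTlsIeToLe start
// with checkUInt(loc, val, 32, rel).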

void AArch64::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
                             uint64_t val) const {
  // TLSDESC Global-Dynamic relocations are in the form:
  //   adrp x0, :tlsdesc:v              [R_AARCH64_TLSDESC_ADR_PAGE21]
  //   ldr  x1, [x0, #:tlsdesc_lo12:v]  [R_AARCH64_TLSDESC_LD64_LO12]
  //   add  x0, x0, :tlsdesc_lo12:v     [R_AARCH64_TLSDESC_ADD_LO12]
  //   .tlsdesccall                     [R_AARCH64_TLSDESC_CALL]
  //   blr  x1
  // This sequence can be optimized to:
  //   movz x0, #0x0, lsl #16
  //   movk x0, #0x10
  //   nop
  //   nop
  checkUInt(loc, val, 32, rel);

  switch (rel.type) {
  case R_AARCH64_TLSDESC_ADD_LO12:
  case R_AARCH64_TLSDESC_CALL:
    write32le(loc, 0xd503201f); // nop
    return;
  case R_AARCH64_TLSDESC_ADR_PAGE21:
    write32le(loc, 0xd2a00000 | (((val >> 16) & 0xffff) << 5)); // movz
    return;
  case R_AARCH64_TLSDESC_LD64_LO12:
    write32le(loc, 0xf2800000 | ((val & 0xffff) << 5)); // movk
    return;
  default:
    llvm_unreachable("unsupported relocation for TLS GD to LE relaxation");
  }
}

void AArch64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
                             uint64_t val) const {
  // TLSDESC Global-Dynamic relocations are in the form:
  //   adrp x0, :tlsdesc:v              [R_AARCH64_TLSDESC_ADR_PAGE21]
  //   ldr  x1, [x0, #:tlsdesc_lo12:v]  [R_AARCH64_TLSDESC_LD64_LO12]
  //   add  x0, x0, :tlsdesc_lo12:v     [R_AARCH64_TLSDESC_ADD_LO12]
  //   .tlsdesccall                     [R_AARCH64_TLSDESC_CALL]
  //   blr  x1
  // This sequence can be optimized to:
  //   adrp x0, :gottprel:v
  //   ldr  x0, [x0, :gottprel_lo12:v]
  //   nop
  //   nop

  switch (rel.type) {
  case R_AARCH64_TLSDESC_ADD_LO12:
  case R_AARCH64_TLSDESC_CALL:
    write32le(loc, 0xd503201f); // nop
    break;
  case R_AARCH64_TLSDESC_ADR_PAGE21:
    write32le(loc, 0x90000000); // adrp
    relocateNoSym(loc, R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21, val);
    break;
  case R_AARCH64_TLSDESC_LD64_LO12:
    write32le(loc, 0xf9400000); // ldr
    relocateNoSym(loc, R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC, val);
    break;
  default:
    llvm_unreachable("unsupported relocation for TLS GD to IE relaxation");
  }
}

void AArch64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
                             uint64_t val) const {
  checkUInt(loc, val, 32, rel);

  if (rel.type == R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21) {
    // Generate MOVZ.
    uint32_t regNo = read32le(loc) & 0x1f;
    write32le(loc, (0xd2a00000 | regNo) | (((val >> 16) & 0xffff) << 5));
    return;
  }
  if (rel.type == R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC) {
    // Generate MOVK.
    uint32_t regNo = read32le(loc) & 0x1f;
    write32le(loc, (0xf2800000 | regNo) | ((val & 0xffff) << 5));
    return;
  }
  llvm_unreachable("invalid relocation for TLS IE to LE relaxation");
}

AArch64Relaxer::AArch64Relaxer(ArrayRef<Relocation> relocs) {
  if (!config->relax || config->emachine != EM_AARCH64) {
    safeToRelaxAdrpLdr = false;
    return;
  }
  // Check if R_AARCH64_ADR_GOT_PAGE and R_AARCH64_LD64_GOT_LO12_NC
  // always appear in pairs.
  size_t i = 0;
  const size_t size = relocs.size();
  for (; i != size; ++i) {
    if (relocs[i].type == R_AARCH64_ADR_GOT_PAGE) {
      if (i + 1 < size && relocs[i + 1].type == R_AARCH64_LD64_GOT_LO12_NC) {
        ++i;
        continue;
      }
      break;
    } else if (relocs[i].type == R_AARCH64_LD64_GOT_LO12_NC) {
      break;
    }
  }
  safeToRelaxAdrpLdr = i == size;
}
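
// For example, the scan above accepts the sequence {ADR_GOT_PAGE,
// LD64_GOT_LO12_NC, CALL26} but bails out on {ADR_GOT_PAGE, CALL26,
// LD64_GOT_LO12_NC}: an unpaired GOT-page relocation could otherwise be
// relaxed without its partner, leaving an inconsistent instruction pair.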

bool AArch64Relaxer::tryRelaxAdrpLdr(const Relocation &adrpRel,
                                     const Relocation &ldrRel, uint64_t secAddr,
                                     uint8_t *buf) const {
  if (!safeToRelaxAdrpLdr)
    return false;

  // When the definition of sym is not preemptible then we may
  // be able to relax
  //   ADRP xn, :got: sym
  //   LDR  xn, [ xn :got_lo12: sym]
  // to
  //   ADRP xn, sym
  //   ADD  xn, xn, :lo12: sym

  if (adrpRel.type != R_AARCH64_ADR_GOT_PAGE ||
      ldrRel.type != R_AARCH64_LD64_GOT_LO12_NC)
    return false;
  // Check if the relocations apply to consecutive instructions.
  if (adrpRel.offset + 4 != ldrRel.offset)
    return false;
  // Check if the relocations reference the same symbol and
  // skip undefined, preemptible and STT_GNU_IFUNC symbols.
  if (!adrpRel.sym || adrpRel.sym != ldrRel.sym || !adrpRel.sym->isDefined() ||
      adrpRel.sym->isPreemptible || adrpRel.sym->isGnuIFunc())
    return false;
  // Check if the addends of both relocations are zero.
  if (adrpRel.addend != 0 || ldrRel.addend != 0)
    return false;
  uint32_t adrpInstr = read32le(buf + adrpRel.offset);
  uint32_t ldrInstr = read32le(buf + ldrRel.offset);
  // Check if the first instruction is ADRP and the second instruction is LDR.
  if ((adrpInstr & 0x9f000000) != 0x90000000 ||
      (ldrInstr & 0x3b000000) != 0x39000000)
    return false;
  // Check the value of the sf bit.
  if (!(ldrInstr >> 31))
    return false;
  uint32_t adrpDestReg = adrpInstr & 0x1f;
  uint32_t ldrDestReg = ldrInstr & 0x1f;
  uint32_t ldrSrcReg = (ldrInstr >> 5) & 0x1f;
  // Check if ADRP and LDR use the same register.
  if (adrpDestReg != ldrDestReg || adrpDestReg != ldrSrcReg)
    return false;

  Symbol &sym = *adrpRel.sym;
  // Check if the address difference is within the +/- 4 GiB range that an
  // ADRP can reach.
  int64_t val =
      getAArch64Page(sym.getVA()) - getAArch64Page(secAddr + adrpRel.offset);
  if (val != llvm::SignExtend64(val, 33))
    return false;

  Relocation adrpSymRel = {R_AARCH64_PAGE_PC, R_AARCH64_ADR_PREL_PG_HI21,
                           adrpRel.offset, /*addend=*/0, &sym};
  Relocation addRel = {R_ABS, R_AARCH64_ADD_ABS_LO12_NC, ldrRel.offset,
                       /*addend=*/0, &sym};

  // adrp x_<dest_reg>
  write32le(buf + adrpSymRel.offset, 0x90000000 | adrpDestReg);
  // add x_<dest_reg>, x_<dest_reg>
  write32le(buf + addRel.offset, 0x91000000 | adrpDestReg | (adrpDestReg << 5));

  target->relocate(buf + adrpSymRel.offset, adrpSymRel,
                   SignExtend64(getAArch64Page(sym.getVA()) -
                                    getAArch64Page(secAddr + adrpSymRel.offset),
                                64));
  target->relocate(buf + addRel.offset, addRel, SignExtend64(sym.getVA(), 64));
  return true;
}

// AArch64 may use security features in variant PLT sequences. These are:
// Pointer Authentication (PAC), introduced in armv8.3-a, and Branch Target
// Identification (BTI), introduced in armv8.5-a. The additional instructions
// used in the variant PLT sequences are encoded in the HINT space so they can
// be deployed on older architectures, which treat the instructions as a nop.
// PAC and BTI can be combined, leading to the following combinations:
// writePltHeader
// writePltHeaderBti (no PAC Header needed)
// writePlt
// writePltBti (BTI only)
// writePltPac (PAC only)
// writePltBtiPac (BTI and PAC)
//
// When PAC is enabled the dynamic loader signs the address that it places in
// the .got.plt: pacia1716 signs the value in x17 using the modifier in x16.
// The static linker places autia1716 before the indirect branch to x17 to
// authenticate the address in x17 with the modifier in x16. This makes it
// more difficult for an attacker to modify the value in the .got.plt.
//
// When BTI is enabled all indirect branches must land on a bti instruction.
// The static linker must place a bti instruction at the start of any PLT
// entry that may be the target of an indirect branch. As the PLT entries call
// the lazy resolver indirectly this must have a bti instruction at start. In
// general a bti instruction is not needed for a PLT entry as indirect calls
// are resolved to the function address and not the PLT entry for the
// function. There are a small number of cases where the PLT address can
// escape, such as taking the address of a function or ifunc via a non
// got-generating relocation, and a shared library refers to that symbol.
//
// We use the bti c variant of the instruction, which permits indirect
// branches (br) via x16/x17 and indirect function calls (blr) via any
// register. The ABI guarantees that all indirect branches from code requiring
// BTI protection will go via x16/x17.

namespace {
class AArch64BtiPac final : public AArch64 {
public:
  AArch64BtiPac();
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;

private:
  bool btiHeader; // bti instruction needed in PLT Header and Entry
  bool pacEntry;  // autia1716 instruction needed in PLT Entry
};
} // namespace

AArch64BtiPac::AArch64BtiPac() {
  btiHeader = (config->andFeatures & GNU_PROPERTY_AARCH64_FEATURE_1_BTI);
  // A BTI (Branch Target Identification) PLT entry is only required if the
  // address of the PLT entry can be taken by the program, which permits an
  // indirect jump to the PLT entry. This can happen when the address of the
  // PLT entry for a function is canonicalised due to the address of the
  // function in an executable being taken by a shared library, or when a
  // non-preemptible ifunc is referenced by non-GOT-generating,
  // non-PLT-generating relocations.
  // The PAC PLT entries require dynamic loader support and this isn't known
  // from properties in the objects, so we use the command line flag.
  pacEntry = config->zPacPlt;

  if (btiHeader || pacEntry) {
    pltEntrySize = 24;
    ipltEntrySize = 24;
  }
}
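
// With BTI and/or PAC enabled a PLT entry grows from four instructions to
// six (a bti c prefix and/or an autia1716 before the branch, padded with
// nops), i.e. from 16 bytes to the 24 set above. The header stays at 32
// bytes: its bti c prefix, when present, takes the place of a trailing nop.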

void AArch64BtiPac::writePltHeader(uint8_t *buf) const {
  const uint8_t btiData[] = {0x5f, 0x24, 0x03, 0xd5}; // bti c
  const uint8_t pltData[] = {
      0xf0, 0x7b, 0xbf, 0xa9, // stp  x16, x30, [sp,#-16]!
      0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.got.plt[2]))
      0x11, 0x02, 0x40, 0xf9, // ldr  x17, [x16, Offset(&(.got.plt[2]))]
      0x10, 0x02, 0x00, 0x91, // add  x16, x16, Offset(&(.got.plt[2]))
      0x20, 0x02, 0x1f, 0xd6, // br   x17
      0x1f, 0x20, 0x03, 0xd5, // nop
      0x1f, 0x20, 0x03, 0xd5  // nop
  };
  const uint8_t nopData[] = {0x1f, 0x20, 0x03, 0xd5}; // nop

  uint64_t got = in.gotPlt->getVA();
  uint64_t plt = in.plt->getVA();

  if (btiHeader) {
    // PltHeader is called indirectly by plt[N]. Prefix pltData with a BTI C
    // instruction.
    memcpy(buf, btiData, sizeof(btiData));
    buf += sizeof(btiData);
    plt += sizeof(btiData);
  }
  memcpy(buf, pltData, sizeof(pltData));

  relocateNoSym(buf + 4, R_AARCH64_ADR_PREL_PG_HI21,
                getAArch64Page(got + 16) - getAArch64Page(plt + 8));
  relocateNoSym(buf + 8, R_AARCH64_LDST64_ABS_LO12_NC, got + 16);
  relocateNoSym(buf + 12, R_AARCH64_ADD_ABS_LO12_NC, got + 16);
  if (!btiHeader)
    // We didn't add the BTI c instruction so round out size with NOP.
    memcpy(buf + sizeof(pltData), nopData, sizeof(nopData));
}

void AArch64BtiPac::writePlt(uint8_t *buf, const Symbol &sym,
                             uint64_t pltEntryAddr) const {
  // The PLT entry is of the form:
  // [btiData] addrInst (pacBr | stdBr) [nopData]
  const uint8_t btiData[] = {0x5f, 0x24, 0x03, 0xd5}; // bti c
  const uint8_t addrInst[] = {
      0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.got.plt[n]))
      0x11, 0x02, 0x40, 0xf9, // ldr  x17, [x16, Offset(&(.got.plt[n]))]
      0x10, 0x02, 0x00, 0x91  // add  x16, x16, Offset(&(.got.plt[n]))
  };
  const uint8_t pacBr[] = {
      0x9f, 0x21, 0x03, 0xd5, // autia1716
      0x20, 0x02, 0x1f, 0xd6  // br   x17
  };
  const uint8_t stdBr[] = {
      0x20, 0x02, 0x1f, 0xd6, // br   x17
      0x1f, 0x20, 0x03, 0xd5  // nop
  };
  const uint8_t nopData[] = {0x1f, 0x20, 0x03, 0xd5}; // nop

  // needsCopy indicates a non-ifunc canonical PLT entry whose address may
  // escape to shared objects. isInIplt indicates a non-preemptible ifunc. Its
  // address may escape if referenced by a direct relocation. The condition is
  // conservative.
  bool hasBti = btiHeader && (sym.needsCopy || sym.isInIplt);
  if (hasBti) {
    memcpy(buf, btiData, sizeof(btiData));
    buf += sizeof(btiData);
    pltEntryAddr += sizeof(btiData);
  }

  uint64_t gotPltEntryAddr = sym.getGotPltVA();
  memcpy(buf, addrInst, sizeof(addrInst));
  relocateNoSym(buf, R_AARCH64_ADR_PREL_PG_HI21,
                getAArch64Page(gotPltEntryAddr) - getAArch64Page(pltEntryAddr));
  relocateNoSym(buf + 4, R_AARCH64_LDST64_ABS_LO12_NC, gotPltEntryAddr);
  relocateNoSym(buf + 8, R_AARCH64_ADD_ABS_LO12_NC, gotPltEntryAddr);

  if (pacEntry)
    memcpy(buf + sizeof(addrInst), pacBr, sizeof(pacBr));
  else
    memcpy(buf + sizeof(addrInst), stdBr, sizeof(stdBr));
  if (!hasBti)
    // We didn't add the BTI c instruction so round out size with NOP.
    memcpy(buf + sizeof(addrInst) + sizeof(stdBr), nopData, sizeof(nopData));
}

static TargetInfo *getTargetInfo() {
  if (config->andFeatures & (GNU_PROPERTY_AARCH64_FEATURE_1_BTI |
                             GNU_PROPERTY_AARCH64_FEATURE_1_PAC)) {
    static AArch64BtiPac t;
    return &t;
  }
  static AArch64 t;
  return &t;
}

TargetInfo *elf::getAArch64TargetInfo() { return getTargetInfo(); }
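
// Both TargetInfo instances above are function-local statics, so each variant
// is constructed lazily and at most once. Which one is returned is decided
// from config->andFeatures, the AND of the GNU property notes of all input
// objects (the driver also folds flags such as -z force-bti and -z pac-plt
// into it), so the choice is stable for the whole link.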