//===- AArch64.cpp --------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/Support/Endian.h"

using namespace llvm;
using namespace llvm::support::endian;
using namespace llvm::ELF;
using namespace lld;
using namespace lld::elf;

// Page(Expr) is the page address of the expression Expr, defined
// as (Expr & ~0xFFF). (This applies even if the machine page size
// supported by the platform has a different value.)
uint64_t elf::getAArch64Page(uint64_t expr) {
  return expr & ~static_cast<uint64_t>(0xFFF);
}
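// A worked example (values illustrative): getAArch64Page(0x201FFF) yields
// 0x201000. The low 12 bits are always cleared, matching the 4 KiB granule
// assumed by ADRP, even when the OS runs with 16 KiB or 64 KiB pages.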

namespace {
class AArch64 : public TargetInfo {
public:
  AArch64();
  RelExpr getRelExpr(RelType type, const Symbol &s,
                     const uint8_t *loc) const override;
  RelType getDynRel(RelType type) const override;
  int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
  bool needsThunk(RelExpr expr, RelType type, const InputFile *file,
                  uint64_t branchAddr, const Symbol &s,
                  int64_t a) const override;
  uint32_t getThunkSectionSpacing() const override;
  bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override;
  bool usesOnlyLowPageBits(RelType type) const override;
  void relocate(uint8_t *loc, const Relocation &rel,
                uint64_t val) const override;
  RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
  void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
                      uint64_t val) const override;
  void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
                      uint64_t val) const override;
  void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
                      uint64_t val) const override;
};
} // namespace

AArch64::AArch64() {
  copyRel = R_AARCH64_COPY;
  relativeRel = R_AARCH64_RELATIVE;
  iRelativeRel = R_AARCH64_IRELATIVE;
  gotRel = R_AARCH64_GLOB_DAT;
  pltRel = R_AARCH64_JUMP_SLOT;
  symbolicRel = R_AARCH64_ABS64;
  tlsDescRel = R_AARCH64_TLSDESC;
  tlsGotRel = R_AARCH64_TLS_TPREL64;
  pltHeaderSize = 32;
  pltEntrySize = 16;
  ipltEntrySize = 16;
  defaultMaxPageSize = 65536;

  // Align to the 2 MiB page size (known as a superpage or huge page).
  // FreeBSD automatically promotes 2 MiB-aligned allocations.
  defaultImageBase = 0x200000;

  needsThunks = true;
}

RelExpr AArch64::getRelExpr(RelType type, const Symbol &s,
                            const uint8_t *loc) const {
  switch (type) {
  case R_AARCH64_ABS16:
  case R_AARCH64_ABS32:
  case R_AARCH64_ABS64:
  case R_AARCH64_ADD_ABS_LO12_NC:
  case R_AARCH64_LDST128_ABS_LO12_NC:
  case R_AARCH64_LDST16_ABS_LO12_NC:
  case R_AARCH64_LDST32_ABS_LO12_NC:
  case R_AARCH64_LDST64_ABS_LO12_NC:
  case R_AARCH64_LDST8_ABS_LO12_NC:
  case R_AARCH64_MOVW_SABS_G0:
  case R_AARCH64_MOVW_SABS_G1:
  case R_AARCH64_MOVW_SABS_G2:
  case R_AARCH64_MOVW_UABS_G0:
  case R_AARCH64_MOVW_UABS_G0_NC:
  case R_AARCH64_MOVW_UABS_G1:
  case R_AARCH64_MOVW_UABS_G1_NC:
  case R_AARCH64_MOVW_UABS_G2:
  case R_AARCH64_MOVW_UABS_G2_NC:
  case R_AARCH64_MOVW_UABS_G3:
    return R_ABS;
  case R_AARCH64_TLSDESC_ADR_PAGE21:
    return R_AARCH64_TLSDESC_PAGE;
  case R_AARCH64_TLSDESC_LD64_LO12:
  case R_AARCH64_TLSDESC_ADD_LO12:
    return R_TLSDESC;
  case R_AARCH64_TLSDESC_CALL:
    return R_TLSDESC_CALL;
  case R_AARCH64_TLSLE_ADD_TPREL_HI12:
  case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
  case R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC:
  case R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC:
  case R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC:
  case R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC:
  case R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC:
  case R_AARCH64_TLSLE_MOVW_TPREL_G0:
  case R_AARCH64_TLSLE_MOVW_TPREL_G0_NC:
  case R_AARCH64_TLSLE_MOVW_TPREL_G1:
  case R_AARCH64_TLSLE_MOVW_TPREL_G1_NC:
  case R_AARCH64_TLSLE_MOVW_TPREL_G2:
    return R_TPREL;
  case R_AARCH64_CALL26:
  case R_AARCH64_CONDBR19:
  case R_AARCH64_JUMP26:
  case R_AARCH64_TSTBR14:
  case R_AARCH64_PLT32:
    return R_PLT_PC;
  case R_AARCH64_PREL16:
  case R_AARCH64_PREL32:
  case R_AARCH64_PREL64:
  case R_AARCH64_ADR_PREL_LO21:
  case R_AARCH64_LD_PREL_LO19:
  case R_AARCH64_MOVW_PREL_G0:
  case R_AARCH64_MOVW_PREL_G0_NC:
  case R_AARCH64_MOVW_PREL_G1:
  case R_AARCH64_MOVW_PREL_G1_NC:
  case R_AARCH64_MOVW_PREL_G2:
  case R_AARCH64_MOVW_PREL_G2_NC:
  case R_AARCH64_MOVW_PREL_G3:
    return R_PC;
  case R_AARCH64_ADR_PREL_PG_HI21:
  case R_AARCH64_ADR_PREL_PG_HI21_NC:
    return R_AARCH64_PAGE_PC;
  case R_AARCH64_LD64_GOT_LO12_NC:
  case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
    return R_GOT;
  case R_AARCH64_LD64_GOTPAGE_LO15:
    return R_AARCH64_GOT_PAGE;
  case R_AARCH64_ADR_GOT_PAGE:
  case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
    return R_AARCH64_GOT_PAGE_PC;
  case R_AARCH64_NONE:
    return R_NONE;
  default:
    error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
          ") against symbol " + toString(s));
    return R_NONE;
  }
}

RelExpr AArch64::adjustTlsExpr(RelType type, RelExpr expr) const {
  if (expr == R_RELAX_TLS_GD_TO_IE) {
    if (type == R_AARCH64_TLSDESC_ADR_PAGE21)
      return R_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC;
    return R_RELAX_TLS_GD_TO_IE_ABS;
  }
  return expr;
}

bool AArch64::usesOnlyLowPageBits(RelType type) const {
  switch (type) {
  default:
    return false;
  case R_AARCH64_ADD_ABS_LO12_NC:
  case R_AARCH64_LD64_GOT_LO12_NC:
  case R_AARCH64_LDST128_ABS_LO12_NC:
  case R_AARCH64_LDST16_ABS_LO12_NC:
  case R_AARCH64_LDST32_ABS_LO12_NC:
  case R_AARCH64_LDST64_ABS_LO12_NC:
  case R_AARCH64_LDST8_ABS_LO12_NC:
  case R_AARCH64_TLSDESC_ADD_LO12:
  case R_AARCH64_TLSDESC_LD64_LO12:
  case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
    return true;
  }
}
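// These *_LO12 relocations form the low half of an ADRP-based pair: the ADRP
// materializes Page(S + A), and the paired instruction supplies only
// (S + A) & 0xFFF, so bits above 11 of the computed value are irrelevant
// here. A typical pair (illustrative):
//   adrp x0, sym            // R_AARCH64_ADR_PREL_PG_HI21
//   add  x0, x0, :lo12:sym  // R_AARCH64_ADD_ABS_LO12_NC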

RelType AArch64::getDynRel(RelType type) const {
  if (type == R_AARCH64_ABS64)
    return type;
  return R_AARCH64_NONE;
}

int64_t AArch64::getImplicitAddend(const uint8_t *buf, RelType type) const {
  switch (type) {
  case R_AARCH64_TLSDESC:
    return read64(buf + 8);
  case R_AARCH64_NONE:
    return 0;
  case R_AARCH64_PREL32:
    return SignExtend64<32>(read32(buf));
  case R_AARCH64_ABS64:
  case R_AARCH64_PREL64:
    return read64(buf);
  default:
    internalLinkerError(getErrorLocation(buf),
                        "cannot read addend for relocation " + toString(type));
    return 0;
  }
}

void AArch64::writeGotPlt(uint8_t *buf, const Symbol &) const {
  write64(buf, in.plt->getVA());
}

void AArch64::writePltHeader(uint8_t *buf) const {
  const uint8_t pltData[] = {
      0xf0, 0x7b, 0xbf, 0xa9, // stp  x16, x30, [sp,#-16]!
      0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.plt.got[2]))
      0x11, 0x02, 0x40, 0xf9, // ldr  x17, [x16, Offset(&(.plt.got[2]))]
      0x10, 0x02, 0x00, 0x91, // add  x16, x16, Offset(&(.plt.got[2]))
      0x20, 0x02, 0x1f, 0xd6, // br   x17
      0x1f, 0x20, 0x03, 0xd5, // nop
      0x1f, 0x20, 0x03, 0xd5, // nop
      0x1f, 0x20, 0x03, 0xd5  // nop
  };
  memcpy(buf, pltData, sizeof(pltData));

  uint64_t got = in.gotPlt->getVA();
  uint64_t plt = in.plt->getVA();
  relocateNoSym(buf + 4, R_AARCH64_ADR_PREL_PG_HI21,
                getAArch64Page(got + 16) - getAArch64Page(plt + 4));
  relocateNoSym(buf + 8, R_AARCH64_LDST64_ABS_LO12_NC, got + 16);
  relocateNoSym(buf + 12, R_AARCH64_ADD_ABS_LO12_NC, got + 16);
}
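// Note: got + 16 is &(.got.plt[2]), since each .got.plt slot is 8 bytes. The
// first slots are reserved for the dynamic loader's use; slot 2
// conventionally receives the address of the lazy resolver at load time,
// which the header loads into x17 and branches to. The ADRP's addend is
// biased by the instruction's own address (plt + 4) because the relocation
// is PC-relative.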

void AArch64::writePlt(uint8_t *buf, const Symbol &sym,
                       uint64_t pltEntryAddr) const {
  const uint8_t inst[] = {
      0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.plt.got[n]))
      0x11, 0x02, 0x40, 0xf9, // ldr  x17, [x16, Offset(&(.plt.got[n]))]
      0x10, 0x02, 0x00, 0x91, // add  x16, x16, Offset(&(.plt.got[n]))
      0x20, 0x02, 0x1f, 0xd6  // br   x17
  };
  memcpy(buf, inst, sizeof(inst));

  uint64_t gotPltEntryAddr = sym.getGotPltVA();
  relocateNoSym(buf, R_AARCH64_ADR_PREL_PG_HI21,
                getAArch64Page(gotPltEntryAddr) - getAArch64Page(pltEntryAddr));
  relocateNoSym(buf + 4, R_AARCH64_LDST64_ABS_LO12_NC, gotPltEntryAddr);
  relocateNoSym(buf + 8, R_AARCH64_ADD_ABS_LO12_NC, gotPltEntryAddr);
}

bool AArch64::needsThunk(RelExpr expr, RelType type, const InputFile *file,
                         uint64_t branchAddr, const Symbol &s,
                         int64_t a) const {
  // If s is an undefined weak symbol and does not have a PLT entry then it
  // will be resolved as a branch to the next instruction. If it is hidden, its
  // binding has been converted to local, so we just check isUndefined() here.
  // An undefined non-weak symbol will have been errored.
  if (s.isUndefined() && !s.isInPlt())
    return false;
  // ELF for the ARM 64-bit Architecture, section "Call and Jump relocations",
  // only permits range extension thunks for the R_AARCH64_CALL26 and
  // R_AARCH64_JUMP26 relocation types.
  if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26 &&
      type != R_AARCH64_PLT32)
    return false;
  uint64_t dst = expr == R_PLT_PC ? s.getPltVA() : s.getVA(a);
  return !inBranchRange(type, branchAddr, dst);
}

uint32_t AArch64::getThunkSectionSpacing() const {
  // See the comment in Arch/ARM.cpp for a more detailed explanation of
  // getThunkSectionSpacing(). For AArch64, the only branches we are permitted
  // to thunk have a range of +/- 128 MiB.
  return (128 * 1024 * 1024) - 0x30000;
}

bool AArch64::inBranchRange(RelType type, uint64_t src, uint64_t dst) const {
  if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26 &&
      type != R_AARCH64_PLT32)
    return true;
  // The AArch64 call and unconditional branch instructions have a range of
  // +/- 128 MiB. The PLT32 relocation supports a range up to +/- 2 GiB.
  uint64_t range =
      type == R_AARCH64_PLT32 ? (UINT64_C(1) << 31) : (128 * 1024 * 1024);
  if (dst > src) {
    // Immediate of branch is signed.
    range -= 4;
    return dst - src <= range;
  }
  return src - dst <= range;
}
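// The forward range is 4 bytes shorter because the branch immediate is a
// signed, word-scaled field. For B/BL the 26-bit immediate covers
// [-2^25, 2^25 - 1] words, i.e. byte offsets -0x8000000 .. +0x7fffffc, so a
// forward displacement of exactly 128 MiB is just out of range.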

static void write32AArch64Addr(uint8_t *l, uint64_t imm) {
  uint32_t immLo = (imm & 0x3) << 29;
  uint32_t immHi = (imm & 0x1FFFFC) << 3;
  uint64_t mask = (0x3 << 29) | (0x1FFFFC << 3);
  write32le(l, (read32le(l) & ~mask) | immLo | immHi);
}
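// ADR/ADRP split their 21-bit immediate across two fields: imm[1:0] lands in
// instruction bits [30:29] (immlo) and imm[20:2] in bits [23:5] (immhi); the
// mask clears any previous immediate first. For example (illustrative),
// imm = 0x12345 writes immlo = 0b01 and immhi = 0x12345 >> 2 = 0x48D1.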

// Return the bits [start, end] of val, shifted right by start bits.
// For instance, getBits(0xF0, 4, 8) returns 0xF.
static uint64_t getBits(uint64_t val, int start, int end) {
  uint64_t mask = ((uint64_t)1 << (end + 1 - start)) - 1;
  return (val >> start) & mask;
}

static void or32le(uint8_t *p, int32_t v) { write32le(p, read32le(p) | v); }

// Update the immediate field in an AArch64 ldr, str, or add instruction.
static void or32AArch64Imm(uint8_t *l, uint64_t imm) {
  or32le(l, (imm & 0xFFF) << 10);
}
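// The unsigned 12-bit immediate of these instructions occupies bits [21:10],
// hence the shift by 10. E.g. (illustrative) imm = 0xABC ORs 0xABC << 10 into
// the instruction word, leaving the opcode and register fields untouched.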

// Update the immediate field in an AArch64 movk, movn or movz instruction
// for a signed relocation, and update the opcode of a movn or movz instruction
// to match the sign of the operand.
static void writeSMovWImm(uint8_t *loc, uint32_t imm) {
  uint32_t inst = read32le(loc);
  // Opcode field is bits 30, 29, with 10 = movz, 00 = movn and 11 = movk.
  if (!(inst & (1 << 29))) {
    // movn or movz.
    if (imm & 0x10000) {
      // Change opcode to movn, which takes an inverted operand.
      imm ^= 0xFFFF;
      inst &= ~(1 << 30);
    } else {
      // Change opcode to movz.
      inst |= 1 << 30;
    }
  }
  write32le(loc, inst | ((imm & 0xFFFF) << 5));
}
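// Worked example (illustrative): materializing val = -2 with a G0 relocation
// passes imm = 0xFFFFFFFE here. Bit 16 is set, so the opcode becomes movn and
// the operand is inverted to 0x0001; "movn xN, #0x1" then yields
// ~0x1 = 0xFFFFFFFFFFFFFFFE, i.e. -2.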

void AArch64::relocate(uint8_t *loc, const Relocation &rel,
                       uint64_t val) const {
  switch (rel.type) {
  case R_AARCH64_ABS16:
  case R_AARCH64_PREL16:
    checkIntUInt(loc, val, 16, rel);
    write16(loc, val);
    break;
  case R_AARCH64_ABS32:
  case R_AARCH64_PREL32:
    checkIntUInt(loc, val, 32, rel);
    write32(loc, val);
    break;
  case R_AARCH64_PLT32:
    checkInt(loc, val, 32, rel);
    write32(loc, val);
    break;
  case R_AARCH64_ABS64:
  case R_AARCH64_PREL64:
    write64(loc, val);
    break;
  case R_AARCH64_ADD_ABS_LO12_NC:
    or32AArch64Imm(loc, val);
    break;
  case R_AARCH64_ADR_GOT_PAGE:
  case R_AARCH64_ADR_PREL_PG_HI21:
  case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
  case R_AARCH64_TLSDESC_ADR_PAGE21:
    checkInt(loc, val, 33, rel);
    LLVM_FALLTHROUGH;
  case R_AARCH64_ADR_PREL_PG_HI21_NC:
    write32AArch64Addr(loc, val >> 12);
    break;
  case R_AARCH64_ADR_PREL_LO21:
    checkInt(loc, val, 21, rel);
    write32AArch64Addr(loc, val);
    break;
  case R_AARCH64_JUMP26:
    // Normally we would just write the bits of the immediate field, however
    // when patching instructions for the cpu errata fix -fix-cortex-a53-843419
    // we want to replace a non-branch instruction with a branch immediate
    // instruction. By writing all the bits of the instruction including the
    // opcode and the immediate (0 001 | 01 imm26) we can do this
    // transformation by placing an R_AARCH64_JUMP26 relocation at the offset
    // of the instruction we want to patch.
    write32le(loc, 0x14000000);
    LLVM_FALLTHROUGH;
  case R_AARCH64_CALL26:
    checkInt(loc, val, 28, rel);
    or32le(loc, (val & 0x0FFFFFFC) >> 2);
    break;
  case R_AARCH64_CONDBR19:
  case R_AARCH64_LD_PREL_LO19:
    checkAlignment(loc, val, 4, rel);
    checkInt(loc, val, 21, rel);
    or32le(loc, (val & 0x1FFFFC) << 3);
    break;
  case R_AARCH64_LDST8_ABS_LO12_NC:
  case R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC:
    or32AArch64Imm(loc, getBits(val, 0, 11));
    break;
  case R_AARCH64_LDST16_ABS_LO12_NC:
  case R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC:
    checkAlignment(loc, val, 2, rel);
    or32AArch64Imm(loc, getBits(val, 1, 11));
    break;
  case R_AARCH64_LDST32_ABS_LO12_NC:
  case R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC:
    checkAlignment(loc, val, 4, rel);
    or32AArch64Imm(loc, getBits(val, 2, 11));
    break;
  case R_AARCH64_LDST64_ABS_LO12_NC:
  case R_AARCH64_LD64_GOT_LO12_NC:
  case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
  case R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC:
  case R_AARCH64_TLSDESC_LD64_LO12:
    checkAlignment(loc, val, 8, rel);
    or32AArch64Imm(loc, getBits(val, 3, 11));
    break;
  case R_AARCH64_LDST128_ABS_LO12_NC:
  case R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC:
    checkAlignment(loc, val, 16, rel);
    or32AArch64Imm(loc, getBits(val, 4, 11));
    break;
  case R_AARCH64_LD64_GOTPAGE_LO15:
    checkAlignment(loc, val, 8, rel);
    or32AArch64Imm(loc, getBits(val, 3, 14));
    break;
  case R_AARCH64_MOVW_UABS_G0:
    checkUInt(loc, val, 16, rel);
    LLVM_FALLTHROUGH;
  case R_AARCH64_MOVW_UABS_G0_NC:
    or32le(loc, (val & 0xFFFF) << 5);
    break;
  case R_AARCH64_MOVW_UABS_G1:
    checkUInt(loc, val, 32, rel);
    LLVM_FALLTHROUGH;
  case R_AARCH64_MOVW_UABS_G1_NC:
    or32le(loc, (val & 0xFFFF0000) >> 11);
    break;
  case R_AARCH64_MOVW_UABS_G2:
    checkUInt(loc, val, 48, rel);
    LLVM_FALLTHROUGH;
  case R_AARCH64_MOVW_UABS_G2_NC:
    or32le(loc, (val & 0xFFFF00000000) >> 27);
    break;
  case R_AARCH64_MOVW_UABS_G3:
    or32le(loc, (val & 0xFFFF000000000000) >> 43);
    break;
  case R_AARCH64_MOVW_PREL_G0:
  case R_AARCH64_MOVW_SABS_G0:
  case R_AARCH64_TLSLE_MOVW_TPREL_G0:
    checkInt(loc, val, 17, rel);
    LLVM_FALLTHROUGH;
  case R_AARCH64_MOVW_PREL_G0_NC:
  case R_AARCH64_TLSLE_MOVW_TPREL_G0_NC:
    writeSMovWImm(loc, val);
    break;
  case R_AARCH64_MOVW_PREL_G1:
  case R_AARCH64_MOVW_SABS_G1:
  case R_AARCH64_TLSLE_MOVW_TPREL_G1:
    checkInt(loc, val, 33, rel);
    LLVM_FALLTHROUGH;
  case R_AARCH64_MOVW_PREL_G1_NC:
  case R_AARCH64_TLSLE_MOVW_TPREL_G1_NC:
    writeSMovWImm(loc, val >> 16);
    break;
  case R_AARCH64_MOVW_PREL_G2:
  case R_AARCH64_MOVW_SABS_G2:
  case R_AARCH64_TLSLE_MOVW_TPREL_G2:
    checkInt(loc, val, 49, rel);
    LLVM_FALLTHROUGH;
  case R_AARCH64_MOVW_PREL_G2_NC:
    writeSMovWImm(loc, val >> 32);
    break;
  case R_AARCH64_MOVW_PREL_G3:
    writeSMovWImm(loc, val >> 48);
    break;
  case R_AARCH64_TSTBR14:
    checkInt(loc, val, 16, rel);
    or32le(loc, (val & 0xFFFC) << 3);
    break;
  case R_AARCH64_TLSLE_ADD_TPREL_HI12:
    checkUInt(loc, val, 24, rel);
    or32AArch64Imm(loc, val >> 12);
    break;
  case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
  case R_AARCH64_TLSDESC_ADD_LO12:
    or32AArch64Imm(loc, val);
    break;
  case R_AARCH64_TLSDESC:
    // For R_AARCH64_TLSDESC the addend is stored in the second 64-bit word.
    write64(loc + 8, val);
    break;
  default:
    llvm_unreachable("unknown relocation");
  }
}
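// A note on the MOVW_UABS shifts above (illustrative): each mov[zk]
// instruction keeps its 16-bit immediate in bits [20:5], so the G1 case moves
// val bits [31:16] down into that field with ">> 11" (16 - 5 = 11); likewise
// G2 uses ">> 27" (32 - 5) and G3 uses ">> 43" (48 - 5).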

void AArch64::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
                             uint64_t val) const {
  // TLSDESC Global-Dynamic relocations are of the form:
  //   adrp x0, :tlsdesc:v               [R_AARCH64_TLSDESC_ADR_PAGE21]
  //   ldr  x1, [x0, #:tlsdesc_lo12:v]   [R_AARCH64_TLSDESC_LD64_LO12]
  //   add  x0, x0, :tlsdesc_lo12:v      [R_AARCH64_TLSDESC_ADD_LO12]
  //   .tlsdesccall                      [R_AARCH64_TLSDESC_CALL]
  //   blr  x1
  // It can be optimized to:
  //   movz x0, #0x0, lsl #16
  //   movk x0, #0x10
  //   nop
  //   nop
  checkUInt(loc, val, 32, rel);

  switch (rel.type) {
  case R_AARCH64_TLSDESC_ADD_LO12:
  case R_AARCH64_TLSDESC_CALL:
    write32le(loc, 0xd503201f); // nop
    return;
  case R_AARCH64_TLSDESC_ADR_PAGE21:
    write32le(loc, 0xd2a00000 | (((val >> 16) & 0xffff) << 5)); // movz
    return;
  case R_AARCH64_TLSDESC_LD64_LO12:
    write32le(loc, 0xf2800000 | ((val & 0xffff) << 5)); // movk
    return;
  default:
    llvm_unreachable("unsupported relocation for TLS GD to LE relaxation");
  }
}
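// The movz/movk pair rebuilds the 32-bit TP offset in place (values
// illustrative): for val = 0x12345, the adrp site becomes
// "movz x0, #0x1, lsl #16" (bits [31:16]) and the ldr site becomes
// "movk x0, #0x2345" (bits [15:0]), leaving x0 = 0x12345.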

void AArch64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
                             uint64_t val) const {
  // TLSDESC Global-Dynamic relocations are of the form:
  //   adrp x0, :tlsdesc:v               [R_AARCH64_TLSDESC_ADR_PAGE21]
  //   ldr  x1, [x0, #:tlsdesc_lo12:v]   [R_AARCH64_TLSDESC_LD64_LO12]
  //   add  x0, x0, :tlsdesc_lo12:v      [R_AARCH64_TLSDESC_ADD_LO12]
  //   .tlsdesccall                      [R_AARCH64_TLSDESC_CALL]
  //   blr  x1
  // It can be optimized to:
  //   adrp x0, :gottprel:v
  //   ldr  x0, [x0, :gottprel_lo12:v]
  //   nop
  //   nop

  switch (rel.type) {
  case R_AARCH64_TLSDESC_ADD_LO12:
  case R_AARCH64_TLSDESC_CALL:
    write32le(loc, 0xd503201f); // nop
    break;
  case R_AARCH64_TLSDESC_ADR_PAGE21:
    write32le(loc, 0x90000000); // adrp
    relocateNoSym(loc, R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21, val);
    break;
  case R_AARCH64_TLSDESC_LD64_LO12:
    write32le(loc, 0xf9400000); // ldr
    relocateNoSym(loc, R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC, val);
    break;
  default:
    llvm_unreachable("unsupported relocation for TLS GD to IE relaxation");
  }
}

void AArch64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
                             uint64_t val) const {
  checkUInt(loc, val, 32, rel);

  if (rel.type == R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21) {
    // Generate MOVZ.
    uint32_t regNo = read32le(loc) & 0x1f;
    write32le(loc, (0xd2a00000 | regNo) | (((val >> 16) & 0xffff) << 5));
    return;
  }
  if (rel.type == R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC) {
    // Generate MOVK.
    uint32_t regNo = read32le(loc) & 0x1f;
    write32le(loc, (0xf2800000 | regNo) | ((val & 0xffff) << 5));
    return;
  }
  llvm_unreachable("invalid relocation for TLS IE to LE relaxation");
}

AArch64Relaxer::AArch64Relaxer(ArrayRef<Relocation> relocs) {
  if (!config->relax || config->emachine != EM_AARCH64) {
    safeToRelaxAdrpLdr = false;
    return;
  }
  // Check if R_AARCH64_ADR_GOT_PAGE and R_AARCH64_LD64_GOT_LO12_NC
  // always appear in pairs.
  size_t i = 0;
  const size_t size = relocs.size();
  for (; i != size; ++i) {
    if (relocs[i].type == R_AARCH64_ADR_GOT_PAGE) {
      if (i + 1 < size && relocs[i + 1].type == R_AARCH64_LD64_GOT_LO12_NC) {
        ++i;
        continue;
      }
      break;
    } else if (relocs[i].type == R_AARCH64_LD64_GOT_LO12_NC) {
      break;
    }
  }
  safeToRelaxAdrpLdr = i == size;
}
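// The scan accepts a relocation list such as (illustrative)
//   ..., ADR_GOT_PAGE, LD64_GOT_LO12_NC, ..., ADR_GOT_PAGE, LD64_GOT_LO12_NC
// but bails out (safeToRelaxAdrpLdr = false) on a stray relocation of either
// kind, since tryRelaxAdrpLdr below rewrites each pair as a unit.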

bool AArch64Relaxer::tryRelaxAdrpAdd(const Relocation &adrpRel,
                                     const Relocation &addRel, uint64_t secAddr,
                                     uint8_t *buf) const {
  // When the address of sym is within the range of ADR then
  // we may relax
  //   ADRP xn, sym
  //   ADD  xn, xn, :lo12: sym
  // to
  //   NOP
  //   ADR  xn, sym
  if (!config->relax || adrpRel.type != R_AARCH64_ADR_PREL_PG_HI21 ||
      addRel.type != R_AARCH64_ADD_ABS_LO12_NC)
    return false;
  // Check if the relocations apply to consecutive instructions.
  if (adrpRel.offset + 4 != addRel.offset)
    return false;
  if (adrpRel.sym != addRel.sym)
    return false;
  if (adrpRel.addend != 0 || addRel.addend != 0)
    return false;

  uint32_t adrpInstr = read32le(buf + adrpRel.offset);
  uint32_t addInstr = read32le(buf + addRel.offset);
  // Check if the first instruction is ADRP and the second instruction is ADD.
  if ((adrpInstr & 0x9f000000) != 0x90000000 ||
      (addInstr & 0xffc00000) != 0x91000000)
    return false;
  uint32_t adrpDestReg = adrpInstr & 0x1f;
  uint32_t addDestReg = addInstr & 0x1f;
  uint32_t addSrcReg = (addInstr >> 5) & 0x1f;
  if (adrpDestReg != addDestReg || adrpDestReg != addSrcReg)
    return false;

  Symbol &sym = *adrpRel.sym;
  // Check if the address difference is within the +/- 1 MiB range of ADR.
  int64_t val = sym.getVA() - (secAddr + addRel.offset);
  if (val < -1024 * 1024 || val >= 1024 * 1024)
    return false;

  Relocation adrRel = {R_ABS, R_AARCH64_ADR_PREL_LO21, addRel.offset,
                       /*addend=*/0, &sym};
  // nop
  write32le(buf + adrpRel.offset, 0xd503201f);
  // adr x_<dest_reg>
  write32le(buf + adrRel.offset, 0x10000000 | adrpDestReg);
  target->relocate(buf + adrRel.offset, adrRel, val);
  return true;
}

bool AArch64Relaxer::tryRelaxAdrpLdr(const Relocation &adrpRel,
                                     const Relocation &ldrRel, uint64_t secAddr,
                                     uint8_t *buf) const {
  if (!safeToRelaxAdrpLdr)
    return false;

  // When the definition of sym is not preemptible then we may
  // be able to relax
  //   ADRP xn, :got: sym
  //   LDR  xn, [xn, :got_lo12: sym]
  // to
  //   ADRP xn, sym
  //   ADD  xn, xn, :lo12: sym

  if (adrpRel.type != R_AARCH64_ADR_GOT_PAGE ||
      ldrRel.type != R_AARCH64_LD64_GOT_LO12_NC)
    return false;
  // Check if the relocations apply to consecutive instructions.
  if (adrpRel.offset + 4 != ldrRel.offset)
    return false;
  // Check if the relocations reference the same symbol and
  // skip undefined, preemptible and STT_GNU_IFUNC symbols.
  if (!adrpRel.sym || adrpRel.sym != ldrRel.sym || !adrpRel.sym->isDefined() ||
      adrpRel.sym->isPreemptible || adrpRel.sym->isGnuIFunc())
    return false;
  // Check if the addends of both relocations are zero.
  if (adrpRel.addend != 0 || ldrRel.addend != 0)
    return false;
  uint32_t adrpInstr = read32le(buf + adrpRel.offset);
  uint32_t ldrInstr = read32le(buf + ldrRel.offset);
  // Check if the first instruction is ADRP and the second instruction is LDR.
  if ((adrpInstr & 0x9f000000) != 0x90000000 ||
      (ldrInstr & 0x3b000000) != 0x39000000)
    return false;
  // Check the value of the sf bit.
  if (!(ldrInstr >> 31))
    return false;
  uint32_t adrpDestReg = adrpInstr & 0x1f;
  uint32_t ldrDestReg = ldrInstr & 0x1f;
  uint32_t ldrSrcReg = (ldrInstr >> 5) & 0x1f;
  // Check if ADRP and LDR use the same register.
  if (adrpDestReg != ldrDestReg || adrpDestReg != ldrSrcReg)
    return false;

  Symbol &sym = *adrpRel.sym;
  // GOT references to absolute symbols can't be relaxed to use ADRP/ADD in
  // position-independent code because these instructions produce a relative
  // address.
  if (config->isPic && !cast<Defined>(sym).section)
    return false;
  // Check if the address difference is within the +/- 4 GiB range of ADRP.
  int64_t val =
      getAArch64Page(sym.getVA()) - getAArch64Page(secAddr + adrpRel.offset);
  if (val != llvm::SignExtend64(val, 33))
    return false;

  Relocation adrpSymRel = {R_AARCH64_PAGE_PC, R_AARCH64_ADR_PREL_PG_HI21,
                           adrpRel.offset, /*addend=*/0, &sym};
  Relocation addRel = {R_ABS, R_AARCH64_ADD_ABS_LO12_NC, ldrRel.offset,
                       /*addend=*/0, &sym};

  // adrp x_<dest_reg>
  write32le(buf + adrpSymRel.offset, 0x90000000 | adrpDestReg);
  // add x_<dest reg>, x_<dest reg>
  write32le(buf + addRel.offset, 0x91000000 | adrpDestReg | (adrpDestReg << 5));

  target->relocate(buf + adrpSymRel.offset, adrpSymRel,
                   SignExtend64(getAArch64Page(sym.getVA()) -
                                    getAArch64Page(secAddr + adrpSymRel.offset),
                                64));
  target->relocate(buf + addRel.offset, addRel, SignExtend64(sym.getVA(), 64));
  tryRelaxAdrpAdd(adrpSymRel, addRel, secAddr, buf);
  return true;
}

// AArch64 may use security features in variant PLT sequences. These are:
// Pointer Authentication (PAC), introduced in armv8.3-a, and Branch Target
// Indicator (BTI), introduced in armv8.5-a. The additional instructions used
// in the variant PLT sequences are encoded in the hint space so they can be
// deployed on older architectures, which treat the instructions as nops.
// PAC and BTI can be combined, leading to the following combinations:
// writePltHeader
// writePltHeaderBti (no PAC header needed)
// writePlt
// writePltBti (BTI only)
// writePltPac (PAC only)
// writePltBtiPac (BTI and PAC)
//
// When PAC is enabled the dynamic loader encrypts the address that it places
// in the .got.plt using the pacia1716 instruction, which encrypts the value in
// x17 using the modifier in x16. The static linker places autia1716 before the
// indirect branch to x17 to authenticate the address in x17 with the modifier
// in x16. This makes it more difficult for an attacker to modify the value in
// the .got.plt.
//
// When BTI is enabled all indirect branches must land on a bti instruction.
// The static linker must place a bti instruction at the start of any PLT entry
// that may be the target of an indirect branch. As the PLT entries call the
// lazy resolver indirectly, the header must have a bti instruction at its
// start. In general a bti instruction is not needed for a PLT entry, as
// indirect calls are resolved to the function address and not the PLT entry
// for the function. There are a small number of cases where the PLT address
// can escape, such as taking the address of a function or ifunc via a
// non-GOT-generating relocation when a shared library refers to that symbol.
//
// We use the bti c variant of the instruction, which permits indirect branches
// (br) via x16/x17 and indirect function calls (blr) via any register. The ABI
// guarantees that all indirect branches from code requiring BTI protection
// will go via x16/x17.

namespace {
class AArch64BtiPac final : public AArch64 {
public:
  AArch64BtiPac();
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;

private:
  bool btiHeader; // bti instruction needed in PLT Header and Entry
  bool pacEntry;  // autia1716 instruction needed in PLT Entry
};
} // namespace

AArch64BtiPac::AArch64BtiPac() {
  btiHeader = (config->andFeatures & GNU_PROPERTY_AARCH64_FEATURE_1_BTI);
  // A BTI (Branch Target Indicator) PLT entry is only required if the
  // address of the PLT entry can be taken by the program, which permits an
  // indirect jump to the PLT entry. This can happen when the address
  // of the PLT entry for a function is canonicalised due to the address of
  // the function in an executable being taken by a shared library, or when a
  // non-preemptible ifunc is referenced by non-GOT-generating,
  // non-PLT-generating relocations.
  // The PAC PLT entries require dynamic loader support and this isn't known
  // from properties in the objects, so we use the command line flag.
  pacEntry = config->zPacPlt;

  if (btiHeader || pacEntry) {
    pltEntrySize = 24;
    ipltEntrySize = 24;
  }
}
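// With BTI and/or PAC a PLT entry grows from four to six 4-byte instructions
// (24 bytes), e.g. (illustrative) "bti c; adrp; ldr; add; br; nop" for a BTI
// entry, or "adrp; ldr; add; autia1716; br; nop" for a PAC entry.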

void AArch64BtiPac::writePltHeader(uint8_t *buf) const {
  const uint8_t btiData[] = {0x5f, 0x24, 0x03, 0xd5}; // bti c
  const uint8_t pltData[] = {
      0xf0, 0x7b, 0xbf, 0xa9, // stp  x16, x30, [sp,#-16]!
      0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.plt.got[2]))
      0x11, 0x02, 0x40, 0xf9, // ldr  x17, [x16, Offset(&(.plt.got[2]))]
      0x10, 0x02, 0x00, 0x91, // add  x16, x16, Offset(&(.plt.got[2]))
      0x20, 0x02, 0x1f, 0xd6, // br   x17
      0x1f, 0x20, 0x03, 0xd5, // nop
      0x1f, 0x20, 0x03, 0xd5  // nop
  };
  const uint8_t nopData[] = {0x1f, 0x20, 0x03, 0xd5}; // nop

  uint64_t got = in.gotPlt->getVA();
  uint64_t plt = in.plt->getVA();

  if (btiHeader) {
    // PltHeader is called indirectly by plt[N]. Prefix pltData with a BTI C
    // instruction.
    memcpy(buf, btiData, sizeof(btiData));
    buf += sizeof(btiData);
    plt += sizeof(btiData);
  }
  memcpy(buf, pltData, sizeof(pltData));

  relocateNoSym(buf + 4, R_AARCH64_ADR_PREL_PG_HI21,
                getAArch64Page(got + 16) - getAArch64Page(plt + 8));
  relocateNoSym(buf + 8, R_AARCH64_LDST64_ABS_LO12_NC, got + 16);
  relocateNoSym(buf + 12, R_AARCH64_ADD_ABS_LO12_NC, got + 16);
  if (!btiHeader)
    // We didn't add the BTI c instruction so round out size with NOP.
    memcpy(buf + sizeof(pltData), nopData, sizeof(nopData));
}

void AArch64BtiPac::writePlt(uint8_t *buf, const Symbol &sym,
                             uint64_t pltEntryAddr) const {
  // The PLT entry is of the form:
  // [btiData] addrInst (pacBr | stdBr) [nopData]
  const uint8_t btiData[] = {0x5f, 0x24, 0x03, 0xd5}; // bti c
  const uint8_t addrInst[] = {
      0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.plt.got[n]))
      0x11, 0x02, 0x40, 0xf9, // ldr  x17, [x16, Offset(&(.plt.got[n]))]
      0x10, 0x02, 0x00, 0x91  // add  x16, x16, Offset(&(.plt.got[n]))
  };
  const uint8_t pacBr[] = {
      0x9f, 0x21, 0x03, 0xd5, // autia1716
      0x20, 0x02, 0x1f, 0xd6  // br   x17
  };
  const uint8_t stdBr[] = {
      0x20, 0x02, 0x1f, 0xd6, // br   x17
      0x1f, 0x20, 0x03, 0xd5  // nop
  };
  const uint8_t nopData[] = {0x1f, 0x20, 0x03, 0xd5}; // nop

  // needsCopy indicates a non-ifunc canonical PLT entry whose address may
  // escape to shared objects. isInIplt indicates a non-preemptible ifunc. Its
  // address may escape if referenced by a direct relocation. The condition is
  // conservative.
  bool hasBti = btiHeader && (sym.needsCopy || sym.isInIplt);
  if (hasBti) {
    memcpy(buf, btiData, sizeof(btiData));
    buf += sizeof(btiData);
    pltEntryAddr += sizeof(btiData);
  }

  uint64_t gotPltEntryAddr = sym.getGotPltVA();
  memcpy(buf, addrInst, sizeof(addrInst));
  relocateNoSym(buf, R_AARCH64_ADR_PREL_PG_HI21,
                getAArch64Page(gotPltEntryAddr) - getAArch64Page(pltEntryAddr));
  relocateNoSym(buf + 4, R_AARCH64_LDST64_ABS_LO12_NC, gotPltEntryAddr);
  relocateNoSym(buf + 8, R_AARCH64_ADD_ABS_LO12_NC, gotPltEntryAddr);

  if (pacEntry)
    memcpy(buf + sizeof(addrInst), pacBr, sizeof(pacBr));
  else
    memcpy(buf + sizeof(addrInst), stdBr, sizeof(stdBr));
  if (!hasBti)
    // We didn't add the BTI c instruction so round out size with NOP.
    memcpy(buf + sizeof(addrInst) + sizeof(stdBr), nopData, sizeof(nopData));
}

static TargetInfo *getTargetInfo() {
  if ((config->andFeatures & GNU_PROPERTY_AARCH64_FEATURE_1_BTI) ||
      config->zPacPlt) {
    static AArch64BtiPac t;
    return &t;
  }
  static AArch64 t;
  return &t;
}

TargetInfo *elf::getAArch64TargetInfo() { return getTargetInfo(); }