1 //===- X86.cpp ------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "Symbols.h"
10 #include "SyntheticSections.h"
11 #include "Target.h"
12 #include "lld/Common/ErrorHandler.h"
13 #include "llvm/Support/Endian.h"
14
15 using namespace llvm;
16 using namespace llvm::support::endian;
17 using namespace llvm::ELF;
18 using namespace lld;
19 using namespace lld::elf;
20
21 namespace {
22 class X86 : public TargetInfo {
23 public:
24 X86();
25 int getTlsGdRelaxSkip(RelType type) const override;
26 RelExpr getRelExpr(RelType type, const Symbol &s,
27 const uint8_t *loc) const override;
28 int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
29 void writeGotPltHeader(uint8_t *buf) const override;
30 RelType getDynRel(RelType type) const override;
31 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
32 void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
33 void writePltHeader(uint8_t *buf) const override;
34 void writePlt(uint8_t *buf, const Symbol &sym,
35 uint64_t pltEntryAddr) const override;
36 void relocate(uint8_t *loc, const Relocation &rel,
37 uint64_t val) const override;
38
39 RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
40 void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
41 uint64_t val) const override;
42 void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
43 uint64_t val) const override;
44 void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
45 uint64_t val) const override;
46 void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,
47 uint64_t val) const override;
48 };
49 } // namespace
50
X86()51 X86::X86() {
52 copyRel = R_386_COPY;
53 gotRel = R_386_GLOB_DAT;
54 pltRel = R_386_JUMP_SLOT;
55 iRelativeRel = R_386_IRELATIVE;
56 relativeRel = R_386_RELATIVE;
57 symbolicRel = R_386_32;
58 tlsDescRel = R_386_TLS_DESC;
59 tlsGotRel = R_386_TLS_TPOFF;
60 tlsModuleIndexRel = R_386_TLS_DTPMOD32;
61 tlsOffsetRel = R_386_TLS_DTPOFF32;
62 gotBaseSymInGotPlt = true;
63 pltHeaderSize = 16;
64 pltEntrySize = 16;
65 ipltEntrySize = 16;
66 trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3
67
68 // Align to the non-PAE large page size (known as a superpage or huge page).
69 // FreeBSD automatically promotes large, superpage-aligned allocations.
70 defaultImageBase = 0x400000;
71 }
72
getTlsGdRelaxSkip(RelType type) const73 int X86::getTlsGdRelaxSkip(RelType type) const {
74 // TLSDESC relocations are processed separately. See relaxTlsGdToLe below.
75 return type == R_386_TLS_GOTDESC || type == R_386_TLS_DESC_CALL ? 1 : 2;
76 }
77
getRelExpr(RelType type,const Symbol & s,const uint8_t * loc) const78 RelExpr X86::getRelExpr(RelType type, const Symbol &s,
79 const uint8_t *loc) const {
80 if (type == R_386_TLS_IE || type == R_386_TLS_GOTIE)
81 config->hasTlsIe = true;
82
83 switch (type) {
84 case R_386_8:
85 case R_386_16:
86 case R_386_32:
87 return R_ABS;
88 case R_386_TLS_LDO_32:
89 return R_DTPREL;
90 case R_386_TLS_GD:
91 return R_TLSGD_GOTPLT;
92 case R_386_TLS_LDM:
93 return R_TLSLD_GOTPLT;
94 case R_386_PLT32:
95 return R_PLT_PC;
96 case R_386_PC8:
97 case R_386_PC16:
98 case R_386_PC32:
99 return R_PC;
100 case R_386_GOTPC:
101 return R_GOTPLTONLY_PC;
102 case R_386_TLS_IE:
103 return R_GOT;
104 case R_386_GOT32:
105 case R_386_GOT32X:
106 // These relocations are arguably mis-designed because their calculations
107 // depend on the instructions they are applied to. This is bad because we
108 // usually don't care about whether the target section contains valid
109 // machine instructions or not. But this is part of the documented ABI, so
110 // we had to implement as the standard requires.
111 //
112 // x86 does not support PC-relative data access. Therefore, in order to
113 // access GOT contents, a GOT address needs to be known at link-time
114 // (which means non-PIC) or compilers have to emit code to get a GOT
115 // address at runtime (which means code is position-independent but
116 // compilers need to emit extra code for each GOT access.) This decision
117 // is made at compile-time. In the latter case, compilers emit code to
118 // load a GOT address to a register, which is usually %ebx.
119 //
120 // So, there are two ways to refer to symbol foo's GOT entry: foo@GOT or
121 // foo@GOT(%ebx).
122 //
123 // foo@GOT is not usable in PIC. If we are creating a PIC output and if we
124 // find such relocation, we should report an error. foo@GOT is resolved to
125 // an *absolute* address of foo's GOT entry, because both GOT address and
126 // foo's offset are known. In other words, it's G + A.
127 //
128 // foo@GOT(%ebx) needs to be resolved to a *relative* offset from a GOT to
129 // foo's GOT entry in the table, because GOT address is not known but foo's
130 // offset in the table is known. It's G + A - GOT.
131 //
132 // It's unfortunate that compilers emit the same relocation for these
133 // different use cases. In order to distinguish them, we have to read a
134 // machine instruction.
135 //
136 // The following code implements it. We assume that Loc[0] is the first byte
137 // of a displacement or an immediate field of a valid machine
138 // instruction. That means a ModRM byte is at Loc[-1]. By taking a look at
139 // the byte, we can determine whether the instruction uses the operand as an
140 // absolute address (R_GOT) or a register-relative address (R_GOTPLT).
141 return (loc[-1] & 0xc7) == 0x5 ? R_GOT : R_GOTPLT;
142 case R_386_TLS_GOTDESC:
143 return R_TLSDESC_GOTPLT;
144 case R_386_TLS_DESC_CALL:
145 return R_TLSDESC_CALL;
146 case R_386_TLS_GOTIE:
147 return R_GOTPLT;
148 case R_386_GOTOFF:
149 return R_GOTPLTREL;
150 case R_386_TLS_LE:
151 return R_TPREL;
152 case R_386_TLS_LE_32:
153 return R_TPREL_NEG;
154 case R_386_NONE:
155 return R_NONE;
156 default:
157 error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
158 ") against symbol " + toString(s));
159 return R_NONE;
160 }
161 }
162
adjustTlsExpr(RelType type,RelExpr expr) const163 RelExpr X86::adjustTlsExpr(RelType type, RelExpr expr) const {
164 switch (expr) {
165 default:
166 return expr;
167 case R_RELAX_TLS_GD_TO_IE:
168 return R_RELAX_TLS_GD_TO_IE_GOTPLT;
169 case R_RELAX_TLS_GD_TO_LE:
170 return type == R_386_TLS_GD ? R_RELAX_TLS_GD_TO_LE_NEG
171 : R_RELAX_TLS_GD_TO_LE;
172 }
173 }
174
writeGotPltHeader(uint8_t * buf) const175 void X86::writeGotPltHeader(uint8_t *buf) const {
176 write32le(buf, mainPart->dynamic->getVA());
177 }
178
writeGotPlt(uint8_t * buf,const Symbol & s) const179 void X86::writeGotPlt(uint8_t *buf, const Symbol &s) const {
180 // Entries in .got.plt initially points back to the corresponding
181 // PLT entries with a fixed offset to skip the first instruction.
182 write32le(buf, s.getPltVA() + 6);
183 }
184
writeIgotPlt(uint8_t * buf,const Symbol & s) const185 void X86::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
186 // An x86 entry is the address of the ifunc resolver function.
187 write32le(buf, s.getVA());
188 }
189
getDynRel(RelType type) const190 RelType X86::getDynRel(RelType type) const {
191 if (type == R_386_TLS_LE)
192 return R_386_TLS_TPOFF;
193 if (type == R_386_TLS_LE_32)
194 return R_386_TLS_TPOFF32;
195 return type;
196 }
197
writePltHeader(uint8_t * buf) const198 void X86::writePltHeader(uint8_t *buf) const {
199 if (config->isPic) {
200 const uint8_t v[] = {
201 0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl 4(%ebx)
202 0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *8(%ebx)
203 0x90, 0x90, 0x90, 0x90 // nop
204 };
205 memcpy(buf, v, sizeof(v));
206 return;
207 }
208
209 const uint8_t pltData[] = {
210 0xff, 0x35, 0, 0, 0, 0, // pushl (GOTPLT+4)
211 0xff, 0x25, 0, 0, 0, 0, // jmp *(GOTPLT+8)
212 0x90, 0x90, 0x90, 0x90, // nop
213 };
214 memcpy(buf, pltData, sizeof(pltData));
215 uint32_t gotPlt = in.gotPlt->getVA();
216 write32le(buf + 2, gotPlt + 4);
217 write32le(buf + 8, gotPlt + 8);
218 }
219
writePlt(uint8_t * buf,const Symbol & sym,uint64_t pltEntryAddr) const220 void X86::writePlt(uint8_t *buf, const Symbol &sym,
221 uint64_t pltEntryAddr) const {
222 unsigned relOff = in.relaPlt->entsize * sym.getPltIdx();
223 if (config->isPic) {
224 const uint8_t inst[] = {
225 0xff, 0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx)
226 0x68, 0, 0, 0, 0, // pushl $reloc_offset
227 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC
228 };
229 memcpy(buf, inst, sizeof(inst));
230 write32le(buf + 2, sym.getGotPltVA() - in.gotPlt->getVA());
231 } else {
232 const uint8_t inst[] = {
233 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
234 0x68, 0, 0, 0, 0, // pushl $reloc_offset
235 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC
236 };
237 memcpy(buf, inst, sizeof(inst));
238 write32le(buf + 2, sym.getGotPltVA());
239 }
240
241 write32le(buf + 7, relOff);
242 write32le(buf + 12, in.plt->getVA() - pltEntryAddr - 16);
243 }
244
getImplicitAddend(const uint8_t * buf,RelType type) const245 int64_t X86::getImplicitAddend(const uint8_t *buf, RelType type) const {
246 switch (type) {
247 case R_386_8:
248 case R_386_PC8:
249 return SignExtend64<8>(*buf);
250 case R_386_16:
251 case R_386_PC16:
252 return SignExtend64<16>(read16le(buf));
253 case R_386_32:
254 case R_386_GLOB_DAT:
255 case R_386_GOT32:
256 case R_386_GOT32X:
257 case R_386_GOTOFF:
258 case R_386_GOTPC:
259 case R_386_IRELATIVE:
260 case R_386_PC32:
261 case R_386_PLT32:
262 case R_386_RELATIVE:
263 case R_386_TLS_GOTDESC:
264 case R_386_TLS_DESC_CALL:
265 case R_386_TLS_DTPMOD32:
266 case R_386_TLS_DTPOFF32:
267 case R_386_TLS_LDO_32:
268 case R_386_TLS_LDM:
269 case R_386_TLS_IE:
270 case R_386_TLS_IE_32:
271 case R_386_TLS_LE:
272 case R_386_TLS_LE_32:
273 case R_386_TLS_GD:
274 case R_386_TLS_GD_32:
275 case R_386_TLS_GOTIE:
276 case R_386_TLS_TPOFF:
277 case R_386_TLS_TPOFF32:
278 return SignExtend64<32>(read32le(buf));
279 case R_386_TLS_DESC:
280 return SignExtend64<32>(read32le(buf + 4));
281 case R_386_NONE:
282 case R_386_JUMP_SLOT:
283 // These relocations are defined as not having an implicit addend.
284 return 0;
285 default:
286 internalLinkerError(getErrorLocation(buf),
287 "cannot read addend for relocation " + toString(type));
288 return 0;
289 }
290 }
291
relocate(uint8_t * loc,const Relocation & rel,uint64_t val) const292 void X86::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
293 switch (rel.type) {
294 case R_386_8:
295 // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are
296 // being used for some 16-bit programs such as boot loaders, so
297 // we want to support them.
298 checkIntUInt(loc, val, 8, rel);
299 *loc = val;
300 break;
301 case R_386_PC8:
302 checkInt(loc, val, 8, rel);
303 *loc = val;
304 break;
305 case R_386_16:
306 checkIntUInt(loc, val, 16, rel);
307 write16le(loc, val);
308 break;
309 case R_386_PC16:
310 // R_386_PC16 is normally used with 16 bit code. In that situation
311 // the PC is 16 bits, just like the addend. This means that it can
312 // point from any 16 bit address to any other if the possibility
313 // of wrapping is included.
314 // The only restriction we have to check then is that the destination
315 // address fits in 16 bits. That is impossible to do here. The problem is
316 // that we are passed the final value, which already had the
317 // current location subtracted from it.
318 // We just check that Val fits in 17 bits. This misses some cases, but
319 // should have no false positives.
320 checkInt(loc, val, 17, rel);
321 write16le(loc, val);
322 break;
323 case R_386_32:
324 case R_386_GOT32:
325 case R_386_GOT32X:
326 case R_386_GOTOFF:
327 case R_386_GOTPC:
328 case R_386_PC32:
329 case R_386_PLT32:
330 case R_386_RELATIVE:
331 case R_386_TLS_GOTDESC:
332 case R_386_TLS_DESC_CALL:
333 case R_386_TLS_DTPMOD32:
334 case R_386_TLS_DTPOFF32:
335 case R_386_TLS_GD:
336 case R_386_TLS_GOTIE:
337 case R_386_TLS_IE:
338 case R_386_TLS_LDM:
339 case R_386_TLS_LDO_32:
340 case R_386_TLS_LE:
341 case R_386_TLS_LE_32:
342 case R_386_TLS_TPOFF:
343 case R_386_TLS_TPOFF32:
344 checkInt(loc, val, 32, rel);
345 write32le(loc, val);
346 break;
347 case R_386_TLS_DESC:
348 // The addend is stored in the second 32-bit word.
349 write32le(loc + 4, val);
350 break;
351 default:
352 llvm_unreachable("unknown relocation");
353 }
354 }
355
relaxTlsGdToLe(uint8_t * loc,const Relocation & rel,uint64_t val) const356 void X86::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
357 uint64_t val) const {
358 if (rel.type == R_386_TLS_GD) {
359 // Convert
360 // leal x@tlsgd(, %ebx, 1), %eax
361 // call __tls_get_addr@plt
362 // to
363 // movl %gs:0, %eax
364 // subl $x@tpoff, %eax
365 const uint8_t inst[] = {
366 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
367 0x81, 0xe8, 0, 0, 0, 0, // subl val(%ebx), %eax
368 };
369 memcpy(loc - 3, inst, sizeof(inst));
370 write32le(loc + 5, val);
371 } else if (rel.type == R_386_TLS_GOTDESC) {
372 // Convert leal x@tlsdesc(%ebx), %eax to leal x@ntpoff, %eax.
373 //
374 // Note: call *x@tlsdesc(%eax) may not immediately follow this instruction.
375 if (memcmp(loc - 2, "\x8d\x83", 2)) {
376 error(getErrorLocation(loc - 2) +
377 "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax");
378 return;
379 }
380 loc[-1] = 0x05;
381 write32le(loc, val);
382 } else {
383 // Convert call *x@tlsdesc(%eax) to xchg ax, ax.
384 assert(rel.type == R_386_TLS_DESC_CALL);
385 loc[0] = 0x66;
386 loc[1] = 0x90;
387 }
388 }
389
relaxTlsGdToIe(uint8_t * loc,const Relocation & rel,uint64_t val) const390 void X86::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
391 uint64_t val) const {
392 if (rel.type == R_386_TLS_GD) {
393 // Convert
394 // leal x@tlsgd(, %ebx, 1), %eax
395 // call __tls_get_addr@plt
396 // to
397 // movl %gs:0, %eax
398 // addl x@gotntpoff(%ebx), %eax
399 const uint8_t inst[] = {
400 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
401 0x03, 0x83, 0, 0, 0, 0, // addl val(%ebx), %eax
402 };
403 memcpy(loc - 3, inst, sizeof(inst));
404 write32le(loc + 5, val);
405 } else if (rel.type == R_386_TLS_GOTDESC) {
406 // Convert leal x@tlsdesc(%ebx), %eax to movl x@gotntpoff(%ebx), %eax.
407 if (memcmp(loc - 2, "\x8d\x83", 2)) {
408 error(getErrorLocation(loc - 2) +
409 "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax");
410 return;
411 }
412 loc[-2] = 0x8b;
413 write32le(loc, val);
414 } else {
415 // Convert call *x@tlsdesc(%eax) to xchg ax, ax.
416 assert(rel.type == R_386_TLS_DESC_CALL);
417 loc[0] = 0x66;
418 loc[1] = 0x90;
419 }
420 }
421
422 // In some conditions, relocations can be optimized to avoid using GOT.
423 // This function does that for Initial Exec to Local Exec case.
relaxTlsIeToLe(uint8_t * loc,const Relocation & rel,uint64_t val) const424 void X86::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
425 uint64_t val) const {
426 // Ulrich's document section 6.2 says that @gotntpoff can
427 // be used with MOVL or ADDL instructions.
428 // @indntpoff is similar to @gotntpoff, but for use in
429 // position dependent code.
430 uint8_t reg = (loc[-1] >> 3) & 7;
431
432 if (rel.type == R_386_TLS_IE) {
433 if (loc[-1] == 0xa1) {
434 // "movl foo@indntpoff,%eax" -> "movl $foo,%eax"
435 // This case is different from the generic case below because
436 // this is a 5 byte instruction while below is 6 bytes.
437 loc[-1] = 0xb8;
438 } else if (loc[-2] == 0x8b) {
439 // "movl foo@indntpoff,%reg" -> "movl $foo,%reg"
440 loc[-2] = 0xc7;
441 loc[-1] = 0xc0 | reg;
442 } else {
443 // "addl foo@indntpoff,%reg" -> "addl $foo,%reg"
444 loc[-2] = 0x81;
445 loc[-1] = 0xc0 | reg;
446 }
447 } else {
448 assert(rel.type == R_386_TLS_GOTIE);
449 if (loc[-2] == 0x8b) {
450 // "movl foo@gottpoff(%rip),%reg" -> "movl $foo,%reg"
451 loc[-2] = 0xc7;
452 loc[-1] = 0xc0 | reg;
453 } else {
454 // "addl foo@gotntpoff(%rip),%reg" -> "leal foo(%reg),%reg"
455 loc[-2] = 0x8d;
456 loc[-1] = 0x80 | (reg << 3) | reg;
457 }
458 }
459 write32le(loc, val);
460 }
461
relaxTlsLdToLe(uint8_t * loc,const Relocation & rel,uint64_t val) const462 void X86::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,
463 uint64_t val) const {
464 if (rel.type == R_386_TLS_LDO_32) {
465 write32le(loc, val);
466 return;
467 }
468
469 // Convert
470 // leal foo(%reg),%eax
471 // call ___tls_get_addr
472 // to
473 // movl %gs:0,%eax
474 // nop
475 // leal 0(%esi,1),%esi
476 const uint8_t inst[] = {
477 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
478 0x90, // nop
479 0x8d, 0x74, 0x26, 0x00, // leal 0(%esi,1),%esi
480 };
481 memcpy(loc - 2, inst, sizeof(inst));
482 }
483
484 // If Intel Indirect Branch Tracking is enabled, we have to emit special PLT
485 // entries containing endbr32 instructions. A PLT entry will be split into two
486 // parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt).
487 namespace {
488 class IntelIBT : public X86 {
489 public:
490 IntelIBT();
491 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
492 void writePlt(uint8_t *buf, const Symbol &sym,
493 uint64_t pltEntryAddr) const override;
494 void writeIBTPlt(uint8_t *buf, size_t numEntries) const override;
495
496 static const unsigned IBTPltHeaderSize = 16;
497 };
498 } // namespace
499
IntelIBT()500 IntelIBT::IntelIBT() { pltHeaderSize = 0; }
501
writeGotPlt(uint8_t * buf,const Symbol & s) const502 void IntelIBT::writeGotPlt(uint8_t *buf, const Symbol &s) const {
503 uint64_t va =
504 in.ibtPlt->getVA() + IBTPltHeaderSize + s.getPltIdx() * pltEntrySize;
505 write32le(buf, va);
506 }
507
writePlt(uint8_t * buf,const Symbol & sym,uint64_t) const508 void IntelIBT::writePlt(uint8_t *buf, const Symbol &sym,
509 uint64_t /*pltEntryAddr*/) const {
510 if (config->isPic) {
511 const uint8_t inst[] = {
512 0xf3, 0x0f, 0x1e, 0xfb, // endbr32
513 0xff, 0xa3, 0, 0, 0, 0, // jmp *name@GOT(%ebx)
514 0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
515 };
516 memcpy(buf, inst, sizeof(inst));
517 write32le(buf + 6, sym.getGotPltVA() - in.gotPlt->getVA());
518 return;
519 }
520
521 const uint8_t inst[] = {
522 0xf3, 0x0f, 0x1e, 0xfb, // endbr32
523 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
524 0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
525 };
526 memcpy(buf, inst, sizeof(inst));
527 write32le(buf + 6, sym.getGotPltVA());
528 }
529
writeIBTPlt(uint8_t * buf,size_t numEntries) const530 void IntelIBT::writeIBTPlt(uint8_t *buf, size_t numEntries) const {
531 writePltHeader(buf);
532 buf += IBTPltHeaderSize;
533
534 const uint8_t inst[] = {
535 0xf3, 0x0f, 0x1e, 0xfb, // endbr32
536 0x68, 0, 0, 0, 0, // pushl $reloc_offset
537 0xe9, 0, 0, 0, 0, // jmpq .PLT0@PC
538 0x66, 0x90, // nop
539 };
540
541 for (size_t i = 0; i < numEntries; ++i) {
542 memcpy(buf, inst, sizeof(inst));
543 write32le(buf + 5, i * sizeof(object::ELF32LE::Rel));
544 write32le(buf + 10, -pltHeaderSize - sizeof(inst) * i - 30);
545 buf += sizeof(inst);
546 }
547 }
548
549 namespace {
550 class RetpolinePic : public X86 {
551 public:
552 RetpolinePic();
553 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
554 void writePltHeader(uint8_t *buf) const override;
555 void writePlt(uint8_t *buf, const Symbol &sym,
556 uint64_t pltEntryAddr) const override;
557 };
558
559 class RetpolineNoPic : public X86 {
560 public:
561 RetpolineNoPic();
562 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
563 void writePltHeader(uint8_t *buf) const override;
564 void writePlt(uint8_t *buf, const Symbol &sym,
565 uint64_t pltEntryAddr) const override;
566 };
567 } // namespace
568
RetpolinePic()569 RetpolinePic::RetpolinePic() {
570 pltHeaderSize = 48;
571 pltEntrySize = 32;
572 ipltEntrySize = 32;
573 }
574
writeGotPlt(uint8_t * buf,const Symbol & s) const575 void RetpolinePic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
576 write32le(buf, s.getPltVA() + 17);
577 }
578
writePltHeader(uint8_t * buf) const579 void RetpolinePic::writePltHeader(uint8_t *buf) const {
580 const uint8_t insn[] = {
581 0xff, 0xb3, 4, 0, 0, 0, // 0: pushl 4(%ebx)
582 0x50, // 6: pushl %eax
583 0x8b, 0x83, 8, 0, 0, 0, // 7: mov 8(%ebx), %eax
584 0xe8, 0x0e, 0x00, 0x00, 0x00, // d: call next
585 0xf3, 0x90, // 12: loop: pause
586 0x0f, 0xae, 0xe8, // 14: lfence
587 0xeb, 0xf9, // 17: jmp loop
588 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19: int3; .align 16
589 0x89, 0x0c, 0x24, // 20: next: mov %ecx, (%esp)
590 0x8b, 0x4c, 0x24, 0x04, // 23: mov 0x4(%esp), %ecx
591 0x89, 0x44, 0x24, 0x04, // 27: mov %eax ,0x4(%esp)
592 0x89, 0xc8, // 2b: mov %ecx, %eax
593 0x59, // 2d: pop %ecx
594 0xc3, // 2e: ret
595 0xcc, // 2f: int3; padding
596 };
597 memcpy(buf, insn, sizeof(insn));
598 }
599
writePlt(uint8_t * buf,const Symbol & sym,uint64_t pltEntryAddr) const600 void RetpolinePic::writePlt(uint8_t *buf, const Symbol &sym,
601 uint64_t pltEntryAddr) const {
602 unsigned relOff = in.relaPlt->entsize * sym.getPltIdx();
603 const uint8_t insn[] = {
604 0x50, // pushl %eax
605 0x8b, 0x83, 0, 0, 0, 0, // mov foo@GOT(%ebx), %eax
606 0xe8, 0, 0, 0, 0, // call plt+0x20
607 0xe9, 0, 0, 0, 0, // jmp plt+0x12
608 0x68, 0, 0, 0, 0, // pushl $reloc_offset
609 0xe9, 0, 0, 0, 0, // jmp plt+0
610 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // int3; padding
611 };
612 memcpy(buf, insn, sizeof(insn));
613
614 uint32_t ebx = in.gotPlt->getVA();
615 unsigned off = pltEntryAddr - in.plt->getVA();
616 write32le(buf + 3, sym.getGotPltVA() - ebx);
617 write32le(buf + 8, -off - 12 + 32);
618 write32le(buf + 13, -off - 17 + 18);
619 write32le(buf + 18, relOff);
620 write32le(buf + 23, -off - 27);
621 }
622
RetpolineNoPic()623 RetpolineNoPic::RetpolineNoPic() {
624 pltHeaderSize = 48;
625 pltEntrySize = 32;
626 ipltEntrySize = 32;
627 }
628
writeGotPlt(uint8_t * buf,const Symbol & s) const629 void RetpolineNoPic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
630 write32le(buf, s.getPltVA() + 16);
631 }
632
writePltHeader(uint8_t * buf) const633 void RetpolineNoPic::writePltHeader(uint8_t *buf) const {
634 const uint8_t insn[] = {
635 0xff, 0x35, 0, 0, 0, 0, // 0: pushl GOTPLT+4
636 0x50, // 6: pushl %eax
637 0xa1, 0, 0, 0, 0, // 7: mov GOTPLT+8, %eax
638 0xe8, 0x0f, 0x00, 0x00, 0x00, // c: call next
639 0xf3, 0x90, // 11: loop: pause
640 0x0f, 0xae, 0xe8, // 13: lfence
641 0xeb, 0xf9, // 16: jmp loop
642 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 18: int3
643 0xcc, 0xcc, 0xcc, // 1f: int3; .align 16
644 0x89, 0x0c, 0x24, // 20: next: mov %ecx, (%esp)
645 0x8b, 0x4c, 0x24, 0x04, // 23: mov 0x4(%esp), %ecx
646 0x89, 0x44, 0x24, 0x04, // 27: mov %eax ,0x4(%esp)
647 0x89, 0xc8, // 2b: mov %ecx, %eax
648 0x59, // 2d: pop %ecx
649 0xc3, // 2e: ret
650 0xcc, // 2f: int3; padding
651 };
652 memcpy(buf, insn, sizeof(insn));
653
654 uint32_t gotPlt = in.gotPlt->getVA();
655 write32le(buf + 2, gotPlt + 4);
656 write32le(buf + 8, gotPlt + 8);
657 }
658
writePlt(uint8_t * buf,const Symbol & sym,uint64_t pltEntryAddr) const659 void RetpolineNoPic::writePlt(uint8_t *buf, const Symbol &sym,
660 uint64_t pltEntryAddr) const {
661 unsigned relOff = in.relaPlt->entsize * sym.getPltIdx();
662 const uint8_t insn[] = {
663 0x50, // 0: pushl %eax
664 0xa1, 0, 0, 0, 0, // 1: mov foo_in_GOT, %eax
665 0xe8, 0, 0, 0, 0, // 6: call plt+0x20
666 0xe9, 0, 0, 0, 0, // b: jmp plt+0x11
667 0x68, 0, 0, 0, 0, // 10: pushl $reloc_offset
668 0xe9, 0, 0, 0, 0, // 15: jmp plt+0
669 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a: int3; padding
670 0xcc, // 1f: int3; padding
671 };
672 memcpy(buf, insn, sizeof(insn));
673
674 unsigned off = pltEntryAddr - in.plt->getVA();
675 write32le(buf + 2, sym.getGotPltVA());
676 write32le(buf + 7, -off - 11 + 32);
677 write32le(buf + 12, -off - 16 + 17);
678 write32le(buf + 17, relOff);
679 write32le(buf + 22, -off - 26);
680 }
681
getX86TargetInfo()682 TargetInfo *elf::getX86TargetInfo() {
683 if (config->zRetpolineplt) {
684 if (config->isPic) {
685 static RetpolinePic t;
686 return &t;
687 }
688 static RetpolineNoPic t;
689 return &t;
690 }
691
692 if (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT) {
693 static IntelIBT t;
694 return &t;
695 }
696
697 static X86 t;
698 return &t;
699 }
700