1 //===- PPC64.cpp ----------------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "Symbols.h" 11 #include "SyntheticSections.h" 12 #include "Target.h" 13 #include "lld/Common/ErrorHandler.h" 14 #include "llvm/Support/Endian.h" 15 16 using namespace llvm; 17 using namespace llvm::object; 18 using namespace llvm::support::endian; 19 using namespace llvm::ELF; 20 using namespace lld; 21 using namespace lld::elf; 22 23 static uint64_t PPC64TocOffset = 0x8000; 24 static uint64_t DynamicThreadPointerOffset = 0x8000; 25 26 // The instruction encoding of bits 21-30 from the ISA for the Xform and Dform 27 // instructions that can be used as part of the initial exec TLS sequence. 28 enum XFormOpcd { 29 LBZX = 87, 30 LHZX = 279, 31 LWZX = 23, 32 LDX = 21, 33 STBX = 215, 34 STHX = 407, 35 STWX = 151, 36 STDX = 149, 37 ADD = 266, 38 }; 39 40 enum DFormOpcd { 41 LBZ = 34, 42 LBZU = 35, 43 LHZ = 40, 44 LHZU = 41, 45 LHAU = 43, 46 LWZ = 32, 47 LWZU = 33, 48 LFSU = 49, 49 LD = 58, 50 LFDU = 51, 51 STB = 38, 52 STBU = 39, 53 STH = 44, 54 STHU = 45, 55 STW = 36, 56 STWU = 37, 57 STFSU = 53, 58 STFDU = 55, 59 STD = 62, 60 ADDI = 14 61 }; 62 63 uint64_t elf::getPPC64TocBase() { 64 // The TOC consists of sections .got, .toc, .tocbss, .plt in that order. The 65 // TOC starts where the first of these sections starts. We always create a 66 // .got when we see a relocation that uses it, so for us the start is always 67 // the .got. 68 uint64_t TocVA = In.Got->getVA(); 69 70 // Per the ppc64-elf-linux ABI, The TOC base is TOC value plus 0x8000 71 // thus permitting a full 64 Kbytes segment. Note that the glibc startup 72 // code (crt1.o) assumes that you can get from the TOC base to the 73 // start of the .toc section with only a single (signed) 16-bit relocation. 74 return TocVA + PPC64TocOffset; 75 } 76 77 unsigned elf::getPPC64GlobalEntryToLocalEntryOffset(uint8_t StOther) { 78 // The offset is encoded into the 3 most significant bits of the st_other 79 // field, with some special values described in section 3.4.1 of the ABI: 80 // 0 --> Zero offset between the GEP and LEP, and the function does NOT use 81 // the TOC pointer (r2). r2 will hold the same value on returning from 82 // the function as it did on entering the function. 83 // 1 --> Zero offset between the GEP and LEP, and r2 should be treated as a 84 // caller-saved register for all callers. 85 // 2-6 --> The binary logarithm of the offset eg: 86 // 2 --> 2^2 = 4 bytes --> 1 instruction. 87 // 6 --> 2^6 = 64 bytes --> 16 instructions. 88 // 7 --> Reserved. 89 uint8_t GepToLep = (StOther >> 5) & 7; 90 if (GepToLep < 2) 91 return 0; 92 93 // The value encoded in the st_other bits is the 94 // log-base-2(offset). 95 if (GepToLep < 7) 96 return 1 << GepToLep; 97 98 error("reserved value of 7 in the 3 most-significant-bits of st_other"); 99 return 0; 100 } 101 102 namespace { 103 class PPC64 final : public TargetInfo { 104 public: 105 PPC64(); 106 uint32_t calcEFlags() const override; 107 RelExpr getRelExpr(RelType Type, const Symbol &S, 108 const uint8_t *Loc) const override; 109 void writePltHeader(uint8_t *Buf) const override; 110 void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr, 111 int32_t Index, unsigned RelOff) const override; 112 void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override; 113 void writeGotHeader(uint8_t *Buf) const override; 114 bool needsThunk(RelExpr Expr, RelType Type, const InputFile *File, 115 uint64_t BranchAddr, const Symbol &S) const override; 116 bool inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const override; 117 RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data, 118 RelExpr Expr) const override; 119 void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const override; 120 void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; 121 void relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; 122 void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; 123 124 bool adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End, 125 uint8_t StOther) const override; 126 }; 127 } // namespace 128 129 // Relocation masks following the #lo(value), #hi(value), #ha(value), 130 // #higher(value), #highera(value), #highest(value), and #highesta(value) 131 // macros defined in section 4.5.1. Relocation Types of the PPC-elf64abi 132 // document. 133 static uint16_t lo(uint64_t V) { return V; } 134 static uint16_t hi(uint64_t V) { return V >> 16; } 135 static uint16_t ha(uint64_t V) { return (V + 0x8000) >> 16; } 136 static uint16_t higher(uint64_t V) { return V >> 32; } 137 static uint16_t highera(uint64_t V) { return (V + 0x8000) >> 32; } 138 static uint16_t highest(uint64_t V) { return V >> 48; } 139 static uint16_t highesta(uint64_t V) { return (V + 0x8000) >> 48; } 140 141 // Extracts the 'PO' field of an instruction encoding. 142 static uint8_t getPrimaryOpCode(uint32_t Encoding) { return (Encoding >> 26); } 143 144 static bool isDQFormInstruction(uint32_t Encoding) { 145 switch (getPrimaryOpCode(Encoding)) { 146 default: 147 return false; 148 case 56: 149 // The only instruction with a primary opcode of 56 is `lq`. 150 return true; 151 case 61: 152 // There are both DS and DQ instruction forms with this primary opcode. 153 // Namely `lxv` and `stxv` are the DQ-forms that use it. 154 // The DS 'XO' bits being set to 01 is restricted to DQ form. 155 return (Encoding & 3) == 0x1; 156 } 157 } 158 159 static bool isInstructionUpdateForm(uint32_t Encoding) { 160 switch (getPrimaryOpCode(Encoding)) { 161 default: 162 return false; 163 case LBZU: 164 case LHAU: 165 case LHZU: 166 case LWZU: 167 case LFSU: 168 case LFDU: 169 case STBU: 170 case STHU: 171 case STWU: 172 case STFSU: 173 case STFDU: 174 return true; 175 // LWA has the same opcode as LD, and the DS bits is what differentiates 176 // between LD/LDU/LWA 177 case LD: 178 case STD: 179 return (Encoding & 3) == 1; 180 } 181 } 182 183 // There are a number of places when we either want to read or write an 184 // instruction when handling a half16 relocation type. On big-endian the buffer 185 // pointer is pointing into the middle of the word we want to extract, and on 186 // little-endian it is pointing to the start of the word. These 2 helpers are to 187 // simplify reading and writing in that context. 188 static void writeInstrFromHalf16(uint8_t *Loc, uint32_t Instr) { 189 write32(Loc - (Config->EKind == ELF64BEKind ? 2 : 0), Instr); 190 } 191 192 static uint32_t readInstrFromHalf16(const uint8_t *Loc) { 193 return read32(Loc - (Config->EKind == ELF64BEKind ? 2 : 0)); 194 } 195 196 PPC64::PPC64() { 197 GotRel = R_PPC64_GLOB_DAT; 198 NoneRel = R_PPC64_NONE; 199 PltRel = R_PPC64_JMP_SLOT; 200 RelativeRel = R_PPC64_RELATIVE; 201 IRelativeRel = R_PPC64_IRELATIVE; 202 GotEntrySize = 8; 203 PltEntrySize = 4; 204 GotPltEntrySize = 8; 205 GotBaseSymInGotPlt = false; 206 GotBaseSymOff = 0x8000; 207 GotHeaderEntriesNum = 1; 208 GotPltHeaderEntriesNum = 2; 209 PltHeaderSize = 60; 210 NeedsThunks = true; 211 212 TlsModuleIndexRel = R_PPC64_DTPMOD64; 213 TlsOffsetRel = R_PPC64_DTPREL64; 214 215 TlsGotRel = R_PPC64_TPREL64; 216 217 NeedsMoreStackNonSplit = false; 218 219 // We need 64K pages (at least under glibc/Linux, the loader won't 220 // set different permissions on a finer granularity than that). 221 DefaultMaxPageSize = 65536; 222 223 // The PPC64 ELF ABI v1 spec, says: 224 // 225 // It is normally desirable to put segments with different characteristics 226 // in separate 256 Mbyte portions of the address space, to give the 227 // operating system full paging flexibility in the 64-bit address space. 228 // 229 // And because the lowest non-zero 256M boundary is 0x10000000, PPC64 linkers 230 // use 0x10000000 as the starting address. 231 DefaultImageBase = 0x10000000; 232 233 write32(TrapInstr.data(), 0x7fe00008); 234 } 235 236 static uint32_t getEFlags(InputFile *File) { 237 if (Config->EKind == ELF64BEKind) 238 return cast<ObjFile<ELF64BE>>(File)->getObj().getHeader()->e_flags; 239 return cast<ObjFile<ELF64LE>>(File)->getObj().getHeader()->e_flags; 240 } 241 242 // This file implements v2 ABI. This function makes sure that all 243 // object files have v2 or an unspecified version as an ABI version. 244 uint32_t PPC64::calcEFlags() const { 245 for (InputFile *F : ObjectFiles) { 246 uint32_t Flag = getEFlags(F); 247 if (Flag == 1) 248 error(toString(F) + ": ABI version 1 is not supported"); 249 else if (Flag > 2) 250 error(toString(F) + ": unrecognized e_flags: " + Twine(Flag)); 251 } 252 return 2; 253 } 254 255 void PPC64::relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const { 256 // Reference: 3.7.4.2 of the 64-bit ELF V2 abi supplement. 257 // The general dynamic code sequence for a global `x` will look like: 258 // Instruction Relocation Symbol 259 // addis r3, r2, x@got@tlsgd@ha R_PPC64_GOT_TLSGD16_HA x 260 // addi r3, r3, x@got@tlsgd@l R_PPC64_GOT_TLSGD16_LO x 261 // bl __tls_get_addr(x@tlsgd) R_PPC64_TLSGD x 262 // R_PPC64_REL24 __tls_get_addr 263 // nop None None 264 265 // Relaxing to local exec entails converting: 266 // addis r3, r2, x@got@tlsgd@ha into nop 267 // addi r3, r3, x@got@tlsgd@l into addis r3, r13, x@tprel@ha 268 // bl __tls_get_addr(x@tlsgd) into nop 269 // nop into addi r3, r3, x@tprel@l 270 271 switch (Type) { 272 case R_PPC64_GOT_TLSGD16_HA: 273 writeInstrFromHalf16(Loc, 0x60000000); // nop 274 break; 275 case R_PPC64_GOT_TLSGD16: 276 case R_PPC64_GOT_TLSGD16_LO: 277 writeInstrFromHalf16(Loc, 0x3c6d0000); // addis r3, r13 278 relocateOne(Loc, R_PPC64_TPREL16_HA, Val); 279 break; 280 case R_PPC64_TLSGD: 281 write32(Loc, 0x60000000); // nop 282 write32(Loc + 4, 0x38630000); // addi r3, r3 283 // Since we are relocating a half16 type relocation and Loc + 4 points to 284 // the start of an instruction we need to advance the buffer by an extra 285 // 2 bytes on BE. 286 relocateOne(Loc + 4 + (Config->EKind == ELF64BEKind ? 2 : 0), 287 R_PPC64_TPREL16_LO, Val); 288 break; 289 default: 290 llvm_unreachable("unsupported relocation for TLS GD to LE relaxation"); 291 } 292 } 293 294 void PPC64::relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const { 295 // Reference: 3.7.4.3 of the 64-bit ELF V2 abi supplement. 296 // The local dynamic code sequence for a global `x` will look like: 297 // Instruction Relocation Symbol 298 // addis r3, r2, x@got@tlsld@ha R_PPC64_GOT_TLSLD16_HA x 299 // addi r3, r3, x@got@tlsld@l R_PPC64_GOT_TLSLD16_LO x 300 // bl __tls_get_addr(x@tlsgd) R_PPC64_TLSLD x 301 // R_PPC64_REL24 __tls_get_addr 302 // nop None None 303 304 // Relaxing to local exec entails converting: 305 // addis r3, r2, x@got@tlsld@ha into nop 306 // addi r3, r3, x@got@tlsld@l into addis r3, r13, 0 307 // bl __tls_get_addr(x@tlsgd) into nop 308 // nop into addi r3, r3, 4096 309 310 switch (Type) { 311 case R_PPC64_GOT_TLSLD16_HA: 312 writeInstrFromHalf16(Loc, 0x60000000); // nop 313 break; 314 case R_PPC64_GOT_TLSLD16_LO: 315 writeInstrFromHalf16(Loc, 0x3c6d0000); // addis r3, r13, 0 316 break; 317 case R_PPC64_TLSLD: 318 write32(Loc, 0x60000000); // nop 319 write32(Loc + 4, 0x38631000); // addi r3, r3, 4096 320 break; 321 case R_PPC64_DTPREL16: 322 case R_PPC64_DTPREL16_HA: 323 case R_PPC64_DTPREL16_HI: 324 case R_PPC64_DTPREL16_DS: 325 case R_PPC64_DTPREL16_LO: 326 case R_PPC64_DTPREL16_LO_DS: 327 case R_PPC64_GOT_DTPREL16_HA: 328 case R_PPC64_GOT_DTPREL16_LO_DS: 329 case R_PPC64_GOT_DTPREL16_DS: 330 case R_PPC64_GOT_DTPREL16_HI: 331 relocateOne(Loc, Type, Val); 332 break; 333 default: 334 llvm_unreachable("unsupported relocation for TLS LD to LE relaxation"); 335 } 336 } 337 338 static unsigned getDFormOp(unsigned SecondaryOp) { 339 switch (SecondaryOp) { 340 case LBZX: 341 return LBZ; 342 case LHZX: 343 return LHZ; 344 case LWZX: 345 return LWZ; 346 case LDX: 347 return LD; 348 case STBX: 349 return STB; 350 case STHX: 351 return STH; 352 case STWX: 353 return STW; 354 case STDX: 355 return STD; 356 case ADD: 357 return ADDI; 358 default: 359 error("unrecognized instruction for IE to LE R_PPC64_TLS"); 360 return 0; 361 } 362 } 363 364 void PPC64::relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const { 365 // The initial exec code sequence for a global `x` will look like: 366 // Instruction Relocation Symbol 367 // addis r9, r2, x@got@tprel@ha R_PPC64_GOT_TPREL16_HA x 368 // ld r9, x@got@tprel@l(r9) R_PPC64_GOT_TPREL16_LO_DS x 369 // add r9, r9, x@tls R_PPC64_TLS x 370 371 // Relaxing to local exec entails converting: 372 // addis r9, r2, x@got@tprel@ha into nop 373 // ld r9, x@got@tprel@l(r9) into addis r9, r13, x@tprel@ha 374 // add r9, r9, x@tls into addi r9, r9, x@tprel@l 375 376 // x@tls R_PPC64_TLS is a relocation which does not compute anything, 377 // it is replaced with r13 (thread pointer). 378 379 // The add instruction in the initial exec sequence has multiple variations 380 // that need to be handled. If we are building an address it will use an add 381 // instruction, if we are accessing memory it will use any of the X-form 382 // indexed load or store instructions. 383 384 unsigned Offset = (Config->EKind == ELF64BEKind) ? 2 : 0; 385 switch (Type) { 386 case R_PPC64_GOT_TPREL16_HA: 387 write32(Loc - Offset, 0x60000000); // nop 388 break; 389 case R_PPC64_GOT_TPREL16_LO_DS: 390 case R_PPC64_GOT_TPREL16_DS: { 391 uint32_t RegNo = read32(Loc - Offset) & 0x03E00000; // bits 6-10 392 write32(Loc - Offset, 0x3C0D0000 | RegNo); // addis RegNo, r13 393 relocateOne(Loc, R_PPC64_TPREL16_HA, Val); 394 break; 395 } 396 case R_PPC64_TLS: { 397 uint32_t PrimaryOp = getPrimaryOpCode(read32(Loc)); 398 if (PrimaryOp != 31) 399 error("unrecognized instruction for IE to LE R_PPC64_TLS"); 400 uint32_t SecondaryOp = (read32(Loc) & 0x000007FE) >> 1; // bits 21-30 401 uint32_t DFormOp = getDFormOp(SecondaryOp); 402 write32(Loc, ((DFormOp << 26) | (read32(Loc) & 0x03FFFFFF))); 403 relocateOne(Loc + Offset, R_PPC64_TPREL16_LO, Val); 404 break; 405 } 406 default: 407 llvm_unreachable("unknown relocation for IE to LE"); 408 break; 409 } 410 } 411 412 RelExpr PPC64::getRelExpr(RelType Type, const Symbol &S, 413 const uint8_t *Loc) const { 414 switch (Type) { 415 case R_PPC64_TOC16: 416 case R_PPC64_TOC16_DS: 417 case R_PPC64_TOC16_HA: 418 case R_PPC64_TOC16_HI: 419 case R_PPC64_TOC16_LO: 420 case R_PPC64_TOC16_LO_DS: 421 return R_GOTREL; 422 case R_PPC64_TOC: 423 return R_PPC_TOC; 424 case R_PPC64_REL24: 425 return R_PPC_CALL_PLT; 426 case R_PPC64_REL16_LO: 427 case R_PPC64_REL16_HA: 428 case R_PPC64_REL32: 429 case R_PPC64_REL64: 430 return R_PC; 431 case R_PPC64_GOT_TLSGD16: 432 case R_PPC64_GOT_TLSGD16_HA: 433 case R_PPC64_GOT_TLSGD16_HI: 434 case R_PPC64_GOT_TLSGD16_LO: 435 return R_TLSGD_GOT; 436 case R_PPC64_GOT_TLSLD16: 437 case R_PPC64_GOT_TLSLD16_HA: 438 case R_PPC64_GOT_TLSLD16_HI: 439 case R_PPC64_GOT_TLSLD16_LO: 440 return R_TLSLD_GOT; 441 case R_PPC64_GOT_TPREL16_HA: 442 case R_PPC64_GOT_TPREL16_LO_DS: 443 case R_PPC64_GOT_TPREL16_DS: 444 case R_PPC64_GOT_TPREL16_HI: 445 return R_GOT_OFF; 446 case R_PPC64_GOT_DTPREL16_HA: 447 case R_PPC64_GOT_DTPREL16_LO_DS: 448 case R_PPC64_GOT_DTPREL16_DS: 449 case R_PPC64_GOT_DTPREL16_HI: 450 return R_TLSLD_GOT_OFF; 451 case R_PPC64_TPREL16: 452 case R_PPC64_TPREL16_HA: 453 case R_PPC64_TPREL16_LO: 454 case R_PPC64_TPREL16_HI: 455 case R_PPC64_TPREL16_DS: 456 case R_PPC64_TPREL16_LO_DS: 457 case R_PPC64_TPREL16_HIGHER: 458 case R_PPC64_TPREL16_HIGHERA: 459 case R_PPC64_TPREL16_HIGHEST: 460 case R_PPC64_TPREL16_HIGHESTA: 461 return R_TLS; 462 case R_PPC64_DTPREL16: 463 case R_PPC64_DTPREL16_DS: 464 case R_PPC64_DTPREL16_HA: 465 case R_PPC64_DTPREL16_HI: 466 case R_PPC64_DTPREL16_HIGHER: 467 case R_PPC64_DTPREL16_HIGHERA: 468 case R_PPC64_DTPREL16_HIGHEST: 469 case R_PPC64_DTPREL16_HIGHESTA: 470 case R_PPC64_DTPREL16_LO: 471 case R_PPC64_DTPREL16_LO_DS: 472 case R_PPC64_DTPREL64: 473 return R_ABS; 474 case R_PPC64_TLSGD: 475 return R_TLSDESC_CALL; 476 case R_PPC64_TLSLD: 477 return R_TLSLD_HINT; 478 case R_PPC64_TLS: 479 return R_TLSIE_HINT; 480 default: 481 return R_ABS; 482 } 483 } 484 485 void PPC64::writeGotHeader(uint8_t *Buf) const { 486 write64(Buf, getPPC64TocBase()); 487 } 488 489 void PPC64::writePltHeader(uint8_t *Buf) const { 490 // The generic resolver stub goes first. 491 write32(Buf + 0, 0x7c0802a6); // mflr r0 492 write32(Buf + 4, 0x429f0005); // bcl 20,4*cr7+so,8 <_glink+0x8> 493 write32(Buf + 8, 0x7d6802a6); // mflr r11 494 write32(Buf + 12, 0x7c0803a6); // mtlr r0 495 write32(Buf + 16, 0x7d8b6050); // subf r12, r11, r12 496 write32(Buf + 20, 0x380cffcc); // subi r0,r12,52 497 write32(Buf + 24, 0x7800f082); // srdi r0,r0,62,2 498 write32(Buf + 28, 0xe98b002c); // ld r12,44(r11) 499 write32(Buf + 32, 0x7d6c5a14); // add r11,r12,r11 500 write32(Buf + 36, 0xe98b0000); // ld r12,0(r11) 501 write32(Buf + 40, 0xe96b0008); // ld r11,8(r11) 502 write32(Buf + 44, 0x7d8903a6); // mtctr r12 503 write32(Buf + 48, 0x4e800420); // bctr 504 505 // The 'bcl' instruction will set the link register to the address of the 506 // following instruction ('mflr r11'). Here we store the offset from that 507 // instruction to the first entry in the GotPlt section. 508 int64_t GotPltOffset = In.GotPlt->getVA() - (In.Plt->getVA() + 8); 509 write64(Buf + 52, GotPltOffset); 510 } 511 512 void PPC64::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, 513 uint64_t PltEntryAddr, int32_t Index, 514 unsigned RelOff) const { 515 int32_t Offset = PltHeaderSize + Index * PltEntrySize; 516 // bl __glink_PLTresolve 517 write32(Buf, 0x48000000 | ((-Offset) & 0x03FFFFFc)); 518 } 519 520 static std::pair<RelType, uint64_t> toAddr16Rel(RelType Type, uint64_t Val) { 521 // Relocations relative to the toc-base need to be adjusted by the Toc offset. 522 uint64_t TocBiasedVal = Val - PPC64TocOffset; 523 // Relocations relative to dtv[dtpmod] need to be adjusted by the DTP offset. 524 uint64_t DTPBiasedVal = Val - DynamicThreadPointerOffset; 525 526 switch (Type) { 527 // TOC biased relocation. 528 case R_PPC64_GOT_TLSGD16: 529 case R_PPC64_GOT_TLSLD16: 530 case R_PPC64_TOC16: 531 return {R_PPC64_ADDR16, TocBiasedVal}; 532 case R_PPC64_TOC16_DS: 533 case R_PPC64_GOT_TPREL16_DS: 534 case R_PPC64_GOT_DTPREL16_DS: 535 return {R_PPC64_ADDR16_DS, TocBiasedVal}; 536 case R_PPC64_GOT_TLSGD16_HA: 537 case R_PPC64_GOT_TLSLD16_HA: 538 case R_PPC64_GOT_TPREL16_HA: 539 case R_PPC64_GOT_DTPREL16_HA: 540 case R_PPC64_TOC16_HA: 541 return {R_PPC64_ADDR16_HA, TocBiasedVal}; 542 case R_PPC64_GOT_TLSGD16_HI: 543 case R_PPC64_GOT_TLSLD16_HI: 544 case R_PPC64_GOT_TPREL16_HI: 545 case R_PPC64_GOT_DTPREL16_HI: 546 case R_PPC64_TOC16_HI: 547 return {R_PPC64_ADDR16_HI, TocBiasedVal}; 548 case R_PPC64_GOT_TLSGD16_LO: 549 case R_PPC64_GOT_TLSLD16_LO: 550 case R_PPC64_TOC16_LO: 551 return {R_PPC64_ADDR16_LO, TocBiasedVal}; 552 case R_PPC64_TOC16_LO_DS: 553 case R_PPC64_GOT_TPREL16_LO_DS: 554 case R_PPC64_GOT_DTPREL16_LO_DS: 555 return {R_PPC64_ADDR16_LO_DS, TocBiasedVal}; 556 557 // Dynamic Thread pointer biased relocation types. 558 case R_PPC64_DTPREL16: 559 return {R_PPC64_ADDR16, DTPBiasedVal}; 560 case R_PPC64_DTPREL16_DS: 561 return {R_PPC64_ADDR16_DS, DTPBiasedVal}; 562 case R_PPC64_DTPREL16_HA: 563 return {R_PPC64_ADDR16_HA, DTPBiasedVal}; 564 case R_PPC64_DTPREL16_HI: 565 return {R_PPC64_ADDR16_HI, DTPBiasedVal}; 566 case R_PPC64_DTPREL16_HIGHER: 567 return {R_PPC64_ADDR16_HIGHER, DTPBiasedVal}; 568 case R_PPC64_DTPREL16_HIGHERA: 569 return {R_PPC64_ADDR16_HIGHERA, DTPBiasedVal}; 570 case R_PPC64_DTPREL16_HIGHEST: 571 return {R_PPC64_ADDR16_HIGHEST, DTPBiasedVal}; 572 case R_PPC64_DTPREL16_HIGHESTA: 573 return {R_PPC64_ADDR16_HIGHESTA, DTPBiasedVal}; 574 case R_PPC64_DTPREL16_LO: 575 return {R_PPC64_ADDR16_LO, DTPBiasedVal}; 576 case R_PPC64_DTPREL16_LO_DS: 577 return {R_PPC64_ADDR16_LO_DS, DTPBiasedVal}; 578 case R_PPC64_DTPREL64: 579 return {R_PPC64_ADDR64, DTPBiasedVal}; 580 581 default: 582 return {Type, Val}; 583 } 584 } 585 586 static bool isTocRelType(RelType Type) { 587 return Type == R_PPC64_TOC16_HA || Type == R_PPC64_TOC16_LO_DS || 588 Type == R_PPC64_TOC16_LO; 589 } 590 591 void PPC64::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const { 592 // For a TOC-relative relocation, proceed in terms of the corresponding 593 // ADDR16 relocation type. 594 bool IsTocRelType = isTocRelType(Type); 595 std::tie(Type, Val) = toAddr16Rel(Type, Val); 596 597 switch (Type) { 598 case R_PPC64_ADDR14: { 599 checkAlignment(Loc, Val, 4, Type); 600 // Preserve the AA/LK bits in the branch instruction 601 uint8_t AALK = Loc[3]; 602 write16(Loc + 2, (AALK & 3) | (Val & 0xfffc)); 603 break; 604 } 605 case R_PPC64_ADDR16: 606 case R_PPC64_TPREL16: 607 checkInt(Loc, Val, 16, Type); 608 write16(Loc, Val); 609 break; 610 case R_PPC64_ADDR16_DS: 611 case R_PPC64_TPREL16_DS: { 612 checkInt(Loc, Val, 16, Type); 613 // DQ-form instructions use bits 28-31 as part of the instruction encoding 614 // DS-form instructions only use bits 30-31. 615 uint16_t Mask = isDQFormInstruction(readInstrFromHalf16(Loc)) ? 0xF : 0x3; 616 checkAlignment(Loc, lo(Val), Mask + 1, Type); 617 write16(Loc, (read16(Loc) & Mask) | lo(Val)); 618 } break; 619 case R_PPC64_ADDR16_HA: 620 case R_PPC64_REL16_HA: 621 case R_PPC64_TPREL16_HA: 622 if (Config->TocOptimize && IsTocRelType && ha(Val) == 0) 623 writeInstrFromHalf16(Loc, 0x60000000); 624 else 625 write16(Loc, ha(Val)); 626 break; 627 case R_PPC64_ADDR16_HI: 628 case R_PPC64_REL16_HI: 629 case R_PPC64_TPREL16_HI: 630 write16(Loc, hi(Val)); 631 break; 632 case R_PPC64_ADDR16_HIGHER: 633 case R_PPC64_TPREL16_HIGHER: 634 write16(Loc, higher(Val)); 635 break; 636 case R_PPC64_ADDR16_HIGHERA: 637 case R_PPC64_TPREL16_HIGHERA: 638 write16(Loc, highera(Val)); 639 break; 640 case R_PPC64_ADDR16_HIGHEST: 641 case R_PPC64_TPREL16_HIGHEST: 642 write16(Loc, highest(Val)); 643 break; 644 case R_PPC64_ADDR16_HIGHESTA: 645 case R_PPC64_TPREL16_HIGHESTA: 646 write16(Loc, highesta(Val)); 647 break; 648 case R_PPC64_ADDR16_LO: 649 case R_PPC64_REL16_LO: 650 case R_PPC64_TPREL16_LO: 651 // When the high-adjusted part of a toc relocation evalutes to 0, it is 652 // changed into a nop. The lo part then needs to be updated to use the 653 // toc-pointer register r2, as the base register. 654 if (Config->TocOptimize && IsTocRelType && ha(Val) == 0) { 655 uint32_t Instr = readInstrFromHalf16(Loc); 656 if (isInstructionUpdateForm(Instr)) 657 error(getErrorLocation(Loc) + 658 "can't toc-optimize an update instruction: 0x" + 659 utohexstr(Instr)); 660 Instr = (Instr & 0xFFE00000) | 0x00020000; 661 writeInstrFromHalf16(Loc, Instr); 662 } 663 write16(Loc, lo(Val)); 664 break; 665 case R_PPC64_ADDR16_LO_DS: 666 case R_PPC64_TPREL16_LO_DS: { 667 // DQ-form instructions use bits 28-31 as part of the instruction encoding 668 // DS-form instructions only use bits 30-31. 669 uint32_t Inst = readInstrFromHalf16(Loc); 670 uint16_t Mask = isDQFormInstruction(Inst) ? 0xF : 0x3; 671 checkAlignment(Loc, lo(Val), Mask + 1, Type); 672 if (Config->TocOptimize && IsTocRelType && ha(Val) == 0) { 673 // When the high-adjusted part of a toc relocation evalutes to 0, it is 674 // changed into a nop. The lo part then needs to be updated to use the toc 675 // pointer register r2, as the base register. 676 if (isInstructionUpdateForm(Inst)) 677 error(getErrorLocation(Loc) + 678 "Can't toc-optimize an update instruction: 0x" + 679 Twine::utohexstr(Inst)); 680 Inst = (Inst & 0xFFE0000F) | 0x00020000; 681 writeInstrFromHalf16(Loc, Inst); 682 } 683 write16(Loc, (read16(Loc) & Mask) | lo(Val)); 684 } break; 685 case R_PPC64_ADDR32: 686 case R_PPC64_REL32: 687 checkInt(Loc, Val, 32, Type); 688 write32(Loc, Val); 689 break; 690 case R_PPC64_ADDR64: 691 case R_PPC64_REL64: 692 case R_PPC64_TOC: 693 write64(Loc, Val); 694 break; 695 case R_PPC64_REL24: { 696 uint32_t Mask = 0x03FFFFFC; 697 checkInt(Loc, Val, 26, Type); 698 checkAlignment(Loc, Val, 4, Type); 699 write32(Loc, (read32(Loc) & ~Mask) | (Val & Mask)); 700 break; 701 } 702 case R_PPC64_DTPREL64: 703 write64(Loc, Val - DynamicThreadPointerOffset); 704 break; 705 default: 706 error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type)); 707 } 708 } 709 710 bool PPC64::needsThunk(RelExpr Expr, RelType Type, const InputFile *File, 711 uint64_t BranchAddr, const Symbol &S) const { 712 // The only call relocation we currently support is the REL24 type. 713 if (Type != R_PPC64_REL24) 714 return false; 715 716 // If a function is in the Plt it needs to be called with a call-stub. 717 if (S.isInPlt()) 718 return true; 719 720 // If a symbol is a weak undefined and we are compiling an executable 721 // it doesn't need a range-extending thunk since it can't be called. 722 if (S.isUndefWeak() && !Config->Shared) 723 return false; 724 725 // If the offset exceeds the range of the branch type then it will need 726 // a range-extending thunk. 727 return !inBranchRange(Type, BranchAddr, S.getVA()); 728 } 729 730 bool PPC64::inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const { 731 assert(Type == R_PPC64_REL24 && "Unexpected relocation type used in branch"); 732 int64_t Offset = Dst - Src; 733 return isInt<26>(Offset); 734 } 735 736 RelExpr PPC64::adjustRelaxExpr(RelType Type, const uint8_t *Data, 737 RelExpr Expr) const { 738 if (Expr == R_RELAX_TLS_GD_TO_IE) 739 return R_RELAX_TLS_GD_TO_IE_GOT_OFF; 740 if (Expr == R_RELAX_TLS_LD_TO_LE) 741 return R_RELAX_TLS_LD_TO_LE_ABS; 742 return Expr; 743 } 744 745 // Reference: 3.7.4.1 of the 64-bit ELF V2 abi supplement. 746 // The general dynamic code sequence for a global `x` uses 4 instructions. 747 // Instruction Relocation Symbol 748 // addis r3, r2, x@got@tlsgd@ha R_PPC64_GOT_TLSGD16_HA x 749 // addi r3, r3, x@got@tlsgd@l R_PPC64_GOT_TLSGD16_LO x 750 // bl __tls_get_addr(x@tlsgd) R_PPC64_TLSGD x 751 // R_PPC64_REL24 __tls_get_addr 752 // nop None None 753 // 754 // Relaxing to initial-exec entails: 755 // 1) Convert the addis/addi pair that builds the address of the tls_index 756 // struct for 'x' to an addis/ld pair that loads an offset from a got-entry. 757 // 2) Convert the call to __tls_get_addr to a nop. 758 // 3) Convert the nop following the call to an add of the loaded offset to the 759 // thread pointer. 760 // Since the nop must directly follow the call, the R_PPC64_TLSGD relocation is 761 // used as the relaxation hint for both steps 2 and 3. 762 void PPC64::relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const { 763 switch (Type) { 764 case R_PPC64_GOT_TLSGD16_HA: 765 // This is relaxed from addis rT, r2, sym@got@tlsgd@ha to 766 // addis rT, r2, sym@got@tprel@ha. 767 relocateOne(Loc, R_PPC64_GOT_TPREL16_HA, Val); 768 return; 769 case R_PPC64_GOT_TLSGD16_LO: { 770 // Relax from addi r3, rA, sym@got@tlsgd@l to 771 // ld r3, sym@got@tprel@l(rA) 772 uint32_t InputRegister = (readInstrFromHalf16(Loc) & (0x1f << 16)); 773 writeInstrFromHalf16(Loc, 0xE8600000 | InputRegister); 774 relocateOne(Loc, R_PPC64_GOT_TPREL16_LO_DS, Val); 775 return; 776 } 777 case R_PPC64_TLSGD: 778 write32(Loc, 0x60000000); // bl __tls_get_addr(sym@tlsgd) --> nop 779 write32(Loc + 4, 0x7c636A14); // nop --> add r3, r3, r13 780 return; 781 default: 782 llvm_unreachable("unsupported relocation for TLS GD to IE relaxation"); 783 } 784 } 785 786 // The prologue for a split-stack function is expected to look roughly 787 // like this: 788 // .Lglobal_entry_point: 789 // # TOC pointer initalization. 790 // ... 791 // .Llocal_entry_point: 792 // # load the __private_ss member of the threads tcbhead. 793 // ld r0,-0x7000-64(r13) 794 // # subtract the functions stack size from the stack pointer. 795 // addis r12, r1, ha(-stack-frame size) 796 // addi r12, r12, l(-stack-frame size) 797 // # compare needed to actual and branch to allocate_more_stack if more 798 // # space is needed, otherwise fallthrough to 'normal' function body. 799 // cmpld cr7,r12,r0 800 // blt- cr7, .Lallocate_more_stack 801 // 802 // -) The allocate_more_stack block might be placed after the split-stack 803 // prologue and the `blt-` replaced with a `bge+ .Lnormal_func_body` 804 // instead. 805 // -) If either the addis or addi is not needed due to the stack size being 806 // smaller then 32K or a multiple of 64K they will be replaced with a nop, 807 // but there will always be 2 instructions the linker can overwrite for the 808 // adjusted stack size. 809 // 810 // The linkers job here is to increase the stack size used in the addis/addi 811 // pair by split-stack-size-adjust. 812 // addis r12, r1, ha(-stack-frame size - split-stack-adjust-size) 813 // addi r12, r12, l(-stack-frame size - split-stack-adjust-size) 814 bool PPC64::adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End, 815 uint8_t StOther) const { 816 // If the caller has a global entry point adjust the buffer past it. The start 817 // of the split-stack prologue will be at the local entry point. 818 Loc += getPPC64GlobalEntryToLocalEntryOffset(StOther); 819 820 // At the very least we expect to see a load of some split-stack data from the 821 // tcb, and 2 instructions that calculate the ending stack address this 822 // function will require. If there is not enough room for at least 3 823 // instructions it can't be a split-stack prologue. 824 if (Loc + 12 >= End) 825 return false; 826 827 // First instruction must be `ld r0, -0x7000-64(r13)` 828 if (read32(Loc) != 0xe80d8fc0) 829 return false; 830 831 int16_t HiImm = 0; 832 int16_t LoImm = 0; 833 // First instruction can be either an addis if the frame size is larger then 834 // 32K, or an addi if the size is less then 32K. 835 int32_t FirstInstr = read32(Loc + 4); 836 if (getPrimaryOpCode(FirstInstr) == 15) { 837 HiImm = FirstInstr & 0xFFFF; 838 } else if (getPrimaryOpCode(FirstInstr) == 14) { 839 LoImm = FirstInstr & 0xFFFF; 840 } else { 841 return false; 842 } 843 844 // Second instruction is either an addi or a nop. If the first instruction was 845 // an addi then LoImm is set and the second instruction must be a nop. 846 uint32_t SecondInstr = read32(Loc + 8); 847 if (!LoImm && getPrimaryOpCode(SecondInstr) == 14) { 848 LoImm = SecondInstr & 0xFFFF; 849 } else if (SecondInstr != 0x60000000) { 850 return false; 851 } 852 853 // The register operands of the first instruction should be the stack-pointer 854 // (r1) as the input (RA) and r12 as the output (RT). If the second 855 // instruction is not a nop, then it should use r12 as both input and output. 856 auto CheckRegOperands = [](uint32_t Instr, uint8_t ExpectedRT, 857 uint8_t ExpectedRA) { 858 return ((Instr & 0x3E00000) >> 21 == ExpectedRT) && 859 ((Instr & 0x1F0000) >> 16 == ExpectedRA); 860 }; 861 if (!CheckRegOperands(FirstInstr, 12, 1)) 862 return false; 863 if (SecondInstr != 0x60000000 && !CheckRegOperands(SecondInstr, 12, 12)) 864 return false; 865 866 int32_t StackFrameSize = (HiImm * 65536) + LoImm; 867 // Check that the adjusted size doesn't overflow what we can represent with 2 868 // instructions. 869 if (StackFrameSize < Config->SplitStackAdjustSize + INT32_MIN) { 870 error(getErrorLocation(Loc) + "split-stack prologue adjustment overflows"); 871 return false; 872 } 873 874 int32_t AdjustedStackFrameSize = 875 StackFrameSize - Config->SplitStackAdjustSize; 876 877 LoImm = AdjustedStackFrameSize & 0xFFFF; 878 HiImm = (AdjustedStackFrameSize + 0x8000) >> 16; 879 if (HiImm) { 880 write32(Loc + 4, 0x3D810000 | (uint16_t)HiImm); 881 // If the low immediate is zero the second instruction will be a nop. 882 SecondInstr = LoImm ? 0x398C0000 | (uint16_t)LoImm : 0x60000000; 883 write32(Loc + 8, SecondInstr); 884 } else { 885 // addi r12, r1, imm 886 write32(Loc + 4, (0x39810000) | (uint16_t)LoImm); 887 write32(Loc + 8, 0x60000000); 888 } 889 890 return true; 891 } 892 893 TargetInfo *elf::getPPC64TargetInfo() { 894 static PPC64 Target; 895 return &Target; 896 } 897