1 //===- InputChunks.cpp ----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "InputChunks.h" 10 #include "Config.h" 11 #include "OutputSegment.h" 12 #include "WriterUtils.h" 13 #include "lld/Common/ErrorHandler.h" 14 #include "lld/Common/LLVM.h" 15 #include "llvm/Support/LEB128.h" 16 17 #define DEBUG_TYPE "lld" 18 19 using namespace llvm; 20 using namespace llvm::wasm; 21 using namespace llvm::support::endian; 22 23 namespace lld { 24 StringRef relocTypeToString(uint8_t relocType) { 25 switch (relocType) { 26 #define WASM_RELOC(NAME, REL) \ 27 case REL: \ 28 return #NAME; 29 #include "llvm/BinaryFormat/WasmRelocs.def" 30 #undef WASM_RELOC 31 } 32 llvm_unreachable("unknown reloc type"); 33 } 34 35 std::string toString(const wasm::InputChunk *c) { 36 return (toString(c->file) + ":(" + c->getName() + ")").str(); 37 } 38 39 namespace wasm { 40 StringRef InputChunk::getComdatName() const { 41 uint32_t index = getComdat(); 42 if (index == UINT32_MAX) 43 return StringRef(); 44 return file->getWasmObj()->linkingData().Comdats[index]; 45 } 46 47 void InputChunk::verifyRelocTargets() const { 48 for (const WasmRelocation &rel : relocations) { 49 uint64_t existingValue; 50 unsigned bytesRead = 0; 51 auto offset = rel.Offset - getInputSectionOffset(); 52 const uint8_t *loc = data().data() + offset; 53 switch (rel.Type) { 54 case R_WASM_TYPE_INDEX_LEB: 55 case R_WASM_FUNCTION_INDEX_LEB: 56 case R_WASM_GLOBAL_INDEX_LEB: 57 case R_WASM_EVENT_INDEX_LEB: 58 case R_WASM_MEMORY_ADDR_LEB: 59 case R_WASM_MEMORY_ADDR_LEB64: 60 existingValue = decodeULEB128(loc, &bytesRead); 61 break; 62 case R_WASM_TABLE_INDEX_SLEB: 63 case R_WASM_TABLE_INDEX_REL_SLEB: 64 case R_WASM_MEMORY_ADDR_SLEB: 65 case R_WASM_MEMORY_ADDR_SLEB64: 66 case R_WASM_MEMORY_ADDR_REL_SLEB: 67 case R_WASM_MEMORY_ADDR_REL_SLEB64: 68 existingValue = static_cast<uint64_t>(decodeSLEB128(loc, &bytesRead)); 69 break; 70 case R_WASM_TABLE_INDEX_I32: 71 case R_WASM_MEMORY_ADDR_I32: 72 case R_WASM_FUNCTION_OFFSET_I32: 73 case R_WASM_SECTION_OFFSET_I32: 74 case R_WASM_GLOBAL_INDEX_I32: 75 existingValue = read32le(loc); 76 break; 77 case R_WASM_MEMORY_ADDR_I64: 78 existingValue = read64le(loc); 79 break; 80 default: 81 llvm_unreachable("unknown relocation type"); 82 } 83 84 if (bytesRead && bytesRead != 5) 85 warn("expected LEB at relocation site be 5-byte padded"); 86 87 if (rel.Type != R_WASM_GLOBAL_INDEX_LEB && 88 rel.Type != R_WASM_GLOBAL_INDEX_I32) { 89 auto expectedValue = file->calcExpectedValue(rel); 90 if (expectedValue != existingValue) 91 warn("unexpected existing value for " + relocTypeToString(rel.Type) + 92 ": existing=" + Twine(existingValue) + 93 " expected=" + Twine(expectedValue)); 94 } 95 } 96 } 97 98 // Copy this input chunk to an mmap'ed output file and apply relocations. 99 void InputChunk::writeTo(uint8_t *buf) const { 100 // Copy contents 101 memcpy(buf + outputOffset, data().data(), data().size()); 102 103 // Apply relocations 104 if (relocations.empty()) 105 return; 106 107 #ifndef NDEBUG 108 verifyRelocTargets(); 109 #endif 110 111 LLVM_DEBUG(dbgs() << "applying relocations: " << toString(this) 112 << " count=" << relocations.size() << "\n"); 113 int32_t off = outputOffset - getInputSectionOffset(); 114 115 for (const WasmRelocation &rel : relocations) { 116 uint8_t *loc = buf + rel.Offset + off; 117 auto value = file->calcNewValue(rel); 118 LLVM_DEBUG(dbgs() << "apply reloc: type=" << relocTypeToString(rel.Type)); 119 if (rel.Type != R_WASM_TYPE_INDEX_LEB) 120 LLVM_DEBUG(dbgs() << " sym=" << file->getSymbols()[rel.Index]->getName()); 121 LLVM_DEBUG(dbgs() << " addend=" << rel.Addend << " index=" << rel.Index 122 << " value=" << value << " offset=" << rel.Offset 123 << "\n"); 124 125 switch (rel.Type) { 126 case R_WASM_TYPE_INDEX_LEB: 127 case R_WASM_FUNCTION_INDEX_LEB: 128 case R_WASM_GLOBAL_INDEX_LEB: 129 case R_WASM_EVENT_INDEX_LEB: 130 case R_WASM_MEMORY_ADDR_LEB: 131 encodeULEB128(value, loc, 5); 132 break; 133 case R_WASM_MEMORY_ADDR_LEB64: 134 encodeULEB128(value, loc, 10); 135 break; 136 case R_WASM_TABLE_INDEX_SLEB: 137 case R_WASM_TABLE_INDEX_REL_SLEB: 138 case R_WASM_MEMORY_ADDR_SLEB: 139 case R_WASM_MEMORY_ADDR_REL_SLEB: 140 encodeSLEB128(static_cast<int32_t>(value), loc, 5); 141 break; 142 case R_WASM_MEMORY_ADDR_SLEB64: 143 case R_WASM_MEMORY_ADDR_REL_SLEB64: 144 encodeSLEB128(static_cast<int64_t>(value), loc, 10); 145 break; 146 case R_WASM_TABLE_INDEX_I32: 147 case R_WASM_MEMORY_ADDR_I32: 148 case R_WASM_FUNCTION_OFFSET_I32: 149 case R_WASM_SECTION_OFFSET_I32: 150 case R_WASM_GLOBAL_INDEX_I32: 151 write32le(loc, value); 152 break; 153 case R_WASM_MEMORY_ADDR_I64: 154 write64le(loc, value); 155 break; 156 default: 157 llvm_unreachable("unknown relocation type"); 158 } 159 } 160 } 161 162 // Copy relocation entries to a given output stream. 163 // This function is used only when a user passes "-r". For a regular link, 164 // we consume relocations instead of copying them to an output file. 165 void InputChunk::writeRelocations(raw_ostream &os) const { 166 if (relocations.empty()) 167 return; 168 169 int32_t off = outputOffset - getInputSectionOffset(); 170 LLVM_DEBUG(dbgs() << "writeRelocations: " << file->getName() 171 << " offset=" << Twine(off) << "\n"); 172 173 for (const WasmRelocation &rel : relocations) { 174 writeUleb128(os, rel.Type, "reloc type"); 175 writeUleb128(os, rel.Offset + off, "reloc offset"); 176 writeUleb128(os, file->calcNewIndex(rel), "reloc index"); 177 178 if (relocTypeHasAddend(rel.Type)) 179 writeSleb128(os, file->calcNewAddend(rel), "reloc addend"); 180 } 181 } 182 183 void InputFunction::setFunctionIndex(uint32_t index) { 184 LLVM_DEBUG(dbgs() << "InputFunction::setFunctionIndex: " << getName() 185 << " -> " << index << "\n"); 186 assert(!hasFunctionIndex()); 187 functionIndex = index; 188 } 189 190 void InputFunction::setTableIndex(uint32_t index) { 191 LLVM_DEBUG(dbgs() << "InputFunction::setTableIndex: " << getName() << " -> " 192 << index << "\n"); 193 assert(!hasTableIndex()); 194 tableIndex = index; 195 } 196 197 // Write a relocation value without padding and return the number of bytes 198 // witten. 199 static unsigned writeCompressedReloc(uint8_t *buf, const WasmRelocation &rel, 200 uint64_t value) { 201 switch (rel.Type) { 202 case R_WASM_TYPE_INDEX_LEB: 203 case R_WASM_FUNCTION_INDEX_LEB: 204 case R_WASM_GLOBAL_INDEX_LEB: 205 case R_WASM_EVENT_INDEX_LEB: 206 case R_WASM_MEMORY_ADDR_LEB: 207 case R_WASM_MEMORY_ADDR_LEB64: 208 return encodeULEB128(value, buf); 209 case R_WASM_TABLE_INDEX_SLEB: 210 case R_WASM_MEMORY_ADDR_SLEB: 211 case R_WASM_MEMORY_ADDR_SLEB64: 212 return encodeSLEB128(static_cast<int64_t>(value), buf); 213 default: 214 llvm_unreachable("unexpected relocation type"); 215 } 216 } 217 218 static unsigned getRelocWidthPadded(const WasmRelocation &rel) { 219 switch (rel.Type) { 220 case R_WASM_TYPE_INDEX_LEB: 221 case R_WASM_FUNCTION_INDEX_LEB: 222 case R_WASM_GLOBAL_INDEX_LEB: 223 case R_WASM_EVENT_INDEX_LEB: 224 case R_WASM_MEMORY_ADDR_LEB: 225 case R_WASM_TABLE_INDEX_SLEB: 226 case R_WASM_MEMORY_ADDR_SLEB: 227 return 5; 228 case R_WASM_MEMORY_ADDR_LEB64: 229 case R_WASM_MEMORY_ADDR_SLEB64: 230 return 10; 231 default: 232 llvm_unreachable("unexpected relocation type"); 233 } 234 } 235 236 static unsigned getRelocWidth(const WasmRelocation &rel, uint64_t value) { 237 uint8_t buf[10]; 238 return writeCompressedReloc(buf, rel, value); 239 } 240 241 // Relocations of type LEB and SLEB in the code section are padded to 5 bytes 242 // so that a fast linker can blindly overwrite them without needing to worry 243 // about the number of bytes needed to encode the values. 244 // However, for optimal output the code section can be compressed to remove 245 // the padding then outputting non-relocatable files. 246 // In this case we need to perform a size calculation based on the value at each 247 // relocation. At best we end up saving 4 bytes for each relocation entry. 248 // 249 // This function only computes the final output size. It must be called 250 // before getSize() is used to calculate of layout of the code section. 251 void InputFunction::calculateSize() { 252 if (!file || !config->compressRelocations) 253 return; 254 255 LLVM_DEBUG(dbgs() << "calculateSize: " << getName() << "\n"); 256 257 const uint8_t *secStart = file->codeSection->Content.data(); 258 const uint8_t *funcStart = secStart + getInputSectionOffset(); 259 uint32_t functionSizeLength; 260 decodeULEB128(funcStart, &functionSizeLength); 261 262 uint32_t start = getInputSectionOffset(); 263 uint32_t end = start + function->Size; 264 265 uint32_t lastRelocEnd = start + functionSizeLength; 266 for (const WasmRelocation &rel : relocations) { 267 LLVM_DEBUG(dbgs() << " region: " << (rel.Offset - lastRelocEnd) << "\n"); 268 compressedFuncSize += rel.Offset - lastRelocEnd; 269 compressedFuncSize += getRelocWidth(rel, file->calcNewValue(rel)); 270 lastRelocEnd = rel.Offset + getRelocWidthPadded(rel); 271 } 272 LLVM_DEBUG(dbgs() << " final region: " << (end - lastRelocEnd) << "\n"); 273 compressedFuncSize += end - lastRelocEnd; 274 275 // Now we know how long the resulting function is we can add the encoding 276 // of its length 277 uint8_t buf[5]; 278 compressedSize = compressedFuncSize + encodeULEB128(compressedFuncSize, buf); 279 280 LLVM_DEBUG(dbgs() << " calculateSize orig: " << function->Size << "\n"); 281 LLVM_DEBUG(dbgs() << " calculateSize new: " << compressedSize << "\n"); 282 } 283 284 // Override the default writeTo method so that we can (optionally) write the 285 // compressed version of the function. 286 void InputFunction::writeTo(uint8_t *buf) const { 287 if (!file || !config->compressRelocations) 288 return InputChunk::writeTo(buf); 289 290 buf += outputOffset; 291 uint8_t *orig = buf; 292 (void)orig; 293 294 const uint8_t *secStart = file->codeSection->Content.data(); 295 const uint8_t *funcStart = secStart + getInputSectionOffset(); 296 const uint8_t *end = funcStart + function->Size; 297 uint32_t count; 298 decodeULEB128(funcStart, &count); 299 funcStart += count; 300 301 LLVM_DEBUG(dbgs() << "write func: " << getName() << "\n"); 302 buf += encodeULEB128(compressedFuncSize, buf); 303 const uint8_t *lastRelocEnd = funcStart; 304 for (const WasmRelocation &rel : relocations) { 305 unsigned chunkSize = (secStart + rel.Offset) - lastRelocEnd; 306 LLVM_DEBUG(dbgs() << " write chunk: " << chunkSize << "\n"); 307 memcpy(buf, lastRelocEnd, chunkSize); 308 buf += chunkSize; 309 buf += writeCompressedReloc(buf, rel, file->calcNewValue(rel)); 310 lastRelocEnd = secStart + rel.Offset + getRelocWidthPadded(rel); 311 } 312 313 unsigned chunkSize = end - lastRelocEnd; 314 LLVM_DEBUG(dbgs() << " write final chunk: " << chunkSize << "\n"); 315 memcpy(buf, lastRelocEnd, chunkSize); 316 LLVM_DEBUG(dbgs() << " total: " << (buf + chunkSize - orig) << "\n"); 317 } 318 319 // Generate code to apply relocations to the data section at runtime. 320 // This is only called when generating shared libaries (PIC) where address are 321 // not known at static link time. 322 void InputSegment::generateRelocationCode(raw_ostream &os) const { 323 LLVM_DEBUG(dbgs() << "generating runtime relocations: " << getName() 324 << " count=" << relocations.size() << "\n"); 325 326 // TODO(sbc): Encode the relocations in the data section and write a loop 327 // here to apply them. 328 uint32_t segmentVA = outputSeg->startVA + outputSegmentOffset; 329 for (const WasmRelocation &rel : relocations) { 330 uint32_t offset = rel.Offset - getInputSectionOffset(); 331 uint32_t outputOffset = segmentVA + offset; 332 333 LLVM_DEBUG(dbgs() << "gen reloc: type=" << relocTypeToString(rel.Type) 334 << " addend=" << rel.Addend << " index=" << rel.Index 335 << " output offset=" << outputOffset << "\n"); 336 337 // Get __memory_base 338 writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET"); 339 writeUleb128(os, WasmSym::memoryBase->getGlobalIndex(), "memory_base"); 340 341 // Add the offset of the relocation 342 writeU8(os, WASM_OPCODE_I32_CONST, "I32_CONST"); 343 writeSleb128(os, outputOffset, "offset"); 344 writeU8(os, WASM_OPCODE_I32_ADD, "ADD"); 345 346 Symbol *sym = file->getSymbol(rel); 347 // Now figure out what we want to store 348 if (sym->hasGOTIndex()) { 349 writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET"); 350 writeUleb128(os, sym->getGOTIndex(), "global index"); 351 if (rel.Addend) { 352 writeU8(os, WASM_OPCODE_I32_CONST, "CONST"); 353 writeSleb128(os, rel.Addend, "addend"); 354 writeU8(os, WASM_OPCODE_I32_ADD, "ADD"); 355 } 356 } else { 357 const GlobalSymbol* baseSymbol = WasmSym::memoryBase; 358 if (rel.Type == R_WASM_TABLE_INDEX_I32) 359 baseSymbol = WasmSym::tableBase; 360 writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET"); 361 writeUleb128(os, baseSymbol->getGlobalIndex(), "base"); 362 writeU8(os, WASM_OPCODE_I32_CONST, "CONST"); 363 writeSleb128(os, file->calcNewValue(rel), "offset"); 364 writeU8(os, WASM_OPCODE_I32_ADD, "ADD"); 365 } 366 367 // Store that value at the virtual address 368 writeU8(os, WASM_OPCODE_I32_STORE, "I32_STORE"); 369 writeUleb128(os, 2, "align"); 370 writeUleb128(os, 0, "offset"); 371 } 372 } 373 374 } // namespace wasm 375 } // namespace lld 376