1 //===- InputChunks.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "InputChunks.h"
10 #include "Config.h"
11 #include "OutputSegment.h"
12 #include "WriterUtils.h"
13 #include "lld/Common/ErrorHandler.h"
14 #include "lld/Common/LLVM.h"
15 #include "llvm/Support/LEB128.h"
16 
17 #define DEBUG_TYPE "lld"
18 
19 using namespace llvm;
20 using namespace llvm::wasm;
21 using namespace llvm::support::endian;
22 
23 namespace lld {
// Map a relocation type code to the spelling of its name.
//
// The switch cases are generated with an X-macro: WASM_RELOC is defined so
// that every WASM_RELOC(NAME, REL) use inside the included .def file expands
// to `case REL: return "NAME";` (#NAME stringizes the first argument). The
// macro is undefined again right after the include so it does not leak into
// the rest of the file. A value that matches no generated case falls out of
// the switch and reaches llvm_unreachable.
StringRef relocTypeToString(uint8_t relocType) {
  switch (relocType) {
#define WASM_RELOC(NAME, REL)                                                  \
  case REL:                                                                    \
    return #NAME;
#include "llvm/BinaryFormat/WasmRelocs.def"
#undef WASM_RELOC
  }
  llvm_unreachable("unknown reloc type");
}
34 
35 std::string toString(const wasm::InputChunk *c) {
36   return (toString(c->file) + ":(" + c->getName() + ")").str();
37 }
38 
39 namespace wasm {
40 StringRef InputChunk::getComdatName() const {
41   uint32_t index = getComdat();
42   if (index == UINT32_MAX)
43     return StringRef();
44   return file->getWasmObj()->linkingData().Comdats[index];
45 }
46 
47 void InputChunk::verifyRelocTargets() const {
48   for (const WasmRelocation &rel : relocations) {
49     uint32_t existingValue;
50     unsigned bytesRead = 0;
51     uint32_t offset = rel.Offset - getInputSectionOffset();
52     const uint8_t *loc = data().data() + offset;
53     switch (rel.Type) {
54     case R_WASM_TYPE_INDEX_LEB:
55     case R_WASM_FUNCTION_INDEX_LEB:
56     case R_WASM_GLOBAL_INDEX_LEB:
57     case R_WASM_EVENT_INDEX_LEB:
58     case R_WASM_MEMORY_ADDR_LEB:
59       existingValue = decodeULEB128(loc, &bytesRead);
60       break;
61     case R_WASM_TABLE_INDEX_SLEB:
62     case R_WASM_TABLE_INDEX_REL_SLEB:
63     case R_WASM_MEMORY_ADDR_SLEB:
64     case R_WASM_MEMORY_ADDR_REL_SLEB:
65       existingValue = static_cast<uint32_t>(decodeSLEB128(loc, &bytesRead));
66       break;
67     case R_WASM_TABLE_INDEX_I32:
68     case R_WASM_MEMORY_ADDR_I32:
69     case R_WASM_FUNCTION_OFFSET_I32:
70     case R_WASM_SECTION_OFFSET_I32:
71     case R_WASM_GLOBAL_INDEX_I32:
72       existingValue = static_cast<uint32_t>(read32le(loc));
73       break;
74     default:
75       llvm_unreachable("unknown relocation type");
76     }
77 
78     if (bytesRead && bytesRead != 5)
79       warn("expected LEB at relocation site be 5-byte padded");
80 
81     if (rel.Type != R_WASM_GLOBAL_INDEX_LEB &&
82         rel.Type != R_WASM_GLOBAL_INDEX_I32) {
83       uint32_t expectedValue = file->calcExpectedValue(rel);
84       if (expectedValue != existingValue)
85         warn("unexpected existing value for " + relocTypeToString(rel.Type) +
86              ": existing=" + Twine(existingValue) +
87              " expected=" + Twine(expectedValue));
88     }
89   }
90 }
91 
92 // Copy this input chunk to an mmap'ed output file and apply relocations.
93 void InputChunk::writeTo(uint8_t *buf) const {
94   // Copy contents
95   memcpy(buf + outputOffset, data().data(), data().size());
96 
97   // Apply relocations
98   if (relocations.empty())
99     return;
100 
101 #ifndef NDEBUG
102   verifyRelocTargets();
103 #endif
104 
105   LLVM_DEBUG(dbgs() << "applying relocations: " << toString(this)
106                     << " count=" << relocations.size() << "\n");
107   int32_t off = outputOffset - getInputSectionOffset();
108 
109   for (const WasmRelocation &rel : relocations) {
110     uint8_t *loc = buf + rel.Offset + off;
111     uint32_t value = file->calcNewValue(rel);
112     LLVM_DEBUG(dbgs() << "apply reloc: type=" << relocTypeToString(rel.Type));
113     if (rel.Type != R_WASM_TYPE_INDEX_LEB)
114       LLVM_DEBUG(dbgs() << " sym=" << file->getSymbols()[rel.Index]->getName());
115     LLVM_DEBUG(dbgs() << " addend=" << rel.Addend << " index=" << rel.Index
116                       << " value=" << value << " offset=" << rel.Offset
117                       << "\n");
118 
119     switch (rel.Type) {
120     case R_WASM_TYPE_INDEX_LEB:
121     case R_WASM_FUNCTION_INDEX_LEB:
122     case R_WASM_GLOBAL_INDEX_LEB:
123     case R_WASM_EVENT_INDEX_LEB:
124     case R_WASM_MEMORY_ADDR_LEB:
125       encodeULEB128(value, loc, 5);
126       break;
127     case R_WASM_TABLE_INDEX_SLEB:
128     case R_WASM_TABLE_INDEX_REL_SLEB:
129     case R_WASM_MEMORY_ADDR_SLEB:
130     case R_WASM_MEMORY_ADDR_REL_SLEB:
131       encodeSLEB128(static_cast<int32_t>(value), loc, 5);
132       break;
133     case R_WASM_TABLE_INDEX_I32:
134     case R_WASM_MEMORY_ADDR_I32:
135     case R_WASM_FUNCTION_OFFSET_I32:
136     case R_WASM_SECTION_OFFSET_I32:
137     case R_WASM_GLOBAL_INDEX_I32:
138       write32le(loc, value);
139       break;
140     default:
141       llvm_unreachable("unknown relocation type");
142     }
143   }
144 }
145 
146 // Copy relocation entries to a given output stream.
147 // This function is used only when a user passes "-r". For a regular link,
148 // we consume relocations instead of copying them to an output file.
149 void InputChunk::writeRelocations(raw_ostream &os) const {
150   if (relocations.empty())
151     return;
152 
153   int32_t off = outputOffset - getInputSectionOffset();
154   LLVM_DEBUG(dbgs() << "writeRelocations: " << file->getName()
155                     << " offset=" << Twine(off) << "\n");
156 
157   for (const WasmRelocation &rel : relocations) {
158     writeUleb128(os, rel.Type, "reloc type");
159     writeUleb128(os, rel.Offset + off, "reloc offset");
160     writeUleb128(os, file->calcNewIndex(rel), "reloc index");
161 
162     if (relocTypeHasAddend(rel.Type))
163       writeSleb128(os, file->calcNewAddend(rel), "reloc addend");
164   }
165 }
166 
167 void InputFunction::setFunctionIndex(uint32_t index) {
168   LLVM_DEBUG(dbgs() << "InputFunction::setFunctionIndex: " << getName()
169                     << " -> " << index << "\n");
170   assert(!hasFunctionIndex());
171   functionIndex = index;
172 }
173 
174 void InputFunction::setTableIndex(uint32_t index) {
175   LLVM_DEBUG(dbgs() << "InputFunction::setTableIndex: " << getName() << " -> "
176                     << index << "\n");
177   assert(!hasTableIndex());
178   tableIndex = index;
179 }
180 
181 // Write a relocation value without padding and return the number of bytes
182 // witten.
183 static unsigned writeCompressedReloc(uint8_t *buf, const WasmRelocation &rel,
184                                      uint32_t value) {
185   switch (rel.Type) {
186   case R_WASM_TYPE_INDEX_LEB:
187   case R_WASM_FUNCTION_INDEX_LEB:
188   case R_WASM_GLOBAL_INDEX_LEB:
189   case R_WASM_EVENT_INDEX_LEB:
190   case R_WASM_MEMORY_ADDR_LEB:
191     return encodeULEB128(value, buf);
192   case R_WASM_TABLE_INDEX_SLEB:
193   case R_WASM_MEMORY_ADDR_SLEB:
194     return encodeSLEB128(static_cast<int32_t>(value), buf);
195   default:
196     llvm_unreachable("unexpected relocation type");
197   }
198 }
199 
200 static unsigned getRelocWidthPadded(const WasmRelocation &rel) {
201   switch (rel.Type) {
202   case R_WASM_TYPE_INDEX_LEB:
203   case R_WASM_FUNCTION_INDEX_LEB:
204   case R_WASM_GLOBAL_INDEX_LEB:
205   case R_WASM_EVENT_INDEX_LEB:
206   case R_WASM_MEMORY_ADDR_LEB:
207   case R_WASM_TABLE_INDEX_SLEB:
208   case R_WASM_MEMORY_ADDR_SLEB:
209     return 5;
210   default:
211     llvm_unreachable("unexpected relocation type");
212   }
213 }
214 
215 static unsigned getRelocWidth(const WasmRelocation &rel, uint32_t value) {
216   uint8_t buf[5];
217   return writeCompressedReloc(buf, rel, value);
218 }
219 
220 // Relocations of type LEB and SLEB in the code section are padded to 5 bytes
221 // so that a fast linker can blindly overwrite them without needing to worry
222 // about the number of bytes needed to encode the values.
223 // However, for optimal output the code section can be compressed to remove
224 // the padding then outputting non-relocatable files.
225 // In this case we need to perform a size calculation based on the value at each
226 // relocation.  At best we end up saving 4 bytes for each relocation entry.
227 //
228 // This function only computes the final output size.  It must be called
229 // before getSize() is used to calculate of layout of the code section.
230 void InputFunction::calculateSize() {
231   if (!file || !config->compressRelocations)
232     return;
233 
234   LLVM_DEBUG(dbgs() << "calculateSize: " << getName() << "\n");
235 
236   const uint8_t *secStart = file->codeSection->Content.data();
237   const uint8_t *funcStart = secStart + getInputSectionOffset();
238   uint32_t functionSizeLength;
239   decodeULEB128(funcStart, &functionSizeLength);
240 
241   uint32_t start = getInputSectionOffset();
242   uint32_t end = start + function->Size;
243 
244   uint32_t lastRelocEnd = start + functionSizeLength;
245   for (const WasmRelocation &rel : relocations) {
246     LLVM_DEBUG(dbgs() << "  region: " << (rel.Offset - lastRelocEnd) << "\n");
247     compressedFuncSize += rel.Offset - lastRelocEnd;
248     compressedFuncSize += getRelocWidth(rel, file->calcNewValue(rel));
249     lastRelocEnd = rel.Offset + getRelocWidthPadded(rel);
250   }
251   LLVM_DEBUG(dbgs() << "  final region: " << (end - lastRelocEnd) << "\n");
252   compressedFuncSize += end - lastRelocEnd;
253 
254   // Now we know how long the resulting function is we can add the encoding
255   // of its length
256   uint8_t buf[5];
257   compressedSize = compressedFuncSize + encodeULEB128(compressedFuncSize, buf);
258 
259   LLVM_DEBUG(dbgs() << "  calculateSize orig: " << function->Size << "\n");
260   LLVM_DEBUG(dbgs() << "  calculateSize  new: " << compressedSize << "\n");
261 }
262 
263 // Override the default writeTo method so that we can (optionally) write the
264 // compressed version of the function.
265 void InputFunction::writeTo(uint8_t *buf) const {
266   if (!file || !config->compressRelocations)
267     return InputChunk::writeTo(buf);
268 
269   buf += outputOffset;
270   uint8_t *orig = buf;
271   (void)orig;
272 
273   const uint8_t *secStart = file->codeSection->Content.data();
274   const uint8_t *funcStart = secStart + getInputSectionOffset();
275   const uint8_t *end = funcStart + function->Size;
276   uint32_t count;
277   decodeULEB128(funcStart, &count);
278   funcStart += count;
279 
280   LLVM_DEBUG(dbgs() << "write func: " << getName() << "\n");
281   buf += encodeULEB128(compressedFuncSize, buf);
282   const uint8_t *lastRelocEnd = funcStart;
283   for (const WasmRelocation &rel : relocations) {
284     unsigned chunkSize = (secStart + rel.Offset) - lastRelocEnd;
285     LLVM_DEBUG(dbgs() << "  write chunk: " << chunkSize << "\n");
286     memcpy(buf, lastRelocEnd, chunkSize);
287     buf += chunkSize;
288     buf += writeCompressedReloc(buf, rel, file->calcNewValue(rel));
289     lastRelocEnd = secStart + rel.Offset + getRelocWidthPadded(rel);
290   }
291 
292   unsigned chunkSize = end - lastRelocEnd;
293   LLVM_DEBUG(dbgs() << "  write final chunk: " << chunkSize << "\n");
294   memcpy(buf, lastRelocEnd, chunkSize);
295   LLVM_DEBUG(dbgs() << "  total: " << (buf + chunkSize - orig) << "\n");
296 }
297 
298 // Generate code to apply relocations to the data section at runtime.
299 // This is only called when generating shared libaries (PIC) where address are
300 // not known at static link time.
301 void InputSegment::generateRelocationCode(raw_ostream &os) const {
302   LLVM_DEBUG(dbgs() << "generating runtime relocations: " << getName()
303                     << " count=" << relocations.size() << "\n");
304 
305   // TODO(sbc): Encode the relocations in the data section and write a loop
306   // here to apply them.
307   uint32_t segmentVA = outputSeg->startVA + outputSegmentOffset;
308   for (const WasmRelocation &rel : relocations) {
309     uint32_t offset = rel.Offset - getInputSectionOffset();
310     uint32_t outputOffset = segmentVA + offset;
311 
312     LLVM_DEBUG(dbgs() << "gen reloc: type=" << relocTypeToString(rel.Type)
313                       << " addend=" << rel.Addend << " index=" << rel.Index
314                       << " output offset=" << outputOffset << "\n");
315 
316     // Get __memory_base
317     writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET");
318     writeUleb128(os, WasmSym::memoryBase->getGlobalIndex(), "memory_base");
319 
320     // Add the offset of the relocation
321     writeU8(os, WASM_OPCODE_I32_CONST, "I32_CONST");
322     writeSleb128(os, outputOffset, "offset");
323     writeU8(os, WASM_OPCODE_I32_ADD, "ADD");
324 
325     Symbol *sym = file->getSymbol(rel);
326     // Now figure out what we want to store
327     if (sym->hasGOTIndex()) {
328       writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET");
329       writeUleb128(os, sym->getGOTIndex(), "global index");
330       if (rel.Addend) {
331         writeU8(os, WASM_OPCODE_I32_CONST, "CONST");
332         writeSleb128(os, rel.Addend, "addend");
333         writeU8(os, WASM_OPCODE_I32_ADD, "ADD");
334       }
335     } else {
336       const GlobalSymbol* baseSymbol = WasmSym::memoryBase;
337       if (rel.Type == R_WASM_TABLE_INDEX_I32)
338         baseSymbol = WasmSym::tableBase;
339       writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET");
340       writeUleb128(os, baseSymbol->getGlobalIndex(), "base");
341       writeU8(os, WASM_OPCODE_I32_CONST, "CONST");
342       writeSleb128(os, file->calcNewValue(rel), "offset");
343       writeU8(os, WASM_OPCODE_I32_ADD, "ADD");
344     }
345 
346     // Store that value at the virtual address
347     writeU8(os, WASM_OPCODE_I32_STORE, "I32_STORE");
348     writeUleb128(os, 2, "align");
349     writeUleb128(os, 0, "offset");
350   }
351 }
352 
353 } // namespace wasm
354 } // namespace lld
355