16acd3003SFangrui Song //===- InputSection.cpp ---------------------------------------------------===//
26acd3003SFangrui Song //
36acd3003SFangrui Song // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
46acd3003SFangrui Song // See https://llvm.org/LICENSE.txt for license information.
56acd3003SFangrui Song // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66acd3003SFangrui Song //
76acd3003SFangrui Song //===----------------------------------------------------------------------===//
86acd3003SFangrui Song
96acd3003SFangrui Song #include "InputSection.h"
10428a7c1bSJez Ng #include "ConcatOutputSection.h"
11a8a6e5b0SLeonard Grey #include "Config.h"
123c9100fbSJez Ng #include "InputFiles.h"
136f63216cSJez Ng #include "OutputSegment.h"
146acd3003SFangrui Song #include "Symbols.h"
153a9d2f14SGreg McGary #include "SyntheticSections.h"
166acd3003SFangrui Song #include "Target.h"
17f27e4548SGreg McGary #include "UnwindInfoSection.h"
18daaaed6bSJez Ng #include "Writer.h"
196acd3003SFangrui Song #include "lld/Common/Memory.h"
206acd3003SFangrui Song #include "llvm/Support/Endian.h"
2104259cdeSJez Ng #include "llvm/Support/xxhash.h"
226acd3003SFangrui Song
23df2a5778SJez Ng using namespace llvm;
246acd3003SFangrui Song using namespace llvm::MachO;
256acd3003SFangrui Song using namespace llvm::support;
266acd3003SFangrui Song using namespace lld;
276acd3003SFangrui Song using namespace lld::macho;
286acd3003SFangrui Song
292f5d6a0eSShoaib Meenai // Verify ConcatInputSection's size on 64-bit builds. The size of std::vector
302f5d6a0eSShoaib Meenai // can differ based on STL debug levels (e.g. iterator debugging on MSVC's STL),
312f5d6a0eSShoaib Meenai // so account for that.
32a3f67f09SDaniel Bertalan static_assert(sizeof(void *) != 8 || sizeof(ConcatInputSection) ==
33a3f67f09SDaniel Bertalan sizeof(std::vector<Reloc>) + 104,
3401510ac0SShoaib Meenai "Try to minimize ConcatInputSection's size, we create many "
3501510ac0SShoaib Meenai "instances of it");
3601510ac0SShoaib Meenai
373a11528dSJez Ng std::vector<ConcatInputSection *> macho::inputSections;
386acd3003SFangrui Song
getFileSize() const397b007ac0SJez Ng uint64_t InputSection::getFileSize() const {
40f6b6e721SJez Ng return isZeroFill(getFlags()) ? 0 : getSize();
417b007ac0SJez Ng }
427b007ac0SJez Ng
getVA(uint64_t off) const4304259cdeSJez Ng uint64_t InputSection::getVA(uint64_t off) const {
4404259cdeSJez Ng return parent->addr + getOffset(off);
4504259cdeSJez Ng }
466cb07313SKellie Medlin
resolveSymbolVA(const Symbol * sym,uint8_t type)4793c8559bSGreg McGary static uint64_t resolveSymbolVA(const Symbol *sym, uint8_t type) {
485433a791SJez Ng const RelocAttrs &relocAttrs = target->getRelocAttrs(type);
4993c8559bSGreg McGary if (relocAttrs.hasAttr(RelocAttrBits::BRANCH))
5093c8559bSGreg McGary return sym->resolveBranchVA();
51393116faSNico Weber if (relocAttrs.hasAttr(RelocAttrBits::GOT))
5293c8559bSGreg McGary return sym->resolveGotVA();
53393116faSNico Weber if (relocAttrs.hasAttr(RelocAttrBits::TLV))
5493c8559bSGreg McGary return sym->resolveTlvVA();
5593c8559bSGreg McGary return sym->getVA();
563a9d2f14SGreg McGary }
573a9d2f14SGreg McGary
getContainingSymbol(uint64_t off) const585792797cSDaniel Bertalan const Defined *InputSection::getContainingSymbol(uint64_t off) const {
595792797cSDaniel Bertalan auto *nextSym = llvm::upper_bound(
605792797cSDaniel Bertalan symbols, off, [](uint64_t a, const Defined *b) { return a < b->value; });
615792797cSDaniel Bertalan if (nextSym == symbols.begin())
625792797cSDaniel Bertalan return nullptr;
635792797cSDaniel Bertalan return *std::prev(nextSym);
645792797cSDaniel Bertalan }
655792797cSDaniel Bertalan
getLocation(uint64_t off) const6606f863acSJez Ng std::string InputSection::getLocation(uint64_t off) const {
6706f863acSJez Ng // First, try to find a symbol that's near the offset. Use it as a reference
6806f863acSJez Ng // point.
695792797cSDaniel Bertalan if (auto *sym = getContainingSymbol(off))
70*fd304142SDaniel Bertalan return (toString(getFile()) + ":(symbol " + toString(*sym) + "+0x" +
715f627cc2SDaniel Bertalan Twine::utohexstr(off - sym->value) + ")")
7206f863acSJez Ng .str();
7306f863acSJez Ng
7406f863acSJez Ng // If that fails, use the section itself as a reference point.
7506f863acSJez Ng for (const Subsection &subsec : section.subsections) {
7606f863acSJez Ng if (subsec.isec == this) {
7706f863acSJez Ng off += subsec.offset;
7806f863acSJez Ng break;
7906f863acSJez Ng }
8006f863acSJez Ng }
815792797cSDaniel Bertalan
8206f863acSJez Ng return (toString(getFile()) + ":(" + getName() + "+0x" +
8306f863acSJez Ng Twine::utohexstr(off) + ")")
8406f863acSJez Ng .str();
8506f863acSJez Ng }
8606f863acSJez Ng
getSourceLocation(uint64_t off) const875792797cSDaniel Bertalan std::string InputSection::getSourceLocation(uint64_t off) const {
88ed39fd51SDaniel Bertalan auto *obj = dyn_cast_or_null<ObjFile>(getFile());
895792797cSDaniel Bertalan if (!obj)
905792797cSDaniel Bertalan return {};
915792797cSDaniel Bertalan
925792797cSDaniel Bertalan DWARFCache *dwarf = obj->getDwarf();
935792797cSDaniel Bertalan if (!dwarf)
945792797cSDaniel Bertalan return std::string();
955792797cSDaniel Bertalan
965792797cSDaniel Bertalan for (const Subsection &subsec : section.subsections) {
975792797cSDaniel Bertalan if (subsec.isec == this) {
985792797cSDaniel Bertalan off += subsec.offset;
995792797cSDaniel Bertalan break;
1005792797cSDaniel Bertalan }
1015792797cSDaniel Bertalan }
1025792797cSDaniel Bertalan
1035792797cSDaniel Bertalan auto createMsg = [&](StringRef path, unsigned line) {
1045792797cSDaniel Bertalan std::string filename = sys::path::filename(path).str();
1055792797cSDaniel Bertalan std::string lineStr = (":" + Twine(line)).str();
1065792797cSDaniel Bertalan if (filename == path)
1075792797cSDaniel Bertalan return filename + lineStr;
1085792797cSDaniel Bertalan return (filename + lineStr + " (" + path + lineStr + ")").str();
1095792797cSDaniel Bertalan };
1105792797cSDaniel Bertalan
1115792797cSDaniel Bertalan // First, look up a function for a given offset.
1125792797cSDaniel Bertalan if (Optional<DILineInfo> li = dwarf->getDILineInfo(
1135792797cSDaniel Bertalan section.addr + off, object::SectionedAddress::UndefSection))
1145792797cSDaniel Bertalan return createMsg(li->FileName, li->Line);
1155792797cSDaniel Bertalan
1165792797cSDaniel Bertalan // If it failed, look up again as a variable.
1175792797cSDaniel Bertalan if (const Defined *sym = getContainingSymbol(off)) {
1185792797cSDaniel Bertalan // Symbols are generally prefixed with an underscore, which is not included
1195792797cSDaniel Bertalan // in the debug information.
1205792797cSDaniel Bertalan StringRef symName = sym->getName();
1215792797cSDaniel Bertalan if (!symName.empty() && symName[0] == '_')
1225792797cSDaniel Bertalan symName = symName.substr(1);
1235792797cSDaniel Bertalan
1245792797cSDaniel Bertalan if (Optional<std::pair<std::string, unsigned>> fileLine =
1255792797cSDaniel Bertalan dwarf->getVariableLoc(symName))
1265792797cSDaniel Bertalan return createMsg(fileLine->first, fileLine->second);
1275792797cSDaniel Bertalan }
1285792797cSDaniel Bertalan
1295792797cSDaniel Bertalan // Try to get the source file's name from the DWARF information.
1305792797cSDaniel Bertalan if (obj->compileUnit)
1315792797cSDaniel Bertalan return obj->sourceFile();
1325792797cSDaniel Bertalan
1335792797cSDaniel Bertalan return {};
1345792797cSDaniel Bertalan }
1355792797cSDaniel Bertalan
foldIdentical(ConcatInputSection * copy)136f27e4548SGreg McGary void ConcatInputSection::foldIdentical(ConcatInputSection *copy) {
137f27e4548SGreg McGary align = std::max(align, copy->align);
138f27e4548SGreg McGary copy->live = false;
139f27e4548SGreg McGary copy->wasCoalesced = true;
140f27e4548SGreg McGary copy->replacement = this;
1418c1ea1abSNico Weber for (auto ©Sym : copy->symbols)
1428c1ea1abSNico Weber copySym->wasIdenticalCodeFolded = true;
143002eda70SJez Ng
144002eda70SJez Ng // Merge the sorted vectors of symbols together.
145002eda70SJez Ng auto it = symbols.begin();
146002eda70SJez Ng for (auto copyIt = copy->symbols.begin(); copyIt != copy->symbols.end();) {
147002eda70SJez Ng if (it == symbols.end()) {
148002eda70SJez Ng symbols.push_back(*copyIt++);
149002eda70SJez Ng it = symbols.end();
150002eda70SJez Ng } else if ((*it)->value > (*copyIt)->value) {
151002eda70SJez Ng std::swap(*it++, *copyIt);
152002eda70SJez Ng } else {
153002eda70SJez Ng ++it;
154002eda70SJez Ng }
155002eda70SJez Ng }
156002eda70SJez Ng copy->symbols.clear();
157002eda70SJez Ng
158002eda70SJez Ng // Remove duplicate compact unwind info for symbols at the same address.
1593f35dd06SVy Nguyen if (symbols.empty())
160002eda70SJez Ng return;
161002eda70SJez Ng it = symbols.begin();
162002eda70SJez Ng uint64_t v = (*it)->value;
163002eda70SJez Ng for (++it; it != symbols.end(); ++it) {
1649cc489a4SGreg McGary Defined *d = *it;
1659cc489a4SGreg McGary if (d->value == v)
1669cc489a4SGreg McGary d->unwindEntry = nullptr;
167002eda70SJez Ng else
1689cc489a4SGreg McGary v = d->value;
169002eda70SJez Ng }
170f27e4548SGreg McGary }
171f27e4548SGreg McGary
writeTo(uint8_t * buf)1725de7467eSJez Ng void ConcatInputSection::writeTo(uint8_t *buf) {
173d5a70db1SNico Weber assert(!shouldOmitFromOutput());
174d5a70db1SNico Weber
17574871cdaSJez Ng if (getFileSize() == 0)
17674871cdaSJez Ng return;
17774871cdaSJez Ng
1786acd3003SFangrui Song memcpy(buf, data.data(), data.size());
1796acd3003SFangrui Song
180a3f67f09SDaniel Bertalan std::vector<uint64_t> relocTargets;
181a3f67f09SDaniel Bertalan if (!optimizationHints.empty())
182a3f67f09SDaniel Bertalan relocTargets.reserve(relocs.size());
183a3f67f09SDaniel Bertalan
1843a9d2f14SGreg McGary for (size_t i = 0; i < relocs.size(); i++) {
18587104faaSGreg McGary const Reloc &r = relocs[i];
18687104faaSGreg McGary uint8_t *loc = buf + r.offset;
1871a3ef041SGreg McGary uint64_t referentVA = 0;
188a723db92SJez Ng if (target->hasAttr(r.type, RelocAttrBits::SUBTRAHEND)) {
189a723db92SJez Ng const Symbol *fromSym = r.referent.get<Symbol *>();
1901aa29dffSJez Ng const Reloc &minuend = relocs[++i];
1911aa29dffSJez Ng uint64_t minuendVA;
1921aa29dffSJez Ng if (const Symbol *toSym = minuend.referent.dyn_cast<Symbol *>())
19304259cdeSJez Ng minuendVA = toSym->getVA() + minuend.addend;
194d5a70db1SNico Weber else {
195d5a70db1SNico Weber auto *referentIsec = minuend.referent.get<InputSection *>();
196b8bbb972SJez Ng assert(!::shouldOmitFromOutput(referentIsec));
19704259cdeSJez Ng minuendVA = referentIsec->getVA(minuend.addend);
198d5a70db1SNico Weber }
19904259cdeSJez Ng referentVA = minuendVA - fromSym->getVA();
2003a9d2f14SGreg McGary } else if (auto *referentSym = r.referent.dyn_cast<Symbol *>()) {
2013a9d2f14SGreg McGary if (target->hasAttr(r.type, RelocAttrBits::LOAD) &&
2023a9d2f14SGreg McGary !referentSym->isInGot())
2033a9d2f14SGreg McGary target->relaxGotLoad(loc, r.type);
2046c641d0dSKaining Zhong // For dtrace symbols, do not handle them as normal undefined symbols
2056c641d0dSKaining Zhong if (referentSym->getName().startswith("___dtrace_")) {
2066c641d0dSKaining Zhong // Change dtrace call site to pre-defined instructions
2076c641d0dSKaining Zhong target->handleDtraceReloc(referentSym, r, loc);
2086c641d0dSKaining Zhong continue;
2096c641d0dSKaining Zhong }
21004259cdeSJez Ng referentVA = resolveSymbolVA(referentSym, r.type) + r.addend;
211ca85e373SJez Ng
212f6b6e721SJez Ng if (isThreadLocalVariables(getFlags())) {
213daaaed6bSJez Ng // References from thread-local variable sections are treated as offsets
214daaaed6bSJez Ng // relative to the start of the thread-local data memory area, which
215daaaed6bSJez Ng // is initialized via copying all the TLV data sections (which are all
216daaaed6bSJez Ng // contiguous).
217fb98a1beSKazu Hirata if (isa<Defined>(referentSym))
218daaaed6bSJez Ng referentVA -= firstTLVDataSection->addr;
219ca85e373SJez Ng }
2201a3ef041SGreg McGary } else if (auto *referentIsec = r.referent.dyn_cast<InputSection *>()) {
221b8bbb972SJez Ng assert(!::shouldOmitFromOutput(referentIsec));
22204259cdeSJez Ng referentVA = referentIsec->getVA(r.addend);
223ca85e373SJez Ng }
2245de7467eSJez Ng target->relocateOne(loc, r, referentVA, getVA() + r.offset);
225a3f67f09SDaniel Bertalan
226a3f67f09SDaniel Bertalan if (!optimizationHints.empty())
227a3f67f09SDaniel Bertalan relocTargets.push_back(referentVA);
2286acd3003SFangrui Song }
229a3f67f09SDaniel Bertalan
230a3f67f09SDaniel Bertalan if (!optimizationHints.empty())
231a3f67f09SDaniel Bertalan target->applyOptimizationHints(buf, this, relocTargets);
2326acd3003SFangrui Song }
2333c9100fbSJez Ng
makeSyntheticInputSection(StringRef segName,StringRef sectName,uint32_t flags,ArrayRef<uint8_t> data,uint32_t align)2342b78ef06SJez Ng ConcatInputSection *macho::makeSyntheticInputSection(StringRef segName,
2352b78ef06SJez Ng StringRef sectName,
2362b78ef06SJez Ng uint32_t flags,
2372b78ef06SJez Ng ArrayRef<uint8_t> data,
2382b78ef06SJez Ng uint32_t align) {
2392b78ef06SJez Ng Section §ion =
2402b78ef06SJez Ng *make<Section>(/*file=*/nullptr, segName, sectName, flags, /*addr=*/0);
2412b78ef06SJez Ng auto isec = make<ConcatInputSection>(section, data, align);
2422b78ef06SJez Ng section.subsections.push_back({0, isec});
2432b78ef06SJez Ng return isec;
2442b78ef06SJez Ng }
2452b78ef06SJez Ng
splitIntoPieces()24604259cdeSJez Ng void CStringInputSection::splitIntoPieces() {
24704259cdeSJez Ng size_t off = 0;
24804259cdeSJez Ng StringRef s = toStringRef(data);
24904259cdeSJez Ng while (!s.empty()) {
25004259cdeSJez Ng size_t end = s.find(0);
25104259cdeSJez Ng if (end == StringRef::npos)
25206f863acSJez Ng fatal(getLocation(off) + ": string is not null terminated");
25304259cdeSJez Ng size_t size = end + 1;
254a8a6e5b0SLeonard Grey uint32_t hash = config->dedupLiterals ? xxHash64(s.substr(0, size)) : 0;
255a8a6e5b0SLeonard Grey pieces.emplace_back(off, hash);
25604259cdeSJez Ng s = s.substr(size);
25704259cdeSJez Ng off += size;
25804259cdeSJez Ng }
25904259cdeSJez Ng }
26004259cdeSJez Ng
getStringPiece(uint64_t off)261464d3dc3SJez Ng StringPiece &CStringInputSection::getStringPiece(uint64_t off) {
26204259cdeSJez Ng if (off >= data.size())
26304259cdeSJez Ng fatal(toString(this) + ": offset is outside the section");
26404259cdeSJez Ng
26504259cdeSJez Ng auto it =
26604259cdeSJez Ng partition_point(pieces, [=](StringPiece p) { return p.inSecOff <= off; });
26704259cdeSJez Ng return it[-1];
26804259cdeSJez Ng }
26904259cdeSJez Ng
getStringPiece(uint64_t off) const270464d3dc3SJez Ng const StringPiece &CStringInputSection::getStringPiece(uint64_t off) const {
271464d3dc3SJez Ng return const_cast<CStringInputSection *>(this)->getStringPiece(off);
272464d3dc3SJez Ng }
273464d3dc3SJez Ng
getOffset(uint64_t off) const27404259cdeSJez Ng uint64_t CStringInputSection::getOffset(uint64_t off) const {
27504259cdeSJez Ng const StringPiece &piece = getStringPiece(off);
27604259cdeSJez Ng uint64_t addend = off - piece.inSecOff;
27704259cdeSJez Ng return piece.outSecOff + addend;
27804259cdeSJez Ng }
27904259cdeSJez Ng
WordLiteralInputSection(const Section & section,ArrayRef<uint8_t> data,uint32_t align)2802b78ef06SJez Ng WordLiteralInputSection::WordLiteralInputSection(const Section §ion,
281681cfeb5SJez Ng ArrayRef<uint8_t> data,
2822b78ef06SJez Ng uint32_t align)
2832b78ef06SJez Ng : InputSection(WordLiteralKind, section, data, align) {
2842b78ef06SJez Ng switch (sectionType(getFlags())) {
285464d3dc3SJez Ng case S_4BYTE_LITERALS:
286464d3dc3SJez Ng power2LiteralSize = 2;
287464d3dc3SJez Ng break;
288464d3dc3SJez Ng case S_8BYTE_LITERALS:
289464d3dc3SJez Ng power2LiteralSize = 3;
290464d3dc3SJez Ng break;
291464d3dc3SJez Ng case S_16BYTE_LITERALS:
292464d3dc3SJez Ng power2LiteralSize = 4;
293464d3dc3SJez Ng break;
294464d3dc3SJez Ng default:
295464d3dc3SJez Ng llvm_unreachable("invalid literal section type");
296464d3dc3SJez Ng }
297464d3dc3SJez Ng
298464d3dc3SJez Ng live.resize(data.size() >> power2LiteralSize, !config->deadStrip);
299464d3dc3SJez Ng }
300681cfeb5SJez Ng
getOffset(uint64_t off) const3015d88f2ddSJez Ng uint64_t WordLiteralInputSection::getOffset(uint64_t off) const {
3025d88f2ddSJez Ng auto *osec = cast<WordLiteralSection>(parent);
3036503a685SNico Weber const uintptr_t buf = reinterpret_cast<uintptr_t>(data.data());
304f6b6e721SJez Ng switch (sectionType(getFlags())) {
3055d88f2ddSJez Ng case S_4BYTE_LITERALS:
3066503a685SNico Weber return osec->getLiteral4Offset(buf + (off & ~3LLU)) | (off & 3);
3075d88f2ddSJez Ng case S_8BYTE_LITERALS:
3086503a685SNico Weber return osec->getLiteral8Offset(buf + (off & ~7LLU)) | (off & 7);
3095d88f2ddSJez Ng case S_16BYTE_LITERALS:
3106503a685SNico Weber return osec->getLiteral16Offset(buf + (off & ~15LLU)) | (off & 15);
3115d88f2ddSJez Ng default:
3125d88f2ddSJez Ng llvm_unreachable("invalid literal section type");
3135d88f2ddSJez Ng }
3145d88f2ddSJez Ng }
3155d88f2ddSJez Ng
isCodeSection(const InputSection * isec)316dc2c6cf2SAlexander Shaposhnikov bool macho::isCodeSection(const InputSection *isec) {
317f6b6e721SJez Ng uint32_t type = sectionType(isec->getFlags());
318c7dbaec3SJez Ng if (type != S_REGULAR && type != S_COALESCED)
319c7dbaec3SJez Ng return false;
320c7dbaec3SJez Ng
321f6b6e721SJez Ng uint32_t attr = isec->getFlags() & SECTION_ATTRIBUTES_USR;
322c7dbaec3SJez Ng if (attr == S_ATTR_PURE_INSTRUCTIONS)
323c7dbaec3SJez Ng return true;
324c7dbaec3SJez Ng
325f6b6e721SJez Ng if (isec->getSegName() == segment_names::text)
326f6b6e721SJez Ng return StringSwitch<bool>(isec->getName())
327465204d6SGreg McGary .Cases(section_names::textCoalNt, section_names::staticInit, true)
328c7dbaec3SJez Ng .Default(false);
329c7dbaec3SJez Ng
330c7dbaec3SJez Ng return false;
331c7dbaec3SJez Ng }
332c7dbaec3SJez Ng
isCfStringSection(const InputSection * isec)333ac2dd06bSJez Ng bool macho::isCfStringSection(const InputSection *isec) {
334f6b6e721SJez Ng return isec->getName() == section_names::cfString &&
335f6b6e721SJez Ng isec->getSegName() == segment_names::data;
336ac2dd06bSJez Ng }
337ac2dd06bSJez Ng
isClassRefsSection(const InputSection * isec)338ce2ae381SJez Ng bool macho::isClassRefsSection(const InputSection *isec) {
339ce2ae381SJez Ng return isec->getName() == section_names::objcClassRefs &&
340ce2ae381SJez Ng isec->getSegName() == segment_names::data;
341ce2ae381SJez Ng }
342ce2ae381SJez Ng
isEhFrameSection(const InputSection * isec)343e183bf8eSJez Ng bool macho::isEhFrameSection(const InputSection *isec) {
344e183bf8eSJez Ng return isec->getName() == section_names::ehFrame &&
345e183bf8eSJez Ng isec->getSegName() == segment_names::text;
346e183bf8eSJez Ng }
347e183bf8eSJez Ng
isGccExceptTabSection(const InputSection * isec)348f6017abbSJez Ng bool macho::isGccExceptTabSection(const InputSection *isec) {
349f6017abbSJez Ng return isec->getName() == section_names::gccExceptTab &&
350f6017abbSJez Ng isec->getSegName() == segment_names::text;
351f6017abbSJez Ng }
352f6017abbSJez Ng
toString(const InputSection * isec)3533c9100fbSJez Ng std::string lld::toString(const InputSection *isec) {
354f6b6e721SJez Ng return (toString(isec->getFile()) + ":(" + isec->getName() + ")").str();
3553c9100fbSJez Ng }
356