1 //===- SymbolTable.cpp ----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "SymbolTable.h" 10 #include "ConcatOutputSection.h" 11 #include "Config.h" 12 #include "InputFiles.h" 13 #include "InputSection.h" 14 #include "Symbols.h" 15 #include "SyntheticSections.h" 16 #include "lld/Common/ErrorHandler.h" 17 #include "lld/Common/Memory.h" 18 19 using namespace llvm; 20 using namespace lld; 21 using namespace lld::macho; 22 23 Symbol *SymbolTable::find(CachedHashStringRef cachedName) { 24 auto it = symMap.find(cachedName); 25 if (it == symMap.end()) 26 return nullptr; 27 return symVector[it->second]; 28 } 29 30 std::pair<Symbol *, bool> SymbolTable::insert(StringRef name, 31 const InputFile *file) { 32 auto p = symMap.insert({CachedHashStringRef(name), (int)symVector.size()}); 33 34 Symbol *sym; 35 if (!p.second) { 36 // Name already present in the symbol table. 37 sym = symVector[p.first->second]; 38 } else { 39 // Name is a new symbol. 40 sym = reinterpret_cast<Symbol *>(make<SymbolUnion>()); 41 symVector.push_back(sym); 42 } 43 44 sym->isUsedInRegularObj |= !file || isa<ObjFile>(file); 45 return {sym, p.second}; 46 } 47 48 Defined *SymbolTable::addDefined(StringRef name, InputFile *file, 49 InputSection *isec, uint64_t value, 50 uint64_t size, bool isWeakDef, 51 bool isPrivateExtern, bool isThumb, 52 bool isReferencedDynamically, bool noDeadStrip, 53 bool isWeakDefCanBeHidden) { 54 Symbol *s; 55 bool wasInserted; 56 bool overridesWeakDef = false; 57 std::tie(s, wasInserted) = insert(name, file); 58 59 assert(!isWeakDef || (isa<BitcodeFile>(file) && !isec) || 60 (isa<ObjFile>(file) && file == isec->getFile())); 61 62 if (!wasInserted) { 63 if (auto *defined = dyn_cast<Defined>(s)) { 64 if (isWeakDef) { 65 // See further comment in createDefined() in InputFiles.cpp 66 if (defined->isWeakDef()) { 67 defined->privateExtern &= isPrivateExtern; 68 defined->weakDefCanBeHidden &= isWeakDefCanBeHidden; 69 defined->referencedDynamically |= isReferencedDynamically; 70 defined->noDeadStrip |= noDeadStrip; 71 } 72 // FIXME: Handle this for bitcode files. 73 if (auto concatIsec = dyn_cast_or_null<ConcatInputSection>(isec)) 74 concatIsec->wasCoalesced = true; 75 return defined; 76 } 77 78 if (defined->isWeakDef()) { 79 // FIXME: Handle this for bitcode files. 80 if (auto concatIsec = 81 dyn_cast_or_null<ConcatInputSection>(defined->isec)) { 82 concatIsec->wasCoalesced = true; 83 concatIsec->symbols.erase(llvm::find(concatIsec->symbols, defined)); 84 } 85 } else { 86 error("duplicate symbol: " + toString(*defined) + "\n>>> defined in " + 87 toString(defined->getFile()) + "\n>>> defined in " + 88 toString(file)); 89 } 90 91 } else if (auto *dysym = dyn_cast<DylibSymbol>(s)) { 92 overridesWeakDef = !isWeakDef && dysym->isWeakDef(); 93 dysym->unreference(); 94 } 95 // Defined symbols take priority over other types of symbols, so in case 96 // of a name conflict, we fall through to the replaceSymbol() call below. 97 } 98 99 // With -flat_namespace, all extern symbols in dylibs are interposable. 100 // FIXME: Add support for `-interposable` (PR53680). 101 bool interposable = config->namespaceKind == NamespaceKind::flat && 102 config->outputType != MachO::MH_EXECUTE && 103 !isPrivateExtern; 104 Defined *defined = replaceSymbol<Defined>( 105 s, name, file, isec, value, size, isWeakDef, /*isExternal=*/true, 106 isPrivateExtern, /*includeInSymtab=*/true, isThumb, 107 isReferencedDynamically, noDeadStrip, overridesWeakDef, 108 isWeakDefCanBeHidden, interposable); 109 return defined; 110 } 111 112 Symbol *SymbolTable::addUndefined(StringRef name, InputFile *file, 113 bool isWeakRef) { 114 Symbol *s; 115 bool wasInserted; 116 std::tie(s, wasInserted) = insert(name, file); 117 118 RefState refState = isWeakRef ? RefState::Weak : RefState::Strong; 119 120 if (wasInserted) 121 replaceSymbol<Undefined>(s, name, file, refState); 122 else if (auto *lazy = dyn_cast<LazyArchive>(s)) 123 lazy->fetchArchiveMember(); 124 else if (isa<LazyObject>(s)) 125 extract(*s->getFile(), s->getName()); 126 else if (auto *dynsym = dyn_cast<DylibSymbol>(s)) 127 dynsym->reference(refState); 128 else if (auto *undefined = dyn_cast<Undefined>(s)) 129 undefined->refState = std::max(undefined->refState, refState); 130 return s; 131 } 132 133 Symbol *SymbolTable::addCommon(StringRef name, InputFile *file, uint64_t size, 134 uint32_t align, bool isPrivateExtern) { 135 Symbol *s; 136 bool wasInserted; 137 std::tie(s, wasInserted) = insert(name, file); 138 139 if (!wasInserted) { 140 if (auto *common = dyn_cast<CommonSymbol>(s)) { 141 if (size < common->size) 142 return s; 143 } else if (isa<Defined>(s)) { 144 return s; 145 } 146 // Common symbols take priority over all non-Defined symbols, so in case of 147 // a name conflict, we fall through to the replaceSymbol() call below. 148 } 149 150 replaceSymbol<CommonSymbol>(s, name, file, size, align, isPrivateExtern); 151 return s; 152 } 153 154 Symbol *SymbolTable::addDylib(StringRef name, DylibFile *file, bool isWeakDef, 155 bool isTlv) { 156 Symbol *s; 157 bool wasInserted; 158 std::tie(s, wasInserted) = insert(name, file); 159 160 RefState refState = RefState::Unreferenced; 161 if (!wasInserted) { 162 if (auto *defined = dyn_cast<Defined>(s)) { 163 if (isWeakDef && !defined->isWeakDef()) 164 defined->overridesWeakDef = true; 165 } else if (auto *undefined = dyn_cast<Undefined>(s)) { 166 refState = undefined->refState; 167 } else if (auto *dysym = dyn_cast<DylibSymbol>(s)) { 168 refState = dysym->getRefState(); 169 } 170 } 171 172 bool isDynamicLookup = file == nullptr; 173 if (wasInserted || isa<Undefined>(s) || 174 (isa<DylibSymbol>(s) && 175 ((!isWeakDef && s->isWeakDef()) || 176 (!isDynamicLookup && cast<DylibSymbol>(s)->isDynamicLookup())))) { 177 if (auto *dynsym = dyn_cast<DylibSymbol>(s)) 178 dynsym->unreference(); 179 replaceSymbol<DylibSymbol>(s, file, name, isWeakDef, refState, isTlv); 180 } 181 182 return s; 183 } 184 185 Symbol *SymbolTable::addDynamicLookup(StringRef name) { 186 return addDylib(name, /*file=*/nullptr, /*isWeakDef=*/false, /*isTlv=*/false); 187 } 188 189 Symbol *SymbolTable::addLazyArchive(StringRef name, ArchiveFile *file, 190 const object::Archive::Symbol &sym) { 191 Symbol *s; 192 bool wasInserted; 193 std::tie(s, wasInserted) = insert(name, file); 194 195 if (wasInserted) { 196 replaceSymbol<LazyArchive>(s, file, sym); 197 } else if (isa<Undefined>(s)) { 198 file->fetch(sym); 199 } else if (auto *dysym = dyn_cast<DylibSymbol>(s)) { 200 if (dysym->isWeakDef()) { 201 if (dysym->getRefState() != RefState::Unreferenced) 202 file->fetch(sym); 203 else 204 replaceSymbol<LazyArchive>(s, file, sym); 205 } 206 } 207 return s; 208 } 209 210 Symbol *SymbolTable::addLazyObject(StringRef name, InputFile &file) { 211 Symbol *s; 212 bool wasInserted; 213 std::tie(s, wasInserted) = insert(name, &file); 214 215 if (wasInserted) { 216 replaceSymbol<LazyObject>(s, file, name); 217 } else if (isa<Undefined>(s)) { 218 extract(file, name); 219 } else if (auto *dysym = dyn_cast<DylibSymbol>(s)) { 220 if (dysym->isWeakDef()) { 221 if (dysym->getRefState() != RefState::Unreferenced) 222 extract(file, name); 223 else 224 replaceSymbol<LazyObject>(s, file, name); 225 } 226 } 227 return s; 228 } 229 230 Defined *SymbolTable::addSynthetic(StringRef name, InputSection *isec, 231 uint64_t value, bool isPrivateExtern, 232 bool includeInSymtab, 233 bool referencedDynamically) { 234 assert(!isec || !isec->getFile()); // See makeSyntheticInputSection(). 235 Defined *s = 236 addDefined(name, /*file=*/nullptr, isec, value, /*size=*/0, 237 /*isWeakDef=*/false, isPrivateExtern, /*isThumb=*/false, 238 referencedDynamically, /*noDeadStrip=*/false, 239 /*isWeakDefCanBeHidden=*/false); 240 s->includeInSymtab = includeInSymtab; 241 return s; 242 } 243 244 enum class Boundary { 245 Start, 246 End, 247 }; 248 249 static Defined *createBoundarySymbol(const Undefined &sym) { 250 return symtab->addSynthetic( 251 sym.getName(), /*isec=*/nullptr, /*value=*/-1, /*isPrivateExtern=*/true, 252 /*includeInSymtab=*/false, /*referencedDynamically=*/false); 253 } 254 255 static void handleSectionBoundarySymbol(const Undefined &sym, StringRef segSect, 256 Boundary which) { 257 StringRef segName, sectName; 258 std::tie(segName, sectName) = segSect.split('$'); 259 260 // Attach the symbol to any InputSection that will end up in the right 261 // OutputSection -- it doesn't matter which one we pick. 262 // Don't bother looking through inputSections for a matching 263 // ConcatInputSection -- we need to create ConcatInputSection for 264 // non-existing sections anyways, and that codepath works even if we should 265 // already have a ConcatInputSection with the right name. 266 267 OutputSection *osec = nullptr; 268 // This looks for __TEXT,__cstring etc. 269 for (SyntheticSection *ssec : syntheticSections) 270 if (ssec->segname == segName && ssec->name == sectName) { 271 osec = ssec->isec->parent; 272 break; 273 } 274 275 if (!osec) { 276 ConcatInputSection *isec = makeSyntheticInputSection(segName, sectName); 277 278 // This runs after markLive() and is only called for Undefineds that are 279 // live. Marking the isec live ensures an OutputSection is created that the 280 // start/end symbol can refer to. 281 assert(sym.isLive()); 282 isec->live = true; 283 284 // This runs after gatherInputSections(), so need to explicitly set parent 285 // and add to inputSections. 286 osec = isec->parent = ConcatOutputSection::getOrCreateForInput(isec); 287 inputSections.push_back(isec); 288 } 289 290 if (which == Boundary::Start) 291 osec->sectionStartSymbols.push_back(createBoundarySymbol(sym)); 292 else 293 osec->sectionEndSymbols.push_back(createBoundarySymbol(sym)); 294 } 295 296 static void handleSegmentBoundarySymbol(const Undefined &sym, StringRef segName, 297 Boundary which) { 298 OutputSegment *seg = getOrCreateOutputSegment(segName); 299 if (which == Boundary::Start) 300 seg->segmentStartSymbols.push_back(createBoundarySymbol(sym)); 301 else 302 seg->segmentEndSymbols.push_back(createBoundarySymbol(sym)); 303 } 304 305 // Try to find a definition for an undefined symbol. 306 // Returns true if a definition was found and no diagnostics are needed. 307 static bool recoverFromUndefinedSymbol(const Undefined &sym) { 308 // Handle start/end symbols. 309 StringRef name = sym.getName(); 310 if (name.consume_front("section$start$")) { 311 handleSectionBoundarySymbol(sym, name, Boundary::Start); 312 return true; 313 } 314 if (name.consume_front("section$end$")) { 315 handleSectionBoundarySymbol(sym, name, Boundary::End); 316 return true; 317 } 318 if (name.consume_front("segment$start$")) { 319 handleSegmentBoundarySymbol(sym, name, Boundary::Start); 320 return true; 321 } 322 if (name.consume_front("segment$end$")) { 323 handleSegmentBoundarySymbol(sym, name, Boundary::End); 324 return true; 325 } 326 327 // Handle -U. 328 if (config->explicitDynamicLookups.count(sym.getName())) { 329 symtab->addDynamicLookup(sym.getName()); 330 return true; 331 } 332 333 // Handle -undefined. 334 if (config->undefinedSymbolTreatment == 335 UndefinedSymbolTreatment::dynamic_lookup || 336 config->undefinedSymbolTreatment == UndefinedSymbolTreatment::suppress) { 337 symtab->addDynamicLookup(sym.getName()); 338 return true; 339 } 340 341 // We do not return true here, as we still need to print diagnostics. 342 if (config->undefinedSymbolTreatment == UndefinedSymbolTreatment::warning) 343 symtab->addDynamicLookup(sym.getName()); 344 345 return false; 346 } 347 348 struct UndefinedDiag { 349 struct SectionAndOffset { 350 const InputSection *isec; 351 uint64_t offset; 352 }; 353 354 std::vector<SectionAndOffset> codeReferences; 355 std::vector<std::string> otherReferences; 356 }; 357 358 static MapVector<const Undefined *, UndefinedDiag> undefs; 359 360 void macho::reportPendingUndefinedSymbols() { 361 for (const auto &undef : undefs) { 362 const UndefinedDiag &locations = undef.second; 363 364 std::string message = "undefined symbol"; 365 if (config->archMultiple) 366 message += (" for arch " + getArchitectureName(config->arch())).str(); 367 message += ": " + toString(*undef.first); 368 369 const size_t maxUndefinedReferences = 3; 370 size_t i = 0; 371 for (const std::string &loc : locations.otherReferences) { 372 if (i >= maxUndefinedReferences) 373 break; 374 message += "\n>>> referenced by " + loc; 375 ++i; 376 } 377 378 for (const UndefinedDiag::SectionAndOffset &loc : 379 locations.codeReferences) { 380 if (i >= maxUndefinedReferences) 381 break; 382 // TODO: Get source file/line from debug information. 383 message += "\n>>> referenced by " + loc.isec->getLocation(loc.offset); 384 ++i; 385 } 386 387 size_t totalReferences = 388 locations.otherReferences.size() + locations.codeReferences.size(); 389 if (totalReferences > i) 390 message += 391 ("\n>>> referenced " + Twine(totalReferences - i) + " more times") 392 .str(); 393 394 if (config->undefinedSymbolTreatment == UndefinedSymbolTreatment::error) 395 error(message); 396 else if (config->undefinedSymbolTreatment == 397 UndefinedSymbolTreatment::warning) 398 warn(message); 399 else 400 assert(false && 401 "diagnostics make sense for -undefined error|warning only"); 402 } 403 404 // This function is called multiple times during execution. Clear the printed 405 // diagnostics to avoid printing the same things again the next time. 406 undefs.clear(); 407 } 408 409 void macho::treatUndefinedSymbol(const Undefined &sym, StringRef source) { 410 if (recoverFromUndefinedSymbol(sym)) 411 return; 412 413 undefs[&sym].otherReferences.push_back(source.str()); 414 } 415 416 void macho::treatUndefinedSymbol(const Undefined &sym, const InputSection *isec, 417 uint64_t offset) { 418 if (recoverFromUndefinedSymbol(sym)) 419 return; 420 421 undefs[&sym].codeReferences.push_back({isec, offset}); 422 } 423 424 std::unique_ptr<SymbolTable> macho::symtab; 425