1 //===- Archive.cpp - ar File Format implementation --------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines the ArchiveObjectFile class. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Object/Archive.h" 15 #include "llvm/ADT/APInt.h" 16 #include "llvm/ADT/SmallString.h" 17 #include "llvm/ADT/Twine.h" 18 #include "llvm/Support/Endian.h" 19 #include "llvm/Support/MemoryBuffer.h" 20 21 using namespace llvm; 22 using namespace object; 23 using namespace llvm::support::endian; 24 25 static const char *const Magic = "!<arch>\n"; 26 static const char *const ThinMagic = "!<thin>\n"; 27 28 void Archive::anchor() { } 29 30 StringRef ArchiveMemberHeader::getName() const { 31 char EndCond; 32 if (Name[0] == '/' || Name[0] == '#') 33 EndCond = ' '; 34 else 35 EndCond = '/'; 36 llvm::StringRef::size_type end = 37 llvm::StringRef(Name, sizeof(Name)).find(EndCond); 38 if (end == llvm::StringRef::npos) 39 end = sizeof(Name); 40 assert(end <= sizeof(Name) && end > 0); 41 // Don't include the EndCond if there is one. 42 return llvm::StringRef(Name, end); 43 } 44 45 uint32_t ArchiveMemberHeader::getSize() const { 46 uint32_t Ret; 47 if (llvm::StringRef(Size, sizeof(Size)).rtrim(" ").getAsInteger(10, Ret)) 48 llvm_unreachable("Size is not a decimal number."); 49 return Ret; 50 } 51 52 sys::fs::perms ArchiveMemberHeader::getAccessMode() const { 53 unsigned Ret; 54 if (StringRef(AccessMode, sizeof(AccessMode)).rtrim(" ").getAsInteger(8, Ret)) 55 llvm_unreachable("Access mode is not an octal number."); 56 return static_cast<sys::fs::perms>(Ret); 57 } 58 59 sys::TimeValue ArchiveMemberHeader::getLastModified() const { 60 unsigned Seconds; 61 if (StringRef(LastModified, sizeof(LastModified)).rtrim(" ") 62 .getAsInteger(10, Seconds)) 63 llvm_unreachable("Last modified time not a decimal number."); 64 65 sys::TimeValue Ret; 66 Ret.fromEpochTime(Seconds); 67 return Ret; 68 } 69 70 unsigned ArchiveMemberHeader::getUID() const { 71 unsigned Ret; 72 if (StringRef(UID, sizeof(UID)).rtrim(" ").getAsInteger(10, Ret)) 73 llvm_unreachable("UID time not a decimal number."); 74 return Ret; 75 } 76 77 unsigned ArchiveMemberHeader::getGID() const { 78 unsigned Ret; 79 if (StringRef(GID, sizeof(GID)).rtrim(" ").getAsInteger(10, Ret)) 80 llvm_unreachable("GID time not a decimal number."); 81 return Ret; 82 } 83 84 Archive::Child::Child(const Archive *Parent, const char *Start) 85 : Parent(Parent) { 86 if (!Start) 87 return; 88 89 const ArchiveMemberHeader *Header = 90 reinterpret_cast<const ArchiveMemberHeader *>(Start); 91 uint64_t Size = sizeof(ArchiveMemberHeader); 92 if (!Parent->IsThin || Header->getName() == "/" || Header->getName() == "//") 93 Size += Header->getSize(); 94 Data = StringRef(Start, Size); 95 96 // Setup StartOfFile and PaddingBytes. 97 StartOfFile = sizeof(ArchiveMemberHeader); 98 // Don't include attached name. 99 StringRef Name = Header->getName(); 100 if (Name.startswith("#1/")) { 101 uint64_t NameSize; 102 if (Name.substr(3).rtrim(" ").getAsInteger(10, NameSize)) 103 llvm_unreachable("Long name length is not an integer"); 104 StartOfFile += NameSize; 105 } 106 } 107 108 uint64_t Archive::Child::getSize() const { 109 if (Parent->IsThin) 110 return getHeader()->getSize(); 111 return Data.size() - StartOfFile; 112 } 113 114 uint64_t Archive::Child::getRawSize() const { 115 return getHeader()->getSize(); 116 } 117 118 Archive::Child Archive::Child::getNext() const { 119 size_t SpaceToSkip = Data.size(); 120 // If it's odd, add 1 to make it even. 121 if (SpaceToSkip & 1) 122 ++SpaceToSkip; 123 124 const char *NextLoc = Data.data() + SpaceToSkip; 125 126 // Check to see if this is past the end of the archive. 127 if (NextLoc >= Parent->Data.getBufferEnd()) 128 return Child(Parent, nullptr); 129 130 return Child(Parent, NextLoc); 131 } 132 133 uint64_t Archive::Child::getChildOffset() const { 134 const char *a = Parent->Data.getBuffer().data(); 135 const char *c = Data.data(); 136 uint64_t offset = c - a; 137 return offset; 138 } 139 140 ErrorOr<StringRef> Archive::Child::getName() const { 141 StringRef name = getRawName(); 142 // Check if it's a special name. 143 if (name[0] == '/') { 144 if (name.size() == 1) // Linker member. 145 return name; 146 if (name.size() == 2 && name[1] == '/') // String table. 147 return name; 148 // It's a long name. 149 // Get the offset. 150 std::size_t offset; 151 if (name.substr(1).rtrim(" ").getAsInteger(10, offset)) 152 llvm_unreachable("Long name offset is not an integer"); 153 const char *addr = Parent->StringTable->Data.begin() 154 + sizeof(ArchiveMemberHeader) 155 + offset; 156 // Verify it. 157 if (Parent->StringTable == Parent->child_end() 158 || addr < (Parent->StringTable->Data.begin() 159 + sizeof(ArchiveMemberHeader)) 160 || addr > (Parent->StringTable->Data.begin() 161 + sizeof(ArchiveMemberHeader) 162 + Parent->StringTable->getSize())) 163 return object_error::parse_failed; 164 165 // GNU long file names end with a /. 166 if (Parent->kind() == K_GNU || Parent->kind() == K_MIPS64) { 167 StringRef::size_type End = StringRef(addr).find('/'); 168 return StringRef(addr, End); 169 } 170 return StringRef(addr); 171 } else if (name.startswith("#1/")) { 172 uint64_t name_size; 173 if (name.substr(3).rtrim(" ").getAsInteger(10, name_size)) 174 llvm_unreachable("Long name length is not an ingeter"); 175 return Data.substr(sizeof(ArchiveMemberHeader), name_size) 176 .rtrim(StringRef("\0", 1)); 177 } 178 // It's a simple name. 179 if (name[name.size() - 1] == '/') 180 return name.substr(0, name.size() - 1); 181 return name; 182 } 183 184 ErrorOr<MemoryBufferRef> Archive::Child::getMemoryBufferRef() const { 185 ErrorOr<StringRef> NameOrErr = getName(); 186 if (std::error_code EC = NameOrErr.getError()) 187 return EC; 188 StringRef Name = NameOrErr.get(); 189 return MemoryBufferRef(getBuffer(), Name); 190 } 191 192 ErrorOr<std::unique_ptr<Binary>> 193 Archive::Child::getAsBinary(LLVMContext *Context) const { 194 ErrorOr<MemoryBufferRef> BuffOrErr = getMemoryBufferRef(); 195 if (std::error_code EC = BuffOrErr.getError()) 196 return EC; 197 198 return createBinary(BuffOrErr.get(), Context); 199 } 200 201 ErrorOr<std::unique_ptr<Archive>> Archive::create(MemoryBufferRef Source) { 202 std::error_code EC; 203 std::unique_ptr<Archive> Ret(new Archive(Source, EC)); 204 if (EC) 205 return EC; 206 return std::move(Ret); 207 } 208 209 Archive::Archive(MemoryBufferRef Source, std::error_code &ec) 210 : Binary(Binary::ID_Archive, Source), SymbolTable(child_end()), 211 StringTable(child_end()), FirstRegular(child_end()) { 212 StringRef Buffer = Data.getBuffer(); 213 // Check for sufficient magic. 214 if (Buffer.startswith(ThinMagic)) { 215 IsThin = true; 216 } else if (Buffer.startswith(Magic)) { 217 IsThin = false; 218 } else { 219 ec = object_error::invalid_file_type; 220 return; 221 } 222 223 // Get the special members. 224 child_iterator i = child_begin(false); 225 child_iterator e = child_end(); 226 227 if (i == e) { 228 ec = std::error_code(); 229 return; 230 } 231 232 StringRef Name = i->getRawName(); 233 234 // Below is the pattern that is used to figure out the archive format 235 // GNU archive format 236 // First member : / (may exist, if it exists, points to the symbol table ) 237 // Second member : // (may exist, if it exists, points to the string table) 238 // Note : The string table is used if the filename exceeds 15 characters 239 // BSD archive format 240 // First member : __.SYMDEF or "__.SYMDEF SORTED" (the symbol table) 241 // There is no string table, if the filename exceeds 15 characters or has a 242 // embedded space, the filename has #1/<size>, The size represents the size 243 // of the filename that needs to be read after the archive header 244 // COFF archive format 245 // First member : / 246 // Second member : / (provides a directory of symbols) 247 // Third member : // (may exist, if it exists, contains the string table) 248 // Note: Microsoft PE/COFF Spec 8.3 says that the third member is present 249 // even if the string table is empty. However, lib.exe does not in fact 250 // seem to create the third member if there's no member whose filename 251 // exceeds 15 characters. So the third member is optional. 252 253 if (Name == "__.SYMDEF") { 254 Format = K_BSD; 255 SymbolTable = i; 256 ++i; 257 FirstRegular = i; 258 ec = std::error_code(); 259 return; 260 } 261 262 if (Name.startswith("#1/")) { 263 Format = K_BSD; 264 // We know this is BSD, so getName will work since there is no string table. 265 ErrorOr<StringRef> NameOrErr = i->getName(); 266 ec = NameOrErr.getError(); 267 if (ec) 268 return; 269 Name = NameOrErr.get(); 270 if (Name == "__.SYMDEF SORTED" || Name == "__.SYMDEF") { 271 SymbolTable = i; 272 ++i; 273 } 274 FirstRegular = i; 275 return; 276 } 277 278 // MIPS 64-bit ELF archives use a special format of a symbol table. 279 // This format is marked by `ar_name` field equals to "/SYM64/". 280 // For detailed description see page 96 in the following document: 281 // http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.pdf 282 283 bool has64SymTable = false; 284 if (Name == "/" || Name == "/SYM64/") { 285 SymbolTable = i; 286 if (Name == "/SYM64/") 287 has64SymTable = true; 288 289 ++i; 290 if (i == e) { 291 ec = std::error_code(); 292 return; 293 } 294 Name = i->getRawName(); 295 } 296 297 if (Name == "//") { 298 Format = has64SymTable ? K_MIPS64 : K_GNU; 299 StringTable = i; 300 ++i; 301 FirstRegular = i; 302 ec = std::error_code(); 303 return; 304 } 305 306 if (Name[0] != '/') { 307 Format = has64SymTable ? K_MIPS64 : K_GNU; 308 FirstRegular = i; 309 ec = std::error_code(); 310 return; 311 } 312 313 if (Name != "/") { 314 ec = object_error::parse_failed; 315 return; 316 } 317 318 Format = K_COFF; 319 SymbolTable = i; 320 321 ++i; 322 if (i == e) { 323 FirstRegular = i; 324 ec = std::error_code(); 325 return; 326 } 327 328 Name = i->getRawName(); 329 330 if (Name == "//") { 331 StringTable = i; 332 ++i; 333 } 334 335 FirstRegular = i; 336 ec = std::error_code(); 337 } 338 339 Archive::child_iterator Archive::child_begin(bool SkipInternal) const { 340 if (Data.getBufferSize() == 8) // empty archive. 341 return child_end(); 342 343 if (SkipInternal) 344 return FirstRegular; 345 346 const char *Loc = Data.getBufferStart() + strlen(Magic); 347 Child c(this, Loc); 348 return c; 349 } 350 351 Archive::child_iterator Archive::child_end() const { 352 return Child(this, nullptr); 353 } 354 355 StringRef Archive::Symbol::getName() const { 356 return Parent->SymbolTable->getBuffer().begin() + StringIndex; 357 } 358 359 ErrorOr<Archive::child_iterator> Archive::Symbol::getMember() const { 360 const char *Buf = Parent->SymbolTable->getBuffer().begin(); 361 const char *Offsets = Buf; 362 if (Parent->kind() == K_MIPS64) 363 Offsets += sizeof(uint64_t); 364 else 365 Offsets += sizeof(uint32_t); 366 uint32_t Offset = 0; 367 if (Parent->kind() == K_GNU) { 368 Offset = read32be(Offsets + SymbolIndex * 4); 369 } else if (Parent->kind() == K_MIPS64) { 370 Offset = read64be(Offsets + SymbolIndex * 8); 371 } else if (Parent->kind() == K_BSD) { 372 // The SymbolIndex is an index into the ranlib structs that start at 373 // Offsets (the first uint32_t is the number of bytes of the ranlib 374 // structs). The ranlib structs are a pair of uint32_t's the first 375 // being a string table offset and the second being the offset into 376 // the archive of the member that defines the symbol. Which is what 377 // is needed here. 378 Offset = read32le(Offsets + SymbolIndex * 8 + 4); 379 } else { 380 // Skip offsets. 381 uint32_t MemberCount = read32le(Buf); 382 Buf += MemberCount * 4 + 4; 383 384 uint32_t SymbolCount = read32le(Buf); 385 if (SymbolIndex >= SymbolCount) 386 return object_error::parse_failed; 387 388 // Skip SymbolCount to get to the indices table. 389 const char *Indices = Buf + 4; 390 391 // Get the index of the offset in the file member offset table for this 392 // symbol. 393 uint16_t OffsetIndex = read16le(Indices + SymbolIndex * 2); 394 // Subtract 1 since OffsetIndex is 1 based. 395 --OffsetIndex; 396 397 if (OffsetIndex >= MemberCount) 398 return object_error::parse_failed; 399 400 Offset = read32le(Offsets + OffsetIndex * 4); 401 } 402 403 const char *Loc = Parent->getData().begin() + Offset; 404 child_iterator Iter(Child(Parent, Loc)); 405 return Iter; 406 } 407 408 Archive::Symbol Archive::Symbol::getNext() const { 409 Symbol t(*this); 410 if (Parent->kind() == K_BSD) { 411 // t.StringIndex is an offset from the start of the __.SYMDEF or 412 // "__.SYMDEF SORTED" member into the string table for the ranlib 413 // struct indexed by t.SymbolIndex . To change t.StringIndex to the 414 // offset in the string table for t.SymbolIndex+1 we subtract the 415 // its offset from the start of the string table for t.SymbolIndex 416 // and add the offset of the string table for t.SymbolIndex+1. 417 418 // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t 419 // which is the number of bytes of ranlib structs that follow. The ranlib 420 // structs are a pair of uint32_t's the first being a string table offset 421 // and the second being the offset into the archive of the member that 422 // define the symbol. After that the next uint32_t is the byte count of 423 // the string table followed by the string table. 424 const char *Buf = Parent->SymbolTable->getBuffer().begin(); 425 uint32_t RanlibCount = 0; 426 RanlibCount = read32le(Buf) / 8; 427 // If t.SymbolIndex + 1 will be past the count of symbols (the RanlibCount) 428 // don't change the t.StringIndex as we don't want to reference a ranlib 429 // past RanlibCount. 430 if (t.SymbolIndex + 1 < RanlibCount) { 431 const char *Ranlibs = Buf + 4; 432 uint32_t CurRanStrx = 0; 433 uint32_t NextRanStrx = 0; 434 CurRanStrx = read32le(Ranlibs + t.SymbolIndex * 8); 435 NextRanStrx = read32le(Ranlibs + (t.SymbolIndex + 1) * 8); 436 t.StringIndex -= CurRanStrx; 437 t.StringIndex += NextRanStrx; 438 } 439 } else { 440 // Go to one past next null. 441 t.StringIndex = 442 Parent->SymbolTable->getBuffer().find('\0', t.StringIndex) + 1; 443 } 444 ++t.SymbolIndex; 445 return t; 446 } 447 448 Archive::symbol_iterator Archive::symbol_begin() const { 449 if (!hasSymbolTable()) 450 return symbol_iterator(Symbol(this, 0, 0)); 451 452 const char *buf = SymbolTable->getBuffer().begin(); 453 if (kind() == K_GNU) { 454 uint32_t symbol_count = 0; 455 symbol_count = read32be(buf); 456 buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t))); 457 } else if (kind() == K_MIPS64) { 458 uint64_t symbol_count = read64be(buf); 459 buf += sizeof(uint64_t) + (symbol_count * (sizeof(uint64_t))); 460 } else if (kind() == K_BSD) { 461 // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t 462 // which is the number of bytes of ranlib structs that follow. The ranlib 463 // structs are a pair of uint32_t's the first being a string table offset 464 // and the second being the offset into the archive of the member that 465 // define the symbol. After that the next uint32_t is the byte count of 466 // the string table followed by the string table. 467 uint32_t ranlib_count = 0; 468 ranlib_count = read32le(buf) / 8; 469 const char *ranlibs = buf + 4; 470 uint32_t ran_strx = 0; 471 ran_strx = read32le(ranlibs); 472 buf += sizeof(uint32_t) + (ranlib_count * (2 * (sizeof(uint32_t)))); 473 // Skip the byte count of the string table. 474 buf += sizeof(uint32_t); 475 buf += ran_strx; 476 } else { 477 uint32_t member_count = 0; 478 uint32_t symbol_count = 0; 479 member_count = read32le(buf); 480 buf += 4 + (member_count * 4); // Skip offsets. 481 symbol_count = read32le(buf); 482 buf += 4 + (symbol_count * 2); // Skip indices. 483 } 484 uint32_t string_start_offset = buf - SymbolTable->getBuffer().begin(); 485 return symbol_iterator(Symbol(this, 0, string_start_offset)); 486 } 487 488 Archive::symbol_iterator Archive::symbol_end() const { 489 if (!hasSymbolTable()) 490 return symbol_iterator(Symbol(this, 0, 0)); 491 return symbol_iterator(Symbol(this, getNumberOfSymbols(), 0)); 492 } 493 494 uint32_t Archive::getNumberOfSymbols() const { 495 const char *buf = SymbolTable->getBuffer().begin(); 496 if (kind() == K_GNU) 497 return read32be(buf); 498 if (kind() == K_MIPS64) 499 return read64be(buf); 500 if (kind() == K_BSD) 501 return read32le(buf) / 8; 502 uint32_t member_count = 0; 503 member_count = read32le(buf); 504 buf += 4 + (member_count * 4); // Skip offsets. 505 return read32le(buf); 506 } 507 508 Archive::child_iterator Archive::findSym(StringRef name) const { 509 Archive::symbol_iterator bs = symbol_begin(); 510 Archive::symbol_iterator es = symbol_end(); 511 512 for (; bs != es; ++bs) { 513 StringRef SymName = bs->getName(); 514 if (SymName == name) { 515 ErrorOr<Archive::child_iterator> ResultOrErr = bs->getMember(); 516 // FIXME: Should we really eat the error? 517 if (ResultOrErr.getError()) 518 return child_end(); 519 return ResultOrErr.get(); 520 } 521 } 522 return child_end(); 523 } 524 525 bool Archive::hasSymbolTable() const { 526 return SymbolTable != child_end(); 527 } 528