1 //===- FileAnalysis.cpp -----------------------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "FileAnalysis.h" 11 #include "GraphBuilder.h" 12 13 #include "llvm/BinaryFormat/ELF.h" 14 #include "llvm/DebugInfo/DWARF/DWARFContext.h" 15 #include "llvm/MC/MCAsmInfo.h" 16 #include "llvm/MC/MCContext.h" 17 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 18 #include "llvm/MC/MCInst.h" 19 #include "llvm/MC/MCInstPrinter.h" 20 #include "llvm/MC/MCInstrAnalysis.h" 21 #include "llvm/MC/MCInstrDesc.h" 22 #include "llvm/MC/MCInstrInfo.h" 23 #include "llvm/MC/MCObjectFileInfo.h" 24 #include "llvm/MC/MCRegisterInfo.h" 25 #include "llvm/MC/MCSubtargetInfo.h" 26 #include "llvm/Object/Binary.h" 27 #include "llvm/Object/COFF.h" 28 #include "llvm/Object/ELFObjectFile.h" 29 #include "llvm/Object/ObjectFile.h" 30 #include "llvm/Support/Casting.h" 31 #include "llvm/Support/CommandLine.h" 32 #include "llvm/Support/Error.h" 33 #include "llvm/Support/MemoryBuffer.h" 34 #include "llvm/Support/TargetRegistry.h" 35 #include "llvm/Support/TargetSelect.h" 36 #include "llvm/Support/raw_ostream.h" 37 38 39 using Instr = llvm::cfi_verify::FileAnalysis::Instr; 40 using LLVMSymbolizer = llvm::symbolize::LLVMSymbolizer; 41 42 namespace llvm { 43 namespace cfi_verify { 44 45 bool IgnoreDWARFFlag; 46 47 static cl::opt<bool, true> IgnoreDWARFArg( 48 "ignore-dwarf", 49 cl::desc( 50 "Ignore all DWARF data. This relaxes the requirements for all " 51 "statically linked libraries to have been compiled with '-g', but " 52 "will result in false positives for 'CFI unprotected' instructions."), 53 cl::location(IgnoreDWARFFlag), cl::init(false)); 54 55 StringRef stringCFIProtectionStatus(CFIProtectionStatus Status) { 56 switch (Status) { 57 case CFIProtectionStatus::PROTECTED: 58 return "PROTECTED"; 59 case CFIProtectionStatus::FAIL_NOT_INDIRECT_CF: 60 return "FAIL_NOT_INDIRECT_CF"; 61 case CFIProtectionStatus::FAIL_ORPHANS: 62 return "FAIL_ORPHANS"; 63 case CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH: 64 return "FAIL_BAD_CONDITIONAL_BRANCH"; 65 case CFIProtectionStatus::FAIL_REGISTER_CLOBBERED: 66 return "FAIL_REGISTER_CLOBBERED"; 67 case CFIProtectionStatus::FAIL_INVALID_INSTRUCTION: 68 return "FAIL_INVALID_INSTRUCTION"; 69 } 70 llvm_unreachable("Attempted to stringify an unknown enum value."); 71 } 72 73 Expected<FileAnalysis> FileAnalysis::Create(StringRef Filename) { 74 // Open the filename provided. 75 Expected<object::OwningBinary<object::Binary>> BinaryOrErr = 76 object::createBinary(Filename); 77 if (!BinaryOrErr) 78 return BinaryOrErr.takeError(); 79 80 // Construct the object and allow it to take ownership of the binary. 81 object::OwningBinary<object::Binary> Binary = std::move(BinaryOrErr.get()); 82 FileAnalysis Analysis(std::move(Binary)); 83 84 Analysis.Object = dyn_cast<object::ObjectFile>(Analysis.Binary.getBinary()); 85 if (!Analysis.Object) 86 return make_error<UnsupportedDisassembly>("Failed to cast object"); 87 88 switch (Analysis.Object->getArch()) { 89 case Triple::x86: 90 case Triple::x86_64: 91 case Triple::aarch64: 92 case Triple::aarch64_be: 93 break; 94 default: 95 return make_error<UnsupportedDisassembly>("Unsupported architecture."); 96 } 97 98 Analysis.ObjectTriple = Analysis.Object->makeTriple(); 99 Analysis.Features = Analysis.Object->getFeatures(); 100 101 // Init the rest of the object. 102 if (auto InitResponse = Analysis.initialiseDisassemblyMembers()) 103 return std::move(InitResponse); 104 105 if (auto SectionParseResponse = Analysis.parseCodeSections()) 106 return std::move(SectionParseResponse); 107 108 if (auto SymbolTableParseResponse = Analysis.parseSymbolTable()) 109 return std::move(SymbolTableParseResponse); 110 111 return std::move(Analysis); 112 } 113 114 FileAnalysis::FileAnalysis(object::OwningBinary<object::Binary> Binary) 115 : Binary(std::move(Binary)) {} 116 117 FileAnalysis::FileAnalysis(const Triple &ObjectTriple, 118 const SubtargetFeatures &Features) 119 : ObjectTriple(ObjectTriple), Features(Features) {} 120 121 const Instr * 122 FileAnalysis::getPrevInstructionSequential(const Instr &InstrMeta) const { 123 std::map<uint64_t, Instr>::const_iterator KV = 124 Instructions.find(InstrMeta.VMAddress); 125 if (KV == Instructions.end() || KV == Instructions.begin()) 126 return nullptr; 127 128 if (!(--KV)->second.Valid) 129 return nullptr; 130 131 return &KV->second; 132 } 133 134 const Instr * 135 FileAnalysis::getNextInstructionSequential(const Instr &InstrMeta) const { 136 std::map<uint64_t, Instr>::const_iterator KV = 137 Instructions.find(InstrMeta.VMAddress); 138 if (KV == Instructions.end() || ++KV == Instructions.end()) 139 return nullptr; 140 141 if (!KV->second.Valid) 142 return nullptr; 143 144 return &KV->second; 145 } 146 147 bool FileAnalysis::usesRegisterOperand(const Instr &InstrMeta) const { 148 for (const auto &Operand : InstrMeta.Instruction) { 149 if (Operand.isReg()) 150 return true; 151 } 152 return false; 153 } 154 155 const Instr *FileAnalysis::getInstruction(uint64_t Address) const { 156 const auto &InstrKV = Instructions.find(Address); 157 if (InstrKV == Instructions.end()) 158 return nullptr; 159 160 return &InstrKV->second; 161 } 162 163 const Instr &FileAnalysis::getInstructionOrDie(uint64_t Address) const { 164 const auto &InstrKV = Instructions.find(Address); 165 assert(InstrKV != Instructions.end() && "Address doesn't exist."); 166 return InstrKV->second; 167 } 168 169 bool FileAnalysis::isCFITrap(const Instr &InstrMeta) const { 170 const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode()); 171 return InstrDesc.isTrap() || willTrapOnCFIViolation(InstrMeta); 172 } 173 174 bool FileAnalysis::willTrapOnCFIViolation(const Instr &InstrMeta) const { 175 const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode()); 176 if (!InstrDesc.isCall()) 177 return false; 178 uint64_t Target; 179 if (!MIA->evaluateBranch(InstrMeta.Instruction, InstrMeta.VMAddress, 180 InstrMeta.InstructionSize, Target)) 181 return false; 182 return TrapOnFailFunctionAddresses.count(Target) > 0; 183 } 184 185 bool FileAnalysis::canFallThrough(const Instr &InstrMeta) const { 186 if (!InstrMeta.Valid) 187 return false; 188 189 if (isCFITrap(InstrMeta)) 190 return false; 191 192 const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode()); 193 if (InstrDesc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo)) 194 return InstrDesc.isConditionalBranch(); 195 196 return true; 197 } 198 199 const Instr * 200 FileAnalysis::getDefiniteNextInstruction(const Instr &InstrMeta) const { 201 if (!InstrMeta.Valid) 202 return nullptr; 203 204 if (isCFITrap(InstrMeta)) 205 return nullptr; 206 207 const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode()); 208 const Instr *NextMetaPtr; 209 if (InstrDesc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo)) { 210 if (InstrDesc.isConditionalBranch()) 211 return nullptr; 212 213 uint64_t Target; 214 if (!MIA->evaluateBranch(InstrMeta.Instruction, InstrMeta.VMAddress, 215 InstrMeta.InstructionSize, Target)) 216 return nullptr; 217 218 NextMetaPtr = getInstruction(Target); 219 } else { 220 NextMetaPtr = 221 getInstruction(InstrMeta.VMAddress + InstrMeta.InstructionSize); 222 } 223 224 if (!NextMetaPtr || !NextMetaPtr->Valid) 225 return nullptr; 226 227 return NextMetaPtr; 228 } 229 230 std::set<const Instr *> 231 FileAnalysis::getDirectControlFlowXRefs(const Instr &InstrMeta) const { 232 std::set<const Instr *> CFCrossReferences; 233 const Instr *PrevInstruction = getPrevInstructionSequential(InstrMeta); 234 235 if (PrevInstruction && canFallThrough(*PrevInstruction)) 236 CFCrossReferences.insert(PrevInstruction); 237 238 const auto &TargetRefsKV = StaticBranchTargetings.find(InstrMeta.VMAddress); 239 if (TargetRefsKV == StaticBranchTargetings.end()) 240 return CFCrossReferences; 241 242 for (uint64_t SourceInstrAddress : TargetRefsKV->second) { 243 const auto &SourceInstrKV = Instructions.find(SourceInstrAddress); 244 if (SourceInstrKV == Instructions.end()) { 245 errs() << "Failed to find source instruction at address " 246 << format_hex(SourceInstrAddress, 2) 247 << " for the cross-reference to instruction at address " 248 << format_hex(InstrMeta.VMAddress, 2) << ".\n"; 249 continue; 250 } 251 252 CFCrossReferences.insert(&SourceInstrKV->second); 253 } 254 255 return CFCrossReferences; 256 } 257 258 const std::set<uint64_t> &FileAnalysis::getIndirectInstructions() const { 259 return IndirectInstructions; 260 } 261 262 const MCRegisterInfo *FileAnalysis::getRegisterInfo() const { 263 return RegisterInfo.get(); 264 } 265 266 const MCInstrInfo *FileAnalysis::getMCInstrInfo() const { return MII.get(); } 267 268 const MCInstrAnalysis *FileAnalysis::getMCInstrAnalysis() const { 269 return MIA.get(); 270 } 271 272 Expected<DIInliningInfo> FileAnalysis::symbolizeInlinedCode(uint64_t Address) { 273 assert(Symbolizer != nullptr && "Symbolizer is invalid."); 274 return Symbolizer->symbolizeInlinedCode(Object->getFileName(), Address); 275 } 276 277 CFIProtectionStatus 278 FileAnalysis::validateCFIProtection(const GraphResult &Graph) const { 279 const Instr *InstrMetaPtr = getInstruction(Graph.BaseAddress); 280 if (!InstrMetaPtr) 281 return CFIProtectionStatus::FAIL_INVALID_INSTRUCTION; 282 283 const auto &InstrDesc = MII->get(InstrMetaPtr->Instruction.getOpcode()); 284 if (!InstrDesc.mayAffectControlFlow(InstrMetaPtr->Instruction, *RegisterInfo)) 285 return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF; 286 287 if (!usesRegisterOperand(*InstrMetaPtr)) 288 return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF; 289 290 if (!Graph.OrphanedNodes.empty()) 291 return CFIProtectionStatus::FAIL_ORPHANS; 292 293 for (const auto &BranchNode : Graph.ConditionalBranchNodes) { 294 if (!BranchNode.CFIProtection) 295 return CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH; 296 } 297 298 if (indirectCFOperandClobber(Graph) != Graph.BaseAddress) 299 return CFIProtectionStatus::FAIL_REGISTER_CLOBBERED; 300 301 return CFIProtectionStatus::PROTECTED; 302 } 303 304 uint64_t FileAnalysis::indirectCFOperandClobber(const GraphResult &Graph) const { 305 assert(Graph.OrphanedNodes.empty() && "Orphaned nodes should be empty."); 306 307 // Get the set of registers we must check to ensure they're not clobbered. 308 const Instr &IndirectCF = getInstructionOrDie(Graph.BaseAddress); 309 DenseSet<unsigned> RegisterNumbers; 310 for (const auto &Operand : IndirectCF.Instruction) { 311 if (Operand.isReg()) 312 RegisterNumbers.insert(Operand.getReg()); 313 } 314 assert(RegisterNumbers.size() && "Zero register operands on indirect CF."); 315 316 // Now check all branches to indirect CFs and ensure no clobbering happens. 317 for (const auto &Branch : Graph.ConditionalBranchNodes) { 318 uint64_t Node; 319 if (Branch.IndirectCFIsOnTargetPath) 320 Node = Branch.Target; 321 else 322 Node = Branch.Fallthrough; 323 324 // Some architectures (e.g., AArch64) cannot load in an indirect branch, so 325 // we allow them one load. 326 bool canLoad = !MII->get(IndirectCF.Instruction.getOpcode()).mayLoad(); 327 328 // We walk backwards from the indirect CF. It is the last node returned by 329 // Graph.flattenAddress, so we skip it since we already handled it. 330 DenseSet<unsigned> CurRegisterNumbers = RegisterNumbers; 331 std::vector<uint64_t> Nodes = Graph.flattenAddress(Node); 332 for (auto I = Nodes.rbegin() + 1, E = Nodes.rend(); I != E; ++I) { 333 Node = *I; 334 const Instr &NodeInstr = getInstructionOrDie(Node); 335 const auto &InstrDesc = MII->get(NodeInstr.Instruction.getOpcode()); 336 337 for (auto RI = CurRegisterNumbers.begin(), RE = CurRegisterNumbers.end(); 338 RI != RE; ++RI) { 339 unsigned RegNum = *RI; 340 if (InstrDesc.hasDefOfPhysReg(NodeInstr.Instruction, RegNum, 341 *RegisterInfo)) { 342 if (!canLoad || !InstrDesc.mayLoad()) 343 return Node; 344 canLoad = false; 345 CurRegisterNumbers.erase(RI); 346 // Add the registers this load reads to those we check for clobbers. 347 for (unsigned i = InstrDesc.getNumDefs(), 348 e = InstrDesc.getNumOperands(); i != e; i++) { 349 const auto Operand = NodeInstr.Instruction.getOperand(i); 350 if (Operand.isReg()) 351 CurRegisterNumbers.insert(Operand.getReg()); 352 } 353 break; 354 } 355 } 356 } 357 } 358 359 return Graph.BaseAddress; 360 } 361 362 void FileAnalysis::printInstruction(const Instr &InstrMeta, 363 raw_ostream &OS) const { 364 Printer->printInst(&InstrMeta.Instruction, OS, "", *SubtargetInfo.get()); 365 } 366 367 Error FileAnalysis::initialiseDisassemblyMembers() { 368 std::string TripleName = ObjectTriple.getTriple(); 369 ArchName = ""; 370 MCPU = ""; 371 std::string ErrorString; 372 373 Symbolizer.reset(new LLVMSymbolizer()); 374 375 ObjectTarget = 376 TargetRegistry::lookupTarget(ArchName, ObjectTriple, ErrorString); 377 if (!ObjectTarget) 378 return make_error<UnsupportedDisassembly>( 379 (Twine("Couldn't find target \"") + ObjectTriple.getTriple() + 380 "\", failed with error: " + ErrorString) 381 .str()); 382 383 RegisterInfo.reset(ObjectTarget->createMCRegInfo(TripleName)); 384 if (!RegisterInfo) 385 return make_error<UnsupportedDisassembly>( 386 "Failed to initialise RegisterInfo."); 387 388 AsmInfo.reset(ObjectTarget->createMCAsmInfo(*RegisterInfo, TripleName)); 389 if (!AsmInfo) 390 return make_error<UnsupportedDisassembly>("Failed to initialise AsmInfo."); 391 392 SubtargetInfo.reset(ObjectTarget->createMCSubtargetInfo( 393 TripleName, MCPU, Features.getString())); 394 if (!SubtargetInfo) 395 return make_error<UnsupportedDisassembly>( 396 "Failed to initialise SubtargetInfo."); 397 398 MII.reset(ObjectTarget->createMCInstrInfo()); 399 if (!MII) 400 return make_error<UnsupportedDisassembly>("Failed to initialise MII."); 401 402 Context.reset(new MCContext(AsmInfo.get(), RegisterInfo.get(), &MOFI)); 403 404 Disassembler.reset( 405 ObjectTarget->createMCDisassembler(*SubtargetInfo, *Context)); 406 407 if (!Disassembler) 408 return make_error<UnsupportedDisassembly>( 409 "No disassembler available for target"); 410 411 MIA.reset(ObjectTarget->createMCInstrAnalysis(MII.get())); 412 413 Printer.reset(ObjectTarget->createMCInstPrinter( 414 ObjectTriple, AsmInfo->getAssemblerDialect(), *AsmInfo, *MII, 415 *RegisterInfo)); 416 417 return Error::success(); 418 } 419 420 Error FileAnalysis::parseCodeSections() { 421 if (!IgnoreDWARFFlag) { 422 std::unique_ptr<DWARFContext> DWARF = DWARFContext::create(*Object); 423 if (!DWARF) 424 return make_error<StringError>("Could not create DWARF information.", 425 inconvertibleErrorCode()); 426 427 bool LineInfoValid = false; 428 429 for (auto &Unit : DWARF->compile_units()) { 430 const auto &LineTable = DWARF->getLineTableForUnit(Unit.get()); 431 if (LineTable && !LineTable->Rows.empty()) { 432 LineInfoValid = true; 433 break; 434 } 435 } 436 437 if (!LineInfoValid) 438 return make_error<StringError>( 439 "DWARF line information missing. Did you compile with '-g'?", 440 inconvertibleErrorCode()); 441 } 442 443 for (const object::SectionRef &Section : Object->sections()) { 444 // Ensure only executable sections get analysed. 445 if (!(object::ELFSectionRef(Section).getFlags() & ELF::SHF_EXECINSTR)) 446 continue; 447 448 // Avoid checking the PLT since it produces spurious failures on AArch64 449 // when ignoring DWARF data. 450 StringRef SectionName; 451 if (!Section.getName(SectionName) && SectionName == ".plt") 452 continue; 453 454 StringRef SectionContents; 455 if (Section.getContents(SectionContents)) 456 return make_error<StringError>("Failed to retrieve section contents", 457 inconvertibleErrorCode()); 458 459 ArrayRef<uint8_t> SectionBytes((const uint8_t *)SectionContents.data(), 460 Section.getSize()); 461 parseSectionContents(SectionBytes, Section.getAddress()); 462 } 463 return Error::success(); 464 } 465 466 void FileAnalysis::parseSectionContents(ArrayRef<uint8_t> SectionBytes, 467 uint64_t SectionAddress) { 468 assert(Symbolizer && "Symbolizer is uninitialised."); 469 MCInst Instruction; 470 Instr InstrMeta; 471 uint64_t InstructionSize; 472 473 for (uint64_t Byte = 0; Byte < SectionBytes.size();) { 474 bool ValidInstruction = 475 Disassembler->getInstruction(Instruction, InstructionSize, 476 SectionBytes.drop_front(Byte), 0, nulls(), 477 outs()) == MCDisassembler::Success; 478 479 Byte += InstructionSize; 480 481 uint64_t VMAddress = SectionAddress + Byte - InstructionSize; 482 InstrMeta.Instruction = Instruction; 483 InstrMeta.VMAddress = VMAddress; 484 InstrMeta.InstructionSize = InstructionSize; 485 InstrMeta.Valid = ValidInstruction; 486 487 addInstruction(InstrMeta); 488 489 if (!ValidInstruction) 490 continue; 491 492 // Skip additional parsing for instructions that do not affect the control 493 // flow. 494 const auto &InstrDesc = MII->get(Instruction.getOpcode()); 495 if (!InstrDesc.mayAffectControlFlow(Instruction, *RegisterInfo)) 496 continue; 497 498 uint64_t Target; 499 if (MIA->evaluateBranch(Instruction, VMAddress, InstructionSize, Target)) { 500 // If the target can be evaluated, it's not indirect. 501 StaticBranchTargetings[Target].push_back(VMAddress); 502 continue; 503 } 504 505 if (!usesRegisterOperand(InstrMeta)) 506 continue; 507 508 if (InstrDesc.isReturn()) 509 continue; 510 511 // Check if this instruction exists in the range of the DWARF metadata. 512 if (!IgnoreDWARFFlag) { 513 auto LineInfo = 514 Symbolizer->symbolizeCode(Object->getFileName(), VMAddress); 515 if (!LineInfo) { 516 handleAllErrors(LineInfo.takeError(), [](const ErrorInfoBase &E) { 517 errs() << "Symbolizer failed to get line: " << E.message() << "\n"; 518 }); 519 continue; 520 } 521 522 if (LineInfo->FileName == "<invalid>") 523 continue; 524 } 525 526 IndirectInstructions.insert(VMAddress); 527 } 528 } 529 530 void FileAnalysis::addInstruction(const Instr &Instruction) { 531 const auto &KV = 532 Instructions.insert(std::make_pair(Instruction.VMAddress, Instruction)); 533 if (!KV.second) { 534 errs() << "Failed to add instruction at address " 535 << format_hex(Instruction.VMAddress, 2) 536 << ": Instruction at this address already exists.\n"; 537 exit(EXIT_FAILURE); 538 } 539 } 540 541 Error FileAnalysis::parseSymbolTable() { 542 // Functions that will trap on CFI violations. 543 SmallSet<StringRef, 4> TrapOnFailFunctions; 544 TrapOnFailFunctions.insert("__cfi_slowpath"); 545 TrapOnFailFunctions.insert("__cfi_slowpath_diag"); 546 TrapOnFailFunctions.insert("abort"); 547 548 // Look through the list of symbols for functions that will trap on CFI 549 // violations. 550 for (auto &Sym : Object->symbols()) { 551 auto SymNameOrErr = Sym.getName(); 552 if (!SymNameOrErr) 553 consumeError(SymNameOrErr.takeError()); 554 else if (TrapOnFailFunctions.count(*SymNameOrErr) > 0) { 555 auto AddrOrErr = Sym.getAddress(); 556 if (!AddrOrErr) 557 consumeError(AddrOrErr.takeError()); 558 else 559 TrapOnFailFunctionAddresses.insert(*AddrOrErr); 560 } 561 } 562 if (auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Object)) { 563 for (const auto &Addr : ElfObject->getPltAddresses()) { 564 object::SymbolRef Sym(Addr.first, Object); 565 auto SymNameOrErr = Sym.getName(); 566 if (!SymNameOrErr) 567 consumeError(SymNameOrErr.takeError()); 568 else if (TrapOnFailFunctions.count(*SymNameOrErr) > 0) 569 TrapOnFailFunctionAddresses.insert(Addr.second); 570 } 571 } 572 return Error::success(); 573 } 574 575 UnsupportedDisassembly::UnsupportedDisassembly(StringRef Text) : Text(Text) {} 576 577 char UnsupportedDisassembly::ID; 578 void UnsupportedDisassembly::log(raw_ostream &OS) const { 579 OS << "Could not initialise disassembler: " << Text; 580 } 581 582 std::error_code UnsupportedDisassembly::convertToErrorCode() const { 583 return std::error_code(); 584 } 585 586 } // namespace cfi_verify 587 } // namespace llvm 588