1 //===- FileAnalysis.cpp -----------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "FileAnalysis.h" 10 #include "GraphBuilder.h" 11 12 #include "llvm/BinaryFormat/ELF.h" 13 #include "llvm/DebugInfo/DWARF/DWARFContext.h" 14 #include "llvm/MC/MCAsmInfo.h" 15 #include "llvm/MC/MCContext.h" 16 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 17 #include "llvm/MC/MCInst.h" 18 #include "llvm/MC/MCInstPrinter.h" 19 #include "llvm/MC/MCInstrAnalysis.h" 20 #include "llvm/MC/MCInstrDesc.h" 21 #include "llvm/MC/MCInstrInfo.h" 22 #include "llvm/MC/MCObjectFileInfo.h" 23 #include "llvm/MC/MCRegisterInfo.h" 24 #include "llvm/MC/MCSubtargetInfo.h" 25 #include "llvm/Object/Binary.h" 26 #include "llvm/Object/COFF.h" 27 #include "llvm/Object/ELFObjectFile.h" 28 #include "llvm/Object/ObjectFile.h" 29 #include "llvm/Support/Casting.h" 30 #include "llvm/Support/CommandLine.h" 31 #include "llvm/Support/Error.h" 32 #include "llvm/Support/MemoryBuffer.h" 33 #include "llvm/Support/TargetRegistry.h" 34 #include "llvm/Support/TargetSelect.h" 35 #include "llvm/Support/raw_ostream.h" 36 37 38 using Instr = llvm::cfi_verify::FileAnalysis::Instr; 39 using LLVMSymbolizer = llvm::symbolize::LLVMSymbolizer; 40 41 namespace llvm { 42 namespace cfi_verify { 43 44 bool IgnoreDWARFFlag; 45 46 static cl::opt<bool, true> IgnoreDWARFArg( 47 "ignore-dwarf", 48 cl::desc( 49 "Ignore all DWARF data. This relaxes the requirements for all " 50 "statically linked libraries to have been compiled with '-g', but " 51 "will result in false positives for 'CFI unprotected' instructions."), 52 cl::location(IgnoreDWARFFlag), cl::init(false)); 53 54 StringRef stringCFIProtectionStatus(CFIProtectionStatus Status) { 55 switch (Status) { 56 case CFIProtectionStatus::PROTECTED: 57 return "PROTECTED"; 58 case CFIProtectionStatus::FAIL_NOT_INDIRECT_CF: 59 return "FAIL_NOT_INDIRECT_CF"; 60 case CFIProtectionStatus::FAIL_ORPHANS: 61 return "FAIL_ORPHANS"; 62 case CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH: 63 return "FAIL_BAD_CONDITIONAL_BRANCH"; 64 case CFIProtectionStatus::FAIL_REGISTER_CLOBBERED: 65 return "FAIL_REGISTER_CLOBBERED"; 66 case CFIProtectionStatus::FAIL_INVALID_INSTRUCTION: 67 return "FAIL_INVALID_INSTRUCTION"; 68 } 69 llvm_unreachable("Attempted to stringify an unknown enum value."); 70 } 71 72 Expected<FileAnalysis> FileAnalysis::Create(StringRef Filename) { 73 // Open the filename provided. 74 Expected<object::OwningBinary<object::Binary>> BinaryOrErr = 75 object::createBinary(Filename); 76 if (!BinaryOrErr) 77 return BinaryOrErr.takeError(); 78 79 // Construct the object and allow it to take ownership of the binary. 80 object::OwningBinary<object::Binary> Binary = std::move(BinaryOrErr.get()); 81 FileAnalysis Analysis(std::move(Binary)); 82 83 Analysis.Object = dyn_cast<object::ObjectFile>(Analysis.Binary.getBinary()); 84 if (!Analysis.Object) 85 return make_error<UnsupportedDisassembly>("Failed to cast object"); 86 87 switch (Analysis.Object->getArch()) { 88 case Triple::x86: 89 case Triple::x86_64: 90 case Triple::aarch64: 91 case Triple::aarch64_be: 92 break; 93 default: 94 return make_error<UnsupportedDisassembly>("Unsupported architecture."); 95 } 96 97 Analysis.ObjectTriple = Analysis.Object->makeTriple(); 98 Analysis.Features = Analysis.Object->getFeatures(); 99 100 // Init the rest of the object. 101 if (auto InitResponse = Analysis.initialiseDisassemblyMembers()) 102 return std::move(InitResponse); 103 104 if (auto SectionParseResponse = Analysis.parseCodeSections()) 105 return std::move(SectionParseResponse); 106 107 if (auto SymbolTableParseResponse = Analysis.parseSymbolTable()) 108 return std::move(SymbolTableParseResponse); 109 110 return std::move(Analysis); 111 } 112 113 FileAnalysis::FileAnalysis(object::OwningBinary<object::Binary> Binary) 114 : Binary(std::move(Binary)) {} 115 116 FileAnalysis::FileAnalysis(const Triple &ObjectTriple, 117 const SubtargetFeatures &Features) 118 : ObjectTriple(ObjectTriple), Features(Features) {} 119 120 const Instr * 121 FileAnalysis::getPrevInstructionSequential(const Instr &InstrMeta) const { 122 std::map<uint64_t, Instr>::const_iterator KV = 123 Instructions.find(InstrMeta.VMAddress); 124 if (KV == Instructions.end() || KV == Instructions.begin()) 125 return nullptr; 126 127 if (!(--KV)->second.Valid) 128 return nullptr; 129 130 return &KV->second; 131 } 132 133 const Instr * 134 FileAnalysis::getNextInstructionSequential(const Instr &InstrMeta) const { 135 std::map<uint64_t, Instr>::const_iterator KV = 136 Instructions.find(InstrMeta.VMAddress); 137 if (KV == Instructions.end() || ++KV == Instructions.end()) 138 return nullptr; 139 140 if (!KV->second.Valid) 141 return nullptr; 142 143 return &KV->second; 144 } 145 146 bool FileAnalysis::usesRegisterOperand(const Instr &InstrMeta) const { 147 for (const auto &Operand : InstrMeta.Instruction) { 148 if (Operand.isReg()) 149 return true; 150 } 151 return false; 152 } 153 154 const Instr *FileAnalysis::getInstruction(uint64_t Address) const { 155 const auto &InstrKV = Instructions.find(Address); 156 if (InstrKV == Instructions.end()) 157 return nullptr; 158 159 return &InstrKV->second; 160 } 161 162 const Instr &FileAnalysis::getInstructionOrDie(uint64_t Address) const { 163 const auto &InstrKV = Instructions.find(Address); 164 assert(InstrKV != Instructions.end() && "Address doesn't exist."); 165 return InstrKV->second; 166 } 167 168 bool FileAnalysis::isCFITrap(const Instr &InstrMeta) const { 169 const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode()); 170 return InstrDesc.isTrap() || willTrapOnCFIViolation(InstrMeta); 171 } 172 173 bool FileAnalysis::willTrapOnCFIViolation(const Instr &InstrMeta) const { 174 const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode()); 175 if (!InstrDesc.isCall()) 176 return false; 177 uint64_t Target; 178 if (!MIA->evaluateBranch(InstrMeta.Instruction, InstrMeta.VMAddress, 179 InstrMeta.InstructionSize, Target)) 180 return false; 181 return TrapOnFailFunctionAddresses.count(Target) > 0; 182 } 183 184 bool FileAnalysis::canFallThrough(const Instr &InstrMeta) const { 185 if (!InstrMeta.Valid) 186 return false; 187 188 if (isCFITrap(InstrMeta)) 189 return false; 190 191 const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode()); 192 if (InstrDesc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo)) 193 return InstrDesc.isConditionalBranch(); 194 195 return true; 196 } 197 198 const Instr * 199 FileAnalysis::getDefiniteNextInstruction(const Instr &InstrMeta) const { 200 if (!InstrMeta.Valid) 201 return nullptr; 202 203 if (isCFITrap(InstrMeta)) 204 return nullptr; 205 206 const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode()); 207 const Instr *NextMetaPtr; 208 if (InstrDesc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo)) { 209 if (InstrDesc.isConditionalBranch()) 210 return nullptr; 211 212 uint64_t Target; 213 if (!MIA->evaluateBranch(InstrMeta.Instruction, InstrMeta.VMAddress, 214 InstrMeta.InstructionSize, Target)) 215 return nullptr; 216 217 NextMetaPtr = getInstruction(Target); 218 } else { 219 NextMetaPtr = 220 getInstruction(InstrMeta.VMAddress + InstrMeta.InstructionSize); 221 } 222 223 if (!NextMetaPtr || !NextMetaPtr->Valid) 224 return nullptr; 225 226 return NextMetaPtr; 227 } 228 229 std::set<const Instr *> 230 FileAnalysis::getDirectControlFlowXRefs(const Instr &InstrMeta) const { 231 std::set<const Instr *> CFCrossReferences; 232 const Instr *PrevInstruction = getPrevInstructionSequential(InstrMeta); 233 234 if (PrevInstruction && canFallThrough(*PrevInstruction)) 235 CFCrossReferences.insert(PrevInstruction); 236 237 const auto &TargetRefsKV = StaticBranchTargetings.find(InstrMeta.VMAddress); 238 if (TargetRefsKV == StaticBranchTargetings.end()) 239 return CFCrossReferences; 240 241 for (uint64_t SourceInstrAddress : TargetRefsKV->second) { 242 const auto &SourceInstrKV = Instructions.find(SourceInstrAddress); 243 if (SourceInstrKV == Instructions.end()) { 244 errs() << "Failed to find source instruction at address " 245 << format_hex(SourceInstrAddress, 2) 246 << " for the cross-reference to instruction at address " 247 << format_hex(InstrMeta.VMAddress, 2) << ".\n"; 248 continue; 249 } 250 251 CFCrossReferences.insert(&SourceInstrKV->second); 252 } 253 254 return CFCrossReferences; 255 } 256 257 const std::set<uint64_t> &FileAnalysis::getIndirectInstructions() const { 258 return IndirectInstructions; 259 } 260 261 const MCRegisterInfo *FileAnalysis::getRegisterInfo() const { 262 return RegisterInfo.get(); 263 } 264 265 const MCInstrInfo *FileAnalysis::getMCInstrInfo() const { return MII.get(); } 266 267 const MCInstrAnalysis *FileAnalysis::getMCInstrAnalysis() const { 268 return MIA.get(); 269 } 270 271 Expected<DIInliningInfo> FileAnalysis::symbolizeInlinedCode(uint64_t Address) { 272 assert(Symbolizer != nullptr && "Symbolizer is invalid."); 273 return Symbolizer->symbolizeInlinedCode(Object->getFileName(), Address); 274 } 275 276 CFIProtectionStatus 277 FileAnalysis::validateCFIProtection(const GraphResult &Graph) const { 278 const Instr *InstrMetaPtr = getInstruction(Graph.BaseAddress); 279 if (!InstrMetaPtr) 280 return CFIProtectionStatus::FAIL_INVALID_INSTRUCTION; 281 282 const auto &InstrDesc = MII->get(InstrMetaPtr->Instruction.getOpcode()); 283 if (!InstrDesc.mayAffectControlFlow(InstrMetaPtr->Instruction, *RegisterInfo)) 284 return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF; 285 286 if (!usesRegisterOperand(*InstrMetaPtr)) 287 return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF; 288 289 if (!Graph.OrphanedNodes.empty()) 290 return CFIProtectionStatus::FAIL_ORPHANS; 291 292 for (const auto &BranchNode : Graph.ConditionalBranchNodes) { 293 if (!BranchNode.CFIProtection) 294 return CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH; 295 } 296 297 if (indirectCFOperandClobber(Graph) != Graph.BaseAddress) 298 return CFIProtectionStatus::FAIL_REGISTER_CLOBBERED; 299 300 return CFIProtectionStatus::PROTECTED; 301 } 302 303 uint64_t FileAnalysis::indirectCFOperandClobber(const GraphResult &Graph) const { 304 assert(Graph.OrphanedNodes.empty() && "Orphaned nodes should be empty."); 305 306 // Get the set of registers we must check to ensure they're not clobbered. 307 const Instr &IndirectCF = getInstructionOrDie(Graph.BaseAddress); 308 DenseSet<unsigned> RegisterNumbers; 309 for (const auto &Operand : IndirectCF.Instruction) { 310 if (Operand.isReg()) 311 RegisterNumbers.insert(Operand.getReg()); 312 } 313 assert(RegisterNumbers.size() && "Zero register operands on indirect CF."); 314 315 // Now check all branches to indirect CFs and ensure no clobbering happens. 316 for (const auto &Branch : Graph.ConditionalBranchNodes) { 317 uint64_t Node; 318 if (Branch.IndirectCFIsOnTargetPath) 319 Node = Branch.Target; 320 else 321 Node = Branch.Fallthrough; 322 323 // Some architectures (e.g., AArch64) cannot load in an indirect branch, so 324 // we allow them one load. 325 bool canLoad = !MII->get(IndirectCF.Instruction.getOpcode()).mayLoad(); 326 327 // We walk backwards from the indirect CF. It is the last node returned by 328 // Graph.flattenAddress, so we skip it since we already handled it. 329 DenseSet<unsigned> CurRegisterNumbers = RegisterNumbers; 330 std::vector<uint64_t> Nodes = Graph.flattenAddress(Node); 331 for (auto I = Nodes.rbegin() + 1, E = Nodes.rend(); I != E; ++I) { 332 Node = *I; 333 const Instr &NodeInstr = getInstructionOrDie(Node); 334 const auto &InstrDesc = MII->get(NodeInstr.Instruction.getOpcode()); 335 336 for (auto RI = CurRegisterNumbers.begin(), RE = CurRegisterNumbers.end(); 337 RI != RE; ++RI) { 338 unsigned RegNum = *RI; 339 if (InstrDesc.hasDefOfPhysReg(NodeInstr.Instruction, RegNum, 340 *RegisterInfo)) { 341 if (!canLoad || !InstrDesc.mayLoad()) 342 return Node; 343 canLoad = false; 344 CurRegisterNumbers.erase(RI); 345 // Add the registers this load reads to those we check for clobbers. 346 for (unsigned i = InstrDesc.getNumDefs(), 347 e = InstrDesc.getNumOperands(); i != e; i++) { 348 const auto Operand = NodeInstr.Instruction.getOperand(i); 349 if (Operand.isReg()) 350 CurRegisterNumbers.insert(Operand.getReg()); 351 } 352 break; 353 } 354 } 355 } 356 } 357 358 return Graph.BaseAddress; 359 } 360 361 void FileAnalysis::printInstruction(const Instr &InstrMeta, 362 raw_ostream &OS) const { 363 Printer->printInst(&InstrMeta.Instruction, OS, "", *SubtargetInfo.get()); 364 } 365 366 Error FileAnalysis::initialiseDisassemblyMembers() { 367 std::string TripleName = ObjectTriple.getTriple(); 368 ArchName = ""; 369 MCPU = ""; 370 std::string ErrorString; 371 372 Symbolizer.reset(new LLVMSymbolizer()); 373 374 ObjectTarget = 375 TargetRegistry::lookupTarget(ArchName, ObjectTriple, ErrorString); 376 if (!ObjectTarget) 377 return make_error<UnsupportedDisassembly>( 378 (Twine("Couldn't find target \"") + ObjectTriple.getTriple() + 379 "\", failed with error: " + ErrorString) 380 .str()); 381 382 RegisterInfo.reset(ObjectTarget->createMCRegInfo(TripleName)); 383 if (!RegisterInfo) 384 return make_error<UnsupportedDisassembly>( 385 "Failed to initialise RegisterInfo."); 386 387 AsmInfo.reset(ObjectTarget->createMCAsmInfo(*RegisterInfo, TripleName)); 388 if (!AsmInfo) 389 return make_error<UnsupportedDisassembly>("Failed to initialise AsmInfo."); 390 391 SubtargetInfo.reset(ObjectTarget->createMCSubtargetInfo( 392 TripleName, MCPU, Features.getString())); 393 if (!SubtargetInfo) 394 return make_error<UnsupportedDisassembly>( 395 "Failed to initialise SubtargetInfo."); 396 397 MII.reset(ObjectTarget->createMCInstrInfo()); 398 if (!MII) 399 return make_error<UnsupportedDisassembly>("Failed to initialise MII."); 400 401 Context.reset(new MCContext(AsmInfo.get(), RegisterInfo.get(), &MOFI)); 402 403 Disassembler.reset( 404 ObjectTarget->createMCDisassembler(*SubtargetInfo, *Context)); 405 406 if (!Disassembler) 407 return make_error<UnsupportedDisassembly>( 408 "No disassembler available for target"); 409 410 MIA.reset(ObjectTarget->createMCInstrAnalysis(MII.get())); 411 412 Printer.reset(ObjectTarget->createMCInstPrinter( 413 ObjectTriple, AsmInfo->getAssemblerDialect(), *AsmInfo, *MII, 414 *RegisterInfo)); 415 416 return Error::success(); 417 } 418 419 Error FileAnalysis::parseCodeSections() { 420 if (!IgnoreDWARFFlag) { 421 std::unique_ptr<DWARFContext> DWARF = DWARFContext::create(*Object); 422 if (!DWARF) 423 return make_error<StringError>("Could not create DWARF information.", 424 inconvertibleErrorCode()); 425 426 bool LineInfoValid = false; 427 428 for (auto &Unit : DWARF->compile_units()) { 429 const auto &LineTable = DWARF->getLineTableForUnit(Unit.get()); 430 if (LineTable && !LineTable->Rows.empty()) { 431 LineInfoValid = true; 432 break; 433 } 434 } 435 436 if (!LineInfoValid) 437 return make_error<StringError>( 438 "DWARF line information missing. Did you compile with '-g'?", 439 inconvertibleErrorCode()); 440 } 441 442 for (const object::SectionRef &Section : Object->sections()) { 443 // Ensure only executable sections get analysed. 444 if (!(object::ELFSectionRef(Section).getFlags() & ELF::SHF_EXECINSTR)) 445 continue; 446 447 // Avoid checking the PLT since it produces spurious failures on AArch64 448 // when ignoring DWARF data. 449 StringRef SectionName; 450 if (!Section.getName(SectionName) && SectionName == ".plt") 451 continue; 452 453 StringRef SectionContents; 454 if (Section.getContents(SectionContents)) 455 return make_error<StringError>("Failed to retrieve section contents", 456 inconvertibleErrorCode()); 457 458 ArrayRef<uint8_t> SectionBytes((const uint8_t *)SectionContents.data(), 459 Section.getSize()); 460 parseSectionContents(SectionBytes, Section.getAddress()); 461 } 462 return Error::success(); 463 } 464 465 void FileAnalysis::parseSectionContents(ArrayRef<uint8_t> SectionBytes, 466 uint64_t SectionAddress) { 467 assert(Symbolizer && "Symbolizer is uninitialised."); 468 MCInst Instruction; 469 Instr InstrMeta; 470 uint64_t InstructionSize; 471 472 for (uint64_t Byte = 0; Byte < SectionBytes.size();) { 473 bool ValidInstruction = 474 Disassembler->getInstruction(Instruction, InstructionSize, 475 SectionBytes.drop_front(Byte), 0, nulls(), 476 outs()) == MCDisassembler::Success; 477 478 Byte += InstructionSize; 479 480 uint64_t VMAddress = SectionAddress + Byte - InstructionSize; 481 InstrMeta.Instruction = Instruction; 482 InstrMeta.VMAddress = VMAddress; 483 InstrMeta.InstructionSize = InstructionSize; 484 InstrMeta.Valid = ValidInstruction; 485 486 addInstruction(InstrMeta); 487 488 if (!ValidInstruction) 489 continue; 490 491 // Skip additional parsing for instructions that do not affect the control 492 // flow. 493 const auto &InstrDesc = MII->get(Instruction.getOpcode()); 494 if (!InstrDesc.mayAffectControlFlow(Instruction, *RegisterInfo)) 495 continue; 496 497 uint64_t Target; 498 if (MIA->evaluateBranch(Instruction, VMAddress, InstructionSize, Target)) { 499 // If the target can be evaluated, it's not indirect. 500 StaticBranchTargetings[Target].push_back(VMAddress); 501 continue; 502 } 503 504 if (!usesRegisterOperand(InstrMeta)) 505 continue; 506 507 if (InstrDesc.isReturn()) 508 continue; 509 510 // Check if this instruction exists in the range of the DWARF metadata. 511 if (!IgnoreDWARFFlag) { 512 auto LineInfo = 513 Symbolizer->symbolizeCode(Object->getFileName(), VMAddress); 514 if (!LineInfo) { 515 handleAllErrors(LineInfo.takeError(), [](const ErrorInfoBase &E) { 516 errs() << "Symbolizer failed to get line: " << E.message() << "\n"; 517 }); 518 continue; 519 } 520 521 if (LineInfo->FileName == "<invalid>") 522 continue; 523 } 524 525 IndirectInstructions.insert(VMAddress); 526 } 527 } 528 529 void FileAnalysis::addInstruction(const Instr &Instruction) { 530 const auto &KV = 531 Instructions.insert(std::make_pair(Instruction.VMAddress, Instruction)); 532 if (!KV.second) { 533 errs() << "Failed to add instruction at address " 534 << format_hex(Instruction.VMAddress, 2) 535 << ": Instruction at this address already exists.\n"; 536 exit(EXIT_FAILURE); 537 } 538 } 539 540 Error FileAnalysis::parseSymbolTable() { 541 // Functions that will trap on CFI violations. 542 SmallSet<StringRef, 4> TrapOnFailFunctions; 543 TrapOnFailFunctions.insert("__cfi_slowpath"); 544 TrapOnFailFunctions.insert("__cfi_slowpath_diag"); 545 TrapOnFailFunctions.insert("abort"); 546 547 // Look through the list of symbols for functions that will trap on CFI 548 // violations. 549 for (auto &Sym : Object->symbols()) { 550 auto SymNameOrErr = Sym.getName(); 551 if (!SymNameOrErr) 552 consumeError(SymNameOrErr.takeError()); 553 else if (TrapOnFailFunctions.count(*SymNameOrErr) > 0) { 554 auto AddrOrErr = Sym.getAddress(); 555 if (!AddrOrErr) 556 consumeError(AddrOrErr.takeError()); 557 else 558 TrapOnFailFunctionAddresses.insert(*AddrOrErr); 559 } 560 } 561 if (auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Object)) { 562 for (const auto &Addr : ElfObject->getPltAddresses()) { 563 object::SymbolRef Sym(Addr.first, Object); 564 auto SymNameOrErr = Sym.getName(); 565 if (!SymNameOrErr) 566 consumeError(SymNameOrErr.takeError()); 567 else if (TrapOnFailFunctions.count(*SymNameOrErr) > 0) 568 TrapOnFailFunctionAddresses.insert(Addr.second); 569 } 570 } 571 return Error::success(); 572 } 573 574 UnsupportedDisassembly::UnsupportedDisassembly(StringRef Text) : Text(Text) {} 575 576 char UnsupportedDisassembly::ID; 577 void UnsupportedDisassembly::log(raw_ostream &OS) const { 578 OS << "Could not initialise disassembler: " << Text; 579 } 580 581 std::error_code UnsupportedDisassembly::convertToErrorCode() const { 582 return std::error_code(); 583 } 584 585 } // namespace cfi_verify 586 } // namespace llvm 587