1 //===- FileAnalysis.cpp -----------------------------------------*- C++ -*-===//
2 //
3 //                      The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "FileAnalysis.h"
11 #include "GraphBuilder.h"
12 
13 #include "llvm/BinaryFormat/ELF.h"
14 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
15 #include "llvm/MC/MCAsmInfo.h"
16 #include "llvm/MC/MCContext.h"
17 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
18 #include "llvm/MC/MCInst.h"
19 #include "llvm/MC/MCInstPrinter.h"
20 #include "llvm/MC/MCInstrAnalysis.h"
21 #include "llvm/MC/MCInstrDesc.h"
22 #include "llvm/MC/MCInstrInfo.h"
23 #include "llvm/MC/MCObjectFileInfo.h"
24 #include "llvm/MC/MCRegisterInfo.h"
25 #include "llvm/MC/MCSubtargetInfo.h"
26 #include "llvm/Object/Binary.h"
27 #include "llvm/Object/COFF.h"
28 #include "llvm/Object/ELFObjectFile.h"
29 #include "llvm/Object/ObjectFile.h"
30 #include "llvm/Support/Casting.h"
31 #include "llvm/Support/CommandLine.h"
32 #include "llvm/Support/Error.h"
33 #include "llvm/Support/MemoryBuffer.h"
34 #include "llvm/Support/TargetRegistry.h"
35 #include "llvm/Support/TargetSelect.h"
36 #include "llvm/Support/raw_ostream.h"
37 
38 
// Shorthand for the per-instruction metadata record that FileAnalysis stores.
using Instr = llvm::cfi_verify::FileAnalysis::Instr;
// Shorthand for the symbolizer type used below to query line information.
using LLVMSymbolizer = llvm::symbolize::LLVMSymbolizer;
41 
42 namespace llvm {
43 namespace cfi_verify {
44 
// Backing storage for the 'ignore-dwarf' command-line option declared below.
// When true, parseCodeSections() and parseSectionContents() skip their DWARF
// checks.
bool IgnoreDWARFFlag;

// The 'ignore-dwarf' option. Its value is stored externally in
// IgnoreDWARFFlag (via cl::location) and defaults to false.
static cl::opt<bool, true> IgnoreDWARFArg(
    "ignore-dwarf",
    cl::desc(
        "Ignore all DWARF data. This relaxes the requirements for all "
        "statically linked libraries to have been compiled with '-g', but "
        "will result in false positives for 'CFI unprotected' instructions."),
    cl::location(IgnoreDWARFFlag), cl::init(false));
54 
55 StringRef stringCFIProtectionStatus(CFIProtectionStatus Status) {
56   switch (Status) {
57   case CFIProtectionStatus::PROTECTED:
58     return "PROTECTED";
59   case CFIProtectionStatus::FAIL_NOT_INDIRECT_CF:
60     return "FAIL_NOT_INDIRECT_CF";
61   case CFIProtectionStatus::FAIL_ORPHANS:
62     return "FAIL_ORPHANS";
63   case CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH:
64     return "FAIL_BAD_CONDITIONAL_BRANCH";
65   case CFIProtectionStatus::FAIL_REGISTER_CLOBBERED:
66     return "FAIL_REGISTER_CLOBBERED";
67   case CFIProtectionStatus::FAIL_INVALID_INSTRUCTION:
68     return "FAIL_INVALID_INSTRUCTION";
69   }
70   llvm_unreachable("Attempted to stringify an unknown enum value.");
71 }
72 
73 Expected<FileAnalysis> FileAnalysis::Create(StringRef Filename) {
74   // Open the filename provided.
75   Expected<object::OwningBinary<object::Binary>> BinaryOrErr =
76       object::createBinary(Filename);
77   if (!BinaryOrErr)
78     return BinaryOrErr.takeError();
79 
80   // Construct the object and allow it to take ownership of the binary.
81   object::OwningBinary<object::Binary> Binary = std::move(BinaryOrErr.get());
82   FileAnalysis Analysis(std::move(Binary));
83 
84   Analysis.Object = dyn_cast<object::ObjectFile>(Analysis.Binary.getBinary());
85   if (!Analysis.Object)
86     return make_error<UnsupportedDisassembly>("Failed to cast object");
87 
88   switch (Analysis.Object->getArch()) {
89     case Triple::x86:
90     case Triple::x86_64:
91     case Triple::aarch64:
92     case Triple::aarch64_be:
93       break;
94     default:
95       return make_error<UnsupportedDisassembly>("Unsupported architecture.");
96   }
97 
98   Analysis.ObjectTriple = Analysis.Object->makeTriple();
99   Analysis.Features = Analysis.Object->getFeatures();
100 
101   // Init the rest of the object.
102   if (auto InitResponse = Analysis.initialiseDisassemblyMembers())
103     return std::move(InitResponse);
104 
105   if (auto SectionParseResponse = Analysis.parseCodeSections())
106     return std::move(SectionParseResponse);
107 
108   if (auto SymbolTableParseResponse = Analysis.parseSymbolTable())
109     return std::move(SymbolTableParseResponse);
110 
111   return std::move(Analysis);
112 }
113 
// Constructs an analysis object that takes ownership of an already-loaded
// binary. No other member is initialised here; Create() fills in the rest.
FileAnalysis::FileAnalysis(object::OwningBinary<object::Binary> Binary)
    : Binary(std::move(Binary)) {}
116 
// Constructs an analysis object from an explicit triple and feature set,
// without any backing binary.
FileAnalysis::FileAnalysis(const Triple &ObjectTriple,
                           const SubtargetFeatures &Features)
    : ObjectTriple(ObjectTriple), Features(Features) {}
120 
121 const Instr *
122 FileAnalysis::getPrevInstructionSequential(const Instr &InstrMeta) const {
123   std::map<uint64_t, Instr>::const_iterator KV =
124       Instructions.find(InstrMeta.VMAddress);
125   if (KV == Instructions.end() || KV == Instructions.begin())
126     return nullptr;
127 
128   if (!(--KV)->second.Valid)
129     return nullptr;
130 
131   return &KV->second;
132 }
133 
134 const Instr *
135 FileAnalysis::getNextInstructionSequential(const Instr &InstrMeta) const {
136   std::map<uint64_t, Instr>::const_iterator KV =
137       Instructions.find(InstrMeta.VMAddress);
138   if (KV == Instructions.end() || ++KV == Instructions.end())
139     return nullptr;
140 
141   if (!KV->second.Valid)
142     return nullptr;
143 
144   return &KV->second;
145 }
146 
147 bool FileAnalysis::usesRegisterOperand(const Instr &InstrMeta) const {
148   for (const auto &Operand : InstrMeta.Instruction) {
149     if (Operand.isReg())
150       return true;
151   }
152   return false;
153 }
154 
155 const Instr *FileAnalysis::getInstruction(uint64_t Address) const {
156   const auto &InstrKV = Instructions.find(Address);
157   if (InstrKV == Instructions.end())
158     return nullptr;
159 
160   return &InstrKV->second;
161 }
162 
163 const Instr &FileAnalysis::getInstructionOrDie(uint64_t Address) const {
164   const auto &InstrKV = Instructions.find(Address);
165   assert(InstrKV != Instructions.end() && "Address doesn't exist.");
166   return InstrKV->second;
167 }
168 
169 bool FileAnalysis::isCFITrap(const Instr &InstrMeta) const {
170   const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode());
171   return InstrDesc.isTrap() || willTrapOnCFIViolation(InstrMeta);
172 }
173 
174 bool FileAnalysis::willTrapOnCFIViolation(const Instr &InstrMeta) const {
175   const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode());
176   if (!InstrDesc.isCall())
177     return false;
178   uint64_t Target;
179   if (!MIA->evaluateBranch(InstrMeta.Instruction, InstrMeta.VMAddress,
180                            InstrMeta.InstructionSize, Target))
181     return false;
182   return TrapOnFailFunctionAddresses.count(Target) > 0;
183 }
184 
185 bool FileAnalysis::canFallThrough(const Instr &InstrMeta) const {
186   if (!InstrMeta.Valid)
187     return false;
188 
189   if (isCFITrap(InstrMeta))
190     return false;
191 
192   const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode());
193   if (InstrDesc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo))
194     return InstrDesc.isConditionalBranch();
195 
196   return true;
197 }
198 
199 const Instr *
200 FileAnalysis::getDefiniteNextInstruction(const Instr &InstrMeta) const {
201   if (!InstrMeta.Valid)
202     return nullptr;
203 
204   if (isCFITrap(InstrMeta))
205     return nullptr;
206 
207   const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode());
208   const Instr *NextMetaPtr;
209   if (InstrDesc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo)) {
210     if (InstrDesc.isConditionalBranch())
211       return nullptr;
212 
213     uint64_t Target;
214     if (!MIA->evaluateBranch(InstrMeta.Instruction, InstrMeta.VMAddress,
215                              InstrMeta.InstructionSize, Target))
216       return nullptr;
217 
218     NextMetaPtr = getInstruction(Target);
219   } else {
220     NextMetaPtr =
221         getInstruction(InstrMeta.VMAddress + InstrMeta.InstructionSize);
222   }
223 
224   if (!NextMetaPtr || !NextMetaPtr->Valid)
225     return nullptr;
226 
227   return NextMetaPtr;
228 }
229 
230 std::set<const Instr *>
231 FileAnalysis::getDirectControlFlowXRefs(const Instr &InstrMeta) const {
232   std::set<const Instr *> CFCrossReferences;
233   const Instr *PrevInstruction = getPrevInstructionSequential(InstrMeta);
234 
235   if (PrevInstruction && canFallThrough(*PrevInstruction))
236     CFCrossReferences.insert(PrevInstruction);
237 
238   const auto &TargetRefsKV = StaticBranchTargetings.find(InstrMeta.VMAddress);
239   if (TargetRefsKV == StaticBranchTargetings.end())
240     return CFCrossReferences;
241 
242   for (uint64_t SourceInstrAddress : TargetRefsKV->second) {
243     const auto &SourceInstrKV = Instructions.find(SourceInstrAddress);
244     if (SourceInstrKV == Instructions.end()) {
245       errs() << "Failed to find source instruction at address "
246              << format_hex(SourceInstrAddress, 2)
247              << " for the cross-reference to instruction at address "
248              << format_hex(InstrMeta.VMAddress, 2) << ".\n";
249       continue;
250     }
251 
252     CFCrossReferences.insert(&SourceInstrKV->second);
253   }
254 
255   return CFCrossReferences;
256 }
257 
// Returns the set of addresses recorded as indirect control-flow
// instructions (populated by parseSectionContents()).
const std::set<uint64_t> &FileAnalysis::getIndirectInstructions() const {
  return IndirectInstructions;
}
261 
// Returns a non-owning pointer to the register info object held by this
// analysis; null if it has not been created.
const MCRegisterInfo *FileAnalysis::getRegisterInfo() const {
  return RegisterInfo.get();
}
265 
// Returns a non-owning pointer to the instruction info object held by this
// analysis; null if it has not been created.
const MCInstrInfo *FileAnalysis::getMCInstrInfo() const { return MII.get(); }
267 
// Returns a non-owning pointer to the instruction analysis object held by
// this analysis; null if it has not been created.
const MCInstrAnalysis *FileAnalysis::getMCInstrAnalysis() const {
  return MIA.get();
}
271 
272 Expected<DIInliningInfo> FileAnalysis::symbolizeInlinedCode(uint64_t Address) {
273   assert(Symbolizer != nullptr && "Symbolizer is invalid.");
274   return Symbolizer->symbolizeInlinedCode(Object->getFileName(), Address);
275 }
276 
277 CFIProtectionStatus
278 FileAnalysis::validateCFIProtection(const GraphResult &Graph) const {
279   const Instr *InstrMetaPtr = getInstruction(Graph.BaseAddress);
280   if (!InstrMetaPtr)
281     return CFIProtectionStatus::FAIL_INVALID_INSTRUCTION;
282 
283   const auto &InstrDesc = MII->get(InstrMetaPtr->Instruction.getOpcode());
284   if (!InstrDesc.mayAffectControlFlow(InstrMetaPtr->Instruction, *RegisterInfo))
285     return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF;
286 
287   if (!usesRegisterOperand(*InstrMetaPtr))
288     return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF;
289 
290   if (!Graph.OrphanedNodes.empty())
291     return CFIProtectionStatus::FAIL_ORPHANS;
292 
293   for (const auto &BranchNode : Graph.ConditionalBranchNodes) {
294     if (!BranchNode.CFIProtection)
295       return CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH;
296   }
297 
298   if (indirectCFOperandClobber(Graph) != Graph.BaseAddress)
299     return CFIProtectionStatus::FAIL_REGISTER_CLOBBERED;
300 
301   return CFIProtectionStatus::PROTECTED;
302 }
303 
// Searches for an instruction that clobbers a register used by the indirect
// control-flow instruction at Graph.BaseAddress.
//
// For every conditional branch node in Graph, the path leading to the
// indirect CF is walked backwards. Returns the address of the first
// instruction found that defines a tracked register in a way that is not
// tolerated, or Graph.BaseAddress if no such instruction is found.
//
// Graph must have no orphaned nodes, and the indirect CF must have at least
// one register operand (both are asserted).
uint64_t FileAnalysis::indirectCFOperandClobber(const GraphResult &Graph) const {
  assert(Graph.OrphanedNodes.empty() && "Orphaned nodes should be empty.");

  // Get the set of registers we must check to ensure they're not clobbered.
  const Instr &IndirectCF = getInstructionOrDie(Graph.BaseAddress);
  DenseSet<unsigned> RegisterNumbers;
  for (const auto &Operand : IndirectCF.Instruction) {
    if (Operand.isReg())
      RegisterNumbers.insert(Operand.getReg());
  }
  assert(RegisterNumbers.size() && "Zero register operands on indirect CF.");

  // Now check all branches to indirect CFs and ensure no clobbering happens.
  for (const auto &Branch : Graph.ConditionalBranchNodes) {
    // Pick the side of the conditional branch that leads to the indirect CF.
    uint64_t Node;
    if (Branch.IndirectCFIsOnTargetPath)
      Node = Branch.Target;
    else
      Node = Branch.Fallthrough;

    // Some architectures (e.g., AArch64) cannot load in an indirect branch, so
    // we allow them one load.
    bool canLoad = !MII->get(IndirectCF.Instruction.getOpcode()).mayLoad();

    // We walk backwards from the indirect CF.  It is the last node returned by
    // Graph.flattenAddress, so we skip it since we already handled it.
    // Each branch starts from a fresh copy of the register set, since the
    // walk below modifies it.
    DenseSet<unsigned> CurRegisterNumbers = RegisterNumbers;
    std::vector<uint64_t> Nodes = Graph.flattenAddress(Node);
    for (auto I = Nodes.rbegin() + 1, E = Nodes.rend(); I != E; ++I) {
      Node = *I;
      const Instr &NodeInstr = getInstructionOrDie(Node);
      const auto &InstrDesc = MII->get(NodeInstr.Instruction.getOpcode());

      for (auto RI = CurRegisterNumbers.begin(), RE = CurRegisterNumbers.end();
           RI != RE; ++RI) {
        unsigned RegNum = *RI;
        if (InstrDesc.hasDefOfPhysReg(NodeInstr.Instruction, RegNum,
                                      *RegisterInfo)) {
          // A definition is a clobber unless it is a load and the single
          // permitted load has not been used yet.
          if (!canLoad || !InstrDesc.mayLoad())
            return Node;
          canLoad = false;
          CurRegisterNumbers.erase(RI);
          // Add the registers this load reads to those we check for clobbers.
          for (unsigned i = InstrDesc.getNumDefs(),
                        e = InstrDesc.getNumOperands(); i != e; i++) {
            const auto Operand = NodeInstr.Instruction.getOperand(i);
            if (Operand.isReg())
              CurRegisterNumbers.insert(Operand.getReg());
          }
          // The set was modified above, so stop iterating over it for this
          // instruction rather than advancing RI.
          break;
        }
      }
    }
  }

  // No clobber found on any path.
  return Graph.BaseAddress;
}
361 
362 void FileAnalysis::printInstruction(const Instr &InstrMeta,
363                                     raw_ostream &OS) const {
364   Printer->printInst(&InstrMeta.Instruction, OS, "", *SubtargetInfo.get());
365 }
366 
367 Error FileAnalysis::initialiseDisassemblyMembers() {
368   std::string TripleName = ObjectTriple.getTriple();
369   ArchName = "";
370   MCPU = "";
371   std::string ErrorString;
372 
373   Symbolizer.reset(new LLVMSymbolizer());
374 
375   ObjectTarget =
376       TargetRegistry::lookupTarget(ArchName, ObjectTriple, ErrorString);
377   if (!ObjectTarget)
378     return make_error<UnsupportedDisassembly>(
379         (Twine("Couldn't find target \"") + ObjectTriple.getTriple() +
380          "\", failed with error: " + ErrorString)
381             .str());
382 
383   RegisterInfo.reset(ObjectTarget->createMCRegInfo(TripleName));
384   if (!RegisterInfo)
385     return make_error<UnsupportedDisassembly>(
386         "Failed to initialise RegisterInfo.");
387 
388   AsmInfo.reset(ObjectTarget->createMCAsmInfo(*RegisterInfo, TripleName));
389   if (!AsmInfo)
390     return make_error<UnsupportedDisassembly>("Failed to initialise AsmInfo.");
391 
392   SubtargetInfo.reset(ObjectTarget->createMCSubtargetInfo(
393       TripleName, MCPU, Features.getString()));
394   if (!SubtargetInfo)
395     return make_error<UnsupportedDisassembly>(
396         "Failed to initialise SubtargetInfo.");
397 
398   MII.reset(ObjectTarget->createMCInstrInfo());
399   if (!MII)
400     return make_error<UnsupportedDisassembly>("Failed to initialise MII.");
401 
402   Context.reset(new MCContext(AsmInfo.get(), RegisterInfo.get(), &MOFI));
403 
404   Disassembler.reset(
405       ObjectTarget->createMCDisassembler(*SubtargetInfo, *Context));
406 
407   if (!Disassembler)
408     return make_error<UnsupportedDisassembly>(
409         "No disassembler available for target");
410 
411   MIA.reset(ObjectTarget->createMCInstrAnalysis(MII.get()));
412 
413   Printer.reset(ObjectTarget->createMCInstPrinter(
414       ObjectTriple, AsmInfo->getAssemblerDialect(), *AsmInfo, *MII,
415       *RegisterInfo));
416 
417   return Error::success();
418 }
419 
420 Error FileAnalysis::parseCodeSections() {
421   if (!IgnoreDWARFFlag) {
422     std::unique_ptr<DWARFContext> DWARF = DWARFContext::create(*Object);
423     if (!DWARF)
424       return make_error<StringError>("Could not create DWARF information.",
425                                      inconvertibleErrorCode());
426 
427     bool LineInfoValid = false;
428 
429     for (auto &Unit : DWARF->compile_units()) {
430       const auto &LineTable = DWARF->getLineTableForUnit(Unit.get());
431       if (LineTable && !LineTable->Rows.empty()) {
432         LineInfoValid = true;
433         break;
434       }
435     }
436 
437     if (!LineInfoValid)
438       return make_error<StringError>(
439           "DWARF line information missing. Did you compile with '-g'?",
440           inconvertibleErrorCode());
441   }
442 
443   for (const object::SectionRef &Section : Object->sections()) {
444     // Ensure only executable sections get analysed.
445     if (!(object::ELFSectionRef(Section).getFlags() & ELF::SHF_EXECINSTR))
446       continue;
447 
448     // Avoid checking the PLT since it produces spurious failures on AArch64
449     // when ignoring DWARF data.
450     StringRef SectionName;
451     if (!Section.getName(SectionName) && SectionName == ".plt")
452       continue;
453 
454     StringRef SectionContents;
455     if (Section.getContents(SectionContents))
456       return make_error<StringError>("Failed to retrieve section contents",
457                                      inconvertibleErrorCode());
458 
459     ArrayRef<uint8_t> SectionBytes((const uint8_t *)SectionContents.data(),
460                                    Section.getSize());
461     parseSectionContents(SectionBytes, Section.getAddress());
462   }
463   return Error::success();
464 }
465 
466 void FileAnalysis::parseSectionContents(ArrayRef<uint8_t> SectionBytes,
467                                         uint64_t SectionAddress) {
468   assert(Symbolizer && "Symbolizer is uninitialised.");
469   MCInst Instruction;
470   Instr InstrMeta;
471   uint64_t InstructionSize;
472 
473   for (uint64_t Byte = 0; Byte < SectionBytes.size();) {
474     bool ValidInstruction =
475         Disassembler->getInstruction(Instruction, InstructionSize,
476                                      SectionBytes.drop_front(Byte), 0, nulls(),
477                                      outs()) == MCDisassembler::Success;
478 
479     Byte += InstructionSize;
480 
481     uint64_t VMAddress = SectionAddress + Byte - InstructionSize;
482     InstrMeta.Instruction = Instruction;
483     InstrMeta.VMAddress = VMAddress;
484     InstrMeta.InstructionSize = InstructionSize;
485     InstrMeta.Valid = ValidInstruction;
486 
487     addInstruction(InstrMeta);
488 
489     if (!ValidInstruction)
490       continue;
491 
492     // Skip additional parsing for instructions that do not affect the control
493     // flow.
494     const auto &InstrDesc = MII->get(Instruction.getOpcode());
495     if (!InstrDesc.mayAffectControlFlow(Instruction, *RegisterInfo))
496       continue;
497 
498     uint64_t Target;
499     if (MIA->evaluateBranch(Instruction, VMAddress, InstructionSize, Target)) {
500       // If the target can be evaluated, it's not indirect.
501       StaticBranchTargetings[Target].push_back(VMAddress);
502       continue;
503     }
504 
505     if (!usesRegisterOperand(InstrMeta))
506       continue;
507 
508     if (InstrDesc.isReturn())
509       continue;
510 
511     // Check if this instruction exists in the range of the DWARF metadata.
512     if (!IgnoreDWARFFlag) {
513       auto LineInfo =
514           Symbolizer->symbolizeCode(Object->getFileName(), VMAddress);
515       if (!LineInfo) {
516         handleAllErrors(LineInfo.takeError(), [](const ErrorInfoBase &E) {
517           errs() << "Symbolizer failed to get line: " << E.message() << "\n";
518         });
519         continue;
520       }
521 
522       if (LineInfo->FileName == "<invalid>")
523         continue;
524     }
525 
526     IndirectInstructions.insert(VMAddress);
527   }
528 }
529 
530 void FileAnalysis::addInstruction(const Instr &Instruction) {
531   const auto &KV =
532       Instructions.insert(std::make_pair(Instruction.VMAddress, Instruction));
533   if (!KV.second) {
534     errs() << "Failed to add instruction at address "
535            << format_hex(Instruction.VMAddress, 2)
536            << ": Instruction at this address already exists.\n";
537     exit(EXIT_FAILURE);
538   }
539 }
540 
541 Error FileAnalysis::parseSymbolTable() {
542   // Functions that will trap on CFI violations.
543   SmallSet<StringRef, 4> TrapOnFailFunctions;
544   TrapOnFailFunctions.insert("__cfi_slowpath");
545   TrapOnFailFunctions.insert("__cfi_slowpath_diag");
546   TrapOnFailFunctions.insert("abort");
547 
548   // Look through the list of symbols for functions that will trap on CFI
549   // violations.
550   for (auto &Sym : Object->symbols()) {
551     auto SymNameOrErr = Sym.getName();
552     if (!SymNameOrErr)
553       consumeError(SymNameOrErr.takeError());
554     else if (TrapOnFailFunctions.count(*SymNameOrErr) > 0) {
555       auto AddrOrErr = Sym.getAddress();
556       if (!AddrOrErr)
557         consumeError(AddrOrErr.takeError());
558       else
559         TrapOnFailFunctionAddresses.insert(*AddrOrErr);
560     }
561   }
562   if (auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Object)) {
563     for (const auto &Addr : ElfObject->getPltAddresses()) {
564       object::SymbolRef Sym(Addr.first, Object);
565       auto SymNameOrErr = Sym.getName();
566       if (!SymNameOrErr)
567         consumeError(SymNameOrErr.takeError());
568       else if (TrapOnFailFunctions.count(*SymNameOrErr) > 0)
569         TrapOnFailFunctionAddresses.insert(Addr.second);
570     }
571   }
572   return Error::success();
573 }
574 
// Constructs the error with the explanatory text that log() will print.
UnsupportedDisassembly::UnsupportedDisassembly(StringRef Text) : Text(Text) {}
576 
// Definition of the static ID member.
// NOTE(review): presumably its address identifies this error class to LLVM's
// error-handling machinery — confirm against the class declaration.
char UnsupportedDisassembly::ID;
578 void UnsupportedDisassembly::log(raw_ostream &OS) const {
579   OS << "Could not initialise disassembler: " << Text;
580 }
581 
582 std::error_code UnsupportedDisassembly::convertToErrorCode() const {
583   return std::error_code();
584 }
585 
586 } // namespace cfi_verify
587 } // namespace llvm
588