1 //===- FileAnalysis.cpp -----------------------------------------*- C++ -*-===//
2 //
3 //                      The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "FileAnalysis.h"
11 #include "GraphBuilder.h"
12 
13 #include "llvm/BinaryFormat/ELF.h"
14 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
15 #include "llvm/MC/MCAsmInfo.h"
16 #include "llvm/MC/MCContext.h"
17 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
18 #include "llvm/MC/MCInst.h"
19 #include "llvm/MC/MCInstPrinter.h"
20 #include "llvm/MC/MCInstrAnalysis.h"
21 #include "llvm/MC/MCInstrDesc.h"
22 #include "llvm/MC/MCInstrInfo.h"
23 #include "llvm/MC/MCObjectFileInfo.h"
24 #include "llvm/MC/MCRegisterInfo.h"
25 #include "llvm/MC/MCSubtargetInfo.h"
26 #include "llvm/Object/Binary.h"
27 #include "llvm/Object/COFF.h"
28 #include "llvm/Object/ELFObjectFile.h"
29 #include "llvm/Object/ObjectFile.h"
30 #include "llvm/Support/Casting.h"
31 #include "llvm/Support/CommandLine.h"
32 #include "llvm/Support/Error.h"
33 #include "llvm/Support/FormatVariadic.h"
34 #include "llvm/Support/MemoryBuffer.h"
35 #include "llvm/Support/TargetRegistry.h"
36 #include "llvm/Support/TargetSelect.h"
37 #include "llvm/Support/raw_ostream.h"
38 
39 #include <functional>
40 
41 using Instr = llvm::cfi_verify::FileAnalysis::Instr;
42 using LLVMSymbolizer = llvm::symbolize::LLVMSymbolizer;
43 
44 namespace llvm {
45 namespace cfi_verify {
46 
47 bool IgnoreDWARFFlag;
48 
49 static cl::opt<bool, true> IgnoreDWARFArg(
50     "ignore-dwarf",
51     cl::desc(
52         "Ignore all DWARF data. This relaxes the requirements for all "
53         "statically linked libraries to have been compiled with '-g', but "
54         "will result in false positives for 'CFI unprotected' instructions."),
55     cl::location(IgnoreDWARFFlag), cl::init(false));
56 
57 StringRef stringCFIProtectionStatus(CFIProtectionStatus Status) {
58   switch (Status) {
59   case CFIProtectionStatus::PROTECTED:
60     return "PROTECTED";
61   case CFIProtectionStatus::FAIL_NOT_INDIRECT_CF:
62     return "FAIL_NOT_INDIRECT_CF";
63   case CFIProtectionStatus::FAIL_ORPHANS:
64     return "FAIL_ORPHANS";
65   case CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH:
66     return "FAIL_BAD_CONDITIONAL_BRANCH";
67   case CFIProtectionStatus::FAIL_REGISTER_CLOBBERED:
68     return "FAIL_REGISTER_CLOBBERED";
69   case CFIProtectionStatus::FAIL_INVALID_INSTRUCTION:
70     return "FAIL_INVALID_INSTRUCTION";
71   }
72   llvm_unreachable("Attempted to stringify an unknown enum value.");
73 }
74 
75 Expected<FileAnalysis> FileAnalysis::Create(StringRef Filename) {
76   // Open the filename provided.
77   Expected<object::OwningBinary<object::Binary>> BinaryOrErr =
78       object::createBinary(Filename);
79   if (!BinaryOrErr)
80     return BinaryOrErr.takeError();
81 
82   // Construct the object and allow it to take ownership of the binary.
83   object::OwningBinary<object::Binary> Binary = std::move(BinaryOrErr.get());
84   FileAnalysis Analysis(std::move(Binary));
85 
86   Analysis.Object = dyn_cast<object::ObjectFile>(Analysis.Binary.getBinary());
87   if (!Analysis.Object)
88     return make_error<UnsupportedDisassembly>("Failed to cast object");
89 
90   Analysis.ObjectTriple = Analysis.Object->makeTriple();
91   Analysis.Features = Analysis.Object->getFeatures();
92 
93   // Init the rest of the object.
94   if (auto InitResponse = Analysis.initialiseDisassemblyMembers())
95     return std::move(InitResponse);
96 
97   if (auto SectionParseResponse = Analysis.parseCodeSections())
98     return std::move(SectionParseResponse);
99 
100   return std::move(Analysis);
101 }
102 
103 FileAnalysis::FileAnalysis(object::OwningBinary<object::Binary> Binary)
104     : Binary(std::move(Binary)) {}
105 
106 FileAnalysis::FileAnalysis(const Triple &ObjectTriple,
107                            const SubtargetFeatures &Features)
108     : ObjectTriple(ObjectTriple), Features(Features) {}
109 
110 const Instr *
111 FileAnalysis::getPrevInstructionSequential(const Instr &InstrMeta) const {
112   std::map<uint64_t, Instr>::const_iterator KV =
113       Instructions.find(InstrMeta.VMAddress);
114   if (KV == Instructions.end() || KV == Instructions.begin())
115     return nullptr;
116 
117   if (!(--KV)->second.Valid)
118     return nullptr;
119 
120   return &KV->second;
121 }
122 
123 const Instr *
124 FileAnalysis::getNextInstructionSequential(const Instr &InstrMeta) const {
125   std::map<uint64_t, Instr>::const_iterator KV =
126       Instructions.find(InstrMeta.VMAddress);
127   if (KV == Instructions.end() || ++KV == Instructions.end())
128     return nullptr;
129 
130   if (!KV->second.Valid)
131     return nullptr;
132 
133   return &KV->second;
134 }
135 
136 bool FileAnalysis::usesRegisterOperand(const Instr &InstrMeta) const {
137   for (const auto &Operand : InstrMeta.Instruction) {
138     if (Operand.isReg())
139       return true;
140   }
141   return false;
142 }
143 
144 const Instr *FileAnalysis::getInstruction(uint64_t Address) const {
145   const auto &InstrKV = Instructions.find(Address);
146   if (InstrKV == Instructions.end())
147     return nullptr;
148 
149   return &InstrKV->second;
150 }
151 
152 const Instr &FileAnalysis::getInstructionOrDie(uint64_t Address) const {
153   const auto &InstrKV = Instructions.find(Address);
154   assert(InstrKV != Instructions.end() && "Address doesn't exist.");
155   return InstrKV->second;
156 }
157 
158 bool FileAnalysis::isCFITrap(const Instr &InstrMeta) const {
159   return MII->getName(InstrMeta.Instruction.getOpcode()) == "TRAP";
160 }
161 
162 bool FileAnalysis::canFallThrough(const Instr &InstrMeta) const {
163   if (!InstrMeta.Valid)
164     return false;
165 
166   if (isCFITrap(InstrMeta))
167     return false;
168 
169   const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode());
170   if (InstrDesc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo))
171     return InstrDesc.isConditionalBranch();
172 
173   return true;
174 }
175 
176 const Instr *
177 FileAnalysis::getDefiniteNextInstruction(const Instr &InstrMeta) const {
178   if (!InstrMeta.Valid)
179     return nullptr;
180 
181   if (isCFITrap(InstrMeta))
182     return nullptr;
183 
184   const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode());
185   const Instr *NextMetaPtr;
186   if (InstrDesc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo)) {
187     if (InstrDesc.isConditionalBranch())
188       return nullptr;
189 
190     uint64_t Target;
191     if (!MIA->evaluateBranch(InstrMeta.Instruction, InstrMeta.VMAddress,
192                              InstrMeta.InstructionSize, Target))
193       return nullptr;
194 
195     NextMetaPtr = getInstruction(Target);
196   } else {
197     NextMetaPtr =
198         getInstruction(InstrMeta.VMAddress + InstrMeta.InstructionSize);
199   }
200 
201   if (!NextMetaPtr || !NextMetaPtr->Valid)
202     return nullptr;
203 
204   return NextMetaPtr;
205 }
206 
207 std::set<const Instr *>
208 FileAnalysis::getDirectControlFlowXRefs(const Instr &InstrMeta) const {
209   std::set<const Instr *> CFCrossReferences;
210   const Instr *PrevInstruction = getPrevInstructionSequential(InstrMeta);
211 
212   if (PrevInstruction && canFallThrough(*PrevInstruction))
213     CFCrossReferences.insert(PrevInstruction);
214 
215   const auto &TargetRefsKV = StaticBranchTargetings.find(InstrMeta.VMAddress);
216   if (TargetRefsKV == StaticBranchTargetings.end())
217     return CFCrossReferences;
218 
219   for (uint64_t SourceInstrAddress : TargetRefsKV->second) {
220     const auto &SourceInstrKV = Instructions.find(SourceInstrAddress);
221     if (SourceInstrKV == Instructions.end()) {
222       errs() << "Failed to find source instruction at address "
223              << format_hex(SourceInstrAddress, 2)
224              << " for the cross-reference to instruction at address "
225              << format_hex(InstrMeta.VMAddress, 2) << ".\n";
226       continue;
227     }
228 
229     CFCrossReferences.insert(&SourceInstrKV->second);
230   }
231 
232   return CFCrossReferences;
233 }
234 
235 const std::set<uint64_t> &FileAnalysis::getIndirectInstructions() const {
236   return IndirectInstructions;
237 }
238 
239 const MCRegisterInfo *FileAnalysis::getRegisterInfo() const {
240   return RegisterInfo.get();
241 }
242 
243 const MCInstrInfo *FileAnalysis::getMCInstrInfo() const { return MII.get(); }
244 
245 const MCInstrAnalysis *FileAnalysis::getMCInstrAnalysis() const {
246   return MIA.get();
247 }
248 
249 Expected<DIInliningInfo> FileAnalysis::symbolizeInlinedCode(uint64_t Address) {
250   assert(Symbolizer != nullptr && "Symbolizer is invalid.");
251   return Symbolizer->symbolizeInlinedCode(Object->getFileName(), Address);
252 }
253 
254 CFIProtectionStatus
255 FileAnalysis::validateCFIProtection(const GraphResult &Graph) const {
256   const Instr *InstrMetaPtr = getInstruction(Graph.BaseAddress);
257   if (!InstrMetaPtr)
258     return CFIProtectionStatus::FAIL_INVALID_INSTRUCTION;
259 
260   const auto &InstrDesc = MII->get(InstrMetaPtr->Instruction.getOpcode());
261   if (!InstrDesc.mayAffectControlFlow(InstrMetaPtr->Instruction, *RegisterInfo))
262     return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF;
263 
264   if (!usesRegisterOperand(*InstrMetaPtr))
265     return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF;
266 
267   if (!Graph.OrphanedNodes.empty())
268     return CFIProtectionStatus::FAIL_ORPHANS;
269 
270   for (const auto &BranchNode : Graph.ConditionalBranchNodes) {
271     if (!BranchNode.CFIProtection)
272       return CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH;
273   }
274 
275   if (indirectCFOperandClobber(Graph) != Graph.BaseAddress)
276     return CFIProtectionStatus::FAIL_REGISTER_CLOBBERED;
277 
278   return CFIProtectionStatus::PROTECTED;
279 }
280 
281 uint64_t FileAnalysis::indirectCFOperandClobber(const GraphResult &Graph) const {
282   assert(Graph.OrphanedNodes.empty() && "Orphaned nodes should be empty.");
283 
284   // Get the set of registers we must check to ensure they're not clobbered.
285   const Instr &IndirectCF = getInstructionOrDie(Graph.BaseAddress);
286   DenseSet<unsigned> RegisterNumbers;
287   for (const auto &Operand : IndirectCF.Instruction) {
288     if (Operand.isReg())
289       RegisterNumbers.insert(Operand.getReg());
290   }
291   assert(RegisterNumbers.size() && "Zero register operands on indirect CF.");
292 
293   // Now check all branches to indirect CFs and ensure no clobbering happens.
294   for (const auto &Branch : Graph.ConditionalBranchNodes) {
295     uint64_t Node;
296     if (Branch.IndirectCFIsOnTargetPath)
297       Node = Branch.Target;
298     else
299       Node = Branch.Fallthrough;
300 
301     while (Node != Graph.BaseAddress) {
302       const Instr &NodeInstr = getInstructionOrDie(Node);
303       const auto &InstrDesc = MII->get(NodeInstr.Instruction.getOpcode());
304 
305       for (unsigned RegNum : RegisterNumbers) {
306         if (InstrDesc.hasDefOfPhysReg(NodeInstr.Instruction, RegNum,
307                                       *RegisterInfo))
308           return Node;
309       }
310 
311       const auto &KV = Graph.IntermediateNodes.find(Node);
312       assert((KV != Graph.IntermediateNodes.end()) &&
313              "Could not get next node.");
314       Node = KV->second;
315     }
316   }
317 
318   return Graph.BaseAddress;
319 }
320 
321 void FileAnalysis::printInstruction(const Instr &InstrMeta,
322                                     raw_ostream &OS) const {
323   Printer->printInst(&InstrMeta.Instruction, OS, "", *SubtargetInfo.get());
324 }
325 
326 Error FileAnalysis::initialiseDisassemblyMembers() {
327   std::string TripleName = ObjectTriple.getTriple();
328   ArchName = "";
329   MCPU = "";
330   std::string ErrorString;
331 
332   Symbolizer.reset(new LLVMSymbolizer());
333 
334   ObjectTarget =
335       TargetRegistry::lookupTarget(ArchName, ObjectTriple, ErrorString);
336   if (!ObjectTarget)
337     return make_error<UnsupportedDisassembly>(
338         (Twine("Couldn't find target \"") + ObjectTriple.getTriple() +
339          "\", failed with error: " + ErrorString)
340             .str());
341 
342   RegisterInfo.reset(ObjectTarget->createMCRegInfo(TripleName));
343   if (!RegisterInfo)
344     return make_error<UnsupportedDisassembly>(
345         "Failed to initialise RegisterInfo.");
346 
347   AsmInfo.reset(ObjectTarget->createMCAsmInfo(*RegisterInfo, TripleName));
348   if (!AsmInfo)
349     return make_error<UnsupportedDisassembly>("Failed to initialise AsmInfo.");
350 
351   SubtargetInfo.reset(ObjectTarget->createMCSubtargetInfo(
352       TripleName, MCPU, Features.getString()));
353   if (!SubtargetInfo)
354     return make_error<UnsupportedDisassembly>(
355         "Failed to initialise SubtargetInfo.");
356 
357   MII.reset(ObjectTarget->createMCInstrInfo());
358   if (!MII)
359     return make_error<UnsupportedDisassembly>("Failed to initialise MII.");
360 
361   Context.reset(new MCContext(AsmInfo.get(), RegisterInfo.get(), &MOFI));
362 
363   Disassembler.reset(
364       ObjectTarget->createMCDisassembler(*SubtargetInfo, *Context));
365 
366   if (!Disassembler)
367     return make_error<UnsupportedDisassembly>(
368         "No disassembler available for target");
369 
370   MIA.reset(ObjectTarget->createMCInstrAnalysis(MII.get()));
371 
372   Printer.reset(ObjectTarget->createMCInstPrinter(
373       ObjectTriple, AsmInfo->getAssemblerDialect(), *AsmInfo, *MII,
374       *RegisterInfo));
375 
376   return Error::success();
377 }
378 
379 Error FileAnalysis::parseCodeSections() {
380   if (!IgnoreDWARFFlag) {
381     std::unique_ptr<DWARFContext> DWARF = DWARFContext::create(*Object);
382     if (!DWARF)
383       return make_error<StringError>("Could not create DWARF information.",
384                                      inconvertibleErrorCode());
385 
386     bool LineInfoValid = false;
387 
388     for (auto &Unit : DWARF->compile_units()) {
389       const auto &LineTable = DWARF->getLineTableForUnit(Unit.get());
390       if (LineTable && !LineTable->Rows.empty()) {
391         LineInfoValid = true;
392         break;
393       }
394     }
395 
396     if (!LineInfoValid)
397       return make_error<StringError>(
398           "DWARF line information missing. Did you compile with '-g'?",
399           inconvertibleErrorCode());
400   }
401 
402   for (const object::SectionRef &Section : Object->sections()) {
403     // Ensure only executable sections get analysed.
404     if (!(object::ELFSectionRef(Section).getFlags() & ELF::SHF_EXECINSTR))
405       continue;
406 
407     StringRef SectionContents;
408     if (Section.getContents(SectionContents))
409       return make_error<StringError>("Failed to retrieve section contents",
410                                      inconvertibleErrorCode());
411 
412     ArrayRef<uint8_t> SectionBytes((const uint8_t *)SectionContents.data(),
413                                    Section.getSize());
414     parseSectionContents(SectionBytes, Section.getAddress());
415   }
416   return Error::success();
417 }
418 
419 void FileAnalysis::parseSectionContents(ArrayRef<uint8_t> SectionBytes,
420                                         uint64_t SectionAddress) {
421   assert(Symbolizer && "Symbolizer is uninitialised.");
422   MCInst Instruction;
423   Instr InstrMeta;
424   uint64_t InstructionSize;
425 
426   for (uint64_t Byte = 0; Byte < SectionBytes.size();) {
427     bool ValidInstruction =
428         Disassembler->getInstruction(Instruction, InstructionSize,
429                                      SectionBytes.drop_front(Byte), 0, nulls(),
430                                      outs()) == MCDisassembler::Success;
431 
432     Byte += InstructionSize;
433 
434     uint64_t VMAddress = SectionAddress + Byte - InstructionSize;
435     InstrMeta.Instruction = Instruction;
436     InstrMeta.VMAddress = VMAddress;
437     InstrMeta.InstructionSize = InstructionSize;
438     InstrMeta.Valid = ValidInstruction;
439 
440     addInstruction(InstrMeta);
441 
442     if (!ValidInstruction)
443       continue;
444 
445     // Skip additional parsing for instructions that do not affect the control
446     // flow.
447     const auto &InstrDesc = MII->get(Instruction.getOpcode());
448     if (!InstrDesc.mayAffectControlFlow(Instruction, *RegisterInfo))
449       continue;
450 
451     uint64_t Target;
452     if (MIA->evaluateBranch(Instruction, VMAddress, InstructionSize, Target)) {
453       // If the target can be evaluated, it's not indirect.
454       StaticBranchTargetings[Target].push_back(VMAddress);
455       continue;
456     }
457 
458     if (!usesRegisterOperand(InstrMeta))
459       continue;
460 
461     // Check if this instruction exists in the range of the DWARF metadata.
462     if (!IgnoreDWARFFlag) {
463       auto LineInfo =
464           Symbolizer->symbolizeCode(Object->getFileName(), VMAddress);
465       if (!LineInfo) {
466         handleAllErrors(LineInfo.takeError(), [](const ErrorInfoBase &E) {
467           errs() << "Symbolizer failed to get line: " << E.message() << "\n";
468         });
469         continue;
470       }
471 
472       if (LineInfo->FileName == "<invalid>")
473         continue;
474     }
475 
476     IndirectInstructions.insert(VMAddress);
477   }
478 }
479 
480 void FileAnalysis::addInstruction(const Instr &Instruction) {
481   const auto &KV =
482       Instructions.insert(std::make_pair(Instruction.VMAddress, Instruction));
483   if (!KV.second) {
484     errs() << "Failed to add instruction at address "
485            << format_hex(Instruction.VMAddress, 2)
486            << ": Instruction at this address already exists.\n";
487     exit(EXIT_FAILURE);
488   }
489 }
490 
491 UnsupportedDisassembly::UnsupportedDisassembly(StringRef Text) : Text(Text) {}
492 
493 char UnsupportedDisassembly::ID;
494 void UnsupportedDisassembly::log(raw_ostream &OS) const {
495   OS << "Could not initialise disassembler: " << Text;
496 }
497 
498 std::error_code UnsupportedDisassembly::convertToErrorCode() const {
499   return std::error_code();
500 }
501 
502 } // namespace cfi_verify
503 } // namespace llvm
504