1 //===- FileAnalysis.cpp -----------------------------------------*- C++ -*-===//
2 //
3 //                      The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "FileAnalysis.h"
11 #include "GraphBuilder.h"
12 
13 #include "llvm/BinaryFormat/ELF.h"
14 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
15 #include "llvm/MC/MCAsmInfo.h"
16 #include "llvm/MC/MCContext.h"
17 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
18 #include "llvm/MC/MCInst.h"
19 #include "llvm/MC/MCInstPrinter.h"
20 #include "llvm/MC/MCInstrAnalysis.h"
21 #include "llvm/MC/MCInstrDesc.h"
22 #include "llvm/MC/MCInstrInfo.h"
23 #include "llvm/MC/MCObjectFileInfo.h"
24 #include "llvm/MC/MCRegisterInfo.h"
25 #include "llvm/MC/MCSubtargetInfo.h"
26 #include "llvm/Object/Binary.h"
27 #include "llvm/Object/COFF.h"
28 #include "llvm/Object/ELFObjectFile.h"
29 #include "llvm/Object/ObjectFile.h"
30 #include "llvm/Support/Casting.h"
31 #include "llvm/Support/CommandLine.h"
32 #include "llvm/Support/Error.h"
33 #include "llvm/Support/MemoryBuffer.h"
34 #include "llvm/Support/TargetRegistry.h"
35 #include "llvm/Support/TargetSelect.h"
36 #include "llvm/Support/raw_ostream.h"
37 
38 
39 using Instr = llvm::cfi_verify::FileAnalysis::Instr;
40 using LLVMSymbolizer = llvm::symbolize::LLVMSymbolizer;
41 
42 namespace llvm {
43 namespace cfi_verify {
44 
45 bool IgnoreDWARFFlag;
46 
47 static cl::opt<bool, true> IgnoreDWARFArg(
48     "ignore-dwarf",
49     cl::desc(
50         "Ignore all DWARF data. This relaxes the requirements for all "
51         "statically linked libraries to have been compiled with '-g', but "
52         "will result in false positives for 'CFI unprotected' instructions."),
53     cl::location(IgnoreDWARFFlag), cl::init(false));
54 
55 StringRef stringCFIProtectionStatus(CFIProtectionStatus Status) {
56   switch (Status) {
57   case CFIProtectionStatus::PROTECTED:
58     return "PROTECTED";
59   case CFIProtectionStatus::FAIL_NOT_INDIRECT_CF:
60     return "FAIL_NOT_INDIRECT_CF";
61   case CFIProtectionStatus::FAIL_ORPHANS:
62     return "FAIL_ORPHANS";
63   case CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH:
64     return "FAIL_BAD_CONDITIONAL_BRANCH";
65   case CFIProtectionStatus::FAIL_REGISTER_CLOBBERED:
66     return "FAIL_REGISTER_CLOBBERED";
67   case CFIProtectionStatus::FAIL_INVALID_INSTRUCTION:
68     return "FAIL_INVALID_INSTRUCTION";
69   }
70   llvm_unreachable("Attempted to stringify an unknown enum value.");
71 }
72 
73 Expected<FileAnalysis> FileAnalysis::Create(StringRef Filename) {
74   // Open the filename provided.
75   Expected<object::OwningBinary<object::Binary>> BinaryOrErr =
76       object::createBinary(Filename);
77   if (!BinaryOrErr)
78     return BinaryOrErr.takeError();
79 
80   // Construct the object and allow it to take ownership of the binary.
81   object::OwningBinary<object::Binary> Binary = std::move(BinaryOrErr.get());
82   FileAnalysis Analysis(std::move(Binary));
83 
84   Analysis.Object = dyn_cast<object::ObjectFile>(Analysis.Binary.getBinary());
85   if (!Analysis.Object)
86     return make_error<UnsupportedDisassembly>("Failed to cast object");
87 
88   Analysis.ObjectTriple = Analysis.Object->makeTriple();
89   Analysis.Features = Analysis.Object->getFeatures();
90 
91   // Init the rest of the object.
92   if (auto InitResponse = Analysis.initialiseDisassemblyMembers())
93     return std::move(InitResponse);
94 
95   if (auto SectionParseResponse = Analysis.parseCodeSections())
96     return std::move(SectionParseResponse);
97 
98   return std::move(Analysis);
99 }
100 
101 FileAnalysis::FileAnalysis(object::OwningBinary<object::Binary> Binary)
102     : Binary(std::move(Binary)) {}
103 
104 FileAnalysis::FileAnalysis(const Triple &ObjectTriple,
105                            const SubtargetFeatures &Features)
106     : ObjectTriple(ObjectTriple), Features(Features) {}
107 
108 const Instr *
109 FileAnalysis::getPrevInstructionSequential(const Instr &InstrMeta) const {
110   std::map<uint64_t, Instr>::const_iterator KV =
111       Instructions.find(InstrMeta.VMAddress);
112   if (KV == Instructions.end() || KV == Instructions.begin())
113     return nullptr;
114 
115   if (!(--KV)->second.Valid)
116     return nullptr;
117 
118   return &KV->second;
119 }
120 
121 const Instr *
122 FileAnalysis::getNextInstructionSequential(const Instr &InstrMeta) const {
123   std::map<uint64_t, Instr>::const_iterator KV =
124       Instructions.find(InstrMeta.VMAddress);
125   if (KV == Instructions.end() || ++KV == Instructions.end())
126     return nullptr;
127 
128   if (!KV->second.Valid)
129     return nullptr;
130 
131   return &KV->second;
132 }
133 
134 bool FileAnalysis::usesRegisterOperand(const Instr &InstrMeta) const {
135   for (const auto &Operand : InstrMeta.Instruction) {
136     if (Operand.isReg())
137       return true;
138   }
139   return false;
140 }
141 
142 const Instr *FileAnalysis::getInstruction(uint64_t Address) const {
143   const auto &InstrKV = Instructions.find(Address);
144   if (InstrKV == Instructions.end())
145     return nullptr;
146 
147   return &InstrKV->second;
148 }
149 
150 const Instr &FileAnalysis::getInstructionOrDie(uint64_t Address) const {
151   const auto &InstrKV = Instructions.find(Address);
152   assert(InstrKV != Instructions.end() && "Address doesn't exist.");
153   return InstrKV->second;
154 }
155 
156 bool FileAnalysis::isCFITrap(const Instr &InstrMeta) const {
157   return MII->getName(InstrMeta.Instruction.getOpcode()) == "TRAP";
158 }
159 
160 bool FileAnalysis::canFallThrough(const Instr &InstrMeta) const {
161   if (!InstrMeta.Valid)
162     return false;
163 
164   if (isCFITrap(InstrMeta))
165     return false;
166 
167   const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode());
168   if (InstrDesc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo))
169     return InstrDesc.isConditionalBranch();
170 
171   return true;
172 }
173 
174 const Instr *
175 FileAnalysis::getDefiniteNextInstruction(const Instr &InstrMeta) const {
176   if (!InstrMeta.Valid)
177     return nullptr;
178 
179   if (isCFITrap(InstrMeta))
180     return nullptr;
181 
182   const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode());
183   const Instr *NextMetaPtr;
184   if (InstrDesc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo)) {
185     if (InstrDesc.isConditionalBranch())
186       return nullptr;
187 
188     uint64_t Target;
189     if (!MIA->evaluateBranch(InstrMeta.Instruction, InstrMeta.VMAddress,
190                              InstrMeta.InstructionSize, Target))
191       return nullptr;
192 
193     NextMetaPtr = getInstruction(Target);
194   } else {
195     NextMetaPtr =
196         getInstruction(InstrMeta.VMAddress + InstrMeta.InstructionSize);
197   }
198 
199   if (!NextMetaPtr || !NextMetaPtr->Valid)
200     return nullptr;
201 
202   return NextMetaPtr;
203 }
204 
205 std::set<const Instr *>
206 FileAnalysis::getDirectControlFlowXRefs(const Instr &InstrMeta) const {
207   std::set<const Instr *> CFCrossReferences;
208   const Instr *PrevInstruction = getPrevInstructionSequential(InstrMeta);
209 
210   if (PrevInstruction && canFallThrough(*PrevInstruction))
211     CFCrossReferences.insert(PrevInstruction);
212 
213   const auto &TargetRefsKV = StaticBranchTargetings.find(InstrMeta.VMAddress);
214   if (TargetRefsKV == StaticBranchTargetings.end())
215     return CFCrossReferences;
216 
217   for (uint64_t SourceInstrAddress : TargetRefsKV->second) {
218     const auto &SourceInstrKV = Instructions.find(SourceInstrAddress);
219     if (SourceInstrKV == Instructions.end()) {
220       errs() << "Failed to find source instruction at address "
221              << format_hex(SourceInstrAddress, 2)
222              << " for the cross-reference to instruction at address "
223              << format_hex(InstrMeta.VMAddress, 2) << ".\n";
224       continue;
225     }
226 
227     CFCrossReferences.insert(&SourceInstrKV->second);
228   }
229 
230   return CFCrossReferences;
231 }
232 
233 const std::set<uint64_t> &FileAnalysis::getIndirectInstructions() const {
234   return IndirectInstructions;
235 }
236 
237 const MCRegisterInfo *FileAnalysis::getRegisterInfo() const {
238   return RegisterInfo.get();
239 }
240 
241 const MCInstrInfo *FileAnalysis::getMCInstrInfo() const { return MII.get(); }
242 
243 const MCInstrAnalysis *FileAnalysis::getMCInstrAnalysis() const {
244   return MIA.get();
245 }
246 
247 Expected<DIInliningInfo> FileAnalysis::symbolizeInlinedCode(uint64_t Address) {
248   assert(Symbolizer != nullptr && "Symbolizer is invalid.");
249   return Symbolizer->symbolizeInlinedCode(Object->getFileName(), Address);
250 }
251 
252 CFIProtectionStatus
253 FileAnalysis::validateCFIProtection(const GraphResult &Graph) const {
254   const Instr *InstrMetaPtr = getInstruction(Graph.BaseAddress);
255   if (!InstrMetaPtr)
256     return CFIProtectionStatus::FAIL_INVALID_INSTRUCTION;
257 
258   const auto &InstrDesc = MII->get(InstrMetaPtr->Instruction.getOpcode());
259   if (!InstrDesc.mayAffectControlFlow(InstrMetaPtr->Instruction, *RegisterInfo))
260     return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF;
261 
262   if (!usesRegisterOperand(*InstrMetaPtr))
263     return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF;
264 
265   if (!Graph.OrphanedNodes.empty())
266     return CFIProtectionStatus::FAIL_ORPHANS;
267 
268   for (const auto &BranchNode : Graph.ConditionalBranchNodes) {
269     if (!BranchNode.CFIProtection)
270       return CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH;
271   }
272 
273   if (indirectCFOperandClobber(Graph) != Graph.BaseAddress)
274     return CFIProtectionStatus::FAIL_REGISTER_CLOBBERED;
275 
276   return CFIProtectionStatus::PROTECTED;
277 }
278 
279 uint64_t FileAnalysis::indirectCFOperandClobber(const GraphResult &Graph) const {
280   assert(Graph.OrphanedNodes.empty() && "Orphaned nodes should be empty.");
281 
282   // Get the set of registers we must check to ensure they're not clobbered.
283   const Instr &IndirectCF = getInstructionOrDie(Graph.BaseAddress);
284   DenseSet<unsigned> RegisterNumbers;
285   for (const auto &Operand : IndirectCF.Instruction) {
286     if (Operand.isReg())
287       RegisterNumbers.insert(Operand.getReg());
288   }
289   assert(RegisterNumbers.size() && "Zero register operands on indirect CF.");
290 
291   // Now check all branches to indirect CFs and ensure no clobbering happens.
292   for (const auto &Branch : Graph.ConditionalBranchNodes) {
293     uint64_t Node;
294     if (Branch.IndirectCFIsOnTargetPath)
295       Node = Branch.Target;
296     else
297       Node = Branch.Fallthrough;
298 
299     while (Node != Graph.BaseAddress) {
300       const Instr &NodeInstr = getInstructionOrDie(Node);
301       const auto &InstrDesc = MII->get(NodeInstr.Instruction.getOpcode());
302 
303       for (unsigned RegNum : RegisterNumbers) {
304         if (InstrDesc.hasDefOfPhysReg(NodeInstr.Instruction, RegNum,
305                                       *RegisterInfo))
306           return Node;
307       }
308 
309       const auto &KV = Graph.IntermediateNodes.find(Node);
310       assert((KV != Graph.IntermediateNodes.end()) &&
311              "Could not get next node.");
312       Node = KV->second;
313     }
314   }
315 
316   return Graph.BaseAddress;
317 }
318 
319 void FileAnalysis::printInstruction(const Instr &InstrMeta,
320                                     raw_ostream &OS) const {
321   Printer->printInst(&InstrMeta.Instruction, OS, "", *SubtargetInfo.get());
322 }
323 
324 Error FileAnalysis::initialiseDisassemblyMembers() {
325   std::string TripleName = ObjectTriple.getTriple();
326   ArchName = "";
327   MCPU = "";
328   std::string ErrorString;
329 
330   Symbolizer.reset(new LLVMSymbolizer());
331 
332   ObjectTarget =
333       TargetRegistry::lookupTarget(ArchName, ObjectTriple, ErrorString);
334   if (!ObjectTarget)
335     return make_error<UnsupportedDisassembly>(
336         (Twine("Couldn't find target \"") + ObjectTriple.getTriple() +
337          "\", failed with error: " + ErrorString)
338             .str());
339 
340   RegisterInfo.reset(ObjectTarget->createMCRegInfo(TripleName));
341   if (!RegisterInfo)
342     return make_error<UnsupportedDisassembly>(
343         "Failed to initialise RegisterInfo.");
344 
345   AsmInfo.reset(ObjectTarget->createMCAsmInfo(*RegisterInfo, TripleName));
346   if (!AsmInfo)
347     return make_error<UnsupportedDisassembly>("Failed to initialise AsmInfo.");
348 
349   SubtargetInfo.reset(ObjectTarget->createMCSubtargetInfo(
350       TripleName, MCPU, Features.getString()));
351   if (!SubtargetInfo)
352     return make_error<UnsupportedDisassembly>(
353         "Failed to initialise SubtargetInfo.");
354 
355   MII.reset(ObjectTarget->createMCInstrInfo());
356   if (!MII)
357     return make_error<UnsupportedDisassembly>("Failed to initialise MII.");
358 
359   Context.reset(new MCContext(AsmInfo.get(), RegisterInfo.get(), &MOFI));
360 
361   Disassembler.reset(
362       ObjectTarget->createMCDisassembler(*SubtargetInfo, *Context));
363 
364   if (!Disassembler)
365     return make_error<UnsupportedDisassembly>(
366         "No disassembler available for target");
367 
368   MIA.reset(ObjectTarget->createMCInstrAnalysis(MII.get()));
369 
370   Printer.reset(ObjectTarget->createMCInstPrinter(
371       ObjectTriple, AsmInfo->getAssemblerDialect(), *AsmInfo, *MII,
372       *RegisterInfo));
373 
374   return Error::success();
375 }
376 
377 Error FileAnalysis::parseCodeSections() {
378   if (!IgnoreDWARFFlag) {
379     std::unique_ptr<DWARFContext> DWARF = DWARFContext::create(*Object);
380     if (!DWARF)
381       return make_error<StringError>("Could not create DWARF information.",
382                                      inconvertibleErrorCode());
383 
384     bool LineInfoValid = false;
385 
386     for (auto &Unit : DWARF->compile_units()) {
387       const auto &LineTable = DWARF->getLineTableForUnit(Unit.get());
388       if (LineTable && !LineTable->Rows.empty()) {
389         LineInfoValid = true;
390         break;
391       }
392     }
393 
394     if (!LineInfoValid)
395       return make_error<StringError>(
396           "DWARF line information missing. Did you compile with '-g'?",
397           inconvertibleErrorCode());
398   }
399 
400   for (const object::SectionRef &Section : Object->sections()) {
401     // Ensure only executable sections get analysed.
402     if (!(object::ELFSectionRef(Section).getFlags() & ELF::SHF_EXECINSTR))
403       continue;
404 
405     StringRef SectionContents;
406     if (Section.getContents(SectionContents))
407       return make_error<StringError>("Failed to retrieve section contents",
408                                      inconvertibleErrorCode());
409 
410     ArrayRef<uint8_t> SectionBytes((const uint8_t *)SectionContents.data(),
411                                    Section.getSize());
412     parseSectionContents(SectionBytes, Section.getAddress());
413   }
414   return Error::success();
415 }
416 
417 void FileAnalysis::parseSectionContents(ArrayRef<uint8_t> SectionBytes,
418                                         uint64_t SectionAddress) {
419   assert(Symbolizer && "Symbolizer is uninitialised.");
420   MCInst Instruction;
421   Instr InstrMeta;
422   uint64_t InstructionSize;
423 
424   for (uint64_t Byte = 0; Byte < SectionBytes.size();) {
425     bool ValidInstruction =
426         Disassembler->getInstruction(Instruction, InstructionSize,
427                                      SectionBytes.drop_front(Byte), 0, nulls(),
428                                      outs()) == MCDisassembler::Success;
429 
430     Byte += InstructionSize;
431 
432     uint64_t VMAddress = SectionAddress + Byte - InstructionSize;
433     InstrMeta.Instruction = Instruction;
434     InstrMeta.VMAddress = VMAddress;
435     InstrMeta.InstructionSize = InstructionSize;
436     InstrMeta.Valid = ValidInstruction;
437 
438     addInstruction(InstrMeta);
439 
440     if (!ValidInstruction)
441       continue;
442 
443     // Skip additional parsing for instructions that do not affect the control
444     // flow.
445     const auto &InstrDesc = MII->get(Instruction.getOpcode());
446     if (!InstrDesc.mayAffectControlFlow(Instruction, *RegisterInfo))
447       continue;
448 
449     uint64_t Target;
450     if (MIA->evaluateBranch(Instruction, VMAddress, InstructionSize, Target)) {
451       // If the target can be evaluated, it's not indirect.
452       StaticBranchTargetings[Target].push_back(VMAddress);
453       continue;
454     }
455 
456     if (!usesRegisterOperand(InstrMeta))
457       continue;
458 
459     // Check if this instruction exists in the range of the DWARF metadata.
460     if (!IgnoreDWARFFlag) {
461       auto LineInfo =
462           Symbolizer->symbolizeCode(Object->getFileName(), VMAddress);
463       if (!LineInfo) {
464         handleAllErrors(LineInfo.takeError(), [](const ErrorInfoBase &E) {
465           errs() << "Symbolizer failed to get line: " << E.message() << "\n";
466         });
467         continue;
468       }
469 
470       if (LineInfo->FileName == "<invalid>")
471         continue;
472     }
473 
474     IndirectInstructions.insert(VMAddress);
475   }
476 }
477 
478 void FileAnalysis::addInstruction(const Instr &Instruction) {
479   const auto &KV =
480       Instructions.insert(std::make_pair(Instruction.VMAddress, Instruction));
481   if (!KV.second) {
482     errs() << "Failed to add instruction at address "
483            << format_hex(Instruction.VMAddress, 2)
484            << ": Instruction at this address already exists.\n";
485     exit(EXIT_FAILURE);
486   }
487 }
488 
489 UnsupportedDisassembly::UnsupportedDisassembly(StringRef Text) : Text(Text) {}
490 
491 char UnsupportedDisassembly::ID;
492 void UnsupportedDisassembly::log(raw_ostream &OS) const {
493   OS << "Could not initialise disassembler: " << Text;
494 }
495 
496 std::error_code UnsupportedDisassembly::convertToErrorCode() const {
497   return std::error_code();
498 }
499 
500 } // namespace cfi_verify
501 } // namespace llvm
502