1 //===-- IRDynamicChecks.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/IR/Constants.h"
10 #include "llvm/IR/DataLayout.h"
11 #include "llvm/IR/Function.h"
12 #include "llvm/IR/Instructions.h"
13 #include "llvm/IR/Module.h"
14 #include "llvm/IR/Value.h"
15 #include "llvm/Support/raw_ostream.h"
16 
17 #include "IRDynamicChecks.h"
18 
19 #include "lldb/Expression/UtilityFunction.h"
20 #include "lldb/Target/ExecutionContext.h"
21 #include "lldb/Target/Process.h"
22 #include "lldb/Target/StackFrame.h"
23 #include "lldb/Target/Target.h"
24 #include "lldb/Utility/ConstString.h"
25 #include "lldb/Utility/LLDBLog.h"
26 #include "lldb/Utility/Log.h"
27 
28 #include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h"
29 
30 using namespace llvm;
31 using namespace lldb_private;
32 
33 static char ID;
34 
35 #define VALID_POINTER_CHECK_NAME "_$__lldb_valid_pointer_check"
36 #define VALID_OBJC_OBJECT_CHECK_NAME "$__lldb_objc_object_check"
37 
38 static const char g_valid_pointer_check_text[] =
39     "extern \"C\" void\n"
40     "_$__lldb_valid_pointer_check (unsigned char *$__lldb_arg_ptr)\n"
41     "{\n"
42     "    unsigned char $__lldb_local_val = *$__lldb_arg_ptr;\n"
43     "}";
44 
45 ClangDynamicCheckerFunctions::ClangDynamicCheckerFunctions()
46     : DynamicCheckerFunctions(DCF_Clang) {}
47 
48 ClangDynamicCheckerFunctions::~ClangDynamicCheckerFunctions() = default;
49 
50 bool ClangDynamicCheckerFunctions::Install(
51     DiagnosticManager &diagnostic_manager, ExecutionContext &exe_ctx) {
52   auto utility_fn_or_error = exe_ctx.GetTargetRef().CreateUtilityFunction(
53       g_valid_pointer_check_text, VALID_POINTER_CHECK_NAME,
54       lldb::eLanguageTypeC, exe_ctx);
55   if (!utility_fn_or_error) {
56     llvm::consumeError(utility_fn_or_error.takeError());
57     return false;
58   }
59   m_valid_pointer_check = std::move(*utility_fn_or_error);
60 
61   if (Process *process = exe_ctx.GetProcessPtr()) {
62     ObjCLanguageRuntime *objc_language_runtime =
63         ObjCLanguageRuntime::Get(*process);
64 
65     if (objc_language_runtime) {
66       auto utility_fn_or_error = objc_language_runtime->CreateObjectChecker(
67           VALID_OBJC_OBJECT_CHECK_NAME, exe_ctx);
68       if (!utility_fn_or_error) {
69         llvm::consumeError(utility_fn_or_error.takeError());
70         return false;
71       }
72       m_objc_object_check = std::move(*utility_fn_or_error);
73     }
74   }
75 
76   return true;
77 }
78 
79 bool ClangDynamicCheckerFunctions::DoCheckersExplainStop(lldb::addr_t addr,
80                                                          Stream &message) {
81   // FIXME: We have to get the checkers to know why they scotched the call in
82   // more detail,
83   // so we can print a better message here.
84   if (m_valid_pointer_check && m_valid_pointer_check->ContainsAddress(addr)) {
85     message.Printf("Attempted to dereference an invalid pointer.");
86     return true;
87   } else if (m_objc_object_check &&
88              m_objc_object_check->ContainsAddress(addr)) {
89     message.Printf("Attempted to dereference an invalid ObjC Object or send it "
90                    "an unrecognized selector");
91     return true;
92   }
93   return false;
94 }
95 
96 static std::string PrintValue(llvm::Value *V, bool truncate = false) {
97   std::string s;
98   raw_string_ostream rso(s);
99   V->print(rso);
100   rso.flush();
101   if (truncate)
102     s.resize(s.length() - 1);
103   return s;
104 }
105 
106 /// \class Instrumenter IRDynamicChecks.cpp
107 /// Finds and instruments individual LLVM IR instructions
108 ///
109 /// When instrumenting LLVM IR, it is frequently desirable to first search for
110 /// instructions, and then later modify them.  This way iterators remain
111 /// intact, and multiple passes can look at the same code base without
112 /// treading on each other's toes.
113 ///
114 /// The Instrumenter class implements this functionality.  A client first
115 /// calls Inspect on a function, which populates a list of instructions to be
116 /// instrumented.  Then, later, when all passes' Inspect functions have been
117 /// called, the client calls Instrument, which adds the desired
118 /// instrumentation.
119 ///
120 /// A subclass of Instrumenter must override InstrumentInstruction, which
121 /// is responsible for adding whatever instrumentation is necessary.
122 ///
123 /// A subclass of Instrumenter may override:
124 ///
125 /// - InspectInstruction [default: does nothing]
126 ///
127 /// - InspectBasicBlock [default: iterates through the instructions in a
128 ///   basic block calling InspectInstruction]
129 ///
130 /// - InspectFunction [default: iterates through the basic blocks in a
131 ///   function calling InspectBasicBlock]
132 class Instrumenter {
133 public:
134   /// Constructor
135   ///
136   /// \param[in] module
137   ///     The module being instrumented.
138   Instrumenter(llvm::Module &module,
139                std::shared_ptr<UtilityFunction> checker_function)
140       : m_module(module), m_checker_function(checker_function),
141         m_i8ptr_ty(nullptr), m_intptr_ty(nullptr) {}
142 
143   virtual ~Instrumenter() = default;
144 
145   /// Inspect a function to find instructions to instrument
146   ///
147   /// \param[in] function
148   ///     The function to inspect.
149   ///
150   /// \return
151   ///     True on success; false on error.
152   bool Inspect(llvm::Function &function) { return InspectFunction(function); }
153 
154   /// Instrument all the instructions found by Inspect()
155   ///
156   /// \return
157   ///     True on success; false on error.
158   bool Instrument() {
159     for (InstIterator ii = m_to_instrument.begin(),
160                       last_ii = m_to_instrument.end();
161          ii != last_ii; ++ii) {
162       if (!InstrumentInstruction(*ii))
163         return false;
164     }
165 
166     return true;
167   }
168 
169 protected:
170   /// Add instrumentation to a single instruction
171   ///
172   /// \param[in] inst
173   ///     The instruction to be instrumented.
174   ///
175   /// \return
176   ///     True on success; false otherwise.
177   virtual bool InstrumentInstruction(llvm::Instruction *inst) = 0;
178 
179   /// Register a single instruction to be instrumented
180   ///
181   /// \param[in] inst
182   ///     The instruction to be instrumented.
183   void RegisterInstruction(llvm::Instruction &inst) {
184     m_to_instrument.push_back(&inst);
185   }
186 
187   /// Determine whether a single instruction is interesting to instrument,
188   /// and, if so, call RegisterInstruction
189   ///
190   /// \param[in] i
191   ///     The instruction to be inspected.
192   ///
193   /// \return
194   ///     False if there was an error scanning; true otherwise.
195   virtual bool InspectInstruction(llvm::Instruction &i) { return true; }
196 
197   /// Scan a basic block to see if any instructions are interesting
198   ///
199   /// \param[in] bb
200   ///     The basic block to be inspected.
201   ///
202   /// \return
203   ///     False if there was an error scanning; true otherwise.
204   virtual bool InspectBasicBlock(llvm::BasicBlock &bb) {
205     for (llvm::BasicBlock::iterator ii = bb.begin(), last_ii = bb.end();
206          ii != last_ii; ++ii) {
207       if (!InspectInstruction(*ii))
208         return false;
209     }
210 
211     return true;
212   }
213 
214   /// Scan a function to see if any instructions are interesting
215   ///
216   /// \param[in] f
217   ///     The function to be inspected.
218   ///
219   /// \return
220   ///     False if there was an error scanning; true otherwise.
221   virtual bool InspectFunction(llvm::Function &f) {
222     for (llvm::Function::iterator bbi = f.begin(), last_bbi = f.end();
223          bbi != last_bbi; ++bbi) {
224       if (!InspectBasicBlock(*bbi))
225         return false;
226     }
227 
228     return true;
229   }
230 
231   /// Build a function pointer for a function with signature void
232   /// (*)(uint8_t*) with a given address
233   ///
234   /// \param[in] start_address
235   ///     The address of the function.
236   ///
237   /// \return
238   ///     The function pointer, for use in a CallInst.
239   llvm::FunctionCallee BuildPointerValidatorFunc(lldb::addr_t start_address) {
240     llvm::Type *param_array[1];
241 
242     param_array[0] = const_cast<llvm::PointerType *>(GetI8PtrTy());
243 
244     ArrayRef<llvm::Type *> params(param_array, 1);
245 
246     FunctionType *fun_ty = FunctionType::get(
247         llvm::Type::getVoidTy(m_module.getContext()), params, true);
248     PointerType *fun_ptr_ty = PointerType::getUnqual(fun_ty);
249     Constant *fun_addr_int =
250         ConstantInt::get(GetIntptrTy(), start_address, false);
251     return {fun_ty, ConstantExpr::getIntToPtr(fun_addr_int, fun_ptr_ty)};
252   }
253 
254   /// Build a function pointer for a function with signature void
255   /// (*)(uint8_t*, uint8_t*) with a given address
256   ///
257   /// \param[in] start_address
258   ///     The address of the function.
259   ///
260   /// \return
261   ///     The function pointer, for use in a CallInst.
262   llvm::FunctionCallee BuildObjectCheckerFunc(lldb::addr_t start_address) {
263     llvm::Type *param_array[2];
264 
265     param_array[0] = const_cast<llvm::PointerType *>(GetI8PtrTy());
266     param_array[1] = const_cast<llvm::PointerType *>(GetI8PtrTy());
267 
268     ArrayRef<llvm::Type *> params(param_array, 2);
269 
270     FunctionType *fun_ty = FunctionType::get(
271         llvm::Type::getVoidTy(m_module.getContext()), params, true);
272     PointerType *fun_ptr_ty = PointerType::getUnqual(fun_ty);
273     Constant *fun_addr_int =
274         ConstantInt::get(GetIntptrTy(), start_address, false);
275     return {fun_ty, ConstantExpr::getIntToPtr(fun_addr_int, fun_ptr_ty)};
276   }
277 
278   PointerType *GetI8PtrTy() {
279     if (!m_i8ptr_ty)
280       m_i8ptr_ty = llvm::Type::getInt8PtrTy(m_module.getContext());
281 
282     return m_i8ptr_ty;
283   }
284 
285   IntegerType *GetIntptrTy() {
286     if (!m_intptr_ty) {
287       llvm::DataLayout data_layout(&m_module);
288 
289       m_intptr_ty = llvm::Type::getIntNTy(m_module.getContext(),
290                                           data_layout.getPointerSizeInBits());
291     }
292 
293     return m_intptr_ty;
294   }
295 
296   typedef std::vector<llvm::Instruction *> InstVector;
297   typedef InstVector::iterator InstIterator;
298 
299   InstVector m_to_instrument; ///< List of instructions the inspector found
300   llvm::Module &m_module;     ///< The module which is being instrumented
301   std::shared_ptr<UtilityFunction>
302       m_checker_function; ///< The dynamic checker function for the process
303 
304 private:
305   PointerType *m_i8ptr_ty;
306   IntegerType *m_intptr_ty;
307 };
308 
309 class ValidPointerChecker : public Instrumenter {
310 public:
311   ValidPointerChecker(llvm::Module &module,
312                       std::shared_ptr<UtilityFunction> checker_function)
313       : Instrumenter(module, checker_function),
314         m_valid_pointer_check_func(nullptr) {}
315 
316   ~ValidPointerChecker() override = default;
317 
318 protected:
319   bool InstrumentInstruction(llvm::Instruction *inst) override {
320     Log *log = GetLog(LLDBLog::Expressions);
321 
322     LLDB_LOGF(log, "Instrumenting load/store instruction: %s\n",
323               PrintValue(inst).c_str());
324 
325     if (!m_valid_pointer_check_func)
326       m_valid_pointer_check_func =
327           BuildPointerValidatorFunc(m_checker_function->StartAddress());
328 
329     llvm::Value *dereferenced_ptr = nullptr;
330 
331     if (llvm::LoadInst *li = dyn_cast<llvm::LoadInst>(inst))
332       dereferenced_ptr = li->getPointerOperand();
333     else if (llvm::StoreInst *si = dyn_cast<llvm::StoreInst>(inst))
334       dereferenced_ptr = si->getPointerOperand();
335     else
336       return false;
337 
338     // Insert an instruction to cast the loaded value to int8_t*
339 
340     BitCastInst *bit_cast =
341         new BitCastInst(dereferenced_ptr, GetI8PtrTy(), "", inst);
342 
343     // Insert an instruction to call the helper with the result
344 
345     llvm::Value *arg_array[1];
346 
347     arg_array[0] = bit_cast;
348 
349     llvm::ArrayRef<llvm::Value *> args(arg_array, 1);
350 
351     CallInst::Create(m_valid_pointer_check_func, args, "", inst);
352 
353     return true;
354   }
355 
356   bool InspectInstruction(llvm::Instruction &i) override {
357     if (isa<llvm::LoadInst>(&i) || isa<llvm::StoreInst>(&i))
358       RegisterInstruction(i);
359 
360     return true;
361   }
362 
363 private:
364   llvm::FunctionCallee m_valid_pointer_check_func;
365 };
366 
367 class ObjcObjectChecker : public Instrumenter {
368 public:
369   ObjcObjectChecker(llvm::Module &module,
370                     std::shared_ptr<UtilityFunction> checker_function)
371       : Instrumenter(module, checker_function),
372         m_objc_object_check_func(nullptr) {}
373 
374   ~ObjcObjectChecker() override = default;
375 
376   enum msgSend_type {
377     eMsgSend = 0,
378     eMsgSendSuper,
379     eMsgSendSuper_stret,
380     eMsgSend_fpret,
381     eMsgSend_stret
382   };
383 
384   std::map<llvm::Instruction *, msgSend_type> msgSend_types;
385 
386 protected:
387   bool InstrumentInstruction(llvm::Instruction *inst) override {
388     CallInst *call_inst = dyn_cast<CallInst>(inst);
389 
390     if (!call_inst)
391       return false; // call_inst really shouldn't be nullptr, because otherwise
392                     // InspectInstruction wouldn't have registered it
393 
394     if (!m_objc_object_check_func)
395       m_objc_object_check_func =
396           BuildObjectCheckerFunc(m_checker_function->StartAddress());
397 
398     // id objc_msgSend(id theReceiver, SEL theSelector, ...)
399 
400     llvm::Value *target_object;
401     llvm::Value *selector;
402 
403     switch (msgSend_types[inst]) {
404     case eMsgSend:
405     case eMsgSend_fpret:
406       // On arm64, clang uses objc_msgSend for scalar and struct return
407       // calls.  The call instruction will record which was used.
408       if (call_inst->hasStructRetAttr()) {
409         target_object = call_inst->getArgOperand(1);
410         selector = call_inst->getArgOperand(2);
411       } else {
412         target_object = call_inst->getArgOperand(0);
413         selector = call_inst->getArgOperand(1);
414       }
415       break;
416     case eMsgSend_stret:
417       target_object = call_inst->getArgOperand(1);
418       selector = call_inst->getArgOperand(2);
419       break;
420     case eMsgSendSuper:
421     case eMsgSendSuper_stret:
422       return true;
423     }
424 
425     // These objects should always be valid according to Sean Calannan
426     assert(target_object);
427     assert(selector);
428 
429     // Insert an instruction to cast the receiver id to int8_t*
430 
431     BitCastInst *bit_cast =
432         new BitCastInst(target_object, GetI8PtrTy(), "", inst);
433 
434     // Insert an instruction to call the helper with the result
435 
436     llvm::Value *arg_array[2];
437 
438     arg_array[0] = bit_cast;
439     arg_array[1] = selector;
440 
441     ArrayRef<llvm::Value *> args(arg_array, 2);
442 
443     CallInst::Create(m_objc_object_check_func, args, "", inst);
444 
445     return true;
446   }
447 
448   static llvm::Function *GetFunction(llvm::Value *value) {
449     if (llvm::Function *function = llvm::dyn_cast<llvm::Function>(value)) {
450       return function;
451     }
452 
453     if (llvm::ConstantExpr *const_expr =
454             llvm::dyn_cast<llvm::ConstantExpr>(value)) {
455       switch (const_expr->getOpcode()) {
456       default:
457         return nullptr;
458       case llvm::Instruction::BitCast:
459         return GetFunction(const_expr->getOperand(0));
460       }
461     }
462 
463     return nullptr;
464   }
465 
466   static llvm::Function *GetCalledFunction(llvm::CallInst *inst) {
467     return GetFunction(inst->getCalledOperand());
468   }
469 
470   bool InspectInstruction(llvm::Instruction &i) override {
471     Log *log = GetLog(LLDBLog::Expressions);
472 
473     CallInst *call_inst = dyn_cast<CallInst>(&i);
474 
475     if (call_inst) {
476       const llvm::Function *called_function = GetCalledFunction(call_inst);
477 
478       if (!called_function)
479         return true;
480 
481       std::string name_str = called_function->getName().str();
482       const char *name_cstr = name_str.c_str();
483 
484       LLDB_LOGF(log, "Found call to %s: %s\n", name_cstr,
485                 PrintValue(call_inst).c_str());
486 
487       if (name_str.find("objc_msgSend") == std::string::npos)
488         return true;
489 
490       if (!strcmp(name_cstr, "objc_msgSend")) {
491         RegisterInstruction(i);
492         msgSend_types[&i] = eMsgSend;
493         return true;
494       }
495 
496       if (!strcmp(name_cstr, "objc_msgSend_stret")) {
497         RegisterInstruction(i);
498         msgSend_types[&i] = eMsgSend_stret;
499         return true;
500       }
501 
502       if (!strcmp(name_cstr, "objc_msgSend_fpret")) {
503         RegisterInstruction(i);
504         msgSend_types[&i] = eMsgSend_fpret;
505         return true;
506       }
507 
508       if (!strcmp(name_cstr, "objc_msgSendSuper")) {
509         RegisterInstruction(i);
510         msgSend_types[&i] = eMsgSendSuper;
511         return true;
512       }
513 
514       if (!strcmp(name_cstr, "objc_msgSendSuper_stret")) {
515         RegisterInstruction(i);
516         msgSend_types[&i] = eMsgSendSuper_stret;
517         return true;
518       }
519 
520       LLDB_LOGF(log,
521                 "Function name '%s' contains 'objc_msgSend' but is not handled",
522                 name_str.c_str());
523 
524       return true;
525     }
526 
527     return true;
528   }
529 
530 private:
531   llvm::FunctionCallee m_objc_object_check_func;
532 };
533 
534 IRDynamicChecks::IRDynamicChecks(
535     ClangDynamicCheckerFunctions &checker_functions, const char *func_name)
536     : ModulePass(ID), m_func_name(func_name),
537       m_checker_functions(checker_functions) {}
538 
539 IRDynamicChecks::~IRDynamicChecks() = default;
540 
541 bool IRDynamicChecks::runOnModule(llvm::Module &M) {
542   Log *log = GetLog(LLDBLog::Expressions);
543 
544   llvm::Function *function = M.getFunction(StringRef(m_func_name));
545 
546   if (!function) {
547     LLDB_LOGF(log, "Couldn't find %s() in the module", m_func_name.c_str());
548 
549     return false;
550   }
551 
552   if (m_checker_functions.m_valid_pointer_check) {
553     ValidPointerChecker vpc(M, m_checker_functions.m_valid_pointer_check);
554 
555     if (!vpc.Inspect(*function))
556       return false;
557 
558     if (!vpc.Instrument())
559       return false;
560   }
561 
562   if (m_checker_functions.m_objc_object_check) {
563     ObjcObjectChecker ooc(M, m_checker_functions.m_objc_object_check);
564 
565     if (!ooc.Inspect(*function))
566       return false;
567 
568     if (!ooc.Instrument())
569       return false;
570   }
571 
572   if (log && log->GetVerbose()) {
573     std::string s;
574     raw_string_ostream oss(s);
575 
576     M.print(oss, nullptr);
577 
578     oss.flush();
579 
580     LLDB_LOGF(log, "Module after dynamic checks: \n%s", s.c_str());
581   }
582 
583   return true;
584 }
585 
586 void IRDynamicChecks::assignPassManager(PMStack &PMS, PassManagerType T) {}
587 
588 PassManagerType IRDynamicChecks::getPotentialPassManagerType() const {
589   return PMT_ModulePassManager;
590 }
591