1 //===-- IRDynamicChecks.cpp -----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/IR/Constants.h" 10 #include "llvm/IR/DataLayout.h" 11 #include "llvm/IR/Function.h" 12 #include "llvm/IR/Instructions.h" 13 #include "llvm/IR/Module.h" 14 #include "llvm/IR/Value.h" 15 #include "llvm/Support/raw_ostream.h" 16 17 #include "IRDynamicChecks.h" 18 19 #include "lldb/Expression/UtilityFunction.h" 20 #include "lldb/Target/ExecutionContext.h" 21 #include "lldb/Target/Process.h" 22 #include "lldb/Target/StackFrame.h" 23 #include "lldb/Target/Target.h" 24 #include "lldb/Utility/ConstString.h" 25 #include "lldb/Utility/LLDBLog.h" 26 #include "lldb/Utility/Log.h" 27 28 #include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h" 29 30 using namespace llvm; 31 using namespace lldb_private; 32 33 static char ID; 34 35 #define VALID_POINTER_CHECK_NAME "_$__lldb_valid_pointer_check" 36 #define VALID_OBJC_OBJECT_CHECK_NAME "$__lldb_objc_object_check" 37 38 static const char g_valid_pointer_check_text[] = 39 "extern \"C\" void\n" 40 "_$__lldb_valid_pointer_check (unsigned char *$__lldb_arg_ptr)\n" 41 "{\n" 42 " unsigned char $__lldb_local_val = *$__lldb_arg_ptr;\n" 43 "}"; 44 45 ClangDynamicCheckerFunctions::ClangDynamicCheckerFunctions() 46 : DynamicCheckerFunctions(DCF_Clang) {} 47 48 ClangDynamicCheckerFunctions::~ClangDynamicCheckerFunctions() = default; 49 50 bool ClangDynamicCheckerFunctions::Install( 51 DiagnosticManager &diagnostic_manager, ExecutionContext &exe_ctx) { 52 auto utility_fn_or_error = exe_ctx.GetTargetRef().CreateUtilityFunction( 53 g_valid_pointer_check_text, VALID_POINTER_CHECK_NAME, 54 lldb::eLanguageTypeC, exe_ctx); 55 if (!utility_fn_or_error) { 56 llvm::consumeError(utility_fn_or_error.takeError()); 57 return false; 58 } 59 m_valid_pointer_check = std::move(*utility_fn_or_error); 60 61 if (Process *process = exe_ctx.GetProcessPtr()) { 62 ObjCLanguageRuntime *objc_language_runtime = 63 ObjCLanguageRuntime::Get(*process); 64 65 if (objc_language_runtime) { 66 auto utility_fn_or_error = objc_language_runtime->CreateObjectChecker( 67 VALID_OBJC_OBJECT_CHECK_NAME, exe_ctx); 68 if (!utility_fn_or_error) { 69 llvm::consumeError(utility_fn_or_error.takeError()); 70 return false; 71 } 72 m_objc_object_check = std::move(*utility_fn_or_error); 73 } 74 } 75 76 return true; 77 } 78 79 bool ClangDynamicCheckerFunctions::DoCheckersExplainStop(lldb::addr_t addr, 80 Stream &message) { 81 // FIXME: We have to get the checkers to know why they scotched the call in 82 // more detail, 83 // so we can print a better message here. 84 if (m_valid_pointer_check && m_valid_pointer_check->ContainsAddress(addr)) { 85 message.Printf("Attempted to dereference an invalid pointer."); 86 return true; 87 } else if (m_objc_object_check && 88 m_objc_object_check->ContainsAddress(addr)) { 89 message.Printf("Attempted to dereference an invalid ObjC Object or send it " 90 "an unrecognized selector"); 91 return true; 92 } 93 return false; 94 } 95 96 static std::string PrintValue(llvm::Value *V, bool truncate = false) { 97 std::string s; 98 raw_string_ostream rso(s); 99 V->print(rso); 100 rso.flush(); 101 if (truncate) 102 s.resize(s.length() - 1); 103 return s; 104 } 105 106 /// \class Instrumenter IRDynamicChecks.cpp 107 /// Finds and instruments individual LLVM IR instructions 108 /// 109 /// When instrumenting LLVM IR, it is frequently desirable to first search for 110 /// instructions, and then later modify them. This way iterators remain 111 /// intact, and multiple passes can look at the same code base without 112 /// treading on each other's toes. 113 /// 114 /// The Instrumenter class implements this functionality. A client first 115 /// calls Inspect on a function, which populates a list of instructions to be 116 /// instrumented. Then, later, when all passes' Inspect functions have been 117 /// called, the client calls Instrument, which adds the desired 118 /// instrumentation. 119 /// 120 /// A subclass of Instrumenter must override InstrumentInstruction, which 121 /// is responsible for adding whatever instrumentation is necessary. 122 /// 123 /// A subclass of Instrumenter may override: 124 /// 125 /// - InspectInstruction [default: does nothing] 126 /// 127 /// - InspectBasicBlock [default: iterates through the instructions in a 128 /// basic block calling InspectInstruction] 129 /// 130 /// - InspectFunction [default: iterates through the basic blocks in a 131 /// function calling InspectBasicBlock] 132 class Instrumenter { 133 public: 134 /// Constructor 135 /// 136 /// \param[in] module 137 /// The module being instrumented. 138 Instrumenter(llvm::Module &module, 139 std::shared_ptr<UtilityFunction> checker_function) 140 : m_module(module), m_checker_function(checker_function), 141 m_i8ptr_ty(nullptr), m_intptr_ty(nullptr) {} 142 143 virtual ~Instrumenter() = default; 144 145 /// Inspect a function to find instructions to instrument 146 /// 147 /// \param[in] function 148 /// The function to inspect. 149 /// 150 /// \return 151 /// True on success; false on error. 152 bool Inspect(llvm::Function &function) { return InspectFunction(function); } 153 154 /// Instrument all the instructions found by Inspect() 155 /// 156 /// \return 157 /// True on success; false on error. 158 bool Instrument() { 159 for (InstIterator ii = m_to_instrument.begin(), 160 last_ii = m_to_instrument.end(); 161 ii != last_ii; ++ii) { 162 if (!InstrumentInstruction(*ii)) 163 return false; 164 } 165 166 return true; 167 } 168 169 protected: 170 /// Add instrumentation to a single instruction 171 /// 172 /// \param[in] inst 173 /// The instruction to be instrumented. 174 /// 175 /// \return 176 /// True on success; false otherwise. 177 virtual bool InstrumentInstruction(llvm::Instruction *inst) = 0; 178 179 /// Register a single instruction to be instrumented 180 /// 181 /// \param[in] inst 182 /// The instruction to be instrumented. 183 void RegisterInstruction(llvm::Instruction &inst) { 184 m_to_instrument.push_back(&inst); 185 } 186 187 /// Determine whether a single instruction is interesting to instrument, 188 /// and, if so, call RegisterInstruction 189 /// 190 /// \param[in] i 191 /// The instruction to be inspected. 192 /// 193 /// \return 194 /// False if there was an error scanning; true otherwise. 195 virtual bool InspectInstruction(llvm::Instruction &i) { return true; } 196 197 /// Scan a basic block to see if any instructions are interesting 198 /// 199 /// \param[in] bb 200 /// The basic block to be inspected. 201 /// 202 /// \return 203 /// False if there was an error scanning; true otherwise. 204 virtual bool InspectBasicBlock(llvm::BasicBlock &bb) { 205 for (llvm::BasicBlock::iterator ii = bb.begin(), last_ii = bb.end(); 206 ii != last_ii; ++ii) { 207 if (!InspectInstruction(*ii)) 208 return false; 209 } 210 211 return true; 212 } 213 214 /// Scan a function to see if any instructions are interesting 215 /// 216 /// \param[in] f 217 /// The function to be inspected. 218 /// 219 /// \return 220 /// False if there was an error scanning; true otherwise. 221 virtual bool InspectFunction(llvm::Function &f) { 222 for (llvm::Function::iterator bbi = f.begin(), last_bbi = f.end(); 223 bbi != last_bbi; ++bbi) { 224 if (!InspectBasicBlock(*bbi)) 225 return false; 226 } 227 228 return true; 229 } 230 231 /// Build a function pointer for a function with signature void 232 /// (*)(uint8_t*) with a given address 233 /// 234 /// \param[in] start_address 235 /// The address of the function. 236 /// 237 /// \return 238 /// The function pointer, for use in a CallInst. 239 llvm::FunctionCallee BuildPointerValidatorFunc(lldb::addr_t start_address) { 240 llvm::Type *param_array[1]; 241 242 param_array[0] = const_cast<llvm::PointerType *>(GetI8PtrTy()); 243 244 ArrayRef<llvm::Type *> params(param_array, 1); 245 246 FunctionType *fun_ty = FunctionType::get( 247 llvm::Type::getVoidTy(m_module.getContext()), params, true); 248 PointerType *fun_ptr_ty = PointerType::getUnqual(fun_ty); 249 Constant *fun_addr_int = 250 ConstantInt::get(GetIntptrTy(), start_address, false); 251 return {fun_ty, ConstantExpr::getIntToPtr(fun_addr_int, fun_ptr_ty)}; 252 } 253 254 /// Build a function pointer for a function with signature void 255 /// (*)(uint8_t*, uint8_t*) with a given address 256 /// 257 /// \param[in] start_address 258 /// The address of the function. 259 /// 260 /// \return 261 /// The function pointer, for use in a CallInst. 262 llvm::FunctionCallee BuildObjectCheckerFunc(lldb::addr_t start_address) { 263 llvm::Type *param_array[2]; 264 265 param_array[0] = const_cast<llvm::PointerType *>(GetI8PtrTy()); 266 param_array[1] = const_cast<llvm::PointerType *>(GetI8PtrTy()); 267 268 ArrayRef<llvm::Type *> params(param_array, 2); 269 270 FunctionType *fun_ty = FunctionType::get( 271 llvm::Type::getVoidTy(m_module.getContext()), params, true); 272 PointerType *fun_ptr_ty = PointerType::getUnqual(fun_ty); 273 Constant *fun_addr_int = 274 ConstantInt::get(GetIntptrTy(), start_address, false); 275 return {fun_ty, ConstantExpr::getIntToPtr(fun_addr_int, fun_ptr_ty)}; 276 } 277 278 PointerType *GetI8PtrTy() { 279 if (!m_i8ptr_ty) 280 m_i8ptr_ty = llvm::Type::getInt8PtrTy(m_module.getContext()); 281 282 return m_i8ptr_ty; 283 } 284 285 IntegerType *GetIntptrTy() { 286 if (!m_intptr_ty) { 287 llvm::DataLayout data_layout(&m_module); 288 289 m_intptr_ty = llvm::Type::getIntNTy(m_module.getContext(), 290 data_layout.getPointerSizeInBits()); 291 } 292 293 return m_intptr_ty; 294 } 295 296 typedef std::vector<llvm::Instruction *> InstVector; 297 typedef InstVector::iterator InstIterator; 298 299 InstVector m_to_instrument; ///< List of instructions the inspector found 300 llvm::Module &m_module; ///< The module which is being instrumented 301 std::shared_ptr<UtilityFunction> 302 m_checker_function; ///< The dynamic checker function for the process 303 304 private: 305 PointerType *m_i8ptr_ty; 306 IntegerType *m_intptr_ty; 307 }; 308 309 class ValidPointerChecker : public Instrumenter { 310 public: 311 ValidPointerChecker(llvm::Module &module, 312 std::shared_ptr<UtilityFunction> checker_function) 313 : Instrumenter(module, checker_function), 314 m_valid_pointer_check_func(nullptr) {} 315 316 ~ValidPointerChecker() override = default; 317 318 protected: 319 bool InstrumentInstruction(llvm::Instruction *inst) override { 320 Log *log = GetLog(LLDBLog::Expressions); 321 322 LLDB_LOGF(log, "Instrumenting load/store instruction: %s\n", 323 PrintValue(inst).c_str()); 324 325 if (!m_valid_pointer_check_func) 326 m_valid_pointer_check_func = 327 BuildPointerValidatorFunc(m_checker_function->StartAddress()); 328 329 llvm::Value *dereferenced_ptr = nullptr; 330 331 if (llvm::LoadInst *li = dyn_cast<llvm::LoadInst>(inst)) 332 dereferenced_ptr = li->getPointerOperand(); 333 else if (llvm::StoreInst *si = dyn_cast<llvm::StoreInst>(inst)) 334 dereferenced_ptr = si->getPointerOperand(); 335 else 336 return false; 337 338 // Insert an instruction to cast the loaded value to int8_t* 339 340 BitCastInst *bit_cast = 341 new BitCastInst(dereferenced_ptr, GetI8PtrTy(), "", inst); 342 343 // Insert an instruction to call the helper with the result 344 345 llvm::Value *arg_array[1]; 346 347 arg_array[0] = bit_cast; 348 349 llvm::ArrayRef<llvm::Value *> args(arg_array, 1); 350 351 CallInst::Create(m_valid_pointer_check_func, args, "", inst); 352 353 return true; 354 } 355 356 bool InspectInstruction(llvm::Instruction &i) override { 357 if (isa<llvm::LoadInst>(&i) || isa<llvm::StoreInst>(&i)) 358 RegisterInstruction(i); 359 360 return true; 361 } 362 363 private: 364 llvm::FunctionCallee m_valid_pointer_check_func; 365 }; 366 367 class ObjcObjectChecker : public Instrumenter { 368 public: 369 ObjcObjectChecker(llvm::Module &module, 370 std::shared_ptr<UtilityFunction> checker_function) 371 : Instrumenter(module, checker_function), 372 m_objc_object_check_func(nullptr) {} 373 374 ~ObjcObjectChecker() override = default; 375 376 enum msgSend_type { 377 eMsgSend = 0, 378 eMsgSendSuper, 379 eMsgSendSuper_stret, 380 eMsgSend_fpret, 381 eMsgSend_stret 382 }; 383 384 std::map<llvm::Instruction *, msgSend_type> msgSend_types; 385 386 protected: 387 bool InstrumentInstruction(llvm::Instruction *inst) override { 388 CallInst *call_inst = dyn_cast<CallInst>(inst); 389 390 if (!call_inst) 391 return false; // call_inst really shouldn't be nullptr, because otherwise 392 // InspectInstruction wouldn't have registered it 393 394 if (!m_objc_object_check_func) 395 m_objc_object_check_func = 396 BuildObjectCheckerFunc(m_checker_function->StartAddress()); 397 398 // id objc_msgSend(id theReceiver, SEL theSelector, ...) 399 400 llvm::Value *target_object; 401 llvm::Value *selector; 402 403 switch (msgSend_types[inst]) { 404 case eMsgSend: 405 case eMsgSend_fpret: 406 // On arm64, clang uses objc_msgSend for scalar and struct return 407 // calls. The call instruction will record which was used. 408 if (call_inst->hasStructRetAttr()) { 409 target_object = call_inst->getArgOperand(1); 410 selector = call_inst->getArgOperand(2); 411 } else { 412 target_object = call_inst->getArgOperand(0); 413 selector = call_inst->getArgOperand(1); 414 } 415 break; 416 case eMsgSend_stret: 417 target_object = call_inst->getArgOperand(1); 418 selector = call_inst->getArgOperand(2); 419 break; 420 case eMsgSendSuper: 421 case eMsgSendSuper_stret: 422 return true; 423 } 424 425 // These objects should always be valid according to Sean Calannan 426 assert(target_object); 427 assert(selector); 428 429 // Insert an instruction to cast the receiver id to int8_t* 430 431 BitCastInst *bit_cast = 432 new BitCastInst(target_object, GetI8PtrTy(), "", inst); 433 434 // Insert an instruction to call the helper with the result 435 436 llvm::Value *arg_array[2]; 437 438 arg_array[0] = bit_cast; 439 arg_array[1] = selector; 440 441 ArrayRef<llvm::Value *> args(arg_array, 2); 442 443 CallInst::Create(m_objc_object_check_func, args, "", inst); 444 445 return true; 446 } 447 448 static llvm::Function *GetFunction(llvm::Value *value) { 449 if (llvm::Function *function = llvm::dyn_cast<llvm::Function>(value)) { 450 return function; 451 } 452 453 if (llvm::ConstantExpr *const_expr = 454 llvm::dyn_cast<llvm::ConstantExpr>(value)) { 455 switch (const_expr->getOpcode()) { 456 default: 457 return nullptr; 458 case llvm::Instruction::BitCast: 459 return GetFunction(const_expr->getOperand(0)); 460 } 461 } 462 463 return nullptr; 464 } 465 466 static llvm::Function *GetCalledFunction(llvm::CallInst *inst) { 467 return GetFunction(inst->getCalledOperand()); 468 } 469 470 bool InspectInstruction(llvm::Instruction &i) override { 471 Log *log = GetLog(LLDBLog::Expressions); 472 473 CallInst *call_inst = dyn_cast<CallInst>(&i); 474 475 if (call_inst) { 476 const llvm::Function *called_function = GetCalledFunction(call_inst); 477 478 if (!called_function) 479 return true; 480 481 std::string name_str = called_function->getName().str(); 482 const char *name_cstr = name_str.c_str(); 483 484 LLDB_LOGF(log, "Found call to %s: %s\n", name_cstr, 485 PrintValue(call_inst).c_str()); 486 487 if (name_str.find("objc_msgSend") == std::string::npos) 488 return true; 489 490 if (!strcmp(name_cstr, "objc_msgSend")) { 491 RegisterInstruction(i); 492 msgSend_types[&i] = eMsgSend; 493 return true; 494 } 495 496 if (!strcmp(name_cstr, "objc_msgSend_stret")) { 497 RegisterInstruction(i); 498 msgSend_types[&i] = eMsgSend_stret; 499 return true; 500 } 501 502 if (!strcmp(name_cstr, "objc_msgSend_fpret")) { 503 RegisterInstruction(i); 504 msgSend_types[&i] = eMsgSend_fpret; 505 return true; 506 } 507 508 if (!strcmp(name_cstr, "objc_msgSendSuper")) { 509 RegisterInstruction(i); 510 msgSend_types[&i] = eMsgSendSuper; 511 return true; 512 } 513 514 if (!strcmp(name_cstr, "objc_msgSendSuper_stret")) { 515 RegisterInstruction(i); 516 msgSend_types[&i] = eMsgSendSuper_stret; 517 return true; 518 } 519 520 LLDB_LOGF(log, 521 "Function name '%s' contains 'objc_msgSend' but is not handled", 522 name_str.c_str()); 523 524 return true; 525 } 526 527 return true; 528 } 529 530 private: 531 llvm::FunctionCallee m_objc_object_check_func; 532 }; 533 534 IRDynamicChecks::IRDynamicChecks( 535 ClangDynamicCheckerFunctions &checker_functions, const char *func_name) 536 : ModulePass(ID), m_func_name(func_name), 537 m_checker_functions(checker_functions) {} 538 539 IRDynamicChecks::~IRDynamicChecks() = default; 540 541 bool IRDynamicChecks::runOnModule(llvm::Module &M) { 542 Log *log = GetLog(LLDBLog::Expressions); 543 544 llvm::Function *function = M.getFunction(StringRef(m_func_name)); 545 546 if (!function) { 547 LLDB_LOGF(log, "Couldn't find %s() in the module", m_func_name.c_str()); 548 549 return false; 550 } 551 552 if (m_checker_functions.m_valid_pointer_check) { 553 ValidPointerChecker vpc(M, m_checker_functions.m_valid_pointer_check); 554 555 if (!vpc.Inspect(*function)) 556 return false; 557 558 if (!vpc.Instrument()) 559 return false; 560 } 561 562 if (m_checker_functions.m_objc_object_check) { 563 ObjcObjectChecker ooc(M, m_checker_functions.m_objc_object_check); 564 565 if (!ooc.Inspect(*function)) 566 return false; 567 568 if (!ooc.Instrument()) 569 return false; 570 } 571 572 if (log && log->GetVerbose()) { 573 std::string s; 574 raw_string_ostream oss(s); 575 576 M.print(oss, nullptr); 577 578 oss.flush(); 579 580 LLDB_LOGF(log, "Module after dynamic checks: \n%s", s.c_str()); 581 } 582 583 return true; 584 } 585 586 void IRDynamicChecks::assignPassManager(PMStack &PMS, PassManagerType T) {} 587 588 PassManagerType IRDynamicChecks::getPotentialPassManagerType() const { 589 return PMT_ModulePassManager; 590 } 591