1 //===-- x86AssemblyInspectionEngine.cpp -------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "x86AssemblyInspectionEngine.h" 11 12 #include "llvm-c/Disassembler.h" 13 14 #include "lldb/Core/Address.h" 15 #include "lldb/Symbol/UnwindPlan.h" 16 #include "lldb/Target/RegisterContext.h" 17 #include "lldb/Target/UnwindAssembly.h" 18 19 using namespace lldb_private; 20 using namespace lldb; 21 22 x86AssemblyInspectionEngine::x86AssemblyInspectionEngine(const ArchSpec &arch) 23 : m_cur_insn(nullptr), m_machine_ip_regnum(LLDB_INVALID_REGNUM), 24 m_machine_sp_regnum(LLDB_INVALID_REGNUM), 25 m_machine_fp_regnum(LLDB_INVALID_REGNUM), 26 m_lldb_ip_regnum(LLDB_INVALID_REGNUM), 27 m_lldb_sp_regnum(LLDB_INVALID_REGNUM), 28 m_lldb_fp_regnum(LLDB_INVALID_REGNUM), 29 30 m_reg_map(), m_arch(arch), m_cpu(k_cpu_unspecified), m_wordsize(-1), 31 m_register_map_initialized(false), m_disasm_context() { 32 m_disasm_context = 33 ::LLVMCreateDisasm(arch.GetTriple().getTriple().c_str(), nullptr, 34 /*TagType=*/1, nullptr, nullptr); 35 } 36 37 x86AssemblyInspectionEngine::~x86AssemblyInspectionEngine() { 38 ::LLVMDisasmDispose(m_disasm_context); 39 } 40 41 void x86AssemblyInspectionEngine::Initialize(RegisterContextSP ®_ctx) { 42 m_cpu = k_cpu_unspecified; 43 m_wordsize = -1; 44 m_register_map_initialized = false; 45 46 const llvm::Triple::ArchType cpu = m_arch.GetMachine(); 47 if (cpu == llvm::Triple::x86) 48 m_cpu = k_i386; 49 else if (cpu == llvm::Triple::x86_64) 50 m_cpu = k_x86_64; 51 52 if (m_cpu == k_cpu_unspecified) 53 return; 54 55 if (reg_ctx.get() == nullptr) 56 return; 57 58 if (m_cpu == k_i386) { 59 m_machine_ip_regnum = k_machine_eip; 60 m_machine_sp_regnum = k_machine_esp; 61 m_machine_fp_regnum = k_machine_ebp; 62 m_wordsize = 4; 63 64 struct lldb_reg_info reginfo; 65 reginfo.name = "eax"; 66 m_reg_map[k_machine_eax] = reginfo; 67 reginfo.name = "edx"; 68 m_reg_map[k_machine_edx] = reginfo; 69 reginfo.name = "esp"; 70 m_reg_map[k_machine_esp] = reginfo; 71 reginfo.name = "esi"; 72 m_reg_map[k_machine_esi] = reginfo; 73 reginfo.name = "eip"; 74 m_reg_map[k_machine_eip] = reginfo; 75 reginfo.name = "ecx"; 76 m_reg_map[k_machine_ecx] = reginfo; 77 reginfo.name = "ebx"; 78 m_reg_map[k_machine_ebx] = reginfo; 79 reginfo.name = "ebp"; 80 m_reg_map[k_machine_ebp] = reginfo; 81 reginfo.name = "edi"; 82 m_reg_map[k_machine_edi] = reginfo; 83 } else { 84 m_machine_ip_regnum = k_machine_rip; 85 m_machine_sp_regnum = k_machine_rsp; 86 m_machine_fp_regnum = k_machine_rbp; 87 m_wordsize = 8; 88 89 struct lldb_reg_info reginfo; 90 reginfo.name = "rax"; 91 m_reg_map[k_machine_rax] = reginfo; 92 reginfo.name = "rdx"; 93 m_reg_map[k_machine_rdx] = reginfo; 94 reginfo.name = "rsp"; 95 m_reg_map[k_machine_rsp] = reginfo; 96 reginfo.name = "rsi"; 97 m_reg_map[k_machine_rsi] = reginfo; 98 reginfo.name = "r8"; 99 m_reg_map[k_machine_r8] = reginfo; 100 reginfo.name = "r10"; 101 m_reg_map[k_machine_r10] = reginfo; 102 reginfo.name = "r12"; 103 m_reg_map[k_machine_r12] = reginfo; 104 reginfo.name = "r14"; 105 m_reg_map[k_machine_r14] = reginfo; 106 reginfo.name = "rip"; 107 m_reg_map[k_machine_rip] = reginfo; 108 reginfo.name = "rcx"; 109 m_reg_map[k_machine_rcx] = reginfo; 110 reginfo.name = "rbx"; 111 m_reg_map[k_machine_rbx] = reginfo; 112 reginfo.name = "rbp"; 113 m_reg_map[k_machine_rbp] = reginfo; 114 reginfo.name = "rdi"; 115 m_reg_map[k_machine_rdi] = reginfo; 116 reginfo.name = "r9"; 117 m_reg_map[k_machine_r9] = reginfo; 118 reginfo.name = "r11"; 119 m_reg_map[k_machine_r11] = reginfo; 120 reginfo.name = "r13"; 121 m_reg_map[k_machine_r13] = reginfo; 122 reginfo.name = "r15"; 123 m_reg_map[k_machine_r15] = reginfo; 124 } 125 126 for (MachineRegnumToNameAndLLDBRegnum::iterator it = m_reg_map.begin(); 127 it != m_reg_map.end(); ++it) { 128 const RegisterInfo *ri = reg_ctx->GetRegisterInfoByName(it->second.name); 129 if (ri) 130 it->second.lldb_regnum = ri->kinds[eRegisterKindLLDB]; 131 } 132 133 uint32_t lldb_regno; 134 if (machine_regno_to_lldb_regno(m_machine_sp_regnum, lldb_regno)) 135 m_lldb_sp_regnum = lldb_regno; 136 if (machine_regno_to_lldb_regno(m_machine_fp_regnum, lldb_regno)) 137 m_lldb_fp_regnum = lldb_regno; 138 if (machine_regno_to_lldb_regno(m_machine_ip_regnum, lldb_regno)) 139 m_lldb_ip_regnum = lldb_regno; 140 141 m_register_map_initialized = true; 142 } 143 144 void x86AssemblyInspectionEngine::Initialize( 145 std::vector<lldb_reg_info> ®_info) { 146 m_cpu = k_cpu_unspecified; 147 m_wordsize = -1; 148 m_register_map_initialized = false; 149 150 const llvm::Triple::ArchType cpu = m_arch.GetMachine(); 151 if (cpu == llvm::Triple::x86) 152 m_cpu = k_i386; 153 else if (cpu == llvm::Triple::x86_64) 154 m_cpu = k_x86_64; 155 156 if (m_cpu == k_cpu_unspecified) 157 return; 158 159 if (m_cpu == k_i386) { 160 m_machine_ip_regnum = k_machine_eip; 161 m_machine_sp_regnum = k_machine_esp; 162 m_machine_fp_regnum = k_machine_ebp; 163 m_wordsize = 4; 164 165 struct lldb_reg_info reginfo; 166 reginfo.name = "eax"; 167 m_reg_map[k_machine_eax] = reginfo; 168 reginfo.name = "edx"; 169 m_reg_map[k_machine_edx] = reginfo; 170 reginfo.name = "esp"; 171 m_reg_map[k_machine_esp] = reginfo; 172 reginfo.name = "esi"; 173 m_reg_map[k_machine_esi] = reginfo; 174 reginfo.name = "eip"; 175 m_reg_map[k_machine_eip] = reginfo; 176 reginfo.name = "ecx"; 177 m_reg_map[k_machine_ecx] = reginfo; 178 reginfo.name = "ebx"; 179 m_reg_map[k_machine_ebx] = reginfo; 180 reginfo.name = "ebp"; 181 m_reg_map[k_machine_ebp] = reginfo; 182 reginfo.name = "edi"; 183 m_reg_map[k_machine_edi] = reginfo; 184 } else { 185 m_machine_ip_regnum = k_machine_rip; 186 m_machine_sp_regnum = k_machine_rsp; 187 m_machine_fp_regnum = k_machine_rbp; 188 m_wordsize = 8; 189 190 struct lldb_reg_info reginfo; 191 reginfo.name = "rax"; 192 m_reg_map[k_machine_rax] = reginfo; 193 reginfo.name = "rdx"; 194 m_reg_map[k_machine_rdx] = reginfo; 195 reginfo.name = "rsp"; 196 m_reg_map[k_machine_rsp] = reginfo; 197 reginfo.name = "rsi"; 198 m_reg_map[k_machine_rsi] = reginfo; 199 reginfo.name = "r8"; 200 m_reg_map[k_machine_r8] = reginfo; 201 reginfo.name = "r10"; 202 m_reg_map[k_machine_r10] = reginfo; 203 reginfo.name = "r12"; 204 m_reg_map[k_machine_r12] = reginfo; 205 reginfo.name = "r14"; 206 m_reg_map[k_machine_r14] = reginfo; 207 reginfo.name = "rip"; 208 m_reg_map[k_machine_rip] = reginfo; 209 reginfo.name = "rcx"; 210 m_reg_map[k_machine_rcx] = reginfo; 211 reginfo.name = "rbx"; 212 m_reg_map[k_machine_rbx] = reginfo; 213 reginfo.name = "rbp"; 214 m_reg_map[k_machine_rbp] = reginfo; 215 reginfo.name = "rdi"; 216 m_reg_map[k_machine_rdi] = reginfo; 217 reginfo.name = "r9"; 218 m_reg_map[k_machine_r9] = reginfo; 219 reginfo.name = "r11"; 220 m_reg_map[k_machine_r11] = reginfo; 221 reginfo.name = "r13"; 222 m_reg_map[k_machine_r13] = reginfo; 223 reginfo.name = "r15"; 224 m_reg_map[k_machine_r15] = reginfo; 225 } 226 227 for (MachineRegnumToNameAndLLDBRegnum::iterator it = m_reg_map.begin(); 228 it != m_reg_map.end(); ++it) { 229 for (size_t i = 0; i < reg_info.size(); ++i) { 230 if (::strcmp(reg_info[i].name, it->second.name) == 0) { 231 it->second.lldb_regnum = reg_info[i].lldb_regnum; 232 break; 233 } 234 } 235 } 236 237 uint32_t lldb_regno; 238 if (machine_regno_to_lldb_regno(m_machine_sp_regnum, lldb_regno)) 239 m_lldb_sp_regnum = lldb_regno; 240 if (machine_regno_to_lldb_regno(m_machine_fp_regnum, lldb_regno)) 241 m_lldb_fp_regnum = lldb_regno; 242 if (machine_regno_to_lldb_regno(m_machine_ip_regnum, lldb_regno)) 243 m_lldb_ip_regnum = lldb_regno; 244 245 m_register_map_initialized = true; 246 } 247 248 // This function expects an x86 native register number (i.e. the bits stripped 249 // out of the 250 // actual instruction), not an lldb register number. 251 // 252 // FIXME: This is ABI dependent, it shouldn't be hardcoded here. 253 254 bool x86AssemblyInspectionEngine::nonvolatile_reg_p(int machine_regno) { 255 if (m_cpu == k_i386) { 256 switch (machine_regno) { 257 case k_machine_ebx: 258 case k_machine_ebp: // not actually a nonvolatile but often treated as such 259 // by convention 260 case k_machine_esi: 261 case k_machine_edi: 262 case k_machine_esp: 263 return true; 264 default: 265 return false; 266 } 267 } 268 if (m_cpu == k_x86_64) { 269 switch (machine_regno) { 270 case k_machine_rbx: 271 case k_machine_rsp: 272 case k_machine_rbp: // not actually a nonvolatile but often treated as such 273 // by convention 274 case k_machine_r12: 275 case k_machine_r13: 276 case k_machine_r14: 277 case k_machine_r15: 278 return true; 279 default: 280 return false; 281 } 282 } 283 return false; 284 } 285 286 // Macro to detect if this is a REX mode prefix byte. 287 #define REX_W_PREFIX_P(opcode) (((opcode) & (~0x5)) == 0x48) 288 289 // The high bit which should be added to the source register number (the "R" 290 // bit) 291 #define REX_W_SRCREG(opcode) (((opcode)&0x4) >> 2) 292 293 // The high bit which should be added to the destination register number (the 294 // "B" bit) 295 #define REX_W_DSTREG(opcode) ((opcode)&0x1) 296 297 // pushq %rbp [0x55] 298 bool x86AssemblyInspectionEngine::push_rbp_pattern_p() { 299 uint8_t *p = m_cur_insn; 300 if (*p == 0x55) 301 return true; 302 return false; 303 } 304 305 // pushq $0 ; the first instruction in start() [0x6a 0x00] 306 bool x86AssemblyInspectionEngine::push_0_pattern_p() { 307 uint8_t *p = m_cur_insn; 308 if (*p == 0x6a && *(p + 1) == 0x0) 309 return true; 310 return false; 311 } 312 313 // pushq $0 314 // pushl $0 315 bool x86AssemblyInspectionEngine::push_imm_pattern_p() { 316 uint8_t *p = m_cur_insn; 317 if (*p == 0x68 || *p == 0x6a) 318 return true; 319 return false; 320 } 321 322 // pushl imm8(%esp) 323 // 324 // e.g. 0xff 0x74 0x24 0x20 - 'pushl 0x20(%esp)' 325 // (same byte pattern for 'pushq 0x20(%rsp)' in an x86_64 program) 326 // 327 // 0xff (with opcode bits '6' in next byte, PUSH r/m32) 328 // 0x74 (ModR/M byte with three bits used to specify the opcode) 329 // mod == b01, opcode == b110, R/M == b100 330 // "+disp8" 331 // 0x24 (SIB byte - scaled index = 0, r32 == esp) 332 // 0x20 imm8 value 333 334 bool x86AssemblyInspectionEngine::push_extended_pattern_p() { 335 if (*m_cur_insn == 0xff) { 336 // Get the 3 opcode bits from the ModR/M byte 337 uint8_t opcode = (*(m_cur_insn + 1) >> 3) & 7; 338 if (opcode == 6) { 339 // I'm only looking for 0xff /6 here - I 340 // don't really care what value is being pushed, 341 // just that we're pushing a 32/64 bit value on 342 // to the stack is enough. 343 return true; 344 } 345 } 346 return false; 347 } 348 349 // instructions only valid in 32-bit mode: 350 // 0x0e - push cs 351 // 0x16 - push ss 352 // 0x1e - push ds 353 // 0x06 - push es 354 bool x86AssemblyInspectionEngine::push_misc_reg_p() { 355 uint8_t p = *m_cur_insn; 356 if (m_wordsize == 4) { 357 if (p == 0x0e || p == 0x16 || p == 0x1e || p == 0x06) 358 return true; 359 } 360 return false; 361 } 362 363 // pushq %rbx 364 // pushl %ebx 365 bool x86AssemblyInspectionEngine::push_reg_p(int ®no) { 366 uint8_t *p = m_cur_insn; 367 int regno_prefix_bit = 0; 368 // If we have a rex prefix byte, check to see if a B bit is set 369 if (m_wordsize == 8 && *p == 0x41) { 370 regno_prefix_bit = 1 << 3; 371 p++; 372 } 373 if (*p >= 0x50 && *p <= 0x57) { 374 regno = (*p - 0x50) | regno_prefix_bit; 375 return true; 376 } 377 return false; 378 } 379 380 // movq %rsp, %rbp [0x48 0x8b 0xec] or [0x48 0x89 0xe5] 381 // movl %esp, %ebp [0x8b 0xec] or [0x89 0xe5] 382 bool x86AssemblyInspectionEngine::mov_rsp_rbp_pattern_p() { 383 uint8_t *p = m_cur_insn; 384 if (m_wordsize == 8 && *p == 0x48) 385 p++; 386 if (*(p) == 0x8b && *(p + 1) == 0xec) 387 return true; 388 if (*(p) == 0x89 && *(p + 1) == 0xe5) 389 return true; 390 return false; 391 } 392 393 // subq $0x20, %rsp 394 bool x86AssemblyInspectionEngine::sub_rsp_pattern_p(int &amount) { 395 uint8_t *p = m_cur_insn; 396 if (m_wordsize == 8 && *p == 0x48) 397 p++; 398 // 8-bit immediate operand 399 if (*p == 0x83 && *(p + 1) == 0xec) { 400 amount = (int8_t) * (p + 2); 401 return true; 402 } 403 // 32-bit immediate operand 404 if (*p == 0x81 && *(p + 1) == 0xec) { 405 amount = (int32_t)extract_4(p + 2); 406 return true; 407 } 408 return false; 409 } 410 411 // addq $0x20, %rsp 412 bool x86AssemblyInspectionEngine::add_rsp_pattern_p(int &amount) { 413 uint8_t *p = m_cur_insn; 414 if (m_wordsize == 8 && *p == 0x48) 415 p++; 416 // 8-bit immediate operand 417 if (*p == 0x83 && *(p + 1) == 0xc4) { 418 amount = (int8_t) * (p + 2); 419 return true; 420 } 421 // 32-bit immediate operand 422 if (*p == 0x81 && *(p + 1) == 0xc4) { 423 amount = (int32_t)extract_4(p + 2); 424 return true; 425 } 426 return false; 427 } 428 429 // lea esp, [esp - 0x28] 430 // lea esp, [esp + 0x28] 431 bool x86AssemblyInspectionEngine::lea_rsp_pattern_p(int &amount) { 432 uint8_t *p = m_cur_insn; 433 if (m_wordsize == 8 && *p == 0x48) 434 p++; 435 436 // Check opcode 437 if (*p != 0x8d) 438 return false; 439 440 // 8 bit displacement 441 if (*(p + 1) == 0x64 && (*(p + 2) & 0x3f) == 0x24) { 442 amount = (int8_t) * (p + 3); 443 return true; 444 } 445 446 // 32 bit displacement 447 if (*(p + 1) == 0xa4 && (*(p + 2) & 0x3f) == 0x24) { 448 amount = (int32_t)extract_4(p + 3); 449 return true; 450 } 451 452 return false; 453 } 454 455 // popq %rbx 456 // popl %ebx 457 bool x86AssemblyInspectionEngine::pop_reg_p(int ®no) { 458 uint8_t *p = m_cur_insn; 459 int regno_prefix_bit = 0; 460 // If we have a rex prefix byte, check to see if a B bit is set 461 if (m_wordsize == 8 && *p == 0x41) { 462 regno_prefix_bit = 1 << 3; 463 p++; 464 } 465 if (*p >= 0x58 && *p <= 0x5f) { 466 regno = (*p - 0x58) | regno_prefix_bit; 467 return true; 468 } 469 return false; 470 } 471 472 // popq %rbp [0x5d] 473 // popl %ebp [0x5d] 474 bool x86AssemblyInspectionEngine::pop_rbp_pattern_p() { 475 uint8_t *p = m_cur_insn; 476 return (*p == 0x5d); 477 } 478 479 // instructions valid only in 32-bit mode: 480 // 0x1f - pop ds 481 // 0x07 - pop es 482 // 0x17 - pop ss 483 bool x86AssemblyInspectionEngine::pop_misc_reg_p() { 484 uint8_t p = *m_cur_insn; 485 if (m_wordsize == 4) { 486 if (p == 0x1f || p == 0x07 || p == 0x17) 487 return true; 488 } 489 return false; 490 } 491 492 // leave [0xc9] 493 bool x86AssemblyInspectionEngine::leave_pattern_p() { 494 uint8_t *p = m_cur_insn; 495 return (*p == 0xc9); 496 } 497 498 // call $0 [0xe8 0x0 0x0 0x0 0x0] 499 bool x86AssemblyInspectionEngine::call_next_insn_pattern_p() { 500 uint8_t *p = m_cur_insn; 501 return (*p == 0xe8) && (*(p + 1) == 0x0) && (*(p + 2) == 0x0) && 502 (*(p + 3) == 0x0) && (*(p + 4) == 0x0); 503 } 504 505 // Look for an instruction sequence storing a nonvolatile register 506 // on to the stack frame. 507 508 // movq %rax, -0x10(%rbp) [0x48 0x89 0x45 0xf0] 509 // movl %eax, -0xc(%ebp) [0x89 0x45 0xf4] 510 511 // The offset value returned in rbp_offset will be positive -- 512 // but it must be subtraced from the frame base register to get 513 // the actual location. The positive value returned for the offset 514 // is a convention used elsewhere for CFA offsets et al. 515 516 bool x86AssemblyInspectionEngine::mov_reg_to_local_stack_frame_p( 517 int ®no, int &rbp_offset) { 518 uint8_t *p = m_cur_insn; 519 int src_reg_prefix_bit = 0; 520 int target_reg_prefix_bit = 0; 521 522 if (m_wordsize == 8 && REX_W_PREFIX_P(*p)) { 523 src_reg_prefix_bit = REX_W_SRCREG(*p) << 3; 524 target_reg_prefix_bit = REX_W_DSTREG(*p) << 3; 525 if (target_reg_prefix_bit == 1) { 526 // rbp/ebp don't need a prefix bit - we know this isn't the 527 // reg we care about. 528 return false; 529 } 530 p++; 531 } 532 533 if (*p == 0x89) { 534 /* Mask off the 3-5 bits which indicate the destination register 535 if this is a ModR/M byte. */ 536 int opcode_destreg_masked_out = *(p + 1) & (~0x38); 537 538 /* Is this a ModR/M byte with Mod bits 01 and R/M bits 101 539 and three bits between them, e.g. 01nnn101 540 We're looking for a destination of ebp-disp8 or ebp-disp32. */ 541 int immsize; 542 if (opcode_destreg_masked_out == 0x45) 543 immsize = 2; 544 else if (opcode_destreg_masked_out == 0x85) 545 immsize = 4; 546 else 547 return false; 548 549 int offset = 0; 550 if (immsize == 2) 551 offset = (int8_t) * (p + 2); 552 if (immsize == 4) 553 offset = (uint32_t)extract_4(p + 2); 554 if (offset > 0) 555 return false; 556 557 regno = ((*(p + 1) >> 3) & 0x7) | src_reg_prefix_bit; 558 rbp_offset = offset > 0 ? offset : -offset; 559 return true; 560 } 561 return false; 562 } 563 564 // ret [0xc9] or [0xc2 imm8] or [0xca imm8] 565 bool x86AssemblyInspectionEngine::ret_pattern_p() { 566 uint8_t *p = m_cur_insn; 567 if (*p == 0xc9 || *p == 0xc2 || *p == 0xca || *p == 0xc3) 568 return true; 569 return false; 570 } 571 572 uint32_t x86AssemblyInspectionEngine::extract_4(uint8_t *b) { 573 uint32_t v = 0; 574 for (int i = 3; i >= 0; i--) 575 v = (v << 8) | b[i]; 576 return v; 577 } 578 579 bool x86AssemblyInspectionEngine::instruction_length(uint8_t *insn_p, 580 int &length) { 581 582 const uint32_t max_op_byte_size = m_arch.GetMaximumOpcodeByteSize(); 583 llvm::SmallVector<uint8_t, 32> opcode_data; 584 opcode_data.resize(max_op_byte_size); 585 586 char out_string[512]; 587 const size_t inst_size = 588 ::LLVMDisasmInstruction(m_disasm_context, insn_p, max_op_byte_size, 0, 589 out_string, sizeof(out_string)); 590 591 length = inst_size; 592 return true; 593 } 594 595 bool x86AssemblyInspectionEngine::machine_regno_to_lldb_regno( 596 int machine_regno, uint32_t &lldb_regno) { 597 MachineRegnumToNameAndLLDBRegnum::iterator it = m_reg_map.find(machine_regno); 598 if (it != m_reg_map.end()) { 599 lldb_regno = it->second.lldb_regnum; 600 return true; 601 } 602 return false; 603 return false; 604 } 605 606 bool x86AssemblyInspectionEngine::GetNonCallSiteUnwindPlanFromAssembly( 607 uint8_t *data, size_t size, AddressRange &func_range, 608 UnwindPlan &unwind_plan) { 609 unwind_plan.Clear(); 610 611 if (data == nullptr || size == 0) 612 return false; 613 614 if (m_register_map_initialized == false) 615 return false; 616 617 addr_t current_func_text_offset = 0; 618 int current_sp_bytes_offset_from_cfa = 0; 619 UnwindPlan::Row::RegisterLocation initial_regloc; 620 UnwindPlan::RowSP row(new UnwindPlan::Row); 621 622 unwind_plan.SetPlanValidAddressRange(func_range); 623 unwind_plan.SetRegisterKind(eRegisterKindLLDB); 624 625 // At the start of the function, find the CFA by adding wordsize to the SP 626 // register 627 row->SetOffset(current_func_text_offset); 628 row->GetCFAValue().SetIsRegisterPlusOffset(m_lldb_sp_regnum, m_wordsize); 629 630 // caller's stack pointer value before the call insn is the CFA address 631 initial_regloc.SetIsCFAPlusOffset(0); 632 row->SetRegisterInfo(m_lldb_sp_regnum, initial_regloc); 633 634 // saved instruction pointer can be found at CFA - wordsize. 635 current_sp_bytes_offset_from_cfa = m_wordsize; 636 initial_regloc.SetAtCFAPlusOffset(-current_sp_bytes_offset_from_cfa); 637 row->SetRegisterInfo(m_lldb_ip_regnum, initial_regloc); 638 639 unwind_plan.AppendRow(row); 640 641 // Allocate a new Row, populate it with the existing Row contents. 642 UnwindPlan::Row *newrow = new UnwindPlan::Row; 643 *newrow = *row.get(); 644 row.reset(newrow); 645 646 // Track which registers have been saved so far in the prologue. 647 // If we see another push of that register, it's not part of the prologue. 648 // The register numbers used here are the machine register #'s 649 // (i386_register_numbers, x86_64_register_numbers). 650 std::vector<bool> saved_registers(32, false); 651 652 // Once the prologue has completed we'll save a copy of the unwind 653 // instructions 654 // If there is an epilogue in the middle of the function, after that epilogue 655 // we'll reinstate 656 // the unwind setup -- we assume that some code path jumps over the 657 // mid-function epilogue 658 659 UnwindPlan::RowSP prologue_completed_row; // copy of prologue row of CFI 660 int prologue_completed_sp_bytes_offset_from_cfa; // The sp value before the 661 // epilogue started executed 662 std::vector<bool> prologue_completed_saved_registers; 663 664 while (current_func_text_offset < size) { 665 int stack_offset, insn_len; 666 int machine_regno; // register numbers masked directly out of instructions 667 uint32_t lldb_regno; // register numbers in lldb's eRegisterKindLLDB 668 // numbering scheme 669 670 bool in_epilogue = false; // we're in the middle of an epilogue sequence 671 bool row_updated = false; // The UnwindPlan::Row 'row' has been updated 672 673 m_cur_insn = data + current_func_text_offset; 674 if (!instruction_length(m_cur_insn, insn_len) || insn_len == 0 || 675 insn_len > kMaxInstructionByteSize) { 676 // An unrecognized/junk instruction 677 break; 678 } 679 680 if (push_rbp_pattern_p()) { 681 current_sp_bytes_offset_from_cfa += m_wordsize; 682 row->GetCFAValue().SetOffset(current_sp_bytes_offset_from_cfa); 683 UnwindPlan::Row::RegisterLocation regloc; 684 regloc.SetAtCFAPlusOffset(-row->GetCFAValue().GetOffset()); 685 row->SetRegisterInfo(m_lldb_fp_regnum, regloc); 686 saved_registers[m_machine_fp_regnum] = true; 687 row_updated = true; 688 } 689 690 else if (mov_rsp_rbp_pattern_p()) { 691 row->GetCFAValue().SetIsRegisterPlusOffset( 692 m_lldb_fp_regnum, row->GetCFAValue().GetOffset()); 693 row_updated = true; 694 } 695 696 // This is the start() function (or a pthread equivalent), it starts with a 697 // pushl $0x0 which puts the 698 // saved pc value of 0 on the stack. In this case we want to pretend we 699 // didn't see a stack movement at all -- 700 // normally the saved pc value is already on the stack by the time the 701 // function starts executing. 702 else if (push_0_pattern_p()) { 703 } 704 705 else if (push_reg_p(machine_regno)) { 706 current_sp_bytes_offset_from_cfa += m_wordsize; 707 // the PUSH instruction has moved the stack pointer - if the CFA is set in 708 // terms of the stack pointer, 709 // we need to add a new row of instructions. 710 if (row->GetCFAValue().GetRegisterNumber() == m_lldb_sp_regnum) { 711 row->GetCFAValue().SetOffset(current_sp_bytes_offset_from_cfa); 712 row_updated = true; 713 } 714 // record where non-volatile (callee-saved, spilled) registers are saved 715 // on the stack 716 if (nonvolatile_reg_p(machine_regno) && 717 machine_regno_to_lldb_regno(machine_regno, lldb_regno) && 718 saved_registers[machine_regno] == false) { 719 UnwindPlan::Row::RegisterLocation regloc; 720 regloc.SetAtCFAPlusOffset(-current_sp_bytes_offset_from_cfa); 721 row->SetRegisterInfo(lldb_regno, regloc); 722 saved_registers[machine_regno] = true; 723 row_updated = true; 724 } 725 } 726 727 else if (pop_reg_p(machine_regno)) { 728 current_sp_bytes_offset_from_cfa -= m_wordsize; 729 730 if (nonvolatile_reg_p(machine_regno) && 731 machine_regno_to_lldb_regno(machine_regno, lldb_regno) && 732 saved_registers[machine_regno] == true) { 733 saved_registers[machine_regno] = false; 734 row->RemoveRegisterInfo(lldb_regno); 735 736 if (machine_regno == (int)m_machine_fp_regnum) { 737 row->GetCFAValue().SetIsRegisterPlusOffset( 738 m_lldb_sp_regnum, row->GetCFAValue().GetOffset()); 739 } 740 741 in_epilogue = true; 742 row_updated = true; 743 } 744 745 // the POP instruction has moved the stack pointer - if the CFA is set in 746 // terms of the stack pointer, 747 // we need to add a new row of instructions. 748 if (row->GetCFAValue().GetRegisterNumber() == m_lldb_sp_regnum) { 749 row->GetCFAValue().SetIsRegisterPlusOffset( 750 m_lldb_sp_regnum, current_sp_bytes_offset_from_cfa); 751 row_updated = true; 752 } 753 } 754 755 else if (pop_misc_reg_p()) { 756 current_sp_bytes_offset_from_cfa -= m_wordsize; 757 if (row->GetCFAValue().GetRegisterNumber() == m_lldb_sp_regnum) { 758 row->GetCFAValue().SetIsRegisterPlusOffset( 759 m_lldb_sp_regnum, current_sp_bytes_offset_from_cfa); 760 row_updated = true; 761 } 762 } 763 764 // The LEAVE instruction moves the value from rbp into rsp and pops 765 // a value off the stack into rbp (restoring the caller's rbp value). 766 // It is the opposite of ENTER, or 'push rbp, mov rsp rbp'. 767 else if (leave_pattern_p()) { 768 // We're going to copy the value in rbp into rsp, so re-set the sp offset 769 // based on the CFAValue. Also, adjust it to recognize that we're popping 770 // the saved rbp value off the stack. 771 current_sp_bytes_offset_from_cfa = row->GetCFAValue().GetOffset(); 772 current_sp_bytes_offset_from_cfa -= m_wordsize; 773 row->GetCFAValue().SetOffset(current_sp_bytes_offset_from_cfa); 774 775 // rbp is restored to the caller's value 776 saved_registers[m_machine_fp_regnum] = false; 777 row->RemoveRegisterInfo(m_lldb_fp_regnum); 778 779 // cfa is now in terms of rsp again. 780 row->GetCFAValue().SetIsRegisterPlusOffset( 781 m_lldb_sp_regnum, row->GetCFAValue().GetOffset()); 782 row->GetCFAValue().SetOffset(current_sp_bytes_offset_from_cfa); 783 784 in_epilogue = true; 785 row_updated = true; 786 } 787 788 else if (mov_reg_to_local_stack_frame_p(machine_regno, stack_offset) && 789 nonvolatile_reg_p(machine_regno) && 790 machine_regno_to_lldb_regno(machine_regno, lldb_regno) && 791 saved_registers[machine_regno] == false) { 792 saved_registers[machine_regno] = true; 793 794 UnwindPlan::Row::RegisterLocation regloc; 795 796 // stack_offset for 'movq %r15, -80(%rbp)' will be 80. 797 // In the Row, we want to express this as the offset from the CFA. If the 798 // frame base 799 // is rbp (like the above instruction), the CFA offset for rbp is probably 800 // 16. So we 801 // want to say that the value is stored at the CFA address - 96. 802 regloc.SetAtCFAPlusOffset( 803 -(stack_offset + row->GetCFAValue().GetOffset())); 804 805 row->SetRegisterInfo(lldb_regno, regloc); 806 807 row_updated = true; 808 } 809 810 else if (sub_rsp_pattern_p(stack_offset)) { 811 current_sp_bytes_offset_from_cfa += stack_offset; 812 if (row->GetCFAValue().GetRegisterNumber() == m_lldb_sp_regnum) { 813 row->GetCFAValue().SetOffset(current_sp_bytes_offset_from_cfa); 814 row_updated = true; 815 } 816 } 817 818 else if (add_rsp_pattern_p(stack_offset)) { 819 current_sp_bytes_offset_from_cfa -= stack_offset; 820 if (row->GetCFAValue().GetRegisterNumber() == m_lldb_sp_regnum) { 821 row->GetCFAValue().SetOffset(current_sp_bytes_offset_from_cfa); 822 row_updated = true; 823 } 824 in_epilogue = true; 825 } 826 827 else if (push_extended_pattern_p() || push_imm_pattern_p() || 828 push_misc_reg_p()) { 829 current_sp_bytes_offset_from_cfa += m_wordsize; 830 if (row->GetCFAValue().GetRegisterNumber() == m_lldb_sp_regnum) { 831 row->GetCFAValue().SetOffset(current_sp_bytes_offset_from_cfa); 832 row_updated = true; 833 } 834 } 835 836 else if (lea_rsp_pattern_p(stack_offset)) { 837 current_sp_bytes_offset_from_cfa -= stack_offset; 838 if (row->GetCFAValue().GetRegisterNumber() == m_lldb_sp_regnum) { 839 row->GetCFAValue().SetOffset(current_sp_bytes_offset_from_cfa); 840 row_updated = true; 841 } 842 if (stack_offset > 0) 843 in_epilogue = true; 844 } 845 846 else if (ret_pattern_p() && prologue_completed_row.get()) { 847 // Reinstate the saved prologue setup for any instructions 848 // that come after the ret instruction 849 850 UnwindPlan::Row *newrow = new UnwindPlan::Row; 851 *newrow = *prologue_completed_row.get(); 852 row.reset(newrow); 853 current_sp_bytes_offset_from_cfa = 854 prologue_completed_sp_bytes_offset_from_cfa; 855 856 saved_registers.clear(); 857 saved_registers.resize(prologue_completed_saved_registers.size(), false); 858 for (size_t i = 0; i < prologue_completed_saved_registers.size(); ++i) { 859 saved_registers[i] = prologue_completed_saved_registers[i]; 860 } 861 862 in_epilogue = true; 863 row_updated = true; 864 } 865 866 // call next instruction 867 // call 0 868 // => pop %ebx 869 // This is used in i386 programs to get the PIC base address for finding 870 // global data 871 else if (call_next_insn_pattern_p()) { 872 current_sp_bytes_offset_from_cfa += m_wordsize; 873 if (row->GetCFAValue().GetRegisterNumber() == m_lldb_sp_regnum) { 874 row->GetCFAValue().SetOffset(current_sp_bytes_offset_from_cfa); 875 row_updated = true; 876 } 877 } 878 879 if (row_updated) { 880 if (current_func_text_offset + insn_len < size) { 881 row->SetOffset(current_func_text_offset + insn_len); 882 unwind_plan.AppendRow(row); 883 // Allocate a new Row, populate it with the existing Row contents. 884 newrow = new UnwindPlan::Row; 885 *newrow = *row.get(); 886 row.reset(newrow); 887 } 888 } 889 890 if (in_epilogue == false && row_updated) { 891 // If we're not in an epilogue sequence, save the updated Row 892 UnwindPlan::Row *newrow = new UnwindPlan::Row; 893 *newrow = *row.get(); 894 prologue_completed_row.reset(newrow); 895 896 prologue_completed_saved_registers.clear(); 897 prologue_completed_saved_registers.resize(saved_registers.size(), false); 898 for (size_t i = 0; i < saved_registers.size(); ++i) { 899 prologue_completed_saved_registers[i] = saved_registers[i]; 900 } 901 } 902 903 // We may change the sp value without adding a new Row necessarily -- keep 904 // track of it either way. 905 if (in_epilogue == false) { 906 prologue_completed_sp_bytes_offset_from_cfa = 907 current_sp_bytes_offset_from_cfa; 908 } 909 910 m_cur_insn = m_cur_insn + insn_len; 911 current_func_text_offset += insn_len; 912 } 913 914 unwind_plan.SetSourceName("assembly insn profiling"); 915 unwind_plan.SetSourcedFromCompiler(eLazyBoolNo); 916 unwind_plan.SetUnwindPlanValidAtAllInstructions(eLazyBoolYes); 917 918 return true; 919 } 920 921 bool x86AssemblyInspectionEngine::AugmentUnwindPlanFromCallSite( 922 uint8_t *data, size_t size, AddressRange &func_range, 923 UnwindPlan &unwind_plan, RegisterContextSP ®_ctx) { 924 Address addr_start = func_range.GetBaseAddress(); 925 if (!addr_start.IsValid()) 926 return false; 927 928 // We either need a live RegisterContext, or we need the UnwindPlan to already 929 // be in the lldb register numbering scheme. 930 if (reg_ctx.get() == nullptr && 931 unwind_plan.GetRegisterKind() != eRegisterKindLLDB) 932 return false; 933 934 // Is original unwind_plan valid? 935 // unwind_plan should have at least one row which is ABI-default (CFA register 936 // is sp), 937 // and another row in mid-function. 938 if (unwind_plan.GetRowCount() < 2) 939 return false; 940 941 UnwindPlan::RowSP first_row = unwind_plan.GetRowAtIndex(0); 942 if (first_row->GetOffset() != 0) 943 return false; 944 uint32_t cfa_reg = first_row->GetCFAValue().GetRegisterNumber(); 945 if (unwind_plan.GetRegisterKind() != eRegisterKindLLDB) { 946 cfa_reg = reg_ctx->ConvertRegisterKindToRegisterNumber( 947 unwind_plan.GetRegisterKind(), 948 first_row->GetCFAValue().GetRegisterNumber()); 949 } 950 if (cfa_reg != m_lldb_sp_regnum || 951 first_row->GetCFAValue().GetOffset() != m_wordsize) 952 return false; 953 954 UnwindPlan::RowSP original_last_row = unwind_plan.GetRowForFunctionOffset(-1); 955 956 size_t offset = 0; 957 int row_id = 1; 958 bool unwind_plan_updated = false; 959 UnwindPlan::RowSP row(new UnwindPlan::Row(*first_row)); 960 m_cur_insn = data + offset; 961 962 // After a mid-function epilogue we will need to re-insert the original unwind 963 // rules 964 // so unwinds work for the remainder of the function. These aren't common 965 // with clang/gcc 966 // on x86 but it is possible. 967 bool reinstate_unwind_state = false; 968 969 while (offset < size) { 970 m_cur_insn = data + offset; 971 int insn_len; 972 if (!instruction_length(m_cur_insn, insn_len) || insn_len == 0 || 973 insn_len > kMaxInstructionByteSize) { 974 // An unrecognized/junk instruction. 975 break; 976 } 977 978 // Advance offsets. 979 offset += insn_len; 980 m_cur_insn = data + offset; 981 982 if (reinstate_unwind_state) { 983 // that was the last instruction of this function 984 if (offset >= size) 985 continue; 986 987 UnwindPlan::RowSP new_row(new UnwindPlan::Row()); 988 *new_row = *original_last_row; 989 new_row->SetOffset(offset); 990 unwind_plan.AppendRow(new_row); 991 row.reset(new UnwindPlan::Row()); 992 *row = *new_row; 993 reinstate_unwind_state = false; 994 unwind_plan_updated = true; 995 continue; 996 } 997 998 // If we already have one row for this instruction, we can continue. 999 while (row_id < unwind_plan.GetRowCount() && 1000 unwind_plan.GetRowAtIndex(row_id)->GetOffset() <= offset) { 1001 row_id++; 1002 } 1003 UnwindPlan::RowSP original_row = unwind_plan.GetRowAtIndex(row_id - 1); 1004 if (original_row->GetOffset() == offset) { 1005 *row = *original_row; 1006 continue; 1007 } 1008 1009 if (row_id == 0) { 1010 // If we are here, compiler didn't generate CFI for prologue. 1011 // This won't happen to GCC or clang. 1012 // In this case, bail out directly. 1013 return false; 1014 } 1015 1016 // Inspect the instruction to check if we need a new row for it. 1017 cfa_reg = row->GetCFAValue().GetRegisterNumber(); 1018 if (unwind_plan.GetRegisterKind() != eRegisterKindLLDB) { 1019 cfa_reg = reg_ctx->ConvertRegisterKindToRegisterNumber( 1020 unwind_plan.GetRegisterKind(), 1021 row->GetCFAValue().GetRegisterNumber()); 1022 } 1023 if (cfa_reg == m_lldb_sp_regnum) { 1024 // CFA register is sp. 1025 1026 // call next instruction 1027 // call 0 1028 // => pop %ebx 1029 if (call_next_insn_pattern_p()) { 1030 row->SetOffset(offset); 1031 row->GetCFAValue().IncOffset(m_wordsize); 1032 1033 UnwindPlan::RowSP new_row(new UnwindPlan::Row(*row)); 1034 unwind_plan.InsertRow(new_row); 1035 unwind_plan_updated = true; 1036 continue; 1037 } 1038 1039 // push/pop register 1040 int regno; 1041 if (push_reg_p(regno)) { 1042 row->SetOffset(offset); 1043 row->GetCFAValue().IncOffset(m_wordsize); 1044 1045 UnwindPlan::RowSP new_row(new UnwindPlan::Row(*row)); 1046 unwind_plan.InsertRow(new_row); 1047 unwind_plan_updated = true; 1048 continue; 1049 } 1050 if (pop_reg_p(regno)) { 1051 // Technically, this might be a nonvolatile register recover in 1052 // epilogue. 1053 // We should reset RegisterInfo for the register. 1054 // But in practice, previous rule for the register is still valid... 1055 // So we ignore this case. 1056 1057 row->SetOffset(offset); 1058 row->GetCFAValue().IncOffset(-m_wordsize); 1059 1060 UnwindPlan::RowSP new_row(new UnwindPlan::Row(*row)); 1061 unwind_plan.InsertRow(new_row); 1062 unwind_plan_updated = true; 1063 continue; 1064 } 1065 1066 if (pop_misc_reg_p()) { 1067 row->SetOffset(offset); 1068 row->GetCFAValue().IncOffset(-m_wordsize); 1069 1070 UnwindPlan::RowSP new_row(new UnwindPlan::Row(*row)); 1071 unwind_plan.InsertRow(new_row); 1072 unwind_plan_updated = true; 1073 continue; 1074 } 1075 1076 // push imm 1077 if (push_imm_pattern_p()) { 1078 row->SetOffset(offset); 1079 row->GetCFAValue().IncOffset(m_wordsize); 1080 UnwindPlan::RowSP new_row(new UnwindPlan::Row(*row)); 1081 unwind_plan.InsertRow(new_row); 1082 unwind_plan_updated = true; 1083 continue; 1084 } 1085 1086 // push extended 1087 if (push_extended_pattern_p() || push_misc_reg_p()) { 1088 row->SetOffset(offset); 1089 row->GetCFAValue().IncOffset(m_wordsize); 1090 UnwindPlan::RowSP new_row(new UnwindPlan::Row(*row)); 1091 unwind_plan.InsertRow(new_row); 1092 unwind_plan_updated = true; 1093 continue; 1094 } 1095 1096 // add/sub %rsp/%esp 1097 int amount; 1098 if (add_rsp_pattern_p(amount)) { 1099 row->SetOffset(offset); 1100 row->GetCFAValue().IncOffset(-amount); 1101 1102 UnwindPlan::RowSP new_row(new UnwindPlan::Row(*row)); 1103 unwind_plan.InsertRow(new_row); 1104 unwind_plan_updated = true; 1105 continue; 1106 } 1107 if (sub_rsp_pattern_p(amount)) { 1108 row->SetOffset(offset); 1109 row->GetCFAValue().IncOffset(amount); 1110 1111 UnwindPlan::RowSP new_row(new UnwindPlan::Row(*row)); 1112 unwind_plan.InsertRow(new_row); 1113 unwind_plan_updated = true; 1114 continue; 1115 } 1116 1117 // lea %rsp, [%rsp + $offset] 1118 if (lea_rsp_pattern_p(amount)) { 1119 row->SetOffset(offset); 1120 row->GetCFAValue().IncOffset(-amount); 1121 1122 UnwindPlan::RowSP new_row(new UnwindPlan::Row(*row)); 1123 unwind_plan.InsertRow(new_row); 1124 unwind_plan_updated = true; 1125 continue; 1126 } 1127 1128 if (ret_pattern_p()) { 1129 reinstate_unwind_state = true; 1130 continue; 1131 } 1132 } else if (cfa_reg == m_lldb_fp_regnum) { 1133 // CFA register is fp. 1134 1135 // The only case we care about is epilogue: 1136 // [0x5d] pop %rbp/%ebp 1137 // => [0xc3] ret 1138 if (pop_rbp_pattern_p() || leave_pattern_p()) { 1139 offset += 1; 1140 row->SetOffset(offset); 1141 row->GetCFAValue().SetIsRegisterPlusOffset( 1142 first_row->GetCFAValue().GetRegisterNumber(), m_wordsize); 1143 1144 UnwindPlan::RowSP new_row(new UnwindPlan::Row(*row)); 1145 unwind_plan.InsertRow(new_row); 1146 unwind_plan_updated = true; 1147 reinstate_unwind_state = true; 1148 continue; 1149 } 1150 } else { 1151 // CFA register is not sp or fp. 1152 1153 // This must be hand-written assembly. 1154 // Just trust eh_frame and assume we have finished. 1155 break; 1156 } 1157 } 1158 1159 unwind_plan.SetPlanValidAddressRange(func_range); 1160 if (unwind_plan_updated) { 1161 std::string unwind_plan_source(unwind_plan.GetSourceName().AsCString()); 1162 unwind_plan_source += " plus augmentation from assembly parsing"; 1163 unwind_plan.SetSourceName(unwind_plan_source.c_str()); 1164 unwind_plan.SetSourcedFromCompiler(eLazyBoolNo); 1165 unwind_plan.SetUnwindPlanValidAtAllInstructions(eLazyBoolYes); 1166 } 1167 return true; 1168 } 1169 1170 bool x86AssemblyInspectionEngine::FindFirstNonPrologueInstruction( 1171 uint8_t *data, size_t size, size_t &offset) { 1172 offset = 0; 1173 1174 if (m_register_map_initialized == false) 1175 return false; 1176 1177 while (offset < size) { 1178 int regno; 1179 int insn_len; 1180 int scratch; 1181 1182 m_cur_insn = data + offset; 1183 if (!instruction_length(m_cur_insn, insn_len) || 1184 insn_len > kMaxInstructionByteSize || insn_len == 0) { 1185 // An error parsing the instruction, i.e. probably data/garbage - stop 1186 // scanning 1187 break; 1188 } 1189 1190 if (push_rbp_pattern_p() || mov_rsp_rbp_pattern_p() || 1191 sub_rsp_pattern_p(scratch) || push_reg_p(regno) || 1192 mov_reg_to_local_stack_frame_p(regno, scratch) || 1193 (lea_rsp_pattern_p(scratch) && offset == 0)) { 1194 offset += insn_len; 1195 continue; 1196 } 1197 // 1198 // Unknown non-prologue instruction - stop scanning 1199 break; 1200 } 1201 1202 return true; 1203 } 1204