1 //===-- Disassembler.cpp --------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "lldb/Core/Disassembler.h" 10 11 #include "lldb/Core/AddressRange.h" 12 #include "lldb/Core/Debugger.h" 13 #include "lldb/Core/EmulateInstruction.h" 14 #include "lldb/Core/Mangled.h" 15 #include "lldb/Core/Module.h" 16 #include "lldb/Core/ModuleList.h" 17 #include "lldb/Core/PluginManager.h" 18 #include "lldb/Core/SourceManager.h" 19 #include "lldb/Host/FileSystem.h" 20 #include "lldb/Interpreter/OptionValue.h" 21 #include "lldb/Interpreter/OptionValueArray.h" 22 #include "lldb/Interpreter/OptionValueDictionary.h" 23 #include "lldb/Interpreter/OptionValueRegex.h" 24 #include "lldb/Interpreter/OptionValueString.h" 25 #include "lldb/Interpreter/OptionValueUInt64.h" 26 #include "lldb/Symbol/Function.h" 27 #include "lldb/Symbol/Symbol.h" 28 #include "lldb/Symbol/SymbolContext.h" 29 #include "lldb/Target/ExecutionContext.h" 30 #include "lldb/Target/SectionLoadList.h" 31 #include "lldb/Target/StackFrame.h" 32 #include "lldb/Target/Target.h" 33 #include "lldb/Target/Thread.h" 34 #include "lldb/Utility/DataBufferHeap.h" 35 #include "lldb/Utility/DataExtractor.h" 36 #include "lldb/Utility/RegularExpression.h" 37 #include "lldb/Utility/Status.h" 38 #include "lldb/Utility/Stream.h" 39 #include "lldb/Utility/StreamString.h" 40 #include "lldb/Utility/Timer.h" 41 #include "lldb/lldb-private-enumerations.h" 42 #include "lldb/lldb-private-interfaces.h" 43 #include "lldb/lldb-private-types.h" 44 #include "llvm/ADT/Triple.h" 45 #include "llvm/Support/Compiler.h" 46 47 #include <cstdint> 48 #include <cstring> 49 #include <utility> 50 51 #include <assert.h> 52 53 #define DEFAULT_DISASM_BYTE_SIZE 32 54 55 using namespace lldb; 56 using namespace lldb_private; 57 58 DisassemblerSP Disassembler::FindPlugin(const ArchSpec &arch, 59 const char *flavor, 60 const char *plugin_name) { 61 static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); 62 Timer scoped_timer(func_cat, 63 "Disassembler::FindPlugin (arch = %s, plugin_name = %s)", 64 arch.GetArchitectureName(), plugin_name); 65 66 DisassemblerCreateInstance create_callback = nullptr; 67 68 if (plugin_name) { 69 ConstString const_plugin_name(plugin_name); 70 create_callback = PluginManager::GetDisassemblerCreateCallbackForPluginName( 71 const_plugin_name); 72 if (create_callback) { 73 DisassemblerSP disassembler_sp(create_callback(arch, flavor)); 74 75 if (disassembler_sp) 76 return disassembler_sp; 77 } 78 } else { 79 for (uint32_t idx = 0; 80 (create_callback = PluginManager::GetDisassemblerCreateCallbackAtIndex( 81 idx)) != nullptr; 82 ++idx) { 83 DisassemblerSP disassembler_sp(create_callback(arch, flavor)); 84 85 if (disassembler_sp) 86 return disassembler_sp; 87 } 88 } 89 return DisassemblerSP(); 90 } 91 92 DisassemblerSP Disassembler::FindPluginForTarget(const Target &target, 93 const ArchSpec &arch, 94 const char *flavor, 95 const char *plugin_name) { 96 if (flavor == nullptr) { 97 // FIXME - we don't have the mechanism in place to do per-architecture 98 // settings. But since we know that for now we only support flavors on x86 99 // & x86_64, 100 if (arch.GetTriple().getArch() == llvm::Triple::x86 || 101 arch.GetTriple().getArch() == llvm::Triple::x86_64) 102 flavor = target.GetDisassemblyFlavor(); 103 } 104 return FindPlugin(arch, flavor, plugin_name); 105 } 106 107 static Address ResolveAddress(Target &target, const Address &addr) { 108 if (!addr.IsSectionOffset()) { 109 Address resolved_addr; 110 // If we weren't passed in a section offset address range, try and resolve 111 // it to something 112 bool is_resolved = target.GetSectionLoadList().IsEmpty() 113 ? target.GetImages().ResolveFileAddress( 114 addr.GetOffset(), resolved_addr) 115 : target.GetSectionLoadList().ResolveLoadAddress( 116 addr.GetOffset(), resolved_addr); 117 118 // We weren't able to resolve the address, just treat it as a raw address 119 if (is_resolved && resolved_addr.IsValid()) 120 return resolved_addr; 121 } 122 return addr; 123 } 124 125 lldb::DisassemblerSP Disassembler::DisassembleRange( 126 const ArchSpec &arch, const char *plugin_name, const char *flavor, 127 Target &target, const AddressRange &range, bool prefer_file_cache) { 128 if (range.GetByteSize() <= 0) 129 return {}; 130 131 if (!range.GetBaseAddress().IsValid()) 132 return {}; 133 134 lldb::DisassemblerSP disasm_sp = 135 Disassembler::FindPluginForTarget(target, arch, flavor, plugin_name); 136 137 if (!disasm_sp) 138 return {}; 139 140 const size_t bytes_disassembled = disasm_sp->ParseInstructions( 141 target, range.GetBaseAddress(), {Limit::Bytes, range.GetByteSize()}, 142 nullptr, prefer_file_cache); 143 if (bytes_disassembled == 0) 144 return {}; 145 146 return disasm_sp; 147 } 148 149 lldb::DisassemblerSP 150 Disassembler::DisassembleBytes(const ArchSpec &arch, const char *plugin_name, 151 const char *flavor, const Address &start, 152 const void *src, size_t src_len, 153 uint32_t num_instructions, bool data_from_file) { 154 if (!src) 155 return {}; 156 157 lldb::DisassemblerSP disasm_sp = 158 Disassembler::FindPlugin(arch, flavor, plugin_name); 159 160 if (!disasm_sp) 161 return {}; 162 163 DataExtractor data(src, src_len, arch.GetByteOrder(), 164 arch.GetAddressByteSize()); 165 166 (void)disasm_sp->DecodeInstructions(start, data, 0, num_instructions, false, 167 data_from_file); 168 return disasm_sp; 169 } 170 171 bool Disassembler::Disassemble(Debugger &debugger, const ArchSpec &arch, 172 const char *plugin_name, const char *flavor, 173 const ExecutionContext &exe_ctx, 174 const Address &address, Limit limit, 175 bool mixed_source_and_assembly, 176 uint32_t num_mixed_context_lines, 177 uint32_t options, Stream &strm) { 178 if (!exe_ctx.GetTargetPtr()) 179 return false; 180 181 lldb::DisassemblerSP disasm_sp(Disassembler::FindPluginForTarget( 182 exe_ctx.GetTargetRef(), arch, flavor, plugin_name)); 183 if (!disasm_sp) 184 return false; 185 186 const bool prefer_file_cache = false; 187 size_t bytes_disassembled = disasm_sp->ParseInstructions( 188 exe_ctx.GetTargetRef(), address, limit, &strm, prefer_file_cache); 189 if (bytes_disassembled == 0) 190 return false; 191 192 disasm_sp->PrintInstructions(debugger, arch, exe_ctx, 193 mixed_source_and_assembly, 194 num_mixed_context_lines, options, strm); 195 return true; 196 } 197 198 Disassembler::SourceLine 199 Disassembler::GetFunctionDeclLineEntry(const SymbolContext &sc) { 200 if (!sc.function) 201 return {}; 202 203 if (!sc.line_entry.IsValid()) 204 return {}; 205 206 LineEntry prologue_end_line = sc.line_entry; 207 FileSpec func_decl_file; 208 uint32_t func_decl_line; 209 sc.function->GetStartLineSourceInfo(func_decl_file, func_decl_line); 210 211 if (func_decl_file != prologue_end_line.file && 212 func_decl_file != prologue_end_line.original_file) 213 return {}; 214 215 SourceLine decl_line; 216 decl_line.file = func_decl_file; 217 decl_line.line = func_decl_line; 218 // TODO: Do we care about column on these entries? If so, we need to plumb 219 // that through GetStartLineSourceInfo. 220 decl_line.column = 0; 221 return decl_line; 222 } 223 224 void Disassembler::AddLineToSourceLineTables( 225 SourceLine &line, 226 std::map<FileSpec, std::set<uint32_t>> &source_lines_seen) { 227 if (line.IsValid()) { 228 auto source_lines_seen_pos = source_lines_seen.find(line.file); 229 if (source_lines_seen_pos == source_lines_seen.end()) { 230 std::set<uint32_t> lines; 231 lines.insert(line.line); 232 source_lines_seen.emplace(line.file, lines); 233 } else { 234 source_lines_seen_pos->second.insert(line.line); 235 } 236 } 237 } 238 239 bool Disassembler::ElideMixedSourceAndDisassemblyLine( 240 const ExecutionContext &exe_ctx, const SymbolContext &sc, 241 SourceLine &line) { 242 243 // TODO: should we also check target.process.thread.step-avoid-libraries ? 244 245 const RegularExpression *avoid_regex = nullptr; 246 247 // Skip any line #0 entries - they are implementation details 248 if (line.line == 0) 249 return false; 250 251 ThreadSP thread_sp = exe_ctx.GetThreadSP(); 252 if (thread_sp) { 253 avoid_regex = thread_sp->GetSymbolsToAvoidRegexp(); 254 } else { 255 TargetSP target_sp = exe_ctx.GetTargetSP(); 256 if (target_sp) { 257 Status error; 258 OptionValueSP value_sp = target_sp->GetDebugger().GetPropertyValue( 259 &exe_ctx, "target.process.thread.step-avoid-regexp", false, error); 260 if (value_sp && value_sp->GetType() == OptionValue::eTypeRegex) { 261 OptionValueRegex *re = value_sp->GetAsRegex(); 262 if (re) { 263 avoid_regex = re->GetCurrentValue(); 264 } 265 } 266 } 267 } 268 if (avoid_regex && sc.symbol != nullptr) { 269 const char *function_name = 270 sc.GetFunctionName(Mangled::ePreferDemangledWithoutArguments) 271 .GetCString(); 272 if (function_name && avoid_regex->Execute(function_name)) { 273 // skip this source line 274 return true; 275 } 276 } 277 // don't skip this source line 278 return false; 279 } 280 281 void Disassembler::PrintInstructions(Debugger &debugger, const ArchSpec &arch, 282 const ExecutionContext &exe_ctx, 283 bool mixed_source_and_assembly, 284 uint32_t num_mixed_context_lines, 285 uint32_t options, Stream &strm) { 286 // We got some things disassembled... 287 size_t num_instructions_found = GetInstructionList().GetSize(); 288 289 const uint32_t max_opcode_byte_size = 290 GetInstructionList().GetMaxOpcocdeByteSize(); 291 SymbolContext sc; 292 SymbolContext prev_sc; 293 AddressRange current_source_line_range; 294 const Address *pc_addr_ptr = nullptr; 295 StackFrame *frame = exe_ctx.GetFramePtr(); 296 297 TargetSP target_sp(exe_ctx.GetTargetSP()); 298 SourceManager &source_manager = 299 target_sp ? target_sp->GetSourceManager() : debugger.GetSourceManager(); 300 301 if (frame) { 302 pc_addr_ptr = &frame->GetFrameCodeAddress(); 303 } 304 const uint32_t scope = 305 eSymbolContextLineEntry | eSymbolContextFunction | eSymbolContextSymbol; 306 const bool use_inline_block_range = false; 307 308 const FormatEntity::Entry *disassembly_format = nullptr; 309 FormatEntity::Entry format; 310 if (exe_ctx.HasTargetScope()) { 311 disassembly_format = 312 exe_ctx.GetTargetRef().GetDebugger().GetDisassemblyFormat(); 313 } else { 314 FormatEntity::Parse("${addr}: ", format); 315 disassembly_format = &format; 316 } 317 318 // First pass: step through the list of instructions, find how long the 319 // initial addresses strings are, insert padding in the second pass so the 320 // opcodes all line up nicely. 321 322 // Also build up the source line mapping if this is mixed source & assembly 323 // mode. Calculate the source line for each assembly instruction (eliding 324 // inlined functions which the user wants to skip). 325 326 std::map<FileSpec, std::set<uint32_t>> source_lines_seen; 327 Symbol *previous_symbol = nullptr; 328 329 size_t address_text_size = 0; 330 for (size_t i = 0; i < num_instructions_found; ++i) { 331 Instruction *inst = GetInstructionList().GetInstructionAtIndex(i).get(); 332 if (inst) { 333 const Address &addr = inst->GetAddress(); 334 ModuleSP module_sp(addr.GetModule()); 335 if (module_sp) { 336 const SymbolContextItem resolve_mask = eSymbolContextFunction | 337 eSymbolContextSymbol | 338 eSymbolContextLineEntry; 339 uint32_t resolved_mask = 340 module_sp->ResolveSymbolContextForAddress(addr, resolve_mask, sc); 341 if (resolved_mask) { 342 StreamString strmstr; 343 Debugger::FormatDisassemblerAddress(disassembly_format, &sc, nullptr, 344 &exe_ctx, &addr, strmstr); 345 size_t cur_line = strmstr.GetSizeOfLastLine(); 346 if (cur_line > address_text_size) 347 address_text_size = cur_line; 348 349 // Add entries to our "source_lines_seen" map+set which list which 350 // sources lines occur in this disassembly session. We will print 351 // lines of context around a source line, but we don't want to print 352 // a source line that has a line table entry of its own - we'll leave 353 // that source line to be printed when it actually occurs in the 354 // disassembly. 355 356 if (mixed_source_and_assembly && sc.line_entry.IsValid()) { 357 if (sc.symbol != previous_symbol) { 358 SourceLine decl_line = GetFunctionDeclLineEntry(sc); 359 if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, decl_line)) 360 AddLineToSourceLineTables(decl_line, source_lines_seen); 361 } 362 if (sc.line_entry.IsValid()) { 363 SourceLine this_line; 364 this_line.file = sc.line_entry.file; 365 this_line.line = sc.line_entry.line; 366 this_line.column = sc.line_entry.column; 367 if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, this_line)) 368 AddLineToSourceLineTables(this_line, source_lines_seen); 369 } 370 } 371 } 372 sc.Clear(false); 373 } 374 } 375 } 376 377 previous_symbol = nullptr; 378 SourceLine previous_line; 379 for (size_t i = 0; i < num_instructions_found; ++i) { 380 Instruction *inst = GetInstructionList().GetInstructionAtIndex(i).get(); 381 382 if (inst) { 383 const Address &addr = inst->GetAddress(); 384 const bool inst_is_at_pc = pc_addr_ptr && addr == *pc_addr_ptr; 385 SourceLinesToDisplay source_lines_to_display; 386 387 prev_sc = sc; 388 389 ModuleSP module_sp(addr.GetModule()); 390 if (module_sp) { 391 uint32_t resolved_mask = module_sp->ResolveSymbolContextForAddress( 392 addr, eSymbolContextEverything, sc); 393 if (resolved_mask) { 394 if (mixed_source_and_assembly) { 395 396 // If we've started a new function (non-inlined), print all of the 397 // source lines from the function declaration until the first line 398 // table entry - typically the opening curly brace of the function. 399 if (previous_symbol != sc.symbol) { 400 // The default disassembly format puts an extra blank line 401 // between functions - so when we're displaying the source 402 // context for a function, we don't want to add a blank line 403 // after the source context or we'll end up with two of them. 404 if (previous_symbol != nullptr) 405 source_lines_to_display.print_source_context_end_eol = false; 406 407 previous_symbol = sc.symbol; 408 if (sc.function && sc.line_entry.IsValid()) { 409 LineEntry prologue_end_line = sc.line_entry; 410 if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, 411 prologue_end_line)) { 412 FileSpec func_decl_file; 413 uint32_t func_decl_line; 414 sc.function->GetStartLineSourceInfo(func_decl_file, 415 func_decl_line); 416 if (func_decl_file == prologue_end_line.file || 417 func_decl_file == prologue_end_line.original_file) { 418 // Add all the lines between the function declaration and 419 // the first non-prologue source line to the list of lines 420 // to print. 421 for (uint32_t lineno = func_decl_line; 422 lineno <= prologue_end_line.line; lineno++) { 423 SourceLine this_line; 424 this_line.file = func_decl_file; 425 this_line.line = lineno; 426 source_lines_to_display.lines.push_back(this_line); 427 } 428 // Mark the last line as the "current" one. Usually this 429 // is the open curly brace. 430 if (source_lines_to_display.lines.size() > 0) 431 source_lines_to_display.current_source_line = 432 source_lines_to_display.lines.size() - 1; 433 } 434 } 435 } 436 sc.GetAddressRange(scope, 0, use_inline_block_range, 437 current_source_line_range); 438 } 439 440 // If we've left a previous source line's address range, print a 441 // new source line 442 if (!current_source_line_range.ContainsFileAddress(addr)) { 443 sc.GetAddressRange(scope, 0, use_inline_block_range, 444 current_source_line_range); 445 446 if (sc != prev_sc && sc.comp_unit && sc.line_entry.IsValid()) { 447 SourceLine this_line; 448 this_line.file = sc.line_entry.file; 449 this_line.line = sc.line_entry.line; 450 451 if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, 452 this_line)) { 453 // Only print this source line if it is different from the 454 // last source line we printed. There may have been inlined 455 // functions between these lines that we elided, resulting in 456 // the same line being printed twice in a row for a 457 // contiguous block of assembly instructions. 458 if (this_line != previous_line) { 459 460 std::vector<uint32_t> previous_lines; 461 for (uint32_t i = 0; 462 i < num_mixed_context_lines && 463 (this_line.line - num_mixed_context_lines) > 0; 464 i++) { 465 uint32_t line = 466 this_line.line - num_mixed_context_lines + i; 467 auto pos = source_lines_seen.find(this_line.file); 468 if (pos != source_lines_seen.end()) { 469 if (pos->second.count(line) == 1) { 470 previous_lines.clear(); 471 } else { 472 previous_lines.push_back(line); 473 } 474 } 475 } 476 for (size_t i = 0; i < previous_lines.size(); i++) { 477 SourceLine previous_line; 478 previous_line.file = this_line.file; 479 previous_line.line = previous_lines[i]; 480 auto pos = source_lines_seen.find(previous_line.file); 481 if (pos != source_lines_seen.end()) { 482 pos->second.insert(previous_line.line); 483 } 484 source_lines_to_display.lines.push_back(previous_line); 485 } 486 487 source_lines_to_display.lines.push_back(this_line); 488 source_lines_to_display.current_source_line = 489 source_lines_to_display.lines.size() - 1; 490 491 for (uint32_t i = 0; i < num_mixed_context_lines; i++) { 492 SourceLine next_line; 493 next_line.file = this_line.file; 494 next_line.line = this_line.line + i + 1; 495 auto pos = source_lines_seen.find(next_line.file); 496 if (pos != source_lines_seen.end()) { 497 if (pos->second.count(next_line.line) == 1) 498 break; 499 pos->second.insert(next_line.line); 500 } 501 source_lines_to_display.lines.push_back(next_line); 502 } 503 } 504 previous_line = this_line; 505 } 506 } 507 } 508 } 509 } else { 510 sc.Clear(true); 511 } 512 } 513 514 if (source_lines_to_display.lines.size() > 0) { 515 strm.EOL(); 516 for (size_t idx = 0; idx < source_lines_to_display.lines.size(); 517 idx++) { 518 SourceLine ln = source_lines_to_display.lines[idx]; 519 const char *line_highlight = ""; 520 if (inst_is_at_pc && (options & eOptionMarkPCSourceLine)) { 521 line_highlight = "->"; 522 } else if (idx == source_lines_to_display.current_source_line) { 523 line_highlight = "**"; 524 } 525 source_manager.DisplaySourceLinesWithLineNumbers( 526 ln.file, ln.line, ln.column, 0, 0, line_highlight, &strm); 527 } 528 if (source_lines_to_display.print_source_context_end_eol) 529 strm.EOL(); 530 } 531 532 const bool show_bytes = (options & eOptionShowBytes) != 0; 533 inst->Dump(&strm, max_opcode_byte_size, true, show_bytes, &exe_ctx, &sc, 534 &prev_sc, nullptr, address_text_size); 535 strm.EOL(); 536 } else { 537 break; 538 } 539 } 540 } 541 542 bool Disassembler::Disassemble(Debugger &debugger, const ArchSpec &arch, 543 StackFrame &frame, Stream &strm) { 544 AddressRange range; 545 SymbolContext sc( 546 frame.GetSymbolContext(eSymbolContextFunction | eSymbolContextSymbol)); 547 if (sc.function) { 548 range = sc.function->GetAddressRange(); 549 } else if (sc.symbol && sc.symbol->ValueIsAddress()) { 550 range.GetBaseAddress() = sc.symbol->GetAddressRef(); 551 range.SetByteSize(sc.symbol->GetByteSize()); 552 } else { 553 range.GetBaseAddress() = frame.GetFrameCodeAddress(); 554 } 555 556 if (range.GetBaseAddress().IsValid() && range.GetByteSize() == 0) 557 range.SetByteSize(DEFAULT_DISASM_BYTE_SIZE); 558 559 Disassembler::Limit limit = {Disassembler::Limit::Bytes, 560 range.GetByteSize()}; 561 if (limit.value == 0) 562 limit.value = DEFAULT_DISASM_BYTE_SIZE; 563 564 return Disassemble(debugger, arch, nullptr, nullptr, frame, 565 range.GetBaseAddress(), limit, false, 0, 0, strm); 566 } 567 568 Instruction::Instruction(const Address &address, AddressClass addr_class) 569 : m_address(address), m_address_class(addr_class), m_opcode(), 570 m_calculated_strings(false) {} 571 572 Instruction::~Instruction() = default; 573 574 AddressClass Instruction::GetAddressClass() { 575 if (m_address_class == AddressClass::eInvalid) 576 m_address_class = m_address.GetAddressClass(); 577 return m_address_class; 578 } 579 580 void Instruction::Dump(lldb_private::Stream *s, uint32_t max_opcode_byte_size, 581 bool show_address, bool show_bytes, 582 const ExecutionContext *exe_ctx, 583 const SymbolContext *sym_ctx, 584 const SymbolContext *prev_sym_ctx, 585 const FormatEntity::Entry *disassembly_addr_format, 586 size_t max_address_text_size) { 587 size_t opcode_column_width = 7; 588 const size_t operand_column_width = 25; 589 590 CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx); 591 592 StreamString ss; 593 594 if (show_address) { 595 Debugger::FormatDisassemblerAddress(disassembly_addr_format, sym_ctx, 596 prev_sym_ctx, exe_ctx, &m_address, ss); 597 ss.FillLastLineToColumn(max_address_text_size, ' '); 598 } 599 600 if (show_bytes) { 601 if (m_opcode.GetType() == Opcode::eTypeBytes) { 602 // x86_64 and i386 are the only ones that use bytes right now so pad out 603 // the byte dump to be able to always show 15 bytes (3 chars each) plus a 604 // space 605 if (max_opcode_byte_size > 0) 606 m_opcode.Dump(&ss, max_opcode_byte_size * 3 + 1); 607 else 608 m_opcode.Dump(&ss, 15 * 3 + 1); 609 } else { 610 // Else, we have ARM or MIPS which can show up to a uint32_t 0x00000000 611 // (10 spaces) plus two for padding... 612 if (max_opcode_byte_size > 0) 613 m_opcode.Dump(&ss, max_opcode_byte_size * 3 + 1); 614 else 615 m_opcode.Dump(&ss, 12); 616 } 617 } 618 619 const size_t opcode_pos = ss.GetSizeOfLastLine(); 620 621 // The default opcode size of 7 characters is plenty for most architectures 622 // but some like arm can pull out the occasional vqrshrun.s16. We won't get 623 // consistent column spacing in these cases, unfortunately. 624 if (m_opcode_name.length() >= opcode_column_width) { 625 opcode_column_width = m_opcode_name.length() + 1; 626 } 627 628 ss.PutCString(m_opcode_name); 629 ss.FillLastLineToColumn(opcode_pos + opcode_column_width, ' '); 630 ss.PutCString(m_mnemonics); 631 632 if (!m_comment.empty()) { 633 ss.FillLastLineToColumn( 634 opcode_pos + opcode_column_width + operand_column_width, ' '); 635 ss.PutCString(" ; "); 636 ss.PutCString(m_comment); 637 } 638 s->PutCString(ss.GetString()); 639 } 640 641 bool Instruction::DumpEmulation(const ArchSpec &arch) { 642 std::unique_ptr<EmulateInstruction> insn_emulator_up( 643 EmulateInstruction::FindPlugin(arch, eInstructionTypeAny, nullptr)); 644 if (insn_emulator_up) { 645 insn_emulator_up->SetInstruction(GetOpcode(), GetAddress(), nullptr); 646 return insn_emulator_up->EvaluateInstruction(0); 647 } 648 649 return false; 650 } 651 652 bool Instruction::CanSetBreakpoint () { 653 return !HasDelaySlot(); 654 } 655 656 bool Instruction::HasDelaySlot() { 657 // Default is false. 658 return false; 659 } 660 661 OptionValueSP Instruction::ReadArray(FILE *in_file, Stream *out_stream, 662 OptionValue::Type data_type) { 663 bool done = false; 664 char buffer[1024]; 665 666 auto option_value_sp = std::make_shared<OptionValueArray>(1u << data_type); 667 668 int idx = 0; 669 while (!done) { 670 if (!fgets(buffer, 1023, in_file)) { 671 out_stream->Printf( 672 "Instruction::ReadArray: Error reading file (fgets).\n"); 673 option_value_sp.reset(); 674 return option_value_sp; 675 } 676 677 std::string line(buffer); 678 679 size_t len = line.size(); 680 if (line[len - 1] == '\n') { 681 line[len - 1] = '\0'; 682 line.resize(len - 1); 683 } 684 685 if ((line.size() == 1) && line[0] == ']') { 686 done = true; 687 line.clear(); 688 } 689 690 if (!line.empty()) { 691 std::string value; 692 static RegularExpression g_reg_exp( 693 llvm::StringRef("^[ \t]*([^ \t]+)[ \t]*$")); 694 llvm::SmallVector<llvm::StringRef, 2> matches; 695 if (g_reg_exp.Execute(line, &matches)) 696 value = matches[1].str(); 697 else 698 value = line; 699 700 OptionValueSP data_value_sp; 701 switch (data_type) { 702 case OptionValue::eTypeUInt64: 703 data_value_sp = std::make_shared<OptionValueUInt64>(0, 0); 704 data_value_sp->SetValueFromString(value); 705 break; 706 // Other types can be added later as needed. 707 default: 708 data_value_sp = std::make_shared<OptionValueString>(value.c_str(), ""); 709 break; 710 } 711 712 option_value_sp->GetAsArray()->InsertValue(idx, data_value_sp); 713 ++idx; 714 } 715 } 716 717 return option_value_sp; 718 } 719 720 OptionValueSP Instruction::ReadDictionary(FILE *in_file, Stream *out_stream) { 721 bool done = false; 722 char buffer[1024]; 723 724 auto option_value_sp = std::make_shared<OptionValueDictionary>(); 725 static ConstString encoding_key("data_encoding"); 726 OptionValue::Type data_type = OptionValue::eTypeInvalid; 727 728 while (!done) { 729 // Read the next line in the file 730 if (!fgets(buffer, 1023, in_file)) { 731 out_stream->Printf( 732 "Instruction::ReadDictionary: Error reading file (fgets).\n"); 733 option_value_sp.reset(); 734 return option_value_sp; 735 } 736 737 // Check to see if the line contains the end-of-dictionary marker ("}") 738 std::string line(buffer); 739 740 size_t len = line.size(); 741 if (line[len - 1] == '\n') { 742 line[len - 1] = '\0'; 743 line.resize(len - 1); 744 } 745 746 if ((line.size() == 1) && (line[0] == '}')) { 747 done = true; 748 line.clear(); 749 } 750 751 // Try to find a key-value pair in the current line and add it to the 752 // dictionary. 753 if (!line.empty()) { 754 static RegularExpression g_reg_exp(llvm::StringRef( 755 "^[ \t]*([a-zA-Z_][a-zA-Z0-9_]*)[ \t]*=[ \t]*(.*)[ \t]*$")); 756 757 llvm::SmallVector<llvm::StringRef, 3> matches; 758 759 bool reg_exp_success = g_reg_exp.Execute(line, &matches); 760 std::string key; 761 std::string value; 762 if (reg_exp_success) { 763 key = matches[1].str(); 764 value = matches[2].str(); 765 } else { 766 out_stream->Printf("Instruction::ReadDictionary: Failure executing " 767 "regular expression.\n"); 768 option_value_sp.reset(); 769 return option_value_sp; 770 } 771 772 ConstString const_key(key.c_str()); 773 // Check value to see if it's the start of an array or dictionary. 774 775 lldb::OptionValueSP value_sp; 776 assert(value.empty() == false); 777 assert(key.empty() == false); 778 779 if (value[0] == '{') { 780 assert(value.size() == 1); 781 // value is a dictionary 782 value_sp = ReadDictionary(in_file, out_stream); 783 if (!value_sp) { 784 option_value_sp.reset(); 785 return option_value_sp; 786 } 787 } else if (value[0] == '[') { 788 assert(value.size() == 1); 789 // value is an array 790 value_sp = ReadArray(in_file, out_stream, data_type); 791 if (!value_sp) { 792 option_value_sp.reset(); 793 return option_value_sp; 794 } 795 // We've used the data_type to read an array; re-set the type to 796 // Invalid 797 data_type = OptionValue::eTypeInvalid; 798 } else if ((value[0] == '0') && (value[1] == 'x')) { 799 value_sp = std::make_shared<OptionValueUInt64>(0, 0); 800 value_sp->SetValueFromString(value); 801 } else { 802 size_t len = value.size(); 803 if ((value[0] == '"') && (value[len - 1] == '"')) 804 value = value.substr(1, len - 2); 805 value_sp = std::make_shared<OptionValueString>(value.c_str(), ""); 806 } 807 808 if (const_key == encoding_key) { 809 // A 'data_encoding=..." is NOT a normal key-value pair; it is meta-data 810 // indicating the 811 // data type of an upcoming array (usually the next bit of data to be 812 // read in). 813 if (strcmp(value.c_str(), "uint32_t") == 0) 814 data_type = OptionValue::eTypeUInt64; 815 } else 816 option_value_sp->GetAsDictionary()->SetValueForKey(const_key, value_sp, 817 false); 818 } 819 } 820 821 return option_value_sp; 822 } 823 824 bool Instruction::TestEmulation(Stream *out_stream, const char *file_name) { 825 if (!out_stream) 826 return false; 827 828 if (!file_name) { 829 out_stream->Printf("Instruction::TestEmulation: Missing file_name."); 830 return false; 831 } 832 FILE *test_file = FileSystem::Instance().Fopen(file_name, "r"); 833 if (!test_file) { 834 out_stream->Printf( 835 "Instruction::TestEmulation: Attempt to open test file failed."); 836 return false; 837 } 838 839 char buffer[256]; 840 if (!fgets(buffer, 255, test_file)) { 841 out_stream->Printf( 842 "Instruction::TestEmulation: Error reading first line of test file.\n"); 843 fclose(test_file); 844 return false; 845 } 846 847 if (strncmp(buffer, "InstructionEmulationState={", 27) != 0) { 848 out_stream->Printf("Instructin::TestEmulation: Test file does not contain " 849 "emulation state dictionary\n"); 850 fclose(test_file); 851 return false; 852 } 853 854 // Read all the test information from the test file into an 855 // OptionValueDictionary. 856 857 OptionValueSP data_dictionary_sp(ReadDictionary(test_file, out_stream)); 858 if (!data_dictionary_sp) { 859 out_stream->Printf( 860 "Instruction::TestEmulation: Error reading Dictionary Object.\n"); 861 fclose(test_file); 862 return false; 863 } 864 865 fclose(test_file); 866 867 OptionValueDictionary *data_dictionary = 868 data_dictionary_sp->GetAsDictionary(); 869 static ConstString description_key("assembly_string"); 870 static ConstString triple_key("triple"); 871 872 OptionValueSP value_sp = data_dictionary->GetValueForKey(description_key); 873 874 if (!value_sp) { 875 out_stream->Printf("Instruction::TestEmulation: Test file does not " 876 "contain description string.\n"); 877 return false; 878 } 879 880 SetDescription(value_sp->GetStringValue()); 881 882 value_sp = data_dictionary->GetValueForKey(triple_key); 883 if (!value_sp) { 884 out_stream->Printf( 885 "Instruction::TestEmulation: Test file does not contain triple.\n"); 886 return false; 887 } 888 889 ArchSpec arch; 890 arch.SetTriple(llvm::Triple(value_sp->GetStringValue())); 891 892 bool success = false; 893 std::unique_ptr<EmulateInstruction> insn_emulator_up( 894 EmulateInstruction::FindPlugin(arch, eInstructionTypeAny, nullptr)); 895 if (insn_emulator_up) 896 success = 897 insn_emulator_up->TestEmulation(out_stream, arch, data_dictionary); 898 899 if (success) 900 out_stream->Printf("Emulation test succeeded."); 901 else 902 out_stream->Printf("Emulation test failed."); 903 904 return success; 905 } 906 907 bool Instruction::Emulate( 908 const ArchSpec &arch, uint32_t evaluate_options, void *baton, 909 EmulateInstruction::ReadMemoryCallback read_mem_callback, 910 EmulateInstruction::WriteMemoryCallback write_mem_callback, 911 EmulateInstruction::ReadRegisterCallback read_reg_callback, 912 EmulateInstruction::WriteRegisterCallback write_reg_callback) { 913 std::unique_ptr<EmulateInstruction> insn_emulator_up( 914 EmulateInstruction::FindPlugin(arch, eInstructionTypeAny, nullptr)); 915 if (insn_emulator_up) { 916 insn_emulator_up->SetBaton(baton); 917 insn_emulator_up->SetCallbacks(read_mem_callback, write_mem_callback, 918 read_reg_callback, write_reg_callback); 919 insn_emulator_up->SetInstruction(GetOpcode(), GetAddress(), nullptr); 920 return insn_emulator_up->EvaluateInstruction(evaluate_options); 921 } 922 923 return false; 924 } 925 926 uint32_t Instruction::GetData(DataExtractor &data) { 927 return m_opcode.GetData(data); 928 } 929 930 InstructionList::InstructionList() : m_instructions() {} 931 932 InstructionList::~InstructionList() = default; 933 934 size_t InstructionList::GetSize() const { return m_instructions.size(); } 935 936 uint32_t InstructionList::GetMaxOpcocdeByteSize() const { 937 uint32_t max_inst_size = 0; 938 collection::const_iterator pos, end; 939 for (pos = m_instructions.begin(), end = m_instructions.end(); pos != end; 940 ++pos) { 941 uint32_t inst_size = (*pos)->GetOpcode().GetByteSize(); 942 if (max_inst_size < inst_size) 943 max_inst_size = inst_size; 944 } 945 return max_inst_size; 946 } 947 948 InstructionSP InstructionList::GetInstructionAtIndex(size_t idx) const { 949 InstructionSP inst_sp; 950 if (idx < m_instructions.size()) 951 inst_sp = m_instructions[idx]; 952 return inst_sp; 953 } 954 955 InstructionSP InstructionList::GetInstructionAtAddress(const Address &address) { 956 uint32_t index = GetIndexOfInstructionAtAddress(address); 957 if (index != UINT32_MAX) 958 return GetInstructionAtIndex(index); 959 return nullptr; 960 } 961 962 void InstructionList::Dump(Stream *s, bool show_address, bool show_bytes, 963 const ExecutionContext *exe_ctx) { 964 const uint32_t max_opcode_byte_size = GetMaxOpcocdeByteSize(); 965 collection::const_iterator pos, begin, end; 966 967 const FormatEntity::Entry *disassembly_format = nullptr; 968 FormatEntity::Entry format; 969 if (exe_ctx && exe_ctx->HasTargetScope()) { 970 disassembly_format = 971 exe_ctx->GetTargetRef().GetDebugger().GetDisassemblyFormat(); 972 } else { 973 FormatEntity::Parse("${addr}: ", format); 974 disassembly_format = &format; 975 } 976 977 for (begin = m_instructions.begin(), end = m_instructions.end(), pos = begin; 978 pos != end; ++pos) { 979 if (pos != begin) 980 s->EOL(); 981 (*pos)->Dump(s, max_opcode_byte_size, show_address, show_bytes, exe_ctx, 982 nullptr, nullptr, disassembly_format, 0); 983 } 984 } 985 986 void InstructionList::Clear() { m_instructions.clear(); } 987 988 void InstructionList::Append(lldb::InstructionSP &inst_sp) { 989 if (inst_sp) 990 m_instructions.push_back(inst_sp); 991 } 992 993 uint32_t 994 InstructionList::GetIndexOfNextBranchInstruction(uint32_t start, 995 bool ignore_calls, 996 bool *found_calls) const { 997 size_t num_instructions = m_instructions.size(); 998 999 uint32_t next_branch = UINT32_MAX; 1000 1001 if (found_calls) 1002 *found_calls = false; 1003 for (size_t i = start; i < num_instructions; i++) { 1004 if (m_instructions[i]->DoesBranch()) { 1005 if (ignore_calls && m_instructions[i]->IsCall()) { 1006 if (found_calls) 1007 *found_calls = true; 1008 continue; 1009 } 1010 next_branch = i; 1011 break; 1012 } 1013 } 1014 1015 return next_branch; 1016 } 1017 1018 uint32_t 1019 InstructionList::GetIndexOfInstructionAtAddress(const Address &address) { 1020 size_t num_instructions = m_instructions.size(); 1021 uint32_t index = UINT32_MAX; 1022 for (size_t i = 0; i < num_instructions; i++) { 1023 if (m_instructions[i]->GetAddress() == address) { 1024 index = i; 1025 break; 1026 } 1027 } 1028 return index; 1029 } 1030 1031 uint32_t 1032 InstructionList::GetIndexOfInstructionAtLoadAddress(lldb::addr_t load_addr, 1033 Target &target) { 1034 Address address; 1035 address.SetLoadAddress(load_addr, &target); 1036 return GetIndexOfInstructionAtAddress(address); 1037 } 1038 1039 size_t Disassembler::ParseInstructions(Target &target, Address start, 1040 Limit limit, Stream *error_strm_ptr, 1041 bool prefer_file_cache) { 1042 m_instruction_list.Clear(); 1043 1044 if (!start.IsValid()) 1045 return 0; 1046 1047 start = ResolveAddress(target, start); 1048 1049 addr_t byte_size = limit.value; 1050 if (limit.kind == Limit::Instructions) 1051 byte_size *= m_arch.GetMaximumOpcodeByteSize(); 1052 auto data_sp = std::make_shared<DataBufferHeap>(byte_size, '\0'); 1053 1054 Status error; 1055 lldb::addr_t load_addr = LLDB_INVALID_ADDRESS; 1056 const size_t bytes_read = 1057 target.ReadMemory(start, prefer_file_cache, data_sp->GetBytes(), 1058 data_sp->GetByteSize(), error, &load_addr); 1059 const bool data_from_file = load_addr == LLDB_INVALID_ADDRESS; 1060 1061 if (bytes_read == 0) { 1062 if (error_strm_ptr) { 1063 if (const char *error_cstr = error.AsCString()) 1064 error_strm_ptr->Printf("error: %s\n", error_cstr); 1065 } 1066 return 0; 1067 } 1068 1069 if (bytes_read != data_sp->GetByteSize()) 1070 data_sp->SetByteSize(bytes_read); 1071 DataExtractor data(data_sp, m_arch.GetByteOrder(), 1072 m_arch.GetAddressByteSize()); 1073 return DecodeInstructions(start, data, 0, 1074 limit.kind == Limit::Instructions ? limit.value 1075 : UINT32_MAX, 1076 false, data_from_file); 1077 } 1078 1079 // Disassembler copy constructor 1080 Disassembler::Disassembler(const ArchSpec &arch, const char *flavor) 1081 : m_arch(arch), m_instruction_list(), m_base_addr(LLDB_INVALID_ADDRESS), 1082 m_flavor() { 1083 if (flavor == nullptr) 1084 m_flavor.assign("default"); 1085 else 1086 m_flavor.assign(flavor); 1087 1088 // If this is an arm variant that can only include thumb (T16, T32) 1089 // instructions, force the arch triple to be "thumbv.." instead of "armv..." 1090 if (arch.IsAlwaysThumbInstructions()) { 1091 std::string thumb_arch_name(arch.GetTriple().getArchName().str()); 1092 // Replace "arm" with "thumb" so we get all thumb variants correct 1093 if (thumb_arch_name.size() > 3) { 1094 thumb_arch_name.erase(0, 3); 1095 thumb_arch_name.insert(0, "thumb"); 1096 } 1097 m_arch.SetTriple(thumb_arch_name.c_str()); 1098 } 1099 } 1100 1101 Disassembler::~Disassembler() = default; 1102 1103 InstructionList &Disassembler::GetInstructionList() { 1104 return m_instruction_list; 1105 } 1106 1107 const InstructionList &Disassembler::GetInstructionList() const { 1108 return m_instruction_list; 1109 } 1110 1111 // Class PseudoInstruction 1112 1113 PseudoInstruction::PseudoInstruction() 1114 : Instruction(Address(), AddressClass::eUnknown), m_description() {} 1115 1116 PseudoInstruction::~PseudoInstruction() = default; 1117 1118 bool PseudoInstruction::DoesBranch() { 1119 // This is NOT a valid question for a pseudo instruction. 1120 return false; 1121 } 1122 1123 bool PseudoInstruction::HasDelaySlot() { 1124 // This is NOT a valid question for a pseudo instruction. 1125 return false; 1126 } 1127 1128 size_t PseudoInstruction::Decode(const lldb_private::Disassembler &disassembler, 1129 const lldb_private::DataExtractor &data, 1130 lldb::offset_t data_offset) { 1131 return m_opcode.GetByteSize(); 1132 } 1133 1134 void PseudoInstruction::SetOpcode(size_t opcode_size, void *opcode_data) { 1135 if (!opcode_data) 1136 return; 1137 1138 switch (opcode_size) { 1139 case 8: { 1140 uint8_t value8 = *((uint8_t *)opcode_data); 1141 m_opcode.SetOpcode8(value8, eByteOrderInvalid); 1142 break; 1143 } 1144 case 16: { 1145 uint16_t value16 = *((uint16_t *)opcode_data); 1146 m_opcode.SetOpcode16(value16, eByteOrderInvalid); 1147 break; 1148 } 1149 case 32: { 1150 uint32_t value32 = *((uint32_t *)opcode_data); 1151 m_opcode.SetOpcode32(value32, eByteOrderInvalid); 1152 break; 1153 } 1154 case 64: { 1155 uint64_t value64 = *((uint64_t *)opcode_data); 1156 m_opcode.SetOpcode64(value64, eByteOrderInvalid); 1157 break; 1158 } 1159 default: 1160 break; 1161 } 1162 } 1163 1164 void PseudoInstruction::SetDescription(llvm::StringRef description) { 1165 m_description = std::string(description); 1166 } 1167 1168 Instruction::Operand Instruction::Operand::BuildRegister(ConstString &r) { 1169 Operand ret; 1170 ret.m_type = Type::Register; 1171 ret.m_register = r; 1172 return ret; 1173 } 1174 1175 Instruction::Operand Instruction::Operand::BuildImmediate(lldb::addr_t imm, 1176 bool neg) { 1177 Operand ret; 1178 ret.m_type = Type::Immediate; 1179 ret.m_immediate = imm; 1180 ret.m_negative = neg; 1181 return ret; 1182 } 1183 1184 Instruction::Operand Instruction::Operand::BuildImmediate(int64_t imm) { 1185 Operand ret; 1186 ret.m_type = Type::Immediate; 1187 if (imm < 0) { 1188 ret.m_immediate = -imm; 1189 ret.m_negative = true; 1190 } else { 1191 ret.m_immediate = imm; 1192 ret.m_negative = false; 1193 } 1194 return ret; 1195 } 1196 1197 Instruction::Operand 1198 Instruction::Operand::BuildDereference(const Operand &ref) { 1199 Operand ret; 1200 ret.m_type = Type::Dereference; 1201 ret.m_children = {ref}; 1202 return ret; 1203 } 1204 1205 Instruction::Operand Instruction::Operand::BuildSum(const Operand &lhs, 1206 const Operand &rhs) { 1207 Operand ret; 1208 ret.m_type = Type::Sum; 1209 ret.m_children = {lhs, rhs}; 1210 return ret; 1211 } 1212 1213 Instruction::Operand Instruction::Operand::BuildProduct(const Operand &lhs, 1214 const Operand &rhs) { 1215 Operand ret; 1216 ret.m_type = Type::Product; 1217 ret.m_children = {lhs, rhs}; 1218 return ret; 1219 } 1220 1221 std::function<bool(const Instruction::Operand &)> 1222 lldb_private::OperandMatchers::MatchBinaryOp( 1223 std::function<bool(const Instruction::Operand &)> base, 1224 std::function<bool(const Instruction::Operand &)> left, 1225 std::function<bool(const Instruction::Operand &)> right) { 1226 return [base, left, right](const Instruction::Operand &op) -> bool { 1227 return (base(op) && op.m_children.size() == 2 && 1228 ((left(op.m_children[0]) && right(op.m_children[1])) || 1229 (left(op.m_children[1]) && right(op.m_children[0])))); 1230 }; 1231 } 1232 1233 std::function<bool(const Instruction::Operand &)> 1234 lldb_private::OperandMatchers::MatchUnaryOp( 1235 std::function<bool(const Instruction::Operand &)> base, 1236 std::function<bool(const Instruction::Operand &)> child) { 1237 return [base, child](const Instruction::Operand &op) -> bool { 1238 return (base(op) && op.m_children.size() == 1 && child(op.m_children[0])); 1239 }; 1240 } 1241 1242 std::function<bool(const Instruction::Operand &)> 1243 lldb_private::OperandMatchers::MatchRegOp(const RegisterInfo &info) { 1244 return [&info](const Instruction::Operand &op) { 1245 return (op.m_type == Instruction::Operand::Type::Register && 1246 (op.m_register == ConstString(info.name) || 1247 op.m_register == ConstString(info.alt_name))); 1248 }; 1249 } 1250 1251 std::function<bool(const Instruction::Operand &)> 1252 lldb_private::OperandMatchers::FetchRegOp(ConstString ®) { 1253 return [®](const Instruction::Operand &op) { 1254 if (op.m_type != Instruction::Operand::Type::Register) { 1255 return false; 1256 } 1257 reg = op.m_register; 1258 return true; 1259 }; 1260 } 1261 1262 std::function<bool(const Instruction::Operand &)> 1263 lldb_private::OperandMatchers::MatchImmOp(int64_t imm) { 1264 return [imm](const Instruction::Operand &op) { 1265 return (op.m_type == Instruction::Operand::Type::Immediate && 1266 ((op.m_negative && op.m_immediate == (uint64_t)-imm) || 1267 (!op.m_negative && op.m_immediate == (uint64_t)imm))); 1268 }; 1269 } 1270 1271 std::function<bool(const Instruction::Operand &)> 1272 lldb_private::OperandMatchers::FetchImmOp(int64_t &imm) { 1273 return [&imm](const Instruction::Operand &op) { 1274 if (op.m_type != Instruction::Operand::Type::Immediate) { 1275 return false; 1276 } 1277 if (op.m_negative) { 1278 imm = -((int64_t)op.m_immediate); 1279 } else { 1280 imm = ((int64_t)op.m_immediate); 1281 } 1282 return true; 1283 }; 1284 } 1285 1286 std::function<bool(const Instruction::Operand &)> 1287 lldb_private::OperandMatchers::MatchOpType(Instruction::Operand::Type type) { 1288 return [type](const Instruction::Operand &op) { return op.m_type == type; }; 1289 } 1290