1 //===-- Disassembler.h ------------------------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #ifndef liblldb_Disassembler_h_ 11 #define liblldb_Disassembler_h_ 12 13 #include "lldb/Core/Address.h" 14 #include "lldb/Core/EmulateInstruction.h" 15 #include "lldb/Core/FormatEntity.h" 16 #include "lldb/Core/Opcode.h" 17 #include "lldb/Core/PluginInterface.h" 18 #include "lldb/Interpreter/OptionValue.h" 19 #include "lldb/Symbol/LineEntry.h" 20 #include "lldb/Target/ExecutionContext.h" 21 #include "lldb/Utility/ArchSpec.h" 22 #include "lldb/Utility/ConstString.h" 23 #include "lldb/Utility/FileSpec.h" 24 #include "lldb/lldb-defines.h" 25 #include "lldb/lldb-forward.h" 26 #include "lldb/lldb-private-enumerations.h" 27 #include "lldb/lldb-types.h" 28 29 #include "llvm/ADT/StringRef.h" 30 31 #include <functional> 32 #include <map> 33 #include <memory> 34 #include <set> 35 #include <string> 36 #include <vector> 37 38 #include <stddef.h> 39 #include <stdint.h> 40 #include <stdio.h> 41 42 namespace lldb_private { 43 class AddressRange; 44 } 45 namespace lldb_private { 46 class DataExtractor; 47 } 48 namespace lldb_private { 49 class Debugger; 50 } 51 namespace lldb_private { 52 class Disassembler; 53 } 54 namespace lldb_private { 55 class Module; 56 } 57 namespace lldb_private { 58 class Stream; 59 } 60 namespace lldb_private { 61 class SymbolContext; 62 } 63 namespace lldb_private { 64 class SymbolContextList; 65 } 66 namespace lldb_private { 67 class Target; 68 } 69 namespace lldb_private { 70 struct RegisterInfo; 71 } 72 namespace llvm { 73 template <typename T> class SmallVectorImpl; 74 } 75 76 namespace lldb_private { 77 78 class Instruction { 79 public: 80 Instruction(const Address &address, 81 AddressClass addr_class = AddressClass::eInvalid); 82 83 virtual ~Instruction(); 84 GetAddress()85 const Address &GetAddress() const { return m_address; } 86 GetMnemonic(const ExecutionContext * exe_ctx)87 const char *GetMnemonic(const ExecutionContext *exe_ctx) { 88 CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx); 89 return m_opcode_name.c_str(); 90 } 91 GetOperands(const ExecutionContext * exe_ctx)92 const char *GetOperands(const ExecutionContext *exe_ctx) { 93 CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx); 94 return m_mnemonics.c_str(); 95 } 96 GetComment(const ExecutionContext * exe_ctx)97 const char *GetComment(const ExecutionContext *exe_ctx) { 98 CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx); 99 return m_comment.c_str(); 100 } 101 102 virtual void 103 CalculateMnemonicOperandsAndComment(const ExecutionContext *exe_ctx) = 0; 104 105 AddressClass GetAddressClass(); 106 SetAddress(const Address & addr)107 void SetAddress(const Address &addr) { 108 // Invalidate the address class to lazily discover it if we need to. 109 m_address_class = AddressClass::eInvalid; 110 m_address = addr; 111 } 112 113 //------------------------------------------------------------------ 114 /// Dump the text representation of this Instruction to a Stream 115 /// 116 /// Print the (optional) address, (optional) bytes, opcode, 117 /// operands, and instruction comments to a stream. 118 /// 119 /// @param[in] s 120 /// The Stream to add the text to. 121 /// 122 /// @param[in] show_address 123 /// Whether the address (using disassembly_addr_format_spec formatting) 124 /// should be printed. 125 /// 126 /// @param[in] show_bytes 127 /// Whether the bytes of the assembly instruction should be printed. 128 /// 129 /// @param[in] max_opcode_byte_size 130 /// The size (in bytes) of the largest instruction in the list that 131 /// we are printing (for text justification/alignment purposes) 132 /// Only needed if show_bytes is true. 133 /// 134 /// @param[in] exe_ctx 135 /// The current execution context, if available. May be used in 136 /// the assembling of the operands+comments for this instruction. 137 /// Pass NULL if not applicable. 138 /// 139 /// @param[in] sym_ctx 140 /// The SymbolContext for this instruction. 141 /// Pass NULL if not available/computed. 142 /// Only needed if show_address is true. 143 /// 144 /// @param[in] prev_sym_ctx 145 /// The SymbolContext for the previous instruction. Depending on 146 /// the disassembly address format specification, a change in 147 /// Symbol / Function may mean that a line is printed with the new 148 /// symbol/function name. 149 /// Pass NULL if unavailable, or if this is the first instruction of 150 /// the InstructionList. 151 /// Only needed if show_address is true. 152 /// 153 /// @param[in] disassembly_addr_format 154 /// The format specification for how addresses are printed. 155 /// Only needed if show_address is true. 156 /// 157 /// @param[in] max_address_text_size 158 /// The length of the longest address string at the start of the 159 /// disassembly line that will be printed (the 160 /// Debugger::FormatDisassemblerAddress() string) 161 /// so this method can properly align the instruction opcodes. 162 /// May be 0 to indicate no indentation/alignment of the opcodes. 163 //------------------------------------------------------------------ 164 virtual void Dump(Stream *s, uint32_t max_opcode_byte_size, bool show_address, 165 bool show_bytes, const ExecutionContext *exe_ctx, 166 const SymbolContext *sym_ctx, 167 const SymbolContext *prev_sym_ctx, 168 const FormatEntity::Entry *disassembly_addr_format, 169 size_t max_address_text_size); 170 171 virtual bool DoesBranch() = 0; 172 173 virtual bool HasDelaySlot(); 174 175 bool CanSetBreakpoint (); 176 177 virtual size_t Decode(const Disassembler &disassembler, 178 const DataExtractor &data, 179 lldb::offset_t data_offset) = 0; 180 SetDescription(llvm::StringRef)181 virtual void SetDescription(llvm::StringRef) { 182 } // May be overridden in sub-classes that have descriptions. 183 184 lldb::OptionValueSP ReadArray(FILE *in_file, Stream *out_stream, 185 OptionValue::Type data_type); 186 187 lldb::OptionValueSP ReadDictionary(FILE *in_file, Stream *out_stream); 188 189 bool DumpEmulation(const ArchSpec &arch); 190 191 virtual bool TestEmulation(Stream *stream, const char *test_file_name); 192 193 bool Emulate(const ArchSpec &arch, uint32_t evaluate_options, void *baton, 194 EmulateInstruction::ReadMemoryCallback read_mem_callback, 195 EmulateInstruction::WriteMemoryCallback write_mem_calback, 196 EmulateInstruction::ReadRegisterCallback read_reg_callback, 197 EmulateInstruction::WriteRegisterCallback write_reg_callback); 198 GetOpcode()199 const Opcode &GetOpcode() const { return m_opcode; } 200 201 uint32_t GetData(DataExtractor &data); 202 203 struct Operand { 204 enum class Type { 205 Invalid = 0, 206 Register, 207 Immediate, 208 Dereference, 209 Sum, 210 Product 211 } m_type = Type::Invalid; 212 std::vector<Operand> m_children; 213 lldb::addr_t m_immediate = 0; 214 ConstString m_register; 215 bool m_negative = false; 216 bool m_clobbered = false; 217 IsValidOperand218 bool IsValid() { return m_type != Type::Invalid; } 219 220 static Operand BuildRegister(ConstString &r); 221 static Operand BuildImmediate(lldb::addr_t imm, bool neg); 222 static Operand BuildImmediate(int64_t imm); 223 static Operand BuildDereference(const Operand &ref); 224 static Operand BuildSum(const Operand &lhs, const Operand &rhs); 225 static Operand BuildProduct(const Operand &lhs, const Operand &rhs); 226 }; 227 ParseOperands(llvm::SmallVectorImpl<Operand> & operands)228 virtual bool ParseOperands(llvm::SmallVectorImpl<Operand> &operands) { 229 return false; 230 } 231 IsCall()232 virtual bool IsCall() { return false; } 233 234 protected: 235 Address m_address; // The section offset address of this instruction 236 // We include an address class in the Instruction class to 237 // allow the instruction specify the 238 // AddressClass::eCodeAlternateISA (currently used for 239 // thumb), and also to specify data (AddressClass::eData). 240 // The usual value will be AddressClass::eCode, but often 241 // when disassembling memory, you might run into data. 242 // This can help us to disassemble appropriately. 243 private: 244 AddressClass m_address_class; // Use GetAddressClass () accessor function! 245 246 protected: 247 Opcode m_opcode; // The opcode for this instruction 248 std::string m_opcode_name; 249 std::string m_mnemonics; 250 std::string m_comment; 251 bool m_calculated_strings; 252 253 void CalculateMnemonicOperandsAndCommentIfNeeded(const ExecutionContext * exe_ctx)254 CalculateMnemonicOperandsAndCommentIfNeeded(const ExecutionContext *exe_ctx) { 255 if (!m_calculated_strings) { 256 m_calculated_strings = true; 257 CalculateMnemonicOperandsAndComment(exe_ctx); 258 } 259 } 260 }; 261 262 namespace OperandMatchers { 263 std::function<bool(const Instruction::Operand &)> 264 MatchBinaryOp(std::function<bool(const Instruction::Operand &)> base, 265 std::function<bool(const Instruction::Operand &)> left, 266 std::function<bool(const Instruction::Operand &)> right); 267 268 std::function<bool(const Instruction::Operand &)> 269 MatchUnaryOp(std::function<bool(const Instruction::Operand &)> base, 270 std::function<bool(const Instruction::Operand &)> child); 271 272 std::function<bool(const Instruction::Operand &)> 273 MatchRegOp(const RegisterInfo &info); 274 275 std::function<bool(const Instruction::Operand &)> FetchRegOp(ConstString ®); 276 277 std::function<bool(const Instruction::Operand &)> MatchImmOp(int64_t imm); 278 279 std::function<bool(const Instruction::Operand &)> FetchImmOp(int64_t &imm); 280 281 std::function<bool(const Instruction::Operand &)> 282 MatchOpType(Instruction::Operand::Type type); 283 } 284 285 class InstructionList { 286 public: 287 InstructionList(); 288 ~InstructionList(); 289 290 size_t GetSize() const; 291 292 uint32_t GetMaxOpcocdeByteSize() const; 293 294 lldb::InstructionSP GetInstructionAtIndex(size_t idx) const; 295 296 uint32_t GetIndexOfNextBranchInstruction(uint32_t start, 297 Target &target) const; 298 299 uint32_t GetIndexOfInstructionAtLoadAddress(lldb::addr_t load_addr, 300 Target &target); 301 302 uint32_t GetIndexOfInstructionAtAddress(const Address &addr); 303 304 void Clear(); 305 306 void Append(lldb::InstructionSP &inst_sp); 307 308 void Dump(Stream *s, bool show_address, bool show_bytes, 309 const ExecutionContext *exe_ctx); 310 311 private: 312 typedef std::vector<lldb::InstructionSP> collection; 313 typedef collection::iterator iterator; 314 typedef collection::const_iterator const_iterator; 315 316 collection m_instructions; 317 }; 318 319 class PseudoInstruction : public Instruction { 320 public: 321 PseudoInstruction(); 322 323 ~PseudoInstruction() override; 324 325 bool DoesBranch() override; 326 327 bool HasDelaySlot() override; 328 CalculateMnemonicOperandsAndComment(const ExecutionContext * exe_ctx)329 void CalculateMnemonicOperandsAndComment( 330 const ExecutionContext *exe_ctx) override { 331 // TODO: fill this in and put opcode name into Instruction::m_opcode_name, 332 // mnemonic into Instruction::m_mnemonics, and any comment into 333 // Instruction::m_comment 334 } 335 336 size_t Decode(const Disassembler &disassembler, const DataExtractor &data, 337 lldb::offset_t data_offset) override; 338 339 void SetOpcode(size_t opcode_size, void *opcode_data); 340 341 void SetDescription(llvm::StringRef description) override; 342 343 protected: 344 std::string m_description; 345 346 DISALLOW_COPY_AND_ASSIGN(PseudoInstruction); 347 }; 348 349 class Disassembler : public std::enable_shared_from_this<Disassembler>, 350 public PluginInterface { 351 public: 352 enum { 353 eOptionNone = 0u, 354 eOptionShowBytes = (1u << 0), 355 eOptionRawOuput = (1u << 1), 356 eOptionMarkPCSourceLine = (1u << 2), // Mark the source line that contains 357 // the current PC (mixed mode only) 358 eOptionMarkPCAddress = 359 (1u << 3) // Mark the disassembly line the contains the PC 360 }; 361 362 enum HexImmediateStyle { 363 eHexStyleC, 364 eHexStyleAsm, 365 }; 366 367 // FindPlugin should be lax about the flavor string (it is too annoying to 368 // have various internal uses of the disassembler fail because the global 369 // flavor string gets set wrong. Instead, if you get a flavor string you 370 // don't understand, use the default. Folks who care to check can use the 371 // FlavorValidForArchSpec method on the disassembler they got back. 372 static lldb::DisassemblerSP 373 FindPlugin(const ArchSpec &arch, const char *flavor, const char *plugin_name); 374 375 // This version will use the value in the Target settings if flavor is NULL; 376 static lldb::DisassemblerSP 377 FindPluginForTarget(const lldb::TargetSP target_sp, const ArchSpec &arch, 378 const char *flavor, const char *plugin_name); 379 380 static lldb::DisassemblerSP 381 DisassembleRange(const ArchSpec &arch, const char *plugin_name, 382 const char *flavor, const ExecutionContext &exe_ctx, 383 const AddressRange &disasm_range, bool prefer_file_cache); 384 385 static lldb::DisassemblerSP 386 DisassembleBytes(const ArchSpec &arch, const char *plugin_name, 387 const char *flavor, const Address &start, const void *bytes, 388 size_t length, uint32_t max_num_instructions, 389 bool data_from_file); 390 391 static bool Disassemble(Debugger &debugger, const ArchSpec &arch, 392 const char *plugin_name, const char *flavor, 393 const ExecutionContext &exe_ctx, 394 const AddressRange &range, uint32_t num_instructions, 395 bool mixed_source_and_assembly, 396 uint32_t num_mixed_context_lines, uint32_t options, 397 Stream &strm); 398 399 static bool Disassemble(Debugger &debugger, const ArchSpec &arch, 400 const char *plugin_name, const char *flavor, 401 const ExecutionContext &exe_ctx, const Address &start, 402 uint32_t num_instructions, 403 bool mixed_source_and_assembly, 404 uint32_t num_mixed_context_lines, uint32_t options, 405 Stream &strm); 406 407 static size_t 408 Disassemble(Debugger &debugger, const ArchSpec &arch, const char *plugin_name, 409 const char *flavor, const ExecutionContext &exe_ctx, 410 SymbolContextList &sc_list, uint32_t num_instructions, 411 bool mixed_source_and_assembly, uint32_t num_mixed_context_lines, 412 uint32_t options, Stream &strm); 413 414 static bool 415 Disassemble(Debugger &debugger, const ArchSpec &arch, const char *plugin_name, 416 const char *flavor, const ExecutionContext &exe_ctx, 417 const ConstString &name, Module *module, 418 uint32_t num_instructions, bool mixed_source_and_assembly, 419 uint32_t num_mixed_context_lines, uint32_t options, Stream &strm); 420 421 static bool 422 Disassemble(Debugger &debugger, const ArchSpec &arch, const char *plugin_name, 423 const char *flavor, const ExecutionContext &exe_ctx, 424 uint32_t num_instructions, bool mixed_source_and_assembly, 425 uint32_t num_mixed_context_lines, uint32_t options, Stream &strm); 426 427 //------------------------------------------------------------------ 428 // Constructors and Destructors 429 //------------------------------------------------------------------ 430 Disassembler(const ArchSpec &arch, const char *flavor); 431 ~Disassembler() override; 432 433 typedef const char *(*SummaryCallback)(const Instruction &inst, 434 ExecutionContext *exe_context, 435 void *user_data); 436 437 static bool PrintInstructions(Disassembler *disasm_ptr, Debugger &debugger, 438 const ArchSpec &arch, 439 const ExecutionContext &exe_ctx, 440 uint32_t num_instructions, 441 bool mixed_source_and_assembly, 442 uint32_t num_mixed_context_lines, 443 uint32_t options, Stream &strm); 444 445 size_t ParseInstructions(const ExecutionContext *exe_ctx, 446 const AddressRange &range, Stream *error_strm_ptr, 447 bool prefer_file_cache); 448 449 size_t ParseInstructions(const ExecutionContext *exe_ctx, 450 const Address &range, uint32_t num_instructions, 451 bool prefer_file_cache); 452 453 virtual size_t DecodeInstructions(const Address &base_addr, 454 const DataExtractor &data, 455 lldb::offset_t data_offset, 456 size_t num_instructions, bool append, 457 bool data_from_file) = 0; 458 459 InstructionList &GetInstructionList(); 460 461 const InstructionList &GetInstructionList() const; 462 GetArchitecture()463 const ArchSpec &GetArchitecture() const { return m_arch; } 464 GetFlavor()465 const char *GetFlavor() const { return m_flavor.c_str(); } 466 467 virtual bool FlavorValidForArchSpec(const lldb_private::ArchSpec &arch, 468 const char *flavor) = 0; 469 470 protected: 471 // SourceLine and SourceLinesToDisplay structures are only used in the mixed 472 // source and assembly display methods internal to this class. 473 474 struct SourceLine { 475 FileSpec file; 476 uint32_t line; 477 uint32_t column; 478 SourceLineSourceLine479 SourceLine() : file(), line(LLDB_INVALID_LINE_NUMBER), column(0) {} 480 481 bool operator==(const SourceLine &rhs) const { 482 return file == rhs.file && line == rhs.line && rhs.column == column; 483 } 484 485 bool operator!=(const SourceLine &rhs) const { 486 return file != rhs.file || line != rhs.line || column != rhs.column; 487 } 488 IsValidSourceLine489 bool IsValid() const { return line != LLDB_INVALID_LINE_NUMBER; } 490 }; 491 492 struct SourceLinesToDisplay { 493 std::vector<SourceLine> lines; 494 495 // index of the "current" source line, if we want to highlight that when 496 // displaying the source lines. (as opposed to the surrounding source 497 // lines provided to give context) 498 size_t current_source_line; 499 500 // Whether to print a blank line at the end of the source lines. 501 bool print_source_context_end_eol; 502 SourceLinesToDisplaySourceLinesToDisplay503 SourceLinesToDisplay() 504 : lines(), current_source_line(-1), print_source_context_end_eol(true) { 505 } 506 }; 507 508 // Get the function's declaration line number, hopefully a line number 509 // earlier than the opening curly brace at the start of the function body. 510 static SourceLine GetFunctionDeclLineEntry(const SymbolContext &sc); 511 512 // Add the provided SourceLine to the map of filenames-to-source-lines-seen. 513 static void AddLineToSourceLineTables( 514 SourceLine &line, 515 std::map<FileSpec, std::set<uint32_t>> &source_lines_seen); 516 517 // Given a source line, determine if we should print it when we're doing 518 // mixed source & assembly output. We're currently using the 519 // target.process.thread.step-avoid-regexp setting (which is used for 520 // stepping over inlined STL functions by default) to determine what source 521 // lines to avoid showing. 522 // 523 // Returns true if this source line should be elided (if the source line 524 // should not be displayed). 525 static bool 526 ElideMixedSourceAndDisassemblyLine(const ExecutionContext &exe_ctx, 527 const SymbolContext &sc, SourceLine &line); 528 529 static bool ElideMixedSourceAndDisassemblyLine(const ExecutionContext & exe_ctx,const SymbolContext & sc,LineEntry & line)530 ElideMixedSourceAndDisassemblyLine(const ExecutionContext &exe_ctx, 531 const SymbolContext &sc, LineEntry &line) { 532 SourceLine sl; 533 sl.file = line.file; 534 sl.line = line.line; 535 sl.column = line.column; 536 return ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, sl); 537 }; 538 539 //------------------------------------------------------------------ 540 // Classes that inherit from Disassembler can see and modify these 541 //------------------------------------------------------------------ 542 ArchSpec m_arch; 543 InstructionList m_instruction_list; 544 lldb::addr_t m_base_addr; 545 std::string m_flavor; 546 547 private: 548 //------------------------------------------------------------------ 549 // For Disassembler only 550 //------------------------------------------------------------------ 551 DISALLOW_COPY_AND_ASSIGN(Disassembler); 552 }; 553 554 } // namespace lldb_private 555 556 #endif // liblldb_Disassembler_h_ 557