1 //===-- Disassembler.cpp --------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "lldb/Core/Disassembler.h"
10 
11 #include "lldb/Core/AddressRange.h"
12 #include "lldb/Core/Debugger.h"
13 #include "lldb/Core/EmulateInstruction.h"
14 #include "lldb/Core/Mangled.h"
15 #include "lldb/Core/Module.h"
16 #include "lldb/Core/ModuleList.h"
17 #include "lldb/Core/PluginManager.h"
18 #include "lldb/Core/SourceManager.h"
19 #include "lldb/Host/FileSystem.h"
20 #include "lldb/Interpreter/OptionValue.h"
21 #include "lldb/Interpreter/OptionValueArray.h"
22 #include "lldb/Interpreter/OptionValueDictionary.h"
23 #include "lldb/Interpreter/OptionValueRegex.h"
24 #include "lldb/Interpreter/OptionValueString.h"
25 #include "lldb/Interpreter/OptionValueUInt64.h"
26 #include "lldb/Symbol/Function.h"
27 #include "lldb/Symbol/Symbol.h"
28 #include "lldb/Symbol/SymbolContext.h"
29 #include "lldb/Target/ExecutionContext.h"
30 #include "lldb/Target/SectionLoadList.h"
31 #include "lldb/Target/StackFrame.h"
32 #include "lldb/Target/Target.h"
33 #include "lldb/Target/Thread.h"
34 #include "lldb/Utility/DataBufferHeap.h"
35 #include "lldb/Utility/DataExtractor.h"
36 #include "lldb/Utility/RegularExpression.h"
37 #include "lldb/Utility/Status.h"
38 #include "lldb/Utility/Stream.h"
39 #include "lldb/Utility/StreamString.h"
40 #include "lldb/Utility/Timer.h"
41 #include "lldb/lldb-private-enumerations.h"
42 #include "lldb/lldb-private-interfaces.h"
43 #include "lldb/lldb-private-types.h"
44 #include "llvm/ADT/Triple.h"
45 #include "llvm/Support/Compiler.h"
46 
47 #include <cstdint>
48 #include <cstring>
49 #include <utility>
50 
51 #include <assert.h>
52 
53 #define DEFAULT_DISASM_BYTE_SIZE 32
54 
55 using namespace lldb;
56 using namespace lldb_private;
57 
58 DisassemblerSP Disassembler::FindPlugin(const ArchSpec &arch,
59                                         const char *flavor,
60                                         const char *plugin_name) {
61   static Timer::Category func_cat(LLVM_PRETTY_FUNCTION);
62   Timer scoped_timer(func_cat,
63                      "Disassembler::FindPlugin (arch = %s, plugin_name = %s)",
64                      arch.GetArchitectureName(), plugin_name);
65 
66   DisassemblerCreateInstance create_callback = nullptr;
67 
68   if (plugin_name) {
69     ConstString const_plugin_name(plugin_name);
70     create_callback = PluginManager::GetDisassemblerCreateCallbackForPluginName(
71         const_plugin_name);
72     if (create_callback) {
73       DisassemblerSP disassembler_sp(create_callback(arch, flavor));
74 
75       if (disassembler_sp)
76         return disassembler_sp;
77     }
78   } else {
79     for (uint32_t idx = 0;
80          (create_callback = PluginManager::GetDisassemblerCreateCallbackAtIndex(
81               idx)) != nullptr;
82          ++idx) {
83       DisassemblerSP disassembler_sp(create_callback(arch, flavor));
84 
85       if (disassembler_sp)
86         return disassembler_sp;
87     }
88   }
89   return DisassemblerSP();
90 }
91 
92 DisassemblerSP Disassembler::FindPluginForTarget(const Target &target,
93                                                  const ArchSpec &arch,
94                                                  const char *flavor,
95                                                  const char *plugin_name) {
96   if (flavor == nullptr) {
97     // FIXME - we don't have the mechanism in place to do per-architecture
98     // settings.  But since we know that for now we only support flavors on x86
99     // & x86_64,
100     if (arch.GetTriple().getArch() == llvm::Triple::x86 ||
101         arch.GetTriple().getArch() == llvm::Triple::x86_64)
102       flavor = target.GetDisassemblyFlavor();
103   }
104   return FindPlugin(arch, flavor, plugin_name);
105 }
106 
107 static Address ResolveAddress(Target &target, const Address &addr) {
108   if (!addr.IsSectionOffset()) {
109     Address resolved_addr;
110     // If we weren't passed in a section offset address range, try and resolve
111     // it to something
112     bool is_resolved = target.GetSectionLoadList().IsEmpty()
113                            ? target.GetImages().ResolveFileAddress(
114                                  addr.GetOffset(), resolved_addr)
115                            : target.GetSectionLoadList().ResolveLoadAddress(
116                                  addr.GetOffset(), resolved_addr);
117 
118     // We weren't able to resolve the address, just treat it as a raw address
119     if (is_resolved && resolved_addr.IsValid())
120       return resolved_addr;
121   }
122   return addr;
123 }
124 
125 lldb::DisassemblerSP Disassembler::DisassembleRange(
126     const ArchSpec &arch, const char *plugin_name, const char *flavor,
127     Target &target, const AddressRange &range, bool prefer_file_cache) {
128   if (range.GetByteSize() <= 0)
129     return {};
130 
131   if (!range.GetBaseAddress().IsValid())
132     return {};
133 
134   lldb::DisassemblerSP disasm_sp =
135       Disassembler::FindPluginForTarget(target, arch, flavor, plugin_name);
136 
137   if (!disasm_sp)
138     return {};
139 
140   const size_t bytes_disassembled = disasm_sp->ParseInstructions(
141       target, range.GetBaseAddress(), {Limit::Bytes, range.GetByteSize()},
142       nullptr, prefer_file_cache);
143   if (bytes_disassembled == 0)
144     return {};
145 
146   return disasm_sp;
147 }
148 
149 lldb::DisassemblerSP
150 Disassembler::DisassembleBytes(const ArchSpec &arch, const char *plugin_name,
151                                const char *flavor, const Address &start,
152                                const void *src, size_t src_len,
153                                uint32_t num_instructions, bool data_from_file) {
154   if (!src)
155     return {};
156 
157   lldb::DisassemblerSP disasm_sp =
158       Disassembler::FindPlugin(arch, flavor, plugin_name);
159 
160   if (!disasm_sp)
161     return {};
162 
163   DataExtractor data(src, src_len, arch.GetByteOrder(),
164                      arch.GetAddressByteSize());
165 
166   (void)disasm_sp->DecodeInstructions(start, data, 0, num_instructions, false,
167                                       data_from_file);
168   return disasm_sp;
169 }
170 
171 bool Disassembler::Disassemble(Debugger &debugger, const ArchSpec &arch,
172                                const char *plugin_name, const char *flavor,
173                                const ExecutionContext &exe_ctx,
174                                const Address &address, Limit limit,
175                                bool mixed_source_and_assembly,
176                                uint32_t num_mixed_context_lines,
177                                uint32_t options, Stream &strm) {
178   if (!exe_ctx.GetTargetPtr())
179     return false;
180 
181   lldb::DisassemblerSP disasm_sp(Disassembler::FindPluginForTarget(
182       exe_ctx.GetTargetRef(), arch, flavor, plugin_name));
183   if (!disasm_sp)
184     return false;
185 
186   const bool prefer_file_cache = false;
187   size_t bytes_disassembled = disasm_sp->ParseInstructions(
188       exe_ctx.GetTargetRef(), address, limit, &strm, prefer_file_cache);
189   if (bytes_disassembled == 0)
190     return false;
191 
192   disasm_sp->PrintInstructions(debugger, arch, exe_ctx,
193                                mixed_source_and_assembly,
194                                num_mixed_context_lines, options, strm);
195   return true;
196 }
197 
198 Disassembler::SourceLine
199 Disassembler::GetFunctionDeclLineEntry(const SymbolContext &sc) {
200   if (!sc.function)
201     return {};
202 
203   if (!sc.line_entry.IsValid())
204     return {};
205 
206   LineEntry prologue_end_line = sc.line_entry;
207   FileSpec func_decl_file;
208   uint32_t func_decl_line;
209   sc.function->GetStartLineSourceInfo(func_decl_file, func_decl_line);
210 
211   if (func_decl_file != prologue_end_line.file &&
212       func_decl_file != prologue_end_line.original_file)
213     return {};
214 
215   SourceLine decl_line;
216   decl_line.file = func_decl_file;
217   decl_line.line = func_decl_line;
218   // TODO: Do we care about column on these entries?  If so, we need to plumb
219   // that through GetStartLineSourceInfo.
220   decl_line.column = 0;
221   return decl_line;
222 }
223 
224 void Disassembler::AddLineToSourceLineTables(
225     SourceLine &line,
226     std::map<FileSpec, std::set<uint32_t>> &source_lines_seen) {
227   if (line.IsValid()) {
228     auto source_lines_seen_pos = source_lines_seen.find(line.file);
229     if (source_lines_seen_pos == source_lines_seen.end()) {
230       std::set<uint32_t> lines;
231       lines.insert(line.line);
232       source_lines_seen.emplace(line.file, lines);
233     } else {
234       source_lines_seen_pos->second.insert(line.line);
235     }
236   }
237 }
238 
239 bool Disassembler::ElideMixedSourceAndDisassemblyLine(
240     const ExecutionContext &exe_ctx, const SymbolContext &sc,
241     SourceLine &line) {
242 
243   // TODO: should we also check target.process.thread.step-avoid-libraries ?
244 
245   const RegularExpression *avoid_regex = nullptr;
246 
247   // Skip any line #0 entries - they are implementation details
248   if (line.line == 0)
249     return false;
250 
251   ThreadSP thread_sp = exe_ctx.GetThreadSP();
252   if (thread_sp) {
253     avoid_regex = thread_sp->GetSymbolsToAvoidRegexp();
254   } else {
255     TargetSP target_sp = exe_ctx.GetTargetSP();
256     if (target_sp) {
257       Status error;
258       OptionValueSP value_sp = target_sp->GetDebugger().GetPropertyValue(
259           &exe_ctx, "target.process.thread.step-avoid-regexp", false, error);
260       if (value_sp && value_sp->GetType() == OptionValue::eTypeRegex) {
261         OptionValueRegex *re = value_sp->GetAsRegex();
262         if (re) {
263           avoid_regex = re->GetCurrentValue();
264         }
265       }
266     }
267   }
268   if (avoid_regex && sc.symbol != nullptr) {
269     const char *function_name =
270         sc.GetFunctionName(Mangled::ePreferDemangledWithoutArguments)
271             .GetCString();
272     if (function_name && avoid_regex->Execute(function_name)) {
273       // skip this source line
274       return true;
275     }
276   }
277   // don't skip this source line
278   return false;
279 }
280 
281 void Disassembler::PrintInstructions(Debugger &debugger, const ArchSpec &arch,
282                                      const ExecutionContext &exe_ctx,
283                                      bool mixed_source_and_assembly,
284                                      uint32_t num_mixed_context_lines,
285                                      uint32_t options, Stream &strm) {
286   // We got some things disassembled...
287   size_t num_instructions_found = GetInstructionList().GetSize();
288 
289   const uint32_t max_opcode_byte_size =
290       GetInstructionList().GetMaxOpcocdeByteSize();
291   SymbolContext sc;
292   SymbolContext prev_sc;
293   AddressRange current_source_line_range;
294   const Address *pc_addr_ptr = nullptr;
295   StackFrame *frame = exe_ctx.GetFramePtr();
296 
297   TargetSP target_sp(exe_ctx.GetTargetSP());
298   SourceManager &source_manager =
299       target_sp ? target_sp->GetSourceManager() : debugger.GetSourceManager();
300 
301   if (frame) {
302     pc_addr_ptr = &frame->GetFrameCodeAddress();
303   }
304   const uint32_t scope =
305       eSymbolContextLineEntry | eSymbolContextFunction | eSymbolContextSymbol;
306   const bool use_inline_block_range = false;
307 
308   const FormatEntity::Entry *disassembly_format = nullptr;
309   FormatEntity::Entry format;
310   if (exe_ctx.HasTargetScope()) {
311     disassembly_format =
312         exe_ctx.GetTargetRef().GetDebugger().GetDisassemblyFormat();
313   } else {
314     FormatEntity::Parse("${addr}: ", format);
315     disassembly_format = &format;
316   }
317 
318   // First pass: step through the list of instructions, find how long the
319   // initial addresses strings are, insert padding in the second pass so the
320   // opcodes all line up nicely.
321 
322   // Also build up the source line mapping if this is mixed source & assembly
323   // mode. Calculate the source line for each assembly instruction (eliding
324   // inlined functions which the user wants to skip).
325 
326   std::map<FileSpec, std::set<uint32_t>> source_lines_seen;
327   Symbol *previous_symbol = nullptr;
328 
329   size_t address_text_size = 0;
330   for (size_t i = 0; i < num_instructions_found; ++i) {
331     Instruction *inst = GetInstructionList().GetInstructionAtIndex(i).get();
332     if (inst) {
333       const Address &addr = inst->GetAddress();
334       ModuleSP module_sp(addr.GetModule());
335       if (module_sp) {
336         const SymbolContextItem resolve_mask = eSymbolContextFunction |
337                                                eSymbolContextSymbol |
338                                                eSymbolContextLineEntry;
339         uint32_t resolved_mask =
340             module_sp->ResolveSymbolContextForAddress(addr, resolve_mask, sc);
341         if (resolved_mask) {
342           StreamString strmstr;
343           Debugger::FormatDisassemblerAddress(disassembly_format, &sc, nullptr,
344                                               &exe_ctx, &addr, strmstr);
345           size_t cur_line = strmstr.GetSizeOfLastLine();
346           if (cur_line > address_text_size)
347             address_text_size = cur_line;
348 
349           // Add entries to our "source_lines_seen" map+set which list which
350           // sources lines occur in this disassembly session.  We will print
351           // lines of context around a source line, but we don't want to print
352           // a source line that has a line table entry of its own - we'll leave
353           // that source line to be printed when it actually occurs in the
354           // disassembly.
355 
356           if (mixed_source_and_assembly && sc.line_entry.IsValid()) {
357             if (sc.symbol != previous_symbol) {
358               SourceLine decl_line = GetFunctionDeclLineEntry(sc);
359               if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, decl_line))
360                 AddLineToSourceLineTables(decl_line, source_lines_seen);
361             }
362             if (sc.line_entry.IsValid()) {
363               SourceLine this_line;
364               this_line.file = sc.line_entry.file;
365               this_line.line = sc.line_entry.line;
366               this_line.column = sc.line_entry.column;
367               if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, this_line))
368                 AddLineToSourceLineTables(this_line, source_lines_seen);
369             }
370           }
371         }
372         sc.Clear(false);
373       }
374     }
375   }
376 
377   previous_symbol = nullptr;
378   SourceLine previous_line;
379   for (size_t i = 0; i < num_instructions_found; ++i) {
380     Instruction *inst = GetInstructionList().GetInstructionAtIndex(i).get();
381 
382     if (inst) {
383       const Address &addr = inst->GetAddress();
384       const bool inst_is_at_pc = pc_addr_ptr && addr == *pc_addr_ptr;
385       SourceLinesToDisplay source_lines_to_display;
386 
387       prev_sc = sc;
388 
389       ModuleSP module_sp(addr.GetModule());
390       if (module_sp) {
391         uint32_t resolved_mask = module_sp->ResolveSymbolContextForAddress(
392             addr, eSymbolContextEverything, sc);
393         if (resolved_mask) {
394           if (mixed_source_and_assembly) {
395 
396             // If we've started a new function (non-inlined), print all of the
397             // source lines from the function declaration until the first line
398             // table entry - typically the opening curly brace of the function.
399             if (previous_symbol != sc.symbol) {
400               // The default disassembly format puts an extra blank line
401               // between functions - so when we're displaying the source
402               // context for a function, we don't want to add a blank line
403               // after the source context or we'll end up with two of them.
404               if (previous_symbol != nullptr)
405                 source_lines_to_display.print_source_context_end_eol = false;
406 
407               previous_symbol = sc.symbol;
408               if (sc.function && sc.line_entry.IsValid()) {
409                 LineEntry prologue_end_line = sc.line_entry;
410                 if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc,
411                                                         prologue_end_line)) {
412                   FileSpec func_decl_file;
413                   uint32_t func_decl_line;
414                   sc.function->GetStartLineSourceInfo(func_decl_file,
415                                                       func_decl_line);
416                   if (func_decl_file == prologue_end_line.file ||
417                       func_decl_file == prologue_end_line.original_file) {
418                     // Add all the lines between the function declaration and
419                     // the first non-prologue source line to the list of lines
420                     // to print.
421                     for (uint32_t lineno = func_decl_line;
422                          lineno <= prologue_end_line.line; lineno++) {
423                       SourceLine this_line;
424                       this_line.file = func_decl_file;
425                       this_line.line = lineno;
426                       source_lines_to_display.lines.push_back(this_line);
427                     }
428                     // Mark the last line as the "current" one.  Usually this
429                     // is the open curly brace.
430                     if (source_lines_to_display.lines.size() > 0)
431                       source_lines_to_display.current_source_line =
432                           source_lines_to_display.lines.size() - 1;
433                   }
434                 }
435               }
436               sc.GetAddressRange(scope, 0, use_inline_block_range,
437                                  current_source_line_range);
438             }
439 
440             // If we've left a previous source line's address range, print a
441             // new source line
442             if (!current_source_line_range.ContainsFileAddress(addr)) {
443               sc.GetAddressRange(scope, 0, use_inline_block_range,
444                                  current_source_line_range);
445 
446               if (sc != prev_sc && sc.comp_unit && sc.line_entry.IsValid()) {
447                 SourceLine this_line;
448                 this_line.file = sc.line_entry.file;
449                 this_line.line = sc.line_entry.line;
450 
451                 if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc,
452                                                         this_line)) {
453                   // Only print this source line if it is different from the
454                   // last source line we printed.  There may have been inlined
455                   // functions between these lines that we elided, resulting in
456                   // the same line being printed twice in a row for a
457                   // contiguous block of assembly instructions.
458                   if (this_line != previous_line) {
459 
460                     std::vector<uint32_t> previous_lines;
461                     for (uint32_t i = 0;
462                          i < num_mixed_context_lines &&
463                          (this_line.line - num_mixed_context_lines) > 0;
464                          i++) {
465                       uint32_t line =
466                           this_line.line - num_mixed_context_lines + i;
467                       auto pos = source_lines_seen.find(this_line.file);
468                       if (pos != source_lines_seen.end()) {
469                         if (pos->second.count(line) == 1) {
470                           previous_lines.clear();
471                         } else {
472                           previous_lines.push_back(line);
473                         }
474                       }
475                     }
476                     for (size_t i = 0; i < previous_lines.size(); i++) {
477                       SourceLine previous_line;
478                       previous_line.file = this_line.file;
479                       previous_line.line = previous_lines[i];
480                       auto pos = source_lines_seen.find(previous_line.file);
481                       if (pos != source_lines_seen.end()) {
482                         pos->second.insert(previous_line.line);
483                       }
484                       source_lines_to_display.lines.push_back(previous_line);
485                     }
486 
487                     source_lines_to_display.lines.push_back(this_line);
488                     source_lines_to_display.current_source_line =
489                         source_lines_to_display.lines.size() - 1;
490 
491                     for (uint32_t i = 0; i < num_mixed_context_lines; i++) {
492                       SourceLine next_line;
493                       next_line.file = this_line.file;
494                       next_line.line = this_line.line + i + 1;
495                       auto pos = source_lines_seen.find(next_line.file);
496                       if (pos != source_lines_seen.end()) {
497                         if (pos->second.count(next_line.line) == 1)
498                           break;
499                         pos->second.insert(next_line.line);
500                       }
501                       source_lines_to_display.lines.push_back(next_line);
502                     }
503                   }
504                   previous_line = this_line;
505                 }
506               }
507             }
508           }
509         } else {
510           sc.Clear(true);
511         }
512       }
513 
514       if (source_lines_to_display.lines.size() > 0) {
515         strm.EOL();
516         for (size_t idx = 0; idx < source_lines_to_display.lines.size();
517              idx++) {
518           SourceLine ln = source_lines_to_display.lines[idx];
519           const char *line_highlight = "";
520           if (inst_is_at_pc && (options & eOptionMarkPCSourceLine)) {
521             line_highlight = "->";
522           } else if (idx == source_lines_to_display.current_source_line) {
523             line_highlight = "**";
524           }
525           source_manager.DisplaySourceLinesWithLineNumbers(
526               ln.file, ln.line, ln.column, 0, 0, line_highlight, &strm);
527         }
528         if (source_lines_to_display.print_source_context_end_eol)
529           strm.EOL();
530       }
531 
532       const bool show_bytes = (options & eOptionShowBytes) != 0;
533       inst->Dump(&strm, max_opcode_byte_size, true, show_bytes, &exe_ctx, &sc,
534                  &prev_sc, nullptr, address_text_size);
535       strm.EOL();
536     } else {
537       break;
538     }
539   }
540 }
541 
542 bool Disassembler::Disassemble(Debugger &debugger, const ArchSpec &arch,
543                                StackFrame &frame, Stream &strm) {
544   AddressRange range;
545   SymbolContext sc(
546       frame.GetSymbolContext(eSymbolContextFunction | eSymbolContextSymbol));
547   if (sc.function) {
548     range = sc.function->GetAddressRange();
549   } else if (sc.symbol && sc.symbol->ValueIsAddress()) {
550     range.GetBaseAddress() = sc.symbol->GetAddressRef();
551     range.SetByteSize(sc.symbol->GetByteSize());
552   } else {
553     range.GetBaseAddress() = frame.GetFrameCodeAddress();
554   }
555 
556     if (range.GetBaseAddress().IsValid() && range.GetByteSize() == 0)
557       range.SetByteSize(DEFAULT_DISASM_BYTE_SIZE);
558 
559     Disassembler::Limit limit = {Disassembler::Limit::Bytes,
560                                  range.GetByteSize()};
561     if (limit.value == 0)
562       limit.value = DEFAULT_DISASM_BYTE_SIZE;
563 
564     return Disassemble(debugger, arch, nullptr, nullptr, frame,
565                        range.GetBaseAddress(), limit, false, 0, 0, strm);
566 }
567 
568 Instruction::Instruction(const Address &address, AddressClass addr_class)
569     : m_address(address), m_address_class(addr_class), m_opcode(),
570       m_calculated_strings(false) {}
571 
572 Instruction::~Instruction() = default;
573 
574 AddressClass Instruction::GetAddressClass() {
575   if (m_address_class == AddressClass::eInvalid)
576     m_address_class = m_address.GetAddressClass();
577   return m_address_class;
578 }
579 
580 void Instruction::Dump(lldb_private::Stream *s, uint32_t max_opcode_byte_size,
581                        bool show_address, bool show_bytes,
582                        const ExecutionContext *exe_ctx,
583                        const SymbolContext *sym_ctx,
584                        const SymbolContext *prev_sym_ctx,
585                        const FormatEntity::Entry *disassembly_addr_format,
586                        size_t max_address_text_size) {
587   size_t opcode_column_width = 7;
588   const size_t operand_column_width = 25;
589 
590   CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx);
591 
592   StreamString ss;
593 
594   if (show_address) {
595     Debugger::FormatDisassemblerAddress(disassembly_addr_format, sym_ctx,
596                                         prev_sym_ctx, exe_ctx, &m_address, ss);
597     ss.FillLastLineToColumn(max_address_text_size, ' ');
598   }
599 
600   if (show_bytes) {
601     if (m_opcode.GetType() == Opcode::eTypeBytes) {
602       // x86_64 and i386 are the only ones that use bytes right now so pad out
603       // the byte dump to be able to always show 15 bytes (3 chars each) plus a
604       // space
605       if (max_opcode_byte_size > 0)
606         m_opcode.Dump(&ss, max_opcode_byte_size * 3 + 1);
607       else
608         m_opcode.Dump(&ss, 15 * 3 + 1);
609     } else {
610       // Else, we have ARM or MIPS which can show up to a uint32_t 0x00000000
611       // (10 spaces) plus two for padding...
612       if (max_opcode_byte_size > 0)
613         m_opcode.Dump(&ss, max_opcode_byte_size * 3 + 1);
614       else
615         m_opcode.Dump(&ss, 12);
616     }
617   }
618 
619   const size_t opcode_pos = ss.GetSizeOfLastLine();
620 
621   // The default opcode size of 7 characters is plenty for most architectures
622   // but some like arm can pull out the occasional vqrshrun.s16.  We won't get
623   // consistent column spacing in these cases, unfortunately.
624   if (m_opcode_name.length() >= opcode_column_width) {
625     opcode_column_width = m_opcode_name.length() + 1;
626   }
627 
628   ss.PutCString(m_opcode_name);
629   ss.FillLastLineToColumn(opcode_pos + opcode_column_width, ' ');
630   ss.PutCString(m_mnemonics);
631 
632   if (!m_comment.empty()) {
633     ss.FillLastLineToColumn(
634         opcode_pos + opcode_column_width + operand_column_width, ' ');
635     ss.PutCString(" ; ");
636     ss.PutCString(m_comment);
637   }
638   s->PutCString(ss.GetString());
639 }
640 
641 bool Instruction::DumpEmulation(const ArchSpec &arch) {
642   std::unique_ptr<EmulateInstruction> insn_emulator_up(
643       EmulateInstruction::FindPlugin(arch, eInstructionTypeAny, nullptr));
644   if (insn_emulator_up) {
645     insn_emulator_up->SetInstruction(GetOpcode(), GetAddress(), nullptr);
646     return insn_emulator_up->EvaluateInstruction(0);
647   }
648 
649   return false;
650 }
651 
652 bool Instruction::CanSetBreakpoint () {
653   return !HasDelaySlot();
654 }
655 
656 bool Instruction::HasDelaySlot() {
657   // Default is false.
658   return false;
659 }
660 
661 OptionValueSP Instruction::ReadArray(FILE *in_file, Stream *out_stream,
662                                      OptionValue::Type data_type) {
663   bool done = false;
664   char buffer[1024];
665 
666   auto option_value_sp = std::make_shared<OptionValueArray>(1u << data_type);
667 
668   int idx = 0;
669   while (!done) {
670     if (!fgets(buffer, 1023, in_file)) {
671       out_stream->Printf(
672           "Instruction::ReadArray:  Error reading file (fgets).\n");
673       option_value_sp.reset();
674       return option_value_sp;
675     }
676 
677     std::string line(buffer);
678 
679     size_t len = line.size();
680     if (line[len - 1] == '\n') {
681       line[len - 1] = '\0';
682       line.resize(len - 1);
683     }
684 
685     if ((line.size() == 1) && line[0] == ']') {
686       done = true;
687       line.clear();
688     }
689 
690     if (!line.empty()) {
691       std::string value;
692       static RegularExpression g_reg_exp(
693           llvm::StringRef("^[ \t]*([^ \t]+)[ \t]*$"));
694       llvm::SmallVector<llvm::StringRef, 2> matches;
695       if (g_reg_exp.Execute(line, &matches))
696         value = matches[1].str();
697       else
698         value = line;
699 
700       OptionValueSP data_value_sp;
701       switch (data_type) {
702       case OptionValue::eTypeUInt64:
703         data_value_sp = std::make_shared<OptionValueUInt64>(0, 0);
704         data_value_sp->SetValueFromString(value);
705         break;
706       // Other types can be added later as needed.
707       default:
708         data_value_sp = std::make_shared<OptionValueString>(value.c_str(), "");
709         break;
710       }
711 
712       option_value_sp->GetAsArray()->InsertValue(idx, data_value_sp);
713       ++idx;
714     }
715   }
716 
717   return option_value_sp;
718 }
719 
720 OptionValueSP Instruction::ReadDictionary(FILE *in_file, Stream *out_stream) {
721   bool done = false;
722   char buffer[1024];
723 
724   auto option_value_sp = std::make_shared<OptionValueDictionary>();
725   static ConstString encoding_key("data_encoding");
726   OptionValue::Type data_type = OptionValue::eTypeInvalid;
727 
728   while (!done) {
729     // Read the next line in the file
730     if (!fgets(buffer, 1023, in_file)) {
731       out_stream->Printf(
732           "Instruction::ReadDictionary: Error reading file (fgets).\n");
733       option_value_sp.reset();
734       return option_value_sp;
735     }
736 
737     // Check to see if the line contains the end-of-dictionary marker ("}")
738     std::string line(buffer);
739 
740     size_t len = line.size();
741     if (line[len - 1] == '\n') {
742       line[len - 1] = '\0';
743       line.resize(len - 1);
744     }
745 
746     if ((line.size() == 1) && (line[0] == '}')) {
747       done = true;
748       line.clear();
749     }
750 
751     // Try to find a key-value pair in the current line and add it to the
752     // dictionary.
753     if (!line.empty()) {
754       static RegularExpression g_reg_exp(llvm::StringRef(
755           "^[ \t]*([a-zA-Z_][a-zA-Z0-9_]*)[ \t]*=[ \t]*(.*)[ \t]*$"));
756 
757       llvm::SmallVector<llvm::StringRef, 3> matches;
758 
759       bool reg_exp_success = g_reg_exp.Execute(line, &matches);
760       std::string key;
761       std::string value;
762       if (reg_exp_success) {
763         key = matches[1].str();
764         value = matches[2].str();
765       } else {
766         out_stream->Printf("Instruction::ReadDictionary: Failure executing "
767                            "regular expression.\n");
768         option_value_sp.reset();
769         return option_value_sp;
770       }
771 
772       ConstString const_key(key.c_str());
773       // Check value to see if it's the start of an array or dictionary.
774 
775       lldb::OptionValueSP value_sp;
776       assert(value.empty() == false);
777       assert(key.empty() == false);
778 
779       if (value[0] == '{') {
780         assert(value.size() == 1);
781         // value is a dictionary
782         value_sp = ReadDictionary(in_file, out_stream);
783         if (!value_sp) {
784           option_value_sp.reset();
785           return option_value_sp;
786         }
787       } else if (value[0] == '[') {
788         assert(value.size() == 1);
789         // value is an array
790         value_sp = ReadArray(in_file, out_stream, data_type);
791         if (!value_sp) {
792           option_value_sp.reset();
793           return option_value_sp;
794         }
795         // We've used the data_type to read an array; re-set the type to
796         // Invalid
797         data_type = OptionValue::eTypeInvalid;
798       } else if ((value[0] == '0') && (value[1] == 'x')) {
799         value_sp = std::make_shared<OptionValueUInt64>(0, 0);
800         value_sp->SetValueFromString(value);
801       } else {
802         size_t len = value.size();
803         if ((value[0] == '"') && (value[len - 1] == '"'))
804           value = value.substr(1, len - 2);
805         value_sp = std::make_shared<OptionValueString>(value.c_str(), "");
806       }
807 
808       if (const_key == encoding_key) {
809         // A 'data_encoding=..." is NOT a normal key-value pair; it is meta-data
810         // indicating the
811         // data type of an upcoming array (usually the next bit of data to be
812         // read in).
813         if (strcmp(value.c_str(), "uint32_t") == 0)
814           data_type = OptionValue::eTypeUInt64;
815       } else
816         option_value_sp->GetAsDictionary()->SetValueForKey(const_key, value_sp,
817                                                            false);
818     }
819   }
820 
821   return option_value_sp;
822 }
823 
824 bool Instruction::TestEmulation(Stream *out_stream, const char *file_name) {
825   if (!out_stream)
826     return false;
827 
828   if (!file_name) {
829     out_stream->Printf("Instruction::TestEmulation:  Missing file_name.");
830     return false;
831   }
832   FILE *test_file = FileSystem::Instance().Fopen(file_name, "r");
833   if (!test_file) {
834     out_stream->Printf(
835         "Instruction::TestEmulation: Attempt to open test file failed.");
836     return false;
837   }
838 
839   char buffer[256];
840   if (!fgets(buffer, 255, test_file)) {
841     out_stream->Printf(
842         "Instruction::TestEmulation: Error reading first line of test file.\n");
843     fclose(test_file);
844     return false;
845   }
846 
847   if (strncmp(buffer, "InstructionEmulationState={", 27) != 0) {
848     out_stream->Printf("Instructin::TestEmulation: Test file does not contain "
849                        "emulation state dictionary\n");
850     fclose(test_file);
851     return false;
852   }
853 
854   // Read all the test information from the test file into an
855   // OptionValueDictionary.
856 
857   OptionValueSP data_dictionary_sp(ReadDictionary(test_file, out_stream));
858   if (!data_dictionary_sp) {
859     out_stream->Printf(
860         "Instruction::TestEmulation:  Error reading Dictionary Object.\n");
861     fclose(test_file);
862     return false;
863   }
864 
865   fclose(test_file);
866 
867   OptionValueDictionary *data_dictionary =
868       data_dictionary_sp->GetAsDictionary();
869   static ConstString description_key("assembly_string");
870   static ConstString triple_key("triple");
871 
872   OptionValueSP value_sp = data_dictionary->GetValueForKey(description_key);
873 
874   if (!value_sp) {
875     out_stream->Printf("Instruction::TestEmulation:  Test file does not "
876                        "contain description string.\n");
877     return false;
878   }
879 
880   SetDescription(value_sp->GetStringValue());
881 
882   value_sp = data_dictionary->GetValueForKey(triple_key);
883   if (!value_sp) {
884     out_stream->Printf(
885         "Instruction::TestEmulation: Test file does not contain triple.\n");
886     return false;
887   }
888 
889   ArchSpec arch;
890   arch.SetTriple(llvm::Triple(value_sp->GetStringValue()));
891 
892   bool success = false;
893   std::unique_ptr<EmulateInstruction> insn_emulator_up(
894       EmulateInstruction::FindPlugin(arch, eInstructionTypeAny, nullptr));
895   if (insn_emulator_up)
896     success =
897         insn_emulator_up->TestEmulation(out_stream, arch, data_dictionary);
898 
899   if (success)
900     out_stream->Printf("Emulation test succeeded.");
901   else
902     out_stream->Printf("Emulation test failed.");
903 
904   return success;
905 }
906 
907 bool Instruction::Emulate(
908     const ArchSpec &arch, uint32_t evaluate_options, void *baton,
909     EmulateInstruction::ReadMemoryCallback read_mem_callback,
910     EmulateInstruction::WriteMemoryCallback write_mem_callback,
911     EmulateInstruction::ReadRegisterCallback read_reg_callback,
912     EmulateInstruction::WriteRegisterCallback write_reg_callback) {
913   std::unique_ptr<EmulateInstruction> insn_emulator_up(
914       EmulateInstruction::FindPlugin(arch, eInstructionTypeAny, nullptr));
915   if (insn_emulator_up) {
916     insn_emulator_up->SetBaton(baton);
917     insn_emulator_up->SetCallbacks(read_mem_callback, write_mem_callback,
918                                    read_reg_callback, write_reg_callback);
919     insn_emulator_up->SetInstruction(GetOpcode(), GetAddress(), nullptr);
920     return insn_emulator_up->EvaluateInstruction(evaluate_options);
921   }
922 
923   return false;
924 }
925 
926 uint32_t Instruction::GetData(DataExtractor &data) {
927   return m_opcode.GetData(data);
928 }
929 
// Creates an instruction list whose underlying collection is
// value-initialized (no instructions).
InstructionList::InstructionList() : m_instructions() {}
931 
// Compiler-generated destruction; the list needs no custom teardown here.
InstructionList::~InstructionList() = default;
933 
934 size_t InstructionList::GetSize() const { return m_instructions.size(); }
935 
936 uint32_t InstructionList::GetMaxOpcocdeByteSize() const {
937   uint32_t max_inst_size = 0;
938   collection::const_iterator pos, end;
939   for (pos = m_instructions.begin(), end = m_instructions.end(); pos != end;
940        ++pos) {
941     uint32_t inst_size = (*pos)->GetOpcode().GetByteSize();
942     if (max_inst_size < inst_size)
943       max_inst_size = inst_size;
944   }
945   return max_inst_size;
946 }
947 
948 InstructionSP InstructionList::GetInstructionAtIndex(size_t idx) const {
949   InstructionSP inst_sp;
950   if (idx < m_instructions.size())
951     inst_sp = m_instructions[idx];
952   return inst_sp;
953 }
954 
955 void InstructionList::Dump(Stream *s, bool show_address, bool show_bytes,
956                            const ExecutionContext *exe_ctx) {
957   const uint32_t max_opcode_byte_size = GetMaxOpcocdeByteSize();
958   collection::const_iterator pos, begin, end;
959 
960   const FormatEntity::Entry *disassembly_format = nullptr;
961   FormatEntity::Entry format;
962   if (exe_ctx && exe_ctx->HasTargetScope()) {
963     disassembly_format =
964         exe_ctx->GetTargetRef().GetDebugger().GetDisassemblyFormat();
965   } else {
966     FormatEntity::Parse("${addr}: ", format);
967     disassembly_format = &format;
968   }
969 
970   for (begin = m_instructions.begin(), end = m_instructions.end(), pos = begin;
971        pos != end; ++pos) {
972     if (pos != begin)
973       s->EOL();
974     (*pos)->Dump(s, max_opcode_byte_size, show_address, show_bytes, exe_ctx,
975                  nullptr, nullptr, disassembly_format, 0);
976   }
977 }
978 
// Removes every instruction from this list.
void InstructionList::Clear() { m_instructions.clear(); }
980 
981 void InstructionList::Append(lldb::InstructionSP &inst_sp) {
982   if (inst_sp)
983     m_instructions.push_back(inst_sp);
984 }
985 
986 uint32_t
987 InstructionList::GetIndexOfNextBranchInstruction(uint32_t start,
988                                                  bool ignore_calls,
989                                                  bool *found_calls) const {
990   size_t num_instructions = m_instructions.size();
991 
992   uint32_t next_branch = UINT32_MAX;
993 
994   if (found_calls)
995     *found_calls = false;
996   for (size_t i = start; i < num_instructions; i++) {
997     if (m_instructions[i]->DoesBranch()) {
998       if (ignore_calls && m_instructions[i]->IsCall()) {
999         if (found_calls)
1000           *found_calls = true;
1001         continue;
1002       }
1003       next_branch = i;
1004       break;
1005     }
1006   }
1007 
1008   return next_branch;
1009 }
1010 
1011 uint32_t
1012 InstructionList::GetIndexOfInstructionAtAddress(const Address &address) {
1013   size_t num_instructions = m_instructions.size();
1014   uint32_t index = UINT32_MAX;
1015   for (size_t i = 0; i < num_instructions; i++) {
1016     if (m_instructions[i]->GetAddress() == address) {
1017       index = i;
1018       break;
1019     }
1020   }
1021   return index;
1022 }
1023 
1024 uint32_t
1025 InstructionList::GetIndexOfInstructionAtLoadAddress(lldb::addr_t load_addr,
1026                                                     Target &target) {
1027   Address address;
1028   address.SetLoadAddress(load_addr, &target);
1029   return GetIndexOfInstructionAtAddress(address);
1030 }
1031 
1032 size_t Disassembler::ParseInstructions(Target &target, Address start,
1033                                        Limit limit, Stream *error_strm_ptr,
1034                                        bool prefer_file_cache) {
1035   m_instruction_list.Clear();
1036 
1037   if (!start.IsValid())
1038     return 0;
1039 
1040   start = ResolveAddress(target, start);
1041 
1042   addr_t byte_size = limit.value;
1043   if (limit.kind == Limit::Instructions)
1044     byte_size *= m_arch.GetMaximumOpcodeByteSize();
1045   auto data_sp = std::make_shared<DataBufferHeap>(byte_size, '\0');
1046 
1047   Status error;
1048   lldb::addr_t load_addr = LLDB_INVALID_ADDRESS;
1049   const size_t bytes_read =
1050       target.ReadMemory(start, prefer_file_cache, data_sp->GetBytes(),
1051                         data_sp->GetByteSize(), error, &load_addr);
1052   const bool data_from_file = load_addr == LLDB_INVALID_ADDRESS;
1053 
1054   if (bytes_read == 0) {
1055     if (error_strm_ptr) {
1056       if (const char *error_cstr = error.AsCString())
1057         error_strm_ptr->Printf("error: %s\n", error_cstr);
1058     }
1059     return 0;
1060   }
1061 
1062   if (bytes_read != data_sp->GetByteSize())
1063     data_sp->SetByteSize(bytes_read);
1064   DataExtractor data(data_sp, m_arch.GetByteOrder(),
1065                      m_arch.GetAddressByteSize());
1066   return DecodeInstructions(start, data, 0,
1067                             limit.kind == Limit::Instructions ? limit.value
1068                                                               : UINT32_MAX,
1069                             false, data_from_file);
1070 }
1071 
1072 // Disassembler copy constructor
1073 Disassembler::Disassembler(const ArchSpec &arch, const char *flavor)
1074     : m_arch(arch), m_instruction_list(), m_base_addr(LLDB_INVALID_ADDRESS),
1075       m_flavor() {
1076   if (flavor == nullptr)
1077     m_flavor.assign("default");
1078   else
1079     m_flavor.assign(flavor);
1080 
1081   // If this is an arm variant that can only include thumb (T16, T32)
1082   // instructions, force the arch triple to be "thumbv.." instead of "armv..."
1083   if (arch.IsAlwaysThumbInstructions()) {
1084     std::string thumb_arch_name(arch.GetTriple().getArchName().str());
1085     // Replace "arm" with "thumb" so we get all thumb variants correct
1086     if (thumb_arch_name.size() > 3) {
1087       thumb_arch_name.erase(0, 3);
1088       thumb_arch_name.insert(0, "thumb");
1089     }
1090     m_arch.SetTriple(thumb_arch_name.c_str());
1091   }
1092 }
1093 
// Compiler-generated destruction; no custom teardown is performed here.
Disassembler::~Disassembler() = default;
1095 
// Returns a mutable reference to the instruction list owned by this
// disassembler.
InstructionList &Disassembler::GetInstructionList() {
  return m_instruction_list;
}
1099 
// Returns a read-only reference to the instruction list owned by this
// disassembler.
const InstructionList &Disassembler::GetInstructionList() const {
  return m_instruction_list;
}
1103 
1104 // Class PseudoInstruction
1105 
// Constructs a pseudo instruction on top of an Instruction built from a
// default-constructed Address and AddressClass::eUnknown, with a
// value-initialized description.
PseudoInstruction::PseudoInstruction()
    : Instruction(Address(), AddressClass::eUnknown), m_description() {}
1108 
// Compiler-generated destruction; no custom teardown is performed here.
PseudoInstruction::~PseudoInstruction() = default;
1110 
// Always reports false: a pseudo instruction never claims to branch.
bool PseudoInstruction::DoesBranch() {
  // This is NOT a valid question for a pseudo instruction.
  return false;
}
1115 
// Always reports false: a pseudo instruction never claims a delay slot.
bool PseudoInstruction::HasDelaySlot() {
  // This is NOT a valid question for a pseudo instruction.
  return false;
}
1120 
// Performs no decoding: `disassembler`, `data` and `data_offset` are all
// ignored, and the byte size of the opcode already stored in this object is
// returned.
size_t PseudoInstruction::Decode(const lldb_private::Disassembler &disassembler,
                                 const lldb_private::DataExtractor &data,
                                 lldb::offset_t data_offset) {
  return m_opcode.GetByteSize();
}
1126 
1127 void PseudoInstruction::SetOpcode(size_t opcode_size, void *opcode_data) {
1128   if (!opcode_data)
1129     return;
1130 
1131   switch (opcode_size) {
1132   case 8: {
1133     uint8_t value8 = *((uint8_t *)opcode_data);
1134     m_opcode.SetOpcode8(value8, eByteOrderInvalid);
1135     break;
1136   }
1137   case 16: {
1138     uint16_t value16 = *((uint16_t *)opcode_data);
1139     m_opcode.SetOpcode16(value16, eByteOrderInvalid);
1140     break;
1141   }
1142   case 32: {
1143     uint32_t value32 = *((uint32_t *)opcode_data);
1144     m_opcode.SetOpcode32(value32, eByteOrderInvalid);
1145     break;
1146   }
1147   case 64: {
1148     uint64_t value64 = *((uint64_t *)opcode_data);
1149     m_opcode.SetOpcode64(value64, eByteOrderInvalid);
1150     break;
1151   }
1152   default:
1153     break;
1154   }
1155 }
1156 
1157 void PseudoInstruction::SetDescription(llvm::StringRef description) {
1158   m_description = std::string(description);
1159 }
1160 
1161 Instruction::Operand Instruction::Operand::BuildRegister(ConstString &r) {
1162   Operand ret;
1163   ret.m_type = Type::Register;
1164   ret.m_register = r;
1165   return ret;
1166 }
1167 
1168 Instruction::Operand Instruction::Operand::BuildImmediate(lldb::addr_t imm,
1169                                                           bool neg) {
1170   Operand ret;
1171   ret.m_type = Type::Immediate;
1172   ret.m_immediate = imm;
1173   ret.m_negative = neg;
1174   return ret;
1175 }
1176 
1177 Instruction::Operand Instruction::Operand::BuildImmediate(int64_t imm) {
1178   Operand ret;
1179   ret.m_type = Type::Immediate;
1180   if (imm < 0) {
1181     ret.m_immediate = -imm;
1182     ret.m_negative = true;
1183   } else {
1184     ret.m_immediate = imm;
1185     ret.m_negative = false;
1186   }
1187   return ret;
1188 }
1189 
1190 Instruction::Operand
1191 Instruction::Operand::BuildDereference(const Operand &ref) {
1192   Operand ret;
1193   ret.m_type = Type::Dereference;
1194   ret.m_children = {ref};
1195   return ret;
1196 }
1197 
1198 Instruction::Operand Instruction::Operand::BuildSum(const Operand &lhs,
1199                                                     const Operand &rhs) {
1200   Operand ret;
1201   ret.m_type = Type::Sum;
1202   ret.m_children = {lhs, rhs};
1203   return ret;
1204 }
1205 
1206 Instruction::Operand Instruction::Operand::BuildProduct(const Operand &lhs,
1207                                                         const Operand &rhs) {
1208   Operand ret;
1209   ret.m_type = Type::Product;
1210   ret.m_children = {lhs, rhs};
1211   return ret;
1212 }
1213 
1214 std::function<bool(const Instruction::Operand &)>
1215 lldb_private::OperandMatchers::MatchBinaryOp(
1216     std::function<bool(const Instruction::Operand &)> base,
1217     std::function<bool(const Instruction::Operand &)> left,
1218     std::function<bool(const Instruction::Operand &)> right) {
1219   return [base, left, right](const Instruction::Operand &op) -> bool {
1220     return (base(op) && op.m_children.size() == 2 &&
1221             ((left(op.m_children[0]) && right(op.m_children[1])) ||
1222              (left(op.m_children[1]) && right(op.m_children[0]))));
1223   };
1224 }
1225 
1226 std::function<bool(const Instruction::Operand &)>
1227 lldb_private::OperandMatchers::MatchUnaryOp(
1228     std::function<bool(const Instruction::Operand &)> base,
1229     std::function<bool(const Instruction::Operand &)> child) {
1230   return [base, child](const Instruction::Operand &op) -> bool {
1231     return (base(op) && op.m_children.size() == 1 && child(op.m_children[0]));
1232   };
1233 }
1234 
1235 std::function<bool(const Instruction::Operand &)>
1236 lldb_private::OperandMatchers::MatchRegOp(const RegisterInfo &info) {
1237   return [&info](const Instruction::Operand &op) {
1238     return (op.m_type == Instruction::Operand::Type::Register &&
1239             (op.m_register == ConstString(info.name) ||
1240              op.m_register == ConstString(info.alt_name)));
1241   };
1242 }
1243 
1244 std::function<bool(const Instruction::Operand &)>
1245 lldb_private::OperandMatchers::FetchRegOp(ConstString &reg) {
1246   return [&reg](const Instruction::Operand &op) {
1247     if (op.m_type != Instruction::Operand::Type::Register) {
1248       return false;
1249     }
1250     reg = op.m_register;
1251     return true;
1252   };
1253 }
1254 
1255 std::function<bool(const Instruction::Operand &)>
1256 lldb_private::OperandMatchers::MatchImmOp(int64_t imm) {
1257   return [imm](const Instruction::Operand &op) {
1258     return (op.m_type == Instruction::Operand::Type::Immediate &&
1259             ((op.m_negative && op.m_immediate == (uint64_t)-imm) ||
1260              (!op.m_negative && op.m_immediate == (uint64_t)imm)));
1261   };
1262 }
1263 
1264 std::function<bool(const Instruction::Operand &)>
1265 lldb_private::OperandMatchers::FetchImmOp(int64_t &imm) {
1266   return [&imm](const Instruction::Operand &op) {
1267     if (op.m_type != Instruction::Operand::Type::Immediate) {
1268       return false;
1269     }
1270     if (op.m_negative) {
1271       imm = -((int64_t)op.m_immediate);
1272     } else {
1273       imm = ((int64_t)op.m_immediate);
1274     }
1275     return true;
1276   };
1277 }
1278 
1279 std::function<bool(const Instruction::Operand &)>
1280 lldb_private::OperandMatchers::MatchOpType(Instruction::Operand::Type type) {
1281   return [type](const Instruction::Operand &op) { return op.m_type == type; };
1282 }
1283