xref: /wasmtime-44.0.1/src/commands/objdump.rs (revision efb4e4c0)
1 //! Implementation of the `wasmtime objdump` CLI command.
2 
3 use anyhow::{bail, Context, Result};
4 use capstone::InsnGroupType::{CS_GRP_JUMP, CS_GRP_RET};
5 use clap::Parser;
6 use cranelift_codegen::isa::lookup_by_name;
7 use cranelift_codegen::settings::Flags;
8 use object::read::elf::ElfFile64;
9 use object::{Architecture, Endianness, FileFlags, Object, ObjectSection, ObjectSymbol};
10 use pulley_interpreter::decode::{Decoder, DecodingError, OpVisitor};
11 use pulley_interpreter::disas::Disassembler;
12 use std::io::{IsTerminal, Read, Write};
13 use std::iter::{self, Peekable};
14 use std::path::{Path, PathBuf};
15 use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
16 use wasmtime::Engine;
17 use wasmtime_environ::{obj, FilePos, StackMap, Trap};
18 
19 /// A helper utility in wasmtime to explore the compiled object file format of
20 /// a `*.cwasm` file.
21 #[derive(Parser)]
22 pub struct ObjdumpCommand {
23     /// The path to a compiled `*.cwasm` file.
24     ///
25     /// If this is `-` or not provided then stdin is used as input.
26     cwasm: Option<PathBuf>,
27 
28     /// Whether or not to display function/instruction addresses.
29     #[arg(long)]
30     addresses: bool,
31 
32     /// Whether or not to try to only display addresses of instruction jump
33     /// targets.
34     #[arg(long)]
35     address_jumps: bool,
36 
37     /// What functions should be printed
38     #[arg(long, default_value = "wasm", value_name = "KIND")]
39     funcs: Vec<Func>,
40 
41     /// String filter to apply to function names to only print some functions.
42     #[arg(long, value_name = "STR")]
43     filter: Option<String>,
44 
45     /// Whether or not instruction bytes are disassembled.
46     #[arg(long)]
47     bytes: bool,
48 
49     /// Whether or not to use color.
50     #[arg(long, default_value = "auto")]
51     color: ColorChoice,
52 
53     /// Whether or not to interleave instructions with address maps.
54     #[arg(long, require_equals = true, value_name = "true|false")]
55     addrmap: Option<Option<bool>>,
56 
57     /// Column width of how large an address is rendered as.
58     #[arg(long, default_value = "10", value_name = "N")]
59     address_width: usize,
60 
61     /// Whether or not to show information about what instructions can trap.
62     #[arg(long, require_equals = true, value_name = "true|false")]
63     traps: Option<Option<bool>>,
64 
65     /// Whether or not to show information about stack maps.
66     #[arg(long, require_equals = true, value_name = "true|false")]
67     stack_maps: Option<Option<bool>>,
68 }
69 
/// Resolves a tri-state command line flag into a plain boolean.
///
/// * `None` (flag absent) yields `default`.
/// * `Some(None)` (flag present without a value) yields `true`.
/// * `Some(Some(v))` (flag present with an explicit value) yields `v`.
fn optional_flag_with_default(flag: Option<Option<bool>>, default: bool) -> bool {
    flag.map_or(default, |explicit| explicit.unwrap_or(true))
}
77 
78 impl ObjdumpCommand {
79     fn addrmap(&self) -> bool {
80         optional_flag_with_default(self.addrmap, false)
81     }
82 
83     fn traps(&self) -> bool {
84         optional_flag_with_default(self.traps, true)
85     }
86 
87     fn stack_maps(&self) -> bool {
88         optional_flag_with_default(self.stack_maps, true)
89     }
90 
91     /// Executes the command.
92     pub fn execute(self) -> Result<()> {
93         // Setup stdout handling color options. Also build some variables used
94         // below to configure colors of certain items.
95         let mut choice = self.color;
96         if choice == ColorChoice::Auto && !std::io::stdout().is_terminal() {
97             choice = ColorChoice::Never;
98         }
99         let mut stdout = StandardStream::stdout(choice);
100 
101         let mut color_address = ColorSpec::new();
102         color_address.set_bold(true).set_fg(Some(Color::Yellow));
103         let mut color_bytes = ColorSpec::new();
104         color_bytes.set_fg(Some(Color::Magenta));
105 
106         let bytes = self.read_cwasm()?;
107 
108         // Double-check this is a `*.cwasm`
109         if Engine::detect_precompiled(&bytes).is_none() {
110             bail!("not a `*.cwasm` file from wasmtime: {:?}", self.cwasm);
111         }
112 
113         // Parse the input as an ELF file, extract the `.text` section.
114         let elf = ElfFile64::<Endianness>::parse(&bytes)?;
115         let text = elf
116             .section_by_name(".text")
117             .context("missing .text section")?;
118         let text = text.data()?;
119 
120         // Build the helper that'll get used to attach decorations/annotations
121         // to various instructions.
122         let mut decorator = Decorator {
123             addrmap: elf
124                 .section_by_name(obj::ELF_WASMTIME_ADDRMAP)
125                 .and_then(|section| section.data().ok())
126                 .and_then(|bytes| wasmtime_environ::iterate_address_map(bytes))
127                 .map(|i| (Box::new(i) as Box<dyn Iterator<Item = _>>).peekable()),
128             traps: elf
129                 .section_by_name(obj::ELF_WASMTIME_TRAPS)
130                 .and_then(|section| section.data().ok())
131                 .and_then(|bytes| wasmtime_environ::iterate_traps(bytes))
132                 .map(|i| (Box::new(i) as Box<dyn Iterator<Item = _>>).peekable()),
133             stack_maps: elf
134                 .section_by_name(obj::ELF_WASMTIME_STACK_MAP)
135                 .and_then(|section| section.data().ok())
136                 .and_then(|bytes| StackMap::iter(bytes))
137                 .map(|i| (Box::new(i) as Box<dyn Iterator<Item = _>>).peekable()),
138             objdump: &self,
139         };
140 
141         // Iterate over all symbols which will be functions for a cwasm and
142         // we'll disassemble them all.
143         let mut first = true;
144         for sym in elf.symbols() {
145             let name = match sym.name() {
146                 Ok(name) => name,
147                 Err(_) => continue,
148             };
149             let bytes = &text[sym.address() as usize..][..sym.size() as usize];
150 
151             let kind = if name.starts_with("wasmtime_builtin") {
152                 Func::Builtin
153             } else if name.contains("]::function[") {
154                 Func::Wasm
155             } else if name.contains("trampoline") {
156                 Func::Trampoline
157             } else if name.contains("libcall") {
158                 Func::Libcall
159             } else {
160                 panic!("unknown symbol: {name}")
161             };
162 
163             // Apply any filters, if provided, to this function to look at just
164             // one function in the disassembly.
165             if self.funcs.is_empty() {
166                 if kind != Func::Wasm {
167                     continue;
168                 }
169             } else {
170                 if !(self.funcs.contains(&Func::All) || self.funcs.contains(&kind)) {
171                     continue;
172                 }
173             }
174             if let Some(filter) = &self.filter {
175                 if !name.contains(filter) {
176                     continue;
177                 }
178             }
179 
180             // Place a blank line between functions.
181             if first {
182                 first = false;
183             } else {
184                 writeln!(stdout)?;
185             }
186 
187             // Print the function's address, if so desired. Then print the
188             // function name.
189             if self.addresses {
190                 stdout.set_color(color_address.clone().set_bold(true))?;
191                 write!(stdout, "{:08x} ", sym.address())?;
192                 stdout.reset()?;
193             }
194             stdout.set_color(ColorSpec::new().set_bold(true).set_fg(Some(Color::Green)))?;
195             write!(stdout, "{name}")?;
196             stdout.reset()?;
197             writeln!(stdout, ":")?;
198 
199             // Tracking variables for rough heuristics of printing targets of
200             // jump instructions for `--address-jumps` mode.
201             let mut prev_jump = false;
202             let mut write_offsets = false;
203 
204             for inst in self.disas(&elf, bytes, sym.address())? {
205                 let Inst {
206                     address,
207                     is_jump,
208                     is_return,
209                     disassembly: disas,
210                     bytes,
211                 } = inst;
212 
213                 // Generate an infinite list of bytes to make printing below
214                 // easier, but only limit `inline_bytes` to get printed before
215                 // an instruction.
216                 let mut bytes = bytes.iter().map(Some).chain(iter::repeat(None));
217                 let inline_bytes = 9;
218                 let width = self.address_width;
219 
220                 // Some instructions may disassemble to multiple lines, such as
221                 // `br_table` with Pulley. Handle separate lines per-instruction
222                 // here.
223                 for (i, line) in disas.lines().enumerate() {
224                     let print_address = self.addresses
225                         || (self.address_jumps && (write_offsets || (prev_jump && !is_jump)));
226                     if i == 0 && print_address {
227                         stdout.set_color(&color_address)?;
228                         write!(stdout, "{address:>width$x}: ")?;
229                         stdout.reset()?;
230                     } else {
231                         write!(stdout, "{:width$}  ", "")?;
232                     }
233 
234                     // If we're printing inline bytes then print up to
235                     // `inline_bytes` of instruction data, and any remaining
236                     // data will go on the next line, if any, or after the
237                     // instruction below.
238                     if self.bytes {
239                         stdout.set_color(&color_bytes)?;
240                         for byte in bytes.by_ref().take(inline_bytes) {
241                             match byte {
242                                 Some(byte) => write!(stdout, "{byte:02x} ")?,
243                                 None => write!(stdout, "   ")?,
244                             }
245                         }
246                         write!(stdout, "  ")?;
247                         stdout.reset()?;
248                     }
249 
250                     writeln!(stdout, "{line}")?;
251                 }
252 
253                 // Flip write_offsets to true once we've seen a `ret`, as
254                 // instructions that follow the return are often related to trap
255                 // tables.
256                 write_offsets |= is_return;
257                 prev_jump = is_jump;
258 
259                 // After the instruction is printed then finish printing the
260                 // instruction bytes if any are present. Still limit to
261                 // `inline_bytes` per line.
262                 if self.bytes {
263                     let mut inline = 0;
264                     stdout.set_color(&color_bytes)?;
265                     for byte in bytes {
266                         let Some(byte) = byte else { break };
267                         if inline == 0 {
268                             write!(stdout, "{:width$}  ", "")?;
269                         } else {
270                             write!(stdout, " ")?;
271                         }
272                         write!(stdout, "{byte:02x}")?;
273                         inline += 1;
274                         if inline == inline_bytes {
275                             writeln!(stdout)?;
276                             inline = 0;
277                         }
278                     }
279                     stdout.reset()?;
280                     if inline > 0 {
281                         writeln!(stdout)?;
282                     }
283                 }
284 
285                 // And now finally after an instruction is printed try to
286                 // collect any "decorations" or annotations for this
287                 // instruction. This is for example the address map, stack maps,
288                 // etc.
289                 //
290                 // Once they're collected then print them after the instruction
291                 // attempting to use some unicode characters to make it easier
292                 // to read/scan.
293                 let mut decorations = Vec::new();
294                 decorator.decorate(address, &mut decorations);
295 
296                 let print_whitespace_to_decoration = |stdout: &mut StandardStream| -> Result<()> {
297                     write!(stdout, "{:width$}  ", "")?;
298                     if self.bytes {
299                         for _ in 0..inline_bytes + 1 {
300                             write!(stdout, "   ")?;
301                         }
302                     }
303                     Ok(())
304                 };
305                 for (i, decoration) in decorations.iter().enumerate() {
306                     print_whitespace_to_decoration(&mut stdout)?;
307                     let mut color = ColorSpec::new();
308                     color.set_fg(Some(Color::Cyan));
309                     stdout.set_color(&color)?;
310                     let final_decoration = i == decorations.len() - 1;
311                     if !final_decoration {
312                         write!(stdout, "├")?;
313                     } else {
314                         write!(stdout, "╰")?;
315                     }
316                     for (i, line) in decoration.lines().enumerate() {
317                         if i == 0 {
318                             write!(stdout, "─╼ ")?;
319                         } else {
320                             print_whitespace_to_decoration(&mut stdout)?;
321                             if final_decoration {
322                                 write!(stdout, "    ")?;
323                             } else {
324                                 write!(stdout, "│   ")?;
325                             }
326                         }
327                         writeln!(stdout, "{line}")?;
328                     }
329                     stdout.reset()?;
330                 }
331             }
332         }
333         Ok(())
334     }
335 
336     /// Disassembles `func` contained within `elf` returning a list of
337     /// instructions that represent the function.
338     fn disas(&self, elf: &ElfFile64<'_, Endianness>, func: &[u8], addr: u64) -> Result<Vec<Inst>> {
339         let cranelift_target = match elf.architecture() {
340             Architecture::X86_64 => "x86_64",
341             Architecture::Aarch64 => "aarch64",
342             Architecture::S390x => "s390x",
343             Architecture::Riscv64 => {
344                 let e_flags = match elf.flags() {
345                     FileFlags::Elf { e_flags, .. } => e_flags,
346                     _ => bail!("not an ELF file"),
347                 };
348                 if e_flags & (obj::EF_WASMTIME_PULLEY32 | obj::EF_WASMTIME_PULLEY64) != 0 {
349                     return self.disas_pulley(func, addr);
350                 } else {
351                     "riscv64"
352                 }
353             }
354             other => bail!("unknown architecture {other:?}"),
355         };
356         let builder =
357             lookup_by_name(cranelift_target).context("failed to load cranelift ISA builder")?;
358         let flags = cranelift_codegen::settings::builder();
359         let isa = builder.finish(Flags::new(flags))?;
360         let isa = &*isa;
361         let capstone = isa
362             .to_capstone()
363             .context("failed to create a capstone disassembler")?;
364 
365         let insts = capstone
366             .disasm_all(func, addr)?
367             .into_iter()
368             .map(|inst| {
369                 let detail = capstone.insn_detail(&inst).ok();
370                 let detail = detail.as_ref();
371                 let is_jump = detail
372                     .map(|d| {
373                         d.groups()
374                             .iter()
375                             .find(|g| g.0 as u32 == CS_GRP_JUMP)
376                             .is_some()
377                     })
378                     .unwrap_or(false);
379 
380                 let is_return = detail
381                     .map(|d| {
382                         d.groups()
383                             .iter()
384                             .find(|g| g.0 as u32 == CS_GRP_RET)
385                             .is_some()
386                     })
387                     .unwrap_or(false);
388 
389                 let disassembly = match (inst.mnemonic(), inst.op_str()) {
390                     (Some(i), Some(o)) => {
391                         if o.is_empty() {
392                             format!("{i}")
393                         } else {
394                             format!("{i:7} {o}")
395                         }
396                     }
397                     (Some(i), None) => format!("{i}"),
398                     _ => unreachable!(),
399                 };
400 
401                 let address = inst.address();
402                 Inst {
403                     address,
404                     is_jump,
405                     is_return,
406                     bytes: inst.bytes().to_vec(),
407                     disassembly,
408                 }
409             })
410             .collect::<Vec<_>>();
411         Ok(insts)
412     }
413 
414     /// Same as `dias` above, but just for Pulley.
415     fn disas_pulley(&self, func: &[u8], addr: u64) -> Result<Vec<Inst>> {
416         let mut result = vec![];
417 
418         let mut disas = Disassembler::new(func);
419         disas.offsets(false);
420         disas.hexdump(false);
421         disas.start_offset(usize::try_from(addr).unwrap());
422         let mut decoder = Decoder::new();
423         let mut last_disas_pos = 0;
424         loop {
425             let start_addr = disas.bytecode().position();
426 
427             match decoder.decode_one(&mut disas) {
428                 // If we got EOF at the initial position, then we're done disassembling.
429                 Err(DecodingError::UnexpectedEof { position }) if position == start_addr => break,
430 
431                 // Otherwise, propagate the error.
432                 Err(e) => {
433                     return Err(e).context("failed to disassembly pulley bytecode");
434                 }
435 
436                 Ok(()) => {
437                     let bytes_range = start_addr..disas.bytecode().position();
438                     let disassembly = disas.disas()[last_disas_pos..].trim();
439                     last_disas_pos = disas.disas().len();
440                     let address = u64::try_from(start_addr).unwrap() + addr;
441                     let is_jump = disassembly.contains("jump") || disassembly.contains("br_");
442                     let is_return = disassembly == "ret";
443                     result.push(Inst {
444                         bytes: func[bytes_range].to_vec(),
445                         address,
446                         is_jump,
447                         is_return,
448                         disassembly: disassembly.to_string(),
449                     });
450                 }
451             }
452         }
453 
454         Ok(result)
455     }
456 
457     /// Helper to read the input bytes of the `*.cwasm` handling stdin
458     /// automatically.
459     fn read_cwasm(&self) -> Result<Vec<u8>> {
460         if let Some(path) = &self.cwasm {
461             if path != Path::new("-") {
462                 return std::fs::read(path).with_context(|| format!("failed to read {path:?}"));
463             }
464         }
465 
466         let mut stdin = Vec::new();
467         std::io::stdin()
468             .read_to_end(&mut stdin)
469             .context("failed to read stdin")?;
470         Ok(stdin)
471     }
472 }
473 
/// A single disassembled instruction together with the metadata needed to
/// print it.
struct Inst {
    /// Address the instruction is printed at.
    address: u64,
    /// Raw bytes making up the instruction.
    bytes: Vec<u8>,
    /// Textual disassembly; may span more than one line.
    disassembly: String,
    /// Whether the disassembler classified this instruction as a jump.
    is_jump: bool,
    /// Whether the disassembler classified this instruction as a return.
    is_return: bool,
}
482 
483 #[derive(clap::ValueEnum, Clone, Copy, PartialEq, Eq)]
484 enum Func {
485     All,
486     Wasm,
487     Trampoline,
488     Builtin,
489     Libcall,
490 }
491 
492 struct Decorator<'a> {
493     objdump: &'a ObjdumpCommand,
494     addrmap: Option<Peekable<Box<dyn Iterator<Item = (u32, FilePos)> + 'a>>>,
495     traps: Option<Peekable<Box<dyn Iterator<Item = (u32, Trap)> + 'a>>>,
496     stack_maps: Option<Peekable<Box<dyn Iterator<Item = (u32, StackMap<'a>)> + 'a>>>,
497 }
498 
499 impl Decorator<'_> {
500     fn decorate(&mut self, address: u64, list: &mut Vec<String>) {
501         self.addrmap(address, list);
502         self.traps(address, list);
503         self.stack_maps(address, list);
504     }
505 
506     fn addrmap(&mut self, address: u64, list: &mut Vec<String>) {
507         if !self.objdump.addrmap() {
508             return;
509         }
510         let Some(addrmap) = &mut self.addrmap else {
511             return;
512         };
513         while let Some((addr, pos)) = addrmap.next_if(|(addr, _pos)| u64::from(*addr) <= address) {
514             if u64::from(addr) != address {
515                 continue;
516             }
517             if let Some(offset) = pos.file_offset() {
518                 list.push(format!("addrmap: {offset:#x}"));
519             }
520         }
521     }
522 
523     fn traps(&mut self, address: u64, list: &mut Vec<String>) {
524         if !self.objdump.traps() {
525             return;
526         }
527         let Some(traps) = &mut self.traps else {
528             return;
529         };
530         while let Some((addr, trap)) = traps.next_if(|(addr, _pos)| u64::from(*addr) <= address) {
531             if u64::from(addr) != address {
532                 continue;
533             }
534             list.push(format!("trap: {trap:?}"));
535         }
536     }
537 
538     fn stack_maps(&mut self, address: u64, list: &mut Vec<String>) {
539         if !self.objdump.stack_maps() {
540             return;
541         }
542         let Some(stack_maps) = &mut self.stack_maps else {
543             return;
544         };
545         while let Some((addr, stack_map)) =
546             stack_maps.next_if(|(addr, _pos)| u64::from(*addr) <= address)
547         {
548             if u64::from(addr) != address {
549                 continue;
550             }
551             list.push(format!(
552                 "stack_map: frame_size={}, frame_offsets={:?}",
553                 stack_map.frame_size(),
554                 stack_map.offsets().collect::<Vec<_>>()
555             ));
556         }
557     }
558 }
559