xref: /wasmtime-44.0.1/src/commands/objdump.rs (revision 56dcd4ed)
1 //! Implementation of the `wasmtime objdump` CLI command.
2 
3 use anyhow::{Context, Result, bail};
4 use capstone::InsnGroupType::{CS_GRP_JUMP, CS_GRP_RET};
5 use clap::Parser;
6 use cranelift_codegen::isa::lookup_by_name;
7 use cranelift_codegen::settings::Flags;
8 use object::read::elf::ElfFile64;
9 use object::{Architecture, Endianness, FileFlags, Object, ObjectSection, ObjectSymbol};
10 use pulley_interpreter::decode::{Decoder, DecodingError, OpVisitor};
11 use pulley_interpreter::disas::Disassembler;
12 use std::io::{IsTerminal, Read, Write};
13 use std::iter::{self, Peekable};
14 use std::path::{Path, PathBuf};
15 use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
16 use wasmtime::Engine;
17 use wasmtime_environ::{FilePos, StackMap, Trap, obj};
18 use wasmtime_unwinder::{ExceptionHandler, ExceptionTable};
19 
/// A helper utility in wasmtime to explore the compiled object file format of
/// a `*.cwasm` file.
//
// NOTE(review): this struct derives `clap::Parser`, so the `///` comments on
// it and on its fields presumably double as `--help` text — confirm before
// rewording them. Maintainer-only notes below therefore use plain `//`.
#[derive(Parser)]
pub struct ObjdumpCommand {
    /// The path to a compiled `*.cwasm` file.
    ///
    /// If this is `-` or not provided then stdin is used as input.
    cwasm: Option<PathBuf>,

    /// Whether or not to display function/instruction addresses.
    #[arg(long)]
    addresses: bool,

    /// Whether or not to try to only display addresses of instruction jump
    /// targets.
    #[arg(long)]
    address_jumps: bool,

    /// What functions should be printed
    // `execute` classifies every symbol into a `Func` kind by its name and
    // prints it only when this list contains that kind or `Func::All`.
    #[arg(long, default_value = "wasm", value_name = "KIND")]
    funcs: Vec<Func>,

    /// String filter to apply to function names to only print some functions.
    // Applied as a plain substring match (`str::contains`) on the symbol name.
    #[arg(long, value_name = "STR")]
    filter: Option<String>,

    /// Whether or not instruction bytes are disassembled.
    // When set, `execute` prints the raw bytes of each instruction next to
    // its disassembly.
    #[arg(long)]
    bytes: bool,

    /// Whether or not to use color.
    // `execute` downgrades `auto` to `never` when stdout is not a terminal.
    #[arg(long, default_value = "auto")]
    color: ColorChoice,

    /// Whether or not to interleave instructions with address maps.
    // Tri-state flag resolved by `optional_flag_with_default`: `None` falls
    // back to the default (`false` for this flag), `Some(None)` means `true`
    // and `Some(Some(v))` means `v`. The same scheme is used by `traps`,
    // `stack_maps` and `exception_tables` below, which default to `true`.
    #[arg(long, require_equals = true, value_name = "true|false")]
    addrmap: Option<Option<bool>>,

    /// Column width of how large an address is rendered as.
    #[arg(long, default_value = "10", value_name = "N")]
    address_width: usize,

    /// Whether or not to show information about what instructions can trap.
    #[arg(long, require_equals = true, value_name = "true|false")]
    traps: Option<Option<bool>>,

    /// Whether or not to show information about stack maps.
    #[arg(long, require_equals = true, value_name = "true|false")]
    stack_maps: Option<Option<bool>>,

    /// Whether or not to show information about exception tables.
    #[arg(long, require_equals = true, value_name = "true|false")]
    exception_tables: Option<Option<bool>>,
}
74 
/// Resolves a tri-state command-line flag into a plain boolean.
///
/// * `None` (flag absent) yields `default`.
/// * `Some(None)` (flag present without a value) yields `true`.
/// * `Some(Some(value))` (flag present with a value) yields `value`.
fn optional_flag_with_default(flag: Option<Option<bool>>, default: bool) -> bool {
    // A flag that is present but carries no explicit value counts as enabled.
    flag.map_or(default, |explicit| explicit.unwrap_or(true))
}
82 
83 impl ObjdumpCommand {
84     fn addrmap(&self) -> bool {
85         optional_flag_with_default(self.addrmap, false)
86     }
87 
88     fn traps(&self) -> bool {
89         optional_flag_with_default(self.traps, true)
90     }
91 
92     fn stack_maps(&self) -> bool {
93         optional_flag_with_default(self.stack_maps, true)
94     }
95 
96     fn exception_tables(&self) -> bool {
97         optional_flag_with_default(self.exception_tables, true)
98     }
99 
100     /// Executes the command.
101     pub fn execute(self) -> Result<()> {
102         // Setup stdout handling color options. Also build some variables used
103         // below to configure colors of certain items.
104         let mut choice = self.color;
105         if choice == ColorChoice::Auto && !std::io::stdout().is_terminal() {
106             choice = ColorChoice::Never;
107         }
108         let mut stdout = StandardStream::stdout(choice);
109 
110         let mut color_address = ColorSpec::new();
111         color_address.set_bold(true).set_fg(Some(Color::Yellow));
112         let mut color_bytes = ColorSpec::new();
113         color_bytes.set_fg(Some(Color::Magenta));
114 
115         let bytes = self.read_cwasm()?;
116 
117         // Double-check this is a `*.cwasm`
118         if Engine::detect_precompiled(&bytes).is_none() {
119             bail!("not a `*.cwasm` file from wasmtime: {:?}", self.cwasm);
120         }
121 
122         // Parse the input as an ELF file, extract the `.text` section.
123         let elf = ElfFile64::<Endianness>::parse(&bytes)?;
124         let text = elf
125             .section_by_name(".text")
126             .context("missing .text section")?;
127         let text = text.data()?;
128 
129         // Build the helper that'll get used to attach decorations/annotations
130         // to various instructions.
131         let mut decorator = Decorator {
132             addrmap: elf
133                 .section_by_name(obj::ELF_WASMTIME_ADDRMAP)
134                 .and_then(|section| section.data().ok())
135                 .and_then(|bytes| wasmtime_environ::iterate_address_map(bytes))
136                 .map(|i| (Box::new(i) as Box<dyn Iterator<Item = _>>).peekable()),
137             traps: elf
138                 .section_by_name(obj::ELF_WASMTIME_TRAPS)
139                 .and_then(|section| section.data().ok())
140                 .and_then(|bytes| wasmtime_environ::iterate_traps(bytes))
141                 .map(|i| (Box::new(i) as Box<dyn Iterator<Item = _>>).peekable()),
142             stack_maps: elf
143                 .section_by_name(obj::ELF_WASMTIME_STACK_MAP)
144                 .and_then(|section| section.data().ok())
145                 .and_then(|bytes| StackMap::iter(bytes))
146                 .map(|i| (Box::new(i) as Box<dyn Iterator<Item = _>>).peekable()),
147             exception_tables: elf
148                 .section_by_name(obj::ELF_WASMTIME_EXCEPTIONS)
149                 .and_then(|section| section.data().ok())
150                 .and_then(|bytes| ExceptionTable::parse(bytes).ok())
151                 .map(|table| table.into_iter())
152                 .map(|i| (Box::new(i) as Box<dyn Iterator<Item = _>>).peekable()),
153             objdump: &self,
154         };
155 
156         // Iterate over all symbols which will be functions for a cwasm and
157         // we'll disassemble them all.
158         let mut first = true;
159         for sym in elf.symbols() {
160             let name = match sym.name() {
161                 Ok(name) => name,
162                 Err(_) => continue,
163             };
164             let bytes = &text[sym.address() as usize..][..sym.size() as usize];
165 
166             let kind = if name.starts_with("wasmtime_builtin") {
167                 Func::Builtin
168             } else if name.contains("]::function[") {
169                 Func::Wasm
170             } else if name.contains("trampoline")
171                 || name.ends_with("_array_call")
172                 || name.ends_with("_wasm_call")
173             {
174                 Func::Trampoline
175             } else if name.contains("libcall") || name.starts_with("component") {
176                 Func::Libcall
177             } else {
178                 panic!("unknown symbol: {name}")
179             };
180 
181             // Apply any filters, if provided, to this function to look at just
182             // one function in the disassembly.
183             if self.funcs.is_empty() {
184                 if kind != Func::Wasm {
185                     continue;
186                 }
187             } else {
188                 if !(self.funcs.contains(&Func::All) || self.funcs.contains(&kind)) {
189                     continue;
190                 }
191             }
192             if let Some(filter) = &self.filter {
193                 if !name.contains(filter) {
194                     continue;
195                 }
196             }
197 
198             // Place a blank line between functions.
199             if first {
200                 first = false;
201             } else {
202                 writeln!(stdout)?;
203             }
204 
205             // Print the function's address, if so desired. Then print the
206             // function name.
207             if self.addresses {
208                 stdout.set_color(color_address.clone().set_bold(true))?;
209                 write!(stdout, "{:08x} ", sym.address())?;
210                 stdout.reset()?;
211             }
212             stdout.set_color(ColorSpec::new().set_bold(true).set_fg(Some(Color::Green)))?;
213             write!(stdout, "{name}")?;
214             stdout.reset()?;
215             writeln!(stdout, ":")?;
216 
217             // Tracking variables for rough heuristics of printing targets of
218             // jump instructions for `--address-jumps` mode.
219             let mut prev_jump = false;
220             let mut write_offsets = false;
221 
222             for inst in self.disas(&elf, bytes, sym.address())? {
223                 let Inst {
224                     address,
225                     is_jump,
226                     is_return,
227                     disassembly: disas,
228                     bytes,
229                 } = inst;
230 
231                 // Generate an infinite list of bytes to make printing below
232                 // easier, but only limit `inline_bytes` to get printed before
233                 // an instruction.
234                 let mut bytes = bytes.iter().map(Some).chain(iter::repeat(None));
235                 let inline_bytes = 9;
236                 let width = self.address_width;
237 
238                 // Collect any "decorations" or annotations for this
239                 // instruction. This includes the address map, stack
240                 // maps, exception handlers, etc.
241                 //
242                 // Once they're collected then we print them before or
243                 // after the instruction attempting to use some
244                 // unicode characters to make it easier to read/scan.
245                 //
246                 // Note that some decorations occur "before" an
247                 // instruction: for example, exception handler entries
248                 // logically occur at the return point after a call,
249                 // so "before" the instruction following the call.
250                 let mut pre_decorations = Vec::new();
251                 let mut post_decorations = Vec::new();
252                 decorator.decorate(address, &mut pre_decorations, &mut post_decorations);
253 
254                 let print_whitespace_to_decoration = |stdout: &mut StandardStream| -> Result<()> {
255                     write!(stdout, "{:width$}  ", "")?;
256                     if self.bytes {
257                         for _ in 0..inline_bytes + 1 {
258                             write!(stdout, "   ")?;
259                         }
260                     }
261                     Ok(())
262                 };
263 
264                 let print_decorations =
265                     |stdout: &mut StandardStream, decorations: Vec<String>| -> Result<()> {
266                         for (i, decoration) in decorations.iter().enumerate() {
267                             print_whitespace_to_decoration(stdout)?;
268                             let mut color = ColorSpec::new();
269                             color.set_fg(Some(Color::Cyan));
270                             stdout.set_color(&color)?;
271                             let final_decoration = i == decorations.len() - 1;
272                             if !final_decoration {
273                                 write!(stdout, "├")?;
274                             } else {
275                                 write!(stdout, "╰")?;
276                             }
277                             for (i, line) in decoration.lines().enumerate() {
278                                 if i == 0 {
279                                     write!(stdout, "─╼ ")?;
280                                 } else {
281                                     print_whitespace_to_decoration(stdout)?;
282                                     if final_decoration {
283                                         write!(stdout, "    ")?;
284                                     } else {
285                                         write!(stdout, "│   ")?;
286                                     }
287                                 }
288                                 writeln!(stdout, "{line}")?;
289                             }
290                             stdout.reset()?;
291                         }
292                         Ok(())
293                     };
294 
295                 print_decorations(&mut stdout, pre_decorations)?;
296 
297                 // Some instructions may disassemble to multiple lines, such as
298                 // `br_table` with Pulley. Handle separate lines per-instruction
299                 // here.
300                 for (i, line) in disas.lines().enumerate() {
301                     let print_address = self.addresses
302                         || (self.address_jumps && (write_offsets || (prev_jump && !is_jump)));
303                     if i == 0 && print_address {
304                         stdout.set_color(&color_address)?;
305                         write!(stdout, "{address:>width$x}: ")?;
306                         stdout.reset()?;
307                     } else {
308                         write!(stdout, "{:width$}  ", "")?;
309                     }
310 
311                     // If we're printing inline bytes then print up to
312                     // `inline_bytes` of instruction data, and any remaining
313                     // data will go on the next line, if any, or after the
314                     // instruction below.
315                     if self.bytes {
316                         stdout.set_color(&color_bytes)?;
317                         for byte in bytes.by_ref().take(inline_bytes) {
318                             match byte {
319                                 Some(byte) => write!(stdout, "{byte:02x} ")?,
320                                 None => write!(stdout, "   ")?,
321                             }
322                         }
323                         write!(stdout, "  ")?;
324                         stdout.reset()?;
325                     }
326 
327                     writeln!(stdout, "{line}")?;
328                 }
329 
330                 // Flip write_offsets to true once we've seen a `ret`, as
331                 // instructions that follow the return are often related to trap
332                 // tables.
333                 write_offsets |= is_return;
334                 prev_jump = is_jump;
335 
336                 // After the instruction is printed then finish printing the
337                 // instruction bytes if any are present. Still limit to
338                 // `inline_bytes` per line.
339                 if self.bytes {
340                     let mut inline = 0;
341                     stdout.set_color(&color_bytes)?;
342                     for byte in bytes {
343                         let Some(byte) = byte else { break };
344                         if inline == 0 {
345                             write!(stdout, "{:width$}  ", "")?;
346                         } else {
347                             write!(stdout, " ")?;
348                         }
349                         write!(stdout, "{byte:02x}")?;
350                         inline += 1;
351                         if inline == inline_bytes {
352                             writeln!(stdout)?;
353                             inline = 0;
354                         }
355                     }
356                     stdout.reset()?;
357                     if inline > 0 {
358                         writeln!(stdout)?;
359                     }
360                 }
361 
362                 print_decorations(&mut stdout, post_decorations)?;
363             }
364         }
365         Ok(())
366     }
367 
368     /// Disassembles `func` contained within `elf` returning a list of
369     /// instructions that represent the function.
370     fn disas(&self, elf: &ElfFile64<'_, Endianness>, func: &[u8], addr: u64) -> Result<Vec<Inst>> {
371         let cranelift_target = match elf.architecture() {
372             Architecture::X86_64 => "x86_64",
373             Architecture::Aarch64 => "aarch64",
374             Architecture::S390x => "s390x",
375             Architecture::Riscv64 => {
376                 let e_flags = match elf.flags() {
377                     FileFlags::Elf { e_flags, .. } => e_flags,
378                     _ => bail!("not an ELF file"),
379                 };
380                 if e_flags & (obj::EF_WASMTIME_PULLEY32 | obj::EF_WASMTIME_PULLEY64) != 0 {
381                     return self.disas_pulley(func, addr);
382                 } else {
383                     "riscv64"
384                 }
385             }
386             other => bail!("unknown architecture {other:?}"),
387         };
388         let builder =
389             lookup_by_name(cranelift_target).context("failed to load cranelift ISA builder")?;
390         let flags = cranelift_codegen::settings::builder();
391         let isa = builder.finish(Flags::new(flags))?;
392         let isa = &*isa;
393         let capstone = isa
394             .to_capstone()
395             .context("failed to create a capstone disassembler")?;
396 
397         let insts = capstone
398             .disasm_all(func, addr)?
399             .into_iter()
400             .map(|inst| {
401                 let detail = capstone.insn_detail(&inst).ok();
402                 let detail = detail.as_ref();
403                 let is_jump = detail
404                     .map(|d| {
405                         d.groups()
406                             .iter()
407                             .find(|g| g.0 as u32 == CS_GRP_JUMP)
408                             .is_some()
409                     })
410                     .unwrap_or(false);
411 
412                 let is_return = detail
413                     .map(|d| {
414                         d.groups()
415                             .iter()
416                             .find(|g| g.0 as u32 == CS_GRP_RET)
417                             .is_some()
418                     })
419                     .unwrap_or(false);
420 
421                 let disassembly = match (inst.mnemonic(), inst.op_str()) {
422                     (Some(i), Some(o)) => {
423                         if o.is_empty() {
424                             format!("{i}")
425                         } else {
426                             format!("{i:7} {o}")
427                         }
428                     }
429                     (Some(i), None) => format!("{i}"),
430                     _ => unreachable!(),
431                 };
432 
433                 let address = inst.address();
434                 Inst {
435                     address,
436                     is_jump,
437                     is_return,
438                     bytes: inst.bytes().to_vec(),
439                     disassembly,
440                 }
441             })
442             .collect::<Vec<_>>();
443         Ok(insts)
444     }
445 
446     /// Same as `dias` above, but just for Pulley.
447     fn disas_pulley(&self, func: &[u8], addr: u64) -> Result<Vec<Inst>> {
448         let mut result = vec![];
449 
450         let mut disas = Disassembler::new(func);
451         disas.offsets(false);
452         disas.hexdump(false);
453         disas.start_offset(usize::try_from(addr).unwrap());
454         let mut decoder = Decoder::new();
455         let mut last_disas_pos = 0;
456         loop {
457             let start_addr = disas.bytecode().position();
458 
459             match decoder.decode_one(&mut disas) {
460                 // If we got EOF at the initial position, then we're done disassembling.
461                 Err(DecodingError::UnexpectedEof { position }) if position == start_addr => break,
462 
463                 // Otherwise, propagate the error.
464                 Err(e) => {
465                     return Err(e).context("failed to disassembly pulley bytecode");
466                 }
467 
468                 Ok(()) => {
469                     let bytes_range = start_addr..disas.bytecode().position();
470                     let disassembly = disas.disas()[last_disas_pos..].trim();
471                     last_disas_pos = disas.disas().len();
472                     let address = u64::try_from(start_addr).unwrap() + addr;
473                     let is_jump = disassembly.contains("jump") || disassembly.contains("br_");
474                     let is_return = disassembly == "ret";
475                     result.push(Inst {
476                         bytes: func[bytes_range].to_vec(),
477                         address,
478                         is_jump,
479                         is_return,
480                         disassembly: disassembly.to_string(),
481                     });
482                 }
483             }
484         }
485 
486         Ok(result)
487     }
488 
489     /// Helper to read the input bytes of the `*.cwasm` handling stdin
490     /// automatically.
491     fn read_cwasm(&self) -> Result<Vec<u8>> {
492         if let Some(path) = &self.cwasm {
493             if path != Path::new("-") {
494                 return std::fs::read(path).with_context(|| format!("failed to read {path:?}"));
495             }
496         }
497 
498         let mut stdin = Vec::new();
499         std::io::stdin()
500             .read_to_end(&mut stdin)
501             .context("failed to read stdin")?;
502         Ok(stdin)
503     }
504 }
505 
/// Helper structure to package up metadata about an instruction.
///
/// Produced by `ObjdumpCommand::disas` / `ObjdumpCommand::disas_pulley` and
/// consumed by `ObjdumpCommand::execute` when printing.
struct Inst {
    /// Address of the instruction as reported by the disassembler, which is
    /// seeded with the address of the enclosing function.
    address: u64,
    /// Whether the instruction is considered a jump: capstone's jump group,
    /// or for Pulley a disassembly containing `jump` or `br_`.
    is_jump: bool,
    /// Whether the instruction is considered a return: capstone's return
    /// group, or for Pulley a disassembly equal to `ret`.
    is_return: bool,
    /// Textual disassembly; may span multiple lines.
    disassembly: String,
    /// Raw bytes that encode the instruction.
    bytes: Vec<u8>,
}
514 
// Kinds of functions selectable through `--funcs`. `ObjdumpCommand::execute`
// assigns a kind to every symbol based purely on its name, checking the
// patterns in this order: builtin, wasm, trampoline, libcall.
//
// NOTE(review): plain `//` comments are used on purpose; `///` comments on a
// `clap::ValueEnum` would presumably show up in the generated help — confirm.
#[derive(clap::ValueEnum, Clone, Copy, PartialEq, Eq)]
enum Func {
    // Selects every kind of function.
    All,
    // Symbols whose name contains `]::function[`.
    Wasm,
    // Symbols whose name contains `trampoline` or ends with `_array_call` or
    // `_wasm_call`.
    Trampoline,
    // Symbols whose name starts with `wasmtime_builtin`.
    Builtin,
    // Symbols whose name contains `libcall` or starts with `component`.
    Libcall,
}
523 
/// Produces the per-instruction annotations ("decorations") printed around
/// the disassembly.
///
/// Each optional field is a peekable iterator over one metadata section of
/// the ELF file, keyed by a `u32` that is compared against instruction
/// addresses. A field is `None` when `ObjdumpCommand::execute` could not find
/// or parse the corresponding section. Entries are consumed as instruction
/// addresses advance, so this presumably relies on both the entries and the
/// queried addresses being in ascending order — TODO confirm.
struct Decorator<'a> {
    /// Command options, consulted to see which decorations are enabled.
    objdump: &'a ObjdumpCommand,
    /// Address-map entries: code offset paired with a source file position.
    addrmap: Option<Peekable<Box<dyn Iterator<Item = (u32, FilePos)> + 'a>>>,
    /// Trap entries: code offset paired with the trap recorded for it.
    traps: Option<Peekable<Box<dyn Iterator<Item = (u32, Trap)> + 'a>>>,
    /// Stack-map entries: code offset paired with its stack map.
    stack_maps: Option<Peekable<Box<dyn Iterator<Item = (u32, StackMap<'a>)> + 'a>>>,
    /// Exception-table entries: code offset, optional frame offset, and the
    /// list of handlers registered for that offset.
    exception_tables:
        Option<Peekable<Box<dyn Iterator<Item = (u32, Option<u32>, Vec<ExceptionHandler>)> + 'a>>>,
}
532 
533 impl Decorator<'_> {
534     fn decorate(&mut self, address: u64, pre_list: &mut Vec<String>, post_list: &mut Vec<String>) {
535         self.addrmap(address, post_list);
536         self.traps(address, post_list);
537         self.stack_maps(address, post_list);
538         self.exception_table(address, pre_list);
539     }
540 
541     fn addrmap(&mut self, address: u64, list: &mut Vec<String>) {
542         if !self.objdump.addrmap() {
543             return;
544         }
545         let Some(addrmap) = &mut self.addrmap else {
546             return;
547         };
548         while let Some((addr, pos)) = addrmap.next_if(|(addr, _pos)| u64::from(*addr) <= address) {
549             if u64::from(addr) != address {
550                 continue;
551             }
552             if let Some(offset) = pos.file_offset() {
553                 list.push(format!("addrmap: {offset:#x}"));
554             }
555         }
556     }
557 
558     fn traps(&mut self, address: u64, list: &mut Vec<String>) {
559         if !self.objdump.traps() {
560             return;
561         }
562         let Some(traps) = &mut self.traps else {
563             return;
564         };
565         while let Some((addr, trap)) = traps.next_if(|(addr, _pos)| u64::from(*addr) <= address) {
566             if u64::from(addr) != address {
567                 continue;
568             }
569             list.push(format!("trap: {trap:?}"));
570         }
571     }
572 
573     fn stack_maps(&mut self, address: u64, list: &mut Vec<String>) {
574         if !self.objdump.stack_maps() {
575             return;
576         }
577         let Some(stack_maps) = &mut self.stack_maps else {
578             return;
579         };
580         while let Some((addr, stack_map)) =
581             stack_maps.next_if(|(addr, _pos)| u64::from(*addr) <= address)
582         {
583             if u64::from(addr) != address {
584                 continue;
585             }
586             list.push(format!(
587                 "stack_map: frame_size={}, frame_offsets={:?}",
588                 stack_map.frame_size(),
589                 stack_map.offsets().collect::<Vec<_>>()
590             ));
591         }
592     }
593 
594     fn exception_table(&mut self, address: u64, list: &mut Vec<String>) {
595         if !self.objdump.exception_tables() {
596             return;
597         }
598         let Some(exception_tables) = &mut self.exception_tables else {
599             return;
600         };
601         while let Some((addr, frame_offset, handlers)) =
602             exception_tables.next_if(|(addr, _, _)| u64::from(*addr) <= address)
603         {
604             if u64::from(addr) != address {
605                 continue;
606             }
607             if let Some(frame_offset) = frame_offset {
608                 list.push(format!(
609                     "exception frame offset: SP = FP - 0x{frame_offset:x}",
610                 ));
611             }
612             for handler in &handlers {
613                 let tag = match handler.tag {
614                     Some(tag) => format!("tag={tag}"),
615                     None => "default handler".to_string(),
616                 };
617                 let context = match handler.context_sp_offset {
618                     Some(offset) => format!("context at [SP+0x{offset:x}]"),
619                     None => "no dynamic context".to_string(),
620                 };
621                 list.push(format!(
622                     "exception handler: {tag}, {context}, handler=0x{:x}",
623                     handler.handler_offset
624                 ));
625             }
626         }
627     }
628 }
629