1 //! Implementation of the `wasmtime objdump` CLI command. 2 3 use anyhow::{bail, Context, Result}; 4 use capstone::InsnGroupType::{CS_GRP_JUMP, CS_GRP_RET}; 5 use clap::Parser; 6 use cranelift_codegen::isa::lookup_by_name; 7 use cranelift_codegen::settings::Flags; 8 use object::read::elf::ElfFile64; 9 use object::{Architecture, Endianness, FileFlags, Object, ObjectSection, ObjectSymbol}; 10 use pulley_interpreter::decode::{Decoder, DecodingError, OpVisitor}; 11 use pulley_interpreter::disas::Disassembler; 12 use std::io::{IsTerminal, Read, Write}; 13 use std::iter::{self, Peekable}; 14 use std::path::{Path, PathBuf}; 15 use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor}; 16 use wasmtime::Engine; 17 use wasmtime_environ::{obj, FilePos, StackMap, Trap}; 18 19 /// A helper utility in wasmtime to explore the compiled object file format of 20 /// a `*.cwasm` file. 21 #[derive(Parser)] 22 pub struct ObjdumpCommand { 23 /// The path to a compiled `*.cwasm` file. 24 /// 25 /// If this is `-` or not provided then stdin is used as input. 26 cwasm: Option<PathBuf>, 27 28 /// Whether or not to display function/instruction addresses. 29 #[arg(long)] 30 addresses: bool, 31 32 /// Whether or not to try to only display addresses of instruction jump 33 /// targets. 34 #[arg(long)] 35 address_jumps: bool, 36 37 /// What functions should be printed 38 #[arg(long, default_value = "wasm", value_name = "KIND")] 39 funcs: Vec<Func>, 40 41 /// String filter to apply to function names to only print some functions. 42 #[arg(long, value_name = "STR")] 43 filter: Option<String>, 44 45 /// Whether or not instruction bytes are disassembled. 46 #[arg(long)] 47 bytes: bool, 48 49 /// Whether or not to use color. 50 #[arg(long, default_value = "auto")] 51 color: ColorChoice, 52 53 /// Whether or not to interleave instructions with address maps. 54 #[arg(long, require_equals = true, value_name = "true|false")] 55 addrmap: Option<Option<bool>>, 56 57 /// Column width of how large an address is rendered as. 58 #[arg(long, default_value = "10", value_name = "N")] 59 address_width: usize, 60 61 /// Whether or not to show information about what instructions can trap. 62 #[arg(long, require_equals = true, value_name = "true|false")] 63 traps: Option<Option<bool>>, 64 65 /// Whether or not to show information about stack maps. 66 #[arg(long, require_equals = true, value_name = "true|false")] 67 stack_maps: Option<Option<bool>>, 68 } 69 70 fn optional_flag_with_default(flag: Option<Option<bool>>, default: bool) -> bool { 71 match flag { 72 None => default, 73 Some(None) => true, 74 Some(Some(val)) => val, 75 } 76 } 77 78 impl ObjdumpCommand { 79 fn addrmap(&self) -> bool { 80 optional_flag_with_default(self.addrmap, false) 81 } 82 83 fn traps(&self) -> bool { 84 optional_flag_with_default(self.traps, true) 85 } 86 87 fn stack_maps(&self) -> bool { 88 optional_flag_with_default(self.stack_maps, true) 89 } 90 91 /// Executes the command. 92 pub fn execute(self) -> Result<()> { 93 // Setup stdout handling color options. Also build some variables used 94 // below to configure colors of certain items. 95 let mut choice = self.color; 96 if choice == ColorChoice::Auto && !std::io::stdout().is_terminal() { 97 choice = ColorChoice::Never; 98 } 99 let mut stdout = StandardStream::stdout(choice); 100 101 let mut color_address = ColorSpec::new(); 102 color_address.set_bold(true).set_fg(Some(Color::Yellow)); 103 let mut color_bytes = ColorSpec::new(); 104 color_bytes.set_fg(Some(Color::Magenta)); 105 106 let bytes = self.read_cwasm()?; 107 108 // Double-check this is a `*.cwasm` 109 if Engine::detect_precompiled(&bytes).is_none() { 110 bail!("not a `*.cwasm` file from wasmtime: {:?}", self.cwasm); 111 } 112 113 // Parse the input as an ELF file, extract the `.text` section. 114 let elf = ElfFile64::<Endianness>::parse(&bytes)?; 115 let text = elf 116 .section_by_name(".text") 117 .context("missing .text section")?; 118 let text = text.data()?; 119 120 // Build the helper that'll get used to attach decorations/annotations 121 // to various instructions. 122 let mut decorator = Decorator { 123 addrmap: elf 124 .section_by_name(obj::ELF_WASMTIME_ADDRMAP) 125 .and_then(|section| section.data().ok()) 126 .and_then(|bytes| wasmtime_environ::iterate_address_map(bytes)) 127 .map(|i| (Box::new(i) as Box<dyn Iterator<Item = _>>).peekable()), 128 traps: elf 129 .section_by_name(obj::ELF_WASMTIME_TRAPS) 130 .and_then(|section| section.data().ok()) 131 .and_then(|bytes| wasmtime_environ::iterate_traps(bytes)) 132 .map(|i| (Box::new(i) as Box<dyn Iterator<Item = _>>).peekable()), 133 stack_maps: elf 134 .section_by_name(obj::ELF_WASMTIME_STACK_MAP) 135 .and_then(|section| section.data().ok()) 136 .and_then(|bytes| StackMap::iter(bytes)) 137 .map(|i| (Box::new(i) as Box<dyn Iterator<Item = _>>).peekable()), 138 objdump: &self, 139 }; 140 141 // Iterate over all symbols which will be functions for a cwasm and 142 // we'll disassemble them all. 143 let mut first = true; 144 for sym in elf.symbols() { 145 let name = match sym.name() { 146 Ok(name) => name, 147 Err(_) => continue, 148 }; 149 let bytes = &text[sym.address() as usize..][..sym.size() as usize]; 150 151 let kind = if name.starts_with("wasmtime_builtin") { 152 Func::Builtin 153 } else if name.contains("]::function[") { 154 Func::Wasm 155 } else if name.contains("trampoline") { 156 Func::Trampoline 157 } else if name.contains("libcall") { 158 Func::Libcall 159 } else { 160 panic!("unknown symbol: {name}") 161 }; 162 163 // Apply any filters, if provided, to this function to look at just 164 // one function in the disassembly. 165 if self.funcs.is_empty() { 166 if kind != Func::Wasm { 167 continue; 168 } 169 } else { 170 if !(self.funcs.contains(&Func::All) || self.funcs.contains(&kind)) { 171 continue; 172 } 173 } 174 if let Some(filter) = &self.filter { 175 if !name.contains(filter) { 176 continue; 177 } 178 } 179 180 // Place a blank line between functions. 181 if first { 182 first = false; 183 } else { 184 writeln!(stdout)?; 185 } 186 187 // Print the function's address, if so desired. Then print the 188 // function name. 189 if self.addresses { 190 stdout.set_color(color_address.clone().set_bold(true))?; 191 write!(stdout, "{:08x} ", sym.address())?; 192 stdout.reset()?; 193 } 194 stdout.set_color(ColorSpec::new().set_bold(true).set_fg(Some(Color::Green)))?; 195 write!(stdout, "{name}")?; 196 stdout.reset()?; 197 writeln!(stdout, ":")?; 198 199 // Tracking variables for rough heuristics of printing targets of 200 // jump instructions for `--address-jumps` mode. 201 let mut prev_jump = false; 202 let mut write_offsets = false; 203 204 for inst in self.disas(&elf, bytes, sym.address())? { 205 let Inst { 206 address, 207 is_jump, 208 is_return, 209 disassembly: disas, 210 bytes, 211 } = inst; 212 213 // Generate an infinite list of bytes to make printing below 214 // easier, but only limit `inline_bytes` to get printed before 215 // an instruction. 216 let mut bytes = bytes.iter().map(Some).chain(iter::repeat(None)); 217 let inline_bytes = 9; 218 let width = self.address_width; 219 220 // Some instructions may disassemble to multiple lines, such as 221 // `br_table` with Pulley. Handle separate lines per-instruction 222 // here. 223 for (i, line) in disas.lines().enumerate() { 224 let print_address = self.addresses 225 || (self.address_jumps && (write_offsets || (prev_jump && !is_jump))); 226 if i == 0 && print_address { 227 stdout.set_color(&color_address)?; 228 write!(stdout, "{address:>width$x}: ")?; 229 stdout.reset()?; 230 } else { 231 write!(stdout, "{:width$} ", "")?; 232 } 233 234 // If we're printing inline bytes then print up to 235 // `inline_bytes` of instruction data, and any remaining 236 // data will go on the next line, if any, or after the 237 // instruction below. 238 if self.bytes { 239 stdout.set_color(&color_bytes)?; 240 for byte in bytes.by_ref().take(inline_bytes) { 241 match byte { 242 Some(byte) => write!(stdout, "{byte:02x} ")?, 243 None => write!(stdout, " ")?, 244 } 245 } 246 write!(stdout, " ")?; 247 stdout.reset()?; 248 } 249 250 writeln!(stdout, "{line}")?; 251 } 252 253 // Flip write_offsets to true once we've seen a `ret`, as 254 // instructions that follow the return are often related to trap 255 // tables. 256 write_offsets |= is_return; 257 prev_jump = is_jump; 258 259 // After the instruction is printed then finish printing the 260 // instruction bytes if any are present. Still limit to 261 // `inline_bytes` per line. 262 if self.bytes { 263 let mut inline = 0; 264 stdout.set_color(&color_bytes)?; 265 for byte in bytes { 266 let Some(byte) = byte else { break }; 267 if inline == 0 { 268 write!(stdout, "{:width$} ", "")?; 269 } else { 270 write!(stdout, " ")?; 271 } 272 write!(stdout, "{byte:02x}")?; 273 inline += 1; 274 if inline == inline_bytes { 275 writeln!(stdout)?; 276 inline = 0; 277 } 278 } 279 stdout.reset()?; 280 if inline > 0 { 281 writeln!(stdout)?; 282 } 283 } 284 285 // And now finally after an instruction is printed try to 286 // collect any "decorations" or annotations for this 287 // instruction. This is for example the address map, stack maps, 288 // etc. 289 // 290 // Once they're collected then print them after the instruction 291 // attempting to use some unicode characters to make it easier 292 // to read/scan. 293 let mut decorations = Vec::new(); 294 decorator.decorate(address, &mut decorations); 295 296 let print_whitespace_to_decoration = |stdout: &mut StandardStream| -> Result<()> { 297 write!(stdout, "{:width$} ", "")?; 298 if self.bytes { 299 for _ in 0..inline_bytes + 1 { 300 write!(stdout, " ")?; 301 } 302 } 303 Ok(()) 304 }; 305 for (i, decoration) in decorations.iter().enumerate() { 306 print_whitespace_to_decoration(&mut stdout)?; 307 let mut color = ColorSpec::new(); 308 color.set_fg(Some(Color::Cyan)); 309 stdout.set_color(&color)?; 310 let final_decoration = i == decorations.len() - 1; 311 if !final_decoration { 312 write!(stdout, "├")?; 313 } else { 314 write!(stdout, "╰")?; 315 } 316 for (i, line) in decoration.lines().enumerate() { 317 if i == 0 { 318 write!(stdout, "─╼ ")?; 319 } else { 320 print_whitespace_to_decoration(&mut stdout)?; 321 if final_decoration { 322 write!(stdout, " ")?; 323 } else { 324 write!(stdout, "│ ")?; 325 } 326 } 327 writeln!(stdout, "{line}")?; 328 } 329 stdout.reset()?; 330 } 331 } 332 } 333 Ok(()) 334 } 335 336 /// Disassembles `func` contained within `elf` returning a list of 337 /// instructions that represent the function. 338 fn disas(&self, elf: &ElfFile64<'_, Endianness>, func: &[u8], addr: u64) -> Result<Vec<Inst>> { 339 let cranelift_target = match elf.architecture() { 340 Architecture::X86_64 => "x86_64", 341 Architecture::Aarch64 => "aarch64", 342 Architecture::S390x => "s390x", 343 Architecture::Riscv64 => { 344 let e_flags = match elf.flags() { 345 FileFlags::Elf { e_flags, .. } => e_flags, 346 _ => bail!("not an ELF file"), 347 }; 348 if e_flags & (obj::EF_WASMTIME_PULLEY32 | obj::EF_WASMTIME_PULLEY64) != 0 { 349 return self.disas_pulley(func, addr); 350 } else { 351 "riscv64" 352 } 353 } 354 other => bail!("unknown architecture {other:?}"), 355 }; 356 let builder = 357 lookup_by_name(cranelift_target).context("failed to load cranelift ISA builder")?; 358 let flags = cranelift_codegen::settings::builder(); 359 let isa = builder.finish(Flags::new(flags))?; 360 let isa = &*isa; 361 let capstone = isa 362 .to_capstone() 363 .context("failed to create a capstone disassembler")?; 364 365 let insts = capstone 366 .disasm_all(func, addr)? 367 .into_iter() 368 .map(|inst| { 369 let detail = capstone.insn_detail(&inst).ok(); 370 let detail = detail.as_ref(); 371 let is_jump = detail 372 .map(|d| { 373 d.groups() 374 .iter() 375 .find(|g| g.0 as u32 == CS_GRP_JUMP) 376 .is_some() 377 }) 378 .unwrap_or(false); 379 380 let is_return = detail 381 .map(|d| { 382 d.groups() 383 .iter() 384 .find(|g| g.0 as u32 == CS_GRP_RET) 385 .is_some() 386 }) 387 .unwrap_or(false); 388 389 let disassembly = match (inst.mnemonic(), inst.op_str()) { 390 (Some(i), Some(o)) => { 391 if o.is_empty() { 392 format!("{i}") 393 } else { 394 format!("{i:7} {o}") 395 } 396 } 397 (Some(i), None) => format!("{i}"), 398 _ => unreachable!(), 399 }; 400 401 let address = inst.address(); 402 Inst { 403 address, 404 is_jump, 405 is_return, 406 bytes: inst.bytes().to_vec(), 407 disassembly, 408 } 409 }) 410 .collect::<Vec<_>>(); 411 Ok(insts) 412 } 413 414 /// Same as `dias` above, but just for Pulley. 415 fn disas_pulley(&self, func: &[u8], addr: u64) -> Result<Vec<Inst>> { 416 let mut result = vec![]; 417 418 let mut disas = Disassembler::new(func); 419 disas.offsets(false); 420 disas.hexdump(false); 421 disas.start_offset(usize::try_from(addr).unwrap()); 422 let mut decoder = Decoder::new(); 423 let mut last_disas_pos = 0; 424 loop { 425 let start_addr = disas.bytecode().position(); 426 427 match decoder.decode_one(&mut disas) { 428 // If we got EOF at the initial position, then we're done disassembling. 429 Err(DecodingError::UnexpectedEof { position }) if position == start_addr => break, 430 431 // Otherwise, propagate the error. 432 Err(e) => { 433 return Err(e).context("failed to disassembly pulley bytecode"); 434 } 435 436 Ok(()) => { 437 let bytes_range = start_addr..disas.bytecode().position(); 438 let disassembly = disas.disas()[last_disas_pos..].trim(); 439 last_disas_pos = disas.disas().len(); 440 let address = u64::try_from(start_addr).unwrap() + addr; 441 let is_jump = disassembly.contains("jump") || disassembly.contains("br_"); 442 let is_return = disassembly == "ret"; 443 result.push(Inst { 444 bytes: func[bytes_range].to_vec(), 445 address, 446 is_jump, 447 is_return, 448 disassembly: disassembly.to_string(), 449 }); 450 } 451 } 452 } 453 454 Ok(result) 455 } 456 457 /// Helper to read the input bytes of the `*.cwasm` handling stdin 458 /// automatically. 459 fn read_cwasm(&self) -> Result<Vec<u8>> { 460 if let Some(path) = &self.cwasm { 461 if path != Path::new("-") { 462 return std::fs::read(path).with_context(|| format!("failed to read {path:?}")); 463 } 464 } 465 466 let mut stdin = Vec::new(); 467 std::io::stdin() 468 .read_to_end(&mut stdin) 469 .context("failed to read stdin")?; 470 Ok(stdin) 471 } 472 } 473 474 /// Helper structure to package up metadata about an instruction. 475 struct Inst { 476 address: u64, 477 is_jump: bool, 478 is_return: bool, 479 disassembly: String, 480 bytes: Vec<u8>, 481 } 482 483 #[derive(clap::ValueEnum, Clone, Copy, PartialEq, Eq)] 484 enum Func { 485 All, 486 Wasm, 487 Trampoline, 488 Builtin, 489 Libcall, 490 } 491 492 struct Decorator<'a> { 493 objdump: &'a ObjdumpCommand, 494 addrmap: Option<Peekable<Box<dyn Iterator<Item = (u32, FilePos)> + 'a>>>, 495 traps: Option<Peekable<Box<dyn Iterator<Item = (u32, Trap)> + 'a>>>, 496 stack_maps: Option<Peekable<Box<dyn Iterator<Item = (u32, StackMap<'a>)> + 'a>>>, 497 } 498 499 impl Decorator<'_> { 500 fn decorate(&mut self, address: u64, list: &mut Vec<String>) { 501 self.addrmap(address, list); 502 self.traps(address, list); 503 self.stack_maps(address, list); 504 } 505 506 fn addrmap(&mut self, address: u64, list: &mut Vec<String>) { 507 if !self.objdump.addrmap() { 508 return; 509 } 510 let Some(addrmap) = &mut self.addrmap else { 511 return; 512 }; 513 while let Some((addr, pos)) = addrmap.next_if(|(addr, _pos)| u64::from(*addr) <= address) { 514 if u64::from(addr) != address { 515 continue; 516 } 517 if let Some(offset) = pos.file_offset() { 518 list.push(format!("addrmap: {offset:#x}")); 519 } 520 } 521 } 522 523 fn traps(&mut self, address: u64, list: &mut Vec<String>) { 524 if !self.objdump.traps() { 525 return; 526 } 527 let Some(traps) = &mut self.traps else { 528 return; 529 }; 530 while let Some((addr, trap)) = traps.next_if(|(addr, _pos)| u64::from(*addr) <= address) { 531 if u64::from(addr) != address { 532 continue; 533 } 534 list.push(format!("trap: {trap:?}")); 535 } 536 } 537 538 fn stack_maps(&mut self, address: u64, list: &mut Vec<String>) { 539 if !self.objdump.stack_maps() { 540 return; 541 } 542 let Some(stack_maps) = &mut self.stack_maps else { 543 return; 544 }; 545 while let Some((addr, stack_map)) = 546 stack_maps.next_if(|(addr, _pos)| u64::from(*addr) <= address) 547 { 548 if u64::from(addr) != address { 549 continue; 550 } 551 list.push(format!( 552 "stack_map: frame_size={}, frame_offsets={:?}", 553 stack_map.frame_size(), 554 stack_map.offsets().collect::<Vec<_>>() 555 )); 556 } 557 } 558 } 559