//! Support for jitdump files which can be used by perf for profiling jitted code. //! Spec definitions for the output format is as described here: //! //! //! Usage Example: //! Record //! sudo perf record -k 1 -e instructions:u target/debug/wasmtime -g --profile=jitdump test.wasm //! Combine //! sudo perf inject -v -j -i perf.data -o perf.jit.data //! Report //! sudo perf report -i perf.jit.data -F+period,srcline use std::fmt::Debug; use std::fs::{File, OpenOptions}; use std::io::{self, Write}; use std::path::Path; use std::ptr; use std::string::String; use std::vec::Vec; use std::{mem, process}; /// Defines jitdump record types #[repr(u32)] pub enum RecordId { /// Value 0: JIT_CODE_LOAD: record describing a jitted function JitCodeLoad = 0, /// Value 1: JIT_CODE_MOVE: record describing an already jitted function which is moved JitCodeMove = 1, /// Value 2: JIT_CODE_DEBUG_INFO: record describing the debug information for a jitted function JitCodeDebugInfo = 2, /// Value 3: JIT_CODE_CLOSE: record marking the end of the jit runtime (optional) JitCodeClose = 3, /// Value 4: JIT_CODE_UNWINDING_INFO: record describing a function unwinding information JitCodeUnwindingInfo = 4, } /// Each record starts with this fixed size record header which describes the record that follows #[derive(Debug, Default, Clone, Copy)] #[repr(C)] pub struct RecordHeader { /// uint32_t id: a value identifying the record type (see below) pub id: u32, /// uint32_t total_size: the size in bytes of the record including the header. pub record_size: u32, /// uint64_t timestamp: a timestamp of when the record was created. pub timestamp: u64, } unsafe impl object::Pod for RecordHeader {} /// The CodeLoadRecord is used for describing jitted functions #[derive(Debug, Default, Clone, Copy)] #[repr(C)] pub struct CodeLoadRecord { /// Fixed sized header that describes this record pub header: RecordHeader, /// `uint32_t pid`: OS process id of the runtime generating the jitted code pub pid: u32, /// `uint32_t tid`: OS thread identification of the runtime thread generating the jitted code pub tid: u32, /// `uint64_t vma`: virtual address of jitted code start pub virtual_address: u64, /// `uint64_t code_addr`: code start address for the jitted code. By default vma = code_addr pub address: u64, /// `uint64_t code_size`: size in bytes of the generated jitted code pub size: u64, /// `uint64_t code_index`: unique identifier for the jitted code (see below) pub index: u64, } unsafe impl object::Pod for CodeLoadRecord {} /// Describes source line information for a jitted function #[derive(Debug, Default)] #[repr(C)] pub struct DebugEntry { /// `uint64_t code_addr`: address of function for which the debug information is generated pub address: u64, /// `uint32_t line`: source file line number (starting at 1) pub line: u32, /// `uint32_t discrim`: column discriminator, 0 is default pub discriminator: u32, /// `char name[n]`: source file name in ASCII, including null termination pub filename: String, } /// Describes debug information for a jitted function. An array of debug entries are /// appended to this record during writing. Note, this record must precede the code /// load record that describes the same jitted function. #[derive(Debug, Default, Clone, Copy)] #[repr(C)] pub struct DebugInfoRecord { /// Fixed sized header that describes this record pub header: RecordHeader, /// `uint64_t code_addr`: address of function for which the debug information is generated pub address: u64, /// `uint64_t nr_entry`: number of debug entries for the function appended to this record pub count: u64, } unsafe impl object::Pod for DebugInfoRecord {} /// Fixed-sized header for each jitdump file #[derive(Debug, Default, Clone, Copy)] #[repr(C)] pub struct FileHeader { /// `uint32_t magic`: a magic number tagging the file type. The value is 4-byte long and represents the /// string "JiTD" in ASCII form. It is 0x4A695444 or 0x4454694a depending on the endianness. The field can /// be used to detect the endianness of the file pub magic: u32, /// `uint32_t version`: a 4-byte value representing the format version. It is currently set to 2 pub version: u32, /// `uint32_t total_size`: size in bytes of file header pub size: u32, /// `uint32_t elf_mach`: ELF architecture encoding (ELF e_machine value as specified in /usr/include/elf.h) pub e_machine: u32, /// `uint32_t pad1`: padding. Reserved for future use pub pad1: u32, /// `uint32_t pid`: JIT runtime process identification (OS specific) pub pid: u32, /// `uint64_t timestamp`: timestamp of when the file was created pub timestamp: u64, /// `uint64_t flags`: a bitmask of flags pub flags: u64, } unsafe impl object::Pod for FileHeader {} /// Interface for driving the creation of jitdump files pub struct JitDumpFile { /// File instance for the jit dump file jitdump_file: File, map_addr: usize, map_len: usize, /// Unique identifier for jitted code code_index: u64, e_machine: u32, } impl JitDumpFile { /// Initialize a JitDumpAgent and write out the header pub fn new(filename: impl AsRef, e_machine: u32) -> io::Result { // Note that the file here is opened in `append` mode to handle the case // that multiple JIT engines in the same process are all writing to the // same jitdump file. In this situation we want to append new records // with what Wasmtime reports and we ideally don't want to interfere // with anything else. let jitdump_file = OpenOptions::new() .read(true) .write(true) .create(true) .append(true) .open(filename.as_ref())?; // After we make our `*.dump` file we execute an `mmap` syscall, // specifically with executable permissions, to map it into our address // space. This is required so `perf inject` will work later. The `perf // inject` command will see that an mmap syscall happened, and it'll see // the filename we mapped, and that'll trigger it to actually read and // parse the file. // // To match what some perf examples are doing we keep this `mmap` alive // until this agent goes away. let map_len = 1024; let map_addr = unsafe { let ptr = rustix::mm::mmap( ptr::null_mut(), map_len, rustix::mm::ProtFlags::EXEC | rustix::mm::ProtFlags::READ, rustix::mm::MapFlags::PRIVATE, &jitdump_file, 0, )?; ptr as usize }; let state = JitDumpFile { jitdump_file, map_addr, map_len, code_index: 0, e_machine, }; state.maybe_write_file_header()?; Ok(state) } } impl JitDumpFile { /// Returns timestamp from a single source pub fn get_time_stamp(&self) -> u64 { // We need to use `CLOCK_MONOTONIC` on Linux which is what `Instant` // conveniently also uses, but `Instant` doesn't allow us to get access // to nanoseconds as an internal detail, so we calculate the nanoseconds // ourselves here. let ts = rustix::time::clock_gettime(rustix::time::ClockId::Monotonic); // TODO: What does it mean for either sec or nsec to be negative? (ts.tv_sec * 1_000_000_000 + ts.tv_nsec) as u64 } /// Returns the next code index pub fn next_code_index(&mut self) -> u64 { let code_index = self.code_index; self.code_index += 1; code_index } /// Helper function to write `bytes` to the jitdump file. /// /// This is effectively a workaround for the limitation of the jitdump file /// format. Ideally Wasmtime would be writing to its own personal file and /// wouldn't have to worry about concurrent modifications, but we don't have /// the luxury of doing that. The jitdump file format requires that there's /// a single file-per-process with records in it. Additionally there might /// be multiple JIT engines in the same process all writing to this file. /// /// To handle this situation a best effort is made to write the entirety of /// `bytes` to the file in one go. The file itself is opened with `O_APPEND` /// meaning that this should work out just fine if the bytes are written in /// one call to the `write` syscall. The problem though is what happens on a /// partial write? /// /// If there are parallel actors in the same process then a partial write /// may mean that the file is now corrupted. For example we could write most /// of `bytes`, but not all, then some other thread writes to the file. The /// question then is what to do in this situation? On one hand an error /// could be returned to inform the user that it's corrupt. On the other /// hand though it's a pretty niche case to have multiple JIT engines in one /// process and it'd be a bummer if we failed to profile functions that /// happened to be big enough to require two calls to `write`. /// /// In the end this for now uses the `write_all` helper in the standard /// library. That means that this will produce corrupt files in the face of /// partial writes when there are other engines also writing to the file. In /// lieu of some actual synchronization protocol between engines though this /// is about the best that we can do. fn maybe_atomic_write_all(&self, bytes: &[u8]) -> io::Result<()> { (&self.jitdump_file).write_all(bytes)?; Ok(()) } fn maybe_write_file_header(&self) -> io::Result<()> { let header = FileHeader { timestamp: self.get_time_stamp(), e_machine: self.e_machine, magic: 0x4A695444, version: 1, size: mem::size_of::() as u32, pad1: 0, pid: process::id(), flags: 0, }; // If it looks like some other engine in the same process has opened the // file and added data already then assume that they were the ones to // add the file header. If it's empty, though, assume we're the ones to // add the file header. // // This is subject to a TOCTOU-style race condition but there's not // really anything we can do about that. That'd require higher-level // coordination in the application to boot up profiling agents serially // or something like that. Either that or a better dump format where we // can place output in our own engine-specific file. Alas. if self.jitdump_file.metadata()?.len() == 0 { self.maybe_atomic_write_all(object::bytes_of(&header))?; } Ok(()) } /// Get raw access to the underlying file that is being written to. pub fn file(&self) -> &File { &self.jitdump_file } /// Get raw mutable access to the underlying file that is being written to. pub fn file_mut(&mut self) -> &mut File { &mut self.jitdump_file } pub fn dump_code_load_record( &mut self, method_name: &str, code: &[u8], timestamp: u64, pid: u32, tid: u32, ) -> io::Result<()> { let name_len = method_name.len() + 1; let size_limit = mem::size_of::(); let rh = RecordHeader { id: RecordId::JitCodeLoad as u32, record_size: size_limit as u32 + name_len as u32 + code.len() as u32, timestamp, }; let clr = CodeLoadRecord { header: rh, pid, tid, virtual_address: code.as_ptr() as u64, address: code.as_ptr() as u64, size: code.len() as u64, index: self.next_code_index(), }; let mut record = Vec::new(); record.extend_from_slice(object::bytes_of(&clr)); record.extend_from_slice(method_name.as_bytes()); record.push(0); // null terminator for the method name record.extend_from_slice(code); self.maybe_atomic_write_all(&record)?; Ok(()) } } impl Drop for JitDumpFile { fn drop(&mut self) { unsafe { rustix::mm::munmap(self.map_addr as *mut _, self.map_len).unwrap(); } } }