1 //! Support for jitdump files which can be used by perf for profiling jitted code. 2 //! Spec definitions for the output format is as described here: 3 //! <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jitdump-specification.txt> 4 //! 5 //! Usage Example: 6 //! Record 7 //! sudo perf record -k 1 -e instructions:u target/debug/wasmtime -g --profile=jitdump test.wasm 8 //! Combine 9 //! sudo perf inject -v -j -i perf.data -o perf.jit.data 10 //! Report 11 //! sudo perf report -i perf.jit.data -F+period,srcline 12 13 use std::fmt::Debug; 14 use std::fs::{File, OpenOptions}; 15 use std::io::{self, Write}; 16 use std::path::Path; 17 use std::ptr; 18 use std::string::String; 19 use std::vec::Vec; 20 use std::{mem, process}; 21 22 /// Defines jitdump record types 23 #[repr(u32)] 24 pub enum RecordId { 25 /// Value 0: JIT_CODE_LOAD: record describing a jitted function 26 JitCodeLoad = 0, 27 /// Value 1: JIT_CODE_MOVE: record describing an already jitted function which is moved 28 JitCodeMove = 1, 29 /// Value 2: JIT_CODE_DEBUG_INFO: record describing the debug information for a jitted function 30 JitCodeDebugInfo = 2, 31 /// Value 3: JIT_CODE_CLOSE: record marking the end of the jit runtime (optional) 32 JitCodeClose = 3, 33 /// Value 4: JIT_CODE_UNWINDING_INFO: record describing a function unwinding information 34 JitCodeUnwindingInfo = 4, 35 } 36 37 /// Each record starts with this fixed size record header which describes the record that follows 38 #[derive(Debug, Default, Clone, Copy)] 39 #[repr(C)] 40 pub struct RecordHeader { 41 /// uint32_t id: a value identifying the record type (see below) 42 pub id: u32, 43 /// uint32_t total_size: the size in bytes of the record including the header. 44 pub record_size: u32, 45 /// uint64_t timestamp: a timestamp of when the record was created. 46 pub timestamp: u64, 47 } 48 49 unsafe impl object::Pod for RecordHeader {} 50 51 /// The CodeLoadRecord is used for describing jitted functions 52 #[derive(Debug, Default, Clone, Copy)] 53 #[repr(C)] 54 pub struct CodeLoadRecord { 55 /// Fixed sized header that describes this record 56 pub header: RecordHeader, 57 /// `uint32_t pid`: OS process id of the runtime generating the jitted code 58 pub pid: u32, 59 /// `uint32_t tid`: OS thread identification of the runtime thread generating the jitted code 60 pub tid: u32, 61 /// `uint64_t vma`: virtual address of jitted code start 62 pub virtual_address: u64, 63 /// `uint64_t code_addr`: code start address for the jitted code. By default vma = code_addr 64 pub address: u64, 65 /// `uint64_t code_size`: size in bytes of the generated jitted code 66 pub size: u64, 67 /// `uint64_t code_index`: unique identifier for the jitted code (see below) 68 pub index: u64, 69 } 70 71 unsafe impl object::Pod for CodeLoadRecord {} 72 73 /// Describes source line information for a jitted function 74 #[derive(Debug, Default)] 75 #[repr(C)] 76 pub struct DebugEntry { 77 /// `uint64_t code_addr`: address of function for which the debug information is generated 78 pub address: u64, 79 /// `uint32_t line`: source file line number (starting at 1) 80 pub line: u32, 81 /// `uint32_t discrim`: column discriminator, 0 is default 82 pub discriminator: u32, 83 /// `char name[n]`: source file name in ASCII, including null termination 84 pub filename: String, 85 } 86 87 /// Describes debug information for a jitted function. An array of debug entries are 88 /// appended to this record during writing. Note, this record must precede the code 89 /// load record that describes the same jitted function. 90 #[derive(Debug, Default, Clone, Copy)] 91 #[repr(C)] 92 pub struct DebugInfoRecord { 93 /// Fixed sized header that describes this record 94 pub header: RecordHeader, 95 /// `uint64_t code_addr`: address of function for which the debug information is generated 96 pub address: u64, 97 /// `uint64_t nr_entry`: number of debug entries for the function appended to this record 98 pub count: u64, 99 } 100 101 unsafe impl object::Pod for DebugInfoRecord {} 102 103 /// Fixed-sized header for each jitdump file 104 #[derive(Debug, Default, Clone, Copy)] 105 #[repr(C)] 106 pub struct FileHeader { 107 /// `uint32_t magic`: a magic number tagging the file type. The value is 4-byte long and represents the 108 /// string "JiTD" in ASCII form. It is 0x4A695444 or 0x4454694a depending on the endianness. The field can 109 /// be used to detect the endianness of the file 110 pub magic: u32, 111 /// `uint32_t version`: a 4-byte value representing the format version. It is currently set to 2 112 pub version: u32, 113 /// `uint32_t total_size`: size in bytes of file header 114 pub size: u32, 115 /// `uint32_t elf_mach`: ELF architecture encoding (ELF e_machine value as specified in /usr/include/elf.h) 116 pub e_machine: u32, 117 /// `uint32_t pad1`: padding. Reserved for future use 118 pub pad1: u32, 119 /// `uint32_t pid`: JIT runtime process identification (OS specific) 120 pub pid: u32, 121 /// `uint64_t timestamp`: timestamp of when the file was created 122 pub timestamp: u64, 123 /// `uint64_t flags`: a bitmask of flags 124 pub flags: u64, 125 } 126 127 unsafe impl object::Pod for FileHeader {} 128 129 /// Interface for driving the creation of jitdump files 130 pub struct JitDumpFile { 131 /// File instance for the jit dump file 132 jitdump_file: File, 133 134 map_addr: usize, 135 map_len: usize, 136 137 /// Unique identifier for jitted code 138 code_index: u64, 139 140 e_machine: u32, 141 } 142 143 impl JitDumpFile { 144 /// Initialize a JitDumpAgent and write out the header new(filename: impl AsRef<Path>, e_machine: u32) -> io::Result<Self>145 pub fn new(filename: impl AsRef<Path>, e_machine: u32) -> io::Result<Self> { 146 // Note that the file here is opened in `append` mode to handle the case 147 // that multiple JIT engines in the same process are all writing to the 148 // same jitdump file. In this situation we want to append new records 149 // with what Wasmtime reports and we ideally don't want to interfere 150 // with anything else. 151 let jitdump_file = OpenOptions::new() 152 .read(true) 153 .write(true) 154 .create(true) 155 .append(true) 156 .open(filename.as_ref())?; 157 158 // After we make our `*.dump` file we execute an `mmap` syscall, 159 // specifically with executable permissions, to map it into our address 160 // space. This is required so `perf inject` will work later. The `perf 161 // inject` command will see that an mmap syscall happened, and it'll see 162 // the filename we mapped, and that'll trigger it to actually read and 163 // parse the file. 164 // 165 // To match what some perf examples are doing we keep this `mmap` alive 166 // until this agent goes away. 167 let map_len = 1024; 168 let map_addr = unsafe { 169 let ptr = rustix::mm::mmap( 170 ptr::null_mut(), 171 map_len, 172 rustix::mm::ProtFlags::EXEC | rustix::mm::ProtFlags::READ, 173 rustix::mm::MapFlags::PRIVATE, 174 &jitdump_file, 175 0, 176 )?; 177 ptr as usize 178 }; 179 let state = JitDumpFile { 180 jitdump_file, 181 map_addr, 182 map_len, 183 code_index: 0, 184 e_machine, 185 }; 186 state.maybe_write_file_header()?; 187 Ok(state) 188 } 189 } 190 191 impl JitDumpFile { 192 /// Returns timestamp from a single source get_time_stamp(&self) -> u64193 pub fn get_time_stamp(&self) -> u64 { 194 // We need to use `CLOCK_MONOTONIC` on Linux which is what `Instant` 195 // conveniently also uses, but `Instant` doesn't allow us to get access 196 // to nanoseconds as an internal detail, so we calculate the nanoseconds 197 // ourselves here. 198 let ts = rustix::time::clock_gettime(rustix::time::ClockId::Monotonic); 199 // TODO: What does it mean for either sec or nsec to be negative? 200 (ts.tv_sec * 1_000_000_000 + ts.tv_nsec) as u64 201 } 202 203 /// Returns the next code index next_code_index(&mut self) -> u64204 pub fn next_code_index(&mut self) -> u64 { 205 let code_index = self.code_index; 206 self.code_index += 1; 207 code_index 208 } 209 210 /// Helper function to write `bytes` to the jitdump file. 211 /// 212 /// This is effectively a workaround for the limitation of the jitdump file 213 /// format. Ideally Wasmtime would be writing to its own personal file and 214 /// wouldn't have to worry about concurrent modifications, but we don't have 215 /// the luxury of doing that. The jitdump file format requires that there's 216 /// a single file-per-process with records in it. Additionally there might 217 /// be multiple JIT engines in the same process all writing to this file. 218 /// 219 /// To handle this situation a best effort is made to write the entirety of 220 /// `bytes` to the file in one go. The file itself is opened with `O_APPEND` 221 /// meaning that this should work out just fine if the bytes are written in 222 /// one call to the `write` syscall. The problem though is what happens on a 223 /// partial write? 224 /// 225 /// If there are parallel actors in the same process then a partial write 226 /// may mean that the file is now corrupted. For example we could write most 227 /// of `bytes`, but not all, then some other thread writes to the file. The 228 /// question then is what to do in this situation? On one hand an error 229 /// could be returned to inform the user that it's corrupt. On the other 230 /// hand though it's a pretty niche case to have multiple JIT engines in one 231 /// process and it'd be a bummer if we failed to profile functions that 232 /// happened to be big enough to require two calls to `write`. 233 /// 234 /// In the end this for now uses the `write_all` helper in the standard 235 /// library. That means that this will produce corrupt files in the face of 236 /// partial writes when there are other engines also writing to the file. In 237 /// lieu of some actual synchronization protocol between engines though this 238 /// is about the best that we can do. maybe_atomic_write_all(&self, bytes: &[u8]) -> io::Result<()>239 fn maybe_atomic_write_all(&self, bytes: &[u8]) -> io::Result<()> { 240 (&self.jitdump_file).write_all(bytes)?; 241 Ok(()) 242 } 243 maybe_write_file_header(&self) -> io::Result<()>244 fn maybe_write_file_header(&self) -> io::Result<()> { 245 let header = FileHeader { 246 timestamp: self.get_time_stamp(), 247 e_machine: self.e_machine, 248 magic: 0x4A695444, 249 version: 1, 250 size: mem::size_of::<FileHeader>() as u32, 251 pad1: 0, 252 pid: process::id(), 253 flags: 0, 254 }; 255 256 // If it looks like some other engine in the same process has opened the 257 // file and added data already then assume that they were the ones to 258 // add the file header. If it's empty, though, assume we're the ones to 259 // add the file header. 260 // 261 // This is subject to a TOCTOU-style race condition but there's not 262 // really anything we can do about that. That'd require higher-level 263 // coordination in the application to boot up profiling agents serially 264 // or something like that. Either that or a better dump format where we 265 // can place output in our own engine-specific file. Alas. 266 if self.jitdump_file.metadata()?.len() == 0 { 267 self.maybe_atomic_write_all(object::bytes_of(&header))?; 268 } 269 Ok(()) 270 } 271 272 /// Get raw access to the underlying file that is being written to. file(&self) -> &File273 pub fn file(&self) -> &File { 274 &self.jitdump_file 275 } 276 277 /// Get raw mutable access to the underlying file that is being written to. file_mut(&mut self) -> &mut File278 pub fn file_mut(&mut self) -> &mut File { 279 &mut self.jitdump_file 280 } 281 dump_code_load_record( &mut self, method_name: &str, code: &[u8], timestamp: u64, pid: u32, tid: u32, ) -> io::Result<()>282 pub fn dump_code_load_record( 283 &mut self, 284 method_name: &str, 285 code: &[u8], 286 timestamp: u64, 287 pid: u32, 288 tid: u32, 289 ) -> io::Result<()> { 290 let name_len = method_name.len() + 1; 291 let size_limit = mem::size_of::<CodeLoadRecord>(); 292 293 let rh = RecordHeader { 294 id: RecordId::JitCodeLoad as u32, 295 record_size: size_limit as u32 + name_len as u32 + code.len() as u32, 296 timestamp, 297 }; 298 299 let clr = CodeLoadRecord { 300 header: rh, 301 pid, 302 tid, 303 virtual_address: code.as_ptr() as u64, 304 address: code.as_ptr() as u64, 305 size: code.len() as u64, 306 index: self.next_code_index(), 307 }; 308 309 let mut record = Vec::new(); 310 record.extend_from_slice(object::bytes_of(&clr)); 311 record.extend_from_slice(method_name.as_bytes()); 312 record.push(0); // null terminator for the method name 313 record.extend_from_slice(code); 314 self.maybe_atomic_write_all(&record)?; 315 Ok(()) 316 } 317 } 318 319 impl Drop for JitDumpFile { drop(&mut self)320 fn drop(&mut self) { 321 unsafe { 322 rustix::mm::munmap(self.map_addr as *mut _, self.map_len).unwrap(); 323 } 324 } 325 } 326