1 //! Support for jitdump files which can be used by perf for profiling jitted code.
//! The output format is specified here:
3 //! <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jitdump-specification.txt>
4 //!
5 //! Usage Example:
6 //!     Record
7 //!         sudo perf record -k 1 -e instructions:u target/debug/wasmtime -g --profile=jitdump test.wasm
8 //!     Combine
9 //!         sudo perf inject -v -j -i perf.data -o perf.jit.data
10 //!     Report
11 //!         sudo perf report -i perf.jit.data -F+period,srcline
12 
13 use std::fmt::Debug;
14 use std::fs::{File, OpenOptions};
15 use std::io::{self, Write};
16 use std::path::Path;
17 use std::ptr;
18 use std::string::String;
19 use std::vec::Vec;
20 use std::{mem, process};
21 
/// Defines jitdump record types.
///
/// Each variant's discriminant is the numeric record id; convert with
/// `RecordId::JitCodeLoad as u32` when filling in the `id` field of a record
/// header.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u32)]
pub enum RecordId {
    /// Value 0: JIT_CODE_LOAD: record describing a jitted function
    JitCodeLoad = 0,
    /// Value 1: JIT_CODE_MOVE: record describing an already jitted function which is moved
    JitCodeMove = 1,
    /// Value 2: JIT_CODE_DEBUG_INFO: record describing the debug information for a jitted function
    JitCodeDebugInfo = 2,
    /// Value 3: JIT_CODE_CLOSE: record marking the end of the jit runtime (optional)
    JitCodeClose = 3,
    /// Value 4: JIT_CODE_UNWINDING_INFO: record describing a function unwinding information
    JitCodeUnwindingInfo = 4,
}
36 
/// Each record starts with this fixed size record header which describes the record that follows
///
/// The struct is `#[repr(C)]` and its fields are 4, 4 and 8 bytes wide, so it
/// occupies 16 bytes with no padding between fields.
#[derive(Debug, Default, Clone, Copy)]
#[repr(C)]
pub struct RecordHeader {
    /// uint32_t id: a value identifying the record type (one of the `RecordId` values)
    pub id: u32,
    /// uint32_t total_size: the size in bytes of the record including the header.
    pub record_size: u32,
    /// uint64_t timestamp: a timestamp of when the record was created.
    pub timestamp: u64,
}

// SAFETY: `RecordHeader` is `#[repr(C)]`, contains only integer fields (every
// bit pattern is a valid value) and has no padding bytes.
// NOTE(review): these are assumed to be the requirements of `object::Pod` —
// confirm against the `object` crate documentation.
unsafe impl object::Pod for RecordHeader {}
50 
/// The CodeLoadRecord is used for describing jitted functions
///
/// This struct is only the fixed-size prefix of a code load record. When
/// `JitDumpFile::dump_code_load_record` writes one, these bytes are followed
/// by the NUL-terminated function name and then the raw code bytes, and
/// `header.record_size` covers all three parts.
#[derive(Debug, Default, Clone, Copy)]
#[repr(C)]
pub struct CodeLoadRecord {
    /// Fixed sized header that describes this record
    pub header: RecordHeader,
    /// `uint32_t pid`: OS process id of the runtime generating the jitted code
    pub pid: u32,
    /// `uint32_t tid`: OS thread identification of the runtime thread generating the jitted code
    pub tid: u32,
    /// `uint64_t vma`: virtual address of jitted code start
    pub virtual_address: u64,
    /// `uint64_t code_addr`: code start address for the jitted code. By default vma = code_addr
    pub address: u64,
    /// `uint64_t code_size`: size in bytes of the generated jitted code
    pub size: u64,
    /// `uint64_t code_index`: unique identifier for the jitted code (the writer in this file
    /// uses a per-`JitDumpFile` counter that starts at 0)
    pub index: u64,
}

// SAFETY: `CodeLoadRecord` is `#[repr(C)]`, is built only from integer fields
// and a `RecordHeader` (itself all integers), and has no padding: 16 bytes of
// header, two `u32`s, then four `u64`s.
// NOTE(review): these are assumed to be the requirements of `object::Pod` —
// confirm against the `object` crate documentation.
unsafe impl object::Pod for CodeLoadRecord {}
72 
/// Describes source line information for a jitted function
///
/// Unlike the record structs in this file, this type does not implement
/// `object::Pod`: `filename` is a heap-allocated Rust `String`, so the
/// in-memory representation of this struct is not the on-disk encoding of a
/// debug entry. No code that serializes this type is visible in this file.
#[derive(Debug, Default)]
#[repr(C)]
pub struct DebugEntry {
    /// `uint64_t code_addr`: address of function for which the debug information is generated
    pub address: u64,
    /// `uint32_t line`: source file line number (starting at 1)
    pub line: u32,
    /// `uint32_t discrim`: column discriminator, 0 is default
    pub discriminator: u32,
    /// `char name[n]`: source file name in ASCII, including null termination
    ///
    /// NOTE(review): a Rust `String` carries no implicit NUL terminator, so
    /// presumably whoever serializes this entry must append one — confirm
    /// against the writer.
    pub filename: String,
}
86 
/// Describes debug information for a jitted function. An array of debug entries are
/// appended to this record during writing. Note, this record must precede the code
/// load record that describes the same jitted function.
///
/// This struct is the fixed-size prefix only; it is 32 bytes (a 16-byte
/// header followed by two `u64`s).
#[derive(Debug, Default, Clone, Copy)]
#[repr(C)]
pub struct DebugInfoRecord {
    /// Fixed sized header that describes this record
    pub header: RecordHeader,
    /// `uint64_t code_addr`: address of function for which the debug information is generated
    pub address: u64,
    /// `uint64_t nr_entry`: number of debug entries for the function appended to this record
    pub count: u64,
}

// SAFETY: `DebugInfoRecord` is `#[repr(C)]`, is built only from integer
// fields and a `RecordHeader` (itself all integers), and has no padding.
// NOTE(review): these are assumed to be the requirements of `object::Pod` —
// confirm against the `object` crate documentation.
unsafe impl object::Pod for DebugInfoRecord {}
102 
/// Fixed-sized header for each jitdump file
///
/// The struct is `#[repr(C)]`: six `u32` fields followed by two `u64` fields,
/// 40 bytes in total with no padding.
#[derive(Debug, Default, Clone, Copy)]
#[repr(C)]
pub struct FileHeader {
    /// `uint32_t magic`: a magic number tagging the file type. The value is 4-byte long and represents the
    /// string "JiTD" in ASCII form. It is 0x4A695444 or 0x4454694a depending on the endianness. The field can
    /// be used to detect the endianness of the file
    pub magic: u32,
    /// `uint32_t version`: a 4-byte value representing the format version. The header written by
    /// `JitDumpFile` in this file sets it to 1.
    // NOTE(review): this comment previously said "currently set to 2", which
    // disagreed with the value actually written; confirm the current version
    // against the perf jitdump specification.
    pub version: u32,
    /// `uint32_t total_size`: size in bytes of file header
    pub size: u32,
    /// `uint32_t elf_mach`: ELF architecture encoding (ELF e_machine value as specified in /usr/include/elf.h)
    pub e_machine: u32,
    /// `uint32_t pad1`: padding. Reserved for future use
    pub pad1: u32,
    /// `uint32_t pid`: JIT runtime process identification (OS specific)
    pub pid: u32,
    /// `uint64_t timestamp`: timestamp of when the file was created
    pub timestamp: u64,
    /// `uint64_t flags`: a bitmask of flags
    pub flags: u64,
}

// SAFETY: `FileHeader` is `#[repr(C)]`, contains only integer fields (every
// bit pattern is a valid value) and has no padding bytes.
// NOTE(review): these are assumed to be the requirements of `object::Pod` —
// confirm against the `object` crate documentation.
unsafe impl object::Pod for FileHeader {}
128 
/// Interface for driving the creation of jitdump files
///
/// Owns the open dump file together with a memory mapping of it; the mapping
/// is created in `new` and released when this value is dropped.
pub struct JitDumpFile {
    /// File instance for the jit dump file
    jitdump_file: File,

    /// Address returned by the `mmap` call in `new`, stored as an integer.
    map_addr: usize,
    /// Length in bytes that was passed to `mmap` in `new`; the same length is
    /// passed to `munmap` on drop.
    map_len: usize,

    /// Unique identifier for jitted code. Starts at 0 and is advanced by
    /// `next_code_index`.
    code_index: u64,

    /// Value written to the `e_machine` field of the file header.
    e_machine: u32,
}
142 
143 impl JitDumpFile {
144     /// Initialize a JitDumpAgent and write out the header
new(filename: impl AsRef<Path>, e_machine: u32) -> io::Result<Self>145     pub fn new(filename: impl AsRef<Path>, e_machine: u32) -> io::Result<Self> {
146         // Note that the file here is opened in `append` mode to handle the case
147         // that multiple JIT engines in the same process are all writing to the
148         // same jitdump file. In this situation we want to append new records
149         // with what Wasmtime reports and we ideally don't want to interfere
150         // with anything else.
151         let jitdump_file = OpenOptions::new()
152             .read(true)
153             .write(true)
154             .create(true)
155             .append(true)
156             .open(filename.as_ref())?;
157 
158         // After we make our `*.dump` file we execute an `mmap` syscall,
159         // specifically with executable permissions, to map it into our address
160         // space. This is required so `perf inject` will work later. The `perf
161         // inject` command will see that an mmap syscall happened, and it'll see
162         // the filename we mapped, and that'll trigger it to actually read and
163         // parse the file.
164         //
165         // To match what some perf examples are doing we keep this `mmap` alive
166         // until this agent goes away.
167         let map_len = 1024;
168         let map_addr = unsafe {
169             let ptr = rustix::mm::mmap(
170                 ptr::null_mut(),
171                 map_len,
172                 rustix::mm::ProtFlags::EXEC | rustix::mm::ProtFlags::READ,
173                 rustix::mm::MapFlags::PRIVATE,
174                 &jitdump_file,
175                 0,
176             )?;
177             ptr as usize
178         };
179         let state = JitDumpFile {
180             jitdump_file,
181             map_addr,
182             map_len,
183             code_index: 0,
184             e_machine,
185         };
186         state.maybe_write_file_header()?;
187         Ok(state)
188     }
189 }
190 
191 impl JitDumpFile {
192     /// Returns timestamp from a single source
get_time_stamp(&self) -> u64193     pub fn get_time_stamp(&self) -> u64 {
194         // We need to use `CLOCK_MONOTONIC` on Linux which is what `Instant`
195         // conveniently also uses, but `Instant` doesn't allow us to get access
196         // to nanoseconds as an internal detail, so we calculate the nanoseconds
197         // ourselves here.
198         let ts = rustix::time::clock_gettime(rustix::time::ClockId::Monotonic);
199         // TODO: What does it mean for either sec or nsec to be negative?
200         (ts.tv_sec * 1_000_000_000 + ts.tv_nsec) as u64
201     }
202 
203     /// Returns the next code index
next_code_index(&mut self) -> u64204     pub fn next_code_index(&mut self) -> u64 {
205         let code_index = self.code_index;
206         self.code_index += 1;
207         code_index
208     }
209 
210     /// Helper function to write `bytes` to the jitdump file.
211     ///
212     /// This is effectively a workaround for the limitation of the jitdump file
213     /// format. Ideally Wasmtime would be writing to its own personal file and
214     /// wouldn't have to worry about concurrent modifications, but we don't have
215     /// the luxury of doing that. The jitdump file format requires that there's
216     /// a single file-per-process with records in it. Additionally there might
217     /// be multiple JIT engines in the same process all writing to this file.
218     ///
219     /// To handle this situation a best effort is made to write the entirety of
220     /// `bytes` to the file in one go. The file itself is opened with `O_APPEND`
221     /// meaning that this should work out just fine if the bytes are written in
222     /// one call to the `write` syscall. The problem though is what happens on a
223     /// partial write?
224     ///
225     /// If there are parallel actors in the same process then a partial write
226     /// may mean that the file is now corrupted. For example we could write most
227     /// of `bytes`, but not all, then some other thread writes to the file. The
228     /// question then is what to do in this situation? On one hand an error
229     /// could be returned to inform the user that it's corrupt. On the other
230     /// hand though it's a pretty niche case to have multiple JIT engines in one
231     /// process and it'd be a bummer if we failed to profile functions that
232     /// happened to be big enough to require two calls to `write`.
233     ///
234     /// In the end this for now uses the `write_all` helper in the standard
235     /// library. That means that this will produce corrupt files in the face of
236     /// partial writes when there are other engines also writing to the file. In
237     /// lieu of some actual synchronization protocol between engines though this
238     /// is about the best that we can do.
maybe_atomic_write_all(&self, bytes: &[u8]) -> io::Result<()>239     fn maybe_atomic_write_all(&self, bytes: &[u8]) -> io::Result<()> {
240         (&self.jitdump_file).write_all(bytes)?;
241         Ok(())
242     }
243 
maybe_write_file_header(&self) -> io::Result<()>244     fn maybe_write_file_header(&self) -> io::Result<()> {
245         let header = FileHeader {
246             timestamp: self.get_time_stamp(),
247             e_machine: self.e_machine,
248             magic: 0x4A695444,
249             version: 1,
250             size: mem::size_of::<FileHeader>() as u32,
251             pad1: 0,
252             pid: process::id(),
253             flags: 0,
254         };
255 
256         // If it looks like some other engine in the same process has opened the
257         // file and added data already then assume that they were the ones to
258         // add the file header. If it's empty, though, assume we're the ones to
259         // add the file header.
260         //
261         // This is subject to a TOCTOU-style race condition but there's not
262         // really anything we can do about that. That'd require higher-level
263         // coordination in the application to boot up profiling agents serially
264         // or something like that. Either that or a better dump format where we
265         // can place output in our own engine-specific file. Alas.
266         if self.jitdump_file.metadata()?.len() == 0 {
267             self.maybe_atomic_write_all(object::bytes_of(&header))?;
268         }
269         Ok(())
270     }
271 
272     /// Get raw access to the underlying file that is being written to.
file(&self) -> &File273     pub fn file(&self) -> &File {
274         &self.jitdump_file
275     }
276 
277     /// Get raw mutable access to the underlying file that is being written to.
file_mut(&mut self) -> &mut File278     pub fn file_mut(&mut self) -> &mut File {
279         &mut self.jitdump_file
280     }
281 
dump_code_load_record( &mut self, method_name: &str, code: &[u8], timestamp: u64, pid: u32, tid: u32, ) -> io::Result<()>282     pub fn dump_code_load_record(
283         &mut self,
284         method_name: &str,
285         code: &[u8],
286         timestamp: u64,
287         pid: u32,
288         tid: u32,
289     ) -> io::Result<()> {
290         let name_len = method_name.len() + 1;
291         let size_limit = mem::size_of::<CodeLoadRecord>();
292 
293         let rh = RecordHeader {
294             id: RecordId::JitCodeLoad as u32,
295             record_size: size_limit as u32 + name_len as u32 + code.len() as u32,
296             timestamp,
297         };
298 
299         let clr = CodeLoadRecord {
300             header: rh,
301             pid,
302             tid,
303             virtual_address: code.as_ptr() as u64,
304             address: code.as_ptr() as u64,
305             size: code.len() as u64,
306             index: self.next_code_index(),
307         };
308 
309         let mut record = Vec::new();
310         record.extend_from_slice(object::bytes_of(&clr));
311         record.extend_from_slice(method_name.as_bytes());
312         record.push(0); // null terminator for the method name
313         record.extend_from_slice(code);
314         self.maybe_atomic_write_all(&record)?;
315         Ok(())
316     }
317 }
318 
319 impl Drop for JitDumpFile {
drop(&mut self)320     fn drop(&mut self) {
321         unsafe {
322             rustix::mm::munmap(self.map_addr as *mut _, self.map_len).unwrap();
323         }
324     }
325 }
326