1 #[cfg(feature = "component-model")]
2 use crate::component::Component;
3 use crate::runtime::vm::Backtrace;
4 use crate::{AsContext, CallHook, Module};
5 use crate::{Engine, prelude::*};
6 use core::cmp::Ordering;
7 use fxprof_processed_profile::debugid::DebugId;
8 use fxprof_processed_profile::{
9     CategoryHandle, Frame, FrameFlags, FrameInfo, LibraryInfo, MarkerLocations, MarkerTiming,
10     Profile, ReferenceTimestamp, StaticSchemaMarker, StaticSchemaMarkerField, StringHandle, Symbol,
11     SymbolTable, Timestamp,
12 };
13 use std::ops::Range;
14 use std::sync::Arc;
15 use std::time::{Duration, Instant};
16 use wasmtime_environ::demangle_function_name_or_index;
17 
18 // TODO: collect more data
19 // - On non-Windows, measure thread-local CPU usage between events with
20 //   rustix::time::clock_gettime(ClockId::ThreadCPUTime)
21 // - Report which wasm module, and maybe instance, each frame came from
22 
23 /// Collects basic profiling data for a single WebAssembly guest.
24 ///
25 /// This profiler can't provide measurements that are as accurate or detailed
26 /// as a platform-specific profiler, such as `perf` on Linux. On the other
27 /// hand, this profiler works on every platform that Wasmtime supports. Also,
28 /// as an embedder you can use this profiler selectively on individual guest
29 /// instances rather than profiling the entire process.
30 ///
31 /// To use this, you'll need to arrange to call [`GuestProfiler::sample`] at
32 /// regular intervals while the guest is on the stack. The most straightforward
33 /// way to do that is to call it from a callback registered with
34 /// [`Store::epoch_deadline_callback()`](crate::Store::epoch_deadline_callback).
35 ///
36 /// # Accuracy
37 ///
38 /// The data collection granularity is limited by the mechanism you use to
39 /// interrupt guest execution and collect a profiling sample.
40 ///
41 /// If you use epoch interruption, then samples will only be collected at
42 /// function entry points and loop headers. This introduces some bias to the
43 /// results. In addition, samples will only be taken at times when WebAssembly
44 /// functions are running, not during host-calls.
45 ///
46 /// It is technically possible to use fuel interruption instead. That
47 /// introduces worse bias since samples occur after a certain number of
48 /// WebAssembly instructions, which can take different amounts of time.
49 ///
50 /// You may instead be able to use platform-specific methods, such as
51 /// `setitimer(ITIMER_VIRTUAL, ...)` on POSIX-compliant systems, to sample on
52 /// a more accurate interval. The only current requirement is that the guest
53 /// you wish to profile must be on the same stack where you call `sample`,
54 /// and executing within the same thread. However, the `GuestProfiler::sample`
55 /// method is not currently async-signal-safe, so doing this correctly is not
56 /// easy.
57 ///
58 /// # Security
59 ///
60 /// Profiles produced using this profiler do not include any configuration
61 /// details from the host, such as virtual memory addresses, or from any
62 /// WebAssembly modules that you haven't specifically allowed. So for
63 /// example, these profiles should be safe to share with untrusted users
64 /// who have provided untrusted code that you are running in a multi-tenancy
65 /// environment.
66 ///
67 /// However, the profile does include byte offsets into the text section of
68 /// the compiled module, revealing some information about the size of the code
69 /// generated for each module. For user-provided modules, the user could get
70 /// the same information by compiling the module for themself using a similar
71 /// version of Wasmtime on the same target architecture, but for any module
72 /// where they don't already have the WebAssembly module binary available this
73 /// could theoretically lead to an undesirable information disclosure. So you
74 /// should only include user-provided modules in profiles.
75 #[derive(Debug)]
76 pub struct GuestProfiler {
77     profile: Profile,
78     modules: Modules,
79     process: fxprof_processed_profile::ProcessHandle,
80     thread: fxprof_processed_profile::ThreadHandle,
81     start: Instant,
82     marker: CallMarker,
83 }
84 
85 #[derive(Debug)]
86 struct ProfiledModule {
87     module: Module,
88     fxprof_libhandle: fxprof_processed_profile::LibraryHandle,
89     text_range: Range<usize>,
90 }
91 
92 type Modules = Vec<ProfiledModule>;
93 
94 impl GuestProfiler {
95     /// Begin profiling a new guest. When this function is called, the current
96     /// wall-clock time is recorded as the start time for the guest.
97     ///
98     /// The `module_name` parameter is recorded in the profile to help identify
99     /// where the profile came from.
100     ///
101     /// The `interval` parameter should match the rate at which you intend
102     /// to call `sample`. However, this is used as a hint and not required to
103     /// exactly match the real sample rate.
104     ///
105     /// Only modules which are present in the `modules` vector will appear in
106     /// stack traces in this profile. Any stack frames which were executing
107     /// host code or functions from other modules will be omitted. See the
108     /// "Security" section of the [`GuestProfiler`] documentation for guidance
109     /// on what modules should not be included in this list.
new( engine: &Engine, module_name: &str, interval: Duration, modules: impl IntoIterator<Item = (String, Module)>, ) -> Result<Self>110     pub fn new(
111         engine: &Engine,
112         module_name: &str,
113         interval: Duration,
114         modules: impl IntoIterator<Item = (String, Module)>,
115     ) -> Result<Self> {
116         // Check that guest debugging is not enabled. The
117         // instrumentation would make profiling results unreliable,
118         // but more fundamentally, it means that code is cloned per
119         // instantiation (for breakpoint patching) so the logic below
120         // is incorrect.
121         if engine.tunables().debug_guest {
122             crate::bail!("Profiling cannot be performed when guest-debugging is enabled.");
123         }
124 
125         let zero = ReferenceTimestamp::from_millis_since_unix_epoch(0.0);
126         let mut profile = Profile::new(module_name, zero, interval.into());
127 
128         // Past this point, we just need to think about modules as we pull out
129         // the disparate module information from components.
130         let mut modules: Vec<_> = modules
131             .into_iter()
132             .filter_map(|(name, module)| {
133                 assert!(Engine::same(module.engine(), engine));
134                 let compiled = module.compiled_module();
135                 let text_range = {
136                     // Assumption: within text, the code for a given module is packed linearly and
137                     // is non-overlapping; if this is violated, it should be safe but might result
138                     // in incorrect profiling results.
139                     let start = compiled.finished_function_ranges().next()?.1.start;
140                     let end = compiled.finished_function_ranges().last()?.1.end;
141 
142                     let start = (module.engine_code().text_range().start + start).raw();
143                     let end = (module.engine_code().text_range().start + end).raw();
144                     start..end
145                 };
146 
147                 module_symbols(name, &module).map(|lib| {
148                     let libhandle = profile.add_lib(lib);
149                     ProfiledModule {
150                         module,
151                         fxprof_libhandle: libhandle,
152                         text_range,
153                     }
154                 })
155             })
156             .collect();
157 
158         modules.sort_unstable_by_key(|m| m.text_range.start);
159 
160         profile.set_reference_timestamp(std::time::SystemTime::now().into());
161         let process = profile.add_process(module_name, 0, Timestamp::from_nanos_since_reference(0));
162         let thread = profile.add_thread(process, 0, Timestamp::from_nanos_since_reference(0), true);
163         let start = Instant::now();
164         let marker = CallMarker::new(&mut profile);
165         Ok(Self {
166             profile,
167             modules,
168             process,
169             thread,
170             start,
171             marker,
172         })
173     }
174 
175     /// Create a new profiler for the provided component
176     ///
177     /// See [`GuestProfiler::new`] for additional information; this function
178     /// works identically except that it takes a component and sets up
179     /// instrumentation to track calls in each of its constituent modules.
180     #[cfg(feature = "component-model")]
new_component( engine: &Engine, component_name: &str, interval: Duration, component: Component, extra_modules: impl IntoIterator<Item = (String, Module)>, ) -> Result<Self>181     pub fn new_component(
182         engine: &Engine,
183         component_name: &str,
184         interval: Duration,
185         component: Component,
186         extra_modules: impl IntoIterator<Item = (String, Module)>,
187     ) -> Result<Self> {
188         let modules = component
189             .static_modules()
190             .map(|m| (m.name().unwrap_or("<unknown>").to_string(), m.clone()))
191             .chain(extra_modules);
192         Self::new(engine, component_name, interval, modules)
193     }
194 
195     /// Add a sample to the profile. This function collects a backtrace from
196     /// any stack frames for allowed modules on the current stack. It should
197     /// typically be called from a callback registered using
198     /// [`Store::epoch_deadline_callback()`](crate::Store::epoch_deadline_callback).
199     ///
200     /// The `delta` parameter is the amount of CPU time that was used by this
201     /// guest since the previous sample. It is allowed to pass `Duration::ZERO`
202     /// here if recording CPU usage information is not needed.
sample(&mut self, store: impl AsContext, delta: Duration)203     pub fn sample(&mut self, store: impl AsContext, delta: Duration) {
204         let now = Timestamp::from_nanos_since_reference(
205             self.start.elapsed().as_nanos().try_into().unwrap(),
206         );
207         let backtrace = Backtrace::new(store.as_context().0);
208         let frames = lookup_frames(&self.modules, &backtrace);
209         let stack = self
210             .profile
211             .intern_stack_frames(self.thread, frames.into_iter());
212         self.profile
213             .add_sample(self.thread, now, stack, delta.into(), 1);
214     }
215 
216     /// Add a marker for transitions between guest and host to the profile.
217     /// This function should typically be called from a callback registered
218     /// using [`Store::call_hook()`](crate::Store::call_hook), and the `kind`
219     /// parameter should be the value of the same type passed into that hook.
call_hook(&mut self, store: impl AsContext, kind: CallHook)220     pub fn call_hook(&mut self, store: impl AsContext, kind: CallHook) {
221         let now = Timestamp::from_nanos_since_reference(
222             self.start.elapsed().as_nanos().try_into().unwrap(),
223         );
224         match kind {
225             CallHook::CallingWasm | CallHook::ReturningFromWasm => {}
226             CallHook::CallingHost => {
227                 let backtrace = Backtrace::new(store.as_context().0);
228                 let frames = lookup_frames(&self.modules, &backtrace);
229                 let marker = self.profile.add_marker(
230                     self.thread,
231                     MarkerTiming::IntervalStart(now),
232                     self.marker,
233                 );
234                 let stack = self
235                     .profile
236                     .intern_stack_frames(self.thread, frames.into_iter());
237                 self.profile.set_marker_stack(self.thread, marker, stack);
238             }
239             CallHook::ReturningFromHost => {
240                 self.profile
241                     .add_marker(self.thread, MarkerTiming::IntervalEnd(now), self.marker);
242             }
243         }
244     }
245 
246     /// When the guest finishes running, call this function to write the
247     /// profile to the given `output`. The output is a JSON-formatted object in
248     /// the [Firefox "processed profile format"][fmt]. Files in this format may
249     /// be visualized at <https://profiler.firefox.com/>.
250     ///
251     /// [fmt]: https://github.com/firefox-devtools/profiler/blob/main/docs-developer/processed-profile-format.md
finish(mut self, output: impl std::io::Write) -> Result<()>252     pub fn finish(mut self, output: impl std::io::Write) -> Result<()> {
253         let now = Timestamp::from_nanos_since_reference(
254             self.start.elapsed().as_nanos().try_into().unwrap(),
255         );
256         self.profile.set_thread_end_time(self.thread, now);
257         self.profile.set_process_end_time(self.process, now);
258 
259         serde_json::to_writer(output, &self.profile)?;
260         Ok(())
261     }
262 }
263 
module_symbols(name: String, module: &Module) -> Option<LibraryInfo>264 fn module_symbols(name: String, module: &Module) -> Option<LibraryInfo> {
265     let compiled = module.compiled_module();
266     let symbols = Vec::from_iter(
267         module
268             .env_module()
269             .defined_func_indices()
270             .map(|defined_idx| {
271                 let loc = compiled.func_loc(defined_idx);
272                 let func_idx = compiled.module().func_index(defined_idx);
273                 let mut name = String::new();
274                 demangle_function_name_or_index(
275                     &mut name,
276                     compiled.func_name(func_idx),
277                     defined_idx.as_u32() as usize,
278                 )
279                 .unwrap();
280                 Symbol {
281                     address: loc.start,
282                     size: Some(loc.length),
283                     name,
284                 }
285             }),
286     );
287     if symbols.is_empty() {
288         return None;
289     }
290 
291     Some(LibraryInfo {
292         name,
293         debug_name: String::new(),
294         path: String::new(),
295         debug_path: String::new(),
296         debug_id: DebugId::nil(),
297         code_id: None,
298         arch: None,
299         symbol_table: Some(Arc::new(SymbolTable::new(symbols))),
300     })
301 }
302 
lookup_frames<'a>( modules: &'a Modules, backtrace: &'a Backtrace, ) -> impl Iterator<Item = FrameInfo> + 'a303 fn lookup_frames<'a>(
304     modules: &'a Modules,
305     backtrace: &'a Backtrace,
306 ) -> impl Iterator<Item = FrameInfo> + 'a {
307     backtrace
308         .frames()
309         // Samply needs to see the oldest frame first, but we list the newest
310         // first, so iterate in reverse.
311         .rev()
312         .filter_map(|frame| {
313             let idx = modules
314                 .binary_search_by(|probe| {
315                     if probe.text_range.contains(&frame.pc()) {
316                         Ordering::Equal
317                     } else {
318                         probe.text_range.start.cmp(&frame.pc())
319                     }
320                 })
321                 .ok()?;
322             let module = modules.get(idx)?;
323 
324             // We need to point to the modules full text (not just its functions) as
325             // the offset for the final phase; these can be different for component
326             // model modules.
327             let module_text_start = module.module.text().as_ptr_range().start as usize;
328             return Some(FrameInfo {
329                 frame: Frame::RelativeAddressFromReturnAddress(
330                     module.fxprof_libhandle,
331                     u32::try_from(frame.pc() - module_text_start).unwrap(),
332                 ),
333                 category_pair: CategoryHandle::OTHER.into(),
334                 flags: FrameFlags::empty(),
335             });
336         })
337 }
338 
339 #[derive(Debug, Clone, Copy)]
340 struct CallMarker {
341     name: StringHandle,
342 }
343 
344 impl CallMarker {
new(profile: &mut Profile) -> Self345     fn new(profile: &mut Profile) -> Self {
346         let name = profile.intern_string(Self::UNIQUE_MARKER_TYPE_NAME);
347         Self { name }
348     }
349 }
350 
351 impl StaticSchemaMarker for CallMarker {
352     const UNIQUE_MARKER_TYPE_NAME: &'static str = "hostcall";
353     const FIELDS: &'static [StaticSchemaMarkerField] = &[];
354     const LOCATIONS: MarkerLocations = MarkerLocations::MARKER_CHART
355         .union(MarkerLocations::MARKER_TABLE.union(MarkerLocations::TIMELINE_OVERVIEW));
356 
name(&self, _profile: &mut Profile) -> StringHandle357     fn name(&self, _profile: &mut Profile) -> StringHandle {
358         self.name
359     }
category(&self, _profile: &mut Profile) -> CategoryHandle360     fn category(&self, _profile: &mut Profile) -> CategoryHandle {
361         CategoryHandle::OTHER
362     }
string_field_value(&self, _field_index: u32) -> StringHandle363     fn string_field_value(&self, _field_index: u32) -> StringHandle {
364         unreachable!("no fields")
365     }
number_field_value(&self, _field_index: u32) -> f64366     fn number_field_value(&self, _field_index: u32) -> f64 {
367         unreachable!("no fields")
368     }
369 }
370