1 #[cfg(feature = "component-model")] 2 use crate::component::Component; 3 use crate::runtime::vm::Backtrace; 4 use crate::{AsContext, CallHook, Module}; 5 use crate::{Engine, prelude::*}; 6 use core::cmp::Ordering; 7 use fxprof_processed_profile::debugid::DebugId; 8 use fxprof_processed_profile::{ 9 CategoryHandle, Frame, FrameFlags, FrameInfo, LibraryInfo, MarkerLocations, MarkerTiming, 10 Profile, ReferenceTimestamp, StaticSchemaMarker, StaticSchemaMarkerField, StringHandle, Symbol, 11 SymbolTable, Timestamp, 12 }; 13 use std::ops::Range; 14 use std::sync::Arc; 15 use std::time::{Duration, Instant}; 16 use wasmtime_environ::demangle_function_name_or_index; 17 18 // TODO: collect more data 19 // - On non-Windows, measure thread-local CPU usage between events with 20 // rustix::time::clock_gettime(ClockId::ThreadCPUTime) 21 // - Report which wasm module, and maybe instance, each frame came from 22 23 /// Collects basic profiling data for a single WebAssembly guest. 24 /// 25 /// This profiler can't provide measurements that are as accurate or detailed 26 /// as a platform-specific profiler, such as `perf` on Linux. On the other 27 /// hand, this profiler works on every platform that Wasmtime supports. Also, 28 /// as an embedder you can use this profiler selectively on individual guest 29 /// instances rather than profiling the entire process. 30 /// 31 /// To use this, you'll need to arrange to call [`GuestProfiler::sample`] at 32 /// regular intervals while the guest is on the stack. The most straightforward 33 /// way to do that is to call it from a callback registered with 34 /// [`Store::epoch_deadline_callback()`](crate::Store::epoch_deadline_callback). 35 /// 36 /// # Accuracy 37 /// 38 /// The data collection granularity is limited by the mechanism you use to 39 /// interrupt guest execution and collect a profiling sample. 40 /// 41 /// If you use epoch interruption, then samples will only be collected at 42 /// function entry points and loop headers. This introduces some bias to the 43 /// results. In addition, samples will only be taken at times when WebAssembly 44 /// functions are running, not during host-calls. 45 /// 46 /// It is technically possible to use fuel interruption instead. That 47 /// introduces worse bias since samples occur after a certain number of 48 /// WebAssembly instructions, which can take different amounts of time. 49 /// 50 /// You may instead be able to use platform-specific methods, such as 51 /// `setitimer(ITIMER_VIRTUAL, ...)` on POSIX-compliant systems, to sample on 52 /// a more accurate interval. The only current requirement is that the guest 53 /// you wish to profile must be on the same stack where you call `sample`, 54 /// and executing within the same thread. However, the `GuestProfiler::sample` 55 /// method is not currently async-signal-safe, so doing this correctly is not 56 /// easy. 57 /// 58 /// # Security 59 /// 60 /// Profiles produced using this profiler do not include any configuration 61 /// details from the host, such as virtual memory addresses, or from any 62 /// WebAssembly modules that you haven't specifically allowed. So for 63 /// example, these profiles should be safe to share with untrusted users 64 /// who have provided untrusted code that you are running in a multi-tenancy 65 /// environment. 66 /// 67 /// However, the profile does include byte offsets into the text section of 68 /// the compiled module, revealing some information about the size of the code 69 /// generated for each module. For user-provided modules, the user could get 70 /// the same information by compiling the module for themself using a similar 71 /// version of Wasmtime on the same target architecture, but for any module 72 /// where they don't already have the WebAssembly module binary available this 73 /// could theoretically lead to an undesirable information disclosure. So you 74 /// should only include user-provided modules in profiles. 75 #[derive(Debug)] 76 pub struct GuestProfiler { 77 profile: Profile, 78 modules: Modules, 79 process: fxprof_processed_profile::ProcessHandle, 80 thread: fxprof_processed_profile::ThreadHandle, 81 start: Instant, 82 marker: CallMarker, 83 } 84 85 #[derive(Debug)] 86 struct ProfiledModule { 87 module: Module, 88 fxprof_libhandle: fxprof_processed_profile::LibraryHandle, 89 text_range: Range<usize>, 90 } 91 92 type Modules = Vec<ProfiledModule>; 93 94 impl GuestProfiler { 95 /// Begin profiling a new guest. When this function is called, the current 96 /// wall-clock time is recorded as the start time for the guest. 97 /// 98 /// The `module_name` parameter is recorded in the profile to help identify 99 /// where the profile came from. 100 /// 101 /// The `interval` parameter should match the rate at which you intend 102 /// to call `sample`. However, this is used as a hint and not required to 103 /// exactly match the real sample rate. 104 /// 105 /// Only modules which are present in the `modules` vector will appear in 106 /// stack traces in this profile. Any stack frames which were executing 107 /// host code or functions from other modules will be omitted. See the 108 /// "Security" section of the [`GuestProfiler`] documentation for guidance 109 /// on what modules should not be included in this list. 110 pub fn new( 111 engine: &Engine, 112 module_name: &str, 113 interval: Duration, 114 modules: impl IntoIterator<Item = (String, Module)>, 115 ) -> Result<Self> { 116 // Check that guest debugging is not enabled. The 117 // instrumentation would make profiling results unreliable, 118 // but more fundamentally, it means that code is cloned per 119 // instantiation (for breakpoint patching) so the logic below 120 // is incorrect. 121 if engine.tunables().debug_guest { 122 anyhow::bail!("Profiling cannot be performed when guest-debugging is enabled."); 123 } 124 125 let zero = ReferenceTimestamp::from_millis_since_unix_epoch(0.0); 126 let mut profile = Profile::new(module_name, zero, interval.into()); 127 128 // Past this point, we just need to think about modules as we pull out 129 // the disparate module information from components. 130 let mut modules: Vec<_> = modules 131 .into_iter() 132 .filter_map(|(name, module)| { 133 assert!(Engine::same(module.engine(), engine)); 134 let compiled = module.compiled_module(); 135 let text_range = { 136 // Assumption: within text, the code for a given module is packed linearly and 137 // is non-overlapping; if this is violated, it should be safe but might result 138 // in incorrect profiling results. 139 let start = compiled.finished_function_ranges().next()?.1.start; 140 let end = compiled.finished_function_ranges().last()?.1.end; 141 142 let start = (module.engine_code().text_range().start + start).raw(); 143 let end = (module.engine_code().text_range().start + end).raw(); 144 start..end 145 }; 146 147 module_symbols(name, &module).map(|lib| { 148 let libhandle = profile.add_lib(lib); 149 ProfiledModule { 150 module, 151 fxprof_libhandle: libhandle, 152 text_range, 153 } 154 }) 155 }) 156 .collect(); 157 158 modules.sort_unstable_by_key(|m| m.text_range.start); 159 160 profile.set_reference_timestamp(std::time::SystemTime::now().into()); 161 let process = profile.add_process(module_name, 0, Timestamp::from_nanos_since_reference(0)); 162 let thread = profile.add_thread(process, 0, Timestamp::from_nanos_since_reference(0), true); 163 let start = Instant::now(); 164 let marker = CallMarker::new(&mut profile); 165 Ok(Self { 166 profile, 167 modules, 168 process, 169 thread, 170 start, 171 marker, 172 }) 173 } 174 175 /// Create a new profiler for the provided component 176 /// 177 /// See [`GuestProfiler::new`] for additional information; this function 178 /// works identically except that it takes a component and sets up 179 /// instrumentation to track calls in each of its constituent modules. 180 #[cfg(feature = "component-model")] 181 pub fn new_component( 182 engine: &Engine, 183 component_name: &str, 184 interval: Duration, 185 component: Component, 186 extra_modules: impl IntoIterator<Item = (String, Module)>, 187 ) -> Result<Self> { 188 let modules = component 189 .static_modules() 190 .map(|m| (m.name().unwrap_or("<unknown>").to_string(), m.clone())) 191 .chain(extra_modules); 192 Self::new(engine, component_name, interval, modules) 193 } 194 195 /// Add a sample to the profile. This function collects a backtrace from 196 /// any stack frames for allowed modules on the current stack. It should 197 /// typically be called from a callback registered using 198 /// [`Store::epoch_deadline_callback()`](crate::Store::epoch_deadline_callback). 199 /// 200 /// The `delta` parameter is the amount of CPU time that was used by this 201 /// guest since the previous sample. It is allowed to pass `Duration::ZERO` 202 /// here if recording CPU usage information is not needed. 203 pub fn sample(&mut self, store: impl AsContext, delta: Duration) { 204 let now = Timestamp::from_nanos_since_reference( 205 self.start.elapsed().as_nanos().try_into().unwrap(), 206 ); 207 let backtrace = Backtrace::new(store.as_context().0); 208 let frames = lookup_frames(&self.modules, &backtrace); 209 let stack = self 210 .profile 211 .intern_stack_frames(self.thread, frames.into_iter()); 212 self.profile 213 .add_sample(self.thread, now, stack, delta.into(), 1); 214 } 215 216 /// Add a marker for transitions between guest and host to the profile. 217 /// This function should typically be called from a callback registered 218 /// using [`Store::call_hook()`](crate::Store::call_hook), and the `kind` 219 /// parameter should be the value of the same type passed into that hook. 220 pub fn call_hook(&mut self, store: impl AsContext, kind: CallHook) { 221 let now = Timestamp::from_nanos_since_reference( 222 self.start.elapsed().as_nanos().try_into().unwrap(), 223 ); 224 match kind { 225 CallHook::CallingWasm | CallHook::ReturningFromWasm => {} 226 CallHook::CallingHost => { 227 let backtrace = Backtrace::new(store.as_context().0); 228 let frames = lookup_frames(&self.modules, &backtrace); 229 let marker = self.profile.add_marker( 230 self.thread, 231 MarkerTiming::IntervalStart(now), 232 self.marker, 233 ); 234 let stack = self 235 .profile 236 .intern_stack_frames(self.thread, frames.into_iter()); 237 self.profile.set_marker_stack(self.thread, marker, stack); 238 } 239 CallHook::ReturningFromHost => { 240 self.profile 241 .add_marker(self.thread, MarkerTiming::IntervalEnd(now), self.marker); 242 } 243 } 244 } 245 246 /// When the guest finishes running, call this function to write the 247 /// profile to the given `output`. The output is a JSON-formatted object in 248 /// the [Firefox "processed profile format"][fmt]. Files in this format may 249 /// be visualized at <https://profiler.firefox.com/>. 250 /// 251 /// [fmt]: https://github.com/firefox-devtools/profiler/blob/main/docs-developer/processed-profile-format.md 252 pub fn finish(mut self, output: impl std::io::Write) -> Result<()> { 253 let now = Timestamp::from_nanos_since_reference( 254 self.start.elapsed().as_nanos().try_into().unwrap(), 255 ); 256 self.profile.set_thread_end_time(self.thread, now); 257 self.profile.set_process_end_time(self.process, now); 258 259 serde_json::to_writer(output, &self.profile)?; 260 Ok(()) 261 } 262 } 263 264 fn module_symbols(name: String, module: &Module) -> Option<LibraryInfo> { 265 let compiled = module.compiled_module(); 266 let symbols = Vec::from_iter( 267 module 268 .env_module() 269 .defined_func_indices() 270 .map(|defined_idx| { 271 let loc = compiled.func_loc(defined_idx); 272 let func_idx = compiled.module().func_index(defined_idx); 273 let mut name = String::new(); 274 demangle_function_name_or_index( 275 &mut name, 276 compiled.func_name(func_idx), 277 defined_idx.as_u32() as usize, 278 ) 279 .unwrap(); 280 Symbol { 281 address: loc.start, 282 size: Some(loc.length), 283 name, 284 } 285 }), 286 ); 287 if symbols.is_empty() { 288 return None; 289 } 290 291 Some(LibraryInfo { 292 name, 293 debug_name: String::new(), 294 path: String::new(), 295 debug_path: String::new(), 296 debug_id: DebugId::nil(), 297 code_id: None, 298 arch: None, 299 symbol_table: Some(Arc::new(SymbolTable::new(symbols))), 300 }) 301 } 302 303 fn lookup_frames<'a>( 304 modules: &'a Modules, 305 backtrace: &'a Backtrace, 306 ) -> impl Iterator<Item = FrameInfo> + 'a { 307 backtrace 308 .frames() 309 // Samply needs to see the oldest frame first, but we list the newest 310 // first, so iterate in reverse. 311 .rev() 312 .filter_map(|frame| { 313 let idx = modules 314 .binary_search_by(|probe| { 315 if probe.text_range.contains(&frame.pc()) { 316 Ordering::Equal 317 } else { 318 probe.text_range.start.cmp(&frame.pc()) 319 } 320 }) 321 .ok()?; 322 let module = modules.get(idx)?; 323 324 // We need to point to the modules full text (not just its functions) as 325 // the offset for the final phase; these can be different for component 326 // model modules. 327 let module_text_start = module.module.text().as_ptr_range().start as usize; 328 return Some(FrameInfo { 329 frame: Frame::RelativeAddressFromReturnAddress( 330 module.fxprof_libhandle, 331 u32::try_from(frame.pc() - module_text_start).unwrap(), 332 ), 333 category_pair: CategoryHandle::OTHER.into(), 334 flags: FrameFlags::empty(), 335 }); 336 }) 337 } 338 339 #[derive(Debug, Clone, Copy)] 340 struct CallMarker { 341 name: StringHandle, 342 } 343 344 impl CallMarker { 345 fn new(profile: &mut Profile) -> Self { 346 let name = profile.intern_string(Self::UNIQUE_MARKER_TYPE_NAME); 347 Self { name } 348 } 349 } 350 351 impl StaticSchemaMarker for CallMarker { 352 const UNIQUE_MARKER_TYPE_NAME: &'static str = "hostcall"; 353 const FIELDS: &'static [StaticSchemaMarkerField] = &[]; 354 const LOCATIONS: MarkerLocations = MarkerLocations::MARKER_CHART 355 .union(MarkerLocations::MARKER_TABLE.union(MarkerLocations::TIMELINE_OVERVIEW)); 356 357 fn name(&self, _profile: &mut Profile) -> StringHandle { 358 self.name 359 } 360 fn category(&self, _profile: &mut Profile) -> CategoryHandle { 361 CategoryHandle::OTHER 362 } 363 fn string_field_value(&self, _field_index: u32) -> StringHandle { 364 unreachable!("no fields") 365 } 366 fn number_field_value(&self, _field_index: u32) -> f64 { 367 unreachable!("no fields") 368 } 369 } 370