1 //! Pass timing. 2 //! 3 //! This modules provides facilities for timing the execution of individual compilation passes. 4 5 use alloc::boxed::Box; 6 use core::any::Any; 7 use core::fmt; 8 9 // Each pass that can be timed is predefined with the `define_passes!` macro. Each pass has a 10 // snake_case name and a plain text description used when printing out the timing report. 11 // 12 // This macro defines: 13 // 14 // - A C-style enum containing all the pass names and a `None` variant. 15 // - A usize constant with the number of defined passes. 16 // - A const array of pass descriptions. 17 // - A public function per pass used to start the timing of that pass. 18 macro_rules! define_passes { 19 ($($pass:ident: $desc:expr,)+) => { 20 /// A single profiled pass. 21 #[expect(non_camel_case_types, reason = "macro-generated code")] 22 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 23 pub enum Pass { 24 $(#[doc=$desc] $pass,)+ 25 /// No active pass. 26 None, 27 } 28 29 /// The amount of profiled passes. 30 pub const NUM_PASSES: usize = Pass::None as usize; 31 32 const DESCRIPTIONS: [&str; NUM_PASSES] = [ $($desc),+ ]; 33 34 $( 35 #[doc=$desc] 36 #[must_use] 37 pub fn $pass() -> Box<dyn Any> { 38 start_pass(Pass::$pass) 39 } 40 )+ 41 } 42 } 43 44 // Pass definitions. 45 define_passes! { 46 // All these are used in other crates but defined here so they appear in the unified 47 // `PassTimes` output. 48 process_file: "Processing test file", 49 parse_text: "Parsing textual Cranelift IR", 50 wasm_translate_module: "Translate WASM module", 51 wasm_translate_function: "Translate WASM function", 52 53 verifier: "Verify Cranelift IR", 54 55 compile: "Compilation passes", 56 try_incremental_cache: "Try loading from incremental cache", 57 store_incremental_cache: "Store in incremental cache", 58 flowgraph: "Control flow graph", 59 domtree: "Dominator tree", 60 loop_analysis: "Loop analysis", 61 preopt: "Pre-legalization rewriting", 62 egraph: "Egraph based optimizations", 63 gvn: "Global value numbering", 64 licm: "Loop invariant code motion", 65 unreachable_code: "Remove unreachable blocks", 66 remove_constant_phis: "Remove constant phi-nodes", 67 68 vcode_lower: "VCode lowering", 69 vcode_emit: "VCode emission", 70 vcode_emit_finish: "VCode emission finalization", 71 72 regalloc: "Register allocation", 73 regalloc_checker: "Register allocation symbolic verification", 74 layout_renumber: "Layout full renumbering", 75 76 canonicalize_nans: "Canonicalization of NaNs", 77 } 78 79 impl Pass { idx(self) -> usize80 fn idx(self) -> usize { 81 self as usize 82 } 83 84 /// Description of the pass. description(self) -> &'static str85 pub fn description(self) -> &'static str { 86 match DESCRIPTIONS.get(self.idx()) { 87 Some(s) => s, 88 None => "<no pass>", 89 } 90 } 91 } 92 93 impl fmt::Display for Pass { fmt(&self, f: &mut fmt::Formatter) -> fmt::Result94 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 95 f.write_str(self.description()) 96 } 97 } 98 99 /// A profiler. 100 pub trait Profiler { 101 /// Start a profiling pass. 102 /// 103 /// Will return a token which when dropped indicates the end of the pass. 104 /// 105 /// Multiple passes can be active at the same time, but they must be started and stopped in a 106 /// LIFO fashion. start_pass(&self, pass: Pass) -> Box<dyn Any>107 fn start_pass(&self, pass: Pass) -> Box<dyn Any>; 108 } 109 110 /// The default profiler. You can get the results using [`take_current`]. 111 pub struct DefaultProfiler; 112 113 #[cfg(not(feature = "timing"))] 114 pub(crate) use disabled::*; 115 #[cfg(feature = "timing")] 116 pub use enabled::*; 117 118 #[cfg(feature = "timing")] 119 mod enabled { 120 use super::{DESCRIPTIONS, DefaultProfiler, NUM_PASSES, Pass, Profiler}; 121 use alloc::boxed::Box; 122 use core::any::Any; 123 use core::cell::{Cell, RefCell}; 124 use core::fmt; 125 use core::mem; 126 use core::time::Duration; 127 use std::time::Instant; 128 129 // Information about passes in a single thread. 130 thread_local! { 131 static PROFILER: RefCell<Box<dyn Profiler>> = RefCell::new(Box::new(DefaultProfiler)); 132 } 133 134 /// Set the profiler for the current thread. 135 /// 136 /// Returns the old profiler. set_thread_profiler(new_profiler: Box<dyn Profiler>) -> Box<dyn Profiler>137 pub fn set_thread_profiler(new_profiler: Box<dyn Profiler>) -> Box<dyn Profiler> { 138 PROFILER.with(|profiler| core::mem::replace(&mut *profiler.borrow_mut(), new_profiler)) 139 } 140 141 /// Start timing `pass` as a child of the currently running pass, if any. 142 /// 143 /// This function is called by the publicly exposed pass functions. start_pass(pass: Pass) -> Box<dyn Any>144 pub fn start_pass(pass: Pass) -> Box<dyn Any> { 145 PROFILER.with(|profiler| profiler.borrow().start_pass(pass)) 146 } 147 148 /// Accumulated timing information for a single pass. 149 #[derive(Default, Copy, Clone)] 150 struct PassTime { 151 /// Total time spent running this pass including children. 152 total: Duration, 153 154 /// Time spent running in child passes. 155 child: Duration, 156 } 157 158 /// Accumulated timing for all passes. 159 pub struct PassTimes { 160 pass: [PassTime; NUM_PASSES], 161 } 162 163 impl PassTimes { 164 /// Add `other` to the timings of this `PassTimes`. add(&mut self, other: &Self)165 pub fn add(&mut self, other: &Self) { 166 for (a, b) in self.pass.iter_mut().zip(&other.pass[..]) { 167 a.total += b.total; 168 a.child += b.child; 169 } 170 } 171 172 /// Returns the total amount of time taken by all the passes measured. total(&self) -> Duration173 pub fn total(&self) -> Duration { 174 self.pass.iter().map(|p| p.total - p.child).sum() 175 } 176 } 177 178 impl Default for PassTimes { default() -> Self179 fn default() -> Self { 180 Self { 181 pass: [Default::default(); NUM_PASSES], 182 } 183 } 184 } 185 186 impl fmt::Display for PassTimes { fmt(&self, f: &mut fmt::Formatter) -> fmt::Result187 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 188 writeln!(f, "======== ======== ==================================")?; 189 writeln!(f, " Total Self Pass")?; 190 writeln!(f, "-------- -------- ----------------------------------")?; 191 for (time, desc) in self.pass.iter().zip(&DESCRIPTIONS[..]) { 192 // Omit passes that haven't run. 193 if time.total == Duration::default() { 194 continue; 195 } 196 197 // Write a duration as secs.millis, trailing space. 198 fn fmtdur(mut dur: Duration, f: &mut fmt::Formatter) -> fmt::Result { 199 // Round to nearest ms by adding 500us. 200 dur += Duration::new(0, 500_000); 201 let ms = dur.subsec_millis(); 202 write!(f, "{:4}.{:03} ", dur.as_secs(), ms) 203 } 204 205 fmtdur(time.total, f)?; 206 if let Some(s) = time.total.checked_sub(time.child) { 207 fmtdur(s, f)?; 208 } 209 writeln!(f, " {desc}")?; 210 } 211 writeln!(f, "======== ======== ==================================") 212 } 213 } 214 215 // Information about passes in a single thread. 216 thread_local! { 217 static PASS_TIME: RefCell<PassTimes> = RefCell::new(Default::default()); 218 } 219 220 /// Take the current accumulated pass timings and reset the timings for the current thread. 221 /// 222 /// Only applies when [`DefaultProfiler`] is used. 223 /// 224 /// # Panics 225 /// 226 /// Panics when any pass timing token is currently active. take_current() -> PassTimes227 pub fn take_current() -> PassTimes { 228 assert_eq!( 229 CURRENT_PASS.with(|p| p.get()), 230 Pass::None, 231 "Cannot take_current() on profiler while a pass is active" 232 ); 233 PASS_TIME.with(|rc| mem::take(&mut *rc.borrow_mut())) 234 } 235 236 // Information about passes in a single thread. 237 thread_local! { 238 static CURRENT_PASS: Cell<Pass> = const { Cell::new(Pass::None) }; 239 static LAST_INSTANT: Cell<Option<Instant>> = Cell::new(None); 240 } 241 242 /// Provides a guaranteed-never-decreasing `Instant`. This is 243 /// supposed to be guaranteed by the operating system APIs that 244 /// are used to implement `Instant::now()`, but apparently 245 /// sometimes kernel bugs may result in this moving backward, and 246 /// if it does, our invariants here are violated, possibly 247 /// resulting in panics when we process nested time spans. So we 248 /// backstop against any bugs by tracking the last-returned 249 /// `Instant` and never regressing before it. monotonic_instant() -> Instant250 fn monotonic_instant() -> Instant { 251 let now = match LAST_INSTANT.get() { 252 None => Instant::now(), 253 Some(prev) => { 254 let system_now = Instant::now(); 255 core::cmp::max(prev, system_now) 256 } 257 }; 258 LAST_INSTANT.set(Some(now)); 259 now 260 } 261 262 impl Profiler for DefaultProfiler { start_pass(&self, pass: Pass) -> Box<dyn Any>263 fn start_pass(&self, pass: Pass) -> Box<dyn Any> { 264 let prev = CURRENT_PASS.with(|p| p.replace(pass)); 265 log::debug!("timing: Starting {pass}, (during {prev})"); 266 Box::new(DefaultTimingToken { 267 start: monotonic_instant(), 268 pass, 269 prev, 270 }) 271 } 272 } 273 274 /// A timing token is responsible for timing the currently running pass. Timing starts when it 275 /// is created and ends when it is dropped. 276 /// 277 /// Multiple passes can be active at the same time, but they must be started and stopped in a 278 /// LIFO fashion. 279 struct DefaultTimingToken { 280 /// Start time for this pass. 281 start: Instant, 282 283 // Pass being timed by this token. 284 pass: Pass, 285 286 // The previously active pass which will be restored when this token is dropped. 287 prev: Pass, 288 } 289 290 /// Dropping a timing token indicated the end of the pass. 291 impl Drop for DefaultTimingToken { drop(&mut self)292 fn drop(&mut self) { 293 let now = monotonic_instant(); 294 let duration = self.start.duration_since(now); 295 log::debug!("timing: Ending {}: {}ms", self.pass, duration.as_millis()); 296 let old_cur = CURRENT_PASS.with(|p| p.replace(self.prev)); 297 assert_eq!(self.pass, old_cur, "Timing tokens dropped out of order"); 298 PASS_TIME.with(|rc| { 299 let mut table = rc.borrow_mut(); 300 table.pass[self.pass.idx()].total += duration; 301 if let Some(parent) = table.pass.get_mut(self.prev.idx()) { 302 parent.child += duration; 303 } 304 }) 305 } 306 } 307 } 308 309 #[cfg(not(feature = "timing"))] 310 mod disabled { 311 use super::{DefaultProfiler, Pass, Profiler}; 312 use alloc::boxed::Box; 313 use core::any::Any; 314 315 impl Profiler for DefaultProfiler { start_pass(&self, _pass: Pass) -> Box<dyn Any>316 fn start_pass(&self, _pass: Pass) -> Box<dyn Any> { 317 Box::new(()) 318 } 319 } 320 start_pass(_pass: Pass) -> Box<dyn Any>321 pub(crate) fn start_pass(_pass: Pass) -> Box<dyn Any> { 322 Box::new(()) 323 } 324 } 325 326 #[cfg(test)] 327 mod tests { 328 use super::*; 329 use alloc::string::ToString; 330 331 #[test] display()332 fn display() { 333 assert_eq!(Pass::None.to_string(), "<no pass>"); 334 assert_eq!(Pass::regalloc.to_string(), "Register allocation"); 335 } 336 } 337