1 //! Pass timing.
2 //!
3 //! This modules provides facilities for timing the execution of individual compilation passes.
4 
5 use alloc::boxed::Box;
6 use core::any::Any;
7 use core::fmt;
8 
9 // Each pass that can be timed is predefined with the `define_passes!` macro. Each pass has a
10 // snake_case name and a plain text description used when printing out the timing report.
11 //
12 // This macro defines:
13 //
14 // - A C-style enum containing all the pass names and a `None` variant.
15 // - A usize constant with the number of defined passes.
16 // - A const array of pass descriptions.
17 // - A public function per pass used to start the timing of that pass.
18 macro_rules! define_passes {
19     ($($pass:ident: $desc:expr,)+) => {
20         /// A single profiled pass.
21         #[expect(non_camel_case_types, reason = "macro-generated code")]
22         #[derive(Clone, Copy, Debug, PartialEq, Eq)]
23         pub enum Pass {
24             $(#[doc=$desc] $pass,)+
25             /// No active pass.
26             None,
27         }
28 
29         /// The amount of profiled passes.
30         pub const NUM_PASSES: usize = Pass::None as usize;
31 
32         const DESCRIPTIONS: [&str; NUM_PASSES] = [ $($desc),+ ];
33 
34         $(
35             #[doc=$desc]
36             #[must_use]
37             pub fn $pass() -> Box<dyn Any> {
38                 start_pass(Pass::$pass)
39             }
40         )+
41     }
42 }
43 
44 // Pass definitions.
45 define_passes! {
46     // All these are used in other crates but defined here so they appear in the unified
47     // `PassTimes` output.
48     process_file: "Processing test file",
49     parse_text: "Parsing textual Cranelift IR",
50     wasm_translate_module: "Translate WASM module",
51     wasm_translate_function: "Translate WASM function",
52 
53     verifier: "Verify Cranelift IR",
54 
55     compile: "Compilation passes",
56     try_incremental_cache: "Try loading from incremental cache",
57     store_incremental_cache: "Store in incremental cache",
58     flowgraph: "Control flow graph",
59     domtree: "Dominator tree",
60     loop_analysis: "Loop analysis",
61     preopt: "Pre-legalization rewriting",
62     egraph: "Egraph based optimizations",
63     gvn: "Global value numbering",
64     licm: "Loop invariant code motion",
65     unreachable_code: "Remove unreachable blocks",
66     remove_constant_phis: "Remove constant phi-nodes",
67 
68     vcode_lower: "VCode lowering",
69     vcode_emit: "VCode emission",
70     vcode_emit_finish: "VCode emission finalization",
71 
72     regalloc: "Register allocation",
73     regalloc_checker: "Register allocation symbolic verification",
74     layout_renumber: "Layout full renumbering",
75 
76     canonicalize_nans: "Canonicalization of NaNs",
77 }
78 
79 impl Pass {
idx(self) -> usize80     fn idx(self) -> usize {
81         self as usize
82     }
83 
84     /// Description of the pass.
description(self) -> &'static str85     pub fn description(self) -> &'static str {
86         match DESCRIPTIONS.get(self.idx()) {
87             Some(s) => s,
88             None => "<no pass>",
89         }
90     }
91 }
92 
93 impl fmt::Display for Pass {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result94     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
95         f.write_str(self.description())
96     }
97 }
98 
99 /// A profiler.
100 pub trait Profiler {
101     /// Start a profiling pass.
102     ///
103     /// Will return a token which when dropped indicates the end of the pass.
104     ///
105     /// Multiple passes can be active at the same time, but they must be started and stopped in a
106     /// LIFO fashion.
start_pass(&self, pass: Pass) -> Box<dyn Any>107     fn start_pass(&self, pass: Pass) -> Box<dyn Any>;
108 }
109 
110 /// The default profiler. You can get the results using [`take_current`].
111 pub struct DefaultProfiler;
112 
113 #[cfg(not(feature = "timing"))]
114 pub(crate) use disabled::*;
115 #[cfg(feature = "timing")]
116 pub use enabled::*;
117 
118 #[cfg(feature = "timing")]
119 mod enabled {
120     use super::{DESCRIPTIONS, DefaultProfiler, NUM_PASSES, Pass, Profiler};
121     use alloc::boxed::Box;
122     use core::any::Any;
123     use core::cell::{Cell, RefCell};
124     use core::fmt;
125     use core::mem;
126     use core::time::Duration;
127     use std::time::Instant;
128 
129     // Information about passes in a single thread.
130     thread_local! {
131         static PROFILER: RefCell<Box<dyn Profiler>> = RefCell::new(Box::new(DefaultProfiler));
132     }
133 
134     /// Set the profiler for the current thread.
135     ///
136     /// Returns the old profiler.
set_thread_profiler(new_profiler: Box<dyn Profiler>) -> Box<dyn Profiler>137     pub fn set_thread_profiler(new_profiler: Box<dyn Profiler>) -> Box<dyn Profiler> {
138         PROFILER.with(|profiler| core::mem::replace(&mut *profiler.borrow_mut(), new_profiler))
139     }
140 
141     /// Start timing `pass` as a child of the currently running pass, if any.
142     ///
143     /// This function is called by the publicly exposed pass functions.
start_pass(pass: Pass) -> Box<dyn Any>144     pub fn start_pass(pass: Pass) -> Box<dyn Any> {
145         PROFILER.with(|profiler| profiler.borrow().start_pass(pass))
146     }
147 
148     /// Accumulated timing information for a single pass.
149     #[derive(Default, Copy, Clone)]
150     struct PassTime {
151         /// Total time spent running this pass including children.
152         total: Duration,
153 
154         /// Time spent running in child passes.
155         child: Duration,
156     }
157 
158     /// Accumulated timing for all passes.
159     pub struct PassTimes {
160         pass: [PassTime; NUM_PASSES],
161     }
162 
163     impl PassTimes {
164         /// Add `other` to the timings of this `PassTimes`.
add(&mut self, other: &Self)165         pub fn add(&mut self, other: &Self) {
166             for (a, b) in self.pass.iter_mut().zip(&other.pass[..]) {
167                 a.total += b.total;
168                 a.child += b.child;
169             }
170         }
171 
172         /// Returns the total amount of time taken by all the passes measured.
total(&self) -> Duration173         pub fn total(&self) -> Duration {
174             self.pass.iter().map(|p| p.total - p.child).sum()
175         }
176     }
177 
178     impl Default for PassTimes {
default() -> Self179         fn default() -> Self {
180             Self {
181                 pass: [Default::default(); NUM_PASSES],
182             }
183         }
184     }
185 
186     impl fmt::Display for PassTimes {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result187         fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
188             writeln!(f, "======== ========  ==================================")?;
189             writeln!(f, "   Total     Self  Pass")?;
190             writeln!(f, "-------- --------  ----------------------------------")?;
191             for (time, desc) in self.pass.iter().zip(&DESCRIPTIONS[..]) {
192                 // Omit passes that haven't run.
193                 if time.total == Duration::default() {
194                     continue;
195                 }
196 
197                 // Write a duration as secs.millis, trailing space.
198                 fn fmtdur(mut dur: Duration, f: &mut fmt::Formatter) -> fmt::Result {
199                     // Round to nearest ms by adding 500us.
200                     dur += Duration::new(0, 500_000);
201                     let ms = dur.subsec_millis();
202                     write!(f, "{:4}.{:03} ", dur.as_secs(), ms)
203                 }
204 
205                 fmtdur(time.total, f)?;
206                 if let Some(s) = time.total.checked_sub(time.child) {
207                     fmtdur(s, f)?;
208                 }
209                 writeln!(f, " {desc}")?;
210             }
211             writeln!(f, "======== ========  ==================================")
212         }
213     }
214 
215     // Information about passes in a single thread.
216     thread_local! {
217         static PASS_TIME: RefCell<PassTimes> = RefCell::new(Default::default());
218     }
219 
220     /// Take the current accumulated pass timings and reset the timings for the current thread.
221     ///
222     /// Only applies when [`DefaultProfiler`] is used.
223     ///
224     /// # Panics
225     ///
226     /// Panics when any pass timing token is currently active.
take_current() -> PassTimes227     pub fn take_current() -> PassTimes {
228         assert_eq!(
229             CURRENT_PASS.with(|p| p.get()),
230             Pass::None,
231             "Cannot take_current() on profiler while a pass is active"
232         );
233         PASS_TIME.with(|rc| mem::take(&mut *rc.borrow_mut()))
234     }
235 
236     // Information about passes in a single thread.
237     thread_local! {
238         static CURRENT_PASS: Cell<Pass> = const { Cell::new(Pass::None) };
239         static LAST_INSTANT: Cell<Option<Instant>> = Cell::new(None);
240     }
241 
242     /// Provides a guaranteed-never-decreasing `Instant`. This is
243     /// supposed to be guaranteed by the operating system APIs that
244     /// are used to implement `Instant::now()`, but apparently
245     /// sometimes kernel bugs may result in this moving backward, and
246     /// if it does, our invariants here are violated, possibly
247     /// resulting in panics when we process nested time spans. So we
248     /// backstop against any bugs by tracking the last-returned
249     /// `Instant` and never regressing before it.
monotonic_instant() -> Instant250     fn monotonic_instant() -> Instant {
251         let now = match LAST_INSTANT.get() {
252             None => Instant::now(),
253             Some(prev) => {
254                 let system_now = Instant::now();
255                 core::cmp::max(prev, system_now)
256             }
257         };
258         LAST_INSTANT.set(Some(now));
259         now
260     }
261 
262     impl Profiler for DefaultProfiler {
start_pass(&self, pass: Pass) -> Box<dyn Any>263         fn start_pass(&self, pass: Pass) -> Box<dyn Any> {
264             let prev = CURRENT_PASS.with(|p| p.replace(pass));
265             log::debug!("timing: Starting {pass}, (during {prev})");
266             Box::new(DefaultTimingToken {
267                 start: monotonic_instant(),
268                 pass,
269                 prev,
270             })
271         }
272     }
273 
274     /// A timing token is responsible for timing the currently running pass. Timing starts when it
275     /// is created and ends when it is dropped.
276     ///
277     /// Multiple passes can be active at the same time, but they must be started and stopped in a
278     /// LIFO fashion.
279     struct DefaultTimingToken {
280         /// Start time for this pass.
281         start: Instant,
282 
283         // Pass being timed by this token.
284         pass: Pass,
285 
286         // The previously active pass which will be restored when this token is dropped.
287         prev: Pass,
288     }
289 
290     /// Dropping a timing token indicated the end of the pass.
291     impl Drop for DefaultTimingToken {
drop(&mut self)292         fn drop(&mut self) {
293             let now = monotonic_instant();
294             let duration = self.start.duration_since(now);
295             log::debug!("timing: Ending {}: {}ms", self.pass, duration.as_millis());
296             let old_cur = CURRENT_PASS.with(|p| p.replace(self.prev));
297             assert_eq!(self.pass, old_cur, "Timing tokens dropped out of order");
298             PASS_TIME.with(|rc| {
299                 let mut table = rc.borrow_mut();
300                 table.pass[self.pass.idx()].total += duration;
301                 if let Some(parent) = table.pass.get_mut(self.prev.idx()) {
302                     parent.child += duration;
303                 }
304             })
305         }
306     }
307 }
308 
309 #[cfg(not(feature = "timing"))]
310 mod disabled {
311     use super::{DefaultProfiler, Pass, Profiler};
312     use alloc::boxed::Box;
313     use core::any::Any;
314 
315     impl Profiler for DefaultProfiler {
start_pass(&self, _pass: Pass) -> Box<dyn Any>316         fn start_pass(&self, _pass: Pass) -> Box<dyn Any> {
317             Box::new(())
318         }
319     }
320 
start_pass(_pass: Pass) -> Box<dyn Any>321     pub(crate) fn start_pass(_pass: Pass) -> Box<dyn Any> {
322         Box::new(())
323     }
324 }
325 
326 #[cfg(test)]
327 mod tests {
328     use super::*;
329     use alloc::string::ToString;
330 
331     #[test]
display()332     fn display() {
333         assert_eq!(Pass::None.to_string(), "<no pass>");
334         assert_eq!(Pass::regalloc.to_string(), "Register allocation");
335     }
336 }
337