1 //! A Constant-Phi-Node removal pass.
2 
3 use crate::dominator_tree::DominatorTree;
4 use crate::ir;
5 use crate::ir::Function;
6 use crate::ir::{Block, BlockArg, BlockCall, Inst, Value};
7 use crate::timing;
8 use crate::{FxHashMap, FxHashSet};
9 use bumpalo::Bump;
10 use cranelift_entity::SecondaryMap;
11 use smallvec::SmallVec;
12 
13 // A note on notation.  For the sake of clarity, this file uses the phrase
14 // "formal parameters" to mean the `Value`s listed in the block head, and
15 // "actual parameters" to mean the `Value`s passed in a branch or a jump:
16 //
17 // block4(v16: i32, v18: i32):            <-- formal parameters
18 //   ...
19 //   brif v27, block7(v22, v24), block6   <-- actual parameters
20 
21 // This transformation pass (conceptually) partitions all values in the
22 // function into two groups:
23 //
24 // * Group A: values defined by block formal parameters, except for the entry block.
25 //
26 // * Group B: All other values: that is, values defined by instructions,
27 //   and the formals of the entry block.
28 //
29 // For each value in Group A, it attempts to establish whether it will have
30 // the value of exactly one member of Group B.  If so, the formal parameter is
31 // deleted, all corresponding actual parameters (in jumps/branches to the
32 // defining block) are deleted, and a rename is inserted.
33 //
34 // The entry block is special-cased because (1) we don't know what values flow
35 // to its formals and (2) in any case we can't change its formals.
36 //
37 // Work proceeds in three phases.
38 //
39 // * Phase 1: examine all instructions.  For each block, make up a useful
40 //   grab-bag of information, `BlockSummary`, that summarises the block's
41 //   formals and jump/branch instruction.  This is used by Phases 2 and 3.
42 //
43 // * Phase 2: for each value in Group A, try to find a single Group B value
44 //   that flows to it.  This is done using a classical iterative forward
45 //   dataflow analysis over a simple constant-propagation style lattice.  It
46 //   converges quickly in practice -- I have seen at most 4 iterations.  This
47 //   is relatively cheap because the iteration is done over the
48 //   `BlockSummary`s, and does not visit each instruction.  The resulting
49 //   fixed point is stored in a `SolverState`.
50 //
51 // * Phase 3: using the `SolverState` and `BlockSummary`, edit the function to
52 //   remove redundant formals and actuals, and to insert suitable renames.
53 //
54 // Note that the effectiveness of the analysis depends on on the fact that
55 // there are no copy instructions in Cranelift's IR.  If there were, the
56 // computation of `actual_absval` in Phase 2 would have to be extended to
57 // chase through such copies.
58 //
59 // For large functions, the analysis cost using the new AArch64 backend is about
60 // 0.6% of the non-optimising compile time, as measured by instruction counts.
61 // This transformation usually pays for itself several times over, though, by
62 // reducing the isel/regalloc cost downstream.  Gains of up to 7% have been
63 // seen for large functions.
64 
65 /// The `Value`s (Group B) that can flow to a formal parameter (Group A).
66 #[derive(Clone, Copy, Debug, PartialEq)]
67 enum AbstractValue {
68     /// Two or more values flow to this formal.
69     Many,
70 
71     /// Exactly one value, as stated, flows to this formal.  The `Value`s that
72     /// can appear here are exactly: `Value`s defined by `Inst`s, plus the
73     /// `Value`s defined by the formals of the entry block.  Note that this is
74     /// exactly the set of `Value`s that are *not* tracked in the solver below
75     /// (see `SolverState`).
76     One(Value /*Group B*/),
77 
78     /// No value flows to this formal.
79     None,
80 }
81 
82 impl AbstractValue {
join(self, other: AbstractValue) -> AbstractValue83     fn join(self, other: AbstractValue) -> AbstractValue {
84         match (self, other) {
85             // Joining with `None` has no effect
86             (AbstractValue::None, p2) => p2,
87             (p1, AbstractValue::None) => p1,
88             // Joining with `Many` produces `Many`
89             (AbstractValue::Many, _p2) => AbstractValue::Many,
90             (_p1, AbstractValue::Many) => AbstractValue::Many,
91             // The only interesting case
92             (AbstractValue::One(v1), AbstractValue::One(v2)) => {
93                 if v1 == v2 {
94                     AbstractValue::One(v1)
95                 } else {
96                     AbstractValue::Many
97                 }
98             }
99         }
100     }
101 
is_one(self) -> bool102     fn is_one(self) -> bool {
103         matches!(self, AbstractValue::One(_))
104     }
105 }
106 
107 #[derive(Clone, Copy, Debug)]
108 struct OutEdge<'a> {
109     /// An instruction that transfers control.
110     inst: Inst,
111     /// The index into branch_destinations for this instruction that corresponds
112     /// to this edge.
113     branch_index: u32,
114     /// The block that control is transferred to.
115     block: Block,
116     /// The arguments to that block.
117     ///
118     /// These values can be from both groups A and B.
119     args: &'a [BlockArg],
120 }
121 
122 impl<'a> OutEdge<'a> {
123     /// Construct a new `OutEdge` for the given instruction.
124     ///
125     /// Returns `None` if this is an edge without any block arguments, which
126     /// means we can ignore it for this analysis's purposes.
127     #[inline]
new( bump: &'a Bump, dfg: &ir::DataFlowGraph, inst: Inst, branch_index: usize, block: BlockCall, ) -> Option<Self>128     fn new(
129         bump: &'a Bump,
130         dfg: &ir::DataFlowGraph,
131         inst: Inst,
132         branch_index: usize,
133         block: BlockCall,
134     ) -> Option<Self> {
135         let inst_var_args = block.args(&dfg.value_lists);
136 
137         // Skip edges without params.
138         if inst_var_args.len() == 0 {
139             return None;
140         }
141 
142         Some(OutEdge {
143             inst,
144             branch_index: branch_index as u32,
145             block: block.block(&dfg.value_lists),
146             args: bump.alloc_slice_fill_iter(
147                 inst_var_args.map(|arg| arg.map_value(|value| dfg.resolve_aliases(value))),
148             ),
149         })
150     }
151 }
152 
153 /// For some block, a useful bundle of info.  The `Block` itself is not stored
154 /// here since it will be the key in the associated `FxHashMap` -- see
155 /// `summaries` below.  For the `SmallVec` tuning params: most blocks have
156 /// few parameters, hence `4`.  And almost all blocks have either one or two
157 /// successors, hence `2`.
158 #[derive(Clone, Debug, Default)]
159 struct BlockSummary<'a> {
160     /// Formal parameters for this `Block`.
161     ///
162     /// These values are from group A.
163     formals: &'a [Value],
164 
165     /// Each outgoing edge from this block.
166     ///
167     /// We don't bother to include transfers that pass zero parameters
168     /// since that makes more work for the solver for no purpose.
169     ///
170     /// We optimize for the case where a branch instruction has up to two
171     /// outgoing edges, as unconditional jumps and conditional branches are
172     /// more prominent than br_table.
173     dests: SmallVec<[OutEdge<'a>; 2]>,
174 }
175 
176 impl<'a> BlockSummary<'a> {
177     /// Construct a new `BlockSummary`, using `values` as its backing storage.
178     #[inline]
new(bump: &'a Bump, formals: &[Value]) -> Self179     fn new(bump: &'a Bump, formals: &[Value]) -> Self {
180         Self {
181             formals: bump.alloc_slice_copy(formals),
182             dests: Default::default(),
183         }
184     }
185 }
186 
187 /// Solver state.  This holds a AbstractValue for each formal parameter, except
188 /// for those from the entry block.
189 struct SolverState {
190     absvals: FxHashMap<Value /*Group A*/, AbstractValue>,
191 }
192 
193 impl SolverState {
new() -> Self194     fn new() -> Self {
195         Self {
196             absvals: FxHashMap::default(),
197         }
198     }
199 
get(&self, actual: Value) -> AbstractValue200     fn get(&self, actual: Value) -> AbstractValue {
201         *self
202             .absvals
203             .get(&actual)
204             .unwrap_or_else(|| panic!("SolverState::get: formal param {actual:?} is untracked?!"))
205     }
206 
maybe_get(&self, actual: Value) -> Option<&AbstractValue>207     fn maybe_get(&self, actual: Value) -> Option<&AbstractValue> {
208         self.absvals.get(&actual)
209     }
210 
set(&mut self, actual: Value, lp: AbstractValue)211     fn set(&mut self, actual: Value, lp: AbstractValue) {
212         match self.absvals.insert(actual, lp) {
213             Some(_old_lp) => {}
214             None => panic!("SolverState::set: formal param {actual:?} is untracked?!"),
215         }
216     }
217 }
218 
219 /// Detect phis in `func` that will only ever produce one value, using a
220 /// classic forward dataflow analysis.  Then remove them.
221 #[inline(never)]
do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree)222 pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree) {
223     let _tt = timing::remove_constant_phis();
224     debug_assert!(domtree.is_valid());
225 
226     // Phase 1 of 3: for each block, make a summary containing all relevant
227     // info.  The solver will iterate over the summaries, rather than having
228     // to inspect each instruction in each block.
229     let bump =
230         Bump::with_capacity(domtree.cfg_postorder().len() * 4 * core::mem::size_of::<Value>());
231     let mut summaries =
232         SecondaryMap::<Block, BlockSummary>::with_capacity(domtree.cfg_postorder().len());
233 
234     for b in domtree.cfg_rpo().copied() {
235         let formals = func.dfg.block_params(b);
236         let mut summary = BlockSummary::new(&bump, formals);
237 
238         for inst in func.layout.block_insts(b) {
239             for (ix, dest) in func.dfg.insts[inst]
240                 .branch_destination(&func.dfg.jump_tables, &func.dfg.exception_tables)
241                 .iter()
242                 .enumerate()
243             {
244                 if let Some(edge) = OutEdge::new(&bump, &func.dfg, inst, ix, *dest) {
245                     summary.dests.push(edge);
246                 }
247             }
248         }
249 
250         // Ensure the invariant that all blocks (except for the entry) appear
251         // in the summary, *unless* they have neither formals nor any
252         // param-carrying branches/jumps.
253         if formals.len() > 0 || summary.dests.len() > 0 {
254             summaries[b] = summary;
255         }
256     }
257 
258     // Phase 2 of 3: iterate over the summaries in reverse postorder,
259     // computing new `AbstractValue`s for each tracked `Value`.  The set of
260     // tracked `Value`s is exactly Group A as described above.
261 
262     let entry_block = func
263         .layout
264         .entry_block()
265         .expect("remove_constant_phis: entry block unknown");
266 
267     // Set up initial solver state
268     let mut state = SolverState::new();
269 
270     for b in domtree.cfg_rpo().copied() {
271         // For each block, get the formals
272         if b == entry_block {
273             continue;
274         }
275         let formals = func.dfg.block_params(b);
276         for formal in formals {
277             let mb_old_absval = state.absvals.insert(*formal, AbstractValue::None);
278             assert!(mb_old_absval.is_none());
279         }
280     }
281 
282     // Solve: repeatedly traverse the blocks in reverse postorder, until there
283     // are no changes.
284     let mut iter_no = 0;
285     loop {
286         iter_no += 1;
287         let mut changed = false;
288 
289         for src in domtree.cfg_rpo().copied() {
290             let src_summary = &summaries[src];
291             for edge in &src_summary.dests {
292                 assert!(edge.block != entry_block);
293                 // By contrast, the dst block must have a summary.  Phase 1
294                 // will have only included an entry in `src_summary.dests` if
295                 // that branch/jump carried at least one parameter.  So the
296                 // dst block does take parameters, so it must have a summary.
297                 let dst_summary = &summaries[edge.block];
298                 let dst_formals = &dst_summary.formals;
299                 assert_eq!(edge.args.len(), dst_formals.len());
300                 for (formal, actual) in dst_formals.iter().zip(edge.args) {
301                     // Find the abstract value for `actual`.  If it is a block
302                     // formal parameter then the most recent abstract value is
303                     // to be found in the solver state.  If not, then it's a
304                     // real value defining point (not a phi), in which case
305                     // return it itself.
306                     let actual_absval = match actual {
307                         BlockArg::Value(actual) => match state.maybe_get(*actual) {
308                             Some(pt) => *pt,
309                             None => AbstractValue::One(*actual),
310                         },
311                         _ => AbstractValue::Many,
312                     };
313 
314                     // And `join` the new value with the old.
315                     let formal_absval_old = state.get(*formal);
316                     let formal_absval_new = formal_absval_old.join(actual_absval);
317                     if formal_absval_new != formal_absval_old {
318                         changed = true;
319                         state.set(*formal, formal_absval_new);
320                     }
321                 }
322             }
323         }
324 
325         if !changed {
326             break;
327         }
328     }
329 
330     let mut n_consts = 0;
331     for absval in state.absvals.values() {
332         if absval.is_one() {
333             n_consts += 1;
334         }
335     }
336 
337     // Phase 3 of 3: edit the function to remove constant formals, using the
338     // summaries and the final solver state as a guide.
339 
340     // Make up a set of blocks that need editing.
341     let mut need_editing = FxHashSet::<Block>::default();
342     for (block, summary) in summaries.iter() {
343         if block == entry_block {
344             continue;
345         }
346         for formal in summary.formals {
347             let formal_absval = state.get(*formal);
348             if formal_absval.is_one() {
349                 need_editing.insert(block);
350                 break;
351             }
352         }
353     }
354 
355     // Firstly, deal with the formals.  For each formal which is redundant,
356     // remove it, and also add a reroute from it to the constant value which
357     // it we know it to be.
358     for b in &need_editing {
359         let mut del_these = SmallVec::<[(Value, Value); 32]>::new();
360         let formals: &[Value] = func.dfg.block_params(*b);
361         for formal in formals {
362             // The state must give an absval for `formal`.
363             if let AbstractValue::One(replacement_val) = state.get(*formal) {
364                 del_these.push((*formal, replacement_val));
365             }
366         }
367         // We can delete the formals in any order.  However,
368         // `remove_block_param` works by sliding backwards all arguments to
369         // the right of the value it is asked to delete.  Hence when removing more
370         // than one formal, it is significantly more efficient to ask it to
371         // remove the rightmost formal first, and hence this `rev()`.
372         for (redundant_formal, replacement_val) in del_these.into_iter().rev() {
373             func.dfg.remove_block_param(redundant_formal);
374             func.dfg.change_to_alias(redundant_formal, replacement_val);
375         }
376     }
377 
378     // Secondly, visit all branch insns.  If the destination has had its
379     // formals changed, change the actuals accordingly.  Don't scan all insns,
380     // rather just visit those as listed in the summaries we prepared earlier.
381     let mut old_actuals = alloc::vec::Vec::new();
382     for summary in summaries.values() {
383         for edge in &summary.dests {
384             if !need_editing.contains(&edge.block) {
385                 continue;
386             }
387 
388             let dfg = &mut func.dfg;
389             let dests = dfg.insts[edge.inst]
390                 .branch_destination_mut(&mut dfg.jump_tables, &mut dfg.exception_tables);
391             let block = &mut dests[edge.branch_index as usize];
392 
393             old_actuals.extend(block.args(&dfg.value_lists));
394 
395             // Check that the numbers of arguments make sense.
396             let formals = &summaries[edge.block].formals;
397             assert_eq!(formals.len(), old_actuals.len());
398 
399             // Filter out redundant block arguments.
400             let mut formals = formals.iter();
401             old_actuals.retain(|_| {
402                 let formal_i = formals.next().unwrap();
403                 !state.get(*formal_i).is_one()
404             });
405 
406             // Replace the block with a new one that only includes the non-redundant arguments.
407             // This leaks the value list from the old block,
408             // https://github.com/bytecodealliance/wasmtime/issues/5451 for more information.
409             let destination = block.block(&dfg.value_lists);
410             *block = BlockCall::new(destination, old_actuals.drain(..), &mut dfg.value_lists);
411         }
412     }
413 
414     log::debug!(
415         "do_remove_constant_phis: done, {} iters.   {} formals, of which {} const.",
416         iter_no,
417         state.absvals.len(),
418         n_consts
419     );
420 }
421