1 //! A Constant-Phi-Node removal pass. 2 3 use crate::dominator_tree::DominatorTree; 4 use crate::ir; 5 use crate::ir::Function; 6 use crate::ir::{Block, BlockArg, BlockCall, Inst, Value}; 7 use crate::timing; 8 use crate::{FxHashMap, FxHashSet}; 9 use bumpalo::Bump; 10 use cranelift_entity::SecondaryMap; 11 use smallvec::SmallVec; 12 13 // A note on notation. For the sake of clarity, this file uses the phrase 14 // "formal parameters" to mean the `Value`s listed in the block head, and 15 // "actual parameters" to mean the `Value`s passed in a branch or a jump: 16 // 17 // block4(v16: i32, v18: i32): <-- formal parameters 18 // ... 19 // brif v27, block7(v22, v24), block6 <-- actual parameters 20 21 // This transformation pass (conceptually) partitions all values in the 22 // function into two groups: 23 // 24 // * Group A: values defined by block formal parameters, except for the entry block. 25 // 26 // * Group B: All other values: that is, values defined by instructions, 27 // and the formals of the entry block. 28 // 29 // For each value in Group A, it attempts to establish whether it will have 30 // the value of exactly one member of Group B. If so, the formal parameter is 31 // deleted, all corresponding actual parameters (in jumps/branches to the 32 // defining block) are deleted, and a rename is inserted. 33 // 34 // The entry block is special-cased because (1) we don't know what values flow 35 // to its formals and (2) in any case we can't change its formals. 36 // 37 // Work proceeds in three phases. 38 // 39 // * Phase 1: examine all instructions. For each block, make up a useful 40 // grab-bag of information, `BlockSummary`, that summarises the block's 41 // formals and jump/branch instruction. This is used by Phases 2 and 3. 42 // 43 // * Phase 2: for each value in Group A, try to find a single Group B value 44 // that flows to it. This is done using a classical iterative forward 45 // dataflow analysis over a simple constant-propagation style lattice. It 46 // converges quickly in practice -- I have seen at most 4 iterations. This 47 // is relatively cheap because the iteration is done over the 48 // `BlockSummary`s, and does not visit each instruction. The resulting 49 // fixed point is stored in a `SolverState`. 50 // 51 // * Phase 3: using the `SolverState` and `BlockSummary`, edit the function to 52 // remove redundant formals and actuals, and to insert suitable renames. 53 // 54 // Note that the effectiveness of the analysis depends on on the fact that 55 // there are no copy instructions in Cranelift's IR. If there were, the 56 // computation of `actual_absval` in Phase 2 would have to be extended to 57 // chase through such copies. 58 // 59 // For large functions, the analysis cost using the new AArch64 backend is about 60 // 0.6% of the non-optimising compile time, as measured by instruction counts. 61 // This transformation usually pays for itself several times over, though, by 62 // reducing the isel/regalloc cost downstream. Gains of up to 7% have been 63 // seen for large functions. 64 65 /// The `Value`s (Group B) that can flow to a formal parameter (Group A). 66 #[derive(Clone, Copy, Debug, PartialEq)] 67 enum AbstractValue { 68 /// Two or more values flow to this formal. 69 Many, 70 71 /// Exactly one value, as stated, flows to this formal. The `Value`s that 72 /// can appear here are exactly: `Value`s defined by `Inst`s, plus the 73 /// `Value`s defined by the formals of the entry block. Note that this is 74 /// exactly the set of `Value`s that are *not* tracked in the solver below 75 /// (see `SolverState`). 76 One(Value /*Group B*/), 77 78 /// No value flows to this formal. 79 None, 80 } 81 82 impl AbstractValue { 83 fn join(self, other: AbstractValue) -> AbstractValue { 84 match (self, other) { 85 // Joining with `None` has no effect 86 (AbstractValue::None, p2) => p2, 87 (p1, AbstractValue::None) => p1, 88 // Joining with `Many` produces `Many` 89 (AbstractValue::Many, _p2) => AbstractValue::Many, 90 (_p1, AbstractValue::Many) => AbstractValue::Many, 91 // The only interesting case 92 (AbstractValue::One(v1), AbstractValue::One(v2)) => { 93 if v1 == v2 { 94 AbstractValue::One(v1) 95 } else { 96 AbstractValue::Many 97 } 98 } 99 } 100 } 101 102 fn is_one(self) -> bool { 103 matches!(self, AbstractValue::One(_)) 104 } 105 } 106 107 #[derive(Clone, Copy, Debug)] 108 struct OutEdge<'a> { 109 /// An instruction that transfers control. 110 inst: Inst, 111 /// The index into branch_destinations for this instruction that corresponds 112 /// to this edge. 113 branch_index: u32, 114 /// The block that control is transferred to. 115 block: Block, 116 /// The arguments to that block. 117 /// 118 /// These values can be from both groups A and B. 119 args: &'a [BlockArg], 120 } 121 122 impl<'a> OutEdge<'a> { 123 /// Construct a new `OutEdge` for the given instruction. 124 /// 125 /// Returns `None` if this is an edge without any block arguments, which 126 /// means we can ignore it for this analysis's purposes. 127 #[inline] 128 fn new( 129 bump: &'a Bump, 130 dfg: &ir::DataFlowGraph, 131 inst: Inst, 132 branch_index: usize, 133 block: BlockCall, 134 ) -> Option<Self> { 135 let inst_var_args = block.args(&dfg.value_lists); 136 137 // Skip edges without params. 138 if inst_var_args.len() == 0 { 139 return None; 140 } 141 142 Some(OutEdge { 143 inst, 144 branch_index: branch_index as u32, 145 block: block.block(&dfg.value_lists), 146 args: bump.alloc_slice_fill_iter( 147 inst_var_args.map(|arg| arg.map_value(|value| dfg.resolve_aliases(value))), 148 ), 149 }) 150 } 151 } 152 153 /// For some block, a useful bundle of info. The `Block` itself is not stored 154 /// here since it will be the key in the associated `FxHashMap` -- see 155 /// `summaries` below. For the `SmallVec` tuning params: most blocks have 156 /// few parameters, hence `4`. And almost all blocks have either one or two 157 /// successors, hence `2`. 158 #[derive(Clone, Debug, Default)] 159 struct BlockSummary<'a> { 160 /// Formal parameters for this `Block`. 161 /// 162 /// These values are from group A. 163 formals: &'a [Value], 164 165 /// Each outgoing edge from this block. 166 /// 167 /// We don't bother to include transfers that pass zero parameters 168 /// since that makes more work for the solver for no purpose. 169 /// 170 /// We optimize for the case where a branch instruction has up to two 171 /// outgoing edges, as unconditional jumps and conditional branches are 172 /// more prominent than br_table. 173 dests: SmallVec<[OutEdge<'a>; 2]>, 174 } 175 176 impl<'a> BlockSummary<'a> { 177 /// Construct a new `BlockSummary`, using `values` as its backing storage. 178 #[inline] 179 fn new(bump: &'a Bump, formals: &[Value]) -> Self { 180 Self { 181 formals: bump.alloc_slice_copy(formals), 182 dests: Default::default(), 183 } 184 } 185 } 186 187 /// Solver state. This holds a AbstractValue for each formal parameter, except 188 /// for those from the entry block. 189 struct SolverState { 190 absvals: FxHashMap<Value /*Group A*/, AbstractValue>, 191 } 192 193 impl SolverState { 194 fn new() -> Self { 195 Self { 196 absvals: FxHashMap::default(), 197 } 198 } 199 200 fn get(&self, actual: Value) -> AbstractValue { 201 *self 202 .absvals 203 .get(&actual) 204 .unwrap_or_else(|| panic!("SolverState::get: formal param {actual:?} is untracked?!")) 205 } 206 207 fn maybe_get(&self, actual: Value) -> Option<&AbstractValue> { 208 self.absvals.get(&actual) 209 } 210 211 fn set(&mut self, actual: Value, lp: AbstractValue) { 212 match self.absvals.insert(actual, lp) { 213 Some(_old_lp) => {} 214 None => panic!("SolverState::set: formal param {actual:?} is untracked?!"), 215 } 216 } 217 } 218 219 /// Detect phis in `func` that will only ever produce one value, using a 220 /// classic forward dataflow analysis. Then remove them. 221 #[inline(never)] 222 pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree) { 223 let _tt = timing::remove_constant_phis(); 224 debug_assert!(domtree.is_valid()); 225 226 // Phase 1 of 3: for each block, make a summary containing all relevant 227 // info. The solver will iterate over the summaries, rather than having 228 // to inspect each instruction in each block. 229 let bump = 230 Bump::with_capacity(domtree.cfg_postorder().len() * 4 * core::mem::size_of::<Value>()); 231 let mut summaries = 232 SecondaryMap::<Block, BlockSummary>::with_capacity(domtree.cfg_postorder().len()); 233 234 for b in domtree.cfg_rpo().copied() { 235 let formals = func.dfg.block_params(b); 236 let mut summary = BlockSummary::new(&bump, formals); 237 238 for inst in func.layout.block_insts(b) { 239 for (ix, dest) in func.dfg.insts[inst] 240 .branch_destination(&func.dfg.jump_tables, &func.dfg.exception_tables) 241 .iter() 242 .enumerate() 243 { 244 if let Some(edge) = OutEdge::new(&bump, &func.dfg, inst, ix, *dest) { 245 summary.dests.push(edge); 246 } 247 } 248 } 249 250 // Ensure the invariant that all blocks (except for the entry) appear 251 // in the summary, *unless* they have neither formals nor any 252 // param-carrying branches/jumps. 253 if formals.len() > 0 || summary.dests.len() > 0 { 254 summaries[b] = summary; 255 } 256 } 257 258 // Phase 2 of 3: iterate over the summaries in reverse postorder, 259 // computing new `AbstractValue`s for each tracked `Value`. The set of 260 // tracked `Value`s is exactly Group A as described above. 261 262 let entry_block = func 263 .layout 264 .entry_block() 265 .expect("remove_constant_phis: entry block unknown"); 266 267 // Set up initial solver state 268 let mut state = SolverState::new(); 269 270 for b in domtree.cfg_rpo().copied() { 271 // For each block, get the formals 272 if b == entry_block { 273 continue; 274 } 275 let formals = func.dfg.block_params(b); 276 for formal in formals { 277 let mb_old_absval = state.absvals.insert(*formal, AbstractValue::None); 278 assert!(mb_old_absval.is_none()); 279 } 280 } 281 282 // Solve: repeatedly traverse the blocks in reverse postorder, until there 283 // are no changes. 284 let mut iter_no = 0; 285 loop { 286 iter_no += 1; 287 let mut changed = false; 288 289 for src in domtree.cfg_rpo().copied() { 290 let src_summary = &summaries[src]; 291 for edge in &src_summary.dests { 292 assert!(edge.block != entry_block); 293 // By contrast, the dst block must have a summary. Phase 1 294 // will have only included an entry in `src_summary.dests` if 295 // that branch/jump carried at least one parameter. So the 296 // dst block does take parameters, so it must have a summary. 297 let dst_summary = &summaries[edge.block]; 298 let dst_formals = &dst_summary.formals; 299 assert_eq!(edge.args.len(), dst_formals.len()); 300 for (formal, actual) in dst_formals.iter().zip(edge.args) { 301 // Find the abstract value for `actual`. If it is a block 302 // formal parameter then the most recent abstract value is 303 // to be found in the solver state. If not, then it's a 304 // real value defining point (not a phi), in which case 305 // return it itself. 306 let actual_absval = match actual { 307 BlockArg::Value(actual) => match state.maybe_get(*actual) { 308 Some(pt) => *pt, 309 None => AbstractValue::One(*actual), 310 }, 311 _ => AbstractValue::Many, 312 }; 313 314 // And `join` the new value with the old. 315 let formal_absval_old = state.get(*formal); 316 let formal_absval_new = formal_absval_old.join(actual_absval); 317 if formal_absval_new != formal_absval_old { 318 changed = true; 319 state.set(*formal, formal_absval_new); 320 } 321 } 322 } 323 } 324 325 if !changed { 326 break; 327 } 328 } 329 330 let mut n_consts = 0; 331 for absval in state.absvals.values() { 332 if absval.is_one() { 333 n_consts += 1; 334 } 335 } 336 337 // Phase 3 of 3: edit the function to remove constant formals, using the 338 // summaries and the final solver state as a guide. 339 340 // Make up a set of blocks that need editing. 341 let mut need_editing = FxHashSet::<Block>::default(); 342 for (block, summary) in summaries.iter() { 343 if block == entry_block { 344 continue; 345 } 346 for formal in summary.formals { 347 let formal_absval = state.get(*formal); 348 if formal_absval.is_one() { 349 need_editing.insert(block); 350 break; 351 } 352 } 353 } 354 355 // Firstly, deal with the formals. For each formal which is redundant, 356 // remove it, and also add a reroute from it to the constant value which 357 // it we know it to be. 358 for b in &need_editing { 359 let mut del_these = SmallVec::<[(Value, Value); 32]>::new(); 360 let formals: &[Value] = func.dfg.block_params(*b); 361 for formal in formals { 362 // The state must give an absval for `formal`. 363 if let AbstractValue::One(replacement_val) = state.get(*formal) { 364 del_these.push((*formal, replacement_val)); 365 } 366 } 367 // We can delete the formals in any order. However, 368 // `remove_block_param` works by sliding backwards all arguments to 369 // the right of the value it is asked to delete. Hence when removing more 370 // than one formal, it is significantly more efficient to ask it to 371 // remove the rightmost formal first, and hence this `rev()`. 372 for (redundant_formal, replacement_val) in del_these.into_iter().rev() { 373 func.dfg.remove_block_param(redundant_formal); 374 func.dfg.change_to_alias(redundant_formal, replacement_val); 375 } 376 } 377 378 // Secondly, visit all branch insns. If the destination has had its 379 // formals changed, change the actuals accordingly. Don't scan all insns, 380 // rather just visit those as listed in the summaries we prepared earlier. 381 let mut old_actuals = alloc::vec::Vec::new(); 382 for summary in summaries.values() { 383 for edge in &summary.dests { 384 if !need_editing.contains(&edge.block) { 385 continue; 386 } 387 388 let dfg = &mut func.dfg; 389 let dests = dfg.insts[edge.inst] 390 .branch_destination_mut(&mut dfg.jump_tables, &mut dfg.exception_tables); 391 let block = &mut dests[edge.branch_index as usize]; 392 393 old_actuals.extend(block.args(&dfg.value_lists)); 394 395 // Check that the numbers of arguments make sense. 396 let formals = &summaries[edge.block].formals; 397 assert_eq!(formals.len(), old_actuals.len()); 398 399 // Filter out redundant block arguments. 400 let mut formals = formals.iter(); 401 old_actuals.retain(|_| { 402 let formal_i = formals.next().unwrap(); 403 !state.get(*formal_i).is_one() 404 }); 405 406 // Replace the block with a new one that only includes the non-redundant arguments. 407 // This leaks the value list from the old block, 408 // https://github.com/bytecodealliance/wasmtime/issues/5451 for more information. 409 let destination = block.block(&dfg.value_lists); 410 *block = BlockCall::new(destination, old_actuals.drain(..), &mut dfg.value_lists); 411 } 412 } 413 414 log::debug!( 415 "do_remove_constant_phis: done, {} iters. {} formals, of which {} const.", 416 iter_no, 417 state.absvals.len(), 418 n_consts 419 ); 420 } 421