1 //! A Constant-Phi-Node removal pass. 2 3 use crate::dominator_tree::DominatorTree; 4 use crate::ir; 5 use crate::ir::Function; 6 use crate::ir::{Block, BlockCall, Inst, Value}; 7 use crate::timing; 8 use bumpalo::Bump; 9 use cranelift_entity::SecondaryMap; 10 use rustc_hash::{FxHashMap, FxHashSet}; 11 use smallvec::SmallVec; 12 13 // A note on notation. For the sake of clarity, this file uses the phrase 14 // "formal parameters" to mean the `Value`s listed in the block head, and 15 // "actual parameters" to mean the `Value`s passed in a branch or a jump: 16 // 17 // block4(v16: i32, v18: i32): <-- formal parameters 18 // ... 19 // brif v27, block7(v22, v24), block6 <-- actual parameters 20 21 // This transformation pass (conceptually) partitions all values in the 22 // function into two groups: 23 // 24 // * Group A: values defined by block formal parameters, except for the entry block. 25 // 26 // * Group B: All other values: that is, values defined by instructions, 27 // and the formals of the entry block. 28 // 29 // For each value in Group A, it attempts to establish whether it will have 30 // the value of exactly one member of Group B. If so, the formal parameter is 31 // deleted, all corresponding actual parameters (in jumps/branches to the 32 // defining block) are deleted, and a rename is inserted. 33 // 34 // The entry block is special-cased because (1) we don't know what values flow 35 // to its formals and (2) in any case we can't change its formals. 36 // 37 // Work proceeds in three phases. 38 // 39 // * Phase 1: examine all instructions. For each block, make up a useful 40 // grab-bag of information, `BlockSummary`, that summarises the block's 41 // formals and jump/branch instruction. This is used by Phases 2 and 3. 42 // 43 // * Phase 2: for each value in Group A, try to find a single Group B value 44 // that flows to it. This is done using a classical iterative forward 45 // dataflow analysis over a simple constant-propagation style lattice. It 46 // converges quickly in practice -- I have seen at most 4 iterations. This 47 // is relatively cheap because the iteration is done over the 48 // `BlockSummary`s, and does not visit each instruction. The resulting 49 // fixed point is stored in a `SolverState`. 50 // 51 // * Phase 3: using the `SolverState` and `BlockSummary`, edit the function to 52 // remove redundant formals and actuals, and to insert suitable renames. 53 // 54 // Note that the effectiveness of the analysis depends on on the fact that 55 // there are no copy instructions in Cranelift's IR. If there were, the 56 // computation of `actual_absval` in Phase 2 would have to be extended to 57 // chase through such copies. 58 // 59 // For large functions, the analysis cost using the new AArch64 backend is about 60 // 0.6% of the non-optimising compile time, as measured by instruction counts. 61 // This transformation usually pays for itself several times over, though, by 62 // reducing the isel/regalloc cost downstream. Gains of up to 7% have been 63 // seen for large functions. 64 65 /// The `Value`s (Group B) that can flow to a formal parameter (Group A). 66 #[derive(Clone, Copy, Debug, PartialEq)] 67 enum AbstractValue { 68 /// Two or more values flow to this formal. 69 Many, 70 71 /// Exactly one value, as stated, flows to this formal. The `Value`s that 72 /// can appear here are exactly: `Value`s defined by `Inst`s, plus the 73 /// `Value`s defined by the formals of the entry block. Note that this is 74 /// exactly the set of `Value`s that are *not* tracked in the solver below 75 /// (see `SolverState`). 76 One(Value /*Group B*/), 77 78 /// No value flows to this formal. 79 None, 80 } 81 82 impl AbstractValue { 83 fn join(self, other: AbstractValue) -> AbstractValue { 84 match (self, other) { 85 // Joining with `None` has no effect 86 (AbstractValue::None, p2) => p2, 87 (p1, AbstractValue::None) => p1, 88 // Joining with `Many` produces `Many` 89 (AbstractValue::Many, _p2) => AbstractValue::Many, 90 (_p1, AbstractValue::Many) => AbstractValue::Many, 91 // The only interesting case 92 (AbstractValue::One(v1), AbstractValue::One(v2)) => { 93 if v1 == v2 { 94 AbstractValue::One(v1) 95 } else { 96 AbstractValue::Many 97 } 98 } 99 } 100 } 101 102 fn is_one(self) -> bool { 103 matches!(self, AbstractValue::One(_)) 104 } 105 } 106 107 #[derive(Clone, Copy, Debug)] 108 struct OutEdge<'a> { 109 /// An instruction that transfers control. 110 inst: Inst, 111 /// The index into branch_destinations for this instruction that corresponds 112 /// to this edge. 113 branch_index: u32, 114 /// The block that control is transferred to. 115 block: Block, 116 /// The arguments to that block. 117 /// 118 /// These values can be from both groups A and B. 119 args: &'a [Value], 120 } 121 122 impl<'a> OutEdge<'a> { 123 /// Construct a new `OutEdge` for the given instruction. 124 /// 125 /// Returns `None` if this is an edge without any block arguments, which 126 /// means we can ignore it for this analysis's purposes. 127 #[inline] 128 fn new( 129 bump: &'a Bump, 130 dfg: &ir::DataFlowGraph, 131 inst: Inst, 132 branch_index: usize, 133 block: BlockCall, 134 ) -> Option<Self> { 135 let inst_var_args = block.args_slice(&dfg.value_lists); 136 137 // Skip edges without params. 138 if inst_var_args.is_empty() { 139 return None; 140 } 141 142 Some(OutEdge { 143 inst, 144 branch_index: branch_index as u32, 145 block: block.block(&dfg.value_lists), 146 args: bump.alloc_slice_fill_iter( 147 inst_var_args 148 .iter() 149 .map(|value| dfg.resolve_aliases(*value)), 150 ), 151 }) 152 } 153 } 154 155 /// For some block, a useful bundle of info. The `Block` itself is not stored 156 /// here since it will be the key in the associated `FxHashMap` -- see 157 /// `summaries` below. For the `SmallVec` tuning params: most blocks have 158 /// few parameters, hence `4`. And almost all blocks have either one or two 159 /// successors, hence `2`. 160 #[derive(Clone, Debug, Default)] 161 struct BlockSummary<'a> { 162 /// Formal parameters for this `Block`. 163 /// 164 /// These values are from group A. 165 formals: &'a [Value], 166 167 /// Each outgoing edge from this block. 168 /// 169 /// We don't bother to include transfers that pass zero parameters 170 /// since that makes more work for the solver for no purpose. 171 /// 172 /// We optimize for the case where a branch instruction has up to two 173 /// outgoing edges, as unconditional jumps and conditional branches are 174 /// more prominent than br_table. 175 dests: SmallVec<[OutEdge<'a>; 2]>, 176 } 177 178 impl<'a> BlockSummary<'a> { 179 /// Construct a new `BlockSummary`, using `values` as its backing storage. 180 #[inline] 181 fn new(bump: &'a Bump, formals: &[Value]) -> Self { 182 Self { 183 formals: bump.alloc_slice_copy(formals), 184 dests: Default::default(), 185 } 186 } 187 } 188 189 /// Solver state. This holds a AbstractValue for each formal parameter, except 190 /// for those from the entry block. 191 struct SolverState { 192 absvals: FxHashMap<Value /*Group A*/, AbstractValue>, 193 } 194 195 impl SolverState { 196 fn new() -> Self { 197 Self { 198 absvals: FxHashMap::default(), 199 } 200 } 201 202 fn get(&self, actual: Value) -> AbstractValue { 203 *self 204 .absvals 205 .get(&actual) 206 .unwrap_or_else(|| panic!("SolverState::get: formal param {actual:?} is untracked?!")) 207 } 208 209 fn maybe_get(&self, actual: Value) -> Option<&AbstractValue> { 210 self.absvals.get(&actual) 211 } 212 213 fn set(&mut self, actual: Value, lp: AbstractValue) { 214 match self.absvals.insert(actual, lp) { 215 Some(_old_lp) => {} 216 None => panic!("SolverState::set: formal param {actual:?} is untracked?!"), 217 } 218 } 219 } 220 221 /// Detect phis in `func` that will only ever produce one value, using a 222 /// classic forward dataflow analysis. Then remove them. 223 #[inline(never)] 224 pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree) { 225 let _tt = timing::remove_constant_phis(); 226 debug_assert!(domtree.is_valid()); 227 228 // Phase 1 of 3: for each block, make a summary containing all relevant 229 // info. The solver will iterate over the summaries, rather than having 230 // to inspect each instruction in each block. 231 let bump = 232 Bump::with_capacity(domtree.cfg_postorder().len() * 4 * std::mem::size_of::<Value>()); 233 let mut summaries = 234 SecondaryMap::<Block, BlockSummary>::with_capacity(domtree.cfg_postorder().len()); 235 236 for b in domtree.cfg_rpo().copied() { 237 let formals = func.dfg.block_params(b); 238 let mut summary = BlockSummary::new(&bump, formals); 239 240 for inst in func.layout.block_insts(b) { 241 for (ix, dest) in func.dfg.insts[inst] 242 .branch_destination(&func.dfg.jump_tables) 243 .iter() 244 .enumerate() 245 { 246 if let Some(edge) = OutEdge::new(&bump, &func.dfg, inst, ix, *dest) { 247 summary.dests.push(edge); 248 } 249 } 250 } 251 252 // Ensure the invariant that all blocks (except for the entry) appear 253 // in the summary, *unless* they have neither formals nor any 254 // param-carrying branches/jumps. 255 if formals.len() > 0 || summary.dests.len() > 0 { 256 summaries[b] = summary; 257 } 258 } 259 260 // Phase 2 of 3: iterate over the summaries in reverse postorder, 261 // computing new `AbstractValue`s for each tracked `Value`. The set of 262 // tracked `Value`s is exactly Group A as described above. 263 264 let entry_block = func 265 .layout 266 .entry_block() 267 .expect("remove_constant_phis: entry block unknown"); 268 269 // Set up initial solver state 270 let mut state = SolverState::new(); 271 272 for b in domtree.cfg_rpo().copied() { 273 // For each block, get the formals 274 if b == entry_block { 275 continue; 276 } 277 let formals = func.dfg.block_params(b); 278 for formal in formals { 279 let mb_old_absval = state.absvals.insert(*formal, AbstractValue::None); 280 assert!(mb_old_absval.is_none()); 281 } 282 } 283 284 // Solve: repeatedly traverse the blocks in reverse postorder, until there 285 // are no changes. 286 let mut iter_no = 0; 287 loop { 288 iter_no += 1; 289 let mut changed = false; 290 291 for src in domtree.cfg_rpo().copied() { 292 let src_summary = &summaries[src]; 293 for edge in &src_summary.dests { 294 assert!(edge.block != entry_block); 295 // By contrast, the dst block must have a summary. Phase 1 296 // will have only included an entry in `src_summary.dests` if 297 // that branch/jump carried at least one parameter. So the 298 // dst block does take parameters, so it must have a summary. 299 let dst_summary = &summaries[edge.block]; 300 let dst_formals = &dst_summary.formals; 301 assert_eq!(edge.args.len(), dst_formals.len()); 302 for (formal, actual) in dst_formals.iter().zip(edge.args) { 303 // Find the abstract value for `actual`. If it is a block 304 // formal parameter then the most recent abstract value is 305 // to be found in the solver state. If not, then it's a 306 // real value defining point (not a phi), in which case 307 // return it itself. 308 let actual_absval = match state.maybe_get(*actual) { 309 Some(pt) => *pt, 310 None => AbstractValue::One(*actual), 311 }; 312 313 // And `join` the new value with the old. 314 let formal_absval_old = state.get(*formal); 315 let formal_absval_new = formal_absval_old.join(actual_absval); 316 if formal_absval_new != formal_absval_old { 317 changed = true; 318 state.set(*formal, formal_absval_new); 319 } 320 } 321 } 322 } 323 324 if !changed { 325 break; 326 } 327 } 328 329 let mut n_consts = 0; 330 for absval in state.absvals.values() { 331 if absval.is_one() { 332 n_consts += 1; 333 } 334 } 335 336 // Phase 3 of 3: edit the function to remove constant formals, using the 337 // summaries and the final solver state as a guide. 338 339 // Make up a set of blocks that need editing. 340 let mut need_editing = FxHashSet::<Block>::default(); 341 for (block, summary) in summaries.iter() { 342 if block == entry_block { 343 continue; 344 } 345 for formal in summary.formals { 346 let formal_absval = state.get(*formal); 347 if formal_absval.is_one() { 348 need_editing.insert(block); 349 break; 350 } 351 } 352 } 353 354 // Firstly, deal with the formals. For each formal which is redundant, 355 // remove it, and also add a reroute from it to the constant value which 356 // it we know it to be. 357 for b in &need_editing { 358 let mut del_these = SmallVec::<[(Value, Value); 32]>::new(); 359 let formals: &[Value] = func.dfg.block_params(*b); 360 for formal in formals { 361 // The state must give an absval for `formal`. 362 if let AbstractValue::One(replacement_val) = state.get(*formal) { 363 del_these.push((*formal, replacement_val)); 364 } 365 } 366 // We can delete the formals in any order. However, 367 // `remove_block_param` works by sliding backwards all arguments to 368 // the right of the value it is asked to delete. Hence when removing more 369 // than one formal, it is significantly more efficient to ask it to 370 // remove the rightmost formal first, and hence this `rev()`. 371 for (redundant_formal, replacement_val) in del_these.into_iter().rev() { 372 func.dfg.remove_block_param(redundant_formal); 373 func.dfg.change_to_alias(redundant_formal, replacement_val); 374 } 375 } 376 377 // Secondly, visit all branch insns. If the destination has had its 378 // formals changed, change the actuals accordingly. Don't scan all insns, 379 // rather just visit those as listed in the summaries we prepared earlier. 380 let mut old_actuals = alloc::vec::Vec::new(); 381 for summary in summaries.values() { 382 for edge in &summary.dests { 383 if !need_editing.contains(&edge.block) { 384 continue; 385 } 386 387 let dfg = &mut func.dfg; 388 let dests = dfg.insts[edge.inst].branch_destination_mut(&mut dfg.jump_tables); 389 let block = &mut dests[edge.branch_index as usize]; 390 391 old_actuals.extend(block.args_slice(&dfg.value_lists)); 392 393 // Check that the numbers of arguments make sense. 394 let formals = &summaries[edge.block].formals; 395 assert_eq!(formals.len(), old_actuals.len()); 396 397 // Filter out redundant block arguments. 398 let mut formals = formals.iter(); 399 old_actuals.retain(|_| { 400 let formal_i = formals.next().unwrap(); 401 !state.get(*formal_i).is_one() 402 }); 403 404 // Replace the block with a new one that only includes the non-redundant arguments. 405 // This leaks the value list from the old block, 406 // https://github.com/bytecodealliance/wasmtime/issues/5451 for more information. 407 let destination = block.block(&dfg.value_lists); 408 *block = BlockCall::new(destination, &old_actuals, &mut dfg.value_lists); 409 old_actuals.clear(); 410 } 411 } 412 413 log::debug!( 414 "do_remove_constant_phis: done, {} iters. {} formals, of which {} const.", 415 iter_no, 416 state.absvals.len(), 417 n_consts 418 ); 419 } 420