1 //! Elaboration phase: lowers EGraph back to sequences of operations 2 //! in CFG nodes. 3 4 use super::Stats; 5 use super::cost::Cost; 6 use crate::ctxhash::NullCtx; 7 use crate::dominator_tree::DominatorTree; 8 use crate::hash_map::Entry as HashEntry; 9 use crate::inst_predicates::is_pure_for_egraph; 10 use crate::ir::{Block, Function, Inst, Value, ValueDef}; 11 use crate::loop_analysis::{Loop, LoopAnalysis}; 12 use crate::scoped_hash_map::ScopedHashMap; 13 use crate::trace; 14 use crate::{FxHashMap, FxHashSet}; 15 use alloc::vec::Vec; 16 use cranelift_control::ControlPlane; 17 use cranelift_entity::{EntitySet, SecondaryMap, packed_option::ReservedValue}; 18 use smallvec::{SmallVec, smallvec}; 19 20 pub(crate) struct Elaborator<'a> { 21 func: &'a mut Function, 22 domtree: &'a DominatorTree, 23 loop_analysis: &'a LoopAnalysis, 24 /// Map from Value that is produced by a pure Inst (and was thus 25 /// not in the side-effecting skeleton) to the value produced by 26 /// an elaborated inst (placed in the layout) to whose results we 27 /// refer in the final code. 28 /// 29 /// The first time we use some result of an instruction during 30 /// elaboration, we can place it and insert an identity map (inst 31 /// results to that same inst's results) in this scoped 32 /// map. Within that block and its dom-tree children, that mapping 33 /// is visible and we can continue to use it. This allows us to 34 /// avoid cloning the instruction. However, if we pop that scope 35 /// and use it somewhere else as well, we will need to 36 /// duplicate. We detect this case by checking, when a value that 37 /// we want is not present in this map, whether the producing inst 38 /// is already placed in the Layout. If so, we duplicate, and 39 /// insert non-identity mappings from the original inst's results 40 /// to the cloned inst's results. 41 /// 42 /// Note that as values may refer to unions that represent a subset 43 /// of a larger eclass, it's not valid to walk towards the root of a 44 /// union tree: doing so would potentially equate values that fall 45 /// on different branches of the dominator tree. 46 value_to_elaborated_value: ScopedHashMap<Value, ElaboratedValue>, 47 /// Map from Value to the best (lowest-cost) Value in its eclass 48 /// (tree of union value-nodes). 49 value_to_best_value: SecondaryMap<Value, BestEntry>, 50 /// Stack of blocks and loops in current elaboration path. 51 loop_stack: SmallVec<[LoopStackEntry; 8]>, 52 /// The current block into which we are elaborating. 53 cur_block: Block, 54 /// Values that opt rules have indicated should be rematerialized 55 /// in every block they are used (e.g., immediates or other 56 /// "cheap-to-compute" ops). 57 remat_values: &'a FxHashSet<Value>, 58 /// Explicitly-unrolled value elaboration stack. 59 elab_stack: Vec<ElabStackEntry>, 60 /// Results from the elab stack. 61 elab_result_stack: Vec<ElaboratedValue>, 62 /// Explicitly-unrolled block elaboration stack. 63 block_stack: Vec<BlockStackEntry>, 64 /// Copies of values that have been rematerialized. 65 remat_copies: FxHashMap<(Block, Value), Value>, 66 /// Stats for various events during egraph processing, to help 67 /// with optimization of this infrastructure. 68 stats: &'a mut Stats, 69 /// Chaos-mode control-plane so we can test that we still get 70 /// correct results when our heuristics make bad decisions. 71 ctrl_plane: &'a mut ControlPlane, 72 } 73 74 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 75 struct BestEntry(Cost, Value); 76 77 impl PartialOrd for BestEntry { partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering>78 fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> { 79 Some(self.cmp(other)) 80 } 81 } 82 83 impl Ord for BestEntry { 84 #[inline] cmp(&self, other: &Self) -> core::cmp::Ordering85 fn cmp(&self, other: &Self) -> core::cmp::Ordering { 86 self.0.cmp(&other.0).then_with(|| { 87 // Note that this comparison is reversed. When costs are equal, 88 // prefer the value with the bigger index. This is a heuristic that 89 // prefers results of rewrites to the original value, since we 90 // expect that our rewrites are generally improvements. 91 self.1.cmp(&other.1).reverse() 92 }) 93 } 94 } 95 96 #[derive(Clone, Copy, Debug)] 97 struct ElaboratedValue { 98 in_block: Block, 99 value: Value, 100 } 101 102 #[derive(Clone, Debug)] 103 struct LoopStackEntry { 104 /// The loop identifier. 105 lp: Loop, 106 /// The hoist point: a block that immediately dominates this 107 /// loop. May not be an immediate predecessor, but will be a valid 108 /// point to place all loop-invariant ops: they must depend only 109 /// on inputs that dominate the loop, so are available at (the end 110 /// of) this block. 111 hoist_block: Block, 112 /// The depth in the scope map. 113 scope_depth: u32, 114 } 115 116 #[derive(Clone, Debug)] 117 enum ElabStackEntry { 118 /// Next action is to resolve this value into an elaborated inst 119 /// (placed into the layout) that produces the value, and 120 /// recursively elaborate the insts that produce its args. 121 /// 122 /// Any inserted ops should be inserted before `before`, which is 123 /// the instruction demanding this value. 124 Start { value: Value, before: Inst }, 125 /// Args have been pushed; waiting for results. 126 PendingInst { 127 inst: Inst, 128 result_idx: usize, 129 num_args: usize, 130 before: Inst, 131 }, 132 } 133 134 #[derive(Clone, Debug)] 135 enum BlockStackEntry { 136 Elaborate { block: Block, idom: Option<Block> }, 137 Pop, 138 } 139 140 impl<'a> Elaborator<'a> { new( func: &'a mut Function, domtree: &'a DominatorTree, loop_analysis: &'a LoopAnalysis, remat_values: &'a FxHashSet<Value>, stats: &'a mut Stats, ctrl_plane: &'a mut ControlPlane, ) -> Self141 pub(crate) fn new( 142 func: &'a mut Function, 143 domtree: &'a DominatorTree, 144 loop_analysis: &'a LoopAnalysis, 145 remat_values: &'a FxHashSet<Value>, 146 stats: &'a mut Stats, 147 ctrl_plane: &'a mut ControlPlane, 148 ) -> Self { 149 let num_values = func.dfg.num_values(); 150 let mut value_to_best_value = 151 SecondaryMap::with_default(BestEntry(Cost::infinity(), Value::reserved_value())); 152 value_to_best_value.resize(num_values); 153 Self { 154 func, 155 domtree, 156 loop_analysis, 157 value_to_elaborated_value: ScopedHashMap::with_capacity(num_values), 158 value_to_best_value, 159 loop_stack: smallvec![], 160 cur_block: Block::reserved_value(), 161 remat_values, 162 elab_stack: vec![], 163 elab_result_stack: vec![], 164 block_stack: vec![], 165 remat_copies: FxHashMap::default(), 166 stats, 167 ctrl_plane, 168 } 169 } 170 start_block(&mut self, idom: Option<Block>, block: Block)171 fn start_block(&mut self, idom: Option<Block>, block: Block) { 172 trace!( 173 "start_block: block {:?} with idom {:?} at loop depth {:?} scope depth {}", 174 block, 175 idom, 176 self.loop_stack.len(), 177 self.value_to_elaborated_value.depth() 178 ); 179 180 // Pop any loop levels we're no longer in. 181 while let Some(inner_loop) = self.loop_stack.last() { 182 if self.loop_analysis.is_in_loop(block, inner_loop.lp) { 183 break; 184 } 185 self.loop_stack.pop(); 186 } 187 188 // Note that if the *entry* block is a loop header, we will 189 // not make note of the loop here because it will not have an 190 // immediate dominator. We must disallow this case because we 191 // will skip adding the `LoopStackEntry` here but our 192 // `LoopAnalysis` will otherwise still make note of this loop 193 // and loop depths will not match. 194 if let Some(idom) = idom { 195 if let Some(lp) = self.loop_analysis.is_loop_header(block) { 196 self.loop_stack.push(LoopStackEntry { 197 lp, 198 // Any code hoisted out of this loop will have code 199 // placed in `idom`, and will have def mappings 200 // inserted in to the scoped hashmap at that block's 201 // level. 202 hoist_block: idom, 203 scope_depth: (self.value_to_elaborated_value.depth() - 1) as u32, 204 }); 205 trace!( 206 " -> loop header, pushing; depth now {}", 207 self.loop_stack.len() 208 ); 209 } 210 } else { 211 debug_assert!( 212 self.loop_analysis.is_loop_header(block).is_none(), 213 "Entry block (domtree root) cannot be a loop header!" 214 ); 215 } 216 217 trace!("block {}: loop stack is {:?}", block, self.loop_stack); 218 219 self.cur_block = block; 220 } 221 topo_sorted_values(&self) -> Vec<Value>222 fn topo_sorted_values(&self) -> Vec<Value> { 223 #[derive(Debug)] 224 enum Event { 225 Enter, 226 Exit, 227 } 228 let mut stack = Vec::<(Event, Value)>::new(); 229 230 // Traverse the CFG in pre-order so that, when we look at the 231 // instructions and operands inside each block, we see value defs before 232 // uses. 233 for block in crate::traversals::Dfs::new().pre_order_iter(&self.func) { 234 for inst in self.func.layout.block_insts(block) { 235 stack.extend(self.func.dfg.inst_values(inst).map(|v| (Event::Enter, v))); 236 } 237 } 238 239 // We pushed in the desired order, so popping would implicitly reverse 240 // that. Avoid that by reversing the initial stack before we start 241 // traversing the DFG. 242 stack.reverse(); 243 244 let mut sorted = Vec::with_capacity(self.func.dfg.values().len()); 245 let mut seen = EntitySet::<Value>::with_capacity(self.func.dfg.values().len()); 246 247 // Post-order traversal of the DFG, visiting value defs before uses. 248 while let Some((event, value)) = stack.pop() { 249 match event { 250 Event::Enter => { 251 if seen.insert(value) { 252 stack.push((Event::Exit, value)); 253 match self.func.dfg.value_def(value) { 254 ValueDef::Result(inst, _) => { 255 stack.extend( 256 self.func 257 .dfg 258 .inst_values(inst) 259 .rev() 260 .filter(|v| !seen.contains(*v)) 261 .map(|v| (Event::Enter, v)), 262 ); 263 } 264 ValueDef::Union(a, b) => { 265 if !seen.contains(b) { 266 stack.push((Event::Enter, b)); 267 } 268 if !seen.contains(a) { 269 stack.push((Event::Enter, a)); 270 } 271 } 272 ValueDef::Param(..) => {} 273 } 274 } 275 } 276 Event::Exit => { 277 sorted.push(value); 278 } 279 } 280 } 281 282 sorted 283 } 284 compute_best_values(&mut self)285 fn compute_best_values(&mut self) { 286 let sorted_values = self.topo_sorted_values(); 287 288 let best = &mut self.value_to_best_value; 289 290 // We can't make random decisions inside the fixpoint loop below because 291 // that could cause values to change on every iteration of the loop, 292 // which would make the loop never terminate. So in chaos testing 293 // mode we need a form of making suboptimal decisions that is fully 294 // deterministic. We choose to simply make the worst decision we know 295 // how to do instead of the best. 296 let use_worst = self.ctrl_plane.get_decision(); 297 298 trace!( 299 "Computing the {} values for each eclass", 300 if use_worst { 301 "worst (chaos mode)" 302 } else { 303 "best" 304 } 305 ); 306 307 // Because the values are topologically sorted, we know that we will see 308 // defs before uses, so an instruction's operands' costs will already be 309 // computed by the time we are computing the cost for the current value 310 // and its instruction. 311 for value in sorted_values.iter().copied() { 312 let def = self.func.dfg.value_def(value); 313 trace!("computing best for value {:?} def {:?}", value, def); 314 315 match def { 316 // Pick the best of the two options based on min-cost. This 317 // works because each element of `best` is a `(cost, value)` 318 // tuple; `cost` comes first so the natural comparison works 319 // based on cost, and breaks ties based on value number. 320 ValueDef::Union(x, y) => { 321 debug_assert!(!best[x].1.is_reserved_value()); 322 debug_assert!(!best[y].1.is_reserved_value()); 323 best[value] = if use_worst { 324 core::cmp::max(best[x], best[y]) 325 } else { 326 core::cmp::min(best[x], best[y]) 327 }; 328 trace!( 329 " -> best of union({:?}, {:?}) = {:?}", 330 best[x], best[y], best[value] 331 ); 332 } 333 334 ValueDef::Param(_, _) => { 335 best[value] = BestEntry(Cost::zero(), value); 336 } 337 338 // If the Inst is inserted into the layout (which is, 339 // at this point, only the side-effecting skeleton), 340 // then it must be computed and thus we give it zero 341 // cost. 342 ValueDef::Result(inst, _) => { 343 if let Some(_) = self.func.layout.inst_block(inst) { 344 best[value] = BestEntry(Cost::zero(), value); 345 } else { 346 let inst_data = &self.func.dfg.insts[inst]; 347 // N.B.: at this point we know that the opcode is 348 // pure, so `pure_op_cost`'s precondition is 349 // satisfied. 350 let cost = Cost::of_pure_op( 351 inst_data.opcode(), 352 self.func.dfg.inst_values(inst).map(|value| { 353 debug_assert!(!best[value].1.is_reserved_value()); 354 best[value].0 355 }), 356 ); 357 best[value] = BestEntry(cost, value); 358 trace!(" -> cost of value {} = {:?}", value, cost); 359 } 360 } 361 }; 362 363 // You might be expecting an assert that the best cost we just 364 // computed is not infinity, however infinite cost *can* happen in 365 // practice. First, note that our cost function doesn't know about 366 // any shared structure in the dataflow graph, it only sums operand 367 // costs. (And trying to avoid that by deduping a single operation's 368 // operands is a losing game because you can always just add one 369 // indirection and go from `add(x, x)` to `add(foo(x), bar(x))` to 370 // hide the shared structure.) Given that blindness to sharing, we 371 // can make cost grow exponentially with a linear sequence of 372 // operations: 373 // 374 // v0 = iconst.i32 1 ;; cost = 1 375 // v1 = iadd v0, v0 ;; cost = 3 + 1 + 1 376 // v2 = iadd v1, v1 ;; cost = 3 + 5 + 5 377 // v3 = iadd v2, v2 ;; cost = 3 + 13 + 13 378 // v4 = iadd v3, v3 ;; cost = 3 + 29 + 29 379 // v5 = iadd v4, v4 ;; cost = 3 + 61 + 61 380 // v6 = iadd v5, v5 ;; cost = 3 + 125 + 125 381 // ;; etc... 382 // 383 // Such a chain can cause cost to saturate to infinity. How do we 384 // choose which e-node is best when there are multiple that have 385 // saturated to infinity? It doesn't matter. As long as invariant 386 // (2) for optimization rules is upheld by our rule set (see 387 // `cranelift/codegen/src/opts/README.md`) it is safe to choose 388 // *any* e-node in the e-class. At worst we will produce suboptimal 389 // code, but never an incorrectness. 390 } 391 } 392 393 /// Elaborate use of an eclass, inserting any needed new 394 /// instructions before the given inst `before`. Should only be 395 /// given values corresponding to results of instructions or 396 /// blockparams. elaborate_eclass_use(&mut self, value: Value, before: Inst) -> ElaboratedValue397 fn elaborate_eclass_use(&mut self, value: Value, before: Inst) -> ElaboratedValue { 398 debug_assert_ne!(value, Value::reserved_value()); 399 400 // Kick off the process by requesting this result 401 // value. 402 self.elab_stack 403 .push(ElabStackEntry::Start { value, before }); 404 405 // Now run the explicit-stack recursion until we reach 406 // the root. 407 self.process_elab_stack(); 408 debug_assert_eq!(self.elab_result_stack.len(), 1); 409 self.elab_result_stack.pop().unwrap() 410 } 411 412 /// Possibly rematerialize the instruction producing the value in 413 /// `arg` and rewrite `arg` to refer to it, if needed. Returns 414 /// `true` if a rewrite occurred. maybe_remat_arg( remat_values: &FxHashSet<Value>, func: &mut Function, remat_copies: &mut FxHashMap<(Block, Value), Value>, insert_block: Block, before: Inst, arg: &mut ElaboratedValue, stats: &mut Stats, ) -> bool415 fn maybe_remat_arg( 416 remat_values: &FxHashSet<Value>, 417 func: &mut Function, 418 remat_copies: &mut FxHashMap<(Block, Value), Value>, 419 insert_block: Block, 420 before: Inst, 421 arg: &mut ElaboratedValue, 422 stats: &mut Stats, 423 ) -> bool { 424 // TODO (#7313): we may want to consider recursive 425 // rematerialization as well. We could process the arguments of 426 // the rematerialized instruction up to a certain depth. This 427 // would affect, e.g., adds-with-one-constant-arg, which are 428 // currently rematerialized. Right now we don't do this, to 429 // avoid the need for another fixpoint loop here. 430 if arg.in_block != insert_block && remat_values.contains(&arg.value) { 431 let new_value = match remat_copies.entry((insert_block, arg.value)) { 432 HashEntry::Occupied(o) => *o.get(), 433 HashEntry::Vacant(v) => { 434 let inst = func.dfg.value_def(arg.value).inst().unwrap(); 435 debug_assert_eq!(func.dfg.inst_results(inst).len(), 1); 436 let new_inst = func.dfg.clone_inst(inst); 437 func.layout.insert_inst(new_inst, before); 438 let new_result = func.dfg.inst_results(new_inst)[0]; 439 *v.insert(new_result) 440 } 441 }; 442 trace!("rematerialized {} as {}", arg.value, new_value); 443 arg.value = new_value; 444 stats.elaborate_remat += 1; 445 true 446 } else { 447 false 448 } 449 } 450 process_elab_stack(&mut self)451 fn process_elab_stack(&mut self) { 452 while let Some(entry) = self.elab_stack.pop() { 453 match entry { 454 ElabStackEntry::Start { value, before } => { 455 debug_assert!(self.func.dfg.value_is_real(value)); 456 457 self.stats.elaborate_visit_node += 1; 458 459 // Get the best option; we use `value` (latest 460 // value) here so we have a full view of the 461 // eclass. 462 trace!("looking up best value for {}", value); 463 let BestEntry(_, best_value) = self.value_to_best_value[value]; 464 trace!("elaborate: value {} -> best {}", value, best_value); 465 debug_assert_ne!(best_value, Value::reserved_value()); 466 467 if let Some(elab_val) = 468 self.value_to_elaborated_value.get(&NullCtx, &best_value) 469 { 470 // Value is available; use it. 471 trace!("elaborate: value {} -> {:?}", value, elab_val); 472 self.stats.elaborate_memoize_hit += 1; 473 self.elab_result_stack.push(*elab_val); 474 continue; 475 } 476 477 self.stats.elaborate_memoize_miss += 1; 478 479 // Now resolve the value to its definition to see 480 // how we can compute it. 481 let (inst, result_idx) = match self.func.dfg.value_def(best_value) { 482 ValueDef::Result(inst, result_idx) => { 483 trace!( 484 " -> value {} is result {} of {}", 485 best_value, result_idx, inst 486 ); 487 (inst, result_idx) 488 } 489 ValueDef::Param(in_block, _) => { 490 // We don't need to do anything to compute 491 // this value; just push its result on the 492 // result stack (blockparams are already 493 // available). 494 trace!(" -> value {} is a blockparam", best_value); 495 self.elab_result_stack.push(ElaboratedValue { 496 in_block, 497 value: best_value, 498 }); 499 continue; 500 } 501 ValueDef::Union(_, _) => { 502 panic!("Should never have a Union value as the best value"); 503 } 504 }; 505 506 trace!( 507 " -> result {} of inst {:?}", 508 result_idx, self.func.dfg.insts[inst] 509 ); 510 511 // We're going to need to use this instruction 512 // result, placing the instruction into the 513 // layout. First, enqueue all args to be 514 // elaborated. Push state to receive the results 515 // and later elab this inst. 516 let num_args = self.func.dfg.inst_values(inst).count(); 517 self.elab_stack.push(ElabStackEntry::PendingInst { 518 inst, 519 result_idx, 520 num_args, 521 before, 522 }); 523 524 // Push args in reverse order so we process the 525 // first arg first. 526 for arg in self.func.dfg.inst_values(inst).rev() { 527 debug_assert_ne!(arg, Value::reserved_value()); 528 self.elab_stack 529 .push(ElabStackEntry::Start { value: arg, before }); 530 } 531 } 532 533 ElabStackEntry::PendingInst { 534 inst, 535 result_idx, 536 num_args, 537 before, 538 } => { 539 trace!( 540 "PendingInst: {} result {} args {} before {}", 541 inst, result_idx, num_args, before 542 ); 543 544 // We should have all args resolved at this 545 // point. Grab them and drain them out, removing 546 // them. 547 let arg_idx = self.elab_result_stack.len() - num_args; 548 let arg_values = &mut self.elab_result_stack[arg_idx..]; 549 550 // Compute max loop depth. 551 // 552 // Note that if there are no arguments then this instruction 553 // is allowed to get hoisted up one loop. This is not 554 // usually used since no-argument values are things like 555 // constants which are typically rematerialized, but for the 556 // `vconst` instruction 128-bit constants aren't as easily 557 // rematerialized. They're hoisted out of inner loops but 558 // not to the function entry which may run the risk of 559 // placing too much register pressure on the entire 560 // function. This is modeled with the `.saturating_sub(1)` 561 // as the default if there's otherwise no maximum. 562 let loop_hoist_level = arg_values 563 .iter() 564 .map(|&value| { 565 // Find the outermost loop level at which 566 // the value's defining block *is not* a 567 // member. This is the loop-nest level 568 // whose hoist-block we hoist to. 569 let hoist_level = self 570 .loop_stack 571 .iter() 572 .position(|loop_entry| { 573 !self.loop_analysis.is_in_loop(value.in_block, loop_entry.lp) 574 }) 575 .unwrap_or(self.loop_stack.len()); 576 trace!( 577 " -> arg: elab_value {:?} hoist level {:?}", 578 value, hoist_level 579 ); 580 hoist_level 581 }) 582 .max() 583 .unwrap_or(self.loop_stack.len().saturating_sub(1)); 584 trace!( 585 " -> loop hoist level: {:?}; cur loop depth: {:?}, loop_stack: {:?}", 586 loop_hoist_level, 587 self.loop_stack.len(), 588 self.loop_stack, 589 ); 590 591 // We know that this is a pure inst, because 592 // non-pure roots have already been placed in the 593 // value-to-elab'd-value map, so they will not 594 // reach this stage of processing. 595 // 596 // We now must determine the location at which we 597 // place the instruction. This is the current 598 // block *unless* we hoist above a loop when all 599 // args are loop-invariant (and this op is pure). 600 let (scope_depth, before, insert_block) = if loop_hoist_level 601 == self.loop_stack.len() 602 { 603 // Depends on some value at the current 604 // loop depth, or remat forces it here: 605 // place it at the current location. 606 ( 607 self.value_to_elaborated_value.depth(), 608 before, 609 self.func.layout.inst_block(before).unwrap(), 610 ) 611 } else { 612 // Does not depend on any args at current 613 // loop depth: hoist out of loop. 614 self.stats.elaborate_licm_hoist += 1; 615 let data = &self.loop_stack[loop_hoist_level]; 616 // `data.hoist_block` should dominate `before`'s block. 617 let before_block = self.func.layout.inst_block(before).unwrap(); 618 debug_assert!(self.domtree.block_dominates(data.hoist_block, before_block)); 619 // Determine the instruction at which we 620 // insert in `data.hoist_block`. 621 let before = self.func.layout.last_inst(data.hoist_block).unwrap(); 622 (data.scope_depth as usize, before, data.hoist_block) 623 }; 624 625 trace!( 626 " -> decided to place: before {} insert_block {}", 627 before, insert_block 628 ); 629 630 // Now that we have the location for the 631 // instruction, check if any of its args are remat 632 // values. If so, and if we don't have a copy of 633 // the rematerializing instruction for this block 634 // yet, create one. 635 let mut remat_arg = false; 636 for arg_value in arg_values.iter_mut() { 637 if Self::maybe_remat_arg( 638 &self.remat_values, 639 &mut self.func, 640 &mut self.remat_copies, 641 insert_block, 642 before, 643 arg_value, 644 &mut self.stats, 645 ) { 646 remat_arg = true; 647 } 648 } 649 650 // Now we need to place `inst` at the computed 651 // location (just before `before`). Note that 652 // `inst` may already have been placed somewhere 653 // else, because a pure node may be elaborated at 654 // more than one place. In this case, we need to 655 // duplicate the instruction (and return the 656 // `Value`s for that duplicated instance instead). 657 // 658 // Also clone if we rematerialized, because we 659 // don't want to rewrite the args in the original 660 // copy. 661 trace!("need inst {} before {}", inst, before); 662 let inst = if self.func.layout.inst_block(inst).is_some() || remat_arg { 663 // Clone the inst! 664 let new_inst = self.func.dfg.clone_inst(inst); 665 trace!( 666 " -> inst {} already has a location; cloned to {}", 667 inst, new_inst 668 ); 669 // Create mappings in the 670 // value-to-elab'd-value map from original 671 // results to cloned results. 672 for (&result, &new_result) in self 673 .func 674 .dfg 675 .inst_results(inst) 676 .iter() 677 .zip(self.func.dfg.inst_results(new_inst).iter()) 678 { 679 let elab_value = ElaboratedValue { 680 value: new_result, 681 in_block: insert_block, 682 }; 683 let best_result = self.value_to_best_value[result]; 684 self.value_to_elaborated_value.insert_if_absent_with_depth( 685 &NullCtx, 686 best_result.1, 687 elab_value, 688 scope_depth, 689 ); 690 691 self.value_to_best_value[new_result] = best_result; 692 693 trace!( 694 " -> cloned inst has new result {} for orig {}", 695 new_result, result 696 ); 697 } 698 new_inst 699 } else { 700 trace!(" -> no location; using original inst"); 701 // Create identity mappings from result values 702 // to themselves in this scope, since we're 703 // using the original inst. 704 for &result in self.func.dfg.inst_results(inst) { 705 let elab_value = ElaboratedValue { 706 value: result, 707 in_block: insert_block, 708 }; 709 let best_result = self.value_to_best_value[result]; 710 self.value_to_elaborated_value.insert_if_absent_with_depth( 711 &NullCtx, 712 best_result.1, 713 elab_value, 714 scope_depth, 715 ); 716 trace!(" -> inserting identity mapping for {}", result); 717 } 718 inst 719 }; 720 721 // Place the inst just before `before`. 722 assert!( 723 is_pure_for_egraph(self.func, inst), 724 "something has gone very wrong if we are elaborating effectful \ 725 instructions, they should have remained in the skeleton" 726 ); 727 self.func.layout.insert_inst(inst, before); 728 729 // Update the inst's arguments. 730 self.func 731 .dfg 732 .overwrite_inst_values(inst, arg_values.into_iter().map(|ev| ev.value)); 733 734 // Now that we've consumed the arg values, pop 735 // them off the stack. 736 self.elab_result_stack.truncate(arg_idx); 737 738 // Push the requested result index of the 739 // instruction onto the elab-results stack. 740 self.elab_result_stack.push(ElaboratedValue { 741 in_block: insert_block, 742 value: self.func.dfg.inst_results(inst)[result_idx], 743 }); 744 } 745 } 746 } 747 } 748 elaborate_block(&mut self, elab_values: &mut Vec<Value>, idom: Option<Block>, block: Block)749 fn elaborate_block(&mut self, elab_values: &mut Vec<Value>, idom: Option<Block>, block: Block) { 750 trace!("elaborate_block: block {}", block); 751 self.start_block(idom, block); 752 753 // Iterate over the side-effecting skeleton using the linked 754 // list in Layout. We will insert instructions that are 755 // elaborated *before* `inst`, so we can always use its 756 // next-link to continue the iteration. 757 let mut next_inst = self.func.layout.first_inst(block); 758 let mut first_branch = None; 759 while let Some(inst) = next_inst { 760 trace!( 761 "elaborating inst {} with results {:?}", 762 inst, 763 self.func.dfg.inst_results(inst) 764 ); 765 // Record the first branch we see in the block; all 766 // elaboration for args of *any* branch must be inserted 767 // before the *first* branch, because the branch group 768 // must remain contiguous at the end of the block. 769 if self.func.dfg.insts[inst].opcode().is_branch() && first_branch == None { 770 first_branch = Some(inst); 771 } 772 773 // Determine where elaboration inserts insts. 774 let before = first_branch.unwrap_or(inst); 775 trace!(" -> inserting before {}", before); 776 777 elab_values.extend(self.func.dfg.inst_values(inst)); 778 for arg in elab_values.iter_mut() { 779 trace!(" -> arg {}", *arg); 780 // Elaborate the arg, placing any newly-inserted insts 781 // before `before`. Get the updated value, which may 782 // be different than the original. 783 let mut new_arg = self.elaborate_eclass_use(*arg, before); 784 Self::maybe_remat_arg( 785 &self.remat_values, 786 &mut self.func, 787 &mut self.remat_copies, 788 block, 789 inst, 790 &mut new_arg, 791 &mut self.stats, 792 ); 793 trace!(" -> rewrote arg to {:?}", new_arg); 794 *arg = new_arg.value; 795 } 796 self.func 797 .dfg 798 .overwrite_inst_values(inst, elab_values.drain(..)); 799 800 // We need to put the results of this instruction in the 801 // map now. 802 for &result in self.func.dfg.inst_results(inst) { 803 trace!(" -> result {}", result); 804 let best_result = self.value_to_best_value[result]; 805 self.value_to_elaborated_value.insert_if_absent( 806 &NullCtx, 807 best_result.1, 808 ElaboratedValue { 809 in_block: block, 810 value: result, 811 }, 812 ); 813 } 814 815 next_inst = self.func.layout.next_inst(inst); 816 } 817 } 818 elaborate_domtree(&mut self, domtree: &DominatorTree)819 fn elaborate_domtree(&mut self, domtree: &DominatorTree) { 820 self.block_stack.push(BlockStackEntry::Elaborate { 821 block: self.func.layout.entry_block().unwrap(), 822 idom: None, 823 }); 824 825 // A temporary workspace for elaborate_block, allocated here to maximize the use of the 826 // allocation. 827 let mut elab_values = Vec::new(); 828 829 while let Some(top) = self.block_stack.pop() { 830 match top { 831 BlockStackEntry::Elaborate { block, idom } => { 832 self.block_stack.push(BlockStackEntry::Pop); 833 self.value_to_elaborated_value.increment_depth(); 834 835 self.elaborate_block(&mut elab_values, idom, block); 836 837 // Push children. We are doing a preorder 838 // traversal so we do this after processing this 839 // block above. 840 let block_stack_end = self.block_stack.len(); 841 for child in self.ctrl_plane.shuffled(domtree.children(block)) { 842 self.block_stack.push(BlockStackEntry::Elaborate { 843 block: child, 844 idom: Some(block), 845 }); 846 } 847 // Reverse what we just pushed so we elaborate in 848 // original block order. (The domtree iter is a 849 // single-ended iter over a singly-linked list so 850 // we can't `.rev()` above.) 851 self.block_stack[block_stack_end..].reverse(); 852 } 853 BlockStackEntry::Pop => { 854 self.value_to_elaborated_value.decrement_depth(); 855 } 856 } 857 } 858 } 859 elaborate(&mut self)860 pub(crate) fn elaborate(&mut self) { 861 self.stats.elaborate_func += 1; 862 self.stats.elaborate_func_pre_insts += self.func.dfg.num_insts() as u64; 863 self.compute_best_values(); 864 self.elaborate_domtree(&self.domtree); 865 self.stats.elaborate_func_post_insts += self.func.dfg.num_insts() as u64; 866 } 867 } 868