1 //! Elaboration phase: lowers EGraph back to sequences of operations
2 //! in CFG nodes.
3 
4 use super::Stats;
5 use super::cost::Cost;
6 use crate::ctxhash::NullCtx;
7 use crate::dominator_tree::DominatorTree;
8 use crate::hash_map::Entry as HashEntry;
9 use crate::inst_predicates::is_pure_for_egraph;
10 use crate::ir::{Block, Function, Inst, Value, ValueDef};
11 use crate::loop_analysis::{Loop, LoopAnalysis};
12 use crate::scoped_hash_map::ScopedHashMap;
13 use crate::trace;
14 use crate::{FxHashMap, FxHashSet};
15 use alloc::vec::Vec;
16 use cranelift_control::ControlPlane;
17 use cranelift_entity::{EntitySet, SecondaryMap, packed_option::ReservedValue};
18 use smallvec::{SmallVec, smallvec};
19 
20 pub(crate) struct Elaborator<'a> {
21     func: &'a mut Function,
22     domtree: &'a DominatorTree,
23     loop_analysis: &'a LoopAnalysis,
24     /// Map from Value that is produced by a pure Inst (and was thus
25     /// not in the side-effecting skeleton) to the value produced by
26     /// an elaborated inst (placed in the layout) to whose results we
27     /// refer in the final code.
28     ///
29     /// The first time we use some result of an instruction during
30     /// elaboration, we can place it and insert an identity map (inst
31     /// results to that same inst's results) in this scoped
32     /// map. Within that block and its dom-tree children, that mapping
33     /// is visible and we can continue to use it. This allows us to
34     /// avoid cloning the instruction. However, if we pop that scope
35     /// and use it somewhere else as well, we will need to
36     /// duplicate. We detect this case by checking, when a value that
37     /// we want is not present in this map, whether the producing inst
38     /// is already placed in the Layout. If so, we duplicate, and
39     /// insert non-identity mappings from the original inst's results
40     /// to the cloned inst's results.
41     ///
42     /// Note that as values may refer to unions that represent a subset
43     /// of a larger eclass, it's not valid to walk towards the root of a
44     /// union tree: doing so would potentially equate values that fall
45     /// on different branches of the dominator tree.
46     value_to_elaborated_value: ScopedHashMap<Value, ElaboratedValue>,
47     /// Map from Value to the best (lowest-cost) Value in its eclass
48     /// (tree of union value-nodes).
49     value_to_best_value: SecondaryMap<Value, BestEntry>,
50     /// Stack of blocks and loops in current elaboration path.
51     loop_stack: SmallVec<[LoopStackEntry; 8]>,
52     /// The current block into which we are elaborating.
53     cur_block: Block,
54     /// Values that opt rules have indicated should be rematerialized
55     /// in every block they are used (e.g., immediates or other
56     /// "cheap-to-compute" ops).
57     remat_values: &'a FxHashSet<Value>,
58     /// Explicitly-unrolled value elaboration stack.
59     elab_stack: Vec<ElabStackEntry>,
60     /// Results from the elab stack.
61     elab_result_stack: Vec<ElaboratedValue>,
62     /// Explicitly-unrolled block elaboration stack.
63     block_stack: Vec<BlockStackEntry>,
64     /// Copies of values that have been rematerialized.
65     remat_copies: FxHashMap<(Block, Value), Value>,
66     /// Stats for various events during egraph processing, to help
67     /// with optimization of this infrastructure.
68     stats: &'a mut Stats,
69     /// Chaos-mode control-plane so we can test that we still get
70     /// correct results when our heuristics make bad decisions.
71     ctrl_plane: &'a mut ControlPlane,
72 }
73 
74 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
75 struct BestEntry(Cost, Value);
76 
77 impl PartialOrd for BestEntry {
partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering>78     fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
79         Some(self.cmp(other))
80     }
81 }
82 
83 impl Ord for BestEntry {
84     #[inline]
cmp(&self, other: &Self) -> core::cmp::Ordering85     fn cmp(&self, other: &Self) -> core::cmp::Ordering {
86         self.0.cmp(&other.0).then_with(|| {
87             // Note that this comparison is reversed. When costs are equal,
88             // prefer the value with the bigger index. This is a heuristic that
89             // prefers results of rewrites to the original value, since we
90             // expect that our rewrites are generally improvements.
91             self.1.cmp(&other.1).reverse()
92         })
93     }
94 }
95 
96 #[derive(Clone, Copy, Debug)]
97 struct ElaboratedValue {
98     in_block: Block,
99     value: Value,
100 }
101 
102 #[derive(Clone, Debug)]
103 struct LoopStackEntry {
104     /// The loop identifier.
105     lp: Loop,
106     /// The hoist point: a block that immediately dominates this
107     /// loop. May not be an immediate predecessor, but will be a valid
108     /// point to place all loop-invariant ops: they must depend only
109     /// on inputs that dominate the loop, so are available at (the end
110     /// of) this block.
111     hoist_block: Block,
112     /// The depth in the scope map.
113     scope_depth: u32,
114 }
115 
116 #[derive(Clone, Debug)]
117 enum ElabStackEntry {
118     /// Next action is to resolve this value into an elaborated inst
119     /// (placed into the layout) that produces the value, and
120     /// recursively elaborate the insts that produce its args.
121     ///
122     /// Any inserted ops should be inserted before `before`, which is
123     /// the instruction demanding this value.
124     Start { value: Value, before: Inst },
125     /// Args have been pushed; waiting for results.
126     PendingInst {
127         inst: Inst,
128         result_idx: usize,
129         num_args: usize,
130         before: Inst,
131     },
132 }
133 
134 #[derive(Clone, Debug)]
135 enum BlockStackEntry {
136     Elaborate { block: Block, idom: Option<Block> },
137     Pop,
138 }
139 
140 impl<'a> Elaborator<'a> {
new( func: &'a mut Function, domtree: &'a DominatorTree, loop_analysis: &'a LoopAnalysis, remat_values: &'a FxHashSet<Value>, stats: &'a mut Stats, ctrl_plane: &'a mut ControlPlane, ) -> Self141     pub(crate) fn new(
142         func: &'a mut Function,
143         domtree: &'a DominatorTree,
144         loop_analysis: &'a LoopAnalysis,
145         remat_values: &'a FxHashSet<Value>,
146         stats: &'a mut Stats,
147         ctrl_plane: &'a mut ControlPlane,
148     ) -> Self {
149         let num_values = func.dfg.num_values();
150         let mut value_to_best_value =
151             SecondaryMap::with_default(BestEntry(Cost::infinity(), Value::reserved_value()));
152         value_to_best_value.resize(num_values);
153         Self {
154             func,
155             domtree,
156             loop_analysis,
157             value_to_elaborated_value: ScopedHashMap::with_capacity(num_values),
158             value_to_best_value,
159             loop_stack: smallvec![],
160             cur_block: Block::reserved_value(),
161             remat_values,
162             elab_stack: vec![],
163             elab_result_stack: vec![],
164             block_stack: vec![],
165             remat_copies: FxHashMap::default(),
166             stats,
167             ctrl_plane,
168         }
169     }
170 
start_block(&mut self, idom: Option<Block>, block: Block)171     fn start_block(&mut self, idom: Option<Block>, block: Block) {
172         trace!(
173             "start_block: block {:?} with idom {:?} at loop depth {:?} scope depth {}",
174             block,
175             idom,
176             self.loop_stack.len(),
177             self.value_to_elaborated_value.depth()
178         );
179 
180         // Pop any loop levels we're no longer in.
181         while let Some(inner_loop) = self.loop_stack.last() {
182             if self.loop_analysis.is_in_loop(block, inner_loop.lp) {
183                 break;
184             }
185             self.loop_stack.pop();
186         }
187 
188         // Note that if the *entry* block is a loop header, we will
189         // not make note of the loop here because it will not have an
190         // immediate dominator. We must disallow this case because we
191         // will skip adding the `LoopStackEntry` here but our
192         // `LoopAnalysis` will otherwise still make note of this loop
193         // and loop depths will not match.
194         if let Some(idom) = idom {
195             if let Some(lp) = self.loop_analysis.is_loop_header(block) {
196                 self.loop_stack.push(LoopStackEntry {
197                     lp,
198                     // Any code hoisted out of this loop will have code
199                     // placed in `idom`, and will have def mappings
200                     // inserted in to the scoped hashmap at that block's
201                     // level.
202                     hoist_block: idom,
203                     scope_depth: (self.value_to_elaborated_value.depth() - 1) as u32,
204                 });
205                 trace!(
206                     " -> loop header, pushing; depth now {}",
207                     self.loop_stack.len()
208                 );
209             }
210         } else {
211             debug_assert!(
212                 self.loop_analysis.is_loop_header(block).is_none(),
213                 "Entry block (domtree root) cannot be a loop header!"
214             );
215         }
216 
217         trace!("block {}: loop stack is {:?}", block, self.loop_stack);
218 
219         self.cur_block = block;
220     }
221 
topo_sorted_values(&self) -> Vec<Value>222     fn topo_sorted_values(&self) -> Vec<Value> {
223         #[derive(Debug)]
224         enum Event {
225             Enter,
226             Exit,
227         }
228         let mut stack = Vec::<(Event, Value)>::new();
229 
230         // Traverse the CFG in pre-order so that, when we look at the
231         // instructions and operands inside each block, we see value defs before
232         // uses.
233         for block in crate::traversals::Dfs::new().pre_order_iter(&self.func) {
234             for inst in self.func.layout.block_insts(block) {
235                 stack.extend(self.func.dfg.inst_values(inst).map(|v| (Event::Enter, v)));
236             }
237         }
238 
239         // We pushed in the desired order, so popping would implicitly reverse
240         // that. Avoid that by reversing the initial stack before we start
241         // traversing the DFG.
242         stack.reverse();
243 
244         let mut sorted = Vec::with_capacity(self.func.dfg.values().len());
245         let mut seen = EntitySet::<Value>::with_capacity(self.func.dfg.values().len());
246 
247         // Post-order traversal of the DFG, visiting value defs before uses.
248         while let Some((event, value)) = stack.pop() {
249             match event {
250                 Event::Enter => {
251                     if seen.insert(value) {
252                         stack.push((Event::Exit, value));
253                         match self.func.dfg.value_def(value) {
254                             ValueDef::Result(inst, _) => {
255                                 stack.extend(
256                                     self.func
257                                         .dfg
258                                         .inst_values(inst)
259                                         .rev()
260                                         .filter(|v| !seen.contains(*v))
261                                         .map(|v| (Event::Enter, v)),
262                                 );
263                             }
264                             ValueDef::Union(a, b) => {
265                                 if !seen.contains(b) {
266                                     stack.push((Event::Enter, b));
267                                 }
268                                 if !seen.contains(a) {
269                                     stack.push((Event::Enter, a));
270                                 }
271                             }
272                             ValueDef::Param(..) => {}
273                         }
274                     }
275                 }
276                 Event::Exit => {
277                     sorted.push(value);
278                 }
279             }
280         }
281 
282         sorted
283     }
284 
compute_best_values(&mut self)285     fn compute_best_values(&mut self) {
286         let sorted_values = self.topo_sorted_values();
287 
288         let best = &mut self.value_to_best_value;
289 
290         // We can't make random decisions inside the fixpoint loop below because
291         // that could cause values to change on every iteration of the loop,
292         // which would make the loop never terminate. So in chaos testing
293         // mode we need a form of making suboptimal decisions that is fully
294         // deterministic. We choose to simply make the worst decision we know
295         // how to do instead of the best.
296         let use_worst = self.ctrl_plane.get_decision();
297 
298         trace!(
299             "Computing the {} values for each eclass",
300             if use_worst {
301                 "worst (chaos mode)"
302             } else {
303                 "best"
304             }
305         );
306 
307         // Because the values are topologically sorted, we know that we will see
308         // defs before uses, so an instruction's operands' costs will already be
309         // computed by the time we are computing the cost for the current value
310         // and its instruction.
311         for value in sorted_values.iter().copied() {
312             let def = self.func.dfg.value_def(value);
313             trace!("computing best for value {:?} def {:?}", value, def);
314 
315             match def {
316                 // Pick the best of the two options based on min-cost. This
317                 // works because each element of `best` is a `(cost, value)`
318                 // tuple; `cost` comes first so the natural comparison works
319                 // based on cost, and breaks ties based on value number.
320                 ValueDef::Union(x, y) => {
321                     debug_assert!(!best[x].1.is_reserved_value());
322                     debug_assert!(!best[y].1.is_reserved_value());
323                     best[value] = if use_worst {
324                         core::cmp::max(best[x], best[y])
325                     } else {
326                         core::cmp::min(best[x], best[y])
327                     };
328                     trace!(
329                         " -> best of union({:?}, {:?}) = {:?}",
330                         best[x], best[y], best[value]
331                     );
332                 }
333 
334                 ValueDef::Param(_, _) => {
335                     best[value] = BestEntry(Cost::zero(), value);
336                 }
337 
338                 // If the Inst is inserted into the layout (which is,
339                 // at this point, only the side-effecting skeleton),
340                 // then it must be computed and thus we give it zero
341                 // cost.
342                 ValueDef::Result(inst, _) => {
343                     if let Some(_) = self.func.layout.inst_block(inst) {
344                         best[value] = BestEntry(Cost::zero(), value);
345                     } else {
346                         let inst_data = &self.func.dfg.insts[inst];
347                         // N.B.: at this point we know that the opcode is
348                         // pure, so `pure_op_cost`'s precondition is
349                         // satisfied.
350                         let cost = Cost::of_pure_op(
351                             inst_data.opcode(),
352                             self.func.dfg.inst_values(inst).map(|value| {
353                                 debug_assert!(!best[value].1.is_reserved_value());
354                                 best[value].0
355                             }),
356                         );
357                         best[value] = BestEntry(cost, value);
358                         trace!(" -> cost of value {} = {:?}", value, cost);
359                     }
360                 }
361             };
362 
363             // You might be expecting an assert that the best cost we just
364             // computed is not infinity, however infinite cost *can* happen in
365             // practice. First, note that our cost function doesn't know about
366             // any shared structure in the dataflow graph, it only sums operand
367             // costs. (And trying to avoid that by deduping a single operation's
368             // operands is a losing game because you can always just add one
369             // indirection and go from `add(x, x)` to `add(foo(x), bar(x))` to
370             // hide the shared structure.) Given that blindness to sharing, we
371             // can make cost grow exponentially with a linear sequence of
372             // operations:
373             //
374             //     v0 = iconst.i32 1    ;; cost = 1
375             //     v1 = iadd v0, v0     ;; cost = 3 + 1 + 1
376             //     v2 = iadd v1, v1     ;; cost = 3 + 5 + 5
377             //     v3 = iadd v2, v2     ;; cost = 3 + 13 + 13
378             //     v4 = iadd v3, v3     ;; cost = 3 + 29 + 29
379             //     v5 = iadd v4, v4     ;; cost = 3 + 61 + 61
380             //     v6 = iadd v5, v5     ;; cost = 3 + 125 + 125
381             //     ;; etc...
382             //
383             // Such a chain can cause cost to saturate to infinity. How do we
384             // choose which e-node is best when there are multiple that have
385             // saturated to infinity? It doesn't matter. As long as invariant
386             // (2) for optimization rules is upheld by our rule set (see
387             // `cranelift/codegen/src/opts/README.md`) it is safe to choose
388             // *any* e-node in the e-class. At worst we will produce suboptimal
389             // code, but never an incorrectness.
390         }
391     }
392 
393     /// Elaborate use of an eclass, inserting any needed new
394     /// instructions before the given inst `before`. Should only be
395     /// given values corresponding to results of instructions or
396     /// blockparams.
elaborate_eclass_use(&mut self, value: Value, before: Inst) -> ElaboratedValue397     fn elaborate_eclass_use(&mut self, value: Value, before: Inst) -> ElaboratedValue {
398         debug_assert_ne!(value, Value::reserved_value());
399 
400         // Kick off the process by requesting this result
401         // value.
402         self.elab_stack
403             .push(ElabStackEntry::Start { value, before });
404 
405         // Now run the explicit-stack recursion until we reach
406         // the root.
407         self.process_elab_stack();
408         debug_assert_eq!(self.elab_result_stack.len(), 1);
409         self.elab_result_stack.pop().unwrap()
410     }
411 
412     /// Possibly rematerialize the instruction producing the value in
413     /// `arg` and rewrite `arg` to refer to it, if needed. Returns
414     /// `true` if a rewrite occurred.
maybe_remat_arg( remat_values: &FxHashSet<Value>, func: &mut Function, remat_copies: &mut FxHashMap<(Block, Value), Value>, insert_block: Block, before: Inst, arg: &mut ElaboratedValue, stats: &mut Stats, ) -> bool415     fn maybe_remat_arg(
416         remat_values: &FxHashSet<Value>,
417         func: &mut Function,
418         remat_copies: &mut FxHashMap<(Block, Value), Value>,
419         insert_block: Block,
420         before: Inst,
421         arg: &mut ElaboratedValue,
422         stats: &mut Stats,
423     ) -> bool {
424         // TODO (#7313): we may want to consider recursive
425         // rematerialization as well. We could process the arguments of
426         // the rematerialized instruction up to a certain depth. This
427         // would affect, e.g., adds-with-one-constant-arg, which are
428         // currently rematerialized. Right now we don't do this, to
429         // avoid the need for another fixpoint loop here.
430         if arg.in_block != insert_block && remat_values.contains(&arg.value) {
431             let new_value = match remat_copies.entry((insert_block, arg.value)) {
432                 HashEntry::Occupied(o) => *o.get(),
433                 HashEntry::Vacant(v) => {
434                     let inst = func.dfg.value_def(arg.value).inst().unwrap();
435                     debug_assert_eq!(func.dfg.inst_results(inst).len(), 1);
436                     let new_inst = func.dfg.clone_inst(inst);
437                     func.layout.insert_inst(new_inst, before);
438                     let new_result = func.dfg.inst_results(new_inst)[0];
439                     *v.insert(new_result)
440                 }
441             };
442             trace!("rematerialized {} as {}", arg.value, new_value);
443             arg.value = new_value;
444             stats.elaborate_remat += 1;
445             true
446         } else {
447             false
448         }
449     }
450 
process_elab_stack(&mut self)451     fn process_elab_stack(&mut self) {
452         while let Some(entry) = self.elab_stack.pop() {
453             match entry {
454                 ElabStackEntry::Start { value, before } => {
455                     debug_assert!(self.func.dfg.value_is_real(value));
456 
457                     self.stats.elaborate_visit_node += 1;
458 
459                     // Get the best option; we use `value` (latest
460                     // value) here so we have a full view of the
461                     // eclass.
462                     trace!("looking up best value for {}", value);
463                     let BestEntry(_, best_value) = self.value_to_best_value[value];
464                     trace!("elaborate: value {} -> best {}", value, best_value);
465                     debug_assert_ne!(best_value, Value::reserved_value());
466 
467                     if let Some(elab_val) =
468                         self.value_to_elaborated_value.get(&NullCtx, &best_value)
469                     {
470                         // Value is available; use it.
471                         trace!("elaborate: value {} -> {:?}", value, elab_val);
472                         self.stats.elaborate_memoize_hit += 1;
473                         self.elab_result_stack.push(*elab_val);
474                         continue;
475                     }
476 
477                     self.stats.elaborate_memoize_miss += 1;
478 
479                     // Now resolve the value to its definition to see
480                     // how we can compute it.
481                     let (inst, result_idx) = match self.func.dfg.value_def(best_value) {
482                         ValueDef::Result(inst, result_idx) => {
483                             trace!(
484                                 " -> value {} is result {} of {}",
485                                 best_value, result_idx, inst
486                             );
487                             (inst, result_idx)
488                         }
489                         ValueDef::Param(in_block, _) => {
490                             // We don't need to do anything to compute
491                             // this value; just push its result on the
492                             // result stack (blockparams are already
493                             // available).
494                             trace!(" -> value {} is a blockparam", best_value);
495                             self.elab_result_stack.push(ElaboratedValue {
496                                 in_block,
497                                 value: best_value,
498                             });
499                             continue;
500                         }
501                         ValueDef::Union(_, _) => {
502                             panic!("Should never have a Union value as the best value");
503                         }
504                     };
505 
506                     trace!(
507                         " -> result {} of inst {:?}",
508                         result_idx, self.func.dfg.insts[inst]
509                     );
510 
511                     // We're going to need to use this instruction
512                     // result, placing the instruction into the
513                     // layout. First, enqueue all args to be
514                     // elaborated. Push state to receive the results
515                     // and later elab this inst.
516                     let num_args = self.func.dfg.inst_values(inst).count();
517                     self.elab_stack.push(ElabStackEntry::PendingInst {
518                         inst,
519                         result_idx,
520                         num_args,
521                         before,
522                     });
523 
524                     // Push args in reverse order so we process the
525                     // first arg first.
526                     for arg in self.func.dfg.inst_values(inst).rev() {
527                         debug_assert_ne!(arg, Value::reserved_value());
528                         self.elab_stack
529                             .push(ElabStackEntry::Start { value: arg, before });
530                     }
531                 }
532 
533                 ElabStackEntry::PendingInst {
534                     inst,
535                     result_idx,
536                     num_args,
537                     before,
538                 } => {
539                     trace!(
540                         "PendingInst: {} result {} args {} before {}",
541                         inst, result_idx, num_args, before
542                     );
543 
544                     // We should have all args resolved at this
545                     // point. Grab them and drain them out, removing
546                     // them.
547                     let arg_idx = self.elab_result_stack.len() - num_args;
548                     let arg_values = &mut self.elab_result_stack[arg_idx..];
549 
550                     // Compute max loop depth.
551                     //
552                     // Note that if there are no arguments then this instruction
553                     // is allowed to get hoisted up one loop. This is not
554                     // usually used since no-argument values are things like
555                     // constants which are typically rematerialized, but for the
556                     // `vconst` instruction 128-bit constants aren't as easily
557                     // rematerialized. They're hoisted out of inner loops but
558                     // not to the function entry which may run the risk of
559                     // placing too much register pressure on the entire
560                     // function. This is modeled with the `.saturating_sub(1)`
561                     // as the default if there's otherwise no maximum.
562                     let loop_hoist_level = arg_values
563                         .iter()
564                         .map(|&value| {
565                             // Find the outermost loop level at which
566                             // the value's defining block *is not* a
567                             // member. This is the loop-nest level
568                             // whose hoist-block we hoist to.
569                             let hoist_level = self
570                                 .loop_stack
571                                 .iter()
572                                 .position(|loop_entry| {
573                                     !self.loop_analysis.is_in_loop(value.in_block, loop_entry.lp)
574                                 })
575                                 .unwrap_or(self.loop_stack.len());
576                             trace!(
577                                 " -> arg: elab_value {:?} hoist level {:?}",
578                                 value, hoist_level
579                             );
580                             hoist_level
581                         })
582                         .max()
583                         .unwrap_or(self.loop_stack.len().saturating_sub(1));
584                     trace!(
585                         " -> loop hoist level: {:?}; cur loop depth: {:?}, loop_stack: {:?}",
586                         loop_hoist_level,
587                         self.loop_stack.len(),
588                         self.loop_stack,
589                     );
590 
591                     // We know that this is a pure inst, because
592                     // non-pure roots have already been placed in the
593                     // value-to-elab'd-value map, so they will not
594                     // reach this stage of processing.
595                     //
596                     // We now must determine the location at which we
597                     // place the instruction. This is the current
598                     // block *unless* we hoist above a loop when all
599                     // args are loop-invariant (and this op is pure).
600                     let (scope_depth, before, insert_block) = if loop_hoist_level
601                         == self.loop_stack.len()
602                     {
603                         // Depends on some value at the current
604                         // loop depth, or remat forces it here:
605                         // place it at the current location.
606                         (
607                             self.value_to_elaborated_value.depth(),
608                             before,
609                             self.func.layout.inst_block(before).unwrap(),
610                         )
611                     } else {
612                         // Does not depend on any args at current
613                         // loop depth: hoist out of loop.
614                         self.stats.elaborate_licm_hoist += 1;
615                         let data = &self.loop_stack[loop_hoist_level];
616                         // `data.hoist_block` should dominate `before`'s block.
617                         let before_block = self.func.layout.inst_block(before).unwrap();
618                         debug_assert!(self.domtree.block_dominates(data.hoist_block, before_block));
619                         // Determine the instruction at which we
620                         // insert in `data.hoist_block`.
621                         let before = self.func.layout.last_inst(data.hoist_block).unwrap();
622                         (data.scope_depth as usize, before, data.hoist_block)
623                     };
624 
625                     trace!(
626                         " -> decided to place: before {} insert_block {}",
627                         before, insert_block
628                     );
629 
630                     // Now that we have the location for the
631                     // instruction, check if any of its args are remat
632                     // values. If so, and if we don't have a copy of
633                     // the rematerializing instruction for this block
634                     // yet, create one.
635                     let mut remat_arg = false;
636                     for arg_value in arg_values.iter_mut() {
637                         if Self::maybe_remat_arg(
638                             &self.remat_values,
639                             &mut self.func,
640                             &mut self.remat_copies,
641                             insert_block,
642                             before,
643                             arg_value,
644                             &mut self.stats,
645                         ) {
646                             remat_arg = true;
647                         }
648                     }
649 
650                     // Now we need to place `inst` at the computed
651                     // location (just before `before`). Note that
652                     // `inst` may already have been placed somewhere
653                     // else, because a pure node may be elaborated at
654                     // more than one place. In this case, we need to
655                     // duplicate the instruction (and return the
656                     // `Value`s for that duplicated instance instead).
657                     //
658                     // Also clone if we rematerialized, because we
659                     // don't want to rewrite the args in the original
660                     // copy.
661                     trace!("need inst {} before {}", inst, before);
662                     let inst = if self.func.layout.inst_block(inst).is_some() || remat_arg {
663                         // Clone the inst!
664                         let new_inst = self.func.dfg.clone_inst(inst);
665                         trace!(
666                             " -> inst {} already has a location; cloned to {}",
667                             inst, new_inst
668                         );
669                         // Create mappings in the
670                         // value-to-elab'd-value map from original
671                         // results to cloned results.
672                         for (&result, &new_result) in self
673                             .func
674                             .dfg
675                             .inst_results(inst)
676                             .iter()
677                             .zip(self.func.dfg.inst_results(new_inst).iter())
678                         {
679                             let elab_value = ElaboratedValue {
680                                 value: new_result,
681                                 in_block: insert_block,
682                             };
683                             let best_result = self.value_to_best_value[result];
684                             self.value_to_elaborated_value.insert_if_absent_with_depth(
685                                 &NullCtx,
686                                 best_result.1,
687                                 elab_value,
688                                 scope_depth,
689                             );
690 
691                             self.value_to_best_value[new_result] = best_result;
692 
693                             trace!(
694                                 " -> cloned inst has new result {} for orig {}",
695                                 new_result, result
696                             );
697                         }
698                         new_inst
699                     } else {
700                         trace!(" -> no location; using original inst");
701                         // Create identity mappings from result values
702                         // to themselves in this scope, since we're
703                         // using the original inst.
704                         for &result in self.func.dfg.inst_results(inst) {
705                             let elab_value = ElaboratedValue {
706                                 value: result,
707                                 in_block: insert_block,
708                             };
709                             let best_result = self.value_to_best_value[result];
710                             self.value_to_elaborated_value.insert_if_absent_with_depth(
711                                 &NullCtx,
712                                 best_result.1,
713                                 elab_value,
714                                 scope_depth,
715                             );
716                             trace!(" -> inserting identity mapping for {}", result);
717                         }
718                         inst
719                     };
720 
721                     // Place the inst just before `before`.
722                     assert!(
723                         is_pure_for_egraph(self.func, inst),
724                         "something has gone very wrong if we are elaborating effectful \
725                          instructions, they should have remained in the skeleton"
726                     );
727                     self.func.layout.insert_inst(inst, before);
728 
729                     // Update the inst's arguments.
730                     self.func
731                         .dfg
732                         .overwrite_inst_values(inst, arg_values.into_iter().map(|ev| ev.value));
733 
734                     // Now that we've consumed the arg values, pop
735                     // them off the stack.
736                     self.elab_result_stack.truncate(arg_idx);
737 
738                     // Push the requested result index of the
739                     // instruction onto the elab-results stack.
740                     self.elab_result_stack.push(ElaboratedValue {
741                         in_block: insert_block,
742                         value: self.func.dfg.inst_results(inst)[result_idx],
743                     });
744                 }
745             }
746         }
747     }
748 
elaborate_block(&mut self, elab_values: &mut Vec<Value>, idom: Option<Block>, block: Block)749     fn elaborate_block(&mut self, elab_values: &mut Vec<Value>, idom: Option<Block>, block: Block) {
750         trace!("elaborate_block: block {}", block);
751         self.start_block(idom, block);
752 
753         // Iterate over the side-effecting skeleton using the linked
754         // list in Layout. We will insert instructions that are
755         // elaborated *before* `inst`, so we can always use its
756         // next-link to continue the iteration.
757         let mut next_inst = self.func.layout.first_inst(block);
758         let mut first_branch = None;
759         while let Some(inst) = next_inst {
760             trace!(
761                 "elaborating inst {} with results {:?}",
762                 inst,
763                 self.func.dfg.inst_results(inst)
764             );
765             // Record the first branch we see in the block; all
766             // elaboration for args of *any* branch must be inserted
767             // before the *first* branch, because the branch group
768             // must remain contiguous at the end of the block.
769             if self.func.dfg.insts[inst].opcode().is_branch() && first_branch == None {
770                 first_branch = Some(inst);
771             }
772 
773             // Determine where elaboration inserts insts.
774             let before = first_branch.unwrap_or(inst);
775             trace!(" -> inserting before {}", before);
776 
777             elab_values.extend(self.func.dfg.inst_values(inst));
778             for arg in elab_values.iter_mut() {
779                 trace!(" -> arg {}", *arg);
780                 // Elaborate the arg, placing any newly-inserted insts
781                 // before `before`. Get the updated value, which may
782                 // be different than the original.
783                 let mut new_arg = self.elaborate_eclass_use(*arg, before);
784                 Self::maybe_remat_arg(
785                     &self.remat_values,
786                     &mut self.func,
787                     &mut self.remat_copies,
788                     block,
789                     inst,
790                     &mut new_arg,
791                     &mut self.stats,
792                 );
793                 trace!("   -> rewrote arg to {:?}", new_arg);
794                 *arg = new_arg.value;
795             }
796             self.func
797                 .dfg
798                 .overwrite_inst_values(inst, elab_values.drain(..));
799 
800             // We need to put the results of this instruction in the
801             // map now.
802             for &result in self.func.dfg.inst_results(inst) {
803                 trace!(" -> result {}", result);
804                 let best_result = self.value_to_best_value[result];
805                 self.value_to_elaborated_value.insert_if_absent(
806                     &NullCtx,
807                     best_result.1,
808                     ElaboratedValue {
809                         in_block: block,
810                         value: result,
811                     },
812                 );
813             }
814 
815             next_inst = self.func.layout.next_inst(inst);
816         }
817     }
818 
elaborate_domtree(&mut self, domtree: &DominatorTree)819     fn elaborate_domtree(&mut self, domtree: &DominatorTree) {
820         self.block_stack.push(BlockStackEntry::Elaborate {
821             block: self.func.layout.entry_block().unwrap(),
822             idom: None,
823         });
824 
825         // A temporary workspace for elaborate_block, allocated here to maximize the use of the
826         // allocation.
827         let mut elab_values = Vec::new();
828 
829         while let Some(top) = self.block_stack.pop() {
830             match top {
831                 BlockStackEntry::Elaborate { block, idom } => {
832                     self.block_stack.push(BlockStackEntry::Pop);
833                     self.value_to_elaborated_value.increment_depth();
834 
835                     self.elaborate_block(&mut elab_values, idom, block);
836 
837                     // Push children. We are doing a preorder
838                     // traversal so we do this after processing this
839                     // block above.
840                     let block_stack_end = self.block_stack.len();
841                     for child in self.ctrl_plane.shuffled(domtree.children(block)) {
842                         self.block_stack.push(BlockStackEntry::Elaborate {
843                             block: child,
844                             idom: Some(block),
845                         });
846                     }
847                     // Reverse what we just pushed so we elaborate in
848                     // original block order. (The domtree iter is a
849                     // single-ended iter over a singly-linked list so
850                     // we can't `.rev()` above.)
851                     self.block_stack[block_stack_end..].reverse();
852                 }
853                 BlockStackEntry::Pop => {
854                     self.value_to_elaborated_value.decrement_depth();
855                 }
856             }
857         }
858     }
859 
elaborate(&mut self)860     pub(crate) fn elaborate(&mut self) {
861         self.stats.elaborate_func += 1;
862         self.stats.elaborate_func_pre_insts += self.func.dfg.num_insts() as u64;
863         self.compute_best_values();
864         self.elaborate_domtree(&self.domtree);
865         self.stats.elaborate_func_post_insts += self.func.dfg.num_insts() as u64;
866     }
867 }
868