176911c29SSSD //! Support for egraphs represented in the DataFlowGraph.
276911c29SSSD 
376911c29SSSD use crate::FxHashSet;
476911c29SSSD use crate::alias_analysis::{AliasAnalysis, LastStores};
576911c29SSSD use crate::ctxhash::{CtxEq, CtxHash, NullCtx};
676911c29SSSD use crate::cursor::{Cursor, CursorPosition, FuncCursor};
776911c29SSSD use crate::dominator_tree::DominatorTree;
876911c29SSSD use crate::egraph::elaborate::Elaborator;
976911c29SSSD use crate::inst_predicates::{is_mergeable_for_egraph, is_pure_for_egraph};
1076911c29SSSD use crate::ir::{
11*2f7dbd61SChris Fallin     Block, DataFlowGraph, Function, Inst, InstructionData, Type, Value, ValueDef, ValueListPool,
1276911c29SSSD };
1376911c29SSSD use crate::loop_analysis::LoopAnalysis;
1476911c29SSSD use crate::opts::IsleContext;
1576911c29SSSD use crate::opts::generated_code::SkeletonInstSimplification;
1676911c29SSSD use crate::scoped_hash_map::{Entry as ScopedEntry, ScopedHashMap};
1776911c29SSSD use crate::take_and_replace::TakeAndReplace;
1876911c29SSSD use crate::trace;
1976911c29SSSD use alloc::vec::Vec;
2076911c29SSSD use core::cmp::Ordering;
2176911c29SSSD use core::hash::Hasher;
2276911c29SSSD use cranelift_control::ControlPlane;
2376911c29SSSD use cranelift_entity::SecondaryMap;
2476911c29SSSD use cranelift_entity::packed_option::ReservedValue;
2576911c29SSSD use smallvec::SmallVec;
2676911c29SSSD 
2776911c29SSSD mod cost;
2876911c29SSSD mod elaborate;
2976911c29SSSD 
3076911c29SSSD /// Pass over a Function that does the whole aegraph thing.
3176911c29SSSD ///
3276911c29SSSD /// - Removes non-skeleton nodes from the Layout.
3376911c29SSSD /// - Performs a GVN-and-rule-application pass over all Values
3476911c29SSSD ///   reachable from the skeleton, potentially creating new Union
3576911c29SSSD ///   nodes (i.e., an aegraph) so that some values have multiple
3676911c29SSSD ///   representations.
3776911c29SSSD /// - Does "extraction" on the aegraph: selects the best value out of
3876911c29SSSD ///   the tree-of-Union nodes for each used value.
3976911c29SSSD /// - Does "scoped elaboration" on the aegraph: chooses one or more
4076911c29SSSD ///   locations for pure nodes to become instructions again in the
4176911c29SSSD ///   layout, as forced by the skeleton.
4276911c29SSSD ///
4376911c29SSSD /// At the beginning and end of this pass, the CLIF should be in a
4476911c29SSSD /// state that passes the verifier and, additionally, has no Union
4576911c29SSSD /// nodes. During the pass, Union nodes may exist, and instructions in
4676911c29SSSD /// the layout may refer to results of instructions that are not
4776911c29SSSD /// placed in the layout.
4876911c29SSSD pub struct EgraphPass<'a> {
4976911c29SSSD     /// The function we're operating on.
5076911c29SSSD     func: &'a mut Function,
5176911c29SSSD     /// Dominator tree for the CFG, used to visit blocks in pre-order
5276911c29SSSD     /// so we see value definitions before their uses, and also used for
5376911c29SSSD     /// O(1) dominance checks.
5476911c29SSSD     domtree: &'a DominatorTree,
5576911c29SSSD     /// Alias analysis, used during optimization.
5676911c29SSSD     alias_analysis: &'a mut AliasAnalysis<'a>,
5776911c29SSSD     /// Loop analysis results, used for built-in LICM during
5876911c29SSSD     /// elaboration.
5976911c29SSSD     loop_analysis: &'a LoopAnalysis,
6076911c29SSSD     /// Chaos-mode control-plane so we can test that we still get
6176911c29SSSD     /// correct results when our heuristics make bad decisions.
6276911c29SSSD     ctrl_plane: &'a mut ControlPlane,
6376911c29SSSD     /// Which Values do we want to rematerialize in each block where
6476911c29SSSD     /// they're used?
6576911c29SSSD     remat_values: FxHashSet<Value>,
6676911c29SSSD     /// Stats collected while we run this pass.
6776911c29SSSD     pub(crate) stats: Stats,
6876911c29SSSD }
6976911c29SSSD 
7076911c29SSSD /// The maximum number of rewrites we will take from a single call into ISLE.
7176911c29SSSD const MATCHES_LIMIT: usize = 5;
7276911c29SSSD 
7376911c29SSSD /// The maximum number of enodes in any given eclass.
7476911c29SSSD const ECLASS_ENODE_LIMIT: usize = 5;
7576911c29SSSD 
7676911c29SSSD /// Context passed through node insertion and optimization.
7776911c29SSSD pub(crate) struct OptimizeCtx<'opt, 'analysis>
7876911c29SSSD where
7976911c29SSSD     'analysis: 'opt,
8076911c29SSSD {
8176911c29SSSD     // Borrowed from EgraphPass:
8276911c29SSSD     pub(crate) func: &'opt mut Function,
8376911c29SSSD     pub(crate) value_to_opt_value: &'opt mut SecondaryMap<Value, Value>,
8476911c29SSSD     available_block: &'opt mut SecondaryMap<Value, Block>,
8576911c29SSSD     eclass_size: &'opt mut SecondaryMap<Value, u8>,
8676911c29SSSD     pub(crate) gvn_map: &'opt mut ScopedHashMap<(Type, InstructionData), Option<Value>>,
8776911c29SSSD     pub(crate) gvn_map_blocks: &'opt Vec<Block>,
8876911c29SSSD     pub(crate) remat_values: &'opt mut FxHashSet<Value>,
8976911c29SSSD     pub(crate) stats: &'opt mut Stats,
9076911c29SSSD     domtree: &'opt DominatorTree,
9176911c29SSSD     pub(crate) alias_analysis: &'opt mut AliasAnalysis<'analysis>,
9276911c29SSSD     pub(crate) alias_analysis_state: &'opt mut LastStores,
9376911c29SSSD     ctrl_plane: &'opt mut ControlPlane,
9476911c29SSSD     // Held locally during optimization of one node (recursively):
9576911c29SSSD     pub(crate) rewrite_depth: usize,
9676911c29SSSD     pub(crate) subsume_values: FxHashSet<Value>,
9776911c29SSSD     optimized_values: SmallVec<[Value; MATCHES_LIMIT]>,
9876911c29SSSD     optimized_insts: SmallVec<[SkeletonInstSimplification; MATCHES_LIMIT]>,
9976911c29SSSD }
10076911c29SSSD 
10176911c29SSSD /// For passing to `insert_pure_enode`. Sometimes the enode already
10276911c29SSSD /// exists as an Inst (from the original CLIF), and sometimes we're in
10376911c29SSSD /// the middle of creating it and want to avoid inserting it if
10476911c29SSSD /// possible until we know we need it.
10576911c29SSSD pub(crate) enum NewOrExistingInst {
10676911c29SSSD     New(InstructionData, Type),
10776911c29SSSD     Existing(Inst),
10876911c29SSSD }
10976911c29SSSD 
11076911c29SSSD impl NewOrExistingInst {
get_inst_key<'a>(&'a self, dfg: &'a DataFlowGraph) -> (Type, InstructionData)11176911c29SSSD     fn get_inst_key<'a>(&'a self, dfg: &'a DataFlowGraph) -> (Type, InstructionData) {
11276911c29SSSD         match self {
11376911c29SSSD             NewOrExistingInst::New(data, ty) => (*ty, *data),
11476911c29SSSD             NewOrExistingInst::Existing(inst) => {
11576911c29SSSD                 let ty = dfg.ctrl_typevar(*inst);
11676911c29SSSD                 (ty, dfg.insts[*inst])
11776911c29SSSD             }
11876911c29SSSD         }
11976911c29SSSD     }
12076911c29SSSD }
12176911c29SSSD 
12276911c29SSSD impl<'opt, 'analysis> OptimizeCtx<'opt, 'analysis>
12376911c29SSSD where
12476911c29SSSD     'analysis: 'opt,
12576911c29SSSD {
12676911c29SSSD     /// Optimization of a single instruction.
12776911c29SSSD     ///
12876911c29SSSD     /// This does a few things:
12976911c29SSSD     /// - Looks up the instruction in the GVN deduplication map. If we
13076911c29SSSD     ///   already have the same instruction somewhere else, with the
13176911c29SSSD     ///   same args, then we can alias the original instruction's
13276911c29SSSD     ///   results and omit this instruction entirely.
13376911c29SSSD     /// - If the instruction is "new" (not deduplicated), then apply
13476911c29SSSD     ///   optimization rules:
13576911c29SSSD     ///   - All of the mid-end rules written in ISLE.
13676911c29SSSD     ///   - Store-to-load forwarding.
13776911c29SSSD     /// - Update the value-to-opt-value map, and update the eclass
13876911c29SSSD     ///   union-find, if we rewrote the value to different form(s).
insert_pure_enode(&mut self, inst: NewOrExistingInst) -> Value13976911c29SSSD     pub(crate) fn insert_pure_enode(&mut self, inst: NewOrExistingInst) -> Value {
14076911c29SSSD         // Create the external context for looking up and updating the
14176911c29SSSD         // GVN map. This is necessary so that instructions themselves
14276911c29SSSD         // do not have to carry all the references or data for a full
14376911c29SSSD         // `Eq` or `Hash` impl.
14476911c29SSSD         let gvn_context = GVNContext {
14576911c29SSSD             value_lists: &self.func.dfg.value_lists,
14676911c29SSSD         };
14776911c29SSSD 
14876911c29SSSD         self.stats.pure_inst += 1;
14976911c29SSSD         if let NewOrExistingInst::New(..) = inst {
15076911c29SSSD             self.stats.new_inst += 1;
15176911c29SSSD         }
15276911c29SSSD 
15376911c29SSSD         // Does this instruction already exist? If so, add entries to
15476911c29SSSD         // the value-map to rewrite uses of its results to the results
15576911c29SSSD         // of the original (existing) instruction. If not, optimize
15676911c29SSSD         // the new instruction.
15776911c29SSSD         if let Some(&Some(orig_result)) = self
15876911c29SSSD             .gvn_map
15976911c29SSSD             .get(&gvn_context, &inst.get_inst_key(&self.func.dfg))
16076911c29SSSD         {
16176911c29SSSD             self.stats.pure_inst_deduped += 1;
16276911c29SSSD             if let NewOrExistingInst::Existing(inst) = inst {
16376911c29SSSD                 debug_assert_eq!(self.func.dfg.inst_results(inst).len(), 1);
16476911c29SSSD                 let result = self.func.dfg.first_result(inst);
16576911c29SSSD                 self.value_to_opt_value[result] = orig_result;
16676911c29SSSD                 self.available_block[result] = self.available_block[orig_result];
16776911c29SSSD             }
16876911c29SSSD             orig_result
16976911c29SSSD         } else {
17076911c29SSSD             // Now actually insert the InstructionData and attach
17176911c29SSSD             // result value (exactly one).
17276911c29SSSD             let (inst, result, ty) = match inst {
17376911c29SSSD                 NewOrExistingInst::New(data, typevar) => {
17476911c29SSSD                     self.stats.pure_inst_insert_new += 1;
17576911c29SSSD                     let inst = self.func.dfg.make_inst(data);
17676911c29SSSD                     // TODO: reuse return value?
17776911c29SSSD                     self.func.dfg.make_inst_results(inst, typevar);
17876911c29SSSD                     let result = self.func.dfg.first_result(inst);
17976911c29SSSD                     // New inst. We need to do the analysis of its result.
18076911c29SSSD                     (inst, result, typevar)
18176911c29SSSD                 }
18276911c29SSSD                 NewOrExistingInst::Existing(inst) => {
18376911c29SSSD                     self.stats.pure_inst_insert_orig += 1;
18476911c29SSSD                     let result = self.func.dfg.first_result(inst);
18576911c29SSSD                     let ty = self.func.dfg.ctrl_typevar(inst);
18676911c29SSSD                     (inst, result, ty)
18776911c29SSSD                 }
18876911c29SSSD             };
18976911c29SSSD 
19076911c29SSSD             self.available_block[result] = self.get_available_block(inst);
19176911c29SSSD             let opt_value = self.optimize_pure_enode(inst);
19276911c29SSSD             log::trace!("optimizing inst {inst} orig result {result} gave {opt_value}");
19376911c29SSSD 
19476911c29SSSD             let gvn_context = GVNContext {
19576911c29SSSD                 value_lists: &self.func.dfg.value_lists,
19676911c29SSSD             };
19776911c29SSSD             // Insert at level implied by args. This enables merging
19876911c29SSSD             // in LICM cases like:
19976911c29SSSD             //
20076911c29SSSD             // while (...) {
20176911c29SSSD             //   if (...) {
20276911c29SSSD             //     let x = loop_invariant_expr;
20376911c29SSSD             //   }
20476911c29SSSD             //   if (...) {
20576911c29SSSD             //     let x = loop_invariant_expr;
20676911c29SSSD             //   }
20776911c29SSSD             // }
20876911c29SSSD             //
20976911c29SSSD             // where the two instances of the expression otherwise
21076911c29SSSD             // wouldn't merge because each would be in a separate
21176911c29SSSD             // subscope of the scoped hashmap during traversal.
21276911c29SSSD             log::trace!(
21376911c29SSSD                 "value {} is available at {}",
21476911c29SSSD                 opt_value,
21576911c29SSSD                 self.available_block[opt_value]
21676911c29SSSD             );
21776911c29SSSD             let depth = self.depth_of_block_in_gvn_map(self.available_block[opt_value]);
21876911c29SSSD             self.gvn_map.insert_with_depth(
21976911c29SSSD                 &gvn_context,
22076911c29SSSD                 (ty, self.func.dfg.insts[inst]),
22176911c29SSSD                 Some(opt_value),
22276911c29SSSD                 depth,
22376911c29SSSD             );
22476911c29SSSD             self.value_to_opt_value[result] = opt_value;
22576911c29SSSD             opt_value
22676911c29SSSD         }
22776911c29SSSD     }
22876911c29SSSD 
22976911c29SSSD     /// Find the block where a pure instruction first becomes available,
23076911c29SSSD     /// defined as the block that is closest to the root where all of
23176911c29SSSD     /// its arguments are available. In the unusual case where a pure
23276911c29SSSD     /// instruction has no arguments (e.g. get_return_address), we can
23376911c29SSSD     /// place it anywhere, so it is available in the entry block.
23476911c29SSSD     ///
23576911c29SSSD     /// This function does not compute available blocks recursively.
23676911c29SSSD     /// All of the instruction's arguments must have had their available
23776911c29SSSD     /// blocks assigned already.
get_available_block(&self, inst: Inst) -> Block23876911c29SSSD     fn get_available_block(&self, inst: Inst) -> Block {
23976911c29SSSD         // Side-effecting instructions have different rules for where
24076911c29SSSD         // they become available, so this function does not apply.
24176911c29SSSD         debug_assert!(is_pure_for_egraph(self.func, inst));
24276911c29SSSD 
24376911c29SSSD         // Note that the def-point of all arguments to an instruction
24476911c29SSSD         // in SSA lie on a line of direct ancestors in the domtree, and
24576911c29SSSD         // so do their available-blocks. This means that for any pair of
24676911c29SSSD         // arguments, their available blocks are either the same or one
24776911c29SSSD         // strictly dominates the other. We just need to find any argument
24876911c29SSSD         // whose available block is deepest in the domtree.
24976911c29SSSD         self.func.dfg.insts[inst]
25076911c29SSSD             .arguments(&self.func.dfg.value_lists)
25176911c29SSSD             .iter()
25276911c29SSSD             .map(|&v| {
25376911c29SSSD                 let block = self.available_block[v];
25476911c29SSSD                 debug_assert!(!block.is_reserved_value());
25576911c29SSSD                 block
25676911c29SSSD             })
25776911c29SSSD             .max_by(|&x, &y| {
25876911c29SSSD                 if self.domtree.block_dominates(x, y) {
25976911c29SSSD                     Ordering::Less
26076911c29SSSD                 } else {
26176911c29SSSD                     debug_assert!(self.domtree.block_dominates(y, x));
26276911c29SSSD                     Ordering::Greater
26376911c29SSSD                 }
26476911c29SSSD             })
26576911c29SSSD             .unwrap_or(self.func.layout.entry_block().unwrap())
26676911c29SSSD     }
26776911c29SSSD 
depth_of_block_in_gvn_map(&self, block: Block) -> usize26876911c29SSSD     fn depth_of_block_in_gvn_map(&self, block: Block) -> usize {
26976911c29SSSD         log::trace!(
27076911c29SSSD             "finding depth of available block {} in domtree stack: {:?}",
27176911c29SSSD             block,
27276911c29SSSD             self.gvn_map_blocks
27376911c29SSSD         );
27476911c29SSSD         self.gvn_map_blocks
27576911c29SSSD             .iter()
27676911c29SSSD             .enumerate()
27776911c29SSSD             .rev()
27876911c29SSSD             .find(|&(_, b)| *b == block)
27976911c29SSSD             .unwrap()
28076911c29SSSD             .0
28176911c29SSSD     }
28276911c29SSSD 
28376911c29SSSD     /// Optimizes an enode by applying any matching mid-end rewrite
28476911c29SSSD     /// rules (or store-to-load forwarding, which is a special case),
28576911c29SSSD     /// unioning together all possible optimized (or rewritten) forms
28676911c29SSSD     /// of this expression into an eclass and returning the `Value`
28776911c29SSSD     /// that represents that eclass.
optimize_pure_enode(&mut self, inst: Inst) -> Value28876911c29SSSD     fn optimize_pure_enode(&mut self, inst: Inst) -> Value {
28976911c29SSSD         // A pure node always has exactly one result.
29076911c29SSSD         let orig_value = self.func.dfg.first_result(inst);
29176911c29SSSD 
29276911c29SSSD         let mut guard = TakeAndReplace::new(self, |x| &mut x.optimized_values);
29376911c29SSSD         let (ctx, optimized_values) = guard.get();
29476911c29SSSD 
29576911c29SSSD         // Limit rewrite depth. When we apply optimization rules, they
29676911c29SSSD         // may create new nodes (values) and those are, recursively,
29776911c29SSSD         // optimized eagerly as soon as they are created. So we may
29876911c29SSSD         // have more than one ISLE invocation on the stack. (This is
29976911c29SSSD         // necessary so that as the toplevel builds the
30076911c29SSSD         // right-hand-side expression bottom-up, it uses the "latest"
30176911c29SSSD         // optimized values for all the constituent parts.) To avoid
30276911c29SSSD         // infinite or problematic recursion, we bound the rewrite
30376911c29SSSD         // depth to a small constant here.
30476911c29SSSD         const REWRITE_LIMIT: usize = 5;
30576911c29SSSD         if ctx.rewrite_depth > REWRITE_LIMIT {
30676911c29SSSD             ctx.stats.rewrite_depth_limit += 1;
30776911c29SSSD             return orig_value;
30876911c29SSSD         }
30976911c29SSSD         ctx.rewrite_depth += 1;
31076911c29SSSD         trace!("Incrementing rewrite depth; now {}", ctx.rewrite_depth);
31176911c29SSSD 
31276911c29SSSD         // Invoke the ISLE toplevel constructor, getting all new
31376911c29SSSD         // values produced as equivalents to this value.
31476911c29SSSD         trace!("Calling into ISLE with original value {}", orig_value);
31576911c29SSSD         ctx.stats.rewrite_rule_invoked += 1;
31676911c29SSSD         debug_assert!(optimized_values.is_empty());
31776911c29SSSD         crate::opts::generated_code::constructor_simplify(
31876911c29SSSD             &mut IsleContext { ctx },
31976911c29SSSD             orig_value,
32076911c29SSSD             optimized_values,
32176911c29SSSD         );
32276911c29SSSD 
32376911c29SSSD         ctx.stats.rewrite_rule_results += optimized_values.len() as u64;
32476911c29SSSD 
32576911c29SSSD         // It's not supposed to matter what order `simplify` returns values in.
32676911c29SSSD         ctx.ctrl_plane.shuffle(optimized_values);
32776911c29SSSD 
32876911c29SSSD         let num_matches = optimized_values.len();
32976911c29SSSD         if num_matches > MATCHES_LIMIT {
33076911c29SSSD             trace!(
33176911c29SSSD                 "Reached maximum matches limit; too many optimized values \
33276911c29SSSD                  ({num_matches} > {MATCHES_LIMIT}); ignoring rest.",
33376911c29SSSD             );
33476911c29SSSD             optimized_values.truncate(MATCHES_LIMIT);
33576911c29SSSD         }
33676911c29SSSD 
33776911c29SSSD         // Sort and deduplicate optimized values, in case multiple
33876911c29SSSD         // rules produced the same simplification.
33976911c29SSSD         optimized_values.sort_unstable();
34076911c29SSSD         optimized_values.dedup();
34176911c29SSSD 
34276911c29SSSD         trace!("  -> returned from ISLE: {orig_value} -> {optimized_values:?}");
34376911c29SSSD 
34476911c29SSSD         // Construct a union-node tree representing the new eclass
34576911c29SSSD         // that results from rewriting. If any returned value was
34676911c29SSSD         // marked "subsume", take only that value. Otherwise,
34776911c29SSSD         // sequentially build the chain over the original value and
34876911c29SSSD         // all returned values.
34976911c29SSSD         let result_value = if let Some(&subsuming_value) = optimized_values
35076911c29SSSD             .iter()
35176911c29SSSD             .find(|&value| ctx.subsume_values.contains(value))
35276911c29SSSD         {
35376911c29SSSD             optimized_values.clear();
35476911c29SSSD             ctx.stats.pure_inst_subsume += 1;
35576911c29SSSD             subsuming_value
35676911c29SSSD         } else {
35776911c29SSSD             let mut union_value = orig_value;
35876911c29SSSD             let mut eclass_size = ctx.eclass_size[orig_value] + 1;
35976911c29SSSD             for optimized_value in optimized_values.drain(..) {
36076911c29SSSD                 trace!(
36176911c29SSSD                     "Returned from ISLE for {}, got {:?}",
36276911c29SSSD                     orig_value, optimized_value
36376911c29SSSD                 );
36476911c29SSSD                 if optimized_value == orig_value {
36576911c29SSSD                     trace!(" -> same as orig value; skipping");
36676911c29SSSD                     ctx.stats.pure_inst_rewrite_to_self += 1;
36776911c29SSSD                     continue;
36876911c29SSSD                 }
36976911c29SSSD                 let rhs_eclass_size = ctx.eclass_size[optimized_value] + 1;
37076911c29SSSD                 if usize::from(eclass_size) + usize::from(rhs_eclass_size) > ECLASS_ENODE_LIMIT {
37176911c29SSSD                     trace!(" -> reached eclass size limit");
37276911c29SSSD                     ctx.stats.eclass_size_limit += 1;
37376911c29SSSD                     break;
37476911c29SSSD                 }
37576911c29SSSD                 let old_union_value = union_value;
37676911c29SSSD                 union_value = ctx.func.dfg.union(old_union_value, optimized_value);
37776911c29SSSD                 eclass_size += rhs_eclass_size;
37876911c29SSSD                 ctx.eclass_size[union_value] = eclass_size - 1;
37976911c29SSSD                 ctx.stats.union += 1;
38076911c29SSSD                 trace!(" -> union: now {}", union_value);
38176911c29SSSD                 ctx.available_block[union_value] =
38276911c29SSSD                     ctx.merge_availability(old_union_value, optimized_value);
38376911c29SSSD             }
38476911c29SSSD             union_value
38576911c29SSSD         };
38676911c29SSSD 
38776911c29SSSD         ctx.rewrite_depth -= 1;
38876911c29SSSD         trace!("Decrementing rewrite depth; now {}", ctx.rewrite_depth);
38976911c29SSSD         if ctx.rewrite_depth == 0 {
39076911c29SSSD             ctx.subsume_values.clear();
39176911c29SSSD         }
39276911c29SSSD 
39376911c29SSSD         debug_assert!(ctx.optimized_values.is_empty());
39476911c29SSSD         result_value
39576911c29SSSD     }
39676911c29SSSD 
merge_availability(&self, a: Value, b: Value) -> Block39776911c29SSSD     fn merge_availability(&self, a: Value, b: Value) -> Block {
39876911c29SSSD         let a = self.available_block[a];
39976911c29SSSD         let b = self.available_block[b];
40076911c29SSSD         if self.domtree.block_dominates(a, b) {
40176911c29SSSD             a
40276911c29SSSD         } else {
40376911c29SSSD             b
40476911c29SSSD         }
40576911c29SSSD     }
40676911c29SSSD 
40776911c29SSSD     /// Optimize a "skeleton" instruction.
40876911c29SSSD     ///
40976911c29SSSD     /// Returns an optional command of how to continue processing the optimized
41076911c29SSSD     /// instruction (e.g. removing it or replacing it with a new instruction).
optimize_skeleton_inst( &mut self, inst: Inst, block: Block, ) -> Option<SkeletonInstSimplification>41176911c29SSSD     fn optimize_skeleton_inst(
41276911c29SSSD         &mut self,
41376911c29SSSD         inst: Inst,
41476911c29SSSD         block: Block,
41576911c29SSSD     ) -> Option<SkeletonInstSimplification> {
41676911c29SSSD         self.stats.skeleton_inst += 1;
41776911c29SSSD 
41876911c29SSSD         // If we have a rewrite rule for this instruction, do that first, so
41976911c29SSSD         // that GVN and alias analysis only see simplified skeleton
42076911c29SSSD         // instructions.
42176911c29SSSD         if let Some(cmd) = self.simplify_skeleton_inst(inst) {
42276911c29SSSD             self.stats.skeleton_inst_simplified += 1;
42376911c29SSSD             return Some(cmd);
42476911c29SSSD         }
42576911c29SSSD 
42676911c29SSSD         // First, can we try to deduplicate? We need to keep some copy
42776911c29SSSD         // of the instruction around because it's side-effecting, but
42876911c29SSSD         // we may be able to reuse an earlier instance of it.
42976911c29SSSD         if is_mergeable_for_egraph(self.func, inst) {
43076911c29SSSD             let result = self.func.dfg.inst_results(inst).get(0).copied();
43176911c29SSSD             trace!(" -> mergeable side-effecting op {}", inst);
43276911c29SSSD 
43376911c29SSSD             // Does this instruction already exist? If so, add entries to
43476911c29SSSD             // the value-map to rewrite uses of its results to the results
43576911c29SSSD             // of the original (existing) instruction. If not, optimize
43676911c29SSSD             // the new instruction.
43776911c29SSSD             //
43876911c29SSSD             // Note that the GVN map is scoped, which is important
43976911c29SSSD             // here: because effectful ops are not removed from the
44076911c29SSSD             // skeleton (`Layout`), we need to be mindful of whether
44176911c29SSSD             // our current position is dominated by an instance of the
44276911c29SSSD             // instruction. (See #5796 for details.)
44376911c29SSSD             let ty = self.func.dfg.ctrl_typevar(inst);
44476911c29SSSD             match self
44576911c29SSSD                 .gvn_map
44676911c29SSSD                 .entry(&NullCtx, (ty, self.func.dfg.insts[inst]))
44776911c29SSSD             {
44876911c29SSSD                 ScopedEntry::Occupied(o) => {
44976911c29SSSD                     let orig_result = *o.get();
45076911c29SSSD                     match (result, orig_result) {
45176911c29SSSD                         (Some(result), Some(orig_result)) => {
45276911c29SSSD                             // Hit in GVN map -- reuse value.
45376911c29SSSD                             self.stats.skeleton_inst_gvn += 1;
45476911c29SSSD                             self.value_to_opt_value[result] = orig_result;
45576911c29SSSD                             self.available_block[result] = self.available_block[orig_result];
45676911c29SSSD                             trace!(" -> merges result {} to {}", result, orig_result);
45776911c29SSSD                         }
45876911c29SSSD                         (None, None) => {
45976911c29SSSD                             // Hit in the GVN map, but the instruction doesn't
46076911c29SSSD                             // produce results, only side effects. Nothing else
46176911c29SSSD                             // to do here.
46276911c29SSSD                             self.stats.skeleton_inst_gvn += 1;
46376911c29SSSD                             trace!(" -> merges with dominating instruction");
46476911c29SSSD                         }
46576911c29SSSD                         (_, _) => unreachable!(),
46676911c29SSSD                     }
46776911c29SSSD                     Some(SkeletonInstSimplification::Remove)
46876911c29SSSD                 }
46976911c29SSSD                 ScopedEntry::Vacant(v) => {
47076911c29SSSD                     // Otherwise, insert it into the value-map.
47176911c29SSSD                     if let Some(result) = result {
47276911c29SSSD                         self.value_to_opt_value[result] = result;
47376911c29SSSD                         self.available_block[result] = block;
47476911c29SSSD                     }
47576911c29SSSD                     v.insert(result);
47676911c29SSSD                     trace!(" -> inserts as new (no GVN)");
47776911c29SSSD                     None
47876911c29SSSD                 }
47976911c29SSSD             }
48076911c29SSSD         }
48176911c29SSSD         // Otherwise, if a load or store, process it with the alias
48276911c29SSSD         // analysis to see if we can optimize it (rewrite in terms of
48376911c29SSSD         // an earlier load or stored value).
48476911c29SSSD         else if let Some(new_result) =
48576911c29SSSD             self.alias_analysis
48676911c29SSSD                 .process_inst(self.func, self.alias_analysis_state, inst)
48776911c29SSSD         {
48876911c29SSSD             self.stats.alias_analysis_removed += 1;
48976911c29SSSD             let result = self.func.dfg.first_result(inst);
49076911c29SSSD             trace!(
49176911c29SSSD                 " -> inst {} has result {} replaced with {}",
49276911c29SSSD                 inst, result, new_result
49376911c29SSSD             );
49476911c29SSSD             self.value_to_opt_value[result] = new_result;
49576911c29SSSD             self.available_block[result] = self.available_block[new_result];
49676911c29SSSD             Some(SkeletonInstSimplification::Remove)
49776911c29SSSD         }
49876911c29SSSD         // Otherwise, generic side-effecting op -- always keep it, and
49976911c29SSSD         // set its results to identity-map to original values.
50076911c29SSSD         else {
50176911c29SSSD             // Set all results to identity-map to themselves
50276911c29SSSD             // in the value-to-opt-value map.
50376911c29SSSD             for &result in self.func.dfg.inst_results(inst) {
50476911c29SSSD                 self.value_to_opt_value[result] = result;
50576911c29SSSD                 self.available_block[result] = block;
50676911c29SSSD             }
50776911c29SSSD             None
50876911c29SSSD         }
50976911c29SSSD     }
51076911c29SSSD 
51176911c29SSSD     /// Find the best simplification of the given skeleton instruction, if any,
51276911c29SSSD     /// by consulting our `simplify_skeleton` ISLE rules.
simplify_skeleton_inst(&mut self, inst: Inst) -> Option<SkeletonInstSimplification>51376911c29SSSD     fn simplify_skeleton_inst(&mut self, inst: Inst) -> Option<SkeletonInstSimplification> {
51476911c29SSSD         // We cannot currently simplify terminators, or simplify into
51576911c29SSSD         // terminators. Anything that could change the control-flow graph is off
51676911c29SSSD         // limits.
51776911c29SSSD         //
51876911c29SSSD         // Consider the following CLIF snippet:
51976911c29SSSD         //
52076911c29SSSD         //     block0(v0: i64):
52176911c29SSSD         //         v1 = iconst.i32 0
52276911c29SSSD         //         trapz v1, user42
52376911c29SSSD         //         v2 = load.i32 v0
52476911c29SSSD         //         brif v1, block1, block2
52576911c29SSSD         //     block1:
52676911c29SSSD         //         return v2
52776911c29SSSD         //     block2:
52876911c29SSSD         //         v3 = iconst.i32 1
52976911c29SSSD         //         v4 = iadd v2, v3
53076911c29SSSD         //         return v4
53176911c29SSSD         //
53276911c29SSSD         // We would ideally like to perform simplifications like replacing the
53376911c29SSSD         // `trapz` with an unconditional `trap` and the conditional `brif`
53476911c29SSSD         // branch with an unconditional `jump`. Note, however, that blocks
53576911c29SSSD         // `block1` and `block2` are dominated by `block0` and therefore can and
53676911c29SSSD         // do use values defined in `block0`. This presents challenges:
53776911c29SSSD         //
53876911c29SSSD         // * If we replace the `brif` with a `jump`, then we've mutated the
53976911c29SSSD         //   control-flow graph and removed that domination property. The uses
54076911c29SSSD         //   of `v2` and `v3` in those blocks become invalid.
54176911c29SSSD         //
54276911c29SSSD         // * Even worse, if we turn the `trapz` into a `trap`, we are
54376911c29SSSD         //   introducing a terminator into the middle of the block, which leaves
54476911c29SSSD         //   us with two choices to fix up the IR so that there aren't any
54576911c29SSSD         //   instructions following the terminator in the block:
54676911c29SSSD         //
54776911c29SSSD         //   1. We can split the unreachable instructions off into a new
54876911c29SSSD         //      block. However, there is no control-flow edge from the current
54976911c29SSSD         //      block to this new block and so, again, the new block isn't
55076911c29SSSD         //      dominated by the current block, and therefore the can't use
55176911c29SSSD         //      values defined in this block or any dominating it. The `load`
55276911c29SSSD         //      instruction uses `v0` but is not dominated by `v0`'s
55376911c29SSSD         //      definition.
55476911c29SSSD         //
55576911c29SSSD         //   2. Alternatively, we can simply delete the trailing instructions,
55676911c29SSSD         //      since they are unreachable. But then not only are the old
55776911c29SSSD         //      instructions' uses no longer dominated by their definitions, but
55876911c29SSSD         //      the definitions do not exist at all anymore!
55976911c29SSSD         //
56076911c29SSSD         // Whatever approach we would take, we would invalidate value uses, and
56176911c29SSSD         // would need to track and fix them up.
56276911c29SSSD         if self.func.dfg.insts[inst].opcode().is_branch() {
56376911c29SSSD             return None;
56476911c29SSSD         }
56576911c29SSSD 
56676911c29SSSD         let mut guard = TakeAndReplace::new(self, |x| &mut x.optimized_insts);
56776911c29SSSD         let (ctx, optimized_insts) = guard.get();
56876911c29SSSD 
56976911c29SSSD         crate::opts::generated_code::constructor_simplify_skeleton(
57076911c29SSSD             &mut IsleContext { ctx },
57176911c29SSSD             inst,
57276911c29SSSD             optimized_insts,
57376911c29SSSD         );
57476911c29SSSD 
57576911c29SSSD         let simplifications_len = optimized_insts.len();
57676911c29SSSD         log::trace!(" -> simplify_skeleton: yielded {simplifications_len} simplification(s)");
57776911c29SSSD         if simplifications_len > MATCHES_LIMIT {
57876911c29SSSD             log::trace!("      too many candidate simplifications; truncating to {MATCHES_LIMIT}");
57976911c29SSSD             optimized_insts.truncate(MATCHES_LIMIT);
58076911c29SSSD         }
58176911c29SSSD 
58276911c29SSSD         // Find the best simplification, if any, from our candidates.
58376911c29SSSD         //
58476911c29SSSD         // Unlike simplifying pure values, we do not add side-effectful
58576911c29SSSD         // instructions to the egraph, nor do we extract the best version via
58676911c29SSSD         // dynamic programming and considering the costs of operands. Instead,
58776911c29SSSD         // we greedily choose the best simplification. This is because there is
58876911c29SSSD         // an impedance mismatch: the egraph and our pure rewrites are centered
58976911c29SSSD         // around *values*, but we don't represent side-effects with values, we
59076911c29SSSD         // represent them implicitly in their *instructions*.
59176911c29SSSD         //
59276911c29SSSD         // The initial best choice is "no simplification, just use the original
59376911c29SSSD         // instruction" which has the original instruction's cost.
59476911c29SSSD         let mut best = None;
59576911c29SSSD         let mut best_cost = cost::Cost::of_skeleton_op(
59676911c29SSSD             ctx.func.dfg.insts[inst].opcode(),
59776911c29SSSD             ctx.func.dfg.inst_args(inst).len(),
59876911c29SSSD         );
59976911c29SSSD         while let Some(simplification) = optimized_insts.pop() {
60076911c29SSSD             let (new_inst, new_val) = match simplification {
60176911c29SSSD                 // We can't do better than completely removing the skeleton
60276911c29SSSD                 // instruction, so short-cicuit the loop and eagerly return the
60376911c29SSSD                 // `Remove*` simplifications.
60476911c29SSSD                 SkeletonInstSimplification::Remove => {
60576911c29SSSD                     log::trace!(" -> simplify_skeleton: remove inst");
60676911c29SSSD                     debug_assert!(ctx.func.dfg.inst_results(inst).is_empty());
60776911c29SSSD                     return Some(simplification);
60876911c29SSSD                 }
60976911c29SSSD                 SkeletonInstSimplification::RemoveWithVal { val } => {
61076911c29SSSD                     log::trace!(" -> simplify_skeleton: remove inst and use {val} as its result");
61176911c29SSSD                     if cfg!(debug_assertions) {
61276911c29SSSD                         let results = ctx.func.dfg.inst_results(inst);
61376911c29SSSD                         debug_assert_eq!(results.len(), 1);
61476911c29SSSD                         debug_assert_eq!(
61576911c29SSSD                             ctx.func.dfg.value_type(results[0]),
61676911c29SSSD                             ctx.func.dfg.value_type(val),
61776911c29SSSD                         );
61876911c29SSSD                     }
61976911c29SSSD                     return Some(simplification);
62076911c29SSSD                 }
62176911c29SSSD 
62276911c29SSSD                 // For instruction replacement simplification, we want to check
62376911c29SSSD                 // that the replacements define the same number and types of
62476911c29SSSD                 // values as the original instruction, and also determine
62576911c29SSSD                 // whether they are actually an improvement over (i.e. have
62676911c29SSSD                 // lower cost than) the original instruction.
62776911c29SSSD                 SkeletonInstSimplification::Replace { inst } => {
62876911c29SSSD                     log::trace!(
62976911c29SSSD                         " -> simplify_skeleton: replace inst with {inst}: {}",
63076911c29SSSD                         ctx.func.dfg.display_inst(inst)
63176911c29SSSD                     );
63276911c29SSSD                     (inst, None)
63376911c29SSSD                 }
63476911c29SSSD                 SkeletonInstSimplification::ReplaceWithVal { inst, val } => {
63576911c29SSSD                     log::trace!(
63676911c29SSSD                         " -> simplify_skeleton: replace inst with {val} and {inst}: {}",
63776911c29SSSD                         ctx.func.dfg.display_inst(inst)
63876911c29SSSD                     );
63976911c29SSSD                     (inst, Some(val))
64076911c29SSSD                 }
64176911c29SSSD             };
64276911c29SSSD 
64376911c29SSSD             if cfg!(debug_assertions) {
64476911c29SSSD                 let opcode = ctx.func.dfg.insts[inst].opcode();
64576911c29SSSD                 debug_assert!(
64676911c29SSSD                     !(opcode.is_terminator() || opcode.is_branch()),
64776911c29SSSD                     "simplifying control-flow instructions and terminators is not yet supported",
64876911c29SSSD                 );
64976911c29SSSD 
65076911c29SSSD                 let old_vals = ctx.func.dfg.inst_results(inst);
65176911c29SSSD                 let new_vals = if let Some(val) = new_val.as_ref() {
65276911c29SSSD                     core::slice::from_ref(val)
65376911c29SSSD                 } else {
65476911c29SSSD                     ctx.func.dfg.inst_results(new_inst)
65576911c29SSSD                 };
65676911c29SSSD                 debug_assert_eq!(
65776911c29SSSD                     old_vals.len(),
65876911c29SSSD                     new_vals.len(),
65976911c29SSSD                     "skeleton simplification should result in the same number of result values",
66076911c29SSSD                 );
66176911c29SSSD 
66276911c29SSSD                 for (old_val, new_val) in old_vals.iter().zip(new_vals) {
66376911c29SSSD                     let old_ty = ctx.func.dfg.value_type(*old_val);
66476911c29SSSD                     let new_ty = ctx.func.dfg.value_type(*new_val);
66576911c29SSSD                     debug_assert_eq!(
66676911c29SSSD                         old_ty, new_ty,
66776911c29SSSD                         "skeleton simplification should result in values of the correct type",
66876911c29SSSD                     );
66976911c29SSSD                 }
67076911c29SSSD             }
67176911c29SSSD 
67276911c29SSSD             // Our best simplification is the one with the least cost. Update
67376911c29SSSD             // `best` if necessary.
67476911c29SSSD             let cost = cost::Cost::of_skeleton_op(
67576911c29SSSD                 ctx.func.dfg.insts[new_inst].opcode(),
67676911c29SSSD                 ctx.func.dfg.inst_args(new_inst).len(),
67776911c29SSSD             );
67876911c29SSSD             if cost < best_cost {
67976911c29SSSD                 best = Some(simplification);
68076911c29SSSD                 best_cost = cost;
68176911c29SSSD             }
68276911c29SSSD         }
68376911c29SSSD 
68476911c29SSSD         // Return the best simplification!
68576911c29SSSD         best
68676911c29SSSD     }
68776911c29SSSD }
68876911c29SSSD 
68976911c29SSSD impl<'a> EgraphPass<'a> {
69076911c29SSSD     /// Create a new EgraphPass.
new( func: &'a mut Function, domtree: &'a DominatorTree, loop_analysis: &'a LoopAnalysis, alias_analysis: &'a mut AliasAnalysis<'a>, ctrl_plane: &'a mut ControlPlane, ) -> Self69176911c29SSSD     pub fn new(
69276911c29SSSD         func: &'a mut Function,
69376911c29SSSD         domtree: &'a DominatorTree,
69476911c29SSSD         loop_analysis: &'a LoopAnalysis,
69576911c29SSSD         alias_analysis: &'a mut AliasAnalysis<'a>,
69676911c29SSSD         ctrl_plane: &'a mut ControlPlane,
69776911c29SSSD     ) -> Self {
69876911c29SSSD         Self {
69976911c29SSSD             func,
70076911c29SSSD             domtree,
70176911c29SSSD             loop_analysis,
70276911c29SSSD             alias_analysis,
70376911c29SSSD             ctrl_plane,
70476911c29SSSD             stats: Stats::default(),
70576911c29SSSD             remat_values: FxHashSet::default(),
70676911c29SSSD         }
70776911c29SSSD     }
70876911c29SSSD 
70976911c29SSSD     /// Run the process.
run(&mut self)71076911c29SSSD     pub fn run(&mut self) {
71176911c29SSSD         self.remove_pure_and_optimize();
71276911c29SSSD 
71376911c29SSSD         trace!("egraph built:\n{}\n", self.func.display());
71476911c29SSSD         if cfg!(feature = "trace-log") {
71576911c29SSSD             for (value, def) in self.func.dfg.values_and_defs() {
71676911c29SSSD                 trace!(" -> {} = {:?}", value, def);
71776911c29SSSD                 match def {
71876911c29SSSD                     ValueDef::Result(i, 0) => {
71976911c29SSSD                         trace!("  -> {} = {:?}", i, self.func.dfg.insts[i]);
72076911c29SSSD                     }
72176911c29SSSD                     _ => {}
72276911c29SSSD                 }
72376911c29SSSD             }
72476911c29SSSD         }
72576911c29SSSD 
72676911c29SSSD         self.elaborate();
72776911c29SSSD 
72876911c29SSSD         log::trace!("stats: {:#?}", self.stats);
72976911c29SSSD     }
73076911c29SSSD 
73176911c29SSSD     /// Remove pure nodes from the `Layout` of the function, ensuring
73276911c29SSSD     /// that only the "side-effect skeleton" remains, and also
73376911c29SSSD     /// optimize the pure nodes. This is the first step of
73476911c29SSSD     /// egraph-based processing and turns the pure CFG-based CLIF into
73576911c29SSSD     /// a CFG skeleton with a sea of (optimized) nodes tying it
73676911c29SSSD     /// together.
73776911c29SSSD     ///
73876911c29SSSD     /// As we walk through the code, we eagerly apply optimization
73976911c29SSSD     /// rules; at any given point we have a "latest version" of an
74076911c29SSSD     /// eclass of possible representations for a `Value` in the
74176911c29SSSD     /// original program, which is itself a `Value` at the root of a
74276911c29SSSD     /// union-tree. We keep a map from the original values to these
74376911c29SSSD     /// optimized values. When we encounter any instruction (pure or
74476911c29SSSD     /// side-effecting skeleton) we rewrite its arguments to capture
74576911c29SSSD     /// the "latest" optimized forms of these values. (We need to do
74676911c29SSSD     /// this as part of this pass, and not later using a finished map,
74776911c29SSSD     /// because the eclass can continue to be updated and we need to
74876911c29SSSD     /// only refer to its subset that exists at this stage, to
74976911c29SSSD     /// maintain acyclicity.)
remove_pure_and_optimize(&mut self)75076911c29SSSD     fn remove_pure_and_optimize(&mut self) {
75176911c29SSSD         let mut cursor = FuncCursor::new(self.func);
75276911c29SSSD         let mut value_to_opt_value: SecondaryMap<Value, Value> =
75376911c29SSSD             SecondaryMap::with_default(Value::reserved_value());
75476911c29SSSD 
75576911c29SSSD         // Map from instruction to value for hash-consing of pure ops
75676911c29SSSD         // into the egraph. This can be a standard (non-scoped)
75776911c29SSSD         // hashmap because pure ops have no location: they are
75876911c29SSSD         // "outside of" control flow.
75976911c29SSSD         //
76076911c29SSSD         // Note also that we keep the controlling typevar (the `Type`
76176911c29SSSD         // in the tuple below) because it may disambiguate
76276911c29SSSD         // instructions that are identical except for type.
76376911c29SSSD         //
76476911c29SSSD         // We store both skeleton and non-skeleton instructions in the
76576911c29SSSD         // GVN map; for skeleton instructions, we only store those
76676911c29SSSD         // that are idempotent, i.e., still eligible to GVN. Note that
76776911c29SSSD         // some skeleton instructions are idempotent but do not
76876911c29SSSD         // produce a value: e.g., traps on a given condition. To allow
76976911c29SSSD         // for both cases, we store an `Option<Value>` as the value in
77076911c29SSSD         // this map.
77176911c29SSSD         let mut gvn_map: ScopedHashMap<(Type, InstructionData), Option<Value>> =
77276911c29SSSD             ScopedHashMap::with_capacity(cursor.func.dfg.num_values());
77376911c29SSSD 
77476911c29SSSD         // The block in the domtree preorder traversal at each level
77576911c29SSSD         // of the GVN map.
77676911c29SSSD         let mut gvn_map_blocks: Vec<Block> = vec![];
77776911c29SSSD 
77876911c29SSSD         // To get the best possible merging and canonicalization, we
77976911c29SSSD         // track where a value is "available" at: this is the
78076911c29SSSD         // domtree-nearest-ancestor join of all args if the value
78176911c29SSSD         // itself is pure, otherwise the block where the value is
78276911c29SSSD         // defined. (And for union nodes, the
78376911c29SSSD         // domtree-highest-ancestor, i.e., the meet or the dual to the
78476911c29SSSD         // above join.)
78576911c29SSSD         let mut available_block: SecondaryMap<Value, Block> =
78676911c29SSSD             SecondaryMap::with_default(Block::reserved_value());
78776911c29SSSD 
78876911c29SSSD         // To avoid blowing up eclasses too much, we track the size of
78976911c29SSSD         // each eclass reachable by a tree of union nodes from a given
79076911c29SSSD         // value ID, and we avoid union'ing additional values into an
79176911c29SSSD         // eclass when it reaches `ECLASS_ENODE_LIMIT`.
79276911c29SSSD         //
79376911c29SSSD         // For efficiency, this encodes size minus one: so a value of
79476911c29SSSD         // zero (which is cheap to bulk-initialize) means a singleton
79576911c29SSSD         // eclass of size one. This also allows us to avoid explicitly
79676911c29SSSD         // writing the size for any values that are not union nodes.
79776911c29SSSD         let mut eclass_size: SecondaryMap<Value, u8> = SecondaryMap::with_default(0);
79876911c29SSSD 
79976911c29SSSD         // This is an initial guess at the size we'll need, but we add
80076911c29SSSD         // more values as we build simplified alternative expressions so
80176911c29SSSD         // this is likely to realloc again later.
80276911c29SSSD         available_block.resize(cursor.func.dfg.num_values());
80376911c29SSSD 
80476911c29SSSD         // In domtree preorder, visit blocks. (TODO: factor out an
80576911c29SSSD         // iterator from this and elaborator.)
80676911c29SSSD         let root = cursor.layout().entry_block().unwrap();
80776911c29SSSD         enum StackEntry {
80876911c29SSSD             Visit(Block),
80976911c29SSSD             Pop,
81076911c29SSSD         }
81176911c29SSSD         let mut block_stack = vec![StackEntry::Visit(root)];
81276911c29SSSD         while let Some(entry) = block_stack.pop() {
81376911c29SSSD             match entry {
81476911c29SSSD                 StackEntry::Visit(block) => {
81576911c29SSSD                     // We popped this block; push children
81676911c29SSSD                     // immediately, then process this block.
81776911c29SSSD                     block_stack.push(StackEntry::Pop);
81876911c29SSSD                     block_stack.extend(
81976911c29SSSD                         self.ctrl_plane
82076911c29SSSD                             .shuffled(self.domtree.children(block))
82176911c29SSSD                             .map(StackEntry::Visit),
82276911c29SSSD                     );
82376911c29SSSD                     gvn_map.increment_depth();
82476911c29SSSD                     gvn_map_blocks.push(block);
82576911c29SSSD 
82676911c29SSSD                     trace!("Processing block {}", block);
82776911c29SSSD                     cursor.set_position(CursorPosition::Before(block));
82876911c29SSSD 
82976911c29SSSD                     let mut alias_analysis_state = self.alias_analysis.block_starting_state(block);
83076911c29SSSD 
83176911c29SSSD                     for &param in cursor.func.dfg.block_params(block) {
83276911c29SSSD                         trace!("creating initial singleton eclass for blockparam {}", param);
83376911c29SSSD                         value_to_opt_value[param] = param;
83476911c29SSSD                         available_block[param] = block;
83576911c29SSSD                     }
83676911c29SSSD                     while let Some(inst) = cursor.next_inst() {
83776911c29SSSD                         trace!(
83876911c29SSSD                             "Processing inst {inst}: {}",
83976911c29SSSD                             cursor.func.dfg.display_inst(inst),
84076911c29SSSD                         );
84176911c29SSSD 
84276911c29SSSD                         // Rewrite args of *all* instructions using the
84376911c29SSSD                         // value-to-opt-value map.
84476911c29SSSD                         cursor.func.dfg.map_inst_values(inst, |arg| {
84576911c29SSSD                             let new_value = value_to_opt_value[arg];
84676911c29SSSD                             trace!("rewriting arg {} of inst {} to {}", arg, inst, new_value);
84776911c29SSSD                             debug_assert_ne!(new_value, Value::reserved_value());
84876911c29SSSD                             new_value
84976911c29SSSD                         });
85076911c29SSSD 
85176911c29SSSD                         // Build a context for optimization, with borrows of
85276911c29SSSD                         // state. We can't invoke a method on `self` because
85376911c29SSSD                         // we've borrowed `self.func` mutably (as
85476911c29SSSD                         // `cursor.func`) so we pull apart the pieces instead
85576911c29SSSD                         // here.
85676911c29SSSD                         let mut ctx = OptimizeCtx {
85776911c29SSSD                             func: cursor.func,
85876911c29SSSD                             value_to_opt_value: &mut value_to_opt_value,
85976911c29SSSD                             gvn_map: &mut gvn_map,
86076911c29SSSD                             gvn_map_blocks: &mut gvn_map_blocks,
86176911c29SSSD                             available_block: &mut available_block,
86276911c29SSSD                             eclass_size: &mut eclass_size,
86376911c29SSSD                             rewrite_depth: 0,
86476911c29SSSD                             subsume_values: FxHashSet::default(),
86576911c29SSSD                             remat_values: &mut self.remat_values,
86676911c29SSSD                             stats: &mut self.stats,
86776911c29SSSD                             domtree: &self.domtree,
86876911c29SSSD                             alias_analysis: self.alias_analysis,
86976911c29SSSD                             alias_analysis_state: &mut alias_analysis_state,
87076911c29SSSD                             ctrl_plane: self.ctrl_plane,
87176911c29SSSD                             optimized_values: Default::default(),
87276911c29SSSD                             optimized_insts: Default::default(),
87376911c29SSSD                         };
87476911c29SSSD 
87576911c29SSSD                         if is_pure_for_egraph(ctx.func, inst) {
87676911c29SSSD                             // Insert into GVN map and optimize any new nodes
87776911c29SSSD                             // inserted (recursively performing this work for
87876911c29SSSD                             // any nodes the optimization rules produce).
87976911c29SSSD                             let inst = NewOrExistingInst::Existing(inst);
88076911c29SSSD                             ctx.insert_pure_enode(inst);
88176911c29SSSD                             // We've now rewritten all uses, or will when we
88276911c29SSSD                             // see them, and the instruction exists as a pure
88376911c29SSSD                             // enode in the eclass, so we can remove it.
88476911c29SSSD                             cursor.remove_inst_and_step_back();
88576911c29SSSD                         } else {
88676911c29SSSD                             if let Some(cmd) = ctx.optimize_skeleton_inst(inst, block) {
88776911c29SSSD                                 Self::execute_skeleton_inst_simplification(
88876911c29SSSD                                     cmd,
88976911c29SSSD                                     &mut cursor,
89076911c29SSSD                                     &mut value_to_opt_value,
89176911c29SSSD                                     inst,
89276911c29SSSD                                 );
89376911c29SSSD                             }
89476911c29SSSD                         }
89576911c29SSSD                     }
89676911c29SSSD                 }
89776911c29SSSD                 StackEntry::Pop => {
89876911c29SSSD                     gvn_map.decrement_depth();
89976911c29SSSD                     gvn_map_blocks.pop();
90076911c29SSSD                 }
90176911c29SSSD             }
90276911c29SSSD         }
90376911c29SSSD     }
90476911c29SSSD 
90576911c29SSSD     /// Execute a simplification of an instruction in the side-effectful
90676911c29SSSD     /// skeleton.
execute_skeleton_inst_simplification( simplification: SkeletonInstSimplification, cursor: &mut FuncCursor, value_to_opt_value: &mut SecondaryMap<Value, Value>, old_inst: Inst, )90776911c29SSSD     fn execute_skeleton_inst_simplification(
90876911c29SSSD         simplification: SkeletonInstSimplification,
90976911c29SSSD         cursor: &mut FuncCursor,
91076911c29SSSD         value_to_opt_value: &mut SecondaryMap<Value, Value>,
91176911c29SSSD         old_inst: Inst,
91276911c29SSSD     ) {
91376911c29SSSD         let mut forward_val = |cursor: &mut FuncCursor, old_val, new_val| {
91476911c29SSSD             cursor.func.dfg.change_to_alias(old_val, new_val);
91576911c29SSSD             value_to_opt_value[old_val] = new_val;
91676911c29SSSD         };
91776911c29SSSD 
91876911c29SSSD         let (new_inst, new_val) = match simplification {
91976911c29SSSD             SkeletonInstSimplification::Remove => {
92076911c29SSSD                 cursor.remove_inst_and_step_back();
92176911c29SSSD                 return;
92276911c29SSSD             }
92376911c29SSSD             SkeletonInstSimplification::RemoveWithVal { val } => {
92476911c29SSSD                 cursor.remove_inst_and_step_back();
92576911c29SSSD                 let old_val = cursor.func.dfg.first_result(old_inst);
92676911c29SSSD                 cursor.func.dfg.detach_inst_results(old_inst);
92776911c29SSSD                 forward_val(cursor, old_val, val);
92876911c29SSSD                 return;
92976911c29SSSD             }
93076911c29SSSD             SkeletonInstSimplification::Replace { inst } => (inst, None),
93176911c29SSSD             SkeletonInstSimplification::ReplaceWithVal { inst, val } => (inst, Some(val)),
93276911c29SSSD         };
93376911c29SSSD 
93476911c29SSSD         // Replace the old instruction with the new one.
93576911c29SSSD         cursor.replace_inst(new_inst);
93676911c29SSSD         debug_assert!(!cursor.func.dfg.insts[new_inst].opcode().is_terminator());
93776911c29SSSD 
93876911c29SSSD         // Redirect the old instruction's result values to our new instruction's
93976911c29SSSD         // result values.
94076911c29SSSD         let mut i = 0;
94176911c29SSSD         let mut next_new_val = |dfg: &crate::ir::DataFlowGraph| -> Value {
94276911c29SSSD             if let Some(val) = new_val {
94376911c29SSSD                 val
94476911c29SSSD             } else {
94576911c29SSSD                 let val = dfg.inst_results(new_inst)[i];
94676911c29SSSD                 i += 1;
94776911c29SSSD                 val
94876911c29SSSD             }
94976911c29SSSD         };
95076911c29SSSD         for i in 0..cursor.func.dfg.inst_results(old_inst).len() {
95176911c29SSSD             let old_val = cursor.func.dfg.inst_results(old_inst)[i];
95276911c29SSSD             let new_val = next_new_val(&cursor.func.dfg);
95376911c29SSSD             forward_val(cursor, old_val, new_val);
95476911c29SSSD         }
95576911c29SSSD 
95676911c29SSSD         // Back up so that the next iteration of the outer egraph loop will
95776911c29SSSD         // process the new instruction.
95876911c29SSSD         cursor.goto_inst(new_inst);
95976911c29SSSD         cursor.prev_inst();
96076911c29SSSD     }
96176911c29SSSD 
96276911c29SSSD     /// Scoped elaboration: compute a final ordering of op computation
96376911c29SSSD     /// for each block and update the given Func body. After this
96476911c29SSSD     /// runs, the function body is back into the state where every
96576911c29SSSD     /// Inst with an used result is placed in the layout (possibly
96676911c29SSSD     /// duplicated, if our code-motion logic decides this is the best
96776911c29SSSD     /// option).
96876911c29SSSD     ///
96976911c29SSSD     /// This works in concert with the domtree. We do a preorder
97076911c29SSSD     /// traversal of the domtree, tracking a scoped map from Id to
97176911c29SSSD     /// (new) Value. The map's scopes correspond to levels in the
97276911c29SSSD     /// domtree.
97376911c29SSSD     ///
97476911c29SSSD     /// At each block, we iterate forward over the side-effecting
97576911c29SSSD     /// eclasses, and recursively generate their arg eclasses, then
97676911c29SSSD     /// emit the ops themselves.
97776911c29SSSD     ///
97876911c29SSSD     /// To use an eclass in a given block, we first look it up in the
97976911c29SSSD     /// scoped map, and get the Value if already present. If not, we
98076911c29SSSD     /// need to generate it. We emit the extracted enode for this
98176911c29SSSD     /// eclass after recursively generating its args. Eclasses are
98276911c29SSSD     /// thus computed "as late as possible", but then memoized into
98376911c29SSSD     /// the Id-to-Value map and available to all dominated blocks and
98476911c29SSSD     /// for the rest of this block. (This subsumes GVN.)
elaborate(&mut self)98576911c29SSSD     fn elaborate(&mut self) {
98676911c29SSSD         let mut elaborator = Elaborator::new(
98776911c29SSSD             self.func,
98876911c29SSSD             &self.domtree,
98976911c29SSSD             self.loop_analysis,
99076911c29SSSD             &self.remat_values,
99176911c29SSSD             &mut self.stats,
99276911c29SSSD             self.ctrl_plane,
99376911c29SSSD         );
99476911c29SSSD         elaborator.elaborate();
99576911c29SSSD 
99676911c29SSSD         self.check_post_egraph();
99776911c29SSSD     }
99876911c29SSSD 
99976911c29SSSD     #[cfg(debug_assertions)]
check_post_egraph(&self)100076911c29SSSD     fn check_post_egraph(&self) {
100176911c29SSSD         // Verify that no union nodes are reachable from inst args,
100276911c29SSSD         // and that all inst args' defining instructions are in the
100376911c29SSSD         // layout.
100476911c29SSSD         for block in self.func.layout.blocks() {
100576911c29SSSD             for inst in self.func.layout.block_insts(block) {
100676911c29SSSD                 self.func
100776911c29SSSD                     .dfg
100876911c29SSSD                     .inst_values(inst)
100976911c29SSSD                     .for_each(|arg| match self.func.dfg.value_def(arg) {
101076911c29SSSD                         ValueDef::Result(i, _) => {
101176911c29SSSD                             debug_assert!(self.func.layout.inst_block(i).is_some());
101276911c29SSSD                         }
101376911c29SSSD                         ValueDef::Union(..) => {
101476911c29SSSD                             panic!("egraph union node {arg} still reachable at {inst}!");
101576911c29SSSD                         }
101676911c29SSSD                         _ => {}
101776911c29SSSD                     })
101876911c29SSSD             }
101976911c29SSSD         }
102076911c29SSSD     }
102176911c29SSSD 
102276911c29SSSD     #[cfg(not(debug_assertions))]
check_post_egraph(&self)102376911c29SSSD     fn check_post_egraph(&self) {}
102476911c29SSSD }
102576911c29SSSD 
102676911c29SSSD /// Implementation of external-context equality and hashing on
102776911c29SSSD /// InstructionData. This allows us to deduplicate instructions given
102876911c29SSSD /// some context that lets us see its value lists, so we don't need to
1029ab78bd82SHo Kim /// store arguments inline in the `InstructionData` (or alongside it in
103076911c29SSSD /// some newly-defined key type) in all cases.
103176911c29SSSD struct GVNContext<'a> {
103276911c29SSSD     value_lists: &'a ValueListPool,
103376911c29SSSD }
103476911c29SSSD 
103576911c29SSSD impl<'a> CtxEq<(Type, InstructionData), (Type, InstructionData)> for GVNContext<'a> {
ctx_eq( &self, (a_ty, a_inst): &(Type, InstructionData), (b_ty, b_inst): &(Type, InstructionData), ) -> bool103676911c29SSSD     fn ctx_eq(
103776911c29SSSD         &self,
103876911c29SSSD         (a_ty, a_inst): &(Type, InstructionData),
103976911c29SSSD         (b_ty, b_inst): &(Type, InstructionData),
104076911c29SSSD     ) -> bool {
104176911c29SSSD         a_ty == b_ty && a_inst.eq(b_inst, self.value_lists)
104276911c29SSSD     }
104376911c29SSSD }
104476911c29SSSD 
104576911c29SSSD impl<'a> CtxHash<(Type, InstructionData)> for GVNContext<'a> {
ctx_hash<H: Hasher>(&self, state: &mut H, (ty, inst): &(Type, InstructionData))104676911c29SSSD     fn ctx_hash<H: Hasher>(&self, state: &mut H, (ty, inst): &(Type, InstructionData)) {
104776911c29SSSD         core::hash::Hash::hash(&ty, state);
104876911c29SSSD         inst.hash(state, self.value_lists);
104976911c29SSSD     }
105076911c29SSSD }
105176911c29SSSD 
105276911c29SSSD /// Statistics collected during egraph-based processing.
105376911c29SSSD #[derive(Clone, Debug, Default)]
105476911c29SSSD pub(crate) struct Stats {
105576911c29SSSD     pub(crate) pure_inst: u64,
105676911c29SSSD     pub(crate) pure_inst_deduped: u64,
105776911c29SSSD     pub(crate) pure_inst_subsume: u64,
105876911c29SSSD     pub(crate) pure_inst_rewrite_to_self: u64,
105976911c29SSSD     pub(crate) pure_inst_insert_orig: u64,
106076911c29SSSD     pub(crate) pure_inst_insert_new: u64,
106176911c29SSSD     pub(crate) skeleton_inst: u64,
106276911c29SSSD     pub(crate) skeleton_inst_simplified: u64,
106376911c29SSSD     pub(crate) skeleton_inst_gvn: u64,
106476911c29SSSD     pub(crate) alias_analysis_removed: u64,
106576911c29SSSD     pub(crate) new_inst: u64,
106676911c29SSSD     pub(crate) union: u64,
106776911c29SSSD     pub(crate) subsume: u64,
106876911c29SSSD     pub(crate) remat: u64,
106976911c29SSSD     pub(crate) rewrite_rule_invoked: u64,
107076911c29SSSD     pub(crate) rewrite_rule_results: u64,
107176911c29SSSD     pub(crate) rewrite_depth_limit: u64,
107276911c29SSSD     pub(crate) elaborate_visit_node: u64,
107376911c29SSSD     pub(crate) elaborate_memoize_hit: u64,
107476911c29SSSD     pub(crate) elaborate_memoize_miss: u64,
107576911c29SSSD     pub(crate) elaborate_remat: u64,
107676911c29SSSD     pub(crate) elaborate_licm_hoist: u64,
107776911c29SSSD     pub(crate) elaborate_func: u64,
107876911c29SSSD     pub(crate) elaborate_func_pre_insts: u64,
107976911c29SSSD     pub(crate) elaborate_func_post_insts: u64,
108076911c29SSSD     pub(crate) eclass_size_limit: u64,
108176911c29SSSD }
1082