176911c29SSSD //! Support for egraphs represented in the DataFlowGraph. 276911c29SSSD 376911c29SSSD use crate::FxHashSet; 476911c29SSSD use crate::alias_analysis::{AliasAnalysis, LastStores}; 576911c29SSSD use crate::ctxhash::{CtxEq, CtxHash, NullCtx}; 676911c29SSSD use crate::cursor::{Cursor, CursorPosition, FuncCursor}; 776911c29SSSD use crate::dominator_tree::DominatorTree; 876911c29SSSD use crate::egraph::elaborate::Elaborator; 976911c29SSSD use crate::inst_predicates::{is_mergeable_for_egraph, is_pure_for_egraph}; 1076911c29SSSD use crate::ir::{ 11*2f7dbd61SChris Fallin Block, DataFlowGraph, Function, Inst, InstructionData, Type, Value, ValueDef, ValueListPool, 1276911c29SSSD }; 1376911c29SSSD use crate::loop_analysis::LoopAnalysis; 1476911c29SSSD use crate::opts::IsleContext; 1576911c29SSSD use crate::opts::generated_code::SkeletonInstSimplification; 1676911c29SSSD use crate::scoped_hash_map::{Entry as ScopedEntry, ScopedHashMap}; 1776911c29SSSD use crate::take_and_replace::TakeAndReplace; 1876911c29SSSD use crate::trace; 1976911c29SSSD use alloc::vec::Vec; 2076911c29SSSD use core::cmp::Ordering; 2176911c29SSSD use core::hash::Hasher; 2276911c29SSSD use cranelift_control::ControlPlane; 2376911c29SSSD use cranelift_entity::SecondaryMap; 2476911c29SSSD use cranelift_entity::packed_option::ReservedValue; 2576911c29SSSD use smallvec::SmallVec; 2676911c29SSSD 2776911c29SSSD mod cost; 2876911c29SSSD mod elaborate; 2976911c29SSSD 3076911c29SSSD /// Pass over a Function that does the whole aegraph thing. 3176911c29SSSD /// 3276911c29SSSD /// - Removes non-skeleton nodes from the Layout. 3376911c29SSSD /// - Performs a GVN-and-rule-application pass over all Values 3476911c29SSSD /// reachable from the skeleton, potentially creating new Union 3576911c29SSSD /// nodes (i.e., an aegraph) so that some values have multiple 3676911c29SSSD /// representations. 3776911c29SSSD /// - Does "extraction" on the aegraph: selects the best value out of 3876911c29SSSD /// the tree-of-Union nodes for each used value. 3976911c29SSSD /// - Does "scoped elaboration" on the aegraph: chooses one or more 4076911c29SSSD /// locations for pure nodes to become instructions again in the 4176911c29SSSD /// layout, as forced by the skeleton. 4276911c29SSSD /// 4376911c29SSSD /// At the beginning and end of this pass, the CLIF should be in a 4476911c29SSSD /// state that passes the verifier and, additionally, has no Union 4576911c29SSSD /// nodes. During the pass, Union nodes may exist, and instructions in 4676911c29SSSD /// the layout may refer to results of instructions that are not 4776911c29SSSD /// placed in the layout. 4876911c29SSSD pub struct EgraphPass<'a> { 4976911c29SSSD /// The function we're operating on. 5076911c29SSSD func: &'a mut Function, 5176911c29SSSD /// Dominator tree for the CFG, used to visit blocks in pre-order 5276911c29SSSD /// so we see value definitions before their uses, and also used for 5376911c29SSSD /// O(1) dominance checks. 5476911c29SSSD domtree: &'a DominatorTree, 5576911c29SSSD /// Alias analysis, used during optimization. 5676911c29SSSD alias_analysis: &'a mut AliasAnalysis<'a>, 5776911c29SSSD /// Loop analysis results, used for built-in LICM during 5876911c29SSSD /// elaboration. 5976911c29SSSD loop_analysis: &'a LoopAnalysis, 6076911c29SSSD /// Chaos-mode control-plane so we can test that we still get 6176911c29SSSD /// correct results when our heuristics make bad decisions. 6276911c29SSSD ctrl_plane: &'a mut ControlPlane, 6376911c29SSSD /// Which Values do we want to rematerialize in each block where 6476911c29SSSD /// they're used? 6576911c29SSSD remat_values: FxHashSet<Value>, 6676911c29SSSD /// Stats collected while we run this pass. 6776911c29SSSD pub(crate) stats: Stats, 6876911c29SSSD } 6976911c29SSSD 7076911c29SSSD /// The maximum number of rewrites we will take from a single call into ISLE. 7176911c29SSSD const MATCHES_LIMIT: usize = 5; 7276911c29SSSD 7376911c29SSSD /// The maximum number of enodes in any given eclass. 7476911c29SSSD const ECLASS_ENODE_LIMIT: usize = 5; 7576911c29SSSD 7676911c29SSSD /// Context passed through node insertion and optimization. 7776911c29SSSD pub(crate) struct OptimizeCtx<'opt, 'analysis> 7876911c29SSSD where 7976911c29SSSD 'analysis: 'opt, 8076911c29SSSD { 8176911c29SSSD // Borrowed from EgraphPass: 8276911c29SSSD pub(crate) func: &'opt mut Function, 8376911c29SSSD pub(crate) value_to_opt_value: &'opt mut SecondaryMap<Value, Value>, 8476911c29SSSD available_block: &'opt mut SecondaryMap<Value, Block>, 8576911c29SSSD eclass_size: &'opt mut SecondaryMap<Value, u8>, 8676911c29SSSD pub(crate) gvn_map: &'opt mut ScopedHashMap<(Type, InstructionData), Option<Value>>, 8776911c29SSSD pub(crate) gvn_map_blocks: &'opt Vec<Block>, 8876911c29SSSD pub(crate) remat_values: &'opt mut FxHashSet<Value>, 8976911c29SSSD pub(crate) stats: &'opt mut Stats, 9076911c29SSSD domtree: &'opt DominatorTree, 9176911c29SSSD pub(crate) alias_analysis: &'opt mut AliasAnalysis<'analysis>, 9276911c29SSSD pub(crate) alias_analysis_state: &'opt mut LastStores, 9376911c29SSSD ctrl_plane: &'opt mut ControlPlane, 9476911c29SSSD // Held locally during optimization of one node (recursively): 9576911c29SSSD pub(crate) rewrite_depth: usize, 9676911c29SSSD pub(crate) subsume_values: FxHashSet<Value>, 9776911c29SSSD optimized_values: SmallVec<[Value; MATCHES_LIMIT]>, 9876911c29SSSD optimized_insts: SmallVec<[SkeletonInstSimplification; MATCHES_LIMIT]>, 9976911c29SSSD } 10076911c29SSSD 10176911c29SSSD /// For passing to `insert_pure_enode`. Sometimes the enode already 10276911c29SSSD /// exists as an Inst (from the original CLIF), and sometimes we're in 10376911c29SSSD /// the middle of creating it and want to avoid inserting it if 10476911c29SSSD /// possible until we know we need it. 10576911c29SSSD pub(crate) enum NewOrExistingInst { 10676911c29SSSD New(InstructionData, Type), 10776911c29SSSD Existing(Inst), 10876911c29SSSD } 10976911c29SSSD 11076911c29SSSD impl NewOrExistingInst { get_inst_key<'a>(&'a self, dfg: &'a DataFlowGraph) -> (Type, InstructionData)11176911c29SSSD fn get_inst_key<'a>(&'a self, dfg: &'a DataFlowGraph) -> (Type, InstructionData) { 11276911c29SSSD match self { 11376911c29SSSD NewOrExistingInst::New(data, ty) => (*ty, *data), 11476911c29SSSD NewOrExistingInst::Existing(inst) => { 11576911c29SSSD let ty = dfg.ctrl_typevar(*inst); 11676911c29SSSD (ty, dfg.insts[*inst]) 11776911c29SSSD } 11876911c29SSSD } 11976911c29SSSD } 12076911c29SSSD } 12176911c29SSSD 12276911c29SSSD impl<'opt, 'analysis> OptimizeCtx<'opt, 'analysis> 12376911c29SSSD where 12476911c29SSSD 'analysis: 'opt, 12576911c29SSSD { 12676911c29SSSD /// Optimization of a single instruction. 12776911c29SSSD /// 12876911c29SSSD /// This does a few things: 12976911c29SSSD /// - Looks up the instruction in the GVN deduplication map. If we 13076911c29SSSD /// already have the same instruction somewhere else, with the 13176911c29SSSD /// same args, then we can alias the original instruction's 13276911c29SSSD /// results and omit this instruction entirely. 13376911c29SSSD /// - If the instruction is "new" (not deduplicated), then apply 13476911c29SSSD /// optimization rules: 13576911c29SSSD /// - All of the mid-end rules written in ISLE. 13676911c29SSSD /// - Store-to-load forwarding. 13776911c29SSSD /// - Update the value-to-opt-value map, and update the eclass 13876911c29SSSD /// union-find, if we rewrote the value to different form(s). insert_pure_enode(&mut self, inst: NewOrExistingInst) -> Value13976911c29SSSD pub(crate) fn insert_pure_enode(&mut self, inst: NewOrExistingInst) -> Value { 14076911c29SSSD // Create the external context for looking up and updating the 14176911c29SSSD // GVN map. This is necessary so that instructions themselves 14276911c29SSSD // do not have to carry all the references or data for a full 14376911c29SSSD // `Eq` or `Hash` impl. 14476911c29SSSD let gvn_context = GVNContext { 14576911c29SSSD value_lists: &self.func.dfg.value_lists, 14676911c29SSSD }; 14776911c29SSSD 14876911c29SSSD self.stats.pure_inst += 1; 14976911c29SSSD if let NewOrExistingInst::New(..) = inst { 15076911c29SSSD self.stats.new_inst += 1; 15176911c29SSSD } 15276911c29SSSD 15376911c29SSSD // Does this instruction already exist? If so, add entries to 15476911c29SSSD // the value-map to rewrite uses of its results to the results 15576911c29SSSD // of the original (existing) instruction. If not, optimize 15676911c29SSSD // the new instruction. 15776911c29SSSD if let Some(&Some(orig_result)) = self 15876911c29SSSD .gvn_map 15976911c29SSSD .get(&gvn_context, &inst.get_inst_key(&self.func.dfg)) 16076911c29SSSD { 16176911c29SSSD self.stats.pure_inst_deduped += 1; 16276911c29SSSD if let NewOrExistingInst::Existing(inst) = inst { 16376911c29SSSD debug_assert_eq!(self.func.dfg.inst_results(inst).len(), 1); 16476911c29SSSD let result = self.func.dfg.first_result(inst); 16576911c29SSSD self.value_to_opt_value[result] = orig_result; 16676911c29SSSD self.available_block[result] = self.available_block[orig_result]; 16776911c29SSSD } 16876911c29SSSD orig_result 16976911c29SSSD } else { 17076911c29SSSD // Now actually insert the InstructionData and attach 17176911c29SSSD // result value (exactly one). 17276911c29SSSD let (inst, result, ty) = match inst { 17376911c29SSSD NewOrExistingInst::New(data, typevar) => { 17476911c29SSSD self.stats.pure_inst_insert_new += 1; 17576911c29SSSD let inst = self.func.dfg.make_inst(data); 17676911c29SSSD // TODO: reuse return value? 17776911c29SSSD self.func.dfg.make_inst_results(inst, typevar); 17876911c29SSSD let result = self.func.dfg.first_result(inst); 17976911c29SSSD // New inst. We need to do the analysis of its result. 18076911c29SSSD (inst, result, typevar) 18176911c29SSSD } 18276911c29SSSD NewOrExistingInst::Existing(inst) => { 18376911c29SSSD self.stats.pure_inst_insert_orig += 1; 18476911c29SSSD let result = self.func.dfg.first_result(inst); 18576911c29SSSD let ty = self.func.dfg.ctrl_typevar(inst); 18676911c29SSSD (inst, result, ty) 18776911c29SSSD } 18876911c29SSSD }; 18976911c29SSSD 19076911c29SSSD self.available_block[result] = self.get_available_block(inst); 19176911c29SSSD let opt_value = self.optimize_pure_enode(inst); 19276911c29SSSD log::trace!("optimizing inst {inst} orig result {result} gave {opt_value}"); 19376911c29SSSD 19476911c29SSSD let gvn_context = GVNContext { 19576911c29SSSD value_lists: &self.func.dfg.value_lists, 19676911c29SSSD }; 19776911c29SSSD // Insert at level implied by args. This enables merging 19876911c29SSSD // in LICM cases like: 19976911c29SSSD // 20076911c29SSSD // while (...) { 20176911c29SSSD // if (...) { 20276911c29SSSD // let x = loop_invariant_expr; 20376911c29SSSD // } 20476911c29SSSD // if (...) { 20576911c29SSSD // let x = loop_invariant_expr; 20676911c29SSSD // } 20776911c29SSSD // } 20876911c29SSSD // 20976911c29SSSD // where the two instances of the expression otherwise 21076911c29SSSD // wouldn't merge because each would be in a separate 21176911c29SSSD // subscope of the scoped hashmap during traversal. 21276911c29SSSD log::trace!( 21376911c29SSSD "value {} is available at {}", 21476911c29SSSD opt_value, 21576911c29SSSD self.available_block[opt_value] 21676911c29SSSD ); 21776911c29SSSD let depth = self.depth_of_block_in_gvn_map(self.available_block[opt_value]); 21876911c29SSSD self.gvn_map.insert_with_depth( 21976911c29SSSD &gvn_context, 22076911c29SSSD (ty, self.func.dfg.insts[inst]), 22176911c29SSSD Some(opt_value), 22276911c29SSSD depth, 22376911c29SSSD ); 22476911c29SSSD self.value_to_opt_value[result] = opt_value; 22576911c29SSSD opt_value 22676911c29SSSD } 22776911c29SSSD } 22876911c29SSSD 22976911c29SSSD /// Find the block where a pure instruction first becomes available, 23076911c29SSSD /// defined as the block that is closest to the root where all of 23176911c29SSSD /// its arguments are available. In the unusual case where a pure 23276911c29SSSD /// instruction has no arguments (e.g. get_return_address), we can 23376911c29SSSD /// place it anywhere, so it is available in the entry block. 23476911c29SSSD /// 23576911c29SSSD /// This function does not compute available blocks recursively. 23676911c29SSSD /// All of the instruction's arguments must have had their available 23776911c29SSSD /// blocks assigned already. get_available_block(&self, inst: Inst) -> Block23876911c29SSSD fn get_available_block(&self, inst: Inst) -> Block { 23976911c29SSSD // Side-effecting instructions have different rules for where 24076911c29SSSD // they become available, so this function does not apply. 24176911c29SSSD debug_assert!(is_pure_for_egraph(self.func, inst)); 24276911c29SSSD 24376911c29SSSD // Note that the def-point of all arguments to an instruction 24476911c29SSSD // in SSA lie on a line of direct ancestors in the domtree, and 24576911c29SSSD // so do their available-blocks. This means that for any pair of 24676911c29SSSD // arguments, their available blocks are either the same or one 24776911c29SSSD // strictly dominates the other. We just need to find any argument 24876911c29SSSD // whose available block is deepest in the domtree. 24976911c29SSSD self.func.dfg.insts[inst] 25076911c29SSSD .arguments(&self.func.dfg.value_lists) 25176911c29SSSD .iter() 25276911c29SSSD .map(|&v| { 25376911c29SSSD let block = self.available_block[v]; 25476911c29SSSD debug_assert!(!block.is_reserved_value()); 25576911c29SSSD block 25676911c29SSSD }) 25776911c29SSSD .max_by(|&x, &y| { 25876911c29SSSD if self.domtree.block_dominates(x, y) { 25976911c29SSSD Ordering::Less 26076911c29SSSD } else { 26176911c29SSSD debug_assert!(self.domtree.block_dominates(y, x)); 26276911c29SSSD Ordering::Greater 26376911c29SSSD } 26476911c29SSSD }) 26576911c29SSSD .unwrap_or(self.func.layout.entry_block().unwrap()) 26676911c29SSSD } 26776911c29SSSD depth_of_block_in_gvn_map(&self, block: Block) -> usize26876911c29SSSD fn depth_of_block_in_gvn_map(&self, block: Block) -> usize { 26976911c29SSSD log::trace!( 27076911c29SSSD "finding depth of available block {} in domtree stack: {:?}", 27176911c29SSSD block, 27276911c29SSSD self.gvn_map_blocks 27376911c29SSSD ); 27476911c29SSSD self.gvn_map_blocks 27576911c29SSSD .iter() 27676911c29SSSD .enumerate() 27776911c29SSSD .rev() 27876911c29SSSD .find(|&(_, b)| *b == block) 27976911c29SSSD .unwrap() 28076911c29SSSD .0 28176911c29SSSD } 28276911c29SSSD 28376911c29SSSD /// Optimizes an enode by applying any matching mid-end rewrite 28476911c29SSSD /// rules (or store-to-load forwarding, which is a special case), 28576911c29SSSD /// unioning together all possible optimized (or rewritten) forms 28676911c29SSSD /// of this expression into an eclass and returning the `Value` 28776911c29SSSD /// that represents that eclass. optimize_pure_enode(&mut self, inst: Inst) -> Value28876911c29SSSD fn optimize_pure_enode(&mut self, inst: Inst) -> Value { 28976911c29SSSD // A pure node always has exactly one result. 29076911c29SSSD let orig_value = self.func.dfg.first_result(inst); 29176911c29SSSD 29276911c29SSSD let mut guard = TakeAndReplace::new(self, |x| &mut x.optimized_values); 29376911c29SSSD let (ctx, optimized_values) = guard.get(); 29476911c29SSSD 29576911c29SSSD // Limit rewrite depth. When we apply optimization rules, they 29676911c29SSSD // may create new nodes (values) and those are, recursively, 29776911c29SSSD // optimized eagerly as soon as they are created. So we may 29876911c29SSSD // have more than one ISLE invocation on the stack. (This is 29976911c29SSSD // necessary so that as the toplevel builds the 30076911c29SSSD // right-hand-side expression bottom-up, it uses the "latest" 30176911c29SSSD // optimized values for all the constituent parts.) To avoid 30276911c29SSSD // infinite or problematic recursion, we bound the rewrite 30376911c29SSSD // depth to a small constant here. 30476911c29SSSD const REWRITE_LIMIT: usize = 5; 30576911c29SSSD if ctx.rewrite_depth > REWRITE_LIMIT { 30676911c29SSSD ctx.stats.rewrite_depth_limit += 1; 30776911c29SSSD return orig_value; 30876911c29SSSD } 30976911c29SSSD ctx.rewrite_depth += 1; 31076911c29SSSD trace!("Incrementing rewrite depth; now {}", ctx.rewrite_depth); 31176911c29SSSD 31276911c29SSSD // Invoke the ISLE toplevel constructor, getting all new 31376911c29SSSD // values produced as equivalents to this value. 31476911c29SSSD trace!("Calling into ISLE with original value {}", orig_value); 31576911c29SSSD ctx.stats.rewrite_rule_invoked += 1; 31676911c29SSSD debug_assert!(optimized_values.is_empty()); 31776911c29SSSD crate::opts::generated_code::constructor_simplify( 31876911c29SSSD &mut IsleContext { ctx }, 31976911c29SSSD orig_value, 32076911c29SSSD optimized_values, 32176911c29SSSD ); 32276911c29SSSD 32376911c29SSSD ctx.stats.rewrite_rule_results += optimized_values.len() as u64; 32476911c29SSSD 32576911c29SSSD // It's not supposed to matter what order `simplify` returns values in. 32676911c29SSSD ctx.ctrl_plane.shuffle(optimized_values); 32776911c29SSSD 32876911c29SSSD let num_matches = optimized_values.len(); 32976911c29SSSD if num_matches > MATCHES_LIMIT { 33076911c29SSSD trace!( 33176911c29SSSD "Reached maximum matches limit; too many optimized values \ 33276911c29SSSD ({num_matches} > {MATCHES_LIMIT}); ignoring rest.", 33376911c29SSSD ); 33476911c29SSSD optimized_values.truncate(MATCHES_LIMIT); 33576911c29SSSD } 33676911c29SSSD 33776911c29SSSD // Sort and deduplicate optimized values, in case multiple 33876911c29SSSD // rules produced the same simplification. 33976911c29SSSD optimized_values.sort_unstable(); 34076911c29SSSD optimized_values.dedup(); 34176911c29SSSD 34276911c29SSSD trace!(" -> returned from ISLE: {orig_value} -> {optimized_values:?}"); 34376911c29SSSD 34476911c29SSSD // Construct a union-node tree representing the new eclass 34576911c29SSSD // that results from rewriting. If any returned value was 34676911c29SSSD // marked "subsume", take only that value. Otherwise, 34776911c29SSSD // sequentially build the chain over the original value and 34876911c29SSSD // all returned values. 34976911c29SSSD let result_value = if let Some(&subsuming_value) = optimized_values 35076911c29SSSD .iter() 35176911c29SSSD .find(|&value| ctx.subsume_values.contains(value)) 35276911c29SSSD { 35376911c29SSSD optimized_values.clear(); 35476911c29SSSD ctx.stats.pure_inst_subsume += 1; 35576911c29SSSD subsuming_value 35676911c29SSSD } else { 35776911c29SSSD let mut union_value = orig_value; 35876911c29SSSD let mut eclass_size = ctx.eclass_size[orig_value] + 1; 35976911c29SSSD for optimized_value in optimized_values.drain(..) { 36076911c29SSSD trace!( 36176911c29SSSD "Returned from ISLE for {}, got {:?}", 36276911c29SSSD orig_value, optimized_value 36376911c29SSSD ); 36476911c29SSSD if optimized_value == orig_value { 36576911c29SSSD trace!(" -> same as orig value; skipping"); 36676911c29SSSD ctx.stats.pure_inst_rewrite_to_self += 1; 36776911c29SSSD continue; 36876911c29SSSD } 36976911c29SSSD let rhs_eclass_size = ctx.eclass_size[optimized_value] + 1; 37076911c29SSSD if usize::from(eclass_size) + usize::from(rhs_eclass_size) > ECLASS_ENODE_LIMIT { 37176911c29SSSD trace!(" -> reached eclass size limit"); 37276911c29SSSD ctx.stats.eclass_size_limit += 1; 37376911c29SSSD break; 37476911c29SSSD } 37576911c29SSSD let old_union_value = union_value; 37676911c29SSSD union_value = ctx.func.dfg.union(old_union_value, optimized_value); 37776911c29SSSD eclass_size += rhs_eclass_size; 37876911c29SSSD ctx.eclass_size[union_value] = eclass_size - 1; 37976911c29SSSD ctx.stats.union += 1; 38076911c29SSSD trace!(" -> union: now {}", union_value); 38176911c29SSSD ctx.available_block[union_value] = 38276911c29SSSD ctx.merge_availability(old_union_value, optimized_value); 38376911c29SSSD } 38476911c29SSSD union_value 38576911c29SSSD }; 38676911c29SSSD 38776911c29SSSD ctx.rewrite_depth -= 1; 38876911c29SSSD trace!("Decrementing rewrite depth; now {}", ctx.rewrite_depth); 38976911c29SSSD if ctx.rewrite_depth == 0 { 39076911c29SSSD ctx.subsume_values.clear(); 39176911c29SSSD } 39276911c29SSSD 39376911c29SSSD debug_assert!(ctx.optimized_values.is_empty()); 39476911c29SSSD result_value 39576911c29SSSD } 39676911c29SSSD merge_availability(&self, a: Value, b: Value) -> Block39776911c29SSSD fn merge_availability(&self, a: Value, b: Value) -> Block { 39876911c29SSSD let a = self.available_block[a]; 39976911c29SSSD let b = self.available_block[b]; 40076911c29SSSD if self.domtree.block_dominates(a, b) { 40176911c29SSSD a 40276911c29SSSD } else { 40376911c29SSSD b 40476911c29SSSD } 40576911c29SSSD } 40676911c29SSSD 40776911c29SSSD /// Optimize a "skeleton" instruction. 40876911c29SSSD /// 40976911c29SSSD /// Returns an optional command of how to continue processing the optimized 41076911c29SSSD /// instruction (e.g. removing it or replacing it with a new instruction). optimize_skeleton_inst( &mut self, inst: Inst, block: Block, ) -> Option<SkeletonInstSimplification>41176911c29SSSD fn optimize_skeleton_inst( 41276911c29SSSD &mut self, 41376911c29SSSD inst: Inst, 41476911c29SSSD block: Block, 41576911c29SSSD ) -> Option<SkeletonInstSimplification> { 41676911c29SSSD self.stats.skeleton_inst += 1; 41776911c29SSSD 41876911c29SSSD // If we have a rewrite rule for this instruction, do that first, so 41976911c29SSSD // that GVN and alias analysis only see simplified skeleton 42076911c29SSSD // instructions. 42176911c29SSSD if let Some(cmd) = self.simplify_skeleton_inst(inst) { 42276911c29SSSD self.stats.skeleton_inst_simplified += 1; 42376911c29SSSD return Some(cmd); 42476911c29SSSD } 42576911c29SSSD 42676911c29SSSD // First, can we try to deduplicate? We need to keep some copy 42776911c29SSSD // of the instruction around because it's side-effecting, but 42876911c29SSSD // we may be able to reuse an earlier instance of it. 42976911c29SSSD if is_mergeable_for_egraph(self.func, inst) { 43076911c29SSSD let result = self.func.dfg.inst_results(inst).get(0).copied(); 43176911c29SSSD trace!(" -> mergeable side-effecting op {}", inst); 43276911c29SSSD 43376911c29SSSD // Does this instruction already exist? If so, add entries to 43476911c29SSSD // the value-map to rewrite uses of its results to the results 43576911c29SSSD // of the original (existing) instruction. If not, optimize 43676911c29SSSD // the new instruction. 43776911c29SSSD // 43876911c29SSSD // Note that the GVN map is scoped, which is important 43976911c29SSSD // here: because effectful ops are not removed from the 44076911c29SSSD // skeleton (`Layout`), we need to be mindful of whether 44176911c29SSSD // our current position is dominated by an instance of the 44276911c29SSSD // instruction. (See #5796 for details.) 44376911c29SSSD let ty = self.func.dfg.ctrl_typevar(inst); 44476911c29SSSD match self 44576911c29SSSD .gvn_map 44676911c29SSSD .entry(&NullCtx, (ty, self.func.dfg.insts[inst])) 44776911c29SSSD { 44876911c29SSSD ScopedEntry::Occupied(o) => { 44976911c29SSSD let orig_result = *o.get(); 45076911c29SSSD match (result, orig_result) { 45176911c29SSSD (Some(result), Some(orig_result)) => { 45276911c29SSSD // Hit in GVN map -- reuse value. 45376911c29SSSD self.stats.skeleton_inst_gvn += 1; 45476911c29SSSD self.value_to_opt_value[result] = orig_result; 45576911c29SSSD self.available_block[result] = self.available_block[orig_result]; 45676911c29SSSD trace!(" -> merges result {} to {}", result, orig_result); 45776911c29SSSD } 45876911c29SSSD (None, None) => { 45976911c29SSSD // Hit in the GVN map, but the instruction doesn't 46076911c29SSSD // produce results, only side effects. Nothing else 46176911c29SSSD // to do here. 46276911c29SSSD self.stats.skeleton_inst_gvn += 1; 46376911c29SSSD trace!(" -> merges with dominating instruction"); 46476911c29SSSD } 46576911c29SSSD (_, _) => unreachable!(), 46676911c29SSSD } 46776911c29SSSD Some(SkeletonInstSimplification::Remove) 46876911c29SSSD } 46976911c29SSSD ScopedEntry::Vacant(v) => { 47076911c29SSSD // Otherwise, insert it into the value-map. 47176911c29SSSD if let Some(result) = result { 47276911c29SSSD self.value_to_opt_value[result] = result; 47376911c29SSSD self.available_block[result] = block; 47476911c29SSSD } 47576911c29SSSD v.insert(result); 47676911c29SSSD trace!(" -> inserts as new (no GVN)"); 47776911c29SSSD None 47876911c29SSSD } 47976911c29SSSD } 48076911c29SSSD } 48176911c29SSSD // Otherwise, if a load or store, process it with the alias 48276911c29SSSD // analysis to see if we can optimize it (rewrite in terms of 48376911c29SSSD // an earlier load or stored value). 48476911c29SSSD else if let Some(new_result) = 48576911c29SSSD self.alias_analysis 48676911c29SSSD .process_inst(self.func, self.alias_analysis_state, inst) 48776911c29SSSD { 48876911c29SSSD self.stats.alias_analysis_removed += 1; 48976911c29SSSD let result = self.func.dfg.first_result(inst); 49076911c29SSSD trace!( 49176911c29SSSD " -> inst {} has result {} replaced with {}", 49276911c29SSSD inst, result, new_result 49376911c29SSSD ); 49476911c29SSSD self.value_to_opt_value[result] = new_result; 49576911c29SSSD self.available_block[result] = self.available_block[new_result]; 49676911c29SSSD Some(SkeletonInstSimplification::Remove) 49776911c29SSSD } 49876911c29SSSD // Otherwise, generic side-effecting op -- always keep it, and 49976911c29SSSD // set its results to identity-map to original values. 50076911c29SSSD else { 50176911c29SSSD // Set all results to identity-map to themselves 50276911c29SSSD // in the value-to-opt-value map. 50376911c29SSSD for &result in self.func.dfg.inst_results(inst) { 50476911c29SSSD self.value_to_opt_value[result] = result; 50576911c29SSSD self.available_block[result] = block; 50676911c29SSSD } 50776911c29SSSD None 50876911c29SSSD } 50976911c29SSSD } 51076911c29SSSD 51176911c29SSSD /// Find the best simplification of the given skeleton instruction, if any, 51276911c29SSSD /// by consulting our `simplify_skeleton` ISLE rules. simplify_skeleton_inst(&mut self, inst: Inst) -> Option<SkeletonInstSimplification>51376911c29SSSD fn simplify_skeleton_inst(&mut self, inst: Inst) -> Option<SkeletonInstSimplification> { 51476911c29SSSD // We cannot currently simplify terminators, or simplify into 51576911c29SSSD // terminators. Anything that could change the control-flow graph is off 51676911c29SSSD // limits. 51776911c29SSSD // 51876911c29SSSD // Consider the following CLIF snippet: 51976911c29SSSD // 52076911c29SSSD // block0(v0: i64): 52176911c29SSSD // v1 = iconst.i32 0 52276911c29SSSD // trapz v1, user42 52376911c29SSSD // v2 = load.i32 v0 52476911c29SSSD // brif v1, block1, block2 52576911c29SSSD // block1: 52676911c29SSSD // return v2 52776911c29SSSD // block2: 52876911c29SSSD // v3 = iconst.i32 1 52976911c29SSSD // v4 = iadd v2, v3 53076911c29SSSD // return v4 53176911c29SSSD // 53276911c29SSSD // We would ideally like to perform simplifications like replacing the 53376911c29SSSD // `trapz` with an unconditional `trap` and the conditional `brif` 53476911c29SSSD // branch with an unconditional `jump`. Note, however, that blocks 53576911c29SSSD // `block1` and `block2` are dominated by `block0` and therefore can and 53676911c29SSSD // do use values defined in `block0`. This presents challenges: 53776911c29SSSD // 53876911c29SSSD // * If we replace the `brif` with a `jump`, then we've mutated the 53976911c29SSSD // control-flow graph and removed that domination property. The uses 54076911c29SSSD // of `v2` and `v3` in those blocks become invalid. 54176911c29SSSD // 54276911c29SSSD // * Even worse, if we turn the `trapz` into a `trap`, we are 54376911c29SSSD // introducing a terminator into the middle of the block, which leaves 54476911c29SSSD // us with two choices to fix up the IR so that there aren't any 54576911c29SSSD // instructions following the terminator in the block: 54676911c29SSSD // 54776911c29SSSD // 1. We can split the unreachable instructions off into a new 54876911c29SSSD // block. However, there is no control-flow edge from the current 54976911c29SSSD // block to this new block and so, again, the new block isn't 55076911c29SSSD // dominated by the current block, and therefore the can't use 55176911c29SSSD // values defined in this block or any dominating it. The `load` 55276911c29SSSD // instruction uses `v0` but is not dominated by `v0`'s 55376911c29SSSD // definition. 55476911c29SSSD // 55576911c29SSSD // 2. Alternatively, we can simply delete the trailing instructions, 55676911c29SSSD // since they are unreachable. But then not only are the old 55776911c29SSSD // instructions' uses no longer dominated by their definitions, but 55876911c29SSSD // the definitions do not exist at all anymore! 55976911c29SSSD // 56076911c29SSSD // Whatever approach we would take, we would invalidate value uses, and 56176911c29SSSD // would need to track and fix them up. 56276911c29SSSD if self.func.dfg.insts[inst].opcode().is_branch() { 56376911c29SSSD return None; 56476911c29SSSD } 56576911c29SSSD 56676911c29SSSD let mut guard = TakeAndReplace::new(self, |x| &mut x.optimized_insts); 56776911c29SSSD let (ctx, optimized_insts) = guard.get(); 56876911c29SSSD 56976911c29SSSD crate::opts::generated_code::constructor_simplify_skeleton( 57076911c29SSSD &mut IsleContext { ctx }, 57176911c29SSSD inst, 57276911c29SSSD optimized_insts, 57376911c29SSSD ); 57476911c29SSSD 57576911c29SSSD let simplifications_len = optimized_insts.len(); 57676911c29SSSD log::trace!(" -> simplify_skeleton: yielded {simplifications_len} simplification(s)"); 57776911c29SSSD if simplifications_len > MATCHES_LIMIT { 57876911c29SSSD log::trace!(" too many candidate simplifications; truncating to {MATCHES_LIMIT}"); 57976911c29SSSD optimized_insts.truncate(MATCHES_LIMIT); 58076911c29SSSD } 58176911c29SSSD 58276911c29SSSD // Find the best simplification, if any, from our candidates. 58376911c29SSSD // 58476911c29SSSD // Unlike simplifying pure values, we do not add side-effectful 58576911c29SSSD // instructions to the egraph, nor do we extract the best version via 58676911c29SSSD // dynamic programming and considering the costs of operands. Instead, 58776911c29SSSD // we greedily choose the best simplification. This is because there is 58876911c29SSSD // an impedance mismatch: the egraph and our pure rewrites are centered 58976911c29SSSD // around *values*, but we don't represent side-effects with values, we 59076911c29SSSD // represent them implicitly in their *instructions*. 59176911c29SSSD // 59276911c29SSSD // The initial best choice is "no simplification, just use the original 59376911c29SSSD // instruction" which has the original instruction's cost. 59476911c29SSSD let mut best = None; 59576911c29SSSD let mut best_cost = cost::Cost::of_skeleton_op( 59676911c29SSSD ctx.func.dfg.insts[inst].opcode(), 59776911c29SSSD ctx.func.dfg.inst_args(inst).len(), 59876911c29SSSD ); 59976911c29SSSD while let Some(simplification) = optimized_insts.pop() { 60076911c29SSSD let (new_inst, new_val) = match simplification { 60176911c29SSSD // We can't do better than completely removing the skeleton 60276911c29SSSD // instruction, so short-cicuit the loop and eagerly return the 60376911c29SSSD // `Remove*` simplifications. 60476911c29SSSD SkeletonInstSimplification::Remove => { 60576911c29SSSD log::trace!(" -> simplify_skeleton: remove inst"); 60676911c29SSSD debug_assert!(ctx.func.dfg.inst_results(inst).is_empty()); 60776911c29SSSD return Some(simplification); 60876911c29SSSD } 60976911c29SSSD SkeletonInstSimplification::RemoveWithVal { val } => { 61076911c29SSSD log::trace!(" -> simplify_skeleton: remove inst and use {val} as its result"); 61176911c29SSSD if cfg!(debug_assertions) { 61276911c29SSSD let results = ctx.func.dfg.inst_results(inst); 61376911c29SSSD debug_assert_eq!(results.len(), 1); 61476911c29SSSD debug_assert_eq!( 61576911c29SSSD ctx.func.dfg.value_type(results[0]), 61676911c29SSSD ctx.func.dfg.value_type(val), 61776911c29SSSD ); 61876911c29SSSD } 61976911c29SSSD return Some(simplification); 62076911c29SSSD } 62176911c29SSSD 62276911c29SSSD // For instruction replacement simplification, we want to check 62376911c29SSSD // that the replacements define the same number and types of 62476911c29SSSD // values as the original instruction, and also determine 62576911c29SSSD // whether they are actually an improvement over (i.e. have 62676911c29SSSD // lower cost than) the original instruction. 62776911c29SSSD SkeletonInstSimplification::Replace { inst } => { 62876911c29SSSD log::trace!( 62976911c29SSSD " -> simplify_skeleton: replace inst with {inst}: {}", 63076911c29SSSD ctx.func.dfg.display_inst(inst) 63176911c29SSSD ); 63276911c29SSSD (inst, None) 63376911c29SSSD } 63476911c29SSSD SkeletonInstSimplification::ReplaceWithVal { inst, val } => { 63576911c29SSSD log::trace!( 63676911c29SSSD " -> simplify_skeleton: replace inst with {val} and {inst}: {}", 63776911c29SSSD ctx.func.dfg.display_inst(inst) 63876911c29SSSD ); 63976911c29SSSD (inst, Some(val)) 64076911c29SSSD } 64176911c29SSSD }; 64276911c29SSSD 64376911c29SSSD if cfg!(debug_assertions) { 64476911c29SSSD let opcode = ctx.func.dfg.insts[inst].opcode(); 64576911c29SSSD debug_assert!( 64676911c29SSSD !(opcode.is_terminator() || opcode.is_branch()), 64776911c29SSSD "simplifying control-flow instructions and terminators is not yet supported", 64876911c29SSSD ); 64976911c29SSSD 65076911c29SSSD let old_vals = ctx.func.dfg.inst_results(inst); 65176911c29SSSD let new_vals = if let Some(val) = new_val.as_ref() { 65276911c29SSSD core::slice::from_ref(val) 65376911c29SSSD } else { 65476911c29SSSD ctx.func.dfg.inst_results(new_inst) 65576911c29SSSD }; 65676911c29SSSD debug_assert_eq!( 65776911c29SSSD old_vals.len(), 65876911c29SSSD new_vals.len(), 65976911c29SSSD "skeleton simplification should result in the same number of result values", 66076911c29SSSD ); 66176911c29SSSD 66276911c29SSSD for (old_val, new_val) in old_vals.iter().zip(new_vals) { 66376911c29SSSD let old_ty = ctx.func.dfg.value_type(*old_val); 66476911c29SSSD let new_ty = ctx.func.dfg.value_type(*new_val); 66576911c29SSSD debug_assert_eq!( 66676911c29SSSD old_ty, new_ty, 66776911c29SSSD "skeleton simplification should result in values of the correct type", 66876911c29SSSD ); 66976911c29SSSD } 67076911c29SSSD } 67176911c29SSSD 67276911c29SSSD // Our best simplification is the one with the least cost. Update 67376911c29SSSD // `best` if necessary. 67476911c29SSSD let cost = cost::Cost::of_skeleton_op( 67576911c29SSSD ctx.func.dfg.insts[new_inst].opcode(), 67676911c29SSSD ctx.func.dfg.inst_args(new_inst).len(), 67776911c29SSSD ); 67876911c29SSSD if cost < best_cost { 67976911c29SSSD best = Some(simplification); 68076911c29SSSD best_cost = cost; 68176911c29SSSD } 68276911c29SSSD } 68376911c29SSSD 68476911c29SSSD // Return the best simplification! 68576911c29SSSD best 68676911c29SSSD } 68776911c29SSSD } 68876911c29SSSD 68976911c29SSSD impl<'a> EgraphPass<'a> { 69076911c29SSSD /// Create a new EgraphPass. new( func: &'a mut Function, domtree: &'a DominatorTree, loop_analysis: &'a LoopAnalysis, alias_analysis: &'a mut AliasAnalysis<'a>, ctrl_plane: &'a mut ControlPlane, ) -> Self69176911c29SSSD pub fn new( 69276911c29SSSD func: &'a mut Function, 69376911c29SSSD domtree: &'a DominatorTree, 69476911c29SSSD loop_analysis: &'a LoopAnalysis, 69576911c29SSSD alias_analysis: &'a mut AliasAnalysis<'a>, 69676911c29SSSD ctrl_plane: &'a mut ControlPlane, 69776911c29SSSD ) -> Self { 69876911c29SSSD Self { 69976911c29SSSD func, 70076911c29SSSD domtree, 70176911c29SSSD loop_analysis, 70276911c29SSSD alias_analysis, 70376911c29SSSD ctrl_plane, 70476911c29SSSD stats: Stats::default(), 70576911c29SSSD remat_values: FxHashSet::default(), 70676911c29SSSD } 70776911c29SSSD } 70876911c29SSSD 70976911c29SSSD /// Run the process. run(&mut self)71076911c29SSSD pub fn run(&mut self) { 71176911c29SSSD self.remove_pure_and_optimize(); 71276911c29SSSD 71376911c29SSSD trace!("egraph built:\n{}\n", self.func.display()); 71476911c29SSSD if cfg!(feature = "trace-log") { 71576911c29SSSD for (value, def) in self.func.dfg.values_and_defs() { 71676911c29SSSD trace!(" -> {} = {:?}", value, def); 71776911c29SSSD match def { 71876911c29SSSD ValueDef::Result(i, 0) => { 71976911c29SSSD trace!(" -> {} = {:?}", i, self.func.dfg.insts[i]); 72076911c29SSSD } 72176911c29SSSD _ => {} 72276911c29SSSD } 72376911c29SSSD } 72476911c29SSSD } 72576911c29SSSD 72676911c29SSSD self.elaborate(); 72776911c29SSSD 72876911c29SSSD log::trace!("stats: {:#?}", self.stats); 72976911c29SSSD } 73076911c29SSSD 73176911c29SSSD /// Remove pure nodes from the `Layout` of the function, ensuring 73276911c29SSSD /// that only the "side-effect skeleton" remains, and also 73376911c29SSSD /// optimize the pure nodes. This is the first step of 73476911c29SSSD /// egraph-based processing and turns the pure CFG-based CLIF into 73576911c29SSSD /// a CFG skeleton with a sea of (optimized) nodes tying it 73676911c29SSSD /// together. 73776911c29SSSD /// 73876911c29SSSD /// As we walk through the code, we eagerly apply optimization 73976911c29SSSD /// rules; at any given point we have a "latest version" of an 74076911c29SSSD /// eclass of possible representations for a `Value` in the 74176911c29SSSD /// original program, which is itself a `Value` at the root of a 74276911c29SSSD /// union-tree. We keep a map from the original values to these 74376911c29SSSD /// optimized values. When we encounter any instruction (pure or 74476911c29SSSD /// side-effecting skeleton) we rewrite its arguments to capture 74576911c29SSSD /// the "latest" optimized forms of these values. (We need to do 74676911c29SSSD /// this as part of this pass, and not later using a finished map, 74776911c29SSSD /// because the eclass can continue to be updated and we need to 74876911c29SSSD /// only refer to its subset that exists at this stage, to 74976911c29SSSD /// maintain acyclicity.) remove_pure_and_optimize(&mut self)75076911c29SSSD fn remove_pure_and_optimize(&mut self) { 75176911c29SSSD let mut cursor = FuncCursor::new(self.func); 75276911c29SSSD let mut value_to_opt_value: SecondaryMap<Value, Value> = 75376911c29SSSD SecondaryMap::with_default(Value::reserved_value()); 75476911c29SSSD 75576911c29SSSD // Map from instruction to value for hash-consing of pure ops 75676911c29SSSD // into the egraph. This can be a standard (non-scoped) 75776911c29SSSD // hashmap because pure ops have no location: they are 75876911c29SSSD // "outside of" control flow. 75976911c29SSSD // 76076911c29SSSD // Note also that we keep the controlling typevar (the `Type` 76176911c29SSSD // in the tuple below) because it may disambiguate 76276911c29SSSD // instructions that are identical except for type. 76376911c29SSSD // 76476911c29SSSD // We store both skeleton and non-skeleton instructions in the 76576911c29SSSD // GVN map; for skeleton instructions, we only store those 76676911c29SSSD // that are idempotent, i.e., still eligible to GVN. Note that 76776911c29SSSD // some skeleton instructions are idempotent but do not 76876911c29SSSD // produce a value: e.g., traps on a given condition. To allow 76976911c29SSSD // for both cases, we store an `Option<Value>` as the value in 77076911c29SSSD // this map. 77176911c29SSSD let mut gvn_map: ScopedHashMap<(Type, InstructionData), Option<Value>> = 77276911c29SSSD ScopedHashMap::with_capacity(cursor.func.dfg.num_values()); 77376911c29SSSD 77476911c29SSSD // The block in the domtree preorder traversal at each level 77576911c29SSSD // of the GVN map. 77676911c29SSSD let mut gvn_map_blocks: Vec<Block> = vec![]; 77776911c29SSSD 77876911c29SSSD // To get the best possible merging and canonicalization, we 77976911c29SSSD // track where a value is "available" at: this is the 78076911c29SSSD // domtree-nearest-ancestor join of all args if the value 78176911c29SSSD // itself is pure, otherwise the block where the value is 78276911c29SSSD // defined. (And for union nodes, the 78376911c29SSSD // domtree-highest-ancestor, i.e., the meet or the dual to the 78476911c29SSSD // above join.) 78576911c29SSSD let mut available_block: SecondaryMap<Value, Block> = 78676911c29SSSD SecondaryMap::with_default(Block::reserved_value()); 78776911c29SSSD 78876911c29SSSD // To avoid blowing up eclasses too much, we track the size of 78976911c29SSSD // each eclass reachable by a tree of union nodes from a given 79076911c29SSSD // value ID, and we avoid union'ing additional values into an 79176911c29SSSD // eclass when it reaches `ECLASS_ENODE_LIMIT`. 79276911c29SSSD // 79376911c29SSSD // For efficiency, this encodes size minus one: so a value of 79476911c29SSSD // zero (which is cheap to bulk-initialize) means a singleton 79576911c29SSSD // eclass of size one. This also allows us to avoid explicitly 79676911c29SSSD // writing the size for any values that are not union nodes. 79776911c29SSSD let mut eclass_size: SecondaryMap<Value, u8> = SecondaryMap::with_default(0); 79876911c29SSSD 79976911c29SSSD // This is an initial guess at the size we'll need, but we add 80076911c29SSSD // more values as we build simplified alternative expressions so 80176911c29SSSD // this is likely to realloc again later. 80276911c29SSSD available_block.resize(cursor.func.dfg.num_values()); 80376911c29SSSD 80476911c29SSSD // In domtree preorder, visit blocks. (TODO: factor out an 80576911c29SSSD // iterator from this and elaborator.) 80676911c29SSSD let root = cursor.layout().entry_block().unwrap(); 80776911c29SSSD enum StackEntry { 80876911c29SSSD Visit(Block), 80976911c29SSSD Pop, 81076911c29SSSD } 81176911c29SSSD let mut block_stack = vec![StackEntry::Visit(root)]; 81276911c29SSSD while let Some(entry) = block_stack.pop() { 81376911c29SSSD match entry { 81476911c29SSSD StackEntry::Visit(block) => { 81576911c29SSSD // We popped this block; push children 81676911c29SSSD // immediately, then process this block. 81776911c29SSSD block_stack.push(StackEntry::Pop); 81876911c29SSSD block_stack.extend( 81976911c29SSSD self.ctrl_plane 82076911c29SSSD .shuffled(self.domtree.children(block)) 82176911c29SSSD .map(StackEntry::Visit), 82276911c29SSSD ); 82376911c29SSSD gvn_map.increment_depth(); 82476911c29SSSD gvn_map_blocks.push(block); 82576911c29SSSD 82676911c29SSSD trace!("Processing block {}", block); 82776911c29SSSD cursor.set_position(CursorPosition::Before(block)); 82876911c29SSSD 82976911c29SSSD let mut alias_analysis_state = self.alias_analysis.block_starting_state(block); 83076911c29SSSD 83176911c29SSSD for ¶m in cursor.func.dfg.block_params(block) { 83276911c29SSSD trace!("creating initial singleton eclass for blockparam {}", param); 83376911c29SSSD value_to_opt_value[param] = param; 83476911c29SSSD available_block[param] = block; 83576911c29SSSD } 83676911c29SSSD while let Some(inst) = cursor.next_inst() { 83776911c29SSSD trace!( 83876911c29SSSD "Processing inst {inst}: {}", 83976911c29SSSD cursor.func.dfg.display_inst(inst), 84076911c29SSSD ); 84176911c29SSSD 84276911c29SSSD // Rewrite args of *all* instructions using the 84376911c29SSSD // value-to-opt-value map. 84476911c29SSSD cursor.func.dfg.map_inst_values(inst, |arg| { 84576911c29SSSD let new_value = value_to_opt_value[arg]; 84676911c29SSSD trace!("rewriting arg {} of inst {} to {}", arg, inst, new_value); 84776911c29SSSD debug_assert_ne!(new_value, Value::reserved_value()); 84876911c29SSSD new_value 84976911c29SSSD }); 85076911c29SSSD 85176911c29SSSD // Build a context for optimization, with borrows of 85276911c29SSSD // state. We can't invoke a method on `self` because 85376911c29SSSD // we've borrowed `self.func` mutably (as 85476911c29SSSD // `cursor.func`) so we pull apart the pieces instead 85576911c29SSSD // here. 85676911c29SSSD let mut ctx = OptimizeCtx { 85776911c29SSSD func: cursor.func, 85876911c29SSSD value_to_opt_value: &mut value_to_opt_value, 85976911c29SSSD gvn_map: &mut gvn_map, 86076911c29SSSD gvn_map_blocks: &mut gvn_map_blocks, 86176911c29SSSD available_block: &mut available_block, 86276911c29SSSD eclass_size: &mut eclass_size, 86376911c29SSSD rewrite_depth: 0, 86476911c29SSSD subsume_values: FxHashSet::default(), 86576911c29SSSD remat_values: &mut self.remat_values, 86676911c29SSSD stats: &mut self.stats, 86776911c29SSSD domtree: &self.domtree, 86876911c29SSSD alias_analysis: self.alias_analysis, 86976911c29SSSD alias_analysis_state: &mut alias_analysis_state, 87076911c29SSSD ctrl_plane: self.ctrl_plane, 87176911c29SSSD optimized_values: Default::default(), 87276911c29SSSD optimized_insts: Default::default(), 87376911c29SSSD }; 87476911c29SSSD 87576911c29SSSD if is_pure_for_egraph(ctx.func, inst) { 87676911c29SSSD // Insert into GVN map and optimize any new nodes 87776911c29SSSD // inserted (recursively performing this work for 87876911c29SSSD // any nodes the optimization rules produce). 87976911c29SSSD let inst = NewOrExistingInst::Existing(inst); 88076911c29SSSD ctx.insert_pure_enode(inst); 88176911c29SSSD // We've now rewritten all uses, or will when we 88276911c29SSSD // see them, and the instruction exists as a pure 88376911c29SSSD // enode in the eclass, so we can remove it. 88476911c29SSSD cursor.remove_inst_and_step_back(); 88576911c29SSSD } else { 88676911c29SSSD if let Some(cmd) = ctx.optimize_skeleton_inst(inst, block) { 88776911c29SSSD Self::execute_skeleton_inst_simplification( 88876911c29SSSD cmd, 88976911c29SSSD &mut cursor, 89076911c29SSSD &mut value_to_opt_value, 89176911c29SSSD inst, 89276911c29SSSD ); 89376911c29SSSD } 89476911c29SSSD } 89576911c29SSSD } 89676911c29SSSD } 89776911c29SSSD StackEntry::Pop => { 89876911c29SSSD gvn_map.decrement_depth(); 89976911c29SSSD gvn_map_blocks.pop(); 90076911c29SSSD } 90176911c29SSSD } 90276911c29SSSD } 90376911c29SSSD } 90476911c29SSSD 90576911c29SSSD /// Execute a simplification of an instruction in the side-effectful 90676911c29SSSD /// skeleton. execute_skeleton_inst_simplification( simplification: SkeletonInstSimplification, cursor: &mut FuncCursor, value_to_opt_value: &mut SecondaryMap<Value, Value>, old_inst: Inst, )90776911c29SSSD fn execute_skeleton_inst_simplification( 90876911c29SSSD simplification: SkeletonInstSimplification, 90976911c29SSSD cursor: &mut FuncCursor, 91076911c29SSSD value_to_opt_value: &mut SecondaryMap<Value, Value>, 91176911c29SSSD old_inst: Inst, 91276911c29SSSD ) { 91376911c29SSSD let mut forward_val = |cursor: &mut FuncCursor, old_val, new_val| { 91476911c29SSSD cursor.func.dfg.change_to_alias(old_val, new_val); 91576911c29SSSD value_to_opt_value[old_val] = new_val; 91676911c29SSSD }; 91776911c29SSSD 91876911c29SSSD let (new_inst, new_val) = match simplification { 91976911c29SSSD SkeletonInstSimplification::Remove => { 92076911c29SSSD cursor.remove_inst_and_step_back(); 92176911c29SSSD return; 92276911c29SSSD } 92376911c29SSSD SkeletonInstSimplification::RemoveWithVal { val } => { 92476911c29SSSD cursor.remove_inst_and_step_back(); 92576911c29SSSD let old_val = cursor.func.dfg.first_result(old_inst); 92676911c29SSSD cursor.func.dfg.detach_inst_results(old_inst); 92776911c29SSSD forward_val(cursor, old_val, val); 92876911c29SSSD return; 92976911c29SSSD } 93076911c29SSSD SkeletonInstSimplification::Replace { inst } => (inst, None), 93176911c29SSSD SkeletonInstSimplification::ReplaceWithVal { inst, val } => (inst, Some(val)), 93276911c29SSSD }; 93376911c29SSSD 93476911c29SSSD // Replace the old instruction with the new one. 93576911c29SSSD cursor.replace_inst(new_inst); 93676911c29SSSD debug_assert!(!cursor.func.dfg.insts[new_inst].opcode().is_terminator()); 93776911c29SSSD 93876911c29SSSD // Redirect the old instruction's result values to our new instruction's 93976911c29SSSD // result values. 94076911c29SSSD let mut i = 0; 94176911c29SSSD let mut next_new_val = |dfg: &crate::ir::DataFlowGraph| -> Value { 94276911c29SSSD if let Some(val) = new_val { 94376911c29SSSD val 94476911c29SSSD } else { 94576911c29SSSD let val = dfg.inst_results(new_inst)[i]; 94676911c29SSSD i += 1; 94776911c29SSSD val 94876911c29SSSD } 94976911c29SSSD }; 95076911c29SSSD for i in 0..cursor.func.dfg.inst_results(old_inst).len() { 95176911c29SSSD let old_val = cursor.func.dfg.inst_results(old_inst)[i]; 95276911c29SSSD let new_val = next_new_val(&cursor.func.dfg); 95376911c29SSSD forward_val(cursor, old_val, new_val); 95476911c29SSSD } 95576911c29SSSD 95676911c29SSSD // Back up so that the next iteration of the outer egraph loop will 95776911c29SSSD // process the new instruction. 95876911c29SSSD cursor.goto_inst(new_inst); 95976911c29SSSD cursor.prev_inst(); 96076911c29SSSD } 96176911c29SSSD 96276911c29SSSD /// Scoped elaboration: compute a final ordering of op computation 96376911c29SSSD /// for each block and update the given Func body. After this 96476911c29SSSD /// runs, the function body is back into the state where every 96576911c29SSSD /// Inst with an used result is placed in the layout (possibly 96676911c29SSSD /// duplicated, if our code-motion logic decides this is the best 96776911c29SSSD /// option). 96876911c29SSSD /// 96976911c29SSSD /// This works in concert with the domtree. We do a preorder 97076911c29SSSD /// traversal of the domtree, tracking a scoped map from Id to 97176911c29SSSD /// (new) Value. The map's scopes correspond to levels in the 97276911c29SSSD /// domtree. 97376911c29SSSD /// 97476911c29SSSD /// At each block, we iterate forward over the side-effecting 97576911c29SSSD /// eclasses, and recursively generate their arg eclasses, then 97676911c29SSSD /// emit the ops themselves. 97776911c29SSSD /// 97876911c29SSSD /// To use an eclass in a given block, we first look it up in the 97976911c29SSSD /// scoped map, and get the Value if already present. If not, we 98076911c29SSSD /// need to generate it. We emit the extracted enode for this 98176911c29SSSD /// eclass after recursively generating its args. Eclasses are 98276911c29SSSD /// thus computed "as late as possible", but then memoized into 98376911c29SSSD /// the Id-to-Value map and available to all dominated blocks and 98476911c29SSSD /// for the rest of this block. (This subsumes GVN.) elaborate(&mut self)98576911c29SSSD fn elaborate(&mut self) { 98676911c29SSSD let mut elaborator = Elaborator::new( 98776911c29SSSD self.func, 98876911c29SSSD &self.domtree, 98976911c29SSSD self.loop_analysis, 99076911c29SSSD &self.remat_values, 99176911c29SSSD &mut self.stats, 99276911c29SSSD self.ctrl_plane, 99376911c29SSSD ); 99476911c29SSSD elaborator.elaborate(); 99576911c29SSSD 99676911c29SSSD self.check_post_egraph(); 99776911c29SSSD } 99876911c29SSSD 99976911c29SSSD #[cfg(debug_assertions)] check_post_egraph(&self)100076911c29SSSD fn check_post_egraph(&self) { 100176911c29SSSD // Verify that no union nodes are reachable from inst args, 100276911c29SSSD // and that all inst args' defining instructions are in the 100376911c29SSSD // layout. 100476911c29SSSD for block in self.func.layout.blocks() { 100576911c29SSSD for inst in self.func.layout.block_insts(block) { 100676911c29SSSD self.func 100776911c29SSSD .dfg 100876911c29SSSD .inst_values(inst) 100976911c29SSSD .for_each(|arg| match self.func.dfg.value_def(arg) { 101076911c29SSSD ValueDef::Result(i, _) => { 101176911c29SSSD debug_assert!(self.func.layout.inst_block(i).is_some()); 101276911c29SSSD } 101376911c29SSSD ValueDef::Union(..) => { 101476911c29SSSD panic!("egraph union node {arg} still reachable at {inst}!"); 101576911c29SSSD } 101676911c29SSSD _ => {} 101776911c29SSSD }) 101876911c29SSSD } 101976911c29SSSD } 102076911c29SSSD } 102176911c29SSSD 102276911c29SSSD #[cfg(not(debug_assertions))] check_post_egraph(&self)102376911c29SSSD fn check_post_egraph(&self) {} 102476911c29SSSD } 102576911c29SSSD 102676911c29SSSD /// Implementation of external-context equality and hashing on 102776911c29SSSD /// InstructionData. This allows us to deduplicate instructions given 102876911c29SSSD /// some context that lets us see its value lists, so we don't need to 1029ab78bd82SHo Kim /// store arguments inline in the `InstructionData` (or alongside it in 103076911c29SSSD /// some newly-defined key type) in all cases. 103176911c29SSSD struct GVNContext<'a> { 103276911c29SSSD value_lists: &'a ValueListPool, 103376911c29SSSD } 103476911c29SSSD 103576911c29SSSD impl<'a> CtxEq<(Type, InstructionData), (Type, InstructionData)> for GVNContext<'a> { ctx_eq( &self, (a_ty, a_inst): &(Type, InstructionData), (b_ty, b_inst): &(Type, InstructionData), ) -> bool103676911c29SSSD fn ctx_eq( 103776911c29SSSD &self, 103876911c29SSSD (a_ty, a_inst): &(Type, InstructionData), 103976911c29SSSD (b_ty, b_inst): &(Type, InstructionData), 104076911c29SSSD ) -> bool { 104176911c29SSSD a_ty == b_ty && a_inst.eq(b_inst, self.value_lists) 104276911c29SSSD } 104376911c29SSSD } 104476911c29SSSD 104576911c29SSSD impl<'a> CtxHash<(Type, InstructionData)> for GVNContext<'a> { ctx_hash<H: Hasher>(&self, state: &mut H, (ty, inst): &(Type, InstructionData))104676911c29SSSD fn ctx_hash<H: Hasher>(&self, state: &mut H, (ty, inst): &(Type, InstructionData)) { 104776911c29SSSD core::hash::Hash::hash(&ty, state); 104876911c29SSSD inst.hash(state, self.value_lists); 104976911c29SSSD } 105076911c29SSSD } 105176911c29SSSD 105276911c29SSSD /// Statistics collected during egraph-based processing. 105376911c29SSSD #[derive(Clone, Debug, Default)] 105476911c29SSSD pub(crate) struct Stats { 105576911c29SSSD pub(crate) pure_inst: u64, 105676911c29SSSD pub(crate) pure_inst_deduped: u64, 105776911c29SSSD pub(crate) pure_inst_subsume: u64, 105876911c29SSSD pub(crate) pure_inst_rewrite_to_self: u64, 105976911c29SSSD pub(crate) pure_inst_insert_orig: u64, 106076911c29SSSD pub(crate) pure_inst_insert_new: u64, 106176911c29SSSD pub(crate) skeleton_inst: u64, 106276911c29SSSD pub(crate) skeleton_inst_simplified: u64, 106376911c29SSSD pub(crate) skeleton_inst_gvn: u64, 106476911c29SSSD pub(crate) alias_analysis_removed: u64, 106576911c29SSSD pub(crate) new_inst: u64, 106676911c29SSSD pub(crate) union: u64, 106776911c29SSSD pub(crate) subsume: u64, 106876911c29SSSD pub(crate) remat: u64, 106976911c29SSSD pub(crate) rewrite_rule_invoked: u64, 107076911c29SSSD pub(crate) rewrite_rule_results: u64, 107176911c29SSSD pub(crate) rewrite_depth_limit: u64, 107276911c29SSSD pub(crate) elaborate_visit_node: u64, 107376911c29SSSD pub(crate) elaborate_memoize_hit: u64, 107476911c29SSSD pub(crate) elaborate_memoize_miss: u64, 107576911c29SSSD pub(crate) elaborate_remat: u64, 107676911c29SSSD pub(crate) elaborate_licm_hoist: u64, 107776911c29SSSD pub(crate) elaborate_func: u64, 107876911c29SSSD pub(crate) elaborate_func_pre_insts: u64, 107976911c29SSSD pub(crate) elaborate_func_post_insts: u64, 108076911c29SSSD pub(crate) eclass_size_limit: u64, 108176911c29SSSD } 1082