1 //! Alias analysis, consisting of a "last store" pass and a "memory
2 //! values" pass. These two passes operate as one fused pass, and so
3 //! are implemented together here.
4 //!
5 //! We partition memory state into several *disjoint pieces* of
6 //! "abstract state". There are a finite number of such pieces:
7 //! currently, we call them "heap", "table", "vmctx", and "other".Any
8 //! given address in memory belongs to exactly one disjoint piece.
9 //!
10 //! One never tracks which piece a concrete address belongs to at
11 //! runtime; this is a purely static concept. Instead, all
12 //! memory-accessing instructions (loads and stores) are labeled with
13 //! one of these four categories in the `MemFlags`. It is forbidden
14 //! for a load or store to access memory under one category and a
15 //! later load or store to access the same memory under a different
16 //! category. This is ensured to be true by construction during
17 //! frontend translation into CLIF and during legalization.
18 //!
19 //! Given that this non-aliasing property is ensured by the producer
20 //! of CLIF, we can compute a *may-alias* property: one load or store
21 //! may-alias another load or store if both access the same category
22 //! of abstract state.
23 //!
24 //! The "last store" pass helps to compute this aliasing: it scans the
25 //! code, finding at each program point the last instruction that
26 //! *might have* written to a given part of abstract state.
27 //!
28 //! We can't say for sure that the "last store" *did* actually write
29 //! that state, but we know for sure that no instruction *later* than
30 //! it (up to the current instruction) did. However, we can get a
31 //! must-alias property from this: if at a given load or store, we
32 //! look backward to the "last store", *AND* we find that it has
33 //! exactly the same address expression and type, then we know that
34 //! the current instruction's access *must* be to the same memory
35 //! location.
36 //!
37 //! To get this must-alias property, we compute a sparse table of
38 //! "memory values": these are known equivalences between SSA `Value`s
39 //! and particular locations in memory. The memory-values table is a
40 //! mapping from (last store, address expression, type) to SSA
41 //! value. At a store, we can insert into this table directly. At a
42 //! load, we can also insert, if we don't already have a value (from
43 //! the store that produced the load's value).
44 //!
45 //! Then we can do two optimizations at once given this table. If a
46 //! load accesses a location identified by a (last store, address,
47 //! type) key already in the table, we replace it with the SSA value
48 //! for that memory location. This is usually known as "redundant load
49 //! elimination" if the value came from an earlier load of the same
50 //! location, or "store-to-load forwarding" if the value came from an
51 //! earlier store to the same location.
52 //!
53 //! In theory we could also do *dead-store elimination*, where if a
54 //! store overwrites a key in the table, *and* if no other load/store
55 //! to the abstract state category occurred, *and* no other trapping
56 //! instruction occurred (at which point we need an up-to-date memory
57 //! state because post-trap-termination memory state can be observed),
58 //! *and* we can prove the original store could not have trapped, then
59 //! we can eliminate the original store. Because this is so complex,
60 //! and the conditions for doing it correctly when post-trap state
61 //! must be correct likely reduce the potential benefit, we don't yet
62 //! do this.
63 
64 use crate::{
65     cursor::{Cursor, FuncCursor},
66     dominator_tree::DominatorTree,
67     inst_predicates::{
68         has_memory_fence_semantics, inst_addr_offset_type, inst_store_data, visit_block_succs,
69     },
70     ir::{immediates::Offset32, AliasRegion, Block, Function, Inst, Opcode, Type, Value},
71     trace,
72 };
73 use cranelift_entity::{packed_option::PackedOption, EntityRef};
74 use rustc_hash::{FxHashMap, FxHashSet};
75 
76 /// For a given program point, the vector of last-store instruction
77 /// indices for each disjoint category of abstract state.
78 #[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
79 pub struct LastStores {
80     heap: PackedOption<Inst>,
81     table: PackedOption<Inst>,
82     vmctx: PackedOption<Inst>,
83     other: PackedOption<Inst>,
84 }
85 
86 impl LastStores {
87     fn update(&mut self, func: &Function, inst: Inst) {
88         let opcode = func.dfg.insts[inst].opcode();
89         if has_memory_fence_semantics(opcode) {
90             self.heap = inst.into();
91             self.table = inst.into();
92             self.vmctx = inst.into();
93             self.other = inst.into();
94         } else if opcode.can_store() {
95             if let Some(memflags) = func.dfg.insts[inst].memflags() {
96                 match memflags.alias_region() {
97                     None => self.other = inst.into(),
98                     Some(AliasRegion::Heap) => self.heap = inst.into(),
99                     Some(AliasRegion::Table) => self.table = inst.into(),
100                     Some(AliasRegion::Vmctx) => self.vmctx = inst.into(),
101                 }
102             } else {
103                 self.heap = inst.into();
104                 self.table = inst.into();
105                 self.vmctx = inst.into();
106                 self.other = inst.into();
107             }
108         }
109     }
110 
111     fn get_last_store(&self, func: &Function, inst: Inst) -> PackedOption<Inst> {
112         if let Some(memflags) = func.dfg.insts[inst].memflags() {
113             match memflags.alias_region() {
114                 None => self.other,
115                 Some(AliasRegion::Heap) => self.heap,
116                 Some(AliasRegion::Table) => self.table,
117                 Some(AliasRegion::Vmctx) => self.vmctx,
118             }
119         } else if func.dfg.insts[inst].opcode().can_load()
120             || func.dfg.insts[inst].opcode().can_store()
121         {
122             inst.into()
123         } else {
124             PackedOption::default()
125         }
126     }
127 
128     fn meet_from(&mut self, other: &LastStores, loc: Inst) {
129         let meet = |a: PackedOption<Inst>, b: PackedOption<Inst>| -> PackedOption<Inst> {
130             match (a.into(), b.into()) {
131                 (None, None) => None.into(),
132                 (Some(a), None) => a,
133                 (None, Some(b)) => b,
134                 (Some(a), Some(b)) if a == b => a,
135                 _ => loc.into(),
136             }
137         };
138 
139         self.heap = meet(self.heap, other.heap);
140         self.table = meet(self.table, other.table);
141         self.vmctx = meet(self.vmctx, other.vmctx);
142         self.other = meet(self.other, other.other);
143     }
144 }
145 
146 /// A key identifying a unique memory location.
147 ///
148 /// For the result of a load to be equivalent to the result of another
149 /// load, or the store data from a store, we need for (i) the
150 /// "version" of memory (here ensured by having the same last store
151 /// instruction to touch the disjoint category of abstract state we're
152 /// accessing); (ii) the address must be the same (here ensured by
153 /// having the same SSA value, which doesn't change after computed);
154 /// (iii) the offset must be the same; and (iv) the accessed type and
155 /// extension mode (e.g., 8-to-32, signed) must be the same.
156 #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
157 struct MemoryLoc {
158     last_store: PackedOption<Inst>,
159     address: Value,
160     offset: Offset32,
161     ty: Type,
162     /// We keep the *opcode* of the instruction that produced the
163     /// value we record at this key if the opcode is anything other
164     /// than an ordinary load or store. This is needed when we
165     /// consider loads that extend the value: e.g., an 8-to-32
166     /// sign-extending load will produce a 32-bit value from an 8-bit
167     /// value in memory, so we can only reuse that (as part of RLE)
168     /// for another load with the same extending opcode.
169     ///
170     /// We could improve the transform to insert explicit extend ops
171     /// in place of extending loads when we know the memory value, but
172     /// we haven't yet done this.
173     extending_opcode: Option<Opcode>,
174 }
175 
176 /// An alias-analysis pass.
177 pub struct AliasAnalysis<'a> {
178     /// The domtree for the function.
179     domtree: &'a DominatorTree,
180 
181     /// Input state to a basic block.
182     block_input: FxHashMap<Block, LastStores>,
183 
184     /// Known memory-value equivalences. This is the result of the
185     /// analysis. This is a mapping from (last store, address
186     /// expression, offset, type) to SSA `Value`.
187     ///
188     /// We keep the defining inst around for quick dominance checks.
189     mem_values: FxHashMap<MemoryLoc, (Inst, Value)>,
190 }
191 
192 impl<'a> AliasAnalysis<'a> {
193     /// Perform an alias analysis pass.
194     pub fn new(func: &Function, domtree: &'a DominatorTree) -> AliasAnalysis<'a> {
195         trace!("alias analysis: input is:\n{:?}", func);
196         let mut analysis = AliasAnalysis {
197             domtree,
198             block_input: FxHashMap::default(),
199             mem_values: FxHashMap::default(),
200         };
201 
202         analysis.compute_block_input_states(func);
203         analysis
204     }
205 
206     fn compute_block_input_states(&mut self, func: &Function) {
207         let mut queue = vec![];
208         let mut queue_set = FxHashSet::default();
209         let entry = func.layout.entry_block().unwrap();
210         queue.push(entry);
211         queue_set.insert(entry);
212 
213         while let Some(block) = queue.pop() {
214             queue_set.remove(&block);
215             let mut state = self
216                 .block_input
217                 .entry(block)
218                 .or_insert_with(|| LastStores::default())
219                 .clone();
220 
221             trace!(
222                 "alias analysis: input to block{} is {:?}",
223                 block.index(),
224                 state
225             );
226 
227             for inst in func.layout.block_insts(block) {
228                 state.update(func, inst);
229                 trace!("after inst{}: state is {:?}", inst.index(), state);
230             }
231 
232             visit_block_succs(func, block, |_inst, succ, _from_table| {
233                 let succ_first_inst = func.layout.block_insts(succ).into_iter().next().unwrap();
234                 let updated = match self.block_input.get_mut(&succ) {
235                     Some(succ_state) => {
236                         let old = succ_state.clone();
237                         succ_state.meet_from(&state, succ_first_inst);
238                         *succ_state != old
239                     }
240                     None => {
241                         self.block_input.insert(succ, state.clone());
242                         true
243                     }
244                 };
245 
246                 if updated && queue_set.insert(succ) {
247                     queue.push(succ);
248                 }
249             });
250         }
251     }
252 
253     /// Get the starting state for a block.
254     pub fn block_starting_state(&self, block: Block) -> LastStores {
255         self.block_input
256             .get(&block)
257             .cloned()
258             .unwrap_or_else(|| LastStores::default())
259     }
260 
261     /// Process one instruction. Meant to be invoked in program order
262     /// within a block, and ideally in RPO or at least some domtree
263     /// preorder for maximal reuse.
264     ///
265     /// Returns `true` if instruction was removed.
266     pub fn process_inst(
267         &mut self,
268         func: &mut Function,
269         state: &mut LastStores,
270         inst: Inst,
271     ) -> Option<Value> {
272         trace!(
273             "alias analysis: scanning at inst{} with state {:?} ({:?})",
274             inst.index(),
275             state,
276             func.dfg.insts[inst],
277         );
278 
279         let replacing_value = if let Some((address, offset, ty)) = inst_addr_offset_type(func, inst)
280         {
281             let address = func.dfg.resolve_aliases(address);
282             let opcode = func.dfg.insts[inst].opcode();
283 
284             if opcode.can_store() {
285                 let store_data = inst_store_data(func, inst).unwrap();
286                 let store_data = func.dfg.resolve_aliases(store_data);
287                 let mem_loc = MemoryLoc {
288                     last_store: inst.into(),
289                     address,
290                     offset,
291                     ty,
292                     extending_opcode: get_ext_opcode(opcode),
293                 };
294                 trace!(
295                     "alias analysis: at inst{}: store with data v{} at loc {:?}",
296                     inst.index(),
297                     store_data.index(),
298                     mem_loc
299                 );
300                 self.mem_values.insert(mem_loc, (inst, store_data));
301 
302                 None
303             } else if opcode.can_load() {
304                 let last_store = state.get_last_store(func, inst);
305                 let load_result = func.dfg.inst_results(inst)[0];
306                 let mem_loc = MemoryLoc {
307                     last_store,
308                     address,
309                     offset,
310                     ty,
311                     extending_opcode: get_ext_opcode(opcode),
312                 };
313                 trace!(
314                     "alias analysis: at inst{}: load with last_store inst{} at loc {:?}",
315                     inst.index(),
316                     last_store.map(|inst| inst.index()).unwrap_or(usize::MAX),
317                     mem_loc
318                 );
319 
320                 // Is there a Value already known to be stored
321                 // at this specific memory location?  If so,
322                 // we can alias the load result to this
323                 // already-known Value.
324                 //
325                 // Check if the definition dominates this
326                 // location; it might not, if it comes from a
327                 // load (stores will always dominate though if
328                 // their `last_store` survives through
329                 // meet-points to this use-site).
330                 let aliased =
331                     if let Some((def_inst, value)) = self.mem_values.get(&mem_loc).cloned() {
332                         trace!(
333                             " -> sees known value v{} from inst{}",
334                             value.index(),
335                             def_inst.index()
336                         );
337                         if self.domtree.dominates(def_inst, inst, &func.layout) {
338                             trace!(
339                                 " -> dominates; value equiv from v{} to v{} inserted",
340                                 load_result.index(),
341                                 value.index()
342                             );
343                             Some(value)
344                         } else {
345                             None
346                         }
347                     } else {
348                         None
349                     };
350 
351                 // Otherwise, we can keep *this* load around
352                 // as a new equivalent value.
353                 if aliased.is_none() {
354                     trace!(
355                         " -> inserting load result v{} at loc {:?}",
356                         load_result.index(),
357                         mem_loc
358                     );
359                     self.mem_values.insert(mem_loc, (inst, load_result));
360                 }
361 
362                 aliased
363             } else {
364                 None
365             }
366         } else {
367             None
368         };
369 
370         state.update(func, inst);
371 
372         replacing_value
373     }
374 
375     /// Make a pass and update known-redundant loads to aliased
376     /// values. We interleave the updates with the memory-location
377     /// tracking because resolving some aliases may expose others
378     /// (e.g. in cases of double-indirection with two separate chains
379     /// of loads).
380     pub fn compute_and_update_aliases(&mut self, func: &mut Function) {
381         let mut pos = FuncCursor::new(func);
382 
383         while let Some(block) = pos.next_block() {
384             let mut state = self.block_starting_state(block);
385             while let Some(inst) = pos.next_inst() {
386                 if let Some(replaced_result) = self.process_inst(pos.func, &mut state, inst) {
387                     let result = pos.func.dfg.inst_results(inst)[0];
388                     pos.func.dfg.clear_results(inst);
389                     pos.func.dfg.change_to_alias(result, replaced_result);
390                     pos.remove_inst_and_step_back();
391                 }
392             }
393         }
394     }
395 }
396 
397 fn get_ext_opcode(op: Opcode) -> Option<Opcode> {
398     debug_assert!(op.can_load() || op.can_store());
399     match op {
400         Opcode::Load | Opcode::Store => None,
401         _ => Some(op),
402     }
403 }
404