1 //! Alias analysis, consisting of a "last store" pass and a "memory
2 //! values" pass. These two passes operate as one fused pass, and so
3 //! are implemented together here.
4 //!
5 //! We partition memory state into several *disjoint pieces* of
6 //! "abstract state". There are a finite number of such pieces:
7 //! currently, we call them "heap", "table", "vmctx", and "other".Any
8 //! given address in memory belongs to exactly one disjoint piece.
9 //!
10 //! One never tracks which piece a concrete address belongs to at
11 //! runtime; this is a purely static concept. Instead, all
12 //! memory-accessing instructions (loads and stores) are labeled with
13 //! one of these four categories in the `MemFlags`. It is forbidden
14 //! for a load or store to access memory under one category and a
15 //! later load or store to access the same memory under a different
16 //! category. This is ensured to be true by construction during
17 //! frontend translation into CLIF and during legalization.
18 //!
19 //! Given that this non-aliasing property is ensured by the producer
20 //! of CLIF, we can compute a *may-alias* property: one load or store
21 //! may-alias another load or store if both access the same category
22 //! of abstract state.
23 //!
24 //! The "last store" pass helps to compute this aliasing: it scans the
25 //! code, finding at each program point the last instruction that
26 //! *might have* written to a given part of abstract state.
27 //!
28 //! We can't say for sure that the "last store" *did* actually write
29 //! that state, but we know for sure that no instruction *later* than
30 //! it (up to the current instruction) did. However, we can get a
31 //! must-alias property from this: if at a given load or store, we
32 //! look backward to the "last store", *AND* we find that it has
33 //! exactly the same address expression and type, then we know that
34 //! the current instruction's access *must* be to the same memory
35 //! location.
36 //!
37 //! To get this must-alias property, we compute a sparse table of
38 //! "memory values": these are known equivalences between SSA `Value`s
39 //! and particular locations in memory. The memory-values table is a
40 //! mapping from (last store, address expression, type) to SSA
41 //! value. At a store, we can insert into this table directly. At a
42 //! load, we can also insert, if we don't already have a value (from
43 //! the store that produced the load's value).
44 //!
45 //! Then we can do two optimizations at once given this table. If a
46 //! load accesses a location identified by a (last store, address,
47 //! type) key already in the table, we replace it with the SSA value
48 //! for that memory location. This is usually known as "redundant load
49 //! elimination" if the value came from an earlier load of the same
50 //! location, or "store-to-load forwarding" if the value came from an
51 //! earlier store to the same location.
52 //!
53 //! In theory we could also do *dead-store elimination*, where if a
54 //! store overwrites a key in the table, *and* if no other load/store
55 //! to the abstract state category occurred, *and* no other trapping
56 //! instruction occurred (at which point we need an up-to-date memory
57 //! state because post-trap-termination memory state can be observed),
58 //! *and* we can prove the original store could not have trapped, then
59 //! we can eliminate the original store. Because this is so complex,
60 //! and the conditions for doing it correctly when post-trap state
61 //! must be correct likely reduce the potential benefit, we don't yet
62 //! do this.
63 
64 use crate::{
65     cursor::{Cursor, FuncCursor},
66     dominator_tree::DominatorTree,
67     fx::{FxHashMap, FxHashSet},
68     inst_predicates::{
69         has_memory_fence_semantics, inst_addr_offset_type, inst_store_data, visit_block_succs,
70     },
71     ir::{immediates::Offset32, Block, Function, Inst, Opcode, Type, Value},
72     trace,
73 };
74 use cranelift_entity::{packed_option::PackedOption, EntityRef};
75 
76 /// For a given program point, the vector of last-store instruction
77 /// indices for each disjoint category of abstract state.
78 #[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
79 pub struct LastStores {
80     heap: PackedOption<Inst>,
81     table: PackedOption<Inst>,
82     vmctx: PackedOption<Inst>,
83     other: PackedOption<Inst>,
84 }
85 
86 impl LastStores {
87     fn update(&mut self, func: &Function, inst: Inst) {
88         let opcode = func.dfg[inst].opcode();
89         if has_memory_fence_semantics(opcode) {
90             self.heap = inst.into();
91             self.table = inst.into();
92             self.vmctx = inst.into();
93             self.other = inst.into();
94         } else if opcode.can_store() {
95             if let Some(memflags) = func.dfg[inst].memflags() {
96                 if memflags.heap() {
97                     self.heap = inst.into();
98                 } else if memflags.table() {
99                     self.table = inst.into();
100                 } else if memflags.vmctx() {
101                     self.vmctx = inst.into();
102                 } else {
103                     self.other = inst.into();
104                 }
105             } else {
106                 self.heap = inst.into();
107                 self.table = inst.into();
108                 self.vmctx = inst.into();
109                 self.other = inst.into();
110             }
111         }
112     }
113 
114     fn get_last_store(&self, func: &Function, inst: Inst) -> PackedOption<Inst> {
115         if let Some(memflags) = func.dfg[inst].memflags() {
116             if memflags.heap() {
117                 self.heap
118             } else if memflags.table() {
119                 self.table
120             } else if memflags.vmctx() {
121                 self.vmctx
122             } else {
123                 self.other
124             }
125         } else if func.dfg[inst].opcode().can_load() || func.dfg[inst].opcode().can_store() {
126             inst.into()
127         } else {
128             PackedOption::default()
129         }
130     }
131 
132     fn meet_from(&mut self, other: &LastStores, loc: Inst) {
133         let meet = |a: PackedOption<Inst>, b: PackedOption<Inst>| -> PackedOption<Inst> {
134             match (a.into(), b.into()) {
135                 (None, None) => None.into(),
136                 (Some(a), None) => a,
137                 (None, Some(b)) => b,
138                 (Some(a), Some(b)) if a == b => a,
139                 _ => loc.into(),
140             }
141         };
142 
143         self.heap = meet(self.heap, other.heap);
144         self.table = meet(self.table, other.table);
145         self.vmctx = meet(self.vmctx, other.vmctx);
146         self.other = meet(self.other, other.other);
147     }
148 }
149 
150 /// A key identifying a unique memory location.
151 ///
152 /// For the result of a load to be equivalent to the result of another
153 /// load, or the store data from a store, we need for (i) the
154 /// "version" of memory (here ensured by having the same last store
155 /// instruction to touch the disjoint category of abstract state we're
156 /// accessing); (ii) the address must be the same (here ensured by
157 /// having the same SSA value, which doesn't change after computed);
158 /// (iii) the offset must be the same; and (iv) the accessed type and
159 /// extension mode (e.g., 8-to-32, signed) must be the same.
160 #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
161 struct MemoryLoc {
162     last_store: PackedOption<Inst>,
163     address: Value,
164     offset: Offset32,
165     ty: Type,
166     /// We keep the *opcode* of the instruction that produced the
167     /// value we record at this key if the opcode is anything other
168     /// than an ordinary load or store. This is needed when we
169     /// consider loads that extend the value: e.g., an 8-to-32
170     /// sign-extending load will produce a 32-bit value from an 8-bit
171     /// value in memory, so we can only reuse that (as part of RLE)
172     /// for another load with the same extending opcode.
173     ///
174     /// We could improve the transform to insert explicit extend ops
175     /// in place of extending loads when we know the memory value, but
176     /// we haven't yet done this.
177     extending_opcode: Option<Opcode>,
178 }
179 
180 /// An alias-analysis pass.
181 pub struct AliasAnalysis<'a> {
182     /// The domtree for the function.
183     domtree: &'a DominatorTree,
184 
185     /// Input state to a basic block.
186     block_input: FxHashMap<Block, LastStores>,
187 
188     /// Known memory-value equivalences. This is the result of the
189     /// analysis. This is a mapping from (last store, address
190     /// expression, offset, type) to SSA `Value`.
191     ///
192     /// We keep the defining inst around for quick dominance checks.
193     mem_values: FxHashMap<MemoryLoc, (Inst, Value)>,
194 }
195 
196 impl<'a> AliasAnalysis<'a> {
197     /// Perform an alias analysis pass.
198     pub fn new(func: &Function, domtree: &'a DominatorTree) -> AliasAnalysis<'a> {
199         trace!("alias analysis: input is:\n{:?}", func);
200         let mut analysis = AliasAnalysis {
201             domtree,
202             block_input: FxHashMap::default(),
203             mem_values: FxHashMap::default(),
204         };
205 
206         analysis.compute_block_input_states(func);
207         analysis
208     }
209 
210     fn compute_block_input_states(&mut self, func: &Function) {
211         let mut queue = vec![];
212         let mut queue_set = FxHashSet::default();
213         let entry = func.layout.entry_block().unwrap();
214         queue.push(entry);
215         queue_set.insert(entry);
216 
217         while let Some(block) = queue.pop() {
218             queue_set.remove(&block);
219             let mut state = self
220                 .block_input
221                 .entry(block)
222                 .or_insert_with(|| LastStores::default())
223                 .clone();
224 
225             trace!(
226                 "alias analysis: input to block{} is {:?}",
227                 block.index(),
228                 state
229             );
230 
231             for inst in func.layout.block_insts(block) {
232                 state.update(func, inst);
233                 trace!("after inst{}: state is {:?}", inst.index(), state);
234             }
235 
236             visit_block_succs(func, block, |_inst, succ, _from_table| {
237                 let succ_first_inst = func.layout.block_insts(succ).into_iter().next().unwrap();
238                 let updated = match self.block_input.get_mut(&succ) {
239                     Some(succ_state) => {
240                         let old = succ_state.clone();
241                         succ_state.meet_from(&state, succ_first_inst);
242                         *succ_state != old
243                     }
244                     None => {
245                         self.block_input.insert(succ, state.clone());
246                         true
247                     }
248                 };
249 
250                 if updated && queue_set.insert(succ) {
251                     queue.push(succ);
252                 }
253             });
254         }
255     }
256 
257     /// Get the starting state for a block.
258     pub fn block_starting_state(&self, block: Block) -> LastStores {
259         self.block_input
260             .get(&block)
261             .cloned()
262             .unwrap_or_else(|| LastStores::default())
263     }
264 
265     /// Process one instruction. Meant to be invoked in program order
266     /// within a block, and ideally in RPO or at least some domtree
267     /// preorder for maximal reuse.
268     ///
269     /// Returns `true` if instruction was removed.
270     pub fn process_inst(
271         &mut self,
272         func: &mut Function,
273         state: &mut LastStores,
274         inst: Inst,
275     ) -> Option<Value> {
276         trace!(
277             "alias analysis: scanning at inst{} with state {:?} ({:?})",
278             inst.index(),
279             state,
280             func.dfg[inst],
281         );
282 
283         let replacing_value = if let Some((address, offset, ty)) = inst_addr_offset_type(func, inst)
284         {
285             let address = func.dfg.resolve_aliases(address);
286             let opcode = func.dfg[inst].opcode();
287 
288             if opcode.can_store() {
289                 let store_data = inst_store_data(func, inst).unwrap();
290                 let store_data = func.dfg.resolve_aliases(store_data);
291                 let mem_loc = MemoryLoc {
292                     last_store: inst.into(),
293                     address,
294                     offset,
295                     ty,
296                     extending_opcode: get_ext_opcode(opcode),
297                 };
298                 trace!(
299                     "alias analysis: at inst{}: store with data v{} at loc {:?}",
300                     inst.index(),
301                     store_data.index(),
302                     mem_loc
303                 );
304                 self.mem_values.insert(mem_loc, (inst, store_data));
305 
306                 None
307             } else if opcode.can_load() {
308                 let last_store = state.get_last_store(func, inst);
309                 let load_result = func.dfg.inst_results(inst)[0];
310                 let mem_loc = MemoryLoc {
311                     last_store,
312                     address,
313                     offset,
314                     ty,
315                     extending_opcode: get_ext_opcode(opcode),
316                 };
317                 trace!(
318                     "alias analysis: at inst{}: load with last_store inst{} at loc {:?}",
319                     inst.index(),
320                     last_store.map(|inst| inst.index()).unwrap_or(usize::MAX),
321                     mem_loc
322                 );
323 
324                 // Is there a Value already known to be stored
325                 // at this specific memory location?  If so,
326                 // we can alias the load result to this
327                 // already-known Value.
328                 //
329                 // Check if the definition dominates this
330                 // location; it might not, if it comes from a
331                 // load (stores will always dominate though if
332                 // their `last_store` survives through
333                 // meet-points to this use-site).
334                 let aliased =
335                     if let Some((def_inst, value)) = self.mem_values.get(&mem_loc).cloned() {
336                         trace!(
337                             " -> sees known value v{} from inst{}",
338                             value.index(),
339                             def_inst.index()
340                         );
341                         if self.domtree.dominates(def_inst, inst, &func.layout) {
342                             trace!(
343                                 " -> dominates; value equiv from v{} to v{} inserted",
344                                 load_result.index(),
345                                 value.index()
346                             );
347                             Some(value)
348                         } else {
349                             None
350                         }
351                     } else {
352                         None
353                     };
354 
355                 // Otherwise, we can keep *this* load around
356                 // as a new equivalent value.
357                 if aliased.is_none() {
358                     trace!(
359                         " -> inserting load result v{} at loc {:?}",
360                         load_result.index(),
361                         mem_loc
362                     );
363                     self.mem_values.insert(mem_loc, (inst, load_result));
364                 }
365 
366                 aliased
367             } else {
368                 None
369             }
370         } else {
371             None
372         };
373 
374         state.update(func, inst);
375 
376         replacing_value
377     }
378 
379     /// Make a pass and update known-redundant loads to aliased
380     /// values. We interleave the updates with the memory-location
381     /// tracking because resolving some aliases may expose others
382     /// (e.g. in cases of double-indirection with two separate chains
383     /// of loads).
384     pub fn compute_and_update_aliases(&mut self, func: &mut Function) {
385         let mut pos = FuncCursor::new(func);
386 
387         while let Some(block) = pos.next_block() {
388             let mut state = self.block_starting_state(block);
389             while let Some(inst) = pos.next_inst() {
390                 if let Some(replaced_result) = self.process_inst(pos.func, &mut state, inst) {
391                     let result = pos.func.dfg.inst_results(inst)[0];
392                     pos.func.dfg.detach_results(inst);
393                     pos.func.dfg.change_to_alias(result, replaced_result);
394                     pos.remove_inst_and_step_back();
395                 }
396             }
397         }
398     }
399 }
400 
401 fn get_ext_opcode(op: Opcode) -> Option<Opcode> {
402     debug_assert!(op.can_load() || op.can_store());
403     match op {
404         Opcode::Load | Opcode::Store => None,
405         _ => Some(op),
406     }
407 }
408