1 //! Alias analysis, consisting of a "last store" pass and a "memory 2 //! values" pass. These two passes operate as one fused pass, and so 3 //! are implemented together here. 4 //! 5 //! We partition memory state into several *disjoint pieces* of 6 //! "abstract state". There are a finite number of such pieces: 7 //! currently, we call them "heap", "table", "vmctx", and "other".Any 8 //! given address in memory belongs to exactly one disjoint piece. 9 //! 10 //! One never tracks which piece a concrete address belongs to at 11 //! runtime; this is a purely static concept. Instead, all 12 //! memory-accessing instructions (loads and stores) are labeled with 13 //! one of these four categories in the `MemFlags`. It is forbidden 14 //! for a load or store to access memory under one category and a 15 //! later load or store to access the same memory under a different 16 //! category. This is ensured to be true by construction during 17 //! frontend translation into CLIF and during legalization. 18 //! 19 //! Given that this non-aliasing property is ensured by the producer 20 //! of CLIF, we can compute a *may-alias* property: one load or store 21 //! may-alias another load or store if both access the same category 22 //! of abstract state. 23 //! 24 //! The "last store" pass helps to compute this aliasing: it scans the 25 //! code, finding at each program point the last instruction that 26 //! *might have* written to a given part of abstract state. 27 //! 28 //! We can't say for sure that the "last store" *did* actually write 29 //! that state, but we know for sure that no instruction *later* than 30 //! it (up to the current instruction) did. However, we can get a 31 //! must-alias property from this: if at a given load or store, we 32 //! look backward to the "last store", *AND* we find that it has 33 //! exactly the same address expression and type, then we know that 34 //! the current instruction's access *must* be to the same memory 35 //! location. 36 //! 37 //! To get this must-alias property, we compute a sparse table of 38 //! "memory values": these are known equivalences between SSA `Value`s 39 //! and particular locations in memory. The memory-values table is a 40 //! mapping from (last store, address expression, type) to SSA 41 //! value. At a store, we can insert into this table directly. At a 42 //! load, we can also insert, if we don't already have a value (from 43 //! the store that produced the load's value). 44 //! 45 //! Then we can do two optimizations at once given this table. If a 46 //! load accesses a location identified by a (last store, address, 47 //! type) key already in the table, we replace it with the SSA value 48 //! for that memory location. This is usually known as "redundant load 49 //! elimination" if the value came from an earlier load of the same 50 //! location, or "store-to-load forwarding" if the value came from an 51 //! earlier store to the same location. 52 //! 53 //! In theory we could also do *dead-store elimination*, where if a 54 //! store overwrites a key in the table, *and* if no other load/store 55 //! to the abstract state category occurred, *and* no other trapping 56 //! instruction occurred (at which point we need an up-to-date memory 57 //! state because post-trap-termination memory state can be observed), 58 //! *and* we can prove the original store could not have trapped, then 59 //! we can eliminate the original store. Because this is so complex, 60 //! and the conditions for doing it correctly when post-trap state 61 //! must be correct likely reduce the potential benefit, we don't yet 62 //! do this. 63 64 use crate::{ 65 cursor::{Cursor, FuncCursor}, 66 dominator_tree::DominatorTree, 67 inst_predicates::{ 68 has_memory_fence_semantics, inst_addr_offset_type, inst_store_data, visit_block_succs, 69 }, 70 ir::{immediates::Offset32, AliasRegion, Block, Function, Inst, Opcode, Type, Value}, 71 trace, 72 }; 73 use cranelift_entity::{packed_option::PackedOption, EntityRef}; 74 use rustc_hash::{FxHashMap, FxHashSet}; 75 76 /// For a given program point, the vector of last-store instruction 77 /// indices for each disjoint category of abstract state. 78 #[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] 79 pub struct LastStores { 80 heap: PackedOption<Inst>, 81 table: PackedOption<Inst>, 82 vmctx: PackedOption<Inst>, 83 other: PackedOption<Inst>, 84 } 85 86 impl LastStores { 87 fn update(&mut self, func: &Function, inst: Inst) { 88 let opcode = func.dfg.insts[inst].opcode(); 89 if has_memory_fence_semantics(opcode) { 90 self.heap = inst.into(); 91 self.table = inst.into(); 92 self.vmctx = inst.into(); 93 self.other = inst.into(); 94 } else if opcode.can_store() { 95 if let Some(memflags) = func.dfg.insts[inst].memflags() { 96 match memflags.alias_region() { 97 None => self.other = inst.into(), 98 Some(AliasRegion::Heap) => self.heap = inst.into(), 99 Some(AliasRegion::Table) => self.table = inst.into(), 100 Some(AliasRegion::Vmctx) => self.vmctx = inst.into(), 101 } 102 } else { 103 self.heap = inst.into(); 104 self.table = inst.into(); 105 self.vmctx = inst.into(); 106 self.other = inst.into(); 107 } 108 } 109 } 110 111 fn get_last_store(&self, func: &Function, inst: Inst) -> PackedOption<Inst> { 112 if let Some(memflags) = func.dfg.insts[inst].memflags() { 113 match memflags.alias_region() { 114 None => self.other, 115 Some(AliasRegion::Heap) => self.heap, 116 Some(AliasRegion::Table) => self.table, 117 Some(AliasRegion::Vmctx) => self.vmctx, 118 } 119 } else if func.dfg.insts[inst].opcode().can_load() 120 || func.dfg.insts[inst].opcode().can_store() 121 { 122 inst.into() 123 } else { 124 PackedOption::default() 125 } 126 } 127 128 fn meet_from(&mut self, other: &LastStores, loc: Inst) { 129 let meet = |a: PackedOption<Inst>, b: PackedOption<Inst>| -> PackedOption<Inst> { 130 match (a.into(), b.into()) { 131 (None, None) => None.into(), 132 (Some(a), None) => a, 133 (None, Some(b)) => b, 134 (Some(a), Some(b)) if a == b => a, 135 _ => loc.into(), 136 } 137 }; 138 139 self.heap = meet(self.heap, other.heap); 140 self.table = meet(self.table, other.table); 141 self.vmctx = meet(self.vmctx, other.vmctx); 142 self.other = meet(self.other, other.other); 143 } 144 } 145 146 /// A key identifying a unique memory location. 147 /// 148 /// For the result of a load to be equivalent to the result of another 149 /// load, or the store data from a store, we need for (i) the 150 /// "version" of memory (here ensured by having the same last store 151 /// instruction to touch the disjoint category of abstract state we're 152 /// accessing); (ii) the address must be the same (here ensured by 153 /// having the same SSA value, which doesn't change after computed); 154 /// (iii) the offset must be the same; and (iv) the accessed type and 155 /// extension mode (e.g., 8-to-32, signed) must be the same. 156 #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] 157 struct MemoryLoc { 158 last_store: PackedOption<Inst>, 159 address: Value, 160 offset: Offset32, 161 ty: Type, 162 /// We keep the *opcode* of the instruction that produced the 163 /// value we record at this key if the opcode is anything other 164 /// than an ordinary load or store. This is needed when we 165 /// consider loads that extend the value: e.g., an 8-to-32 166 /// sign-extending load will produce a 32-bit value from an 8-bit 167 /// value in memory, so we can only reuse that (as part of RLE) 168 /// for another load with the same extending opcode. 169 /// 170 /// We could improve the transform to insert explicit extend ops 171 /// in place of extending loads when we know the memory value, but 172 /// we haven't yet done this. 173 extending_opcode: Option<Opcode>, 174 } 175 176 /// An alias-analysis pass. 177 pub struct AliasAnalysis<'a> { 178 /// The domtree for the function. 179 domtree: &'a DominatorTree, 180 181 /// Input state to a basic block. 182 block_input: FxHashMap<Block, LastStores>, 183 184 /// Known memory-value equivalences. This is the result of the 185 /// analysis. This is a mapping from (last store, address 186 /// expression, offset, type) to SSA `Value`. 187 /// 188 /// We keep the defining inst around for quick dominance checks. 189 mem_values: FxHashMap<MemoryLoc, (Inst, Value)>, 190 } 191 192 impl<'a> AliasAnalysis<'a> { 193 /// Perform an alias analysis pass. 194 pub fn new(func: &Function, domtree: &'a DominatorTree) -> AliasAnalysis<'a> { 195 trace!("alias analysis: input is:\n{:?}", func); 196 let mut analysis = AliasAnalysis { 197 domtree, 198 block_input: FxHashMap::default(), 199 mem_values: FxHashMap::default(), 200 }; 201 202 analysis.compute_block_input_states(func); 203 analysis 204 } 205 206 fn compute_block_input_states(&mut self, func: &Function) { 207 let mut queue = vec![]; 208 let mut queue_set = FxHashSet::default(); 209 let entry = func.layout.entry_block().unwrap(); 210 queue.push(entry); 211 queue_set.insert(entry); 212 213 while let Some(block) = queue.pop() { 214 queue_set.remove(&block); 215 let mut state = self 216 .block_input 217 .entry(block) 218 .or_insert_with(|| LastStores::default()) 219 .clone(); 220 221 trace!( 222 "alias analysis: input to block{} is {:?}", 223 block.index(), 224 state 225 ); 226 227 for inst in func.layout.block_insts(block) { 228 state.update(func, inst); 229 trace!("after inst{}: state is {:?}", inst.index(), state); 230 } 231 232 visit_block_succs(func, block, |_inst, succ, _from_table| { 233 let succ_first_inst = func.layout.block_insts(succ).into_iter().next().unwrap(); 234 let updated = match self.block_input.get_mut(&succ) { 235 Some(succ_state) => { 236 let old = succ_state.clone(); 237 succ_state.meet_from(&state, succ_first_inst); 238 *succ_state != old 239 } 240 None => { 241 self.block_input.insert(succ, state.clone()); 242 true 243 } 244 }; 245 246 if updated && queue_set.insert(succ) { 247 queue.push(succ); 248 } 249 }); 250 } 251 } 252 253 /// Get the starting state for a block. 254 pub fn block_starting_state(&self, block: Block) -> LastStores { 255 self.block_input 256 .get(&block) 257 .cloned() 258 .unwrap_or_else(|| LastStores::default()) 259 } 260 261 /// Process one instruction. Meant to be invoked in program order 262 /// within a block, and ideally in RPO or at least some domtree 263 /// preorder for maximal reuse. 264 /// 265 /// Returns `true` if instruction was removed. 266 pub fn process_inst( 267 &mut self, 268 func: &mut Function, 269 state: &mut LastStores, 270 inst: Inst, 271 ) -> Option<Value> { 272 trace!( 273 "alias analysis: scanning at inst{} with state {:?} ({:?})", 274 inst.index(), 275 state, 276 func.dfg.insts[inst], 277 ); 278 279 let replacing_value = if let Some((address, offset, ty)) = inst_addr_offset_type(func, inst) 280 { 281 let address = func.dfg.resolve_aliases(address); 282 let opcode = func.dfg.insts[inst].opcode(); 283 284 if opcode.can_store() { 285 let store_data = inst_store_data(func, inst).unwrap(); 286 let store_data = func.dfg.resolve_aliases(store_data); 287 let mem_loc = MemoryLoc { 288 last_store: inst.into(), 289 address, 290 offset, 291 ty, 292 extending_opcode: get_ext_opcode(opcode), 293 }; 294 trace!( 295 "alias analysis: at inst{}: store with data v{} at loc {:?}", 296 inst.index(), 297 store_data.index(), 298 mem_loc 299 ); 300 self.mem_values.insert(mem_loc, (inst, store_data)); 301 302 None 303 } else if opcode.can_load() { 304 let last_store = state.get_last_store(func, inst); 305 let load_result = func.dfg.inst_results(inst)[0]; 306 let mem_loc = MemoryLoc { 307 last_store, 308 address, 309 offset, 310 ty, 311 extending_opcode: get_ext_opcode(opcode), 312 }; 313 trace!( 314 "alias analysis: at inst{}: load with last_store inst{} at loc {:?}", 315 inst.index(), 316 last_store.map(|inst| inst.index()).unwrap_or(usize::MAX), 317 mem_loc 318 ); 319 320 // Is there a Value already known to be stored 321 // at this specific memory location? If so, 322 // we can alias the load result to this 323 // already-known Value. 324 // 325 // Check if the definition dominates this 326 // location; it might not, if it comes from a 327 // load (stores will always dominate though if 328 // their `last_store` survives through 329 // meet-points to this use-site). 330 let aliased = 331 if let Some((def_inst, value)) = self.mem_values.get(&mem_loc).cloned() { 332 trace!( 333 " -> sees known value v{} from inst{}", 334 value.index(), 335 def_inst.index() 336 ); 337 if self.domtree.dominates(def_inst, inst, &func.layout) { 338 trace!( 339 " -> dominates; value equiv from v{} to v{} inserted", 340 load_result.index(), 341 value.index() 342 ); 343 Some(value) 344 } else { 345 None 346 } 347 } else { 348 None 349 }; 350 351 // Otherwise, we can keep *this* load around 352 // as a new equivalent value. 353 if aliased.is_none() { 354 trace!( 355 " -> inserting load result v{} at loc {:?}", 356 load_result.index(), 357 mem_loc 358 ); 359 self.mem_values.insert(mem_loc, (inst, load_result)); 360 } 361 362 aliased 363 } else { 364 None 365 } 366 } else { 367 None 368 }; 369 370 state.update(func, inst); 371 372 replacing_value 373 } 374 375 /// Make a pass and update known-redundant loads to aliased 376 /// values. We interleave the updates with the memory-location 377 /// tracking because resolving some aliases may expose others 378 /// (e.g. in cases of double-indirection with two separate chains 379 /// of loads). 380 pub fn compute_and_update_aliases(&mut self, func: &mut Function) { 381 let mut pos = FuncCursor::new(func); 382 383 while let Some(block) = pos.next_block() { 384 let mut state = self.block_starting_state(block); 385 while let Some(inst) = pos.next_inst() { 386 if let Some(replaced_result) = self.process_inst(pos.func, &mut state, inst) { 387 let result = pos.func.dfg.inst_results(inst)[0]; 388 pos.func.dfg.clear_results(inst); 389 pos.func.dfg.change_to_alias(result, replaced_result); 390 pos.remove_inst_and_step_back(); 391 } 392 } 393 } 394 } 395 } 396 397 fn get_ext_opcode(op: Opcode) -> Option<Opcode> { 398 debug_assert!(op.can_load() || op.can_store()); 399 match op { 400 Opcode::Load | Opcode::Store => None, 401 _ => Some(op), 402 } 403 } 404