1 //! Alias analysis, consisting of a "last store" pass and a "memory 2 //! values" pass. These two passes operate as one fused pass, and so 3 //! are implemented together here. 4 //! 5 //! We partition memory state into several *disjoint pieces* of 6 //! "abstract state". There are a finite number of such pieces: 7 //! currently, we call them "heap", "table", "vmctx", and "other".Any 8 //! given address in memory belongs to exactly one disjoint piece. 9 //! 10 //! One never tracks which piece a concrete address belongs to at 11 //! runtime; this is a purely static concept. Instead, all 12 //! memory-accessing instructions (loads and stores) are labeled with 13 //! one of these four categories in the `MemFlags`. It is forbidden 14 //! for a load or store to access memory under one category and a 15 //! later load or store to access the same memory under a different 16 //! category. This is ensured to be true by construction during 17 //! frontend translation into CLIF and during legalization. 18 //! 19 //! Given that this non-aliasing property is ensured by the producer 20 //! of CLIF, we can compute a *may-alias* property: one load or store 21 //! may-alias another load or store if both access the same category 22 //! of abstract state. 23 //! 24 //! The "last store" pass helps to compute this aliasing: it scans the 25 //! code, finding at each program point the last instruction that 26 //! *might have* written to a given part of abstract state. 27 //! 28 //! We can't say for sure that the "last store" *did* actually write 29 //! that state, but we know for sure that no instruction *later* than 30 //! it (up to the current instruction) did. However, we can get a 31 //! must-alias property from this: if at a given load or store, we 32 //! look backward to the "last store", *AND* we find that it has 33 //! exactly the same address expression and type, then we know that 34 //! the current instruction's access *must* be to the same memory 35 //! location. 36 //! 37 //! To get this must-alias property, we compute a sparse table of 38 //! "memory values": these are known equivalences between SSA `Value`s 39 //! and particular locations in memory. The memory-values table is a 40 //! mapping from (last store, address expression, type) to SSA 41 //! value. At a store, we can insert into this table directly. At a 42 //! load, we can also insert, if we don't already have a value (from 43 //! the store that produced the load's value). 44 //! 45 //! Then we can do two optimizations at once given this table. If a 46 //! load accesses a location identified by a (last store, address, 47 //! type) key already in the table, we replace it with the SSA value 48 //! for that memory location. This is usually known as "redundant load 49 //! elimination" if the value came from an earlier load of the same 50 //! location, or "store-to-load forwarding" if the value came from an 51 //! earlier store to the same location. 52 //! 53 //! In theory we could also do *dead-store elimination*, where if a 54 //! store overwrites a key in the table, *and* if no other load/store 55 //! to the abstract state category occurred, *and* no other trapping 56 //! instruction occurred (at which point we need an up-to-date memory 57 //! state because post-trap-termination memory state can be observed), 58 //! *and* we can prove the original store could not have trapped, then 59 //! we can eliminate the original store. Because this is so complex, 60 //! and the conditions for doing it correctly when post-trap state 61 //! must be correct likely reduce the potential benefit, we don't yet 62 //! do this. 63 64 use crate::{ 65 cursor::{Cursor, FuncCursor}, 66 dominator_tree::DominatorTree, 67 fx::{FxHashMap, FxHashSet}, 68 inst_predicates::{ 69 has_memory_fence_semantics, inst_addr_offset_type, inst_store_data, visit_block_succs, 70 }, 71 ir::{immediates::Offset32, Block, Function, Inst, Opcode, Type, Value}, 72 trace, 73 }; 74 use cranelift_entity::{packed_option::PackedOption, EntityRef}; 75 76 /// For a given program point, the vector of last-store instruction 77 /// indices for each disjoint category of abstract state. 78 #[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] 79 pub struct LastStores { 80 heap: PackedOption<Inst>, 81 table: PackedOption<Inst>, 82 vmctx: PackedOption<Inst>, 83 other: PackedOption<Inst>, 84 } 85 86 impl LastStores { 87 fn update(&mut self, func: &Function, inst: Inst) { 88 let opcode = func.dfg.insts[inst].opcode(); 89 if has_memory_fence_semantics(opcode) { 90 self.heap = inst.into(); 91 self.table = inst.into(); 92 self.vmctx = inst.into(); 93 self.other = inst.into(); 94 } else if opcode.can_store() { 95 if let Some(memflags) = func.dfg.insts[inst].memflags() { 96 if memflags.heap() { 97 self.heap = inst.into(); 98 } else if memflags.table() { 99 self.table = inst.into(); 100 } else if memflags.vmctx() { 101 self.vmctx = inst.into(); 102 } else { 103 self.other = inst.into(); 104 } 105 } else { 106 self.heap = inst.into(); 107 self.table = inst.into(); 108 self.vmctx = inst.into(); 109 self.other = inst.into(); 110 } 111 } 112 } 113 114 fn get_last_store(&self, func: &Function, inst: Inst) -> PackedOption<Inst> { 115 if let Some(memflags) = func.dfg.insts[inst].memflags() { 116 if memflags.heap() { 117 self.heap 118 } else if memflags.table() { 119 self.table 120 } else if memflags.vmctx() { 121 self.vmctx 122 } else { 123 self.other 124 } 125 } else if func.dfg.insts[inst].opcode().can_load() 126 || func.dfg.insts[inst].opcode().can_store() 127 { 128 inst.into() 129 } else { 130 PackedOption::default() 131 } 132 } 133 134 fn meet_from(&mut self, other: &LastStores, loc: Inst) { 135 let meet = |a: PackedOption<Inst>, b: PackedOption<Inst>| -> PackedOption<Inst> { 136 match (a.into(), b.into()) { 137 (None, None) => None.into(), 138 (Some(a), None) => a, 139 (None, Some(b)) => b, 140 (Some(a), Some(b)) if a == b => a, 141 _ => loc.into(), 142 } 143 }; 144 145 self.heap = meet(self.heap, other.heap); 146 self.table = meet(self.table, other.table); 147 self.vmctx = meet(self.vmctx, other.vmctx); 148 self.other = meet(self.other, other.other); 149 } 150 } 151 152 /// A key identifying a unique memory location. 153 /// 154 /// For the result of a load to be equivalent to the result of another 155 /// load, or the store data from a store, we need for (i) the 156 /// "version" of memory (here ensured by having the same last store 157 /// instruction to touch the disjoint category of abstract state we're 158 /// accessing); (ii) the address must be the same (here ensured by 159 /// having the same SSA value, which doesn't change after computed); 160 /// (iii) the offset must be the same; and (iv) the accessed type and 161 /// extension mode (e.g., 8-to-32, signed) must be the same. 162 #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] 163 struct MemoryLoc { 164 last_store: PackedOption<Inst>, 165 address: Value, 166 offset: Offset32, 167 ty: Type, 168 /// We keep the *opcode* of the instruction that produced the 169 /// value we record at this key if the opcode is anything other 170 /// than an ordinary load or store. This is needed when we 171 /// consider loads that extend the value: e.g., an 8-to-32 172 /// sign-extending load will produce a 32-bit value from an 8-bit 173 /// value in memory, so we can only reuse that (as part of RLE) 174 /// for another load with the same extending opcode. 175 /// 176 /// We could improve the transform to insert explicit extend ops 177 /// in place of extending loads when we know the memory value, but 178 /// we haven't yet done this. 179 extending_opcode: Option<Opcode>, 180 } 181 182 /// An alias-analysis pass. 183 pub struct AliasAnalysis<'a> { 184 /// The domtree for the function. 185 domtree: &'a DominatorTree, 186 187 /// Input state to a basic block. 188 block_input: FxHashMap<Block, LastStores>, 189 190 /// Known memory-value equivalences. This is the result of the 191 /// analysis. This is a mapping from (last store, address 192 /// expression, offset, type) to SSA `Value`. 193 /// 194 /// We keep the defining inst around for quick dominance checks. 195 mem_values: FxHashMap<MemoryLoc, (Inst, Value)>, 196 } 197 198 impl<'a> AliasAnalysis<'a> { 199 /// Perform an alias analysis pass. 200 pub fn new(func: &Function, domtree: &'a DominatorTree) -> AliasAnalysis<'a> { 201 trace!("alias analysis: input is:\n{:?}", func); 202 let mut analysis = AliasAnalysis { 203 domtree, 204 block_input: FxHashMap::default(), 205 mem_values: FxHashMap::default(), 206 }; 207 208 analysis.compute_block_input_states(func); 209 analysis 210 } 211 212 fn compute_block_input_states(&mut self, func: &Function) { 213 let mut queue = vec![]; 214 let mut queue_set = FxHashSet::default(); 215 let entry = func.layout.entry_block().unwrap(); 216 queue.push(entry); 217 queue_set.insert(entry); 218 219 while let Some(block) = queue.pop() { 220 queue_set.remove(&block); 221 let mut state = self 222 .block_input 223 .entry(block) 224 .or_insert_with(|| LastStores::default()) 225 .clone(); 226 227 trace!( 228 "alias analysis: input to block{} is {:?}", 229 block.index(), 230 state 231 ); 232 233 for inst in func.layout.block_insts(block) { 234 state.update(func, inst); 235 trace!("after inst{}: state is {:?}", inst.index(), state); 236 } 237 238 visit_block_succs(func, block, |_inst, succ, _from_table| { 239 let succ_first_inst = func.layout.block_insts(succ).into_iter().next().unwrap(); 240 let updated = match self.block_input.get_mut(&succ) { 241 Some(succ_state) => { 242 let old = succ_state.clone(); 243 succ_state.meet_from(&state, succ_first_inst); 244 *succ_state != old 245 } 246 None => { 247 self.block_input.insert(succ, state.clone()); 248 true 249 } 250 }; 251 252 if updated && queue_set.insert(succ) { 253 queue.push(succ); 254 } 255 }); 256 } 257 } 258 259 /// Get the starting state for a block. 260 pub fn block_starting_state(&self, block: Block) -> LastStores { 261 self.block_input 262 .get(&block) 263 .cloned() 264 .unwrap_or_else(|| LastStores::default()) 265 } 266 267 /// Process one instruction. Meant to be invoked in program order 268 /// within a block, and ideally in RPO or at least some domtree 269 /// preorder for maximal reuse. 270 /// 271 /// Returns `true` if instruction was removed. 272 pub fn process_inst( 273 &mut self, 274 func: &mut Function, 275 state: &mut LastStores, 276 inst: Inst, 277 ) -> Option<Value> { 278 trace!( 279 "alias analysis: scanning at inst{} with state {:?} ({:?})", 280 inst.index(), 281 state, 282 func.dfg.insts[inst], 283 ); 284 285 let replacing_value = if let Some((address, offset, ty)) = inst_addr_offset_type(func, inst) 286 { 287 let address = func.dfg.resolve_aliases(address); 288 let opcode = func.dfg.insts[inst].opcode(); 289 290 if opcode.can_store() { 291 let store_data = inst_store_data(func, inst).unwrap(); 292 let store_data = func.dfg.resolve_aliases(store_data); 293 let mem_loc = MemoryLoc { 294 last_store: inst.into(), 295 address, 296 offset, 297 ty, 298 extending_opcode: get_ext_opcode(opcode), 299 }; 300 trace!( 301 "alias analysis: at inst{}: store with data v{} at loc {:?}", 302 inst.index(), 303 store_data.index(), 304 mem_loc 305 ); 306 self.mem_values.insert(mem_loc, (inst, store_data)); 307 308 None 309 } else if opcode.can_load() { 310 let last_store = state.get_last_store(func, inst); 311 let load_result = func.dfg.inst_results(inst)[0]; 312 let mem_loc = MemoryLoc { 313 last_store, 314 address, 315 offset, 316 ty, 317 extending_opcode: get_ext_opcode(opcode), 318 }; 319 trace!( 320 "alias analysis: at inst{}: load with last_store inst{} at loc {:?}", 321 inst.index(), 322 last_store.map(|inst| inst.index()).unwrap_or(usize::MAX), 323 mem_loc 324 ); 325 326 // Is there a Value already known to be stored 327 // at this specific memory location? If so, 328 // we can alias the load result to this 329 // already-known Value. 330 // 331 // Check if the definition dominates this 332 // location; it might not, if it comes from a 333 // load (stores will always dominate though if 334 // their `last_store` survives through 335 // meet-points to this use-site). 336 let aliased = 337 if let Some((def_inst, value)) = self.mem_values.get(&mem_loc).cloned() { 338 trace!( 339 " -> sees known value v{} from inst{}", 340 value.index(), 341 def_inst.index() 342 ); 343 if self.domtree.dominates(def_inst, inst, &func.layout) { 344 trace!( 345 " -> dominates; value equiv from v{} to v{} inserted", 346 load_result.index(), 347 value.index() 348 ); 349 Some(value) 350 } else { 351 None 352 } 353 } else { 354 None 355 }; 356 357 // Otherwise, we can keep *this* load around 358 // as a new equivalent value. 359 if aliased.is_none() { 360 trace!( 361 " -> inserting load result v{} at loc {:?}", 362 load_result.index(), 363 mem_loc 364 ); 365 self.mem_values.insert(mem_loc, (inst, load_result)); 366 } 367 368 aliased 369 } else { 370 None 371 } 372 } else { 373 None 374 }; 375 376 state.update(func, inst); 377 378 replacing_value 379 } 380 381 /// Make a pass and update known-redundant loads to aliased 382 /// values. We interleave the updates with the memory-location 383 /// tracking because resolving some aliases may expose others 384 /// (e.g. in cases of double-indirection with two separate chains 385 /// of loads). 386 pub fn compute_and_update_aliases(&mut self, func: &mut Function) { 387 let mut pos = FuncCursor::new(func); 388 389 while let Some(block) = pos.next_block() { 390 let mut state = self.block_starting_state(block); 391 while let Some(inst) = pos.next_inst() { 392 if let Some(replaced_result) = self.process_inst(pos.func, &mut state, inst) { 393 let result = pos.func.dfg.inst_results(inst)[0]; 394 pos.func.dfg.detach_results(inst); 395 pos.func.dfg.change_to_alias(result, replaced_result); 396 pos.remove_inst_and_step_back(); 397 } 398 } 399 } 400 } 401 } 402 403 fn get_ext_opcode(op: Opcode) -> Option<Opcode> { 404 debug_assert!(op.can_load() || op.can_store()); 405 match op { 406 Opcode::Load | Opcode::Store => None, 407 _ => Some(op), 408 } 409 } 410