1 //! Computation of basic block order in emitted code. 2 //! 3 //! This module handles the translation from CLIF BBs to VCode BBs. 4 //! 5 //! The basic idea is that we compute a sequence of "lowered blocks" that 6 //! correspond to one or more blocks in the graph: (CLIF CFG) `union` (implicit 7 //! block on *every* edge). Conceptually, the lowering pipeline wants to insert 8 //! moves for phi-nodes on every block-to-block transfer; these blocks always 9 //! conceptually exist, but may be merged with an "original" CLIF block (and 10 //! hence not actually exist; this is equivalent to inserting the blocks only on 11 //! critical edges). 12 //! 13 //! In other words, starting from a CFG like this (where each "CLIF block" and 14 //! "(edge N->M)" is a separate basic block): 15 //! 16 //! ```plain 17 //! 18 //! CLIF block 0 19 //! / \ 20 //! (edge 0->1) (edge 0->2) 21 //! | | 22 //! CLIF block 1 CLIF block 2 23 //! \ / 24 //! (edge 1->3) (edge 2->3) 25 //! \ / 26 //! CLIF block 3 27 //! ``` 28 //! 29 //! We can produce a CFG of lowered blocks like so: 30 //! 31 //! ```plain 32 //! +--------------+ 33 //! | CLIF block 0 | 34 //! +--------------+ 35 //! / \ 36 //! +--------------+ +--------------+ 37 //! | (edge 0->1) | | (edge 0->2) | 38 //! | CLIF block 1 | | CLIF block 2 | 39 //! | (edge 1->3) | | (edge 2->3) | 40 //! +--------------+ +--------------+ 41 //! \ / 42 //! \ / 43 //! +------------+ 44 //! |CLIF block 3| 45 //! +------------+ 46 //! ``` 47 //! 48 //! Each `LoweredBlock` names just an original CLIF block, or just an edge block. 49 //! 50 //! To compute this lowering, we do a DFS over the CLIF-plus-edge-block graph 51 //! (never actually materialized, just defined by a "successors" function), and 52 //! compute the reverse postorder. 53 //! 54 //! This algorithm isn't perfect w.r.t. generated code quality: we don't, for 55 //! example, consider any information about whether edge blocks will actually 56 //! have content, because this computation happens as part of lowering *before* 57 //! regalloc, and regalloc may or may not insert moves/spills/reloads on any 58 //! particular edge. But it works relatively well and is conceptually simple. 59 //! Furthermore, the [MachBuffer] machine-code sink performs final peephole-like 60 //! branch editing that in practice elides empty blocks and simplifies some of 61 //! the other redundancies that this scheme produces. 62 63 use crate::dominator_tree::DominatorTree; 64 use crate::entity::SecondaryMap; 65 use crate::inst_predicates::visit_block_succs; 66 use crate::ir::{Block, Function, Inst, Opcode}; 67 use crate::{FxHashMap, FxHashSet}; 68 use crate::{machinst::*, trace}; 69 70 /// Mapping from CLIF BBs to VCode BBs. 71 #[derive(Debug)] 72 pub struct BlockLoweringOrder { 73 /// Lowered blocks, in BlockIndex order. Each block is some combination of 74 /// (i) a CLIF block, and (ii) inserted crit-edge blocks before or after; 75 /// see [LoweredBlock] for details. 76 lowered_order: Vec<LoweredBlock>, 77 /// BlockIndex values for successors for all lowered blocks, indexing `lowered_order`. 78 lowered_succ_indices: Vec<BlockIndex>, 79 /// Ranges in `lowered_succ_indices` giving the successor lists for each lowered 80 /// block. Indexed by lowering-order index (`BlockIndex`). 81 lowered_succ_ranges: Vec<(Option<Inst>, core::ops::Range<usize>)>, 82 /// BlockIndex for each original Block. 83 blockindex_by_block: SecondaryMap<Block, BlockIndex>, 84 /// Cold blocks. These blocks are not reordered in the 85 /// `lowered_order` above; the lowered order must respect RPO 86 /// (uses after defs) in order for lowering to be 87 /// correct. Instead, this set is used to provide `is_cold()`, 88 /// which is used by VCode emission to sink the blocks at the last 89 /// moment (when we actually emit bytes into the MachBuffer). 90 cold_blocks: FxHashSet<BlockIndex>, 91 /// Lowered blocks that are indirect branch targets. 92 indirect_branch_targets: FxHashSet<BlockIndex>, 93 } 94 95 #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] 96 pub enum LoweredBlock { 97 /// Block in original CLIF. 98 Orig { 99 /// Original CLIF block. 100 block: Block, 101 }, 102 103 /// Critical edge between two CLIF blocks. 104 CriticalEdge { 105 /// The predecessor block. 106 pred: Block, 107 108 /// The successor block. 109 succ: Block, 110 111 /// The index of this branch in the successor edges from `pred`, following the same 112 /// indexing order as `inst_predicates::visit_block_succs`. This is used to distinguish 113 /// multiple edges between the same CLIF blocks. 114 succ_idx: u32, 115 }, 116 } 117 118 impl LoweredBlock { 119 /// Unwrap an `Orig` block. orig_block(&self) -> Option<Block>120 pub fn orig_block(&self) -> Option<Block> { 121 match self { 122 &LoweredBlock::Orig { block } => Some(block), 123 &LoweredBlock::CriticalEdge { .. } => None, 124 } 125 } 126 127 /// The associated in-edge predecessor, if this is a critical edge. 128 #[cfg(test)] in_edge(&self) -> Option<Block>129 pub fn in_edge(&self) -> Option<Block> { 130 match self { 131 &LoweredBlock::CriticalEdge { pred, .. } => Some(pred), 132 &LoweredBlock::Orig { .. } => None, 133 } 134 } 135 136 /// The associated out-edge successor, if this is a critical edge. 137 #[cfg(test)] out_edge(&self) -> Option<Block>138 pub fn out_edge(&self) -> Option<Block> { 139 match self { 140 &LoweredBlock::CriticalEdge { succ, .. } => Some(succ), 141 &LoweredBlock::Orig { .. } => None, 142 } 143 } 144 } 145 146 impl BlockLoweringOrder { 147 /// Compute and return a lowered block order for `f`. new( f: &Function, domtree: &DominatorTree, ctrl_plane: &mut ControlPlane, ) -> BlockLoweringOrder148 pub fn new( 149 f: &Function, 150 domtree: &DominatorTree, 151 ctrl_plane: &mut ControlPlane, 152 ) -> BlockLoweringOrder { 153 trace!("BlockLoweringOrder: function body {:?}", f); 154 155 // Step 1: compute the in-edge and out-edge count of every block. 156 let mut block_in_count = SecondaryMap::with_default(0); 157 let mut block_out_count = SecondaryMap::with_default(0); 158 159 // Block successors are stored as `LoweredBlocks` to simplify the construction of 160 // `lowered_succs` in the final result. Initially, all entries are `Orig` values, and are 161 // updated to be `CriticalEdge` when those cases are identified in step 2 below. 162 let mut block_succs: SmallVec<[LoweredBlock; 128]> = SmallVec::new(); 163 let mut block_succ_range = SecondaryMap::with_default(0..0); 164 165 let mut indirect_branch_target_clif_blocks = FxHashSet::default(); 166 167 for block in f.layout.blocks() { 168 let start = block_succs.len(); 169 visit_block_succs(f, block, |_, succ, from_table| { 170 block_out_count[block] += 1; 171 block_in_count[succ] += 1; 172 block_succs.push(LoweredBlock::Orig { block: succ }); 173 174 if from_table { 175 indirect_branch_target_clif_blocks.insert(succ); 176 } 177 }); 178 179 // Ensure that blocks terminated by br_table instructions 180 // with an empty jump table are still treated like 181 // conditional blocks from the point of view of critical 182 // edge splitting. Also do the same for TryCall and 183 // TryCallIndirect: we cannot have edge moves before the 184 // branch, even if they have empty handler tables and thus 185 // would otherwise have only one successor. 186 if let Some(inst) = f.layout.last_inst(block) { 187 match f.dfg.insts[inst].opcode() { 188 Opcode::BrTable | Opcode::TryCall | Opcode::TryCallIndirect => { 189 block_out_count[block] = block_out_count[block].max(2); 190 } 191 _ => {} 192 } 193 } 194 195 let end = block_succs.len(); 196 block_succ_range[block] = start..end; 197 } 198 199 // Step 2: walk the postorder from the domtree in reverse to produce our desired node 200 // lowering order, identifying critical edges to split along the way. 201 202 let mut lowered_order = Vec::new(); 203 let mut blockindex_by_block = SecondaryMap::with_default(BlockIndex::invalid()); 204 for &block in domtree.cfg_rpo() { 205 let idx = BlockIndex::new(lowered_order.len()); 206 lowered_order.push(LoweredBlock::Orig { block }); 207 blockindex_by_block[block] = idx; 208 209 if block_out_count[block] > 1 { 210 let range = block_succ_range[block].clone(); 211 212 // If chaos-mode is enabled in the control plane, iterate over 213 // the successors in an arbitrary order, which should have no 214 // impact on correctness. The order of the blocks is generally 215 // relevant: Uses must be seen before defs for dead-code 216 // elimination. 217 let succs = ctrl_plane.shuffled(block_succs[range].iter_mut().enumerate()); 218 219 for (succ_ix, lb) in succs { 220 let succ = lb.orig_block().unwrap(); 221 if block_in_count[succ] > 1 { 222 // Mutate the successor to be a critical edge, as `block` has multiple 223 // edges leaving it, and `succ` has multiple edges entering it. 224 *lb = LoweredBlock::CriticalEdge { 225 pred: block, 226 succ, 227 succ_idx: succ_ix as u32, 228 }; 229 lowered_order.push(*lb); 230 } 231 } 232 } 233 } 234 235 let lb_to_bindex = FxHashMap::from_iter( 236 lowered_order 237 .iter() 238 .enumerate() 239 .map(|(i, &lb)| (lb, BlockIndex::new(i))), 240 ); 241 242 // Step 3: build the successor tables given the lowering order. We can't perform this step 243 // during the creation of `lowering_order`, as we need `lb_to_bindex` to be fully populated 244 // first. 245 let mut lowered_succ_indices = Vec::new(); 246 let mut cold_blocks = FxHashSet::default(); 247 let mut indirect_branch_targets = FxHashSet::default(); 248 let lowered_succ_ranges = 249 Vec::from_iter(lowered_order.iter().enumerate().map(|(ix, lb)| { 250 let bindex = BlockIndex::new(ix); 251 let start = lowered_succ_indices.len(); 252 let opt_inst = match lb { 253 // Block successors are pulled directly over, as they'll have been mutated when 254 // determining the block order already. 255 &LoweredBlock::Orig { block } => { 256 let range = block_succ_range[block].clone(); 257 lowered_succ_indices 258 .extend(block_succs[range].iter().map(|lb| lb_to_bindex[lb])); 259 260 if f.layout.is_cold(block) { 261 cold_blocks.insert(bindex); 262 } 263 264 if indirect_branch_target_clif_blocks.contains(&block) { 265 indirect_branch_targets.insert(bindex); 266 } 267 268 let last = f.layout.last_inst(block).unwrap(); 269 let opcode = f.dfg.insts[last].opcode(); 270 271 assert!(opcode.is_terminator()); 272 273 opcode.is_branch().then_some(last) 274 } 275 276 // Critical edges won't have successor information in block_succ_range, but 277 // they only have a single known successor to record anyway. 278 &LoweredBlock::CriticalEdge { succ, .. } => { 279 let succ_index = lb_to_bindex[&LoweredBlock::Orig { block: succ }]; 280 lowered_succ_indices.push(succ_index); 281 282 // Edges inherit indirect branch and cold block metadata from their 283 // successor. 284 285 if f.layout.is_cold(succ) { 286 cold_blocks.insert(bindex); 287 } 288 289 if indirect_branch_target_clif_blocks.contains(&succ) { 290 indirect_branch_targets.insert(bindex); 291 } 292 293 None 294 } 295 }; 296 let end = lowered_succ_indices.len(); 297 (opt_inst, start..end) 298 })); 299 300 let result = BlockLoweringOrder { 301 lowered_order, 302 lowered_succ_indices, 303 lowered_succ_ranges, 304 blockindex_by_block, 305 cold_blocks, 306 indirect_branch_targets, 307 }; 308 309 trace!("BlockLoweringOrder: {:#?}", result); 310 result 311 } 312 313 /// Get the lowered order of blocks. lowered_order(&self) -> &[LoweredBlock]314 pub fn lowered_order(&self) -> &[LoweredBlock] { 315 &self.lowered_order[..] 316 } 317 318 /// Get the BlockIndex, if any, for a given Block. 319 /// 320 /// The result will be `None` if the given Block is unreachable 321 /// (and thus does not appear in the lowered order). lowered_index_for_block(&self, block: Block) -> Option<BlockIndex>322 pub fn lowered_index_for_block(&self, block: Block) -> Option<BlockIndex> { 323 let idx = self.blockindex_by_block[block]; 324 if idx.is_valid() { Some(idx) } else { None } 325 } 326 327 /// Get the successor indices for a lowered block. succ_indices(&self, block: BlockIndex) -> (Option<Inst>, &[BlockIndex])328 pub fn succ_indices(&self, block: BlockIndex) -> (Option<Inst>, &[BlockIndex]) { 329 let (opt_inst, range) = &self.lowered_succ_ranges[block.index()]; 330 (*opt_inst, &self.lowered_succ_indices[range.clone()]) 331 } 332 333 /// Determine whether the given lowered-block index is cold. is_cold(&self, block: BlockIndex) -> bool334 pub fn is_cold(&self, block: BlockIndex) -> bool { 335 self.cold_blocks.contains(&block) 336 } 337 338 /// Determine whether the given lowered block index is an indirect branch 339 /// target. is_indirect_branch_target(&self, block: BlockIndex) -> bool340 pub fn is_indirect_branch_target(&self, block: BlockIndex) -> bool { 341 self.indirect_branch_targets.contains(&block) 342 } 343 } 344 345 #[cfg(test)] 346 mod test { 347 use super::*; 348 use crate::cursor::{Cursor, FuncCursor}; 349 use crate::flowgraph::ControlFlowGraph; 350 use crate::ir::UserFuncName; 351 use crate::ir::types::*; 352 use crate::ir::{AbiParam, InstBuilder, Signature}; 353 use crate::isa::CallConv; 354 build_test_func(n_blocks: usize, edges: &[(usize, usize)]) -> BlockLoweringOrder355 fn build_test_func(n_blocks: usize, edges: &[(usize, usize)]) -> BlockLoweringOrder { 356 assert!(n_blocks > 0); 357 358 let name = UserFuncName::testcase("test0"); 359 let mut sig = Signature::new(CallConv::SystemV); 360 sig.params.push(AbiParam::new(I32)); 361 let mut func = Function::with_name_signature(name, sig); 362 let blocks = (0..n_blocks) 363 .map(|i| { 364 let bb = func.dfg.make_block(); 365 assert!(bb.as_u32() == i as u32); 366 bb 367 }) 368 .collect::<Vec<_>>(); 369 370 let arg0 = func.dfg.append_block_param(blocks[0], I32); 371 372 let mut pos = FuncCursor::new(&mut func); 373 374 let mut edge = 0; 375 for i in 0..n_blocks { 376 pos.insert_block(blocks[i]); 377 let mut succs = vec![]; 378 while edge < edges.len() && edges[edge].0 == i { 379 succs.push(edges[edge].1); 380 edge += 1; 381 } 382 if succs.len() == 0 { 383 pos.ins().return_(&[arg0]); 384 } else if succs.len() == 1 { 385 pos.ins().jump(blocks[succs[0]], &[]); 386 } else if succs.len() == 2 { 387 pos.ins() 388 .brif(arg0, blocks[succs[0]], &[], blocks[succs[1]], &[]); 389 } else { 390 panic!("Too many successors"); 391 } 392 } 393 394 let mut cfg = ControlFlowGraph::new(); 395 cfg.compute(&func); 396 let dom_tree = DominatorTree::with_function(&func, &cfg); 397 398 BlockLoweringOrder::new(&func, &dom_tree, &mut Default::default()) 399 } 400 401 #[test] test_blockorder_diamond()402 fn test_blockorder_diamond() { 403 let order = build_test_func(4, &[(0, 1), (0, 2), (1, 3), (2, 3)]); 404 405 // This test case doesn't need to introduce any critical edges, as all regalloc allocations 406 // can sit on either the entry or exit of blocks 1 and 2. 407 assert_eq!(order.lowered_order.len(), 4); 408 } 409 410 #[test] test_blockorder_critedge()411 fn test_blockorder_critedge() { 412 // 0 413 // / \ 414 // 1 2 415 // / \ \ 416 // 3 4 | 417 // |\ _|____| 418 // | \/ | 419 // | /\ | 420 // 5 6 421 // 422 // (3 -> 5, and 3 -> 6 are critical edges and must be split) 423 // 424 let order = build_test_func( 425 7, 426 &[ 427 (0, 1), 428 (0, 2), 429 (1, 3), 430 (1, 4), 431 (2, 5), 432 (3, 5), 433 (3, 6), 434 (4, 6), 435 ], 436 ); 437 438 assert_eq!(order.lowered_order.len(), 9); 439 println!("ordered = {:?}", order.lowered_order); 440 441 // block 0 442 assert_eq!(order.lowered_order[0].orig_block().unwrap().as_u32(), 0); 443 assert!(order.lowered_order[0].in_edge().is_none()); 444 assert!(order.lowered_order[0].out_edge().is_none()); 445 446 // block 2 447 assert_eq!(order.lowered_order[1].orig_block().unwrap().as_u32(), 2); 448 assert!(order.lowered_order[1].in_edge().is_none()); 449 assert!(order.lowered_order[1].out_edge().is_none()); 450 451 // block 1 452 assert_eq!(order.lowered_order[2].orig_block().unwrap().as_u32(), 1); 453 assert!(order.lowered_order[2].in_edge().is_none()); 454 assert!(order.lowered_order[2].out_edge().is_none()); 455 456 // block 4 457 assert_eq!(order.lowered_order[3].orig_block().unwrap().as_u32(), 4); 458 assert!(order.lowered_order[3].in_edge().is_none()); 459 assert!(order.lowered_order[3].out_edge().is_none()); 460 461 // block 3 462 assert_eq!(order.lowered_order[4].orig_block().unwrap().as_u32(), 3); 463 assert!(order.lowered_order[4].in_edge().is_none()); 464 assert!(order.lowered_order[4].out_edge().is_none()); 465 466 // critical edge 3 -> 5 467 assert!(order.lowered_order[5].orig_block().is_none()); 468 assert_eq!(order.lowered_order[5].in_edge().unwrap().as_u32(), 3); 469 assert_eq!(order.lowered_order[5].out_edge().unwrap().as_u32(), 5); 470 471 // critical edge 3 -> 6 472 assert!(order.lowered_order[6].orig_block().is_none()); 473 assert_eq!(order.lowered_order[6].in_edge().unwrap().as_u32(), 3); 474 assert_eq!(order.lowered_order[6].out_edge().unwrap().as_u32(), 6); 475 476 // block 6 477 assert_eq!(order.lowered_order[7].orig_block().unwrap().as_u32(), 6); 478 assert!(order.lowered_order[7].in_edge().is_none()); 479 assert!(order.lowered_order[7].out_edge().is_none()); 480 481 // block 5 482 assert_eq!(order.lowered_order[8].orig_block().unwrap().as_u32(), 5); 483 assert!(order.lowered_order[8].in_edge().is_none()); 484 assert!(order.lowered_order[8].out_edge().is_none()); 485 } 486 } 487