1 //! Computation of basic block order in emitted code.
2 //!
3 //! This module handles the translation from CLIF BBs to VCode BBs.
4 //!
5 //! The basic idea is that we compute a sequence of "lowered blocks" that
6 //! correspond to one or more blocks in the graph: (CLIF CFG) `union` (implicit
7 //! block on *every* edge). Conceptually, the lowering pipeline wants to insert
8 //! moves for phi-nodes on every block-to-block transfer; these blocks always
9 //! conceptually exist, but may be merged with an "original" CLIF block (and
10 //! hence not actually exist; this is equivalent to inserting the blocks only on
11 //! critical edges).
12 //!
13 //! In other words, starting from a CFG like this (where each "CLIF block" and
14 //! "(edge N->M)" is a separate basic block):
15 //!
16 //! ```plain
17 //!
18 //!              CLIF block 0
19 //!               /           \
20 //!       (edge 0->1)         (edge 0->2)
21 //!              |                |
22 //!       CLIF block 1         CLIF block 2
23 //!              \                /
24 //!           (edge 1->3)   (edge 2->3)
25 //!                   \      /
26 //!                 CLIF block 3
27 //! ```
28 //!
29 //! We can produce a CFG of lowered blocks like so:
30 //!
31 //! ```plain
32 //!            +--------------+
33 //!            | CLIF block 0 |
34 //!            +--------------+
35 //!               /           \
36 //!     +--------------+     +--------------+
37 //!     | (edge 0->1)  |     | (edge 0->2)  |
38 //!     | CLIF block 1 |     | CLIF block 2 |
39 //!     | (edge 1->3)  |     | (edge 2->3)  |
40 //!     +--------------+     +--------------+
41 //!                \           /
42 //!                 \         /
43 //!                +------------+
44 //!                |CLIF block 3|
45 //!                +------------+
46 //! ```
47 //!
48 //! Each `LoweredBlock` names just an original CLIF block, or just an edge block.
49 //!
50 //! To compute this lowering, we do a DFS over the CLIF-plus-edge-block graph
51 //! (never actually materialized, just defined by a "successors" function), and
52 //! compute the reverse postorder.
53 //!
54 //! This algorithm isn't perfect w.r.t. generated code quality: we don't, for
55 //! example, consider any information about whether edge blocks will actually
56 //! have content, because this computation happens as part of lowering *before*
57 //! regalloc, and regalloc may or may not insert moves/spills/reloads on any
58 //! particular edge. But it works relatively well and is conceptually simple.
59 //! Furthermore, the [MachBuffer] machine-code sink performs final peephole-like
60 //! branch editing that in practice elides empty blocks and simplifies some of
61 //! the other redundancies that this scheme produces.
62 
63 use crate::dominator_tree::DominatorTree;
64 use crate::entity::SecondaryMap;
65 use crate::inst_predicates::visit_block_succs;
66 use crate::ir::{Block, Function, Inst, Opcode};
67 use crate::{FxHashMap, FxHashSet};
68 use crate::{machinst::*, trace};
69 
70 /// Mapping from CLIF BBs to VCode BBs.
71 #[derive(Debug)]
72 pub struct BlockLoweringOrder {
73     /// Lowered blocks, in BlockIndex order. Each block is some combination of
74     /// (i) a CLIF block, and (ii) inserted crit-edge blocks before or after;
75     /// see [LoweredBlock] for details.
76     lowered_order: Vec<LoweredBlock>,
77     /// BlockIndex values for successors for all lowered blocks, indexing `lowered_order`.
78     lowered_succ_indices: Vec<BlockIndex>,
79     /// Ranges in `lowered_succ_indices` giving the successor lists for each lowered
80     /// block. Indexed by lowering-order index (`BlockIndex`).
81     lowered_succ_ranges: Vec<(Option<Inst>, core::ops::Range<usize>)>,
82     /// BlockIndex for each original Block.
83     blockindex_by_block: SecondaryMap<Block, BlockIndex>,
84     /// Cold blocks. These blocks are not reordered in the
85     /// `lowered_order` above; the lowered order must respect RPO
86     /// (uses after defs) in order for lowering to be
87     /// correct. Instead, this set is used to provide `is_cold()`,
88     /// which is used by VCode emission to sink the blocks at the last
89     /// moment (when we actually emit bytes into the MachBuffer).
90     cold_blocks: FxHashSet<BlockIndex>,
91     /// Lowered blocks that are indirect branch targets.
92     indirect_branch_targets: FxHashSet<BlockIndex>,
93 }
94 
95 #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
96 pub enum LoweredBlock {
97     /// Block in original CLIF.
98     Orig {
99         /// Original CLIF block.
100         block: Block,
101     },
102 
103     /// Critical edge between two CLIF blocks.
104     CriticalEdge {
105         /// The predecessor block.
106         pred: Block,
107 
108         /// The successor block.
109         succ: Block,
110 
111         /// The index of this branch in the successor edges from `pred`, following the same
112         /// indexing order as `inst_predicates::visit_block_succs`. This is used to distinguish
113         /// multiple edges between the same CLIF blocks.
114         succ_idx: u32,
115     },
116 }
117 
118 impl LoweredBlock {
119     /// Unwrap an `Orig` block.
orig_block(&self) -> Option<Block>120     pub fn orig_block(&self) -> Option<Block> {
121         match self {
122             &LoweredBlock::Orig { block } => Some(block),
123             &LoweredBlock::CriticalEdge { .. } => None,
124         }
125     }
126 
127     /// The associated in-edge predecessor, if this is a critical edge.
128     #[cfg(test)]
in_edge(&self) -> Option<Block>129     pub fn in_edge(&self) -> Option<Block> {
130         match self {
131             &LoweredBlock::CriticalEdge { pred, .. } => Some(pred),
132             &LoweredBlock::Orig { .. } => None,
133         }
134     }
135 
136     /// The associated out-edge successor, if this is a critical edge.
137     #[cfg(test)]
out_edge(&self) -> Option<Block>138     pub fn out_edge(&self) -> Option<Block> {
139         match self {
140             &LoweredBlock::CriticalEdge { succ, .. } => Some(succ),
141             &LoweredBlock::Orig { .. } => None,
142         }
143     }
144 }
145 
146 impl BlockLoweringOrder {
147     /// Compute and return a lowered block order for `f`.
new( f: &Function, domtree: &DominatorTree, ctrl_plane: &mut ControlPlane, ) -> BlockLoweringOrder148     pub fn new(
149         f: &Function,
150         domtree: &DominatorTree,
151         ctrl_plane: &mut ControlPlane,
152     ) -> BlockLoweringOrder {
153         trace!("BlockLoweringOrder: function body {:?}", f);
154 
155         // Step 1: compute the in-edge and out-edge count of every block.
156         let mut block_in_count = SecondaryMap::with_default(0);
157         let mut block_out_count = SecondaryMap::with_default(0);
158 
159         // Block successors are stored as `LoweredBlocks` to simplify the construction of
160         // `lowered_succs` in the final result. Initially, all entries are `Orig` values, and are
161         // updated to be `CriticalEdge` when those cases are identified in step 2 below.
162         let mut block_succs: SmallVec<[LoweredBlock; 128]> = SmallVec::new();
163         let mut block_succ_range = SecondaryMap::with_default(0..0);
164 
165         let mut indirect_branch_target_clif_blocks = FxHashSet::default();
166 
167         for block in f.layout.blocks() {
168             let start = block_succs.len();
169             visit_block_succs(f, block, |_, succ, from_table| {
170                 block_out_count[block] += 1;
171                 block_in_count[succ] += 1;
172                 block_succs.push(LoweredBlock::Orig { block: succ });
173 
174                 if from_table {
175                     indirect_branch_target_clif_blocks.insert(succ);
176                 }
177             });
178 
179             // Ensure that blocks terminated by br_table instructions
180             // with an empty jump table are still treated like
181             // conditional blocks from the point of view of critical
182             // edge splitting. Also do the same for TryCall and
183             // TryCallIndirect: we cannot have edge moves before the
184             // branch, even if they have empty handler tables and thus
185             // would otherwise have only one successor.
186             if let Some(inst) = f.layout.last_inst(block) {
187                 match f.dfg.insts[inst].opcode() {
188                     Opcode::BrTable | Opcode::TryCall | Opcode::TryCallIndirect => {
189                         block_out_count[block] = block_out_count[block].max(2);
190                     }
191                     _ => {}
192                 }
193             }
194 
195             let end = block_succs.len();
196             block_succ_range[block] = start..end;
197         }
198 
199         // Step 2: walk the postorder from the domtree in reverse to produce our desired node
200         // lowering order, identifying critical edges to split along the way.
201 
202         let mut lowered_order = Vec::new();
203         let mut blockindex_by_block = SecondaryMap::with_default(BlockIndex::invalid());
204         for &block in domtree.cfg_rpo() {
205             let idx = BlockIndex::new(lowered_order.len());
206             lowered_order.push(LoweredBlock::Orig { block });
207             blockindex_by_block[block] = idx;
208 
209             if block_out_count[block] > 1 {
210                 let range = block_succ_range[block].clone();
211 
212                 // If chaos-mode is enabled in the control plane, iterate over
213                 // the successors in an arbitrary order, which should have no
214                 // impact on correctness. The order of the blocks is generally
215                 // relevant: Uses must be seen before defs for dead-code
216                 // elimination.
217                 let succs = ctrl_plane.shuffled(block_succs[range].iter_mut().enumerate());
218 
219                 for (succ_ix, lb) in succs {
220                     let succ = lb.orig_block().unwrap();
221                     if block_in_count[succ] > 1 {
222                         // Mutate the successor to be a critical edge, as `block` has multiple
223                         // edges leaving it, and `succ` has multiple edges entering it.
224                         *lb = LoweredBlock::CriticalEdge {
225                             pred: block,
226                             succ,
227                             succ_idx: succ_ix as u32,
228                         };
229                         lowered_order.push(*lb);
230                     }
231                 }
232             }
233         }
234 
235         let lb_to_bindex = FxHashMap::from_iter(
236             lowered_order
237                 .iter()
238                 .enumerate()
239                 .map(|(i, &lb)| (lb, BlockIndex::new(i))),
240         );
241 
242         // Step 3: build the successor tables given the lowering order. We can't perform this step
243         // during the creation of `lowering_order`, as we need `lb_to_bindex` to be fully populated
244         // first.
245         let mut lowered_succ_indices = Vec::new();
246         let mut cold_blocks = FxHashSet::default();
247         let mut indirect_branch_targets = FxHashSet::default();
248         let lowered_succ_ranges =
249             Vec::from_iter(lowered_order.iter().enumerate().map(|(ix, lb)| {
250                 let bindex = BlockIndex::new(ix);
251                 let start = lowered_succ_indices.len();
252                 let opt_inst = match lb {
253                     // Block successors are pulled directly over, as they'll have been mutated when
254                     // determining the block order already.
255                     &LoweredBlock::Orig { block } => {
256                         let range = block_succ_range[block].clone();
257                         lowered_succ_indices
258                             .extend(block_succs[range].iter().map(|lb| lb_to_bindex[lb]));
259 
260                         if f.layout.is_cold(block) {
261                             cold_blocks.insert(bindex);
262                         }
263 
264                         if indirect_branch_target_clif_blocks.contains(&block) {
265                             indirect_branch_targets.insert(bindex);
266                         }
267 
268                         let last = f.layout.last_inst(block).unwrap();
269                         let opcode = f.dfg.insts[last].opcode();
270 
271                         assert!(opcode.is_terminator());
272 
273                         opcode.is_branch().then_some(last)
274                     }
275 
276                     // Critical edges won't have successor information in block_succ_range, but
277                     // they only have a single known successor to record anyway.
278                     &LoweredBlock::CriticalEdge { succ, .. } => {
279                         let succ_index = lb_to_bindex[&LoweredBlock::Orig { block: succ }];
280                         lowered_succ_indices.push(succ_index);
281 
282                         // Edges inherit indirect branch and cold block metadata from their
283                         // successor.
284 
285                         if f.layout.is_cold(succ) {
286                             cold_blocks.insert(bindex);
287                         }
288 
289                         if indirect_branch_target_clif_blocks.contains(&succ) {
290                             indirect_branch_targets.insert(bindex);
291                         }
292 
293                         None
294                     }
295                 };
296                 let end = lowered_succ_indices.len();
297                 (opt_inst, start..end)
298             }));
299 
300         let result = BlockLoweringOrder {
301             lowered_order,
302             lowered_succ_indices,
303             lowered_succ_ranges,
304             blockindex_by_block,
305             cold_blocks,
306             indirect_branch_targets,
307         };
308 
309         trace!("BlockLoweringOrder: {:#?}", result);
310         result
311     }
312 
313     /// Get the lowered order of blocks.
lowered_order(&self) -> &[LoweredBlock]314     pub fn lowered_order(&self) -> &[LoweredBlock] {
315         &self.lowered_order[..]
316     }
317 
318     /// Get the BlockIndex, if any, for a given Block.
319     ///
320     /// The result will be `None` if the given Block is unreachable
321     /// (and thus does not appear in the lowered order).
lowered_index_for_block(&self, block: Block) -> Option<BlockIndex>322     pub fn lowered_index_for_block(&self, block: Block) -> Option<BlockIndex> {
323         let idx = self.blockindex_by_block[block];
324         if idx.is_valid() { Some(idx) } else { None }
325     }
326 
327     /// Get the successor indices for a lowered block.
succ_indices(&self, block: BlockIndex) -> (Option<Inst>, &[BlockIndex])328     pub fn succ_indices(&self, block: BlockIndex) -> (Option<Inst>, &[BlockIndex]) {
329         let (opt_inst, range) = &self.lowered_succ_ranges[block.index()];
330         (*opt_inst, &self.lowered_succ_indices[range.clone()])
331     }
332 
333     /// Determine whether the given lowered-block index is cold.
is_cold(&self, block: BlockIndex) -> bool334     pub fn is_cold(&self, block: BlockIndex) -> bool {
335         self.cold_blocks.contains(&block)
336     }
337 
338     /// Determine whether the given lowered block index is an indirect branch
339     /// target.
is_indirect_branch_target(&self, block: BlockIndex) -> bool340     pub fn is_indirect_branch_target(&self, block: BlockIndex) -> bool {
341         self.indirect_branch_targets.contains(&block)
342     }
343 }
344 
345 #[cfg(test)]
346 mod test {
347     use super::*;
348     use crate::cursor::{Cursor, FuncCursor};
349     use crate::flowgraph::ControlFlowGraph;
350     use crate::ir::UserFuncName;
351     use crate::ir::types::*;
352     use crate::ir::{AbiParam, InstBuilder, Signature};
353     use crate::isa::CallConv;
354 
build_test_func(n_blocks: usize, edges: &[(usize, usize)]) -> BlockLoweringOrder355     fn build_test_func(n_blocks: usize, edges: &[(usize, usize)]) -> BlockLoweringOrder {
356         assert!(n_blocks > 0);
357 
358         let name = UserFuncName::testcase("test0");
359         let mut sig = Signature::new(CallConv::SystemV);
360         sig.params.push(AbiParam::new(I32));
361         let mut func = Function::with_name_signature(name, sig);
362         let blocks = (0..n_blocks)
363             .map(|i| {
364                 let bb = func.dfg.make_block();
365                 assert!(bb.as_u32() == i as u32);
366                 bb
367             })
368             .collect::<Vec<_>>();
369 
370         let arg0 = func.dfg.append_block_param(blocks[0], I32);
371 
372         let mut pos = FuncCursor::new(&mut func);
373 
374         let mut edge = 0;
375         for i in 0..n_blocks {
376             pos.insert_block(blocks[i]);
377             let mut succs = vec![];
378             while edge < edges.len() && edges[edge].0 == i {
379                 succs.push(edges[edge].1);
380                 edge += 1;
381             }
382             if succs.len() == 0 {
383                 pos.ins().return_(&[arg0]);
384             } else if succs.len() == 1 {
385                 pos.ins().jump(blocks[succs[0]], &[]);
386             } else if succs.len() == 2 {
387                 pos.ins()
388                     .brif(arg0, blocks[succs[0]], &[], blocks[succs[1]], &[]);
389             } else {
390                 panic!("Too many successors");
391             }
392         }
393 
394         let mut cfg = ControlFlowGraph::new();
395         cfg.compute(&func);
396         let dom_tree = DominatorTree::with_function(&func, &cfg);
397 
398         BlockLoweringOrder::new(&func, &dom_tree, &mut Default::default())
399     }
400 
401     #[test]
test_blockorder_diamond()402     fn test_blockorder_diamond() {
403         let order = build_test_func(4, &[(0, 1), (0, 2), (1, 3), (2, 3)]);
404 
405         // This test case doesn't need to introduce any critical edges, as all regalloc allocations
406         // can sit on either the entry or exit of blocks 1 and 2.
407         assert_eq!(order.lowered_order.len(), 4);
408     }
409 
410     #[test]
test_blockorder_critedge()411     fn test_blockorder_critedge() {
412         //            0
413         //          /   \
414         //         1     2
415         //        /  \     \
416         //       3    4    |
417         //       |\  _|____|
418         //       | \/ |
419         //       | /\ |
420         //       5    6
421         //
422         // (3 -> 5, and 3 -> 6 are critical edges and must be split)
423         //
424         let order = build_test_func(
425             7,
426             &[
427                 (0, 1),
428                 (0, 2),
429                 (1, 3),
430                 (1, 4),
431                 (2, 5),
432                 (3, 5),
433                 (3, 6),
434                 (4, 6),
435             ],
436         );
437 
438         assert_eq!(order.lowered_order.len(), 9);
439         println!("ordered = {:?}", order.lowered_order);
440 
441         // block 0
442         assert_eq!(order.lowered_order[0].orig_block().unwrap().as_u32(), 0);
443         assert!(order.lowered_order[0].in_edge().is_none());
444         assert!(order.lowered_order[0].out_edge().is_none());
445 
446         // block 2
447         assert_eq!(order.lowered_order[1].orig_block().unwrap().as_u32(), 2);
448         assert!(order.lowered_order[1].in_edge().is_none());
449         assert!(order.lowered_order[1].out_edge().is_none());
450 
451         // block 1
452         assert_eq!(order.lowered_order[2].orig_block().unwrap().as_u32(), 1);
453         assert!(order.lowered_order[2].in_edge().is_none());
454         assert!(order.lowered_order[2].out_edge().is_none());
455 
456         // block 4
457         assert_eq!(order.lowered_order[3].orig_block().unwrap().as_u32(), 4);
458         assert!(order.lowered_order[3].in_edge().is_none());
459         assert!(order.lowered_order[3].out_edge().is_none());
460 
461         // block 3
462         assert_eq!(order.lowered_order[4].orig_block().unwrap().as_u32(), 3);
463         assert!(order.lowered_order[4].in_edge().is_none());
464         assert!(order.lowered_order[4].out_edge().is_none());
465 
466         // critical edge 3 -> 5
467         assert!(order.lowered_order[5].orig_block().is_none());
468         assert_eq!(order.lowered_order[5].in_edge().unwrap().as_u32(), 3);
469         assert_eq!(order.lowered_order[5].out_edge().unwrap().as_u32(), 5);
470 
471         // critical edge 3 -> 6
472         assert!(order.lowered_order[6].orig_block().is_none());
473         assert_eq!(order.lowered_order[6].in_edge().unwrap().as_u32(), 3);
474         assert_eq!(order.lowered_order[6].out_edge().unwrap().as_u32(), 6);
475 
476         // block 6
477         assert_eq!(order.lowered_order[7].orig_block().unwrap().as_u32(), 6);
478         assert!(order.lowered_order[7].in_edge().is_none());
479         assert!(order.lowered_order[7].out_edge().is_none());
480 
481         // block 5
482         assert_eq!(order.lowered_order[8].orig_block().unwrap().as_u32(), 5);
483         assert!(order.lowered_order[8].in_edge().is_none());
484         assert!(order.lowered_order[8].out_edge().is_none());
485     }
486 }
487