1 //! This implements the VCode container: a CFG of Insts that have been lowered.
2 //!
3 //! VCode is virtual-register code. An instruction in VCode is almost a machine
4 //! instruction; however, its register slots can refer to virtual registers in
5 //! addition to real machine registers.
6 //!
7 //! VCode is structured with traditional basic blocks, and
8 //! each block must be terminated by an unconditional branch (one target), a
9 //! conditional branch (two targets), or a return (no targets). Note that this
10 //! slightly differs from the machine code of most ISAs: in most ISAs, a
11 //! conditional branch has one target (and the not-taken case falls through).
12 //! However, we expect that machine backends will elide branches to the following
13 //! block (i.e., zero-offset jumps), and will be able to codegen a branch-cond /
14 //! branch-uncond pair if *both* targets are not fallthrough. This allows us to
15 //! play with layout prior to final binary emission, as well, if we want.
16 //!
17 //! See the main module comment in `mod.rs` for more details on the VCode-based
18 //! backend pipeline.
19 
20 use crate::ir::pcc::*;
21 use crate::ir::{self, types, Constant, ConstantData, ValueLabel};
22 use crate::machinst::*;
23 use crate::ranges::Ranges;
24 use crate::timing;
25 use crate::trace;
26 use crate::CodegenError;
27 use crate::{LabelValueLoc, ValueLocRange};
28 use regalloc2::{
29     Edit, Function as RegallocFunction, InstOrEdit, InstRange, MachineEnv, Operand,
30     OperandConstraint, OperandKind, PRegSet, RegClass,
31 };
32 use rustc_hash::FxHashMap;
33 
34 use core::mem::take;
35 use cranelift_entity::{entity_impl, Keys};
36 use std::collections::hash_map::Entry;
37 use std::collections::HashMap;
38 use std::fmt;
39 
40 /// Index referring to an instruction in VCode.
41 pub type InsnIndex = regalloc2::Inst;
42 
43 /// Index referring to a basic block in VCode.
44 pub type BlockIndex = regalloc2::Block;
45 
46 /// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be
47 /// a `MachInst` and it must be able to emit itself at least to a `SizeCodeSink`.
48 pub trait VCodeInst: MachInst + MachInstEmit {}
49 impl<I: MachInst + MachInstEmit> VCodeInst for I {}
50 
51 /// A function in "VCode" (virtualized-register code) form, after
52 /// lowering.  This is essentially a standard CFG of basic blocks,
53 /// where each basic block consists of lowered instructions produced
54 /// by the machine-specific backend.
55 ///
56 /// Note that the VCode is immutable once produced, and is not
57 /// modified by register allocation in particular. Rather, register
58 /// allocation on the `VCode` produces a separate `regalloc2::Output`
59 /// struct, and this can be passed to `emit`. `emit` in turn does not
60 /// modify the vcode, but produces an `EmitResult`, which contains the
61 /// machine code itself, and the associated disassembly and/or
62 /// metadata as requested.
63 pub struct VCode<I: VCodeInst> {
64     /// VReg IR-level types.
65     vreg_types: Vec<Type>,
66 
67     /// Lowered machine instructions in order corresponding to the original IR.
68     insts: Vec<I>,
69 
70     /// Operands: pre-regalloc references to virtual registers with
71     /// constraints, in one flattened array. This allows the regalloc
72     /// to efficiently access all operands without requiring expensive
73     /// matches or method invocations on insts.
74     operands: Vec<Operand>,
75 
76     /// Operand index ranges: for each instruction in `insts`, there
77     /// is a tuple here providing the range in `operands` for that
78     /// instruction's operands.
79     operand_ranges: Ranges,
80 
81     /// Clobbers: a sparse map from instruction indices to clobber masks.
82     clobbers: FxHashMap<InsnIndex, PRegSet>,
83 
84     /// Source locations for each instruction. (`SourceLoc` is a `u32`, so it is
85     /// reasonable to keep one of these per instruction.)
86     srclocs: Vec<RelSourceLoc>,
87 
88     /// Entry block.
89     entry: BlockIndex,
90 
91     /// Block instruction indices.
92     block_ranges: Ranges,
93 
94     /// Block successors: index range in the `block_succs` list.
95     block_succ_range: Ranges,
96 
97     /// Block successor lists, concatenated into one vec. The
98     /// `block_succ_range` list of tuples above gives (start, end)
99     /// ranges within this list that correspond to each basic block's
100     /// successors.
101     block_succs: Vec<regalloc2::Block>,
102 
103     /// Block predecessors: index range in the `block_preds` list.
104     block_pred_range: Ranges,
105 
106     /// Block predecessor lists, concatenated into one vec. The
107     /// `block_pred_range` list of tuples above gives (start, end)
108     /// ranges within this list that correspond to each basic block's
109     /// predecessors.
110     block_preds: Vec<regalloc2::Block>,
111 
112     /// Block parameters: index range in `block_params` below.
113     block_params_range: Ranges,
114 
115     /// Block parameter lists, concatenated into one vec. The
116     /// `block_params_range` list of tuples above gives (start, end)
117     /// ranges within this list that correspond to each basic block's
118     /// blockparam vregs.
119     block_params: Vec<regalloc2::VReg>,
120 
121     /// Outgoing block arguments on branch instructions, concatenated
122     /// into one list.
123     ///
124     /// Note that this is conceptually a 3D array: we have a VReg list
125     /// per block, per successor. We flatten those three dimensions
126     /// into this 1D vec, then store index ranges in two levels of
127     /// indirection.
128     ///
129     /// Indexed by the indices in `branch_block_arg_succ_range`.
130     branch_block_args: Vec<regalloc2::VReg>,
131 
132     /// Array of sequences of (start, end) tuples in
133     /// `branch_block_args`, one for each successor; these sequences
134     /// for each block are concatenated.
135     ///
136     /// Indexed by the indices in `branch_block_arg_succ_range`.
137     branch_block_arg_range: Ranges,
138 
139     /// For a given block, indices in `branch_block_arg_range`
140     /// corresponding to all of its successors.
141     branch_block_arg_succ_range: Ranges,
142 
143     /// Block-order information.
144     block_order: BlockLoweringOrder,
145 
146     /// ABI object.
147     pub(crate) abi: Callee<I::ABIMachineSpec>,
148 
149     /// Constant information used during code emission. This should be
150     /// immutable across function compilations within the same module.
151     emit_info: I::Info,
152 
153     /// Reference-typed `regalloc2::VReg`s. The regalloc requires
154     /// these in a dense slice (as opposed to querying the
155     /// reftype-status of each vreg) for efficient iteration.
156     reftyped_vregs: Vec<VReg>,
157 
158     /// Constants.
159     pub(crate) constants: VCodeConstants,
160 
161     /// Value labels for debuginfo attached to vregs.
162     debug_value_labels: Vec<(VReg, InsnIndex, InsnIndex, u32)>,
163 
164     pub(crate) sigs: SigSet,
165 
166     /// Facts on VRegs, for proof-carrying code verification.
167     facts: Vec<Option<Fact>>,
168 }
169 
170 /// The result of `VCode::emit`. Contains all information computed
171 /// during emission: actual machine code, optionally a disassembly,
172 /// and optionally metadata about the code layout.
173 pub struct EmitResult {
174     /// The MachBuffer containing the machine code.
175     pub buffer: MachBufferFinalized<Stencil>,
176 
177     /// Offset of each basic block, recorded during emission. Computed
178     /// only if `debug_value_labels` is non-empty.
179     pub bb_offsets: Vec<CodeOffset>,
180 
181     /// Final basic-block edges, in terms of code offsets of
182     /// bb-starts. Computed only if `debug_value_labels` is non-empty.
183     pub bb_edges: Vec<(CodeOffset, CodeOffset)>,
184 
185     /// Final length of function body.
186     pub func_body_len: CodeOffset,
187 
188     /// The pretty-printed disassembly, if any. This uses the same
189     /// pretty-printing for MachInsts as the pre-regalloc VCode Debug
190     /// implementation, but additionally includes the prologue and
191     /// epilogue(s), and makes use of the regalloc results.
192     pub disasm: Option<String>,
193 
194     /// Offsets of sized stackslots.
195     pub sized_stackslot_offsets: PrimaryMap<StackSlot, u32>,
196 
197     /// Offsets of dynamic stackslots.
198     pub dynamic_stackslot_offsets: PrimaryMap<DynamicStackSlot, u32>,
199 
200     /// Value-labels information (debug metadata).
201     pub value_labels_ranges: ValueLabelsRanges,
202 
203     /// Stack frame size.
204     pub frame_size: u32,
205 }
206 
207 /// A builder for a VCode function body.
208 ///
209 /// This builder has the ability to accept instructions in either
210 /// forward or reverse order, depending on the pass direction that
211 /// produces the VCode. The lowering from CLIF to VCode<MachInst>
212 /// ordinarily occurs in reverse order (in order to allow instructions
213 /// to be lowered only if used, and not merged) so a reversal will
214 /// occur at the end of lowering to ensure the VCode is in machine
215 /// order.
216 ///
217 /// If built in reverse, block and instruction indices used once the
218 /// VCode is built are relative to the final (reversed) order, not the
219 /// order of construction. Note that this means we do not know the
220 /// final block or instruction indices when building, so we do not
221 /// hand them out. (The user is assumed to know them when appending
222 /// terminator instructions with successor blocks.)
223 pub struct VCodeBuilder<I: VCodeInst> {
224     /// In-progress VCode.
225     pub(crate) vcode: VCode<I>,
226 
227     /// In what direction is the build occurring?
228     direction: VCodeBuildDirection,
229 
230     /// Debug-value label in-progress map, keyed by label. For each
231     /// label, we keep disjoint ranges mapping to vregs. We'll flatten
232     /// this into (vreg, range, label) tuples when done.
233     debug_info: FxHashMap<ValueLabel, Vec<(InsnIndex, InsnIndex, VReg)>>,
234 }
235 
236 /// Direction in which a VCodeBuilder builds VCode.
237 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
238 pub enum VCodeBuildDirection {
239     // TODO: add `Forward` once we need it and can test it adequately.
240     /// Backward-build pass: we expect the producer to call `emit()`
241     /// with instructions in reverse program order within each block.
242     Backward,
243 }
244 
245 impl<I: VCodeInst> VCodeBuilder<I> {
246     /// Create a new VCodeBuilder.
247     pub fn new(
248         sigs: SigSet,
249         abi: Callee<I::ABIMachineSpec>,
250         emit_info: I::Info,
251         block_order: BlockLoweringOrder,
252         constants: VCodeConstants,
253         direction: VCodeBuildDirection,
254     ) -> VCodeBuilder<I> {
255         let vcode = VCode::new(sigs, abi, emit_info, block_order, constants);
256 
257         VCodeBuilder {
258             vcode,
259             direction,
260             debug_info: FxHashMap::default(),
261         }
262     }
263 
264     pub fn init_retval_area(&mut self, vregs: &mut VRegAllocator<I>) -> CodegenResult<()> {
265         self.vcode.abi.init_retval_area(&self.vcode.sigs, vregs)
266     }
267 
268     /// Access the ABI object.
269     pub fn abi(&self) -> &Callee<I::ABIMachineSpec> {
270         &self.vcode.abi
271     }
272 
273     /// Access the ABI object.
274     pub fn abi_mut(&mut self) -> &mut Callee<I::ABIMachineSpec> {
275         &mut self.vcode.abi
276     }
277 
278     pub fn sigs(&self) -> &SigSet {
279         &self.vcode.sigs
280     }
281 
282     pub fn sigs_mut(&mut self) -> &mut SigSet {
283         &mut self.vcode.sigs
284     }
285 
286     /// Access to the BlockLoweringOrder object.
287     pub fn block_order(&self) -> &BlockLoweringOrder {
288         &self.vcode.block_order
289     }
290 
291     /// Set the current block as the entry block.
292     pub fn set_entry(&mut self, block: BlockIndex) {
293         self.vcode.entry = block;
294     }
295 
296     /// End the current basic block. Must be called after emitting vcode insts
297     /// for IR insts and prior to ending the function (building the VCode).
298     pub fn end_bb(&mut self) {
299         let end_idx = self.vcode.insts.len();
300         // Add the instruction index range to the list of blocks.
301         self.vcode.block_ranges.push_end(end_idx);
302         // End the successors list.
303         let succ_end = self.vcode.block_succs.len();
304         self.vcode.block_succ_range.push_end(succ_end);
305         // End the blockparams list.
306         let block_params_end = self.vcode.block_params.len();
307         self.vcode.block_params_range.push_end(block_params_end);
308         // End the branch blockparam args list.
309         let branch_block_arg_succ_end = self.vcode.branch_block_arg_range.len();
310         self.vcode
311             .branch_block_arg_succ_range
312             .push_end(branch_block_arg_succ_end);
313     }
314 
315     pub fn add_block_param(&mut self, param: VirtualReg) {
316         self.vcode.block_params.push(param.into());
317     }
318 
319     fn add_branch_args_for_succ(&mut self, args: &[Reg]) {
320         self.vcode
321             .branch_block_args
322             .extend(args.iter().map(|&arg| VReg::from(arg)));
323         let end = self.vcode.branch_block_args.len();
324         self.vcode.branch_block_arg_range.push_end(end);
325     }
326 
327     /// Push an instruction for the current BB and current IR inst
328     /// within the BB.
329     pub fn push(&mut self, insn: I, loc: RelSourceLoc) {
330         self.vcode.insts.push(insn);
331         self.vcode.srclocs.push(loc);
332     }
333 
334     /// Add a successor block with branch args.
335     pub fn add_succ(&mut self, block: BlockIndex, args: &[Reg]) {
336         self.vcode.block_succs.push(block);
337         self.add_branch_args_for_succ(args);
338     }
339 
340     /// Add a debug value label to a register.
341     pub fn add_value_label(&mut self, reg: Reg, label: ValueLabel) {
342         // We'll fix up labels in reverse(). Because we're generating
343         // code bottom-to-top, the liverange of the label goes *from*
344         // the last index at which was defined (or 0, which is the end
345         // of the eventual function) *to* just this instruction, and
346         // no further.
347         let inst = InsnIndex::new(self.vcode.insts.len());
348         let labels = self.debug_info.entry(label).or_insert_with(|| vec![]);
349         let last = labels
350             .last()
351             .map(|(_start, end, _vreg)| *end)
352             .unwrap_or(InsnIndex::new(0));
353         labels.push((last, inst, reg.into()));
354     }
355 
356     /// Access the constants.
357     pub fn constants(&mut self) -> &mut VCodeConstants {
358         &mut self.vcode.constants
359     }
360 
361     fn compute_preds_from_succs(&mut self) {
362         // Do a linear-time counting sort: first determine how many
363         // times each block appears as a successor.
364         let mut starts = vec![0u32; self.vcode.num_blocks()];
365         for succ in &self.vcode.block_succs {
366             starts[succ.index()] += 1;
367         }
368 
369         // Determine for each block the starting index where that
370         // block's predecessors should go. This is equivalent to the
371         // ranges we need to store in block_pred_range.
372         self.vcode.block_pred_range.reserve(starts.len());
373         let mut end = 0;
374         for count in starts.iter_mut() {
375             let start = end;
376             end += *count;
377             *count = start;
378             self.vcode.block_pred_range.push_end(end as usize);
379         }
380         let end = end as usize;
381         debug_assert_eq!(end, self.vcode.block_succs.len());
382 
383         // Walk over the successors again, this time grouped by
384         // predecessor, and push the predecessor at the current
385         // starting position of each of its successors. We build
386         // each group of predecessors in whatever order Ranges::iter
387         // returns them; regalloc2 doesn't care.
388         self.vcode.block_preds.resize(end, BlockIndex::invalid());
389         for (pred, range) in self.vcode.block_succ_range.iter() {
390             let pred = BlockIndex::new(pred);
391             for succ in &self.vcode.block_succs[range] {
392                 let pos = &mut starts[succ.index()];
393                 self.vcode.block_preds[*pos as usize] = pred;
394                 *pos += 1;
395             }
396         }
397         debug_assert!(self.vcode.block_preds.iter().all(|pred| pred.is_valid()));
398     }
399 
400     /// Called once, when a build in Backward order is complete, to
401     /// perform the overall reversal (into final forward order) and
402     /// finalize metadata accordingly.
403     fn reverse_and_finalize(&mut self, vregs: &VRegAllocator<I>) {
404         let n_insts = self.vcode.insts.len();
405         if n_insts == 0 {
406             return;
407         }
408 
409         // Reverse the per-block and per-inst sequences.
410         self.vcode.block_ranges.reverse_index();
411         self.vcode.block_ranges.reverse_target(n_insts);
412         // block_params_range is indexed by block (and blocks were
413         // traversed in reverse) so we reverse it; but block-param
414         // sequences in the concatenated vec can remain in reverse
415         // order (it is effectively an arena of arbitrarily-placed
416         // referenced sequences).
417         self.vcode.block_params_range.reverse_index();
418         // Likewise, we reverse block_succ_range, but the block_succ
419         // concatenated array can remain as-is.
420         self.vcode.block_succ_range.reverse_index();
421         self.vcode.insts.reverse();
422         self.vcode.srclocs.reverse();
423         // Likewise, branch_block_arg_succ_range is indexed by block
424         // so must be reversed.
425         self.vcode.branch_block_arg_succ_range.reverse_index();
426 
427         // To translate an instruction index *endpoint* in reversed
428         // order to forward order, compute `n_insts - i`.
429         //
430         // Why not `n_insts - 1 - i`? That would be correct to
431         // translate an individual instruction index (for ten insts 0
432         // to 9 inclusive, inst 0 becomes 9, and inst 9 becomes
433         // 0). But for the usual inclusive-start, exclusive-end range
434         // idiom, inclusive starts become exclusive ends and
435         // vice-versa, so e.g. an (inclusive) start of 0 becomes an
436         // (exclusive) end of 10.
437         let translate = |inst: InsnIndex| InsnIndex::new(n_insts - inst.index());
438 
439         // Generate debug-value labels based on per-label maps.
440         for (label, tuples) in &self.debug_info {
441             for &(start, end, vreg) in tuples {
442                 let vreg = vregs.resolve_vreg_alias(vreg);
443                 let fwd_start = translate(end);
444                 let fwd_end = translate(start);
445                 self.vcode
446                     .debug_value_labels
447                     .push((vreg, fwd_start, fwd_end, label.as_u32()));
448             }
449         }
450 
451         // Now sort debug value labels by VReg, as required
452         // by regalloc2.
453         self.vcode
454             .debug_value_labels
455             .sort_unstable_by_key(|(vreg, _, _, _)| *vreg);
456     }
457 
458     fn collect_operands(&mut self, vregs: &VRegAllocator<I>) {
459         let allocatable = PRegSet::from(self.vcode.machine_env());
460         for (i, insn) in self.vcode.insts.iter_mut().enumerate() {
461             // Push operands from the instruction onto the operand list.
462             //
463             // We rename through the vreg alias table as we collect
464             // the operands. This is better than a separate post-pass
465             // over operands, because it has more cache locality:
466             // operands only need to pass through L1 once. This is
467             // also better than renaming instructions'
468             // operands/registers while lowering, because here we only
469             // need to do the `match` over the instruction to visit
470             // its register fields (which is slow, branchy code) once.
471 
472             let mut op_collector =
473                 OperandCollector::new(&mut self.vcode.operands, allocatable, |vreg| {
474                     vregs.resolve_vreg_alias(vreg)
475                 });
476             insn.get_operands(&mut op_collector);
477             let (ops, clobbers) = op_collector.finish();
478             self.vcode.operand_ranges.push_end(ops);
479 
480             if clobbers != PRegSet::default() {
481                 self.vcode.clobbers.insert(InsnIndex::new(i), clobbers);
482             }
483 
484             if let Some((dst, src)) = insn.is_move() {
485                 // We should never see non-virtual registers present in move
486                 // instructions.
487                 assert!(
488                     src.is_virtual(),
489                     "the real register {:?} was used as the source of a move instruction",
490                     src
491                 );
492                 assert!(
493                     dst.to_reg().is_virtual(),
494                     "the real register {:?} was used as the destination of a move instruction",
495                     dst.to_reg()
496                 );
497             }
498         }
499 
500         // Translate blockparam args via the vreg aliases table as well.
501         for arg in &mut self.vcode.branch_block_args {
502             let new_arg = vregs.resolve_vreg_alias(*arg);
503             trace!("operandcollector: block arg {:?} -> {:?}", arg, new_arg);
504             *arg = new_arg;
505         }
506     }
507 
508     /// Build the final VCode.
509     pub fn build(mut self, mut vregs: VRegAllocator<I>) -> VCode<I> {
510         self.vcode.vreg_types = take(&mut vregs.vreg_types);
511         self.vcode.facts = take(&mut vregs.facts);
512         self.vcode.reftyped_vregs = take(&mut vregs.reftyped_vregs);
513 
514         if self.direction == VCodeBuildDirection::Backward {
515             self.reverse_and_finalize(&vregs);
516         }
517         self.collect_operands(&vregs);
518 
519         // Apply register aliases to the `reftyped_vregs` list since this list
520         // will be returned directly to `regalloc2` eventually and all
521         // operands/results of instructions will use the alias-resolved vregs
522         // from `regalloc2`'s perspective.
523         //
524         // Also note that `reftyped_vregs` can't have duplicates, so after the
525         // aliases are applied duplicates are removed.
526         for reg in self.vcode.reftyped_vregs.iter_mut() {
527             *reg = vregs.resolve_vreg_alias(*reg);
528         }
529         self.vcode.reftyped_vregs.sort();
530         self.vcode.reftyped_vregs.dedup();
531 
532         self.compute_preds_from_succs();
533         self.vcode.debug_value_labels.sort_unstable();
534 
535         // At this point, nothing in the vcode should mention any
536         // VReg which has been aliased. All the appropriate rewriting
537         // should have happened above. Just to be sure, let's
538         // double-check each field which has vregs.
539         // Note: can't easily check vcode.insts, resolved in collect_operands.
540         // Operands are resolved in collect_operands.
541         vregs.debug_assert_no_vreg_aliases(self.vcode.operands.iter().map(|op| op.vreg()));
542         // Currently block params are never aliased to another vreg.
543         vregs.debug_assert_no_vreg_aliases(self.vcode.block_params.iter().copied());
544         // Branch block args are resolved in collect_operands.
545         vregs.debug_assert_no_vreg_aliases(self.vcode.branch_block_args.iter().copied());
546         // Reftyped vregs are resolved above in this function.
547         vregs.debug_assert_no_vreg_aliases(self.vcode.reftyped_vregs.iter().copied());
548         // Debug value labels are resolved in reverse_and_finalize.
549         vregs.debug_assert_no_vreg_aliases(
550             self.vcode.debug_value_labels.iter().map(|&(vreg, ..)| vreg),
551         );
552         // Facts are resolved eagerly during set_vreg_alias.
553         vregs.debug_assert_no_vreg_aliases(
554             self.vcode
555                 .facts
556                 .iter()
557                 .zip(&vregs.vreg_types)
558                 .enumerate()
559                 .filter(|(_, (fact, _))| fact.is_some())
560                 .map(|(vreg, (_, &ty))| {
561                     let (regclasses, _) = I::rc_for_type(ty).unwrap();
562                     VReg::new(vreg, regclasses[0])
563                 }),
564         );
565 
566         self.vcode
567     }
568 }
569 
570 /// Is this type a reference type?
571 fn is_reftype(ty: Type) -> bool {
572     ty == types::R64 || ty == types::R32
573 }
574 
575 const NO_INST_OFFSET: CodeOffset = u32::MAX;
576 
577 impl<I: VCodeInst> VCode<I> {
578     /// New empty VCode.
579     fn new(
580         sigs: SigSet,
581         abi: Callee<I::ABIMachineSpec>,
582         emit_info: I::Info,
583         block_order: BlockLoweringOrder,
584         constants: VCodeConstants,
585     ) -> VCode<I> {
586         let n_blocks = block_order.lowered_order().len();
587         VCode {
588             sigs,
589             vreg_types: vec![],
590             insts: Vec::with_capacity(10 * n_blocks),
591             operands: Vec::with_capacity(30 * n_blocks),
592             operand_ranges: Ranges::with_capacity(10 * n_blocks),
593             clobbers: FxHashMap::default(),
594             srclocs: Vec::with_capacity(10 * n_blocks),
595             entry: BlockIndex::new(0),
596             block_ranges: Ranges::with_capacity(n_blocks),
597             block_succ_range: Ranges::with_capacity(n_blocks),
598             block_succs: Vec::with_capacity(n_blocks),
599             block_pred_range: Ranges::default(),
600             block_preds: Vec::new(),
601             block_params_range: Ranges::with_capacity(n_blocks),
602             block_params: Vec::with_capacity(5 * n_blocks),
603             branch_block_args: Vec::with_capacity(10 * n_blocks),
604             branch_block_arg_range: Ranges::with_capacity(2 * n_blocks),
605             branch_block_arg_succ_range: Ranges::with_capacity(n_blocks),
606             block_order,
607             abi,
608             emit_info,
609             reftyped_vregs: vec![],
610             constants,
611             debug_value_labels: vec![],
612             facts: vec![],
613         }
614     }
615 
616     /// Get the ABI-dependent MachineEnv for managing register allocation.
617     pub fn machine_env(&self) -> &MachineEnv {
618         self.abi.machine_env(&self.sigs)
619     }
620 
621     /// Get the number of blocks. Block indices will be in the range `0 ..
622     /// (self.num_blocks() - 1)`.
623     pub fn num_blocks(&self) -> usize {
624         self.block_ranges.len()
625     }
626 
627     /// The number of lowered instructions.
628     pub fn num_insts(&self) -> usize {
629         self.insts.len()
630     }
631 
632     fn compute_clobbers(&self, regalloc: &regalloc2::Output) -> Vec<Writable<RealReg>> {
633         let mut clobbered = PRegSet::default();
634 
635         // All moves are included in clobbers.
636         for (_, Edit::Move { to, .. }) in &regalloc.edits {
637             if let Some(preg) = to.as_reg() {
638                 clobbered.add(preg);
639             }
640         }
641 
642         for (i, range) in self.operand_ranges.iter() {
643             // Skip this instruction if not "included in clobbers" as
644             // per the MachInst. (Some backends use this to implement
645             // ABI specifics; e.g., excluding calls of the same ABI as
646             // the current function from clobbers, because by
647             // definition everything clobbered by the call can be
648             // clobbered by this function without saving as well.)
649             if !self.insts[i].is_included_in_clobbers() {
650                 continue;
651             }
652 
653             let operands = &self.operands[range.clone()];
654             let allocs = &regalloc.allocs[range];
655             for (operand, alloc) in operands.iter().zip(allocs.iter()) {
656                 if operand.kind() == OperandKind::Def {
657                     if let Some(preg) = alloc.as_reg() {
658                         clobbered.add(preg);
659                     }
660                 }
661             }
662 
663             // Also add explicitly-clobbered registers.
664             if let Some(&inst_clobbered) = self.clobbers.get(&InsnIndex::new(i)) {
665                 clobbered.union_from(inst_clobbered);
666             }
667         }
668 
669         clobbered
670             .into_iter()
671             .map(|preg| Writable::from_reg(RealReg::from(preg)))
672             .collect()
673     }
674 
675     /// Emit the instructions to a `MachBuffer`, containing fixed-up
676     /// code and external reloc/trap/etc. records ready for use. Takes
677     /// the regalloc results as well.
678     ///
679     /// Returns the machine code itself, and optionally metadata
680     /// and/or a disassembly, as an `EmitResult`. The `VCode` itself
681     /// is consumed by the emission process.
682     pub fn emit(
683         mut self,
684         regalloc: &regalloc2::Output,
685         want_disasm: bool,
686         flags: &settings::Flags,
687         ctrl_plane: &mut ControlPlane,
688     ) -> EmitResult
689     where
690         I: VCodeInst,
691     {
692         // To write into disasm string.
693         use core::fmt::Write;
694 
695         let _tt = timing::vcode_emit();
696         let mut buffer = MachBuffer::new();
697         let mut bb_starts: Vec<Option<CodeOffset>> = vec![];
698 
699         // The first M MachLabels are reserved for block indices.
700         buffer.reserve_labels_for_blocks(self.num_blocks());
701 
702         // Register all allocated constants with the `MachBuffer` to ensure that
703         // any references to the constants during instructions can be handled
704         // correctly.
705         buffer.register_constants(&self.constants);
706 
707         // Construct the final order we emit code in: cold blocks at the end.
708         let mut final_order: SmallVec<[BlockIndex; 16]> = smallvec![];
709         let mut cold_blocks: SmallVec<[BlockIndex; 16]> = smallvec![];
710         for block in 0..self.num_blocks() {
711             let block = BlockIndex::new(block);
712             if self.block_order.is_cold(block) {
713                 cold_blocks.push(block);
714             } else {
715                 final_order.push(block);
716             }
717         }
718         final_order.extend(cold_blocks.clone());
719 
720         // Compute/save info we need for the prologue: clobbers and
721         // number of spillslots.
722         //
723         // We clone `abi` here because we will mutate it as we
724         // generate the prologue and set other info, but we can't
725         // mutate `VCode`. The info it usually carries prior to
726         // setting clobbers is fairly minimal so this should be
727         // relatively cheap.
728         let clobbers = self.compute_clobbers(regalloc);
729         self.abi
730             .compute_frame_layout(&self.sigs, regalloc.num_spillslots, clobbers);
731 
732         // Emit blocks.
733         let mut cur_srcloc = None;
734         let mut last_offset = None;
735         let mut inst_offsets = vec![];
736         let mut state = I::State::new(&self.abi, std::mem::take(ctrl_plane));
737 
738         let mut disasm = String::new();
739 
740         if !self.debug_value_labels.is_empty() {
741             inst_offsets.resize(self.insts.len(), NO_INST_OFFSET);
742         }
743 
744         // Count edits per block ahead of time; this is needed for
745         // lookahead island emission. (We could derive it per-block
746         // with binary search in the edit list, but it's more
747         // efficient to do it in one pass here.)
748         let mut ra_edits_per_block: SmallVec<[u32; 64]> = smallvec![];
749         let mut edit_idx = 0;
750         for block in 0..self.num_blocks() {
751             let end_inst = InsnIndex::new(self.block_ranges.get(block).end);
752             let start_edit_idx = edit_idx;
753             while edit_idx < regalloc.edits.len() && regalloc.edits[edit_idx].0.inst() < end_inst {
754                 edit_idx += 1;
755             }
756             let end_edit_idx = edit_idx;
757             ra_edits_per_block.push((end_edit_idx - start_edit_idx) as u32);
758         }
759 
760         let is_forward_edge_cfi_enabled = self.abi.is_forward_edge_cfi_enabled();
761         let mut bb_padding = match flags.bb_padding_log2_minus_one() {
762             0 => Vec::new(),
763             n => vec![0; 1 << (n - 1)],
764         };
765         let mut total_bb_padding = 0;
766 
767         for (block_order_idx, &block) in final_order.iter().enumerate() {
768             trace!("emitting block {:?}", block);
769 
770             // Call the new block hook for state
771             state.on_new_block();
772 
773             // Emit NOPs to align the block.
774             let new_offset = I::align_basic_block(buffer.cur_offset());
775             while new_offset > buffer.cur_offset() {
776                 // Pad with NOPs up to the aligned block offset.
777                 let nop = I::gen_nop((new_offset - buffer.cur_offset()) as usize);
778                 nop.emit(&mut buffer, &self.emit_info, &mut Default::default());
779             }
780             assert_eq!(buffer.cur_offset(), new_offset);
781 
782             let do_emit = |inst: &I,
783                            disasm: &mut String,
784                            buffer: &mut MachBuffer<I>,
785                            state: &mut I::State| {
786                 if want_disasm && !inst.is_args() {
787                     let mut s = state.clone();
788                     writeln!(disasm, "  {}", inst.pretty_print_inst(&mut s)).unwrap();
789                 }
790                 inst.emit(buffer, &self.emit_info, state);
791             };
792 
793             // Is this the first block? Emit the prologue directly if so.
794             if block == self.entry {
795                 trace!(" -> entry block");
796                 buffer.start_srcloc(Default::default());
797                 for inst in &self.abi.gen_prologue() {
798                     do_emit(&inst, &mut disasm, &mut buffer, &mut state);
799                 }
800                 buffer.end_srcloc();
801             }
802 
803             // Now emit the regular block body.
804 
805             buffer.bind_label(MachLabel::from_block(block), state.ctrl_plane_mut());
806 
807             if want_disasm {
808                 writeln!(&mut disasm, "block{}:", block.index()).unwrap();
809             }
810 
811             if flags.machine_code_cfg_info() {
812                 // Track BB starts. If we have backed up due to MachBuffer
813                 // branch opts, note that the removed blocks were removed.
814                 let cur_offset = buffer.cur_offset();
815                 if last_offset.is_some() && cur_offset <= last_offset.unwrap() {
816                     for i in (0..bb_starts.len()).rev() {
817                         if bb_starts[i].is_some() && cur_offset > bb_starts[i].unwrap() {
818                             break;
819                         }
820                         bb_starts[i] = None;
821                     }
822                 }
823                 bb_starts.push(Some(cur_offset));
824                 last_offset = Some(cur_offset);
825             }
826 
827             if let Some(block_start) = I::gen_block_start(
828                 self.block_order.is_indirect_branch_target(block),
829                 is_forward_edge_cfi_enabled,
830             ) {
831                 do_emit(&block_start, &mut disasm, &mut buffer, &mut state);
832             }
833 
834             for inst_or_edit in regalloc.block_insts_and_edits(&self, block) {
835                 match inst_or_edit {
836                     InstOrEdit::Inst(iix) => {
837                         if !self.debug_value_labels.is_empty() {
838                             // If we need to produce debug info,
839                             // record the offset of each instruction
840                             // so that we can translate value-label
841                             // ranges to machine-code offsets.
842 
843                             // Cold blocks violate monotonicity
844                             // assumptions elsewhere (that
845                             // instructions in inst-index order are in
846                             // order in machine code), so we omit
847                             // their offsets here. Value-label range
848                             // generation below will skip empty ranges
849                             // and ranges with to-offsets of zero.
850                             if !self.block_order.is_cold(block) {
851                                 inst_offsets[iix.index()] = buffer.cur_offset();
852                             }
853                         }
854 
855                         // Update the srcloc at this point in the buffer.
856                         let srcloc = self.srclocs[iix.index()];
857                         if cur_srcloc != Some(srcloc) {
858                             if cur_srcloc.is_some() {
859                                 buffer.end_srcloc();
860                             }
861                             buffer.start_srcloc(srcloc);
862                             cur_srcloc = Some(srcloc);
863                         }
864 
865                         // If this is a safepoint, compute a stack map
866                         // and pass it to the emit state.
867                         if self.insts[iix.index()].is_safepoint() {
868                             let mut safepoint_slots: SmallVec<[SpillSlot; 8]> = smallvec![];
869                             // Find the contiguous range of
870                             // (progpoint, allocation) safepoint slot
871                             // records in `regalloc.safepoint_slots`
872                             // for this instruction index.
873                             let safepoint_slots_start = regalloc
874                                 .safepoint_slots
875                                 .binary_search_by(|(progpoint, _alloc)| {
876                                     if progpoint.inst() >= iix {
877                                         std::cmp::Ordering::Greater
878                                     } else {
879                                         std::cmp::Ordering::Less
880                                     }
881                                 })
882                                 .unwrap_err();
883 
884                             for (_, alloc) in regalloc.safepoint_slots[safepoint_slots_start..]
885                                 .iter()
886                                 .take_while(|(progpoint, _)| progpoint.inst() == iix)
887                             {
888                                 let slot = alloc.as_stack().unwrap();
889                                 safepoint_slots.push(slot);
890                             }
891                             if !safepoint_slots.is_empty() {
892                                 let stack_map = self
893                                     .abi
894                                     .spillslots_to_stack_map(&safepoint_slots[..], &state);
895                                 state.pre_safepoint(stack_map);
896                             }
897                         }
898 
899                         // If the instruction we are about to emit is
900                         // a return, place an epilogue at this point
901                         // (and don't emit the return; the actual
902                         // epilogue will contain it).
903                         if self.insts[iix.index()].is_term() == MachTerminator::Ret {
904                             for inst in self.abi.gen_epilogue() {
905                                 do_emit(&inst, &mut disasm, &mut buffer, &mut state);
906                             }
907                         } else {
908                             // Update the operands for this inst using the
909                             // allocations from the regalloc result.
910                             let mut allocs = regalloc.inst_allocs(iix).iter();
911                             self.insts[iix.index()].get_operands(
912                                 &mut |reg: &mut Reg, constraint, _kind, _pos| {
913                                     let alloc = allocs
914                                         .next()
915                                         .expect("enough allocations for all operands")
916                                         .as_reg()
917                                         .expect("only register allocations, not stack allocations")
918                                         .into();
919 
920                                     if let OperandConstraint::FixedReg(rreg) = constraint {
921                                         debug_assert_eq!(Reg::from(rreg), alloc);
922                                     }
923                                     *reg = alloc;
924                                 },
925                             );
926                             debug_assert!(allocs.next().is_none());
927 
928                             // Emit the instruction!
929                             do_emit(
930                                 &self.insts[iix.index()],
931                                 &mut disasm,
932                                 &mut buffer,
933                                 &mut state,
934                             );
935                         }
936                     }
937 
938                     InstOrEdit::Edit(Edit::Move { from, to }) => {
939                         // Create a move/spill/reload instruction and
940                         // immediately emit it.
941                         match (from.as_reg(), to.as_reg()) {
942                             (Some(from), Some(to)) => {
943                                 // Reg-to-reg move.
944                                 let from_rreg = Reg::from(from);
945                                 let to_rreg = Writable::from_reg(Reg::from(to));
946                                 debug_assert_eq!(from.class(), to.class());
947                                 let ty = I::canonical_type_for_rc(from.class());
948                                 let mv = I::gen_move(to_rreg, from_rreg, ty);
949                                 do_emit(&mv, &mut disasm, &mut buffer, &mut state);
950                             }
951                             (Some(from), None) => {
952                                 // Spill from register to spillslot.
953                                 let to = to.as_stack().unwrap();
954                                 let from_rreg = RealReg::from(from);
955                                 let spill = self.abi.gen_spill(to, from_rreg);
956                                 do_emit(&spill, &mut disasm, &mut buffer, &mut state);
957                             }
958                             (None, Some(to)) => {
959                                 // Load from spillslot to register.
960                                 let from = from.as_stack().unwrap();
961                                 let to_rreg = Writable::from_reg(RealReg::from(to));
962                                 let reload = self.abi.gen_reload(to_rreg, from);
963                                 do_emit(&reload, &mut disasm, &mut buffer, &mut state);
964                             }
965                             (None, None) => {
966                                 panic!("regalloc2 should have eliminated stack-to-stack moves!");
967                             }
968                         }
969                     }
970                 }
971             }
972 
973             if cur_srcloc.is_some() {
974                 buffer.end_srcloc();
975                 cur_srcloc = None;
976             }
977 
978             // Do we need an island? Get the worst-case size of the next BB, add
979             // it to the optional padding behind the block, and pass this to the
980             // `MachBuffer` to determine if an island is necessary.
981             let worst_case_next_bb = if block_order_idx < final_order.len() - 1 {
982                 let next_block = final_order[block_order_idx + 1];
983                 let next_block_range = self.block_ranges.get(next_block.index());
984                 let next_block_size = next_block_range.len() as u32;
985                 let next_block_ra_insertions = ra_edits_per_block[next_block.index()];
986                 I::worst_case_size() * (next_block_size + next_block_ra_insertions)
987             } else {
988                 0
989             };
990             let padding = if bb_padding.is_empty() {
991                 0
992             } else {
993                 bb_padding.len() as u32 + I::LabelUse::ALIGN - 1
994             };
995             if buffer.island_needed(padding + worst_case_next_bb) {
996                 buffer.emit_island(padding + worst_case_next_bb, ctrl_plane);
997             }
998 
999             // Insert padding, if configured, to stress the `MachBuffer`'s
1000             // relocation and island calculations.
1001             //
1002             // Padding can get quite large during fuzzing though so place a
1003             // total cap on it where when a per-function threshold is exceeded
1004             // the padding is turned back down to zero. This avoids a small-ish
1005             // test case generating a GB+ memory footprint in Cranelift for
1006             // example.
1007             if !bb_padding.is_empty() {
1008                 buffer.put_data(&bb_padding);
1009                 buffer.align_to(I::LabelUse::ALIGN);
1010                 total_bb_padding += bb_padding.len();
1011                 if total_bb_padding > (150 << 20) {
1012                     bb_padding = Vec::new();
1013                 }
1014             }
1015         }
1016 
1017         // Do any optimizations on branches at tail of buffer, as if we had
1018         // bound one last label.
1019         buffer.optimize_branches(ctrl_plane);
1020 
1021         // emission state is not needed anymore, move control plane back out
1022         *ctrl_plane = state.take_ctrl_plane();
1023 
1024         let func_body_len = buffer.cur_offset();
1025 
1026         // Create `bb_edges` and final (filtered) `bb_starts`.
1027         let mut bb_edges = vec![];
1028         let mut bb_offsets = vec![];
1029         if flags.machine_code_cfg_info() {
1030             for block in 0..self.num_blocks() {
1031                 if bb_starts[block].is_none() {
1032                     // Block was deleted by MachBuffer; skip.
1033                     continue;
1034                 }
1035                 let from = bb_starts[block].unwrap();
1036 
1037                 bb_offsets.push(from);
1038                 // Resolve each `succ` label and add edges.
1039                 let succs = self.block_succs(BlockIndex::new(block));
1040                 for &succ in succs.iter() {
1041                     let to = buffer.resolve_label_offset(MachLabel::from_block(succ));
1042                     bb_edges.push((from, to));
1043                 }
1044             }
1045         }
1046 
1047         self.monotonize_inst_offsets(&mut inst_offsets[..], func_body_len);
1048         let value_labels_ranges =
1049             self.compute_value_labels_ranges(regalloc, &inst_offsets[..], func_body_len);
1050         let frame_size = self.abi.frame_size();
1051 
1052         EmitResult {
1053             buffer: buffer.finish(&self.constants, ctrl_plane),
1054             bb_offsets,
1055             bb_edges,
1056             func_body_len,
1057             disasm: if want_disasm { Some(disasm) } else { None },
1058             sized_stackslot_offsets: self.abi.sized_stackslot_offsets().clone(),
1059             dynamic_stackslot_offsets: self.abi.dynamic_stackslot_offsets().clone(),
1060             value_labels_ranges,
1061             frame_size,
1062         }
1063     }
1064 
1065     fn monotonize_inst_offsets(&self, inst_offsets: &mut [CodeOffset], func_body_len: u32) {
1066         if self.debug_value_labels.is_empty() {
1067             return;
1068         }
1069 
1070         // During emission, branch removal can make offsets of instructions incorrect.
1071         // Consider the following sequence: [insi][jmp0][jmp1][jmp2][insj]
1072         // It will be recorded as (say):    [30]  [34]  [38]  [42]  [<would be 46>]
1073         // When the jumps get removed we are left with (in "inst_offsets"):
1074         // [insi][jmp0][jmp1][jmp2][insj][...]
1075         // [30]  [34]  [38]  [42]  [34]
1076         // Which violates the monotonicity invariant. This method sets offsets of these
1077         // removed instructions such as to make them appear zero-sized:
1078         // [insi][jmp0][jmp1][jmp2][insj][...]
1079         // [30]  [34]  [34]  [34]  [34]
1080         //
1081         let mut next_offset = func_body_len;
1082         for inst_index in (0..(inst_offsets.len() - 1)).rev() {
1083             let inst_offset = inst_offsets[inst_index];
1084 
1085             // Not all instructions get their offsets recorded.
1086             if inst_offset == NO_INST_OFFSET {
1087                 continue;
1088             }
1089 
1090             if inst_offset > next_offset {
1091                 trace!(
1092                     "Fixing code offset of the removed Inst {}: {} -> {}",
1093                     inst_index,
1094                     inst_offset,
1095                     next_offset
1096                 );
1097                 inst_offsets[inst_index] = next_offset;
1098                 continue;
1099             }
1100 
1101             next_offset = inst_offset;
1102         }
1103     }
1104 
1105     fn compute_value_labels_ranges(
1106         &self,
1107         regalloc: &regalloc2::Output,
1108         inst_offsets: &[CodeOffset],
1109         func_body_len: u32,
1110     ) -> ValueLabelsRanges {
1111         if self.debug_value_labels.is_empty() {
1112             return ValueLabelsRanges::default();
1113         }
1114 
1115         let mut value_labels_ranges: ValueLabelsRanges = HashMap::new();
1116         for &(label, from, to, alloc) in &regalloc.debug_locations {
1117             let ranges = value_labels_ranges
1118                 .entry(ValueLabel::from_u32(label))
1119                 .or_insert_with(|| vec![]);
1120             let from_offset = inst_offsets[from.inst().index()];
1121             let to_offset = if to.inst().index() == inst_offsets.len() {
1122                 func_body_len
1123             } else {
1124                 inst_offsets[to.inst().index()]
1125             };
1126 
1127             // Empty ranges or unavailable offsets can happen
1128             // due to cold blocks and branch removal (see above).
1129             if from_offset == NO_INST_OFFSET
1130                 || to_offset == NO_INST_OFFSET
1131                 || from_offset == to_offset
1132             {
1133                 continue;
1134             }
1135 
1136             let loc = if let Some(preg) = alloc.as_reg() {
1137                 LabelValueLoc::Reg(Reg::from(preg))
1138             } else {
1139                 let slot = alloc.as_stack().unwrap();
1140                 let slot_offset = self.abi.get_spillslot_offset(slot);
1141                 let slot_base_to_caller_sp_offset = self.abi.slot_base_to_caller_sp_offset();
1142                 let caller_sp_to_cfa_offset =
1143                     crate::isa::unwind::systemv::caller_sp_to_cfa_offset();
1144                 // NOTE: this is a negative offset because it's relative to the caller's SP
1145                 let cfa_to_sp_offset =
1146                     -((slot_base_to_caller_sp_offset + caller_sp_to_cfa_offset) as i64);
1147                 LabelValueLoc::CFAOffset(cfa_to_sp_offset + slot_offset)
1148             };
1149 
1150             // ValueLocRanges are recorded by *instruction-end
1151             // offset*. `from_offset` is the *start* of the
1152             // instruction; that is the same as the end of another
1153             // instruction, so we only want to begin coverage once
1154             // we are past the previous instruction's end.
1155             let start = from_offset + 1;
1156 
1157             // Likewise, `end` is exclusive, but we want to
1158             // *include* the end of the last
1159             // instruction. `to_offset` is the start of the
1160             // `to`-instruction, which is the exclusive end, i.e.,
1161             // the first instruction not covered. That
1162             // instruction's start is the same as the end of the
1163             // last instruction that is included, so we go one
1164             // byte further to be sure to include it.
1165             let end = to_offset + 1;
1166 
1167             // Coalesce adjacent ranges that for the same location
1168             // to minimize output size here and for the consumers.
1169             if let Some(last_loc_range) = ranges.last_mut() {
1170                 if last_loc_range.loc == loc && last_loc_range.end == start {
1171                     trace!(
1172                         "Extending debug range for VL{} in {:?} to {}",
1173                         label,
1174                         loc,
1175                         end
1176                     );
1177                     last_loc_range.end = end;
1178                     continue;
1179                 }
1180             }
1181 
1182             trace!(
1183                 "Recording debug range for VL{} in {:?}: [Inst {}..Inst {}) [{}..{})",
1184                 label,
1185                 loc,
1186                 from.inst().index(),
1187                 to.inst().index(),
1188                 start,
1189                 end
1190             );
1191 
1192             ranges.push(ValueLocRange { loc, start, end });
1193         }
1194 
1195         value_labels_ranges
1196     }
1197 
1198     /// Get the IR block for a BlockIndex, if one exists.
1199     pub fn bindex_to_bb(&self, block: BlockIndex) -> Option<ir::Block> {
1200         self.block_order.lowered_order()[block.index()].orig_block()
1201     }
1202 
1203     /// Get the type of a VReg.
1204     pub fn vreg_type(&self, vreg: VReg) -> Type {
1205         self.vreg_types[vreg.vreg()]
1206     }
1207 
1208     /// Get the fact, if any, for a given VReg.
1209     pub fn vreg_fact(&self, vreg: VReg) -> Option<&Fact> {
1210         self.facts[vreg.vreg()].as_ref()
1211     }
1212 
1213     /// Set the fact for a given VReg.
1214     pub fn set_vreg_fact(&mut self, vreg: VReg, fact: Fact) {
1215         trace!("set fact on {}: {:?}", vreg, fact);
1216         self.facts[vreg.vreg()] = Some(fact);
1217     }
1218 
1219     /// Does a given instruction define any facts?
1220     pub fn inst_defines_facts(&self, inst: InsnIndex) -> bool {
1221         self.inst_operands(inst)
1222             .iter()
1223             .filter(|o| o.kind() == OperandKind::Def)
1224             .map(|o| o.vreg())
1225             .any(|vreg| self.facts[vreg.vreg()].is_some())
1226     }
1227 }
1228 
1229 impl<I: VCodeInst> std::ops::Index<InsnIndex> for VCode<I> {
1230     type Output = I;
1231     fn index(&self, idx: InsnIndex) -> &Self::Output {
1232         &self.insts[idx.index()]
1233     }
1234 }
1235 
1236 impl<I: VCodeInst> RegallocFunction for VCode<I> {
1237     fn num_insts(&self) -> usize {
1238         self.insts.len()
1239     }
1240 
1241     fn num_blocks(&self) -> usize {
1242         self.block_ranges.len()
1243     }
1244 
1245     fn entry_block(&self) -> BlockIndex {
1246         self.entry
1247     }
1248 
1249     fn block_insns(&self, block: BlockIndex) -> InstRange {
1250         let range = self.block_ranges.get(block.index());
1251         InstRange::forward(InsnIndex::new(range.start), InsnIndex::new(range.end))
1252     }
1253 
1254     fn block_succs(&self, block: BlockIndex) -> &[BlockIndex] {
1255         let range = self.block_succ_range.get(block.index());
1256         &self.block_succs[range]
1257     }
1258 
1259     fn block_preds(&self, block: BlockIndex) -> &[BlockIndex] {
1260         let range = self.block_pred_range.get(block.index());
1261         &self.block_preds[range]
1262     }
1263 
1264     fn block_params(&self, block: BlockIndex) -> &[VReg] {
1265         // As a special case we don't return block params for the entry block, as all the arguments
1266         // will be defined by the `Inst::Args` instruction.
1267         if block == self.entry {
1268             return &[];
1269         }
1270 
1271         let range = self.block_params_range.get(block.index());
1272         &self.block_params[range]
1273     }
1274 
1275     fn branch_blockparams(&self, block: BlockIndex, _insn: InsnIndex, succ_idx: usize) -> &[VReg] {
1276         let succ_range = self.branch_block_arg_succ_range.get(block.index());
1277         debug_assert!(succ_idx < succ_range.len());
1278         let branch_block_args = self.branch_block_arg_range.get(succ_range.start + succ_idx);
1279         &self.branch_block_args[branch_block_args]
1280     }
1281 
1282     fn is_ret(&self, insn: InsnIndex) -> bool {
1283         match self.insts[insn.index()].is_term() {
1284             // We treat blocks terminated by an unconditional trap like a return for regalloc.
1285             MachTerminator::None => self.insts[insn.index()].is_trap(),
1286             MachTerminator::Ret | MachTerminator::RetCall => true,
1287             MachTerminator::Uncond | MachTerminator::Cond | MachTerminator::Indirect => false,
1288         }
1289     }
1290 
1291     fn is_branch(&self, insn: InsnIndex) -> bool {
1292         match self.insts[insn.index()].is_term() {
1293             MachTerminator::Cond | MachTerminator::Uncond | MachTerminator::Indirect => true,
1294             _ => false,
1295         }
1296     }
1297 
1298     fn requires_refs_on_stack(&self, insn: InsnIndex) -> bool {
1299         self.insts[insn.index()].is_safepoint()
1300     }
1301 
1302     fn inst_operands(&self, insn: InsnIndex) -> &[Operand] {
1303         let range = self.operand_ranges.get(insn.index());
1304         &self.operands[range]
1305     }
1306 
1307     fn inst_clobbers(&self, insn: InsnIndex) -> PRegSet {
1308         self.clobbers.get(&insn).cloned().unwrap_or_default()
1309     }
1310 
1311     fn num_vregs(&self) -> usize {
1312         self.vreg_types.len()
1313     }
1314 
1315     fn reftype_vregs(&self) -> &[VReg] {
1316         &self.reftyped_vregs
1317     }
1318 
1319     fn debug_value_labels(&self) -> &[(VReg, InsnIndex, InsnIndex, u32)] {
1320         &self.debug_value_labels
1321     }
1322 
1323     fn spillslot_size(&self, regclass: RegClass) -> usize {
1324         self.abi.get_spillslot_size(regclass) as usize
1325     }
1326 
1327     fn allow_multiple_vreg_defs(&self) -> bool {
1328         // At least the s390x backend requires this, because the
1329         // `Loop` pseudo-instruction aggregates all Operands so pinned
1330         // vregs (RealRegs) may occur more than once.
1331         true
1332     }
1333 }
1334 
1335 impl<I: VCodeInst> Debug for VRegAllocator<I> {
1336     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1337         writeln!(f, "VRegAllocator {{")?;
1338 
1339         let mut alias_keys = self.vreg_aliases.keys().cloned().collect::<Vec<_>>();
1340         alias_keys.sort_unstable();
1341         for key in alias_keys {
1342             let dest = self.vreg_aliases.get(&key).unwrap();
1343             writeln!(f, "  {:?} := {:?}", Reg::from(key), Reg::from(*dest))?;
1344         }
1345 
1346         for (vreg, fact) in self.facts.iter().enumerate() {
1347             if let Some(fact) = fact {
1348                 writeln!(f, "  v{} ! {}", vreg, fact)?;
1349             }
1350         }
1351 
1352         writeln!(f, "}}")
1353     }
1354 }
1355 
1356 impl<I: VCodeInst> fmt::Debug for VCode<I> {
1357     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1358         writeln!(f, "VCode {{")?;
1359         writeln!(f, "  Entry block: {}", self.entry.index())?;
1360 
1361         let mut state = Default::default();
1362 
1363         for block in 0..self.num_blocks() {
1364             let block = BlockIndex::new(block);
1365             writeln!(f, "Block {}:", block.index())?;
1366             if let Some(bb) = self.bindex_to_bb(block) {
1367                 writeln!(f, "    (original IR block: {})", bb)?;
1368             }
1369             for succ in self.block_succs(block) {
1370                 writeln!(f, "    (successor: Block {})", succ.index())?;
1371             }
1372             for inst in self.block_ranges.get(block.index()) {
1373                 writeln!(
1374                     f,
1375                     "  Inst {}: {}",
1376                     inst,
1377                     self.insts[inst].pretty_print_inst(&mut state)
1378                 )?;
1379                 if !self.operands.is_empty() {
1380                     for operand in self.inst_operands(InsnIndex::new(inst)) {
1381                         if operand.kind() == OperandKind::Def {
1382                             if let Some(fact) = &self.facts[operand.vreg().vreg()] {
1383                                 writeln!(f, "    v{} ! {}", operand.vreg().vreg(), fact)?;
1384                             }
1385                         }
1386                     }
1387                 }
1388             }
1389         }
1390 
1391         writeln!(f, "}}")?;
1392         Ok(())
1393     }
1394 }
1395 
1396 /// This structure manages VReg allocation during the lifetime of the VCodeBuilder.
1397 pub struct VRegAllocator<I> {
1398     /// VReg IR-level types.
1399     vreg_types: Vec<Type>,
1400 
1401     /// Reference-typed `regalloc2::VReg`s. The regalloc requires
1402     /// these in a dense slice (as opposed to querying the
1403     /// reftype-status of each vreg) for efficient iteration.
1404     reftyped_vregs: Vec<VReg>,
1405 
1406     /// VReg aliases. When the final VCode is built we rewrite all
1407     /// uses of the keys in this table to their replacement values.
1408     ///
1409     /// We use these aliases to rename an instruction's expected
1410     /// result vregs to the returned vregs from lowering, which are
1411     /// usually freshly-allocated temps.
1412     vreg_aliases: FxHashMap<regalloc2::VReg, regalloc2::VReg>,
1413 
1414     /// A deferred error, to be bubbled up to the top level of the
1415     /// lowering algorithm. We take this approach because we cannot
1416     /// currently propagate a `Result` upward through ISLE code (the
1417     /// lowering rules) or some ABI code.
1418     deferred_error: Option<CodegenError>,
1419 
1420     /// Facts on VRegs, for proof-carrying code.
1421     facts: Vec<Option<Fact>>,
1422 
1423     /// The type of instruction that this allocator makes registers for.
1424     _inst: core::marker::PhantomData<I>,
1425 }
1426 
1427 impl<I: VCodeInst> VRegAllocator<I> {
1428     /// Make a new VRegAllocator.
1429     pub fn with_capacity(capacity: usize) -> Self {
1430         let capacity = first_user_vreg_index() + capacity;
1431         let mut vreg_types = Vec::with_capacity(capacity);
1432         vreg_types.resize(first_user_vreg_index(), types::INVALID);
1433         Self {
1434             vreg_types,
1435             reftyped_vregs: vec![],
1436             vreg_aliases: FxHashMap::with_capacity_and_hasher(capacity, Default::default()),
1437             deferred_error: None,
1438             facts: Vec::with_capacity(capacity),
1439             _inst: core::marker::PhantomData::default(),
1440         }
1441     }
1442 
1443     /// Allocate a fresh ValueRegs.
1444     pub fn alloc(&mut self, ty: Type) -> CodegenResult<ValueRegs<Reg>> {
1445         if self.deferred_error.is_some() {
1446             return Err(CodegenError::CodeTooLarge);
1447         }
1448         let v = self.vreg_types.len();
1449         let (regclasses, tys) = I::rc_for_type(ty)?;
1450         if v + regclasses.len() >= VReg::MAX {
1451             return Err(CodegenError::CodeTooLarge);
1452         }
1453 
1454         let regs: ValueRegs<Reg> = match regclasses {
1455             &[rc0] => ValueRegs::one(VReg::new(v, rc0).into()),
1456             &[rc0, rc1] => ValueRegs::two(VReg::new(v, rc0).into(), VReg::new(v + 1, rc1).into()),
1457             // We can extend this if/when we support 32-bit targets; e.g.,
1458             // an i128 on a 32-bit machine will need up to four machine regs
1459             // for a `Value`.
1460             _ => panic!("Value must reside in 1 or 2 registers"),
1461         };
1462         for (&reg_ty, &reg) in tys.iter().zip(regs.regs().iter()) {
1463             let vreg = reg.to_virtual_reg().unwrap();
1464             debug_assert_eq!(self.vreg_types.len(), vreg.index());
1465             self.vreg_types.push(reg_ty);
1466             if is_reftype(reg_ty) {
1467                 self.reftyped_vregs.push(vreg.into());
1468             }
1469         }
1470 
1471         // Create empty facts for each allocated vreg.
1472         self.facts.resize(self.vreg_types.len(), None);
1473 
1474         Ok(regs)
1475     }
1476 
1477     /// Allocate a fresh ValueRegs, deferring any out-of-vregs
1478     /// errors. This is useful in places where we cannot bubble a
1479     /// `CodegenResult` upward easily, and which are known to be
1480     /// invoked from within the lowering loop that checks the deferred
1481     /// error status below.
1482     pub fn alloc_with_deferred_error(&mut self, ty: Type) -> ValueRegs<Reg> {
1483         match self.alloc(ty) {
1484             Ok(x) => x,
1485             Err(e) => {
1486                 self.deferred_error = Some(e);
1487                 self.bogus_for_deferred_error(ty)
1488             }
1489         }
1490     }
1491 
1492     /// Take any deferred error that was accumulated by `alloc_with_deferred_error`.
1493     pub fn take_deferred_error(&mut self) -> Option<CodegenError> {
1494         self.deferred_error.take()
1495     }
1496 
1497     /// Produce an bogus VReg placeholder with the proper number of
1498     /// registers for the given type. This is meant to be used with
1499     /// deferred allocation errors (see `Lower::alloc_tmp()`).
1500     fn bogus_for_deferred_error(&self, ty: Type) -> ValueRegs<Reg> {
1501         let (regclasses, _tys) = I::rc_for_type(ty).expect("must have valid type");
1502         match regclasses {
1503             &[rc0] => ValueRegs::one(VReg::new(0, rc0).into()),
1504             &[rc0, rc1] => ValueRegs::two(VReg::new(0, rc0).into(), VReg::new(1, rc1).into()),
1505             _ => panic!("Value must reside in 1 or 2 registers"),
1506         }
1507     }
1508 
1509     /// Rewrite any mention of `from` into `to`.
1510     pub fn set_vreg_alias(&mut self, from: Reg, to: Reg) {
1511         let from = from.into();
1512         let resolved_to = self.resolve_vreg_alias(to.into());
1513         // Disallow cycles (see below).
1514         assert_ne!(resolved_to, from);
1515 
1516         // Maintain the invariant that PCC facts only exist on vregs
1517         // which aren't aliases. We want to preserve whatever was
1518         // stated about the vreg before its producer was lowered.
1519         if let Some(fact) = self.facts[from.vreg()].take() {
1520             self.set_fact(resolved_to, fact);
1521         }
1522 
1523         let old_alias = self.vreg_aliases.insert(from, resolved_to);
1524         debug_assert_eq!(old_alias, None);
1525     }
1526 
1527     fn resolve_vreg_alias(&self, mut vreg: regalloc2::VReg) -> regalloc2::VReg {
1528         // We prevent cycles from existing by resolving targets of
1529         // aliases eagerly before setting them. If the target resolves
1530         // to the origin of the alias, then a cycle would be created
1531         // and the alias is disallowed. Because of the structure of
1532         // SSA code (one instruction can refer to another's defs but
1533         // not vice-versa, except indirectly through
1534         // phis/blockparams), cycles should not occur as we use
1535         // aliases to redirect vregs to the temps that actually define
1536         // them.
1537         while let Some(to) = self.vreg_aliases.get(&vreg) {
1538             vreg = *to;
1539         }
1540         vreg
1541     }
1542 
1543     #[inline]
1544     fn debug_assert_no_vreg_aliases(&self, mut list: impl Iterator<Item = VReg>) {
1545         debug_assert!(list.all(|vreg| !self.vreg_aliases.contains_key(&vreg)));
1546     }
1547 
1548     /// Set the proof-carrying code fact on a given virtual register.
1549     ///
1550     /// Returns the old fact, if any (only one fact can be stored).
1551     fn set_fact(&mut self, vreg: regalloc2::VReg, fact: Fact) -> Option<Fact> {
1552         trace!("vreg {:?} has fact: {:?}", vreg, fact);
1553         debug_assert!(!self.vreg_aliases.contains_key(&vreg));
1554         self.facts[vreg.vreg()].replace(fact)
1555     }
1556 
1557     /// Set a fact only if one doesn't already exist.
1558     pub fn set_fact_if_missing(&mut self, vreg: VirtualReg, fact: Fact) {
1559         let vreg = self.resolve_vreg_alias(vreg.into());
1560         if self.facts[vreg.vreg()].is_none() {
1561             self.set_fact(vreg, fact);
1562         }
1563     }
1564 
1565     /// Allocate a fresh ValueRegs, with a given fact to apply if
1566     /// the value fits in one VReg.
1567     pub fn alloc_with_maybe_fact(
1568         &mut self,
1569         ty: Type,
1570         fact: Option<Fact>,
1571     ) -> CodegenResult<ValueRegs<Reg>> {
1572         let result = self.alloc(ty)?;
1573 
1574         // Ensure that we don't lose a fact on a value that splits
1575         // into multiple VRegs.
1576         assert!(result.len() == 1 || fact.is_none());
1577         if let Some(fact) = fact {
1578             self.set_fact(result.regs()[0].into(), fact);
1579         }
1580 
1581         Ok(result)
1582     }
1583 }
1584 
1585 /// This structure tracks the large constants used in VCode that will be emitted separately by the
1586 /// [MachBuffer].
1587 ///
1588 /// First, during the lowering phase, constants are inserted using
1589 /// [VCodeConstants.insert]; an intermediate handle, `VCodeConstant`, tracks what constants are
1590 /// used in this phase. Some deduplication is performed, when possible, as constant
1591 /// values are inserted.
1592 ///
1593 /// Secondly, during the emission phase, the [MachBuffer] assigns [MachLabel]s for each of the
1594 /// constants so that instructions can refer to the value's memory location. The [MachBuffer]
1595 /// then writes the constant values to the buffer.
1596 #[derive(Default)]
1597 pub struct VCodeConstants {
1598     constants: PrimaryMap<VCodeConstant, VCodeConstantData>,
1599     pool_uses: HashMap<Constant, VCodeConstant>,
1600     well_known_uses: HashMap<*const [u8], VCodeConstant>,
1601     u64s: HashMap<[u8; 8], VCodeConstant>,
1602 }
1603 impl VCodeConstants {
1604     /// Initialize the structure with the expected number of constants.
1605     pub fn with_capacity(expected_num_constants: usize) -> Self {
1606         Self {
1607             constants: PrimaryMap::with_capacity(expected_num_constants),
1608             pool_uses: HashMap::with_capacity(expected_num_constants),
1609             well_known_uses: HashMap::new(),
1610             u64s: HashMap::new(),
1611         }
1612     }
1613 
1614     /// Insert a constant; using this method indicates that a constant value will be used and thus
1615     /// will be emitted to the `MachBuffer`. The current implementation can deduplicate constants
1616     /// that are [VCodeConstantData::Pool] or [VCodeConstantData::WellKnown] but not
1617     /// [VCodeConstantData::Generated].
1618     pub fn insert(&mut self, data: VCodeConstantData) -> VCodeConstant {
1619         match data {
1620             VCodeConstantData::Generated(_) => self.constants.push(data),
1621             VCodeConstantData::Pool(constant, _) => match self.pool_uses.get(&constant) {
1622                 None => {
1623                     let vcode_constant = self.constants.push(data);
1624                     self.pool_uses.insert(constant, vcode_constant);
1625                     vcode_constant
1626                 }
1627                 Some(&vcode_constant) => vcode_constant,
1628             },
1629             VCodeConstantData::WellKnown(data_ref) => {
1630                 match self.well_known_uses.entry(data_ref as *const [u8]) {
1631                     Entry::Vacant(v) => {
1632                         let vcode_constant = self.constants.push(data);
1633                         v.insert(vcode_constant);
1634                         vcode_constant
1635                     }
1636                     Entry::Occupied(o) => *o.get(),
1637                 }
1638             }
1639             VCodeConstantData::U64(value) => match self.u64s.entry(value) {
1640                 Entry::Vacant(v) => {
1641                     let vcode_constant = self.constants.push(data);
1642                     v.insert(vcode_constant);
1643                     vcode_constant
1644                 }
1645                 Entry::Occupied(o) => *o.get(),
1646             },
1647         }
1648     }
1649 
1650     /// Return the number of constants inserted.
1651     pub fn len(&self) -> usize {
1652         self.constants.len()
1653     }
1654 
1655     /// Iterate over the `VCodeConstant` keys inserted in this structure.
1656     pub fn keys(&self) -> Keys<VCodeConstant> {
1657         self.constants.keys()
1658     }
1659 
1660     /// Iterate over the `VCodeConstant` keys and the data (as a byte slice) inserted in this
1661     /// structure.
1662     pub fn iter(&self) -> impl Iterator<Item = (VCodeConstant, &VCodeConstantData)> {
1663         self.constants.iter()
1664     }
1665 
1666     /// Returns the data associated with the specified constant.
1667     pub fn get(&self, c: VCodeConstant) -> &VCodeConstantData {
1668         &self.constants[c]
1669     }
1670 
1671     /// Checks if the given [VCodeConstantData] is registered as
1672     /// used by the pool.
1673     pub fn pool_uses(&self, constant: &VCodeConstantData) -> bool {
1674         match constant {
1675             VCodeConstantData::Pool(c, _) => self.pool_uses.contains_key(c),
1676             _ => false,
1677         }
1678     }
1679 }
1680 
1681 /// A use of a constant by one or more VCode instructions; see [VCodeConstants].
1682 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
1683 pub struct VCodeConstant(u32);
1684 entity_impl!(VCodeConstant);
1685 
1686 /// Identify the different types of constant that can be inserted into [VCodeConstants]. Tracking
1687 /// these separately instead of as raw byte buffers allows us to avoid some duplication.
1688 pub enum VCodeConstantData {
1689     /// A constant already present in the Cranelift IR
1690     /// [ConstantPool](crate::ir::constant::ConstantPool).
1691     Pool(Constant, ConstantData),
1692     /// A reference to a well-known constant value that is statically encoded within the compiler.
1693     WellKnown(&'static [u8]),
1694     /// A constant value generated during lowering; the value may depend on the instruction context
1695     /// which makes it difficult to de-duplicate--if possible, use other variants.
1696     Generated(ConstantData),
1697     /// A constant of at most 64 bits. These are deduplicated as
1698     /// well. Stored as a fixed-size array of `u8` so that we do not
1699     /// encounter endianness problems when cross-compiling.
1700     U64([u8; 8]),
1701 }
1702 impl VCodeConstantData {
1703     /// Retrieve the constant data as a byte slice.
1704     pub fn as_slice(&self) -> &[u8] {
1705         match self {
1706             VCodeConstantData::Pool(_, d) | VCodeConstantData::Generated(d) => d.as_slice(),
1707             VCodeConstantData::WellKnown(d) => d,
1708             VCodeConstantData::U64(value) => &value[..],
1709         }
1710     }
1711 
1712     /// Calculate the alignment of the constant data.
1713     pub fn alignment(&self) -> u32 {
1714         if self.as_slice().len() <= 8 {
1715             8
1716         } else {
1717             16
1718         }
1719     }
1720 }
1721 
1722 #[cfg(test)]
1723 mod test {
1724     use super::*;
1725     use std::mem::size_of;
1726 
1727     #[test]
1728     fn size_of_constant_structs() {
1729         assert_eq!(size_of::<Constant>(), 4);
1730         assert_eq!(size_of::<VCodeConstant>(), 4);
1731         assert_eq!(size_of::<ConstantData>(), 24);
1732         assert_eq!(size_of::<VCodeConstantData>(), 32);
1733         assert_eq!(
1734             size_of::<PrimaryMap<VCodeConstant, VCodeConstantData>>(),
1735             24
1736         );
1737         // TODO The VCodeConstants structure's memory size could be further optimized.
1738         // With certain versions of Rust, each `HashMap` in `VCodeConstants` occupied at
1739         // least 48 bytes, making an empty `VCodeConstants` cost 120 bytes.
1740     }
1741 }
1742