1 //! This implements the VCode container: a CFG of Insts that have been lowered.
2 //!
3 //! VCode is virtual-register code. An instruction in VCode is almost a machine
4 //! instruction; however, its register slots can refer to virtual registers in
5 //! addition to real machine registers.
6 //!
7 //! VCode is structured with traditional basic blocks, and
8 //! each block must be terminated by an unconditional branch (one target), a
9 //! conditional branch (two targets), or a return (no targets). Note that this
10 //! slightly differs from the machine code of most ISAs: in most ISAs, a
11 //! conditional branch has one target (and the not-taken case falls through).
12 //! However, we expect that machine backends will elide branches to the following
13 //! block (i.e., zero-offset jumps), and will be able to codegen a branch-cond /
14 //! branch-uncond pair if *both* targets are not fallthrough. This allows us to
15 //! play with layout prior to final binary emission, as well, if we want.
16 //!
17 //! See the main module comment in `mod.rs` for more details on the VCode-based
18 //! backend pipeline.
19 
20 use crate::fx::FxHashMap;
21 use crate::fx::FxHashSet;
22 use crate::ir::RelSourceLoc;
23 use crate::ir::{self, types, Constant, ConstantData, DynamicStackSlot, LabelValueLoc, ValueLabel};
24 use crate::machinst::*;
25 use crate::timing;
26 use crate::trace;
27 use crate::CodegenError;
28 use crate::ValueLocRange;
29 use cranelift_control::ControlPlane;
30 use regalloc2::{
31     Edit, Function as RegallocFunction, InstOrEdit, InstRange, Operand, OperandKind, PRegSet,
32     RegClass, VReg,
33 };
34 
35 use alloc::vec::Vec;
36 use cranelift_entity::{entity_impl, Keys, PrimaryMap};
37 use std::collections::hash_map::Entry;
38 use std::collections::HashMap;
39 use std::fmt;
40 
41 /// Index referring to an instruction in VCode.
42 pub type InsnIndex = regalloc2::Inst;
43 
44 /// Index referring to a basic block in VCode.
45 pub type BlockIndex = regalloc2::Block;
46 
47 /// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be
48 /// a `MachInst` and it must be able to emit itself at least to a `SizeCodeSink`.
49 pub trait VCodeInst: MachInst + MachInstEmit {}
50 impl<I: MachInst + MachInstEmit> VCodeInst for I {}
51 
52 /// A function in "VCode" (virtualized-register code) form, after
53 /// lowering.  This is essentially a standard CFG of basic blocks,
54 /// where each basic block consists of lowered instructions produced
55 /// by the machine-specific backend.
56 ///
57 /// Note that the VCode is immutable once produced, and is not
58 /// modified by register allocation in particular. Rather, register
59 /// allocation on the `VCode` produces a separate `regalloc2::Output`
60 /// struct, and this can be passed to `emit`. `emit` in turn does not
61 /// modify the vcode, but produces an `EmitResult`, which contains the
62 /// machine code itself, and the associated disassembly and/or
63 /// metadata as requested.
64 pub struct VCode<I: VCodeInst> {
65     /// VReg IR-level types.
66     vreg_types: Vec<Type>,
67 
68     /// Lowered machine instructions in order corresponding to the original IR.
69     insts: Vec<I>,
70 
71     /// Operands: pre-regalloc references to virtual registers with
72     /// constraints, in one flattened array. This allows the regalloc
73     /// to efficiently access all operands without requiring expensive
74     /// matches or method invocations on insts.
75     operands: Vec<Operand>,
76 
77     /// Operand index ranges: for each instruction in `insts`, there
78     /// is a tuple here providing the range in `operands` for that
79     /// instruction's operands.
80     operand_ranges: Vec<(u32, u32)>,
81 
82     /// Clobbers: a sparse map from instruction indices to clobber masks.
83     clobbers: FxHashMap<InsnIndex, PRegSet>,
84 
85     /// Move information: for a given InsnIndex, (src, dst) operand pair.
86     is_move: FxHashMap<InsnIndex, (Operand, Operand)>,
87 
88     /// Source locations for each instruction. (`SourceLoc` is a `u32`, so it is
89     /// reasonable to keep one of these per instruction.)
90     srclocs: Vec<RelSourceLoc>,
91 
92     /// Entry block.
93     entry: BlockIndex,
94 
95     /// Block instruction indices.
96     block_ranges: Vec<(InsnIndex, InsnIndex)>,
97 
98     /// Block successors: index range in the `block_succs_preds` list.
99     block_succ_range: Vec<(u32, u32)>,
100 
101     /// Block predecessors: index range in the `block_succs_preds` list.
102     block_pred_range: Vec<(u32, u32)>,
103 
104     /// Block successor and predecessor lists, concatenated into one
105     /// Vec. The `block_succ_range` and `block_pred_range` lists of
106     /// tuples above give (start, end) ranges within this list that
107     /// correspond to each basic block's successors or predecessors,
108     /// respectively.
109     block_succs_preds: Vec<regalloc2::Block>,
110 
111     /// Block parameters: index range in `block_params` below.
112     block_params_range: Vec<(u32, u32)>,
113 
114     /// Block parameter lists, concatenated into one vec. The
115     /// `block_params_range` list of tuples above gives (start, end)
116     /// ranges within this list that correspond to each basic block's
117     /// blockparam vregs.
118     block_params: Vec<regalloc2::VReg>,
119 
120     /// Outgoing block arguments on branch instructions, concatenated
121     /// into one list.
122     ///
123     /// Note that this is conceptually a 3D array: we have a VReg list
124     /// per block, per successor. We flatten those three dimensions
125     /// into this 1D vec, then store index ranges in two levels of
126     /// indirection.
127     ///
128     /// Indexed by the indices in `branch_block_arg_succ_range`.
129     branch_block_args: Vec<regalloc2::VReg>,
130 
131     /// Array of sequences of (start, end) tuples in
132     /// `branch_block_args`, one for each successor; these sequences
133     /// for each block are concatenated.
134     ///
135     /// Indexed by the indices in `branch_block_arg_succ_range`.
136     branch_block_arg_range: Vec<(u32, u32)>,
137 
138     /// For a given block, indices in `branch_block_arg_range`
139     /// corresponding to all of its successors.
140     branch_block_arg_succ_range: Vec<(u32, u32)>,
141 
142     /// VReg aliases. Each key in this table is translated to its
143     /// value when gathering Operands from instructions. Aliases are
144     /// not chased transitively (we do not further look up the
145     /// translated reg to see if it is another alias).
146     ///
147     /// We use these aliases to rename an instruction's expected
148     /// result vregs to the returned vregs from lowering, which are
149     /// usually freshly-allocated temps.
150     ///
151     /// Operands and branch arguments will already have been
152     /// translated through this alias table; but it helps to make
153     /// sense of instructions when pretty-printed, for example.
154     vreg_aliases: FxHashMap<regalloc2::VReg, regalloc2::VReg>,
155 
156     /// Block-order information.
157     block_order: BlockLoweringOrder,
158 
159     /// ABI object.
160     pub(crate) abi: Callee<I::ABIMachineSpec>,
161 
162     /// Constant information used during code emission. This should be
163     /// immutable across function compilations within the same module.
164     emit_info: I::Info,
165 
166     /// Reference-typed `regalloc2::VReg`s. The regalloc requires
167     /// these in a dense slice (as opposed to querying the
168     /// reftype-status of each vreg) for efficient iteration.
169     reftyped_vregs: Vec<VReg>,
170 
171     /// Constants.
172     constants: VCodeConstants,
173 
174     /// Value labels for debuginfo attached to vregs.
175     debug_value_labels: Vec<(VReg, InsnIndex, InsnIndex, u32)>,
176 
177     pub(crate) sigs: SigSet,
178 }
179 
180 /// The result of `VCode::emit`. Contains all information computed
181 /// during emission: actual machine code, optionally a disassembly,
182 /// and optionally metadata about the code layout.
183 pub struct EmitResult<I: VCodeInst> {
184     /// The MachBuffer containing the machine code.
185     pub buffer: MachBuffer<I>,
186 
187     /// Offset of each basic block, recorded during emission. Computed
188     /// only if `debug_value_labels` is non-empty.
189     pub bb_offsets: Vec<CodeOffset>,
190 
191     /// Final basic-block edges, in terms of code offsets of
192     /// bb-starts. Computed only if `debug_value_labels` is non-empty.
193     pub bb_edges: Vec<(CodeOffset, CodeOffset)>,
194 
195     /// Final instruction offsets, recorded during emission. Computed
196     /// only if `debug_value_labels` is non-empty.
197     pub inst_offsets: Vec<CodeOffset>,
198 
199     /// Final length of function body.
200     pub func_body_len: CodeOffset,
201 
202     /// The pretty-printed disassembly, if any. This uses the same
203     /// pretty-printing for MachInsts as the pre-regalloc VCode Debug
204     /// implementation, but additionally includes the prologue and
205     /// epilogue(s), and makes use of the regalloc results.
206     pub disasm: Option<String>,
207 
208     /// Offsets of sized stackslots.
209     pub sized_stackslot_offsets: PrimaryMap<StackSlot, u32>,
210 
211     /// Offsets of dynamic stackslots.
212     pub dynamic_stackslot_offsets: PrimaryMap<DynamicStackSlot, u32>,
213 
214     /// Value-labels information (debug metadata).
215     pub value_labels_ranges: ValueLabelsRanges,
216 
217     /// Stack frame size.
218     pub frame_size: u32,
219 
220     /// The alignment requirement for pc-relative loads.
221     pub alignment: u32,
222 }
223 
224 /// A builder for a VCode function body.
225 ///
226 /// This builder has the ability to accept instructions in either
227 /// forward or reverse order, depending on the pass direction that
228 /// produces the VCode. The lowering from CLIF to VCode<MachInst>
229 /// ordinarily occurs in reverse order (in order to allow instructions
230 /// to be lowered only if used, and not merged) so a reversal will
231 /// occur at the end of lowering to ensure the VCode is in machine
232 /// order.
233 ///
234 /// If built in reverse, block and instruction indices used once the
235 /// VCode is built are relative to the final (reversed) order, not the
236 /// order of construction. Note that this means we do not know the
237 /// final block or instruction indices when building, so we do not
238 /// hand them out. (The user is assumed to know them when appending
239 /// terminator instructions with successor blocks.)
240 pub struct VCodeBuilder<I: VCodeInst> {
241     /// In-progress VCode.
242     pub(crate) vcode: VCode<I>,
243 
244     /// In what direction is the build occuring?
245     direction: VCodeBuildDirection,
246 
247     /// Index of the last block-start in the vcode.
248     block_start: usize,
249 
250     /// Start of succs for the current block in the concatenated succs list.
251     succ_start: usize,
252 
253     /// Start of blockparams for the current block in the concatenated
254     /// blockparams list.
255     block_params_start: usize,
256 
257     /// Start of successor blockparam arg list entries in
258     /// the concatenated branch_block_arg_range list.
259     branch_block_arg_succ_start: usize,
260 
261     /// Current source location.
262     cur_srcloc: RelSourceLoc,
263 
264     /// Debug-value label in-progress map, keyed by label. For each
265     /// label, we keep disjoint ranges mapping to vregs. We'll flatten
266     /// this into (vreg, range, label) tuples when done.
267     debug_info: FxHashMap<ValueLabel, Vec<(InsnIndex, InsnIndex, VReg)>>,
268 }
269 
270 /// Direction in which a VCodeBuilder builds VCode.
271 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
272 pub enum VCodeBuildDirection {
273     // TODO: add `Forward` once we need it and can test it adequately.
274     /// Backward-build pass: we expect the producer to call `emit()`
275     /// with instructions in reverse program order within each block.
276     Backward,
277 }
278 
279 impl<I: VCodeInst> VCodeBuilder<I> {
280     /// Create a new VCodeBuilder.
281     pub fn new(
282         sigs: SigSet,
283         abi: Callee<I::ABIMachineSpec>,
284         emit_info: I::Info,
285         block_order: BlockLoweringOrder,
286         constants: VCodeConstants,
287         direction: VCodeBuildDirection,
288     ) -> VCodeBuilder<I> {
289         let vcode = VCode::new(sigs, abi, emit_info, block_order, constants);
290 
291         VCodeBuilder {
292             vcode,
293             direction,
294             block_start: 0,
295             succ_start: 0,
296             block_params_start: 0,
297             branch_block_arg_succ_start: 0,
298             cur_srcloc: Default::default(),
299             debug_info: FxHashMap::default(),
300         }
301     }
302 
303     pub fn init_abi(&mut self, temps: Vec<Writable<Reg>>) {
304         self.vcode.abi.init(&self.vcode.sigs, temps);
305     }
306 
307     /// Access the ABI object.
308     pub fn abi(&self) -> &Callee<I::ABIMachineSpec> {
309         &self.vcode.abi
310     }
311 
312     /// Access the ABI object.
313     pub fn abi_mut(&mut self) -> &mut Callee<I::ABIMachineSpec> {
314         &mut self.vcode.abi
315     }
316 
317     pub fn sigs(&self) -> &SigSet {
318         &self.vcode.sigs
319     }
320 
321     pub fn sigs_mut(&mut self) -> &mut SigSet {
322         &mut self.vcode.sigs
323     }
324 
325     /// Access to the BlockLoweringOrder object.
326     pub fn block_order(&self) -> &BlockLoweringOrder {
327         &self.vcode.block_order
328     }
329 
330     /// Set the current block as the entry block.
331     pub fn set_entry(&mut self, block: BlockIndex) {
332         self.vcode.entry = block;
333     }
334 
335     /// End the current basic block. Must be called after emitting vcode insts
336     /// for IR insts and prior to ending the function (building the VCode).
337     pub fn end_bb(&mut self) {
338         let start_idx = self.block_start;
339         let end_idx = self.vcode.insts.len();
340         self.block_start = end_idx;
341         // Add the instruction index range to the list of blocks.
342         self.vcode
343             .block_ranges
344             .push((InsnIndex::new(start_idx), InsnIndex::new(end_idx)));
345         // End the successors list.
346         let succ_end = self.vcode.block_succs_preds.len();
347         self.vcode
348             .block_succ_range
349             .push((self.succ_start as u32, succ_end as u32));
350         self.succ_start = succ_end;
351         // End the blockparams list.
352         let block_params_end = self.vcode.block_params.len();
353         self.vcode
354             .block_params_range
355             .push((self.block_params_start as u32, block_params_end as u32));
356         self.block_params_start = block_params_end;
357         // End the branch blockparam args list.
358         let branch_block_arg_succ_end = self.vcode.branch_block_arg_range.len();
359         self.vcode.branch_block_arg_succ_range.push((
360             self.branch_block_arg_succ_start as u32,
361             branch_block_arg_succ_end as u32,
362         ));
363         self.branch_block_arg_succ_start = branch_block_arg_succ_end;
364     }
365 
366     pub fn add_block_param(&mut self, param: VirtualReg) {
367         self.vcode.block_params.push(param.into());
368     }
369 
370     fn add_branch_args_for_succ(&mut self, args: &[Reg]) {
371         let start = self.vcode.branch_block_args.len();
372         self.vcode
373             .branch_block_args
374             .extend(args.iter().map(|&arg| VReg::from(arg)));
375         let end = self.vcode.branch_block_args.len();
376         self.vcode
377             .branch_block_arg_range
378             .push((start as u32, end as u32));
379     }
380 
381     /// Push an instruction for the current BB and current IR inst
382     /// within the BB.
383     pub fn push(&mut self, insn: I) {
384         self.vcode.insts.push(insn);
385         self.vcode.srclocs.push(self.cur_srcloc);
386     }
387 
388     /// Add a successor block with branch args.
389     pub fn add_succ(&mut self, block: BlockIndex, args: &[Reg]) {
390         self.vcode.block_succs_preds.push(block);
391         self.add_branch_args_for_succ(args);
392     }
393 
394     /// Set the current source location.
395     pub fn set_srcloc(&mut self, srcloc: RelSourceLoc) {
396         self.cur_srcloc = srcloc;
397     }
398 
399     /// Add a debug value label to a register.
400     pub fn add_value_label(&mut self, reg: Reg, label: ValueLabel) {
401         // We'll fix up labels in reverse(). Because we're generating
402         // code bottom-to-top, the liverange of the label goes *from*
403         // the last index at which was defined (or 0, which is the end
404         // of the eventual function) *to* just this instruction, and
405         // no further.
406         let inst = InsnIndex::new(self.vcode.insts.len());
407         let labels = self.debug_info.entry(label).or_insert_with(|| vec![]);
408         let last = labels
409             .last()
410             .map(|(_start, end, _vreg)| *end)
411             .unwrap_or(InsnIndex::new(0));
412         labels.push((last, inst, reg.into()));
413     }
414 
415     pub fn set_vreg_alias(&mut self, from: Reg, to: Reg) {
416         let from = from.into();
417         let resolved_to = self.resolve_vreg_alias(to.into());
418         // Disallow cycles (see below).
419         assert_ne!(resolved_to, from);
420         self.vcode.vreg_aliases.insert(from, resolved_to);
421     }
422 
423     pub fn resolve_vreg_alias(&self, from: regalloc2::VReg) -> regalloc2::VReg {
424         Self::resolve_vreg_alias_impl(&self.vcode.vreg_aliases, from)
425     }
426 
427     fn resolve_vreg_alias_impl(
428         aliases: &FxHashMap<regalloc2::VReg, regalloc2::VReg>,
429         from: regalloc2::VReg,
430     ) -> regalloc2::VReg {
431         // We prevent cycles from existing by resolving targets of
432         // aliases eagerly before setting them. If the target resolves
433         // to the origin of the alias, then a cycle would be created
434         // and the alias is disallowed. Because of the structure of
435         // SSA code (one instruction can refer to another's defs but
436         // not vice-versa, except indirectly through
437         // phis/blockparams), cycles should not occur as we use
438         // aliases to redirect vregs to the temps that actually define
439         // them.
440 
441         let mut vreg = from;
442         while let Some(to) = aliases.get(&vreg) {
443             vreg = *to;
444         }
445         vreg
446     }
447 
448     /// Access the constants.
449     pub fn constants(&mut self) -> &mut VCodeConstants {
450         &mut self.vcode.constants
451     }
452 
453     fn compute_preds_from_succs(&mut self) {
454         // Compute predecessors from successors. In order to gather
455         // all preds for a block into a contiguous sequence, we build
456         // a list of (succ, pred) tuples and then sort.
457         let mut succ_pred_edges: Vec<(BlockIndex, BlockIndex)> =
458             Vec::with_capacity(self.vcode.block_succs_preds.len());
459         for (pred, &(start, end)) in self.vcode.block_succ_range.iter().enumerate() {
460             let pred = BlockIndex::new(pred);
461             for i in start..end {
462                 let succ = BlockIndex::new(self.vcode.block_succs_preds[i as usize].index());
463                 succ_pred_edges.push((succ, pred));
464             }
465         }
466         succ_pred_edges.sort_unstable();
467 
468         let mut i = 0;
469         for succ in 0..self.vcode.num_blocks() {
470             let succ = BlockIndex::new(succ);
471             let start = self.vcode.block_succs_preds.len();
472             while i < succ_pred_edges.len() && succ_pred_edges[i].0 == succ {
473                 let pred = succ_pred_edges[i].1;
474                 self.vcode.block_succs_preds.push(pred);
475                 i += 1;
476             }
477             let end = self.vcode.block_succs_preds.len();
478             self.vcode.block_pred_range.push((start as u32, end as u32));
479         }
480     }
481 
482     /// Called once, when a build in Backward order is complete, to
483     /// perform the overall reversal (into final forward order) and
484     /// finalize metadata accordingly.
485     fn reverse_and_finalize(&mut self) {
486         let n_insts = self.vcode.insts.len();
487         if n_insts == 0 {
488             return;
489         }
490 
491         // Reverse the per-block and per-inst sequences.
492         self.vcode.block_ranges.reverse();
493         // block_params_range is indexed by block (and blocks were
494         // traversed in reverse) so we reverse it; but block-param
495         // sequences in the concatenated vec can remain in reverse
496         // order (it is effectively an arena of arbitrarily-placed
497         // referenced sequences).
498         self.vcode.block_params_range.reverse();
499         // Likewise, we reverse block_succ_range, but the block_succ
500         // concatenated array can remain as-is.
501         self.vcode.block_succ_range.reverse();
502         self.vcode.insts.reverse();
503         self.vcode.srclocs.reverse();
504         // Likewise, branch_block_arg_succ_range is indexed by block
505         // so must be reversed.
506         self.vcode.branch_block_arg_succ_range.reverse();
507 
508         // To translate an instruction index *endpoint* in reversed
509         // order to forward order, compute `n_insts - i`.
510         //
511         // Why not `n_insts - 1 - i`? That would be correct to
512         // translate an individual instruction index (for ten insts 0
513         // to 9 inclusive, inst 0 becomes 9, and inst 9 becomes
514         // 0). But for the usual inclusive-start, exclusive-end range
515         // idiom, inclusive starts become exclusive ends and
516         // vice-versa, so e.g. an (inclusive) start of 0 becomes an
517         // (exclusive) end of 10.
518         let translate = |inst: InsnIndex| InsnIndex::new(n_insts - inst.index());
519 
520         // Edit the block-range instruction indices.
521         for tuple in &mut self.vcode.block_ranges {
522             let (start, end) = *tuple;
523             *tuple = (translate(end), translate(start)); // Note reversed order.
524         }
525 
526         // Generate debug-value labels based on per-label maps.
527         for (label, tuples) in &self.debug_info {
528             for &(start, end, vreg) in tuples {
529                 let vreg = self.resolve_vreg_alias(vreg);
530                 let fwd_start = translate(end);
531                 let fwd_end = translate(start);
532                 self.vcode
533                     .debug_value_labels
534                     .push((vreg, fwd_start, fwd_end, label.as_u32()));
535             }
536         }
537 
538         // Now sort debug value labels by VReg, as required
539         // by regalloc2.
540         self.vcode
541             .debug_value_labels
542             .sort_unstable_by_key(|(vreg, _, _, _)| *vreg);
543     }
544 
545     fn collect_operands(&mut self, allocatable: PRegSet) {
546         for (i, insn) in self.vcode.insts.iter().enumerate() {
547             // Push operands from the instruction onto the operand list.
548             //
549             // We rename through the vreg alias table as we collect
550             // the operands. This is better than a separate post-pass
551             // over operands, because it has more cache locality:
552             // operands only need to pass through L1 once. This is
553             // also better than renaming instructions'
554             // operands/registers while lowering, because here we only
555             // need to do the `match` over the instruction to visit
556             // its register fields (which is slow, branchy code) once.
557 
558             let vreg_aliases = &self.vcode.vreg_aliases;
559             let mut op_collector =
560                 OperandCollector::new(&mut self.vcode.operands, allocatable, |vreg| {
561                     Self::resolve_vreg_alias_impl(vreg_aliases, vreg)
562                 });
563             insn.get_operands(&mut op_collector);
564             let (ops, clobbers) = op_collector.finish();
565             self.vcode.operand_ranges.push(ops);
566 
567             if clobbers != PRegSet::default() {
568                 self.vcode.clobbers.insert(InsnIndex::new(i), clobbers);
569             }
570 
571             if let Some((dst, src)) = insn.is_move() {
572                 // We should never see non-virtual registers present in move
573                 // instructions.
574                 assert!(
575                     src.is_virtual(),
576                     "the real register {:?} was used as the source of a move instruction",
577                     src
578                 );
579                 assert!(
580                     dst.to_reg().is_virtual(),
581                     "the real register {:?} was used as the destination of a move instruction",
582                     dst.to_reg()
583                 );
584 
585                 let src = Operand::reg_use(Self::resolve_vreg_alias_impl(vreg_aliases, src.into()));
586                 let dst = Operand::reg_def(Self::resolve_vreg_alias_impl(
587                     vreg_aliases,
588                     dst.to_reg().into(),
589                 ));
590 
591                 // Note that regalloc2 requires these in (src, dst) order.
592                 self.vcode.is_move.insert(InsnIndex::new(i), (src, dst));
593             }
594         }
595 
596         // Translate blockparam args via the vreg aliases table as well.
597         for arg in &mut self.vcode.branch_block_args {
598             let new_arg = Self::resolve_vreg_alias_impl(&self.vcode.vreg_aliases, *arg);
599             trace!("operandcollector: block arg {:?} -> {:?}", arg, new_arg);
600             *arg = new_arg;
601         }
602     }
603 
604     /// Build the final VCode.
605     pub fn build(mut self, allocatable: PRegSet, vregs: VRegAllocator<I>) -> VCode<I> {
606         self.vcode.vreg_types = vregs.vreg_types;
607         self.vcode.reftyped_vregs = vregs.reftyped_vregs;
608 
609         if self.direction == VCodeBuildDirection::Backward {
610             self.reverse_and_finalize();
611         }
612         self.collect_operands(allocatable);
613 
614         // Apply register aliases to the `reftyped_vregs` list since this list
615         // will be returned directly to `regalloc2` eventually and all
616         // operands/results of instructions will use the alias-resolved vregs
617         // from `regalloc2`'s perspective.
618         //
619         // Also note that `reftyped_vregs` can't have duplicates, so after the
620         // aliases are applied duplicates are removed.
621         for reg in self.vcode.reftyped_vregs.iter_mut() {
622             *reg = Self::resolve_vreg_alias_impl(&self.vcode.vreg_aliases, *reg);
623         }
624         self.vcode.reftyped_vregs.sort();
625         self.vcode.reftyped_vregs.dedup();
626 
627         self.compute_preds_from_succs();
628         self.vcode.debug_value_labels.sort_unstable();
629         self.vcode
630     }
631 }
632 
633 /// Is this type a reference type?
634 fn is_reftype(ty: Type) -> bool {
635     ty == types::R64 || ty == types::R32
636 }
637 
638 impl<I: VCodeInst> VCode<I> {
639     /// New empty VCode.
640     fn new(
641         sigs: SigSet,
642         abi: Callee<I::ABIMachineSpec>,
643         emit_info: I::Info,
644         block_order: BlockLoweringOrder,
645         constants: VCodeConstants,
646     ) -> VCode<I> {
647         let n_blocks = block_order.lowered_order().len();
648         VCode {
649             sigs,
650             vreg_types: vec![],
651             insts: Vec::with_capacity(10 * n_blocks),
652             operands: Vec::with_capacity(30 * n_blocks),
653             operand_ranges: Vec::with_capacity(10 * n_blocks),
654             clobbers: FxHashMap::default(),
655             is_move: FxHashMap::default(),
656             srclocs: Vec::with_capacity(10 * n_blocks),
657             entry: BlockIndex::new(0),
658             block_ranges: Vec::with_capacity(n_blocks),
659             block_succ_range: Vec::with_capacity(n_blocks),
660             block_succs_preds: Vec::with_capacity(2 * n_blocks),
661             block_pred_range: Vec::with_capacity(n_blocks),
662             block_params_range: Vec::with_capacity(n_blocks),
663             block_params: Vec::with_capacity(5 * n_blocks),
664             branch_block_args: Vec::with_capacity(10 * n_blocks),
665             branch_block_arg_range: Vec::with_capacity(2 * n_blocks),
666             branch_block_arg_succ_range: Vec::with_capacity(n_blocks),
667             block_order,
668             abi,
669             emit_info,
670             reftyped_vregs: vec![],
671             constants,
672             debug_value_labels: vec![],
673             vreg_aliases: FxHashMap::with_capacity_and_hasher(10 * n_blocks, Default::default()),
674         }
675     }
676 
677     /// Get the number of blocks. Block indices will be in the range `0 ..
678     /// (self.num_blocks() - 1)`.
679     pub fn num_blocks(&self) -> usize {
680         self.block_ranges.len()
681     }
682 
683     /// The number of lowered instructions.
684     pub fn num_insts(&self) -> usize {
685         self.insts.len()
686     }
687 
688     /// Get the successors for a block.
689     pub fn succs(&self, block: BlockIndex) -> &[BlockIndex] {
690         let (start, end) = self.block_succ_range[block.index()];
691         &self.block_succs_preds[start as usize..end as usize]
692     }
693 
694     fn compute_clobbers(&self, regalloc: &regalloc2::Output) -> Vec<Writable<RealReg>> {
695         // Compute clobbered registers.
696         let mut clobbered = vec![];
697         let mut clobbered_set = FxHashSet::default();
698 
699         // All moves are included in clobbers.
700         for edit in &regalloc.edits {
701             let Edit::Move { to, .. } = edit.1;
702             if let Some(preg) = to.as_reg() {
703                 let reg = RealReg::from(preg);
704                 if clobbered_set.insert(reg) {
705                     clobbered.push(Writable::from_reg(reg));
706                 }
707             }
708         }
709 
710         for (i, (start, end)) in self.operand_ranges.iter().enumerate() {
711             // Skip this instruction if not "included in clobbers" as
712             // per the MachInst. (Some backends use this to implement
713             // ABI specifics; e.g., excluding calls of the same ABI as
714             // the current function from clobbers, because by
715             // definition everything clobbered by the call can be
716             // clobbered by this function without saving as well.)
717             if !self.insts[i].is_included_in_clobbers() {
718                 continue;
719             }
720 
721             let start = *start as usize;
722             let end = *end as usize;
723             let operands = &self.operands[start..end];
724             let allocs = &regalloc.allocs[start..end];
725             for (operand, alloc) in operands.iter().zip(allocs.iter()) {
726                 // We're interested only in writes (Mods or Defs).
727                 if operand.kind() == OperandKind::Use {
728                     continue;
729                 }
730                 if let Some(preg) = alloc.as_reg() {
731                     let reg = RealReg::from(preg);
732                     if clobbered_set.insert(reg) {
733                         clobbered.push(Writable::from_reg(reg));
734                     }
735                 }
736             }
737 
738             // Also add explicitly-clobbered registers.
739             for preg in self
740                 .clobbers
741                 .get(&InsnIndex::new(i))
742                 .cloned()
743                 .unwrap_or_default()
744             {
745                 let reg = RealReg::from(preg);
746                 if clobbered_set.insert(reg) {
747                     clobbered.push(Writable::from_reg(reg));
748                 }
749             }
750         }
751 
752         clobbered
753     }
754 
755     /// Emit the instructions to a `MachBuffer`, containing fixed-up
756     /// code and external reloc/trap/etc. records ready for use. Takes
757     /// the regalloc results as well.
758     ///
759     /// Returns the machine code itself, and optionally metadata
760     /// and/or a disassembly, as an `EmitResult`. The `VCode` itself
761     /// is consumed by the emission process.
762     pub fn emit(
763         mut self,
764         regalloc: &regalloc2::Output,
765         want_disasm: bool,
766         want_metadata: bool,
767         ctrl_plane: &mut ControlPlane,
768     ) -> EmitResult<I>
769     where
770         I: VCodeInst,
771     {
772         // To write into disasm string.
773         use core::fmt::Write;
774 
775         let _tt = timing::vcode_emit();
776         let mut buffer = MachBuffer::new();
777         let mut bb_starts: Vec<Option<CodeOffset>> = vec![];
778 
779         // The first M MachLabels are reserved for block indices, the next N MachLabels for
780         // constants.
781         buffer.reserve_labels_for_blocks(self.num_blocks());
782         buffer.reserve_labels_for_constants(&self.constants);
783 
784         // Construct the final order we emit code in: cold blocks at the end.
785         let mut final_order: SmallVec<[BlockIndex; 16]> = smallvec![];
786         let mut cold_blocks: SmallVec<[BlockIndex; 16]> = smallvec![];
787         for block in 0..self.num_blocks() {
788             let block = BlockIndex::new(block);
789             if self.block_order.is_cold(block) {
790                 cold_blocks.push(block);
791             } else {
792                 final_order.push(block);
793             }
794         }
795         final_order.extend(cold_blocks.clone());
796 
797         // Compute/save info we need for the prologue: clobbers and
798         // number of spillslots.
799         //
800         // We clone `abi` here because we will mutate it as we
801         // generate the prologue and set other info, but we can't
802         // mutate `VCode`. The info it usually carries prior to
803         // setting clobbers is fairly minimal so this should be
804         // relatively cheap.
805         let clobbers = self.compute_clobbers(regalloc);
806         self.abi.set_num_spillslots(regalloc.num_spillslots);
807         self.abi.set_clobbered(clobbers);
808 
809         // We need to generate the prologue in order to get the ABI
810         // object into the right state first. We'll emit it when we
811         // hit the right block below.
812         let prologue_insts = self.abi.gen_prologue(&self.sigs);
813 
814         // Emit blocks.
815         let mut cur_srcloc = None;
816         let mut last_offset = None;
817         let mut inst_offsets = vec![];
818         let mut state = I::State::new(&self.abi, std::mem::take(ctrl_plane));
819 
820         let mut disasm = String::new();
821 
822         if !self.debug_value_labels.is_empty() {
823             inst_offsets.resize(self.insts.len(), 0);
824         }
825 
826         // Count edits per block ahead of time; this is needed for
827         // lookahead island emission. (We could derive it per-block
828         // with binary search in the edit list, but it's more
829         // efficient to do it in one pass here.)
830         let mut ra_edits_per_block: SmallVec<[u32; 64]> = smallvec![];
831         let mut edit_idx = 0;
832         for block in 0..self.num_blocks() {
833             let end_inst = self.block_ranges[block].1;
834             let start_edit_idx = edit_idx;
835             while edit_idx < regalloc.edits.len() && regalloc.edits[edit_idx].0.inst() < end_inst {
836                 edit_idx += 1;
837             }
838             let end_edit_idx = edit_idx;
839             ra_edits_per_block.push((end_edit_idx - start_edit_idx) as u32);
840         }
841 
842         let is_forward_edge_cfi_enabled = self.abi.is_forward_edge_cfi_enabled();
843 
844         for (block_order_idx, &block) in final_order.iter().enumerate() {
845             trace!("emitting block {:?}", block);
846             let new_offset = I::align_basic_block(buffer.cur_offset());
847             while new_offset > buffer.cur_offset() {
848                 // Pad with NOPs up to the aligned block offset.
849                 let nop = I::gen_nop((new_offset - buffer.cur_offset()) as usize);
850                 nop.emit(&[], &mut buffer, &self.emit_info, &mut Default::default());
851             }
852             assert_eq!(buffer.cur_offset(), new_offset);
853 
854             let do_emit = |inst: &I,
855                            allocs: &[Allocation],
856                            disasm: &mut String,
857                            buffer: &mut MachBuffer<I>,
858                            state: &mut I::State| {
859                 if want_disasm && !inst.is_args() {
860                     let mut s = state.clone();
861                     writeln!(disasm, "  {}", inst.pretty_print_inst(allocs, &mut s)).unwrap();
862                 }
863                 inst.emit(allocs, buffer, &self.emit_info, state);
864             };
865 
866             // Is this the first block? Emit the prologue directly if so.
867             if block == self.entry {
868                 trace!(" -> entry block");
869                 buffer.start_srcloc(Default::default());
870                 state.pre_sourceloc(Default::default());
871                 for inst in &prologue_insts {
872                     do_emit(&inst, &[], &mut disasm, &mut buffer, &mut state);
873                 }
874                 buffer.end_srcloc();
875             }
876 
877             // Now emit the regular block body.
878 
879             buffer.bind_label(MachLabel::from_block(block), state.ctrl_plane_mut());
880 
881             if want_disasm {
882                 writeln!(&mut disasm, "block{}:", block.index()).unwrap();
883             }
884 
885             if want_metadata {
886                 // Track BB starts. If we have backed up due to MachBuffer
887                 // branch opts, note that the removed blocks were removed.
888                 let cur_offset = buffer.cur_offset();
889                 if last_offset.is_some() && cur_offset <= last_offset.unwrap() {
890                     for i in (0..bb_starts.len()).rev() {
891                         if bb_starts[i].is_some() && cur_offset > bb_starts[i].unwrap() {
892                             break;
893                         }
894                         bb_starts[i] = None;
895                     }
896                 }
897                 bb_starts.push(Some(cur_offset));
898                 last_offset = Some(cur_offset);
899             }
900 
901             if let Some(block_start) = I::gen_block_start(
902                 self.block_order.is_indirect_branch_target(block),
903                 is_forward_edge_cfi_enabled,
904             ) {
905                 do_emit(&block_start, &[], &mut disasm, &mut buffer, &mut state);
906             }
907 
908             for inst_or_edit in regalloc.block_insts_and_edits(&self, block) {
909                 match inst_or_edit {
910                     InstOrEdit::Inst(iix) => {
911                         if !self.debug_value_labels.is_empty() {
912                             // If we need to produce debug info,
913                             // record the offset of each instruction
914                             // so that we can translate value-label
915                             // ranges to machine-code offsets.
916 
917                             // Cold blocks violate monotonicity
918                             // assumptions elsewhere (that
919                             // instructions in inst-index order are in
920                             // order in machine code), so we omit
921                             // their offsets here. Value-label range
922                             // generation below will skip empty ranges
923                             // and ranges with to-offsets of zero.
924                             if !self.block_order.is_cold(block) {
925                                 inst_offsets[iix.index()] = buffer.cur_offset();
926                             }
927                         }
928 
929                         if self.insts[iix.index()].is_move().is_some() {
930                             // Skip moves in the pre-regalloc program;
931                             // all of these are incorporated by the
932                             // regalloc into its unified move handling
933                             // and they come out the other end, if
934                             // still needed (not elided), as
935                             // regalloc-inserted moves.
936                             continue;
937                         }
938 
939                         // Update the srcloc at this point in the buffer.
940                         let srcloc = self.srclocs[iix.index()];
941                         if cur_srcloc != Some(srcloc) {
942                             if cur_srcloc.is_some() {
943                                 buffer.end_srcloc();
944                             }
945                             buffer.start_srcloc(srcloc);
946                             cur_srcloc = Some(srcloc);
947                         }
948                         state.pre_sourceloc(cur_srcloc.unwrap_or_default());
949 
950                         // If this is a safepoint, compute a stack map
951                         // and pass it to the emit state.
952                         if self.insts[iix.index()].is_safepoint() {
953                             let mut safepoint_slots: SmallVec<[SpillSlot; 8]> = smallvec![];
954                             // Find the contiguous range of
955                             // (progpoint, allocation) safepoint slot
956                             // records in `regalloc.safepoint_slots`
957                             // for this instruction index.
958                             let safepoint_slots_start = regalloc
959                                 .safepoint_slots
960                                 .binary_search_by(|(progpoint, _alloc)| {
961                                     if progpoint.inst() >= iix {
962                                         std::cmp::Ordering::Greater
963                                     } else {
964                                         std::cmp::Ordering::Less
965                                     }
966                                 })
967                                 .unwrap_err();
968 
969                             for (_, alloc) in regalloc.safepoint_slots[safepoint_slots_start..]
970                                 .iter()
971                                 .take_while(|(progpoint, _)| progpoint.inst() == iix)
972                             {
973                                 let slot = alloc.as_stack().unwrap();
974                                 safepoint_slots.push(slot);
975                             }
976                             if !safepoint_slots.is_empty() {
977                                 let stack_map = self
978                                     .abi
979                                     .spillslots_to_stack_map(&safepoint_slots[..], &state);
980                                 state.pre_safepoint(stack_map);
981                             }
982                         }
983 
984                         // Get the allocations for this inst from the regalloc result.
985                         let allocs = regalloc.inst_allocs(iix);
986 
987                         // If the instruction we are about to emit is
988                         // a return, place an epilogue at this point
989                         // (and don't emit the return; the actual
990                         // epilogue will contain it).
991                         if self.insts[iix.index()].is_term() == MachTerminator::Ret {
992                             for inst in self.abi.gen_epilogue() {
993                                 do_emit(&inst, &[], &mut disasm, &mut buffer, &mut state);
994                             }
995                         } else {
996                             // Emit the instruction!
997                             do_emit(
998                                 &self.insts[iix.index()],
999                                 allocs,
1000                                 &mut disasm,
1001                                 &mut buffer,
1002                                 &mut state,
1003                             );
1004                         }
1005                     }
1006 
1007                     InstOrEdit::Edit(Edit::Move { from, to }) => {
1008                         // Create a move/spill/reload instruction and
1009                         // immediately emit it.
1010                         match (from.as_reg(), to.as_reg()) {
1011                             (Some(from), Some(to)) => {
1012                                 // Reg-to-reg move.
1013                                 let from_rreg = Reg::from(from);
1014                                 let to_rreg = Writable::from_reg(Reg::from(to));
1015                                 debug_assert_eq!(from.class(), to.class());
1016                                 let ty = I::canonical_type_for_rc(from.class());
1017                                 let mv = I::gen_move(to_rreg, from_rreg, ty);
1018                                 do_emit(&mv, &[], &mut disasm, &mut buffer, &mut state);
1019                             }
1020                             (Some(from), None) => {
1021                                 // Spill from register to spillslot.
1022                                 let to = to.as_stack().unwrap();
1023                                 let from_rreg = RealReg::from(from);
1024                                 let spill = self.abi.gen_spill(to, from_rreg);
1025                                 do_emit(&spill, &[], &mut disasm, &mut buffer, &mut state);
1026                             }
1027                             (None, Some(to)) => {
1028                                 // Load from spillslot to register.
1029                                 let from = from.as_stack().unwrap();
1030                                 let to_rreg = Writable::from_reg(RealReg::from(to));
1031                                 let reload = self.abi.gen_reload(to_rreg, from);
1032                                 do_emit(&reload, &[], &mut disasm, &mut buffer, &mut state);
1033                             }
1034                             (None, None) => {
1035                                 panic!("regalloc2 should have eliminated stack-to-stack moves!");
1036                             }
1037                         }
1038                     }
1039                 }
1040             }
1041 
1042             if cur_srcloc.is_some() {
1043                 buffer.end_srcloc();
1044                 cur_srcloc = None;
1045             }
1046 
1047             // Do we need an island? Get the worst-case size of the
1048             // next BB and see if, having emitted that many bytes, we
1049             // will be beyond the deadline.
1050             if block_order_idx < final_order.len() - 1 {
1051                 let next_block = final_order[block_order_idx + 1];
1052                 let next_block_range = self.block_ranges[next_block.index()];
1053                 let next_block_size =
1054                     (next_block_range.1.index() - next_block_range.0.index()) as u32;
1055                 let next_block_ra_insertions = ra_edits_per_block[next_block.index()];
1056                 let worst_case_next_bb =
1057                     I::worst_case_size() * (next_block_size + next_block_ra_insertions);
1058                 if buffer.island_needed(worst_case_next_bb) {
1059                     buffer.emit_island(worst_case_next_bb, ctrl_plane);
1060                 }
1061             }
1062         }
1063 
1064         // emission state is not needed anymore, move control plane back out
1065         *ctrl_plane = state.take_ctrl_plane();
1066 
1067         // Emit the constants used by the function.
1068         let mut alignment = 1;
1069         for (constant, data) in self.constants.iter() {
1070             alignment = data.alignment().max(alignment);
1071 
1072             let label = buffer.get_label_for_constant(constant);
1073             buffer.defer_constant(label, data.alignment(), data.as_slice(), u32::max_value());
1074         }
1075 
1076         let func_body_len = buffer.cur_offset();
1077 
1078         // Create `bb_edges` and final (filtered) `bb_starts`.
1079         let mut bb_edges = vec![];
1080         let mut bb_offsets = vec![];
1081         if want_metadata {
1082             for block in 0..self.num_blocks() {
1083                 if bb_starts[block].is_none() {
1084                     // Block was deleted by MachBuffer; skip.
1085                     continue;
1086                 }
1087                 let from = bb_starts[block].unwrap();
1088 
1089                 bb_offsets.push(from);
1090                 // Resolve each `succ` label and add edges.
1091                 let succs = self.block_succs(BlockIndex::new(block));
1092                 for &succ in succs.iter() {
1093                     let to = buffer.resolve_label_offset(MachLabel::from_block(succ));
1094                     bb_edges.push((from, to));
1095                 }
1096             }
1097         }
1098 
1099         let value_labels_ranges =
1100             self.compute_value_labels_ranges(regalloc, &inst_offsets[..], func_body_len);
1101         let frame_size = self.abi.frame_size();
1102 
1103         EmitResult {
1104             buffer,
1105             bb_offsets,
1106             bb_edges,
1107             inst_offsets,
1108             func_body_len,
1109             disasm: if want_disasm { Some(disasm) } else { None },
1110             sized_stackslot_offsets: self.abi.sized_stackslot_offsets().clone(),
1111             dynamic_stackslot_offsets: self.abi.dynamic_stackslot_offsets().clone(),
1112             value_labels_ranges,
1113             frame_size,
1114             alignment,
1115         }
1116     }
1117 
1118     fn compute_value_labels_ranges(
1119         &self,
1120         regalloc: &regalloc2::Output,
1121         inst_offsets: &[CodeOffset],
1122         func_body_len: u32,
1123     ) -> ValueLabelsRanges {
1124         if self.debug_value_labels.is_empty() {
1125             return ValueLabelsRanges::default();
1126         }
1127 
1128         let mut value_labels_ranges: ValueLabelsRanges = HashMap::new();
1129         for &(label, from, to, alloc) in &regalloc.debug_locations {
1130             let ranges = value_labels_ranges
1131                 .entry(ValueLabel::from_u32(label))
1132                 .or_insert_with(|| vec![]);
1133             let from_offset = inst_offsets[from.inst().index()];
1134             let to_offset = if to.inst().index() == inst_offsets.len() {
1135                 func_body_len
1136             } else {
1137                 inst_offsets[to.inst().index()]
1138             };
1139 
1140             // Empty range or to-offset of zero can happen because of
1141             // cold blocks (see above).
1142             if to_offset == 0 || from_offset == to_offset {
1143                 continue;
1144             }
1145 
1146             let loc = if let Some(preg) = alloc.as_reg() {
1147                 LabelValueLoc::Reg(Reg::from(preg))
1148             } else {
1149                 // We can't translate spillslot locations at the
1150                 // moment because ValueLabelLoc requires an
1151                 // instantaneous SP offset, and this can *change*
1152                 // within the range we have here because of callsites
1153                 // adjusting SP temporarily. To avoid the complexity
1154                 // of accurately plumbing through nominal-SP
1155                 // adjustment sites, we just omit debug info for
1156                 // values that are spilled. Not ideal, but debug info
1157                 // is best-effort.
1158                 continue;
1159             };
1160 
1161             ranges.push(ValueLocRange {
1162                 loc,
1163                 // ValueLocRanges are recorded by *instruction-end
1164                 // offset*. `from_offset` is the *start* of the
1165                 // instruction; that is the same as the end of another
1166                 // instruction, so we only want to begin coverage once
1167                 // we are past the previous instruction's end.
1168                 start: from_offset + 1,
1169                 // Likewise, `end` is exclusive, but we want to
1170                 // *include* the end of the last
1171                 // instruction. `to_offset` is the start of the
1172                 // `to`-instruction, which is the exclusive end, i.e.,
1173                 // the first instruction not covered. That
1174                 // instruction's start is the same as the end of the
1175                 // last instruction that is included, so we go one
1176                 // byte further to be sure to include it.
1177                 end: to_offset + 1,
1178             });
1179         }
1180 
1181         value_labels_ranges
1182     }
1183 
1184     /// Get the IR block for a BlockIndex, if one exists.
1185     pub fn bindex_to_bb(&self, block: BlockIndex) -> Option<ir::Block> {
1186         self.block_order.lowered_order()[block.index()].orig_block()
1187     }
1188 
1189     #[inline]
1190     fn assert_no_vreg_aliases<'a>(&self, list: &'a [VReg]) -> &'a [VReg] {
1191         for vreg in list {
1192             self.assert_not_vreg_alias(*vreg);
1193         }
1194         list
1195     }
1196 
1197     #[inline]
1198     fn assert_not_vreg_alias(&self, vreg: VReg) -> VReg {
1199         debug_assert!(VCodeBuilder::<I>::resolve_vreg_alias_impl(&self.vreg_aliases, vreg) == vreg);
1200         vreg
1201     }
1202 
1203     #[inline]
1204     fn assert_operand_not_vreg_alias(&self, op: Operand) -> Operand {
1205         // It should be true by construction that `Operand`s do not contain any
1206         // aliased vregs since they're all collected and mapped when the VCode
1207         // is itself constructed.
1208         self.assert_not_vreg_alias(op.vreg());
1209         op
1210     }
1211 }
1212 
1213 impl<I: VCodeInst> RegallocFunction for VCode<I> {
1214     fn num_insts(&self) -> usize {
1215         self.insts.len()
1216     }
1217 
1218     fn num_blocks(&self) -> usize {
1219         self.block_ranges.len()
1220     }
1221 
1222     fn entry_block(&self) -> BlockIndex {
1223         self.entry
1224     }
1225 
1226     fn block_insns(&self, block: BlockIndex) -> InstRange {
1227         let (start, end) = self.block_ranges[block.index()];
1228         InstRange::forward(start, end)
1229     }
1230 
1231     fn block_succs(&self, block: BlockIndex) -> &[BlockIndex] {
1232         let (start, end) = self.block_succ_range[block.index()];
1233         &self.block_succs_preds[start as usize..end as usize]
1234     }
1235 
1236     fn block_preds(&self, block: BlockIndex) -> &[BlockIndex] {
1237         let (start, end) = self.block_pred_range[block.index()];
1238         &self.block_succs_preds[start as usize..end as usize]
1239     }
1240 
1241     fn block_params(&self, block: BlockIndex) -> &[VReg] {
1242         // As a special case we don't return block params for the entry block, as all the arguments
1243         // will be defined by the `Inst::Args` instruction.
1244         if block == self.entry {
1245             return &[];
1246         }
1247 
1248         let (start, end) = self.block_params_range[block.index()];
1249         let ret = &self.block_params[start as usize..end as usize];
1250         // Currently block params are never aliased to another vreg, but
1251         // double-check just to be sure.
1252         self.assert_no_vreg_aliases(ret)
1253     }
1254 
1255     fn branch_blockparams(&self, block: BlockIndex, _insn: InsnIndex, succ_idx: usize) -> &[VReg] {
1256         let (succ_range_start, succ_range_end) = self.branch_block_arg_succ_range[block.index()];
1257         let succ_ranges =
1258             &self.branch_block_arg_range[succ_range_start as usize..succ_range_end as usize];
1259         let (branch_block_args_start, branch_block_args_end) = succ_ranges[succ_idx];
1260         let ret = &self.branch_block_args
1261             [branch_block_args_start as usize..branch_block_args_end as usize];
1262         self.assert_no_vreg_aliases(ret)
1263     }
1264 
1265     fn is_ret(&self, insn: InsnIndex) -> bool {
1266         match self.insts[insn.index()].is_term() {
1267             // We treat blocks terminated by an unconditional trap like a return for regalloc.
1268             MachTerminator::None => self.insts[insn.index()].is_trap(),
1269             MachTerminator::Ret => true,
1270             _ => false,
1271         }
1272     }
1273 
1274     fn is_branch(&self, insn: InsnIndex) -> bool {
1275         match self.insts[insn.index()].is_term() {
1276             MachTerminator::Cond | MachTerminator::Uncond | MachTerminator::Indirect => true,
1277             _ => false,
1278         }
1279     }
1280 
1281     fn requires_refs_on_stack(&self, insn: InsnIndex) -> bool {
1282         self.insts[insn.index()].is_safepoint()
1283     }
1284 
1285     fn is_move(&self, insn: InsnIndex) -> Option<(Operand, Operand)> {
1286         let (a, b) = self.is_move.get(&insn)?;
1287         Some((
1288             self.assert_operand_not_vreg_alias(*a),
1289             self.assert_operand_not_vreg_alias(*b),
1290         ))
1291     }
1292 
1293     fn inst_operands(&self, insn: InsnIndex) -> &[Operand] {
1294         let (start, end) = self.operand_ranges[insn.index()];
1295         let ret = &self.operands[start as usize..end as usize];
1296         for op in ret {
1297             self.assert_operand_not_vreg_alias(*op);
1298         }
1299         ret
1300     }
1301 
1302     fn inst_clobbers(&self, insn: InsnIndex) -> PRegSet {
1303         self.clobbers.get(&insn).cloned().unwrap_or_default()
1304     }
1305 
1306     fn num_vregs(&self) -> usize {
1307         std::cmp::max(self.vreg_types.len(), first_user_vreg_index())
1308     }
1309 
1310     fn reftype_vregs(&self) -> &[VReg] {
1311         self.assert_no_vreg_aliases(&self.reftyped_vregs[..])
1312     }
1313 
1314     fn debug_value_labels(&self) -> &[(VReg, InsnIndex, InsnIndex, u32)] {
1315         // VRegs here are inserted into `debug_value_labels` after code is
1316         // generated and aliases are fully defined, so no double-check that
1317         // aliases are not lingering.
1318         for (vreg, ..) in self.debug_value_labels.iter() {
1319             self.assert_not_vreg_alias(*vreg);
1320         }
1321         &self.debug_value_labels[..]
1322     }
1323 
1324     fn spillslot_size(&self, regclass: RegClass) -> usize {
1325         self.abi.get_spillslot_size(regclass) as usize
1326     }
1327 
1328     fn allow_multiple_vreg_defs(&self) -> bool {
1329         // At least the s390x backend requires this, because the
1330         // `Loop` pseudo-instruction aggregates all Operands so pinned
1331         // vregs (RealRegs) may occur more than once.
1332         true
1333     }
1334 }
1335 
1336 impl<I: VCodeInst> fmt::Debug for VCode<I> {
1337     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1338         writeln!(f, "VCode {{")?;
1339         writeln!(f, "  Entry block: {}", self.entry.index())?;
1340 
1341         let mut state = Default::default();
1342 
1343         let mut alias_keys = self.vreg_aliases.keys().cloned().collect::<Vec<_>>();
1344         alias_keys.sort_unstable();
1345         for key in alias_keys {
1346             let dest = self.vreg_aliases.get(&key).unwrap();
1347             writeln!(f, "  {:?} := {:?}", Reg::from(key), Reg::from(*dest))?;
1348         }
1349 
1350         for block in 0..self.num_blocks() {
1351             let block = BlockIndex::new(block);
1352             writeln!(f, "Block {}:", block.index())?;
1353             if let Some(bb) = self.bindex_to_bb(block) {
1354                 writeln!(f, "    (original IR block: {})", bb)?;
1355             }
1356             for succ in self.succs(block) {
1357                 writeln!(f, "    (successor: Block {})", succ.index())?;
1358             }
1359             let (start, end) = self.block_ranges[block.index()];
1360             writeln!(
1361                 f,
1362                 "    (instruction range: {} .. {})",
1363                 start.index(),
1364                 end.index()
1365             )?;
1366             for inst in start.index()..end.index() {
1367                 writeln!(
1368                     f,
1369                     "  Inst {}: {}",
1370                     inst,
1371                     self.insts[inst].pretty_print_inst(&[], &mut state)
1372                 )?;
1373             }
1374         }
1375 
1376         writeln!(f, "}}")?;
1377         Ok(())
1378     }
1379 }
1380 
1381 /// This structure manages VReg allocation during the lifetime of the VCodeBuilder.
1382 pub struct VRegAllocator<I> {
1383     /// Next virtual register number to allocate.
1384     next_vreg: usize,
1385 
1386     /// VReg IR-level types.
1387     vreg_types: Vec<Type>,
1388 
1389     /// A set with the same contents as `reftyped_vregs`, in order to
1390     /// avoid inserting more than once.
1391     reftyped_vregs_set: FxHashSet<VReg>,
1392 
1393     /// Reference-typed `regalloc2::VReg`s. The regalloc requires
1394     /// these in a dense slice (as opposed to querying the
1395     /// reftype-status of each vreg) for efficient iteration.
1396     reftyped_vregs: Vec<VReg>,
1397 
1398     /// The type of instruction that this allocator makes registers for.
1399     _inst: core::marker::PhantomData<I>,
1400 }
1401 
1402 impl<I: VCodeInst> VRegAllocator<I> {
1403     /// Make a new VRegAllocator.
1404     pub fn new() -> Self {
1405         Self {
1406             next_vreg: first_user_vreg_index(),
1407             vreg_types: vec![],
1408             reftyped_vregs_set: FxHashSet::default(),
1409             reftyped_vregs: vec![],
1410             _inst: core::marker::PhantomData::default(),
1411         }
1412     }
1413 
1414     /// Allocate a fresh ValueRegs.
1415     pub fn alloc(&mut self, ty: Type) -> CodegenResult<ValueRegs<Reg>> {
1416         let v = self.next_vreg;
1417         let (regclasses, tys) = I::rc_for_type(ty)?;
1418         self.next_vreg += regclasses.len();
1419         if self.next_vreg >= VReg::MAX {
1420             return Err(CodegenError::CodeTooLarge);
1421         }
1422 
1423         let regs: ValueRegs<Reg> = match regclasses {
1424             &[rc0] => ValueRegs::one(VReg::new(v, rc0).into()),
1425             &[rc0, rc1] => ValueRegs::two(VReg::new(v, rc0).into(), VReg::new(v + 1, rc1).into()),
1426             // We can extend this if/when we support 32-bit targets; e.g.,
1427             // an i128 on a 32-bit machine will need up to four machine regs
1428             // for a `Value`.
1429             _ => panic!("Value must reside in 1 or 2 registers"),
1430         };
1431         for (&reg_ty, &reg) in tys.iter().zip(regs.regs().iter()) {
1432             self.set_vreg_type(reg.to_virtual_reg().unwrap(), reg_ty);
1433         }
1434         Ok(regs)
1435     }
1436 
1437     /// Set the type of this virtual register.
1438     pub fn set_vreg_type(&mut self, vreg: VirtualReg, ty: Type) {
1439         if self.vreg_types.len() <= vreg.index() {
1440             self.vreg_types.resize(vreg.index() + 1, ir::types::INVALID);
1441         }
1442         self.vreg_types[vreg.index()] = ty;
1443         if is_reftype(ty) {
1444             let vreg: VReg = vreg.into();
1445             if self.reftyped_vregs_set.insert(vreg) {
1446                 self.reftyped_vregs.push(vreg);
1447             }
1448         }
1449     }
1450 }
1451 
1452 /// This structure tracks the large constants used in VCode that will be emitted separately by the
1453 /// [MachBuffer].
1454 ///
1455 /// First, during the lowering phase, constants are inserted using
1456 /// [VCodeConstants.insert]; an intermediate handle, [VCodeConstant], tracks what constants are
1457 /// used in this phase. Some deduplication is performed, when possible, as constant
1458 /// values are inserted.
1459 ///
1460 /// Secondly, during the emission phase, the [MachBuffer] assigns [MachLabel]s for each of the
1461 /// constants so that instructions can refer to the value's memory location. The [MachBuffer]
1462 /// then writes the constant values to the buffer.
1463 #[derive(Default)]
1464 pub struct VCodeConstants {
1465     constants: PrimaryMap<VCodeConstant, VCodeConstantData>,
1466     pool_uses: HashMap<Constant, VCodeConstant>,
1467     well_known_uses: HashMap<*const [u8], VCodeConstant>,
1468     u64s: HashMap<[u8; 8], VCodeConstant>,
1469 }
1470 impl VCodeConstants {
1471     /// Initialize the structure with the expected number of constants.
1472     pub fn with_capacity(expected_num_constants: usize) -> Self {
1473         Self {
1474             constants: PrimaryMap::with_capacity(expected_num_constants),
1475             pool_uses: HashMap::with_capacity(expected_num_constants),
1476             well_known_uses: HashMap::new(),
1477             u64s: HashMap::new(),
1478         }
1479     }
1480 
1481     /// Insert a constant; using this method indicates that a constant value will be used and thus
1482     /// will be emitted to the `MachBuffer`. The current implementation can deduplicate constants
1483     /// that are [VCodeConstantData::Pool] or [VCodeConstantData::WellKnown] but not
1484     /// [VCodeConstantData::Generated].
1485     pub fn insert(&mut self, data: VCodeConstantData) -> VCodeConstant {
1486         match data {
1487             VCodeConstantData::Generated(_) => self.constants.push(data),
1488             VCodeConstantData::Pool(constant, _) => match self.pool_uses.get(&constant) {
1489                 None => {
1490                     let vcode_constant = self.constants.push(data);
1491                     self.pool_uses.insert(constant, vcode_constant);
1492                     vcode_constant
1493                 }
1494                 Some(&vcode_constant) => vcode_constant,
1495             },
1496             VCodeConstantData::WellKnown(data_ref) => {
1497                 match self.well_known_uses.entry(data_ref as *const [u8]) {
1498                     Entry::Vacant(v) => {
1499                         let vcode_constant = self.constants.push(data);
1500                         v.insert(vcode_constant);
1501                         vcode_constant
1502                     }
1503                     Entry::Occupied(o) => *o.get(),
1504                 }
1505             }
1506             VCodeConstantData::U64(value) => match self.u64s.entry(value) {
1507                 Entry::Vacant(v) => {
1508                     let vcode_constant = self.constants.push(data);
1509                     v.insert(vcode_constant);
1510                     vcode_constant
1511                 }
1512                 Entry::Occupied(o) => *o.get(),
1513             },
1514         }
1515     }
1516 
1517     /// Return the number of constants inserted.
1518     pub fn len(&self) -> usize {
1519         self.constants.len()
1520     }
1521 
1522     /// Iterate over the [VCodeConstant] keys inserted in this structure.
1523     pub fn keys(&self) -> Keys<VCodeConstant> {
1524         self.constants.keys()
1525     }
1526 
1527     /// Iterate over the [VCodeConstant] keys and the data (as a byte slice) inserted in this
1528     /// structure.
1529     pub fn iter(&self) -> impl Iterator<Item = (VCodeConstant, &VCodeConstantData)> {
1530         self.constants.iter()
1531     }
1532 }
1533 
1534 /// A use of a constant by one or more VCode instructions; see [VCodeConstants].
1535 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
1536 pub struct VCodeConstant(u32);
1537 entity_impl!(VCodeConstant);
1538 
1539 /// Identify the different types of constant that can be inserted into [VCodeConstants]. Tracking
1540 /// these separately instead of as raw byte buffers allows us to avoid some duplication.
1541 pub enum VCodeConstantData {
1542     /// A constant already present in the Cranelift IR
1543     /// [ConstantPool](crate::ir::constant::ConstantPool).
1544     Pool(Constant, ConstantData),
1545     /// A reference to a well-known constant value that is statically encoded within the compiler.
1546     WellKnown(&'static [u8]),
1547     /// A constant value generated during lowering; the value may depend on the instruction context
1548     /// which makes it difficult to de-duplicate--if possible, use other variants.
1549     Generated(ConstantData),
1550     /// A constant of at most 64 bits. These are deduplicated as
1551     /// well. Stored as a fixed-size array of `u8` so that we do not
1552     /// encounter endianness problems when cross-compiling.
1553     U64([u8; 8]),
1554 }
1555 impl VCodeConstantData {
1556     /// Retrieve the constant data as a byte slice.
1557     pub fn as_slice(&self) -> &[u8] {
1558         match self {
1559             VCodeConstantData::Pool(_, d) | VCodeConstantData::Generated(d) => d.as_slice(),
1560             VCodeConstantData::WellKnown(d) => d,
1561             VCodeConstantData::U64(value) => &value[..],
1562         }
1563     }
1564 
1565     /// Calculate the alignment of the constant data.
1566     pub fn alignment(&self) -> u32 {
1567         if self.as_slice().len() <= 8 {
1568             8
1569         } else {
1570             16
1571         }
1572     }
1573 }
1574 
1575 #[cfg(test)]
1576 mod test {
1577     use super::*;
1578     use std::mem::size_of;
1579 
1580     #[test]
1581     fn size_of_constant_structs() {
1582         assert_eq!(size_of::<Constant>(), 4);
1583         assert_eq!(size_of::<VCodeConstant>(), 4);
1584         assert_eq!(size_of::<ConstantData>(), 24);
1585         assert_eq!(size_of::<VCodeConstantData>(), 32);
1586         assert_eq!(
1587             size_of::<PrimaryMap<VCodeConstant, VCodeConstantData>>(),
1588             24
1589         );
1590         // TODO The VCodeConstants structure's memory size could be further optimized.
1591         // With certain versions of Rust, each `HashMap` in `VCodeConstants` occupied at
1592         // least 48 bytes, making an empty `VCodeConstants` cost 120 bytes.
1593     }
1594 }
1595