1 //! This implements the VCode container: a CFG of Insts that have been lowered.
2 //!
3 //! VCode is virtual-register code. An instruction in VCode is almost a machine
4 //! instruction; however, its register slots can refer to virtual registers in
5 //! addition to real machine registers.
6 //!
7 //! VCode is structured with traditional basic blocks, and
8 //! each block must be terminated by an unconditional branch (one target), a
9 //! conditional branch (two targets), or a return (no targets). Note that this
10 //! slightly differs from the machine code of most ISAs: in most ISAs, a
11 //! conditional branch has one target (and the not-taken case falls through).
12 //! However, we expect that machine backends will elide branches to the following
13 //! block (i.e., zero-offset jumps), and will be able to codegen a branch-cond /
14 //! branch-uncond pair if *both* targets are not fallthrough. This allows us to
15 //! play with layout prior to final binary emission, as well, if we want.
16 //!
17 //! See the main module comment in `mod.rs` for more details on the VCode-based
18 //! backend pipeline.
19 
20 use crate::fx::FxHashMap;
21 use crate::fx::FxHashSet;
22 use crate::ir::{self, types, Constant, ConstantData, LabelValueLoc, SourceLoc, ValueLabel};
23 use crate::machinst::*;
24 use crate::timing;
25 use crate::ValueLocRange;
26 use regalloc2::{
27     Edit, Function as RegallocFunction, InstOrEdit, InstRange, Operand, OperandKind, PReg, PRegSet,
28     RegClass, VReg,
29 };
30 
31 use alloc::boxed::Box;
32 use alloc::vec::Vec;
33 use cranelift_entity::{entity_impl, Keys, PrimaryMap};
34 use std::collections::hash_map::Entry;
35 use std::collections::HashMap;
36 use std::fmt;
37 
38 /// Index referring to an instruction in VCode.
39 pub type InsnIndex = regalloc2::Inst;
40 
41 /// Index referring to a basic block in VCode.
42 pub type BlockIndex = regalloc2::Block;
43 
44 /// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be
45 /// a `MachInst` and it must be able to emit itself at least to a `SizeCodeSink`.
46 pub trait VCodeInst: MachInst + MachInstEmit {}
47 impl<I: MachInst + MachInstEmit> VCodeInst for I {}
48 
49 /// A function in "VCode" (virtualized-register code) form, after
50 /// lowering.  This is essentially a standard CFG of basic blocks,
51 /// where each basic block consists of lowered instructions produced
52 /// by the machine-specific backend.
53 ///
54 /// Note that the VCode is immutable once produced, and is not
55 /// modified by register allocation in particular. Rather, register
56 /// allocation on the `VCode` produces a separate `regalloc2::Output`
57 /// struct, and this can be passed to `emit`. `emit` in turn does not
58 /// modify the vcode, but produces an `EmitResult`, which contains the
59 /// machine code itself, and the associated disassembly and/or
60 /// metadata as requested.
61 pub struct VCode<I: VCodeInst> {
62     /// VReg IR-level types.
63     vreg_types: Vec<Type>,
64 
65     /// Do we have any ref values among our vregs?
66     have_ref_values: bool,
67 
68     /// Lowered machine instructions in order corresponding to the original IR.
69     insts: Vec<I>,
70 
71     /// Operands: pre-regalloc references to virtual registers with
72     /// constraints, in one flattened array. This allows the regalloc
73     /// to efficiently access all operands without requiring expensive
74     /// matches or method invocations on insts.
75     operands: Vec<Operand>,
76 
77     /// Operand index ranges: for each instruction in `insts`, there
78     /// is a tuple here providing the range in `operands` for that
79     /// instruction's operands.
80     operand_ranges: Vec<(u32, u32)>,
81 
82     /// Clobbers: a sparse map from instruction indices to clobber masks.
83     clobbers: FxHashMap<InsnIndex, PRegSet>,
84 
85     /// Move information: for a given InsnIndex, (src, dst) operand pair.
86     is_move: FxHashMap<InsnIndex, (Operand, Operand)>,
87 
88     /// Source locations for each instruction. (`SourceLoc` is a `u32`, so it is
89     /// reasonable to keep one of these per instruction.)
90     srclocs: Vec<SourceLoc>,
91 
92     /// Entry block.
93     entry: BlockIndex,
94 
95     /// Block instruction indices.
96     block_ranges: Vec<(InsnIndex, InsnIndex)>,
97 
98     /// Block successors: index range in the `block_succs_preds` list.
99     block_succ_range: Vec<(u32, u32)>,
100 
101     /// Block predecessors: index range in the `block_succs_preds` list.
102     block_pred_range: Vec<(u32, u32)>,
103 
104     /// Block successor and predecessor lists, concatenated into one
105     /// Vec. The `block_succ_range` and `block_pred_range` lists of
106     /// tuples above give (start, end) ranges within this list that
107     /// correspond to each basic block's successors or predecessors,
108     /// respectively.
109     block_succs_preds: Vec<regalloc2::Block>,
110 
111     /// Block parameters: index range in `block_params` below.
112     block_params_range: Vec<(u32, u32)>,
113 
114     /// Block parameter lists, concatenated into one vec. The
115     /// `block_params_range` list of tuples above gives (start, end)
116     /// ranges within this list that correspond to each basic block's
117     /// blockparam vregs.
118     block_params: Vec<regalloc2::VReg>,
119 
120     /// Outgoing block arguments on branch instructions, concatenated
121     /// into one list.
122     ///
123     /// Note that this is conceptually a 3D array: we have a VReg list
124     /// per block, per successor. We flatten those three dimensions
125     /// into this 1D vec, then store index ranges in two levels of
126     /// indirection.
127     ///
128     /// Indexed by the indices in `branch_block_arg_succ_range`.
129     branch_block_args: Vec<regalloc2::VReg>,
130 
131     /// Array of sequences of (start, end) tuples in
132     /// `branch_block_args`, one for each successor; these sequences
133     /// for each block are concatenated.
134     ///
135     /// Indexed by the indices in `branch_block_arg_succ_range`.
136     branch_block_arg_range: Vec<(u32, u32)>,
137 
138     /// For a given block, indices in `branch_block_arg_range`
139     /// corresponding to all of its successors.
140     branch_block_arg_succ_range: Vec<(u32, u32)>,
141 
142     /// VReg aliases. Each key in this table is translated to its
143     /// value when gathering Operands from instructions. Aliases are
144     /// not chased transitively (we do not further look up the
145     /// translated reg to see if it is another alias).
146     ///
147     /// We use these aliases to rename an instruction's expected
148     /// result vregs to the returned vregs from lowering, which are
149     /// usually freshly-allocated temps.
150     ///
151     /// Operands and branch arguments will already have been
152     /// translated through this alias table; but it helps to make
153     /// sense of instructions when pretty-printed, for example.
154     vreg_aliases: FxHashMap<regalloc2::VReg, regalloc2::VReg>,
155 
156     /// Block-order information.
157     block_order: BlockLoweringOrder,
158 
159     /// ABI object.
160     abi: Box<dyn ABICallee<I = I>>,
161 
162     /// Constant information used during code emission. This should be
163     /// immutable across function compilations within the same module.
164     emit_info: I::Info,
165 
166     /// Reference-typed `regalloc2::VReg`s. The regalloc requires
167     /// these in a dense slice (as opposed to querying the
168     /// reftype-status of each vreg) for efficient iteration.
169     reftyped_vregs: Vec<VReg>,
170 
171     /// A set with the same contents as `reftyped_vregs`, in order to
172     /// avoid inserting more than once.
173     reftyped_vregs_set: FxHashSet<VReg>,
174 
175     /// Constants.
176     constants: VCodeConstants,
177 
178     /// Value labels for debuginfo attached to vregs.
179     debug_value_labels: Vec<(VReg, InsnIndex, InsnIndex, u32)>,
180 }
181 
182 /// The result of `VCode::emit`. Contains all information computed
183 /// during emission: actual machine code, optionally a disassembly,
184 /// and optionally metadata about the code layout.
185 pub struct EmitResult<I: VCodeInst> {
186     /// The MachBuffer containing the machine code.
187     pub buffer: MachBuffer<I>,
188 
189     /// Offset of each basic block, recorded during emission. Computed
190     /// only if `debug_value_labels` is non-empty.
191     pub bb_offsets: Vec<CodeOffset>,
192 
193     /// Final basic-block edges, in terms of code offsets of
194     /// bb-starts. Computed only if `debug_value_labels` is non-empty.
195     pub bb_edges: Vec<(CodeOffset, CodeOffset)>,
196 
197     /// Final instruction offsets, recorded during emission. Computed
198     /// only if `debug_value_labels` is non-empty.
199     pub inst_offsets: Vec<CodeOffset>,
200 
201     /// Final length of function body.
202     pub func_body_len: CodeOffset,
203 
204     /// The pretty-printed disassembly, if any. This uses the same
205     /// pretty-printing for MachInsts as the pre-regalloc VCode Debug
206     /// implementation, but additionally includes the prologue and
207     /// epilogue(s), and makes use of the regalloc results.
208     pub disasm: Option<String>,
209 
210     /// Offsets of stackslots.
211     pub stackslot_offsets: PrimaryMap<StackSlot, u32>,
212 
213     /// Value-labels information (debug metadata).
214     pub value_labels_ranges: ValueLabelsRanges,
215 
216     /// Stack frame size.
217     pub frame_size: u32,
218 }
219 
220 /// A builder for a VCode function body.
221 ///
222 /// This builder has the ability to accept instructions in either
223 /// forward or reverse order, depending on the pass direction that
224 /// produces the VCode. The lowering from CLIF to VCode<MachInst>
225 /// ordinarily occurs in reverse order (in order to allow instructions
226 /// to be lowered only if used, and not merged) so a reversal will
227 /// occur at the end of lowering to ensure the VCode is in machine
228 /// order.
229 ///
230 /// If built in reverse, block and instruction indices used once the
231 /// VCode is built are relative to the final (reversed) order, not the
232 /// order of construction. Note that this means we do not know the
233 /// final block or instruction indices when building, so we do not
234 /// hand them out. (The user is assumed to know them when appending
235 /// terminator instructions with successor blocks.)
236 pub struct VCodeBuilder<I: VCodeInst> {
237     /// In-progress VCode.
238     vcode: VCode<I>,
239 
240     /// In what direction is the build occuring?
241     direction: VCodeBuildDirection,
242 
243     /// Index of the last block-start in the vcode.
244     block_start: usize,
245 
246     /// Start of succs for the current block in the concatenated succs list.
247     succ_start: usize,
248 
249     /// Start of blockparams for the current block in the concatenated
250     /// blockparams list.
251     block_params_start: usize,
252 
253     /// Start of successor blockparam arg list entries in
254     /// the concatenated branch_block_arg_range list.
255     branch_block_arg_succ_start: usize,
256 
257     /// Current source location.
258     cur_srcloc: SourceLoc,
259 
260     /// Debug-value label in-progress map, keyed by label. For each
261     /// label, we keep disjoint ranges mapping to vregs. We'll flatten
262     /// this into (vreg, range, label) tuples when done.
263     debug_info: FxHashMap<ValueLabel, Vec<(InsnIndex, InsnIndex, VReg)>>,
264 }
265 
266 /// Direction in which a VCodeBuilder builds VCode.
267 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
268 pub enum VCodeBuildDirection {
269     // TODO: add `Forward` once we need it and can test it adequately.
270     /// Backward-build pass: we expect the producer to call `emit()`
271     /// with instructions in reverse program order within each block.
272     Backward,
273 }
274 
275 impl<I: VCodeInst> VCodeBuilder<I> {
276     /// Create a new VCodeBuilder.
277     pub fn new(
278         abi: Box<dyn ABICallee<I = I>>,
279         emit_info: I::Info,
280         block_order: BlockLoweringOrder,
281         constants: VCodeConstants,
282         direction: VCodeBuildDirection,
283     ) -> VCodeBuilder<I> {
284         let vcode = VCode::new(abi, emit_info, block_order, constants);
285 
286         VCodeBuilder {
287             vcode,
288             direction,
289             block_start: 0,
290             succ_start: 0,
291             block_params_start: 0,
292             branch_block_arg_succ_start: 0,
293             cur_srcloc: SourceLoc::default(),
294             debug_info: FxHashMap::default(),
295         }
296     }
297 
298     /// Access the ABI object.
299     pub fn abi(&mut self) -> &mut dyn ABICallee<I = I> {
300         &mut *self.vcode.abi
301     }
302 
303     /// Access to the BlockLoweringOrder object.
304     pub fn block_order(&self) -> &BlockLoweringOrder {
305         &self.vcode.block_order
306     }
307 
308     /// Set the type of a VReg.
309     pub fn set_vreg_type(&mut self, vreg: VirtualReg, ty: Type) {
310         if self.vcode.vreg_types.len() <= vreg.index() {
311             self.vcode
312                 .vreg_types
313                 .resize(vreg.index() + 1, ir::types::I8);
314         }
315         self.vcode.vreg_types[vreg.index()] = ty;
316         if is_reftype(ty) {
317             let vreg: VReg = vreg.into();
318             if self.vcode.reftyped_vregs_set.insert(vreg) {
319                 self.vcode.reftyped_vregs.push(vreg);
320             }
321             self.vcode.have_ref_values = true;
322         }
323     }
324 
325     /// Get the type of a VReg.
326     pub fn get_vreg_type(&self, vreg: VirtualReg) -> Type {
327         self.vcode.vreg_types[vreg.index()]
328     }
329 
330     /// Set the current block as the entry block.
331     pub fn set_entry(&mut self, block: BlockIndex) {
332         self.vcode.entry = block;
333     }
334 
335     /// End the current basic block. Must be called after emitting vcode insts
336     /// for IR insts and prior to ending the function (building the VCode).
337     pub fn end_bb(&mut self) {
338         let start_idx = self.block_start;
339         let end_idx = self.vcode.insts.len();
340         self.block_start = end_idx;
341         // Add the instruction index range to the list of blocks.
342         self.vcode
343             .block_ranges
344             .push((InsnIndex::new(start_idx), InsnIndex::new(end_idx)));
345         // End the successors list.
346         let succ_end = self.vcode.block_succs_preds.len();
347         self.vcode
348             .block_succ_range
349             .push((self.succ_start as u32, succ_end as u32));
350         self.succ_start = succ_end;
351         // End the blockparams list.
352         let block_params_end = self.vcode.block_params.len();
353         self.vcode
354             .block_params_range
355             .push((self.block_params_start as u32, block_params_end as u32));
356         self.block_params_start = block_params_end;
357         // End the branch blockparam args list.
358         let branch_block_arg_succ_end = self.vcode.branch_block_arg_range.len();
359         self.vcode.branch_block_arg_succ_range.push((
360             self.branch_block_arg_succ_start as u32,
361             branch_block_arg_succ_end as u32,
362         ));
363         self.branch_block_arg_succ_start = branch_block_arg_succ_end;
364     }
365 
366     pub fn add_block_param(&mut self, param: VirtualReg, ty: Type) {
367         self.set_vreg_type(param, ty);
368         self.vcode.block_params.push(param.into());
369     }
370 
371     fn add_branch_args_for_succ(&mut self, args: &[Reg]) {
372         let start = self.vcode.branch_block_args.len();
373         self.vcode
374             .branch_block_args
375             .extend(args.iter().map(|&arg| VReg::from(arg)));
376         let end = self.vcode.branch_block_args.len();
377         self.vcode
378             .branch_block_arg_range
379             .push((start as u32, end as u32));
380     }
381 
382     /// Push an instruction for the current BB and current IR inst
383     /// within the BB.
384     pub fn push(&mut self, insn: I) {
385         self.vcode.insts.push(insn);
386         self.vcode.srclocs.push(self.cur_srcloc);
387     }
388 
389     /// Add a successor block with branch args.
390     pub fn add_succ(&mut self, block: BlockIndex, args: &[Reg]) {
391         self.vcode.block_succs_preds.push(block);
392         self.add_branch_args_for_succ(args);
393     }
394 
395     /// Set the current source location.
396     pub fn set_srcloc(&mut self, srcloc: SourceLoc) {
397         self.cur_srcloc = srcloc;
398     }
399 
400     /// Add a debug value label to a register.
401     pub fn add_value_label(&mut self, reg: Reg, label: ValueLabel) {
402         // We'll fix up labels in reverse(). Because we're generating
403         // code bottom-to-top, the liverange of the label goes *from*
404         // the last index at which was defined (or 0, which is the end
405         // of the eventual function) *to* just this instruction, and
406         // no further.
407         let inst = InsnIndex::new(self.vcode.insts.len());
408         let labels = self.debug_info.entry(label).or_insert_with(|| vec![]);
409         let last = labels
410             .last()
411             .map(|(_start, end, _vreg)| *end)
412             .unwrap_or(InsnIndex::new(0));
413         labels.push((last, inst, reg.into()));
414     }
415 
416     pub fn set_vreg_alias(&mut self, from: Reg, to: Reg) {
417         let from = from.into();
418         let resolved_to = self.resolve_vreg_alias(to.into());
419         // Disallow cycles (see below).
420         assert_ne!(resolved_to, from);
421         self.vcode.vreg_aliases.insert(from, resolved_to);
422     }
423 
424     pub fn resolve_vreg_alias(&self, from: regalloc2::VReg) -> regalloc2::VReg {
425         Self::resolve_vreg_alias_impl(&self.vcode.vreg_aliases, from)
426     }
427 
428     fn resolve_vreg_alias_impl(
429         aliases: &FxHashMap<regalloc2::VReg, regalloc2::VReg>,
430         from: regalloc2::VReg,
431     ) -> regalloc2::VReg {
432         // We prevent cycles from existing by resolving targets of
433         // aliases eagerly before setting them. If the target resolves
434         // to the origin of the alias, then a cycle would be created
435         // and the alias is disallowed. Because of the structure of
436         // SSA code (one instruction can refer to another's defs but
437         // not vice-versa, except indirectly through
438         // phis/blockparams), cycles should not occur as we use
439         // aliases to redirect vregs to the temps that actually define
440         // them.
441 
442         let mut vreg = from;
443         while let Some(to) = aliases.get(&vreg) {
444             vreg = *to;
445         }
446         vreg
447     }
448 
449     /// Access the constants.
450     pub fn constants(&mut self) -> &mut VCodeConstants {
451         &mut self.vcode.constants
452     }
453 
454     fn compute_preds_from_succs(&mut self) {
455         // Compute predecessors from successors. In order to gather
456         // all preds for a block into a contiguous sequence, we build
457         // a list of (succ, pred) tuples and then sort.
458         let mut succ_pred_edges: Vec<(BlockIndex, BlockIndex)> =
459             Vec::with_capacity(self.vcode.block_succs_preds.len());
460         for (pred, &(start, end)) in self.vcode.block_succ_range.iter().enumerate() {
461             let pred = BlockIndex::new(pred);
462             for i in start..end {
463                 let succ = BlockIndex::new(self.vcode.block_succs_preds[i as usize].index());
464                 succ_pred_edges.push((succ, pred));
465             }
466         }
467         succ_pred_edges.sort_unstable();
468 
469         let mut i = 0;
470         for succ in 0..self.vcode.num_blocks() {
471             let succ = BlockIndex::new(succ);
472             let start = self.vcode.block_succs_preds.len();
473             while i < succ_pred_edges.len() && succ_pred_edges[i].0 == succ {
474                 let pred = succ_pred_edges[i].1;
475                 self.vcode.block_succs_preds.push(pred);
476                 i += 1;
477             }
478             let end = self.vcode.block_succs_preds.len();
479             self.vcode.block_pred_range.push((start as u32, end as u32));
480         }
481     }
482 
483     /// Called once, when a build in Backward order is complete, to
484     /// perform the overall reversal (into final forward order) and
485     /// finalize metadata accordingly.
486     fn reverse_and_finalize(&mut self) {
487         let n_insts = self.vcode.insts.len();
488         if n_insts == 0 {
489             return;
490         }
491 
492         // Reverse the per-block and per-inst sequences.
493         self.vcode.block_ranges.reverse();
494         // block_params_range is indexed by block (and blocks were
495         // traversed in reverse) so we reverse it; but block-param
496         // sequences in the concatenated vec can remain in reverse
497         // order (it is effectively an arena of arbitrarily-placed
498         // referenced sequences).
499         self.vcode.block_params_range.reverse();
500         // Likewise, we reverse block_succ_range, but the block_succ
501         // concatenated array can remain as-is.
502         self.vcode.block_succ_range.reverse();
503         self.vcode.insts.reverse();
504         self.vcode.srclocs.reverse();
505         // Likewise, branch_block_arg_succ_range is indexed by block
506         // so must be reversed.
507         self.vcode.branch_block_arg_succ_range.reverse();
508 
509         // To translate an instruction index *endpoint* in reversed
510         // order to forward order, compute `n_insts - i`.
511         //
512         // Why not `n_insts - 1 - i`? That would be correct to
513         // translate an individual instruction index (for ten insts 0
514         // to 9 inclusive, inst 0 becomes 9, and inst 9 becomes
515         // 0). But for the usual inclusive-start, exclusive-end range
516         // idiom, inclusive starts become exclusive ends and
517         // vice-versa, so e.g. an (inclusive) start of 0 becomes an
518         // (exclusive) end of 10.
519         let translate = |inst: InsnIndex| InsnIndex::new(n_insts - inst.index());
520 
521         // Edit the block-range instruction indices.
522         for tuple in &mut self.vcode.block_ranges {
523             let (start, end) = *tuple;
524             *tuple = (translate(end), translate(start)); // Note reversed order.
525         }
526 
527         // Generate debug-value labels based on per-label maps.
528         for (label, tuples) in &self.debug_info {
529             for &(start, end, vreg) in tuples {
530                 let vreg = self.resolve_vreg_alias(vreg);
531                 let fwd_start = translate(end);
532                 let fwd_end = translate(start);
533                 self.vcode
534                     .debug_value_labels
535                     .push((vreg, fwd_start, fwd_end, label.as_u32()));
536             }
537         }
538 
539         // Now sort debug value labels by VReg, as required
540         // by regalloc2.
541         self.vcode
542             .debug_value_labels
543             .sort_unstable_by_key(|(vreg, _, _, _)| *vreg);
544     }
545 
546     fn collect_operands(&mut self) {
547         for (i, insn) in self.vcode.insts.iter().enumerate() {
548             // Push operands from the instruction onto the operand list.
549             //
550             // We rename through the vreg alias table as we collect
551             // the operands. This is better than a separate post-pass
552             // over operands, because it has more cache locality:
553             // operands only need to pass through L1 once. This is
554             // also better than renaming instructions'
555             // operands/registers while lowering, because here we only
556             // need to do the `match` over the instruction to visit
557             // its register fields (which is slow, branchy code) once.
558 
559             let vreg_aliases = &self.vcode.vreg_aliases;
560             let mut op_collector = OperandCollector::new(&mut self.vcode.operands, |vreg| {
561                 Self::resolve_vreg_alias_impl(vreg_aliases, vreg)
562             });
563             insn.get_operands(&mut op_collector);
564             let (ops, clobbers) = op_collector.finish();
565             self.vcode.operand_ranges.push(ops);
566 
567             if clobbers != PRegSet::default() {
568                 self.vcode.clobbers.insert(InsnIndex::new(i), clobbers);
569             }
570 
571             if let Some((dst, src)) = insn.is_move() {
572                 let src = Operand::reg_use(Self::resolve_vreg_alias_impl(vreg_aliases, src.into()));
573                 let dst = Operand::reg_def(Self::resolve_vreg_alias_impl(
574                     vreg_aliases,
575                     dst.to_reg().into(),
576                 ));
577                 // Note that regalloc2 requires these in (src, dst) order.
578                 self.vcode.is_move.insert(InsnIndex::new(i), (src, dst));
579             }
580         }
581 
582         // Translate blockparam args via the vreg aliases table as well.
583         for arg in &mut self.vcode.branch_block_args {
584             let new_arg = Self::resolve_vreg_alias_impl(&self.vcode.vreg_aliases, *arg);
585             log::trace!("operandcollector: block arg {:?} -> {:?}", arg, new_arg);
586             *arg = new_arg;
587         }
588     }
589 
590     /// Build the final VCode.
591     pub fn build(mut self) -> VCode<I> {
592         if self.direction == VCodeBuildDirection::Backward {
593             self.reverse_and_finalize();
594         }
595         self.collect_operands();
596         self.compute_preds_from_succs();
597         self.vcode.debug_value_labels.sort_unstable();
598         self.vcode
599     }
600 }
601 
602 /// Is this type a reference type?
603 fn is_reftype(ty: Type) -> bool {
604     ty == types::R64 || ty == types::R32
605 }
606 
607 impl<I: VCodeInst> VCode<I> {
608     /// New empty VCode.
609     fn new(
610         abi: Box<dyn ABICallee<I = I>>,
611         emit_info: I::Info,
612         block_order: BlockLoweringOrder,
613         constants: VCodeConstants,
614     ) -> VCode<I> {
615         let n_blocks = block_order.lowered_order().len();
616         VCode {
617             vreg_types: vec![],
618             have_ref_values: false,
619             insts: Vec::with_capacity(10 * n_blocks),
620             operands: Vec::with_capacity(30 * n_blocks),
621             operand_ranges: Vec::with_capacity(10 * n_blocks),
622             clobbers: FxHashMap::default(),
623             is_move: FxHashMap::default(),
624             srclocs: Vec::with_capacity(10 * n_blocks),
625             entry: BlockIndex::new(0),
626             block_ranges: Vec::with_capacity(n_blocks),
627             block_succ_range: Vec::with_capacity(n_blocks),
628             block_succs_preds: Vec::with_capacity(2 * n_blocks),
629             block_pred_range: Vec::with_capacity(n_blocks),
630             block_params_range: Vec::with_capacity(n_blocks),
631             block_params: Vec::with_capacity(5 * n_blocks),
632             branch_block_args: Vec::with_capacity(10 * n_blocks),
633             branch_block_arg_range: Vec::with_capacity(2 * n_blocks),
634             branch_block_arg_succ_range: Vec::with_capacity(n_blocks),
635             block_order,
636             abi,
637             emit_info,
638             reftyped_vregs: vec![],
639             reftyped_vregs_set: FxHashSet::default(),
640             constants,
641             debug_value_labels: vec![],
642             vreg_aliases: FxHashMap::with_capacity_and_hasher(10 * n_blocks, Default::default()),
643         }
644     }
645 
646     /// Get the number of blocks. Block indices will be in the range `0 ..
647     /// (self.num_blocks() - 1)`.
648     pub fn num_blocks(&self) -> usize {
649         self.block_ranges.len()
650     }
651 
652     /// Get the successors for a block.
653     pub fn succs(&self, block: BlockIndex) -> &[BlockIndex] {
654         let (start, end) = self.block_succ_range[block.index()];
655         &self.block_succs_preds[start as usize..end as usize]
656     }
657 
658     fn compute_clobbers(&self, regalloc: &regalloc2::Output) -> Vec<Writable<RealReg>> {
659         // Compute clobbered registers.
660         let mut clobbered = vec![];
661         let mut clobbered_set = FxHashSet::default();
662 
663         // All moves are included in clobbers.
664         for edit in &regalloc.edits {
665             let Edit::Move { to, .. } = edit.1;
666             if let Some(preg) = to.as_reg() {
667                 let reg = RealReg::from(preg);
668                 if clobbered_set.insert(reg) {
669                     clobbered.push(Writable::from_reg(reg));
670                 }
671             }
672         }
673 
674         for (i, (start, end)) in self.operand_ranges.iter().enumerate() {
675             // Skip this instruction if not "included in clobbers" as
676             // per the MachInst. (Some backends use this to implement
677             // ABI specifics; e.g., excluding calls of the same ABI as
678             // the current function from clobbers, because by
679             // definition everything clobbered by the call can be
680             // clobbered by this function without saving as well.)
681             if !self.insts[i].is_included_in_clobbers() {
682                 continue;
683             }
684 
685             let start = *start as usize;
686             let end = *end as usize;
687             let operands = &self.operands[start..end];
688             let allocs = &regalloc.allocs[start..end];
689             for (operand, alloc) in operands.iter().zip(allocs.iter()) {
690                 // We're interested only in writes (Mods or Defs).
691                 if operand.kind() == OperandKind::Use {
692                     continue;
693                 }
694                 if let Some(preg) = alloc.as_reg() {
695                     let reg = RealReg::from(preg);
696                     if clobbered_set.insert(reg) {
697                         clobbered.push(Writable::from_reg(reg));
698                     }
699                 }
700             }
701 
702             // Also add explicitly-clobbered registers.
703             for preg in self
704                 .clobbers
705                 .get(&InsnIndex::new(i))
706                 .cloned()
707                 .unwrap_or_default()
708             {
709                 let reg = RealReg::from(preg);
710                 if clobbered_set.insert(reg) {
711                     clobbered.push(Writable::from_reg(reg));
712                 }
713             }
714         }
715 
716         clobbered
717     }
718 
719     /// Emit the instructions to a `MachBuffer`, containing fixed-up
720     /// code and external reloc/trap/etc. records ready for use. Takes
721     /// the regalloc results as well.
722     ///
723     /// Returns the machine code itself, and optionally metadata
724     /// and/or a disassembly, as an `EmitResult`. The `VCode` itself
725     /// is consumed by the emission process.
726     pub fn emit(
727         mut self,
728         regalloc: &regalloc2::Output,
729         want_disasm: bool,
730         want_metadata: bool,
731     ) -> EmitResult<I>
732     where
733         I: MachInstEmit,
734     {
735         // To write into disasm string.
736         use core::fmt::Write;
737 
738         let _tt = timing::vcode_emit();
739         let mut buffer = MachBuffer::new();
740         let mut bb_starts: Vec<Option<CodeOffset>> = vec![];
741 
742         // The first M MachLabels are reserved for block indices, the next N MachLabels for
743         // constants.
744         buffer.reserve_labels_for_blocks(self.num_blocks());
745         buffer.reserve_labels_for_constants(&self.constants);
746 
747         // Construct the final order we emit code in: cold blocks at the end.
748         let mut final_order: SmallVec<[BlockIndex; 16]> = smallvec![];
749         let mut cold_blocks: SmallVec<[BlockIndex; 16]> = smallvec![];
750         for block in 0..self.num_blocks() {
751             let block = BlockIndex::new(block);
752             if self.block_order.is_cold(block) {
753                 cold_blocks.push(block);
754             } else {
755                 final_order.push(block);
756             }
757         }
758         final_order.extend(cold_blocks.clone());
759 
760         // Compute/save info we need for the prologue: clobbers and
761         // number of spillslots.
762         //
763         // We clone `abi` here because we will mutate it as we
764         // generate the prologue and set other info, but we can't
765         // mutate `VCode`. The info it usually carries prior to
766         // setting clobbers is fairly minimal so this should be
767         // relatively cheap.
768         let clobbers = self.compute_clobbers(regalloc);
769         self.abi.set_num_spillslots(regalloc.num_spillslots);
770         self.abi.set_clobbered(clobbers);
771 
772         // We need to generate the prologue in order to get the ABI
773         // object into the right state first. We'll emit it when we
774         // hit the right block below.
775         let prologue_insts = self.abi.gen_prologue();
776 
777         // Emit blocks.
778         let mut cur_srcloc = None;
779         let mut last_offset = None;
780         let mut inst_offsets = vec![];
781         let mut state = I::State::new(&*self.abi);
782 
783         let mut disasm = String::new();
784 
785         if !self.debug_value_labels.is_empty() {
786             inst_offsets.resize(self.insts.len(), 0);
787         }
788 
789         for block in final_order {
790             log::trace!("emitting block {:?}", block);
791             let new_offset = I::align_basic_block(buffer.cur_offset());
792             while new_offset > buffer.cur_offset() {
793                 // Pad with NOPs up to the aligned block offset.
794                 let nop = I::gen_nop((new_offset - buffer.cur_offset()) as usize);
795                 nop.emit(&[], &mut buffer, &self.emit_info, &mut Default::default());
796             }
797             assert_eq!(buffer.cur_offset(), new_offset);
798 
799             let do_emit = |inst: &I,
800                            allocs: &[Allocation],
801                            disasm: &mut String,
802                            buffer: &mut MachBuffer<I>,
803                            state: &mut I::State| {
804                 if want_disasm {
805                     let mut s = state.clone();
806                     writeln!(disasm, "  {}", inst.pretty_print_inst(allocs, &mut s)).unwrap();
807                 }
808                 inst.emit(allocs, buffer, &self.emit_info, state);
809             };
810 
811             // Is this the first block? Emit the prologue directly if so.
812             if block == self.entry {
813                 log::trace!(" -> entry block");
814                 buffer.start_srcloc(SourceLoc::default());
815                 state.pre_sourceloc(SourceLoc::default());
816                 for inst in &prologue_insts {
817                     do_emit(&inst, &[], &mut disasm, &mut buffer, &mut state);
818                 }
819                 buffer.end_srcloc();
820             }
821 
822             // Now emit the regular block body.
823 
824             buffer.bind_label(MachLabel::from_block(block));
825 
826             if want_disasm {
827                 writeln!(&mut disasm, "block{}:", block.index()).unwrap();
828             }
829 
830             if want_metadata {
831                 // Track BB starts. If we have backed up due to MachBuffer
832                 // branch opts, note that the removed blocks were removed.
833                 let cur_offset = buffer.cur_offset();
834                 if last_offset.is_some() && cur_offset <= last_offset.unwrap() {
835                     for i in (0..bb_starts.len()).rev() {
836                         if bb_starts[i].is_some() && cur_offset > bb_starts[i].unwrap() {
837                             break;
838                         }
839                         bb_starts[i] = None;
840                     }
841                 }
842                 bb_starts.push(Some(cur_offset));
843                 last_offset = Some(cur_offset);
844             }
845 
846             for inst_or_edit in regalloc.block_insts_and_edits(&self, block) {
847                 match inst_or_edit {
848                     InstOrEdit::Inst(iix) => {
849                         if !self.debug_value_labels.is_empty() {
850                             // If we need to produce debug info,
851                             // record the offset of each instruction
852                             // so that we can translate value-label
853                             // ranges to machine-code offsets.
854 
855                             // Cold blocks violate monotonicity
856                             // assumptions elsewhere (that
857                             // instructions in inst-index order are in
858                             // order in machine code), so we omit
859                             // their offsets here. Value-label range
860                             // generation below will skip empty ranges
861                             // and ranges with to-offsets of zero.
862                             if !self.block_order.is_cold(block) {
863                                 inst_offsets[iix.index()] = buffer.cur_offset();
864                             }
865                         }
866 
867                         if self.insts[iix.index()].is_move().is_some() {
868                             // Skip moves in the pre-regalloc program;
869                             // all of these are incorporated by the
870                             // regalloc into its unified move handling
871                             // and they come out the other end, if
872                             // still needed (not elided), as
873                             // regalloc-inserted moves.
874                             continue;
875                         }
876 
877                         // Update the srcloc at this point in the buffer.
878                         let srcloc = self.srclocs[iix.index()];
879                         if cur_srcloc != Some(srcloc) {
880                             if cur_srcloc.is_some() {
881                                 buffer.end_srcloc();
882                             }
883                             buffer.start_srcloc(srcloc);
884                             cur_srcloc = Some(srcloc);
885                         }
886                         state.pre_sourceloc(cur_srcloc.unwrap_or(SourceLoc::default()));
887 
888                         // If this is a safepoint, compute a stack map
889                         // and pass it to the emit state.
890                         if self.insts[iix.index()].is_safepoint() {
891                             let mut safepoint_slots: SmallVec<[SpillSlot; 8]> = smallvec![];
892                             // Find the contiguous range of
893                             // (progpoint, allocation) safepoint slot
894                             // records in `regalloc.safepoint_slots`
895                             // for this instruction index.
896                             let safepoint_slots_start = regalloc
897                                 .safepoint_slots
898                                 .binary_search_by(|(progpoint, _alloc)| {
899                                     if progpoint.inst() >= iix {
900                                         std::cmp::Ordering::Greater
901                                     } else {
902                                         std::cmp::Ordering::Less
903                                     }
904                                 })
905                                 .unwrap_err();
906 
907                             for (_, alloc) in regalloc.safepoint_slots[safepoint_slots_start..]
908                                 .iter()
909                                 .take_while(|(progpoint, _)| progpoint.inst() == iix)
910                             {
911                                 let slot = alloc.as_stack().unwrap();
912                                 safepoint_slots.push(slot);
913                             }
914                             if !safepoint_slots.is_empty() {
915                                 let stack_map = self
916                                     .abi
917                                     .spillslots_to_stack_map(&safepoint_slots[..], &state);
918                                 state.pre_safepoint(stack_map);
919                             }
920                         }
921 
922                         // Get the allocations for this inst from the regalloc result.
923                         let allocs = regalloc.inst_allocs(iix);
924 
925                         // If the instruction we are about to emit is
926                         // a return, place an epilogue at this point
927                         // (and don't emit the return; the actual
928                         // epilogue will contain it).
929                         if self.insts[iix.index()].is_term() == MachTerminator::Ret {
930                             for inst in self.abi.gen_epilogue() {
931                                 do_emit(&inst, &[], &mut disasm, &mut buffer, &mut state);
932                             }
933                         } else {
934                             // Emit the instruction!
935                             do_emit(
936                                 &self.insts[iix.index()],
937                                 allocs,
938                                 &mut disasm,
939                                 &mut buffer,
940                                 &mut state,
941                             );
942                         }
943                     }
944 
945                     InstOrEdit::Edit(Edit::Move { from, to }) => {
946                         // Create a move/spill/reload instruction and
947                         // immediately emit it.
948                         match (from.as_reg(), to.as_reg()) {
949                             (Some(from), Some(to)) => {
950                                 // Reg-to-reg move.
951                                 let from_rreg = Reg::from(from);
952                                 let to_rreg = Writable::from_reg(Reg::from(to));
953                                 debug_assert_eq!(from.class(), to.class());
954                                 let ty = I::canonical_type_for_rc(from.class());
955                                 let mv = I::gen_move(to_rreg, from_rreg, ty);
956                                 do_emit(&mv, &[], &mut disasm, &mut buffer, &mut state);
957                             }
958                             (Some(from), None) => {
959                                 // Spill from register to spillslot.
960                                 let to = to.as_stack().unwrap();
961                                 let from_rreg = RealReg::from(from);
962                                 debug_assert_eq!(from.class(), to.class());
963                                 let spill = self.abi.gen_spill(to, from_rreg);
964                                 do_emit(&spill, &[], &mut disasm, &mut buffer, &mut state);
965                             }
966                             (None, Some(to)) => {
967                                 // Load from spillslot to register.
968                                 let from = from.as_stack().unwrap();
969                                 let to_rreg = Writable::from_reg(RealReg::from(to));
970                                 debug_assert_eq!(from.class(), to.class());
971                                 let reload = self.abi.gen_reload(to_rreg, from);
972                                 do_emit(&reload, &[], &mut disasm, &mut buffer, &mut state);
973                             }
974                             (None, None) => {
975                                 panic!("regalloc2 should have eliminated stack-to-stack moves!");
976                             }
977                         }
978                     }
979                 }
980             }
981 
982             if cur_srcloc.is_some() {
983                 buffer.end_srcloc();
984                 cur_srcloc = None;
985             }
986 
987             // Do we need an island? Get the worst-case size of the
988             // next BB and see if, having emitted that many bytes, we
989             // will be beyond the deadline.
990             if block.index() < (self.num_blocks() - 1) {
991                 let next_block = block.index() + 1;
992                 let next_block_range = self.block_ranges[next_block];
993                 let next_block_size = next_block_range.1.index() - next_block_range.0.index();
994                 let worst_case_next_bb = I::worst_case_size() * next_block_size as u32;
995                 if buffer.island_needed(worst_case_next_bb) {
996                     buffer.emit_island(worst_case_next_bb);
997                 }
998             }
999         }
1000 
1001         // Emit the constants used by the function.
1002         for (constant, data) in self.constants.iter() {
1003             let label = buffer.get_label_for_constant(constant);
1004             buffer.defer_constant(label, data.alignment(), data.as_slice(), u32::max_value());
1005         }
1006 
1007         let func_body_len = buffer.cur_offset();
1008 
1009         // Create `bb_edges` and final (filtered) `bb_starts`.
1010         let mut bb_edges = vec![];
1011         let mut bb_offsets = vec![];
1012         if want_metadata {
1013             for block in 0..self.num_blocks() {
1014                 if bb_starts[block].is_none() {
1015                     // Block was deleted by MachBuffer; skip.
1016                     continue;
1017                 }
1018                 let from = bb_starts[block].unwrap();
1019 
1020                 bb_offsets.push(from);
1021                 // Resolve each `succ` label and add edges.
1022                 let succs = self.block_succs(BlockIndex::new(block));
1023                 for &succ in succs.iter() {
1024                     let to = buffer.resolve_label_offset(MachLabel::from_block(succ));
1025                     bb_edges.push((from, to));
1026                 }
1027             }
1028         }
1029 
1030         let value_labels_ranges =
1031             self.compute_value_labels_ranges(regalloc, &inst_offsets[..], func_body_len);
1032         let frame_size = self.abi.frame_size();
1033 
1034         EmitResult {
1035             buffer,
1036             bb_offsets,
1037             bb_edges,
1038             inst_offsets,
1039             func_body_len,
1040             disasm: if want_disasm { Some(disasm) } else { None },
1041             stackslot_offsets: self.abi.stackslot_offsets().clone(),
1042             value_labels_ranges,
1043             frame_size,
1044         }
1045     }
1046 
1047     fn compute_value_labels_ranges(
1048         &self,
1049         regalloc: &regalloc2::Output,
1050         inst_offsets: &[CodeOffset],
1051         func_body_len: u32,
1052     ) -> ValueLabelsRanges {
1053         if self.debug_value_labels.is_empty() {
1054             return ValueLabelsRanges::default();
1055         }
1056 
1057         let mut value_labels_ranges: ValueLabelsRanges = HashMap::new();
1058         for &(label, from, to, alloc) in &regalloc.debug_locations {
1059             let ranges = value_labels_ranges
1060                 .entry(ValueLabel::from_u32(label))
1061                 .or_insert_with(|| vec![]);
1062             let from_offset = inst_offsets[from.inst().index()];
1063             let to_offset = if to.inst().index() == inst_offsets.len() {
1064                 func_body_len
1065             } else {
1066                 inst_offsets[to.inst().index()]
1067             };
1068 
1069             // Empty range or to-offset of zero can happen because of
1070             // cold blocks (see above).
1071             if to_offset == 0 || from_offset == to_offset {
1072                 continue;
1073             }
1074 
1075             let loc = if let Some(preg) = alloc.as_reg() {
1076                 LabelValueLoc::Reg(Reg::from(preg))
1077             } else {
1078                 // We can't translate spillslot locations at the
1079                 // moment because ValueLabelLoc requires an
1080                 // instantaneous SP offset, and this can *change*
1081                 // within the range we have here because of callsites
1082                 // adjusting SP temporarily. To avoid the complexity
1083                 // of accurately plumbing through nominal-SP
1084                 // adjustment sites, we just omit debug info for
1085                 // values that are spilled. Not ideal, but debug info
1086                 // is best-effort.
1087                 continue;
1088             };
1089 
1090             ranges.push(ValueLocRange {
1091                 loc,
1092                 // ValueLocRanges are recorded by *instruction-end
1093                 // offset*. `from_offset` is the *start* of the
1094                 // instruction; that is the same as the end of another
1095                 // instruction, so we only want to begin coverage once
1096                 // we are past the previous instruction's end.
1097                 start: from_offset + 1,
1098                 // Likewise, `end` is exclusive, but we want to
1099                 // *include* the end of the last
1100                 // instruction. `to_offset` is the start of the
1101                 // `to`-instruction, which is the exclusive end, i.e.,
1102                 // the first instruction not covered. That
1103                 // instruction's start is the same as the end of the
1104                 // last instruction that is included, so we go one
1105                 // byte further to be sure to include it.
1106                 end: to_offset + 1,
1107             });
1108         }
1109 
1110         value_labels_ranges
1111     }
1112 
1113     /// Get the IR block for a BlockIndex, if one exists.
1114     pub fn bindex_to_bb(&self, block: BlockIndex) -> Option<ir::Block> {
1115         self.block_order.lowered_order()[block.index()].orig_block()
1116     }
1117 }
1118 
1119 impl<I: VCodeInst> RegallocFunction for VCode<I> {
1120     fn num_insts(&self) -> usize {
1121         self.insts.len()
1122     }
1123 
1124     fn num_blocks(&self) -> usize {
1125         self.block_ranges.len()
1126     }
1127 
1128     fn entry_block(&self) -> BlockIndex {
1129         self.entry
1130     }
1131 
1132     fn block_insns(&self, block: BlockIndex) -> InstRange {
1133         let (start, end) = self.block_ranges[block.index()];
1134         InstRange::forward(start, end)
1135     }
1136 
1137     fn block_succs(&self, block: BlockIndex) -> &[BlockIndex] {
1138         let (start, end) = self.block_succ_range[block.index()];
1139         &self.block_succs_preds[start as usize..end as usize]
1140     }
1141 
1142     fn block_preds(&self, block: BlockIndex) -> &[BlockIndex] {
1143         let (start, end) = self.block_pred_range[block.index()];
1144         &self.block_succs_preds[start as usize..end as usize]
1145     }
1146 
1147     fn block_params(&self, block: BlockIndex) -> &[VReg] {
1148         let (start, end) = self.block_params_range[block.index()];
1149         &self.block_params[start as usize..end as usize]
1150     }
1151 
1152     fn branch_blockparams(&self, block: BlockIndex, _insn: InsnIndex, succ_idx: usize) -> &[VReg] {
1153         let (succ_range_start, succ_range_end) = self.branch_block_arg_succ_range[block.index()];
1154         let succ_ranges =
1155             &self.branch_block_arg_range[succ_range_start as usize..succ_range_end as usize];
1156         let (branch_block_args_start, branch_block_args_end) = succ_ranges[succ_idx];
1157         &self.branch_block_args[branch_block_args_start as usize..branch_block_args_end as usize]
1158     }
1159 
1160     fn is_ret(&self, insn: InsnIndex) -> bool {
1161         match self.insts[insn.index()].is_term() {
1162             MachTerminator::Ret => true,
1163             _ => false,
1164         }
1165     }
1166 
1167     fn is_branch(&self, insn: InsnIndex) -> bool {
1168         match self.insts[insn.index()].is_term() {
1169             MachTerminator::Cond | MachTerminator::Uncond | MachTerminator::Indirect => true,
1170             _ => false,
1171         }
1172     }
1173 
1174     fn requires_refs_on_stack(&self, insn: InsnIndex) -> bool {
1175         self.insts[insn.index()].is_safepoint()
1176     }
1177 
1178     fn is_move(&self, insn: InsnIndex) -> Option<(Operand, Operand)> {
1179         self.is_move.get(&insn).cloned()
1180     }
1181 
1182     fn inst_operands(&self, insn: InsnIndex) -> &[Operand] {
1183         let (start, end) = self.operand_ranges[insn.index()];
1184         &self.operands[start as usize..end as usize]
1185     }
1186 
1187     fn inst_clobbers(&self, insn: InsnIndex) -> PRegSet {
1188         self.clobbers.get(&insn).cloned().unwrap_or_default()
1189     }
1190 
1191     fn num_vregs(&self) -> usize {
1192         std::cmp::max(self.vreg_types.len(), first_user_vreg_index())
1193     }
1194 
1195     fn reftype_vregs(&self) -> &[VReg] {
1196         &self.reftyped_vregs[..]
1197     }
1198 
1199     fn debug_value_labels(&self) -> &[(VReg, InsnIndex, InsnIndex, u32)] {
1200         &self.debug_value_labels[..]
1201     }
1202 
1203     fn is_pinned_vreg(&self, vreg: VReg) -> Option<PReg> {
1204         pinned_vreg_to_preg(vreg)
1205     }
1206 
1207     fn spillslot_size(&self, regclass: RegClass) -> usize {
1208         self.abi.get_spillslot_size(regclass) as usize
1209     }
1210 
1211     fn allow_multiple_vreg_defs(&self) -> bool {
1212         // At least the s390x backend requires this, because the
1213         // `Loop` pseudo-instruction aggregates all Operands so pinned
1214         // vregs (RealRegs) may occur more than once.
1215         true
1216     }
1217 }
1218 
1219 impl<I: VCodeInst> fmt::Debug for VCode<I> {
1220     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1221         writeln!(f, "VCode {{")?;
1222         writeln!(f, "  Entry block: {}", self.entry.index())?;
1223 
1224         let mut state = Default::default();
1225 
1226         let mut alias_keys = self.vreg_aliases.keys().cloned().collect::<Vec<_>>();
1227         alias_keys.sort_unstable();
1228         for key in alias_keys {
1229             let dest = self.vreg_aliases.get(&key).unwrap();
1230             writeln!(f, "  {:?} := {:?}", Reg::from(key), Reg::from(*dest))?;
1231         }
1232 
1233         for block in 0..self.num_blocks() {
1234             let block = BlockIndex::new(block);
1235             writeln!(f, "Block {}:", block.index())?;
1236             if let Some(bb) = self.bindex_to_bb(block) {
1237                 writeln!(f, "    (original IR block: {})", bb)?;
1238             }
1239             for succ in self.succs(block) {
1240                 writeln!(f, "    (successor: Block {})", succ.index())?;
1241             }
1242             let (start, end) = self.block_ranges[block.index()];
1243             writeln!(
1244                 f,
1245                 "    (instruction range: {} .. {})",
1246                 start.index(),
1247                 end.index()
1248             )?;
1249             for inst in start.index()..end.index() {
1250                 writeln!(
1251                     f,
1252                     "  Inst {}: {}",
1253                     inst,
1254                     self.insts[inst].pretty_print_inst(&[], &mut state)
1255                 )?;
1256             }
1257         }
1258 
1259         writeln!(f, "}}")?;
1260         Ok(())
1261     }
1262 }
1263 
1264 /// This structure tracks the large constants used in VCode that will be emitted separately by the
1265 /// [MachBuffer].
1266 ///
1267 /// First, during the lowering phase, constants are inserted using
1268 /// [VCodeConstants.insert]; an intermediate handle, [VCodeConstant], tracks what constants are
1269 /// used in this phase. Some deduplication is performed, when possible, as constant
1270 /// values are inserted.
1271 ///
1272 /// Secondly, during the emission phase, the [MachBuffer] assigns [MachLabel]s for each of the
1273 /// constants so that instructions can refer to the value's memory location. The [MachBuffer]
1274 /// then writes the constant values to the buffer.
1275 #[derive(Default)]
1276 pub struct VCodeConstants {
1277     constants: PrimaryMap<VCodeConstant, VCodeConstantData>,
1278     pool_uses: HashMap<Constant, VCodeConstant>,
1279     well_known_uses: HashMap<*const [u8], VCodeConstant>,
1280     u64s: HashMap<[u8; 8], VCodeConstant>,
1281 }
1282 impl VCodeConstants {
1283     /// Initialize the structure with the expected number of constants.
1284     pub fn with_capacity(expected_num_constants: usize) -> Self {
1285         Self {
1286             constants: PrimaryMap::with_capacity(expected_num_constants),
1287             pool_uses: HashMap::with_capacity(expected_num_constants),
1288             well_known_uses: HashMap::new(),
1289             u64s: HashMap::new(),
1290         }
1291     }
1292 
1293     /// Insert a constant; using this method indicates that a constant value will be used and thus
1294     /// will be emitted to the `MachBuffer`. The current implementation can deduplicate constants
1295     /// that are [VCodeConstantData::Pool] or [VCodeConstantData::WellKnown] but not
1296     /// [VCodeConstantData::Generated].
1297     pub fn insert(&mut self, data: VCodeConstantData) -> VCodeConstant {
1298         match data {
1299             VCodeConstantData::Generated(_) => self.constants.push(data),
1300             VCodeConstantData::Pool(constant, _) => match self.pool_uses.get(&constant) {
1301                 None => {
1302                     let vcode_constant = self.constants.push(data);
1303                     self.pool_uses.insert(constant, vcode_constant);
1304                     vcode_constant
1305                 }
1306                 Some(&vcode_constant) => vcode_constant,
1307             },
1308             VCodeConstantData::WellKnown(data_ref) => {
1309                 match self.well_known_uses.entry(data_ref as *const [u8]) {
1310                     Entry::Vacant(v) => {
1311                         let vcode_constant = self.constants.push(data);
1312                         v.insert(vcode_constant);
1313                         vcode_constant
1314                     }
1315                     Entry::Occupied(o) => *o.get(),
1316                 }
1317             }
1318             VCodeConstantData::U64(value) => match self.u64s.entry(value) {
1319                 Entry::Vacant(v) => {
1320                     let vcode_constant = self.constants.push(data);
1321                     v.insert(vcode_constant);
1322                     vcode_constant
1323                 }
1324                 Entry::Occupied(o) => *o.get(),
1325             },
1326         }
1327     }
1328 
1329     /// Return the number of constants inserted.
1330     pub fn len(&self) -> usize {
1331         self.constants.len()
1332     }
1333 
1334     /// Iterate over the [VCodeConstant] keys inserted in this structure.
1335     pub fn keys(&self) -> Keys<VCodeConstant> {
1336         self.constants.keys()
1337     }
1338 
1339     /// Iterate over the [VCodeConstant] keys and the data (as a byte slice) inserted in this
1340     /// structure.
1341     pub fn iter(&self) -> impl Iterator<Item = (VCodeConstant, &VCodeConstantData)> {
1342         self.constants.iter()
1343     }
1344 }
1345 
1346 /// A use of a constant by one or more VCode instructions; see [VCodeConstants].
1347 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
1348 pub struct VCodeConstant(u32);
1349 entity_impl!(VCodeConstant);
1350 
1351 /// Identify the different types of constant that can be inserted into [VCodeConstants]. Tracking
1352 /// these separately instead of as raw byte buffers allows us to avoid some duplication.
1353 pub enum VCodeConstantData {
1354     /// A constant already present in the Cranelift IR
1355     /// [ConstantPool](crate::ir::constant::ConstantPool).
1356     Pool(Constant, ConstantData),
1357     /// A reference to a well-known constant value that is statically encoded within the compiler.
1358     WellKnown(&'static [u8]),
1359     /// A constant value generated during lowering; the value may depend on the instruction context
1360     /// which makes it difficult to de-duplicate--if possible, use other variants.
1361     Generated(ConstantData),
1362     /// A constant of at most 64 bits. These are deduplicated as
1363     /// well. Stored as a fixed-size array of `u8` so that we do not
1364     /// encounter endianness problems when cross-compiling.
1365     U64([u8; 8]),
1366 }
1367 impl VCodeConstantData {
1368     /// Retrieve the constant data as a byte slice.
1369     pub fn as_slice(&self) -> &[u8] {
1370         match self {
1371             VCodeConstantData::Pool(_, d) | VCodeConstantData::Generated(d) => d.as_slice(),
1372             VCodeConstantData::WellKnown(d) => d,
1373             VCodeConstantData::U64(value) => &value[..],
1374         }
1375     }
1376 
1377     /// Calculate the alignment of the constant data.
1378     pub fn alignment(&self) -> u32 {
1379         if self.as_slice().len() <= 8 {
1380             8
1381         } else {
1382             16
1383         }
1384     }
1385 }
1386 
1387 #[cfg(test)]
1388 mod test {
1389     use super::*;
1390     use std::mem::size_of;
1391 
1392     #[test]
1393     fn size_of_constant_structs() {
1394         assert_eq!(size_of::<Constant>(), 4);
1395         assert_eq!(size_of::<VCodeConstant>(), 4);
1396         assert_eq!(size_of::<ConstantData>(), 24);
1397         assert_eq!(size_of::<VCodeConstantData>(), 32);
1398         assert_eq!(
1399             size_of::<PrimaryMap<VCodeConstant, VCodeConstantData>>(),
1400             24
1401         );
1402         // TODO The VCodeConstants structure's memory size could be further optimized.
1403         // With certain versions of Rust, each `HashMap` in `VCodeConstants` occupied at
1404         // least 48 bytes, making an empty `VCodeConstants` cost 120 bytes.
1405     }
1406 }
1407