1 //! This implements the VCode container: a CFG of Insts that have been lowered.
2 //!
3 //! VCode is virtual-register code. An instruction in VCode is almost a machine
4 //! instruction; however, its register slots can refer to virtual registers in
5 //! addition to real machine registers.
6 //!
7 //! VCode is structured with traditional basic blocks, and
8 //! each block must be terminated by an unconditional branch (one target), a
9 //! conditional branch (two targets), or a return (no targets). Note that this
10 //! slightly differs from the machine code of most ISAs: in most ISAs, a
11 //! conditional branch has one target (and the not-taken case falls through).
12 //! However, we expect that machine backends will elide branches to the following
13 //! block (i.e., zero-offset jumps), and will be able to codegen a branch-cond /
14 //! branch-uncond pair if *both* targets are not fallthrough. This allows us to
15 //! play with layout prior to final binary emission, as well, if we want.
16 //!
17 //! See the main module comment in `mod.rs` for more details on the VCode-based
18 //! backend pipeline.
19 
20 use crate::ir::pcc::*;
21 use crate::ir::{self, types, Constant, ConstantData, ValueLabel};
22 use crate::ranges::Ranges;
23 use crate::timing;
24 use crate::trace;
25 use crate::CodegenError;
26 use crate::{machinst::*, trace_log_enabled};
27 use crate::{LabelValueLoc, ValueLocRange};
28 use regalloc2::{
29     Edit, Function as RegallocFunction, InstOrEdit, InstRange, MachineEnv, Operand,
30     OperandConstraint, OperandKind, PRegSet, RegClass,
31 };
32 use rustc_hash::FxHashMap;
33 
34 use core::cmp::Ordering;
35 use core::fmt::{self, Write};
36 use core::mem::take;
37 use cranelift_entity::{entity_impl, Keys};
38 use std::collections::hash_map::Entry;
39 use std::collections::HashMap;
40 
41 /// Index referring to an instruction in VCode.
42 pub type InsnIndex = regalloc2::Inst;
43 
44 /// Extension trait for `InsnIndex` to allow conversion to a
45 /// `BackwardsInsnIndex`.
46 trait ToBackwardsInsnIndex {
47     fn to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex;
48 }
49 
50 impl ToBackwardsInsnIndex for InsnIndex {
51     fn to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex {
52         BackwardsInsnIndex::new(num_insts - self.index() - 1)
53     }
54 }
55 
56 /// An index referring to an instruction in the VCode when it is backwards,
57 /// during VCode construction.
58 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
59 #[cfg_attr(
60     feature = "enable-serde",
61     derive(::serde::Serialize, ::serde::Deserialize)
62 )]
63 pub struct BackwardsInsnIndex(InsnIndex);
64 
65 impl BackwardsInsnIndex {
66     pub fn new(i: usize) -> Self {
67         BackwardsInsnIndex(InsnIndex::new(i))
68     }
69 }
70 
71 /// Index referring to a basic block in VCode.
72 pub type BlockIndex = regalloc2::Block;
73 
74 /// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be
75 /// a `MachInst` and it must be able to emit itself at least to a `SizeCodeSink`.
76 pub trait VCodeInst: MachInst + MachInstEmit {}
77 impl<I: MachInst + MachInstEmit> VCodeInst for I {}
78 
79 /// A function in "VCode" (virtualized-register code) form, after
80 /// lowering.  This is essentially a standard CFG of basic blocks,
81 /// where each basic block consists of lowered instructions produced
82 /// by the machine-specific backend.
83 ///
84 /// Note that the VCode is immutable once produced, and is not
85 /// modified by register allocation in particular. Rather, register
86 /// allocation on the `VCode` produces a separate `regalloc2::Output`
87 /// struct, and this can be passed to `emit`. `emit` in turn does not
88 /// modify the vcode, but produces an `EmitResult`, which contains the
89 /// machine code itself, and the associated disassembly and/or
90 /// metadata as requested.
91 pub struct VCode<I: VCodeInst> {
92     /// VReg IR-level types.
93     vreg_types: Vec<Type>,
94 
95     /// Lowered machine instructions in order corresponding to the original IR.
96     insts: Vec<I>,
97 
98     /// A map from backwards instruction index to the user stack map for that
99     /// instruction.
100     ///
101     /// This is a sparse side table that only has entries for instructions that
102     /// are safepoints, and only for a subset of those that have an associated
103     /// user stack map.
104     user_stack_maps: FxHashMap<BackwardsInsnIndex, ir::UserStackMap>,
105 
106     /// Operands: pre-regalloc references to virtual registers with
107     /// constraints, in one flattened array. This allows the regalloc
108     /// to efficiently access all operands without requiring expensive
109     /// matches or method invocations on insts.
110     operands: Vec<Operand>,
111 
112     /// Operand index ranges: for each instruction in `insts`, there
113     /// is a tuple here providing the range in `operands` for that
114     /// instruction's operands.
115     operand_ranges: Ranges,
116 
117     /// Clobbers: a sparse map from instruction indices to clobber masks.
118     clobbers: FxHashMap<InsnIndex, PRegSet>,
119 
120     /// Source locations for each instruction. (`SourceLoc` is a `u32`, so it is
121     /// reasonable to keep one of these per instruction.)
122     srclocs: Vec<RelSourceLoc>,
123 
124     /// Entry block.
125     entry: BlockIndex,
126 
127     /// Block instruction indices.
128     block_ranges: Ranges,
129 
130     /// Block successors: index range in the `block_succs` list.
131     block_succ_range: Ranges,
132 
133     /// Block successor lists, concatenated into one vec. The
134     /// `block_succ_range` list of tuples above gives (start, end)
135     /// ranges within this list that correspond to each basic block's
136     /// successors.
137     block_succs: Vec<regalloc2::Block>,
138 
139     /// Block predecessors: index range in the `block_preds` list.
140     block_pred_range: Ranges,
141 
142     /// Block predecessor lists, concatenated into one vec. The
143     /// `block_pred_range` list of tuples above gives (start, end)
144     /// ranges within this list that correspond to each basic block's
145     /// predecessors.
146     block_preds: Vec<regalloc2::Block>,
147 
148     /// Block parameters: index range in `block_params` below.
149     block_params_range: Ranges,
150 
151     /// Block parameter lists, concatenated into one vec. The
152     /// `block_params_range` list of tuples above gives (start, end)
153     /// ranges within this list that correspond to each basic block's
154     /// blockparam vregs.
155     block_params: Vec<regalloc2::VReg>,
156 
157     /// Outgoing block arguments on branch instructions, concatenated
158     /// into one list.
159     ///
160     /// Note that this is conceptually a 3D array: we have a VReg list
161     /// per block, per successor. We flatten those three dimensions
162     /// into this 1D vec, then store index ranges in two levels of
163     /// indirection.
164     ///
165     /// Indexed by the indices in `branch_block_arg_succ_range`.
166     branch_block_args: Vec<regalloc2::VReg>,
167 
168     /// Array of sequences of (start, end) tuples in
169     /// `branch_block_args`, one for each successor; these sequences
170     /// for each block are concatenated.
171     ///
172     /// Indexed by the indices in `branch_block_arg_succ_range`.
173     branch_block_arg_range: Ranges,
174 
175     /// For a given block, indices in `branch_block_arg_range`
176     /// corresponding to all of its successors.
177     branch_block_arg_succ_range: Ranges,
178 
179     /// Block-order information.
180     block_order: BlockLoweringOrder,
181 
182     /// ABI object.
183     pub(crate) abi: Callee<I::ABIMachineSpec>,
184 
185     /// Constant information used during code emission. This should be
186     /// immutable across function compilations within the same module.
187     emit_info: I::Info,
188 
189     /// Constants.
190     pub(crate) constants: VCodeConstants,
191 
192     /// Value labels for debuginfo attached to vregs.
193     debug_value_labels: Vec<(VReg, InsnIndex, InsnIndex, u32)>,
194 
195     pub(crate) sigs: SigSet,
196 
197     /// Facts on VRegs, for proof-carrying code verification.
198     facts: Vec<Option<Fact>>,
199 
200     log2_min_function_alignment: u8,
201 }
202 
203 /// The result of `VCode::emit`. Contains all information computed
204 /// during emission: actual machine code, optionally a disassembly,
205 /// and optionally metadata about the code layout.
206 pub struct EmitResult {
207     /// The MachBuffer containing the machine code.
208     pub buffer: MachBufferFinalized<Stencil>,
209 
210     /// Offset of each basic block, recorded during emission. Computed
211     /// only if `debug_value_labels` is non-empty.
212     pub bb_offsets: Vec<CodeOffset>,
213 
214     /// Final basic-block edges, in terms of code offsets of
215     /// bb-starts. Computed only if `debug_value_labels` is non-empty.
216     pub bb_edges: Vec<(CodeOffset, CodeOffset)>,
217 
218     /// Final length of function body.
219     pub func_body_len: CodeOffset,
220 
221     /// The pretty-printed disassembly, if any. This uses the same
222     /// pretty-printing for MachInsts as the pre-regalloc VCode Debug
223     /// implementation, but additionally includes the prologue and
224     /// epilogue(s), and makes use of the regalloc results.
225     pub disasm: Option<String>,
226 
227     /// Offsets of sized stackslots.
228     pub sized_stackslot_offsets: PrimaryMap<StackSlot, u32>,
229 
230     /// Offsets of dynamic stackslots.
231     pub dynamic_stackslot_offsets: PrimaryMap<DynamicStackSlot, u32>,
232 
233     /// Value-labels information (debug metadata).
234     pub value_labels_ranges: ValueLabelsRanges,
235 
236     /// Stack frame size.
237     pub frame_size: u32,
238 }
239 
240 /// A builder for a VCode function body.
241 ///
242 /// This builder has the ability to accept instructions in either
243 /// forward or reverse order, depending on the pass direction that
244 /// produces the VCode. The lowering from CLIF to VCode<MachInst>
245 /// ordinarily occurs in reverse order (in order to allow instructions
246 /// to be lowered only if used, and not merged) so a reversal will
247 /// occur at the end of lowering to ensure the VCode is in machine
248 /// order.
249 ///
250 /// If built in reverse, block and instruction indices used once the
251 /// VCode is built are relative to the final (reversed) order, not the
252 /// order of construction. Note that this means we do not know the
253 /// final block or instruction indices when building, so we do not
254 /// hand them out. (The user is assumed to know them when appending
255 /// terminator instructions with successor blocks.)
256 pub struct VCodeBuilder<I: VCodeInst> {
257     /// In-progress VCode.
258     pub(crate) vcode: VCode<I>,
259 
260     /// In what direction is the build occurring?
261     direction: VCodeBuildDirection,
262 
263     /// Debug-value label in-progress map, keyed by label. For each
264     /// label, we keep disjoint ranges mapping to vregs. We'll flatten
265     /// this into (vreg, range, label) tuples when done.
266     debug_info: FxHashMap<ValueLabel, Vec<(InsnIndex, InsnIndex, VReg)>>,
267 }
268 
269 /// Direction in which a VCodeBuilder builds VCode.
270 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
271 pub enum VCodeBuildDirection {
272     // TODO: add `Forward` once we need it and can test it adequately.
273     /// Backward-build pass: we expect the producer to call `emit()`
274     /// with instructions in reverse program order within each block.
275     Backward,
276 }
277 
278 impl<I: VCodeInst> VCodeBuilder<I> {
279     /// Create a new VCodeBuilder.
280     pub fn new(
281         sigs: SigSet,
282         abi: Callee<I::ABIMachineSpec>,
283         emit_info: I::Info,
284         block_order: BlockLoweringOrder,
285         constants: VCodeConstants,
286         direction: VCodeBuildDirection,
287         log2_min_function_alignment: u8,
288     ) -> Self {
289         let vcode = VCode::new(
290             sigs,
291             abi,
292             emit_info,
293             block_order,
294             constants,
295             log2_min_function_alignment,
296         );
297 
298         VCodeBuilder {
299             vcode,
300             direction,
301             debug_info: FxHashMap::default(),
302         }
303     }
304 
305     pub fn init_retval_area(&mut self, vregs: &mut VRegAllocator<I>) -> CodegenResult<()> {
306         self.vcode.abi.init_retval_area(&self.vcode.sigs, vregs)
307     }
308 
309     /// Access the ABI object.
310     pub fn abi(&self) -> &Callee<I::ABIMachineSpec> {
311         &self.vcode.abi
312     }
313 
314     /// Access the ABI object.
315     pub fn abi_mut(&mut self) -> &mut Callee<I::ABIMachineSpec> {
316         &mut self.vcode.abi
317     }
318 
319     pub fn sigs(&self) -> &SigSet {
320         &self.vcode.sigs
321     }
322 
323     pub fn sigs_mut(&mut self) -> &mut SigSet {
324         &mut self.vcode.sigs
325     }
326 
327     /// Access to the BlockLoweringOrder object.
328     pub fn block_order(&self) -> &BlockLoweringOrder {
329         &self.vcode.block_order
330     }
331 
332     /// Set the current block as the entry block.
333     pub fn set_entry(&mut self, block: BlockIndex) {
334         self.vcode.entry = block;
335     }
336 
337     /// End the current basic block. Must be called after emitting vcode insts
338     /// for IR insts and prior to ending the function (building the VCode).
339     pub fn end_bb(&mut self) {
340         let end_idx = self.vcode.insts.len();
341         // Add the instruction index range to the list of blocks.
342         self.vcode.block_ranges.push_end(end_idx);
343         // End the successors list.
344         let succ_end = self.vcode.block_succs.len();
345         self.vcode.block_succ_range.push_end(succ_end);
346         // End the blockparams list.
347         let block_params_end = self.vcode.block_params.len();
348         self.vcode.block_params_range.push_end(block_params_end);
349         // End the branch blockparam args list.
350         let branch_block_arg_succ_end = self.vcode.branch_block_arg_range.len();
351         self.vcode
352             .branch_block_arg_succ_range
353             .push_end(branch_block_arg_succ_end);
354     }
355 
356     pub fn add_block_param(&mut self, param: VirtualReg) {
357         self.vcode.block_params.push(param.into());
358     }
359 
360     fn add_branch_args_for_succ(&mut self, args: &[Reg]) {
361         self.vcode
362             .branch_block_args
363             .extend(args.iter().map(|&arg| VReg::from(arg)));
364         let end = self.vcode.branch_block_args.len();
365         self.vcode.branch_block_arg_range.push_end(end);
366     }
367 
368     /// Push an instruction for the current BB and current IR inst
369     /// within the BB.
370     pub fn push(&mut self, insn: I, loc: RelSourceLoc) {
371         assert!(!insn.is_low_level_branch()); // These are not meant to be in VCode.
372         self.vcode.insts.push(insn);
373         self.vcode.srclocs.push(loc);
374     }
375 
376     /// Add a successor block with branch args.
377     pub fn add_succ(&mut self, block: BlockIndex, args: &[Reg]) {
378         self.vcode.block_succs.push(block);
379         self.add_branch_args_for_succ(args);
380     }
381 
382     /// Add a debug value label to a register.
383     pub fn add_value_label(&mut self, reg: Reg, label: ValueLabel) {
384         // We'll fix up labels in reverse(). Because we're generating
385         // code bottom-to-top, the liverange of the label goes *from*
386         // the last index at which was defined (or 0, which is the end
387         // of the eventual function) *to* just this instruction, and
388         // no further.
389         let inst = InsnIndex::new(self.vcode.insts.len());
390         let labels = self.debug_info.entry(label).or_insert_with(|| vec![]);
391         let last = labels
392             .last()
393             .map(|(_start, end, _vreg)| *end)
394             .unwrap_or(InsnIndex::new(0));
395         labels.push((last, inst, reg.into()));
396     }
397 
398     /// Access the constants.
399     pub fn constants(&mut self) -> &mut VCodeConstants {
400         &mut self.vcode.constants
401     }
402 
403     fn compute_preds_from_succs(&mut self) {
404         // Do a linear-time counting sort: first determine how many
405         // times each block appears as a successor.
406         let mut starts = vec![0u32; self.vcode.num_blocks()];
407         for succ in &self.vcode.block_succs {
408             starts[succ.index()] += 1;
409         }
410 
411         // Determine for each block the starting index where that
412         // block's predecessors should go. This is equivalent to the
413         // ranges we need to store in block_pred_range.
414         self.vcode.block_pred_range.reserve(starts.len());
415         let mut end = 0;
416         for count in starts.iter_mut() {
417             let start = end;
418             end += *count;
419             *count = start;
420             self.vcode.block_pred_range.push_end(end as usize);
421         }
422         let end = end as usize;
423         debug_assert_eq!(end, self.vcode.block_succs.len());
424 
425         // Walk over the successors again, this time grouped by
426         // predecessor, and push the predecessor at the current
427         // starting position of each of its successors. We build
428         // each group of predecessors in whatever order Ranges::iter
429         // returns them; regalloc2 doesn't care.
430         self.vcode.block_preds.resize(end, BlockIndex::invalid());
431         for (pred, range) in self.vcode.block_succ_range.iter() {
432             let pred = BlockIndex::new(pred);
433             for succ in &self.vcode.block_succs[range] {
434                 let pos = &mut starts[succ.index()];
435                 self.vcode.block_preds[*pos as usize] = pred;
436                 *pos += 1;
437             }
438         }
439         debug_assert!(self.vcode.block_preds.iter().all(|pred| pred.is_valid()));
440     }
441 
442     /// Called once, when a build in Backward order is complete, to
443     /// perform the overall reversal (into final forward order) and
444     /// finalize metadata accordingly.
445     fn reverse_and_finalize(&mut self, vregs: &VRegAllocator<I>) {
446         let n_insts = self.vcode.insts.len();
447         if n_insts == 0 {
448             return;
449         }
450 
451         // Reverse the per-block and per-inst sequences.
452         self.vcode.block_ranges.reverse_index();
453         self.vcode.block_ranges.reverse_target(n_insts);
454         // block_params_range is indexed by block (and blocks were
455         // traversed in reverse) so we reverse it; but block-param
456         // sequences in the concatenated vec can remain in reverse
457         // order (it is effectively an arena of arbitrarily-placed
458         // referenced sequences).
459         self.vcode.block_params_range.reverse_index();
460         // Likewise, we reverse block_succ_range, but the block_succ
461         // concatenated array can remain as-is.
462         self.vcode.block_succ_range.reverse_index();
463         self.vcode.insts.reverse();
464         self.vcode.srclocs.reverse();
465         // Likewise, branch_block_arg_succ_range is indexed by block
466         // so must be reversed.
467         self.vcode.branch_block_arg_succ_range.reverse_index();
468 
469         // To translate an instruction index *endpoint* in reversed
470         // order to forward order, compute `n_insts - i`.
471         //
472         // Why not `n_insts - 1 - i`? That would be correct to
473         // translate an individual instruction index (for ten insts 0
474         // to 9 inclusive, inst 0 becomes 9, and inst 9 becomes
475         // 0). But for the usual inclusive-start, exclusive-end range
476         // idiom, inclusive starts become exclusive ends and
477         // vice-versa, so e.g. an (inclusive) start of 0 becomes an
478         // (exclusive) end of 10.
479         let translate = |inst: InsnIndex| InsnIndex::new(n_insts - inst.index());
480 
481         // Generate debug-value labels based on per-label maps.
482         for (label, tuples) in &self.debug_info {
483             for &(start, end, vreg) in tuples {
484                 let vreg = vregs.resolve_vreg_alias(vreg);
485                 let fwd_start = translate(end);
486                 let fwd_end = translate(start);
487                 self.vcode
488                     .debug_value_labels
489                     .push((vreg, fwd_start, fwd_end, label.as_u32()));
490             }
491         }
492 
493         // Now sort debug value labels by VReg, as required
494         // by regalloc2.
495         self.vcode
496             .debug_value_labels
497             .sort_unstable_by_key(|(vreg, _, _, _)| *vreg);
498     }
499 
500     fn collect_operands(&mut self, vregs: &VRegAllocator<I>) {
501         let allocatable = PRegSet::from(self.vcode.machine_env());
502         for (i, insn) in self.vcode.insts.iter_mut().enumerate() {
503             // Push operands from the instruction onto the operand list.
504             //
505             // We rename through the vreg alias table as we collect
506             // the operands. This is better than a separate post-pass
507             // over operands, because it has more cache locality:
508             // operands only need to pass through L1 once. This is
509             // also better than renaming instructions'
510             // operands/registers while lowering, because here we only
511             // need to do the `match` over the instruction to visit
512             // its register fields (which is slow, branchy code) once.
513 
514             let mut op_collector =
515                 OperandCollector::new(&mut self.vcode.operands, allocatable, |vreg| {
516                     vregs.resolve_vreg_alias(vreg)
517                 });
518             insn.get_operands(&mut op_collector);
519             let (ops, clobbers) = op_collector.finish();
520             self.vcode.operand_ranges.push_end(ops);
521 
522             if clobbers != PRegSet::default() {
523                 self.vcode.clobbers.insert(InsnIndex::new(i), clobbers);
524             }
525 
526             if let Some((dst, src)) = insn.is_move() {
527                 // We should never see non-virtual registers present in move
528                 // instructions.
529                 assert!(
530                     src.is_virtual(),
531                     "the real register {src:?} was used as the source of a move instruction"
532                 );
533                 assert!(
534                     dst.to_reg().is_virtual(),
535                     "the real register {:?} was used as the destination of a move instruction",
536                     dst.to_reg()
537                 );
538             }
539         }
540 
541         // Translate blockparam args via the vreg aliases table as well.
542         for arg in &mut self.vcode.branch_block_args {
543             let new_arg = vregs.resolve_vreg_alias(*arg);
544             trace!("operandcollector: block arg {:?} -> {:?}", arg, new_arg);
545             *arg = new_arg;
546         }
547     }
548 
549     /// Build the final VCode.
550     pub fn build(mut self, mut vregs: VRegAllocator<I>) -> VCode<I> {
551         self.vcode.vreg_types = take(&mut vregs.vreg_types);
552         self.vcode.facts = take(&mut vregs.facts);
553 
554         if self.direction == VCodeBuildDirection::Backward {
555             self.reverse_and_finalize(&vregs);
556         }
557         self.collect_operands(&vregs);
558 
559         self.compute_preds_from_succs();
560         self.vcode.debug_value_labels.sort_unstable();
561 
562         // At this point, nothing in the vcode should mention any
563         // VReg which has been aliased. All the appropriate rewriting
564         // should have happened above. Just to be sure, let's
565         // double-check each field which has vregs.
566         // Note: can't easily check vcode.insts, resolved in collect_operands.
567         // Operands are resolved in collect_operands.
568         vregs.debug_assert_no_vreg_aliases(self.vcode.operands.iter().map(|op| op.vreg()));
569         // Currently block params are never aliased to another vreg.
570         vregs.debug_assert_no_vreg_aliases(self.vcode.block_params.iter().copied());
571         // Branch block args are resolved in collect_operands.
572         vregs.debug_assert_no_vreg_aliases(self.vcode.branch_block_args.iter().copied());
573         // Debug value labels are resolved in reverse_and_finalize.
574         vregs.debug_assert_no_vreg_aliases(
575             self.vcode.debug_value_labels.iter().map(|&(vreg, ..)| vreg),
576         );
577         // Facts are resolved eagerly during set_vreg_alias.
578         vregs.debug_assert_no_vreg_aliases(
579             self.vcode
580                 .facts
581                 .iter()
582                 .zip(&vregs.vreg_types)
583                 .enumerate()
584                 .filter(|(_, (fact, _))| fact.is_some())
585                 .map(|(vreg, (_, &ty))| {
586                     let (regclasses, _) = I::rc_for_type(ty).unwrap();
587                     VReg::new(vreg, regclasses[0])
588                 }),
589         );
590 
591         self.vcode
592     }
593 
594     /// Add a user stack map for the associated instruction.
595     pub fn add_user_stack_map(
596         &mut self,
597         inst: BackwardsInsnIndex,
598         entries: &[ir::UserStackMapEntry],
599     ) {
600         let stack_map = ir::UserStackMap::new(entries, self.vcode.abi.sized_stackslot_offsets());
601         let old_entry = self.vcode.user_stack_maps.insert(inst, stack_map);
602         debug_assert!(old_entry.is_none());
603     }
604 }
605 
606 const NO_INST_OFFSET: CodeOffset = u32::MAX;
607 
608 impl<I: VCodeInst> VCode<I> {
609     /// New empty VCode.
610     fn new(
611         sigs: SigSet,
612         abi: Callee<I::ABIMachineSpec>,
613         emit_info: I::Info,
614         block_order: BlockLoweringOrder,
615         constants: VCodeConstants,
616         log2_min_function_alignment: u8,
617     ) -> Self {
618         let n_blocks = block_order.lowered_order().len();
619         VCode {
620             sigs,
621             vreg_types: vec![],
622             insts: Vec::with_capacity(10 * n_blocks),
623             user_stack_maps: FxHashMap::default(),
624             operands: Vec::with_capacity(30 * n_blocks),
625             operand_ranges: Ranges::with_capacity(10 * n_blocks),
626             clobbers: FxHashMap::default(),
627             srclocs: Vec::with_capacity(10 * n_blocks),
628             entry: BlockIndex::new(0),
629             block_ranges: Ranges::with_capacity(n_blocks),
630             block_succ_range: Ranges::with_capacity(n_blocks),
631             block_succs: Vec::with_capacity(n_blocks),
632             block_pred_range: Ranges::default(),
633             block_preds: Vec::new(),
634             block_params_range: Ranges::with_capacity(n_blocks),
635             block_params: Vec::with_capacity(5 * n_blocks),
636             branch_block_args: Vec::with_capacity(10 * n_blocks),
637             branch_block_arg_range: Ranges::with_capacity(2 * n_blocks),
638             branch_block_arg_succ_range: Ranges::with_capacity(n_blocks),
639             block_order,
640             abi,
641             emit_info,
642             constants,
643             debug_value_labels: vec![],
644             facts: vec![],
645             log2_min_function_alignment,
646         }
647     }
648 
649     /// Get the ABI-dependent MachineEnv for managing register allocation.
650     pub fn machine_env(&self) -> &MachineEnv {
651         self.abi.machine_env(&self.sigs)
652     }
653 
654     /// Get the number of blocks. Block indices will be in the range `0 ..
655     /// (self.num_blocks() - 1)`.
656     pub fn num_blocks(&self) -> usize {
657         self.block_ranges.len()
658     }
659 
660     /// The number of lowered instructions.
661     pub fn num_insts(&self) -> usize {
662         self.insts.len()
663     }
664 
665     fn compute_clobbers(&self, regalloc: &regalloc2::Output) -> Vec<Writable<RealReg>> {
666         let mut clobbered = PRegSet::default();
667 
668         // All moves are included in clobbers.
669         for (_, Edit::Move { to, .. }) in &regalloc.edits {
670             if let Some(preg) = to.as_reg() {
671                 clobbered.add(preg);
672             }
673         }
674 
675         for (i, range) in self.operand_ranges.iter() {
676             let operands = &self.operands[range.clone()];
677             let allocs = &regalloc.allocs[range];
678             for (operand, alloc) in operands.iter().zip(allocs.iter()) {
679                 if operand.kind() == OperandKind::Def {
680                     if let Some(preg) = alloc.as_reg() {
681                         clobbered.add(preg);
682                     }
683                 }
684             }
685 
686             // Also add explicitly-clobbered registers.
687             //
688             // Skip merging this instruction's clobber list if not
689             // "included in clobbers" as per the MachInst. (Some
690             // backends use this to implement ABI specifics; e.g.,
691             // excluding calls of the same ABI as the current function
692             // from clobbers, because by definition everything
693             // clobbered by the call can be clobbered by this function
694             // without saving as well.
695             //
696             // This is important for a particular optimization: when
697             // some registers are "half-clobbered", e.g. vector/float
698             // registers on aarch64, we want them to be seen as
699             // clobbered by regalloc so it avoids carrying values
700             // across calls in these registers but not seen as
701             // clobbered by prologue generation here (because the
702             // actual half-clobber implied by the clobber list fits
703             // within the clobbers that we allow without
704             // clobber-saves).
705             if self.insts[i].is_included_in_clobbers() {
706                 if let Some(&inst_clobbered) = self.clobbers.get(&InsnIndex::new(i)) {
707                     clobbered.union_from(inst_clobbered);
708                 }
709             }
710         }
711 
712         clobbered
713             .into_iter()
714             .map(|preg| Writable::from_reg(RealReg::from(preg)))
715             .collect()
716     }
717 
718     /// Emit the instructions to a `MachBuffer`, containing fixed-up
719     /// code and external reloc/trap/etc. records ready for use. Takes
720     /// the regalloc results as well.
721     ///
722     /// Returns the machine code itself, and optionally metadata
723     /// and/or a disassembly, as an `EmitResult`. The `VCode` itself
724     /// is consumed by the emission process.
725     pub fn emit(
726         mut self,
727         regalloc: &regalloc2::Output,
728         want_disasm: bool,
729         flags: &settings::Flags,
730         ctrl_plane: &mut ControlPlane,
731     ) -> EmitResult
732     where
733         I: VCodeInst,
734     {
735         let _tt = timing::vcode_emit();
736         let mut buffer = MachBuffer::new();
737         buffer.set_log2_min_function_alignment(self.log2_min_function_alignment);
738         let mut bb_starts: Vec<Option<CodeOffset>> = vec![];
739 
740         // The first M MachLabels are reserved for block indices.
741         buffer.reserve_labels_for_blocks(self.num_blocks());
742 
743         // Register all allocated constants with the `MachBuffer` to ensure that
744         // any references to the constants during instructions can be handled
745         // correctly.
746         buffer.register_constants(&self.constants);
747 
748         // Construct the final order we emit code in: cold blocks at the end.
749         let mut final_order: SmallVec<[BlockIndex; 16]> = smallvec![];
750         let mut cold_blocks: SmallVec<[BlockIndex; 16]> = smallvec![];
751         for block in 0..self.num_blocks() {
752             let block = BlockIndex::new(block);
753             if self.block_order.is_cold(block) {
754                 cold_blocks.push(block);
755             } else {
756                 final_order.push(block);
757             }
758         }
759         final_order.extend(cold_blocks.clone());
760 
761         // Compute/save info we need for the prologue: clobbers and
762         // number of spillslots.
763         //
764         // We clone `abi` here because we will mutate it as we
765         // generate the prologue and set other info, but we can't
766         // mutate `VCode`. The info it usually carries prior to
767         // setting clobbers is fairly minimal so this should be
768         // relatively cheap.
769         let clobbers = self.compute_clobbers(regalloc);
770         self.abi
771             .compute_frame_layout(&self.sigs, regalloc.num_spillslots, clobbers);
772 
773         // Emit blocks.
774         let mut cur_srcloc = None;
775         let mut last_offset = None;
776         let mut inst_offsets = vec![];
777         let mut state = I::State::new(&self.abi, std::mem::take(ctrl_plane));
778 
779         let mut disasm = String::new();
780 
781         if !self.debug_value_labels.is_empty() {
782             inst_offsets.resize(self.insts.len(), NO_INST_OFFSET);
783         }
784 
785         // Count edits per block ahead of time; this is needed for
786         // lookahead island emission. (We could derive it per-block
787         // with binary search in the edit list, but it's more
788         // efficient to do it in one pass here.)
789         let mut ra_edits_per_block: SmallVec<[u32; 64]> = smallvec![];
790         let mut edit_idx = 0;
791         for block in 0..self.num_blocks() {
792             let end_inst = InsnIndex::new(self.block_ranges.get(block).end);
793             let start_edit_idx = edit_idx;
794             while edit_idx < regalloc.edits.len() && regalloc.edits[edit_idx].0.inst() < end_inst {
795                 edit_idx += 1;
796             }
797             let end_edit_idx = edit_idx;
798             ra_edits_per_block.push((end_edit_idx - start_edit_idx) as u32);
799         }
800 
801         let is_forward_edge_cfi_enabled = self.abi.is_forward_edge_cfi_enabled();
802         let mut bb_padding = match flags.bb_padding_log2_minus_one() {
803             0 => Vec::new(),
804             n => vec![0; 1 << (n - 1)],
805         };
806         let mut total_bb_padding = 0;
807 
808         for (block_order_idx, &block) in final_order.iter().enumerate() {
809             trace!("emitting block {:?}", block);
810 
811             // Call the new block hook for state
812             state.on_new_block();
813 
814             // Emit NOPs to align the block.
815             let new_offset = I::align_basic_block(buffer.cur_offset());
816             while new_offset > buffer.cur_offset() {
817                 // Pad with NOPs up to the aligned block offset.
818                 let nop = I::gen_nop((new_offset - buffer.cur_offset()) as usize);
819                 nop.emit(&mut buffer, &self.emit_info, &mut Default::default());
820             }
821             assert_eq!(buffer.cur_offset(), new_offset);
822 
823             let do_emit = |inst: &I,
824                            disasm: &mut String,
825                            buffer: &mut MachBuffer<I>,
826                            state: &mut I::State| {
827                 if want_disasm && !inst.is_args() {
828                     let mut s = state.clone();
829                     writeln!(disasm, "  {}", inst.pretty_print_inst(&mut s)).unwrap();
830                 }
831                 inst.emit(buffer, &self.emit_info, state);
832             };
833 
834             // Is this the first block? Emit the prologue directly if so.
835             if block == self.entry {
836                 trace!(" -> entry block");
837                 buffer.start_srcloc(Default::default());
838                 for inst in &self.abi.gen_prologue() {
839                     do_emit(&inst, &mut disasm, &mut buffer, &mut state);
840                 }
841                 buffer.end_srcloc();
842             }
843 
844             // Now emit the regular block body.
845 
846             buffer.bind_label(MachLabel::from_block(block), state.ctrl_plane_mut());
847 
848             if want_disasm {
849                 writeln!(&mut disasm, "block{}:", block.index()).unwrap();
850             }
851 
852             if flags.machine_code_cfg_info() {
853                 // Track BB starts. If we have backed up due to MachBuffer
854                 // branch opts, note that the removed blocks were removed.
855                 let cur_offset = buffer.cur_offset();
856                 if last_offset.is_some() && cur_offset <= last_offset.unwrap() {
857                     for i in (0..bb_starts.len()).rev() {
858                         if bb_starts[i].is_some() && cur_offset > bb_starts[i].unwrap() {
859                             break;
860                         }
861                         bb_starts[i] = None;
862                     }
863                 }
864                 bb_starts.push(Some(cur_offset));
865                 last_offset = Some(cur_offset);
866             }
867 
868             if let Some(block_start) = I::gen_block_start(
869                 self.block_order.is_indirect_branch_target(block),
870                 is_forward_edge_cfi_enabled,
871             ) {
872                 do_emit(&block_start, &mut disasm, &mut buffer, &mut state);
873             }
874 
875             for inst_or_edit in regalloc.block_insts_and_edits(&self, block) {
876                 match inst_or_edit {
877                     InstOrEdit::Inst(iix) => {
878                         if !self.debug_value_labels.is_empty() {
879                             // If we need to produce debug info,
880                             // record the offset of each instruction
881                             // so that we can translate value-label
882                             // ranges to machine-code offsets.
883 
884                             // Cold blocks violate monotonicity
885                             // assumptions elsewhere (that
886                             // instructions in inst-index order are in
887                             // order in machine code), so we omit
888                             // their offsets here. Value-label range
889                             // generation below will skip empty ranges
890                             // and ranges with to-offsets of zero.
891                             if !self.block_order.is_cold(block) {
892                                 inst_offsets[iix.index()] = buffer.cur_offset();
893                             }
894                         }
895 
896                         // Update the srcloc at this point in the buffer.
897                         let srcloc = self.srclocs[iix.index()];
898                         if cur_srcloc != Some(srcloc) {
899                             if cur_srcloc.is_some() {
900                                 buffer.end_srcloc();
901                             }
902                             buffer.start_srcloc(srcloc);
903                             cur_srcloc = Some(srcloc);
904                         }
905 
906                         // If this is a safepoint, compute a stack map
907                         // and pass it to the emit state.
908                         let stack_map_disasm = if self.insts[iix.index()].is_safepoint() {
909                             let (user_stack_map, user_stack_map_disasm) = {
910                                 // The `user_stack_maps` is keyed by reverse
911                                 // instruction index, so we must flip the
912                                 // index. We can't put this into a helper method
913                                 // due to borrowck issues because parts of
914                                 // `self` are borrowed mutably elsewhere in this
915                                 // function.
916                                 let index = iix.to_backwards_insn_index(self.num_insts());
917                                 let user_stack_map = self.user_stack_maps.remove(&index);
918                                 let user_stack_map_disasm =
919                                     user_stack_map.as_ref().map(|m| format!("  ; {m:?}"));
920                                 (user_stack_map, user_stack_map_disasm)
921                             };
922 
923                             state.pre_safepoint(user_stack_map);
924 
925                             user_stack_map_disasm
926                         } else {
927                             None
928                         };
929 
930                         // If the instruction we are about to emit is
931                         // a return, place an epilogue at this point
932                         // (and don't emit the return; the actual
933                         // epilogue will contain it).
934                         if self.insts[iix.index()].is_term() == MachTerminator::Ret {
935                             for inst in self.abi.gen_epilogue() {
936                                 do_emit(&inst, &mut disasm, &mut buffer, &mut state);
937                             }
938                         } else {
939                             // Update the operands for this inst using the
940                             // allocations from the regalloc result.
941                             let mut allocs = regalloc.inst_allocs(iix).iter();
942                             self.insts[iix.index()].get_operands(
943                                 &mut |reg: &mut Reg, constraint, _kind, _pos| {
944                                     let alloc =
945                                         allocs.next().expect("enough allocations for all operands");
946 
947                                     if let Some(alloc) = alloc.as_reg() {
948                                         let alloc: Reg = alloc.into();
949                                         if let OperandConstraint::FixedReg(rreg) = constraint {
950                                             debug_assert_eq!(Reg::from(rreg), alloc);
951                                         }
952                                         *reg = alloc;
953                                     } else if let Some(alloc) = alloc.as_stack() {
954                                         let alloc: Reg = alloc.into();
955                                         *reg = alloc;
956                                     }
957                                 },
958                             );
959                             debug_assert!(allocs.next().is_none());
960 
961                             // Emit the instruction!
962                             do_emit(
963                                 &self.insts[iix.index()],
964                                 &mut disasm,
965                                 &mut buffer,
966                                 &mut state,
967                             );
968                             if let Some(stack_map_disasm) = stack_map_disasm {
969                                 disasm.push_str(&stack_map_disasm);
970                                 disasm.push('\n');
971                             }
972                         }
973                     }
974 
975                     InstOrEdit::Edit(Edit::Move { from, to }) => {
976                         // Create a move/spill/reload instruction and
977                         // immediately emit it.
978                         match (from.as_reg(), to.as_reg()) {
979                             (Some(from), Some(to)) => {
980                                 // Reg-to-reg move.
981                                 let from_rreg = Reg::from(from);
982                                 let to_rreg = Writable::from_reg(Reg::from(to));
983                                 debug_assert_eq!(from.class(), to.class());
984                                 let ty = I::canonical_type_for_rc(from.class());
985                                 let mv = I::gen_move(to_rreg, from_rreg, ty);
986                                 do_emit(&mv, &mut disasm, &mut buffer, &mut state);
987                             }
988                             (Some(from), None) => {
989                                 // Spill from register to spillslot.
990                                 let to = to.as_stack().unwrap();
991                                 let from_rreg = RealReg::from(from);
992                                 let spill = self.abi.gen_spill(to, from_rreg);
993                                 do_emit(&spill, &mut disasm, &mut buffer, &mut state);
994                             }
995                             (None, Some(to)) => {
996                                 // Load from spillslot to register.
997                                 let from = from.as_stack().unwrap();
998                                 let to_rreg = Writable::from_reg(RealReg::from(to));
999                                 let reload = self.abi.gen_reload(to_rreg, from);
1000                                 do_emit(&reload, &mut disasm, &mut buffer, &mut state);
1001                             }
1002                             (None, None) => {
1003                                 panic!("regalloc2 should have eliminated stack-to-stack moves!");
1004                             }
1005                         }
1006                     }
1007                 }
1008             }
1009 
1010             if cur_srcloc.is_some() {
1011                 buffer.end_srcloc();
1012                 cur_srcloc = None;
1013             }
1014 
1015             // Do we need an island? Get the worst-case size of the next BB, add
1016             // it to the optional padding behind the block, and pass this to the
1017             // `MachBuffer` to determine if an island is necessary.
1018             let worst_case_next_bb = if block_order_idx < final_order.len() - 1 {
1019                 let next_block = final_order[block_order_idx + 1];
1020                 let next_block_range = self.block_ranges.get(next_block.index());
1021                 let next_block_size = next_block_range.len() as u32;
1022                 let next_block_ra_insertions = ra_edits_per_block[next_block.index()];
1023                 I::worst_case_size() * (next_block_size + next_block_ra_insertions)
1024             } else {
1025                 0
1026             };
1027             let padding = if bb_padding.is_empty() {
1028                 0
1029             } else {
1030                 bb_padding.len() as u32 + I::LabelUse::ALIGN - 1
1031             };
1032             if buffer.island_needed(padding + worst_case_next_bb) {
1033                 buffer.emit_island(padding + worst_case_next_bb, ctrl_plane);
1034             }
1035 
1036             // Insert padding, if configured, to stress the `MachBuffer`'s
1037             // relocation and island calculations.
1038             //
1039             // Padding can get quite large during fuzzing though so place a
1040             // total cap on it where when a per-function threshold is exceeded
1041             // the padding is turned back down to zero. This avoids a small-ish
1042             // test case generating a GB+ memory footprint in Cranelift for
1043             // example.
1044             if !bb_padding.is_empty() {
1045                 buffer.put_data(&bb_padding);
1046                 buffer.align_to(I::LabelUse::ALIGN);
1047                 total_bb_padding += bb_padding.len();
1048                 if total_bb_padding > (150 << 20) {
1049                     bb_padding = Vec::new();
1050                 }
1051             }
1052         }
1053 
1054         debug_assert!(
1055             self.user_stack_maps.is_empty(),
1056             "any stack maps should have been consumed by instruction emission, still have: {:#?}",
1057             self.user_stack_maps,
1058         );
1059 
1060         // Do any optimizations on branches at tail of buffer, as if we had
1061         // bound one last label.
1062         buffer.optimize_branches(ctrl_plane);
1063 
1064         // emission state is not needed anymore, move control plane back out
1065         *ctrl_plane = state.take_ctrl_plane();
1066 
1067         let func_body_len = buffer.cur_offset();
1068 
1069         // Create `bb_edges` and final (filtered) `bb_starts`.
1070         let mut bb_edges = vec![];
1071         let mut bb_offsets = vec![];
1072         if flags.machine_code_cfg_info() {
1073             for block in 0..self.num_blocks() {
1074                 if bb_starts[block].is_none() {
1075                     // Block was deleted by MachBuffer; skip.
1076                     continue;
1077                 }
1078                 let from = bb_starts[block].unwrap();
1079 
1080                 bb_offsets.push(from);
1081                 // Resolve each `succ` label and add edges.
1082                 let succs = self.block_succs(BlockIndex::new(block));
1083                 for &succ in succs.iter() {
1084                     let to = buffer.resolve_label_offset(MachLabel::from_block(succ));
1085                     bb_edges.push((from, to));
1086                 }
1087             }
1088         }
1089 
1090         self.monotonize_inst_offsets(&mut inst_offsets[..], func_body_len);
1091         let value_labels_ranges =
1092             self.compute_value_labels_ranges(regalloc, &inst_offsets[..], func_body_len);
1093         let frame_size = self.abi.frame_size();
1094 
1095         EmitResult {
1096             buffer: buffer.finish(&self.constants, ctrl_plane),
1097             bb_offsets,
1098             bb_edges,
1099             func_body_len,
1100             disasm: if want_disasm { Some(disasm) } else { None },
1101             sized_stackslot_offsets: self.abi.sized_stackslot_offsets().clone(),
1102             dynamic_stackslot_offsets: self.abi.dynamic_stackslot_offsets().clone(),
1103             value_labels_ranges,
1104             frame_size,
1105         }
1106     }
1107 
1108     fn monotonize_inst_offsets(&self, inst_offsets: &mut [CodeOffset], func_body_len: u32) {
1109         if self.debug_value_labels.is_empty() {
1110             return;
1111         }
1112 
1113         // During emission, branch removal can make offsets of instructions incorrect.
1114         // Consider the following sequence: [insi][jmp0][jmp1][jmp2][insj]
1115         // It will be recorded as (say):    [30]  [34]  [38]  [42]  [<would be 46>]
1116         // When the jumps get removed we are left with (in "inst_offsets"):
1117         // [insi][jmp0][jmp1][jmp2][insj][...]
1118         // [30]  [34]  [38]  [42]  [34]
1119         // Which violates the monotonicity invariant. This method sets offsets of these
1120         // removed instructions such as to make them appear zero-sized:
1121         // [insi][jmp0][jmp1][jmp2][insj][...]
1122         // [30]  [34]  [34]  [34]  [34]
1123         //
1124         let mut next_offset = func_body_len;
1125         for inst_index in (0..(inst_offsets.len() - 1)).rev() {
1126             let inst_offset = inst_offsets[inst_index];
1127 
1128             // Not all instructions get their offsets recorded.
1129             if inst_offset == NO_INST_OFFSET {
1130                 continue;
1131             }
1132 
1133             if inst_offset > next_offset {
1134                 trace!(
1135                     "Fixing code offset of the removed Inst {}: {} -> {}",
1136                     inst_index,
1137                     inst_offset,
1138                     next_offset
1139                 );
1140                 inst_offsets[inst_index] = next_offset;
1141                 continue;
1142             }
1143 
1144             next_offset = inst_offset;
1145         }
1146     }
1147 
1148     fn compute_value_labels_ranges(
1149         &self,
1150         regalloc: &regalloc2::Output,
1151         inst_offsets: &[CodeOffset],
1152         func_body_len: u32,
1153     ) -> ValueLabelsRanges {
1154         if self.debug_value_labels.is_empty() {
1155             return ValueLabelsRanges::default();
1156         }
1157 
1158         if trace_log_enabled!() {
1159             self.log_value_labels_ranges(regalloc, inst_offsets);
1160         }
1161 
1162         let mut value_labels_ranges: ValueLabelsRanges = HashMap::new();
1163         for &(label, from, to, alloc) in &regalloc.debug_locations {
1164             let label = ValueLabel::from_u32(label);
1165             let ranges = value_labels_ranges.entry(label).or_insert_with(|| vec![]);
1166             let from_offset = inst_offsets[from.inst().index()];
1167             let to_offset = if to.inst().index() == inst_offsets.len() {
1168                 func_body_len
1169             } else {
1170                 inst_offsets[to.inst().index()]
1171             };
1172 
1173             // Empty ranges or unavailable offsets can happen
1174             // due to cold blocks and branch removal (see above).
1175             if from_offset == NO_INST_OFFSET
1176                 || to_offset == NO_INST_OFFSET
1177                 || from_offset == to_offset
1178             {
1179                 continue;
1180             }
1181 
1182             let loc = if let Some(preg) = alloc.as_reg() {
1183                 LabelValueLoc::Reg(Reg::from(preg))
1184             } else {
1185                 let slot = alloc.as_stack().unwrap();
1186                 let slot_offset = self.abi.get_spillslot_offset(slot);
1187                 let slot_base_to_caller_sp_offset = self.abi.slot_base_to_caller_sp_offset();
1188                 let caller_sp_to_cfa_offset =
1189                     crate::isa::unwind::systemv::caller_sp_to_cfa_offset();
1190                 // NOTE: this is a negative offset because it's relative to the caller's SP
1191                 let cfa_to_sp_offset =
1192                     -((slot_base_to_caller_sp_offset + caller_sp_to_cfa_offset) as i64);
1193                 LabelValueLoc::CFAOffset(cfa_to_sp_offset + slot_offset)
1194             };
1195 
1196             // ValueLocRanges are recorded by *instruction-end
1197             // offset*. `from_offset` is the *start* of the
1198             // instruction; that is the same as the end of another
1199             // instruction, so we only want to begin coverage once
1200             // we are past the previous instruction's end.
1201             let start = from_offset + 1;
1202 
1203             // Likewise, `end` is exclusive, but we want to
1204             // *include* the end of the last
1205             // instruction. `to_offset` is the start of the
1206             // `to`-instruction, which is the exclusive end, i.e.,
1207             // the first instruction not covered. That
1208             // instruction's start is the same as the end of the
1209             // last instruction that is included, so we go one
1210             // byte further to be sure to include it.
1211             let end = to_offset + 1;
1212 
1213             // Coalesce adjacent ranges that for the same location
1214             // to minimize output size here and for the consumers.
1215             if let Some(last_loc_range) = ranges.last_mut() {
1216                 if last_loc_range.loc == loc && last_loc_range.end == start {
1217                     trace!(
1218                         "Extending debug range for {:?} in {:?} to {}",
1219                         label,
1220                         loc,
1221                         end
1222                     );
1223                     last_loc_range.end = end;
1224                     continue;
1225                 }
1226             }
1227 
1228             trace!(
1229                 "Recording debug range for {:?} in {:?}: [Inst {}..Inst {}) [{}..{})",
1230                 label,
1231                 loc,
1232                 from.inst().index(),
1233                 to.inst().index(),
1234                 start,
1235                 end
1236             );
1237 
1238             ranges.push(ValueLocRange { loc, start, end });
1239         }
1240 
1241         value_labels_ranges
1242     }
1243 
1244     fn log_value_labels_ranges(&self, regalloc: &regalloc2::Output, inst_offsets: &[CodeOffset]) {
1245         debug_assert!(trace_log_enabled!());
1246 
1247         // What debug labels do we have? Note we'll skip those that have not been
1248         // allocated any location at all. They will show up as numeric gaps in the table.
1249         let mut labels = vec![];
1250         for &(label, _, _, _) in &regalloc.debug_locations {
1251             if Some(&label) == labels.last() {
1252                 continue;
1253             }
1254             labels.push(label);
1255         }
1256 
1257         // Reformat the data on what VRegs were the VLs assigned to by lowering, since
1258         // the array we have is sorted by VReg, and we want it sorted by VL for easy
1259         // access in the loop below.
1260         let mut vregs = vec![];
1261         for &(vreg, start, end, label) in &self.debug_value_labels {
1262             if matches!(labels.binary_search(&label), Ok(_)) {
1263                 vregs.push((label, start, end, vreg));
1264             }
1265         }
1266         vregs.sort_unstable_by(
1267             |(l_label, l_start, _, _), (r_label, r_start, _, _)| match l_label.cmp(r_label) {
1268                 Ordering::Equal => l_start.cmp(r_start),
1269                 cmp => cmp,
1270             },
1271         );
1272 
1273         #[derive(PartialEq)]
1274         enum Mode {
1275             Measure,
1276             Emit,
1277         }
1278         #[derive(PartialEq)]
1279         enum Row {
1280             Head,
1281             Line,
1282             Inst(usize),
1283         }
1284 
1285         let mut widths = vec![0; 2 + 2 * labels.len()];
1286         let mut row = String::new();
1287         let mut output_row = |row_kind: Row, mode: Mode| {
1288             let mut column_index = 0;
1289             row.clear();
1290 
1291             macro_rules! output_cell_impl {
1292                 ($fill:literal, $span:literal, $($cell_fmt:tt)*) => {
1293                     let column_start = row.len();
1294                     {
1295                         row.push('|');
1296                         write!(row, $($cell_fmt)*).unwrap();
1297                     }
1298 
1299                     let next_column_index = column_index + $span;
1300                     let expected_width: usize = widths[column_index..next_column_index].iter().sum();
1301                     if mode == Mode::Measure {
1302                         let actual_width = row.len() - column_start;
1303                         if actual_width > expected_width {
1304                             widths[next_column_index - 1] += actual_width - expected_width;
1305                         }
1306                     } else {
1307                         let column_end = column_start + expected_width;
1308                         while row.len() != column_end {
1309                             row.push($fill);
1310                         }
1311                     }
1312                     column_index = next_column_index;
1313                 };
1314             }
1315             macro_rules! output_cell {
1316                 ($($cell_fmt:tt)*) => {
1317                     output_cell_impl!(' ', 1, $($cell_fmt)*);
1318                 };
1319             }
1320 
1321             match row_kind {
1322                 Row::Head => {
1323                     output_cell!("Inst");
1324                     output_cell!("IP");
1325                     for label in &labels {
1326                         output_cell_impl!(' ', 2, "{:?}", ValueLabel::from_u32(*label));
1327                     }
1328                 }
1329                 Row::Line => {
1330                     debug_assert!(mode == Mode::Emit);
1331                     output_cell_impl!('-', 1, "");
1332                     output_cell_impl!('-', 1, "");
1333                     for _ in &labels {
1334                         output_cell_impl!('-', 2, "");
1335                     }
1336                 }
1337                 Row::Inst(inst_index) => {
1338                     output_cell!("Inst {inst_index} ");
1339                     output_cell!("{} ", inst_offsets[inst_index]);
1340 
1341                     // The ranges which we query below operate on the logic of
1342                     // "IP(inst) == IP after inst", while the rows of our table
1343                     // represent IPs 'before' "inst", so we need to convert "inst"
1344                     // into these "IP after" coordinates.
1345                     let inst_ip_index = inst_index.wrapping_sub(1);
1346                     for label in &labels {
1347                         // First, the VReg.
1348                         use regalloc2::Inst;
1349                         let vreg_cmp = |inst: usize,
1350                                         vreg_label: &u32,
1351                                         range_start: &Inst,
1352                                         range_end: &Inst| {
1353                             match vreg_label.cmp(&label) {
1354                                 Ordering::Equal => {
1355                                     if range_end.index() <= inst {
1356                                         Ordering::Less
1357                                     } else if range_start.index() > inst {
1358                                         Ordering::Greater
1359                                     } else {
1360                                         Ordering::Equal
1361                                     }
1362                                 }
1363                                 cmp => cmp,
1364                             }
1365                         };
1366                         let vreg_index =
1367                             vregs.binary_search_by(|(l, s, e, _)| vreg_cmp(inst_ip_index, l, s, e));
1368                         if let Ok(vreg_index) = vreg_index {
1369                             let mut prev_vreg = None;
1370                             if inst_ip_index > 0 {
1371                                 let prev_vreg_index = vregs.binary_search_by(|(l, s, e, _)| {
1372                                     vreg_cmp(inst_ip_index - 1, l, s, e)
1373                                 });
1374                                 if let Ok(prev_vreg_index) = prev_vreg_index {
1375                                     prev_vreg = Some(vregs[prev_vreg_index].3);
1376                                 }
1377                             }
1378 
1379                             let vreg = vregs[vreg_index].3;
1380                             if Some(vreg) == prev_vreg {
1381                                 output_cell!("*");
1382                             } else {
1383                                 output_cell!("{}", vreg);
1384                             }
1385                         } else {
1386                             output_cell!("");
1387                         }
1388 
1389                         // Second, the allocated location.
1390                         let range_index = regalloc.debug_locations.binary_search_by(
1391                             |(range_label, range_start, range_end, _)| match range_label.cmp(label)
1392                             {
1393                                 Ordering::Equal => {
1394                                     if range_end.inst().index() <= inst_ip_index {
1395                                         Ordering::Less
1396                                     } else if range_start.inst().index() > inst_ip_index {
1397                                         Ordering::Greater
1398                                     } else {
1399                                         Ordering::Equal
1400                                     }
1401                                 }
1402                                 cmp => cmp,
1403                             },
1404                         );
1405                         if let Ok(range_index) = range_index {
1406                             // Live at this instruction, print the location.
1407                             if let Some(reg) = regalloc.debug_locations[range_index].3.as_reg() {
1408                                 output_cell!("{:?}", Reg::from(reg));
1409                             } else {
1410                                 output_cell!("Stk");
1411                             }
1412                         } else {
1413                             // Not live at this instruction.
1414                             output_cell!("");
1415                         }
1416                     }
1417                 }
1418             }
1419             row.push('|');
1420 
1421             if mode == Mode::Emit {
1422                 trace!("{}", row.as_str());
1423             }
1424         };
1425 
1426         for inst_index in 0..inst_offsets.len() {
1427             output_row(Row::Inst(inst_index), Mode::Measure);
1428         }
1429         output_row(Row::Head, Mode::Measure);
1430 
1431         output_row(Row::Head, Mode::Emit);
1432         output_row(Row::Line, Mode::Emit);
1433         for inst_index in 0..inst_offsets.len() {
1434             output_row(Row::Inst(inst_index), Mode::Emit);
1435         }
1436     }
1437 
1438     /// Get the IR block for a BlockIndex, if one exists.
1439     pub fn bindex_to_bb(&self, block: BlockIndex) -> Option<ir::Block> {
1440         self.block_order.lowered_order()[block.index()].orig_block()
1441     }
1442 
1443     /// Get the type of a VReg.
1444     pub fn vreg_type(&self, vreg: VReg) -> Type {
1445         self.vreg_types[vreg.vreg()]
1446     }
1447 
1448     /// Get the fact, if any, for a given VReg.
1449     pub fn vreg_fact(&self, vreg: VReg) -> Option<&Fact> {
1450         self.facts[vreg.vreg()].as_ref()
1451     }
1452 
1453     /// Set the fact for a given VReg.
1454     pub fn set_vreg_fact(&mut self, vreg: VReg, fact: Fact) {
1455         trace!("set fact on {}: {:?}", vreg, fact);
1456         self.facts[vreg.vreg()] = Some(fact);
1457     }
1458 
1459     /// Does a given instruction define any facts?
1460     pub fn inst_defines_facts(&self, inst: InsnIndex) -> bool {
1461         self.inst_operands(inst)
1462             .iter()
1463             .filter(|o| o.kind() == OperandKind::Def)
1464             .map(|o| o.vreg())
1465             .any(|vreg| self.facts[vreg.vreg()].is_some())
1466     }
1467 
1468     /// Get the user stack map associated with the given forward instruction index.
1469     pub fn get_user_stack_map(&self, inst: InsnIndex) -> Option<&ir::UserStackMap> {
1470         let index = inst.to_backwards_insn_index(self.num_insts());
1471         self.user_stack_maps.get(&index)
1472     }
1473 }
1474 
1475 impl<I: VCodeInst> std::ops::Index<InsnIndex> for VCode<I> {
1476     type Output = I;
1477     fn index(&self, idx: InsnIndex) -> &Self::Output {
1478         &self.insts[idx.index()]
1479     }
1480 }
1481 
1482 impl<I: VCodeInst> RegallocFunction for VCode<I> {
1483     fn num_insts(&self) -> usize {
1484         self.insts.len()
1485     }
1486 
1487     fn num_blocks(&self) -> usize {
1488         self.block_ranges.len()
1489     }
1490 
1491     fn entry_block(&self) -> BlockIndex {
1492         self.entry
1493     }
1494 
1495     fn block_insns(&self, block: BlockIndex) -> InstRange {
1496         let range = self.block_ranges.get(block.index());
1497         InstRange::new(InsnIndex::new(range.start), InsnIndex::new(range.end))
1498     }
1499 
1500     fn block_succs(&self, block: BlockIndex) -> &[BlockIndex] {
1501         let range = self.block_succ_range.get(block.index());
1502         &self.block_succs[range]
1503     }
1504 
1505     fn block_preds(&self, block: BlockIndex) -> &[BlockIndex] {
1506         let range = self.block_pred_range.get(block.index());
1507         &self.block_preds[range]
1508     }
1509 
1510     fn block_params(&self, block: BlockIndex) -> &[VReg] {
1511         // As a special case we don't return block params for the entry block, as all the arguments
1512         // will be defined by the `Inst::Args` instruction.
1513         if block == self.entry {
1514             return &[];
1515         }
1516 
1517         let range = self.block_params_range.get(block.index());
1518         &self.block_params[range]
1519     }
1520 
1521     fn branch_blockparams(&self, block: BlockIndex, _insn: InsnIndex, succ_idx: usize) -> &[VReg] {
1522         let succ_range = self.branch_block_arg_succ_range.get(block.index());
1523         debug_assert!(succ_idx < succ_range.len());
1524         let branch_block_args = self.branch_block_arg_range.get(succ_range.start + succ_idx);
1525         &self.branch_block_args[branch_block_args]
1526     }
1527 
1528     fn is_ret(&self, insn: InsnIndex) -> bool {
1529         match self.insts[insn.index()].is_term() {
1530             // We treat blocks terminated by an unconditional trap like a return for regalloc.
1531             MachTerminator::None => self.insts[insn.index()].is_trap(),
1532             MachTerminator::Ret | MachTerminator::RetCall => true,
1533             MachTerminator::Branch => false,
1534         }
1535     }
1536 
1537     fn is_branch(&self, insn: InsnIndex) -> bool {
1538         match self.insts[insn.index()].is_term() {
1539             MachTerminator::Branch => true,
1540             _ => false,
1541         }
1542     }
1543 
1544     fn inst_operands(&self, insn: InsnIndex) -> &[Operand] {
1545         let range = self.operand_ranges.get(insn.index());
1546         &self.operands[range]
1547     }
1548 
1549     fn inst_clobbers(&self, insn: InsnIndex) -> PRegSet {
1550         self.clobbers.get(&insn).cloned().unwrap_or_default()
1551     }
1552 
1553     fn num_vregs(&self) -> usize {
1554         self.vreg_types.len()
1555     }
1556 
1557     fn debug_value_labels(&self) -> &[(VReg, InsnIndex, InsnIndex, u32)] {
1558         &self.debug_value_labels
1559     }
1560 
1561     fn spillslot_size(&self, regclass: RegClass) -> usize {
1562         self.abi.get_spillslot_size(regclass) as usize
1563     }
1564 
1565     fn allow_multiple_vreg_defs(&self) -> bool {
1566         // At least the s390x backend requires this, because the
1567         // `Loop` pseudo-instruction aggregates all Operands so pinned
1568         // vregs (RealRegs) may occur more than once.
1569         true
1570     }
1571 }
1572 
1573 impl<I: VCodeInst> Debug for VRegAllocator<I> {
1574     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1575         writeln!(f, "VRegAllocator {{")?;
1576 
1577         let mut alias_keys = self.vreg_aliases.keys().cloned().collect::<Vec<_>>();
1578         alias_keys.sort_unstable();
1579         for key in alias_keys {
1580             let dest = self.vreg_aliases.get(&key).unwrap();
1581             writeln!(f, "  {:?} := {:?}", Reg::from(key), Reg::from(*dest))?;
1582         }
1583 
1584         for (vreg, fact) in self.facts.iter().enumerate() {
1585             if let Some(fact) = fact {
1586                 writeln!(f, "  v{vreg} ! {fact}")?;
1587             }
1588         }
1589 
1590         writeln!(f, "}}")
1591     }
1592 }
1593 
1594 impl<I: VCodeInst> fmt::Debug for VCode<I> {
1595     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1596         writeln!(f, "VCode {{")?;
1597         writeln!(f, "  Entry block: {}", self.entry.index())?;
1598 
1599         let mut state = Default::default();
1600 
1601         for block in 0..self.num_blocks() {
1602             let block = BlockIndex::new(block);
1603             writeln!(
1604                 f,
1605                 "Block {}({:?}):",
1606                 block.index(),
1607                 self.block_params(block)
1608             )?;
1609             if let Some(bb) = self.bindex_to_bb(block) {
1610                 writeln!(f, "    (original IR block: {bb})")?;
1611             }
1612             for (succ_idx, succ) in self.block_succs(block).iter().enumerate() {
1613                 writeln!(
1614                     f,
1615                     "    (successor: Block {}({:?}))",
1616                     succ.index(),
1617                     self.branch_blockparams(block, InsnIndex::new(0) /* dummy */, succ_idx)
1618                 )?;
1619             }
1620             for inst in self.block_ranges.get(block.index()) {
1621                 writeln!(
1622                     f,
1623                     "  Inst {}: {}",
1624                     inst,
1625                     self.insts[inst].pretty_print_inst(&mut state)
1626                 )?;
1627                 if !self.operands.is_empty() {
1628                     for operand in self.inst_operands(InsnIndex::new(inst)) {
1629                         if operand.kind() == OperandKind::Def {
1630                             if let Some(fact) = &self.facts[operand.vreg().vreg()] {
1631                                 writeln!(f, "    v{} ! {}", operand.vreg().vreg(), fact)?;
1632                             }
1633                         }
1634                     }
1635                 }
1636                 if let Some(user_stack_map) = self.get_user_stack_map(InsnIndex::new(inst)) {
1637                     writeln!(f, "    {user_stack_map:?}")?;
1638                 }
1639             }
1640         }
1641 
1642         writeln!(f, "}}")?;
1643         Ok(())
1644     }
1645 }
1646 
1647 /// This structure manages VReg allocation during the lifetime of the VCodeBuilder.
1648 pub struct VRegAllocator<I> {
1649     /// VReg IR-level types.
1650     vreg_types: Vec<Type>,
1651 
1652     /// VReg aliases. When the final VCode is built we rewrite all
1653     /// uses of the keys in this table to their replacement values.
1654     ///
1655     /// We use these aliases to rename an instruction's expected
1656     /// result vregs to the returned vregs from lowering, which are
1657     /// usually freshly-allocated temps.
1658     vreg_aliases: FxHashMap<regalloc2::VReg, regalloc2::VReg>,
1659 
1660     /// A deferred error, to be bubbled up to the top level of the
1661     /// lowering algorithm. We take this approach because we cannot
1662     /// currently propagate a `Result` upward through ISLE code (the
1663     /// lowering rules) or some ABI code.
1664     deferred_error: Option<CodegenError>,
1665 
1666     /// Facts on VRegs, for proof-carrying code.
1667     facts: Vec<Option<Fact>>,
1668 
1669     /// The type of instruction that this allocator makes registers for.
1670     _inst: core::marker::PhantomData<I>,
1671 }
1672 
1673 impl<I: VCodeInst> VRegAllocator<I> {
1674     /// Make a new VRegAllocator.
1675     pub fn with_capacity(capacity: usize) -> Self {
1676         let capacity = first_user_vreg_index() + capacity;
1677         let mut vreg_types = Vec::with_capacity(capacity);
1678         vreg_types.resize(first_user_vreg_index(), types::INVALID);
1679         Self {
1680             vreg_types,
1681             vreg_aliases: FxHashMap::with_capacity_and_hasher(capacity, Default::default()),
1682             deferred_error: None,
1683             facts: Vec::with_capacity(capacity),
1684             _inst: core::marker::PhantomData::default(),
1685         }
1686     }
1687 
1688     /// Allocate a fresh ValueRegs.
1689     pub fn alloc(&mut self, ty: Type) -> CodegenResult<ValueRegs<Reg>> {
1690         if self.deferred_error.is_some() {
1691             return Err(CodegenError::CodeTooLarge);
1692         }
1693         let v = self.vreg_types.len();
1694         let (regclasses, tys) = I::rc_for_type(ty)?;
1695         if v + regclasses.len() >= VReg::MAX {
1696             return Err(CodegenError::CodeTooLarge);
1697         }
1698 
1699         let regs: ValueRegs<Reg> = match regclasses {
1700             &[rc0] => ValueRegs::one(VReg::new(v, rc0).into()),
1701             &[rc0, rc1] => ValueRegs::two(VReg::new(v, rc0).into(), VReg::new(v + 1, rc1).into()),
1702             // We can extend this if/when we support 32-bit targets; e.g.,
1703             // an i128 on a 32-bit machine will need up to four machine regs
1704             // for a `Value`.
1705             _ => panic!("Value must reside in 1 or 2 registers"),
1706         };
1707         for (&reg_ty, &reg) in tys.iter().zip(regs.regs().iter()) {
1708             let vreg = reg.to_virtual_reg().unwrap();
1709             debug_assert_eq!(self.vreg_types.len(), vreg.index());
1710             self.vreg_types.push(reg_ty);
1711         }
1712 
1713         // Create empty facts for each allocated vreg.
1714         self.facts.resize(self.vreg_types.len(), None);
1715 
1716         Ok(regs)
1717     }
1718 
1719     /// Allocate a fresh ValueRegs, deferring any out-of-vregs
1720     /// errors. This is useful in places where we cannot bubble a
1721     /// `CodegenResult` upward easily, and which are known to be
1722     /// invoked from within the lowering loop that checks the deferred
1723     /// error status below.
1724     pub fn alloc_with_deferred_error(&mut self, ty: Type) -> ValueRegs<Reg> {
1725         match self.alloc(ty) {
1726             Ok(x) => x,
1727             Err(e) => {
1728                 self.deferred_error = Some(e);
1729                 self.bogus_for_deferred_error(ty)
1730             }
1731         }
1732     }
1733 
1734     /// Take any deferred error that was accumulated by `alloc_with_deferred_error`.
1735     pub fn take_deferred_error(&mut self) -> Option<CodegenError> {
1736         self.deferred_error.take()
1737     }
1738 
1739     /// Produce an bogus VReg placeholder with the proper number of
1740     /// registers for the given type. This is meant to be used with
1741     /// deferred allocation errors (see `Lower::alloc_tmp()`).
1742     fn bogus_for_deferred_error(&self, ty: Type) -> ValueRegs<Reg> {
1743         let (regclasses, _tys) = I::rc_for_type(ty).expect("must have valid type");
1744         match regclasses {
1745             &[rc0] => ValueRegs::one(VReg::new(0, rc0).into()),
1746             &[rc0, rc1] => ValueRegs::two(VReg::new(0, rc0).into(), VReg::new(1, rc1).into()),
1747             _ => panic!("Value must reside in 1 or 2 registers"),
1748         }
1749     }
1750 
1751     /// Rewrite any mention of `from` into `to`.
1752     pub fn set_vreg_alias(&mut self, from: Reg, to: Reg) {
1753         let from = from.into();
1754         let resolved_to = self.resolve_vreg_alias(to.into());
1755         // Disallow cycles (see below).
1756         assert_ne!(resolved_to, from);
1757 
1758         // Maintain the invariant that PCC facts only exist on vregs
1759         // which aren't aliases. We want to preserve whatever was
1760         // stated about the vreg before its producer was lowered.
1761         if let Some(fact) = self.facts[from.vreg()].take() {
1762             self.set_fact(resolved_to, fact);
1763         }
1764 
1765         let old_alias = self.vreg_aliases.insert(from, resolved_to);
1766         debug_assert_eq!(old_alias, None);
1767     }
1768 
1769     fn resolve_vreg_alias(&self, mut vreg: regalloc2::VReg) -> regalloc2::VReg {
1770         // We prevent cycles from existing by resolving targets of
1771         // aliases eagerly before setting them. If the target resolves
1772         // to the origin of the alias, then a cycle would be created
1773         // and the alias is disallowed. Because of the structure of
1774         // SSA code (one instruction can refer to another's defs but
1775         // not vice-versa, except indirectly through
1776         // phis/blockparams), cycles should not occur as we use
1777         // aliases to redirect vregs to the temps that actually define
1778         // them.
1779         while let Some(to) = self.vreg_aliases.get(&vreg) {
1780             vreg = *to;
1781         }
1782         vreg
1783     }
1784 
1785     #[inline]
1786     fn debug_assert_no_vreg_aliases(&self, mut list: impl Iterator<Item = VReg>) {
1787         debug_assert!(list.all(|vreg| !self.vreg_aliases.contains_key(&vreg)));
1788     }
1789 
1790     /// Set the proof-carrying code fact on a given virtual register.
1791     ///
1792     /// Returns the old fact, if any (only one fact can be stored).
1793     fn set_fact(&mut self, vreg: regalloc2::VReg, fact: Fact) -> Option<Fact> {
1794         trace!("vreg {:?} has fact: {:?}", vreg, fact);
1795         debug_assert!(!self.vreg_aliases.contains_key(&vreg));
1796         self.facts[vreg.vreg()].replace(fact)
1797     }
1798 
1799     /// Set a fact only if one doesn't already exist.
1800     pub fn set_fact_if_missing(&mut self, vreg: VirtualReg, fact: Fact) {
1801         let vreg = self.resolve_vreg_alias(vreg.into());
1802         if self.facts[vreg.vreg()].is_none() {
1803             self.set_fact(vreg, fact);
1804         }
1805     }
1806 
1807     /// Allocate a fresh ValueRegs, with a given fact to apply if
1808     /// the value fits in one VReg.
1809     pub fn alloc_with_maybe_fact(
1810         &mut self,
1811         ty: Type,
1812         fact: Option<Fact>,
1813     ) -> CodegenResult<ValueRegs<Reg>> {
1814         let result = self.alloc(ty)?;
1815 
1816         // Ensure that we don't lose a fact on a value that splits
1817         // into multiple VRegs.
1818         assert!(result.len() == 1 || fact.is_none());
1819         if let Some(fact) = fact {
1820             self.set_fact(result.regs()[0].into(), fact);
1821         }
1822 
1823         Ok(result)
1824     }
1825 }
1826 
1827 /// This structure tracks the large constants used in VCode that will be emitted separately by the
1828 /// [MachBuffer].
1829 ///
1830 /// First, during the lowering phase, constants are inserted using
1831 /// [VCodeConstants.insert]; an intermediate handle, `VCodeConstant`, tracks what constants are
1832 /// used in this phase. Some deduplication is performed, when possible, as constant
1833 /// values are inserted.
1834 ///
1835 /// Secondly, during the emission phase, the [MachBuffer] assigns [MachLabel]s for each of the
1836 /// constants so that instructions can refer to the value's memory location. The [MachBuffer]
1837 /// then writes the constant values to the buffer.
1838 #[derive(Default)]
1839 pub struct VCodeConstants {
1840     constants: PrimaryMap<VCodeConstant, VCodeConstantData>,
1841     pool_uses: HashMap<Constant, VCodeConstant>,
1842     well_known_uses: HashMap<*const [u8], VCodeConstant>,
1843     u64s: HashMap<[u8; 8], VCodeConstant>,
1844 }
1845 impl VCodeConstants {
1846     /// Initialize the structure with the expected number of constants.
1847     pub fn with_capacity(expected_num_constants: usize) -> Self {
1848         Self {
1849             constants: PrimaryMap::with_capacity(expected_num_constants),
1850             pool_uses: HashMap::with_capacity(expected_num_constants),
1851             well_known_uses: HashMap::new(),
1852             u64s: HashMap::new(),
1853         }
1854     }
1855 
1856     /// Insert a constant; using this method indicates that a constant value will be used and thus
1857     /// will be emitted to the `MachBuffer`. The current implementation can deduplicate constants
1858     /// that are [VCodeConstantData::Pool] or [VCodeConstantData::WellKnown] but not
1859     /// [VCodeConstantData::Generated].
1860     pub fn insert(&mut self, data: VCodeConstantData) -> VCodeConstant {
1861         match data {
1862             VCodeConstantData::Generated(_) => self.constants.push(data),
1863             VCodeConstantData::Pool(constant, _) => match self.pool_uses.get(&constant) {
1864                 None => {
1865                     let vcode_constant = self.constants.push(data);
1866                     self.pool_uses.insert(constant, vcode_constant);
1867                     vcode_constant
1868                 }
1869                 Some(&vcode_constant) => vcode_constant,
1870             },
1871             VCodeConstantData::WellKnown(data_ref) => {
1872                 match self.well_known_uses.entry(data_ref as *const [u8]) {
1873                     Entry::Vacant(v) => {
1874                         let vcode_constant = self.constants.push(data);
1875                         v.insert(vcode_constant);
1876                         vcode_constant
1877                     }
1878                     Entry::Occupied(o) => *o.get(),
1879                 }
1880             }
1881             VCodeConstantData::U64(value) => match self.u64s.entry(value) {
1882                 Entry::Vacant(v) => {
1883                     let vcode_constant = self.constants.push(data);
1884                     v.insert(vcode_constant);
1885                     vcode_constant
1886                 }
1887                 Entry::Occupied(o) => *o.get(),
1888             },
1889         }
1890     }
1891 
1892     /// Return the number of constants inserted.
1893     pub fn len(&self) -> usize {
1894         self.constants.len()
1895     }
1896 
1897     /// Iterate over the `VCodeConstant` keys inserted in this structure.
1898     pub fn keys(&self) -> Keys<VCodeConstant> {
1899         self.constants.keys()
1900     }
1901 
1902     /// Iterate over the `VCodeConstant` keys and the data (as a byte slice) inserted in this
1903     /// structure.
1904     pub fn iter(&self) -> impl Iterator<Item = (VCodeConstant, &VCodeConstantData)> {
1905         self.constants.iter()
1906     }
1907 
1908     /// Returns the data associated with the specified constant.
1909     pub fn get(&self, c: VCodeConstant) -> &VCodeConstantData {
1910         &self.constants[c]
1911     }
1912 
1913     /// Checks if the given [VCodeConstantData] is registered as
1914     /// used by the pool.
1915     pub fn pool_uses(&self, constant: &VCodeConstantData) -> bool {
1916         match constant {
1917             VCodeConstantData::Pool(c, _) => self.pool_uses.contains_key(c),
1918             _ => false,
1919         }
1920     }
1921 }
1922 
1923 /// A use of a constant by one or more VCode instructions; see [VCodeConstants].
1924 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
1925 pub struct VCodeConstant(u32);
1926 entity_impl!(VCodeConstant);
1927 
1928 /// Identify the different types of constant that can be inserted into [VCodeConstants]. Tracking
1929 /// these separately instead of as raw byte buffers allows us to avoid some duplication.
1930 pub enum VCodeConstantData {
1931     /// A constant already present in the Cranelift IR
1932     /// [ConstantPool](crate::ir::constant::ConstantPool).
1933     Pool(Constant, ConstantData),
1934     /// A reference to a well-known constant value that is statically encoded within the compiler.
1935     WellKnown(&'static [u8]),
1936     /// A constant value generated during lowering; the value may depend on the instruction context
1937     /// which makes it difficult to de-duplicate--if possible, use other variants.
1938     Generated(ConstantData),
1939     /// A constant of at most 64 bits. These are deduplicated as
1940     /// well. Stored as a fixed-size array of `u8` so that we do not
1941     /// encounter endianness problems when cross-compiling.
1942     U64([u8; 8]),
1943 }
1944 impl VCodeConstantData {
1945     /// Retrieve the constant data as a byte slice.
1946     pub fn as_slice(&self) -> &[u8] {
1947         match self {
1948             VCodeConstantData::Pool(_, d) | VCodeConstantData::Generated(d) => d.as_slice(),
1949             VCodeConstantData::WellKnown(d) => d,
1950             VCodeConstantData::U64(value) => &value[..],
1951         }
1952     }
1953 
1954     /// Calculate the alignment of the constant data.
1955     pub fn alignment(&self) -> u32 {
1956         if self.as_slice().len() <= 8 {
1957             8
1958         } else {
1959             16
1960         }
1961     }
1962 }
1963 
1964 #[cfg(test)]
1965 mod test {
1966     use super::*;
1967     use std::mem::size_of;
1968 
1969     #[test]
1970     fn size_of_constant_structs() {
1971         assert_eq!(size_of::<Constant>(), 4);
1972         assert_eq!(size_of::<VCodeConstant>(), 4);
1973         assert_eq!(size_of::<ConstantData>(), 3 * size_of::<usize>());
1974         assert_eq!(size_of::<VCodeConstantData>(), 4 * size_of::<usize>());
1975         assert_eq!(
1976             size_of::<PrimaryMap<VCodeConstant, VCodeConstantData>>(),
1977             3 * size_of::<usize>()
1978         );
1979         // TODO The VCodeConstants structure's memory size could be further optimized.
1980         // With certain versions of Rust, each `HashMap` in `VCodeConstants` occupied at
1981         // least 48 bytes, making an empty `VCodeConstants` cost 120 bytes.
1982     }
1983 }
1984