1 //! This implements the VCode container: a CFG of Insts that have been lowered.
2 //!
3 //! VCode is virtual-register code. An instruction in VCode is almost a machine
4 //! instruction; however, its register slots can refer to virtual registers in
5 //! addition to real machine registers.
6 //!
7 //! VCode is structured with traditional basic blocks, and
8 //! each block must be terminated by an unconditional branch (one target), a
9 //! conditional branch (two targets), or a return (no targets). Note that this
10 //! slightly differs from the machine code of most ISAs: in most ISAs, a
11 //! conditional branch has one target (and the not-taken case falls through).
12 //! However, we expect that machine backends will elide branches to the following
13 //! block (i.e., zero-offset jumps), and will be able to codegen a branch-cond /
14 //! branch-uncond pair if *both* targets are not fallthrough. This allows us to
15 //! play with layout prior to final binary emission, as well, if we want.
16 //!
17 //! See the main module comment in `mod.rs` for more details on the VCode-based
18 //! backend pipeline.
19 
20 use crate::CodegenError;
21 use crate::FxHashMap;
22 use crate::ir::{self, Constant, ConstantData, ValueLabel, types};
23 use crate::ranges::Ranges;
24 use crate::timing;
25 use crate::trace;
26 use crate::{LabelValueLoc, ValueLocRange};
27 use crate::{machinst::*, trace_log_enabled};
28 use regalloc2::{
29     Edit, Function as RegallocFunction, InstOrEdit, InstPosition, InstRange, Operand,
30     OperandConstraint, OperandKind, PRegSet, ProgPoint, RegClass,
31 };
32 
33 use crate::HashMap;
34 use crate::hash_map::Entry;
35 use core::cmp::Ordering;
36 use core::fmt::{self, Write};
37 use core::mem::take;
38 use core::ops::Range;
39 use cranelift_entity::{Keys, entity_impl};
40 
41 /// Index referring to an instruction in VCode.
42 pub type InsnIndex = regalloc2::Inst;
43 
44 /// Extension trait for `InsnIndex` to allow conversion to a
45 /// `BackwardsInsnIndex`.
46 trait ToBackwardsInsnIndex {
47     fn to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex;
48 }
49 
50 impl ToBackwardsInsnIndex for InsnIndex {
51     fn to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex {
52         BackwardsInsnIndex::new(num_insts - self.index() - 1)
53     }
54 }
55 
56 /// An index referring to an instruction in the VCode when it is backwards,
57 /// during VCode construction.
58 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
59 #[cfg_attr(
60     feature = "enable-serde",
61     derive(::serde::Serialize, ::serde::Deserialize)
62 )]
63 pub struct BackwardsInsnIndex(InsnIndex);
64 
65 impl BackwardsInsnIndex {
66     pub fn new(i: usize) -> Self {
67         BackwardsInsnIndex(InsnIndex::new(i))
68     }
69 }
70 
71 /// Index referring to a basic block in VCode.
72 pub type BlockIndex = regalloc2::Block;
73 
74 /// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be
75 /// a `MachInst` and it must be able to emit itself at least to a `SizeCodeSink`.
76 pub trait VCodeInst: MachInst + MachInstEmit {}
77 impl<I: MachInst + MachInstEmit> VCodeInst for I {}
78 
79 /// A function in "VCode" (virtualized-register code) form, after
80 /// lowering.  This is essentially a standard CFG of basic blocks,
81 /// where each basic block consists of lowered instructions produced
82 /// by the machine-specific backend.
83 ///
84 /// Note that the VCode is immutable once produced, and is not
85 /// modified by register allocation in particular. Rather, register
86 /// allocation on the `VCode` produces a separate `regalloc2::Output`
87 /// struct, and this can be passed to `emit`. `emit` in turn does not
88 /// modify the vcode, but produces an `EmitResult`, which contains the
89 /// machine code itself, and the associated disassembly and/or
90 /// metadata as requested.
91 pub struct VCode<I: VCodeInst> {
92     /// VReg IR-level types.
93     vreg_types: Vec<Type>,
94 
95     /// Lowered machine instructions in order corresponding to the original IR.
96     insts: Vec<I>,
97 
98     /// A map from backwards instruction index to the user stack map for that
99     /// instruction.
100     ///
101     /// This is a sparse side table that only has entries for instructions that
102     /// are safepoints, and only for a subset of those that have an associated
103     /// user stack map.
104     user_stack_maps: FxHashMap<BackwardsInsnIndex, ir::UserStackMap>,
105 
106     /// A map from backwards instruction index to the debug tags for
107     /// that instruction. Each entry indexes a range in the
108     /// `debug_tag_pool`.
109     debug_tags: FxHashMap<BackwardsInsnIndex, Range<u32>>,
110 
111     /// Pooled storage for sequences of debug tags; indexed by entries
112     /// in `debug_tags`.
113     debug_tag_pool: Vec<ir::DebugTag>,
114 
115     /// Operands: pre-regalloc references to virtual registers with
116     /// constraints, in one flattened array. This allows the regalloc
117     /// to efficiently access all operands without requiring expensive
118     /// matches or method invocations on insts.
119     operands: Vec<Operand>,
120 
121     /// Operand index ranges: for each instruction in `insts`, there
122     /// is a tuple here providing the range in `operands` for that
123     /// instruction's operands.
124     operand_ranges: Ranges,
125 
126     /// Clobbers: a sparse map from instruction indices to clobber masks.
127     clobbers: FxHashMap<InsnIndex, PRegSet>,
128 
129     /// Source locations for each instruction. (`SourceLoc` is a `u32`, so it is
130     /// reasonable to keep one of these per instruction.)
131     srclocs: Vec<RelSourceLoc>,
132 
133     /// Entry block.
134     entry: BlockIndex,
135 
136     /// Block instruction indices.
137     block_ranges: Ranges,
138 
139     /// Block successors: index range in the `block_succs` list.
140     block_succ_range: Ranges,
141 
142     /// Block successor lists, concatenated into one vec. The
143     /// `block_succ_range` list of tuples above gives (start, end)
144     /// ranges within this list that correspond to each basic block's
145     /// successors.
146     block_succs: Vec<regalloc2::Block>,
147 
148     /// Block predecessors: index range in the `block_preds` list.
149     block_pred_range: Ranges,
150 
151     /// Block predecessor lists, concatenated into one vec. The
152     /// `block_pred_range` list of tuples above gives (start, end)
153     /// ranges within this list that correspond to each basic block's
154     /// predecessors.
155     block_preds: Vec<regalloc2::Block>,
156 
157     /// Block parameters: index range in `block_params` below.
158     block_params_range: Ranges,
159 
160     /// Block parameter lists, concatenated into one vec. The
161     /// `block_params_range` list of tuples above gives (start, end)
162     /// ranges within this list that correspond to each basic block's
163     /// blockparam vregs.
164     block_params: Vec<regalloc2::VReg>,
165 
166     /// Outgoing block arguments on branch instructions, concatenated
167     /// into one list.
168     ///
169     /// Note that this is conceptually a 3D array: we have a VReg list
170     /// per block, per successor. We flatten those three dimensions
171     /// into this 1D vec, then store index ranges in two levels of
172     /// indirection.
173     ///
174     /// Indexed by the indices in `branch_block_arg_succ_range`.
175     branch_block_args: Vec<regalloc2::VReg>,
176 
177     /// Array of sequences of (start, end) tuples in
178     /// `branch_block_args`, one for each successor; these sequences
179     /// for each block are concatenated.
180     ///
181     /// Indexed by the indices in `branch_block_arg_succ_range`.
182     branch_block_arg_range: Ranges,
183 
184     /// For a given block, indices in `branch_block_arg_range`
185     /// corresponding to all of its successors.
186     branch_block_arg_succ_range: Ranges,
187 
188     /// Block-order information.
189     block_order: BlockLoweringOrder,
190 
191     /// ABI object.
192     pub(crate) abi: Callee<I::ABIMachineSpec>,
193 
194     /// Constant information used during code emission. This should be
195     /// immutable across function compilations within the same module.
196     emit_info: I::Info,
197 
198     /// Constants.
199     pub(crate) constants: VCodeConstants,
200 
201     /// Value labels for debuginfo attached to vregs.
202     debug_value_labels: Vec<(VReg, InsnIndex, InsnIndex, u32)>,
203 
204     pub(crate) sigs: SigSet,
205 
206     log2_min_function_alignment: u8,
207 }
208 
209 /// The result of `VCode::emit`. Contains all information computed
210 /// during emission: actual machine code, optionally a disassembly,
211 /// and optionally metadata about the code layout.
212 pub struct EmitResult {
213     /// The MachBuffer containing the machine code.
214     pub buffer: MachBufferFinalized<Stencil>,
215 
216     /// Offset of each basic block, recorded during emission. Computed
217     /// only if `machine_code_cfg_info` is enabled.
218     pub bb_offsets: Vec<CodeOffset>,
219 
220     /// Final basic-block edges, in terms of code offsets of
221     /// bb-starts. Computed only if `machine_code_cfg_info` is enabled.
222     pub bb_edges: Vec<(CodeOffset, CodeOffset)>,
223 
224     /// The pretty-printed disassembly, if any. This uses the same
225     /// pretty-printing for MachInsts as the pre-regalloc VCode Debug
226     /// implementation, but additionally includes the prologue and
227     /// epilogue(s), and makes use of the regalloc results.
228     pub disasm: Option<String>,
229 
230     /// Value-labels information (debug metadata).
231     pub value_labels_ranges: ValueLabelsRanges,
232 }
233 
234 /// A builder for a VCode function body.
235 ///
236 /// This builder has the ability to accept instructions in either
237 /// forward or reverse order, depending on the pass direction that
238 /// produces the VCode. The lowering from CLIF to VCode<MachInst>
239 /// ordinarily occurs in reverse order (in order to allow instructions
240 /// to be lowered only if used, and not merged) so a reversal will
241 /// occur at the end of lowering to ensure the VCode is in machine
242 /// order.
243 ///
244 /// If built in reverse, block and instruction indices used once the
245 /// VCode is built are relative to the final (reversed) order, not the
246 /// order of construction. Note that this means we do not know the
247 /// final block or instruction indices when building, so we do not
248 /// hand them out. (The user is assumed to know them when appending
249 /// terminator instructions with successor blocks.)
250 pub struct VCodeBuilder<I: VCodeInst> {
251     /// In-progress VCode.
252     pub(crate) vcode: VCode<I>,
253 
254     /// In what direction is the build occurring?
255     direction: VCodeBuildDirection,
256 
257     /// Debug-value label in-progress map, keyed by label. For each
258     /// label, we keep disjoint ranges mapping to vregs. We'll flatten
259     /// this into (vreg, range, label) tuples when done.
260     debug_info: FxHashMap<ValueLabel, Vec<(InsnIndex, InsnIndex, VReg)>>,
261 }
262 
263 /// Direction in which a VCodeBuilder builds VCode.
264 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
265 pub enum VCodeBuildDirection {
266     // TODO: add `Forward` once we need it and can test it adequately.
267     /// Backward-build pass: we expect the producer to call `emit()`
268     /// with instructions in reverse program order within each block.
269     Backward,
270 }
271 
272 impl<I: VCodeInst> VCodeBuilder<I> {
273     /// Create a new VCodeBuilder.
274     pub fn new(
275         sigs: SigSet,
276         abi: Callee<I::ABIMachineSpec>,
277         emit_info: I::Info,
278         block_order: BlockLoweringOrder,
279         constants: VCodeConstants,
280         direction: VCodeBuildDirection,
281         log2_min_function_alignment: u8,
282     ) -> Self {
283         let vcode = VCode::new(
284             sigs,
285             abi,
286             emit_info,
287             block_order,
288             constants,
289             log2_min_function_alignment,
290         );
291 
292         VCodeBuilder {
293             vcode,
294             direction,
295             debug_info: FxHashMap::default(),
296         }
297     }
298 
299     pub fn init_retval_area(&mut self, vregs: &mut VRegAllocator<I>) -> CodegenResult<()> {
300         self.vcode.abi.init_retval_area(&self.vcode.sigs, vregs)
301     }
302 
303     /// Access the ABI object.
304     pub fn abi(&self) -> &Callee<I::ABIMachineSpec> {
305         &self.vcode.abi
306     }
307 
308     /// Access the ABI object.
309     pub fn abi_mut(&mut self) -> &mut Callee<I::ABIMachineSpec> {
310         &mut self.vcode.abi
311     }
312 
313     pub fn sigs(&self) -> &SigSet {
314         &self.vcode.sigs
315     }
316 
317     pub fn sigs_mut(&mut self) -> &mut SigSet {
318         &mut self.vcode.sigs
319     }
320 
321     /// Access to the BlockLoweringOrder object.
322     pub fn block_order(&self) -> &BlockLoweringOrder {
323         &self.vcode.block_order
324     }
325 
326     /// Set the current block as the entry block.
327     pub fn set_entry(&mut self, block: BlockIndex) {
328         self.vcode.entry = block;
329     }
330 
331     /// End the current basic block. Must be called after emitting vcode insts
332     /// for IR insts and prior to ending the function (building the VCode).
333     pub fn end_bb(&mut self) {
334         let end_idx = self.vcode.insts.len();
335         // Add the instruction index range to the list of blocks.
336         self.vcode.block_ranges.push_end(end_idx);
337         // End the successors list.
338         let succ_end = self.vcode.block_succs.len();
339         self.vcode.block_succ_range.push_end(succ_end);
340         // End the blockparams list.
341         let block_params_end = self.vcode.block_params.len();
342         self.vcode.block_params_range.push_end(block_params_end);
343         // End the branch blockparam args list.
344         let branch_block_arg_succ_end = self.vcode.branch_block_arg_range.len();
345         self.vcode
346             .branch_block_arg_succ_range
347             .push_end(branch_block_arg_succ_end);
348     }
349 
350     pub fn add_block_param(&mut self, param: VirtualReg) {
351         self.vcode.block_params.push(param.into());
352     }
353 
354     fn add_branch_args_for_succ(&mut self, args: &[Reg]) {
355         self.vcode
356             .branch_block_args
357             .extend(args.iter().map(|&arg| VReg::from(arg)));
358         let end = self.vcode.branch_block_args.len();
359         self.vcode.branch_block_arg_range.push_end(end);
360     }
361 
362     /// Push an instruction for the current BB and current IR inst
363     /// within the BB.
364     pub fn push(&mut self, insn: I, loc: RelSourceLoc) {
365         assert!(!insn.is_low_level_branch()); // These are not meant to be in VCode.
366         self.vcode.insts.push(insn);
367         self.vcode.srclocs.push(loc);
368     }
369 
370     /// Add a successor block with branch args.
371     pub fn add_succ(&mut self, block: BlockIndex, args: &[Reg]) {
372         self.vcode.block_succs.push(block);
373         self.add_branch_args_for_succ(args);
374     }
375 
376     /// Add a debug value label to a register.
377     pub fn add_value_label(&mut self, reg: Reg, label: ValueLabel) {
378         // 1) In the reversed order, we consider the instructions
379         //    that define ranges in the "debug_info" array to refer
380         //    to the IP **after** them (when reversed):
381         //      IP[2]__| Inst 3 |
382         //      IP[1]__| Inst 2 |
383         //      IP[0]__| Inst 1 |
384         //             | Inst 0 |
385         //    This is so that we can represent IP[<function start>],
386         //    done at the cost of not being to represent IP[<function end>],
387         //    which is OK since no values will be live at that point.
388         // 2) The live range for "reg" begins at the current IP
389         //    and continues until the next, in execution order,
390         //    VReg that defines "label". Since the ranges are open
391         //    at the end, the subtraction of 1 cancels out:
392         //      [last..current IP] <=>
393         //      [last..last emitted inst index] <=>
394         //      [last..next_inst_index - 1] <=>
395         //      [last..next_inst_index)
396         //
397         let next_inst_index = self.vcode.insts.len();
398         if next_inst_index == 0 {
399             // This would produce a defective [0..0) range.
400             return;
401         }
402         let next_inst = InsnIndex::new(next_inst_index);
403         let labels = self.debug_info.entry(label).or_insert_with(|| vec![]);
404         let last = labels
405             .last()
406             .map(|(_start, end, _vreg)| *end)
407             .unwrap_or(InsnIndex::new(0));
408         labels.push((last, next_inst, reg.into()));
409     }
410 
411     /// Access the constants.
412     pub fn constants(&mut self) -> &mut VCodeConstants {
413         &mut self.vcode.constants
414     }
415 
416     fn compute_preds_from_succs(&mut self) {
417         // Do a linear-time counting sort: first determine how many
418         // times each block appears as a successor.
419         let mut starts = vec![0u32; self.vcode.num_blocks()];
420         for succ in &self.vcode.block_succs {
421             starts[succ.index()] += 1;
422         }
423 
424         // Determine for each block the starting index where that
425         // block's predecessors should go. This is equivalent to the
426         // ranges we need to store in block_pred_range.
427         self.vcode.block_pred_range.reserve(starts.len());
428         let mut end = 0;
429         for count in starts.iter_mut() {
430             let start = end;
431             end += *count;
432             *count = start;
433             self.vcode.block_pred_range.push_end(end as usize);
434         }
435         let end = end as usize;
436         debug_assert_eq!(end, self.vcode.block_succs.len());
437 
438         // Walk over the successors again, this time grouped by
439         // predecessor, and push the predecessor at the current
440         // starting position of each of its successors. We build
441         // each group of predecessors in whatever order Ranges::iter
442         // returns them; regalloc2 doesn't care.
443         self.vcode.block_preds.resize(end, BlockIndex::invalid());
444         for (pred, range) in self.vcode.block_succ_range.iter() {
445             let pred = BlockIndex::new(pred);
446             for succ in &self.vcode.block_succs[range] {
447                 let pos = &mut starts[succ.index()];
448                 self.vcode.block_preds[*pos as usize] = pred;
449                 *pos += 1;
450             }
451         }
452         debug_assert!(self.vcode.block_preds.iter().all(|pred| pred.is_valid()));
453     }
454 
455     /// Called once, when a build in Backward order is complete, to
456     /// perform the overall reversal (into final forward order) and
457     /// finalize metadata accordingly.
458     fn reverse_and_finalize(&mut self, vregs: &VRegAllocator<I>) {
459         let n_insts = self.vcode.insts.len();
460         if n_insts == 0 {
461             return;
462         }
463 
464         // Reverse the per-block and per-inst sequences.
465         self.vcode.block_ranges.reverse_index();
466         self.vcode.block_ranges.reverse_target(n_insts);
467         // block_params_range is indexed by block (and blocks were
468         // traversed in reverse) so we reverse it; but block-param
469         // sequences in the concatenated vec can remain in reverse
470         // order (it is effectively an arena of arbitrarily-placed
471         // referenced sequences).
472         self.vcode.block_params_range.reverse_index();
473         // Likewise, we reverse block_succ_range, but the block_succ
474         // concatenated array can remain as-is.
475         self.vcode.block_succ_range.reverse_index();
476         self.vcode.insts.reverse();
477         self.vcode.srclocs.reverse();
478         // Likewise, branch_block_arg_succ_range is indexed by block
479         // so must be reversed.
480         self.vcode.branch_block_arg_succ_range.reverse_index();
481 
482         // To translate an instruction index *endpoint* in reversed
483         // order to forward order, compute `n_insts - i`.
484         //
485         // Why not `n_insts - 1 - i`? That would be correct to
486         // translate an individual instruction index (for ten insts 0
487         // to 9 inclusive, inst 0 becomes 9, and inst 9 becomes
488         // 0). But for the usual inclusive-start, exclusive-end range
489         // idiom, inclusive starts become exclusive ends and
490         // vice-versa, so e.g. an (inclusive) start of 0 becomes an
491         // (exclusive) end of 10.
492         let translate = |inst: InsnIndex| InsnIndex::new(n_insts - inst.index());
493 
494         // Generate debug-value labels based on per-label maps.
495         for (label, tuples) in &self.debug_info {
496             for &(start, end, vreg) in tuples {
497                 let vreg = vregs.resolve_vreg_alias(vreg);
498                 let fwd_start = translate(end);
499                 let fwd_end = translate(start);
500                 self.vcode
501                     .debug_value_labels
502                     .push((vreg, fwd_start, fwd_end, label.as_u32()));
503             }
504         }
505 
506         // Now sort debug value labels by VReg, as required
507         // by regalloc2.
508         self.vcode
509             .debug_value_labels
510             .sort_unstable_by_key(|(vreg, _, _, _)| *vreg);
511     }
512 
513     fn collect_operands(&mut self, vregs: &VRegAllocator<I>) {
514         let allocatable = PRegSet::from(self.vcode.abi.machine_env());
515         for (i, insn) in self.vcode.insts.iter_mut().enumerate() {
516             // Push operands from the instruction onto the operand list.
517             //
518             // We rename through the vreg alias table as we collect
519             // the operands. This is better than a separate post-pass
520             // over operands, because it has more cache locality:
521             // operands only need to pass through L1 once. This is
522             // also better than renaming instructions'
523             // operands/registers while lowering, because here we only
524             // need to do the `match` over the instruction to visit
525             // its register fields (which is slow, branchy code) once.
526 
527             let mut op_collector =
528                 OperandCollector::new(&mut self.vcode.operands, allocatable, |vreg| {
529                     vregs.resolve_vreg_alias(vreg)
530                 });
531             insn.get_operands(&mut op_collector);
532             let (ops, clobbers) = op_collector.finish();
533             self.vcode.operand_ranges.push_end(ops);
534 
535             if clobbers != PRegSet::default() {
536                 self.vcode.clobbers.insert(InsnIndex::new(i), clobbers);
537             }
538 
539             if let Some((dst, src)) = insn.is_move() {
540                 // We should never see non-virtual registers present in move
541                 // instructions.
542                 assert!(
543                     src.is_virtual(),
544                     "the real register {src:?} was used as the source of a move instruction"
545                 );
546                 assert!(
547                     dst.to_reg().is_virtual(),
548                     "the real register {:?} was used as the destination of a move instruction",
549                     dst.to_reg()
550                 );
551             }
552         }
553 
554         // Translate blockparam args via the vreg aliases table as well.
555         for arg in &mut self.vcode.branch_block_args {
556             let new_arg = vregs.resolve_vreg_alias(*arg);
557             trace!("operandcollector: block arg {:?} -> {:?}", arg, new_arg);
558             *arg = new_arg;
559         }
560     }
561 
562     /// Build the final VCode.
563     pub fn build(mut self, mut vregs: VRegAllocator<I>) -> VCode<I> {
564         self.vcode.vreg_types = take(&mut vregs.vreg_types);
565 
566         if self.direction == VCodeBuildDirection::Backward {
567             self.reverse_and_finalize(&vregs);
568         }
569         self.collect_operands(&vregs);
570 
571         self.compute_preds_from_succs();
572         self.vcode.debug_value_labels.sort_unstable();
573 
574         // At this point, nothing in the vcode should mention any
575         // VReg which has been aliased. All the appropriate rewriting
576         // should have happened above. Just to be sure, let's
577         // double-check each field which has vregs.
578         // Note: can't easily check vcode.insts, resolved in collect_operands.
579         // Operands are resolved in collect_operands.
580         vregs.debug_assert_no_vreg_aliases(self.vcode.operands.iter().map(|op| op.vreg()));
581         // Currently block params are never aliased to another vreg.
582         vregs.debug_assert_no_vreg_aliases(self.vcode.block_params.iter().copied());
583         // Branch block args are resolved in collect_operands.
584         vregs.debug_assert_no_vreg_aliases(self.vcode.branch_block_args.iter().copied());
585         // Debug value labels are resolved in reverse_and_finalize.
586         vregs.debug_assert_no_vreg_aliases(
587             self.vcode.debug_value_labels.iter().map(|&(vreg, ..)| vreg),
588         );
589 
590         self.vcode
591     }
592 
593     /// Add a user stack map for the associated instruction.
594     pub fn add_user_stack_map(
595         &mut self,
596         inst: BackwardsInsnIndex,
597         entries: &[ir::UserStackMapEntry],
598     ) {
599         let stack_map = ir::UserStackMap::new(entries, self.vcode.abi.sized_stackslot_offsets());
600         let old_entry = self.vcode.user_stack_maps.insert(inst, stack_map);
601         debug_assert!(old_entry.is_none());
602     }
603 
604     /// Add debug tags for the associated instruction.
605     pub fn add_debug_tags(&mut self, inst: BackwardsInsnIndex, entries: &[ir::DebugTag]) {
606         let start = u32::try_from(self.vcode.debug_tag_pool.len()).unwrap();
607         self.vcode.debug_tag_pool.extend(entries.iter().cloned());
608         let end = u32::try_from(self.vcode.debug_tag_pool.len()).unwrap();
609         self.vcode.debug_tags.insert(inst, start..end);
610     }
611 }
612 
613 const NO_INST_OFFSET: CodeOffset = u32::MAX;
614 
615 impl<I: VCodeInst> VCode<I> {
616     /// New empty VCode.
617     fn new(
618         sigs: SigSet,
619         abi: Callee<I::ABIMachineSpec>,
620         emit_info: I::Info,
621         block_order: BlockLoweringOrder,
622         constants: VCodeConstants,
623         log2_min_function_alignment: u8,
624     ) -> Self {
625         let n_blocks = block_order.lowered_order().len();
626         VCode {
627             sigs,
628             vreg_types: vec![],
629             insts: Vec::with_capacity(10 * n_blocks),
630             user_stack_maps: FxHashMap::default(),
631             debug_tags: FxHashMap::default(),
632             debug_tag_pool: vec![],
633             operands: Vec::with_capacity(30 * n_blocks),
634             operand_ranges: Ranges::with_capacity(10 * n_blocks),
635             clobbers: FxHashMap::default(),
636             srclocs: Vec::with_capacity(10 * n_blocks),
637             entry: BlockIndex::new(0),
638             block_ranges: Ranges::with_capacity(n_blocks),
639             block_succ_range: Ranges::with_capacity(n_blocks),
640             block_succs: Vec::with_capacity(n_blocks),
641             block_pred_range: Ranges::default(),
642             block_preds: Vec::new(),
643             block_params_range: Ranges::with_capacity(n_blocks),
644             block_params: Vec::with_capacity(5 * n_blocks),
645             branch_block_args: Vec::with_capacity(10 * n_blocks),
646             branch_block_arg_range: Ranges::with_capacity(2 * n_blocks),
647             branch_block_arg_succ_range: Ranges::with_capacity(n_blocks),
648             block_order,
649             abi,
650             emit_info,
651             constants,
652             debug_value_labels: vec![],
653             log2_min_function_alignment,
654         }
655     }
656 
657     /// Get the number of blocks. Block indices will be in the range `0 ..
658     /// (self.num_blocks() - 1)`.
659     pub fn num_blocks(&self) -> usize {
660         self.block_ranges.len()
661     }
662 
663     /// The number of lowered instructions.
664     pub fn num_insts(&self) -> usize {
665         self.insts.len()
666     }
667 
668     fn compute_clobbers_and_function_calls(
669         &self,
670         regalloc: &regalloc2::Output,
671     ) -> (Vec<Writable<RealReg>>, FunctionCalls) {
672         let mut clobbered = PRegSet::default();
673         let mut function_calls = FunctionCalls::None;
674 
675         // All moves are included in clobbers.
676         for (_, Edit::Move { to, .. }) in &regalloc.edits {
677             if let Some(preg) = to.as_reg() {
678                 clobbered.add(preg);
679             }
680         }
681 
682         for (i, range) in self.operand_ranges.iter() {
683             let operands = &self.operands[range.clone()];
684             let allocs = &regalloc.allocs[range];
685             for (operand, alloc) in operands.iter().zip(allocs.iter()) {
686                 if operand.kind() == OperandKind::Def {
687                     if let Some(preg) = alloc.as_reg() {
688                         clobbered.add(preg);
689                     }
690                 }
691             }
692 
693             function_calls.update(self.insts[i].call_type());
694 
695             // Also add explicitly-clobbered registers.
696             //
697             // Skip merging this instruction's clobber list if not
698             // "included in clobbers" as per the MachInst. (Some
699             // backends use this to implement ABI specifics; e.g.,
700             // excluding calls of the same ABI as the current function
701             // from clobbers, because by definition everything
702             // clobbered by the call can be clobbered by this function
703             // without saving as well.
704             //
705             // This is important for a particular optimization: when
706             // some registers are "half-clobbered", e.g. vector/float
707             // registers on aarch64, we want them to be seen as
708             // clobbered by regalloc so it avoids carrying values
709             // across calls in these registers but not seen as
710             // clobbered by prologue generation here (because the
711             // actual half-clobber implied by the clobber list fits
712             // within the clobbers that we allow without
713             // clobber-saves).
714             if self.insts[i].is_included_in_clobbers() {
715                 if let Some(&inst_clobbered) = self.clobbers.get(&InsnIndex::new(i)) {
716                     clobbered.union_from(inst_clobbered);
717                 }
718             }
719         }
720 
721         let clobbered_regs = clobbered
722             .into_iter()
723             .map(|preg| Writable::from_reg(RealReg::from(preg)))
724             .collect();
725 
726         (clobbered_regs, function_calls)
727     }
728 
729     /// Emit the instructions to a `MachBuffer`, containing fixed-up
730     /// code and external reloc/trap/etc. records ready for use. Takes
731     /// the regalloc results as well.
732     ///
733     /// Returns the machine code itself, and optionally metadata
734     /// and/or a disassembly, as an `EmitResult`. The `VCode` itself
735     /// is consumed by the emission process.
736     pub fn emit(
737         mut self,
738         regalloc: &regalloc2::Output,
739         want_disasm: bool,
740         flags: &settings::Flags,
741         ctrl_plane: &mut ControlPlane,
742     ) -> EmitResult
743     where
744         I: VCodeInst,
745     {
746         let _tt = timing::vcode_emit();
747         let mut buffer = MachBuffer::new();
748         buffer.set_log2_min_function_alignment(self.log2_min_function_alignment);
749         let mut bb_starts: Vec<Option<CodeOffset>> = vec![];
750 
751         // The first M MachLabels are reserved for block indices.
752         buffer.reserve_labels_for_blocks(self.num_blocks());
753 
754         // Register all allocated constants with the `MachBuffer` to ensure that
755         // any references to the constants during instructions can be handled
756         // correctly.
757         buffer.register_constants(&self.constants);
758 
759         // Construct the final order we emit code in: cold blocks at the end.
760         let mut final_order: SmallVec<[BlockIndex; 16]> = smallvec![];
761         let mut cold_blocks: SmallVec<[BlockIndex; 16]> = smallvec![];
762         for block in 0..self.num_blocks() {
763             let block = BlockIndex::new(block);
764             if self.block_order.is_cold(block) {
765                 cold_blocks.push(block);
766             } else {
767                 final_order.push(block);
768             }
769         }
770         final_order.extend(cold_blocks.clone());
771 
772         // Compute/save info we need for the prologue: clobbers and
773         // number of spillslots.
774         //
775         // We clone `abi` here because we will mutate it as we
776         // generate the prologue and set other info, but we can't
777         // mutate `VCode`. The info it usually carries prior to
778         // setting clobbers is fairly minimal so this should be
779         // relatively cheap.
780         let (clobbers, function_calls) = self.compute_clobbers_and_function_calls(regalloc);
781         self.abi.compute_frame_layout(
782             &self.sigs,
783             regalloc.num_spillslots,
784             clobbers,
785             function_calls,
786         );
787 
788         // Emit blocks.
789         let mut cur_srcloc = None;
790         let mut last_offset = None;
791         let mut inst_offsets = vec![];
792         let mut state = I::State::new(&self.abi, core::mem::take(ctrl_plane));
793 
794         let mut disasm = String::new();
795 
796         if !self.debug_value_labels.is_empty() {
797             inst_offsets.resize(self.insts.len(), NO_INST_OFFSET);
798         }
799 
800         // Count edits per block ahead of time; this is needed for
801         // lookahead island emission. (We could derive it per-block
802         // with binary search in the edit list, but it's more
803         // efficient to do it in one pass here.)
804         let mut ra_edits_per_block: SmallVec<[u32; 64]> = smallvec![];
805         let mut edit_idx = 0;
806         for block in 0..self.num_blocks() {
807             let end_inst = InsnIndex::new(self.block_ranges.get(block).end);
808             let start_edit_idx = edit_idx;
809             while edit_idx < regalloc.edits.len() && regalloc.edits[edit_idx].0.inst() < end_inst {
810                 edit_idx += 1;
811             }
812             let end_edit_idx = edit_idx;
813             ra_edits_per_block.push((end_edit_idx - start_edit_idx) as u32);
814         }
815 
816         let is_forward_edge_cfi_enabled = self.abi.is_forward_edge_cfi_enabled();
817         let mut bb_padding = match flags.bb_padding_log2_minus_one() {
818             0 => Vec::new(),
819             n => vec![0; 1 << (n - 1)],
820         };
821         let mut total_bb_padding = 0;
822 
823         for (block_order_idx, &block) in final_order.iter().enumerate() {
824             trace!("emitting block {:?}", block);
825 
826             // Call the new block hook for state
827             state.on_new_block();
828 
829             // Emit NOPs to align the block.
830             let new_offset = I::align_basic_block(buffer.cur_offset());
831             while new_offset > buffer.cur_offset() {
832                 // Pad with NOPs up to the aligned block offset.
833                 let nop = I::gen_nop((new_offset - buffer.cur_offset()) as usize);
834                 nop.emit(&mut buffer, &self.emit_info, &mut Default::default());
835             }
836             assert_eq!(buffer.cur_offset(), new_offset);
837 
838             let do_emit = |inst: &I,
839                            disasm: &mut String,
840                            buffer: &mut MachBuffer<I>,
841                            state: &mut I::State| {
842                 if want_disasm && !inst.is_args() {
843                     let mut s = state.clone();
844                     writeln!(disasm, "  {}", inst.pretty_print_inst(&mut s)).unwrap();
845                 }
846                 inst.emit(buffer, &self.emit_info, state);
847             };
848 
849             // Is this the first block? Emit the prologue directly if so.
850             if block == self.entry {
851                 trace!(" -> entry block");
852                 buffer.start_srcloc(Default::default());
853                 for inst in &self.abi.gen_prologue() {
854                     do_emit(&inst, &mut disasm, &mut buffer, &mut state);
855                 }
856                 buffer.end_srcloc();
857             }
858 
859             // Now emit the regular block body.
860 
861             buffer.bind_label(MachLabel::from_block(block), state.ctrl_plane_mut());
862 
863             if want_disasm {
864                 writeln!(&mut disasm, "block{}:", block.index()).unwrap();
865             }
866 
867             if flags.machine_code_cfg_info() {
868                 // Track BB starts. If we have backed up due to MachBuffer
869                 // branch opts, note that the removed blocks were removed.
870                 let cur_offset = buffer.cur_offset();
871                 if last_offset.is_some() && cur_offset <= last_offset.unwrap() {
872                     for i in (0..bb_starts.len()).rev() {
873                         if bb_starts[i].is_some() && cur_offset > bb_starts[i].unwrap() {
874                             break;
875                         }
876                         bb_starts[i] = None;
877                     }
878                 }
879                 bb_starts.push(Some(cur_offset));
880                 last_offset = Some(cur_offset);
881             }
882 
883             if let Some(block_start) = I::gen_block_start(
884                 self.block_order.is_indirect_branch_target(block),
885                 is_forward_edge_cfi_enabled,
886             ) {
887                 do_emit(&block_start, &mut disasm, &mut buffer, &mut state);
888             }
889 
890             for inst_or_edit in regalloc.block_insts_and_edits(&self, block) {
891                 match inst_or_edit {
892                     InstOrEdit::Inst(iix) => {
893                         if !self.debug_value_labels.is_empty() {
894                             // If we need to produce debug info,
895                             // record the offset of each instruction
896                             // so that we can translate value-label
897                             // ranges to machine-code offsets.
898 
899                             // Cold blocks violate monotonicity
900                             // assumptions elsewhere (that
901                             // instructions in inst-index order are in
902                             // order in machine code), so we omit
903                             // their offsets here. Value-label range
904                             // generation below will skip empty ranges
905                             // and ranges with to-offsets of zero.
906                             if !self.block_order.is_cold(block) {
907                                 inst_offsets[iix.index()] = buffer.cur_offset();
908                             }
909                         }
910 
911                         // Update the srcloc at this point in the buffer.
912                         let srcloc = self.srclocs[iix.index()];
913                         if cur_srcloc != Some(srcloc) {
914                             if cur_srcloc.is_some() {
915                                 buffer.end_srcloc();
916                             }
917                             buffer.start_srcloc(srcloc);
918                             cur_srcloc = Some(srcloc);
919                         }
920 
921                         // If this is a safepoint, compute a stack map
922                         // and pass it to the emit state.
923                         let stack_map_disasm = if self.insts[iix.index()].is_safepoint() {
924                             let (user_stack_map, user_stack_map_disasm) = {
925                                 // The `user_stack_maps` is keyed by reverse
926                                 // instruction index, so we must flip the
927                                 // index. We can't put this into a helper method
928                                 // due to borrowck issues because parts of
929                                 // `self` are borrowed mutably elsewhere in this
930                                 // function.
931                                 let index = iix.to_backwards_insn_index(self.num_insts());
932                                 let user_stack_map = self.user_stack_maps.remove(&index);
933                                 let user_stack_map_disasm = if want_disasm {
934                                     user_stack_map.as_ref().map(|m| format!("  ; {m:?}"))
935                                 } else {
936                                     None
937                                 };
938                                 (user_stack_map, user_stack_map_disasm)
939                             };
940 
941                             state.pre_safepoint(user_stack_map);
942 
943                             user_stack_map_disasm
944                         } else {
945                             None
946                         };
947 
948                         // Place debug tags in the emission buffer
949                         // either at the offset prior to the
950                         // instruction or after the instruction,
951                         // depending on whether this is a call. See
952                         // the documentation on [`MachDebugTagPos`]
953                         // for details on why.
954                         let mut debug_tag_disasm = None;
955                         let mut place_debug_tags =
956                             |this: &VCode<I>, pos: MachDebugTagPos, buffer: &mut MachBuffer<I>| {
957                                 // As above, translate the forward instruction
958                                 // index to a backward index for the lookup.
959                                 let debug_tag_range = {
960                                     let index = iix.to_backwards_insn_index(this.num_insts());
961                                     this.debug_tags.get(&index)
962                                 };
963                                 if let Some(range) = debug_tag_range {
964                                     let start = usize::try_from(range.start).unwrap();
965                                     let end = usize::try_from(range.end).unwrap();
966                                     let tags = &this.debug_tag_pool[start..end];
967 
968                                     if want_disasm {
969                                         debug_tag_disasm =
970                                             Some(format!("  ; ^-- debug @ {pos:?}: {tags:?}"));
971                                     }
972                                     buffer.push_debug_tags(pos, tags);
973                                 }
974                             };
975                         let debug_tag_pos =
976                             if self.insts[iix.index()].call_type() == CallType::Regular {
977                                 MachDebugTagPos::Post
978                             } else {
979                                 MachDebugTagPos::Pre
980                             };
981 
982                         if debug_tag_pos == MachDebugTagPos::Pre {
983                             place_debug_tags(&self, debug_tag_pos, &mut buffer);
984                         }
985 
986                         // If the instruction we are about to emit is
987                         // a return, place an epilogue at this point
988                         // (and don't emit the return; the actual
989                         // epilogue will contain it).
990                         if self.insts[iix.index()].is_term() == MachTerminator::Ret {
991                             log::trace!("emitting epilogue");
992                             for inst in self.abi.gen_epilogue() {
993                                 do_emit(&inst, &mut disasm, &mut buffer, &mut state);
994                             }
995                         } else {
996                             // Update the operands for this inst using the
997                             // allocations from the regalloc result.
998                             let mut allocs = regalloc.inst_allocs(iix).iter();
999                             self.insts[iix.index()].get_operands(
1000                                 &mut |reg: &mut Reg, constraint, _kind, _pos| {
1001                                     let alloc =
1002                                         allocs.next().expect("enough allocations for all operands");
1003 
1004                                     if let Some(alloc) = alloc.as_reg() {
1005                                         let alloc: Reg = alloc.into();
1006                                         if let OperandConstraint::FixedReg(rreg) = constraint {
1007                                             debug_assert_eq!(Reg::from(rreg), alloc);
1008                                         }
1009                                         *reg = alloc;
1010                                     } else if let Some(alloc) = alloc.as_stack() {
1011                                         let alloc: Reg = alloc.into();
1012                                         *reg = alloc;
1013                                     }
1014                                 },
1015                             );
1016                             debug_assert!(allocs.next().is_none());
1017 
1018                             log::trace!("emitting: {:?}", self.insts[iix.index()]);
1019 
1020                             // Emit the instruction!
1021                             do_emit(
1022                                 &self.insts[iix.index()],
1023                                 &mut disasm,
1024                                 &mut buffer,
1025                                 &mut state,
1026                             );
1027 
1028                             if debug_tag_pos == MachDebugTagPos::Post {
1029                                 place_debug_tags(&self, debug_tag_pos, &mut buffer);
1030                             }
1031 
1032                             if let Some(stack_map_disasm) = stack_map_disasm {
1033                                 disasm.push_str(&stack_map_disasm);
1034                                 disasm.push('\n');
1035                             }
1036                             if let Some(debug_tag_disasm) = debug_tag_disasm {
1037                                 disasm.push_str(&debug_tag_disasm);
1038                                 disasm.push('\n');
1039                             }
1040                         }
1041                     }
1042 
1043                     InstOrEdit::Edit(Edit::Move { from, to }) => {
1044                         // Create a move/spill/reload instruction and
1045                         // immediately emit it.
1046                         match (from.as_reg(), to.as_reg()) {
1047                             (Some(from), Some(to)) => {
1048                                 // Reg-to-reg move.
1049                                 let from_rreg = Reg::from(from);
1050                                 let to_rreg = Writable::from_reg(Reg::from(to));
1051                                 debug_assert_eq!(from.class(), to.class());
1052                                 let ty = I::canonical_type_for_rc(from.class());
1053                                 let mv = I::gen_move(to_rreg, from_rreg, ty);
1054                                 do_emit(&mv, &mut disasm, &mut buffer, &mut state);
1055                             }
1056                             (Some(from), None) => {
1057                                 // Spill from register to spillslot.
1058                                 let to = to.as_stack().unwrap();
1059                                 let from_rreg = RealReg::from(from);
1060                                 let spill = self.abi.gen_spill(to, from_rreg);
1061                                 do_emit(&spill, &mut disasm, &mut buffer, &mut state);
1062                             }
1063                             (None, Some(to)) => {
1064                                 // Load from spillslot to register.
1065                                 let from = from.as_stack().unwrap();
1066                                 let to_rreg = Writable::from_reg(RealReg::from(to));
1067                                 let reload = self.abi.gen_reload(to_rreg, from);
1068                                 do_emit(&reload, &mut disasm, &mut buffer, &mut state);
1069                             }
1070                             (None, None) => {
1071                                 panic!("regalloc2 should have eliminated stack-to-stack moves!");
1072                             }
1073                         }
1074                     }
1075                 }
1076             }
1077 
1078             if cur_srcloc.is_some() {
1079                 buffer.end_srcloc();
1080                 cur_srcloc = None;
1081             }
1082 
1083             // Do we need an island? Get the worst-case size of the next BB, add
1084             // it to the optional padding behind the block, and pass this to the
1085             // `MachBuffer` to determine if an island is necessary.
1086             let worst_case_next_bb = if block_order_idx < final_order.len() - 1 {
1087                 let next_block = final_order[block_order_idx + 1];
1088                 let next_block_range = self.block_ranges.get(next_block.index());
1089                 let next_block_size = next_block_range.len() as u32;
1090                 let next_block_ra_insertions = ra_edits_per_block[next_block.index()];
1091                 I::worst_case_size() * (next_block_size + next_block_ra_insertions)
1092             } else {
1093                 0
1094             };
1095             let padding = if bb_padding.is_empty() {
1096                 0
1097             } else {
1098                 bb_padding.len() as u32 + I::LabelUse::ALIGN - 1
1099             };
1100             if buffer.island_needed(padding + worst_case_next_bb) {
1101                 buffer.emit_island(padding + worst_case_next_bb, ctrl_plane);
1102             }
1103 
1104             // Insert padding, if configured, to stress the `MachBuffer`'s
1105             // relocation and island calculations.
1106             //
1107             // Padding can get quite large during fuzzing though so place a
1108             // total cap on it where when a per-function threshold is exceeded
1109             // the padding is turned back down to zero. This avoids a small-ish
1110             // test case generating a GB+ memory footprint in Cranelift for
1111             // example.
1112             if !bb_padding.is_empty() {
1113                 buffer.put_data(&bb_padding);
1114                 buffer.align_to(I::LabelUse::ALIGN);
1115                 total_bb_padding += bb_padding.len();
1116                 if total_bb_padding > (150 << 20) {
1117                     bb_padding = Vec::new();
1118                 }
1119             }
1120         }
1121 
1122         debug_assert!(
1123             self.user_stack_maps.is_empty(),
1124             "any stack maps should have been consumed by instruction emission, still have: {:#?}",
1125             self.user_stack_maps,
1126         );
1127 
1128         // Do any optimizations on branches at tail of buffer, as if we had
1129         // bound one last label.
1130         buffer.optimize_branches(ctrl_plane);
1131 
1132         // emission state is not needed anymore, move control plane back out
1133         *ctrl_plane = state.take_ctrl_plane();
1134 
1135         let func_body_len = buffer.cur_offset();
1136 
1137         // Create `bb_edges` and final (filtered) `bb_starts`.
1138         let mut bb_edges = vec![];
1139         let mut bb_offsets = vec![];
1140         if flags.machine_code_cfg_info() {
1141             for block in 0..self.num_blocks() {
1142                 if bb_starts[block].is_none() {
1143                     // Block was deleted by MachBuffer; skip.
1144                     continue;
1145                 }
1146                 let from = bb_starts[block].unwrap();
1147 
1148                 bb_offsets.push(from);
1149                 // Resolve each `succ` label and add edges.
1150                 let succs = self.block_succs(BlockIndex::new(block));
1151                 for &succ in succs.iter() {
1152                     let to = buffer.resolve_label_offset(MachLabel::from_block(succ));
1153                     bb_edges.push((from, to));
1154                 }
1155             }
1156         }
1157 
1158         self.monotonize_inst_offsets(&mut inst_offsets[..], func_body_len);
1159         let value_labels_ranges =
1160             self.compute_value_labels_ranges(regalloc, &inst_offsets[..], func_body_len);
1161 
1162         // Store metadata about frame layout in the MachBuffer.
1163         buffer.set_frame_layout(self.abi.frame_slot_metadata());
1164 
1165         EmitResult {
1166             buffer: buffer.finish(&self.constants, ctrl_plane),
1167             bb_offsets,
1168             bb_edges,
1169             disasm: if want_disasm { Some(disasm) } else { None },
1170             value_labels_ranges,
1171         }
1172     }
1173 
1174     fn monotonize_inst_offsets(&self, inst_offsets: &mut [CodeOffset], func_body_len: u32) {
1175         if self.debug_value_labels.is_empty() {
1176             return;
1177         }
1178 
1179         // During emission, branch removal can make offsets of instructions incorrect.
1180         // Consider the following sequence: [insi][jmp0][jmp1][jmp2][insj]
1181         // It will be recorded as (say):    [30]  [34]  [38]  [42]  [<would be 46>]
1182         // When the jumps get removed we are left with (in "inst_offsets"):
1183         // [insi][jmp0][jmp1][jmp2][insj][...]
1184         // [30]  [34]  [38]  [42]  [34]
1185         // Which violates the monotonicity invariant. This method sets offsets of these
1186         // removed instructions such as to make them appear zero-sized:
1187         // [insi][jmp0][jmp1][jmp2][insj][...]
1188         // [30]  [34]  [34]  [34]  [34]
1189         //
1190         let mut next_offset = func_body_len;
1191         for inst_index in (0..(inst_offsets.len() - 1)).rev() {
1192             let inst_offset = inst_offsets[inst_index];
1193 
1194             // Not all instructions get their offsets recorded.
1195             if inst_offset == NO_INST_OFFSET {
1196                 continue;
1197             }
1198 
1199             if inst_offset > next_offset {
1200                 trace!(
1201                     "Fixing code offset of the removed Inst {}: {} -> {}",
1202                     inst_index, inst_offset, next_offset
1203                 );
1204                 inst_offsets[inst_index] = next_offset;
1205                 continue;
1206             }
1207 
1208             next_offset = inst_offset;
1209         }
1210     }
1211 
1212     fn compute_value_labels_ranges(
1213         &self,
1214         regalloc: &regalloc2::Output,
1215         inst_offsets: &[CodeOffset],
1216         func_body_len: u32,
1217     ) -> ValueLabelsRanges {
1218         if self.debug_value_labels.is_empty() {
1219             return ValueLabelsRanges::default();
1220         }
1221 
1222         if trace_log_enabled!() {
1223             self.log_value_labels_ranges(regalloc, inst_offsets);
1224         }
1225 
1226         let mut value_labels_ranges: ValueLabelsRanges = HashMap::new();
1227         for &(label, from, to, alloc) in &regalloc.debug_locations {
1228             let label = ValueLabel::from_u32(label);
1229             let ranges = value_labels_ranges.entry(label).or_insert_with(|| vec![]);
1230             let prog_point_to_inst = |prog_point: ProgPoint| {
1231                 let mut inst = prog_point.inst();
1232                 if prog_point.pos() == InstPosition::After {
1233                     inst = inst.next();
1234                 }
1235                 inst.index()
1236             };
1237             let inst_to_offset = |inst_index: usize| {
1238                 // Skip over cold blocks.
1239                 for offset in &inst_offsets[inst_index..] {
1240                     if *offset != NO_INST_OFFSET {
1241                         return *offset;
1242                     }
1243                 }
1244                 func_body_len
1245             };
1246             let from_inst_index = prog_point_to_inst(from);
1247             let to_inst_index = prog_point_to_inst(to);
1248             let from_offset = inst_to_offset(from_inst_index);
1249             let to_offset = inst_to_offset(to_inst_index);
1250 
1251             // Empty ranges or unavailable offsets can happen
1252             // due to cold blocks and branch removal (see above).
1253             if from_offset == to_offset {
1254                 continue;
1255             }
1256 
1257             let loc = if let Some(preg) = alloc.as_reg() {
1258                 LabelValueLoc::Reg(Reg::from(preg))
1259             } else {
1260                 #[cfg(not(feature = "unwind"))]
1261                 continue;
1262 
1263                 #[cfg(feature = "unwind")]
1264                 {
1265                     let slot = alloc.as_stack().unwrap();
1266                     let slot_offset = self.abi.get_spillslot_offset(slot);
1267                     let slot_base_to_caller_sp_offset = self.abi.slot_base_to_caller_sp_offset();
1268                     let caller_sp_to_cfa_offset =
1269                         crate::isa::unwind::systemv::caller_sp_to_cfa_offset();
1270                     // NOTE: this is a negative offset because it's relative to the caller's SP
1271                     let cfa_to_sp_offset =
1272                         -((slot_base_to_caller_sp_offset + caller_sp_to_cfa_offset) as i64);
1273                     LabelValueLoc::CFAOffset(cfa_to_sp_offset + slot_offset)
1274                 }
1275             };
1276 
1277             // Coalesce adjacent ranges that for the same location
1278             // to minimize output size here and for the consumers.
1279             if let Some(last_loc_range) = ranges.last_mut() {
1280                 if last_loc_range.loc == loc && last_loc_range.end == from_offset {
1281                     trace!(
1282                         "Extending debug range for {:?} in {:?} to Inst {} ({})",
1283                         label, loc, to_inst_index, to_offset
1284                     );
1285                     last_loc_range.end = to_offset;
1286                     continue;
1287                 }
1288             }
1289 
1290             trace!(
1291                 "Recording debug range for {:?} in {:?}: [Inst {}..Inst {}) [{}..{})",
1292                 label, loc, from_inst_index, to_inst_index, from_offset, to_offset
1293             );
1294 
1295             ranges.push(ValueLocRange {
1296                 loc,
1297                 start: from_offset,
1298                 end: to_offset,
1299             });
1300         }
1301 
1302         value_labels_ranges
1303     }
1304 
1305     fn log_value_labels_ranges(&self, regalloc: &regalloc2::Output, inst_offsets: &[CodeOffset]) {
1306         debug_assert!(trace_log_enabled!());
1307 
1308         // What debug labels do we have? Note we'll skip those that have not been
1309         // allocated any location at all. They will show up as numeric gaps in the table.
1310         let mut labels = vec![];
1311         for &(label, _, _, _) in &regalloc.debug_locations {
1312             if Some(&label) == labels.last() {
1313                 continue;
1314             }
1315             labels.push(label);
1316         }
1317 
1318         // Reformat the data on what VRegs were the VLs assigned to by lowering, since
1319         // the array we have is sorted by VReg, and we want it sorted by VL for easy
1320         // access in the loop below.
1321         let mut vregs = vec![];
1322         for &(vreg, start, end, label) in &self.debug_value_labels {
1323             if matches!(labels.binary_search(&label), Ok(_)) {
1324                 vregs.push((label, start, end, vreg));
1325             }
1326         }
1327         vregs.sort_unstable_by(
1328             |(l_label, l_start, _, _), (r_label, r_start, _, _)| match l_label.cmp(r_label) {
1329                 Ordering::Equal => l_start.cmp(r_start),
1330                 cmp => cmp,
1331             },
1332         );
1333 
1334         #[derive(PartialEq)]
1335         enum Mode {
1336             Measure,
1337             Emit,
1338         }
1339         #[derive(PartialEq)]
1340         enum Row {
1341             Head,
1342             Line,
1343             Inst(usize, usize),
1344         }
1345 
1346         let mut widths = vec![0; 3 + 2 * labels.len()];
1347         let mut row = String::new();
1348         let mut output_row = |row_kind: Row, mode: Mode| {
1349             let mut column_index = 0;
1350             row.clear();
1351 
1352             macro_rules! output_cell_impl {
1353                 ($fill:literal, $span:literal, $($cell_fmt:tt)*) => {
1354                     let column_start = row.len();
1355                     {
1356                         row.push('|');
1357                         write!(row, $($cell_fmt)*).unwrap();
1358                     }
1359 
1360                     let next_column_index = column_index + $span;
1361                     let expected_width: usize = widths[column_index..next_column_index].iter().sum();
1362                     if mode == Mode::Measure {
1363                         let actual_width = row.len() - column_start;
1364                         if actual_width > expected_width {
1365                             widths[next_column_index - 1] += actual_width - expected_width;
1366                         }
1367                     } else {
1368                         let column_end = column_start + expected_width;
1369                         while row.len() != column_end {
1370                             row.push($fill);
1371                         }
1372                     }
1373                     column_index = next_column_index;
1374                 };
1375             }
1376             macro_rules! output_cell {
1377                 ($($cell_fmt:tt)*) => {
1378                     output_cell_impl!(' ', 1, $($cell_fmt)*);
1379                 };
1380             }
1381 
1382             match row_kind {
1383                 Row::Head => {
1384                     output_cell!("BB");
1385                     output_cell!("Inst");
1386                     output_cell!("IP");
1387                     for label in &labels {
1388                         output_cell_impl!(' ', 2, "{:?}", ValueLabel::from_u32(*label));
1389                     }
1390                 }
1391                 Row::Line => {
1392                     debug_assert!(mode == Mode::Emit);
1393                     for _ in 0..3 {
1394                         output_cell_impl!('-', 1, "");
1395                     }
1396                     for _ in &labels {
1397                         output_cell_impl!('-', 2, "");
1398                     }
1399                 }
1400                 Row::Inst(block_index, inst_index) => {
1401                     debug_assert!(inst_index < self.num_insts());
1402                     if self.block_ranges.get(block_index).start == inst_index {
1403                         output_cell!("B{}", block_index);
1404                     } else {
1405                         output_cell!("");
1406                     }
1407                     output_cell!("Inst {inst_index} ");
1408                     output_cell!("{} ", inst_offsets[inst_index]);
1409 
1410                     for label in &labels {
1411                         // First, the VReg.
1412                         use regalloc2::Inst;
1413                         let vreg_cmp = |inst: usize,
1414                                         vreg_label: &u32,
1415                                         range_start: &Inst,
1416                                         range_end: &Inst| {
1417                             match vreg_label.cmp(&label) {
1418                                 Ordering::Equal => {
1419                                     if range_end.index() <= inst {
1420                                         Ordering::Less
1421                                     } else if range_start.index() > inst {
1422                                         Ordering::Greater
1423                                     } else {
1424                                         Ordering::Equal
1425                                     }
1426                                 }
1427                                 cmp => cmp,
1428                             }
1429                         };
1430                         let vreg_index =
1431                             vregs.binary_search_by(|(l, s, e, _)| vreg_cmp(inst_index, l, s, e));
1432                         if let Ok(vreg_index) = vreg_index {
1433                             let mut prev_vreg = None;
1434                             if inst_index > 0 {
1435                                 let prev_vreg_index = vregs.binary_search_by(|(l, s, e, _)| {
1436                                     vreg_cmp(inst_index - 1, l, s, e)
1437                                 });
1438                                 if let Ok(prev_vreg_index) = prev_vreg_index {
1439                                     prev_vreg = Some(vregs[prev_vreg_index].3);
1440                                 }
1441                             }
1442 
1443                             let vreg = vregs[vreg_index].3;
1444                             if Some(vreg) == prev_vreg {
1445                                 output_cell!("*");
1446                             } else {
1447                                 output_cell!("{}", vreg);
1448                             }
1449                         } else {
1450                             output_cell!("");
1451                         }
1452 
1453                         // Second, the allocated location.
1454                         let inst_prog_point = ProgPoint::before(Inst::new(inst_index));
1455                         let range_index = regalloc.debug_locations.binary_search_by(
1456                             |(range_label, range_start, range_end, _)| match range_label.cmp(label)
1457                             {
1458                                 Ordering::Equal => {
1459                                     if *range_end <= inst_prog_point {
1460                                         Ordering::Less
1461                                     } else if *range_start > inst_prog_point {
1462                                         Ordering::Greater
1463                                     } else {
1464                                         Ordering::Equal
1465                                     }
1466                                 }
1467                                 cmp => cmp,
1468                             },
1469                         );
1470                         if let Ok(range_index) = range_index {
1471                             // Live at this instruction, print the location.
1472                             if let Some(reg) = regalloc.debug_locations[range_index].3.as_reg() {
1473                                 output_cell!("{:?}", Reg::from(reg));
1474                             } else {
1475                                 output_cell!("Stk");
1476                             }
1477                         } else {
1478                             // Not live at this instruction.
1479                             output_cell!("");
1480                         }
1481                     }
1482                 }
1483             }
1484             row.push('|');
1485 
1486             if mode == Mode::Emit {
1487                 trace!("{}", row.as_str());
1488             }
1489         };
1490 
1491         for block_index in 0..self.num_blocks() {
1492             for inst_index in self.block_ranges.get(block_index) {
1493                 output_row(Row::Inst(block_index, inst_index), Mode::Measure);
1494             }
1495         }
1496         output_row(Row::Head, Mode::Measure);
1497 
1498         output_row(Row::Head, Mode::Emit);
1499         output_row(Row::Line, Mode::Emit);
1500         for block_index in 0..self.num_blocks() {
1501             for inst_index in self.block_ranges.get(block_index) {
1502                 output_row(Row::Inst(block_index, inst_index), Mode::Emit);
1503             }
1504         }
1505     }
1506 
1507     /// Get the IR block for a BlockIndex, if one exists.
1508     pub fn bindex_to_bb(&self, block: BlockIndex) -> Option<ir::Block> {
1509         self.block_order.lowered_order()[block.index()].orig_block()
1510     }
1511 
1512     /// Get the user stack map associated with the given forward instruction index.
1513     pub fn get_user_stack_map(&self, inst: InsnIndex) -> Option<&ir::UserStackMap> {
1514         let index = inst.to_backwards_insn_index(self.num_insts());
1515         self.user_stack_maps.get(&index)
1516     }
1517 }
1518 
1519 impl<I: VCodeInst> core::ops::Index<InsnIndex> for VCode<I> {
1520     type Output = I;
1521     fn index(&self, idx: InsnIndex) -> &Self::Output {
1522         &self.insts[idx.index()]
1523     }
1524 }
1525 
1526 impl<I: VCodeInst> RegallocFunction for VCode<I> {
1527     fn num_insts(&self) -> usize {
1528         self.insts.len()
1529     }
1530 
1531     fn num_blocks(&self) -> usize {
1532         self.block_ranges.len()
1533     }
1534 
1535     fn entry_block(&self) -> BlockIndex {
1536         self.entry
1537     }
1538 
1539     fn block_insns(&self, block: BlockIndex) -> InstRange {
1540         let range = self.block_ranges.get(block.index());
1541         InstRange::new(InsnIndex::new(range.start), InsnIndex::new(range.end))
1542     }
1543 
1544     fn block_succs(&self, block: BlockIndex) -> &[BlockIndex] {
1545         let range = self.block_succ_range.get(block.index());
1546         &self.block_succs[range]
1547     }
1548 
1549     fn block_preds(&self, block: BlockIndex) -> &[BlockIndex] {
1550         let range = self.block_pred_range.get(block.index());
1551         &self.block_preds[range]
1552     }
1553 
1554     fn block_params(&self, block: BlockIndex) -> &[VReg] {
1555         // As a special case we don't return block params for the entry block, as all the arguments
1556         // will be defined by the `Inst::Args` instruction.
1557         if block == self.entry {
1558             return &[];
1559         }
1560 
1561         let range = self.block_params_range.get(block.index());
1562         &self.block_params[range]
1563     }
1564 
1565     fn branch_blockparams(&self, block: BlockIndex, _insn: InsnIndex, succ_idx: usize) -> &[VReg] {
1566         let succ_range = self.branch_block_arg_succ_range.get(block.index());
1567         debug_assert!(succ_idx < succ_range.len());
1568         let branch_block_args = self.branch_block_arg_range.get(succ_range.start + succ_idx);
1569         &self.branch_block_args[branch_block_args]
1570     }
1571 
1572     fn is_ret(&self, insn: InsnIndex) -> bool {
1573         match self.insts[insn.index()].is_term() {
1574             // We treat blocks terminated by an unconditional trap like a return for regalloc.
1575             MachTerminator::None => self.insts[insn.index()].is_trap(),
1576             MachTerminator::Ret | MachTerminator::RetCall => true,
1577             MachTerminator::Branch => false,
1578         }
1579     }
1580 
1581     fn is_branch(&self, insn: InsnIndex) -> bool {
1582         match self.insts[insn.index()].is_term() {
1583             MachTerminator::Branch => true,
1584             _ => false,
1585         }
1586     }
1587 
1588     fn inst_operands(&self, insn: InsnIndex) -> &[Operand] {
1589         let range = self.operand_ranges.get(insn.index());
1590         &self.operands[range]
1591     }
1592 
1593     fn inst_clobbers(&self, insn: InsnIndex) -> PRegSet {
1594         self.clobbers.get(&insn).cloned().unwrap_or_default()
1595     }
1596 
1597     fn num_vregs(&self) -> usize {
1598         self.vreg_types.len()
1599     }
1600 
1601     fn debug_value_labels(&self) -> &[(VReg, InsnIndex, InsnIndex, u32)] {
1602         &self.debug_value_labels
1603     }
1604 
1605     fn spillslot_size(&self, regclass: RegClass) -> usize {
1606         self.abi.get_spillslot_size(regclass) as usize
1607     }
1608 
1609     fn allow_multiple_vreg_defs(&self) -> bool {
1610         // At least the s390x backend requires this, because the
1611         // `Loop` pseudo-instruction aggregates all Operands so pinned
1612         // vregs (RealRegs) may occur more than once.
1613         true
1614     }
1615 }
1616 
1617 impl<I: VCodeInst> Debug for VRegAllocator<I> {
1618     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1619         writeln!(f, "VRegAllocator {{")?;
1620 
1621         let mut alias_keys = self.vreg_aliases.keys().cloned().collect::<Vec<_>>();
1622         alias_keys.sort_unstable();
1623         for key in alias_keys {
1624             let dest = self.vreg_aliases.get(&key).unwrap();
1625             writeln!(f, "  {:?} := {:?}", Reg::from(key), Reg::from(*dest))?;
1626         }
1627 
1628         writeln!(f, "}}")
1629     }
1630 }
1631 
1632 impl<I: VCodeInst> fmt::Debug for VCode<I> {
1633     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1634         writeln!(f, "VCode {{")?;
1635         writeln!(f, "  Entry block: {}", self.entry.index())?;
1636 
1637         let mut state = Default::default();
1638 
1639         for block in 0..self.num_blocks() {
1640             let block = BlockIndex::new(block);
1641             writeln!(
1642                 f,
1643                 "Block {}({:?}):",
1644                 block.index(),
1645                 self.block_params(block)
1646             )?;
1647             if let Some(bb) = self.bindex_to_bb(block) {
1648                 writeln!(f, "    (original IR block: {bb})")?;
1649             }
1650             for (succ_idx, succ) in self.block_succs(block).iter().enumerate() {
1651                 writeln!(
1652                     f,
1653                     "    (successor: Block {}({:?}))",
1654                     succ.index(),
1655                     self.branch_blockparams(block, InsnIndex::new(0) /* dummy */, succ_idx)
1656                 )?;
1657             }
1658             for inst in self.block_ranges.get(block.index()) {
1659                 writeln!(
1660                     f,
1661                     "  Inst {}: {}",
1662                     inst,
1663                     self.insts[inst].pretty_print_inst(&mut state)
1664                 )?;
1665                 if let Some(user_stack_map) = self.get_user_stack_map(InsnIndex::new(inst)) {
1666                     writeln!(f, "    {user_stack_map:?}")?;
1667                 }
1668             }
1669         }
1670 
1671         writeln!(f, "}}")?;
1672         Ok(())
1673     }
1674 }
1675 
1676 /// This structure manages VReg allocation during the lifetime of the VCodeBuilder.
1677 pub struct VRegAllocator<I> {
1678     /// VReg IR-level types.
1679     vreg_types: Vec<Type>,
1680 
1681     /// VReg aliases. When the final VCode is built we rewrite all
1682     /// uses of the keys in this table to their replacement values.
1683     ///
1684     /// We use these aliases to rename an instruction's expected
1685     /// result vregs to the returned vregs from lowering, which are
1686     /// usually freshly-allocated temps.
1687     vreg_aliases: FxHashMap<regalloc2::VReg, regalloc2::VReg>,
1688 
1689     /// A deferred error, to be bubbled up to the top level of the
1690     /// lowering algorithm. We take this approach because we cannot
1691     /// currently propagate a `Result` upward through ISLE code (the
1692     /// lowering rules) or some ABI code.
1693     deferred_error: Option<CodegenError>,
1694 
1695     /// The type of instruction that this allocator makes registers for.
1696     _inst: core::marker::PhantomData<I>,
1697 }
1698 
1699 impl<I: VCodeInst> VRegAllocator<I> {
1700     /// Make a new VRegAllocator.
1701     pub fn with_capacity(capacity: usize) -> Self {
1702         let capacity = first_user_vreg_index() + capacity;
1703         let mut vreg_types = Vec::with_capacity(capacity);
1704         vreg_types.resize(first_user_vreg_index(), types::INVALID);
1705         Self {
1706             vreg_types,
1707             vreg_aliases: FxHashMap::with_capacity_and_hasher(capacity, Default::default()),
1708             deferred_error: None,
1709             _inst: core::marker::PhantomData::default(),
1710         }
1711     }
1712 
1713     /// Allocate a fresh ValueRegs.
1714     pub fn alloc(&mut self, ty: Type) -> CodegenResult<ValueRegs<Reg>> {
1715         if self.deferred_error.is_some() {
1716             return Err(CodegenError::CodeTooLarge);
1717         }
1718         let v = self.vreg_types.len();
1719         let (regclasses, tys) = I::rc_for_type(ty)?;
1720 
1721         // Check that new indices are in-bounds for regalloc2's
1722         // VReg/Operand representation.
1723         if v + regclasses.len() > VReg::MAX {
1724             return Err(CodegenError::CodeTooLarge);
1725         }
1726 
1727         // Check that new indices are in-bounds for our Reg
1728         // bit-packing on top of the RA2 types, which represents
1729         // spillslots as well.
1730         let check = |vreg: regalloc2::VReg| -> CodegenResult<Reg> {
1731             Reg::from_virtual_reg_checked(vreg).ok_or(CodegenError::CodeTooLarge)
1732         };
1733 
1734         let regs: ValueRegs<Reg> = match regclasses {
1735             &[rc0] => ValueRegs::one(check(VReg::new(v, rc0))?),
1736             &[rc0, rc1] => ValueRegs::two(check(VReg::new(v, rc0))?, check(VReg::new(v + 1, rc1))?),
1737             // We can extend this if/when we support 32-bit targets; e.g.,
1738             // an i128 on a 32-bit machine will need up to four machine regs
1739             // for a `Value`.
1740             _ => panic!("Value must reside in 1 or 2 registers"),
1741         };
1742         for (&reg_ty, &reg) in tys.iter().zip(regs.regs().iter()) {
1743             let vreg = reg.to_virtual_reg().unwrap();
1744             debug_assert_eq!(self.vreg_types.len(), vreg.index());
1745             self.vreg_types.push(reg_ty);
1746         }
1747 
1748         Ok(regs)
1749     }
1750 
1751     /// Allocate a fresh ValueRegs, deferring any out-of-vregs
1752     /// errors. This is useful in places where we cannot bubble a
1753     /// `CodegenResult` upward easily, and which are known to be
1754     /// invoked from within the lowering loop that checks the deferred
1755     /// error status below.
1756     pub fn alloc_with_deferred_error(&mut self, ty: Type) -> ValueRegs<Reg> {
1757         match self.alloc(ty) {
1758             Ok(x) => x,
1759             Err(e) => {
1760                 self.deferred_error = Some(e);
1761                 self.bogus_for_deferred_error(ty)
1762             }
1763         }
1764     }
1765 
1766     /// Take any deferred error that was accumulated by `alloc_with_deferred_error`.
1767     pub fn take_deferred_error(&mut self) -> Option<CodegenError> {
1768         self.deferred_error.take()
1769     }
1770 
1771     /// Produce an bogus VReg placeholder with the proper number of
1772     /// registers for the given type. This is meant to be used with
1773     /// deferred allocation errors (see `Lower::alloc_tmp()`).
1774     fn bogus_for_deferred_error(&self, ty: Type) -> ValueRegs<Reg> {
1775         let (regclasses, _tys) = I::rc_for_type(ty).expect("must have valid type");
1776         match regclasses {
1777             &[rc0] => ValueRegs::one(VReg::new(0, rc0).into()),
1778             &[rc0, rc1] => ValueRegs::two(VReg::new(0, rc0).into(), VReg::new(1, rc1).into()),
1779             _ => panic!("Value must reside in 1 or 2 registers"),
1780         }
1781     }
1782 
1783     /// Rewrite any mention of `from` into `to`.
1784     pub fn set_vreg_alias(&mut self, from: Reg, to: Reg) {
1785         let from = from.into();
1786         let resolved_to = self.resolve_vreg_alias(to.into());
1787         // Disallow cycles (see below).
1788         assert_ne!(resolved_to, from);
1789 
1790         let old_alias = self.vreg_aliases.insert(from, resolved_to);
1791         debug_assert_eq!(old_alias, None);
1792     }
1793 
1794     fn resolve_vreg_alias(&self, mut vreg: regalloc2::VReg) -> regalloc2::VReg {
1795         // We prevent cycles from existing by resolving targets of
1796         // aliases eagerly before setting them. If the target resolves
1797         // to the origin of the alias, then a cycle would be created
1798         // and the alias is disallowed. Because of the structure of
1799         // SSA code (one instruction can refer to another's defs but
1800         // not vice-versa, except indirectly through
1801         // phis/blockparams), cycles should not occur as we use
1802         // aliases to redirect vregs to the temps that actually define
1803         // them.
1804         while let Some(to) = self.vreg_aliases.get(&vreg) {
1805             vreg = *to;
1806         }
1807         vreg
1808     }
1809 
1810     #[inline]
1811     fn debug_assert_no_vreg_aliases(&self, mut list: impl Iterator<Item = VReg>) {
1812         debug_assert!(list.all(|vreg| !self.vreg_aliases.contains_key(&vreg)));
1813     }
1814 }
1815 
1816 /// This structure tracks the large constants used in VCode that will be emitted separately by the
1817 /// [MachBuffer].
1818 ///
1819 /// First, during the lowering phase, constants are inserted using
1820 /// [VCodeConstants.insert]; an intermediate handle, `VCodeConstant`, tracks what constants are
1821 /// used in this phase. Some deduplication is performed, when possible, as constant
1822 /// values are inserted.
1823 ///
1824 /// Secondly, during the emission phase, the [MachBuffer] assigns [MachLabel]s for each of the
1825 /// constants so that instructions can refer to the value's memory location. The [MachBuffer]
1826 /// then writes the constant values to the buffer.
1827 #[derive(Default)]
1828 pub struct VCodeConstants {
1829     constants: PrimaryMap<VCodeConstant, VCodeConstantData>,
1830     pool_uses: HashMap<Constant, VCodeConstant>,
1831     well_known_uses: HashMap<*const [u8], VCodeConstant>,
1832     u64s: HashMap<[u8; 8], VCodeConstant>,
1833 }
1834 impl VCodeConstants {
1835     /// Initialize the structure with the expected number of constants.
1836     pub fn with_capacity(expected_num_constants: usize) -> Self {
1837         Self {
1838             constants: PrimaryMap::with_capacity(expected_num_constants),
1839             pool_uses: HashMap::with_capacity(expected_num_constants),
1840             well_known_uses: HashMap::new(),
1841             u64s: HashMap::new(),
1842         }
1843     }
1844 
1845     /// Insert a constant; using this method indicates that a constant value will be used and thus
1846     /// will be emitted to the `MachBuffer`. The current implementation can deduplicate constants
1847     /// that are [VCodeConstantData::Pool] or [VCodeConstantData::WellKnown] but not
1848     /// [VCodeConstantData::Generated].
1849     pub fn insert(&mut self, data: VCodeConstantData) -> VCodeConstant {
1850         match data {
1851             VCodeConstantData::Generated(_) => self.constants.push(data),
1852             VCodeConstantData::Pool(constant, _) => match self.pool_uses.get(&constant) {
1853                 None => {
1854                     let vcode_constant = self.constants.push(data);
1855                     self.pool_uses.insert(constant, vcode_constant);
1856                     vcode_constant
1857                 }
1858                 Some(&vcode_constant) => vcode_constant,
1859             },
1860             VCodeConstantData::WellKnown(data_ref) => {
1861                 match self.well_known_uses.entry(data_ref as *const [u8]) {
1862                     Entry::Vacant(v) => {
1863                         let vcode_constant = self.constants.push(data);
1864                         v.insert(vcode_constant);
1865                         vcode_constant
1866                     }
1867                     Entry::Occupied(o) => *o.get(),
1868                 }
1869             }
1870             VCodeConstantData::U64(value) => match self.u64s.entry(value) {
1871                 Entry::Vacant(v) => {
1872                     let vcode_constant = self.constants.push(data);
1873                     v.insert(vcode_constant);
1874                     vcode_constant
1875                 }
1876                 Entry::Occupied(o) => *o.get(),
1877             },
1878         }
1879     }
1880 
1881     /// Return the number of constants inserted.
1882     pub fn len(&self) -> usize {
1883         self.constants.len()
1884     }
1885 
1886     /// Iterate over the `VCodeConstant` keys inserted in this structure.
1887     pub fn keys(&self) -> Keys<VCodeConstant> {
1888         self.constants.keys()
1889     }
1890 
1891     /// Iterate over the `VCodeConstant` keys and the data (as a byte slice) inserted in this
1892     /// structure.
1893     pub fn iter(&self) -> impl Iterator<Item = (VCodeConstant, &VCodeConstantData)> {
1894         self.constants.iter()
1895     }
1896 
1897     /// Returns the data associated with the specified constant.
1898     pub fn get(&self, c: VCodeConstant) -> &VCodeConstantData {
1899         &self.constants[c]
1900     }
1901 
1902     /// Checks if the given [VCodeConstantData] is registered as
1903     /// used by the pool.
1904     pub fn pool_uses(&self, constant: &VCodeConstantData) -> bool {
1905         match constant {
1906             VCodeConstantData::Pool(c, _) => self.pool_uses.contains_key(c),
1907             _ => false,
1908         }
1909     }
1910 }
1911 
1912 /// A use of a constant by one or more VCode instructions; see [VCodeConstants].
1913 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
1914 pub struct VCodeConstant(u32);
1915 entity_impl!(VCodeConstant);
1916 
1917 /// Identify the different types of constant that can be inserted into [VCodeConstants]. Tracking
1918 /// these separately instead of as raw byte buffers allows us to avoid some duplication.
1919 pub enum VCodeConstantData {
1920     /// A constant already present in the Cranelift IR
1921     /// [ConstantPool](crate::ir::constant::ConstantPool).
1922     Pool(Constant, ConstantData),
1923     /// A reference to a well-known constant value that is statically encoded within the compiler.
1924     WellKnown(&'static [u8]),
1925     /// A constant value generated during lowering; the value may depend on the instruction context
1926     /// which makes it difficult to de-duplicate--if possible, use other variants.
1927     Generated(ConstantData),
1928     /// A constant of at most 64 bits. These are deduplicated as
1929     /// well. Stored as a fixed-size array of `u8` so that we do not
1930     /// encounter endianness problems when cross-compiling.
1931     U64([u8; 8]),
1932 }
1933 impl VCodeConstantData {
1934     /// Retrieve the constant data as a byte slice.
1935     pub fn as_slice(&self) -> &[u8] {
1936         match self {
1937             VCodeConstantData::Pool(_, d) | VCodeConstantData::Generated(d) => d.as_slice(),
1938             VCodeConstantData::WellKnown(d) => d,
1939             VCodeConstantData::U64(value) => &value[..],
1940         }
1941     }
1942 
1943     /// Calculate the alignment of the constant data.
1944     pub fn alignment(&self) -> u32 {
1945         if self.as_slice().len() <= 8 { 8 } else { 16 }
1946     }
1947 }
1948 
1949 #[cfg(test)]
1950 mod test {
1951     use super::*;
1952     use core::mem::size_of;
1953 
1954     #[test]
1955     fn size_of_constant_structs() {
1956         assert_eq!(size_of::<Constant>(), 4);
1957         assert_eq!(size_of::<VCodeConstant>(), 4);
1958         assert_eq!(size_of::<ConstantData>(), 3 * size_of::<usize>());
1959         assert_eq!(size_of::<VCodeConstantData>(), 4 * size_of::<usize>());
1960         assert_eq!(
1961             size_of::<PrimaryMap<VCodeConstant, VCodeConstantData>>(),
1962             3 * size_of::<usize>()
1963         );
1964         // TODO The VCodeConstants structure's memory size could be further optimized.
1965         // With certain versions of Rust, each `HashMap` in `VCodeConstants` occupied at
1966         // least 48 bytes, making an empty `VCodeConstants` cost 120 bytes.
1967     }
1968 }
1969