1 //! This implements the VCode container: a CFG of Insts that have been lowered.
2 //!
3 //! VCode is virtual-register code. An instruction in VCode is almost a machine
4 //! instruction; however, its register slots can refer to virtual registers in
5 //! addition to real machine registers.
6 //!
7 //! VCode is structured with traditional basic blocks, and
8 //! each block must be terminated by an unconditional branch (one target), a
9 //! conditional branch (two targets), or a return (no targets). Note that this
10 //! slightly differs from the machine code of most ISAs: in most ISAs, a
11 //! conditional branch has one target (and the not-taken case falls through).
12 //! However, we expect that machine backends will elide branches to the following
13 //! block (i.e., zero-offset jumps), and will be able to codegen a branch-cond /
14 //! branch-uncond pair if *both* targets are not fallthrough. This allows us to
15 //! play with layout prior to final binary emission, as well, if we want.
16 //!
17 //! See the main module comment in `mod.rs` for more details on the VCode-based
18 //! backend pipeline.
19 
20 use crate::CodegenError;
21 use crate::FxHashMap;
22 use crate::ir::pcc::*;
23 use crate::ir::{self, Constant, ConstantData, ValueLabel, types};
24 use crate::ranges::Ranges;
25 use crate::timing;
26 use crate::trace;
27 use crate::{LabelValueLoc, ValueLocRange};
28 use crate::{machinst::*, trace_log_enabled};
29 use regalloc2::{
30     Edit, Function as RegallocFunction, InstOrEdit, InstPosition, InstRange, Operand,
31     OperandConstraint, OperandKind, PRegSet, ProgPoint, RegClass,
32 };
33 
34 use crate::HashMap;
35 use crate::hash_map::Entry;
36 use core::cmp::Ordering;
37 use core::fmt::{self, Write};
38 use core::mem::take;
39 use core::ops::Range;
40 use cranelift_entity::{Keys, entity_impl};
41 
42 /// Index referring to an instruction in VCode.
43 pub type InsnIndex = regalloc2::Inst;
44 
45 /// Extension trait for `InsnIndex` to allow conversion to a
46 /// `BackwardsInsnIndex`.
47 trait ToBackwardsInsnIndex {
48     fn to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex;
49 }
50 
51 impl ToBackwardsInsnIndex for InsnIndex {
52     fn to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex {
53         BackwardsInsnIndex::new(num_insts - self.index() - 1)
54     }
55 }
56 
57 /// An index referring to an instruction in the VCode when it is backwards,
58 /// during VCode construction.
59 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
60 #[cfg_attr(
61     feature = "enable-serde",
62     derive(::serde::Serialize, ::serde::Deserialize)
63 )]
64 pub struct BackwardsInsnIndex(InsnIndex);
65 
66 impl BackwardsInsnIndex {
67     pub fn new(i: usize) -> Self {
68         BackwardsInsnIndex(InsnIndex::new(i))
69     }
70 }
71 
72 /// Index referring to a basic block in VCode.
73 pub type BlockIndex = regalloc2::Block;
74 
75 /// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be
76 /// a `MachInst` and it must be able to emit itself at least to a `SizeCodeSink`.
77 pub trait VCodeInst: MachInst + MachInstEmit {}
78 impl<I: MachInst + MachInstEmit> VCodeInst for I {}
79 
80 /// A function in "VCode" (virtualized-register code) form, after
81 /// lowering.  This is essentially a standard CFG of basic blocks,
82 /// where each basic block consists of lowered instructions produced
83 /// by the machine-specific backend.
84 ///
85 /// Note that the VCode is immutable once produced, and is not
86 /// modified by register allocation in particular. Rather, register
87 /// allocation on the `VCode` produces a separate `regalloc2::Output`
88 /// struct, and this can be passed to `emit`. `emit` in turn does not
89 /// modify the vcode, but produces an `EmitResult`, which contains the
90 /// machine code itself, and the associated disassembly and/or
91 /// metadata as requested.
92 pub struct VCode<I: VCodeInst> {
93     /// VReg IR-level types.
94     vreg_types: Vec<Type>,
95 
96     /// Lowered machine instructions in order corresponding to the original IR.
97     insts: Vec<I>,
98 
99     /// A map from backwards instruction index to the user stack map for that
100     /// instruction.
101     ///
102     /// This is a sparse side table that only has entries for instructions that
103     /// are safepoints, and only for a subset of those that have an associated
104     /// user stack map.
105     user_stack_maps: FxHashMap<BackwardsInsnIndex, ir::UserStackMap>,
106 
107     /// A map from backwards instruction index to the debug tags for
108     /// that instruction. Each entry indexes a range in the
109     /// `debug_tag_pool`.
110     debug_tags: FxHashMap<BackwardsInsnIndex, Range<u32>>,
111 
112     /// Pooled storage for sequences of debug tags; indexed by entries
113     /// in `debug_tags`.
114     debug_tag_pool: Vec<ir::DebugTag>,
115 
116     /// Operands: pre-regalloc references to virtual registers with
117     /// constraints, in one flattened array. This allows the regalloc
118     /// to efficiently access all operands without requiring expensive
119     /// matches or method invocations on insts.
120     operands: Vec<Operand>,
121 
122     /// Operand index ranges: for each instruction in `insts`, there
123     /// is a tuple here providing the range in `operands` for that
124     /// instruction's operands.
125     operand_ranges: Ranges,
126 
127     /// Clobbers: a sparse map from instruction indices to clobber masks.
128     clobbers: FxHashMap<InsnIndex, PRegSet>,
129 
130     /// Source locations for each instruction. (`SourceLoc` is a `u32`, so it is
131     /// reasonable to keep one of these per instruction.)
132     srclocs: Vec<RelSourceLoc>,
133 
134     /// Entry block.
135     entry: BlockIndex,
136 
137     /// Block instruction indices.
138     block_ranges: Ranges,
139 
140     /// Block successors: index range in the `block_succs` list.
141     block_succ_range: Ranges,
142 
143     /// Block successor lists, concatenated into one vec. The
144     /// `block_succ_range` list of tuples above gives (start, end)
145     /// ranges within this list that correspond to each basic block's
146     /// successors.
147     block_succs: Vec<regalloc2::Block>,
148 
149     /// Block predecessors: index range in the `block_preds` list.
150     block_pred_range: Ranges,
151 
152     /// Block predecessor lists, concatenated into one vec. The
153     /// `block_pred_range` list of tuples above gives (start, end)
154     /// ranges within this list that correspond to each basic block's
155     /// predecessors.
156     block_preds: Vec<regalloc2::Block>,
157 
158     /// Block parameters: index range in `block_params` below.
159     block_params_range: Ranges,
160 
161     /// Block parameter lists, concatenated into one vec. The
162     /// `block_params_range` list of tuples above gives (start, end)
163     /// ranges within this list that correspond to each basic block's
164     /// blockparam vregs.
165     block_params: Vec<regalloc2::VReg>,
166 
167     /// Outgoing block arguments on branch instructions, concatenated
168     /// into one list.
169     ///
170     /// Note that this is conceptually a 3D array: we have a VReg list
171     /// per block, per successor. We flatten those three dimensions
172     /// into this 1D vec, then store index ranges in two levels of
173     /// indirection.
174     ///
175     /// Indexed by the indices in `branch_block_arg_succ_range`.
176     branch_block_args: Vec<regalloc2::VReg>,
177 
178     /// Array of sequences of (start, end) tuples in
179     /// `branch_block_args`, one for each successor; these sequences
180     /// for each block are concatenated.
181     ///
182     /// Indexed by the indices in `branch_block_arg_succ_range`.
183     branch_block_arg_range: Ranges,
184 
185     /// For a given block, indices in `branch_block_arg_range`
186     /// corresponding to all of its successors.
187     branch_block_arg_succ_range: Ranges,
188 
189     /// Block-order information.
190     block_order: BlockLoweringOrder,
191 
192     /// ABI object.
193     pub(crate) abi: Callee<I::ABIMachineSpec>,
194 
195     /// Constant information used during code emission. This should be
196     /// immutable across function compilations within the same module.
197     emit_info: I::Info,
198 
199     /// Constants.
200     pub(crate) constants: VCodeConstants,
201 
202     /// Value labels for debuginfo attached to vregs.
203     debug_value_labels: Vec<(VReg, InsnIndex, InsnIndex, u32)>,
204 
205     pub(crate) sigs: SigSet,
206 
207     /// Facts on VRegs, for proof-carrying code verification.
208     facts: Vec<Option<Fact>>,
209 
210     log2_min_function_alignment: u8,
211 }
212 
213 /// The result of `VCode::emit`. Contains all information computed
214 /// during emission: actual machine code, optionally a disassembly,
215 /// and optionally metadata about the code layout.
216 pub struct EmitResult {
217     /// The MachBuffer containing the machine code.
218     pub buffer: MachBufferFinalized<Stencil>,
219 
220     /// Offset of each basic block, recorded during emission. Computed
221     /// only if `machine_code_cfg_info` is enabled.
222     pub bb_offsets: Vec<CodeOffset>,
223 
224     /// Final basic-block edges, in terms of code offsets of
225     /// bb-starts. Computed only if `machine_code_cfg_info` is enabled.
226     pub bb_edges: Vec<(CodeOffset, CodeOffset)>,
227 
228     /// The pretty-printed disassembly, if any. This uses the same
229     /// pretty-printing for MachInsts as the pre-regalloc VCode Debug
230     /// implementation, but additionally includes the prologue and
231     /// epilogue(s), and makes use of the regalloc results.
232     pub disasm: Option<String>,
233 
234     /// Value-labels information (debug metadata).
235     pub value_labels_ranges: ValueLabelsRanges,
236 }
237 
238 /// A builder for a VCode function body.
239 ///
240 /// This builder has the ability to accept instructions in either
241 /// forward or reverse order, depending on the pass direction that
242 /// produces the VCode. The lowering from CLIF to VCode<MachInst>
243 /// ordinarily occurs in reverse order (in order to allow instructions
244 /// to be lowered only if used, and not merged) so a reversal will
245 /// occur at the end of lowering to ensure the VCode is in machine
246 /// order.
247 ///
248 /// If built in reverse, block and instruction indices used once the
249 /// VCode is built are relative to the final (reversed) order, not the
250 /// order of construction. Note that this means we do not know the
251 /// final block or instruction indices when building, so we do not
252 /// hand them out. (The user is assumed to know them when appending
253 /// terminator instructions with successor blocks.)
254 pub struct VCodeBuilder<I: VCodeInst> {
255     /// In-progress VCode.
256     pub(crate) vcode: VCode<I>,
257 
258     /// In what direction is the build occurring?
259     direction: VCodeBuildDirection,
260 
261     /// Debug-value label in-progress map, keyed by label. For each
262     /// label, we keep disjoint ranges mapping to vregs. We'll flatten
263     /// this into (vreg, range, label) tuples when done.
264     debug_info: FxHashMap<ValueLabel, Vec<(InsnIndex, InsnIndex, VReg)>>,
265 }
266 
267 /// Direction in which a VCodeBuilder builds VCode.
268 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
269 pub enum VCodeBuildDirection {
270     // TODO: add `Forward` once we need it and can test it adequately.
271     /// Backward-build pass: we expect the producer to call `emit()`
272     /// with instructions in reverse program order within each block.
273     Backward,
274 }
275 
276 impl<I: VCodeInst> VCodeBuilder<I> {
277     /// Create a new VCodeBuilder.
278     pub fn new(
279         sigs: SigSet,
280         abi: Callee<I::ABIMachineSpec>,
281         emit_info: I::Info,
282         block_order: BlockLoweringOrder,
283         constants: VCodeConstants,
284         direction: VCodeBuildDirection,
285         log2_min_function_alignment: u8,
286     ) -> Self {
287         let vcode = VCode::new(
288             sigs,
289             abi,
290             emit_info,
291             block_order,
292             constants,
293             log2_min_function_alignment,
294         );
295 
296         VCodeBuilder {
297             vcode,
298             direction,
299             debug_info: FxHashMap::default(),
300         }
301     }
302 
303     pub fn init_retval_area(&mut self, vregs: &mut VRegAllocator<I>) -> CodegenResult<()> {
304         self.vcode.abi.init_retval_area(&self.vcode.sigs, vregs)
305     }
306 
307     /// Access the ABI object.
308     pub fn abi(&self) -> &Callee<I::ABIMachineSpec> {
309         &self.vcode.abi
310     }
311 
312     /// Access the ABI object.
313     pub fn abi_mut(&mut self) -> &mut Callee<I::ABIMachineSpec> {
314         &mut self.vcode.abi
315     }
316 
317     pub fn sigs(&self) -> &SigSet {
318         &self.vcode.sigs
319     }
320 
321     pub fn sigs_mut(&mut self) -> &mut SigSet {
322         &mut self.vcode.sigs
323     }
324 
325     /// Access to the BlockLoweringOrder object.
326     pub fn block_order(&self) -> &BlockLoweringOrder {
327         &self.vcode.block_order
328     }
329 
330     /// Set the current block as the entry block.
331     pub fn set_entry(&mut self, block: BlockIndex) {
332         self.vcode.entry = block;
333     }
334 
335     /// End the current basic block. Must be called after emitting vcode insts
336     /// for IR insts and prior to ending the function (building the VCode).
337     pub fn end_bb(&mut self) {
338         let end_idx = self.vcode.insts.len();
339         // Add the instruction index range to the list of blocks.
340         self.vcode.block_ranges.push_end(end_idx);
341         // End the successors list.
342         let succ_end = self.vcode.block_succs.len();
343         self.vcode.block_succ_range.push_end(succ_end);
344         // End the blockparams list.
345         let block_params_end = self.vcode.block_params.len();
346         self.vcode.block_params_range.push_end(block_params_end);
347         // End the branch blockparam args list.
348         let branch_block_arg_succ_end = self.vcode.branch_block_arg_range.len();
349         self.vcode
350             .branch_block_arg_succ_range
351             .push_end(branch_block_arg_succ_end);
352     }
353 
354     pub fn add_block_param(&mut self, param: VirtualReg) {
355         self.vcode.block_params.push(param.into());
356     }
357 
358     fn add_branch_args_for_succ(&mut self, args: &[Reg]) {
359         self.vcode
360             .branch_block_args
361             .extend(args.iter().map(|&arg| VReg::from(arg)));
362         let end = self.vcode.branch_block_args.len();
363         self.vcode.branch_block_arg_range.push_end(end);
364     }
365 
366     /// Push an instruction for the current BB and current IR inst
367     /// within the BB.
368     pub fn push(&mut self, insn: I, loc: RelSourceLoc) {
369         assert!(!insn.is_low_level_branch()); // These are not meant to be in VCode.
370         self.vcode.insts.push(insn);
371         self.vcode.srclocs.push(loc);
372     }
373 
374     /// Add a successor block with branch args.
375     pub fn add_succ(&mut self, block: BlockIndex, args: &[Reg]) {
376         self.vcode.block_succs.push(block);
377         self.add_branch_args_for_succ(args);
378     }
379 
380     /// Add a debug value label to a register.
381     pub fn add_value_label(&mut self, reg: Reg, label: ValueLabel) {
382         // 1) In the reversed order, we consider the instructions
383         //    that define ranges in the "debug_info" array to refer
384         //    to the IP **after** them (when reversed):
385         //      IP[2]__| Inst 3 |
386         //      IP[1]__| Inst 2 |
387         //      IP[0]__| Inst 1 |
388         //             | Inst 0 |
389         //    This is so that we can represent IP[<function start>],
390         //    done at the cost of not being to represent IP[<function end>],
391         //    which is OK since no values will be live at that point.
392         // 2) The live range for "reg" begins at the current IP
393         //    and continues until the next, in execution order,
394         //    VReg that defines "label". Since the ranges are open
395         //    at the end, the subtraction of 1 cancels out:
396         //      [last..current IP] <=>
397         //      [last..last emitted inst index] <=>
398         //      [last..next_inst_index - 1] <=>
399         //      [last..next_inst_index)
400         //
401         let next_inst_index = self.vcode.insts.len();
402         if next_inst_index == 0 {
403             // This would produce a defective [0..0) range.
404             return;
405         }
406         let next_inst = InsnIndex::new(next_inst_index);
407         let labels = self.debug_info.entry(label).or_insert_with(|| vec![]);
408         let last = labels
409             .last()
410             .map(|(_start, end, _vreg)| *end)
411             .unwrap_or(InsnIndex::new(0));
412         labels.push((last, next_inst, reg.into()));
413     }
414 
415     /// Access the constants.
416     pub fn constants(&mut self) -> &mut VCodeConstants {
417         &mut self.vcode.constants
418     }
419 
420     fn compute_preds_from_succs(&mut self) {
421         // Do a linear-time counting sort: first determine how many
422         // times each block appears as a successor.
423         let mut starts = vec![0u32; self.vcode.num_blocks()];
424         for succ in &self.vcode.block_succs {
425             starts[succ.index()] += 1;
426         }
427 
428         // Determine for each block the starting index where that
429         // block's predecessors should go. This is equivalent to the
430         // ranges we need to store in block_pred_range.
431         self.vcode.block_pred_range.reserve(starts.len());
432         let mut end = 0;
433         for count in starts.iter_mut() {
434             let start = end;
435             end += *count;
436             *count = start;
437             self.vcode.block_pred_range.push_end(end as usize);
438         }
439         let end = end as usize;
440         debug_assert_eq!(end, self.vcode.block_succs.len());
441 
442         // Walk over the successors again, this time grouped by
443         // predecessor, and push the predecessor at the current
444         // starting position of each of its successors. We build
445         // each group of predecessors in whatever order Ranges::iter
446         // returns them; regalloc2 doesn't care.
447         self.vcode.block_preds.resize(end, BlockIndex::invalid());
448         for (pred, range) in self.vcode.block_succ_range.iter() {
449             let pred = BlockIndex::new(pred);
450             for succ in &self.vcode.block_succs[range] {
451                 let pos = &mut starts[succ.index()];
452                 self.vcode.block_preds[*pos as usize] = pred;
453                 *pos += 1;
454             }
455         }
456         debug_assert!(self.vcode.block_preds.iter().all(|pred| pred.is_valid()));
457     }
458 
459     /// Called once, when a build in Backward order is complete, to
460     /// perform the overall reversal (into final forward order) and
461     /// finalize metadata accordingly.
462     fn reverse_and_finalize(&mut self, vregs: &VRegAllocator<I>) {
463         let n_insts = self.vcode.insts.len();
464         if n_insts == 0 {
465             return;
466         }
467 
468         // Reverse the per-block and per-inst sequences.
469         self.vcode.block_ranges.reverse_index();
470         self.vcode.block_ranges.reverse_target(n_insts);
471         // block_params_range is indexed by block (and blocks were
472         // traversed in reverse) so we reverse it; but block-param
473         // sequences in the concatenated vec can remain in reverse
474         // order (it is effectively an arena of arbitrarily-placed
475         // referenced sequences).
476         self.vcode.block_params_range.reverse_index();
477         // Likewise, we reverse block_succ_range, but the block_succ
478         // concatenated array can remain as-is.
479         self.vcode.block_succ_range.reverse_index();
480         self.vcode.insts.reverse();
481         self.vcode.srclocs.reverse();
482         // Likewise, branch_block_arg_succ_range is indexed by block
483         // so must be reversed.
484         self.vcode.branch_block_arg_succ_range.reverse_index();
485 
486         // To translate an instruction index *endpoint* in reversed
487         // order to forward order, compute `n_insts - i`.
488         //
489         // Why not `n_insts - 1 - i`? That would be correct to
490         // translate an individual instruction index (for ten insts 0
491         // to 9 inclusive, inst 0 becomes 9, and inst 9 becomes
492         // 0). But for the usual inclusive-start, exclusive-end range
493         // idiom, inclusive starts become exclusive ends and
494         // vice-versa, so e.g. an (inclusive) start of 0 becomes an
495         // (exclusive) end of 10.
496         let translate = |inst: InsnIndex| InsnIndex::new(n_insts - inst.index());
497 
498         // Generate debug-value labels based on per-label maps.
499         for (label, tuples) in &self.debug_info {
500             for &(start, end, vreg) in tuples {
501                 let vreg = vregs.resolve_vreg_alias(vreg);
502                 let fwd_start = translate(end);
503                 let fwd_end = translate(start);
504                 self.vcode
505                     .debug_value_labels
506                     .push((vreg, fwd_start, fwd_end, label.as_u32()));
507             }
508         }
509 
510         // Now sort debug value labels by VReg, as required
511         // by regalloc2.
512         self.vcode
513             .debug_value_labels
514             .sort_unstable_by_key(|(vreg, _, _, _)| *vreg);
515     }
516 
517     fn collect_operands(&mut self, vregs: &VRegAllocator<I>) {
518         let allocatable = PRegSet::from(self.vcode.abi.machine_env());
519         for (i, insn) in self.vcode.insts.iter_mut().enumerate() {
520             // Push operands from the instruction onto the operand list.
521             //
522             // We rename through the vreg alias table as we collect
523             // the operands. This is better than a separate post-pass
524             // over operands, because it has more cache locality:
525             // operands only need to pass through L1 once. This is
526             // also better than renaming instructions'
527             // operands/registers while lowering, because here we only
528             // need to do the `match` over the instruction to visit
529             // its register fields (which is slow, branchy code) once.
530 
531             let mut op_collector =
532                 OperandCollector::new(&mut self.vcode.operands, allocatable, |vreg| {
533                     vregs.resolve_vreg_alias(vreg)
534                 });
535             insn.get_operands(&mut op_collector);
536             let (ops, clobbers) = op_collector.finish();
537             self.vcode.operand_ranges.push_end(ops);
538 
539             if clobbers != PRegSet::default() {
540                 self.vcode.clobbers.insert(InsnIndex::new(i), clobbers);
541             }
542 
543             if let Some((dst, src)) = insn.is_move() {
544                 // We should never see non-virtual registers present in move
545                 // instructions.
546                 assert!(
547                     src.is_virtual(),
548                     "the real register {src:?} was used as the source of a move instruction"
549                 );
550                 assert!(
551                     dst.to_reg().is_virtual(),
552                     "the real register {:?} was used as the destination of a move instruction",
553                     dst.to_reg()
554                 );
555             }
556         }
557 
558         // Translate blockparam args via the vreg aliases table as well.
559         for arg in &mut self.vcode.branch_block_args {
560             let new_arg = vregs.resolve_vreg_alias(*arg);
561             trace!("operandcollector: block arg {:?} -> {:?}", arg, new_arg);
562             *arg = new_arg;
563         }
564     }
565 
566     /// Build the final VCode.
567     pub fn build(mut self, mut vregs: VRegAllocator<I>) -> VCode<I> {
568         self.vcode.vreg_types = take(&mut vregs.vreg_types);
569         self.vcode.facts = take(&mut vregs.facts);
570 
571         if self.direction == VCodeBuildDirection::Backward {
572             self.reverse_and_finalize(&vregs);
573         }
574         self.collect_operands(&vregs);
575 
576         self.compute_preds_from_succs();
577         self.vcode.debug_value_labels.sort_unstable();
578 
579         // At this point, nothing in the vcode should mention any
580         // VReg which has been aliased. All the appropriate rewriting
581         // should have happened above. Just to be sure, let's
582         // double-check each field which has vregs.
583         // Note: can't easily check vcode.insts, resolved in collect_operands.
584         // Operands are resolved in collect_operands.
585         vregs.debug_assert_no_vreg_aliases(self.vcode.operands.iter().map(|op| op.vreg()));
586         // Currently block params are never aliased to another vreg.
587         vregs.debug_assert_no_vreg_aliases(self.vcode.block_params.iter().copied());
588         // Branch block args are resolved in collect_operands.
589         vregs.debug_assert_no_vreg_aliases(self.vcode.branch_block_args.iter().copied());
590         // Debug value labels are resolved in reverse_and_finalize.
591         vregs.debug_assert_no_vreg_aliases(
592             self.vcode.debug_value_labels.iter().map(|&(vreg, ..)| vreg),
593         );
594         // Facts are resolved eagerly during set_vreg_alias.
595         vregs.debug_assert_no_vreg_aliases(
596             self.vcode
597                 .facts
598                 .iter()
599                 .zip(&vregs.vreg_types)
600                 .enumerate()
601                 .filter(|(_, (fact, _))| fact.is_some())
602                 .map(|(vreg, (_, &ty))| {
603                     let (regclasses, _) = I::rc_for_type(ty).unwrap();
604                     VReg::new(vreg, regclasses[0])
605                 }),
606         );
607 
608         self.vcode
609     }
610 
611     /// Add a user stack map for the associated instruction.
612     pub fn add_user_stack_map(
613         &mut self,
614         inst: BackwardsInsnIndex,
615         entries: &[ir::UserStackMapEntry],
616     ) {
617         let stack_map = ir::UserStackMap::new(entries, self.vcode.abi.sized_stackslot_offsets());
618         let old_entry = self.vcode.user_stack_maps.insert(inst, stack_map);
619         debug_assert!(old_entry.is_none());
620     }
621 
622     /// Add debug tags for the associated instruction.
623     pub fn add_debug_tags(&mut self, inst: BackwardsInsnIndex, entries: &[ir::DebugTag]) {
624         let start = u32::try_from(self.vcode.debug_tag_pool.len()).unwrap();
625         self.vcode.debug_tag_pool.extend(entries.iter().cloned());
626         let end = u32::try_from(self.vcode.debug_tag_pool.len()).unwrap();
627         self.vcode.debug_tags.insert(inst, start..end);
628     }
629 }
630 
631 const NO_INST_OFFSET: CodeOffset = u32::MAX;
632 
633 impl<I: VCodeInst> VCode<I> {
634     /// New empty VCode.
635     fn new(
636         sigs: SigSet,
637         abi: Callee<I::ABIMachineSpec>,
638         emit_info: I::Info,
639         block_order: BlockLoweringOrder,
640         constants: VCodeConstants,
641         log2_min_function_alignment: u8,
642     ) -> Self {
643         let n_blocks = block_order.lowered_order().len();
644         VCode {
645             sigs,
646             vreg_types: vec![],
647             insts: Vec::with_capacity(10 * n_blocks),
648             user_stack_maps: FxHashMap::default(),
649             debug_tags: FxHashMap::default(),
650             debug_tag_pool: vec![],
651             operands: Vec::with_capacity(30 * n_blocks),
652             operand_ranges: Ranges::with_capacity(10 * n_blocks),
653             clobbers: FxHashMap::default(),
654             srclocs: Vec::with_capacity(10 * n_blocks),
655             entry: BlockIndex::new(0),
656             block_ranges: Ranges::with_capacity(n_blocks),
657             block_succ_range: Ranges::with_capacity(n_blocks),
658             block_succs: Vec::with_capacity(n_blocks),
659             block_pred_range: Ranges::default(),
660             block_preds: Vec::new(),
661             block_params_range: Ranges::with_capacity(n_blocks),
662             block_params: Vec::with_capacity(5 * n_blocks),
663             branch_block_args: Vec::with_capacity(10 * n_blocks),
664             branch_block_arg_range: Ranges::with_capacity(2 * n_blocks),
665             branch_block_arg_succ_range: Ranges::with_capacity(n_blocks),
666             block_order,
667             abi,
668             emit_info,
669             constants,
670             debug_value_labels: vec![],
671             facts: vec![],
672             log2_min_function_alignment,
673         }
674     }
675 
676     /// Get the number of blocks. Block indices will be in the range `0 ..
677     /// (self.num_blocks() - 1)`.
678     pub fn num_blocks(&self) -> usize {
679         self.block_ranges.len()
680     }
681 
682     /// The number of lowered instructions.
683     pub fn num_insts(&self) -> usize {
684         self.insts.len()
685     }
686 
687     fn compute_clobbers_and_function_calls(
688         &self,
689         regalloc: &regalloc2::Output,
690     ) -> (Vec<Writable<RealReg>>, FunctionCalls) {
691         let mut clobbered = PRegSet::default();
692         let mut function_calls = FunctionCalls::None;
693 
694         // All moves are included in clobbers.
695         for (_, Edit::Move { to, .. }) in &regalloc.edits {
696             if let Some(preg) = to.as_reg() {
697                 clobbered.add(preg);
698             }
699         }
700 
701         for (i, range) in self.operand_ranges.iter() {
702             let operands = &self.operands[range.clone()];
703             let allocs = &regalloc.allocs[range];
704             for (operand, alloc) in operands.iter().zip(allocs.iter()) {
705                 if operand.kind() == OperandKind::Def {
706                     if let Some(preg) = alloc.as_reg() {
707                         clobbered.add(preg);
708                     }
709                 }
710             }
711 
712             function_calls.update(self.insts[i].call_type());
713 
714             // Also add explicitly-clobbered registers.
715             //
716             // Skip merging this instruction's clobber list if not
717             // "included in clobbers" as per the MachInst. (Some
718             // backends use this to implement ABI specifics; e.g.,
719             // excluding calls of the same ABI as the current function
720             // from clobbers, because by definition everything
721             // clobbered by the call can be clobbered by this function
722             // without saving as well.
723             //
724             // This is important for a particular optimization: when
725             // some registers are "half-clobbered", e.g. vector/float
726             // registers on aarch64, we want them to be seen as
727             // clobbered by regalloc so it avoids carrying values
728             // across calls in these registers but not seen as
729             // clobbered by prologue generation here (because the
730             // actual half-clobber implied by the clobber list fits
731             // within the clobbers that we allow without
732             // clobber-saves).
733             if self.insts[i].is_included_in_clobbers() {
734                 if let Some(&inst_clobbered) = self.clobbers.get(&InsnIndex::new(i)) {
735                     clobbered.union_from(inst_clobbered);
736                 }
737             }
738         }
739 
740         let clobbered_regs = clobbered
741             .into_iter()
742             .map(|preg| Writable::from_reg(RealReg::from(preg)))
743             .collect();
744 
745         (clobbered_regs, function_calls)
746     }
747 
748     /// Emit the instructions to a `MachBuffer`, containing fixed-up
749     /// code and external reloc/trap/etc. records ready for use. Takes
750     /// the regalloc results as well.
751     ///
752     /// Returns the machine code itself, and optionally metadata
753     /// and/or a disassembly, as an `EmitResult`. The `VCode` itself
754     /// is consumed by the emission process.
755     pub fn emit(
756         mut self,
757         regalloc: &regalloc2::Output,
758         want_disasm: bool,
759         flags: &settings::Flags,
760         ctrl_plane: &mut ControlPlane,
761     ) -> EmitResult
762     where
763         I: VCodeInst,
764     {
765         let _tt = timing::vcode_emit();
766         let mut buffer = MachBuffer::new();
767         buffer.set_log2_min_function_alignment(self.log2_min_function_alignment);
768         let mut bb_starts: Vec<Option<CodeOffset>> = vec![];
769 
770         // The first M MachLabels are reserved for block indices.
771         buffer.reserve_labels_for_blocks(self.num_blocks());
772 
773         // Register all allocated constants with the `MachBuffer` to ensure that
774         // any references to the constants during instructions can be handled
775         // correctly.
776         buffer.register_constants(&self.constants);
777 
778         // Construct the final order we emit code in: cold blocks at the end.
779         let mut final_order: SmallVec<[BlockIndex; 16]> = smallvec![];
780         let mut cold_blocks: SmallVec<[BlockIndex; 16]> = smallvec![];
781         for block in 0..self.num_blocks() {
782             let block = BlockIndex::new(block);
783             if self.block_order.is_cold(block) {
784                 cold_blocks.push(block);
785             } else {
786                 final_order.push(block);
787             }
788         }
789         final_order.extend(cold_blocks.clone());
790 
791         // Compute/save info we need for the prologue: clobbers and
792         // number of spillslots.
793         //
794         // We clone `abi` here because we will mutate it as we
795         // generate the prologue and set other info, but we can't
796         // mutate `VCode`. The info it usually carries prior to
797         // setting clobbers is fairly minimal so this should be
798         // relatively cheap.
799         let (clobbers, function_calls) = self.compute_clobbers_and_function_calls(regalloc);
800         self.abi.compute_frame_layout(
801             &self.sigs,
802             regalloc.num_spillslots,
803             clobbers,
804             function_calls,
805         );
806 
807         // Emit blocks.
808         let mut cur_srcloc = None;
809         let mut last_offset = None;
810         let mut inst_offsets = vec![];
811         let mut state = I::State::new(&self.abi, core::mem::take(ctrl_plane));
812 
813         let mut disasm = String::new();
814 
815         if !self.debug_value_labels.is_empty() {
816             inst_offsets.resize(self.insts.len(), NO_INST_OFFSET);
817         }
818 
819         // Count edits per block ahead of time; this is needed for
820         // lookahead island emission. (We could derive it per-block
821         // with binary search in the edit list, but it's more
822         // efficient to do it in one pass here.)
823         let mut ra_edits_per_block: SmallVec<[u32; 64]> = smallvec![];
824         let mut edit_idx = 0;
825         for block in 0..self.num_blocks() {
826             let end_inst = InsnIndex::new(self.block_ranges.get(block).end);
827             let start_edit_idx = edit_idx;
828             while edit_idx < regalloc.edits.len() && regalloc.edits[edit_idx].0.inst() < end_inst {
829                 edit_idx += 1;
830             }
831             let end_edit_idx = edit_idx;
832             ra_edits_per_block.push((end_edit_idx - start_edit_idx) as u32);
833         }
834 
835         let is_forward_edge_cfi_enabled = self.abi.is_forward_edge_cfi_enabled();
836         let mut bb_padding = match flags.bb_padding_log2_minus_one() {
837             0 => Vec::new(),
838             n => vec![0; 1 << (n - 1)],
839         };
840         let mut total_bb_padding = 0;
841 
842         for (block_order_idx, &block) in final_order.iter().enumerate() {
843             trace!("emitting block {:?}", block);
844 
845             // Call the new block hook for state
846             state.on_new_block();
847 
848             // Emit NOPs to align the block.
849             let new_offset = I::align_basic_block(buffer.cur_offset());
850             while new_offset > buffer.cur_offset() {
851                 // Pad with NOPs up to the aligned block offset.
852                 let nop = I::gen_nop((new_offset - buffer.cur_offset()) as usize);
853                 nop.emit(&mut buffer, &self.emit_info, &mut Default::default());
854             }
855             assert_eq!(buffer.cur_offset(), new_offset);
856 
857             let do_emit = |inst: &I,
858                            disasm: &mut String,
859                            buffer: &mut MachBuffer<I>,
860                            state: &mut I::State| {
861                 if want_disasm && !inst.is_args() {
862                     let mut s = state.clone();
863                     writeln!(disasm, "  {}", inst.pretty_print_inst(&mut s)).unwrap();
864                 }
865                 inst.emit(buffer, &self.emit_info, state);
866             };
867 
868             // Is this the first block? Emit the prologue directly if so.
869             if block == self.entry {
870                 trace!(" -> entry block");
871                 buffer.start_srcloc(Default::default());
872                 for inst in &self.abi.gen_prologue() {
873                     do_emit(&inst, &mut disasm, &mut buffer, &mut state);
874                 }
875                 buffer.end_srcloc();
876             }
877 
878             // Now emit the regular block body.
879 
880             buffer.bind_label(MachLabel::from_block(block), state.ctrl_plane_mut());
881 
882             if want_disasm {
883                 writeln!(&mut disasm, "block{}:", block.index()).unwrap();
884             }
885 
886             if flags.machine_code_cfg_info() {
887                 // Track BB starts. If we have backed up due to MachBuffer
888                 // branch opts, note that the removed blocks were removed.
889                 let cur_offset = buffer.cur_offset();
890                 if last_offset.is_some() && cur_offset <= last_offset.unwrap() {
891                     for i in (0..bb_starts.len()).rev() {
892                         if bb_starts[i].is_some() && cur_offset > bb_starts[i].unwrap() {
893                             break;
894                         }
895                         bb_starts[i] = None;
896                     }
897                 }
898                 bb_starts.push(Some(cur_offset));
899                 last_offset = Some(cur_offset);
900             }
901 
902             if let Some(block_start) = I::gen_block_start(
903                 self.block_order.is_indirect_branch_target(block),
904                 is_forward_edge_cfi_enabled,
905             ) {
906                 do_emit(&block_start, &mut disasm, &mut buffer, &mut state);
907             }
908 
909             for inst_or_edit in regalloc.block_insts_and_edits(&self, block) {
910                 match inst_or_edit {
911                     InstOrEdit::Inst(iix) => {
912                         if !self.debug_value_labels.is_empty() {
913                             // If we need to produce debug info,
914                             // record the offset of each instruction
915                             // so that we can translate value-label
916                             // ranges to machine-code offsets.
917 
918                             // Cold blocks violate monotonicity
919                             // assumptions elsewhere (that
920                             // instructions in inst-index order are in
921                             // order in machine code), so we omit
922                             // their offsets here. Value-label range
923                             // generation below will skip empty ranges
924                             // and ranges with to-offsets of zero.
925                             if !self.block_order.is_cold(block) {
926                                 inst_offsets[iix.index()] = buffer.cur_offset();
927                             }
928                         }
929 
930                         // Update the srcloc at this point in the buffer.
931                         let srcloc = self.srclocs[iix.index()];
932                         if cur_srcloc != Some(srcloc) {
933                             if cur_srcloc.is_some() {
934                                 buffer.end_srcloc();
935                             }
936                             buffer.start_srcloc(srcloc);
937                             cur_srcloc = Some(srcloc);
938                         }
939 
940                         // If this is a safepoint, compute a stack map
941                         // and pass it to the emit state.
942                         let stack_map_disasm = if self.insts[iix.index()].is_safepoint() {
943                             let (user_stack_map, user_stack_map_disasm) = {
944                                 // The `user_stack_maps` is keyed by reverse
945                                 // instruction index, so we must flip the
946                                 // index. We can't put this into a helper method
947                                 // due to borrowck issues because parts of
948                                 // `self` are borrowed mutably elsewhere in this
949                                 // function.
950                                 let index = iix.to_backwards_insn_index(self.num_insts());
951                                 let user_stack_map = self.user_stack_maps.remove(&index);
952                                 let user_stack_map_disasm = if want_disasm {
953                                     user_stack_map.as_ref().map(|m| format!("  ; {m:?}"))
954                                 } else {
955                                     None
956                                 };
957                                 (user_stack_map, user_stack_map_disasm)
958                             };
959 
960                             state.pre_safepoint(user_stack_map);
961 
962                             user_stack_map_disasm
963                         } else {
964                             None
965                         };
966 
967                         // Place debug tags in the emission buffer
968                         // either at the offset prior to the
969                         // instruction or after the instruction,
970                         // depending on whether this is a call. See
971                         // the documentation on [`MachDebugTagPos`]
972                         // for details on why.
973                         let mut debug_tag_disasm = None;
974                         let mut place_debug_tags =
975                             |this: &VCode<I>, pos: MachDebugTagPos, buffer: &mut MachBuffer<I>| {
976                                 // As above, translate the forward instruction
977                                 // index to a backward index for the lookup.
978                                 let debug_tag_range = {
979                                     let index = iix.to_backwards_insn_index(this.num_insts());
980                                     this.debug_tags.get(&index)
981                                 };
982                                 if let Some(range) = debug_tag_range {
983                                     let start = usize::try_from(range.start).unwrap();
984                                     let end = usize::try_from(range.end).unwrap();
985                                     let tags = &this.debug_tag_pool[start..end];
986 
987                                     if want_disasm {
988                                         debug_tag_disasm =
989                                             Some(format!("  ; ^-- debug @ {pos:?}: {tags:?}"));
990                                     }
991                                     buffer.push_debug_tags(pos, tags);
992                                 }
993                             };
994                         let debug_tag_pos =
995                             if self.insts[iix.index()].call_type() == CallType::Regular {
996                                 MachDebugTagPos::Post
997                             } else {
998                                 MachDebugTagPos::Pre
999                             };
1000 
1001                         if debug_tag_pos == MachDebugTagPos::Pre {
1002                             place_debug_tags(&self, debug_tag_pos, &mut buffer);
1003                         }
1004 
1005                         // If the instruction we are about to emit is
1006                         // a return, place an epilogue at this point
1007                         // (and don't emit the return; the actual
1008                         // epilogue will contain it).
1009                         if self.insts[iix.index()].is_term() == MachTerminator::Ret {
1010                             log::trace!("emitting epilogue");
1011                             for inst in self.abi.gen_epilogue() {
1012                                 do_emit(&inst, &mut disasm, &mut buffer, &mut state);
1013                             }
1014                         } else {
1015                             // Update the operands for this inst using the
1016                             // allocations from the regalloc result.
1017                             let mut allocs = regalloc.inst_allocs(iix).iter();
1018                             self.insts[iix.index()].get_operands(
1019                                 &mut |reg: &mut Reg, constraint, _kind, _pos| {
1020                                     let alloc =
1021                                         allocs.next().expect("enough allocations for all operands");
1022 
1023                                     if let Some(alloc) = alloc.as_reg() {
1024                                         let alloc: Reg = alloc.into();
1025                                         if let OperandConstraint::FixedReg(rreg) = constraint {
1026                                             debug_assert_eq!(Reg::from(rreg), alloc);
1027                                         }
1028                                         *reg = alloc;
1029                                     } else if let Some(alloc) = alloc.as_stack() {
1030                                         let alloc: Reg = alloc.into();
1031                                         *reg = alloc;
1032                                     }
1033                                 },
1034                             );
1035                             debug_assert!(allocs.next().is_none());
1036 
1037                             log::trace!("emitting: {:?}", self.insts[iix.index()]);
1038 
1039                             // Emit the instruction!
1040                             do_emit(
1041                                 &self.insts[iix.index()],
1042                                 &mut disasm,
1043                                 &mut buffer,
1044                                 &mut state,
1045                             );
1046 
1047                             if debug_tag_pos == MachDebugTagPos::Post {
1048                                 place_debug_tags(&self, debug_tag_pos, &mut buffer);
1049                             }
1050 
1051                             if let Some(stack_map_disasm) = stack_map_disasm {
1052                                 disasm.push_str(&stack_map_disasm);
1053                                 disasm.push('\n');
1054                             }
1055                             if let Some(debug_tag_disasm) = debug_tag_disasm {
1056                                 disasm.push_str(&debug_tag_disasm);
1057                                 disasm.push('\n');
1058                             }
1059                         }
1060                     }
1061 
1062                     InstOrEdit::Edit(Edit::Move { from, to }) => {
1063                         // Create a move/spill/reload instruction and
1064                         // immediately emit it.
1065                         match (from.as_reg(), to.as_reg()) {
1066                             (Some(from), Some(to)) => {
1067                                 // Reg-to-reg move.
1068                                 let from_rreg = Reg::from(from);
1069                                 let to_rreg = Writable::from_reg(Reg::from(to));
1070                                 debug_assert_eq!(from.class(), to.class());
1071                                 let ty = I::canonical_type_for_rc(from.class());
1072                                 let mv = I::gen_move(to_rreg, from_rreg, ty);
1073                                 do_emit(&mv, &mut disasm, &mut buffer, &mut state);
1074                             }
1075                             (Some(from), None) => {
1076                                 // Spill from register to spillslot.
1077                                 let to = to.as_stack().unwrap();
1078                                 let from_rreg = RealReg::from(from);
1079                                 let spill = self.abi.gen_spill(to, from_rreg);
1080                                 do_emit(&spill, &mut disasm, &mut buffer, &mut state);
1081                             }
1082                             (None, Some(to)) => {
1083                                 // Load from spillslot to register.
1084                                 let from = from.as_stack().unwrap();
1085                                 let to_rreg = Writable::from_reg(RealReg::from(to));
1086                                 let reload = self.abi.gen_reload(to_rreg, from);
1087                                 do_emit(&reload, &mut disasm, &mut buffer, &mut state);
1088                             }
1089                             (None, None) => {
1090                                 panic!("regalloc2 should have eliminated stack-to-stack moves!");
1091                             }
1092                         }
1093                     }
1094                 }
1095             }
1096 
1097             if cur_srcloc.is_some() {
1098                 buffer.end_srcloc();
1099                 cur_srcloc = None;
1100             }
1101 
1102             // Do we need an island? Get the worst-case size of the next BB, add
1103             // it to the optional padding behind the block, and pass this to the
1104             // `MachBuffer` to determine if an island is necessary.
1105             let worst_case_next_bb = if block_order_idx < final_order.len() - 1 {
1106                 let next_block = final_order[block_order_idx + 1];
1107                 let next_block_range = self.block_ranges.get(next_block.index());
1108                 let next_block_size = next_block_range.len() as u32;
1109                 let next_block_ra_insertions = ra_edits_per_block[next_block.index()];
1110                 I::worst_case_size() * (next_block_size + next_block_ra_insertions)
1111             } else {
1112                 0
1113             };
1114             let padding = if bb_padding.is_empty() {
1115                 0
1116             } else {
1117                 bb_padding.len() as u32 + I::LabelUse::ALIGN - 1
1118             };
1119             if buffer.island_needed(padding + worst_case_next_bb) {
1120                 buffer.emit_island(padding + worst_case_next_bb, ctrl_plane);
1121             }
1122 
1123             // Insert padding, if configured, to stress the `MachBuffer`'s
1124             // relocation and island calculations.
1125             //
1126             // Padding can get quite large during fuzzing though so place a
1127             // total cap on it where when a per-function threshold is exceeded
1128             // the padding is turned back down to zero. This avoids a small-ish
1129             // test case generating a GB+ memory footprint in Cranelift for
1130             // example.
1131             if !bb_padding.is_empty() {
1132                 buffer.put_data(&bb_padding);
1133                 buffer.align_to(I::LabelUse::ALIGN);
1134                 total_bb_padding += bb_padding.len();
1135                 if total_bb_padding > (150 << 20) {
1136                     bb_padding = Vec::new();
1137                 }
1138             }
1139         }
1140 
1141         debug_assert!(
1142             self.user_stack_maps.is_empty(),
1143             "any stack maps should have been consumed by instruction emission, still have: {:#?}",
1144             self.user_stack_maps,
1145         );
1146 
1147         // Do any optimizations on branches at tail of buffer, as if we had
1148         // bound one last label.
1149         buffer.optimize_branches(ctrl_plane);
1150 
1151         // emission state is not needed anymore, move control plane back out
1152         *ctrl_plane = state.take_ctrl_plane();
1153 
1154         let func_body_len = buffer.cur_offset();
1155 
1156         // Create `bb_edges` and final (filtered) `bb_starts`.
1157         let mut bb_edges = vec![];
1158         let mut bb_offsets = vec![];
1159         if flags.machine_code_cfg_info() {
1160             for block in 0..self.num_blocks() {
1161                 if bb_starts[block].is_none() {
1162                     // Block was deleted by MachBuffer; skip.
1163                     continue;
1164                 }
1165                 let from = bb_starts[block].unwrap();
1166 
1167                 bb_offsets.push(from);
1168                 // Resolve each `succ` label and add edges.
1169                 let succs = self.block_succs(BlockIndex::new(block));
1170                 for &succ in succs.iter() {
1171                     let to = buffer.resolve_label_offset(MachLabel::from_block(succ));
1172                     bb_edges.push((from, to));
1173                 }
1174             }
1175         }
1176 
1177         self.monotonize_inst_offsets(&mut inst_offsets[..], func_body_len);
1178         let value_labels_ranges =
1179             self.compute_value_labels_ranges(regalloc, &inst_offsets[..], func_body_len);
1180 
1181         // Store metadata about frame layout in the MachBuffer.
1182         buffer.set_frame_layout(self.abi.frame_slot_metadata());
1183 
1184         EmitResult {
1185             buffer: buffer.finish(&self.constants, ctrl_plane),
1186             bb_offsets,
1187             bb_edges,
1188             disasm: if want_disasm { Some(disasm) } else { None },
1189             value_labels_ranges,
1190         }
1191     }
1192 
1193     fn monotonize_inst_offsets(&self, inst_offsets: &mut [CodeOffset], func_body_len: u32) {
1194         if self.debug_value_labels.is_empty() {
1195             return;
1196         }
1197 
1198         // During emission, branch removal can make offsets of instructions incorrect.
1199         // Consider the following sequence: [insi][jmp0][jmp1][jmp2][insj]
1200         // It will be recorded as (say):    [30]  [34]  [38]  [42]  [<would be 46>]
1201         // When the jumps get removed we are left with (in "inst_offsets"):
1202         // [insi][jmp0][jmp1][jmp2][insj][...]
1203         // [30]  [34]  [38]  [42]  [34]
1204         // Which violates the monotonicity invariant. This method sets offsets of these
1205         // removed instructions such as to make them appear zero-sized:
1206         // [insi][jmp0][jmp1][jmp2][insj][...]
1207         // [30]  [34]  [34]  [34]  [34]
1208         //
1209         let mut next_offset = func_body_len;
1210         for inst_index in (0..(inst_offsets.len() - 1)).rev() {
1211             let inst_offset = inst_offsets[inst_index];
1212 
1213             // Not all instructions get their offsets recorded.
1214             if inst_offset == NO_INST_OFFSET {
1215                 continue;
1216             }
1217 
1218             if inst_offset > next_offset {
1219                 trace!(
1220                     "Fixing code offset of the removed Inst {}: {} -> {}",
1221                     inst_index, inst_offset, next_offset
1222                 );
1223                 inst_offsets[inst_index] = next_offset;
1224                 continue;
1225             }
1226 
1227             next_offset = inst_offset;
1228         }
1229     }
1230 
1231     fn compute_value_labels_ranges(
1232         &self,
1233         regalloc: &regalloc2::Output,
1234         inst_offsets: &[CodeOffset],
1235         func_body_len: u32,
1236     ) -> ValueLabelsRanges {
1237         if self.debug_value_labels.is_empty() {
1238             return ValueLabelsRanges::default();
1239         }
1240 
1241         if trace_log_enabled!() {
1242             self.log_value_labels_ranges(regalloc, inst_offsets);
1243         }
1244 
1245         let mut value_labels_ranges: ValueLabelsRanges = HashMap::new();
1246         for &(label, from, to, alloc) in &regalloc.debug_locations {
1247             let label = ValueLabel::from_u32(label);
1248             let ranges = value_labels_ranges.entry(label).or_insert_with(|| vec![]);
1249             let prog_point_to_inst = |prog_point: ProgPoint| {
1250                 let mut inst = prog_point.inst();
1251                 if prog_point.pos() == InstPosition::After {
1252                     inst = inst.next();
1253                 }
1254                 inst.index()
1255             };
1256             let from_inst_index = prog_point_to_inst(from);
1257             let to_inst_index = prog_point_to_inst(to);
1258             let from_offset = inst_offsets[from_inst_index];
1259             let to_offset = if to_inst_index == inst_offsets.len() {
1260                 func_body_len
1261             } else {
1262                 inst_offsets[to_inst_index]
1263             };
1264 
1265             // Empty ranges or unavailable offsets can happen
1266             // due to cold blocks and branch removal (see above).
1267             if from_offset == NO_INST_OFFSET
1268                 || to_offset == NO_INST_OFFSET
1269                 || from_offset == to_offset
1270             {
1271                 continue;
1272             }
1273 
1274             let loc = if let Some(preg) = alloc.as_reg() {
1275                 LabelValueLoc::Reg(Reg::from(preg))
1276             } else {
1277                 #[cfg(not(feature = "unwind"))]
1278                 continue;
1279 
1280                 #[cfg(feature = "unwind")]
1281                 {
1282                     let slot = alloc.as_stack().unwrap();
1283                     let slot_offset = self.abi.get_spillslot_offset(slot);
1284                     let slot_base_to_caller_sp_offset = self.abi.slot_base_to_caller_sp_offset();
1285                     let caller_sp_to_cfa_offset =
1286                         crate::isa::unwind::systemv::caller_sp_to_cfa_offset();
1287                     // NOTE: this is a negative offset because it's relative to the caller's SP
1288                     let cfa_to_sp_offset =
1289                         -((slot_base_to_caller_sp_offset + caller_sp_to_cfa_offset) as i64);
1290                     LabelValueLoc::CFAOffset(cfa_to_sp_offset + slot_offset)
1291                 }
1292             };
1293 
1294             // Coalesce adjacent ranges that for the same location
1295             // to minimize output size here and for the consumers.
1296             if let Some(last_loc_range) = ranges.last_mut() {
1297                 if last_loc_range.loc == loc && last_loc_range.end == from_offset {
1298                     trace!(
1299                         "Extending debug range for {:?} in {:?} to Inst {} ({})",
1300                         label, loc, to_inst_index, to_offset
1301                     );
1302                     last_loc_range.end = to_offset;
1303                     continue;
1304                 }
1305             }
1306 
1307             trace!(
1308                 "Recording debug range for {:?} in {:?}: [Inst {}..Inst {}) [{}..{})",
1309                 label, loc, from_inst_index, to_inst_index, from_offset, to_offset
1310             );
1311 
1312             ranges.push(ValueLocRange {
1313                 loc,
1314                 start: from_offset,
1315                 end: to_offset,
1316             });
1317         }
1318 
1319         value_labels_ranges
1320     }
1321 
1322     fn log_value_labels_ranges(&self, regalloc: &regalloc2::Output, inst_offsets: &[CodeOffset]) {
1323         debug_assert!(trace_log_enabled!());
1324 
1325         // What debug labels do we have? Note we'll skip those that have not been
1326         // allocated any location at all. They will show up as numeric gaps in the table.
1327         let mut labels = vec![];
1328         for &(label, _, _, _) in &regalloc.debug_locations {
1329             if Some(&label) == labels.last() {
1330                 continue;
1331             }
1332             labels.push(label);
1333         }
1334 
1335         // Reformat the data on what VRegs were the VLs assigned to by lowering, since
1336         // the array we have is sorted by VReg, and we want it sorted by VL for easy
1337         // access in the loop below.
1338         let mut vregs = vec![];
1339         for &(vreg, start, end, label) in &self.debug_value_labels {
1340             if matches!(labels.binary_search(&label), Ok(_)) {
1341                 vregs.push((label, start, end, vreg));
1342             }
1343         }
1344         vregs.sort_unstable_by(
1345             |(l_label, l_start, _, _), (r_label, r_start, _, _)| match l_label.cmp(r_label) {
1346                 Ordering::Equal => l_start.cmp(r_start),
1347                 cmp => cmp,
1348             },
1349         );
1350 
1351         #[derive(PartialEq)]
1352         enum Mode {
1353             Measure,
1354             Emit,
1355         }
1356         #[derive(PartialEq)]
1357         enum Row {
1358             Head,
1359             Line,
1360             Inst(usize, usize),
1361         }
1362 
1363         let mut widths = vec![0; 3 + 2 * labels.len()];
1364         let mut row = String::new();
1365         let mut output_row = |row_kind: Row, mode: Mode| {
1366             let mut column_index = 0;
1367             row.clear();
1368 
1369             macro_rules! output_cell_impl {
1370                 ($fill:literal, $span:literal, $($cell_fmt:tt)*) => {
1371                     let column_start = row.len();
1372                     {
1373                         row.push('|');
1374                         write!(row, $($cell_fmt)*).unwrap();
1375                     }
1376 
1377                     let next_column_index = column_index + $span;
1378                     let expected_width: usize = widths[column_index..next_column_index].iter().sum();
1379                     if mode == Mode::Measure {
1380                         let actual_width = row.len() - column_start;
1381                         if actual_width > expected_width {
1382                             widths[next_column_index - 1] += actual_width - expected_width;
1383                         }
1384                     } else {
1385                         let column_end = column_start + expected_width;
1386                         while row.len() != column_end {
1387                             row.push($fill);
1388                         }
1389                     }
1390                     column_index = next_column_index;
1391                 };
1392             }
1393             macro_rules! output_cell {
1394                 ($($cell_fmt:tt)*) => {
1395                     output_cell_impl!(' ', 1, $($cell_fmt)*);
1396                 };
1397             }
1398 
1399             match row_kind {
1400                 Row::Head => {
1401                     output_cell!("BB");
1402                     output_cell!("Inst");
1403                     output_cell!("IP");
1404                     for label in &labels {
1405                         output_cell_impl!(' ', 2, "{:?}", ValueLabel::from_u32(*label));
1406                     }
1407                 }
1408                 Row::Line => {
1409                     debug_assert!(mode == Mode::Emit);
1410                     for _ in 0..3 {
1411                         output_cell_impl!('-', 1, "");
1412                     }
1413                     for _ in &labels {
1414                         output_cell_impl!('-', 2, "");
1415                     }
1416                 }
1417                 Row::Inst(block_index, inst_index) => {
1418                     debug_assert!(inst_index < self.num_insts());
1419                     if self.block_ranges.get(block_index).start == inst_index {
1420                         output_cell!("B{}", block_index);
1421                     } else {
1422                         output_cell!("");
1423                     }
1424                     output_cell!("Inst {inst_index} ");
1425                     output_cell!("{} ", inst_offsets[inst_index]);
1426 
1427                     for label in &labels {
1428                         // First, the VReg.
1429                         use regalloc2::Inst;
1430                         let vreg_cmp = |inst: usize,
1431                                         vreg_label: &u32,
1432                                         range_start: &Inst,
1433                                         range_end: &Inst| {
1434                             match vreg_label.cmp(&label) {
1435                                 Ordering::Equal => {
1436                                     if range_end.index() <= inst {
1437                                         Ordering::Less
1438                                     } else if range_start.index() > inst {
1439                                         Ordering::Greater
1440                                     } else {
1441                                         Ordering::Equal
1442                                     }
1443                                 }
1444                                 cmp => cmp,
1445                             }
1446                         };
1447                         let vreg_index =
1448                             vregs.binary_search_by(|(l, s, e, _)| vreg_cmp(inst_index, l, s, e));
1449                         if let Ok(vreg_index) = vreg_index {
1450                             let mut prev_vreg = None;
1451                             if inst_index > 0 {
1452                                 let prev_vreg_index = vregs.binary_search_by(|(l, s, e, _)| {
1453                                     vreg_cmp(inst_index - 1, l, s, e)
1454                                 });
1455                                 if let Ok(prev_vreg_index) = prev_vreg_index {
1456                                     prev_vreg = Some(vregs[prev_vreg_index].3);
1457                                 }
1458                             }
1459 
1460                             let vreg = vregs[vreg_index].3;
1461                             if Some(vreg) == prev_vreg {
1462                                 output_cell!("*");
1463                             } else {
1464                                 output_cell!("{}", vreg);
1465                             }
1466                         } else {
1467                             output_cell!("");
1468                         }
1469 
1470                         // Second, the allocated location.
1471                         let inst_prog_point = ProgPoint::before(Inst::new(inst_index));
1472                         let range_index = regalloc.debug_locations.binary_search_by(
1473                             |(range_label, range_start, range_end, _)| match range_label.cmp(label)
1474                             {
1475                                 Ordering::Equal => {
1476                                     if *range_end <= inst_prog_point {
1477                                         Ordering::Less
1478                                     } else if *range_start > inst_prog_point {
1479                                         Ordering::Greater
1480                                     } else {
1481                                         Ordering::Equal
1482                                     }
1483                                 }
1484                                 cmp => cmp,
1485                             },
1486                         );
1487                         if let Ok(range_index) = range_index {
1488                             // Live at this instruction, print the location.
1489                             if let Some(reg) = regalloc.debug_locations[range_index].3.as_reg() {
1490                                 output_cell!("{:?}", Reg::from(reg));
1491                             } else {
1492                                 output_cell!("Stk");
1493                             }
1494                         } else {
1495                             // Not live at this instruction.
1496                             output_cell!("");
1497                         }
1498                     }
1499                 }
1500             }
1501             row.push('|');
1502 
1503             if mode == Mode::Emit {
1504                 trace!("{}", row.as_str());
1505             }
1506         };
1507 
1508         for block_index in 0..self.num_blocks() {
1509             for inst_index in self.block_ranges.get(block_index) {
1510                 output_row(Row::Inst(block_index, inst_index), Mode::Measure);
1511             }
1512         }
1513         output_row(Row::Head, Mode::Measure);
1514 
1515         output_row(Row::Head, Mode::Emit);
1516         output_row(Row::Line, Mode::Emit);
1517         for block_index in 0..self.num_blocks() {
1518             for inst_index in self.block_ranges.get(block_index) {
1519                 output_row(Row::Inst(block_index, inst_index), Mode::Emit);
1520             }
1521         }
1522     }
1523 
1524     /// Get the IR block for a BlockIndex, if one exists.
1525     pub fn bindex_to_bb(&self, block: BlockIndex) -> Option<ir::Block> {
1526         self.block_order.lowered_order()[block.index()].orig_block()
1527     }
1528 
1529     /// Get the type of a VReg.
1530     pub fn vreg_type(&self, vreg: VReg) -> Type {
1531         self.vreg_types[vreg.vreg()]
1532     }
1533 
1534     /// Get the fact, if any, for a given VReg.
1535     pub fn vreg_fact(&self, vreg: VReg) -> Option<&Fact> {
1536         self.facts[vreg.vreg()].as_ref()
1537     }
1538 
1539     /// Set the fact for a given VReg.
1540     pub fn set_vreg_fact(&mut self, vreg: VReg, fact: Fact) {
1541         trace!("set fact on {}: {:?}", vreg, fact);
1542         self.facts[vreg.vreg()] = Some(fact);
1543     }
1544 
1545     /// Does a given instruction define any facts?
1546     pub fn inst_defines_facts(&self, inst: InsnIndex) -> bool {
1547         self.inst_operands(inst)
1548             .iter()
1549             .filter(|o| o.kind() == OperandKind::Def)
1550             .map(|o| o.vreg())
1551             .any(|vreg| self.facts[vreg.vreg()].is_some())
1552     }
1553 
1554     /// Get the user stack map associated with the given forward instruction index.
1555     pub fn get_user_stack_map(&self, inst: InsnIndex) -> Option<&ir::UserStackMap> {
1556         let index = inst.to_backwards_insn_index(self.num_insts());
1557         self.user_stack_maps.get(&index)
1558     }
1559 }
1560 
1561 impl<I: VCodeInst> core::ops::Index<InsnIndex> for VCode<I> {
1562     type Output = I;
1563     fn index(&self, idx: InsnIndex) -> &Self::Output {
1564         &self.insts[idx.index()]
1565     }
1566 }
1567 
1568 impl<I: VCodeInst> RegallocFunction for VCode<I> {
1569     fn num_insts(&self) -> usize {
1570         self.insts.len()
1571     }
1572 
1573     fn num_blocks(&self) -> usize {
1574         self.block_ranges.len()
1575     }
1576 
1577     fn entry_block(&self) -> BlockIndex {
1578         self.entry
1579     }
1580 
1581     fn block_insns(&self, block: BlockIndex) -> InstRange {
1582         let range = self.block_ranges.get(block.index());
1583         InstRange::new(InsnIndex::new(range.start), InsnIndex::new(range.end))
1584     }
1585 
1586     fn block_succs(&self, block: BlockIndex) -> &[BlockIndex] {
1587         let range = self.block_succ_range.get(block.index());
1588         &self.block_succs[range]
1589     }
1590 
1591     fn block_preds(&self, block: BlockIndex) -> &[BlockIndex] {
1592         let range = self.block_pred_range.get(block.index());
1593         &self.block_preds[range]
1594     }
1595 
1596     fn block_params(&self, block: BlockIndex) -> &[VReg] {
1597         // As a special case we don't return block params for the entry block, as all the arguments
1598         // will be defined by the `Inst::Args` instruction.
1599         if block == self.entry {
1600             return &[];
1601         }
1602 
1603         let range = self.block_params_range.get(block.index());
1604         &self.block_params[range]
1605     }
1606 
1607     fn branch_blockparams(&self, block: BlockIndex, _insn: InsnIndex, succ_idx: usize) -> &[VReg] {
1608         let succ_range = self.branch_block_arg_succ_range.get(block.index());
1609         debug_assert!(succ_idx < succ_range.len());
1610         let branch_block_args = self.branch_block_arg_range.get(succ_range.start + succ_idx);
1611         &self.branch_block_args[branch_block_args]
1612     }
1613 
1614     fn is_ret(&self, insn: InsnIndex) -> bool {
1615         match self.insts[insn.index()].is_term() {
1616             // We treat blocks terminated by an unconditional trap like a return for regalloc.
1617             MachTerminator::None => self.insts[insn.index()].is_trap(),
1618             MachTerminator::Ret | MachTerminator::RetCall => true,
1619             MachTerminator::Branch => false,
1620         }
1621     }
1622 
1623     fn is_branch(&self, insn: InsnIndex) -> bool {
1624         match self.insts[insn.index()].is_term() {
1625             MachTerminator::Branch => true,
1626             _ => false,
1627         }
1628     }
1629 
1630     fn inst_operands(&self, insn: InsnIndex) -> &[Operand] {
1631         let range = self.operand_ranges.get(insn.index());
1632         &self.operands[range]
1633     }
1634 
1635     fn inst_clobbers(&self, insn: InsnIndex) -> PRegSet {
1636         self.clobbers.get(&insn).cloned().unwrap_or_default()
1637     }
1638 
1639     fn num_vregs(&self) -> usize {
1640         self.vreg_types.len()
1641     }
1642 
1643     fn debug_value_labels(&self) -> &[(VReg, InsnIndex, InsnIndex, u32)] {
1644         &self.debug_value_labels
1645     }
1646 
1647     fn spillslot_size(&self, regclass: RegClass) -> usize {
1648         self.abi.get_spillslot_size(regclass) as usize
1649     }
1650 
1651     fn allow_multiple_vreg_defs(&self) -> bool {
1652         // At least the s390x backend requires this, because the
1653         // `Loop` pseudo-instruction aggregates all Operands so pinned
1654         // vregs (RealRegs) may occur more than once.
1655         true
1656     }
1657 }
1658 
1659 impl<I: VCodeInst> Debug for VRegAllocator<I> {
1660     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1661         writeln!(f, "VRegAllocator {{")?;
1662 
1663         let mut alias_keys = self.vreg_aliases.keys().cloned().collect::<Vec<_>>();
1664         alias_keys.sort_unstable();
1665         for key in alias_keys {
1666             let dest = self.vreg_aliases.get(&key).unwrap();
1667             writeln!(f, "  {:?} := {:?}", Reg::from(key), Reg::from(*dest))?;
1668         }
1669 
1670         for (vreg, fact) in self.facts.iter().enumerate() {
1671             if let Some(fact) = fact {
1672                 writeln!(f, "  v{vreg} ! {fact}")?;
1673             }
1674         }
1675 
1676         writeln!(f, "}}")
1677     }
1678 }
1679 
1680 impl<I: VCodeInst> fmt::Debug for VCode<I> {
1681     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1682         writeln!(f, "VCode {{")?;
1683         writeln!(f, "  Entry block: {}", self.entry.index())?;
1684 
1685         let mut state = Default::default();
1686 
1687         for block in 0..self.num_blocks() {
1688             let block = BlockIndex::new(block);
1689             writeln!(
1690                 f,
1691                 "Block {}({:?}):",
1692                 block.index(),
1693                 self.block_params(block)
1694             )?;
1695             if let Some(bb) = self.bindex_to_bb(block) {
1696                 writeln!(f, "    (original IR block: {bb})")?;
1697             }
1698             for (succ_idx, succ) in self.block_succs(block).iter().enumerate() {
1699                 writeln!(
1700                     f,
1701                     "    (successor: Block {}({:?}))",
1702                     succ.index(),
1703                     self.branch_blockparams(block, InsnIndex::new(0) /* dummy */, succ_idx)
1704                 )?;
1705             }
1706             for inst in self.block_ranges.get(block.index()) {
1707                 writeln!(
1708                     f,
1709                     "  Inst {}: {}",
1710                     inst,
1711                     self.insts[inst].pretty_print_inst(&mut state)
1712                 )?;
1713                 if !self.operands.is_empty() {
1714                     for operand in self.inst_operands(InsnIndex::new(inst)) {
1715                         if operand.kind() == OperandKind::Def {
1716                             if let Some(fact) = &self.facts[operand.vreg().vreg()] {
1717                                 writeln!(f, "    v{} ! {}", operand.vreg().vreg(), fact)?;
1718                             }
1719                         }
1720                     }
1721                 }
1722                 if let Some(user_stack_map) = self.get_user_stack_map(InsnIndex::new(inst)) {
1723                     writeln!(f, "    {user_stack_map:?}")?;
1724                 }
1725             }
1726         }
1727 
1728         writeln!(f, "}}")?;
1729         Ok(())
1730     }
1731 }
1732 
1733 /// This structure manages VReg allocation during the lifetime of the VCodeBuilder.
1734 pub struct VRegAllocator<I> {
1735     /// VReg IR-level types.
1736     vreg_types: Vec<Type>,
1737 
1738     /// VReg aliases. When the final VCode is built we rewrite all
1739     /// uses of the keys in this table to their replacement values.
1740     ///
1741     /// We use these aliases to rename an instruction's expected
1742     /// result vregs to the returned vregs from lowering, which are
1743     /// usually freshly-allocated temps.
1744     vreg_aliases: FxHashMap<regalloc2::VReg, regalloc2::VReg>,
1745 
1746     /// A deferred error, to be bubbled up to the top level of the
1747     /// lowering algorithm. We take this approach because we cannot
1748     /// currently propagate a `Result` upward through ISLE code (the
1749     /// lowering rules) or some ABI code.
1750     deferred_error: Option<CodegenError>,
1751 
1752     /// Facts on VRegs, for proof-carrying code.
1753     facts: Vec<Option<Fact>>,
1754 
1755     /// The type of instruction that this allocator makes registers for.
1756     _inst: core::marker::PhantomData<I>,
1757 }
1758 
1759 impl<I: VCodeInst> VRegAllocator<I> {
1760     /// Make a new VRegAllocator.
1761     pub fn with_capacity(capacity: usize) -> Self {
1762         let capacity = first_user_vreg_index() + capacity;
1763         let mut vreg_types = Vec::with_capacity(capacity);
1764         vreg_types.resize(first_user_vreg_index(), types::INVALID);
1765         Self {
1766             vreg_types,
1767             vreg_aliases: FxHashMap::with_capacity_and_hasher(capacity, Default::default()),
1768             deferred_error: None,
1769             facts: Vec::with_capacity(capacity),
1770             _inst: core::marker::PhantomData::default(),
1771         }
1772     }
1773 
1774     /// Allocate a fresh ValueRegs.
1775     pub fn alloc(&mut self, ty: Type) -> CodegenResult<ValueRegs<Reg>> {
1776         if self.deferred_error.is_some() {
1777             return Err(CodegenError::CodeTooLarge);
1778         }
1779         let v = self.vreg_types.len();
1780         let (regclasses, tys) = I::rc_for_type(ty)?;
1781         if v + regclasses.len() >= VReg::MAX {
1782             return Err(CodegenError::CodeTooLarge);
1783         }
1784 
1785         let regs: ValueRegs<Reg> = match regclasses {
1786             &[rc0] => ValueRegs::one(VReg::new(v, rc0).into()),
1787             &[rc0, rc1] => ValueRegs::two(VReg::new(v, rc0).into(), VReg::new(v + 1, rc1).into()),
1788             // We can extend this if/when we support 32-bit targets; e.g.,
1789             // an i128 on a 32-bit machine will need up to four machine regs
1790             // for a `Value`.
1791             _ => panic!("Value must reside in 1 or 2 registers"),
1792         };
1793         for (&reg_ty, &reg) in tys.iter().zip(regs.regs().iter()) {
1794             let vreg = reg.to_virtual_reg().unwrap();
1795             debug_assert_eq!(self.vreg_types.len(), vreg.index());
1796             self.vreg_types.push(reg_ty);
1797         }
1798 
1799         // Create empty facts for each allocated vreg.
1800         self.facts.resize(self.vreg_types.len(), None);
1801 
1802         Ok(regs)
1803     }
1804 
1805     /// Allocate a fresh ValueRegs, deferring any out-of-vregs
1806     /// errors. This is useful in places where we cannot bubble a
1807     /// `CodegenResult` upward easily, and which are known to be
1808     /// invoked from within the lowering loop that checks the deferred
1809     /// error status below.
1810     pub fn alloc_with_deferred_error(&mut self, ty: Type) -> ValueRegs<Reg> {
1811         match self.alloc(ty) {
1812             Ok(x) => x,
1813             Err(e) => {
1814                 self.deferred_error = Some(e);
1815                 self.bogus_for_deferred_error(ty)
1816             }
1817         }
1818     }
1819 
1820     /// Take any deferred error that was accumulated by `alloc_with_deferred_error`.
1821     pub fn take_deferred_error(&mut self) -> Option<CodegenError> {
1822         self.deferred_error.take()
1823     }
1824 
1825     /// Produce an bogus VReg placeholder with the proper number of
1826     /// registers for the given type. This is meant to be used with
1827     /// deferred allocation errors (see `Lower::alloc_tmp()`).
1828     fn bogus_for_deferred_error(&self, ty: Type) -> ValueRegs<Reg> {
1829         let (regclasses, _tys) = I::rc_for_type(ty).expect("must have valid type");
1830         match regclasses {
1831             &[rc0] => ValueRegs::one(VReg::new(0, rc0).into()),
1832             &[rc0, rc1] => ValueRegs::two(VReg::new(0, rc0).into(), VReg::new(1, rc1).into()),
1833             _ => panic!("Value must reside in 1 or 2 registers"),
1834         }
1835     }
1836 
1837     /// Rewrite any mention of `from` into `to`.
1838     pub fn set_vreg_alias(&mut self, from: Reg, to: Reg) {
1839         let from = from.into();
1840         let resolved_to = self.resolve_vreg_alias(to.into());
1841         // Disallow cycles (see below).
1842         assert_ne!(resolved_to, from);
1843 
1844         // Maintain the invariant that PCC facts only exist on vregs
1845         // which aren't aliases. We want to preserve whatever was
1846         // stated about the vreg before its producer was lowered.
1847         if let Some(fact) = self.facts[from.vreg()].take() {
1848             self.set_fact(resolved_to, fact);
1849         }
1850 
1851         let old_alias = self.vreg_aliases.insert(from, resolved_to);
1852         debug_assert_eq!(old_alias, None);
1853     }
1854 
1855     fn resolve_vreg_alias(&self, mut vreg: regalloc2::VReg) -> regalloc2::VReg {
1856         // We prevent cycles from existing by resolving targets of
1857         // aliases eagerly before setting them. If the target resolves
1858         // to the origin of the alias, then a cycle would be created
1859         // and the alias is disallowed. Because of the structure of
1860         // SSA code (one instruction can refer to another's defs but
1861         // not vice-versa, except indirectly through
1862         // phis/blockparams), cycles should not occur as we use
1863         // aliases to redirect vregs to the temps that actually define
1864         // them.
1865         while let Some(to) = self.vreg_aliases.get(&vreg) {
1866             vreg = *to;
1867         }
1868         vreg
1869     }
1870 
1871     #[inline]
1872     fn debug_assert_no_vreg_aliases(&self, mut list: impl Iterator<Item = VReg>) {
1873         debug_assert!(list.all(|vreg| !self.vreg_aliases.contains_key(&vreg)));
1874     }
1875 
1876     /// Set the proof-carrying code fact on a given virtual register.
1877     ///
1878     /// Returns the old fact, if any (only one fact can be stored).
1879     fn set_fact(&mut self, vreg: regalloc2::VReg, fact: Fact) -> Option<Fact> {
1880         trace!("vreg {:?} has fact: {:?}", vreg, fact);
1881         debug_assert!(!self.vreg_aliases.contains_key(&vreg));
1882         self.facts[vreg.vreg()].replace(fact)
1883     }
1884 
1885     /// Set a fact only if one doesn't already exist.
1886     pub fn set_fact_if_missing(&mut self, vreg: VirtualReg, fact: Fact) {
1887         let vreg = self.resolve_vreg_alias(vreg.into());
1888         if self.facts[vreg.vreg()].is_none() {
1889             self.set_fact(vreg, fact);
1890         }
1891     }
1892 
1893     /// Allocate a fresh ValueRegs, with a given fact to apply if
1894     /// the value fits in one VReg.
1895     pub fn alloc_with_maybe_fact(
1896         &mut self,
1897         ty: Type,
1898         fact: Option<Fact>,
1899     ) -> CodegenResult<ValueRegs<Reg>> {
1900         let result = self.alloc(ty)?;
1901 
1902         // Ensure that we don't lose a fact on a value that splits
1903         // into multiple VRegs.
1904         assert!(result.len() == 1 || fact.is_none());
1905         if let Some(fact) = fact {
1906             self.set_fact(result.regs()[0].into(), fact);
1907         }
1908 
1909         Ok(result)
1910     }
1911 }
1912 
1913 /// This structure tracks the large constants used in VCode that will be emitted separately by the
1914 /// [MachBuffer].
1915 ///
1916 /// First, during the lowering phase, constants are inserted using
1917 /// [VCodeConstants.insert]; an intermediate handle, `VCodeConstant`, tracks what constants are
1918 /// used in this phase. Some deduplication is performed, when possible, as constant
1919 /// values are inserted.
1920 ///
1921 /// Secondly, during the emission phase, the [MachBuffer] assigns [MachLabel]s for each of the
1922 /// constants so that instructions can refer to the value's memory location. The [MachBuffer]
1923 /// then writes the constant values to the buffer.
1924 #[derive(Default)]
1925 pub struct VCodeConstants {
1926     constants: PrimaryMap<VCodeConstant, VCodeConstantData>,
1927     pool_uses: HashMap<Constant, VCodeConstant>,
1928     well_known_uses: HashMap<*const [u8], VCodeConstant>,
1929     u64s: HashMap<[u8; 8], VCodeConstant>,
1930 }
1931 impl VCodeConstants {
1932     /// Initialize the structure with the expected number of constants.
1933     pub fn with_capacity(expected_num_constants: usize) -> Self {
1934         Self {
1935             constants: PrimaryMap::with_capacity(expected_num_constants),
1936             pool_uses: HashMap::with_capacity(expected_num_constants),
1937             well_known_uses: HashMap::new(),
1938             u64s: HashMap::new(),
1939         }
1940     }
1941 
1942     /// Insert a constant; using this method indicates that a constant value will be used and thus
1943     /// will be emitted to the `MachBuffer`. The current implementation can deduplicate constants
1944     /// that are [VCodeConstantData::Pool] or [VCodeConstantData::WellKnown] but not
1945     /// [VCodeConstantData::Generated].
1946     pub fn insert(&mut self, data: VCodeConstantData) -> VCodeConstant {
1947         match data {
1948             VCodeConstantData::Generated(_) => self.constants.push(data),
1949             VCodeConstantData::Pool(constant, _) => match self.pool_uses.get(&constant) {
1950                 None => {
1951                     let vcode_constant = self.constants.push(data);
1952                     self.pool_uses.insert(constant, vcode_constant);
1953                     vcode_constant
1954                 }
1955                 Some(&vcode_constant) => vcode_constant,
1956             },
1957             VCodeConstantData::WellKnown(data_ref) => {
1958                 match self.well_known_uses.entry(data_ref as *const [u8]) {
1959                     Entry::Vacant(v) => {
1960                         let vcode_constant = self.constants.push(data);
1961                         v.insert(vcode_constant);
1962                         vcode_constant
1963                     }
1964                     Entry::Occupied(o) => *o.get(),
1965                 }
1966             }
1967             VCodeConstantData::U64(value) => match self.u64s.entry(value) {
1968                 Entry::Vacant(v) => {
1969                     let vcode_constant = self.constants.push(data);
1970                     v.insert(vcode_constant);
1971                     vcode_constant
1972                 }
1973                 Entry::Occupied(o) => *o.get(),
1974             },
1975         }
1976     }
1977 
1978     /// Return the number of constants inserted.
1979     pub fn len(&self) -> usize {
1980         self.constants.len()
1981     }
1982 
1983     /// Iterate over the `VCodeConstant` keys inserted in this structure.
1984     pub fn keys(&self) -> Keys<VCodeConstant> {
1985         self.constants.keys()
1986     }
1987 
1988     /// Iterate over the `VCodeConstant` keys and the data (as a byte slice) inserted in this
1989     /// structure.
1990     pub fn iter(&self) -> impl Iterator<Item = (VCodeConstant, &VCodeConstantData)> {
1991         self.constants.iter()
1992     }
1993 
1994     /// Returns the data associated with the specified constant.
1995     pub fn get(&self, c: VCodeConstant) -> &VCodeConstantData {
1996         &self.constants[c]
1997     }
1998 
1999     /// Checks if the given [VCodeConstantData] is registered as
2000     /// used by the pool.
2001     pub fn pool_uses(&self, constant: &VCodeConstantData) -> bool {
2002         match constant {
2003             VCodeConstantData::Pool(c, _) => self.pool_uses.contains_key(c),
2004             _ => false,
2005         }
2006     }
2007 }
2008 
2009 /// A use of a constant by one or more VCode instructions; see [VCodeConstants].
2010 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
2011 pub struct VCodeConstant(u32);
2012 entity_impl!(VCodeConstant);
2013 
2014 /// Identify the different types of constant that can be inserted into [VCodeConstants]. Tracking
2015 /// these separately instead of as raw byte buffers allows us to avoid some duplication.
2016 pub enum VCodeConstantData {
2017     /// A constant already present in the Cranelift IR
2018     /// [ConstantPool](crate::ir::constant::ConstantPool).
2019     Pool(Constant, ConstantData),
2020     /// A reference to a well-known constant value that is statically encoded within the compiler.
2021     WellKnown(&'static [u8]),
2022     /// A constant value generated during lowering; the value may depend on the instruction context
2023     /// which makes it difficult to de-duplicate--if possible, use other variants.
2024     Generated(ConstantData),
2025     /// A constant of at most 64 bits. These are deduplicated as
2026     /// well. Stored as a fixed-size array of `u8` so that we do not
2027     /// encounter endianness problems when cross-compiling.
2028     U64([u8; 8]),
2029 }
2030 impl VCodeConstantData {
2031     /// Retrieve the constant data as a byte slice.
2032     pub fn as_slice(&self) -> &[u8] {
2033         match self {
2034             VCodeConstantData::Pool(_, d) | VCodeConstantData::Generated(d) => d.as_slice(),
2035             VCodeConstantData::WellKnown(d) => d,
2036             VCodeConstantData::U64(value) => &value[..],
2037         }
2038     }
2039 
2040     /// Calculate the alignment of the constant data.
2041     pub fn alignment(&self) -> u32 {
2042         if self.as_slice().len() <= 8 { 8 } else { 16 }
2043     }
2044 }
2045 
2046 #[cfg(test)]
2047 mod test {
2048     use super::*;
2049     use core::mem::size_of;
2050 
2051     #[test]
2052     fn size_of_constant_structs() {
2053         assert_eq!(size_of::<Constant>(), 4);
2054         assert_eq!(size_of::<VCodeConstant>(), 4);
2055         assert_eq!(size_of::<ConstantData>(), 3 * size_of::<usize>());
2056         assert_eq!(size_of::<VCodeConstantData>(), 4 * size_of::<usize>());
2057         assert_eq!(
2058             size_of::<PrimaryMap<VCodeConstant, VCodeConstantData>>(),
2059             3 * size_of::<usize>()
2060         );
2061         // TODO The VCodeConstants structure's memory size could be further optimized.
2062         // With certain versions of Rust, each `HashMap` in `VCodeConstants` occupied at
2063         // least 48 bytes, making an empty `VCodeConstants` cost 120 bytes.
2064     }
2065 }
2066