1 //! This implements the VCode container: a CFG of Insts that have been lowered.
2 //!
3 //! VCode is virtual-register code. An instruction in VCode is almost a machine
4 //! instruction; however, its register slots can refer to virtual registers in
5 //! addition to real machine registers.
6 //!
7 //! VCode is structured with traditional basic blocks, and
8 //! each block must be terminated by an unconditional branch (one target), a
9 //! conditional branch (two targets), or a return (no targets). Note that this
10 //! slightly differs from the machine code of most ISAs: in most ISAs, a
11 //! conditional branch has one target (and the not-taken case falls through).
12 //! However, we expect that machine backends will elide branches to the following
13 //! block (i.e., zero-offset jumps), and will be able to codegen a branch-cond /
14 //! branch-uncond pair if *both* targets are not fallthrough. This allows us to
15 //! play with layout prior to final binary emission, as well, if we want.
16 //!
17 //! See the main module comment in `mod.rs` for more details on the VCode-based
18 //! backend pipeline.
19 
20 use crate::ir::pcc::*;
21 use crate::ir::{self, types, Constant, ConstantData, ValueLabel};
22 use crate::machinst::*;
23 use crate::ranges::Ranges;
24 use crate::timing;
25 use crate::trace;
26 use crate::CodegenError;
27 use crate::{LabelValueLoc, ValueLocRange};
28 use regalloc2::{
29     Edit, Function as RegallocFunction, InstOrEdit, InstRange, MachineEnv, Operand,
30     OperandConstraint, OperandKind, PRegSet, RegClass,
31 };
32 use rustc_hash::FxHashMap;
33 
34 use core::mem::take;
35 use cranelift_entity::{entity_impl, Keys};
36 use std::collections::hash_map::Entry;
37 use std::collections::HashMap;
38 use std::fmt;
39 
40 /// Index referring to an instruction in VCode.
41 pub type InsnIndex = regalloc2::Inst;
42 
43 /// Extension trait for `InsnIndex` to allow conversion to a
44 /// `BackwardsInsnIndex`.
45 trait ToBackwardsInsnIndex {
46     fn to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex;
47 }
48 
49 impl ToBackwardsInsnIndex for InsnIndex {
50     fn to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex {
51         BackwardsInsnIndex::new(num_insts - self.index() - 1)
52     }
53 }
54 
55 /// An index referring to an instruction in the VCode when it is backwards,
56 /// during VCode construction.
57 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
58 #[cfg_attr(
59     feature = "enable-serde",
60     derive(::serde::Serialize, ::serde::Deserialize)
61 )]
62 pub struct BackwardsInsnIndex(InsnIndex);
63 
64 impl BackwardsInsnIndex {
65     pub fn new(i: usize) -> Self {
66         BackwardsInsnIndex(InsnIndex::new(i))
67     }
68 }
69 
70 /// Index referring to a basic block in VCode.
71 pub type BlockIndex = regalloc2::Block;
72 
73 /// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be
74 /// a `MachInst` and it must be able to emit itself at least to a `SizeCodeSink`.
75 pub trait VCodeInst: MachInst + MachInstEmit {}
76 impl<I: MachInst + MachInstEmit> VCodeInst for I {}
77 
78 /// A function in "VCode" (virtualized-register code) form, after
79 /// lowering.  This is essentially a standard CFG of basic blocks,
80 /// where each basic block consists of lowered instructions produced
81 /// by the machine-specific backend.
82 ///
83 /// Note that the VCode is immutable once produced, and is not
84 /// modified by register allocation in particular. Rather, register
85 /// allocation on the `VCode` produces a separate `regalloc2::Output`
86 /// struct, and this can be passed to `emit`. `emit` in turn does not
87 /// modify the vcode, but produces an `EmitResult`, which contains the
88 /// machine code itself, and the associated disassembly and/or
89 /// metadata as requested.
90 pub struct VCode<I: VCodeInst> {
91     /// VReg IR-level types.
92     vreg_types: Vec<Type>,
93 
94     /// Lowered machine instructions in order corresponding to the original IR.
95     insts: Vec<I>,
96 
97     /// A map from backwards instruction index to the user stack map for that
98     /// instruction.
99     ///
100     /// This is a sparse side table that only has entries for instructions that
101     /// are safepoints, and only for a subset of those that have an associated
102     /// user stack map.
103     user_stack_maps: FxHashMap<BackwardsInsnIndex, ir::UserStackMap>,
104 
105     /// Operands: pre-regalloc references to virtual registers with
106     /// constraints, in one flattened array. This allows the regalloc
107     /// to efficiently access all operands without requiring expensive
108     /// matches or method invocations on insts.
109     operands: Vec<Operand>,
110 
111     /// Operand index ranges: for each instruction in `insts`, there
112     /// is a tuple here providing the range in `operands` for that
113     /// instruction's operands.
114     operand_ranges: Ranges,
115 
116     /// Clobbers: a sparse map from instruction indices to clobber masks.
117     clobbers: FxHashMap<InsnIndex, PRegSet>,
118 
119     /// Source locations for each instruction. (`SourceLoc` is a `u32`, so it is
120     /// reasonable to keep one of these per instruction.)
121     srclocs: Vec<RelSourceLoc>,
122 
123     /// Entry block.
124     entry: BlockIndex,
125 
126     /// Block instruction indices.
127     block_ranges: Ranges,
128 
129     /// Block successors: index range in the `block_succs` list.
130     block_succ_range: Ranges,
131 
132     /// Block successor lists, concatenated into one vec. The
133     /// `block_succ_range` list of tuples above gives (start, end)
134     /// ranges within this list that correspond to each basic block's
135     /// successors.
136     block_succs: Vec<regalloc2::Block>,
137 
138     /// Block predecessors: index range in the `block_preds` list.
139     block_pred_range: Ranges,
140 
141     /// Block predecessor lists, concatenated into one vec. The
142     /// `block_pred_range` list of tuples above gives (start, end)
143     /// ranges within this list that correspond to each basic block's
144     /// predecessors.
145     block_preds: Vec<regalloc2::Block>,
146 
147     /// Block parameters: index range in `block_params` below.
148     block_params_range: Ranges,
149 
150     /// Block parameter lists, concatenated into one vec. The
151     /// `block_params_range` list of tuples above gives (start, end)
152     /// ranges within this list that correspond to each basic block's
153     /// blockparam vregs.
154     block_params: Vec<regalloc2::VReg>,
155 
156     /// Outgoing block arguments on branch instructions, concatenated
157     /// into one list.
158     ///
159     /// Note that this is conceptually a 3D array: we have a VReg list
160     /// per block, per successor. We flatten those three dimensions
161     /// into this 1D vec, then store index ranges in two levels of
162     /// indirection.
163     ///
164     /// Indexed by the indices in `branch_block_arg_succ_range`.
165     branch_block_args: Vec<regalloc2::VReg>,
166 
167     /// Array of sequences of (start, end) tuples in
168     /// `branch_block_args`, one for each successor; these sequences
169     /// for each block are concatenated.
170     ///
171     /// Indexed by the indices in `branch_block_arg_succ_range`.
172     branch_block_arg_range: Ranges,
173 
174     /// For a given block, indices in `branch_block_arg_range`
175     /// corresponding to all of its successors.
176     branch_block_arg_succ_range: Ranges,
177 
178     /// Block-order information.
179     block_order: BlockLoweringOrder,
180 
181     /// ABI object.
182     pub(crate) abi: Callee<I::ABIMachineSpec>,
183 
184     /// Constant information used during code emission. This should be
185     /// immutable across function compilations within the same module.
186     emit_info: I::Info,
187 
188     /// Constants.
189     pub(crate) constants: VCodeConstants,
190 
191     /// Value labels for debuginfo attached to vregs.
192     debug_value_labels: Vec<(VReg, InsnIndex, InsnIndex, u32)>,
193 
194     pub(crate) sigs: SigSet,
195 
196     /// Facts on VRegs, for proof-carrying code verification.
197     facts: Vec<Option<Fact>>,
198 }
199 
200 /// The result of `VCode::emit`. Contains all information computed
201 /// during emission: actual machine code, optionally a disassembly,
202 /// and optionally metadata about the code layout.
203 pub struct EmitResult {
204     /// The MachBuffer containing the machine code.
205     pub buffer: MachBufferFinalized<Stencil>,
206 
207     /// Offset of each basic block, recorded during emission. Computed
208     /// only if `debug_value_labels` is non-empty.
209     pub bb_offsets: Vec<CodeOffset>,
210 
211     /// Final basic-block edges, in terms of code offsets of
212     /// bb-starts. Computed only if `debug_value_labels` is non-empty.
213     pub bb_edges: Vec<(CodeOffset, CodeOffset)>,
214 
215     /// Final length of function body.
216     pub func_body_len: CodeOffset,
217 
218     /// The pretty-printed disassembly, if any. This uses the same
219     /// pretty-printing for MachInsts as the pre-regalloc VCode Debug
220     /// implementation, but additionally includes the prologue and
221     /// epilogue(s), and makes use of the regalloc results.
222     pub disasm: Option<String>,
223 
224     /// Offsets of sized stackslots.
225     pub sized_stackslot_offsets: PrimaryMap<StackSlot, u32>,
226 
227     /// Offsets of dynamic stackslots.
228     pub dynamic_stackslot_offsets: PrimaryMap<DynamicStackSlot, u32>,
229 
230     /// Value-labels information (debug metadata).
231     pub value_labels_ranges: ValueLabelsRanges,
232 
233     /// Stack frame size.
234     pub frame_size: u32,
235 }
236 
237 /// A builder for a VCode function body.
238 ///
239 /// This builder has the ability to accept instructions in either
240 /// forward or reverse order, depending on the pass direction that
241 /// produces the VCode. The lowering from CLIF to VCode<MachInst>
242 /// ordinarily occurs in reverse order (in order to allow instructions
243 /// to be lowered only if used, and not merged) so a reversal will
244 /// occur at the end of lowering to ensure the VCode is in machine
245 /// order.
246 ///
247 /// If built in reverse, block and instruction indices used once the
248 /// VCode is built are relative to the final (reversed) order, not the
249 /// order of construction. Note that this means we do not know the
250 /// final block or instruction indices when building, so we do not
251 /// hand them out. (The user is assumed to know them when appending
252 /// terminator instructions with successor blocks.)
253 pub struct VCodeBuilder<I: VCodeInst> {
254     /// In-progress VCode.
255     pub(crate) vcode: VCode<I>,
256 
257     /// In what direction is the build occurring?
258     direction: VCodeBuildDirection,
259 
260     /// Debug-value label in-progress map, keyed by label. For each
261     /// label, we keep disjoint ranges mapping to vregs. We'll flatten
262     /// this into (vreg, range, label) tuples when done.
263     debug_info: FxHashMap<ValueLabel, Vec<(InsnIndex, InsnIndex, VReg)>>,
264 }
265 
266 /// Direction in which a VCodeBuilder builds VCode.
267 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
268 pub enum VCodeBuildDirection {
269     // TODO: add `Forward` once we need it and can test it adequately.
270     /// Backward-build pass: we expect the producer to call `emit()`
271     /// with instructions in reverse program order within each block.
272     Backward,
273 }
274 
275 impl<I: VCodeInst> VCodeBuilder<I> {
276     /// Create a new VCodeBuilder.
277     pub fn new(
278         sigs: SigSet,
279         abi: Callee<I::ABIMachineSpec>,
280         emit_info: I::Info,
281         block_order: BlockLoweringOrder,
282         constants: VCodeConstants,
283         direction: VCodeBuildDirection,
284     ) -> Self {
285         let vcode = VCode::new(sigs, abi, emit_info, block_order, constants);
286 
287         VCodeBuilder {
288             vcode,
289             direction,
290             debug_info: FxHashMap::default(),
291         }
292     }
293 
294     pub fn init_retval_area(&mut self, vregs: &mut VRegAllocator<I>) -> CodegenResult<()> {
295         self.vcode.abi.init_retval_area(&self.vcode.sigs, vregs)
296     }
297 
298     /// Access the ABI object.
299     pub fn abi(&self) -> &Callee<I::ABIMachineSpec> {
300         &self.vcode.abi
301     }
302 
303     /// Access the ABI object.
304     pub fn abi_mut(&mut self) -> &mut Callee<I::ABIMachineSpec> {
305         &mut self.vcode.abi
306     }
307 
308     pub fn sigs(&self) -> &SigSet {
309         &self.vcode.sigs
310     }
311 
312     pub fn sigs_mut(&mut self) -> &mut SigSet {
313         &mut self.vcode.sigs
314     }
315 
316     /// Access to the BlockLoweringOrder object.
317     pub fn block_order(&self) -> &BlockLoweringOrder {
318         &self.vcode.block_order
319     }
320 
321     /// Set the current block as the entry block.
322     pub fn set_entry(&mut self, block: BlockIndex) {
323         self.vcode.entry = block;
324     }
325 
326     /// End the current basic block. Must be called after emitting vcode insts
327     /// for IR insts and prior to ending the function (building the VCode).
328     pub fn end_bb(&mut self) {
329         let end_idx = self.vcode.insts.len();
330         // Add the instruction index range to the list of blocks.
331         self.vcode.block_ranges.push_end(end_idx);
332         // End the successors list.
333         let succ_end = self.vcode.block_succs.len();
334         self.vcode.block_succ_range.push_end(succ_end);
335         // End the blockparams list.
336         let block_params_end = self.vcode.block_params.len();
337         self.vcode.block_params_range.push_end(block_params_end);
338         // End the branch blockparam args list.
339         let branch_block_arg_succ_end = self.vcode.branch_block_arg_range.len();
340         self.vcode
341             .branch_block_arg_succ_range
342             .push_end(branch_block_arg_succ_end);
343     }
344 
345     pub fn add_block_param(&mut self, param: VirtualReg) {
346         self.vcode.block_params.push(param.into());
347     }
348 
349     fn add_branch_args_for_succ(&mut self, args: &[Reg]) {
350         self.vcode
351             .branch_block_args
352             .extend(args.iter().map(|&arg| VReg::from(arg)));
353         let end = self.vcode.branch_block_args.len();
354         self.vcode.branch_block_arg_range.push_end(end);
355     }
356 
357     /// Push an instruction for the current BB and current IR inst
358     /// within the BB.
359     pub fn push(&mut self, insn: I, loc: RelSourceLoc) {
360         assert!(!insn.is_low_level_branch()); // These are not meant to be in VCode.
361         self.vcode.insts.push(insn);
362         self.vcode.srclocs.push(loc);
363     }
364 
365     /// Add a successor block with branch args.
366     pub fn add_succ(&mut self, block: BlockIndex, args: &[Reg]) {
367         self.vcode.block_succs.push(block);
368         self.add_branch_args_for_succ(args);
369     }
370 
371     /// Add a debug value label to a register.
372     pub fn add_value_label(&mut self, reg: Reg, label: ValueLabel) {
373         // We'll fix up labels in reverse(). Because we're generating
374         // code bottom-to-top, the liverange of the label goes *from*
375         // the last index at which was defined (or 0, which is the end
376         // of the eventual function) *to* just this instruction, and
377         // no further.
378         let inst = InsnIndex::new(self.vcode.insts.len());
379         let labels = self.debug_info.entry(label).or_insert_with(|| vec![]);
380         let last = labels
381             .last()
382             .map(|(_start, end, _vreg)| *end)
383             .unwrap_or(InsnIndex::new(0));
384         labels.push((last, inst, reg.into()));
385     }
386 
387     /// Access the constants.
388     pub fn constants(&mut self) -> &mut VCodeConstants {
389         &mut self.vcode.constants
390     }
391 
392     fn compute_preds_from_succs(&mut self) {
393         // Do a linear-time counting sort: first determine how many
394         // times each block appears as a successor.
395         let mut starts = vec![0u32; self.vcode.num_blocks()];
396         for succ in &self.vcode.block_succs {
397             starts[succ.index()] += 1;
398         }
399 
400         // Determine for each block the starting index where that
401         // block's predecessors should go. This is equivalent to the
402         // ranges we need to store in block_pred_range.
403         self.vcode.block_pred_range.reserve(starts.len());
404         let mut end = 0;
405         for count in starts.iter_mut() {
406             let start = end;
407             end += *count;
408             *count = start;
409             self.vcode.block_pred_range.push_end(end as usize);
410         }
411         let end = end as usize;
412         debug_assert_eq!(end, self.vcode.block_succs.len());
413 
414         // Walk over the successors again, this time grouped by
415         // predecessor, and push the predecessor at the current
416         // starting position of each of its successors. We build
417         // each group of predecessors in whatever order Ranges::iter
418         // returns them; regalloc2 doesn't care.
419         self.vcode.block_preds.resize(end, BlockIndex::invalid());
420         for (pred, range) in self.vcode.block_succ_range.iter() {
421             let pred = BlockIndex::new(pred);
422             for succ in &self.vcode.block_succs[range] {
423                 let pos = &mut starts[succ.index()];
424                 self.vcode.block_preds[*pos as usize] = pred;
425                 *pos += 1;
426             }
427         }
428         debug_assert!(self.vcode.block_preds.iter().all(|pred| pred.is_valid()));
429     }
430 
431     /// Called once, when a build in Backward order is complete, to
432     /// perform the overall reversal (into final forward order) and
433     /// finalize metadata accordingly.
434     fn reverse_and_finalize(&mut self, vregs: &VRegAllocator<I>) {
435         let n_insts = self.vcode.insts.len();
436         if n_insts == 0 {
437             return;
438         }
439 
440         // Reverse the per-block and per-inst sequences.
441         self.vcode.block_ranges.reverse_index();
442         self.vcode.block_ranges.reverse_target(n_insts);
443         // block_params_range is indexed by block (and blocks were
444         // traversed in reverse) so we reverse it; but block-param
445         // sequences in the concatenated vec can remain in reverse
446         // order (it is effectively an arena of arbitrarily-placed
447         // referenced sequences).
448         self.vcode.block_params_range.reverse_index();
449         // Likewise, we reverse block_succ_range, but the block_succ
450         // concatenated array can remain as-is.
451         self.vcode.block_succ_range.reverse_index();
452         self.vcode.insts.reverse();
453         self.vcode.srclocs.reverse();
454         // Likewise, branch_block_arg_succ_range is indexed by block
455         // so must be reversed.
456         self.vcode.branch_block_arg_succ_range.reverse_index();
457 
458         // To translate an instruction index *endpoint* in reversed
459         // order to forward order, compute `n_insts - i`.
460         //
461         // Why not `n_insts - 1 - i`? That would be correct to
462         // translate an individual instruction index (for ten insts 0
463         // to 9 inclusive, inst 0 becomes 9, and inst 9 becomes
464         // 0). But for the usual inclusive-start, exclusive-end range
465         // idiom, inclusive starts become exclusive ends and
466         // vice-versa, so e.g. an (inclusive) start of 0 becomes an
467         // (exclusive) end of 10.
468         let translate = |inst: InsnIndex| InsnIndex::new(n_insts - inst.index());
469 
470         // Generate debug-value labels based on per-label maps.
471         for (label, tuples) in &self.debug_info {
472             for &(start, end, vreg) in tuples {
473                 let vreg = vregs.resolve_vreg_alias(vreg);
474                 let fwd_start = translate(end);
475                 let fwd_end = translate(start);
476                 self.vcode
477                     .debug_value_labels
478                     .push((vreg, fwd_start, fwd_end, label.as_u32()));
479             }
480         }
481 
482         // Now sort debug value labels by VReg, as required
483         // by regalloc2.
484         self.vcode
485             .debug_value_labels
486             .sort_unstable_by_key(|(vreg, _, _, _)| *vreg);
487     }
488 
489     fn collect_operands(&mut self, vregs: &VRegAllocator<I>) {
490         let allocatable = PRegSet::from(self.vcode.machine_env());
491         for (i, insn) in self.vcode.insts.iter_mut().enumerate() {
492             // Push operands from the instruction onto the operand list.
493             //
494             // We rename through the vreg alias table as we collect
495             // the operands. This is better than a separate post-pass
496             // over operands, because it has more cache locality:
497             // operands only need to pass through L1 once. This is
498             // also better than renaming instructions'
499             // operands/registers while lowering, because here we only
500             // need to do the `match` over the instruction to visit
501             // its register fields (which is slow, branchy code) once.
502 
503             let mut op_collector =
504                 OperandCollector::new(&mut self.vcode.operands, allocatable, |vreg| {
505                     vregs.resolve_vreg_alias(vreg)
506                 });
507             insn.get_operands(&mut op_collector);
508             let (ops, clobbers) = op_collector.finish();
509             self.vcode.operand_ranges.push_end(ops);
510 
511             if clobbers != PRegSet::default() {
512                 self.vcode.clobbers.insert(InsnIndex::new(i), clobbers);
513             }
514 
515             if let Some((dst, src)) = insn.is_move() {
516                 // We should never see non-virtual registers present in move
517                 // instructions.
518                 assert!(
519                     src.is_virtual(),
520                     "the real register {src:?} was used as the source of a move instruction"
521                 );
522                 assert!(
523                     dst.to_reg().is_virtual(),
524                     "the real register {:?} was used as the destination of a move instruction",
525                     dst.to_reg()
526                 );
527             }
528         }
529 
530         // Translate blockparam args via the vreg aliases table as well.
531         for arg in &mut self.vcode.branch_block_args {
532             let new_arg = vregs.resolve_vreg_alias(*arg);
533             trace!("operandcollector: block arg {:?} -> {:?}", arg, new_arg);
534             *arg = new_arg;
535         }
536     }
537 
538     /// Build the final VCode.
539     pub fn build(mut self, mut vregs: VRegAllocator<I>) -> VCode<I> {
540         self.vcode.vreg_types = take(&mut vregs.vreg_types);
541         self.vcode.facts = take(&mut vregs.facts);
542 
543         if self.direction == VCodeBuildDirection::Backward {
544             self.reverse_and_finalize(&vregs);
545         }
546         self.collect_operands(&vregs);
547 
548         self.compute_preds_from_succs();
549         self.vcode.debug_value_labels.sort_unstable();
550 
551         // At this point, nothing in the vcode should mention any
552         // VReg which has been aliased. All the appropriate rewriting
553         // should have happened above. Just to be sure, let's
554         // double-check each field which has vregs.
555         // Note: can't easily check vcode.insts, resolved in collect_operands.
556         // Operands are resolved in collect_operands.
557         vregs.debug_assert_no_vreg_aliases(self.vcode.operands.iter().map(|op| op.vreg()));
558         // Currently block params are never aliased to another vreg.
559         vregs.debug_assert_no_vreg_aliases(self.vcode.block_params.iter().copied());
560         // Branch block args are resolved in collect_operands.
561         vregs.debug_assert_no_vreg_aliases(self.vcode.branch_block_args.iter().copied());
562         // Debug value labels are resolved in reverse_and_finalize.
563         vregs.debug_assert_no_vreg_aliases(
564             self.vcode.debug_value_labels.iter().map(|&(vreg, ..)| vreg),
565         );
566         // Facts are resolved eagerly during set_vreg_alias.
567         vregs.debug_assert_no_vreg_aliases(
568             self.vcode
569                 .facts
570                 .iter()
571                 .zip(&vregs.vreg_types)
572                 .enumerate()
573                 .filter(|(_, (fact, _))| fact.is_some())
574                 .map(|(vreg, (_, &ty))| {
575                     let (regclasses, _) = I::rc_for_type(ty).unwrap();
576                     VReg::new(vreg, regclasses[0])
577                 }),
578         );
579 
580         self.vcode
581     }
582 
583     /// Add a user stack map for the associated instruction.
584     pub fn add_user_stack_map(
585         &mut self,
586         inst: BackwardsInsnIndex,
587         entries: &[ir::UserStackMapEntry],
588     ) {
589         let stack_map = ir::UserStackMap::new(entries, self.vcode.abi.sized_stackslot_offsets());
590         let old_entry = self.vcode.user_stack_maps.insert(inst, stack_map);
591         debug_assert!(old_entry.is_none());
592     }
593 }
594 
595 const NO_INST_OFFSET: CodeOffset = u32::MAX;
596 
597 impl<I: VCodeInst> VCode<I> {
598     /// New empty VCode.
599     fn new(
600         sigs: SigSet,
601         abi: Callee<I::ABIMachineSpec>,
602         emit_info: I::Info,
603         block_order: BlockLoweringOrder,
604         constants: VCodeConstants,
605     ) -> Self {
606         let n_blocks = block_order.lowered_order().len();
607         VCode {
608             sigs,
609             vreg_types: vec![],
610             insts: Vec::with_capacity(10 * n_blocks),
611             user_stack_maps: FxHashMap::default(),
612             operands: Vec::with_capacity(30 * n_blocks),
613             operand_ranges: Ranges::with_capacity(10 * n_blocks),
614             clobbers: FxHashMap::default(),
615             srclocs: Vec::with_capacity(10 * n_blocks),
616             entry: BlockIndex::new(0),
617             block_ranges: Ranges::with_capacity(n_blocks),
618             block_succ_range: Ranges::with_capacity(n_blocks),
619             block_succs: Vec::with_capacity(n_blocks),
620             block_pred_range: Ranges::default(),
621             block_preds: Vec::new(),
622             block_params_range: Ranges::with_capacity(n_blocks),
623             block_params: Vec::with_capacity(5 * n_blocks),
624             branch_block_args: Vec::with_capacity(10 * n_blocks),
625             branch_block_arg_range: Ranges::with_capacity(2 * n_blocks),
626             branch_block_arg_succ_range: Ranges::with_capacity(n_blocks),
627             block_order,
628             abi,
629             emit_info,
630             constants,
631             debug_value_labels: vec![],
632             facts: vec![],
633         }
634     }
635 
636     /// Get the ABI-dependent MachineEnv for managing register allocation.
637     pub fn machine_env(&self) -> &MachineEnv {
638         self.abi.machine_env(&self.sigs)
639     }
640 
641     /// Get the number of blocks. Block indices will be in the range `0 ..
642     /// (self.num_blocks() - 1)`.
643     pub fn num_blocks(&self) -> usize {
644         self.block_ranges.len()
645     }
646 
647     /// The number of lowered instructions.
648     pub fn num_insts(&self) -> usize {
649         self.insts.len()
650     }
651 
652     fn compute_clobbers(&self, regalloc: &regalloc2::Output) -> Vec<Writable<RealReg>> {
653         let mut clobbered = PRegSet::default();
654 
655         // All moves are included in clobbers.
656         for (_, Edit::Move { to, .. }) in &regalloc.edits {
657             if let Some(preg) = to.as_reg() {
658                 clobbered.add(preg);
659             }
660         }
661 
662         for (i, range) in self.operand_ranges.iter() {
663             // Skip this instruction if not "included in clobbers" as
664             // per the MachInst. (Some backends use this to implement
665             // ABI specifics; e.g., excluding calls of the same ABI as
666             // the current function from clobbers, because by
667             // definition everything clobbered by the call can be
668             // clobbered by this function without saving as well.)
669             if !self.insts[i].is_included_in_clobbers() {
670                 continue;
671             }
672 
673             let operands = &self.operands[range.clone()];
674             let allocs = &regalloc.allocs[range];
675             for (operand, alloc) in operands.iter().zip(allocs.iter()) {
676                 if operand.kind() == OperandKind::Def {
677                     if let Some(preg) = alloc.as_reg() {
678                         clobbered.add(preg);
679                     }
680                 }
681             }
682 
683             // Also add explicitly-clobbered registers.
684             if let Some(&inst_clobbered) = self.clobbers.get(&InsnIndex::new(i)) {
685                 clobbered.union_from(inst_clobbered);
686             }
687         }
688 
689         clobbered
690             .into_iter()
691             .map(|preg| Writable::from_reg(RealReg::from(preg)))
692             .collect()
693     }
694 
695     /// Emit the instructions to a `MachBuffer`, containing fixed-up
696     /// code and external reloc/trap/etc. records ready for use. Takes
697     /// the regalloc results as well.
698     ///
699     /// Returns the machine code itself, and optionally metadata
700     /// and/or a disassembly, as an `EmitResult`. The `VCode` itself
701     /// is consumed by the emission process.
702     pub fn emit(
703         mut self,
704         regalloc: &regalloc2::Output,
705         want_disasm: bool,
706         flags: &settings::Flags,
707         ctrl_plane: &mut ControlPlane,
708     ) -> EmitResult
709     where
710         I: VCodeInst,
711     {
712         // To write into disasm string.
713         use core::fmt::Write;
714 
715         let _tt = timing::vcode_emit();
716         let mut buffer = MachBuffer::new();
717         let mut bb_starts: Vec<Option<CodeOffset>> = vec![];
718 
719         // The first M MachLabels are reserved for block indices.
720         buffer.reserve_labels_for_blocks(self.num_blocks());
721 
722         // Register all allocated constants with the `MachBuffer` to ensure that
723         // any references to the constants during instructions can be handled
724         // correctly.
725         buffer.register_constants(&self.constants);
726 
727         // Construct the final order we emit code in: cold blocks at the end.
728         let mut final_order: SmallVec<[BlockIndex; 16]> = smallvec![];
729         let mut cold_blocks: SmallVec<[BlockIndex; 16]> = smallvec![];
730         for block in 0..self.num_blocks() {
731             let block = BlockIndex::new(block);
732             if self.block_order.is_cold(block) {
733                 cold_blocks.push(block);
734             } else {
735                 final_order.push(block);
736             }
737         }
738         final_order.extend(cold_blocks.clone());
739 
740         // Compute/save info we need for the prologue: clobbers and
741         // number of spillslots.
742         //
743         // We clone `abi` here because we will mutate it as we
744         // generate the prologue and set other info, but we can't
745         // mutate `VCode`. The info it usually carries prior to
746         // setting clobbers is fairly minimal so this should be
747         // relatively cheap.
748         let clobbers = self.compute_clobbers(regalloc);
749         self.abi
750             .compute_frame_layout(&self.sigs, regalloc.num_spillslots, clobbers);
751 
752         // Emit blocks.
753         let mut cur_srcloc = None;
754         let mut last_offset = None;
755         let mut inst_offsets = vec![];
756         let mut state = I::State::new(&self.abi, std::mem::take(ctrl_plane));
757 
758         let mut disasm = String::new();
759 
760         if !self.debug_value_labels.is_empty() {
761             inst_offsets.resize(self.insts.len(), NO_INST_OFFSET);
762         }
763 
764         // Count edits per block ahead of time; this is needed for
765         // lookahead island emission. (We could derive it per-block
766         // with binary search in the edit list, but it's more
767         // efficient to do it in one pass here.)
768         let mut ra_edits_per_block: SmallVec<[u32; 64]> = smallvec![];
769         let mut edit_idx = 0;
770         for block in 0..self.num_blocks() {
771             let end_inst = InsnIndex::new(self.block_ranges.get(block).end);
772             let start_edit_idx = edit_idx;
773             while edit_idx < regalloc.edits.len() && regalloc.edits[edit_idx].0.inst() < end_inst {
774                 edit_idx += 1;
775             }
776             let end_edit_idx = edit_idx;
777             ra_edits_per_block.push((end_edit_idx - start_edit_idx) as u32);
778         }
779 
780         let is_forward_edge_cfi_enabled = self.abi.is_forward_edge_cfi_enabled();
781         let mut bb_padding = match flags.bb_padding_log2_minus_one() {
782             0 => Vec::new(),
783             n => vec![0; 1 << (n - 1)],
784         };
785         let mut total_bb_padding = 0;
786 
787         for (block_order_idx, &block) in final_order.iter().enumerate() {
788             trace!("emitting block {:?}", block);
789 
790             // Call the new block hook for state
791             state.on_new_block();
792 
793             // Emit NOPs to align the block.
794             let new_offset = I::align_basic_block(buffer.cur_offset());
795             while new_offset > buffer.cur_offset() {
796                 // Pad with NOPs up to the aligned block offset.
797                 let nop = I::gen_nop((new_offset - buffer.cur_offset()) as usize);
798                 nop.emit(&mut buffer, &self.emit_info, &mut Default::default());
799             }
800             assert_eq!(buffer.cur_offset(), new_offset);
801 
802             let do_emit = |inst: &I,
803                            disasm: &mut String,
804                            buffer: &mut MachBuffer<I>,
805                            state: &mut I::State| {
806                 if want_disasm && !inst.is_args() {
807                     let mut s = state.clone();
808                     writeln!(disasm, "  {}", inst.pretty_print_inst(&mut s)).unwrap();
809                 }
810                 inst.emit(buffer, &self.emit_info, state);
811             };
812 
813             // Is this the first block? Emit the prologue directly if so.
814             if block == self.entry {
815                 trace!(" -> entry block");
816                 buffer.start_srcloc(Default::default());
817                 for inst in &self.abi.gen_prologue() {
818                     do_emit(&inst, &mut disasm, &mut buffer, &mut state);
819                 }
820                 buffer.end_srcloc();
821             }
822 
823             // Now emit the regular block body.
824 
825             buffer.bind_label(MachLabel::from_block(block), state.ctrl_plane_mut());
826 
827             if want_disasm {
828                 writeln!(&mut disasm, "block{}:", block.index()).unwrap();
829             }
830 
831             if flags.machine_code_cfg_info() {
832                 // Track BB starts. If we have backed up due to MachBuffer
833                 // branch opts, note that the removed blocks were removed.
834                 let cur_offset = buffer.cur_offset();
835                 if last_offset.is_some() && cur_offset <= last_offset.unwrap() {
836                     for i in (0..bb_starts.len()).rev() {
837                         if bb_starts[i].is_some() && cur_offset > bb_starts[i].unwrap() {
838                             break;
839                         }
840                         bb_starts[i] = None;
841                     }
842                 }
843                 bb_starts.push(Some(cur_offset));
844                 last_offset = Some(cur_offset);
845             }
846 
847             if let Some(block_start) = I::gen_block_start(
848                 self.block_order.is_indirect_branch_target(block),
849                 is_forward_edge_cfi_enabled,
850             ) {
851                 do_emit(&block_start, &mut disasm, &mut buffer, &mut state);
852             }
853 
854             for inst_or_edit in regalloc.block_insts_and_edits(&self, block) {
855                 match inst_or_edit {
856                     InstOrEdit::Inst(iix) => {
857                         if !self.debug_value_labels.is_empty() {
858                             // If we need to produce debug info,
859                             // record the offset of each instruction
860                             // so that we can translate value-label
861                             // ranges to machine-code offsets.
862 
863                             // Cold blocks violate monotonicity
864                             // assumptions elsewhere (that
865                             // instructions in inst-index order are in
866                             // order in machine code), so we omit
867                             // their offsets here. Value-label range
868                             // generation below will skip empty ranges
869                             // and ranges with to-offsets of zero.
870                             if !self.block_order.is_cold(block) {
871                                 inst_offsets[iix.index()] = buffer.cur_offset();
872                             }
873                         }
874 
875                         // Update the srcloc at this point in the buffer.
876                         let srcloc = self.srclocs[iix.index()];
877                         if cur_srcloc != Some(srcloc) {
878                             if cur_srcloc.is_some() {
879                                 buffer.end_srcloc();
880                             }
881                             buffer.start_srcloc(srcloc);
882                             cur_srcloc = Some(srcloc);
883                         }
884 
885                         // If this is a safepoint, compute a stack map
886                         // and pass it to the emit state.
887                         let stack_map_disasm = if self.insts[iix.index()].is_safepoint() {
888                             let (user_stack_map, user_stack_map_disasm) = {
889                                 // The `user_stack_maps` is keyed by reverse
890                                 // instruction index, so we must flip the
891                                 // index. We can't put this into a helper method
892                                 // due to borrowck issues because parts of
893                                 // `self` are borrowed mutably elsewhere in this
894                                 // function.
895                                 let index = iix.to_backwards_insn_index(self.num_insts());
896                                 let user_stack_map = self.user_stack_maps.remove(&index);
897                                 let user_stack_map_disasm =
898                                     user_stack_map.as_ref().map(|m| format!("  ; {m:?}"));
899                                 (user_stack_map, user_stack_map_disasm)
900                             };
901 
902                             state.pre_safepoint(user_stack_map);
903 
904                             user_stack_map_disasm
905                         } else {
906                             None
907                         };
908 
909                         // If the instruction we are about to emit is
910                         // a return, place an epilogue at this point
911                         // (and don't emit the return; the actual
912                         // epilogue will contain it).
913                         if self.insts[iix.index()].is_term() == MachTerminator::Ret {
914                             for inst in self.abi.gen_epilogue() {
915                                 do_emit(&inst, &mut disasm, &mut buffer, &mut state);
916                             }
917                         } else {
918                             // Update the operands for this inst using the
919                             // allocations from the regalloc result.
920                             let mut allocs = regalloc.inst_allocs(iix).iter();
921                             self.insts[iix.index()].get_operands(
922                                 &mut |reg: &mut Reg, constraint, _kind, _pos| {
923                                     let alloc = allocs
924                                         .next()
925                                         .expect("enough allocations for all operands")
926                                         .as_reg()
927                                         .expect("only register allocations, not stack allocations")
928                                         .into();
929 
930                                     if let OperandConstraint::FixedReg(rreg) = constraint {
931                                         debug_assert_eq!(Reg::from(rreg), alloc);
932                                     }
933                                     *reg = alloc;
934                                 },
935                             );
936                             debug_assert!(allocs.next().is_none());
937 
938                             // Emit the instruction!
939                             do_emit(
940                                 &self.insts[iix.index()],
941                                 &mut disasm,
942                                 &mut buffer,
943                                 &mut state,
944                             );
945                             if let Some(stack_map_disasm) = stack_map_disasm {
946                                 disasm.push_str(&stack_map_disasm);
947                                 disasm.push('\n');
948                             }
949                         }
950                     }
951 
952                     InstOrEdit::Edit(Edit::Move { from, to }) => {
953                         // Create a move/spill/reload instruction and
954                         // immediately emit it.
955                         match (from.as_reg(), to.as_reg()) {
956                             (Some(from), Some(to)) => {
957                                 // Reg-to-reg move.
958                                 let from_rreg = Reg::from(from);
959                                 let to_rreg = Writable::from_reg(Reg::from(to));
960                                 debug_assert_eq!(from.class(), to.class());
961                                 let ty = I::canonical_type_for_rc(from.class());
962                                 let mv = I::gen_move(to_rreg, from_rreg, ty);
963                                 do_emit(&mv, &mut disasm, &mut buffer, &mut state);
964                             }
965                             (Some(from), None) => {
966                                 // Spill from register to spillslot.
967                                 let to = to.as_stack().unwrap();
968                                 let from_rreg = RealReg::from(from);
969                                 let spill = self.abi.gen_spill(to, from_rreg);
970                                 do_emit(&spill, &mut disasm, &mut buffer, &mut state);
971                             }
972                             (None, Some(to)) => {
973                                 // Load from spillslot to register.
974                                 let from = from.as_stack().unwrap();
975                                 let to_rreg = Writable::from_reg(RealReg::from(to));
976                                 let reload = self.abi.gen_reload(to_rreg, from);
977                                 do_emit(&reload, &mut disasm, &mut buffer, &mut state);
978                             }
979                             (None, None) => {
980                                 panic!("regalloc2 should have eliminated stack-to-stack moves!");
981                             }
982                         }
983                     }
984                 }
985             }
986 
987             if cur_srcloc.is_some() {
988                 buffer.end_srcloc();
989                 cur_srcloc = None;
990             }
991 
992             // Do we need an island? Get the worst-case size of the next BB, add
993             // it to the optional padding behind the block, and pass this to the
994             // `MachBuffer` to determine if an island is necessary.
995             let worst_case_next_bb = if block_order_idx < final_order.len() - 1 {
996                 let next_block = final_order[block_order_idx + 1];
997                 let next_block_range = self.block_ranges.get(next_block.index());
998                 let next_block_size = next_block_range.len() as u32;
999                 let next_block_ra_insertions = ra_edits_per_block[next_block.index()];
1000                 I::worst_case_size() * (next_block_size + next_block_ra_insertions)
1001             } else {
1002                 0
1003             };
1004             let padding = if bb_padding.is_empty() {
1005                 0
1006             } else {
1007                 bb_padding.len() as u32 + I::LabelUse::ALIGN - 1
1008             };
1009             if buffer.island_needed(padding + worst_case_next_bb) {
1010                 buffer.emit_island(padding + worst_case_next_bb, ctrl_plane);
1011             }
1012 
1013             // Insert padding, if configured, to stress the `MachBuffer`'s
1014             // relocation and island calculations.
1015             //
1016             // Padding can get quite large during fuzzing though so place a
1017             // total cap on it where when a per-function threshold is exceeded
1018             // the padding is turned back down to zero. This avoids a small-ish
1019             // test case generating a GB+ memory footprint in Cranelift for
1020             // example.
1021             if !bb_padding.is_empty() {
1022                 buffer.put_data(&bb_padding);
1023                 buffer.align_to(I::LabelUse::ALIGN);
1024                 total_bb_padding += bb_padding.len();
1025                 if total_bb_padding > (150 << 20) {
1026                     bb_padding = Vec::new();
1027                 }
1028             }
1029         }
1030 
1031         debug_assert!(
1032             self.user_stack_maps.is_empty(),
1033             "any stack maps should have been consumed by instruction emission, still have: {:#?}",
1034             self.user_stack_maps,
1035         );
1036 
1037         // Do any optimizations on branches at tail of buffer, as if we had
1038         // bound one last label.
1039         buffer.optimize_branches(ctrl_plane);
1040 
1041         // emission state is not needed anymore, move control plane back out
1042         *ctrl_plane = state.take_ctrl_plane();
1043 
1044         let func_body_len = buffer.cur_offset();
1045 
1046         // Create `bb_edges` and final (filtered) `bb_starts`.
1047         let mut bb_edges = vec![];
1048         let mut bb_offsets = vec![];
1049         if flags.machine_code_cfg_info() {
1050             for block in 0..self.num_blocks() {
1051                 if bb_starts[block].is_none() {
1052                     // Block was deleted by MachBuffer; skip.
1053                     continue;
1054                 }
1055                 let from = bb_starts[block].unwrap();
1056 
1057                 bb_offsets.push(from);
1058                 // Resolve each `succ` label and add edges.
1059                 let succs = self.block_succs(BlockIndex::new(block));
1060                 for &succ in succs.iter() {
1061                     let to = buffer.resolve_label_offset(MachLabel::from_block(succ));
1062                     bb_edges.push((from, to));
1063                 }
1064             }
1065         }
1066 
1067         self.monotonize_inst_offsets(&mut inst_offsets[..], func_body_len);
1068         let value_labels_ranges =
1069             self.compute_value_labels_ranges(regalloc, &inst_offsets[..], func_body_len);
1070         let frame_size = self.abi.frame_size();
1071 
1072         EmitResult {
1073             buffer: buffer.finish(&self.constants, ctrl_plane),
1074             bb_offsets,
1075             bb_edges,
1076             func_body_len,
1077             disasm: if want_disasm { Some(disasm) } else { None },
1078             sized_stackslot_offsets: self.abi.sized_stackslot_offsets().clone(),
1079             dynamic_stackslot_offsets: self.abi.dynamic_stackslot_offsets().clone(),
1080             value_labels_ranges,
1081             frame_size,
1082         }
1083     }
1084 
1085     fn monotonize_inst_offsets(&self, inst_offsets: &mut [CodeOffset], func_body_len: u32) {
1086         if self.debug_value_labels.is_empty() {
1087             return;
1088         }
1089 
1090         // During emission, branch removal can make offsets of instructions incorrect.
1091         // Consider the following sequence: [insi][jmp0][jmp1][jmp2][insj]
1092         // It will be recorded as (say):    [30]  [34]  [38]  [42]  [<would be 46>]
1093         // When the jumps get removed we are left with (in "inst_offsets"):
1094         // [insi][jmp0][jmp1][jmp2][insj][...]
1095         // [30]  [34]  [38]  [42]  [34]
1096         // Which violates the monotonicity invariant. This method sets offsets of these
1097         // removed instructions such as to make them appear zero-sized:
1098         // [insi][jmp0][jmp1][jmp2][insj][...]
1099         // [30]  [34]  [34]  [34]  [34]
1100         //
1101         let mut next_offset = func_body_len;
1102         for inst_index in (0..(inst_offsets.len() - 1)).rev() {
1103             let inst_offset = inst_offsets[inst_index];
1104 
1105             // Not all instructions get their offsets recorded.
1106             if inst_offset == NO_INST_OFFSET {
1107                 continue;
1108             }
1109 
1110             if inst_offset > next_offset {
1111                 trace!(
1112                     "Fixing code offset of the removed Inst {}: {} -> {}",
1113                     inst_index,
1114                     inst_offset,
1115                     next_offset
1116                 );
1117                 inst_offsets[inst_index] = next_offset;
1118                 continue;
1119             }
1120 
1121             next_offset = inst_offset;
1122         }
1123     }
1124 
1125     fn compute_value_labels_ranges(
1126         &self,
1127         regalloc: &regalloc2::Output,
1128         inst_offsets: &[CodeOffset],
1129         func_body_len: u32,
1130     ) -> ValueLabelsRanges {
1131         if self.debug_value_labels.is_empty() {
1132             return ValueLabelsRanges::default();
1133         }
1134 
1135         let mut value_labels_ranges: ValueLabelsRanges = HashMap::new();
1136         for &(label, from, to, alloc) in &regalloc.debug_locations {
1137             let ranges = value_labels_ranges
1138                 .entry(ValueLabel::from_u32(label))
1139                 .or_insert_with(|| vec![]);
1140             let from_offset = inst_offsets[from.inst().index()];
1141             let to_offset = if to.inst().index() == inst_offsets.len() {
1142                 func_body_len
1143             } else {
1144                 inst_offsets[to.inst().index()]
1145             };
1146 
1147             // Empty ranges or unavailable offsets can happen
1148             // due to cold blocks and branch removal (see above).
1149             if from_offset == NO_INST_OFFSET
1150                 || to_offset == NO_INST_OFFSET
1151                 || from_offset == to_offset
1152             {
1153                 continue;
1154             }
1155 
1156             let loc = if let Some(preg) = alloc.as_reg() {
1157                 LabelValueLoc::Reg(Reg::from(preg))
1158             } else {
1159                 let slot = alloc.as_stack().unwrap();
1160                 let slot_offset = self.abi.get_spillslot_offset(slot);
1161                 let slot_base_to_caller_sp_offset = self.abi.slot_base_to_caller_sp_offset();
1162                 let caller_sp_to_cfa_offset =
1163                     crate::isa::unwind::systemv::caller_sp_to_cfa_offset();
1164                 // NOTE: this is a negative offset because it's relative to the caller's SP
1165                 let cfa_to_sp_offset =
1166                     -((slot_base_to_caller_sp_offset + caller_sp_to_cfa_offset) as i64);
1167                 LabelValueLoc::CFAOffset(cfa_to_sp_offset + slot_offset)
1168             };
1169 
1170             // ValueLocRanges are recorded by *instruction-end
1171             // offset*. `from_offset` is the *start* of the
1172             // instruction; that is the same as the end of another
1173             // instruction, so we only want to begin coverage once
1174             // we are past the previous instruction's end.
1175             let start = from_offset + 1;
1176 
1177             // Likewise, `end` is exclusive, but we want to
1178             // *include* the end of the last
1179             // instruction. `to_offset` is the start of the
1180             // `to`-instruction, which is the exclusive end, i.e.,
1181             // the first instruction not covered. That
1182             // instruction's start is the same as the end of the
1183             // last instruction that is included, so we go one
1184             // byte further to be sure to include it.
1185             let end = to_offset + 1;
1186 
1187             // Coalesce adjacent ranges that for the same location
1188             // to minimize output size here and for the consumers.
1189             if let Some(last_loc_range) = ranges.last_mut() {
1190                 if last_loc_range.loc == loc && last_loc_range.end == start {
1191                     trace!(
1192                         "Extending debug range for VL{} in {:?} to {}",
1193                         label,
1194                         loc,
1195                         end
1196                     );
1197                     last_loc_range.end = end;
1198                     continue;
1199                 }
1200             }
1201 
1202             trace!(
1203                 "Recording debug range for VL{} in {:?}: [Inst {}..Inst {}) [{}..{})",
1204                 label,
1205                 loc,
1206                 from.inst().index(),
1207                 to.inst().index(),
1208                 start,
1209                 end
1210             );
1211 
1212             ranges.push(ValueLocRange { loc, start, end });
1213         }
1214 
1215         value_labels_ranges
1216     }
1217 
1218     /// Get the IR block for a BlockIndex, if one exists.
1219     pub fn bindex_to_bb(&self, block: BlockIndex) -> Option<ir::Block> {
1220         self.block_order.lowered_order()[block.index()].orig_block()
1221     }
1222 
1223     /// Get the type of a VReg.
1224     pub fn vreg_type(&self, vreg: VReg) -> Type {
1225         self.vreg_types[vreg.vreg()]
1226     }
1227 
1228     /// Get the fact, if any, for a given VReg.
1229     pub fn vreg_fact(&self, vreg: VReg) -> Option<&Fact> {
1230         self.facts[vreg.vreg()].as_ref()
1231     }
1232 
1233     /// Set the fact for a given VReg.
1234     pub fn set_vreg_fact(&mut self, vreg: VReg, fact: Fact) {
1235         trace!("set fact on {}: {:?}", vreg, fact);
1236         self.facts[vreg.vreg()] = Some(fact);
1237     }
1238 
1239     /// Does a given instruction define any facts?
1240     pub fn inst_defines_facts(&self, inst: InsnIndex) -> bool {
1241         self.inst_operands(inst)
1242             .iter()
1243             .filter(|o| o.kind() == OperandKind::Def)
1244             .map(|o| o.vreg())
1245             .any(|vreg| self.facts[vreg.vreg()].is_some())
1246     }
1247 
1248     /// Get the user stack map associated with the given forward instruction index.
1249     pub fn get_user_stack_map(&self, inst: InsnIndex) -> Option<&ir::UserStackMap> {
1250         let index = inst.to_backwards_insn_index(self.num_insts());
1251         self.user_stack_maps.get(&index)
1252     }
1253 }
1254 
1255 impl<I: VCodeInst> std::ops::Index<InsnIndex> for VCode<I> {
1256     type Output = I;
1257     fn index(&self, idx: InsnIndex) -> &Self::Output {
1258         &self.insts[idx.index()]
1259     }
1260 }
1261 
1262 impl<I: VCodeInst> RegallocFunction for VCode<I> {
1263     fn num_insts(&self) -> usize {
1264         self.insts.len()
1265     }
1266 
1267     fn num_blocks(&self) -> usize {
1268         self.block_ranges.len()
1269     }
1270 
1271     fn entry_block(&self) -> BlockIndex {
1272         self.entry
1273     }
1274 
1275     fn block_insns(&self, block: BlockIndex) -> InstRange {
1276         let range = self.block_ranges.get(block.index());
1277         InstRange::new(InsnIndex::new(range.start), InsnIndex::new(range.end))
1278     }
1279 
1280     fn block_succs(&self, block: BlockIndex) -> &[BlockIndex] {
1281         let range = self.block_succ_range.get(block.index());
1282         &self.block_succs[range]
1283     }
1284 
1285     fn block_preds(&self, block: BlockIndex) -> &[BlockIndex] {
1286         let range = self.block_pred_range.get(block.index());
1287         &self.block_preds[range]
1288     }
1289 
1290     fn block_params(&self, block: BlockIndex) -> &[VReg] {
1291         // As a special case we don't return block params for the entry block, as all the arguments
1292         // will be defined by the `Inst::Args` instruction.
1293         if block == self.entry {
1294             return &[];
1295         }
1296 
1297         let range = self.block_params_range.get(block.index());
1298         &self.block_params[range]
1299     }
1300 
1301     fn branch_blockparams(&self, block: BlockIndex, _insn: InsnIndex, succ_idx: usize) -> &[VReg] {
1302         let succ_range = self.branch_block_arg_succ_range.get(block.index());
1303         debug_assert!(succ_idx < succ_range.len());
1304         let branch_block_args = self.branch_block_arg_range.get(succ_range.start + succ_idx);
1305         &self.branch_block_args[branch_block_args]
1306     }
1307 
1308     fn is_ret(&self, insn: InsnIndex) -> bool {
1309         match self.insts[insn.index()].is_term() {
1310             // We treat blocks terminated by an unconditional trap like a return for regalloc.
1311             MachTerminator::None => self.insts[insn.index()].is_trap(),
1312             MachTerminator::Ret | MachTerminator::RetCall => true,
1313             MachTerminator::Uncond | MachTerminator::Cond | MachTerminator::Indirect => false,
1314         }
1315     }
1316 
1317     fn is_branch(&self, insn: InsnIndex) -> bool {
1318         match self.insts[insn.index()].is_term() {
1319             MachTerminator::Cond | MachTerminator::Uncond | MachTerminator::Indirect => true,
1320             _ => false,
1321         }
1322     }
1323 
1324     fn inst_operands(&self, insn: InsnIndex) -> &[Operand] {
1325         let range = self.operand_ranges.get(insn.index());
1326         &self.operands[range]
1327     }
1328 
1329     fn inst_clobbers(&self, insn: InsnIndex) -> PRegSet {
1330         self.clobbers.get(&insn).cloned().unwrap_or_default()
1331     }
1332 
1333     fn num_vregs(&self) -> usize {
1334         self.vreg_types.len()
1335     }
1336 
1337     fn debug_value_labels(&self) -> &[(VReg, InsnIndex, InsnIndex, u32)] {
1338         &self.debug_value_labels
1339     }
1340 
1341     fn spillslot_size(&self, regclass: RegClass) -> usize {
1342         self.abi.get_spillslot_size(regclass) as usize
1343     }
1344 
1345     fn allow_multiple_vreg_defs(&self) -> bool {
1346         // At least the s390x backend requires this, because the
1347         // `Loop` pseudo-instruction aggregates all Operands so pinned
1348         // vregs (RealRegs) may occur more than once.
1349         true
1350     }
1351 }
1352 
1353 impl<I: VCodeInst> Debug for VRegAllocator<I> {
1354     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1355         writeln!(f, "VRegAllocator {{")?;
1356 
1357         let mut alias_keys = self.vreg_aliases.keys().cloned().collect::<Vec<_>>();
1358         alias_keys.sort_unstable();
1359         for key in alias_keys {
1360             let dest = self.vreg_aliases.get(&key).unwrap();
1361             writeln!(f, "  {:?} := {:?}", Reg::from(key), Reg::from(*dest))?;
1362         }
1363 
1364         for (vreg, fact) in self.facts.iter().enumerate() {
1365             if let Some(fact) = fact {
1366                 writeln!(f, "  v{vreg} ! {fact}")?;
1367             }
1368         }
1369 
1370         writeln!(f, "}}")
1371     }
1372 }
1373 
1374 impl<I: VCodeInst> fmt::Debug for VCode<I> {
1375     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1376         writeln!(f, "VCode {{")?;
1377         writeln!(f, "  Entry block: {}", self.entry.index())?;
1378 
1379         let mut state = Default::default();
1380 
1381         for block in 0..self.num_blocks() {
1382             let block = BlockIndex::new(block);
1383             writeln!(
1384                 f,
1385                 "Block {}({:?}):",
1386                 block.index(),
1387                 self.block_params(block)
1388             )?;
1389             if let Some(bb) = self.bindex_to_bb(block) {
1390                 writeln!(f, "    (original IR block: {bb})")?;
1391             }
1392             for (succ_idx, succ) in self.block_succs(block).iter().enumerate() {
1393                 writeln!(
1394                     f,
1395                     "    (successor: Block {}({:?}))",
1396                     succ.index(),
1397                     self.branch_blockparams(block, InsnIndex::new(0) /* dummy */, succ_idx)
1398                 )?;
1399             }
1400             for inst in self.block_ranges.get(block.index()) {
1401                 writeln!(
1402                     f,
1403                     "  Inst {}: {}",
1404                     inst,
1405                     self.insts[inst].pretty_print_inst(&mut state)
1406                 )?;
1407                 if !self.operands.is_empty() {
1408                     for operand in self.inst_operands(InsnIndex::new(inst)) {
1409                         if operand.kind() == OperandKind::Def {
1410                             if let Some(fact) = &self.facts[operand.vreg().vreg()] {
1411                                 writeln!(f, "    v{} ! {}", operand.vreg().vreg(), fact)?;
1412                             }
1413                         }
1414                     }
1415                 }
1416                 if let Some(user_stack_map) = self.get_user_stack_map(InsnIndex::new(inst)) {
1417                     writeln!(f, "    {user_stack_map:?}")?;
1418                 }
1419             }
1420         }
1421 
1422         writeln!(f, "}}")?;
1423         Ok(())
1424     }
1425 }
1426 
1427 /// This structure manages VReg allocation during the lifetime of the VCodeBuilder.
1428 pub struct VRegAllocator<I> {
1429     /// VReg IR-level types.
1430     vreg_types: Vec<Type>,
1431 
1432     /// VReg aliases. When the final VCode is built we rewrite all
1433     /// uses of the keys in this table to their replacement values.
1434     ///
1435     /// We use these aliases to rename an instruction's expected
1436     /// result vregs to the returned vregs from lowering, which are
1437     /// usually freshly-allocated temps.
1438     vreg_aliases: FxHashMap<regalloc2::VReg, regalloc2::VReg>,
1439 
1440     /// A deferred error, to be bubbled up to the top level of the
1441     /// lowering algorithm. We take this approach because we cannot
1442     /// currently propagate a `Result` upward through ISLE code (the
1443     /// lowering rules) or some ABI code.
1444     deferred_error: Option<CodegenError>,
1445 
1446     /// Facts on VRegs, for proof-carrying code.
1447     facts: Vec<Option<Fact>>,
1448 
1449     /// The type of instruction that this allocator makes registers for.
1450     _inst: core::marker::PhantomData<I>,
1451 }
1452 
1453 impl<I: VCodeInst> VRegAllocator<I> {
1454     /// Make a new VRegAllocator.
1455     pub fn with_capacity(capacity: usize) -> Self {
1456         let capacity = first_user_vreg_index() + capacity;
1457         let mut vreg_types = Vec::with_capacity(capacity);
1458         vreg_types.resize(first_user_vreg_index(), types::INVALID);
1459         Self {
1460             vreg_types,
1461             vreg_aliases: FxHashMap::with_capacity_and_hasher(capacity, Default::default()),
1462             deferred_error: None,
1463             facts: Vec::with_capacity(capacity),
1464             _inst: core::marker::PhantomData::default(),
1465         }
1466     }
1467 
1468     /// Allocate a fresh ValueRegs.
1469     pub fn alloc(&mut self, ty: Type) -> CodegenResult<ValueRegs<Reg>> {
1470         if self.deferred_error.is_some() {
1471             return Err(CodegenError::CodeTooLarge);
1472         }
1473         let v = self.vreg_types.len();
1474         let (regclasses, tys) = I::rc_for_type(ty)?;
1475         if v + regclasses.len() >= VReg::MAX {
1476             return Err(CodegenError::CodeTooLarge);
1477         }
1478 
1479         let regs: ValueRegs<Reg> = match regclasses {
1480             &[rc0] => ValueRegs::one(VReg::new(v, rc0).into()),
1481             &[rc0, rc1] => ValueRegs::two(VReg::new(v, rc0).into(), VReg::new(v + 1, rc1).into()),
1482             // We can extend this if/when we support 32-bit targets; e.g.,
1483             // an i128 on a 32-bit machine will need up to four machine regs
1484             // for a `Value`.
1485             _ => panic!("Value must reside in 1 or 2 registers"),
1486         };
1487         for (&reg_ty, &reg) in tys.iter().zip(regs.regs().iter()) {
1488             let vreg = reg.to_virtual_reg().unwrap();
1489             debug_assert_eq!(self.vreg_types.len(), vreg.index());
1490             self.vreg_types.push(reg_ty);
1491         }
1492 
1493         // Create empty facts for each allocated vreg.
1494         self.facts.resize(self.vreg_types.len(), None);
1495 
1496         Ok(regs)
1497     }
1498 
1499     /// Allocate a fresh ValueRegs, deferring any out-of-vregs
1500     /// errors. This is useful in places where we cannot bubble a
1501     /// `CodegenResult` upward easily, and which are known to be
1502     /// invoked from within the lowering loop that checks the deferred
1503     /// error status below.
1504     pub fn alloc_with_deferred_error(&mut self, ty: Type) -> ValueRegs<Reg> {
1505         match self.alloc(ty) {
1506             Ok(x) => x,
1507             Err(e) => {
1508                 self.deferred_error = Some(e);
1509                 self.bogus_for_deferred_error(ty)
1510             }
1511         }
1512     }
1513 
1514     /// Take any deferred error that was accumulated by `alloc_with_deferred_error`.
1515     pub fn take_deferred_error(&mut self) -> Option<CodegenError> {
1516         self.deferred_error.take()
1517     }
1518 
1519     /// Produce an bogus VReg placeholder with the proper number of
1520     /// registers for the given type. This is meant to be used with
1521     /// deferred allocation errors (see `Lower::alloc_tmp()`).
1522     fn bogus_for_deferred_error(&self, ty: Type) -> ValueRegs<Reg> {
1523         let (regclasses, _tys) = I::rc_for_type(ty).expect("must have valid type");
1524         match regclasses {
1525             &[rc0] => ValueRegs::one(VReg::new(0, rc0).into()),
1526             &[rc0, rc1] => ValueRegs::two(VReg::new(0, rc0).into(), VReg::new(1, rc1).into()),
1527             _ => panic!("Value must reside in 1 or 2 registers"),
1528         }
1529     }
1530 
1531     /// Rewrite any mention of `from` into `to`.
1532     pub fn set_vreg_alias(&mut self, from: Reg, to: Reg) {
1533         let from = from.into();
1534         let resolved_to = self.resolve_vreg_alias(to.into());
1535         // Disallow cycles (see below).
1536         assert_ne!(resolved_to, from);
1537 
1538         // Maintain the invariant that PCC facts only exist on vregs
1539         // which aren't aliases. We want to preserve whatever was
1540         // stated about the vreg before its producer was lowered.
1541         if let Some(fact) = self.facts[from.vreg()].take() {
1542             self.set_fact(resolved_to, fact);
1543         }
1544 
1545         let old_alias = self.vreg_aliases.insert(from, resolved_to);
1546         debug_assert_eq!(old_alias, None);
1547     }
1548 
1549     fn resolve_vreg_alias(&self, mut vreg: regalloc2::VReg) -> regalloc2::VReg {
1550         // We prevent cycles from existing by resolving targets of
1551         // aliases eagerly before setting them. If the target resolves
1552         // to the origin of the alias, then a cycle would be created
1553         // and the alias is disallowed. Because of the structure of
1554         // SSA code (one instruction can refer to another's defs but
1555         // not vice-versa, except indirectly through
1556         // phis/blockparams), cycles should not occur as we use
1557         // aliases to redirect vregs to the temps that actually define
1558         // them.
1559         while let Some(to) = self.vreg_aliases.get(&vreg) {
1560             vreg = *to;
1561         }
1562         vreg
1563     }
1564 
1565     #[inline]
1566     fn debug_assert_no_vreg_aliases(&self, mut list: impl Iterator<Item = VReg>) {
1567         debug_assert!(list.all(|vreg| !self.vreg_aliases.contains_key(&vreg)));
1568     }
1569 
1570     /// Set the proof-carrying code fact on a given virtual register.
1571     ///
1572     /// Returns the old fact, if any (only one fact can be stored).
1573     fn set_fact(&mut self, vreg: regalloc2::VReg, fact: Fact) -> Option<Fact> {
1574         trace!("vreg {:?} has fact: {:?}", vreg, fact);
1575         debug_assert!(!self.vreg_aliases.contains_key(&vreg));
1576         self.facts[vreg.vreg()].replace(fact)
1577     }
1578 
1579     /// Set a fact only if one doesn't already exist.
1580     pub fn set_fact_if_missing(&mut self, vreg: VirtualReg, fact: Fact) {
1581         let vreg = self.resolve_vreg_alias(vreg.into());
1582         if self.facts[vreg.vreg()].is_none() {
1583             self.set_fact(vreg, fact);
1584         }
1585     }
1586 
1587     /// Allocate a fresh ValueRegs, with a given fact to apply if
1588     /// the value fits in one VReg.
1589     pub fn alloc_with_maybe_fact(
1590         &mut self,
1591         ty: Type,
1592         fact: Option<Fact>,
1593     ) -> CodegenResult<ValueRegs<Reg>> {
1594         let result = self.alloc(ty)?;
1595 
1596         // Ensure that we don't lose a fact on a value that splits
1597         // into multiple VRegs.
1598         assert!(result.len() == 1 || fact.is_none());
1599         if let Some(fact) = fact {
1600             self.set_fact(result.regs()[0].into(), fact);
1601         }
1602 
1603         Ok(result)
1604     }
1605 }
1606 
1607 /// This structure tracks the large constants used in VCode that will be emitted separately by the
1608 /// [MachBuffer].
1609 ///
1610 /// First, during the lowering phase, constants are inserted using
1611 /// [VCodeConstants.insert]; an intermediate handle, `VCodeConstant`, tracks what constants are
1612 /// used in this phase. Some deduplication is performed, when possible, as constant
1613 /// values are inserted.
1614 ///
1615 /// Secondly, during the emission phase, the [MachBuffer] assigns [MachLabel]s for each of the
1616 /// constants so that instructions can refer to the value's memory location. The [MachBuffer]
1617 /// then writes the constant values to the buffer.
1618 #[derive(Default)]
1619 pub struct VCodeConstants {
1620     constants: PrimaryMap<VCodeConstant, VCodeConstantData>,
1621     pool_uses: HashMap<Constant, VCodeConstant>,
1622     well_known_uses: HashMap<*const [u8], VCodeConstant>,
1623     u64s: HashMap<[u8; 8], VCodeConstant>,
1624 }
1625 impl VCodeConstants {
1626     /// Initialize the structure with the expected number of constants.
1627     pub fn with_capacity(expected_num_constants: usize) -> Self {
1628         Self {
1629             constants: PrimaryMap::with_capacity(expected_num_constants),
1630             pool_uses: HashMap::with_capacity(expected_num_constants),
1631             well_known_uses: HashMap::new(),
1632             u64s: HashMap::new(),
1633         }
1634     }
1635 
1636     /// Insert a constant; using this method indicates that a constant value will be used and thus
1637     /// will be emitted to the `MachBuffer`. The current implementation can deduplicate constants
1638     /// that are [VCodeConstantData::Pool] or [VCodeConstantData::WellKnown] but not
1639     /// [VCodeConstantData::Generated].
1640     pub fn insert(&mut self, data: VCodeConstantData) -> VCodeConstant {
1641         match data {
1642             VCodeConstantData::Generated(_) => self.constants.push(data),
1643             VCodeConstantData::Pool(constant, _) => match self.pool_uses.get(&constant) {
1644                 None => {
1645                     let vcode_constant = self.constants.push(data);
1646                     self.pool_uses.insert(constant, vcode_constant);
1647                     vcode_constant
1648                 }
1649                 Some(&vcode_constant) => vcode_constant,
1650             },
1651             VCodeConstantData::WellKnown(data_ref) => {
1652                 match self.well_known_uses.entry(data_ref as *const [u8]) {
1653                     Entry::Vacant(v) => {
1654                         let vcode_constant = self.constants.push(data);
1655                         v.insert(vcode_constant);
1656                         vcode_constant
1657                     }
1658                     Entry::Occupied(o) => *o.get(),
1659                 }
1660             }
1661             VCodeConstantData::U64(value) => match self.u64s.entry(value) {
1662                 Entry::Vacant(v) => {
1663                     let vcode_constant = self.constants.push(data);
1664                     v.insert(vcode_constant);
1665                     vcode_constant
1666                 }
1667                 Entry::Occupied(o) => *o.get(),
1668             },
1669         }
1670     }
1671 
1672     /// Return the number of constants inserted.
1673     pub fn len(&self) -> usize {
1674         self.constants.len()
1675     }
1676 
1677     /// Iterate over the `VCodeConstant` keys inserted in this structure.
1678     pub fn keys(&self) -> Keys<VCodeConstant> {
1679         self.constants.keys()
1680     }
1681 
1682     /// Iterate over the `VCodeConstant` keys and the data (as a byte slice) inserted in this
1683     /// structure.
1684     pub fn iter(&self) -> impl Iterator<Item = (VCodeConstant, &VCodeConstantData)> {
1685         self.constants.iter()
1686     }
1687 
1688     /// Returns the data associated with the specified constant.
1689     pub fn get(&self, c: VCodeConstant) -> &VCodeConstantData {
1690         &self.constants[c]
1691     }
1692 
1693     /// Checks if the given [VCodeConstantData] is registered as
1694     /// used by the pool.
1695     pub fn pool_uses(&self, constant: &VCodeConstantData) -> bool {
1696         match constant {
1697             VCodeConstantData::Pool(c, _) => self.pool_uses.contains_key(c),
1698             _ => false,
1699         }
1700     }
1701 }
1702 
1703 /// A use of a constant by one or more VCode instructions; see [VCodeConstants].
1704 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
1705 pub struct VCodeConstant(u32);
1706 entity_impl!(VCodeConstant);
1707 
1708 /// Identify the different types of constant that can be inserted into [VCodeConstants]. Tracking
1709 /// these separately instead of as raw byte buffers allows us to avoid some duplication.
1710 pub enum VCodeConstantData {
1711     /// A constant already present in the Cranelift IR
1712     /// [ConstantPool](crate::ir::constant::ConstantPool).
1713     Pool(Constant, ConstantData),
1714     /// A reference to a well-known constant value that is statically encoded within the compiler.
1715     WellKnown(&'static [u8]),
1716     /// A constant value generated during lowering; the value may depend on the instruction context
1717     /// which makes it difficult to de-duplicate--if possible, use other variants.
1718     Generated(ConstantData),
1719     /// A constant of at most 64 bits. These are deduplicated as
1720     /// well. Stored as a fixed-size array of `u8` so that we do not
1721     /// encounter endianness problems when cross-compiling.
1722     U64([u8; 8]),
1723 }
1724 impl VCodeConstantData {
1725     /// Retrieve the constant data as a byte slice.
1726     pub fn as_slice(&self) -> &[u8] {
1727         match self {
1728             VCodeConstantData::Pool(_, d) | VCodeConstantData::Generated(d) => d.as_slice(),
1729             VCodeConstantData::WellKnown(d) => d,
1730             VCodeConstantData::U64(value) => &value[..],
1731         }
1732     }
1733 
1734     /// Calculate the alignment of the constant data.
1735     pub fn alignment(&self) -> u32 {
1736         if self.as_slice().len() <= 8 {
1737             8
1738         } else {
1739             16
1740         }
1741     }
1742 }
1743 
1744 #[cfg(test)]
1745 mod test {
1746     use super::*;
1747     use std::mem::size_of;
1748 
1749     #[test]
1750     fn size_of_constant_structs() {
1751         assert_eq!(size_of::<Constant>(), 4);
1752         assert_eq!(size_of::<VCodeConstant>(), 4);
1753         assert_eq!(size_of::<ConstantData>(), 3 * size_of::<usize>());
1754         assert_eq!(size_of::<VCodeConstantData>(), 4 * size_of::<usize>());
1755         assert_eq!(
1756             size_of::<PrimaryMap<VCodeConstant, VCodeConstantData>>(),
1757             3 * size_of::<usize>()
1758         );
1759         // TODO The VCodeConstants structure's memory size could be further optimized.
1760         // With certain versions of Rust, each `HashMap` in `VCodeConstants` occupied at
1761         // least 48 bytes, making an empty `VCodeConstants` cost 120 bytes.
1762     }
1763 }
1764