1 //! This implements the VCode container: a CFG of Insts that have been lowered.
2 //!
3 //! VCode is virtual-register code. An instruction in VCode is almost a machine
4 //! instruction; however, its register slots can refer to virtual registers in
5 //! addition to real machine registers.
6 //!
7 //! VCode is structured with traditional basic blocks, and
8 //! each block must be terminated by an unconditional branch (one target), a
9 //! conditional branch (two targets), or a return (no targets). Note that this
10 //! slightly differs from the machine code of most ISAs: in most ISAs, a
11 //! conditional branch has one target (and the not-taken case falls through).
12 //! However, we expect that machine backends will elide branches to the following
13 //! block (i.e., zero-offset jumps), and will be able to codegen a branch-cond /
14 //! branch-uncond pair if *both* targets are not fallthrough. This allows us to
15 //! play with layout prior to final binary emission, as well, if we want.
16 //!
17 //! See the main module comment in `mod.rs` for more details on the VCode-based
18 //! backend pipeline.
19 
20 use crate::ir::pcc::*;
21 use crate::ir::{self, types, Constant, ConstantData, ValueLabel};
22 use crate::machinst::*;
23 use crate::ranges::Ranges;
24 use crate::timing;
25 use crate::trace;
26 use crate::CodegenError;
27 use crate::{LabelValueLoc, ValueLocRange};
28 use regalloc2::{
29     Edit, Function as RegallocFunction, InstOrEdit, InstRange, MachineEnv, Operand,
30     OperandConstraint, OperandKind, PRegSet, RegClass,
31 };
32 use rustc_hash::FxHashMap;
33 
34 use core::mem::take;
35 use cranelift_entity::{entity_impl, Keys};
36 use std::collections::hash_map::Entry;
37 use std::collections::HashMap;
38 use std::fmt;
39 
40 /// Index referring to an instruction in VCode.
41 pub type InsnIndex = regalloc2::Inst;
42 
43 /// Extension trait for `InsnIndex` to allow conversion to a
44 /// `BackwardsInsnIndex`.
45 trait ToBackwardsInsnIndex {
46     fn to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex;
47 }
48 
49 impl ToBackwardsInsnIndex for InsnIndex {
50     fn to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex {
51         BackwardsInsnIndex::new(num_insts - self.index() - 1)
52     }
53 }
54 
55 /// An index referring to an instruction in the VCode when it is backwards,
56 /// during VCode construction.
57 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
58 #[cfg_attr(
59     feature = "enable-serde",
60     derive(::serde::Serialize, ::serde::Deserialize)
61 )]
62 pub struct BackwardsInsnIndex(InsnIndex);
63 
64 impl BackwardsInsnIndex {
65     pub fn new(i: usize) -> Self {
66         BackwardsInsnIndex(InsnIndex::new(i))
67     }
68 }
69 
70 /// Index referring to a basic block in VCode.
71 pub type BlockIndex = regalloc2::Block;
72 
73 /// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be
74 /// a `MachInst` and it must be able to emit itself at least to a `SizeCodeSink`.
75 pub trait VCodeInst: MachInst + MachInstEmit {}
76 impl<I: MachInst + MachInstEmit> VCodeInst for I {}
77 
78 /// A function in "VCode" (virtualized-register code) form, after
79 /// lowering.  This is essentially a standard CFG of basic blocks,
80 /// where each basic block consists of lowered instructions produced
81 /// by the machine-specific backend.
82 ///
83 /// Note that the VCode is immutable once produced, and is not
84 /// modified by register allocation in particular. Rather, register
85 /// allocation on the `VCode` produces a separate `regalloc2::Output`
86 /// struct, and this can be passed to `emit`. `emit` in turn does not
87 /// modify the vcode, but produces an `EmitResult`, which contains the
88 /// machine code itself, and the associated disassembly and/or
89 /// metadata as requested.
90 pub struct VCode<I: VCodeInst> {
91     /// VReg IR-level types.
92     vreg_types: Vec<Type>,
93 
94     /// Lowered machine instructions in order corresponding to the original IR.
95     insts: Vec<I>,
96 
97     /// A map from backwards instruction index to the user stack map for that
98     /// instruction.
99     ///
100     /// This is a sparse side table that only has entries for instructions that
101     /// are safepoints, and only for a subset of those that have an associated
102     /// user stack map.
103     user_stack_maps: FxHashMap<BackwardsInsnIndex, ir::UserStackMap>,
104 
105     /// Operands: pre-regalloc references to virtual registers with
106     /// constraints, in one flattened array. This allows the regalloc
107     /// to efficiently access all operands without requiring expensive
108     /// matches or method invocations on insts.
109     operands: Vec<Operand>,
110 
111     /// Operand index ranges: for each instruction in `insts`, there
112     /// is a tuple here providing the range in `operands` for that
113     /// instruction's operands.
114     operand_ranges: Ranges,
115 
116     /// Clobbers: a sparse map from instruction indices to clobber masks.
117     clobbers: FxHashMap<InsnIndex, PRegSet>,
118 
119     /// Source locations for each instruction. (`SourceLoc` is a `u32`, so it is
120     /// reasonable to keep one of these per instruction.)
121     srclocs: Vec<RelSourceLoc>,
122 
123     /// Entry block.
124     entry: BlockIndex,
125 
126     /// Block instruction indices.
127     block_ranges: Ranges,
128 
129     /// Block successors: index range in the `block_succs` list.
130     block_succ_range: Ranges,
131 
132     /// Block successor lists, concatenated into one vec. The
133     /// `block_succ_range` list of tuples above gives (start, end)
134     /// ranges within this list that correspond to each basic block's
135     /// successors.
136     block_succs: Vec<regalloc2::Block>,
137 
138     /// Block predecessors: index range in the `block_preds` list.
139     block_pred_range: Ranges,
140 
141     /// Block predecessor lists, concatenated into one vec. The
142     /// `block_pred_range` list of tuples above gives (start, end)
143     /// ranges within this list that correspond to each basic block's
144     /// predecessors.
145     block_preds: Vec<regalloc2::Block>,
146 
147     /// Block parameters: index range in `block_params` below.
148     block_params_range: Ranges,
149 
150     /// Block parameter lists, concatenated into one vec. The
151     /// `block_params_range` list of tuples above gives (start, end)
152     /// ranges within this list that correspond to each basic block's
153     /// blockparam vregs.
154     block_params: Vec<regalloc2::VReg>,
155 
156     /// Outgoing block arguments on branch instructions, concatenated
157     /// into one list.
158     ///
159     /// Note that this is conceptually a 3D array: we have a VReg list
160     /// per block, per successor. We flatten those three dimensions
161     /// into this 1D vec, then store index ranges in two levels of
162     /// indirection.
163     ///
164     /// Indexed by the indices in `branch_block_arg_succ_range`.
165     branch_block_args: Vec<regalloc2::VReg>,
166 
167     /// Array of sequences of (start, end) tuples in
168     /// `branch_block_args`, one for each successor; these sequences
169     /// for each block are concatenated.
170     ///
171     /// Indexed by the indices in `branch_block_arg_succ_range`.
172     branch_block_arg_range: Ranges,
173 
174     /// For a given block, indices in `branch_block_arg_range`
175     /// corresponding to all of its successors.
176     branch_block_arg_succ_range: Ranges,
177 
178     /// Block-order information.
179     block_order: BlockLoweringOrder,
180 
181     /// ABI object.
182     pub(crate) abi: Callee<I::ABIMachineSpec>,
183 
184     /// Constant information used during code emission. This should be
185     /// immutable across function compilations within the same module.
186     emit_info: I::Info,
187 
188     /// Constants.
189     pub(crate) constants: VCodeConstants,
190 
191     /// Value labels for debuginfo attached to vregs.
192     debug_value_labels: Vec<(VReg, InsnIndex, InsnIndex, u32)>,
193 
194     pub(crate) sigs: SigSet,
195 
196     /// Facts on VRegs, for proof-carrying code verification.
197     facts: Vec<Option<Fact>>,
198 
199     log2_min_function_alignment: u8,
200 }
201 
202 /// The result of `VCode::emit`. Contains all information computed
203 /// during emission: actual machine code, optionally a disassembly,
204 /// and optionally metadata about the code layout.
205 pub struct EmitResult {
206     /// The MachBuffer containing the machine code.
207     pub buffer: MachBufferFinalized<Stencil>,
208 
209     /// Offset of each basic block, recorded during emission. Computed
210     /// only if `debug_value_labels` is non-empty.
211     pub bb_offsets: Vec<CodeOffset>,
212 
213     /// Final basic-block edges, in terms of code offsets of
214     /// bb-starts. Computed only if `debug_value_labels` is non-empty.
215     pub bb_edges: Vec<(CodeOffset, CodeOffset)>,
216 
217     /// Final length of function body.
218     pub func_body_len: CodeOffset,
219 
220     /// The pretty-printed disassembly, if any. This uses the same
221     /// pretty-printing for MachInsts as the pre-regalloc VCode Debug
222     /// implementation, but additionally includes the prologue and
223     /// epilogue(s), and makes use of the regalloc results.
224     pub disasm: Option<String>,
225 
226     /// Offsets of sized stackslots.
227     pub sized_stackslot_offsets: PrimaryMap<StackSlot, u32>,
228 
229     /// Offsets of dynamic stackslots.
230     pub dynamic_stackslot_offsets: PrimaryMap<DynamicStackSlot, u32>,
231 
232     /// Value-labels information (debug metadata).
233     pub value_labels_ranges: ValueLabelsRanges,
234 
235     /// Stack frame size.
236     pub frame_size: u32,
237 }
238 
239 /// A builder for a VCode function body.
240 ///
241 /// This builder has the ability to accept instructions in either
242 /// forward or reverse order, depending on the pass direction that
243 /// produces the VCode. The lowering from CLIF to VCode<MachInst>
244 /// ordinarily occurs in reverse order (in order to allow instructions
245 /// to be lowered only if used, and not merged) so a reversal will
246 /// occur at the end of lowering to ensure the VCode is in machine
247 /// order.
248 ///
249 /// If built in reverse, block and instruction indices used once the
250 /// VCode is built are relative to the final (reversed) order, not the
251 /// order of construction. Note that this means we do not know the
252 /// final block or instruction indices when building, so we do not
253 /// hand them out. (The user is assumed to know them when appending
254 /// terminator instructions with successor blocks.)
255 pub struct VCodeBuilder<I: VCodeInst> {
256     /// In-progress VCode.
257     pub(crate) vcode: VCode<I>,
258 
259     /// In what direction is the build occurring?
260     direction: VCodeBuildDirection,
261 
262     /// Debug-value label in-progress map, keyed by label. For each
263     /// label, we keep disjoint ranges mapping to vregs. We'll flatten
264     /// this into (vreg, range, label) tuples when done.
265     debug_info: FxHashMap<ValueLabel, Vec<(InsnIndex, InsnIndex, VReg)>>,
266 }
267 
268 /// Direction in which a VCodeBuilder builds VCode.
269 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
270 pub enum VCodeBuildDirection {
271     // TODO: add `Forward` once we need it and can test it adequately.
272     /// Backward-build pass: we expect the producer to call `emit()`
273     /// with instructions in reverse program order within each block.
274     Backward,
275 }
276 
277 impl<I: VCodeInst> VCodeBuilder<I> {
278     /// Create a new VCodeBuilder.
279     pub fn new(
280         sigs: SigSet,
281         abi: Callee<I::ABIMachineSpec>,
282         emit_info: I::Info,
283         block_order: BlockLoweringOrder,
284         constants: VCodeConstants,
285         direction: VCodeBuildDirection,
286         log2_min_function_alignment: u8,
287     ) -> Self {
288         let vcode = VCode::new(
289             sigs,
290             abi,
291             emit_info,
292             block_order,
293             constants,
294             log2_min_function_alignment,
295         );
296 
297         VCodeBuilder {
298             vcode,
299             direction,
300             debug_info: FxHashMap::default(),
301         }
302     }
303 
304     pub fn init_retval_area(&mut self, vregs: &mut VRegAllocator<I>) -> CodegenResult<()> {
305         self.vcode.abi.init_retval_area(&self.vcode.sigs, vregs)
306     }
307 
308     /// Access the ABI object.
309     pub fn abi(&self) -> &Callee<I::ABIMachineSpec> {
310         &self.vcode.abi
311     }
312 
313     /// Access the ABI object.
314     pub fn abi_mut(&mut self) -> &mut Callee<I::ABIMachineSpec> {
315         &mut self.vcode.abi
316     }
317 
318     pub fn sigs(&self) -> &SigSet {
319         &self.vcode.sigs
320     }
321 
322     pub fn sigs_mut(&mut self) -> &mut SigSet {
323         &mut self.vcode.sigs
324     }
325 
326     /// Access to the BlockLoweringOrder object.
327     pub fn block_order(&self) -> &BlockLoweringOrder {
328         &self.vcode.block_order
329     }
330 
331     /// Set the current block as the entry block.
332     pub fn set_entry(&mut self, block: BlockIndex) {
333         self.vcode.entry = block;
334     }
335 
336     /// End the current basic block. Must be called after emitting vcode insts
337     /// for IR insts and prior to ending the function (building the VCode).
338     pub fn end_bb(&mut self) {
339         let end_idx = self.vcode.insts.len();
340         // Add the instruction index range to the list of blocks.
341         self.vcode.block_ranges.push_end(end_idx);
342         // End the successors list.
343         let succ_end = self.vcode.block_succs.len();
344         self.vcode.block_succ_range.push_end(succ_end);
345         // End the blockparams list.
346         let block_params_end = self.vcode.block_params.len();
347         self.vcode.block_params_range.push_end(block_params_end);
348         // End the branch blockparam args list.
349         let branch_block_arg_succ_end = self.vcode.branch_block_arg_range.len();
350         self.vcode
351             .branch_block_arg_succ_range
352             .push_end(branch_block_arg_succ_end);
353     }
354 
355     pub fn add_block_param(&mut self, param: VirtualReg) {
356         self.vcode.block_params.push(param.into());
357     }
358 
359     fn add_branch_args_for_succ(&mut self, args: &[Reg]) {
360         self.vcode
361             .branch_block_args
362             .extend(args.iter().map(|&arg| VReg::from(arg)));
363         let end = self.vcode.branch_block_args.len();
364         self.vcode.branch_block_arg_range.push_end(end);
365     }
366 
367     /// Push an instruction for the current BB and current IR inst
368     /// within the BB.
369     pub fn push(&mut self, insn: I, loc: RelSourceLoc) {
370         assert!(!insn.is_low_level_branch()); // These are not meant to be in VCode.
371         self.vcode.insts.push(insn);
372         self.vcode.srclocs.push(loc);
373     }
374 
375     /// Add a successor block with branch args.
376     pub fn add_succ(&mut self, block: BlockIndex, args: &[Reg]) {
377         self.vcode.block_succs.push(block);
378         self.add_branch_args_for_succ(args);
379     }
380 
381     /// Add a debug value label to a register.
382     pub fn add_value_label(&mut self, reg: Reg, label: ValueLabel) {
383         // We'll fix up labels in reverse(). Because we're generating
384         // code bottom-to-top, the liverange of the label goes *from*
385         // the last index at which was defined (or 0, which is the end
386         // of the eventual function) *to* just this instruction, and
387         // no further.
388         let inst = InsnIndex::new(self.vcode.insts.len());
389         let labels = self.debug_info.entry(label).or_insert_with(|| vec![]);
390         let last = labels
391             .last()
392             .map(|(_start, end, _vreg)| *end)
393             .unwrap_or(InsnIndex::new(0));
394         labels.push((last, inst, reg.into()));
395     }
396 
397     /// Access the constants.
398     pub fn constants(&mut self) -> &mut VCodeConstants {
399         &mut self.vcode.constants
400     }
401 
402     fn compute_preds_from_succs(&mut self) {
403         // Do a linear-time counting sort: first determine how many
404         // times each block appears as a successor.
405         let mut starts = vec![0u32; self.vcode.num_blocks()];
406         for succ in &self.vcode.block_succs {
407             starts[succ.index()] += 1;
408         }
409 
410         // Determine for each block the starting index where that
411         // block's predecessors should go. This is equivalent to the
412         // ranges we need to store in block_pred_range.
413         self.vcode.block_pred_range.reserve(starts.len());
414         let mut end = 0;
415         for count in starts.iter_mut() {
416             let start = end;
417             end += *count;
418             *count = start;
419             self.vcode.block_pred_range.push_end(end as usize);
420         }
421         let end = end as usize;
422         debug_assert_eq!(end, self.vcode.block_succs.len());
423 
424         // Walk over the successors again, this time grouped by
425         // predecessor, and push the predecessor at the current
426         // starting position of each of its successors. We build
427         // each group of predecessors in whatever order Ranges::iter
428         // returns them; regalloc2 doesn't care.
429         self.vcode.block_preds.resize(end, BlockIndex::invalid());
430         for (pred, range) in self.vcode.block_succ_range.iter() {
431             let pred = BlockIndex::new(pred);
432             for succ in &self.vcode.block_succs[range] {
433                 let pos = &mut starts[succ.index()];
434                 self.vcode.block_preds[*pos as usize] = pred;
435                 *pos += 1;
436             }
437         }
438         debug_assert!(self.vcode.block_preds.iter().all(|pred| pred.is_valid()));
439     }
440 
441     /// Called once, when a build in Backward order is complete, to
442     /// perform the overall reversal (into final forward order) and
443     /// finalize metadata accordingly.
444     fn reverse_and_finalize(&mut self, vregs: &VRegAllocator<I>) {
445         let n_insts = self.vcode.insts.len();
446         if n_insts == 0 {
447             return;
448         }
449 
450         // Reverse the per-block and per-inst sequences.
451         self.vcode.block_ranges.reverse_index();
452         self.vcode.block_ranges.reverse_target(n_insts);
453         // block_params_range is indexed by block (and blocks were
454         // traversed in reverse) so we reverse it; but block-param
455         // sequences in the concatenated vec can remain in reverse
456         // order (it is effectively an arena of arbitrarily-placed
457         // referenced sequences).
458         self.vcode.block_params_range.reverse_index();
459         // Likewise, we reverse block_succ_range, but the block_succ
460         // concatenated array can remain as-is.
461         self.vcode.block_succ_range.reverse_index();
462         self.vcode.insts.reverse();
463         self.vcode.srclocs.reverse();
464         // Likewise, branch_block_arg_succ_range is indexed by block
465         // so must be reversed.
466         self.vcode.branch_block_arg_succ_range.reverse_index();
467 
468         // To translate an instruction index *endpoint* in reversed
469         // order to forward order, compute `n_insts - i`.
470         //
471         // Why not `n_insts - 1 - i`? That would be correct to
472         // translate an individual instruction index (for ten insts 0
473         // to 9 inclusive, inst 0 becomes 9, and inst 9 becomes
474         // 0). But for the usual inclusive-start, exclusive-end range
475         // idiom, inclusive starts become exclusive ends and
476         // vice-versa, so e.g. an (inclusive) start of 0 becomes an
477         // (exclusive) end of 10.
478         let translate = |inst: InsnIndex| InsnIndex::new(n_insts - inst.index());
479 
480         // Generate debug-value labels based on per-label maps.
481         for (label, tuples) in &self.debug_info {
482             for &(start, end, vreg) in tuples {
483                 let vreg = vregs.resolve_vreg_alias(vreg);
484                 let fwd_start = translate(end);
485                 let fwd_end = translate(start);
486                 self.vcode
487                     .debug_value_labels
488                     .push((vreg, fwd_start, fwd_end, label.as_u32()));
489             }
490         }
491 
492         // Now sort debug value labels by VReg, as required
493         // by regalloc2.
494         self.vcode
495             .debug_value_labels
496             .sort_unstable_by_key(|(vreg, _, _, _)| *vreg);
497     }
498 
499     fn collect_operands(&mut self, vregs: &VRegAllocator<I>) {
500         let allocatable = PRegSet::from(self.vcode.machine_env());
501         for (i, insn) in self.vcode.insts.iter_mut().enumerate() {
502             // Push operands from the instruction onto the operand list.
503             //
504             // We rename through the vreg alias table as we collect
505             // the operands. This is better than a separate post-pass
506             // over operands, because it has more cache locality:
507             // operands only need to pass through L1 once. This is
508             // also better than renaming instructions'
509             // operands/registers while lowering, because here we only
510             // need to do the `match` over the instruction to visit
511             // its register fields (which is slow, branchy code) once.
512 
513             let mut op_collector =
514                 OperandCollector::new(&mut self.vcode.operands, allocatable, |vreg| {
515                     vregs.resolve_vreg_alias(vreg)
516                 });
517             insn.get_operands(&mut op_collector);
518             let (ops, clobbers) = op_collector.finish();
519             self.vcode.operand_ranges.push_end(ops);
520 
521             if clobbers != PRegSet::default() {
522                 self.vcode.clobbers.insert(InsnIndex::new(i), clobbers);
523             }
524 
525             if let Some((dst, src)) = insn.is_move() {
526                 // We should never see non-virtual registers present in move
527                 // instructions.
528                 assert!(
529                     src.is_virtual(),
530                     "the real register {src:?} was used as the source of a move instruction"
531                 );
532                 assert!(
533                     dst.to_reg().is_virtual(),
534                     "the real register {:?} was used as the destination of a move instruction",
535                     dst.to_reg()
536                 );
537             }
538         }
539 
540         // Translate blockparam args via the vreg aliases table as well.
541         for arg in &mut self.vcode.branch_block_args {
542             let new_arg = vregs.resolve_vreg_alias(*arg);
543             trace!("operandcollector: block arg {:?} -> {:?}", arg, new_arg);
544             *arg = new_arg;
545         }
546     }
547 
548     /// Build the final VCode.
549     pub fn build(mut self, mut vregs: VRegAllocator<I>) -> VCode<I> {
550         self.vcode.vreg_types = take(&mut vregs.vreg_types);
551         self.vcode.facts = take(&mut vregs.facts);
552 
553         if self.direction == VCodeBuildDirection::Backward {
554             self.reverse_and_finalize(&vregs);
555         }
556         self.collect_operands(&vregs);
557 
558         self.compute_preds_from_succs();
559         self.vcode.debug_value_labels.sort_unstable();
560 
561         // At this point, nothing in the vcode should mention any
562         // VReg which has been aliased. All the appropriate rewriting
563         // should have happened above. Just to be sure, let's
564         // double-check each field which has vregs.
565         // Note: can't easily check vcode.insts, resolved in collect_operands.
566         // Operands are resolved in collect_operands.
567         vregs.debug_assert_no_vreg_aliases(self.vcode.operands.iter().map(|op| op.vreg()));
568         // Currently block params are never aliased to another vreg.
569         vregs.debug_assert_no_vreg_aliases(self.vcode.block_params.iter().copied());
570         // Branch block args are resolved in collect_operands.
571         vregs.debug_assert_no_vreg_aliases(self.vcode.branch_block_args.iter().copied());
572         // Debug value labels are resolved in reverse_and_finalize.
573         vregs.debug_assert_no_vreg_aliases(
574             self.vcode.debug_value_labels.iter().map(|&(vreg, ..)| vreg),
575         );
576         // Facts are resolved eagerly during set_vreg_alias.
577         vregs.debug_assert_no_vreg_aliases(
578             self.vcode
579                 .facts
580                 .iter()
581                 .zip(&vregs.vreg_types)
582                 .enumerate()
583                 .filter(|(_, (fact, _))| fact.is_some())
584                 .map(|(vreg, (_, &ty))| {
585                     let (regclasses, _) = I::rc_for_type(ty).unwrap();
586                     VReg::new(vreg, regclasses[0])
587                 }),
588         );
589 
590         self.vcode
591     }
592 
593     /// Add a user stack map for the associated instruction.
594     pub fn add_user_stack_map(
595         &mut self,
596         inst: BackwardsInsnIndex,
597         entries: &[ir::UserStackMapEntry],
598     ) {
599         let stack_map = ir::UserStackMap::new(entries, self.vcode.abi.sized_stackslot_offsets());
600         let old_entry = self.vcode.user_stack_maps.insert(inst, stack_map);
601         debug_assert!(old_entry.is_none());
602     }
603 }
604 
605 const NO_INST_OFFSET: CodeOffset = u32::MAX;
606 
607 impl<I: VCodeInst> VCode<I> {
608     /// New empty VCode.
609     fn new(
610         sigs: SigSet,
611         abi: Callee<I::ABIMachineSpec>,
612         emit_info: I::Info,
613         block_order: BlockLoweringOrder,
614         constants: VCodeConstants,
615         log2_min_function_alignment: u8,
616     ) -> Self {
617         let n_blocks = block_order.lowered_order().len();
618         VCode {
619             sigs,
620             vreg_types: vec![],
621             insts: Vec::with_capacity(10 * n_blocks),
622             user_stack_maps: FxHashMap::default(),
623             operands: Vec::with_capacity(30 * n_blocks),
624             operand_ranges: Ranges::with_capacity(10 * n_blocks),
625             clobbers: FxHashMap::default(),
626             srclocs: Vec::with_capacity(10 * n_blocks),
627             entry: BlockIndex::new(0),
628             block_ranges: Ranges::with_capacity(n_blocks),
629             block_succ_range: Ranges::with_capacity(n_blocks),
630             block_succs: Vec::with_capacity(n_blocks),
631             block_pred_range: Ranges::default(),
632             block_preds: Vec::new(),
633             block_params_range: Ranges::with_capacity(n_blocks),
634             block_params: Vec::with_capacity(5 * n_blocks),
635             branch_block_args: Vec::with_capacity(10 * n_blocks),
636             branch_block_arg_range: Ranges::with_capacity(2 * n_blocks),
637             branch_block_arg_succ_range: Ranges::with_capacity(n_blocks),
638             block_order,
639             abi,
640             emit_info,
641             constants,
642             debug_value_labels: vec![],
643             facts: vec![],
644             log2_min_function_alignment,
645         }
646     }
647 
648     /// Get the ABI-dependent MachineEnv for managing register allocation.
649     pub fn machine_env(&self) -> &MachineEnv {
650         self.abi.machine_env(&self.sigs)
651     }
652 
653     /// Get the number of blocks. Block indices will be in the range `0 ..
654     /// (self.num_blocks() - 1)`.
655     pub fn num_blocks(&self) -> usize {
656         self.block_ranges.len()
657     }
658 
659     /// The number of lowered instructions.
660     pub fn num_insts(&self) -> usize {
661         self.insts.len()
662     }
663 
664     fn compute_clobbers(&self, regalloc: &regalloc2::Output) -> Vec<Writable<RealReg>> {
665         let mut clobbered = PRegSet::default();
666 
667         // All moves are included in clobbers.
668         for (_, Edit::Move { to, .. }) in &regalloc.edits {
669             if let Some(preg) = to.as_reg() {
670                 clobbered.add(preg);
671             }
672         }
673 
674         for (i, range) in self.operand_ranges.iter() {
675             // Skip this instruction if not "included in clobbers" as
676             // per the MachInst. (Some backends use this to implement
677             // ABI specifics; e.g., excluding calls of the same ABI as
678             // the current function from clobbers, because by
679             // definition everything clobbered by the call can be
680             // clobbered by this function without saving as well.)
681             if !self.insts[i].is_included_in_clobbers() {
682                 continue;
683             }
684 
685             let operands = &self.operands[range.clone()];
686             let allocs = &regalloc.allocs[range];
687             for (operand, alloc) in operands.iter().zip(allocs.iter()) {
688                 if operand.kind() == OperandKind::Def {
689                     if let Some(preg) = alloc.as_reg() {
690                         clobbered.add(preg);
691                     }
692                 }
693             }
694 
695             // Also add explicitly-clobbered registers.
696             if let Some(&inst_clobbered) = self.clobbers.get(&InsnIndex::new(i)) {
697                 clobbered.union_from(inst_clobbered);
698             }
699         }
700 
701         clobbered
702             .into_iter()
703             .map(|preg| Writable::from_reg(RealReg::from(preg)))
704             .collect()
705     }
706 
707     /// Emit the instructions to a `MachBuffer`, containing fixed-up
708     /// code and external reloc/trap/etc. records ready for use. Takes
709     /// the regalloc results as well.
710     ///
711     /// Returns the machine code itself, and optionally metadata
712     /// and/or a disassembly, as an `EmitResult`. The `VCode` itself
713     /// is consumed by the emission process.
714     pub fn emit(
715         mut self,
716         regalloc: &regalloc2::Output,
717         want_disasm: bool,
718         flags: &settings::Flags,
719         ctrl_plane: &mut ControlPlane,
720     ) -> EmitResult
721     where
722         I: VCodeInst,
723     {
724         // To write into disasm string.
725         use core::fmt::Write;
726 
727         let _tt = timing::vcode_emit();
728         let mut buffer = MachBuffer::new();
729         buffer.set_log2_min_function_alignment(self.log2_min_function_alignment);
730         let mut bb_starts: Vec<Option<CodeOffset>> = vec![];
731 
732         // The first M MachLabels are reserved for block indices.
733         buffer.reserve_labels_for_blocks(self.num_blocks());
734 
735         // Register all allocated constants with the `MachBuffer` to ensure that
736         // any references to the constants during instructions can be handled
737         // correctly.
738         buffer.register_constants(&self.constants);
739 
740         // Construct the final order we emit code in: cold blocks at the end.
741         let mut final_order: SmallVec<[BlockIndex; 16]> = smallvec![];
742         let mut cold_blocks: SmallVec<[BlockIndex; 16]> = smallvec![];
743         for block in 0..self.num_blocks() {
744             let block = BlockIndex::new(block);
745             if self.block_order.is_cold(block) {
746                 cold_blocks.push(block);
747             } else {
748                 final_order.push(block);
749             }
750         }
751         final_order.extend(cold_blocks.clone());
752 
753         // Compute/save info we need for the prologue: clobbers and
754         // number of spillslots.
755         //
756         // We clone `abi` here because we will mutate it as we
757         // generate the prologue and set other info, but we can't
758         // mutate `VCode`. The info it usually carries prior to
759         // setting clobbers is fairly minimal so this should be
760         // relatively cheap.
761         let clobbers = self.compute_clobbers(regalloc);
762         self.abi
763             .compute_frame_layout(&self.sigs, regalloc.num_spillslots, clobbers);
764 
765         // Emit blocks.
766         let mut cur_srcloc = None;
767         let mut last_offset = None;
768         let mut inst_offsets = vec![];
769         let mut state = I::State::new(&self.abi, std::mem::take(ctrl_plane));
770 
771         let mut disasm = String::new();
772 
773         if !self.debug_value_labels.is_empty() {
774             inst_offsets.resize(self.insts.len(), NO_INST_OFFSET);
775         }
776 
777         // Count edits per block ahead of time; this is needed for
778         // lookahead island emission. (We could derive it per-block
779         // with binary search in the edit list, but it's more
780         // efficient to do it in one pass here.)
781         let mut ra_edits_per_block: SmallVec<[u32; 64]> = smallvec![];
782         let mut edit_idx = 0;
783         for block in 0..self.num_blocks() {
784             let end_inst = InsnIndex::new(self.block_ranges.get(block).end);
785             let start_edit_idx = edit_idx;
786             while edit_idx < regalloc.edits.len() && regalloc.edits[edit_idx].0.inst() < end_inst {
787                 edit_idx += 1;
788             }
789             let end_edit_idx = edit_idx;
790             ra_edits_per_block.push((end_edit_idx - start_edit_idx) as u32);
791         }
792 
793         let is_forward_edge_cfi_enabled = self.abi.is_forward_edge_cfi_enabled();
794         let mut bb_padding = match flags.bb_padding_log2_minus_one() {
795             0 => Vec::new(),
796             n => vec![0; 1 << (n - 1)],
797         };
798         let mut total_bb_padding = 0;
799 
800         for (block_order_idx, &block) in final_order.iter().enumerate() {
801             trace!("emitting block {:?}", block);
802 
803             // Call the new block hook for state
804             state.on_new_block();
805 
806             // Emit NOPs to align the block.
807             let new_offset = I::align_basic_block(buffer.cur_offset());
808             while new_offset > buffer.cur_offset() {
809                 // Pad with NOPs up to the aligned block offset.
810                 let nop = I::gen_nop((new_offset - buffer.cur_offset()) as usize);
811                 nop.emit(&mut buffer, &self.emit_info, &mut Default::default());
812             }
813             assert_eq!(buffer.cur_offset(), new_offset);
814 
815             let do_emit = |inst: &I,
816                            disasm: &mut String,
817                            buffer: &mut MachBuffer<I>,
818                            state: &mut I::State| {
819                 if want_disasm && !inst.is_args() {
820                     let mut s = state.clone();
821                     writeln!(disasm, "  {}", inst.pretty_print_inst(&mut s)).unwrap();
822                 }
823                 inst.emit(buffer, &self.emit_info, state);
824             };
825 
826             // Is this the first block? Emit the prologue directly if so.
827             if block == self.entry {
828                 trace!(" -> entry block");
829                 buffer.start_srcloc(Default::default());
830                 for inst in &self.abi.gen_prologue() {
831                     do_emit(&inst, &mut disasm, &mut buffer, &mut state);
832                 }
833                 buffer.end_srcloc();
834             }
835 
836             // Now emit the regular block body.
837 
838             buffer.bind_label(MachLabel::from_block(block), state.ctrl_plane_mut());
839 
840             if want_disasm {
841                 writeln!(&mut disasm, "block{}:", block.index()).unwrap();
842             }
843 
844             if flags.machine_code_cfg_info() {
845                 // Track BB starts. If we have backed up due to MachBuffer
846                 // branch opts, note that the removed blocks were removed.
847                 let cur_offset = buffer.cur_offset();
848                 if last_offset.is_some() && cur_offset <= last_offset.unwrap() {
849                     for i in (0..bb_starts.len()).rev() {
850                         if bb_starts[i].is_some() && cur_offset > bb_starts[i].unwrap() {
851                             break;
852                         }
853                         bb_starts[i] = None;
854                     }
855                 }
856                 bb_starts.push(Some(cur_offset));
857                 last_offset = Some(cur_offset);
858             }
859 
860             if let Some(block_start) = I::gen_block_start(
861                 self.block_order.is_indirect_branch_target(block),
862                 is_forward_edge_cfi_enabled,
863             ) {
864                 do_emit(&block_start, &mut disasm, &mut buffer, &mut state);
865             }
866 
867             for inst_or_edit in regalloc.block_insts_and_edits(&self, block) {
868                 match inst_or_edit {
869                     InstOrEdit::Inst(iix) => {
870                         if !self.debug_value_labels.is_empty() {
871                             // If we need to produce debug info,
872                             // record the offset of each instruction
873                             // so that we can translate value-label
874                             // ranges to machine-code offsets.
875 
876                             // Cold blocks violate monotonicity
877                             // assumptions elsewhere (that
878                             // instructions in inst-index order are in
879                             // order in machine code), so we omit
880                             // their offsets here. Value-label range
881                             // generation below will skip empty ranges
882                             // and ranges with to-offsets of zero.
883                             if !self.block_order.is_cold(block) {
884                                 inst_offsets[iix.index()] = buffer.cur_offset();
885                             }
886                         }
887 
888                         // Update the srcloc at this point in the buffer.
889                         let srcloc = self.srclocs[iix.index()];
890                         if cur_srcloc != Some(srcloc) {
891                             if cur_srcloc.is_some() {
892                                 buffer.end_srcloc();
893                             }
894                             buffer.start_srcloc(srcloc);
895                             cur_srcloc = Some(srcloc);
896                         }
897 
898                         // If this is a safepoint, compute a stack map
899                         // and pass it to the emit state.
900                         let stack_map_disasm = if self.insts[iix.index()].is_safepoint() {
901                             let (user_stack_map, user_stack_map_disasm) = {
902                                 // The `user_stack_maps` is keyed by reverse
903                                 // instruction index, so we must flip the
904                                 // index. We can't put this into a helper method
905                                 // due to borrowck issues because parts of
906                                 // `self` are borrowed mutably elsewhere in this
907                                 // function.
908                                 let index = iix.to_backwards_insn_index(self.num_insts());
909                                 let user_stack_map = self.user_stack_maps.remove(&index);
910                                 let user_stack_map_disasm =
911                                     user_stack_map.as_ref().map(|m| format!("  ; {m:?}"));
912                                 (user_stack_map, user_stack_map_disasm)
913                             };
914 
915                             state.pre_safepoint(user_stack_map);
916 
917                             user_stack_map_disasm
918                         } else {
919                             None
920                         };
921 
922                         // If the instruction we are about to emit is
923                         // a return, place an epilogue at this point
924                         // (and don't emit the return; the actual
925                         // epilogue will contain it).
926                         if self.insts[iix.index()].is_term() == MachTerminator::Ret {
927                             for inst in self.abi.gen_epilogue() {
928                                 do_emit(&inst, &mut disasm, &mut buffer, &mut state);
929                             }
930                         } else {
931                             // Update the operands for this inst using the
932                             // allocations from the regalloc result.
933                             let mut allocs = regalloc.inst_allocs(iix).iter();
934                             self.insts[iix.index()].get_operands(
935                                 &mut |reg: &mut Reg, constraint, _kind, _pos| {
936                                     let alloc = allocs
937                                         .next()
938                                         .expect("enough allocations for all operands")
939                                         .as_reg()
940                                         .expect("only register allocations, not stack allocations")
941                                         .into();
942 
943                                     if let OperandConstraint::FixedReg(rreg) = constraint {
944                                         debug_assert_eq!(Reg::from(rreg), alloc);
945                                     }
946                                     *reg = alloc;
947                                 },
948                             );
949                             debug_assert!(allocs.next().is_none());
950 
951                             // Emit the instruction!
952                             do_emit(
953                                 &self.insts[iix.index()],
954                                 &mut disasm,
955                                 &mut buffer,
956                                 &mut state,
957                             );
958                             if let Some(stack_map_disasm) = stack_map_disasm {
959                                 disasm.push_str(&stack_map_disasm);
960                                 disasm.push('\n');
961                             }
962                         }
963                     }
964 
965                     InstOrEdit::Edit(Edit::Move { from, to }) => {
966                         // Create a move/spill/reload instruction and
967                         // immediately emit it.
968                         match (from.as_reg(), to.as_reg()) {
969                             (Some(from), Some(to)) => {
970                                 // Reg-to-reg move.
971                                 let from_rreg = Reg::from(from);
972                                 let to_rreg = Writable::from_reg(Reg::from(to));
973                                 debug_assert_eq!(from.class(), to.class());
974                                 let ty = I::canonical_type_for_rc(from.class());
975                                 let mv = I::gen_move(to_rreg, from_rreg, ty);
976                                 do_emit(&mv, &mut disasm, &mut buffer, &mut state);
977                             }
978                             (Some(from), None) => {
979                                 // Spill from register to spillslot.
980                                 let to = to.as_stack().unwrap();
981                                 let from_rreg = RealReg::from(from);
982                                 let spill = self.abi.gen_spill(to, from_rreg);
983                                 do_emit(&spill, &mut disasm, &mut buffer, &mut state);
984                             }
985                             (None, Some(to)) => {
986                                 // Load from spillslot to register.
987                                 let from = from.as_stack().unwrap();
988                                 let to_rreg = Writable::from_reg(RealReg::from(to));
989                                 let reload = self.abi.gen_reload(to_rreg, from);
990                                 do_emit(&reload, &mut disasm, &mut buffer, &mut state);
991                             }
992                             (None, None) => {
993                                 panic!("regalloc2 should have eliminated stack-to-stack moves!");
994                             }
995                         }
996                     }
997                 }
998             }
999 
1000             if cur_srcloc.is_some() {
1001                 buffer.end_srcloc();
1002                 cur_srcloc = None;
1003             }
1004 
1005             // Do we need an island? Get the worst-case size of the next BB, add
1006             // it to the optional padding behind the block, and pass this to the
1007             // `MachBuffer` to determine if an island is necessary.
1008             let worst_case_next_bb = if block_order_idx < final_order.len() - 1 {
1009                 let next_block = final_order[block_order_idx + 1];
1010                 let next_block_range = self.block_ranges.get(next_block.index());
1011                 let next_block_size = next_block_range.len() as u32;
1012                 let next_block_ra_insertions = ra_edits_per_block[next_block.index()];
1013                 I::worst_case_size() * (next_block_size + next_block_ra_insertions)
1014             } else {
1015                 0
1016             };
1017             let padding = if bb_padding.is_empty() {
1018                 0
1019             } else {
1020                 bb_padding.len() as u32 + I::LabelUse::ALIGN - 1
1021             };
1022             if buffer.island_needed(padding + worst_case_next_bb) {
1023                 buffer.emit_island(padding + worst_case_next_bb, ctrl_plane);
1024             }
1025 
1026             // Insert padding, if configured, to stress the `MachBuffer`'s
1027             // relocation and island calculations.
1028             //
1029             // Padding can get quite large during fuzzing though so place a
1030             // total cap on it where when a per-function threshold is exceeded
1031             // the padding is turned back down to zero. This avoids a small-ish
1032             // test case generating a GB+ memory footprint in Cranelift for
1033             // example.
1034             if !bb_padding.is_empty() {
1035                 buffer.put_data(&bb_padding);
1036                 buffer.align_to(I::LabelUse::ALIGN);
1037                 total_bb_padding += bb_padding.len();
1038                 if total_bb_padding > (150 << 20) {
1039                     bb_padding = Vec::new();
1040                 }
1041             }
1042         }
1043 
1044         debug_assert!(
1045             self.user_stack_maps.is_empty(),
1046             "any stack maps should have been consumed by instruction emission, still have: {:#?}",
1047             self.user_stack_maps,
1048         );
1049 
1050         // Do any optimizations on branches at tail of buffer, as if we had
1051         // bound one last label.
1052         buffer.optimize_branches(ctrl_plane);
1053 
1054         // emission state is not needed anymore, move control plane back out
1055         *ctrl_plane = state.take_ctrl_plane();
1056 
1057         let func_body_len = buffer.cur_offset();
1058 
1059         // Create `bb_edges` and final (filtered) `bb_starts`.
1060         let mut bb_edges = vec![];
1061         let mut bb_offsets = vec![];
1062         if flags.machine_code_cfg_info() {
1063             for block in 0..self.num_blocks() {
1064                 if bb_starts[block].is_none() {
1065                     // Block was deleted by MachBuffer; skip.
1066                     continue;
1067                 }
1068                 let from = bb_starts[block].unwrap();
1069 
1070                 bb_offsets.push(from);
1071                 // Resolve each `succ` label and add edges.
1072                 let succs = self.block_succs(BlockIndex::new(block));
1073                 for &succ in succs.iter() {
1074                     let to = buffer.resolve_label_offset(MachLabel::from_block(succ));
1075                     bb_edges.push((from, to));
1076                 }
1077             }
1078         }
1079 
1080         self.monotonize_inst_offsets(&mut inst_offsets[..], func_body_len);
1081         let value_labels_ranges =
1082             self.compute_value_labels_ranges(regalloc, &inst_offsets[..], func_body_len);
1083         let frame_size = self.abi.frame_size();
1084 
1085         EmitResult {
1086             buffer: buffer.finish(&self.constants, ctrl_plane),
1087             bb_offsets,
1088             bb_edges,
1089             func_body_len,
1090             disasm: if want_disasm { Some(disasm) } else { None },
1091             sized_stackslot_offsets: self.abi.sized_stackslot_offsets().clone(),
1092             dynamic_stackslot_offsets: self.abi.dynamic_stackslot_offsets().clone(),
1093             value_labels_ranges,
1094             frame_size,
1095         }
1096     }
1097 
1098     fn monotonize_inst_offsets(&self, inst_offsets: &mut [CodeOffset], func_body_len: u32) {
1099         if self.debug_value_labels.is_empty() {
1100             return;
1101         }
1102 
1103         // During emission, branch removal can make offsets of instructions incorrect.
1104         // Consider the following sequence: [insi][jmp0][jmp1][jmp2][insj]
1105         // It will be recorded as (say):    [30]  [34]  [38]  [42]  [<would be 46>]
1106         // When the jumps get removed we are left with (in "inst_offsets"):
1107         // [insi][jmp0][jmp1][jmp2][insj][...]
1108         // [30]  [34]  [38]  [42]  [34]
1109         // Which violates the monotonicity invariant. This method sets offsets of these
1110         // removed instructions such as to make them appear zero-sized:
1111         // [insi][jmp0][jmp1][jmp2][insj][...]
1112         // [30]  [34]  [34]  [34]  [34]
1113         //
1114         let mut next_offset = func_body_len;
1115         for inst_index in (0..(inst_offsets.len() - 1)).rev() {
1116             let inst_offset = inst_offsets[inst_index];
1117 
1118             // Not all instructions get their offsets recorded.
1119             if inst_offset == NO_INST_OFFSET {
1120                 continue;
1121             }
1122 
1123             if inst_offset > next_offset {
1124                 trace!(
1125                     "Fixing code offset of the removed Inst {}: {} -> {}",
1126                     inst_index,
1127                     inst_offset,
1128                     next_offset
1129                 );
1130                 inst_offsets[inst_index] = next_offset;
1131                 continue;
1132             }
1133 
1134             next_offset = inst_offset;
1135         }
1136     }
1137 
1138     fn compute_value_labels_ranges(
1139         &self,
1140         regalloc: &regalloc2::Output,
1141         inst_offsets: &[CodeOffset],
1142         func_body_len: u32,
1143     ) -> ValueLabelsRanges {
1144         if self.debug_value_labels.is_empty() {
1145             return ValueLabelsRanges::default();
1146         }
1147 
1148         let mut value_labels_ranges: ValueLabelsRanges = HashMap::new();
1149         for &(label, from, to, alloc) in &regalloc.debug_locations {
1150             let ranges = value_labels_ranges
1151                 .entry(ValueLabel::from_u32(label))
1152                 .or_insert_with(|| vec![]);
1153             let from_offset = inst_offsets[from.inst().index()];
1154             let to_offset = if to.inst().index() == inst_offsets.len() {
1155                 func_body_len
1156             } else {
1157                 inst_offsets[to.inst().index()]
1158             };
1159 
1160             // Empty ranges or unavailable offsets can happen
1161             // due to cold blocks and branch removal (see above).
1162             if from_offset == NO_INST_OFFSET
1163                 || to_offset == NO_INST_OFFSET
1164                 || from_offset == to_offset
1165             {
1166                 continue;
1167             }
1168 
1169             let loc = if let Some(preg) = alloc.as_reg() {
1170                 LabelValueLoc::Reg(Reg::from(preg))
1171             } else {
1172                 let slot = alloc.as_stack().unwrap();
1173                 let slot_offset = self.abi.get_spillslot_offset(slot);
1174                 let slot_base_to_caller_sp_offset = self.abi.slot_base_to_caller_sp_offset();
1175                 let caller_sp_to_cfa_offset =
1176                     crate::isa::unwind::systemv::caller_sp_to_cfa_offset();
1177                 // NOTE: this is a negative offset because it's relative to the caller's SP
1178                 let cfa_to_sp_offset =
1179                     -((slot_base_to_caller_sp_offset + caller_sp_to_cfa_offset) as i64);
1180                 LabelValueLoc::CFAOffset(cfa_to_sp_offset + slot_offset)
1181             };
1182 
1183             // ValueLocRanges are recorded by *instruction-end
1184             // offset*. `from_offset` is the *start* of the
1185             // instruction; that is the same as the end of another
1186             // instruction, so we only want to begin coverage once
1187             // we are past the previous instruction's end.
1188             let start = from_offset + 1;
1189 
1190             // Likewise, `end` is exclusive, but we want to
1191             // *include* the end of the last
1192             // instruction. `to_offset` is the start of the
1193             // `to`-instruction, which is the exclusive end, i.e.,
1194             // the first instruction not covered. That
1195             // instruction's start is the same as the end of the
1196             // last instruction that is included, so we go one
1197             // byte further to be sure to include it.
1198             let end = to_offset + 1;
1199 
1200             // Coalesce adjacent ranges that for the same location
1201             // to minimize output size here and for the consumers.
1202             if let Some(last_loc_range) = ranges.last_mut() {
1203                 if last_loc_range.loc == loc && last_loc_range.end == start {
1204                     trace!(
1205                         "Extending debug range for VL{} in {:?} to {}",
1206                         label,
1207                         loc,
1208                         end
1209                     );
1210                     last_loc_range.end = end;
1211                     continue;
1212                 }
1213             }
1214 
1215             trace!(
1216                 "Recording debug range for VL{} in {:?}: [Inst {}..Inst {}) [{}..{})",
1217                 label,
1218                 loc,
1219                 from.inst().index(),
1220                 to.inst().index(),
1221                 start,
1222                 end
1223             );
1224 
1225             ranges.push(ValueLocRange { loc, start, end });
1226         }
1227 
1228         value_labels_ranges
1229     }
1230 
1231     /// Get the IR block for a BlockIndex, if one exists.
1232     pub fn bindex_to_bb(&self, block: BlockIndex) -> Option<ir::Block> {
1233         self.block_order.lowered_order()[block.index()].orig_block()
1234     }
1235 
1236     /// Get the type of a VReg.
1237     pub fn vreg_type(&self, vreg: VReg) -> Type {
1238         self.vreg_types[vreg.vreg()]
1239     }
1240 
1241     /// Get the fact, if any, for a given VReg.
1242     pub fn vreg_fact(&self, vreg: VReg) -> Option<&Fact> {
1243         self.facts[vreg.vreg()].as_ref()
1244     }
1245 
1246     /// Set the fact for a given VReg.
1247     pub fn set_vreg_fact(&mut self, vreg: VReg, fact: Fact) {
1248         trace!("set fact on {}: {:?}", vreg, fact);
1249         self.facts[vreg.vreg()] = Some(fact);
1250     }
1251 
1252     /// Does a given instruction define any facts?
1253     pub fn inst_defines_facts(&self, inst: InsnIndex) -> bool {
1254         self.inst_operands(inst)
1255             .iter()
1256             .filter(|o| o.kind() == OperandKind::Def)
1257             .map(|o| o.vreg())
1258             .any(|vreg| self.facts[vreg.vreg()].is_some())
1259     }
1260 
1261     /// Get the user stack map associated with the given forward instruction index.
1262     pub fn get_user_stack_map(&self, inst: InsnIndex) -> Option<&ir::UserStackMap> {
1263         let index = inst.to_backwards_insn_index(self.num_insts());
1264         self.user_stack_maps.get(&index)
1265     }
1266 }
1267 
1268 impl<I: VCodeInst> std::ops::Index<InsnIndex> for VCode<I> {
1269     type Output = I;
1270     fn index(&self, idx: InsnIndex) -> &Self::Output {
1271         &self.insts[idx.index()]
1272     }
1273 }
1274 
1275 impl<I: VCodeInst> RegallocFunction for VCode<I> {
1276     fn num_insts(&self) -> usize {
1277         self.insts.len()
1278     }
1279 
1280     fn num_blocks(&self) -> usize {
1281         self.block_ranges.len()
1282     }
1283 
1284     fn entry_block(&self) -> BlockIndex {
1285         self.entry
1286     }
1287 
1288     fn block_insns(&self, block: BlockIndex) -> InstRange {
1289         let range = self.block_ranges.get(block.index());
1290         InstRange::new(InsnIndex::new(range.start), InsnIndex::new(range.end))
1291     }
1292 
1293     fn block_succs(&self, block: BlockIndex) -> &[BlockIndex] {
1294         let range = self.block_succ_range.get(block.index());
1295         &self.block_succs[range]
1296     }
1297 
1298     fn block_preds(&self, block: BlockIndex) -> &[BlockIndex] {
1299         let range = self.block_pred_range.get(block.index());
1300         &self.block_preds[range]
1301     }
1302 
1303     fn block_params(&self, block: BlockIndex) -> &[VReg] {
1304         // As a special case we don't return block params for the entry block, as all the arguments
1305         // will be defined by the `Inst::Args` instruction.
1306         if block == self.entry {
1307             return &[];
1308         }
1309 
1310         let range = self.block_params_range.get(block.index());
1311         &self.block_params[range]
1312     }
1313 
1314     fn branch_blockparams(&self, block: BlockIndex, _insn: InsnIndex, succ_idx: usize) -> &[VReg] {
1315         let succ_range = self.branch_block_arg_succ_range.get(block.index());
1316         debug_assert!(succ_idx < succ_range.len());
1317         let branch_block_args = self.branch_block_arg_range.get(succ_range.start + succ_idx);
1318         &self.branch_block_args[branch_block_args]
1319     }
1320 
1321     fn is_ret(&self, insn: InsnIndex) -> bool {
1322         match self.insts[insn.index()].is_term() {
1323             // We treat blocks terminated by an unconditional trap like a return for regalloc.
1324             MachTerminator::None => self.insts[insn.index()].is_trap(),
1325             MachTerminator::Ret | MachTerminator::RetCall => true,
1326             MachTerminator::Uncond | MachTerminator::Cond | MachTerminator::Indirect => false,
1327         }
1328     }
1329 
1330     fn is_branch(&self, insn: InsnIndex) -> bool {
1331         match self.insts[insn.index()].is_term() {
1332             MachTerminator::Cond | MachTerminator::Uncond | MachTerminator::Indirect => true,
1333             _ => false,
1334         }
1335     }
1336 
1337     fn inst_operands(&self, insn: InsnIndex) -> &[Operand] {
1338         let range = self.operand_ranges.get(insn.index());
1339         &self.operands[range]
1340     }
1341 
1342     fn inst_clobbers(&self, insn: InsnIndex) -> PRegSet {
1343         self.clobbers.get(&insn).cloned().unwrap_or_default()
1344     }
1345 
1346     fn num_vregs(&self) -> usize {
1347         self.vreg_types.len()
1348     }
1349 
1350     fn debug_value_labels(&self) -> &[(VReg, InsnIndex, InsnIndex, u32)] {
1351         &self.debug_value_labels
1352     }
1353 
1354     fn spillslot_size(&self, regclass: RegClass) -> usize {
1355         self.abi.get_spillslot_size(regclass) as usize
1356     }
1357 
1358     fn allow_multiple_vreg_defs(&self) -> bool {
1359         // At least the s390x backend requires this, because the
1360         // `Loop` pseudo-instruction aggregates all Operands so pinned
1361         // vregs (RealRegs) may occur more than once.
1362         true
1363     }
1364 }
1365 
1366 impl<I: VCodeInst> Debug for VRegAllocator<I> {
1367     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1368         writeln!(f, "VRegAllocator {{")?;
1369 
1370         let mut alias_keys = self.vreg_aliases.keys().cloned().collect::<Vec<_>>();
1371         alias_keys.sort_unstable();
1372         for key in alias_keys {
1373             let dest = self.vreg_aliases.get(&key).unwrap();
1374             writeln!(f, "  {:?} := {:?}", Reg::from(key), Reg::from(*dest))?;
1375         }
1376 
1377         for (vreg, fact) in self.facts.iter().enumerate() {
1378             if let Some(fact) = fact {
1379                 writeln!(f, "  v{vreg} ! {fact}")?;
1380             }
1381         }
1382 
1383         writeln!(f, "}}")
1384     }
1385 }
1386 
1387 impl<I: VCodeInst> fmt::Debug for VCode<I> {
1388     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1389         writeln!(f, "VCode {{")?;
1390         writeln!(f, "  Entry block: {}", self.entry.index())?;
1391 
1392         let mut state = Default::default();
1393 
1394         for block in 0..self.num_blocks() {
1395             let block = BlockIndex::new(block);
1396             writeln!(
1397                 f,
1398                 "Block {}({:?}):",
1399                 block.index(),
1400                 self.block_params(block)
1401             )?;
1402             if let Some(bb) = self.bindex_to_bb(block) {
1403                 writeln!(f, "    (original IR block: {bb})")?;
1404             }
1405             for (succ_idx, succ) in self.block_succs(block).iter().enumerate() {
1406                 writeln!(
1407                     f,
1408                     "    (successor: Block {}({:?}))",
1409                     succ.index(),
1410                     self.branch_blockparams(block, InsnIndex::new(0) /* dummy */, succ_idx)
1411                 )?;
1412             }
1413             for inst in self.block_ranges.get(block.index()) {
1414                 writeln!(
1415                     f,
1416                     "  Inst {}: {}",
1417                     inst,
1418                     self.insts[inst].pretty_print_inst(&mut state)
1419                 )?;
1420                 if !self.operands.is_empty() {
1421                     for operand in self.inst_operands(InsnIndex::new(inst)) {
1422                         if operand.kind() == OperandKind::Def {
1423                             if let Some(fact) = &self.facts[operand.vreg().vreg()] {
1424                                 writeln!(f, "    v{} ! {}", operand.vreg().vreg(), fact)?;
1425                             }
1426                         }
1427                     }
1428                 }
1429                 if let Some(user_stack_map) = self.get_user_stack_map(InsnIndex::new(inst)) {
1430                     writeln!(f, "    {user_stack_map:?}")?;
1431                 }
1432             }
1433         }
1434 
1435         writeln!(f, "}}")?;
1436         Ok(())
1437     }
1438 }
1439 
1440 /// This structure manages VReg allocation during the lifetime of the VCodeBuilder.
1441 pub struct VRegAllocator<I> {
1442     /// VReg IR-level types.
1443     vreg_types: Vec<Type>,
1444 
1445     /// VReg aliases. When the final VCode is built we rewrite all
1446     /// uses of the keys in this table to their replacement values.
1447     ///
1448     /// We use these aliases to rename an instruction's expected
1449     /// result vregs to the returned vregs from lowering, which are
1450     /// usually freshly-allocated temps.
1451     vreg_aliases: FxHashMap<regalloc2::VReg, regalloc2::VReg>,
1452 
1453     /// A deferred error, to be bubbled up to the top level of the
1454     /// lowering algorithm. We take this approach because we cannot
1455     /// currently propagate a `Result` upward through ISLE code (the
1456     /// lowering rules) or some ABI code.
1457     deferred_error: Option<CodegenError>,
1458 
1459     /// Facts on VRegs, for proof-carrying code.
1460     facts: Vec<Option<Fact>>,
1461 
1462     /// The type of instruction that this allocator makes registers for.
1463     _inst: core::marker::PhantomData<I>,
1464 }
1465 
1466 impl<I: VCodeInst> VRegAllocator<I> {
1467     /// Make a new VRegAllocator.
1468     pub fn with_capacity(capacity: usize) -> Self {
1469         let capacity = first_user_vreg_index() + capacity;
1470         let mut vreg_types = Vec::with_capacity(capacity);
1471         vreg_types.resize(first_user_vreg_index(), types::INVALID);
1472         Self {
1473             vreg_types,
1474             vreg_aliases: FxHashMap::with_capacity_and_hasher(capacity, Default::default()),
1475             deferred_error: None,
1476             facts: Vec::with_capacity(capacity),
1477             _inst: core::marker::PhantomData::default(),
1478         }
1479     }
1480 
1481     /// Allocate a fresh ValueRegs.
1482     pub fn alloc(&mut self, ty: Type) -> CodegenResult<ValueRegs<Reg>> {
1483         if self.deferred_error.is_some() {
1484             return Err(CodegenError::CodeTooLarge);
1485         }
1486         let v = self.vreg_types.len();
1487         let (regclasses, tys) = I::rc_for_type(ty)?;
1488         if v + regclasses.len() >= VReg::MAX {
1489             return Err(CodegenError::CodeTooLarge);
1490         }
1491 
1492         let regs: ValueRegs<Reg> = match regclasses {
1493             &[rc0] => ValueRegs::one(VReg::new(v, rc0).into()),
1494             &[rc0, rc1] => ValueRegs::two(VReg::new(v, rc0).into(), VReg::new(v + 1, rc1).into()),
1495             // We can extend this if/when we support 32-bit targets; e.g.,
1496             // an i128 on a 32-bit machine will need up to four machine regs
1497             // for a `Value`.
1498             _ => panic!("Value must reside in 1 or 2 registers"),
1499         };
1500         for (&reg_ty, &reg) in tys.iter().zip(regs.regs().iter()) {
1501             let vreg = reg.to_virtual_reg().unwrap();
1502             debug_assert_eq!(self.vreg_types.len(), vreg.index());
1503             self.vreg_types.push(reg_ty);
1504         }
1505 
1506         // Create empty facts for each allocated vreg.
1507         self.facts.resize(self.vreg_types.len(), None);
1508 
1509         Ok(regs)
1510     }
1511 
1512     /// Allocate a fresh ValueRegs, deferring any out-of-vregs
1513     /// errors. This is useful in places where we cannot bubble a
1514     /// `CodegenResult` upward easily, and which are known to be
1515     /// invoked from within the lowering loop that checks the deferred
1516     /// error status below.
1517     pub fn alloc_with_deferred_error(&mut self, ty: Type) -> ValueRegs<Reg> {
1518         match self.alloc(ty) {
1519             Ok(x) => x,
1520             Err(e) => {
1521                 self.deferred_error = Some(e);
1522                 self.bogus_for_deferred_error(ty)
1523             }
1524         }
1525     }
1526 
1527     /// Take any deferred error that was accumulated by `alloc_with_deferred_error`.
1528     pub fn take_deferred_error(&mut self) -> Option<CodegenError> {
1529         self.deferred_error.take()
1530     }
1531 
1532     /// Produce an bogus VReg placeholder with the proper number of
1533     /// registers for the given type. This is meant to be used with
1534     /// deferred allocation errors (see `Lower::alloc_tmp()`).
1535     fn bogus_for_deferred_error(&self, ty: Type) -> ValueRegs<Reg> {
1536         let (regclasses, _tys) = I::rc_for_type(ty).expect("must have valid type");
1537         match regclasses {
1538             &[rc0] => ValueRegs::one(VReg::new(0, rc0).into()),
1539             &[rc0, rc1] => ValueRegs::two(VReg::new(0, rc0).into(), VReg::new(1, rc1).into()),
1540             _ => panic!("Value must reside in 1 or 2 registers"),
1541         }
1542     }
1543 
1544     /// Rewrite any mention of `from` into `to`.
1545     pub fn set_vreg_alias(&mut self, from: Reg, to: Reg) {
1546         let from = from.into();
1547         let resolved_to = self.resolve_vreg_alias(to.into());
1548         // Disallow cycles (see below).
1549         assert_ne!(resolved_to, from);
1550 
1551         // Maintain the invariant that PCC facts only exist on vregs
1552         // which aren't aliases. We want to preserve whatever was
1553         // stated about the vreg before its producer was lowered.
1554         if let Some(fact) = self.facts[from.vreg()].take() {
1555             self.set_fact(resolved_to, fact);
1556         }
1557 
1558         let old_alias = self.vreg_aliases.insert(from, resolved_to);
1559         debug_assert_eq!(old_alias, None);
1560     }
1561 
1562     fn resolve_vreg_alias(&self, mut vreg: regalloc2::VReg) -> regalloc2::VReg {
1563         // We prevent cycles from existing by resolving targets of
1564         // aliases eagerly before setting them. If the target resolves
1565         // to the origin of the alias, then a cycle would be created
1566         // and the alias is disallowed. Because of the structure of
1567         // SSA code (one instruction can refer to another's defs but
1568         // not vice-versa, except indirectly through
1569         // phis/blockparams), cycles should not occur as we use
1570         // aliases to redirect vregs to the temps that actually define
1571         // them.
1572         while let Some(to) = self.vreg_aliases.get(&vreg) {
1573             vreg = *to;
1574         }
1575         vreg
1576     }
1577 
1578     #[inline]
1579     fn debug_assert_no_vreg_aliases(&self, mut list: impl Iterator<Item = VReg>) {
1580         debug_assert!(list.all(|vreg| !self.vreg_aliases.contains_key(&vreg)));
1581     }
1582 
1583     /// Set the proof-carrying code fact on a given virtual register.
1584     ///
1585     /// Returns the old fact, if any (only one fact can be stored).
1586     fn set_fact(&mut self, vreg: regalloc2::VReg, fact: Fact) -> Option<Fact> {
1587         trace!("vreg {:?} has fact: {:?}", vreg, fact);
1588         debug_assert!(!self.vreg_aliases.contains_key(&vreg));
1589         self.facts[vreg.vreg()].replace(fact)
1590     }
1591 
1592     /// Set a fact only if one doesn't already exist.
1593     pub fn set_fact_if_missing(&mut self, vreg: VirtualReg, fact: Fact) {
1594         let vreg = self.resolve_vreg_alias(vreg.into());
1595         if self.facts[vreg.vreg()].is_none() {
1596             self.set_fact(vreg, fact);
1597         }
1598     }
1599 
1600     /// Allocate a fresh ValueRegs, with a given fact to apply if
1601     /// the value fits in one VReg.
1602     pub fn alloc_with_maybe_fact(
1603         &mut self,
1604         ty: Type,
1605         fact: Option<Fact>,
1606     ) -> CodegenResult<ValueRegs<Reg>> {
1607         let result = self.alloc(ty)?;
1608 
1609         // Ensure that we don't lose a fact on a value that splits
1610         // into multiple VRegs.
1611         assert!(result.len() == 1 || fact.is_none());
1612         if let Some(fact) = fact {
1613             self.set_fact(result.regs()[0].into(), fact);
1614         }
1615 
1616         Ok(result)
1617     }
1618 }
1619 
1620 /// This structure tracks the large constants used in VCode that will be emitted separately by the
1621 /// [MachBuffer].
1622 ///
1623 /// First, during the lowering phase, constants are inserted using
1624 /// [VCodeConstants.insert]; an intermediate handle, `VCodeConstant`, tracks what constants are
1625 /// used in this phase. Some deduplication is performed, when possible, as constant
1626 /// values are inserted.
1627 ///
1628 /// Secondly, during the emission phase, the [MachBuffer] assigns [MachLabel]s for each of the
1629 /// constants so that instructions can refer to the value's memory location. The [MachBuffer]
1630 /// then writes the constant values to the buffer.
1631 #[derive(Default)]
1632 pub struct VCodeConstants {
1633     constants: PrimaryMap<VCodeConstant, VCodeConstantData>,
1634     pool_uses: HashMap<Constant, VCodeConstant>,
1635     well_known_uses: HashMap<*const [u8], VCodeConstant>,
1636     u64s: HashMap<[u8; 8], VCodeConstant>,
1637 }
1638 impl VCodeConstants {
1639     /// Initialize the structure with the expected number of constants.
1640     pub fn with_capacity(expected_num_constants: usize) -> Self {
1641         Self {
1642             constants: PrimaryMap::with_capacity(expected_num_constants),
1643             pool_uses: HashMap::with_capacity(expected_num_constants),
1644             well_known_uses: HashMap::new(),
1645             u64s: HashMap::new(),
1646         }
1647     }
1648 
1649     /// Insert a constant; using this method indicates that a constant value will be used and thus
1650     /// will be emitted to the `MachBuffer`. The current implementation can deduplicate constants
1651     /// that are [VCodeConstantData::Pool] or [VCodeConstantData::WellKnown] but not
1652     /// [VCodeConstantData::Generated].
1653     pub fn insert(&mut self, data: VCodeConstantData) -> VCodeConstant {
1654         match data {
1655             VCodeConstantData::Generated(_) => self.constants.push(data),
1656             VCodeConstantData::Pool(constant, _) => match self.pool_uses.get(&constant) {
1657                 None => {
1658                     let vcode_constant = self.constants.push(data);
1659                     self.pool_uses.insert(constant, vcode_constant);
1660                     vcode_constant
1661                 }
1662                 Some(&vcode_constant) => vcode_constant,
1663             },
1664             VCodeConstantData::WellKnown(data_ref) => {
1665                 match self.well_known_uses.entry(data_ref as *const [u8]) {
1666                     Entry::Vacant(v) => {
1667                         let vcode_constant = self.constants.push(data);
1668                         v.insert(vcode_constant);
1669                         vcode_constant
1670                     }
1671                     Entry::Occupied(o) => *o.get(),
1672                 }
1673             }
1674             VCodeConstantData::U64(value) => match self.u64s.entry(value) {
1675                 Entry::Vacant(v) => {
1676                     let vcode_constant = self.constants.push(data);
1677                     v.insert(vcode_constant);
1678                     vcode_constant
1679                 }
1680                 Entry::Occupied(o) => *o.get(),
1681             },
1682         }
1683     }
1684 
1685     /// Return the number of constants inserted.
1686     pub fn len(&self) -> usize {
1687         self.constants.len()
1688     }
1689 
1690     /// Iterate over the `VCodeConstant` keys inserted in this structure.
1691     pub fn keys(&self) -> Keys<VCodeConstant> {
1692         self.constants.keys()
1693     }
1694 
1695     /// Iterate over the `VCodeConstant` keys and the data (as a byte slice) inserted in this
1696     /// structure.
1697     pub fn iter(&self) -> impl Iterator<Item = (VCodeConstant, &VCodeConstantData)> {
1698         self.constants.iter()
1699     }
1700 
1701     /// Returns the data associated with the specified constant.
1702     pub fn get(&self, c: VCodeConstant) -> &VCodeConstantData {
1703         &self.constants[c]
1704     }
1705 
1706     /// Checks if the given [VCodeConstantData] is registered as
1707     /// used by the pool.
1708     pub fn pool_uses(&self, constant: &VCodeConstantData) -> bool {
1709         match constant {
1710             VCodeConstantData::Pool(c, _) => self.pool_uses.contains_key(c),
1711             _ => false,
1712         }
1713     }
1714 }
1715 
1716 /// A use of a constant by one or more VCode instructions; see [VCodeConstants].
1717 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
1718 pub struct VCodeConstant(u32);
1719 entity_impl!(VCodeConstant);
1720 
1721 /// Identify the different types of constant that can be inserted into [VCodeConstants]. Tracking
1722 /// these separately instead of as raw byte buffers allows us to avoid some duplication.
1723 pub enum VCodeConstantData {
1724     /// A constant already present in the Cranelift IR
1725     /// [ConstantPool](crate::ir::constant::ConstantPool).
1726     Pool(Constant, ConstantData),
1727     /// A reference to a well-known constant value that is statically encoded within the compiler.
1728     WellKnown(&'static [u8]),
1729     /// A constant value generated during lowering; the value may depend on the instruction context
1730     /// which makes it difficult to de-duplicate--if possible, use other variants.
1731     Generated(ConstantData),
1732     /// A constant of at most 64 bits. These are deduplicated as
1733     /// well. Stored as a fixed-size array of `u8` so that we do not
1734     /// encounter endianness problems when cross-compiling.
1735     U64([u8; 8]),
1736 }
1737 impl VCodeConstantData {
1738     /// Retrieve the constant data as a byte slice.
1739     pub fn as_slice(&self) -> &[u8] {
1740         match self {
1741             VCodeConstantData::Pool(_, d) | VCodeConstantData::Generated(d) => d.as_slice(),
1742             VCodeConstantData::WellKnown(d) => d,
1743             VCodeConstantData::U64(value) => &value[..],
1744         }
1745     }
1746 
1747     /// Calculate the alignment of the constant data.
1748     pub fn alignment(&self) -> u32 {
1749         if self.as_slice().len() <= 8 {
1750             8
1751         } else {
1752             16
1753         }
1754     }
1755 }
1756 
1757 #[cfg(test)]
1758 mod test {
1759     use super::*;
1760     use std::mem::size_of;
1761 
1762     #[test]
1763     fn size_of_constant_structs() {
1764         assert_eq!(size_of::<Constant>(), 4);
1765         assert_eq!(size_of::<VCodeConstant>(), 4);
1766         assert_eq!(size_of::<ConstantData>(), 3 * size_of::<usize>());
1767         assert_eq!(size_of::<VCodeConstantData>(), 4 * size_of::<usize>());
1768         assert_eq!(
1769             size_of::<PrimaryMap<VCodeConstant, VCodeConstantData>>(),
1770             3 * size_of::<usize>()
1771         );
1772         // TODO The VCodeConstants structure's memory size could be further optimized.
1773         // With certain versions of Rust, each `HashMap` in `VCodeConstants` occupied at
1774         // least 48 bytes, making an empty `VCodeConstants` cost 120 bytes.
1775     }
1776 }
1777