1 //! This implements the VCode container: a CFG of Insts that have been lowered.
2 //!
3 //! VCode is virtual-register code. An instruction in VCode is almost a machine
4 //! instruction; however, its register slots can refer to virtual registers in
5 //! addition to real machine registers.
6 //!
7 //! VCode is structured with traditional basic blocks, and
8 //! each block must be terminated by an unconditional branch (one target), a
9 //! conditional branch (two targets), or a return (no targets). Note that this
10 //! slightly differs from the machine code of most ISAs: in most ISAs, a
11 //! conditional branch has one target (and the not-taken case falls through).
12 //! However, we expect that machine backends will elide branches to the following
13 //! block (i.e., zero-offset jumps), and will be able to codegen a branch-cond /
14 //! branch-uncond pair if *both* targets are not fallthrough. This allows us to
15 //! play with layout prior to final binary emission, as well, if we want.
16 //!
17 //! See the main module comment in `mod.rs` for more details on the VCode-based
18 //! backend pipeline.
19 
20 use crate::ir::pcc::*;
21 use crate::ir::{self, types, Constant, ConstantData, ValueLabel};
22 use crate::machinst::*;
23 use crate::ranges::Ranges;
24 use crate::timing;
25 use crate::trace;
26 use crate::CodegenError;
27 use crate::{LabelValueLoc, ValueLocRange};
28 use regalloc2::{
29     Edit, Function as RegallocFunction, InstOrEdit, InstRange, MachineEnv, Operand,
30     OperandConstraint, OperandKind, PRegSet, RegClass,
31 };
32 use rustc_hash::FxHashMap;
33 
34 use core::mem::take;
35 use cranelift_entity::{entity_impl, Keys};
36 use std::collections::hash_map::Entry;
37 use std::collections::HashMap;
38 use std::fmt;
39 
40 /// Index referring to an instruction in VCode.
41 pub type InsnIndex = regalloc2::Inst;
42 
43 /// Extension trait for `InsnIndex` to allow conversion to a
44 /// `BackwardsInsnIndex`.
45 trait ToBackwardsInsnIndex {
46     fn to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex;
47 }
48 
49 impl ToBackwardsInsnIndex for InsnIndex {
50     fn to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex {
51         BackwardsInsnIndex::new(num_insts - self.index() - 1)
52     }
53 }
54 
55 /// An index referring to an instruction in the VCode when it is backwards,
56 /// during VCode construction.
57 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
58 #[cfg_attr(
59     feature = "enable-serde",
60     derive(::serde::Serialize, ::serde::Deserialize)
61 )]
62 pub struct BackwardsInsnIndex(InsnIndex);
63 
64 impl BackwardsInsnIndex {
65     pub fn new(i: usize) -> Self {
66         BackwardsInsnIndex(InsnIndex::new(i))
67     }
68 }
69 
70 /// Index referring to a basic block in VCode.
71 pub type BlockIndex = regalloc2::Block;
72 
73 /// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be
74 /// a `MachInst` and it must be able to emit itself at least to a `SizeCodeSink`.
75 pub trait VCodeInst: MachInst + MachInstEmit {}
76 impl<I: MachInst + MachInstEmit> VCodeInst for I {}
77 
78 /// A function in "VCode" (virtualized-register code) form, after
79 /// lowering.  This is essentially a standard CFG of basic blocks,
80 /// where each basic block consists of lowered instructions produced
81 /// by the machine-specific backend.
82 ///
83 /// Note that the VCode is immutable once produced, and is not
84 /// modified by register allocation in particular. Rather, register
85 /// allocation on the `VCode` produces a separate `regalloc2::Output`
86 /// struct, and this can be passed to `emit`. `emit` in turn does not
87 /// modify the vcode, but produces an `EmitResult`, which contains the
88 /// machine code itself, and the associated disassembly and/or
89 /// metadata as requested.
90 pub struct VCode<I: VCodeInst> {
91     /// VReg IR-level types.
92     vreg_types: Vec<Type>,
93 
94     /// Lowered machine instructions in order corresponding to the original IR.
95     insts: Vec<I>,
96 
97     /// A map from backwards instruction index to the user stack map for that
98     /// instruction.
99     ///
100     /// This is a sparse side table that only has entries for instructions that
101     /// are safepoints, and only for a subset of those that have an associated
102     /// user stack map.
103     user_stack_maps: FxHashMap<BackwardsInsnIndex, ir::UserStackMap>,
104 
105     /// Operands: pre-regalloc references to virtual registers with
106     /// constraints, in one flattened array. This allows the regalloc
107     /// to efficiently access all operands without requiring expensive
108     /// matches or method invocations on insts.
109     operands: Vec<Operand>,
110 
111     /// Operand index ranges: for each instruction in `insts`, there
112     /// is a tuple here providing the range in `operands` for that
113     /// instruction's operands.
114     operand_ranges: Ranges,
115 
116     /// Clobbers: a sparse map from instruction indices to clobber masks.
117     clobbers: FxHashMap<InsnIndex, PRegSet>,
118 
119     /// Source locations for each instruction. (`SourceLoc` is a `u32`, so it is
120     /// reasonable to keep one of these per instruction.)
121     srclocs: Vec<RelSourceLoc>,
122 
123     /// Entry block.
124     entry: BlockIndex,
125 
126     /// Block instruction indices.
127     block_ranges: Ranges,
128 
129     /// Block successors: index range in the `block_succs` list.
130     block_succ_range: Ranges,
131 
132     /// Block successor lists, concatenated into one vec. The
133     /// `block_succ_range` list of tuples above gives (start, end)
134     /// ranges within this list that correspond to each basic block's
135     /// successors.
136     block_succs: Vec<regalloc2::Block>,
137 
138     /// Block predecessors: index range in the `block_preds` list.
139     block_pred_range: Ranges,
140 
141     /// Block predecessor lists, concatenated into one vec. The
142     /// `block_pred_range` list of tuples above gives (start, end)
143     /// ranges within this list that correspond to each basic block's
144     /// predecessors.
145     block_preds: Vec<regalloc2::Block>,
146 
147     /// Block parameters: index range in `block_params` below.
148     block_params_range: Ranges,
149 
150     /// Block parameter lists, concatenated into one vec. The
151     /// `block_params_range` list of tuples above gives (start, end)
152     /// ranges within this list that correspond to each basic block's
153     /// blockparam vregs.
154     block_params: Vec<regalloc2::VReg>,
155 
156     /// Outgoing block arguments on branch instructions, concatenated
157     /// into one list.
158     ///
159     /// Note that this is conceptually a 3D array: we have a VReg list
160     /// per block, per successor. We flatten those three dimensions
161     /// into this 1D vec, then store index ranges in two levels of
162     /// indirection.
163     ///
164     /// Indexed by the indices in `branch_block_arg_succ_range`.
165     branch_block_args: Vec<regalloc2::VReg>,
166 
167     /// Array of sequences of (start, end) tuples in
168     /// `branch_block_args`, one for each successor; these sequences
169     /// for each block are concatenated.
170     ///
171     /// Indexed by the indices in `branch_block_arg_succ_range`.
172     branch_block_arg_range: Ranges,
173 
174     /// For a given block, indices in `branch_block_arg_range`
175     /// corresponding to all of its successors.
176     branch_block_arg_succ_range: Ranges,
177 
178     /// Block-order information.
179     block_order: BlockLoweringOrder,
180 
181     /// ABI object.
182     pub(crate) abi: Callee<I::ABIMachineSpec>,
183 
184     /// Constant information used during code emission. This should be
185     /// immutable across function compilations within the same module.
186     emit_info: I::Info,
187 
188     /// Constants.
189     pub(crate) constants: VCodeConstants,
190 
191     /// Value labels for debuginfo attached to vregs.
192     debug_value_labels: Vec<(VReg, InsnIndex, InsnIndex, u32)>,
193 
194     pub(crate) sigs: SigSet,
195 
196     /// Facts on VRegs, for proof-carrying code verification.
197     facts: Vec<Option<Fact>>,
198 
199     log2_min_function_alignment: u8,
200 }
201 
202 /// The result of `VCode::emit`. Contains all information computed
203 /// during emission: actual machine code, optionally a disassembly,
204 /// and optionally metadata about the code layout.
205 pub struct EmitResult {
206     /// The MachBuffer containing the machine code.
207     pub buffer: MachBufferFinalized<Stencil>,
208 
209     /// Offset of each basic block, recorded during emission. Computed
210     /// only if `debug_value_labels` is non-empty.
211     pub bb_offsets: Vec<CodeOffset>,
212 
213     /// Final basic-block edges, in terms of code offsets of
214     /// bb-starts. Computed only if `debug_value_labels` is non-empty.
215     pub bb_edges: Vec<(CodeOffset, CodeOffset)>,
216 
217     /// Final length of function body.
218     pub func_body_len: CodeOffset,
219 
220     /// The pretty-printed disassembly, if any. This uses the same
221     /// pretty-printing for MachInsts as the pre-regalloc VCode Debug
222     /// implementation, but additionally includes the prologue and
223     /// epilogue(s), and makes use of the regalloc results.
224     pub disasm: Option<String>,
225 
226     /// Offsets of sized stackslots.
227     pub sized_stackslot_offsets: PrimaryMap<StackSlot, u32>,
228 
229     /// Offsets of dynamic stackslots.
230     pub dynamic_stackslot_offsets: PrimaryMap<DynamicStackSlot, u32>,
231 
232     /// Value-labels information (debug metadata).
233     pub value_labels_ranges: ValueLabelsRanges,
234 
235     /// Stack frame size.
236     pub frame_size: u32,
237 }
238 
239 /// A builder for a VCode function body.
240 ///
241 /// This builder has the ability to accept instructions in either
242 /// forward or reverse order, depending on the pass direction that
243 /// produces the VCode. The lowering from CLIF to VCode<MachInst>
244 /// ordinarily occurs in reverse order (in order to allow instructions
245 /// to be lowered only if used, and not merged) so a reversal will
246 /// occur at the end of lowering to ensure the VCode is in machine
247 /// order.
248 ///
249 /// If built in reverse, block and instruction indices used once the
250 /// VCode is built are relative to the final (reversed) order, not the
251 /// order of construction. Note that this means we do not know the
252 /// final block or instruction indices when building, so we do not
253 /// hand them out. (The user is assumed to know them when appending
254 /// terminator instructions with successor blocks.)
255 pub struct VCodeBuilder<I: VCodeInst> {
256     /// In-progress VCode.
257     pub(crate) vcode: VCode<I>,
258 
259     /// In what direction is the build occurring?
260     direction: VCodeBuildDirection,
261 
262     /// Debug-value label in-progress map, keyed by label. For each
263     /// label, we keep disjoint ranges mapping to vregs. We'll flatten
264     /// this into (vreg, range, label) tuples when done.
265     debug_info: FxHashMap<ValueLabel, Vec<(InsnIndex, InsnIndex, VReg)>>,
266 }
267 
268 /// Direction in which a VCodeBuilder builds VCode.
269 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
270 pub enum VCodeBuildDirection {
271     // TODO: add `Forward` once we need it and can test it adequately.
272     /// Backward-build pass: we expect the producer to call `emit()`
273     /// with instructions in reverse program order within each block.
274     Backward,
275 }
276 
277 impl<I: VCodeInst> VCodeBuilder<I> {
278     /// Create a new VCodeBuilder.
279     pub fn new(
280         sigs: SigSet,
281         abi: Callee<I::ABIMachineSpec>,
282         emit_info: I::Info,
283         block_order: BlockLoweringOrder,
284         constants: VCodeConstants,
285         direction: VCodeBuildDirection,
286         log2_min_function_alignment: u8,
287     ) -> Self {
288         let vcode = VCode::new(
289             sigs,
290             abi,
291             emit_info,
292             block_order,
293             constants,
294             log2_min_function_alignment,
295         );
296 
297         VCodeBuilder {
298             vcode,
299             direction,
300             debug_info: FxHashMap::default(),
301         }
302     }
303 
304     pub fn init_retval_area(&mut self, vregs: &mut VRegAllocator<I>) -> CodegenResult<()> {
305         self.vcode.abi.init_retval_area(&self.vcode.sigs, vregs)
306     }
307 
308     /// Access the ABI object.
309     pub fn abi(&self) -> &Callee<I::ABIMachineSpec> {
310         &self.vcode.abi
311     }
312 
313     /// Access the ABI object.
314     pub fn abi_mut(&mut self) -> &mut Callee<I::ABIMachineSpec> {
315         &mut self.vcode.abi
316     }
317 
318     pub fn sigs(&self) -> &SigSet {
319         &self.vcode.sigs
320     }
321 
322     pub fn sigs_mut(&mut self) -> &mut SigSet {
323         &mut self.vcode.sigs
324     }
325 
326     /// Access to the BlockLoweringOrder object.
327     pub fn block_order(&self) -> &BlockLoweringOrder {
328         &self.vcode.block_order
329     }
330 
331     /// Set the current block as the entry block.
332     pub fn set_entry(&mut self, block: BlockIndex) {
333         self.vcode.entry = block;
334     }
335 
336     /// End the current basic block. Must be called after emitting vcode insts
337     /// for IR insts and prior to ending the function (building the VCode).
338     pub fn end_bb(&mut self) {
339         let end_idx = self.vcode.insts.len();
340         // Add the instruction index range to the list of blocks.
341         self.vcode.block_ranges.push_end(end_idx);
342         // End the successors list.
343         let succ_end = self.vcode.block_succs.len();
344         self.vcode.block_succ_range.push_end(succ_end);
345         // End the blockparams list.
346         let block_params_end = self.vcode.block_params.len();
347         self.vcode.block_params_range.push_end(block_params_end);
348         // End the branch blockparam args list.
349         let branch_block_arg_succ_end = self.vcode.branch_block_arg_range.len();
350         self.vcode
351             .branch_block_arg_succ_range
352             .push_end(branch_block_arg_succ_end);
353     }
354 
355     pub fn add_block_param(&mut self, param: VirtualReg) {
356         self.vcode.block_params.push(param.into());
357     }
358 
359     fn add_branch_args_for_succ(&mut self, args: &[Reg]) {
360         self.vcode
361             .branch_block_args
362             .extend(args.iter().map(|&arg| VReg::from(arg)));
363         let end = self.vcode.branch_block_args.len();
364         self.vcode.branch_block_arg_range.push_end(end);
365     }
366 
367     /// Push an instruction for the current BB and current IR inst
368     /// within the BB.
369     pub fn push(&mut self, insn: I, loc: RelSourceLoc) {
370         assert!(!insn.is_low_level_branch()); // These are not meant to be in VCode.
371         self.vcode.insts.push(insn);
372         self.vcode.srclocs.push(loc);
373     }
374 
375     /// Add a successor block with branch args.
376     pub fn add_succ(&mut self, block: BlockIndex, args: &[Reg]) {
377         self.vcode.block_succs.push(block);
378         self.add_branch_args_for_succ(args);
379     }
380 
381     /// Add a debug value label to a register.
382     pub fn add_value_label(&mut self, reg: Reg, label: ValueLabel) {
383         // We'll fix up labels in reverse(). Because we're generating
384         // code bottom-to-top, the liverange of the label goes *from*
385         // the last index at which was defined (or 0, which is the end
386         // of the eventual function) *to* just this instruction, and
387         // no further.
388         let inst = InsnIndex::new(self.vcode.insts.len());
389         let labels = self.debug_info.entry(label).or_insert_with(|| vec![]);
390         let last = labels
391             .last()
392             .map(|(_start, end, _vreg)| *end)
393             .unwrap_or(InsnIndex::new(0));
394         labels.push((last, inst, reg.into()));
395     }
396 
397     /// Access the constants.
398     pub fn constants(&mut self) -> &mut VCodeConstants {
399         &mut self.vcode.constants
400     }
401 
402     fn compute_preds_from_succs(&mut self) {
403         // Do a linear-time counting sort: first determine how many
404         // times each block appears as a successor.
405         let mut starts = vec![0u32; self.vcode.num_blocks()];
406         for succ in &self.vcode.block_succs {
407             starts[succ.index()] += 1;
408         }
409 
410         // Determine for each block the starting index where that
411         // block's predecessors should go. This is equivalent to the
412         // ranges we need to store in block_pred_range.
413         self.vcode.block_pred_range.reserve(starts.len());
414         let mut end = 0;
415         for count in starts.iter_mut() {
416             let start = end;
417             end += *count;
418             *count = start;
419             self.vcode.block_pred_range.push_end(end as usize);
420         }
421         let end = end as usize;
422         debug_assert_eq!(end, self.vcode.block_succs.len());
423 
424         // Walk over the successors again, this time grouped by
425         // predecessor, and push the predecessor at the current
426         // starting position of each of its successors. We build
427         // each group of predecessors in whatever order Ranges::iter
428         // returns them; regalloc2 doesn't care.
429         self.vcode.block_preds.resize(end, BlockIndex::invalid());
430         for (pred, range) in self.vcode.block_succ_range.iter() {
431             let pred = BlockIndex::new(pred);
432             for succ in &self.vcode.block_succs[range] {
433                 let pos = &mut starts[succ.index()];
434                 self.vcode.block_preds[*pos as usize] = pred;
435                 *pos += 1;
436             }
437         }
438         debug_assert!(self.vcode.block_preds.iter().all(|pred| pred.is_valid()));
439     }
440 
441     /// Called once, when a build in Backward order is complete, to
442     /// perform the overall reversal (into final forward order) and
443     /// finalize metadata accordingly.
444     fn reverse_and_finalize(&mut self, vregs: &VRegAllocator<I>) {
445         let n_insts = self.vcode.insts.len();
446         if n_insts == 0 {
447             return;
448         }
449 
450         // Reverse the per-block and per-inst sequences.
451         self.vcode.block_ranges.reverse_index();
452         self.vcode.block_ranges.reverse_target(n_insts);
453         // block_params_range is indexed by block (and blocks were
454         // traversed in reverse) so we reverse it; but block-param
455         // sequences in the concatenated vec can remain in reverse
456         // order (it is effectively an arena of arbitrarily-placed
457         // referenced sequences).
458         self.vcode.block_params_range.reverse_index();
459         // Likewise, we reverse block_succ_range, but the block_succ
460         // concatenated array can remain as-is.
461         self.vcode.block_succ_range.reverse_index();
462         self.vcode.insts.reverse();
463         self.vcode.srclocs.reverse();
464         // Likewise, branch_block_arg_succ_range is indexed by block
465         // so must be reversed.
466         self.vcode.branch_block_arg_succ_range.reverse_index();
467 
468         // To translate an instruction index *endpoint* in reversed
469         // order to forward order, compute `n_insts - i`.
470         //
471         // Why not `n_insts - 1 - i`? That would be correct to
472         // translate an individual instruction index (for ten insts 0
473         // to 9 inclusive, inst 0 becomes 9, and inst 9 becomes
474         // 0). But for the usual inclusive-start, exclusive-end range
475         // idiom, inclusive starts become exclusive ends and
476         // vice-versa, so e.g. an (inclusive) start of 0 becomes an
477         // (exclusive) end of 10.
478         let translate = |inst: InsnIndex| InsnIndex::new(n_insts - inst.index());
479 
480         // Generate debug-value labels based on per-label maps.
481         for (label, tuples) in &self.debug_info {
482             for &(start, end, vreg) in tuples {
483                 let vreg = vregs.resolve_vreg_alias(vreg);
484                 let fwd_start = translate(end);
485                 let fwd_end = translate(start);
486                 self.vcode
487                     .debug_value_labels
488                     .push((vreg, fwd_start, fwd_end, label.as_u32()));
489             }
490         }
491 
492         // Now sort debug value labels by VReg, as required
493         // by regalloc2.
494         self.vcode
495             .debug_value_labels
496             .sort_unstable_by_key(|(vreg, _, _, _)| *vreg);
497     }
498 
499     fn collect_operands(&mut self, vregs: &VRegAllocator<I>) {
500         let allocatable = PRegSet::from(self.vcode.machine_env());
501         for (i, insn) in self.vcode.insts.iter_mut().enumerate() {
502             // Push operands from the instruction onto the operand list.
503             //
504             // We rename through the vreg alias table as we collect
505             // the operands. This is better than a separate post-pass
506             // over operands, because it has more cache locality:
507             // operands only need to pass through L1 once. This is
508             // also better than renaming instructions'
509             // operands/registers while lowering, because here we only
510             // need to do the `match` over the instruction to visit
511             // its register fields (which is slow, branchy code) once.
512 
513             let mut op_collector =
514                 OperandCollector::new(&mut self.vcode.operands, allocatable, |vreg| {
515                     vregs.resolve_vreg_alias(vreg)
516                 });
517             insn.get_operands(&mut op_collector);
518             let (ops, clobbers) = op_collector.finish();
519             self.vcode.operand_ranges.push_end(ops);
520 
521             if clobbers != PRegSet::default() {
522                 self.vcode.clobbers.insert(InsnIndex::new(i), clobbers);
523             }
524 
525             if let Some((dst, src)) = insn.is_move() {
526                 // We should never see non-virtual registers present in move
527                 // instructions.
528                 assert!(
529                     src.is_virtual(),
530                     "the real register {src:?} was used as the source of a move instruction"
531                 );
532                 assert!(
533                     dst.to_reg().is_virtual(),
534                     "the real register {:?} was used as the destination of a move instruction",
535                     dst.to_reg()
536                 );
537             }
538         }
539 
540         // Translate blockparam args via the vreg aliases table as well.
541         for arg in &mut self.vcode.branch_block_args {
542             let new_arg = vregs.resolve_vreg_alias(*arg);
543             trace!("operandcollector: block arg {:?} -> {:?}", arg, new_arg);
544             *arg = new_arg;
545         }
546     }
547 
548     /// Build the final VCode.
549     pub fn build(mut self, mut vregs: VRegAllocator<I>) -> VCode<I> {
550         self.vcode.vreg_types = take(&mut vregs.vreg_types);
551         self.vcode.facts = take(&mut vregs.facts);
552 
553         if self.direction == VCodeBuildDirection::Backward {
554             self.reverse_and_finalize(&vregs);
555         }
556         self.collect_operands(&vregs);
557 
558         self.compute_preds_from_succs();
559         self.vcode.debug_value_labels.sort_unstable();
560 
561         // At this point, nothing in the vcode should mention any
562         // VReg which has been aliased. All the appropriate rewriting
563         // should have happened above. Just to be sure, let's
564         // double-check each field which has vregs.
565         // Note: can't easily check vcode.insts, resolved in collect_operands.
566         // Operands are resolved in collect_operands.
567         vregs.debug_assert_no_vreg_aliases(self.vcode.operands.iter().map(|op| op.vreg()));
568         // Currently block params are never aliased to another vreg.
569         vregs.debug_assert_no_vreg_aliases(self.vcode.block_params.iter().copied());
570         // Branch block args are resolved in collect_operands.
571         vregs.debug_assert_no_vreg_aliases(self.vcode.branch_block_args.iter().copied());
572         // Debug value labels are resolved in reverse_and_finalize.
573         vregs.debug_assert_no_vreg_aliases(
574             self.vcode.debug_value_labels.iter().map(|&(vreg, ..)| vreg),
575         );
576         // Facts are resolved eagerly during set_vreg_alias.
577         vregs.debug_assert_no_vreg_aliases(
578             self.vcode
579                 .facts
580                 .iter()
581                 .zip(&vregs.vreg_types)
582                 .enumerate()
583                 .filter(|(_, (fact, _))| fact.is_some())
584                 .map(|(vreg, (_, &ty))| {
585                     let (regclasses, _) = I::rc_for_type(ty).unwrap();
586                     VReg::new(vreg, regclasses[0])
587                 }),
588         );
589 
590         self.vcode
591     }
592 
593     /// Add a user stack map for the associated instruction.
594     pub fn add_user_stack_map(
595         &mut self,
596         inst: BackwardsInsnIndex,
597         entries: &[ir::UserStackMapEntry],
598     ) {
599         let stack_map = ir::UserStackMap::new(entries, self.vcode.abi.sized_stackslot_offsets());
600         let old_entry = self.vcode.user_stack_maps.insert(inst, stack_map);
601         debug_assert!(old_entry.is_none());
602     }
603 }
604 
605 const NO_INST_OFFSET: CodeOffset = u32::MAX;
606 
607 impl<I: VCodeInst> VCode<I> {
608     /// New empty VCode.
609     fn new(
610         sigs: SigSet,
611         abi: Callee<I::ABIMachineSpec>,
612         emit_info: I::Info,
613         block_order: BlockLoweringOrder,
614         constants: VCodeConstants,
615         log2_min_function_alignment: u8,
616     ) -> Self {
617         let n_blocks = block_order.lowered_order().len();
618         VCode {
619             sigs,
620             vreg_types: vec![],
621             insts: Vec::with_capacity(10 * n_blocks),
622             user_stack_maps: FxHashMap::default(),
623             operands: Vec::with_capacity(30 * n_blocks),
624             operand_ranges: Ranges::with_capacity(10 * n_blocks),
625             clobbers: FxHashMap::default(),
626             srclocs: Vec::with_capacity(10 * n_blocks),
627             entry: BlockIndex::new(0),
628             block_ranges: Ranges::with_capacity(n_blocks),
629             block_succ_range: Ranges::with_capacity(n_blocks),
630             block_succs: Vec::with_capacity(n_blocks),
631             block_pred_range: Ranges::default(),
632             block_preds: Vec::new(),
633             block_params_range: Ranges::with_capacity(n_blocks),
634             block_params: Vec::with_capacity(5 * n_blocks),
635             branch_block_args: Vec::with_capacity(10 * n_blocks),
636             branch_block_arg_range: Ranges::with_capacity(2 * n_blocks),
637             branch_block_arg_succ_range: Ranges::with_capacity(n_blocks),
638             block_order,
639             abi,
640             emit_info,
641             constants,
642             debug_value_labels: vec![],
643             facts: vec![],
644             log2_min_function_alignment,
645         }
646     }
647 
648     /// Get the ABI-dependent MachineEnv for managing register allocation.
649     pub fn machine_env(&self) -> &MachineEnv {
650         self.abi.machine_env(&self.sigs)
651     }
652 
653     /// Get the number of blocks. Block indices will be in the range `0 ..
654     /// (self.num_blocks() - 1)`.
655     pub fn num_blocks(&self) -> usize {
656         self.block_ranges.len()
657     }
658 
659     /// The number of lowered instructions.
660     pub fn num_insts(&self) -> usize {
661         self.insts.len()
662     }
663 
664     fn compute_clobbers(&self, regalloc: &regalloc2::Output) -> Vec<Writable<RealReg>> {
665         let mut clobbered = PRegSet::default();
666 
667         // All moves are included in clobbers.
668         for (_, Edit::Move { to, .. }) in &regalloc.edits {
669             if let Some(preg) = to.as_reg() {
670                 clobbered.add(preg);
671             }
672         }
673 
674         for (i, range) in self.operand_ranges.iter() {
675             let operands = &self.operands[range.clone()];
676             let allocs = &regalloc.allocs[range];
677             for (operand, alloc) in operands.iter().zip(allocs.iter()) {
678                 if operand.kind() == OperandKind::Def {
679                     if let Some(preg) = alloc.as_reg() {
680                         clobbered.add(preg);
681                     }
682                 }
683             }
684 
685             // Also add explicitly-clobbered registers.
686             //
687             // Skip merging this instruction's clobber list if not
688             // "included in clobbers" as per the MachInst. (Some
689             // backends use this to implement ABI specifics; e.g.,
690             // excluding calls of the same ABI as the current function
691             // from clobbers, because by definition everything
692             // clobbered by the call can be clobbered by this function
693             // without saving as well.
694             //
695             // This is important for a particular optimization: when
696             // some registers are "half-clobbered", e.g. vector/float
697             // registers on aarch64, we want them to be seen as
698             // clobbered by regalloc so it avoids carrying values
699             // across calls in these registers but not seen as
700             // clobbered by prologue generation here (because the
701             // actual half-clobber implied by the clobber list fits
702             // within the clobbers that we allow without
703             // clobber-saves).
704             if self.insts[i].is_included_in_clobbers() {
705                 if let Some(&inst_clobbered) = self.clobbers.get(&InsnIndex::new(i)) {
706                     clobbered.union_from(inst_clobbered);
707                 }
708             }
709         }
710 
711         clobbered
712             .into_iter()
713             .map(|preg| Writable::from_reg(RealReg::from(preg)))
714             .collect()
715     }
716 
717     /// Emit the instructions to a `MachBuffer`, containing fixed-up
718     /// code and external reloc/trap/etc. records ready for use. Takes
719     /// the regalloc results as well.
720     ///
721     /// Returns the machine code itself, and optionally metadata
722     /// and/or a disassembly, as an `EmitResult`. The `VCode` itself
723     /// is consumed by the emission process.
724     pub fn emit(
725         mut self,
726         regalloc: &regalloc2::Output,
727         want_disasm: bool,
728         flags: &settings::Flags,
729         ctrl_plane: &mut ControlPlane,
730     ) -> EmitResult
731     where
732         I: VCodeInst,
733     {
734         // To write into disasm string.
735         use core::fmt::Write;
736 
737         let _tt = timing::vcode_emit();
738         let mut buffer = MachBuffer::new();
739         buffer.set_log2_min_function_alignment(self.log2_min_function_alignment);
740         let mut bb_starts: Vec<Option<CodeOffset>> = vec![];
741 
742         // The first M MachLabels are reserved for block indices.
743         buffer.reserve_labels_for_blocks(self.num_blocks());
744 
745         // Register all allocated constants with the `MachBuffer` to ensure that
746         // any references to the constants during instructions can be handled
747         // correctly.
748         buffer.register_constants(&self.constants);
749 
750         // Construct the final order we emit code in: cold blocks at the end.
751         let mut final_order: SmallVec<[BlockIndex; 16]> = smallvec![];
752         let mut cold_blocks: SmallVec<[BlockIndex; 16]> = smallvec![];
753         for block in 0..self.num_blocks() {
754             let block = BlockIndex::new(block);
755             if self.block_order.is_cold(block) {
756                 cold_blocks.push(block);
757             } else {
758                 final_order.push(block);
759             }
760         }
761         final_order.extend(cold_blocks.clone());
762 
763         // Compute/save info we need for the prologue: clobbers and
764         // number of spillslots.
765         //
766         // We clone `abi` here because we will mutate it as we
767         // generate the prologue and set other info, but we can't
768         // mutate `VCode`. The info it usually carries prior to
769         // setting clobbers is fairly minimal so this should be
770         // relatively cheap.
771         let clobbers = self.compute_clobbers(regalloc);
772         self.abi
773             .compute_frame_layout(&self.sigs, regalloc.num_spillslots, clobbers);
774 
775         // Emit blocks.
776         let mut cur_srcloc = None;
777         let mut last_offset = None;
778         let mut inst_offsets = vec![];
779         let mut state = I::State::new(&self.abi, std::mem::take(ctrl_plane));
780 
781         let mut disasm = String::new();
782 
783         if !self.debug_value_labels.is_empty() {
784             inst_offsets.resize(self.insts.len(), NO_INST_OFFSET);
785         }
786 
787         // Count edits per block ahead of time; this is needed for
788         // lookahead island emission. (We could derive it per-block
789         // with binary search in the edit list, but it's more
790         // efficient to do it in one pass here.)
791         let mut ra_edits_per_block: SmallVec<[u32; 64]> = smallvec![];
792         let mut edit_idx = 0;
793         for block in 0..self.num_blocks() {
794             let end_inst = InsnIndex::new(self.block_ranges.get(block).end);
795             let start_edit_idx = edit_idx;
796             while edit_idx < regalloc.edits.len() && regalloc.edits[edit_idx].0.inst() < end_inst {
797                 edit_idx += 1;
798             }
799             let end_edit_idx = edit_idx;
800             ra_edits_per_block.push((end_edit_idx - start_edit_idx) as u32);
801         }
802 
803         let is_forward_edge_cfi_enabled = self.abi.is_forward_edge_cfi_enabled();
804         let mut bb_padding = match flags.bb_padding_log2_minus_one() {
805             0 => Vec::new(),
806             n => vec![0; 1 << (n - 1)],
807         };
808         let mut total_bb_padding = 0;
809 
810         for (block_order_idx, &block) in final_order.iter().enumerate() {
811             trace!("emitting block {:?}", block);
812 
813             // Call the new block hook for state
814             state.on_new_block();
815 
816             // Emit NOPs to align the block.
817             let new_offset = I::align_basic_block(buffer.cur_offset());
818             while new_offset > buffer.cur_offset() {
819                 // Pad with NOPs up to the aligned block offset.
820                 let nop = I::gen_nop((new_offset - buffer.cur_offset()) as usize);
821                 nop.emit(&mut buffer, &self.emit_info, &mut Default::default());
822             }
823             assert_eq!(buffer.cur_offset(), new_offset);
824 
825             let do_emit = |inst: &I,
826                            disasm: &mut String,
827                            buffer: &mut MachBuffer<I>,
828                            state: &mut I::State| {
829                 if want_disasm && !inst.is_args() {
830                     let mut s = state.clone();
831                     writeln!(disasm, "  {}", inst.pretty_print_inst(&mut s)).unwrap();
832                 }
833                 inst.emit(buffer, &self.emit_info, state);
834             };
835 
836             // Is this the first block? Emit the prologue directly if so.
837             if block == self.entry {
838                 trace!(" -> entry block");
839                 buffer.start_srcloc(Default::default());
840                 for inst in &self.abi.gen_prologue() {
841                     do_emit(&inst, &mut disasm, &mut buffer, &mut state);
842                 }
843                 buffer.end_srcloc();
844             }
845 
846             // Now emit the regular block body.
847 
848             buffer.bind_label(MachLabel::from_block(block), state.ctrl_plane_mut());
849 
850             if want_disasm {
851                 writeln!(&mut disasm, "block{}:", block.index()).unwrap();
852             }
853 
854             if flags.machine_code_cfg_info() {
855                 // Track BB starts. If we have backed up due to MachBuffer
856                 // branch opts, note that the removed blocks were removed.
857                 let cur_offset = buffer.cur_offset();
858                 if last_offset.is_some() && cur_offset <= last_offset.unwrap() {
859                     for i in (0..bb_starts.len()).rev() {
860                         if bb_starts[i].is_some() && cur_offset > bb_starts[i].unwrap() {
861                             break;
862                         }
863                         bb_starts[i] = None;
864                     }
865                 }
866                 bb_starts.push(Some(cur_offset));
867                 last_offset = Some(cur_offset);
868             }
869 
870             if let Some(block_start) = I::gen_block_start(
871                 self.block_order.is_indirect_branch_target(block),
872                 is_forward_edge_cfi_enabled,
873             ) {
874                 do_emit(&block_start, &mut disasm, &mut buffer, &mut state);
875             }
876 
877             for inst_or_edit in regalloc.block_insts_and_edits(&self, block) {
878                 match inst_or_edit {
879                     InstOrEdit::Inst(iix) => {
880                         if !self.debug_value_labels.is_empty() {
881                             // If we need to produce debug info,
882                             // record the offset of each instruction
883                             // so that we can translate value-label
884                             // ranges to machine-code offsets.
885 
886                             // Cold blocks violate monotonicity
887                             // assumptions elsewhere (that
888                             // instructions in inst-index order are in
889                             // order in machine code), so we omit
890                             // their offsets here. Value-label range
891                             // generation below will skip empty ranges
892                             // and ranges with to-offsets of zero.
893                             if !self.block_order.is_cold(block) {
894                                 inst_offsets[iix.index()] = buffer.cur_offset();
895                             }
896                         }
897 
898                         // Update the srcloc at this point in the buffer.
899                         let srcloc = self.srclocs[iix.index()];
900                         if cur_srcloc != Some(srcloc) {
901                             if cur_srcloc.is_some() {
902                                 buffer.end_srcloc();
903                             }
904                             buffer.start_srcloc(srcloc);
905                             cur_srcloc = Some(srcloc);
906                         }
907 
908                         // If this is a safepoint, compute a stack map
909                         // and pass it to the emit state.
910                         let stack_map_disasm = if self.insts[iix.index()].is_safepoint() {
911                             let (user_stack_map, user_stack_map_disasm) = {
912                                 // The `user_stack_maps` is keyed by reverse
913                                 // instruction index, so we must flip the
914                                 // index. We can't put this into a helper method
915                                 // due to borrowck issues because parts of
916                                 // `self` are borrowed mutably elsewhere in this
917                                 // function.
918                                 let index = iix.to_backwards_insn_index(self.num_insts());
919                                 let user_stack_map = self.user_stack_maps.remove(&index);
920                                 let user_stack_map_disasm =
921                                     user_stack_map.as_ref().map(|m| format!("  ; {m:?}"));
922                                 (user_stack_map, user_stack_map_disasm)
923                             };
924 
925                             state.pre_safepoint(user_stack_map);
926 
927                             user_stack_map_disasm
928                         } else {
929                             None
930                         };
931 
932                         // If the instruction we are about to emit is
933                         // a return, place an epilogue at this point
934                         // (and don't emit the return; the actual
935                         // epilogue will contain it).
936                         if self.insts[iix.index()].is_term() == MachTerminator::Ret {
937                             for inst in self.abi.gen_epilogue() {
938                                 do_emit(&inst, &mut disasm, &mut buffer, &mut state);
939                             }
940                         } else {
941                             // Update the operands for this inst using the
942                             // allocations from the regalloc result.
943                             let mut allocs = regalloc.inst_allocs(iix).iter();
944                             self.insts[iix.index()].get_operands(
945                                 &mut |reg: &mut Reg, constraint, _kind, _pos| {
946                                     let alloc =
947                                         allocs.next().expect("enough allocations for all operands");
948 
949                                     if let Some(alloc) = alloc.as_reg() {
950                                         let alloc: Reg = alloc.into();
951                                         if let OperandConstraint::FixedReg(rreg) = constraint {
952                                             debug_assert_eq!(Reg::from(rreg), alloc);
953                                         }
954                                         *reg = alloc;
955                                     } else if let Some(alloc) = alloc.as_stack() {
956                                         let alloc: Reg = alloc.into();
957                                         *reg = alloc;
958                                     }
959                                 },
960                             );
961                             debug_assert!(allocs.next().is_none());
962 
963                             // Emit the instruction!
964                             do_emit(
965                                 &self.insts[iix.index()],
966                                 &mut disasm,
967                                 &mut buffer,
968                                 &mut state,
969                             );
970                             if let Some(stack_map_disasm) = stack_map_disasm {
971                                 disasm.push_str(&stack_map_disasm);
972                                 disasm.push('\n');
973                             }
974                         }
975                     }
976 
977                     InstOrEdit::Edit(Edit::Move { from, to }) => {
978                         // Create a move/spill/reload instruction and
979                         // immediately emit it.
980                         match (from.as_reg(), to.as_reg()) {
981                             (Some(from), Some(to)) => {
982                                 // Reg-to-reg move.
983                                 let from_rreg = Reg::from(from);
984                                 let to_rreg = Writable::from_reg(Reg::from(to));
985                                 debug_assert_eq!(from.class(), to.class());
986                                 let ty = I::canonical_type_for_rc(from.class());
987                                 let mv = I::gen_move(to_rreg, from_rreg, ty);
988                                 do_emit(&mv, &mut disasm, &mut buffer, &mut state);
989                             }
990                             (Some(from), None) => {
991                                 // Spill from register to spillslot.
992                                 let to = to.as_stack().unwrap();
993                                 let from_rreg = RealReg::from(from);
994                                 let spill = self.abi.gen_spill(to, from_rreg);
995                                 do_emit(&spill, &mut disasm, &mut buffer, &mut state);
996                             }
997                             (None, Some(to)) => {
998                                 // Load from spillslot to register.
999                                 let from = from.as_stack().unwrap();
1000                                 let to_rreg = Writable::from_reg(RealReg::from(to));
1001                                 let reload = self.abi.gen_reload(to_rreg, from);
1002                                 do_emit(&reload, &mut disasm, &mut buffer, &mut state);
1003                             }
1004                             (None, None) => {
1005                                 panic!("regalloc2 should have eliminated stack-to-stack moves!");
1006                             }
1007                         }
1008                     }
1009                 }
1010             }
1011 
1012             if cur_srcloc.is_some() {
1013                 buffer.end_srcloc();
1014                 cur_srcloc = None;
1015             }
1016 
1017             // Do we need an island? Get the worst-case size of the next BB, add
1018             // it to the optional padding behind the block, and pass this to the
1019             // `MachBuffer` to determine if an island is necessary.
1020             let worst_case_next_bb = if block_order_idx < final_order.len() - 1 {
1021                 let next_block = final_order[block_order_idx + 1];
1022                 let next_block_range = self.block_ranges.get(next_block.index());
1023                 let next_block_size = next_block_range.len() as u32;
1024                 let next_block_ra_insertions = ra_edits_per_block[next_block.index()];
1025                 I::worst_case_size() * (next_block_size + next_block_ra_insertions)
1026             } else {
1027                 0
1028             };
1029             let padding = if bb_padding.is_empty() {
1030                 0
1031             } else {
1032                 bb_padding.len() as u32 + I::LabelUse::ALIGN - 1
1033             };
1034             if buffer.island_needed(padding + worst_case_next_bb) {
1035                 buffer.emit_island(padding + worst_case_next_bb, ctrl_plane);
1036             }
1037 
1038             // Insert padding, if configured, to stress the `MachBuffer`'s
1039             // relocation and island calculations.
1040             //
1041             // Padding can get quite large during fuzzing though so place a
1042             // total cap on it where when a per-function threshold is exceeded
1043             // the padding is turned back down to zero. This avoids a small-ish
1044             // test case generating a GB+ memory footprint in Cranelift for
1045             // example.
1046             if !bb_padding.is_empty() {
1047                 buffer.put_data(&bb_padding);
1048                 buffer.align_to(I::LabelUse::ALIGN);
1049                 total_bb_padding += bb_padding.len();
1050                 if total_bb_padding > (150 << 20) {
1051                     bb_padding = Vec::new();
1052                 }
1053             }
1054         }
1055 
1056         debug_assert!(
1057             self.user_stack_maps.is_empty(),
1058             "any stack maps should have been consumed by instruction emission, still have: {:#?}",
1059             self.user_stack_maps,
1060         );
1061 
1062         // Do any optimizations on branches at tail of buffer, as if we had
1063         // bound one last label.
1064         buffer.optimize_branches(ctrl_plane);
1065 
1066         // emission state is not needed anymore, move control plane back out
1067         *ctrl_plane = state.take_ctrl_plane();
1068 
1069         let func_body_len = buffer.cur_offset();
1070 
1071         // Create `bb_edges` and final (filtered) `bb_starts`.
1072         let mut bb_edges = vec![];
1073         let mut bb_offsets = vec![];
1074         if flags.machine_code_cfg_info() {
1075             for block in 0..self.num_blocks() {
1076                 if bb_starts[block].is_none() {
1077                     // Block was deleted by MachBuffer; skip.
1078                     continue;
1079                 }
1080                 let from = bb_starts[block].unwrap();
1081 
1082                 bb_offsets.push(from);
1083                 // Resolve each `succ` label and add edges.
1084                 let succs = self.block_succs(BlockIndex::new(block));
1085                 for &succ in succs.iter() {
1086                     let to = buffer.resolve_label_offset(MachLabel::from_block(succ));
1087                     bb_edges.push((from, to));
1088                 }
1089             }
1090         }
1091 
1092         self.monotonize_inst_offsets(&mut inst_offsets[..], func_body_len);
1093         let value_labels_ranges =
1094             self.compute_value_labels_ranges(regalloc, &inst_offsets[..], func_body_len);
1095         let frame_size = self.abi.frame_size();
1096 
1097         EmitResult {
1098             buffer: buffer.finish(&self.constants, ctrl_plane),
1099             bb_offsets,
1100             bb_edges,
1101             func_body_len,
1102             disasm: if want_disasm { Some(disasm) } else { None },
1103             sized_stackslot_offsets: self.abi.sized_stackslot_offsets().clone(),
1104             dynamic_stackslot_offsets: self.abi.dynamic_stackslot_offsets().clone(),
1105             value_labels_ranges,
1106             frame_size,
1107         }
1108     }
1109 
1110     fn monotonize_inst_offsets(&self, inst_offsets: &mut [CodeOffset], func_body_len: u32) {
1111         if self.debug_value_labels.is_empty() {
1112             return;
1113         }
1114 
1115         // During emission, branch removal can make offsets of instructions incorrect.
1116         // Consider the following sequence: [insi][jmp0][jmp1][jmp2][insj]
1117         // It will be recorded as (say):    [30]  [34]  [38]  [42]  [<would be 46>]
1118         // When the jumps get removed we are left with (in "inst_offsets"):
1119         // [insi][jmp0][jmp1][jmp2][insj][...]
1120         // [30]  [34]  [38]  [42]  [34]
1121         // Which violates the monotonicity invariant. This method sets offsets of these
1122         // removed instructions such as to make them appear zero-sized:
1123         // [insi][jmp0][jmp1][jmp2][insj][...]
1124         // [30]  [34]  [34]  [34]  [34]
1125         //
1126         let mut next_offset = func_body_len;
1127         for inst_index in (0..(inst_offsets.len() - 1)).rev() {
1128             let inst_offset = inst_offsets[inst_index];
1129 
1130             // Not all instructions get their offsets recorded.
1131             if inst_offset == NO_INST_OFFSET {
1132                 continue;
1133             }
1134 
1135             if inst_offset > next_offset {
1136                 trace!(
1137                     "Fixing code offset of the removed Inst {}: {} -> {}",
1138                     inst_index,
1139                     inst_offset,
1140                     next_offset
1141                 );
1142                 inst_offsets[inst_index] = next_offset;
1143                 continue;
1144             }
1145 
1146             next_offset = inst_offset;
1147         }
1148     }
1149 
1150     fn compute_value_labels_ranges(
1151         &self,
1152         regalloc: &regalloc2::Output,
1153         inst_offsets: &[CodeOffset],
1154         func_body_len: u32,
1155     ) -> ValueLabelsRanges {
1156         if self.debug_value_labels.is_empty() {
1157             return ValueLabelsRanges::default();
1158         }
1159 
1160         let mut value_labels_ranges: ValueLabelsRanges = HashMap::new();
1161         for &(label, from, to, alloc) in &regalloc.debug_locations {
1162             let ranges = value_labels_ranges
1163                 .entry(ValueLabel::from_u32(label))
1164                 .or_insert_with(|| vec![]);
1165             let from_offset = inst_offsets[from.inst().index()];
1166             let to_offset = if to.inst().index() == inst_offsets.len() {
1167                 func_body_len
1168             } else {
1169                 inst_offsets[to.inst().index()]
1170             };
1171 
1172             // Empty ranges or unavailable offsets can happen
1173             // due to cold blocks and branch removal (see above).
1174             if from_offset == NO_INST_OFFSET
1175                 || to_offset == NO_INST_OFFSET
1176                 || from_offset == to_offset
1177             {
1178                 continue;
1179             }
1180 
1181             let loc = if let Some(preg) = alloc.as_reg() {
1182                 LabelValueLoc::Reg(Reg::from(preg))
1183             } else {
1184                 let slot = alloc.as_stack().unwrap();
1185                 let slot_offset = self.abi.get_spillslot_offset(slot);
1186                 let slot_base_to_caller_sp_offset = self.abi.slot_base_to_caller_sp_offset();
1187                 let caller_sp_to_cfa_offset =
1188                     crate::isa::unwind::systemv::caller_sp_to_cfa_offset();
1189                 // NOTE: this is a negative offset because it's relative to the caller's SP
1190                 let cfa_to_sp_offset =
1191                     -((slot_base_to_caller_sp_offset + caller_sp_to_cfa_offset) as i64);
1192                 LabelValueLoc::CFAOffset(cfa_to_sp_offset + slot_offset)
1193             };
1194 
1195             // ValueLocRanges are recorded by *instruction-end
1196             // offset*. `from_offset` is the *start* of the
1197             // instruction; that is the same as the end of another
1198             // instruction, so we only want to begin coverage once
1199             // we are past the previous instruction's end.
1200             let start = from_offset + 1;
1201 
1202             // Likewise, `end` is exclusive, but we want to
1203             // *include* the end of the last
1204             // instruction. `to_offset` is the start of the
1205             // `to`-instruction, which is the exclusive end, i.e.,
1206             // the first instruction not covered. That
1207             // instruction's start is the same as the end of the
1208             // last instruction that is included, so we go one
1209             // byte further to be sure to include it.
1210             let end = to_offset + 1;
1211 
1212             // Coalesce adjacent ranges that for the same location
1213             // to minimize output size here and for the consumers.
1214             if let Some(last_loc_range) = ranges.last_mut() {
1215                 if last_loc_range.loc == loc && last_loc_range.end == start {
1216                     trace!(
1217                         "Extending debug range for VL{} in {:?} to {}",
1218                         label,
1219                         loc,
1220                         end
1221                     );
1222                     last_loc_range.end = end;
1223                     continue;
1224                 }
1225             }
1226 
1227             trace!(
1228                 "Recording debug range for VL{} in {:?}: [Inst {}..Inst {}) [{}..{})",
1229                 label,
1230                 loc,
1231                 from.inst().index(),
1232                 to.inst().index(),
1233                 start,
1234                 end
1235             );
1236 
1237             ranges.push(ValueLocRange { loc, start, end });
1238         }
1239 
1240         value_labels_ranges
1241     }
1242 
1243     /// Get the IR block for a BlockIndex, if one exists.
1244     pub fn bindex_to_bb(&self, block: BlockIndex) -> Option<ir::Block> {
1245         self.block_order.lowered_order()[block.index()].orig_block()
1246     }
1247 
1248     /// Get the type of a VReg.
1249     pub fn vreg_type(&self, vreg: VReg) -> Type {
1250         self.vreg_types[vreg.vreg()]
1251     }
1252 
1253     /// Get the fact, if any, for a given VReg.
1254     pub fn vreg_fact(&self, vreg: VReg) -> Option<&Fact> {
1255         self.facts[vreg.vreg()].as_ref()
1256     }
1257 
1258     /// Set the fact for a given VReg.
1259     pub fn set_vreg_fact(&mut self, vreg: VReg, fact: Fact) {
1260         trace!("set fact on {}: {:?}", vreg, fact);
1261         self.facts[vreg.vreg()] = Some(fact);
1262     }
1263 
1264     /// Does a given instruction define any facts?
1265     pub fn inst_defines_facts(&self, inst: InsnIndex) -> bool {
1266         self.inst_operands(inst)
1267             .iter()
1268             .filter(|o| o.kind() == OperandKind::Def)
1269             .map(|o| o.vreg())
1270             .any(|vreg| self.facts[vreg.vreg()].is_some())
1271     }
1272 
1273     /// Get the user stack map associated with the given forward instruction index.
1274     pub fn get_user_stack_map(&self, inst: InsnIndex) -> Option<&ir::UserStackMap> {
1275         let index = inst.to_backwards_insn_index(self.num_insts());
1276         self.user_stack_maps.get(&index)
1277     }
1278 }
1279 
1280 impl<I: VCodeInst> std::ops::Index<InsnIndex> for VCode<I> {
1281     type Output = I;
1282     fn index(&self, idx: InsnIndex) -> &Self::Output {
1283         &self.insts[idx.index()]
1284     }
1285 }
1286 
1287 impl<I: VCodeInst> RegallocFunction for VCode<I> {
1288     fn num_insts(&self) -> usize {
1289         self.insts.len()
1290     }
1291 
1292     fn num_blocks(&self) -> usize {
1293         self.block_ranges.len()
1294     }
1295 
1296     fn entry_block(&self) -> BlockIndex {
1297         self.entry
1298     }
1299 
1300     fn block_insns(&self, block: BlockIndex) -> InstRange {
1301         let range = self.block_ranges.get(block.index());
1302         InstRange::new(InsnIndex::new(range.start), InsnIndex::new(range.end))
1303     }
1304 
1305     fn block_succs(&self, block: BlockIndex) -> &[BlockIndex] {
1306         let range = self.block_succ_range.get(block.index());
1307         &self.block_succs[range]
1308     }
1309 
1310     fn block_preds(&self, block: BlockIndex) -> &[BlockIndex] {
1311         let range = self.block_pred_range.get(block.index());
1312         &self.block_preds[range]
1313     }
1314 
1315     fn block_params(&self, block: BlockIndex) -> &[VReg] {
1316         // As a special case we don't return block params for the entry block, as all the arguments
1317         // will be defined by the `Inst::Args` instruction.
1318         if block == self.entry {
1319             return &[];
1320         }
1321 
1322         let range = self.block_params_range.get(block.index());
1323         &self.block_params[range]
1324     }
1325 
1326     fn branch_blockparams(&self, block: BlockIndex, _insn: InsnIndex, succ_idx: usize) -> &[VReg] {
1327         let succ_range = self.branch_block_arg_succ_range.get(block.index());
1328         debug_assert!(succ_idx < succ_range.len());
1329         let branch_block_args = self.branch_block_arg_range.get(succ_range.start + succ_idx);
1330         &self.branch_block_args[branch_block_args]
1331     }
1332 
1333     fn is_ret(&self, insn: InsnIndex) -> bool {
1334         match self.insts[insn.index()].is_term() {
1335             // We treat blocks terminated by an unconditional trap like a return for regalloc.
1336             MachTerminator::None => self.insts[insn.index()].is_trap(),
1337             MachTerminator::Ret | MachTerminator::RetCall => true,
1338             MachTerminator::Uncond | MachTerminator::Cond | MachTerminator::Indirect => false,
1339         }
1340     }
1341 
1342     fn is_branch(&self, insn: InsnIndex) -> bool {
1343         match self.insts[insn.index()].is_term() {
1344             MachTerminator::Cond | MachTerminator::Uncond | MachTerminator::Indirect => true,
1345             _ => false,
1346         }
1347     }
1348 
1349     fn inst_operands(&self, insn: InsnIndex) -> &[Operand] {
1350         let range = self.operand_ranges.get(insn.index());
1351         &self.operands[range]
1352     }
1353 
1354     fn inst_clobbers(&self, insn: InsnIndex) -> PRegSet {
1355         self.clobbers.get(&insn).cloned().unwrap_or_default()
1356     }
1357 
1358     fn num_vregs(&self) -> usize {
1359         self.vreg_types.len()
1360     }
1361 
1362     fn debug_value_labels(&self) -> &[(VReg, InsnIndex, InsnIndex, u32)] {
1363         &self.debug_value_labels
1364     }
1365 
1366     fn spillslot_size(&self, regclass: RegClass) -> usize {
1367         self.abi.get_spillslot_size(regclass) as usize
1368     }
1369 
1370     fn allow_multiple_vreg_defs(&self) -> bool {
1371         // At least the s390x backend requires this, because the
1372         // `Loop` pseudo-instruction aggregates all Operands so pinned
1373         // vregs (RealRegs) may occur more than once.
1374         true
1375     }
1376 }
1377 
1378 impl<I: VCodeInst> Debug for VRegAllocator<I> {
1379     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1380         writeln!(f, "VRegAllocator {{")?;
1381 
1382         let mut alias_keys = self.vreg_aliases.keys().cloned().collect::<Vec<_>>();
1383         alias_keys.sort_unstable();
1384         for key in alias_keys {
1385             let dest = self.vreg_aliases.get(&key).unwrap();
1386             writeln!(f, "  {:?} := {:?}", Reg::from(key), Reg::from(*dest))?;
1387         }
1388 
1389         for (vreg, fact) in self.facts.iter().enumerate() {
1390             if let Some(fact) = fact {
1391                 writeln!(f, "  v{vreg} ! {fact}")?;
1392             }
1393         }
1394 
1395         writeln!(f, "}}")
1396     }
1397 }
1398 
1399 impl<I: VCodeInst> fmt::Debug for VCode<I> {
1400     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1401         writeln!(f, "VCode {{")?;
1402         writeln!(f, "  Entry block: {}", self.entry.index())?;
1403 
1404         let mut state = Default::default();
1405 
1406         for block in 0..self.num_blocks() {
1407             let block = BlockIndex::new(block);
1408             writeln!(
1409                 f,
1410                 "Block {}({:?}):",
1411                 block.index(),
1412                 self.block_params(block)
1413             )?;
1414             if let Some(bb) = self.bindex_to_bb(block) {
1415                 writeln!(f, "    (original IR block: {bb})")?;
1416             }
1417             for (succ_idx, succ) in self.block_succs(block).iter().enumerate() {
1418                 writeln!(
1419                     f,
1420                     "    (successor: Block {}({:?}))",
1421                     succ.index(),
1422                     self.branch_blockparams(block, InsnIndex::new(0) /* dummy */, succ_idx)
1423                 )?;
1424             }
1425             for inst in self.block_ranges.get(block.index()) {
1426                 writeln!(
1427                     f,
1428                     "  Inst {}: {}",
1429                     inst,
1430                     self.insts[inst].pretty_print_inst(&mut state)
1431                 )?;
1432                 if !self.operands.is_empty() {
1433                     for operand in self.inst_operands(InsnIndex::new(inst)) {
1434                         if operand.kind() == OperandKind::Def {
1435                             if let Some(fact) = &self.facts[operand.vreg().vreg()] {
1436                                 writeln!(f, "    v{} ! {}", operand.vreg().vreg(), fact)?;
1437                             }
1438                         }
1439                     }
1440                 }
1441                 if let Some(user_stack_map) = self.get_user_stack_map(InsnIndex::new(inst)) {
1442                     writeln!(f, "    {user_stack_map:?}")?;
1443                 }
1444             }
1445         }
1446 
1447         writeln!(f, "}}")?;
1448         Ok(())
1449     }
1450 }
1451 
1452 /// This structure manages VReg allocation during the lifetime of the VCodeBuilder.
1453 pub struct VRegAllocator<I> {
1454     /// VReg IR-level types.
1455     vreg_types: Vec<Type>,
1456 
1457     /// VReg aliases. When the final VCode is built we rewrite all
1458     /// uses of the keys in this table to their replacement values.
1459     ///
1460     /// We use these aliases to rename an instruction's expected
1461     /// result vregs to the returned vregs from lowering, which are
1462     /// usually freshly-allocated temps.
1463     vreg_aliases: FxHashMap<regalloc2::VReg, regalloc2::VReg>,
1464 
1465     /// A deferred error, to be bubbled up to the top level of the
1466     /// lowering algorithm. We take this approach because we cannot
1467     /// currently propagate a `Result` upward through ISLE code (the
1468     /// lowering rules) or some ABI code.
1469     deferred_error: Option<CodegenError>,
1470 
1471     /// Facts on VRegs, for proof-carrying code.
1472     facts: Vec<Option<Fact>>,
1473 
1474     /// The type of instruction that this allocator makes registers for.
1475     _inst: core::marker::PhantomData<I>,
1476 }
1477 
1478 impl<I: VCodeInst> VRegAllocator<I> {
1479     /// Make a new VRegAllocator.
1480     pub fn with_capacity(capacity: usize) -> Self {
1481         let capacity = first_user_vreg_index() + capacity;
1482         let mut vreg_types = Vec::with_capacity(capacity);
1483         vreg_types.resize(first_user_vreg_index(), types::INVALID);
1484         Self {
1485             vreg_types,
1486             vreg_aliases: FxHashMap::with_capacity_and_hasher(capacity, Default::default()),
1487             deferred_error: None,
1488             facts: Vec::with_capacity(capacity),
1489             _inst: core::marker::PhantomData::default(),
1490         }
1491     }
1492 
1493     /// Allocate a fresh ValueRegs.
1494     pub fn alloc(&mut self, ty: Type) -> CodegenResult<ValueRegs<Reg>> {
1495         if self.deferred_error.is_some() {
1496             return Err(CodegenError::CodeTooLarge);
1497         }
1498         let v = self.vreg_types.len();
1499         let (regclasses, tys) = I::rc_for_type(ty)?;
1500         if v + regclasses.len() >= VReg::MAX {
1501             return Err(CodegenError::CodeTooLarge);
1502         }
1503 
1504         let regs: ValueRegs<Reg> = match regclasses {
1505             &[rc0] => ValueRegs::one(VReg::new(v, rc0).into()),
1506             &[rc0, rc1] => ValueRegs::two(VReg::new(v, rc0).into(), VReg::new(v + 1, rc1).into()),
1507             // We can extend this if/when we support 32-bit targets; e.g.,
1508             // an i128 on a 32-bit machine will need up to four machine regs
1509             // for a `Value`.
1510             _ => panic!("Value must reside in 1 or 2 registers"),
1511         };
1512         for (&reg_ty, &reg) in tys.iter().zip(regs.regs().iter()) {
1513             let vreg = reg.to_virtual_reg().unwrap();
1514             debug_assert_eq!(self.vreg_types.len(), vreg.index());
1515             self.vreg_types.push(reg_ty);
1516         }
1517 
1518         // Create empty facts for each allocated vreg.
1519         self.facts.resize(self.vreg_types.len(), None);
1520 
1521         Ok(regs)
1522     }
1523 
1524     /// Allocate a fresh ValueRegs, deferring any out-of-vregs
1525     /// errors. This is useful in places where we cannot bubble a
1526     /// `CodegenResult` upward easily, and which are known to be
1527     /// invoked from within the lowering loop that checks the deferred
1528     /// error status below.
1529     pub fn alloc_with_deferred_error(&mut self, ty: Type) -> ValueRegs<Reg> {
1530         match self.alloc(ty) {
1531             Ok(x) => x,
1532             Err(e) => {
1533                 self.deferred_error = Some(e);
1534                 self.bogus_for_deferred_error(ty)
1535             }
1536         }
1537     }
1538 
1539     /// Take any deferred error that was accumulated by `alloc_with_deferred_error`.
1540     pub fn take_deferred_error(&mut self) -> Option<CodegenError> {
1541         self.deferred_error.take()
1542     }
1543 
1544     /// Produce an bogus VReg placeholder with the proper number of
1545     /// registers for the given type. This is meant to be used with
1546     /// deferred allocation errors (see `Lower::alloc_tmp()`).
1547     fn bogus_for_deferred_error(&self, ty: Type) -> ValueRegs<Reg> {
1548         let (regclasses, _tys) = I::rc_for_type(ty).expect("must have valid type");
1549         match regclasses {
1550             &[rc0] => ValueRegs::one(VReg::new(0, rc0).into()),
1551             &[rc0, rc1] => ValueRegs::two(VReg::new(0, rc0).into(), VReg::new(1, rc1).into()),
1552             _ => panic!("Value must reside in 1 or 2 registers"),
1553         }
1554     }
1555 
1556     /// Rewrite any mention of `from` into `to`.
1557     pub fn set_vreg_alias(&mut self, from: Reg, to: Reg) {
1558         let from = from.into();
1559         let resolved_to = self.resolve_vreg_alias(to.into());
1560         // Disallow cycles (see below).
1561         assert_ne!(resolved_to, from);
1562 
1563         // Maintain the invariant that PCC facts only exist on vregs
1564         // which aren't aliases. We want to preserve whatever was
1565         // stated about the vreg before its producer was lowered.
1566         if let Some(fact) = self.facts[from.vreg()].take() {
1567             self.set_fact(resolved_to, fact);
1568         }
1569 
1570         let old_alias = self.vreg_aliases.insert(from, resolved_to);
1571         debug_assert_eq!(old_alias, None);
1572     }
1573 
1574     fn resolve_vreg_alias(&self, mut vreg: regalloc2::VReg) -> regalloc2::VReg {
1575         // We prevent cycles from existing by resolving targets of
1576         // aliases eagerly before setting them. If the target resolves
1577         // to the origin of the alias, then a cycle would be created
1578         // and the alias is disallowed. Because of the structure of
1579         // SSA code (one instruction can refer to another's defs but
1580         // not vice-versa, except indirectly through
1581         // phis/blockparams), cycles should not occur as we use
1582         // aliases to redirect vregs to the temps that actually define
1583         // them.
1584         while let Some(to) = self.vreg_aliases.get(&vreg) {
1585             vreg = *to;
1586         }
1587         vreg
1588     }
1589 
1590     #[inline]
1591     fn debug_assert_no_vreg_aliases(&self, mut list: impl Iterator<Item = VReg>) {
1592         debug_assert!(list.all(|vreg| !self.vreg_aliases.contains_key(&vreg)));
1593     }
1594 
1595     /// Set the proof-carrying code fact on a given virtual register.
1596     ///
1597     /// Returns the old fact, if any (only one fact can be stored).
1598     fn set_fact(&mut self, vreg: regalloc2::VReg, fact: Fact) -> Option<Fact> {
1599         trace!("vreg {:?} has fact: {:?}", vreg, fact);
1600         debug_assert!(!self.vreg_aliases.contains_key(&vreg));
1601         self.facts[vreg.vreg()].replace(fact)
1602     }
1603 
1604     /// Set a fact only if one doesn't already exist.
1605     pub fn set_fact_if_missing(&mut self, vreg: VirtualReg, fact: Fact) {
1606         let vreg = self.resolve_vreg_alias(vreg.into());
1607         if self.facts[vreg.vreg()].is_none() {
1608             self.set_fact(vreg, fact);
1609         }
1610     }
1611 
1612     /// Allocate a fresh ValueRegs, with a given fact to apply if
1613     /// the value fits in one VReg.
1614     pub fn alloc_with_maybe_fact(
1615         &mut self,
1616         ty: Type,
1617         fact: Option<Fact>,
1618     ) -> CodegenResult<ValueRegs<Reg>> {
1619         let result = self.alloc(ty)?;
1620 
1621         // Ensure that we don't lose a fact on a value that splits
1622         // into multiple VRegs.
1623         assert!(result.len() == 1 || fact.is_none());
1624         if let Some(fact) = fact {
1625             self.set_fact(result.regs()[0].into(), fact);
1626         }
1627 
1628         Ok(result)
1629     }
1630 }
1631 
1632 /// This structure tracks the large constants used in VCode that will be emitted separately by the
1633 /// [MachBuffer].
1634 ///
1635 /// First, during the lowering phase, constants are inserted using
1636 /// [VCodeConstants.insert]; an intermediate handle, `VCodeConstant`, tracks what constants are
1637 /// used in this phase. Some deduplication is performed, when possible, as constant
1638 /// values are inserted.
1639 ///
1640 /// Secondly, during the emission phase, the [MachBuffer] assigns [MachLabel]s for each of the
1641 /// constants so that instructions can refer to the value's memory location. The [MachBuffer]
1642 /// then writes the constant values to the buffer.
1643 #[derive(Default)]
1644 pub struct VCodeConstants {
1645     constants: PrimaryMap<VCodeConstant, VCodeConstantData>,
1646     pool_uses: HashMap<Constant, VCodeConstant>,
1647     well_known_uses: HashMap<*const [u8], VCodeConstant>,
1648     u64s: HashMap<[u8; 8], VCodeConstant>,
1649 }
1650 impl VCodeConstants {
1651     /// Initialize the structure with the expected number of constants.
1652     pub fn with_capacity(expected_num_constants: usize) -> Self {
1653         Self {
1654             constants: PrimaryMap::with_capacity(expected_num_constants),
1655             pool_uses: HashMap::with_capacity(expected_num_constants),
1656             well_known_uses: HashMap::new(),
1657             u64s: HashMap::new(),
1658         }
1659     }
1660 
1661     /// Insert a constant; using this method indicates that a constant value will be used and thus
1662     /// will be emitted to the `MachBuffer`. The current implementation can deduplicate constants
1663     /// that are [VCodeConstantData::Pool] or [VCodeConstantData::WellKnown] but not
1664     /// [VCodeConstantData::Generated].
1665     pub fn insert(&mut self, data: VCodeConstantData) -> VCodeConstant {
1666         match data {
1667             VCodeConstantData::Generated(_) => self.constants.push(data),
1668             VCodeConstantData::Pool(constant, _) => match self.pool_uses.get(&constant) {
1669                 None => {
1670                     let vcode_constant = self.constants.push(data);
1671                     self.pool_uses.insert(constant, vcode_constant);
1672                     vcode_constant
1673                 }
1674                 Some(&vcode_constant) => vcode_constant,
1675             },
1676             VCodeConstantData::WellKnown(data_ref) => {
1677                 match self.well_known_uses.entry(data_ref as *const [u8]) {
1678                     Entry::Vacant(v) => {
1679                         let vcode_constant = self.constants.push(data);
1680                         v.insert(vcode_constant);
1681                         vcode_constant
1682                     }
1683                     Entry::Occupied(o) => *o.get(),
1684                 }
1685             }
1686             VCodeConstantData::U64(value) => match self.u64s.entry(value) {
1687                 Entry::Vacant(v) => {
1688                     let vcode_constant = self.constants.push(data);
1689                     v.insert(vcode_constant);
1690                     vcode_constant
1691                 }
1692                 Entry::Occupied(o) => *o.get(),
1693             },
1694         }
1695     }
1696 
1697     /// Return the number of constants inserted.
1698     pub fn len(&self) -> usize {
1699         self.constants.len()
1700     }
1701 
1702     /// Iterate over the `VCodeConstant` keys inserted in this structure.
1703     pub fn keys(&self) -> Keys<VCodeConstant> {
1704         self.constants.keys()
1705     }
1706 
1707     /// Iterate over the `VCodeConstant` keys and the data (as a byte slice) inserted in this
1708     /// structure.
1709     pub fn iter(&self) -> impl Iterator<Item = (VCodeConstant, &VCodeConstantData)> {
1710         self.constants.iter()
1711     }
1712 
1713     /// Returns the data associated with the specified constant.
1714     pub fn get(&self, c: VCodeConstant) -> &VCodeConstantData {
1715         &self.constants[c]
1716     }
1717 
1718     /// Checks if the given [VCodeConstantData] is registered as
1719     /// used by the pool.
1720     pub fn pool_uses(&self, constant: &VCodeConstantData) -> bool {
1721         match constant {
1722             VCodeConstantData::Pool(c, _) => self.pool_uses.contains_key(c),
1723             _ => false,
1724         }
1725     }
1726 }
1727 
1728 /// A use of a constant by one or more VCode instructions; see [VCodeConstants].
1729 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
1730 pub struct VCodeConstant(u32);
1731 entity_impl!(VCodeConstant);
1732 
1733 /// Identify the different types of constant that can be inserted into [VCodeConstants]. Tracking
1734 /// these separately instead of as raw byte buffers allows us to avoid some duplication.
1735 pub enum VCodeConstantData {
1736     /// A constant already present in the Cranelift IR
1737     /// [ConstantPool](crate::ir::constant::ConstantPool).
1738     Pool(Constant, ConstantData),
1739     /// A reference to a well-known constant value that is statically encoded within the compiler.
1740     WellKnown(&'static [u8]),
1741     /// A constant value generated during lowering; the value may depend on the instruction context
1742     /// which makes it difficult to de-duplicate--if possible, use other variants.
1743     Generated(ConstantData),
1744     /// A constant of at most 64 bits. These are deduplicated as
1745     /// well. Stored as a fixed-size array of `u8` so that we do not
1746     /// encounter endianness problems when cross-compiling.
1747     U64([u8; 8]),
1748 }
1749 impl VCodeConstantData {
1750     /// Retrieve the constant data as a byte slice.
1751     pub fn as_slice(&self) -> &[u8] {
1752         match self {
1753             VCodeConstantData::Pool(_, d) | VCodeConstantData::Generated(d) => d.as_slice(),
1754             VCodeConstantData::WellKnown(d) => d,
1755             VCodeConstantData::U64(value) => &value[..],
1756         }
1757     }
1758 
1759     /// Calculate the alignment of the constant data.
1760     pub fn alignment(&self) -> u32 {
1761         if self.as_slice().len() <= 8 {
1762             8
1763         } else {
1764             16
1765         }
1766     }
1767 }
1768 
1769 #[cfg(test)]
1770 mod test {
1771     use super::*;
1772     use std::mem::size_of;
1773 
1774     #[test]
1775     fn size_of_constant_structs() {
1776         assert_eq!(size_of::<Constant>(), 4);
1777         assert_eq!(size_of::<VCodeConstant>(), 4);
1778         assert_eq!(size_of::<ConstantData>(), 3 * size_of::<usize>());
1779         assert_eq!(size_of::<VCodeConstantData>(), 4 * size_of::<usize>());
1780         assert_eq!(
1781             size_of::<PrimaryMap<VCodeConstant, VCodeConstantData>>(),
1782             3 * size_of::<usize>()
1783         );
1784         // TODO The VCodeConstants structure's memory size could be further optimized.
1785         // With certain versions of Rust, each `HashMap` in `VCodeConstants` occupied at
1786         // least 48 bytes, making an empty `VCodeConstants` cost 120 bytes.
1787     }
1788 }
1789