1 //! This implements the VCode container: a CFG of Insts that have been lowered.
2 //!
3 //! VCode is virtual-register code. An instruction in VCode is almost a machine
4 //! instruction; however, its register slots can refer to virtual registers in
5 //! addition to real machine registers.
6 //!
7 //! VCode is structured with traditional basic blocks, and
8 //! each block must be terminated by an unconditional branch (one target), a
9 //! conditional branch (two targets), or a return (no targets). Note that this
10 //! slightly differs from the machine code of most ISAs: in most ISAs, a
11 //! conditional branch has one target (and the not-taken case falls through).
12 //! However, we expect that machine backends will elide branches to the following
13 //! block (i.e., zero-offset jumps), and will be able to codegen a branch-cond /
14 //! branch-uncond pair if *both* targets are not fallthrough. This allows us to
15 //! play with layout prior to final binary emission, as well, if we want.
16 //!
17 //! See the main module comment in `mod.rs` for more details on the VCode-based
18 //! backend pipeline.
19 
20 use crate::fx::FxHashMap;
21 use crate::fx::FxHashSet;
22 use crate::ir::pcc::*;
23 use crate::ir::{self, types, Constant, ConstantData, ValueLabel};
24 use crate::machinst::*;
25 use crate::timing;
26 use crate::trace;
27 use crate::CodegenError;
28 use crate::{LabelValueLoc, ValueLocRange};
29 use regalloc2::{
30     Edit, Function as RegallocFunction, InstOrEdit, InstRange, MachineEnv, Operand, OperandKind,
31     PRegSet, RegClass,
32 };
33 
34 use cranelift_entity::{entity_impl, Keys};
35 use std::collections::hash_map::Entry;
36 use std::collections::HashMap;
37 use std::fmt;
38 
39 /// Index referring to an instruction in VCode.
40 pub type InsnIndex = regalloc2::Inst;
41 
42 /// Index referring to a basic block in VCode.
43 pub type BlockIndex = regalloc2::Block;
44 
45 /// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be
46 /// a `MachInst` and it must be able to emit itself at least to a `SizeCodeSink`.
47 pub trait VCodeInst: MachInst + MachInstEmit {}
48 impl<I: MachInst + MachInstEmit> VCodeInst for I {}
49 
50 /// A function in "VCode" (virtualized-register code) form, after
51 /// lowering.  This is essentially a standard CFG of basic blocks,
52 /// where each basic block consists of lowered instructions produced
53 /// by the machine-specific backend.
54 ///
55 /// Note that the VCode is immutable once produced, and is not
56 /// modified by register allocation in particular. Rather, register
57 /// allocation on the `VCode` produces a separate `regalloc2::Output`
58 /// struct, and this can be passed to `emit`. `emit` in turn does not
59 /// modify the vcode, but produces an `EmitResult`, which contains the
60 /// machine code itself, and the associated disassembly and/or
61 /// metadata as requested.
62 pub struct VCode<I: VCodeInst> {
63     /// VReg IR-level types.
64     vreg_types: Vec<Type>,
65 
66     /// Lowered machine instructions in order corresponding to the original IR.
67     insts: Vec<I>,
68 
69     /// Operands: pre-regalloc references to virtual registers with
70     /// constraints, in one flattened array. This allows the regalloc
71     /// to efficiently access all operands without requiring expensive
72     /// matches or method invocations on insts.
73     operands: Vec<Operand>,
74 
75     /// Operand index ranges: for each instruction in `insts`, there
76     /// is a tuple here providing the range in `operands` for that
77     /// instruction's operands.
78     operand_ranges: Vec<(u32, u32)>,
79 
80     /// Clobbers: a sparse map from instruction indices to clobber masks.
81     clobbers: FxHashMap<InsnIndex, PRegSet>,
82 
83     /// Source locations for each instruction. (`SourceLoc` is a `u32`, so it is
84     /// reasonable to keep one of these per instruction.)
85     srclocs: Vec<RelSourceLoc>,
86 
87     /// Entry block.
88     entry: BlockIndex,
89 
90     /// Block instruction indices.
91     block_ranges: Vec<(InsnIndex, InsnIndex)>,
92 
93     /// Block successors: index range in the `block_succs_preds` list.
94     block_succ_range: Vec<(u32, u32)>,
95 
96     /// Block predecessors: index range in the `block_succs_preds` list.
97     block_pred_range: Vec<(u32, u32)>,
98 
99     /// Block successor and predecessor lists, concatenated into one
100     /// Vec. The `block_succ_range` and `block_pred_range` lists of
101     /// tuples above give (start, end) ranges within this list that
102     /// correspond to each basic block's successors or predecessors,
103     /// respectively.
104     block_succs_preds: Vec<regalloc2::Block>,
105 
106     /// Block parameters: index range in `block_params` below.
107     block_params_range: Vec<(u32, u32)>,
108 
109     /// Block parameter lists, concatenated into one vec. The
110     /// `block_params_range` list of tuples above gives (start, end)
111     /// ranges within this list that correspond to each basic block's
112     /// blockparam vregs.
113     block_params: Vec<regalloc2::VReg>,
114 
115     /// Outgoing block arguments on branch instructions, concatenated
116     /// into one list.
117     ///
118     /// Note that this is conceptually a 3D array: we have a VReg list
119     /// per block, per successor. We flatten those three dimensions
120     /// into this 1D vec, then store index ranges in two levels of
121     /// indirection.
122     ///
123     /// Indexed by the indices in `branch_block_arg_succ_range`.
124     branch_block_args: Vec<regalloc2::VReg>,
125 
126     /// Array of sequences of (start, end) tuples in
127     /// `branch_block_args`, one for each successor; these sequences
128     /// for each block are concatenated.
129     ///
130     /// Indexed by the indices in `branch_block_arg_succ_range`.
131     branch_block_arg_range: Vec<(u32, u32)>,
132 
133     /// For a given block, indices in `branch_block_arg_range`
134     /// corresponding to all of its successors.
135     branch_block_arg_succ_range: Vec<(u32, u32)>,
136 
137     /// VReg aliases. Each key in this table is translated to its
138     /// value when gathering Operands from instructions. Aliases are
139     /// not chased transitively (we do not further look up the
140     /// translated reg to see if it is another alias).
141     ///
142     /// We use these aliases to rename an instruction's expected
143     /// result vregs to the returned vregs from lowering, which are
144     /// usually freshly-allocated temps.
145     ///
146     /// Operands and branch arguments will already have been
147     /// translated through this alias table; but it helps to make
148     /// sense of instructions when pretty-printed, for example.
149     vreg_aliases: FxHashMap<regalloc2::VReg, regalloc2::VReg>,
150 
151     /// Block-order information.
152     block_order: BlockLoweringOrder,
153 
154     /// ABI object.
155     pub(crate) abi: Callee<I::ABIMachineSpec>,
156 
157     /// Constant information used during code emission. This should be
158     /// immutable across function compilations within the same module.
159     emit_info: I::Info,
160 
161     /// Reference-typed `regalloc2::VReg`s. The regalloc requires
162     /// these in a dense slice (as opposed to querying the
163     /// reftype-status of each vreg) for efficient iteration.
164     reftyped_vregs: Vec<VReg>,
165 
166     /// Constants.
167     pub(crate) constants: VCodeConstants,
168 
169     /// Value labels for debuginfo attached to vregs.
170     debug_value_labels: Vec<(VReg, InsnIndex, InsnIndex, u32)>,
171 
172     pub(crate) sigs: SigSet,
173 
174     /// Facts on VRegs, for proof-carrying code verification.
175     facts: Vec<Option<Fact>>,
176 }
177 
178 /// The result of `VCode::emit`. Contains all information computed
179 /// during emission: actual machine code, optionally a disassembly,
180 /// and optionally metadata about the code layout.
181 pub struct EmitResult {
182     /// The MachBuffer containing the machine code.
183     pub buffer: MachBufferFinalized<Stencil>,
184 
185     /// Offset of each basic block, recorded during emission. Computed
186     /// only if `debug_value_labels` is non-empty.
187     pub bb_offsets: Vec<CodeOffset>,
188 
189     /// Final basic-block edges, in terms of code offsets of
190     /// bb-starts. Computed only if `debug_value_labels` is non-empty.
191     pub bb_edges: Vec<(CodeOffset, CodeOffset)>,
192 
193     /// Final length of function body.
194     pub func_body_len: CodeOffset,
195 
196     /// The pretty-printed disassembly, if any. This uses the same
197     /// pretty-printing for MachInsts as the pre-regalloc VCode Debug
198     /// implementation, but additionally includes the prologue and
199     /// epilogue(s), and makes use of the regalloc results.
200     pub disasm: Option<String>,
201 
202     /// Offsets of sized stackslots.
203     pub sized_stackslot_offsets: PrimaryMap<StackSlot, u32>,
204 
205     /// Offsets of dynamic stackslots.
206     pub dynamic_stackslot_offsets: PrimaryMap<DynamicStackSlot, u32>,
207 
208     /// Value-labels information (debug metadata).
209     pub value_labels_ranges: ValueLabelsRanges,
210 
211     /// Stack frame size.
212     pub frame_size: u32,
213 }
214 
215 /// A builder for a VCode function body.
216 ///
217 /// This builder has the ability to accept instructions in either
218 /// forward or reverse order, depending on the pass direction that
219 /// produces the VCode. The lowering from CLIF to VCode<MachInst>
220 /// ordinarily occurs in reverse order (in order to allow instructions
221 /// to be lowered only if used, and not merged) so a reversal will
222 /// occur at the end of lowering to ensure the VCode is in machine
223 /// order.
224 ///
225 /// If built in reverse, block and instruction indices used once the
226 /// VCode is built are relative to the final (reversed) order, not the
227 /// order of construction. Note that this means we do not know the
228 /// final block or instruction indices when building, so we do not
229 /// hand them out. (The user is assumed to know them when appending
230 /// terminator instructions with successor blocks.)
231 pub struct VCodeBuilder<I: VCodeInst> {
232     /// In-progress VCode.
233     pub(crate) vcode: VCode<I>,
234 
235     /// In what direction is the build occuring?
236     direction: VCodeBuildDirection,
237 
238     /// Index of the last block-start in the vcode.
239     block_start: usize,
240 
241     /// Start of succs for the current block in the concatenated succs list.
242     succ_start: usize,
243 
244     /// Start of blockparams for the current block in the concatenated
245     /// blockparams list.
246     block_params_start: usize,
247 
248     /// Start of successor blockparam arg list entries in
249     /// the concatenated branch_block_arg_range list.
250     branch_block_arg_succ_start: usize,
251 
252     /// Current source location.
253     cur_srcloc: RelSourceLoc,
254 
255     /// Debug-value label in-progress map, keyed by label. For each
256     /// label, we keep disjoint ranges mapping to vregs. We'll flatten
257     /// this into (vreg, range, label) tuples when done.
258     debug_info: FxHashMap<ValueLabel, Vec<(InsnIndex, InsnIndex, VReg)>>,
259 }
260 
261 /// Direction in which a VCodeBuilder builds VCode.
262 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
263 pub enum VCodeBuildDirection {
264     // TODO: add `Forward` once we need it and can test it adequately.
265     /// Backward-build pass: we expect the producer to call `emit()`
266     /// with instructions in reverse program order within each block.
267     Backward,
268 }
269 
270 impl<I: VCodeInst> VCodeBuilder<I> {
271     /// Create a new VCodeBuilder.
272     pub fn new(
273         sigs: SigSet,
274         abi: Callee<I::ABIMachineSpec>,
275         emit_info: I::Info,
276         block_order: BlockLoweringOrder,
277         constants: VCodeConstants,
278         direction: VCodeBuildDirection,
279     ) -> VCodeBuilder<I> {
280         let vcode = VCode::new(sigs, abi, emit_info, block_order, constants);
281 
282         VCodeBuilder {
283             vcode,
284             direction,
285             block_start: 0,
286             succ_start: 0,
287             block_params_start: 0,
288             branch_block_arg_succ_start: 0,
289             cur_srcloc: Default::default(),
290             debug_info: FxHashMap::default(),
291         }
292     }
293 
294     pub fn init_abi(&mut self, temps: Vec<Writable<Reg>>) {
295         self.vcode.abi.init(&self.vcode.sigs, temps);
296     }
297 
298     /// Access the ABI object.
299     pub fn abi(&self) -> &Callee<I::ABIMachineSpec> {
300         &self.vcode.abi
301     }
302 
303     /// Access the ABI object.
304     pub fn abi_mut(&mut self) -> &mut Callee<I::ABIMachineSpec> {
305         &mut self.vcode.abi
306     }
307 
308     pub fn sigs(&self) -> &SigSet {
309         &self.vcode.sigs
310     }
311 
312     pub fn sigs_mut(&mut self) -> &mut SigSet {
313         &mut self.vcode.sigs
314     }
315 
316     /// Access to the BlockLoweringOrder object.
317     pub fn block_order(&self) -> &BlockLoweringOrder {
318         &self.vcode.block_order
319     }
320 
321     /// Set the current block as the entry block.
322     pub fn set_entry(&mut self, block: BlockIndex) {
323         self.vcode.entry = block;
324     }
325 
326     /// End the current basic block. Must be called after emitting vcode insts
327     /// for IR insts and prior to ending the function (building the VCode).
328     pub fn end_bb(&mut self) {
329         let start_idx = self.block_start;
330         let end_idx = self.vcode.insts.len();
331         self.block_start = end_idx;
332         // Add the instruction index range to the list of blocks.
333         self.vcode
334             .block_ranges
335             .push((InsnIndex::new(start_idx), InsnIndex::new(end_idx)));
336         // End the successors list.
337         let succ_end = self.vcode.block_succs_preds.len();
338         self.vcode
339             .block_succ_range
340             .push((self.succ_start as u32, succ_end as u32));
341         self.succ_start = succ_end;
342         // End the blockparams list.
343         let block_params_end = self.vcode.block_params.len();
344         self.vcode
345             .block_params_range
346             .push((self.block_params_start as u32, block_params_end as u32));
347         self.block_params_start = block_params_end;
348         // End the branch blockparam args list.
349         let branch_block_arg_succ_end = self.vcode.branch_block_arg_range.len();
350         self.vcode.branch_block_arg_succ_range.push((
351             self.branch_block_arg_succ_start as u32,
352             branch_block_arg_succ_end as u32,
353         ));
354         self.branch_block_arg_succ_start = branch_block_arg_succ_end;
355     }
356 
357     pub fn add_block_param(&mut self, param: VirtualReg) {
358         self.vcode.block_params.push(param.into());
359     }
360 
361     fn add_branch_args_for_succ(&mut self, args: &[Reg]) {
362         let start = self.vcode.branch_block_args.len();
363         self.vcode
364             .branch_block_args
365             .extend(args.iter().map(|&arg| VReg::from(arg)));
366         let end = self.vcode.branch_block_args.len();
367         self.vcode
368             .branch_block_arg_range
369             .push((start as u32, end as u32));
370     }
371 
372     /// Push an instruction for the current BB and current IR inst
373     /// within the BB.
374     pub fn push(&mut self, insn: I) {
375         self.vcode.insts.push(insn);
376         self.vcode.srclocs.push(self.cur_srcloc);
377     }
378 
379     /// Add a successor block with branch args.
380     pub fn add_succ(&mut self, block: BlockIndex, args: &[Reg]) {
381         self.vcode.block_succs_preds.push(block);
382         self.add_branch_args_for_succ(args);
383     }
384 
385     /// Set the current source location.
386     pub fn set_srcloc(&mut self, srcloc: RelSourceLoc) {
387         self.cur_srcloc = srcloc;
388     }
389 
390     /// Add a debug value label to a register.
391     pub fn add_value_label(&mut self, reg: Reg, label: ValueLabel) {
392         // We'll fix up labels in reverse(). Because we're generating
393         // code bottom-to-top, the liverange of the label goes *from*
394         // the last index at which was defined (or 0, which is the end
395         // of the eventual function) *to* just this instruction, and
396         // no further.
397         let inst = InsnIndex::new(self.vcode.insts.len());
398         let labels = self.debug_info.entry(label).or_insert_with(|| vec![]);
399         let last = labels
400             .last()
401             .map(|(_start, end, _vreg)| *end)
402             .unwrap_or(InsnIndex::new(0));
403         labels.push((last, inst, reg.into()));
404     }
405 
406     pub fn set_vreg_alias(&mut self, from: Reg, to: Reg) {
407         let from = from.into();
408         let resolved_to = self.vcode.resolve_vreg_alias(to.into());
409         // Disallow cycles (see below).
410         assert_ne!(resolved_to, from);
411         self.vcode.vreg_aliases.insert(from, resolved_to);
412     }
413 
414     /// Access the constants.
415     pub fn constants(&mut self) -> &mut VCodeConstants {
416         &mut self.vcode.constants
417     }
418 
419     fn compute_preds_from_succs(&mut self) {
420         // Compute predecessors from successors. In order to gather
421         // all preds for a block into a contiguous sequence, we build
422         // a list of (succ, pred) tuples and then sort.
423         let mut succ_pred_edges: Vec<(BlockIndex, BlockIndex)> =
424             Vec::with_capacity(self.vcode.block_succs_preds.len());
425         for (pred, &(start, end)) in self.vcode.block_succ_range.iter().enumerate() {
426             let pred = BlockIndex::new(pred);
427             for i in start..end {
428                 let succ = BlockIndex::new(self.vcode.block_succs_preds[i as usize].index());
429                 succ_pred_edges.push((succ, pred));
430             }
431         }
432         succ_pred_edges.sort_unstable();
433 
434         let mut i = 0;
435         for succ in 0..self.vcode.num_blocks() {
436             let succ = BlockIndex::new(succ);
437             let start = self.vcode.block_succs_preds.len();
438             while i < succ_pred_edges.len() && succ_pred_edges[i].0 == succ {
439                 let pred = succ_pred_edges[i].1;
440                 self.vcode.block_succs_preds.push(pred);
441                 i += 1;
442             }
443             let end = self.vcode.block_succs_preds.len();
444             self.vcode.block_pred_range.push((start as u32, end as u32));
445         }
446     }
447 
448     /// Called once, when a build in Backward order is complete, to
449     /// perform the overall reversal (into final forward order) and
450     /// finalize metadata accordingly.
451     fn reverse_and_finalize(&mut self) {
452         let n_insts = self.vcode.insts.len();
453         if n_insts == 0 {
454             return;
455         }
456 
457         // Reverse the per-block and per-inst sequences.
458         self.vcode.block_ranges.reverse();
459         // block_params_range is indexed by block (and blocks were
460         // traversed in reverse) so we reverse it; but block-param
461         // sequences in the concatenated vec can remain in reverse
462         // order (it is effectively an arena of arbitrarily-placed
463         // referenced sequences).
464         self.vcode.block_params_range.reverse();
465         // Likewise, we reverse block_succ_range, but the block_succ
466         // concatenated array can remain as-is.
467         self.vcode.block_succ_range.reverse();
468         self.vcode.insts.reverse();
469         self.vcode.srclocs.reverse();
470         // Likewise, branch_block_arg_succ_range is indexed by block
471         // so must be reversed.
472         self.vcode.branch_block_arg_succ_range.reverse();
473 
474         // To translate an instruction index *endpoint* in reversed
475         // order to forward order, compute `n_insts - i`.
476         //
477         // Why not `n_insts - 1 - i`? That would be correct to
478         // translate an individual instruction index (for ten insts 0
479         // to 9 inclusive, inst 0 becomes 9, and inst 9 becomes
480         // 0). But for the usual inclusive-start, exclusive-end range
481         // idiom, inclusive starts become exclusive ends and
482         // vice-versa, so e.g. an (inclusive) start of 0 becomes an
483         // (exclusive) end of 10.
484         let translate = |inst: InsnIndex| InsnIndex::new(n_insts - inst.index());
485 
486         // Edit the block-range instruction indices.
487         for tuple in &mut self.vcode.block_ranges {
488             let (start, end) = *tuple;
489             *tuple = (translate(end), translate(start)); // Note reversed order.
490         }
491 
492         // Generate debug-value labels based on per-label maps.
493         for (label, tuples) in &self.debug_info {
494             for &(start, end, vreg) in tuples {
495                 let vreg = self.vcode.resolve_vreg_alias(vreg);
496                 let fwd_start = translate(end);
497                 let fwd_end = translate(start);
498                 self.vcode
499                     .debug_value_labels
500                     .push((vreg, fwd_start, fwd_end, label.as_u32()));
501             }
502         }
503 
504         // Now sort debug value labels by VReg, as required
505         // by regalloc2.
506         self.vcode
507             .debug_value_labels
508             .sort_unstable_by_key(|(vreg, _, _, _)| *vreg);
509     }
510 
511     fn collect_operands(&mut self) {
512         for (i, insn) in self.vcode.insts.iter().enumerate() {
513             // Push operands from the instruction onto the operand list.
514             //
515             // We rename through the vreg alias table as we collect
516             // the operands. This is better than a separate post-pass
517             // over operands, because it has more cache locality:
518             // operands only need to pass through L1 once. This is
519             // also better than renaming instructions'
520             // operands/registers while lowering, because here we only
521             // need to do the `match` over the instruction to visit
522             // its register fields (which is slow, branchy code) once.
523 
524             let vreg_aliases = &self.vcode.vreg_aliases;
525             let allocatable = PRegSet::from(self.vcode.machine_env());
526             let mut op_collector =
527                 OperandCollector::new(&mut self.vcode.operands, allocatable, |vreg| {
528                     VCode::<I>::resolve_vreg_alias_impl(vreg_aliases, vreg)
529                 });
530             insn.get_operands(&mut op_collector);
531             let (ops, clobbers) = op_collector.finish();
532             self.vcode.operand_ranges.push(ops);
533 
534             if clobbers != PRegSet::default() {
535                 self.vcode.clobbers.insert(InsnIndex::new(i), clobbers);
536             }
537 
538             if let Some((dst, src)) = insn.is_move() {
539                 // We should never see non-virtual registers present in move
540                 // instructions.
541                 assert!(
542                     src.is_virtual(),
543                     "the real register {:?} was used as the source of a move instruction",
544                     src
545                 );
546                 assert!(
547                     dst.to_reg().is_virtual(),
548                     "the real register {:?} was used as the destination of a move instruction",
549                     dst.to_reg()
550                 );
551             }
552         }
553 
554         // Translate blockparam args via the vreg aliases table as well.
555         for arg in &mut self.vcode.branch_block_args {
556             let new_arg = VCode::<I>::resolve_vreg_alias_impl(&self.vcode.vreg_aliases, *arg);
557             trace!("operandcollector: block arg {:?} -> {:?}", arg, new_arg);
558             *arg = new_arg;
559         }
560     }
561 
562     /// Build the final VCode.
563     pub fn build(mut self, vregs: VRegAllocator<I>) -> VCode<I> {
564         self.vcode.vreg_types = vregs.vreg_types;
565         self.vcode.facts = vregs.facts;
566         self.vcode.reftyped_vregs = vregs.reftyped_vregs;
567 
568         if self.direction == VCodeBuildDirection::Backward {
569             self.reverse_and_finalize();
570         }
571         self.collect_operands();
572 
573         // Apply register aliases to the `reftyped_vregs` list since this list
574         // will be returned directly to `regalloc2` eventually and all
575         // operands/results of instructions will use the alias-resolved vregs
576         // from `regalloc2`'s perspective.
577         //
578         // Also note that `reftyped_vregs` can't have duplicates, so after the
579         // aliases are applied duplicates are removed.
580         for reg in self.vcode.reftyped_vregs.iter_mut() {
581             *reg = VCode::<I>::resolve_vreg_alias_impl(&self.vcode.vreg_aliases, *reg);
582         }
583         self.vcode.reftyped_vregs.sort();
584         self.vcode.reftyped_vregs.dedup();
585 
586         self.compute_preds_from_succs();
587         self.vcode.debug_value_labels.sort_unstable();
588         self.vcode
589     }
590 }
591 
592 /// Is this type a reference type?
593 fn is_reftype(ty: Type) -> bool {
594     ty == types::R64 || ty == types::R32
595 }
596 
597 const NO_INST_OFFSET: CodeOffset = u32::MAX;
598 
599 impl<I: VCodeInst> VCode<I> {
600     /// New empty VCode.
601     fn new(
602         sigs: SigSet,
603         abi: Callee<I::ABIMachineSpec>,
604         emit_info: I::Info,
605         block_order: BlockLoweringOrder,
606         constants: VCodeConstants,
607     ) -> VCode<I> {
608         let n_blocks = block_order.lowered_order().len();
609         VCode {
610             sigs,
611             vreg_types: vec![],
612             insts: Vec::with_capacity(10 * n_blocks),
613             operands: Vec::with_capacity(30 * n_blocks),
614             operand_ranges: Vec::with_capacity(10 * n_blocks),
615             clobbers: FxHashMap::default(),
616             srclocs: Vec::with_capacity(10 * n_blocks),
617             entry: BlockIndex::new(0),
618             block_ranges: Vec::with_capacity(n_blocks),
619             block_succ_range: Vec::with_capacity(n_blocks),
620             block_succs_preds: Vec::with_capacity(2 * n_blocks),
621             block_pred_range: Vec::with_capacity(n_blocks),
622             block_params_range: Vec::with_capacity(n_blocks),
623             block_params: Vec::with_capacity(5 * n_blocks),
624             branch_block_args: Vec::with_capacity(10 * n_blocks),
625             branch_block_arg_range: Vec::with_capacity(2 * n_blocks),
626             branch_block_arg_succ_range: Vec::with_capacity(n_blocks),
627             block_order,
628             abi,
629             emit_info,
630             reftyped_vregs: vec![],
631             constants,
632             debug_value_labels: vec![],
633             vreg_aliases: FxHashMap::with_capacity_and_hasher(10 * n_blocks, Default::default()),
634             facts: vec![],
635         }
636     }
637 
638     /// Get the ABI-dependent MachineEnv for managing register allocation.
639     pub fn machine_env(&self) -> &MachineEnv {
640         self.abi.machine_env(&self.sigs)
641     }
642 
643     /// Get the number of blocks. Block indices will be in the range `0 ..
644     /// (self.num_blocks() - 1)`.
645     pub fn num_blocks(&self) -> usize {
646         self.block_ranges.len()
647     }
648 
649     /// The number of lowered instructions.
650     pub fn num_insts(&self) -> usize {
651         self.insts.len()
652     }
653 
654     /// Get the successors for a block.
655     pub fn succs(&self, block: BlockIndex) -> &[BlockIndex] {
656         let (start, end) = self.block_succ_range[block.index()];
657         &self.block_succs_preds[start as usize..end as usize]
658     }
659 
660     fn compute_clobbers(&self, regalloc: &regalloc2::Output) -> Vec<Writable<RealReg>> {
661         // Compute clobbered registers.
662         let mut clobbered = vec![];
663         let mut clobbered_set = FxHashSet::default();
664 
665         // All moves are included in clobbers.
666         for edit in &regalloc.edits {
667             let Edit::Move { to, .. } = edit.1;
668             if let Some(preg) = to.as_reg() {
669                 let reg = RealReg::from(preg);
670                 if clobbered_set.insert(reg) {
671                     clobbered.push(Writable::from_reg(reg));
672                 }
673             }
674         }
675 
676         for (i, (start, end)) in self.operand_ranges.iter().enumerate() {
677             // Skip this instruction if not "included in clobbers" as
678             // per the MachInst. (Some backends use this to implement
679             // ABI specifics; e.g., excluding calls of the same ABI as
680             // the current function from clobbers, because by
681             // definition everything clobbered by the call can be
682             // clobbered by this function without saving as well.)
683             if !self.insts[i].is_included_in_clobbers() {
684                 continue;
685             }
686 
687             let start = *start as usize;
688             let end = *end as usize;
689             let operands = &self.operands[start..end];
690             let allocs = &regalloc.allocs[start..end];
691             for (operand, alloc) in operands.iter().zip(allocs.iter()) {
692                 // We're interested only in writes (Mods or Defs).
693                 if operand.kind() == OperandKind::Use {
694                     continue;
695                 }
696                 if let Some(preg) = alloc.as_reg() {
697                     let reg = RealReg::from(preg);
698                     if clobbered_set.insert(reg) {
699                         clobbered.push(Writable::from_reg(reg));
700                     }
701                 }
702             }
703 
704             // Also add explicitly-clobbered registers.
705             for preg in self
706                 .clobbers
707                 .get(&InsnIndex::new(i))
708                 .cloned()
709                 .unwrap_or_default()
710             {
711                 let reg = RealReg::from(preg);
712                 if clobbered_set.insert(reg) {
713                     clobbered.push(Writable::from_reg(reg));
714                 }
715             }
716         }
717 
718         clobbered
719     }
720 
721     /// Emit the instructions to a `MachBuffer`, containing fixed-up
722     /// code and external reloc/trap/etc. records ready for use. Takes
723     /// the regalloc results as well.
724     ///
725     /// Returns the machine code itself, and optionally metadata
726     /// and/or a disassembly, as an `EmitResult`. The `VCode` itself
727     /// is consumed by the emission process.
728     pub fn emit(
729         mut self,
730         regalloc: &regalloc2::Output,
731         want_disasm: bool,
732         flags: &settings::Flags,
733         ctrl_plane: &mut ControlPlane,
734     ) -> EmitResult
735     where
736         I: VCodeInst,
737     {
738         // To write into disasm string.
739         use core::fmt::Write;
740 
741         let _tt = timing::vcode_emit();
742         let mut buffer = MachBuffer::new();
743         let mut bb_starts: Vec<Option<CodeOffset>> = vec![];
744 
745         // The first M MachLabels are reserved for block indices.
746         buffer.reserve_labels_for_blocks(self.num_blocks());
747 
748         // Register all allocated constants with the `MachBuffer` to ensure that
749         // any references to the constants during instructions can be handled
750         // correctly.
751         buffer.register_constants(&self.constants);
752 
753         // Construct the final order we emit code in: cold blocks at the end.
754         let mut final_order: SmallVec<[BlockIndex; 16]> = smallvec![];
755         let mut cold_blocks: SmallVec<[BlockIndex; 16]> = smallvec![];
756         for block in 0..self.num_blocks() {
757             let block = BlockIndex::new(block);
758             if self.block_order.is_cold(block) {
759                 cold_blocks.push(block);
760             } else {
761                 final_order.push(block);
762             }
763         }
764         final_order.extend(cold_blocks.clone());
765 
766         // Compute/save info we need for the prologue: clobbers and
767         // number of spillslots.
768         //
769         // We clone `abi` here because we will mutate it as we
770         // generate the prologue and set other info, but we can't
771         // mutate `VCode`. The info it usually carries prior to
772         // setting clobbers is fairly minimal so this should be
773         // relatively cheap.
774         let clobbers = self.compute_clobbers(regalloc);
775         self.abi.set_num_spillslots(regalloc.num_spillslots);
776         self.abi.set_clobbered(clobbers);
777         self.abi.compute_frame_layout(&self.sigs);
778 
779         // Emit blocks.
780         let mut cur_srcloc = None;
781         let mut last_offset = None;
782         let mut inst_offsets = vec![];
783         let mut state = I::State::new(&self.abi, std::mem::take(ctrl_plane));
784 
785         let mut disasm = String::new();
786 
787         if !self.debug_value_labels.is_empty() {
788             inst_offsets.resize(self.insts.len(), NO_INST_OFFSET);
789         }
790 
791         // Count edits per block ahead of time; this is needed for
792         // lookahead island emission. (We could derive it per-block
793         // with binary search in the edit list, but it's more
794         // efficient to do it in one pass here.)
795         let mut ra_edits_per_block: SmallVec<[u32; 64]> = smallvec![];
796         let mut edit_idx = 0;
797         for block in 0..self.num_blocks() {
798             let end_inst = self.block_ranges[block].1;
799             let start_edit_idx = edit_idx;
800             while edit_idx < regalloc.edits.len() && regalloc.edits[edit_idx].0.inst() < end_inst {
801                 edit_idx += 1;
802             }
803             let end_edit_idx = edit_idx;
804             ra_edits_per_block.push((end_edit_idx - start_edit_idx) as u32);
805         }
806 
807         let is_forward_edge_cfi_enabled = self.abi.is_forward_edge_cfi_enabled();
808         let mut bb_padding = match flags.bb_padding_log2_minus_one() {
809             0 => Vec::new(),
810             n => vec![0; 1 << (n - 1)],
811         };
812         let mut total_bb_padding = 0;
813 
814         for (block_order_idx, &block) in final_order.iter().enumerate() {
815             trace!("emitting block {:?}", block);
816 
817             // Call the new block hook for state
818             state.on_new_block();
819 
820             // Emit NOPs to align the block.
821             let new_offset = I::align_basic_block(buffer.cur_offset());
822             while new_offset > buffer.cur_offset() {
823                 // Pad with NOPs up to the aligned block offset.
824                 let nop = I::gen_nop((new_offset - buffer.cur_offset()) as usize);
825                 nop.emit(&[], &mut buffer, &self.emit_info, &mut Default::default());
826             }
827             assert_eq!(buffer.cur_offset(), new_offset);
828 
829             let do_emit = |inst: &I,
830                            allocs: &[Allocation],
831                            disasm: &mut String,
832                            buffer: &mut MachBuffer<I>,
833                            state: &mut I::State| {
834                 if want_disasm && !inst.is_args() {
835                     let mut s = state.clone();
836                     writeln!(disasm, "  {}", inst.pretty_print_inst(allocs, &mut s)).unwrap();
837                 }
838                 inst.emit(allocs, buffer, &self.emit_info, state);
839             };
840 
841             // Is this the first block? Emit the prologue directly if so.
842             if block == self.entry {
843                 trace!(" -> entry block");
844                 buffer.start_srcloc(Default::default());
845                 state.pre_sourceloc(Default::default());
846                 for inst in &self.abi.gen_prologue() {
847                     do_emit(&inst, &[], &mut disasm, &mut buffer, &mut state);
848                 }
849                 buffer.end_srcloc();
850             }
851 
852             // Now emit the regular block body.
853 
854             buffer.bind_label(MachLabel::from_block(block), state.ctrl_plane_mut());
855 
856             if want_disasm {
857                 writeln!(&mut disasm, "block{}:", block.index()).unwrap();
858             }
859 
860             if flags.machine_code_cfg_info() {
861                 // Track BB starts. If we have backed up due to MachBuffer
862                 // branch opts, note that the removed blocks were removed.
863                 let cur_offset = buffer.cur_offset();
864                 if last_offset.is_some() && cur_offset <= last_offset.unwrap() {
865                     for i in (0..bb_starts.len()).rev() {
866                         if bb_starts[i].is_some() && cur_offset > bb_starts[i].unwrap() {
867                             break;
868                         }
869                         bb_starts[i] = None;
870                     }
871                 }
872                 bb_starts.push(Some(cur_offset));
873                 last_offset = Some(cur_offset);
874             }
875 
876             if let Some(block_start) = I::gen_block_start(
877                 self.block_order.is_indirect_branch_target(block),
878                 is_forward_edge_cfi_enabled,
879             ) {
880                 do_emit(&block_start, &[], &mut disasm, &mut buffer, &mut state);
881             }
882 
883             for inst_or_edit in regalloc.block_insts_and_edits(&self, block) {
884                 match inst_or_edit {
885                     InstOrEdit::Inst(iix) => {
886                         if !self.debug_value_labels.is_empty() {
887                             // If we need to produce debug info,
888                             // record the offset of each instruction
889                             // so that we can translate value-label
890                             // ranges to machine-code offsets.
891 
892                             // Cold blocks violate monotonicity
893                             // assumptions elsewhere (that
894                             // instructions in inst-index order are in
895                             // order in machine code), so we omit
896                             // their offsets here. Value-label range
897                             // generation below will skip empty ranges
898                             // and ranges with to-offsets of zero.
899                             if !self.block_order.is_cold(block) {
900                                 inst_offsets[iix.index()] = buffer.cur_offset();
901                             }
902                         }
903 
904                         // Update the srcloc at this point in the buffer.
905                         let srcloc = self.srclocs[iix.index()];
906                         if cur_srcloc != Some(srcloc) {
907                             if cur_srcloc.is_some() {
908                                 buffer.end_srcloc();
909                             }
910                             buffer.start_srcloc(srcloc);
911                             cur_srcloc = Some(srcloc);
912                         }
913                         state.pre_sourceloc(cur_srcloc.unwrap_or_default());
914 
915                         // If this is a safepoint, compute a stack map
916                         // and pass it to the emit state.
917                         if self.insts[iix.index()].is_safepoint() {
918                             let mut safepoint_slots: SmallVec<[SpillSlot; 8]> = smallvec![];
919                             // Find the contiguous range of
920                             // (progpoint, allocation) safepoint slot
921                             // records in `regalloc.safepoint_slots`
922                             // for this instruction index.
923                             let safepoint_slots_start = regalloc
924                                 .safepoint_slots
925                                 .binary_search_by(|(progpoint, _alloc)| {
926                                     if progpoint.inst() >= iix {
927                                         std::cmp::Ordering::Greater
928                                     } else {
929                                         std::cmp::Ordering::Less
930                                     }
931                                 })
932                                 .unwrap_err();
933 
934                             for (_, alloc) in regalloc.safepoint_slots[safepoint_slots_start..]
935                                 .iter()
936                                 .take_while(|(progpoint, _)| progpoint.inst() == iix)
937                             {
938                                 let slot = alloc.as_stack().unwrap();
939                                 safepoint_slots.push(slot);
940                             }
941                             if !safepoint_slots.is_empty() {
942                                 let stack_map = self
943                                     .abi
944                                     .spillslots_to_stack_map(&safepoint_slots[..], &state);
945                                 state.pre_safepoint(stack_map);
946                             }
947                         }
948 
949                         // Get the allocations for this inst from the regalloc result.
950                         let allocs = regalloc.inst_allocs(iix);
951 
952                         // If the instruction we are about to emit is
953                         // a return, place an epilogue at this point
954                         // (and don't emit the return; the actual
955                         // epilogue will contain it).
956                         if self.insts[iix.index()].is_term() == MachTerminator::Ret {
957                             for inst in self.abi.gen_epilogue() {
958                                 do_emit(&inst, &[], &mut disasm, &mut buffer, &mut state);
959                             }
960                         } else {
961                             // Emit the instruction!
962                             do_emit(
963                                 &self.insts[iix.index()],
964                                 allocs,
965                                 &mut disasm,
966                                 &mut buffer,
967                                 &mut state,
968                             );
969                         }
970                     }
971 
972                     InstOrEdit::Edit(Edit::Move { from, to }) => {
973                         // Create a move/spill/reload instruction and
974                         // immediately emit it.
975                         match (from.as_reg(), to.as_reg()) {
976                             (Some(from), Some(to)) => {
977                                 // Reg-to-reg move.
978                                 let from_rreg = Reg::from(from);
979                                 let to_rreg = Writable::from_reg(Reg::from(to));
980                                 debug_assert_eq!(from.class(), to.class());
981                                 let ty = I::canonical_type_for_rc(from.class());
982                                 let mv = I::gen_move(to_rreg, from_rreg, ty);
983                                 do_emit(&mv, &[], &mut disasm, &mut buffer, &mut state);
984                             }
985                             (Some(from), None) => {
986                                 // Spill from register to spillslot.
987                                 let to = to.as_stack().unwrap();
988                                 let from_rreg = RealReg::from(from);
989                                 let spill = self.abi.gen_spill(to, from_rreg);
990                                 do_emit(&spill, &[], &mut disasm, &mut buffer, &mut state);
991                             }
992                             (None, Some(to)) => {
993                                 // Load from spillslot to register.
994                                 let from = from.as_stack().unwrap();
995                                 let to_rreg = Writable::from_reg(RealReg::from(to));
996                                 let reload = self.abi.gen_reload(to_rreg, from);
997                                 do_emit(&reload, &[], &mut disasm, &mut buffer, &mut state);
998                             }
999                             (None, None) => {
1000                                 panic!("regalloc2 should have eliminated stack-to-stack moves!");
1001                             }
1002                         }
1003                     }
1004                 }
1005             }
1006 
1007             if cur_srcloc.is_some() {
1008                 buffer.end_srcloc();
1009                 cur_srcloc = None;
1010             }
1011 
1012             // Do we need an island? Get the worst-case size of the next BB, add
1013             // it to the optional padding behind the block, and pass this to the
1014             // `MachBuffer` to determine if an island is necessary.
1015             let worst_case_next_bb = if block_order_idx < final_order.len() - 1 {
1016                 let next_block = final_order[block_order_idx + 1];
1017                 let next_block_range = self.block_ranges[next_block.index()];
1018                 let next_block_size =
1019                     (next_block_range.1.index() - next_block_range.0.index()) as u32;
1020                 let next_block_ra_insertions = ra_edits_per_block[next_block.index()];
1021                 I::worst_case_size() * (next_block_size + next_block_ra_insertions)
1022             } else {
1023                 0
1024             };
1025             let padding = if bb_padding.is_empty() {
1026                 0
1027             } else {
1028                 bb_padding.len() as u32 + I::LabelUse::ALIGN - 1
1029             };
1030             if buffer.island_needed(padding + worst_case_next_bb) {
1031                 buffer.emit_island(padding + worst_case_next_bb, ctrl_plane);
1032             }
1033 
1034             // Insert padding, if configured, to stress the `MachBuffer`'s
1035             // relocation and island calculations.
1036             //
1037             // Padding can get quite large during fuzzing though so place a
1038             // total cap on it where when a per-function threshold is exceeded
1039             // the padding is turned back down to zero. This avoids a small-ish
1040             // test case generating a GB+ memory footprint in Cranelift for
1041             // example.
1042             if !bb_padding.is_empty() {
1043                 buffer.put_data(&bb_padding);
1044                 buffer.align_to(I::LabelUse::ALIGN);
1045                 total_bb_padding += bb_padding.len();
1046                 if total_bb_padding > (150 << 20) {
1047                     bb_padding = Vec::new();
1048                 }
1049             }
1050         }
1051 
1052         // Do any optimizations on branches at tail of buffer, as if we had
1053         // bound one last label.
1054         buffer.optimize_branches(ctrl_plane);
1055 
1056         // emission state is not needed anymore, move control plane back out
1057         *ctrl_plane = state.take_ctrl_plane();
1058 
1059         let func_body_len = buffer.cur_offset();
1060 
1061         // Create `bb_edges` and final (filtered) `bb_starts`.
1062         let mut bb_edges = vec![];
1063         let mut bb_offsets = vec![];
1064         if flags.machine_code_cfg_info() {
1065             for block in 0..self.num_blocks() {
1066                 if bb_starts[block].is_none() {
1067                     // Block was deleted by MachBuffer; skip.
1068                     continue;
1069                 }
1070                 let from = bb_starts[block].unwrap();
1071 
1072                 bb_offsets.push(from);
1073                 // Resolve each `succ` label and add edges.
1074                 let succs = self.block_succs(BlockIndex::new(block));
1075                 for &succ in succs.iter() {
1076                     let to = buffer.resolve_label_offset(MachLabel::from_block(succ));
1077                     bb_edges.push((from, to));
1078                 }
1079             }
1080         }
1081 
1082         self.monotonize_inst_offsets(&mut inst_offsets[..], func_body_len);
1083         let value_labels_ranges =
1084             self.compute_value_labels_ranges(regalloc, &inst_offsets[..], func_body_len);
1085         let frame_size = self.abi.frame_size();
1086 
1087         EmitResult {
1088             buffer: buffer.finish(&self.constants, ctrl_plane),
1089             bb_offsets,
1090             bb_edges,
1091             func_body_len,
1092             disasm: if want_disasm { Some(disasm) } else { None },
1093             sized_stackslot_offsets: self.abi.sized_stackslot_offsets().clone(),
1094             dynamic_stackslot_offsets: self.abi.dynamic_stackslot_offsets().clone(),
1095             value_labels_ranges,
1096             frame_size,
1097         }
1098     }
1099 
1100     fn monotonize_inst_offsets(&self, inst_offsets: &mut [CodeOffset], func_body_len: u32) {
1101         if self.debug_value_labels.is_empty() {
1102             return;
1103         }
1104 
1105         // During emission, branch removal can make offsets of instructions incorrect.
1106         // Consider the following sequence: [insi][jmp0][jmp1][jmp2][insj]
1107         // It will be recorded as (say):    [30]  [34]  [38]  [42]  [<would be 46>]
1108         // When the jumps get removed we are left with (in "inst_offsets"):
1109         // [insi][jmp0][jmp1][jmp2][insj][...]
1110         // [30]  [34]  [38]  [42]  [34]
1111         // Which violates the monotonicity invariant. This method sets offsets of these
1112         // removed instructions such as to make them appear zero-sized:
1113         // [insi][jmp0][jmp1][jmp2][insj][...]
1114         // [30]  [34]  [34]  [34]  [34]
1115         //
1116         let mut next_offset = func_body_len;
1117         for inst_index in (0..(inst_offsets.len() - 1)).rev() {
1118             let inst_offset = inst_offsets[inst_index];
1119 
1120             // Not all instructions get their offsets recorded.
1121             if inst_offset == NO_INST_OFFSET {
1122                 continue;
1123             }
1124 
1125             if inst_offset > next_offset {
1126                 trace!(
1127                     "Fixing code offset of the removed Inst {}: {} -> {}",
1128                     inst_index,
1129                     inst_offset,
1130                     next_offset
1131                 );
1132                 inst_offsets[inst_index] = next_offset;
1133                 continue;
1134             }
1135 
1136             next_offset = inst_offset;
1137         }
1138     }
1139 
1140     fn compute_value_labels_ranges(
1141         &self,
1142         regalloc: &regalloc2::Output,
1143         inst_offsets: &[CodeOffset],
1144         func_body_len: u32,
1145     ) -> ValueLabelsRanges {
1146         if self.debug_value_labels.is_empty() {
1147             return ValueLabelsRanges::default();
1148         }
1149 
1150         let mut value_labels_ranges: ValueLabelsRanges = HashMap::new();
1151         for &(label, from, to, alloc) in &regalloc.debug_locations {
1152             let ranges = value_labels_ranges
1153                 .entry(ValueLabel::from_u32(label))
1154                 .or_insert_with(|| vec![]);
1155             let from_offset = inst_offsets[from.inst().index()];
1156             let to_offset = if to.inst().index() == inst_offsets.len() {
1157                 func_body_len
1158             } else {
1159                 inst_offsets[to.inst().index()]
1160             };
1161 
1162             // Empty ranges or unavailable offsets can happen
1163             // due to cold blocks and branch removal (see above).
1164             if from_offset == NO_INST_OFFSET
1165                 || to_offset == NO_INST_OFFSET
1166                 || from_offset == to_offset
1167             {
1168                 continue;
1169             }
1170 
1171             let loc = if let Some(preg) = alloc.as_reg() {
1172                 LabelValueLoc::Reg(Reg::from(preg))
1173             } else {
1174                 let slot = alloc.as_stack().unwrap();
1175                 let sp_offset = self.abi.get_spillslot_offset(slot);
1176                 let sp_to_caller_sp_offset = self.abi.nominal_sp_to_caller_sp_offset();
1177                 let caller_sp_to_cfa_offset =
1178                     crate::isa::unwind::systemv::caller_sp_to_cfa_offset();
1179                 let cfa_to_sp_offset = -((sp_to_caller_sp_offset + caller_sp_to_cfa_offset) as i64);
1180                 LabelValueLoc::CFAOffset(cfa_to_sp_offset + sp_offset)
1181             };
1182 
1183             // ValueLocRanges are recorded by *instruction-end
1184             // offset*. `from_offset` is the *start* of the
1185             // instruction; that is the same as the end of another
1186             // instruction, so we only want to begin coverage once
1187             // we are past the previous instruction's end.
1188             let start = from_offset + 1;
1189 
1190             // Likewise, `end` is exclusive, but we want to
1191             // *include* the end of the last
1192             // instruction. `to_offset` is the start of the
1193             // `to`-instruction, which is the exclusive end, i.e.,
1194             // the first instruction not covered. That
1195             // instruction's start is the same as the end of the
1196             // last instruction that is included, so we go one
1197             // byte further to be sure to include it.
1198             let end = to_offset + 1;
1199 
1200             // Coalesce adjacent ranges that for the same location
1201             // to minimize output size here and for the consumers.
1202             if let Some(last_loc_range) = ranges.last_mut() {
1203                 if last_loc_range.loc == loc && last_loc_range.end == start {
1204                     trace!(
1205                         "Extending debug range for VL{} in {:?} to {}",
1206                         label,
1207                         loc,
1208                         end
1209                     );
1210                     last_loc_range.end = end;
1211                     continue;
1212                 }
1213             }
1214 
1215             trace!(
1216                 "Recording debug range for VL{} in {:?}: [Inst {}..Inst {}) [{}..{})",
1217                 label,
1218                 loc,
1219                 from.inst().index(),
1220                 to.inst().index(),
1221                 start,
1222                 end
1223             );
1224 
1225             ranges.push(ValueLocRange { loc, start, end });
1226         }
1227 
1228         value_labels_ranges
1229     }
1230 
1231     /// Get the IR block for a BlockIndex, if one exists.
1232     pub fn bindex_to_bb(&self, block: BlockIndex) -> Option<ir::Block> {
1233         self.block_order.lowered_order()[block.index()].orig_block()
1234     }
1235 
1236     pub fn resolve_vreg_alias(&self, from: regalloc2::VReg) -> regalloc2::VReg {
1237         Self::resolve_vreg_alias_impl(&self.vreg_aliases, from)
1238     }
1239 
1240     /// Implementation of alias resolution. Separate helper that does
1241     /// not borrow `self` in order to allow working around borrowing
1242     /// restrictions.
1243     fn resolve_vreg_alias_impl(
1244         aliases: &FxHashMap<regalloc2::VReg, regalloc2::VReg>,
1245         from: regalloc2::VReg,
1246     ) -> regalloc2::VReg {
1247         // We prevent cycles from existing by resolving targets of
1248         // aliases eagerly before setting them. If the target resolves
1249         // to the origin of the alias, then a cycle would be created
1250         // and the alias is disallowed. Because of the structure of
1251         // SSA code (one instruction can refer to another's defs but
1252         // not vice-versa, except indirectly through
1253         // phis/blockparams), cycles should not occur as we use
1254         // aliases to redirect vregs to the temps that actually define
1255         // them.
1256 
1257         let mut vreg = from;
1258         while let Some(to) = aliases.get(&vreg) {
1259             vreg = *to;
1260         }
1261         vreg
1262     }
1263 
1264     #[inline]
1265     fn assert_no_vreg_aliases<'a>(&self, list: &'a [VReg]) -> &'a [VReg] {
1266         for vreg in list {
1267             self.assert_not_vreg_alias(*vreg);
1268         }
1269         list
1270     }
1271 
1272     #[inline]
1273     fn assert_not_vreg_alias(&self, vreg: VReg) -> VReg {
1274         debug_assert!(self.resolve_vreg_alias(vreg) == vreg);
1275         vreg
1276     }
1277 
1278     #[inline]
1279     fn assert_operand_not_vreg_alias(&self, op: Operand) -> Operand {
1280         // It should be true by construction that `Operand`s do not contain any
1281         // aliased vregs since they're all collected and mapped when the VCode
1282         // is itself constructed.
1283         self.assert_not_vreg_alias(op.vreg());
1284         op
1285     }
1286 
1287     /// Get the type of a VReg.
1288     pub fn vreg_type(&self, vreg: VReg) -> Type {
1289         self.vreg_types[vreg.vreg()]
1290     }
1291 
1292     /// Get the fact, if any, for a given VReg.
1293     pub fn vreg_fact(&self, vreg: VReg) -> Option<&Fact> {
1294         let vreg = self.resolve_vreg_alias(vreg);
1295         self.facts[vreg.vreg()].as_ref()
1296     }
1297 
1298     /// Set the fact for a given VReg.
1299     pub fn set_vreg_fact(&mut self, vreg: VReg, fact: Fact) {
1300         let vreg = self.resolve_vreg_alias(vreg);
1301         trace!("set fact on {}: {:?}", vreg, fact);
1302         self.facts[vreg.vreg()] = Some(fact);
1303     }
1304 
1305     /// Does a given instruction define any facts?
1306     pub fn inst_defines_facts(&self, inst: InsnIndex) -> bool {
1307         self.inst_operands(inst)
1308             .iter()
1309             .filter(|o| o.kind() == OperandKind::Def)
1310             .map(|o| o.vreg())
1311             .any(|vreg| self.facts[vreg.vreg()].is_some())
1312     }
1313 }
1314 
1315 impl<I: VCodeInst> std::ops::Index<InsnIndex> for VCode<I> {
1316     type Output = I;
1317     fn index(&self, idx: InsnIndex) -> &Self::Output {
1318         &self.insts[idx.index()]
1319     }
1320 }
1321 
1322 impl<I: VCodeInst> RegallocFunction for VCode<I> {
1323     fn num_insts(&self) -> usize {
1324         self.insts.len()
1325     }
1326 
1327     fn num_blocks(&self) -> usize {
1328         self.block_ranges.len()
1329     }
1330 
1331     fn entry_block(&self) -> BlockIndex {
1332         self.entry
1333     }
1334 
1335     fn block_insns(&self, block: BlockIndex) -> InstRange {
1336         let (start, end) = self.block_ranges[block.index()];
1337         InstRange::forward(start, end)
1338     }
1339 
1340     fn block_succs(&self, block: BlockIndex) -> &[BlockIndex] {
1341         let (start, end) = self.block_succ_range[block.index()];
1342         &self.block_succs_preds[start as usize..end as usize]
1343     }
1344 
1345     fn block_preds(&self, block: BlockIndex) -> &[BlockIndex] {
1346         let (start, end) = self.block_pred_range[block.index()];
1347         &self.block_succs_preds[start as usize..end as usize]
1348     }
1349 
1350     fn block_params(&self, block: BlockIndex) -> &[VReg] {
1351         // As a special case we don't return block params for the entry block, as all the arguments
1352         // will be defined by the `Inst::Args` instruction.
1353         if block == self.entry {
1354             return &[];
1355         }
1356 
1357         let (start, end) = self.block_params_range[block.index()];
1358         let ret = &self.block_params[start as usize..end as usize];
1359         // Currently block params are never aliased to another vreg, but
1360         // double-check just to be sure.
1361         self.assert_no_vreg_aliases(ret)
1362     }
1363 
1364     fn branch_blockparams(&self, block: BlockIndex, _insn: InsnIndex, succ_idx: usize) -> &[VReg] {
1365         let (succ_range_start, succ_range_end) = self.branch_block_arg_succ_range[block.index()];
1366         let succ_ranges =
1367             &self.branch_block_arg_range[succ_range_start as usize..succ_range_end as usize];
1368         let (branch_block_args_start, branch_block_args_end) = succ_ranges[succ_idx];
1369         let ret = &self.branch_block_args
1370             [branch_block_args_start as usize..branch_block_args_end as usize];
1371         self.assert_no_vreg_aliases(ret)
1372     }
1373 
1374     fn is_ret(&self, insn: InsnIndex) -> bool {
1375         match self.insts[insn.index()].is_term() {
1376             // We treat blocks terminated by an unconditional trap like a return for regalloc.
1377             MachTerminator::None => self.insts[insn.index()].is_trap(),
1378             MachTerminator::Ret | MachTerminator::RetCall => true,
1379             MachTerminator::Uncond | MachTerminator::Cond | MachTerminator::Indirect => false,
1380         }
1381     }
1382 
1383     fn is_branch(&self, insn: InsnIndex) -> bool {
1384         match self.insts[insn.index()].is_term() {
1385             MachTerminator::Cond | MachTerminator::Uncond | MachTerminator::Indirect => true,
1386             _ => false,
1387         }
1388     }
1389 
1390     fn requires_refs_on_stack(&self, insn: InsnIndex) -> bool {
1391         self.insts[insn.index()].is_safepoint()
1392     }
1393 
1394     fn inst_operands(&self, insn: InsnIndex) -> &[Operand] {
1395         let (start, end) = self.operand_ranges[insn.index()];
1396         let ret = &self.operands[start as usize..end as usize];
1397         for op in ret {
1398             self.assert_operand_not_vreg_alias(*op);
1399         }
1400         ret
1401     }
1402 
1403     fn inst_clobbers(&self, insn: InsnIndex) -> PRegSet {
1404         self.clobbers.get(&insn).cloned().unwrap_or_default()
1405     }
1406 
1407     fn num_vregs(&self) -> usize {
1408         std::cmp::max(self.vreg_types.len(), first_user_vreg_index())
1409     }
1410 
1411     fn reftype_vregs(&self) -> &[VReg] {
1412         self.assert_no_vreg_aliases(&self.reftyped_vregs[..])
1413     }
1414 
1415     fn debug_value_labels(&self) -> &[(VReg, InsnIndex, InsnIndex, u32)] {
1416         // VRegs here are inserted into `debug_value_labels` after code is
1417         // generated and aliases are fully defined, so no double-check that
1418         // aliases are not lingering.
1419         for (vreg, ..) in self.debug_value_labels.iter() {
1420             self.assert_not_vreg_alias(*vreg);
1421         }
1422         &self.debug_value_labels[..]
1423     }
1424 
1425     fn spillslot_size(&self, regclass: RegClass) -> usize {
1426         self.abi.get_spillslot_size(regclass) as usize
1427     }
1428 
1429     fn allow_multiple_vreg_defs(&self) -> bool {
1430         // At least the s390x backend requires this, because the
1431         // `Loop` pseudo-instruction aggregates all Operands so pinned
1432         // vregs (RealRegs) may occur more than once.
1433         true
1434     }
1435 }
1436 
1437 impl<I: VCodeInst> fmt::Debug for VCode<I> {
1438     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1439         writeln!(f, "VCode {{")?;
1440         writeln!(f, "  Entry block: {}", self.entry.index())?;
1441 
1442         let mut state = Default::default();
1443 
1444         let mut alias_keys = self.vreg_aliases.keys().cloned().collect::<Vec<_>>();
1445         alias_keys.sort_unstable();
1446         for key in alias_keys {
1447             let dest = self.vreg_aliases.get(&key).unwrap();
1448             writeln!(f, "  {:?} := {:?}", Reg::from(key), Reg::from(*dest))?;
1449         }
1450 
1451         for block in 0..self.num_blocks() {
1452             let block = BlockIndex::new(block);
1453             writeln!(f, "Block {}:", block.index())?;
1454             if let Some(bb) = self.bindex_to_bb(block) {
1455                 writeln!(f, "    (original IR block: {})", bb)?;
1456             }
1457             for succ in self.succs(block) {
1458                 writeln!(f, "    (successor: Block {})", succ.index())?;
1459             }
1460             let (start, end) = self.block_ranges[block.index()];
1461             writeln!(
1462                 f,
1463                 "    (instruction range: {} .. {})",
1464                 start.index(),
1465                 end.index()
1466             )?;
1467             for inst in start.index()..end.index() {
1468                 writeln!(
1469                     f,
1470                     "  Inst {}: {}",
1471                     inst,
1472                     self.insts[inst].pretty_print_inst(&[], &mut state)
1473                 )?;
1474                 for operand in self.inst_operands(InsnIndex::new(inst)) {
1475                     if operand.kind() == OperandKind::Def {
1476                         if let Some(fact) = &self.facts[operand.vreg().vreg()] {
1477                             writeln!(f, "    v{} ! {}", operand.vreg().vreg(), fact)?;
1478                         }
1479                     }
1480                 }
1481             }
1482         }
1483 
1484         writeln!(f, "}}")?;
1485         Ok(())
1486     }
1487 }
1488 
1489 /// This structure manages VReg allocation during the lifetime of the VCodeBuilder.
1490 pub struct VRegAllocator<I> {
1491     /// Next virtual register number to allocate.
1492     next_vreg: usize,
1493 
1494     /// VReg IR-level types.
1495     vreg_types: Vec<Type>,
1496 
1497     /// A set with the same contents as `reftyped_vregs`, in order to
1498     /// avoid inserting more than once.
1499     reftyped_vregs_set: FxHashSet<VReg>,
1500 
1501     /// Reference-typed `regalloc2::VReg`s. The regalloc requires
1502     /// these in a dense slice (as opposed to querying the
1503     /// reftype-status of each vreg) for efficient iteration.
1504     reftyped_vregs: Vec<VReg>,
1505 
1506     /// A deferred error, to be bubbled up to the top level of the
1507     /// lowering algorithm. We take this approach because we cannot
1508     /// currently propagate a `Result` upward through ISLE code (the
1509     /// lowering rules) or some ABI code.
1510     deferred_error: Option<CodegenError>,
1511 
1512     /// Facts on VRegs, for proof-carrying code.
1513     facts: Vec<Option<Fact>>,
1514 
1515     /// The type of instruction that this allocator makes registers for.
1516     _inst: core::marker::PhantomData<I>,
1517 }
1518 
1519 impl<I: VCodeInst> VRegAllocator<I> {
1520     /// Make a new VRegAllocator.
1521     pub fn new() -> Self {
1522         Self {
1523             next_vreg: first_user_vreg_index(),
1524             vreg_types: vec![],
1525             facts: vec![],
1526             reftyped_vregs_set: FxHashSet::default(),
1527             reftyped_vregs: vec![],
1528             deferred_error: None,
1529             _inst: core::marker::PhantomData::default(),
1530         }
1531     }
1532 
1533     /// Allocate a fresh ValueRegs.
1534     pub fn alloc(&mut self, ty: Type) -> CodegenResult<ValueRegs<Reg>> {
1535         if self.deferred_error.is_some() {
1536             return Err(CodegenError::CodeTooLarge);
1537         }
1538         let v = self.next_vreg;
1539         let (regclasses, tys) = I::rc_for_type(ty)?;
1540         self.next_vreg += regclasses.len();
1541         if self.next_vreg >= VReg::MAX {
1542             return Err(CodegenError::CodeTooLarge);
1543         }
1544 
1545         let regs: ValueRegs<Reg> = match regclasses {
1546             &[rc0] => ValueRegs::one(VReg::new(v, rc0).into()),
1547             &[rc0, rc1] => ValueRegs::two(VReg::new(v, rc0).into(), VReg::new(v + 1, rc1).into()),
1548             // We can extend this if/when we support 32-bit targets; e.g.,
1549             // an i128 on a 32-bit machine will need up to four machine regs
1550             // for a `Value`.
1551             _ => panic!("Value must reside in 1 or 2 registers"),
1552         };
1553         for (&reg_ty, &reg) in tys.iter().zip(regs.regs().iter()) {
1554             self.set_vreg_type(reg.to_virtual_reg().unwrap(), reg_ty);
1555         }
1556 
1557         // Create empty facts for each allocated vreg.
1558         self.facts
1559             .resize(usize::try_from(self.next_vreg).unwrap(), None);
1560 
1561         Ok(regs)
1562     }
1563 
1564     /// Allocate a fresh ValueRegs, deferring any out-of-vregs
1565     /// errors. This is useful in places where we cannot bubble a
1566     /// `CodegenResult` upward easily, and which are known to be
1567     /// invoked from within the lowering loop that checks the deferred
1568     /// error status below.
1569     pub fn alloc_with_deferred_error(&mut self, ty: Type) -> ValueRegs<Reg> {
1570         match self.alloc(ty) {
1571             Ok(x) => x,
1572             Err(e) => {
1573                 self.deferred_error = Some(e);
1574                 self.bogus_for_deferred_error(ty)
1575             }
1576         }
1577     }
1578 
1579     /// Take any deferred error that was accumulated by `alloc_with_deferred_error`.
1580     pub fn take_deferred_error(&mut self) -> Option<CodegenError> {
1581         self.deferred_error.take()
1582     }
1583 
1584     /// Produce an bogus VReg placeholder with the proper number of
1585     /// registers for the given type. This is meant to be used with
1586     /// deferred allocation errors (see `Lower::alloc_tmp()`).
1587     fn bogus_for_deferred_error(&self, ty: Type) -> ValueRegs<Reg> {
1588         let (regclasses, _tys) = I::rc_for_type(ty).expect("must have valid type");
1589         match regclasses {
1590             &[rc0] => ValueRegs::one(VReg::new(0, rc0).into()),
1591             &[rc0, rc1] => ValueRegs::two(VReg::new(0, rc0).into(), VReg::new(1, rc1).into()),
1592             _ => panic!("Value must reside in 1 or 2 registers"),
1593         }
1594     }
1595 
1596     /// Set the type of this virtual register.
1597     pub fn set_vreg_type(&mut self, vreg: VirtualReg, ty: Type) {
1598         if self.vreg_types.len() <= vreg.index() {
1599             self.vreg_types.resize(vreg.index() + 1, ir::types::INVALID);
1600         }
1601         self.vreg_types[vreg.index()] = ty;
1602         if is_reftype(ty) {
1603             let vreg: VReg = vreg.into();
1604             if self.reftyped_vregs_set.insert(vreg) {
1605                 self.reftyped_vregs.push(vreg);
1606             }
1607         }
1608     }
1609 
1610     /// Set the proof-carrying code fact on a given virtual register.
1611     ///
1612     /// Returns the old fact, if any (only one fact can be stored).
1613     pub fn set_fact(&mut self, vreg: VirtualReg, fact: Fact) -> Option<Fact> {
1614         trace!("vreg {:?} has fact: {:?}", vreg, fact);
1615         self.facts[vreg.index()].replace(fact)
1616     }
1617 
1618     /// Take (and remove) a fact about a VReg. Used when setting up
1619     /// aliases: we want to move a fact from the alias vreg to the
1620     /// aliased vreg, to preserve facts about a value that were stated
1621     /// before we lowered its producer.
1622     pub fn take_fact(&mut self, vreg: VirtualReg) -> Option<Fact> {
1623         self.facts[vreg.index()].take()
1624     }
1625 
1626     /// Set a fact only if one doesn't already exist.
1627     pub fn set_fact_if_missing(&mut self, vreg: VirtualReg, fact: Fact) {
1628         if self.facts[vreg.index()].is_none() {
1629             self.set_fact(vreg, fact);
1630         }
1631     }
1632 
1633     /// Allocate a fresh ValueRegs, with a given fact to apply if
1634     /// the value fits in one VReg.
1635     pub fn alloc_with_maybe_fact(
1636         &mut self,
1637         ty: Type,
1638         fact: Option<Fact>,
1639     ) -> CodegenResult<ValueRegs<Reg>> {
1640         let result = self.alloc(ty)?;
1641 
1642         // Ensure that we don't lose a fact on a value that splits
1643         // into multiple VRegs.
1644         assert!(result.len() == 1 || fact.is_none());
1645         if let Some(fact) = fact {
1646             self.set_fact(result.regs()[0].to_virtual_reg().unwrap(), fact);
1647         }
1648 
1649         Ok(result)
1650     }
1651 }
1652 
1653 /// This structure tracks the large constants used in VCode that will be emitted separately by the
1654 /// [MachBuffer].
1655 ///
1656 /// First, during the lowering phase, constants are inserted using
1657 /// [VCodeConstants.insert]; an intermediate handle, [VCodeConstant], tracks what constants are
1658 /// used in this phase. Some deduplication is performed, when possible, as constant
1659 /// values are inserted.
1660 ///
1661 /// Secondly, during the emission phase, the [MachBuffer] assigns [MachLabel]s for each of the
1662 /// constants so that instructions can refer to the value's memory location. The [MachBuffer]
1663 /// then writes the constant values to the buffer.
1664 #[derive(Default)]
1665 pub struct VCodeConstants {
1666     constants: PrimaryMap<VCodeConstant, VCodeConstantData>,
1667     pool_uses: HashMap<Constant, VCodeConstant>,
1668     well_known_uses: HashMap<*const [u8], VCodeConstant>,
1669     u64s: HashMap<[u8; 8], VCodeConstant>,
1670 }
1671 impl VCodeConstants {
1672     /// Initialize the structure with the expected number of constants.
1673     pub fn with_capacity(expected_num_constants: usize) -> Self {
1674         Self {
1675             constants: PrimaryMap::with_capacity(expected_num_constants),
1676             pool_uses: HashMap::with_capacity(expected_num_constants),
1677             well_known_uses: HashMap::new(),
1678             u64s: HashMap::new(),
1679         }
1680     }
1681 
1682     /// Insert a constant; using this method indicates that a constant value will be used and thus
1683     /// will be emitted to the `MachBuffer`. The current implementation can deduplicate constants
1684     /// that are [VCodeConstantData::Pool] or [VCodeConstantData::WellKnown] but not
1685     /// [VCodeConstantData::Generated].
1686     pub fn insert(&mut self, data: VCodeConstantData) -> VCodeConstant {
1687         match data {
1688             VCodeConstantData::Generated(_) => self.constants.push(data),
1689             VCodeConstantData::Pool(constant, _) => match self.pool_uses.get(&constant) {
1690                 None => {
1691                     let vcode_constant = self.constants.push(data);
1692                     self.pool_uses.insert(constant, vcode_constant);
1693                     vcode_constant
1694                 }
1695                 Some(&vcode_constant) => vcode_constant,
1696             },
1697             VCodeConstantData::WellKnown(data_ref) => {
1698                 match self.well_known_uses.entry(data_ref as *const [u8]) {
1699                     Entry::Vacant(v) => {
1700                         let vcode_constant = self.constants.push(data);
1701                         v.insert(vcode_constant);
1702                         vcode_constant
1703                     }
1704                     Entry::Occupied(o) => *o.get(),
1705                 }
1706             }
1707             VCodeConstantData::U64(value) => match self.u64s.entry(value) {
1708                 Entry::Vacant(v) => {
1709                     let vcode_constant = self.constants.push(data);
1710                     v.insert(vcode_constant);
1711                     vcode_constant
1712                 }
1713                 Entry::Occupied(o) => *o.get(),
1714             },
1715         }
1716     }
1717 
1718     /// Return the number of constants inserted.
1719     pub fn len(&self) -> usize {
1720         self.constants.len()
1721     }
1722 
1723     /// Iterate over the [VCodeConstant] keys inserted in this structure.
1724     pub fn keys(&self) -> Keys<VCodeConstant> {
1725         self.constants.keys()
1726     }
1727 
1728     /// Iterate over the [VCodeConstant] keys and the data (as a byte slice) inserted in this
1729     /// structure.
1730     pub fn iter(&self) -> impl Iterator<Item = (VCodeConstant, &VCodeConstantData)> {
1731         self.constants.iter()
1732     }
1733 
1734     /// Returns the data associated with the specified constant.
1735     pub fn get(&self, c: VCodeConstant) -> &VCodeConstantData {
1736         &self.constants[c]
1737     }
1738 
1739     /// Checks if the given [VCodeConstantData] is registered as
1740     /// used by the pool.
1741     pub fn pool_uses(&self, constant: &VCodeConstantData) -> bool {
1742         match constant {
1743             VCodeConstantData::Pool(c, _) => self.pool_uses.contains_key(c),
1744             _ => false,
1745         }
1746     }
1747 }
1748 
1749 /// A use of a constant by one or more VCode instructions; see [VCodeConstants].
1750 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
1751 pub struct VCodeConstant(u32);
1752 entity_impl!(VCodeConstant);
1753 
1754 /// Identify the different types of constant that can be inserted into [VCodeConstants]. Tracking
1755 /// these separately instead of as raw byte buffers allows us to avoid some duplication.
1756 pub enum VCodeConstantData {
1757     /// A constant already present in the Cranelift IR
1758     /// [ConstantPool](crate::ir::constant::ConstantPool).
1759     Pool(Constant, ConstantData),
1760     /// A reference to a well-known constant value that is statically encoded within the compiler.
1761     WellKnown(&'static [u8]),
1762     /// A constant value generated during lowering; the value may depend on the instruction context
1763     /// which makes it difficult to de-duplicate--if possible, use other variants.
1764     Generated(ConstantData),
1765     /// A constant of at most 64 bits. These are deduplicated as
1766     /// well. Stored as a fixed-size array of `u8` so that we do not
1767     /// encounter endianness problems when cross-compiling.
1768     U64([u8; 8]),
1769 }
1770 impl VCodeConstantData {
1771     /// Retrieve the constant data as a byte slice.
1772     pub fn as_slice(&self) -> &[u8] {
1773         match self {
1774             VCodeConstantData::Pool(_, d) | VCodeConstantData::Generated(d) => d.as_slice(),
1775             VCodeConstantData::WellKnown(d) => d,
1776             VCodeConstantData::U64(value) => &value[..],
1777         }
1778     }
1779 
1780     /// Calculate the alignment of the constant data.
1781     pub fn alignment(&self) -> u32 {
1782         if self.as_slice().len() <= 8 {
1783             8
1784         } else {
1785             16
1786         }
1787     }
1788 }
1789 
1790 #[cfg(test)]
1791 mod test {
1792     use super::*;
1793     use std::mem::size_of;
1794 
1795     #[test]
1796     fn size_of_constant_structs() {
1797         assert_eq!(size_of::<Constant>(), 4);
1798         assert_eq!(size_of::<VCodeConstant>(), 4);
1799         assert_eq!(size_of::<ConstantData>(), 24);
1800         assert_eq!(size_of::<VCodeConstantData>(), 32);
1801         assert_eq!(
1802             size_of::<PrimaryMap<VCodeConstant, VCodeConstantData>>(),
1803             24
1804         );
1805         // TODO The VCodeConstants structure's memory size could be further optimized.
1806         // With certain versions of Rust, each `HashMap` in `VCodeConstants` occupied at
1807         // least 48 bytes, making an empty `VCodeConstants` cost 120 bytes.
1808     }
1809 }
1810