1 //! Implementation of a standard Pulley ABI.
2 
3 use super::{PulleyFlags, PulleyTargetKind, inst::*};
4 use crate::isa::pulley_shared::PointerWidth;
5 use crate::{
6     CodegenResult,
7     ir::{self, MemFlags, Signature, types::*},
8     isa,
9     machinst::*,
10     settings,
11 };
12 use alloc::borrow::ToOwned;
13 use alloc::vec::Vec;
14 use core::marker::PhantomData;
15 use cranelift_bitset::ScalarBitSet;
16 use regalloc2::{MachineEnv, PRegSet};
17 use smallvec::{SmallVec, smallvec};
18 
/// Support for the Pulley ABI from the callee side (within a function body).
///
/// This is the generic `Callee` type specialized with [`PulleyMachineDeps`]
/// for the Pulley target kind `P`.
pub(crate) type PulleyCallee<P> = Callee<PulleyMachineDeps<P>>;
21 
22 /// Pulley-specific ABI behavior. This struct just serves as an implementation
23 /// point for the trait; it is never actually instantiated.
24 pub struct PulleyMachineDeps<P>
25 where
26     P: PulleyTargetKind,
27 {
28     _phantom: PhantomData<P>,
29 }
30 
31 impl<P> ABIMachineSpec for PulleyMachineDeps<P>
32 where
33     P: PulleyTargetKind,
34 {
35     type I = InstAndKind<P>;
36     type F = PulleyFlags;
37 
38     /// This is the limit for the size of argument and return-value areas on the
39     /// stack. We place a reasonable limit here to avoid integer overflow issues
40     /// with 32-bit arithmetic: for now, 128 MB.
41     const STACK_ARG_RET_SIZE_LIMIT: u32 = 128 * 1024 * 1024;
42 
word_bits() -> u3243     fn word_bits() -> u32 {
44         P::pointer_width().bits().into()
45     }
46 
/// Return required stack alignment in bytes.
///
/// The alignment is a fixed 16 bytes; the calling convention is ignored.
fn stack_align(_call_conv: isa::CallConv) -> u32 {
    16
}
51 
compute_arg_locs( call_conv: isa::CallConv, flags: &settings::Flags, params: &[ir::AbiParam], args_or_rets: ArgsOrRets, add_ret_area_ptr: bool, mut args: ArgsAccumulator, ) -> CodegenResult<(u32, Option<usize>)>52     fn compute_arg_locs(
53         call_conv: isa::CallConv,
54         flags: &settings::Flags,
55         params: &[ir::AbiParam],
56         args_or_rets: ArgsOrRets,
57         add_ret_area_ptr: bool,
58         mut args: ArgsAccumulator,
59     ) -> CodegenResult<(u32, Option<usize>)> {
60         // NB: make sure this method stays in sync with
61         // `cranelift_pulley::interp::Vm::call`.
62         //
63         // In general we use the first half of all register banks as argument
64         // passing registers because, well, why not for now. Currently the only
65         // exception is x15 which is reserved as a single caller-saved register
66         // not used for arguments. This is used in `ReturnCallIndirect` to hold
67         // the location of where we're jumping to.
68 
69         let x_end = 14;
70         let f_end = 15;
71         let v_end = 15;
72 
73         let mut next_x_reg = 0;
74         let mut next_f_reg = 0;
75         let mut next_v_reg = 0;
76         let mut next_stack: u32 = 0;
77 
78         let ret_area_ptr = if add_ret_area_ptr {
79             debug_assert_eq!(args_or_rets, ArgsOrRets::Args);
80             next_x_reg += 1;
81             Some(ABIArg::reg(
82                 x_reg(next_x_reg - 1).to_real_reg().unwrap(),
83                 I64,
84                 ir::ArgumentExtension::None,
85                 ir::ArgumentPurpose::Normal,
86             ))
87         } else {
88             None
89         };
90 
91         for param in params {
92             // Find the regclass(es) of the register(s) used to store a value of
93             // this type.
94             let (rcs, reg_tys) = Self::I::rc_for_type(param.value_type)?;
95 
96             let mut slots = ABIArgSlotVec::new();
97             for (rc, reg_ty) in rcs.iter().zip(reg_tys.iter()) {
98                 let next_reg = if (next_x_reg <= x_end) && *rc == RegClass::Int {
99                     let x = Some(x_reg(next_x_reg));
100                     next_x_reg += 1;
101                     x
102                 } else if (next_f_reg <= f_end) && *rc == RegClass::Float {
103                     let f = Some(f_reg(next_f_reg));
104                     next_f_reg += 1;
105                     f
106                 } else if (next_v_reg <= v_end) && *rc == RegClass::Vector {
107                     let v = Some(v_reg(next_v_reg));
108                     next_v_reg += 1;
109                     v
110                 } else {
111                     None
112                 };
113 
114                 if let Some(reg) = next_reg {
115                     slots.push(ABIArgSlot::Reg {
116                         reg: reg.to_real_reg().unwrap(),
117                         ty: *reg_ty,
118                         extension: param.extension,
119                     });
120                 } else {
121                     if args_or_rets == ArgsOrRets::Rets && !flags.enable_multi_ret_implicit_sret() {
122                         return Err(crate::CodegenError::Unsupported(
123                             "Too many return values to fit in registers. \
124                             Use a StructReturn argument instead. (#9510)"
125                                 .to_owned(),
126                         ));
127                     }
128 
129                     // Compute size and 16-byte stack alignment happens
130                     // separately after all args.
131                     let size = reg_ty.bits() / 8;
132                     let size = core::cmp::max(size, 8);
133 
134                     // Align.
135                     debug_assert!(size.is_power_of_two());
136                     next_stack = align_to(next_stack, size);
137 
138                     slots.push(ABIArgSlot::Stack {
139                         offset: i64::from(next_stack),
140                         ty: *reg_ty,
141                         extension: param.extension,
142                     });
143 
144                     next_stack += size;
145                 }
146             }
147 
148             args.push(ABIArg::Slots {
149                 slots,
150                 purpose: param.purpose,
151             });
152         }
153 
154         let pos = if let Some(ret_area_ptr) = ret_area_ptr {
155             args.push_non_formal(ret_area_ptr);
156             Some(args.args().len() - 1)
157         } else {
158             None
159         };
160 
161         next_stack = align_to(next_stack, Self::stack_align(call_conv));
162 
163         Ok((next_stack, pos))
164     }
165 
gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Self::I166     fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Self::I {
167         let mut flags = MemFlags::trusted();
168         // Stack loads/stores of vectors always use little-endianness to avoid
169         // implementing a byte-swap of vectors on big-endian platforms.
170         if ty.is_vector() {
171             flags.set_endianness(ir::Endianness::Little);
172         }
173         Inst::gen_load(into_reg, mem.into(), ty, flags).into()
174     }
175 
gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Self::I176     fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Self::I {
177         let mut flags = MemFlags::trusted();
178         // Stack loads/stores of vectors always use little-endianness to avoid
179         // implementing a byte-swap of vectors on big-endian platforms.
180         if ty.is_vector() {
181             flags.set_endianness(ir::Endianness::Little);
182         }
183         Inst::gen_store(mem.into(), from_reg, ty, flags).into()
184     }
185 
/// Generates a move of a value of type `ty` from `from_reg` to `to_reg`.
///
/// This simply forwards to the instruction type's own `gen_move`.
fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self::I {
    Self::I::gen_move(to_reg, from_reg, ty)
}
189 
gen_extend( dst: Writable<Reg>, src: Reg, signed: bool, from_bits: u8, to_bits: u8, ) -> Self::I190     fn gen_extend(
191         dst: Writable<Reg>,
192         src: Reg,
193         signed: bool,
194         from_bits: u8,
195         to_bits: u8,
196     ) -> Self::I {
197         assert!(from_bits < to_bits);
198         let src = XReg::new(src).unwrap();
199         let dst = dst.try_into().unwrap();
200         match (signed, from_bits) {
201             (true, 8) => RawInst::Sext8 { dst, src }.into(),
202             (true, 16) => RawInst::Sext16 { dst, src }.into(),
203             (true, 32) => RawInst::Sext32 { dst, src }.into(),
204             (false, 8) => RawInst::Zext8 { dst, src }.into(),
205             (false, 16) => RawInst::Zext16 { dst, src }.into(),
206             (false, 32) => RawInst::Zext32 { dst, src }.into(),
207             _ => unimplemented!("extend {from_bits} to {to_bits} as signed? {signed}"),
208         }
209     }
210 
/// Returns the argument extension mode to use for a value.
///
/// The `specified` extension is returned unchanged; the calling convention
/// is ignored.
fn get_ext_mode(
    _call_conv: isa::CallConv,
    specified: ir::ArgumentExtension,
) -> ir::ArgumentExtension {
    specified
}
217 
/// Wraps the given argument pairs in an `Inst::Args` instruction.
fn gen_args(args: Vec<ArgPair>) -> Self::I {
    Inst::Args { args }.into()
}
221 
/// Wraps the given return-value pairs in an `Inst::Rets` instruction.
fn gen_rets(rets: Vec<RetPair>) -> Self::I {
    Inst::Rets { rets }.into()
}
225 
/// Returns the register used for the stack limit: the spill temporary
/// register, for every calling convention.
fn get_stacklimit_reg(_call_conv: isa::CallConv) -> Reg {
    spilltmp_reg()
}
229 
gen_add_imm( _call_conv: isa::CallConv, into_reg: Writable<Reg>, from_reg: Reg, imm: u32, ) -> SmallInstVec<Self::I>230     fn gen_add_imm(
231         _call_conv: isa::CallConv,
232         into_reg: Writable<Reg>,
233         from_reg: Reg,
234         imm: u32,
235     ) -> SmallInstVec<Self::I> {
236         let dst = into_reg.try_into().unwrap();
237         let imm = imm as i32;
238         smallvec![
239             RawInst::Xconst32 { dst, imm }.into(),
240             RawInst::Xadd32 {
241                 dst,
242                 src1: from_reg.try_into().unwrap(),
243                 src2: dst.to_reg(),
244             }
245             .into()
246         ]
247     }
248 
/// Not implemented for Pulley.
///
/// # Panics
///
/// Always panics via `unimplemented!`.
fn gen_stack_lower_bound_trap(_limit_reg: Reg) -> SmallInstVec<Self::I> {
    unimplemented!("pulley shouldn't need stack bound checks")
}
252 
gen_get_stack_addr(mem: StackAMode, dst: Writable<Reg>) -> Self::I253     fn gen_get_stack_addr(mem: StackAMode, dst: Writable<Reg>) -> Self::I {
254         let dst = dst.to_reg();
255         let dst = XReg::new(dst).unwrap();
256         let dst = WritableXReg::from_reg(dst);
257         let mem = mem.into();
258         Inst::LoadAddr { dst, mem }.into()
259     }
260 
gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Self::I261     fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Self::I {
262         let base = XReg::try_from(base).unwrap();
263         let mem = Amode::RegOffset { base, offset };
264         Inst::gen_load(into_reg, mem, ty, MemFlags::trusted()).into()
265     }
266 
gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Self::I267     fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Self::I {
268         let base = XReg::try_from(base).unwrap();
269         let mem = Amode::RegOffset { base, offset };
270         Inst::gen_store(mem, from_reg, ty, MemFlags::trusted()).into()
271     }
272 
gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Self::I>273     fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Self::I> {
274         if amount == 0 {
275             return smallvec![];
276         }
277 
278         let inst = if amount < 0 {
279             let amount = amount.checked_neg().unwrap();
280             if let Ok(amt) = u32::try_from(amount) {
281                 RawInst::StackAlloc32 { amt }
282             } else {
283                 unreachable!()
284             }
285         } else {
286             if let Ok(amt) = u32::try_from(amount) {
287                 RawInst::StackFree32 { amt }
288             } else {
289                 unreachable!()
290             }
291         };
292         smallvec![inst.into()]
293     }
294 
295     /// Generates the entire prologue for the function.
296     ///
297     /// Note that this is different from other backends where it's not spread
298     /// out among a few individual functions. That's because the goal here is to
299     /// generate a single macro-instruction for the entire prologue in the most
300     /// common cases and we don't want to spread the logic over multiple
301     /// functions.
302     ///
303     /// The general machinst methods are split to accommodate stack checks and
304     /// things like stack probes, all of which are empty on Pulley because
305     /// Pulley has its own stack check mechanism.
gen_prologue_frame_setup( _call_conv: isa::CallConv, _flags: &settings::Flags, _isa_flags: &PulleyFlags, frame_layout: &FrameLayout, ) -> SmallInstVec<Self::I>306     fn gen_prologue_frame_setup(
307         _call_conv: isa::CallConv,
308         _flags: &settings::Flags,
309         _isa_flags: &PulleyFlags,
310         frame_layout: &FrameLayout,
311     ) -> SmallInstVec<Self::I> {
312         let mut insts = SmallVec::new();
313 
314         let incoming_args_diff = frame_layout.tail_args_size - frame_layout.incoming_args_size;
315         if incoming_args_diff > 0 {
316             // Decrement SP by the amount of additional incoming argument space
317             // we need
318             insts.extend(Self::gen_sp_reg_adjust(-(incoming_args_diff as i32)));
319         }
320 
321         let style = frame_layout.pulley_frame_style();
322 
323         match &style {
324             FrameStyle::None => {}
325             FrameStyle::PulleyBasicSetup { frame_size } => {
326                 insts.push(RawInst::PushFrame.into());
327                 insts.extend(Self::gen_sp_reg_adjust(
328                     -i32::try_from(*frame_size).unwrap(),
329                 ));
330             }
331             FrameStyle::PulleySetupAndSaveClobbers {
332                 frame_size,
333                 saved_by_pulley,
334             } => insts.push(
335                 RawInst::PushFrameSave {
336                     amt: *frame_size,
337                     regs: pulley_interpreter::UpperRegSet::from_bitset(*saved_by_pulley),
338                 }
339                 .into(),
340             ),
341             FrameStyle::Manual { frame_size } => insts.extend(Self::gen_sp_reg_adjust(
342                 -i32::try_from(*frame_size).unwrap(),
343             )),
344         }
345 
346         for (offset, ty, reg) in frame_layout.manually_managed_clobbers(&style) {
347             let mut flags = MemFlags::trusted();
348             if ty.is_vector() {
349                 flags.set_endianness(ir::Endianness::Little);
350             }
351             insts.push(Inst::gen_store(Amode::SpOffset { offset }, reg, ty, flags).into());
352         }
353 
354         insts
355     }
356 
357     /// Reverse of `gen_prologue_frame_setup`.
gen_epilogue_frame_restore( _call_conv: isa::CallConv, _flags: &settings::Flags, _isa_flags: &PulleyFlags, frame_layout: &FrameLayout, ) -> SmallInstVec<Self::I>358     fn gen_epilogue_frame_restore(
359         _call_conv: isa::CallConv,
360         _flags: &settings::Flags,
361         _isa_flags: &PulleyFlags,
362         frame_layout: &FrameLayout,
363     ) -> SmallInstVec<Self::I> {
364         let mut insts = SmallVec::new();
365 
366         let style = frame_layout.pulley_frame_style();
367 
368         // Restore clobbered registers that are manually managed in Cranelift.
369         for (offset, ty, reg) in frame_layout.manually_managed_clobbers(&style) {
370             let mut flags = MemFlags::trusted();
371             if ty.is_vector() {
372                 flags.set_endianness(ir::Endianness::Little);
373             }
374             insts.push(
375                 Inst::gen_load(
376                     Writable::from_reg(reg),
377                     Amode::SpOffset { offset },
378                     ty,
379                     flags,
380                 )
381                 .into(),
382             );
383         }
384 
385         // Perform the inverse of `gen_prologue_frame_setup`.
386         match &style {
387             FrameStyle::None => {}
388             FrameStyle::PulleyBasicSetup { frame_size } => {
389                 insts.extend(Self::gen_sp_reg_adjust(i32::try_from(*frame_size).unwrap()));
390                 insts.push(RawInst::PopFrame.into());
391             }
392             FrameStyle::PulleySetupAndSaveClobbers {
393                 frame_size,
394                 saved_by_pulley,
395             } => insts.push(
396                 RawInst::PopFrameRestore {
397                     amt: *frame_size,
398                     regs: pulley_interpreter::UpperRegSet::from_bitset(*saved_by_pulley),
399                 }
400                 .into(),
401             ),
402             FrameStyle::Manual { frame_size } => {
403                 insts.extend(Self::gen_sp_reg_adjust(i32::try_from(*frame_size).unwrap()))
404             }
405         }
406 
407         insts
408     }
409 
gen_return( call_conv: isa::CallConv, _isa_flags: &PulleyFlags, frame_layout: &FrameLayout, ) -> SmallInstVec<Self::I>410     fn gen_return(
411         call_conv: isa::CallConv,
412         _isa_flags: &PulleyFlags,
413         frame_layout: &FrameLayout,
414     ) -> SmallInstVec<Self::I> {
415         let mut insts = SmallVec::new();
416 
417         // Handle final stack adjustments for the tail-call ABI.
418         if call_conv == isa::CallConv::Tail && frame_layout.tail_args_size > 0 {
419             insts.extend(Self::gen_sp_reg_adjust(
420                 frame_layout.tail_args_size.try_into().unwrap(),
421             ));
422         }
423         insts.push(RawInst::Ret {}.into());
424 
425         insts
426     }
427 
/// Stack probe hook; emits nothing on Pulley.
fn gen_probestack(_insts: &mut SmallInstVec<Self::I>, _frame_size: u32) {
    // Pulley doesn't implement stack probes since all stack pointer
    // decrements are checked already.
}
432 
/// Clobber-save hook; always returns an empty instruction list on Pulley.
fn gen_clobber_save(
    _call_conv: isa::CallConv,
    _flags: &settings::Flags,
    _frame_layout: &FrameLayout,
) -> SmallVec<[Self::I; 16]> {
    // Note that this is intentionally empty because everything necessary
    // was already done in `gen_prologue_frame_setup`.
    SmallVec::new()
}
442 
/// Clobber-restore hook; always returns an empty instruction list on Pulley.
fn gen_clobber_restore(
    _call_conv: isa::CallConv,
    _flags: &settings::Flags,
    _frame_layout: &FrameLayout,
) -> SmallVec<[Self::I; 16]> {
    // Intentionally empty as restores happen for Pulley in
    // `gen_epilogue_frame_restore`.
    SmallVec::new()
}
451 
/// Not yet implemented for Pulley.
///
/// # Panics
///
/// Always panics via `todo!`.
fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
    _call_conv: isa::CallConv,
    _dst: Reg,
    _src: Reg,
    _size: usize,
    _alloc_tmp: F,
) -> SmallVec<[Self::I; 8]> {
    todo!()
}
461 
get_number_of_spillslots_for_value( rc: RegClass, _target_vector_bytes: u32, _isa_flags: &PulleyFlags, ) -> u32462     fn get_number_of_spillslots_for_value(
463         rc: RegClass,
464         _target_vector_bytes: u32,
465         _isa_flags: &PulleyFlags,
466     ) -> u32 {
467         // Spill slots are the size of a "word" or a pointer, but Pulley
468         // registers are 8-byte for integers/floats regardless of pointer size.
469         // Calculate the number of slots necessary to store 8 bytes.
470         let slots_for_8bytes = match P::pointer_width() {
471             PointerWidth::PointerWidth32 => 2,
472             PointerWidth::PointerWidth64 => 1,
473         };
474         match rc {
475             // Int/float registers are 8-bytes
476             RegClass::Int | RegClass::Float => slots_for_8bytes,
477             // Vector registers are 16 bytes
478             RegClass::Vector => 2 * slots_for_8bytes,
479         }
480     }
481 
/// Returns the register allocator's machine environment.
///
/// A single static environment, built by `create_reg_environment()`, is
/// returned regardless of the flags and calling convention.
fn get_machine_env(_flags: &settings::Flags, _call_conv: isa::CallConv) -> &MachineEnv {
    static MACHINE_ENV: MachineEnv = create_reg_environment();
    &MACHINE_ENV
}
486 
get_regs_clobbered_by_call( call_conv_of_callee: isa::CallConv, is_exception: bool, ) -> PRegSet487     fn get_regs_clobbered_by_call(
488         call_conv_of_callee: isa::CallConv,
489         is_exception: bool,
490     ) -> PRegSet {
491         if is_exception {
492             ALL_CLOBBERS
493         } else if call_conv_of_callee == isa::CallConv::PreserveAll {
494             NO_CLOBBERS
495         } else {
496             DEFAULT_CLOBBERS
497         }
498     }
499 
compute_frame_layout( call_conv: isa::CallConv, flags: &settings::Flags, _sig: &Signature, regs: &[Writable<RealReg>], function_calls: FunctionCalls, incoming_args_size: u32, tail_args_size: u32, stackslots_size: u32, fixed_frame_storage_size: u32, outgoing_args_size: u32, ) -> FrameLayout500     fn compute_frame_layout(
501         call_conv: isa::CallConv,
502         flags: &settings::Flags,
503         _sig: &Signature,
504         regs: &[Writable<RealReg>],
505         function_calls: FunctionCalls,
506         incoming_args_size: u32,
507         tail_args_size: u32,
508         stackslots_size: u32,
509         fixed_frame_storage_size: u32,
510         outgoing_args_size: u32,
511     ) -> FrameLayout {
512         let is_callee_save = |reg: &Writable<RealReg>| match call_conv {
513             isa::CallConv::PreserveAll => true,
514             _ => DEFAULT_CALLEE_SAVES.contains(reg.to_reg().into()),
515         };
516         let mut regs: Vec<Writable<RealReg>> =
517             regs.iter().cloned().filter(is_callee_save).collect();
518 
519         regs.sort_unstable();
520 
521         // Compute clobber size.
522         let clobber_size = compute_clobber_size(&regs);
523 
524         // Compute linkage frame size.
525         let setup_area_size = if flags.preserve_frame_pointers()
526             || function_calls != FunctionCalls::None
527             // The function arguments that are passed on the stack are addressed
528             // relative to the Frame Pointer.
529             || incoming_args_size > 0
530             || clobber_size > 0
531             || fixed_frame_storage_size > 0
532         {
533             P::pointer_width().bytes() * 2 // FP, LR
534         } else {
535             0
536         };
537 
538         FrameLayout {
539             word_bytes: u32::from(P::pointer_width().bytes()),
540             incoming_args_size,
541             tail_args_size,
542             setup_area_size: setup_area_size.into(),
543             clobber_size,
544             fixed_frame_storage_size,
545             stackslots_size,
546             outgoing_args_size,
547             clobbered_callee_saves: regs,
548             function_calls,
549         }
550     }
551 
/// Inline stack probe hook; emits nothing on Pulley.
fn gen_inline_probestack(
    _insts: &mut SmallInstVec<Self::I>,
    _call_conv: isa::CallConv,
    _frame_size: u32,
    _guard_size: u32,
) {
    // Pulley doesn't need inline probestacks because it always checks stack
    // decrements.
}
561 
/// Returns the temporary register to use when handling return values:
/// always x15, whatever the callee's calling convention.
fn retval_temp_reg(_call_conv_of_callee: isa::CallConv) -> Writable<Reg> {
    // Use x15 as a temp if needed: clobbered, not a
    // retval.
    Writable::from_reg(regs::x_reg(15))
}
567 
exception_payload_regs(call_conv: isa::CallConv) -> &'static [Reg]568     fn exception_payload_regs(call_conv: isa::CallConv) -> &'static [Reg] {
569         const PAYLOAD_REGS: &'static [Reg] = &[
570             Reg::from_real_reg(regs::px_reg(0)),
571             Reg::from_real_reg(regs::px_reg(1)),
572         ];
573         match call_conv {
574             isa::CallConv::SystemV | isa::CallConv::Tail | isa::CallConv::PreserveAll => {
575                 PAYLOAD_REGS
576             }
577             isa::CallConv::Fast
578             | isa::CallConv::WindowsFastcall
579             | isa::CallConv::AppleAarch64
580             | isa::CallConv::Probestack
581             | isa::CallConv::Winch => &[],
582         }
583     }
584 }
585 
/// Different styles of management of fp/lr and clobbered registers.
///
/// This helps decide, depending on Cranelift settings and frame layout, what
/// macro instruction is used to set up the Pulley frame.
enum FrameStyle {
    /// No management is happening, fp/lr aren't saved by Pulley or Cranelift.
    /// No stack is being allocated either.
    None,

    /// Pulley saves the fp/lr combo and then stack adjustments/clobbers are
    /// handled manually.
    ///
    /// `frame_size` is the number of bytes allocated with a separate stack
    /// adjustment after `PushFrame` (and released before `PopFrame`).
    PulleyBasicSetup { frame_size: u32 },

    /// Pulley is managing the fp/lr combo, the stack size, and clobbered
    /// X-class registers.
    ///
    /// Note that `saved_by_pulley` is not the exhaustive set of clobbered
    /// registers. It's only those that are part of the `PushFrameSave`
    /// instruction.
    PulleySetupAndSaveClobbers {
        /// The size of the frame, including clobbers, that's being allocated.
        frame_size: u16,
        /// Registers that pulley is saving/restoring.
        saved_by_pulley: ScalarBitSet<u16>,
    },

    /// Cranelift is manually managing everything, both clobbers and stack
    /// increments/decrements.
    ///
    /// Note that fp/lr are not saved in this mode.
    Manual {
        /// The size of the stack being allocated.
        frame_size: u32,
    },
}
621 
622 /// Pulley-specific helpers when dealing with ABI code.
623 impl FrameLayout {
/// Whether or not this frame saves fp/lr, i.e. whether a non-empty setup
/// area was reserved for it.
fn setup_frame(&self) -> bool {
    self.setup_area_size > 0
}
628 
/// Returns the stack size allocated by this function, excluding incoming
/// tail args or the optional "setup area" of fp/lr.
///
/// This is the sum of the clobber area, the fixed frame storage and the
/// outgoing argument area.
fn stack_size(&self) -> u32 {
    self.clobber_size + self.fixed_frame_storage_size + self.outgoing_args_size
}
634 
635     /// Returns the style of frame being used for this function.
636     ///
637     /// See `FrameStyle` for more information.
pulley_frame_style(&self) -> FrameStyle638     fn pulley_frame_style(&self) -> FrameStyle {
639         let saved_by_pulley = self.clobbered_xregs_saved_by_pulley();
640         match (
641             self.stack_size(),
642             self.setup_frame(),
643             saved_by_pulley.is_empty(),
644         ) {
645             // No stack allocated, not saving fp/lr, no clobbers, nothing to do
646             (0, false, true) => FrameStyle::None,
647 
648             // No stack allocated, saving fp/lr, no clobbers, so this is
649             // pulley-managed via push/pop_frame.
650             (0, true, true) => FrameStyle::PulleyBasicSetup { frame_size: 0 },
651 
652             // Some stack is being allocated and pulley is managing fp/lr. Let
653             // pulley manage clobbered registers as well, regardless if they're
654             // present or not.
655             //
656             // If the stack is too large then `PulleyBasicSetup` is used
657             // otherwise we'll be pushing `PushFrameSave` and `PopFrameRestore`.
658             (frame_size, true, _) => match frame_size.try_into() {
659                 Ok(frame_size) => FrameStyle::PulleySetupAndSaveClobbers {
660                     frame_size,
661                     saved_by_pulley,
662                 },
663                 Err(_) => FrameStyle::PulleyBasicSetup { frame_size },
664             },
665 
666             // Some stack is being allocated, but pulley isn't managing fp/lr,
667             // so we're manually doing everything.
668             (frame_size, false, true) => FrameStyle::Manual { frame_size },
669 
670             // If there's no frame setup and there's clobbered registers this
671             // technically should have already hit a case above, so panic here.
672             (_, false, false) => unreachable!(),
673         }
674     }
675 
676     /// Returns the set of clobbered registers that Pulley is managing via its
677     /// macro instructions rather than the generated code.
clobbered_xregs_saved_by_pulley(&self) -> ScalarBitSet<u16>678     fn clobbered_xregs_saved_by_pulley(&self) -> ScalarBitSet<u16> {
679         let mut clobbered: ScalarBitSet<u16> = ScalarBitSet::new();
680         // Pulley only manages clobbers if it's also managing fp/lr.
681         if !self.setup_frame() {
682             return clobbered;
683         }
684         let mut found_manual_clobber = false;
685         for reg in self.clobbered_callee_saves.iter() {
686             let r_reg = reg.to_reg();
687             // Pulley can only manage clobbers of integer registers at this
688             // time, float registers are managed manually.
689             //
690             // Also assert that all pulley-managed clobbers come first,
691             // otherwise the loop below in `manually_managed_clobbers` is
692             // incorrect.
693             if r_reg.class() == RegClass::Int {
694                 assert!(!found_manual_clobber);
695                 if let Some(offset) = r_reg.hw_enc().checked_sub(16) {
696                     clobbered.insert(offset);
697                 }
698             } else {
699                 found_manual_clobber = true;
700             }
701         }
702         clobbered
703     }
704 
705     /// Returns an iterator over the clobbers that Cranelift is managing, not
706     /// Pulley.
707     ///
708     /// If this frame has clobbers then they're either saved by Pulley with
709     /// `FrameStyle::PulleySetupAndSaveClobbers`. Cranelift might need to manage
710     /// these registers depending on Cranelift settings. Cranelift also always
711     /// manages floating-point registers.
manually_managed_clobbers<'a>( &'a self, style: &'a FrameStyle, ) -> impl Iterator<Item = (i32, Type, Reg)> + 'a712     fn manually_managed_clobbers<'a>(
713         &'a self,
714         style: &'a FrameStyle,
715     ) -> impl Iterator<Item = (i32, Type, Reg)> + 'a {
716         let mut offset = self.stack_size();
717         self.clobbered_callee_saves.iter().filter_map(move |reg| {
718             // Allocate space for this clobber no matter what. If pulley is
719             // managing this then we're just accounting for the pulley-saved
720             // registers as well. Note that all pulley-managed registers come
721             // first in the list here.
722             offset -= 8;
723             let r_reg = reg.to_reg();
724             let ty = match r_reg.class() {
725                 RegClass::Int => {
726                     // If this register is saved by pulley, skip this clobber.
727                     if let FrameStyle::PulleySetupAndSaveClobbers {
728                         saved_by_pulley, ..
729                     } = style
730                     {
731                         if let Some(reg) = r_reg.hw_enc().checked_sub(16) {
732                             if saved_by_pulley.contains(reg) {
733                                 return None;
734                             }
735                         }
736                     }
737                     I64
738                 }
739                 RegClass::Float => F64,
740                 RegClass::Vector => I8X16,
741             };
742             let offset = i32::try_from(offset).unwrap();
743             Some((offset, ty, Reg::from(reg.to_reg())))
744         })
745     }
746 }
747 
748 const DEFAULT_CALLEE_SAVES: PRegSet = PRegSet::empty()
749     // Integer registers.
750     .with(px_reg(16))
751     .with(px_reg(17))
752     .with(px_reg(18))
753     .with(px_reg(19))
754     .with(px_reg(20))
755     .with(px_reg(21))
756     .with(px_reg(22))
757     .with(px_reg(23))
758     .with(px_reg(24))
759     .with(px_reg(25))
760     .with(px_reg(26))
761     .with(px_reg(27))
762     .with(px_reg(28))
763     .with(px_reg(29))
764     .with(px_reg(30))
765     .with(px_reg(31))
766     // Note: no float/vector registers are callee-saved.
767 ;
768 
compute_clobber_size(clobbers: &[Writable<RealReg>]) -> u32769 fn compute_clobber_size(clobbers: &[Writable<RealReg>]) -> u32 {
770     let mut clobbered_size = 0;
771     for reg in clobbers {
772         match reg.to_reg().class() {
773             RegClass::Int => {
774                 clobbered_size += 8;
775             }
776             RegClass::Float => {
777                 clobbered_size += 8;
778             }
779             RegClass::Vector => {
780                 // No alignment concerns: the Pulley virtual CPU
781                 // supports unaligned vector load/stores.
782                 clobbered_size += 16;
783             }
784         }
785     }
786     align_to(clobbered_size, 16)
787 }
788 
789 const DEFAULT_CLOBBERS: PRegSet = PRegSet::empty()
790     // Integer registers: the first 16 get clobbered.
791     .with(px_reg(0))
792     .with(px_reg(1))
793     .with(px_reg(2))
794     .with(px_reg(3))
795     .with(px_reg(4))
796     .with(px_reg(5))
797     .with(px_reg(6))
798     .with(px_reg(7))
799     .with(px_reg(8))
800     .with(px_reg(9))
801     .with(px_reg(10))
802     .with(px_reg(11))
803     .with(px_reg(12))
804     .with(px_reg(13))
805     .with(px_reg(14))
806     .with(px_reg(15))
807     // All float registers get clobbered.
808     .with(pf_reg(0))
809     .with(pf_reg(1))
810     .with(pf_reg(2))
811     .with(pf_reg(3))
812     .with(pf_reg(4))
813     .with(pf_reg(5))
814     .with(pf_reg(6))
815     .with(pf_reg(7))
816     .with(pf_reg(8))
817     .with(pf_reg(9))
818     .with(pf_reg(10))
819     .with(pf_reg(11))
820     .with(pf_reg(12))
821     .with(pf_reg(13))
822     .with(pf_reg(14))
823     .with(pf_reg(15))
824     .with(pf_reg(16))
825     .with(pf_reg(17))
826     .with(pf_reg(18))
827     .with(pf_reg(19))
828     .with(pf_reg(20))
829     .with(pf_reg(21))
830     .with(pf_reg(22))
831     .with(pf_reg(23))
832     .with(pf_reg(24))
833     .with(pf_reg(25))
834     .with(pf_reg(26))
835     .with(pf_reg(27))
836     .with(pf_reg(28))
837     .with(pf_reg(29))
838     .with(pf_reg(30))
839     .with(pf_reg(31))
840     // All vector registers get clobbered.
841     .with(pv_reg(0))
842     .with(pv_reg(1))
843     .with(pv_reg(2))
844     .with(pv_reg(3))
845     .with(pv_reg(4))
846     .with(pv_reg(5))
847     .with(pv_reg(6))
848     .with(pv_reg(7))
849     .with(pv_reg(8))
850     .with(pv_reg(9))
851     .with(pv_reg(10))
852     .with(pv_reg(11))
853     .with(pv_reg(12))
854     .with(pv_reg(13))
855     .with(pv_reg(14))
856     .with(pv_reg(15))
857     .with(pv_reg(16))
858     .with(pv_reg(17))
859     .with(pv_reg(18))
860     .with(pv_reg(19))
861     .with(pv_reg(20))
862     .with(pv_reg(21))
863     .with(pv_reg(22))
864     .with(pv_reg(23))
865     .with(pv_reg(24))
866     .with(pv_reg(25))
867     .with(pv_reg(26))
868     .with(pv_reg(27))
869     .with(pv_reg(28))
870     .with(pv_reg(29))
871     .with(pv_reg(30))
872     .with(pv_reg(31));
873 
874 const ALL_CLOBBERS: PRegSet = PRegSet::empty()
875     .with(px_reg(0))
876     .with(px_reg(1))
877     .with(px_reg(2))
878     .with(px_reg(3))
879     .with(px_reg(4))
880     .with(px_reg(5))
881     .with(px_reg(6))
882     .with(px_reg(7))
883     .with(px_reg(8))
884     .with(px_reg(9))
885     .with(px_reg(10))
886     .with(px_reg(11))
887     .with(px_reg(12))
888     .with(px_reg(13))
889     .with(px_reg(14))
890     .with(px_reg(15))
891     .with(px_reg(16))
892     .with(px_reg(17))
893     .with(px_reg(18))
894     .with(px_reg(19))
895     .with(px_reg(20))
896     .with(px_reg(21))
897     .with(px_reg(22))
898     .with(px_reg(23))
899     .with(px_reg(24))
900     .with(px_reg(25))
901     .with(px_reg(26))
902     .with(px_reg(27))
903     .with(px_reg(28))
904     .with(px_reg(29))
905     .with(px_reg(30))
906     .with(px_reg(31))
907     .with(pf_reg(0))
908     .with(pf_reg(1))
909     .with(pf_reg(2))
910     .with(pf_reg(3))
911     .with(pf_reg(4))
912     .with(pf_reg(5))
913     .with(pf_reg(6))
914     .with(pf_reg(7))
915     .with(pf_reg(8))
916     .with(pf_reg(9))
917     .with(pf_reg(10))
918     .with(pf_reg(11))
919     .with(pf_reg(12))
920     .with(pf_reg(13))
921     .with(pf_reg(14))
922     .with(pf_reg(15))
923     .with(pf_reg(16))
924     .with(pf_reg(17))
925     .with(pf_reg(18))
926     .with(pf_reg(19))
927     .with(pf_reg(20))
928     .with(pf_reg(21))
929     .with(pf_reg(22))
930     .with(pf_reg(23))
931     .with(pf_reg(24))
932     .with(pf_reg(25))
933     .with(pf_reg(26))
934     .with(pf_reg(27))
935     .with(pf_reg(28))
936     .with(pf_reg(29))
937     .with(pf_reg(30))
938     .with(pf_reg(31))
939     .with(pv_reg(0))
940     .with(pv_reg(1))
941     .with(pv_reg(2))
942     .with(pv_reg(3))
943     .with(pv_reg(4))
944     .with(pv_reg(5))
945     .with(pv_reg(6))
946     .with(pv_reg(7))
947     .with(pv_reg(8))
948     .with(pv_reg(9))
949     .with(pv_reg(10))
950     .with(pv_reg(11))
951     .with(pv_reg(12))
952     .with(pv_reg(13))
953     .with(pv_reg(14))
954     .with(pv_reg(15))
955     .with(pv_reg(16))
956     .with(pv_reg(17))
957     .with(pv_reg(18))
958     .with(pv_reg(19))
959     .with(pv_reg(20))
960     .with(pv_reg(21))
961     .with(pv_reg(22))
962     .with(pv_reg(23))
963     .with(pv_reg(24))
964     .with(pv_reg(25))
965     .with(pv_reg(26))
966     .with(pv_reg(27))
967     .with(pv_reg(28))
968     .with(pv_reg(29))
969     .with(pv_reg(30))
970     .with(pv_reg(31));
971 
/// The empty register set, i.e. a clobber set that names no registers.
const NO_CLOBBERS: PRegSet = PRegSet::empty();
973 
create_reg_environment() -> MachineEnv974 const fn create_reg_environment() -> MachineEnv {
975     // Prefer caller-saved registers over callee-saved registers, because that
976     // way we don't need to emit code to save and restore them if we don't
977     // mutate them.
978 
979     let preferred_regs_by_class: [PRegSet; 3] = [
980         PRegSet::empty()
981             .with(px_reg(0))
982             .with(px_reg(1))
983             .with(px_reg(2))
984             .with(px_reg(3))
985             .with(px_reg(4))
986             .with(px_reg(5))
987             .with(px_reg(6))
988             .with(px_reg(7))
989             .with(px_reg(8))
990             .with(px_reg(9))
991             .with(px_reg(10))
992             .with(px_reg(11))
993             .with(px_reg(12))
994             .with(px_reg(13))
995             .with(px_reg(14))
996             .with(px_reg(15)),
997         PRegSet::empty()
998             .with(pf_reg(0))
999             .with(pf_reg(1))
1000             .with(pf_reg(2))
1001             .with(pf_reg(3))
1002             .with(pf_reg(4))
1003             .with(pf_reg(5))
1004             .with(pf_reg(6))
1005             .with(pf_reg(7))
1006             .with(pf_reg(8))
1007             .with(pf_reg(9))
1008             .with(pf_reg(10))
1009             .with(pf_reg(11))
1010             .with(pf_reg(12))
1011             .with(pf_reg(13))
1012             .with(pf_reg(14))
1013             .with(pf_reg(15))
1014             .with(pf_reg(16))
1015             .with(pf_reg(17))
1016             .with(pf_reg(18))
1017             .with(pf_reg(19))
1018             .with(pf_reg(20))
1019             .with(pf_reg(21))
1020             .with(pf_reg(22))
1021             .with(pf_reg(23))
1022             .with(pf_reg(24))
1023             .with(pf_reg(25))
1024             .with(pf_reg(26))
1025             .with(pf_reg(27))
1026             .with(pf_reg(28))
1027             .with(pf_reg(29))
1028             .with(pf_reg(30))
1029             .with(pf_reg(31)),
1030         PRegSet::empty()
1031             .with(pv_reg(0))
1032             .with(pv_reg(1))
1033             .with(pv_reg(2))
1034             .with(pv_reg(3))
1035             .with(pv_reg(4))
1036             .with(pv_reg(5))
1037             .with(pv_reg(6))
1038             .with(pv_reg(7))
1039             .with(pv_reg(8))
1040             .with(pv_reg(9))
1041             .with(pv_reg(10))
1042             .with(pv_reg(11))
1043             .with(pv_reg(12))
1044             .with(pv_reg(13))
1045             .with(pv_reg(14))
1046             .with(pv_reg(15))
1047             .with(pv_reg(16))
1048             .with(pv_reg(17))
1049             .with(pv_reg(18))
1050             .with(pv_reg(19))
1051             .with(pv_reg(20))
1052             .with(pv_reg(21))
1053             .with(pv_reg(22))
1054             .with(pv_reg(23))
1055             .with(pv_reg(24))
1056             .with(pv_reg(25))
1057             .with(pv_reg(26))
1058             .with(pv_reg(27))
1059             .with(pv_reg(28))
1060             .with(pv_reg(29))
1061             .with(pv_reg(30))
1062             .with(pv_reg(31)),
1063     ];
1064 
1065     let non_preferred_regs_by_class: [PRegSet; 3] = [
1066         PRegSet::empty()
1067             .with(px_reg(16))
1068             .with(px_reg(17))
1069             .with(px_reg(18))
1070             .with(px_reg(19))
1071             .with(px_reg(20))
1072             .with(px_reg(21))
1073             .with(px_reg(22))
1074             .with(px_reg(23))
1075             .with(px_reg(24))
1076             .with(px_reg(25))
1077             .with(px_reg(26))
1078             .with(px_reg(27))
1079             .with(px_reg(28))
1080             .with(px_reg(29)),
1081         PRegSet::empty(),
1082         PRegSet::empty(),
1083     ];
1084 
1085     debug_assert!(XReg::SPECIAL_START == 30);
1086 
1087     MachineEnv {
1088         preferred_regs_by_class,
1089         non_preferred_regs_by_class,
1090         fixed_stack_slots: vec![],
1091         scratch_by_class: [None, None, None],
1092     }
1093 }
1094