1 //! Implementation of a standard Pulley ABI.
2
3 use super::{PulleyFlags, PulleyTargetKind, inst::*};
4 use crate::isa::pulley_shared::PointerWidth;
5 use crate::{
6 CodegenResult,
7 ir::{self, MemFlags, Signature, types::*},
8 isa,
9 machinst::*,
10 settings,
11 };
12 use alloc::borrow::ToOwned;
13 use alloc::vec::Vec;
14 use core::marker::PhantomData;
15 use cranelift_bitset::ScalarBitSet;
16 use regalloc2::{MachineEnv, PRegSet};
17 use smallvec::{SmallVec, smallvec};
18
19 /// Support for the Pulley ABI from the callee side (within a function body).
20 pub(crate) type PulleyCallee<P> = Callee<PulleyMachineDeps<P>>;
21
22 /// Pulley-specific ABI behavior. This struct just serves as an implementation
23 /// point for the trait; it is never actually instantiated.
24 pub struct PulleyMachineDeps<P>
25 where
26 P: PulleyTargetKind,
27 {
28 _phantom: PhantomData<P>,
29 }
30
31 impl<P> ABIMachineSpec for PulleyMachineDeps<P>
32 where
33 P: PulleyTargetKind,
34 {
35 type I = InstAndKind<P>;
36 type F = PulleyFlags;
37
38 /// This is the limit for the size of argument and return-value areas on the
39 /// stack. We place a reasonable limit here to avoid integer overflow issues
40 /// with 32-bit arithmetic: for now, 128 MB.
41 const STACK_ARG_RET_SIZE_LIMIT: u32 = 128 * 1024 * 1024;
42
word_bits() -> u3243 fn word_bits() -> u32 {
44 P::pointer_width().bits().into()
45 }
46
47 /// Return required stack alignment in bytes.
stack_align(_call_conv: isa::CallConv) -> u3248 fn stack_align(_call_conv: isa::CallConv) -> u32 {
49 16
50 }
51
compute_arg_locs( call_conv: isa::CallConv, flags: &settings::Flags, params: &[ir::AbiParam], args_or_rets: ArgsOrRets, add_ret_area_ptr: bool, mut args: ArgsAccumulator, ) -> CodegenResult<(u32, Option<usize>)>52 fn compute_arg_locs(
53 call_conv: isa::CallConv,
54 flags: &settings::Flags,
55 params: &[ir::AbiParam],
56 args_or_rets: ArgsOrRets,
57 add_ret_area_ptr: bool,
58 mut args: ArgsAccumulator,
59 ) -> CodegenResult<(u32, Option<usize>)> {
60 // NB: make sure this method stays in sync with
61 // `cranelift_pulley::interp::Vm::call`.
62 //
63 // In general we use the first half of all register banks as argument
64 // passing registers because, well, why not for now. Currently the only
65 // exception is x15 which is reserved as a single caller-saved register
66 // not used for arguments. This is used in `ReturnCallIndirect` to hold
67 // the location of where we're jumping to.
68
69 let x_end = 14;
70 let f_end = 15;
71 let v_end = 15;
72
73 let mut next_x_reg = 0;
74 let mut next_f_reg = 0;
75 let mut next_v_reg = 0;
76 let mut next_stack: u32 = 0;
77
78 let ret_area_ptr = if add_ret_area_ptr {
79 debug_assert_eq!(args_or_rets, ArgsOrRets::Args);
80 next_x_reg += 1;
81 Some(ABIArg::reg(
82 x_reg(next_x_reg - 1).to_real_reg().unwrap(),
83 I64,
84 ir::ArgumentExtension::None,
85 ir::ArgumentPurpose::Normal,
86 ))
87 } else {
88 None
89 };
90
91 for param in params {
92 // Find the regclass(es) of the register(s) used to store a value of
93 // this type.
94 let (rcs, reg_tys) = Self::I::rc_for_type(param.value_type)?;
95
96 let mut slots = ABIArgSlotVec::new();
97 for (rc, reg_ty) in rcs.iter().zip(reg_tys.iter()) {
98 let next_reg = if (next_x_reg <= x_end) && *rc == RegClass::Int {
99 let x = Some(x_reg(next_x_reg));
100 next_x_reg += 1;
101 x
102 } else if (next_f_reg <= f_end) && *rc == RegClass::Float {
103 let f = Some(f_reg(next_f_reg));
104 next_f_reg += 1;
105 f
106 } else if (next_v_reg <= v_end) && *rc == RegClass::Vector {
107 let v = Some(v_reg(next_v_reg));
108 next_v_reg += 1;
109 v
110 } else {
111 None
112 };
113
114 if let Some(reg) = next_reg {
115 slots.push(ABIArgSlot::Reg {
116 reg: reg.to_real_reg().unwrap(),
117 ty: *reg_ty,
118 extension: param.extension,
119 });
120 } else {
121 if args_or_rets == ArgsOrRets::Rets && !flags.enable_multi_ret_implicit_sret() {
122 return Err(crate::CodegenError::Unsupported(
123 "Too many return values to fit in registers. \
124 Use a StructReturn argument instead. (#9510)"
125 .to_owned(),
126 ));
127 }
128
129 // Compute size and 16-byte stack alignment happens
130 // separately after all args.
131 let size = reg_ty.bits() / 8;
132 let size = core::cmp::max(size, 8);
133
134 // Align.
135 debug_assert!(size.is_power_of_two());
136 next_stack = align_to(next_stack, size);
137
138 slots.push(ABIArgSlot::Stack {
139 offset: i64::from(next_stack),
140 ty: *reg_ty,
141 extension: param.extension,
142 });
143
144 next_stack += size;
145 }
146 }
147
148 args.push(ABIArg::Slots {
149 slots,
150 purpose: param.purpose,
151 });
152 }
153
154 let pos = if let Some(ret_area_ptr) = ret_area_ptr {
155 args.push_non_formal(ret_area_ptr);
156 Some(args.args().len() - 1)
157 } else {
158 None
159 };
160
161 next_stack = align_to(next_stack, Self::stack_align(call_conv));
162
163 Ok((next_stack, pos))
164 }
165
gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Self::I166 fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Self::I {
167 let mut flags = MemFlags::trusted();
168 // Stack loads/stores of vectors always use little-endianness to avoid
169 // implementing a byte-swap of vectors on big-endian platforms.
170 if ty.is_vector() {
171 flags.set_endianness(ir::Endianness::Little);
172 }
173 Inst::gen_load(into_reg, mem.into(), ty, flags).into()
174 }
175
gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Self::I176 fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Self::I {
177 let mut flags = MemFlags::trusted();
178 // Stack loads/stores of vectors always use little-endianness to avoid
179 // implementing a byte-swap of vectors on big-endian platforms.
180 if ty.is_vector() {
181 flags.set_endianness(ir::Endianness::Little);
182 }
183 Inst::gen_store(mem.into(), from_reg, ty, flags).into()
184 }
185
/// Builds a register-to-register move of a `ty`-typed value from `from_reg`
/// to `to_reg` by delegating to the instruction type's own `gen_move`.
fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self::I {
    Self::I::gen_move(to_reg, from_reg, ty)
}
189
gen_extend( dst: Writable<Reg>, src: Reg, signed: bool, from_bits: u8, to_bits: u8, ) -> Self::I190 fn gen_extend(
191 dst: Writable<Reg>,
192 src: Reg,
193 signed: bool,
194 from_bits: u8,
195 to_bits: u8,
196 ) -> Self::I {
197 assert!(from_bits < to_bits);
198 let src = XReg::new(src).unwrap();
199 let dst = dst.try_into().unwrap();
200 match (signed, from_bits) {
201 (true, 8) => RawInst::Sext8 { dst, src }.into(),
202 (true, 16) => RawInst::Sext16 { dst, src }.into(),
203 (true, 32) => RawInst::Sext32 { dst, src }.into(),
204 (false, 8) => RawInst::Zext8 { dst, src }.into(),
205 (false, 16) => RawInst::Zext16 { dst, src }.into(),
206 (false, 32) => RawInst::Zext32 { dst, src }.into(),
207 _ => unimplemented!("extend {from_bits} to {to_bits} as signed? {signed}"),
208 }
209 }
210
/// Returns the argument extension mode to use for a value.
///
/// The requested mode is returned unchanged, for every calling convention.
fn get_ext_mode(
    _call_conv: isa::CallConv,
    specified: ir::ArgumentExtension,
) -> ir::ArgumentExtension {
    specified
}
217
gen_args(args: Vec<ArgPair>) -> Self::I218 fn gen_args(args: Vec<ArgPair>) -> Self::I {
219 Inst::Args { args }.into()
220 }
221
gen_rets(rets: Vec<RetPair>) -> Self::I222 fn gen_rets(rets: Vec<RetPair>) -> Self::I {
223 Inst::Rets { rets }.into()
224 }
225
/// Returns the register used to hold the stack limit: the spill-temporary
/// register, regardless of calling convention.
fn get_stacklimit_reg(_call_conv: isa::CallConv) -> Reg {
    spilltmp_reg()
}
229
gen_add_imm( _call_conv: isa::CallConv, into_reg: Writable<Reg>, from_reg: Reg, imm: u32, ) -> SmallInstVec<Self::I>230 fn gen_add_imm(
231 _call_conv: isa::CallConv,
232 into_reg: Writable<Reg>,
233 from_reg: Reg,
234 imm: u32,
235 ) -> SmallInstVec<Self::I> {
236 let dst = into_reg.try_into().unwrap();
237 let imm = imm as i32;
238 smallvec![
239 RawInst::Xconst32 { dst, imm }.into(),
240 RawInst::Xadd32 {
241 dst,
242 src1: from_reg.try_into().unwrap(),
243 src2: dst.to_reg(),
244 }
245 .into()
246 ]
247 }
248
/// Not implemented for Pulley.
///
/// # Panics
///
/// Always panics via `unimplemented!`; the message states that Pulley
/// shouldn't need stack bound checks.
fn gen_stack_lower_bound_trap(_limit_reg: Reg) -> SmallInstVec<Self::I> {
    unimplemented!("pulley shouldn't need stack bound checks")
}
252
gen_get_stack_addr(mem: StackAMode, dst: Writable<Reg>) -> Self::I253 fn gen_get_stack_addr(mem: StackAMode, dst: Writable<Reg>) -> Self::I {
254 let dst = dst.to_reg();
255 let dst = XReg::new(dst).unwrap();
256 let dst = WritableXReg::from_reg(dst);
257 let mem = mem.into();
258 Inst::LoadAddr { dst, mem }.into()
259 }
260
gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Self::I261 fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Self::I {
262 let base = XReg::try_from(base).unwrap();
263 let mem = Amode::RegOffset { base, offset };
264 Inst::gen_load(into_reg, mem, ty, MemFlags::trusted()).into()
265 }
266
gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Self::I267 fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Self::I {
268 let base = XReg::try_from(base).unwrap();
269 let mem = Amode::RegOffset { base, offset };
270 Inst::gen_store(mem, from_reg, ty, MemFlags::trusted()).into()
271 }
272
gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Self::I>273 fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Self::I> {
274 if amount == 0 {
275 return smallvec![];
276 }
277
278 let inst = if amount < 0 {
279 let amount = amount.checked_neg().unwrap();
280 if let Ok(amt) = u32::try_from(amount) {
281 RawInst::StackAlloc32 { amt }
282 } else {
283 unreachable!()
284 }
285 } else {
286 if let Ok(amt) = u32::try_from(amount) {
287 RawInst::StackFree32 { amt }
288 } else {
289 unreachable!()
290 }
291 };
292 smallvec![inst.into()]
293 }
294
295 /// Generates the entire prologue for the function.
296 ///
297 /// Note that this is different from other backends where it's not spread
298 /// out among a few individual functions. That's because the goal here is to
299 /// generate a single macro-instruction for the entire prologue in the most
300 /// common cases and we don't want to spread the logic over multiple
301 /// functions.
302 ///
303 /// The general machinst methods are split to accommodate stack checks and
304 /// things like stack probes, all of which are empty on Pulley because
305 /// Pulley has its own stack check mechanism.
gen_prologue_frame_setup( _call_conv: isa::CallConv, _flags: &settings::Flags, _isa_flags: &PulleyFlags, frame_layout: &FrameLayout, ) -> SmallInstVec<Self::I>306 fn gen_prologue_frame_setup(
307 _call_conv: isa::CallConv,
308 _flags: &settings::Flags,
309 _isa_flags: &PulleyFlags,
310 frame_layout: &FrameLayout,
311 ) -> SmallInstVec<Self::I> {
312 let mut insts = SmallVec::new();
313
314 let incoming_args_diff = frame_layout.tail_args_size - frame_layout.incoming_args_size;
315 if incoming_args_diff > 0 {
316 // Decrement SP by the amount of additional incoming argument space
317 // we need
318 insts.extend(Self::gen_sp_reg_adjust(-(incoming_args_diff as i32)));
319 }
320
321 let style = frame_layout.pulley_frame_style();
322
323 match &style {
324 FrameStyle::None => {}
325 FrameStyle::PulleyBasicSetup { frame_size } => {
326 insts.push(RawInst::PushFrame.into());
327 insts.extend(Self::gen_sp_reg_adjust(
328 -i32::try_from(*frame_size).unwrap(),
329 ));
330 }
331 FrameStyle::PulleySetupAndSaveClobbers {
332 frame_size,
333 saved_by_pulley,
334 } => insts.push(
335 RawInst::PushFrameSave {
336 amt: *frame_size,
337 regs: pulley_interpreter::UpperRegSet::from_bitset(*saved_by_pulley),
338 }
339 .into(),
340 ),
341 FrameStyle::Manual { frame_size } => insts.extend(Self::gen_sp_reg_adjust(
342 -i32::try_from(*frame_size).unwrap(),
343 )),
344 }
345
346 for (offset, ty, reg) in frame_layout.manually_managed_clobbers(&style) {
347 let mut flags = MemFlags::trusted();
348 if ty.is_vector() {
349 flags.set_endianness(ir::Endianness::Little);
350 }
351 insts.push(Inst::gen_store(Amode::SpOffset { offset }, reg, ty, flags).into());
352 }
353
354 insts
355 }
356
357 /// Reverse of `gen_prologue_frame_setup`.
gen_epilogue_frame_restore( _call_conv: isa::CallConv, _flags: &settings::Flags, _isa_flags: &PulleyFlags, frame_layout: &FrameLayout, ) -> SmallInstVec<Self::I>358 fn gen_epilogue_frame_restore(
359 _call_conv: isa::CallConv,
360 _flags: &settings::Flags,
361 _isa_flags: &PulleyFlags,
362 frame_layout: &FrameLayout,
363 ) -> SmallInstVec<Self::I> {
364 let mut insts = SmallVec::new();
365
366 let style = frame_layout.pulley_frame_style();
367
368 // Restore clobbered registers that are manually managed in Cranelift.
369 for (offset, ty, reg) in frame_layout.manually_managed_clobbers(&style) {
370 let mut flags = MemFlags::trusted();
371 if ty.is_vector() {
372 flags.set_endianness(ir::Endianness::Little);
373 }
374 insts.push(
375 Inst::gen_load(
376 Writable::from_reg(reg),
377 Amode::SpOffset { offset },
378 ty,
379 flags,
380 )
381 .into(),
382 );
383 }
384
385 // Perform the inverse of `gen_prologue_frame_setup`.
386 match &style {
387 FrameStyle::None => {}
388 FrameStyle::PulleyBasicSetup { frame_size } => {
389 insts.extend(Self::gen_sp_reg_adjust(i32::try_from(*frame_size).unwrap()));
390 insts.push(RawInst::PopFrame.into());
391 }
392 FrameStyle::PulleySetupAndSaveClobbers {
393 frame_size,
394 saved_by_pulley,
395 } => insts.push(
396 RawInst::PopFrameRestore {
397 amt: *frame_size,
398 regs: pulley_interpreter::UpperRegSet::from_bitset(*saved_by_pulley),
399 }
400 .into(),
401 ),
402 FrameStyle::Manual { frame_size } => {
403 insts.extend(Self::gen_sp_reg_adjust(i32::try_from(*frame_size).unwrap()))
404 }
405 }
406
407 insts
408 }
409
gen_return( call_conv: isa::CallConv, _isa_flags: &PulleyFlags, frame_layout: &FrameLayout, ) -> SmallInstVec<Self::I>410 fn gen_return(
411 call_conv: isa::CallConv,
412 _isa_flags: &PulleyFlags,
413 frame_layout: &FrameLayout,
414 ) -> SmallInstVec<Self::I> {
415 let mut insts = SmallVec::new();
416
417 // Handle final stack adjustments for the tail-call ABI.
418 if call_conv == isa::CallConv::Tail && frame_layout.tail_args_size > 0 {
419 insts.extend(Self::gen_sp_reg_adjust(
420 frame_layout.tail_args_size.try_into().unwrap(),
421 ));
422 }
423 insts.push(RawInst::Ret {}.into());
424
425 insts
426 }
427
/// Emits nothing: `_insts` is left untouched.
fn gen_probestack(_insts: &mut SmallInstVec<Self::I>, _frame_size: u32) {
    // Pulley doesn't implement stack probes since all stack pointer
    // decrements are checked already.
}
432
gen_clobber_save( _call_conv: isa::CallConv, _flags: &settings::Flags, _frame_layout: &FrameLayout, ) -> SmallVec<[Self::I; 16]>433 fn gen_clobber_save(
434 _call_conv: isa::CallConv,
435 _flags: &settings::Flags,
436 _frame_layout: &FrameLayout,
437 ) -> SmallVec<[Self::I; 16]> {
438 // Note that this is intentionally empty because everything necessary
439 // was already done in `gen_prologue_frame_setup`.
440 SmallVec::new()
441 }
442
gen_clobber_restore( _call_conv: isa::CallConv, _flags: &settings::Flags, _frame_layout: &FrameLayout, ) -> SmallVec<[Self::I; 16]>443 fn gen_clobber_restore(
444 _call_conv: isa::CallConv,
445 _flags: &settings::Flags,
446 _frame_layout: &FrameLayout,
447 ) -> SmallVec<[Self::I; 16]> {
448 // Intentionally empty as restores happen for Pulley in `gen_return`.
449 SmallVec::new()
450 }
451
/// Not yet implemented for Pulley.
///
/// # Panics
///
/// Always panics via `todo!`.
fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
    _call_conv: isa::CallConv,
    _dst: Reg,
    _src: Reg,
    _size: usize,
    _alloc_tmp: F,
) -> SmallVec<[Self::I; 8]> {
    todo!()
}
461
get_number_of_spillslots_for_value( rc: RegClass, _target_vector_bytes: u32, _isa_flags: &PulleyFlags, ) -> u32462 fn get_number_of_spillslots_for_value(
463 rc: RegClass,
464 _target_vector_bytes: u32,
465 _isa_flags: &PulleyFlags,
466 ) -> u32 {
467 // Spill slots are the size of a "word" or a pointer, but Pulley
468 // registers are 8-byte for integers/floats regardless of pointer size.
469 // Calculate the number of slots necessary to store 8 bytes.
470 let slots_for_8bytes = match P::pointer_width() {
471 PointerWidth::PointerWidth32 => 2,
472 PointerWidth::PointerWidth64 => 1,
473 };
474 match rc {
475 // Int/float registers are 8-bytes
476 RegClass::Int | RegClass::Float => slots_for_8bytes,
477 // Vector registers are 16 bytes
478 RegClass::Vector => 2 * slots_for_8bytes,
479 }
480 }
481
get_machine_env(_flags: &settings::Flags, _call_conv: isa::CallConv) -> &MachineEnv482 fn get_machine_env(_flags: &settings::Flags, _call_conv: isa::CallConv) -> &MachineEnv {
483 static MACHINE_ENV: MachineEnv = create_reg_environment();
484 &MACHINE_ENV
485 }
486
get_regs_clobbered_by_call( call_conv_of_callee: isa::CallConv, is_exception: bool, ) -> PRegSet487 fn get_regs_clobbered_by_call(
488 call_conv_of_callee: isa::CallConv,
489 is_exception: bool,
490 ) -> PRegSet {
491 if is_exception {
492 ALL_CLOBBERS
493 } else if call_conv_of_callee == isa::CallConv::PreserveAll {
494 NO_CLOBBERS
495 } else {
496 DEFAULT_CLOBBERS
497 }
498 }
499
compute_frame_layout( call_conv: isa::CallConv, flags: &settings::Flags, _sig: &Signature, regs: &[Writable<RealReg>], function_calls: FunctionCalls, incoming_args_size: u32, tail_args_size: u32, stackslots_size: u32, fixed_frame_storage_size: u32, outgoing_args_size: u32, ) -> FrameLayout500 fn compute_frame_layout(
501 call_conv: isa::CallConv,
502 flags: &settings::Flags,
503 _sig: &Signature,
504 regs: &[Writable<RealReg>],
505 function_calls: FunctionCalls,
506 incoming_args_size: u32,
507 tail_args_size: u32,
508 stackslots_size: u32,
509 fixed_frame_storage_size: u32,
510 outgoing_args_size: u32,
511 ) -> FrameLayout {
512 let is_callee_save = |reg: &Writable<RealReg>| match call_conv {
513 isa::CallConv::PreserveAll => true,
514 _ => DEFAULT_CALLEE_SAVES.contains(reg.to_reg().into()),
515 };
516 let mut regs: Vec<Writable<RealReg>> =
517 regs.iter().cloned().filter(is_callee_save).collect();
518
519 regs.sort_unstable();
520
521 // Compute clobber size.
522 let clobber_size = compute_clobber_size(®s);
523
524 // Compute linkage frame size.
525 let setup_area_size = if flags.preserve_frame_pointers()
526 || function_calls != FunctionCalls::None
527 // The function arguments that are passed on the stack are addressed
528 // relative to the Frame Pointer.
529 || incoming_args_size > 0
530 || clobber_size > 0
531 || fixed_frame_storage_size > 0
532 {
533 P::pointer_width().bytes() * 2 // FP, LR
534 } else {
535 0
536 };
537
538 FrameLayout {
539 word_bytes: u32::from(P::pointer_width().bytes()),
540 incoming_args_size,
541 tail_args_size,
542 setup_area_size: setup_area_size.into(),
543 clobber_size,
544 fixed_frame_storage_size,
545 stackslots_size,
546 outgoing_args_size,
547 clobbered_callee_saves: regs,
548 function_calls,
549 }
550 }
551
/// Emits nothing: `_insts` is left untouched.
fn gen_inline_probestack(
    _insts: &mut SmallInstVec<Self::I>,
    _call_conv: isa::CallConv,
    _frame_size: u32,
    _guard_size: u32,
) {
    // Pulley doesn't need inline probestacks because it always checks stack
    // decrements.
}
561
retval_temp_reg(_call_conv_of_callee: isa::CallConv) -> Writable<Reg>562 fn retval_temp_reg(_call_conv_of_callee: isa::CallConv) -> Writable<Reg> {
563 // Use x15 as a temp if needed: clobbered, not a
564 // retval.
565 Writable::from_reg(regs::x_reg(15))
566 }
567
exception_payload_regs(call_conv: isa::CallConv) -> &'static [Reg]568 fn exception_payload_regs(call_conv: isa::CallConv) -> &'static [Reg] {
569 const PAYLOAD_REGS: &'static [Reg] = &[
570 Reg::from_real_reg(regs::px_reg(0)),
571 Reg::from_real_reg(regs::px_reg(1)),
572 ];
573 match call_conv {
574 isa::CallConv::SystemV | isa::CallConv::Tail | isa::CallConv::PreserveAll => {
575 PAYLOAD_REGS
576 }
577 isa::CallConv::Fast
578 | isa::CallConv::WindowsFastcall
579 | isa::CallConv::AppleAarch64
580 | isa::CallConv::Probestack
581 | isa::CallConv::Winch => &[],
582 }
583 }
584 }
585
/// Different styles of management of fp/lr and clobbered registers.
///
/// This helps decide, depending on Cranelift settings and frame layout, what
/// macro instruction is used to set up the Pulley frame. The style is chosen
/// by `FrameLayout::pulley_frame_style`.
enum FrameStyle {
    /// No management is happening, fp/lr aren't saved by Pulley or Cranelift.
    /// No stack is being allocated either.
    None,

    /// Pulley saves the fp/lr combo and then stack adjustments/clobbers are
    /// handled manually.
    ///
    /// `frame_size` is the number of bytes allocated after fp/lr are pushed;
    /// it may be zero.
    PulleyBasicSetup { frame_size: u32 },

    /// Pulley is managing the fp/lr combo, the stack size, and clobbered
    /// X-class registers.
    ///
    /// Note that `saved_by_pulley` is not the exhaustive set of clobbered
    /// registers. It's only those that are part of the `PushFrameSave`
    /// instruction.
    PulleySetupAndSaveClobbers {
        /// The size of the frame, including clobbers, that's being allocated.
        frame_size: u16,
        /// Registers that pulley is saving/restoring.
        saved_by_pulley: ScalarBitSet<u16>,
    },

    /// Cranelift is manually managing everything, both clobbers and stack
    /// increments/decrements.
    ///
    /// Note that fp/lr are not saved in this mode.
    Manual {
        /// The size of the stack being allocated.
        frame_size: u32,
    },
}
621
622 /// Pulley-specific helpers when dealing with ABI code.
623 impl FrameLayout {
624 /// Whether or not this frame saves fp/lr.
setup_frame(&self) -> bool625 fn setup_frame(&self) -> bool {
626 self.setup_area_size > 0
627 }
628
629 /// Returns the stack size allocated by this function, excluding incoming
630 /// tail args or the optional "setup area" of fp/lr.
stack_size(&self) -> u32631 fn stack_size(&self) -> u32 {
632 self.clobber_size + self.fixed_frame_storage_size + self.outgoing_args_size
633 }
634
635 /// Returns the style of frame being used for this function.
636 ///
637 /// See `FrameStyle` for more information.
pulley_frame_style(&self) -> FrameStyle638 fn pulley_frame_style(&self) -> FrameStyle {
639 let saved_by_pulley = self.clobbered_xregs_saved_by_pulley();
640 match (
641 self.stack_size(),
642 self.setup_frame(),
643 saved_by_pulley.is_empty(),
644 ) {
645 // No stack allocated, not saving fp/lr, no clobbers, nothing to do
646 (0, false, true) => FrameStyle::None,
647
648 // No stack allocated, saving fp/lr, no clobbers, so this is
649 // pulley-managed via push/pop_frame.
650 (0, true, true) => FrameStyle::PulleyBasicSetup { frame_size: 0 },
651
652 // Some stack is being allocated and pulley is managing fp/lr. Let
653 // pulley manage clobbered registers as well, regardless if they're
654 // present or not.
655 //
656 // If the stack is too large then `PulleyBasicSetup` is used
657 // otherwise we'll be pushing `PushFrameSave` and `PopFrameRestore`.
658 (frame_size, true, _) => match frame_size.try_into() {
659 Ok(frame_size) => FrameStyle::PulleySetupAndSaveClobbers {
660 frame_size,
661 saved_by_pulley,
662 },
663 Err(_) => FrameStyle::PulleyBasicSetup { frame_size },
664 },
665
666 // Some stack is being allocated, but pulley isn't managing fp/lr,
667 // so we're manually doing everything.
668 (frame_size, false, true) => FrameStyle::Manual { frame_size },
669
670 // If there's no frame setup and there's clobbered registers this
671 // technically should have already hit a case above, so panic here.
672 (_, false, false) => unreachable!(),
673 }
674 }
675
676 /// Returns the set of clobbered registers that Pulley is managing via its
677 /// macro instructions rather than the generated code.
clobbered_xregs_saved_by_pulley(&self) -> ScalarBitSet<u16>678 fn clobbered_xregs_saved_by_pulley(&self) -> ScalarBitSet<u16> {
679 let mut clobbered: ScalarBitSet<u16> = ScalarBitSet::new();
680 // Pulley only manages clobbers if it's also managing fp/lr.
681 if !self.setup_frame() {
682 return clobbered;
683 }
684 let mut found_manual_clobber = false;
685 for reg in self.clobbered_callee_saves.iter() {
686 let r_reg = reg.to_reg();
687 // Pulley can only manage clobbers of integer registers at this
688 // time, float registers are managed manually.
689 //
690 // Also assert that all pulley-managed clobbers come first,
691 // otherwise the loop below in `manually_managed_clobbers` is
692 // incorrect.
693 if r_reg.class() == RegClass::Int {
694 assert!(!found_manual_clobber);
695 if let Some(offset) = r_reg.hw_enc().checked_sub(16) {
696 clobbered.insert(offset);
697 }
698 } else {
699 found_manual_clobber = true;
700 }
701 }
702 clobbered
703 }
704
705 /// Returns an iterator over the clobbers that Cranelift is managing, not
706 /// Pulley.
707 ///
708 /// If this frame has clobbers then they're either saved by Pulley with
709 /// `FrameStyle::PulleySetupAndSaveClobbers`. Cranelift might need to manage
710 /// these registers depending on Cranelift settings. Cranelift also always
711 /// manages floating-point registers.
manually_managed_clobbers<'a>( &'a self, style: &'a FrameStyle, ) -> impl Iterator<Item = (i32, Type, Reg)> + 'a712 fn manually_managed_clobbers<'a>(
713 &'a self,
714 style: &'a FrameStyle,
715 ) -> impl Iterator<Item = (i32, Type, Reg)> + 'a {
716 let mut offset = self.stack_size();
717 self.clobbered_callee_saves.iter().filter_map(move |reg| {
718 // Allocate space for this clobber no matter what. If pulley is
719 // managing this then we're just accounting for the pulley-saved
720 // registers as well. Note that all pulley-managed registers come
721 // first in the list here.
722 offset -= 8;
723 let r_reg = reg.to_reg();
724 let ty = match r_reg.class() {
725 RegClass::Int => {
726 // If this register is saved by pulley, skip this clobber.
727 if let FrameStyle::PulleySetupAndSaveClobbers {
728 saved_by_pulley, ..
729 } = style
730 {
731 if let Some(reg) = r_reg.hw_enc().checked_sub(16) {
732 if saved_by_pulley.contains(reg) {
733 return None;
734 }
735 }
736 }
737 I64
738 }
739 RegClass::Float => F64,
740 RegClass::Vector => I8X16,
741 };
742 let offset = i32::try_from(offset).unwrap();
743 Some((offset, ty, Reg::from(reg.to_reg())))
744 })
745 }
746 }
747
748 const DEFAULT_CALLEE_SAVES: PRegSet = PRegSet::empty()
749 // Integer registers.
750 .with(px_reg(16))
751 .with(px_reg(17))
752 .with(px_reg(18))
753 .with(px_reg(19))
754 .with(px_reg(20))
755 .with(px_reg(21))
756 .with(px_reg(22))
757 .with(px_reg(23))
758 .with(px_reg(24))
759 .with(px_reg(25))
760 .with(px_reg(26))
761 .with(px_reg(27))
762 .with(px_reg(28))
763 .with(px_reg(29))
764 .with(px_reg(30))
765 .with(px_reg(31))
766 // Note: no float/vector registers are callee-saved.
767 ;
768
compute_clobber_size(clobbers: &[Writable<RealReg>]) -> u32769 fn compute_clobber_size(clobbers: &[Writable<RealReg>]) -> u32 {
770 let mut clobbered_size = 0;
771 for reg in clobbers {
772 match reg.to_reg().class() {
773 RegClass::Int => {
774 clobbered_size += 8;
775 }
776 RegClass::Float => {
777 clobbered_size += 8;
778 }
779 RegClass::Vector => {
780 // No alignment concerns: the Pulley virtual CPU
781 // supports unaligned vector load/stores.
782 clobbered_size += 16;
783 }
784 }
785 }
786 align_to(clobbered_size, 16)
787 }
788
789 const DEFAULT_CLOBBERS: PRegSet = PRegSet::empty()
790 // Integer registers: the first 16 get clobbered.
791 .with(px_reg(0))
792 .with(px_reg(1))
793 .with(px_reg(2))
794 .with(px_reg(3))
795 .with(px_reg(4))
796 .with(px_reg(5))
797 .with(px_reg(6))
798 .with(px_reg(7))
799 .with(px_reg(8))
800 .with(px_reg(9))
801 .with(px_reg(10))
802 .with(px_reg(11))
803 .with(px_reg(12))
804 .with(px_reg(13))
805 .with(px_reg(14))
806 .with(px_reg(15))
807 // All float registers get clobbered.
808 .with(pf_reg(0))
809 .with(pf_reg(1))
810 .with(pf_reg(2))
811 .with(pf_reg(3))
812 .with(pf_reg(4))
813 .with(pf_reg(5))
814 .with(pf_reg(6))
815 .with(pf_reg(7))
816 .with(pf_reg(8))
817 .with(pf_reg(9))
818 .with(pf_reg(10))
819 .with(pf_reg(11))
820 .with(pf_reg(12))
821 .with(pf_reg(13))
822 .with(pf_reg(14))
823 .with(pf_reg(15))
824 .with(pf_reg(16))
825 .with(pf_reg(17))
826 .with(pf_reg(18))
827 .with(pf_reg(19))
828 .with(pf_reg(20))
829 .with(pf_reg(21))
830 .with(pf_reg(22))
831 .with(pf_reg(23))
832 .with(pf_reg(24))
833 .with(pf_reg(25))
834 .with(pf_reg(26))
835 .with(pf_reg(27))
836 .with(pf_reg(28))
837 .with(pf_reg(29))
838 .with(pf_reg(30))
839 .with(pf_reg(31))
840 // All vector registers get clobbered.
841 .with(pv_reg(0))
842 .with(pv_reg(1))
843 .with(pv_reg(2))
844 .with(pv_reg(3))
845 .with(pv_reg(4))
846 .with(pv_reg(5))
847 .with(pv_reg(6))
848 .with(pv_reg(7))
849 .with(pv_reg(8))
850 .with(pv_reg(9))
851 .with(pv_reg(10))
852 .with(pv_reg(11))
853 .with(pv_reg(12))
854 .with(pv_reg(13))
855 .with(pv_reg(14))
856 .with(pv_reg(15))
857 .with(pv_reg(16))
858 .with(pv_reg(17))
859 .with(pv_reg(18))
860 .with(pv_reg(19))
861 .with(pv_reg(20))
862 .with(pv_reg(21))
863 .with(pv_reg(22))
864 .with(pv_reg(23))
865 .with(pv_reg(24))
866 .with(pv_reg(25))
867 .with(pv_reg(26))
868 .with(pv_reg(27))
869 .with(pv_reg(28))
870 .with(pv_reg(29))
871 .with(pv_reg(30))
872 .with(pv_reg(31));
873
874 const ALL_CLOBBERS: PRegSet = PRegSet::empty()
875 .with(px_reg(0))
876 .with(px_reg(1))
877 .with(px_reg(2))
878 .with(px_reg(3))
879 .with(px_reg(4))
880 .with(px_reg(5))
881 .with(px_reg(6))
882 .with(px_reg(7))
883 .with(px_reg(8))
884 .with(px_reg(9))
885 .with(px_reg(10))
886 .with(px_reg(11))
887 .with(px_reg(12))
888 .with(px_reg(13))
889 .with(px_reg(14))
890 .with(px_reg(15))
891 .with(px_reg(16))
892 .with(px_reg(17))
893 .with(px_reg(18))
894 .with(px_reg(19))
895 .with(px_reg(20))
896 .with(px_reg(21))
897 .with(px_reg(22))
898 .with(px_reg(23))
899 .with(px_reg(24))
900 .with(px_reg(25))
901 .with(px_reg(26))
902 .with(px_reg(27))
903 .with(px_reg(28))
904 .with(px_reg(29))
905 .with(px_reg(30))
906 .with(px_reg(31))
907 .with(pf_reg(0))
908 .with(pf_reg(1))
909 .with(pf_reg(2))
910 .with(pf_reg(3))
911 .with(pf_reg(4))
912 .with(pf_reg(5))
913 .with(pf_reg(6))
914 .with(pf_reg(7))
915 .with(pf_reg(8))
916 .with(pf_reg(9))
917 .with(pf_reg(10))
918 .with(pf_reg(11))
919 .with(pf_reg(12))
920 .with(pf_reg(13))
921 .with(pf_reg(14))
922 .with(pf_reg(15))
923 .with(pf_reg(16))
924 .with(pf_reg(17))
925 .with(pf_reg(18))
926 .with(pf_reg(19))
927 .with(pf_reg(20))
928 .with(pf_reg(21))
929 .with(pf_reg(22))
930 .with(pf_reg(23))
931 .with(pf_reg(24))
932 .with(pf_reg(25))
933 .with(pf_reg(26))
934 .with(pf_reg(27))
935 .with(pf_reg(28))
936 .with(pf_reg(29))
937 .with(pf_reg(30))
938 .with(pf_reg(31))
939 .with(pv_reg(0))
940 .with(pv_reg(1))
941 .with(pv_reg(2))
942 .with(pv_reg(3))
943 .with(pv_reg(4))
944 .with(pv_reg(5))
945 .with(pv_reg(6))
946 .with(pv_reg(7))
947 .with(pv_reg(8))
948 .with(pv_reg(9))
949 .with(pv_reg(10))
950 .with(pv_reg(11))
951 .with(pv_reg(12))
952 .with(pv_reg(13))
953 .with(pv_reg(14))
954 .with(pv_reg(15))
955 .with(pv_reg(16))
956 .with(pv_reg(17))
957 .with(pv_reg(18))
958 .with(pv_reg(19))
959 .with(pv_reg(20))
960 .with(pv_reg(21))
961 .with(pv_reg(22))
962 .with(pv_reg(23))
963 .with(pv_reg(24))
964 .with(pv_reg(25))
965 .with(pv_reg(26))
966 .with(pv_reg(27))
967 .with(pv_reg(28))
968 .with(pv_reg(29))
969 .with(pv_reg(30))
970 .with(pv_reg(31));
971
972 const NO_CLOBBERS: PRegSet = PRegSet::empty();
973
create_reg_environment() -> MachineEnv974 const fn create_reg_environment() -> MachineEnv {
975 // Prefer caller-saved registers over callee-saved registers, because that
976 // way we don't need to emit code to save and restore them if we don't
977 // mutate them.
978
979 let preferred_regs_by_class: [PRegSet; 3] = [
980 PRegSet::empty()
981 .with(px_reg(0))
982 .with(px_reg(1))
983 .with(px_reg(2))
984 .with(px_reg(3))
985 .with(px_reg(4))
986 .with(px_reg(5))
987 .with(px_reg(6))
988 .with(px_reg(7))
989 .with(px_reg(8))
990 .with(px_reg(9))
991 .with(px_reg(10))
992 .with(px_reg(11))
993 .with(px_reg(12))
994 .with(px_reg(13))
995 .with(px_reg(14))
996 .with(px_reg(15)),
997 PRegSet::empty()
998 .with(pf_reg(0))
999 .with(pf_reg(1))
1000 .with(pf_reg(2))
1001 .with(pf_reg(3))
1002 .with(pf_reg(4))
1003 .with(pf_reg(5))
1004 .with(pf_reg(6))
1005 .with(pf_reg(7))
1006 .with(pf_reg(8))
1007 .with(pf_reg(9))
1008 .with(pf_reg(10))
1009 .with(pf_reg(11))
1010 .with(pf_reg(12))
1011 .with(pf_reg(13))
1012 .with(pf_reg(14))
1013 .with(pf_reg(15))
1014 .with(pf_reg(16))
1015 .with(pf_reg(17))
1016 .with(pf_reg(18))
1017 .with(pf_reg(19))
1018 .with(pf_reg(20))
1019 .with(pf_reg(21))
1020 .with(pf_reg(22))
1021 .with(pf_reg(23))
1022 .with(pf_reg(24))
1023 .with(pf_reg(25))
1024 .with(pf_reg(26))
1025 .with(pf_reg(27))
1026 .with(pf_reg(28))
1027 .with(pf_reg(29))
1028 .with(pf_reg(30))
1029 .with(pf_reg(31)),
1030 PRegSet::empty()
1031 .with(pv_reg(0))
1032 .with(pv_reg(1))
1033 .with(pv_reg(2))
1034 .with(pv_reg(3))
1035 .with(pv_reg(4))
1036 .with(pv_reg(5))
1037 .with(pv_reg(6))
1038 .with(pv_reg(7))
1039 .with(pv_reg(8))
1040 .with(pv_reg(9))
1041 .with(pv_reg(10))
1042 .with(pv_reg(11))
1043 .with(pv_reg(12))
1044 .with(pv_reg(13))
1045 .with(pv_reg(14))
1046 .with(pv_reg(15))
1047 .with(pv_reg(16))
1048 .with(pv_reg(17))
1049 .with(pv_reg(18))
1050 .with(pv_reg(19))
1051 .with(pv_reg(20))
1052 .with(pv_reg(21))
1053 .with(pv_reg(22))
1054 .with(pv_reg(23))
1055 .with(pv_reg(24))
1056 .with(pv_reg(25))
1057 .with(pv_reg(26))
1058 .with(pv_reg(27))
1059 .with(pv_reg(28))
1060 .with(pv_reg(29))
1061 .with(pv_reg(30))
1062 .with(pv_reg(31)),
1063 ];
1064
1065 let non_preferred_regs_by_class: [PRegSet; 3] = [
1066 PRegSet::empty()
1067 .with(px_reg(16))
1068 .with(px_reg(17))
1069 .with(px_reg(18))
1070 .with(px_reg(19))
1071 .with(px_reg(20))
1072 .with(px_reg(21))
1073 .with(px_reg(22))
1074 .with(px_reg(23))
1075 .with(px_reg(24))
1076 .with(px_reg(25))
1077 .with(px_reg(26))
1078 .with(px_reg(27))
1079 .with(px_reg(28))
1080 .with(px_reg(29)),
1081 PRegSet::empty(),
1082 PRegSet::empty(),
1083 ];
1084
1085 debug_assert!(XReg::SPECIAL_START == 30);
1086
1087 MachineEnv {
1088 preferred_regs_by_class,
1089 non_preferred_regs_by_class,
1090 fixed_stack_slots: vec![],
1091 scratch_by_class: [None, None, None],
1092 }
1093 }
1094