1 //! Implementation of a standard AArch64 ABI.
2 
3 use crate::CodegenResult;
4 use crate::ir;
5 use crate::ir::MemFlags;
6 use crate::ir::types;
7 use crate::ir::types::*;
8 use crate::ir::{ExternalName, LibCall, Signature, dynamic_to_fixed};
9 use crate::isa;
10 use crate::isa::aarch64::{inst::*, settings as aarch64_settings};
11 use crate::isa::unwind::UnwindInst;
12 use crate::isa::winch;
13 use crate::machinst::*;
14 use crate::settings;
15 use alloc::borrow::ToOwned;
16 use alloc::boxed::Box;
17 use alloc::vec::Vec;
18 use regalloc2::{MachineEnv, PReg, PRegSet};
19 use smallvec::{SmallVec, smallvec};
20 
21 // We use a generic implementation that factors out AArch64 and x64 ABI commonalities, because
22 // these ABIs are very similar.
23 
/// Support for the AArch64 ABI from the callee side (within a function body).
///
/// This is the generic `Callee` implementation specialized with
/// [`AArch64MachineDeps`].
pub(crate) type AArch64Callee = Callee<AArch64MachineDeps>;
26 
27 impl From<StackAMode> for AMode {
from(stack: StackAMode) -> AMode28     fn from(stack: StackAMode) -> AMode {
29         match stack {
30             StackAMode::IncomingArg(off, stack_args_size) => AMode::IncomingArg {
31                 off: i64::from(stack_args_size) - off,
32             },
33             StackAMode::Slot(off) => AMode::SlotOffset { off },
34             StackAMode::OutgoingArg(off) => AMode::SPOffset { off },
35         }
36     }
37 }
38 
39 // Returns the size of stack space needed to store the
40 // `clobbered_callee_saved` registers.
compute_clobber_size( call_conv: isa::CallConv, clobbered_callee_saves: &[Writable<RealReg>], ) -> u3241 fn compute_clobber_size(
42     call_conv: isa::CallConv,
43     clobbered_callee_saves: &[Writable<RealReg>],
44 ) -> u32 {
45     let mut int_regs = 0;
46     let mut vec_regs = 0;
47     for &reg in clobbered_callee_saves {
48         match reg.to_reg().class() {
49             RegClass::Int => {
50                 int_regs += 1;
51             }
52             RegClass::Float => {
53                 vec_regs += 1;
54             }
55             RegClass::Vector => unreachable!(),
56         }
57     }
58 
59     // Round up to multiple of 2, to keep 16-byte stack alignment.
60     let int_save_bytes = (int_regs + (int_regs & 1)) * 8;
61     let vec_save_bytes = if call_conv == isa::CallConv::PreserveAll {
62         // In the PreserveAll ABI, we save the entire vector register,
63         // i.e., all 128 bits.
64         vec_regs * 16
65     } else {
66         // The Procedure Call Standard for the Arm 64-bit Architecture
67         // (AAPCS64, including several related ABIs such as the one used by
68         // Windows) mandates saving only the bottom 8 bytes of the vector
69         // registers, so we round up the number of registers to ensure
70         // proper stack alignment (similarly to the situation with
71         // `int_reg`).
72         let vec_reg_size = 8;
73         let vec_save_padding = vec_regs & 1;
74         // FIXME: SVE: ABI is different to Neon, so do we treat all vec regs as Z-regs?
75         (vec_regs + vec_save_padding) * vec_reg_size
76     };
77 
78     int_save_bytes + vec_save_bytes
79 }
80 
/// AArch64-specific ABI behavior. This struct just serves as an implementation
/// point for the `ABIMachineSpec` trait; it is a field-less unit struct and is
/// never actually instantiated.
pub struct AArch64MachineDeps;
84 
impl IsaFlags for aarch64_settings::Flags {
    /// Forward-edge CFI is reported as enabled exactly when the `use_bti`
    /// ISA flag is set.
    fn is_forward_edge_cfi_enabled(&self) -> bool {
        self.use_bti()
    }
}
90 
91 impl ABIMachineSpec for AArch64MachineDeps {
    /// The machine instruction type produced by this ABI implementation.
    type I = Inst;

    /// The AArch64-specific ISA flags type.
    type F = aarch64_settings::Flags;

    /// This is the limit for the size of argument and return-value areas on the
    /// stack. We place a reasonable limit here to avoid integer overflow issues
    /// with 32-bit arithmetic: for now, 128 MB.
    const STACK_ARG_RET_SIZE_LIMIT: u32 = 128 * 1024 * 1024;
100 
    /// Return the machine word size in bits, which is always 64 here.
    fn word_bits() -> u32 {
        64
    }
104 
    /// Return required stack alignment in bytes. This is 16 regardless of the
    /// calling convention, which is why the argument is ignored.
    fn stack_align(_call_conv: isa::CallConv) -> u32 {
        16
    }
109 
    /// Assign a location (register(s) or stack slot(s)) to every parameter or
    /// return value in `params`, pushing one `ABIArg` per entry into `args`.
    ///
    /// When `add_ret_area_ptr` is set, a return-area pointer argument is
    /// appended after all formal parameters: in `x0` for the Winch calling
    /// convention and in `x8` otherwise.
    ///
    /// Returns the size in bytes of the stack area used, rounded up to a
    /// multiple of 16, together with the index of the return-area pointer in
    /// `args` (or `None` if no such pointer was added).
    ///
    /// # Errors
    ///
    /// Propagates any error from `Inst::rc_for_type`, and returns
    /// `CodegenError::Unsupported` when a return value would have to be placed
    /// on the stack while `enable_multi_ret_implicit_sret` is disabled.
    ///
    /// # Panics
    ///
    /// Panics on `F128` values with the Apple calling convention unless LLVM
    /// ABI extensions are enabled, on `StructArgument` parameters, on
    /// `StructReturn` parameters with the `tail` calling convention or with a
    /// type other than `I64`, and on multi-register values that are not
    /// exactly two integer registers.
    fn compute_arg_locs(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        params: &[ir::AbiParam],
        args_or_rets: ArgsOrRets,
        add_ret_area_ptr: bool,
        mut args: ArgsAccumulator,
    ) -> CodegenResult<(u32, Option<usize>)> {
        let is_apple_cc = call_conv == isa::CallConv::AppleAarch64;
        let is_winch_return = call_conv == isa::CallConv::Winch && args_or_rets == ArgsOrRets::Rets;

        // See AArch64 ABI (https://github.com/ARM-software/abi-aa/blob/2021Q1/aapcs64/aapcs64.rst#64parameter-passing), sections 6.4.
        //
        // MacOS aarch64 is slightly different, see also
        // https://developer.apple.com/documentation/xcode/writing_arm64_code_for_apple_platforms.
        // We are diverging from the MacOS aarch64 implementation in the
        // following ways:
        // - sign- and zero- extensions of data types less than 32 bits are not
        // implemented yet.
        // - we align the arguments stack space to a 16-bytes boundary, while
        // the MacOS allows aligning only on 8 bytes. In practice it means we're
        // slightly overallocating when calling, which is fine, and doesn't
        // break our other invariants that the stack is always allocated in
        // 16-bytes chunks.

        let mut next_xreg = if call_conv == isa::CallConv::Tail {
            // We reserve `x0` for the return area pointer. For simplicity, we
            // reserve it even when there is no return area pointer needed. This
            // also means that identity functions don't have to shuffle arguments to
            // different return registers because we shifted all argument register
            // numbers down by one to make space for the return area pointer.
            //
            // Also, we cannot use all allocatable GPRs as arguments because we need
            // at least one allocatable register for holding the callee address in
            // indirect calls. So skip `x1` also, reserving it for that role.
            2
        } else {
            0
        };
        let mut next_vreg = 0;
        let mut next_stack: u32 = 0;

        // Note on return values: on the regular ABI, we may return values
        // in 8 registers for V128 and I64 registers independently of the
        // number of register values returned in the other class. That is,
        // we can return values in up to 8 integer and
        // 8 vector registers at once.
        let max_per_class_reg_vals = 8; // x0-x7 and v0-v7
        // Combined budget across both register classes (8 integer + 8 vector).
        let mut remaining_reg_vals = 16;

        let ret_area_ptr = if add_ret_area_ptr {
            debug_assert_eq!(args_or_rets, ArgsOrRets::Args);
            if call_conv != isa::CallConv::Winch {
                // In the AAPCS64 calling convention the return area pointer is
                // stored in x8.
                Some(ABIArg::reg(
                    xreg(8).to_real_reg().unwrap(),
                    I64,
                    ir::ArgumentExtension::None,
                    ir::ArgumentPurpose::Normal,
                ))
            } else {
                // Use x0 for the return area pointer in the Winch calling convention
                // to simplify the ABI handling code in Winch by avoiding an AArch64
                // special case to assign it to x8.
                next_xreg += 1;
                Some(ABIArg::reg(
                    xreg(0).to_real_reg().unwrap(),
                    I64,
                    ir::ArgumentExtension::None,
                    ir::ArgumentPurpose::Normal,
                ))
            }
        } else {
            None
        };

        for (i, param) in params.into_iter().enumerate() {
            if is_apple_cc && param.value_type == types::F128 && !flags.enable_llvm_abi_extensions()
            {
                panic!(
                    "f128 args/return values not supported for apple_aarch64 unless LLVM ABI extensions are enabled"
                );
            }

            // Register classes and per-register types needed to hold this value.
            let (rcs, reg_types) = Inst::rc_for_type(param.value_type)?;

            if let ir::ArgumentPurpose::StructReturn = param.purpose {
                assert!(
                    call_conv != isa::CallConv::Tail,
                    "support for StructReturn parameters is not implemented for the `tail` \
                    calling convention yet",
                );
            }

            if let ir::ArgumentPurpose::StructArgument(_) = param.purpose {
                panic!(
                    "StructArgument parameters are not supported on arm64. \
                    Use regular pointer arguments instead."
                );
            }

            // A StructReturn pointer always lives in x8 and does not consume
            // any of the regular argument registers.
            if let ir::ArgumentPurpose::StructReturn = param.purpose {
                // FIXME add assert_eq!(args_or_rets, ArgsOrRets::Args); once
                // ensure_struct_return_ptr_is_returned is gone.
                assert!(
                    param.value_type == types::I64,
                    "StructReturn must be a pointer sized integer"
                );
                args.push(ABIArg::Slots {
                    slots: smallvec![ABIArgSlot::Reg {
                        reg: xreg(8).to_real_reg().unwrap(),
                        ty: types::I64,
                        extension: param.extension,
                    },],
                    purpose: ir::ArgumentPurpose::StructReturn,
                });
                continue;
            }

            // Handle multi register params
            //
            // See AArch64 ABI (https://github.com/ARM-software/abi-aa/blob/2021Q1/aapcs64/aapcs64.rst#642parameter-passing-rules), (Section 6.4.2 Stage C).
            //
            // For arguments with alignment of 16 we round up the register number
            // to the next even value. So we can never allocate for example an i128
            // to X1 and X2, we have to skip one register and do X2, X3
            // (Stage C.8)
            // Note: The Apple ABI deviates a bit here. They don't respect Stage C.8
            // and will happily allocate a i128 to X1 and X2
            //
            // For integer types with alignment of 16 we also have the additional
            // restriction of passing the lower half in Xn and the upper half in Xn+1
            // (Stage C.9)
            //
            // For examples of how LLVM handles this: https://godbolt.org/z/bhd3vvEfh
            //
            // On the Apple ABI it is unspecified if we can spill half the value into the stack
            // i.e load the lower half into x7 and the upper half into the stack
            // LLVM does not seem to do this, so we are going to replicate that behaviour
            let is_multi_reg = rcs.len() >= 2;
            if is_multi_reg {
                assert!(
                    rcs.len() == 2,
                    "Unable to handle multi reg params with more than 2 regs"
                );
                assert!(
                    rcs == &[RegClass::Int, RegClass::Int],
                    "Unable to handle non i64 regs"
                );

                let reg_class_space = max_per_class_reg_vals - next_xreg;
                let reg_space = remaining_reg_vals;

                if reg_space >= 2 && reg_class_space >= 2 {
                    // The aarch64 ABI does not allow us to start a split argument
                    // at an odd numbered register. So we need to skip one register
                    //
                    // TODO: The Fast ABI should probably not skip the register
                    if !is_apple_cc && next_xreg % 2 != 0 {
                        next_xreg += 1;
                    }

                    let lower_reg = xreg(next_xreg);
                    let upper_reg = xreg(next_xreg + 1);

                    args.push(ABIArg::Slots {
                        slots: smallvec![
                            ABIArgSlot::Reg {
                                reg: lower_reg.to_real_reg().unwrap(),
                                ty: reg_types[0],
                                extension: param.extension,
                            },
                            ABIArgSlot::Reg {
                                reg: upper_reg.to_real_reg().unwrap(),
                                ty: reg_types[1],
                                extension: param.extension,
                            },
                        ],
                        purpose: param.purpose,
                    });

                    next_xreg += 2;
                    remaining_reg_vals -= 2;
                    continue;
                }
            } else {
                // Single Register parameters
                let rc = rcs[0];
                let next_reg = match rc {
                    RegClass::Int => &mut next_xreg,
                    RegClass::Float => &mut next_vreg,
                    RegClass::Vector => unreachable!(),
                };

                let push_to_reg = if is_winch_return {
                    // Winch uses the first register to return the last result
                    i == params.len() - 1
                } else {
                    // Use max_per_class_reg_vals & remaining_reg_vals otherwise
                    *next_reg < max_per_class_reg_vals && remaining_reg_vals > 0
                };

                if push_to_reg {
                    let reg = match rc {
                        RegClass::Int => xreg(*next_reg),
                        RegClass::Float => vreg(*next_reg),
                        RegClass::Vector => unreachable!(),
                    };
                    // Overlay Z-regs on V-regs for parameter passing.
                    let ty = if param.value_type.is_dynamic_vector() {
                        dynamic_to_fixed(param.value_type)
                    } else {
                        param.value_type
                    };
                    args.push(ABIArg::reg(
                        reg.to_real_reg().unwrap(),
                        ty,
                        param.extension,
                        param.purpose,
                    ));
                    *next_reg += 1;
                    remaining_reg_vals -= 1;
                    continue;
                }
            }

            // Spill to the stack

            if args_or_rets == ArgsOrRets::Rets && !flags.enable_multi_ret_implicit_sret() {
                return Err(crate::CodegenError::Unsupported(
                    "Too many return values to fit in registers. \
                    Use a StructReturn argument instead. (#9510)"
                        .to_owned(),
                ));
            }

            // Compute the stack slot's size.
            let size = (ty_bits(param.value_type) / 8) as u32;

            let size = if is_apple_cc || is_winch_return {
                // MacOS and Winch aarch64 allows stack slots with
                // sizes less than 8 bytes. They still need to be
                // properly aligned on their natural data alignment,
                // though.
                size
            } else {
                // Every arg takes a minimum slot of 8 bytes. (16-byte stack
                // alignment happens separately after all args.)
                core::cmp::max(size, 8)
            };

            if !is_winch_return {
                // Align the stack slot.
                debug_assert!(size.is_power_of_two());
                next_stack = align_to(next_stack, size);
            }

            // Note: `scan` works on its own copy of `next_stack`; the outer
            // variable is only advanced after the slots have been pushed.
            let slots = reg_types
                .iter()
                .copied()
                // Build the stack locations from each slot
                .scan(next_stack, |next_stack, ty| {
                    let slot_offset = *next_stack as i64;
                    *next_stack += (ty_bits(ty) / 8) as u32;

                    Some((ty, slot_offset))
                })
                .map(|(ty, offset)| ABIArgSlot::Stack {
                    offset,
                    ty,
                    extension: param.extension,
                })
                .collect();

            args.push(ABIArg::Slots {
                slots,
                purpose: param.purpose,
            });

            // Advance past the slot that was just assigned.
            next_stack += size;
        }

        // The return-area pointer, if any, goes after all formal parameters;
        // remember its index so it can be reported to the caller.
        let extra_arg = if let Some(ret_area_ptr) = ret_area_ptr {
            args.push_non_formal(ret_area_ptr);
            Some(args.args().len() - 1)
        } else {
            None
        };

        if is_winch_return {
            winch::reverse_stack(args, next_stack, false);
        }

        // Round the total stack area up to a multiple of 16 bytes.
        next_stack = align_to(next_stack, 16);

        Ok((next_stack, extra_arg))
    }
408 
gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Inst409     fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Inst {
410         Inst::gen_load(into_reg, mem.into(), ty, MemFlags::trusted())
411     }
412 
gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Inst413     fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Inst {
414         Inst::gen_store(mem.into(), from_reg, ty, MemFlags::trusted())
415     }
416 
    /// Generate a move of a value of type `ty` from `from_reg` to `to_reg` by
    /// delegating to `Inst::gen_move`.
    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {
        Inst::gen_move(to_reg, from_reg, ty)
    }
420 
gen_extend( to_reg: Writable<Reg>, from_reg: Reg, signed: bool, from_bits: u8, to_bits: u8, ) -> Inst421     fn gen_extend(
422         to_reg: Writable<Reg>,
423         from_reg: Reg,
424         signed: bool,
425         from_bits: u8,
426         to_bits: u8,
427     ) -> Inst {
428         assert!(from_bits < to_bits);
429         Inst::Extend {
430             rd: to_reg,
431             rn: from_reg,
432             signed,
433             from_bits,
434             to_bits,
435         }
436     }
437 
    /// Wrap the given argument pairs in an `Inst::Args` instruction.
    fn gen_args(args: Vec<ArgPair>) -> Inst {
        Inst::Args { args }
    }
441 
    /// Wrap the given return-value pairs in an `Inst::Rets` instruction.
    fn gen_rets(rets: Vec<RetPair>) -> Inst {
        Inst::Rets { rets }
    }
445 
gen_add_imm( _call_conv: isa::CallConv, into_reg: Writable<Reg>, from_reg: Reg, imm: u32, ) -> SmallInstVec<Inst>446     fn gen_add_imm(
447         _call_conv: isa::CallConv,
448         into_reg: Writable<Reg>,
449         from_reg: Reg,
450         imm: u32,
451     ) -> SmallInstVec<Inst> {
452         let imm = imm as u64;
453         let mut insts = SmallVec::new();
454         if let Some(imm12) = Imm12::maybe_from_u64(imm) {
455             insts.push(Inst::AluRRImm12 {
456                 alu_op: ALUOp::Add,
457                 size: OperandSize::Size64,
458                 rd: into_reg,
459                 rn: from_reg,
460                 imm12,
461             });
462         } else {
463             let scratch2 = writable_tmp2_reg();
464             assert_ne!(scratch2.to_reg(), from_reg);
465             // `gen_add_imm` is only ever called after register allocation has taken place, and as a
466             // result it's ok to reuse the scratch2 register here. If that changes, we'll need to
467             // plumb through a way to allocate temporary virtual registers
468             insts.extend(Inst::load_constant(scratch2, imm));
469             insts.push(Inst::AluRRRExtend {
470                 alu_op: ALUOp::Add,
471                 size: OperandSize::Size64,
472                 rd: into_reg,
473                 rn: from_reg,
474                 rm: scratch2.to_reg(),
475                 extendop: ExtendOp::UXTX,
476             });
477         }
478         insts
479     }
480 
gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Inst>481     fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Inst> {
482         let mut insts = SmallVec::new();
483         insts.push(Inst::AluRRRExtend {
484             alu_op: ALUOp::SubS,
485             size: OperandSize::Size64,
486             rd: writable_zero_reg(),
487             rn: stack_reg(),
488             rm: limit_reg,
489             extendop: ExtendOp::UXTX,
490         });
491         insts.push(Inst::TrapIf {
492             trap_code: ir::TrapCode::STACK_OVERFLOW,
493             // Here `Lo` == "less than" when interpreting the two
494             // operands as unsigned integers.
495             kind: CondBrKind::Cond(Cond::Lo),
496         });
497         insts
498     }
499 
gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>) -> Inst500     fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>) -> Inst {
501         // FIXME: Do something different for dynamic types?
502         let mem = mem.into();
503         Inst::LoadAddr { rd: into_reg, mem }
504     }
505 
    /// Return the register used to hold the stack limit. This is the spill
    /// temporary register for every calling convention, so the argument is
    /// ignored.
    fn get_stacklimit_reg(_call_conv: isa::CallConv) -> Reg {
        spilltmp_reg()
    }
509 
gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Inst510     fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Inst {
511         let mem = AMode::RegOffset {
512             rn: base,
513             off: offset as i64,
514         };
515         Inst::gen_load(into_reg, mem, ty, MemFlags::trusted())
516     }
517 
gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Inst518     fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Inst {
519         let mem = AMode::RegOffset {
520             rn: base,
521             off: offset as i64,
522         };
523         Inst::gen_store(mem, from_reg, ty, MemFlags::trusted())
524     }
525 
gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Inst>526     fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Inst> {
527         if amount == 0 {
528             return SmallVec::new();
529         }
530 
531         let (amount, is_sub) = if amount > 0 {
532             (amount as u64, false)
533         } else {
534             (-amount as u64, true)
535         };
536 
537         let alu_op = if is_sub { ALUOp::Sub } else { ALUOp::Add };
538 
539         let mut ret = SmallVec::new();
540         if let Some(imm12) = Imm12::maybe_from_u64(amount) {
541             let adj_inst = Inst::AluRRImm12 {
542                 alu_op,
543                 size: OperandSize::Size64,
544                 rd: writable_stack_reg(),
545                 rn: stack_reg(),
546                 imm12,
547             };
548             ret.push(adj_inst);
549         } else {
550             let tmp = writable_spilltmp_reg();
551             // `gen_sp_reg_adjust` is called after regalloc2, so it's acceptable to reuse `tmp` for
552             // intermediates in `load_constant`.
553             let const_inst = Inst::load_constant(tmp, amount);
554             let adj_inst = Inst::AluRRRExtend {
555                 alu_op,
556                 size: OperandSize::Size64,
557                 rd: writable_stack_reg(),
558                 rn: stack_reg(),
559                 rm: tmp.to_reg(),
560                 extendop: ExtendOp::UXTX,
561             };
562             ret.extend(const_inst);
563             ret.push(adj_inst);
564         }
565         ret
566     }
567 
gen_prologue_frame_setup( call_conv: isa::CallConv, flags: &settings::Flags, isa_flags: &aarch64_settings::Flags, frame_layout: &FrameLayout, ) -> SmallInstVec<Inst>568     fn gen_prologue_frame_setup(
569         call_conv: isa::CallConv,
570         flags: &settings::Flags,
571         isa_flags: &aarch64_settings::Flags,
572         frame_layout: &FrameLayout,
573     ) -> SmallInstVec<Inst> {
574         let setup_frame = frame_layout.setup_area_size > 0;
575         let mut insts = SmallVec::new();
576 
577         match Self::select_api_key(isa_flags, call_conv, setup_frame) {
578             Some(key) => {
579                 insts.push(Inst::Paci { key });
580                 if flags.unwind_info() {
581                     insts.push(Inst::Unwind {
582                         inst: UnwindInst::Aarch64SetPointerAuth {
583                             return_addresses: true,
584                         },
585                     });
586                 }
587             }
588             None => {
589                 if isa_flags.use_bti() {
590                     insts.push(Inst::Bti {
591                         targets: BranchTargetType::C,
592                     });
593                 }
594 
595                 if flags.unwind_info() && call_conv == isa::CallConv::AppleAarch64 {
596                     // The macOS unwinder seems to require this.
597                     insts.push(Inst::Unwind {
598                         inst: UnwindInst::Aarch64SetPointerAuth {
599                             return_addresses: false,
600                         },
601                     });
602                 }
603             }
604         }
605 
606         if setup_frame {
607             // stp fp (x29), lr (x30), [sp, #-16]!
608             insts.push(Inst::StoreP64 {
609                 rt: fp_reg(),
610                 rt2: link_reg(),
611                 mem: PairAMode::SPPreIndexed {
612                     simm7: SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap(),
613                 },
614                 flags: MemFlags::trusted(),
615             });
616 
617             if flags.unwind_info() {
618                 insts.push(Inst::Unwind {
619                     inst: UnwindInst::PushFrameRegs {
620                         offset_upward_to_caller_sp: frame_layout.setup_area_size,
621                     },
622                 });
623             }
624 
625             // mov fp (x29), sp. This uses the ADDI rd, rs, 0 form of `MOV` because
626             // the usual encoding (`ORR`) does not work with SP.
627             insts.push(Inst::AluRRImm12 {
628                 alu_op: ALUOp::Add,
629                 size: OperandSize::Size64,
630                 rd: writable_fp_reg(),
631                 rn: stack_reg(),
632                 imm12: Imm12 {
633                     bits: 0,
634                     shift12: false,
635                 },
636             });
637         }
638 
639         insts
640     }
641 
gen_epilogue_frame_restore( call_conv: isa::CallConv, _flags: &settings::Flags, _isa_flags: &aarch64_settings::Flags, frame_layout: &FrameLayout, ) -> SmallInstVec<Inst>642     fn gen_epilogue_frame_restore(
643         call_conv: isa::CallConv,
644         _flags: &settings::Flags,
645         _isa_flags: &aarch64_settings::Flags,
646         frame_layout: &FrameLayout,
647     ) -> SmallInstVec<Inst> {
648         let setup_frame = frame_layout.setup_area_size > 0;
649         let mut insts = SmallVec::new();
650 
651         if setup_frame {
652             // N.B.: sp is already adjusted to the appropriate place by the
653             // clobber-restore code (which also frees the fixed frame). Hence, there
654             // is no need for the usual `mov sp, fp` here.
655 
656             // `ldp fp, lr, [sp], #16`
657             insts.push(Inst::LoadP64 {
658                 rt: writable_fp_reg(),
659                 rt2: writable_link_reg(),
660                 mem: PairAMode::SPPostIndexed {
661                     simm7: SImm7Scaled::maybe_from_i64(16, types::I64).unwrap(),
662                 },
663                 flags: MemFlags::trusted(),
664             });
665         }
666 
667         if call_conv == isa::CallConv::Tail && frame_layout.tail_args_size > 0 {
668             insts.extend(Self::gen_sp_reg_adjust(
669                 frame_layout.tail_args_size.try_into().unwrap(),
670             ));
671         }
672 
673         insts
674     }
675 
gen_return( call_conv: isa::CallConv, isa_flags: &aarch64_settings::Flags, frame_layout: &FrameLayout, ) -> SmallInstVec<Inst>676     fn gen_return(
677         call_conv: isa::CallConv,
678         isa_flags: &aarch64_settings::Flags,
679         frame_layout: &FrameLayout,
680     ) -> SmallInstVec<Inst> {
681         let setup_frame = frame_layout.setup_area_size > 0;
682 
683         match Self::select_api_key(isa_flags, call_conv, setup_frame) {
684             Some(key) => {
685                 smallvec![Inst::AuthenticatedRet {
686                     key,
687                     is_hint: !isa_flags.has_pauth(),
688                 }]
689             }
690             None => {
691                 smallvec![Inst::Ret {}]
692             }
693         }
694     }
695 
    /// Out-of-line stack probing; not implemented for AArch64.
    ///
    /// # Panics
    ///
    /// Always panics via `unimplemented!`.
    fn gen_probestack(_insts: &mut SmallInstVec<Self::I>, _: u32) {
        // TODO: implement if we ever require stack probes on an AArch64 host
        // (unlikely unless Lucet is ported)
        unimplemented!("Stack probing is unimplemented on AArch64");
    }
701 
gen_inline_probestack( insts: &mut SmallInstVec<Self::I>, _call_conv: isa::CallConv, frame_size: u32, guard_size: u32, )702     fn gen_inline_probestack(
703         insts: &mut SmallInstVec<Self::I>,
704         _call_conv: isa::CallConv,
705         frame_size: u32,
706         guard_size: u32,
707     ) {
708         // The stack probe loop currently takes 6 instructions and each inline
709         // probe takes 2 (ish, these numbers sort of depend on the constants).
710         // Set this to 3 to keep the max size of the probe to 6 instructions.
711         const PROBE_MAX_UNROLL: u32 = 3;
712 
713         // Calculate how many probes we need to perform. Round down, as we only
714         // need to probe whole guard_size regions we'd otherwise skip over.
715         let probe_count = frame_size / guard_size;
716         if probe_count == 0 {
717             // No probe necessary
718         } else if probe_count <= PROBE_MAX_UNROLL {
719             Self::gen_probestack_unroll(insts, guard_size, probe_count)
720         } else {
721             Self::gen_probestack_loop(insts, frame_size, guard_size)
722         }
723     }
724 
    /// Builds the prologue sequence that saves the clobbered callee-saved
    /// registers and allocates the fixed part of the frame.
    ///
    /// In emission order, the sequence:
    ///
    /// 1. grows the incoming-argument area when `tail_args_size` exceeds
    ///    `incoming_args_size`, re-storing the FP/LR pair at the new SP if a
    ///    frame was set up;
    /// 2. pushes the clobbered integer registers, then the clobbered vector
    ///    registers, one 16-byte slot at a time using pre-indexed stores;
    /// 3. lowers SP by `fixed_frame_storage_size + outgoing_args_size`.
    ///
    /// When `flags.unwind_info()` is set, matching `Inst::Unwind`
    /// pseudo-instructions are interleaved with the real instructions.
    fn gen_clobber_save(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallVec<[Inst; 16]> {
        let (clobbered_int, clobbered_vec) = frame_layout.clobbered_callee_saves_by_class();

        let mut insts = SmallVec::new();
        // A non-zero setup area means an FP/LR pair was stored for this frame.
        let setup_frame = frame_layout.setup_area_size > 0;

        // When a return_call within this function required more stack arguments than we have
        // present, resize the incoming argument area of the frame to accommodate those arguments.
        let incoming_args_diff = frame_layout.tail_args_size - frame_layout.incoming_args_size;
        if incoming_args_diff > 0 {
            // Decrement SP to account for the additional space required by a tail call.
            insts.extend(Self::gen_sp_reg_adjust(-(incoming_args_diff as i32)));
            if flags.unwind_info() {
                insts.push(Inst::Unwind {
                    inst: UnwindInst::StackAlloc {
                        size: incoming_args_diff,
                    },
                });
            }

            // Move fp and lr down.
            if setup_frame {
                // Reload the frame pointer from the stack; it sits
                // `incoming_args_diff` bytes above the freshly lowered SP.
                insts.push(Inst::ULoad64 {
                    rd: regs::writable_fp_reg(),
                    mem: AMode::SPOffset {
                        off: i64::from(incoming_args_diff),
                    },
                    flags: MemFlags::trusted(),
                });

                // Store the frame pointer and link register again at the new SP
                insts.push(Inst::StoreP64 {
                    rt: fp_reg(),
                    rt2: link_reg(),
                    mem: PairAMode::SignedOffset {
                        reg: regs::stack_reg(),
                        simm7: SImm7Scaled::maybe_from_i64(0, types::I64).unwrap(),
                    },
                    flags: MemFlags::trusted(),
                });

                // Keep the frame pointer in sync
                insts.push(Self::gen_move(
                    regs::writable_fp_reg(),
                    regs::stack_reg(),
                    types::I64,
                ));
            }
        }

        if flags.unwind_info() && setup_frame {
            // The *unwind* frame (but not the actual frame) starts at the
            // clobbers, just below the saved FP/LR pair.
            insts.push(Inst::Unwind {
                inst: UnwindInst::DefineNewFrame {
                    offset_downward_to_clobbers: frame_layout.clobber_size,
                    offset_upward_to_caller_sp: frame_layout.setup_area_size,
                },
            });
        }

        // We use pre-indexed addressing modes here, rather than the possibly
        // more efficient "subtract sp once then use fixed offsets" scheme,
        // because (i) we cannot necessarily guarantee that the offset of a
        // clobber-save slot will be within a SImm7Scaled (+504-byte) offset
        // range of the whole frame including other slots, (ii) it is more
        // complex to conditionally generate a two-stage SP adjustment
        // (clobbers then fixed frame) otherwise, and (iii) generally we just
        // want to maintain simplicity here for maintainability.  Because
        // clobbers are at the top of the frame, just below FP, all that is
        // necessary is to use the pre-indexed "push" `[sp, #-16]!` addressing
        // mode.
        //
        // `clobber_offset` tracks the offset above start-of-clobbers for
        // unwind-info purposes. It is only updated (and only read) when unwind
        // info is enabled.
        let mut clobber_offset = frame_layout.clobber_size;
        let clobber_offset_change = 16;
        let iter = clobbered_int.chunks_exact(2);

        // An odd integer register out is pushed first, in a 16-byte slot of
        // its own.
        if let [rd] = iter.remainder() {
            let rd: Reg = rd.to_reg().into();

            debug_assert_eq!(rd.class(), RegClass::Int);
            // str rd, [sp, #-16]!
            insts.push(Inst::Store64 {
                rd,
                mem: AMode::SPPreIndexed {
                    simm9: SImm9::maybe_from_i64(-clobber_offset_change).unwrap(),
                },
                flags: MemFlags::trusted(),
            });

            if flags.unwind_info() {
                clobber_offset -= clobber_offset_change as u32;
                insts.push(Inst::Unwind {
                    inst: UnwindInst::SaveReg {
                        clobber_offset,
                        reg: rd.to_real_reg().unwrap(),
                    },
                });
            }
        }

        // The pairs are pushed in reverse, so the first pair of the list is
        // pushed last and ends up at the lowest address of the integer saves.
        let mut iter = iter.rev();

        while let Some([rt, rt2]) = iter.next() {
            // .to_reg().into(): Writable<RealReg> --> RealReg --> Reg
            let rt: Reg = rt.to_reg().into();
            let rt2: Reg = rt2.to_reg().into();

            debug_assert!(rt.class() == RegClass::Int);
            debug_assert!(rt2.class() == RegClass::Int);

            // stp rt, rt2, [sp, #-16]!
            insts.push(Inst::StoreP64 {
                rt,
                rt2,
                mem: PairAMode::SPPreIndexed {
                    simm7: SImm7Scaled::maybe_from_i64(-clobber_offset_change, types::I64).unwrap(),
                },
                flags: MemFlags::trusted(),
            });

            if flags.unwind_info() {
                clobber_offset -= clobber_offset_change as u32;
                // `rt` occupies the low 8 bytes of the slot...
                insts.push(Inst::Unwind {
                    inst: UnwindInst::SaveReg {
                        clobber_offset,
                        reg: rt.to_real_reg().unwrap(),
                    },
                });
                // ...and `rt2` the high 8 bytes.
                insts.push(Inst::Unwind {
                    inst: UnwindInst::SaveReg {
                        clobber_offset: clobber_offset + (clobber_offset_change / 2) as u32,
                        reg: rt2.to_real_reg().unwrap(),
                    },
                });
            }
        }

        if call_conv == isa::CallConv::PreserveAll {
            // Store full vector registers in PreserveAll convention.
            for reg in clobbered_vec.iter().rev() {
                let inst = Inst::FpuStore128 {
                    rd: reg.to_reg().into(),
                    mem: AMode::SPPreIndexed {
                        simm9: SImm9::maybe_from_i64(-clobber_offset_change).unwrap(),
                    },
                    flags: MemFlags::trusted(),
                };
                insts.push(inst);
                // N.B.: no unwind info: we don't have a way to
                // represent "full register" anyway.
            }
        } else {
            // Otherwise only 64 bits of each vector register are stored,
            // mirroring the integer scheme above: the odd register out first,
            // then the pairs in reverse.
            let store_vec_reg_half = |rd| Inst::FpuStore64 {
                rd,
                mem: AMode::SPPreIndexed {
                    simm9: SImm9::maybe_from_i64(-clobber_offset_change).unwrap(),
                },
                flags: MemFlags::trusted(),
            };
            let iter = clobbered_vec.chunks_exact(2);

            if let [rd] = iter.remainder() {
                let rd: Reg = rd.to_reg().into();

                debug_assert_eq!(rd.class(), RegClass::Float);
                insts.push(store_vec_reg_half(rd));

                if flags.unwind_info() {
                    clobber_offset -= clobber_offset_change as u32;
                    insts.push(Inst::Unwind {
                        inst: UnwindInst::SaveReg {
                            clobber_offset,
                            reg: rd.to_real_reg().unwrap(),
                        },
                    });
                }
            }

            // Returns the paired store together with the number of bytes by
            // which it moves SP (used below to update `clobber_offset`).
            let store_vec_reg_half_pair = |rt, rt2| {
                let clobber_offset_change = 16;

                (
                    Inst::FpuStoreP64 {
                        rt,
                        rt2,
                        mem: PairAMode::SPPreIndexed {
                            simm7: SImm7Scaled::maybe_from_i64(-clobber_offset_change, F64)
                                .unwrap(),
                        },
                        flags: MemFlags::trusted(),
                    },
                    clobber_offset_change as u32,
                )
            };
            let mut iter = iter.rev();

            while let Some([rt, rt2]) = iter.next() {
                let rt: Reg = rt.to_reg().into();
                let rt2: Reg = rt2.to_reg().into();

                debug_assert_eq!(rt.class(), RegClass::Float);
                debug_assert_eq!(rt2.class(), RegClass::Float);

                let (inst, clobber_offset_change) = store_vec_reg_half_pair(rt, rt2);

                insts.push(inst);

                if flags.unwind_info() {
                    clobber_offset -= clobber_offset_change;
                    insts.push(Inst::Unwind {
                        inst: UnwindInst::SaveReg {
                            clobber_offset,
                            reg: rt.to_real_reg().unwrap(),
                        },
                    });
                    insts.push(Inst::Unwind {
                        inst: UnwindInst::SaveReg {
                            clobber_offset: clobber_offset + clobber_offset_change / 2,
                            reg: rt2.to_real_reg().unwrap(),
                        },
                    });
                }
            }
        }

        // Allocate the fixed frame below the clobbers if necessary.
        let stack_size = frame_layout.fixed_frame_storage_size + frame_layout.outgoing_args_size;
        if stack_size > 0 {
            insts.extend(Self::gen_sp_reg_adjust(-(stack_size as i32)));
            if flags.unwind_info() {
                insts.push(Inst::Unwind {
                    inst: UnwindInst::StackAlloc { size: stack_size },
                });
            }
        }

        insts
    }
970 
gen_clobber_restore( call_conv: isa::CallConv, _flags: &settings::Flags, frame_layout: &FrameLayout, ) -> SmallVec<[Inst; 16]>971     fn gen_clobber_restore(
972         call_conv: isa::CallConv,
973         _flags: &settings::Flags,
974         frame_layout: &FrameLayout,
975     ) -> SmallVec<[Inst; 16]> {
976         let mut insts = SmallVec::new();
977         let (clobbered_int, clobbered_vec) = frame_layout.clobbered_callee_saves_by_class();
978 
979         // Free the fixed frame if necessary.
980         let stack_size = frame_layout.fixed_frame_storage_size + frame_layout.outgoing_args_size;
981         if stack_size > 0 {
982             insts.extend(Self::gen_sp_reg_adjust(stack_size as i32));
983         }
984 
985         if call_conv == isa::CallConv::PreserveAll {
986             for reg in clobbered_vec.iter() {
987                 let inst = Inst::FpuLoad128 {
988                     rd: reg.map(|r| r.into()),
989                     mem: AMode::SPPostIndexed {
990                         simm9: SImm9::maybe_from_i64(16).unwrap(),
991                     },
992                     flags: MemFlags::trusted(),
993                 };
994                 insts.push(inst);
995                 // N.B.: no unwind info; we don't have a way to
996                 // represent "full vector register saved" anyway.
997             }
998         } else {
999             let load_vec_reg_half = |rd| Inst::FpuLoad64 {
1000                 rd,
1001                 mem: AMode::SPPostIndexed {
1002                     simm9: SImm9::maybe_from_i64(16).unwrap(),
1003                 },
1004                 flags: MemFlags::trusted(),
1005             };
1006             let load_vec_reg_half_pair = |rt, rt2| Inst::FpuLoadP64 {
1007                 rt,
1008                 rt2,
1009                 mem: PairAMode::SPPostIndexed {
1010                     simm7: SImm7Scaled::maybe_from_i64(16, F64).unwrap(),
1011                 },
1012                 flags: MemFlags::trusted(),
1013             };
1014 
1015             let mut iter = clobbered_vec.chunks_exact(2);
1016 
1017             while let Some([rt, rt2]) = iter.next() {
1018                 let rt: Writable<Reg> = rt.map(|r| r.into());
1019                 let rt2: Writable<Reg> = rt2.map(|r| r.into());
1020 
1021                 debug_assert_eq!(rt.to_reg().class(), RegClass::Float);
1022                 debug_assert_eq!(rt2.to_reg().class(), RegClass::Float);
1023                 insts.push(load_vec_reg_half_pair(rt, rt2));
1024             }
1025 
1026             debug_assert!(iter.remainder().len() <= 1);
1027 
1028             if let [rd] = iter.remainder() {
1029                 let rd: Writable<Reg> = rd.map(|r| r.into());
1030 
1031                 debug_assert_eq!(rd.to_reg().class(), RegClass::Float);
1032                 insts.push(load_vec_reg_half(rd));
1033             }
1034         }
1035 
1036         let mut iter = clobbered_int.chunks_exact(2);
1037 
1038         while let Some([rt, rt2]) = iter.next() {
1039             let rt: Writable<Reg> = rt.map(|r| r.into());
1040             let rt2: Writable<Reg> = rt2.map(|r| r.into());
1041 
1042             debug_assert_eq!(rt.to_reg().class(), RegClass::Int);
1043             debug_assert_eq!(rt2.to_reg().class(), RegClass::Int);
1044             // ldp rt, rt2, [sp], #16
1045             insts.push(Inst::LoadP64 {
1046                 rt,
1047                 rt2,
1048                 mem: PairAMode::SPPostIndexed {
1049                     simm7: SImm7Scaled::maybe_from_i64(16, I64).unwrap(),
1050                 },
1051                 flags: MemFlags::trusted(),
1052             });
1053         }
1054 
1055         debug_assert!(iter.remainder().len() <= 1);
1056 
1057         if let [rd] = iter.remainder() {
1058             let rd: Writable<Reg> = rd.map(|r| r.into());
1059 
1060             debug_assert_eq!(rd.to_reg().class(), RegClass::Int);
1061             // ldr rd, [sp], #16
1062             insts.push(Inst::ULoad64 {
1063                 rd,
1064                 mem: AMode::SPPostIndexed {
1065                     simm9: SImm9::maybe_from_i64(16).unwrap(),
1066                 },
1067                 flags: MemFlags::trusted(),
1068             });
1069         }
1070 
1071         insts
1072     }
1073 
gen_memcpy<F: FnMut(Type) -> Writable<Reg>>( call_conv: isa::CallConv, dst: Reg, src: Reg, size: usize, mut alloc_tmp: F, ) -> SmallVec<[Self::I; 8]>1074     fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
1075         call_conv: isa::CallConv,
1076         dst: Reg,
1077         src: Reg,
1078         size: usize,
1079         mut alloc_tmp: F,
1080     ) -> SmallVec<[Self::I; 8]> {
1081         let mut insts = SmallVec::new();
1082         let arg0 = writable_xreg(0);
1083         let arg1 = writable_xreg(1);
1084         let arg2 = writable_xreg(2);
1085         let tmp = alloc_tmp(Self::word_type());
1086         insts.extend(Inst::load_constant(tmp, size as u64));
1087         insts.push(Inst::Call {
1088             info: Box::new(CallInfo {
1089                 dest: ExternalName::LibCall(LibCall::Memcpy),
1090                 uses: smallvec![
1091                     CallArgPair {
1092                         vreg: dst,
1093                         preg: arg0.to_reg()
1094                     },
1095                     CallArgPair {
1096                         vreg: src,
1097                         preg: arg1.to_reg()
1098                     },
1099                     CallArgPair {
1100                         vreg: tmp.to_reg(),
1101                         preg: arg2.to_reg()
1102                     }
1103                 ],
1104                 defs: smallvec![],
1105                 clobbers: Self::get_regs_clobbered_by_call(call_conv, false),
1106                 caller_conv: call_conv,
1107                 callee_conv: call_conv,
1108                 callee_pop_size: 0,
1109                 try_call_info: None,
1110                 patchable: false,
1111             }),
1112         });
1113         insts
1114     }
1115 
get_number_of_spillslots_for_value( rc: RegClass, vector_size: u32, _isa_flags: &Self::F, ) -> u321116     fn get_number_of_spillslots_for_value(
1117         rc: RegClass,
1118         vector_size: u32,
1119         _isa_flags: &Self::F,
1120     ) -> u32 {
1121         assert_eq!(vector_size % 8, 0);
1122         // We allocate in terms of 8-byte slots.
1123         match rc {
1124             RegClass::Int => 1,
1125             RegClass::Float => vector_size / 8,
1126             RegClass::Vector => unreachable!(),
1127         }
1128     }
1129 
get_machine_env(flags: &settings::Flags, _call_conv: isa::CallConv) -> &MachineEnv1130     fn get_machine_env(flags: &settings::Flags, _call_conv: isa::CallConv) -> &MachineEnv {
1131         if flags.enable_pinned_reg() {
1132             static MACHINE_ENV: MachineEnv = create_reg_env(true);
1133             &MACHINE_ENV
1134         } else {
1135             static MACHINE_ENV: MachineEnv = create_reg_env(false);
1136             &MACHINE_ENV
1137         }
1138     }
1139 
get_regs_clobbered_by_call(call_conv: isa::CallConv, is_exception: bool) -> PRegSet1140     fn get_regs_clobbered_by_call(call_conv: isa::CallConv, is_exception: bool) -> PRegSet {
1141         match (call_conv, is_exception) {
1142             (isa::CallConv::Tail, true) => ALL_CLOBBERS,
1143             (isa::CallConv::Winch, true) => ALL_CLOBBERS,
1144             (isa::CallConv::Winch, false) => WINCH_CLOBBERS,
1145             // Note that "PreserveAll" actually preserves nothing at
1146             // the callsite if used for a `try_call`, because the
1147             // unwinder ABI for `try_call`s is still "no clobbered
1148             // register restores" for this ABI (so as to work with
1149             // Wasmtime).
1150             (isa::CallConv::PreserveAll, true) => ALL_CLOBBERS,
1151             (isa::CallConv::SystemV, _) => DEFAULT_AAPCS_CLOBBERS,
1152             (isa::CallConv::PreserveAll, _) => NO_CLOBBERS,
1153             (_, false) => DEFAULT_AAPCS_CLOBBERS,
1154             (_, true) => panic!("unimplemented clobbers for exn abi of {call_conv:?}"),
1155         }
1156     }
1157 
get_ext_mode( call_conv: isa::CallConv, specified: ir::ArgumentExtension, ) -> ir::ArgumentExtension1158     fn get_ext_mode(
1159         call_conv: isa::CallConv,
1160         specified: ir::ArgumentExtension,
1161     ) -> ir::ArgumentExtension {
1162         if call_conv == isa::CallConv::AppleAarch64 {
1163             specified
1164         } else {
1165             ir::ArgumentExtension::None
1166         }
1167     }
1168 
compute_frame_layout( call_conv: isa::CallConv, flags: &settings::Flags, sig: &Signature, regs: &[Writable<RealReg>], function_calls: FunctionCalls, incoming_args_size: u32, tail_args_size: u32, stackslots_size: u32, fixed_frame_storage_size: u32, outgoing_args_size: u32, ) -> FrameLayout1169     fn compute_frame_layout(
1170         call_conv: isa::CallConv,
1171         flags: &settings::Flags,
1172         sig: &Signature,
1173         regs: &[Writable<RealReg>],
1174         function_calls: FunctionCalls,
1175         incoming_args_size: u32,
1176         tail_args_size: u32,
1177         stackslots_size: u32,
1178         fixed_frame_storage_size: u32,
1179         outgoing_args_size: u32,
1180     ) -> FrameLayout {
1181         let mut regs: Vec<Writable<RealReg>> = regs
1182             .iter()
1183             .cloned()
1184             .filter(|r| {
1185                 is_reg_saved_in_prologue(call_conv, flags.enable_pinned_reg(), sig, r.to_reg())
1186             })
1187             .collect();
1188 
1189         // Sort registers for deterministic code output. We can do an unstable
1190         // sort because the registers will be unique (there are no dups).
1191         regs.sort_unstable();
1192 
1193         // Compute clobber size.
1194         let clobber_size = compute_clobber_size(call_conv, &regs);
1195 
1196         // Compute linkage frame size.
1197         let setup_area_size = if flags.preserve_frame_pointers()
1198             || function_calls != FunctionCalls::None
1199             // The function arguments that are passed on the stack are addressed
1200             // relative to the Frame Pointer.
1201             || incoming_args_size > 0
1202             || clobber_size > 0
1203             || fixed_frame_storage_size > 0
1204         {
1205             16 // FP, LR
1206         } else {
1207             0
1208         };
1209 
1210         // Return FrameLayout structure.
1211         FrameLayout {
1212             word_bytes: 8,
1213             incoming_args_size,
1214             tail_args_size,
1215             setup_area_size,
1216             clobber_size,
1217             fixed_frame_storage_size,
1218             stackslots_size,
1219             outgoing_args_size,
1220             clobbered_callee_saves: regs,
1221             function_calls,
1222         }
1223     }
1224 
    /// Returns the temporary register used for return values, which is x9 for
    /// every calling convention (the argument is ignored).
    fn retval_temp_reg(_call_conv_of_callee: isa::CallConv) -> Writable<Reg> {
        // Use x9 as a temp if needed: clobbered, not a
        // retval.
        regs::writable_xreg(9)
    }
1230 
exception_payload_regs(call_conv: isa::CallConv) -> &'static [Reg]1231     fn exception_payload_regs(call_conv: isa::CallConv) -> &'static [Reg] {
1232         const PAYLOAD_REGS: &'static [Reg] = &[regs::xreg(0), regs::xreg(1)];
1233         match call_conv {
1234             isa::CallConv::SystemV | isa::CallConv::Tail | isa::CallConv::PreserveAll => {
1235                 PAYLOAD_REGS
1236             }
1237             _ => &[],
1238         }
1239     }
1240 }
1241 
1242 impl AArch64MachineDeps {
gen_probestack_unroll(insts: &mut SmallInstVec<Inst>, guard_size: u32, probe_count: u32)1243     fn gen_probestack_unroll(insts: &mut SmallInstVec<Inst>, guard_size: u32, probe_count: u32) {
1244         // When manually unrolling adjust the stack pointer and then write a zero
1245         // to the stack at that offset. This generates something like
1246         // `sub sp, sp, #1, lsl #12` followed by `stur wzr, [sp]`.
1247         //
1248         // We do this because valgrind expects us to never write beyond the stack
1249         // pointer and associated redzone.
1250         // See: https://github.com/bytecodealliance/wasmtime/issues/7454
1251         for _ in 0..probe_count {
1252             insts.extend(Self::gen_sp_reg_adjust(-(guard_size as i32)));
1253 
1254             insts.push(Inst::gen_store(
1255                 AMode::SPOffset { off: 0 },
1256                 zero_reg(),
1257                 I32,
1258                 MemFlags::trusted(),
1259             ));
1260         }
1261 
1262         // Restore the stack pointer to its original value
1263         insts.extend(Self::gen_sp_reg_adjust((guard_size * probe_count) as i32));
1264     }
1265 
gen_probestack_loop(insts: &mut SmallInstVec<Inst>, frame_size: u32, guard_size: u32)1266     fn gen_probestack_loop(insts: &mut SmallInstVec<Inst>, frame_size: u32, guard_size: u32) {
1267         // The non-unrolled version uses two temporary registers. The
1268         // `start` contains the current offset from sp and counts downwards
1269         // during the loop by increments of `guard_size`. The `end` is
1270         // the size of the frame and where we stop.
1271         //
1272         // Note that this emission is all post-regalloc so it should be ok
1273         // to use the temporary registers here as input/output as the loop
1274         // itself is not allowed to use the registers.
1275         let start = writable_spilltmp_reg();
1276         let end = writable_tmp2_reg();
1277         // `gen_inline_probestack` is called after regalloc2, so it's acceptable to reuse
1278         // `start` and `end` as temporaries in load_constant.
1279         insts.extend(Inst::load_constant(start, 0));
1280         insts.extend(Inst::load_constant(end, frame_size.into()));
1281         insts.push(Inst::StackProbeLoop {
1282             start,
1283             end: end.to_reg(),
1284             step: Imm12::maybe_from_u64(guard_size.into()).unwrap(),
1285         });
1286     }
1287 
select_api_key( isa_flags: &aarch64_settings::Flags, call_conv: isa::CallConv, setup_frame: bool, ) -> Option<APIKey>1288     pub fn select_api_key(
1289         isa_flags: &aarch64_settings::Flags,
1290         call_conv: isa::CallConv,
1291         setup_frame: bool,
1292     ) -> Option<APIKey> {
1293         if isa_flags.sign_return_address() && (setup_frame || isa_flags.sign_return_address_all()) {
1294             // The `tail` calling convention uses a zero modifier rather than SP
1295             // because tail calls may happen with a different stack pointer than
1296             // when the function was entered, meaning that it won't be the same when
1297             // the return address is decrypted.
1298             Some(if isa_flags.sign_return_address_with_bkey() {
1299                 match call_conv {
1300                     isa::CallConv::Tail => APIKey::BZ,
1301                     _ => APIKey::BSP,
1302                 }
1303             } else {
1304                 match call_conv {
1305                     isa::CallConv::Tail => APIKey::AZ,
1306                     _ => APIKey::ASP,
1307                 }
1308             })
1309         } else {
1310             None
1311         }
1312     }
1313 }
1314 
1315 /// Is the given register saved in the prologue if clobbered, i.e., is it a
1316 /// callee-save?
is_reg_saved_in_prologue( call_conv: isa::CallConv, enable_pinned_reg: bool, sig: &Signature, r: RealReg, ) -> bool1317 fn is_reg_saved_in_prologue(
1318     call_conv: isa::CallConv,
1319     enable_pinned_reg: bool,
1320     sig: &Signature,
1321     r: RealReg,
1322 ) -> bool {
1323     if call_conv == isa::CallConv::PreserveAll {
1324         return true;
1325     }
1326 
1327     // FIXME: We need to inspect whether a function is returning Z or P regs too.
1328     let save_z_regs = sig
1329         .params
1330         .iter()
1331         .filter(|p| p.value_type.is_dynamic_vector())
1332         .count()
1333         != 0;
1334 
1335     match r.class() {
1336         RegClass::Int => {
1337             // x19 - x28 inclusive are callee-saves.
1338             // However, x21 is the pinned reg if `enable_pinned_reg`
1339             // is set, and is implicitly globally-allocated, hence not
1340             // callee-saved in prologues.
1341             if enable_pinned_reg && r.hw_enc() == PINNED_REG {
1342                 false
1343             } else {
1344                 r.hw_enc() >= 19 && r.hw_enc() <= 28
1345             }
1346         }
1347         RegClass::Float => {
1348             // If a subroutine takes at least one argument in scalable vector registers
1349             // or scalable predicate registers, or if it is a function that returns
1350             // results in such registers, it must ensure that the entire contents of
1351             // z8-z23 are preserved across the call. In other cases it need only
1352             // preserve the low 64 bits of z8-z15.
1353             if save_z_regs {
1354                 r.hw_enc() >= 8 && r.hw_enc() <= 23
1355             } else {
1356                 // v8 - v15 inclusive are callee-saves.
1357                 r.hw_enc() >= 8 && r.hw_enc() <= 15
1358             }
1359         }
1360         RegClass::Vector => unreachable!(),
1361     }
1362 }
1363 
default_aapcs_clobbers() -> PRegSet1364 const fn default_aapcs_clobbers() -> PRegSet {
1365     PRegSet::empty()
1366         // x0 - x17 inclusive are caller-saves.
1367         .with(xreg_preg(0))
1368         .with(xreg_preg(1))
1369         .with(xreg_preg(2))
1370         .with(xreg_preg(3))
1371         .with(xreg_preg(4))
1372         .with(xreg_preg(5))
1373         .with(xreg_preg(6))
1374         .with(xreg_preg(7))
1375         .with(xreg_preg(8))
1376         .with(xreg_preg(9))
1377         .with(xreg_preg(10))
1378         .with(xreg_preg(11))
1379         .with(xreg_preg(12))
1380         .with(xreg_preg(13))
1381         .with(xreg_preg(14))
1382         .with(xreg_preg(15))
1383         .with(xreg_preg(16))
1384         .with(xreg_preg(17))
1385         // v0 - v7 inclusive and v16 - v31 inclusive are
1386         // caller-saves. The upper 64 bits of v8 - v15 inclusive are
1387         // also caller-saves.  However, because we cannot currently
1388         // represent partial registers to regalloc2, we indicate here
1389         // that every vector register is caller-save. Because this
1390         // function is used at *callsites*, approximating in this
1391         // direction (save more than necessary) is conservative and
1392         // thus safe.
1393         //
1394         // Note that we exclude clobbers from a call instruction when
1395         // a call instruction's callee has the same ABI as the caller
1396         // (the current function body); this is safe (anything
1397         // clobbered by callee can be clobbered by caller as well) and
1398         // avoids unnecessary saves of v8-v15 in the prologue even
1399         // though we include them as defs here.
1400         .with(vreg_preg(0))
1401         .with(vreg_preg(1))
1402         .with(vreg_preg(2))
1403         .with(vreg_preg(3))
1404         .with(vreg_preg(4))
1405         .with(vreg_preg(5))
1406         .with(vreg_preg(6))
1407         .with(vreg_preg(7))
1408         .with(vreg_preg(8))
1409         .with(vreg_preg(9))
1410         .with(vreg_preg(10))
1411         .with(vreg_preg(11))
1412         .with(vreg_preg(12))
1413         .with(vreg_preg(13))
1414         .with(vreg_preg(14))
1415         .with(vreg_preg(15))
1416         .with(vreg_preg(16))
1417         .with(vreg_preg(17))
1418         .with(vreg_preg(18))
1419         .with(vreg_preg(19))
1420         .with(vreg_preg(20))
1421         .with(vreg_preg(21))
1422         .with(vreg_preg(22))
1423         .with(vreg_preg(23))
1424         .with(vreg_preg(24))
1425         .with(vreg_preg(25))
1426         .with(vreg_preg(26))
1427         .with(vreg_preg(27))
1428         .with(vreg_preg(28))
1429         .with(vreg_preg(29))
1430         .with(vreg_preg(30))
1431         .with(vreg_preg(31))
1432 }
1433 
winch_clobbers() -> PRegSet1434 const fn winch_clobbers() -> PRegSet {
1435     PRegSet::empty()
1436         .with(xreg_preg(0))
1437         .with(xreg_preg(1))
1438         .with(xreg_preg(2))
1439         .with(xreg_preg(3))
1440         .with(xreg_preg(4))
1441         .with(xreg_preg(5))
1442         .with(xreg_preg(6))
1443         .with(xreg_preg(7))
1444         .with(xreg_preg(8))
1445         .with(xreg_preg(9))
1446         .with(xreg_preg(10))
1447         .with(xreg_preg(11))
1448         .with(xreg_preg(12))
1449         .with(xreg_preg(13))
1450         .with(xreg_preg(14))
1451         .with(xreg_preg(15))
1452         .with(xreg_preg(16))
1453         .with(xreg_preg(17))
1454         // x18 is used to carry platform state and is not allocatable by Winch.
1455         //
1456         // x19 - x27 are considered caller-saved in Winch's calling convention.
1457         .with(xreg_preg(19))
1458         .with(xreg_preg(20))
1459         .with(xreg_preg(21))
1460         .with(xreg_preg(22))
1461         .with(xreg_preg(23))
1462         .with(xreg_preg(24))
1463         .with(xreg_preg(25))
1464         .with(xreg_preg(26))
1465         .with(xreg_preg(27))
1466         // x28 is used as the shadow stack pointer and is considered
1467         // callee-saved.
1468         //
1469         // All vregs are considered caller-saved.
1470         .with(vreg_preg(0))
1471         .with(vreg_preg(1))
1472         .with(vreg_preg(2))
1473         .with(vreg_preg(3))
1474         .with(vreg_preg(4))
1475         .with(vreg_preg(5))
1476         .with(vreg_preg(6))
1477         .with(vreg_preg(7))
1478         .with(vreg_preg(8))
1479         .with(vreg_preg(9))
1480         .with(vreg_preg(10))
1481         .with(vreg_preg(11))
1482         .with(vreg_preg(12))
1483         .with(vreg_preg(13))
1484         .with(vreg_preg(14))
1485         .with(vreg_preg(15))
1486         .with(vreg_preg(16))
1487         .with(vreg_preg(17))
1488         .with(vreg_preg(18))
1489         .with(vreg_preg(19))
1490         .with(vreg_preg(20))
1491         .with(vreg_preg(21))
1492         .with(vreg_preg(22))
1493         .with(vreg_preg(23))
1494         .with(vreg_preg(24))
1495         .with(vreg_preg(25))
1496         .with(vreg_preg(26))
1497         .with(vreg_preg(27))
1498         .with(vreg_preg(28))
1499         .with(vreg_preg(29))
1500         .with(vreg_preg(30))
1501         .with(vreg_preg(31))
1502 }
1503 
all_clobbers() -> PRegSet1504 const fn all_clobbers() -> PRegSet {
1505     PRegSet::empty()
1506         // integer registers: x0 to x28 inclusive. (x29 is FP, x30 is
1507         // LR, x31 is SP/ZR.)
1508         .with(xreg_preg(0))
1509         .with(xreg_preg(1))
1510         .with(xreg_preg(2))
1511         .with(xreg_preg(3))
1512         .with(xreg_preg(4))
1513         .with(xreg_preg(5))
1514         .with(xreg_preg(6))
1515         .with(xreg_preg(7))
1516         .with(xreg_preg(8))
1517         .with(xreg_preg(9))
1518         .with(xreg_preg(10))
1519         .with(xreg_preg(11))
1520         .with(xreg_preg(12))
1521         .with(xreg_preg(13))
1522         .with(xreg_preg(14))
1523         .with(xreg_preg(15))
1524         .with(xreg_preg(16))
1525         .with(xreg_preg(17))
1526         .with(xreg_preg(18))
1527         .with(xreg_preg(19))
1528         .with(xreg_preg(20))
1529         .with(xreg_preg(21))
1530         .with(xreg_preg(22))
1531         .with(xreg_preg(23))
1532         .with(xreg_preg(24))
1533         .with(xreg_preg(25))
1534         .with(xreg_preg(26))
1535         .with(xreg_preg(27))
1536         .with(xreg_preg(28))
1537         // vector registers: v0 to v31 inclusive.
1538         .with(vreg_preg(0))
1539         .with(vreg_preg(1))
1540         .with(vreg_preg(2))
1541         .with(vreg_preg(3))
1542         .with(vreg_preg(4))
1543         .with(vreg_preg(5))
1544         .with(vreg_preg(6))
1545         .with(vreg_preg(7))
1546         .with(vreg_preg(8))
1547         .with(vreg_preg(9))
1548         .with(vreg_preg(10))
1549         .with(vreg_preg(11))
1550         .with(vreg_preg(12))
1551         .with(vreg_preg(13))
1552         .with(vreg_preg(14))
1553         .with(vreg_preg(15))
1554         .with(vreg_preg(16))
1555         .with(vreg_preg(17))
1556         .with(vreg_preg(18))
1557         .with(vreg_preg(19))
1558         .with(vreg_preg(20))
1559         .with(vreg_preg(21))
1560         .with(vreg_preg(22))
1561         .with(vreg_preg(23))
1562         .with(vreg_preg(24))
1563         .with(vreg_preg(25))
1564         .with(vreg_preg(26))
1565         .with(vreg_preg(27))
1566         .with(vreg_preg(28))
1567         .with(vreg_preg(29))
1568         .with(vreg_preg(30))
1569         .with(vreg_preg(31))
1570 }
1571 
/// Clobber set produced by `default_aapcs_clobbers()`, evaluated at compile
/// time.
const DEFAULT_AAPCS_CLOBBERS: PRegSet = default_aapcs_clobbers();
/// Clobber set produced by `winch_clobbers()`: x0-x17, x19-x27 and v0-v31.
const WINCH_CLOBBERS: PRegSet = winch_clobbers();
/// Clobber set produced by `all_clobbers()`: x0-x28 and v0-v31.
const ALL_CLOBBERS: PRegSet = all_clobbers();
/// The empty register set: nothing is clobbered.
const NO_CLOBBERS: PRegSet = PRegSet::empty();
1576 
create_reg_env(enable_pinned_reg: bool) -> MachineEnv1577 const fn create_reg_env(enable_pinned_reg: bool) -> MachineEnv {
1578     const fn preg(r: Reg) -> PReg {
1579         r.to_real_reg().unwrap().preg()
1580     }
1581 
1582     let mut env = MachineEnv {
1583         preferred_regs_by_class: [
1584             PRegSet::empty()
1585                 .with(preg(xreg(0)))
1586                 .with(preg(xreg(1)))
1587                 .with(preg(xreg(2)))
1588                 .with(preg(xreg(3)))
1589                 .with(preg(xreg(4)))
1590                 .with(preg(xreg(5)))
1591                 .with(preg(xreg(6)))
1592                 .with(preg(xreg(7)))
1593                 .with(preg(xreg(8)))
1594                 .with(preg(xreg(9)))
1595                 .with(preg(xreg(10)))
1596                 .with(preg(xreg(11)))
1597                 .with(preg(xreg(12)))
1598                 .with(preg(xreg(13)))
1599                 .with(preg(xreg(14)))
1600                 .with(preg(xreg(15))),
1601             // x16 and x17 are spilltmp and tmp2 (see above).
1602             // x18 could be used by the platform to carry inter-procedural state;
1603             // conservatively assume so and make it not allocatable.
1604             // x19-28 are callee-saved and so not preferred.
1605             // x21 is the pinned register (if enabled) and not allocatable if so.
1606             // x29 is FP, x30 is LR, x31 is SP/ZR.
1607             PRegSet::empty()
1608                 .with(preg(vreg(0)))
1609                 .with(preg(vreg(1)))
1610                 .with(preg(vreg(2)))
1611                 .with(preg(vreg(3)))
1612                 .with(preg(vreg(4)))
1613                 .with(preg(vreg(5)))
1614                 .with(preg(vreg(6)))
1615                 .with(preg(vreg(7)))
1616                 // v8-15 are callee-saved and so not preferred.
1617                 .with(preg(vreg(16)))
1618                 .with(preg(vreg(17)))
1619                 .with(preg(vreg(18)))
1620                 .with(preg(vreg(19)))
1621                 .with(preg(vreg(20)))
1622                 .with(preg(vreg(21)))
1623                 .with(preg(vreg(22)))
1624                 .with(preg(vreg(23)))
1625                 .with(preg(vreg(24)))
1626                 .with(preg(vreg(25)))
1627                 .with(preg(vreg(26)))
1628                 .with(preg(vreg(27)))
1629                 .with(preg(vreg(28)))
1630                 .with(preg(vreg(29)))
1631                 .with(preg(vreg(30)))
1632                 .with(preg(vreg(31))),
1633             // Vector Regclass is unused
1634             PRegSet::empty(),
1635         ],
1636         non_preferred_regs_by_class: [
1637             PRegSet::empty()
1638                 .with(preg(xreg(19)))
1639                 .with(preg(xreg(20)))
1640                 // x21 is pinned reg if enabled; we add to this list below if not.
1641                 .with(preg(xreg(22)))
1642                 .with(preg(xreg(23)))
1643                 .with(preg(xreg(24)))
1644                 .with(preg(xreg(25)))
1645                 .with(preg(xreg(26)))
1646                 .with(preg(xreg(27)))
1647                 .with(preg(xreg(28))),
1648             PRegSet::empty()
1649                 .with(preg(vreg(8)))
1650                 .with(preg(vreg(9)))
1651                 .with(preg(vreg(10)))
1652                 .with(preg(vreg(11)))
1653                 .with(preg(vreg(12)))
1654                 .with(preg(vreg(13)))
1655                 .with(preg(vreg(14)))
1656                 .with(preg(vreg(15))),
1657             // Vector Regclass is unused
1658             PRegSet::empty(),
1659         ],
1660         fixed_stack_slots: vec![],
1661         scratch_by_class: [None, None, None],
1662     };
1663 
1664     if !enable_pinned_reg {
1665         debug_assert!(PINNED_REG == 21);
1666         env.non_preferred_regs_by_class[0].add(preg(xreg(PINNED_REG)));
1667     }
1668 
1669     env
1670 }
1671