1 //! Implementation of a standard AArch64 ABI.
2
3 use crate::CodegenResult;
4 use crate::ir;
5 use crate::ir::MemFlags;
6 use crate::ir::types;
7 use crate::ir::types::*;
8 use crate::ir::{ExternalName, LibCall, Signature, dynamic_to_fixed};
9 use crate::isa;
10 use crate::isa::aarch64::{inst::*, settings as aarch64_settings};
11 use crate::isa::unwind::UnwindInst;
12 use crate::isa::winch;
13 use crate::machinst::*;
14 use crate::settings;
15 use alloc::borrow::ToOwned;
16 use alloc::boxed::Box;
17 use alloc::vec::Vec;
18 use regalloc2::{MachineEnv, PReg, PRegSet};
19 use smallvec::{SmallVec, smallvec};
20
21 // We use a generic implementation that factors out AArch64 and x64 ABI commonalities, because
22 // these ABIs are very similar.
23
24 /// Support for the AArch64 ABI from the callee side (within a function body).
25 pub(crate) type AArch64Callee = Callee<AArch64MachineDeps>;
26
27 impl From<StackAMode> for AMode {
from(stack: StackAMode) -> AMode28 fn from(stack: StackAMode) -> AMode {
29 match stack {
30 StackAMode::IncomingArg(off, stack_args_size) => AMode::IncomingArg {
31 off: i64::from(stack_args_size) - off,
32 },
33 StackAMode::Slot(off) => AMode::SlotOffset { off },
34 StackAMode::OutgoingArg(off) => AMode::SPOffset { off },
35 }
36 }
37 }
38
39 // Returns the size of stack space needed to store the
40 // `clobbered_callee_saved` registers.
compute_clobber_size( call_conv: isa::CallConv, clobbered_callee_saves: &[Writable<RealReg>], ) -> u3241 fn compute_clobber_size(
42 call_conv: isa::CallConv,
43 clobbered_callee_saves: &[Writable<RealReg>],
44 ) -> u32 {
45 let mut int_regs = 0;
46 let mut vec_regs = 0;
47 for ® in clobbered_callee_saves {
48 match reg.to_reg().class() {
49 RegClass::Int => {
50 int_regs += 1;
51 }
52 RegClass::Float => {
53 vec_regs += 1;
54 }
55 RegClass::Vector => unreachable!(),
56 }
57 }
58
59 // Round up to multiple of 2, to keep 16-byte stack alignment.
60 let int_save_bytes = (int_regs + (int_regs & 1)) * 8;
61 let vec_save_bytes = if call_conv == isa::CallConv::PreserveAll {
62 // In the PreserveAll ABI, we save the entire vector register,
63 // i.e., all 128 bits.
64 vec_regs * 16
65 } else {
66 // The Procedure Call Standard for the Arm 64-bit Architecture
67 // (AAPCS64, including several related ABIs such as the one used by
68 // Windows) mandates saving only the bottom 8 bytes of the vector
69 // registers, so we round up the number of registers to ensure
70 // proper stack alignment (similarly to the situation with
71 // `int_reg`).
72 let vec_reg_size = 8;
73 let vec_save_padding = vec_regs & 1;
74 // FIXME: SVE: ABI is different to Neon, so do we treat all vec regs as Z-regs?
75 (vec_regs + vec_save_padding) * vec_reg_size
76 };
77
78 int_save_bytes + vec_save_bytes
79 }
80
/// AArch64-specific ABI behavior.
///
/// This struct just serves as an implementation point for the
/// `ABIMachineSpec` trait; it carries no data and is never actually
/// instantiated.
pub struct AArch64MachineDeps;
84
85 impl IsaFlags for aarch64_settings::Flags {
is_forward_edge_cfi_enabled(&self) -> bool86 fn is_forward_edge_cfi_enabled(&self) -> bool {
87 self.use_bti()
88 }
89 }
90
91 impl ABIMachineSpec for AArch64MachineDeps {
/// The machine instruction type produced by the code-generation hooks of
/// this implementation.
type I = Inst;

/// The ISA-specific settings type handed to the hooks that take
/// `isa_flags`.
type F = aarch64_settings::Flags;

/// This is the limit for the size of argument and return-value areas on the
/// stack. We place a reasonable limit here to avoid integer overflow issues
/// with 32-bit arithmetic: for now, 128 MB.
const STACK_ARG_RET_SIZE_LIMIT: u32 = 128 * 1024 * 1024;
100
/// Returns the width of a machine word in bits (64 on AArch64).
fn word_bits() -> u32 {
    u64::BITS
}
104
105 /// Return required stack alignment in bytes.
stack_align(_call_conv: isa::CallConv) -> u32106 fn stack_align(_call_conv: isa::CallConv) -> u32 {
107 16
108 }
109
compute_arg_locs( call_conv: isa::CallConv, flags: &settings::Flags, params: &[ir::AbiParam], args_or_rets: ArgsOrRets, add_ret_area_ptr: bool, mut args: ArgsAccumulator, ) -> CodegenResult<(u32, Option<usize>)>110 fn compute_arg_locs(
111 call_conv: isa::CallConv,
112 flags: &settings::Flags,
113 params: &[ir::AbiParam],
114 args_or_rets: ArgsOrRets,
115 add_ret_area_ptr: bool,
116 mut args: ArgsAccumulator,
117 ) -> CodegenResult<(u32, Option<usize>)> {
118 let is_apple_cc = call_conv == isa::CallConv::AppleAarch64;
119 let is_winch_return = call_conv == isa::CallConv::Winch && args_or_rets == ArgsOrRets::Rets;
120
121 // See AArch64 ABI (https://github.com/ARM-software/abi-aa/blob/2021Q1/aapcs64/aapcs64.rst#64parameter-passing), sections 6.4.
122 //
123 // MacOS aarch64 is slightly different, see also
124 // https://developer.apple.com/documentation/xcode/writing_arm64_code_for_apple_platforms.
125 // We are diverging from the MacOS aarch64 implementation in the
126 // following ways:
127 // - sign- and zero- extensions of data types less than 32 bits are not
128 // implemented yet.
129 // - we align the arguments stack space to a 16-bytes boundary, while
130 // the MacOS allows aligning only on 8 bytes. In practice it means we're
131 // slightly overallocating when calling, which is fine, and doesn't
132 // break our other invariants that the stack is always allocated in
133 // 16-bytes chunks.
134
135 let mut next_xreg = if call_conv == isa::CallConv::Tail {
136 // We reserve `x0` for the return area pointer. For simplicity, we
137 // reserve it even when there is no return area pointer needed. This
138 // also means that identity functions don't have to shuffle arguments to
139 // different return registers because we shifted all argument register
140 // numbers down by one to make space for the return area pointer.
141 //
142 // Also, we cannot use all allocatable GPRs as arguments because we need
143 // at least one allocatable register for holding the callee address in
144 // indirect calls. So skip `x1` also, reserving it for that role.
145 2
146 } else {
147 0
148 };
149 let mut next_vreg = 0;
150 let mut next_stack: u32 = 0;
151
152 // Note on return values: on the regular ABI, we may return values
153 // in 8 registers for V128 and I64 registers independently of the
154 // number of register values returned in the other class. That is,
155 // we can return values in up to 8 integer and
156 // 8 vector registers at once.
157 let max_per_class_reg_vals = 8; // x0-x7 and v0-v7
158 let mut remaining_reg_vals = 16;
159
160 let ret_area_ptr = if add_ret_area_ptr {
161 debug_assert_eq!(args_or_rets, ArgsOrRets::Args);
162 if call_conv != isa::CallConv::Winch {
163 // In the AAPCS64 calling convention the return area pointer is
164 // stored in x8.
165 Some(ABIArg::reg(
166 xreg(8).to_real_reg().unwrap(),
167 I64,
168 ir::ArgumentExtension::None,
169 ir::ArgumentPurpose::Normal,
170 ))
171 } else {
172 // Use x0 for the return area pointer in the Winch calling convention
173 // to simplify the ABI handling code in Winch by avoiding an AArch64
174 // special case to assign it to x8.
175 next_xreg += 1;
176 Some(ABIArg::reg(
177 xreg(0).to_real_reg().unwrap(),
178 I64,
179 ir::ArgumentExtension::None,
180 ir::ArgumentPurpose::Normal,
181 ))
182 }
183 } else {
184 None
185 };
186
187 for (i, param) in params.into_iter().enumerate() {
188 if is_apple_cc && param.value_type == types::F128 && !flags.enable_llvm_abi_extensions()
189 {
190 panic!(
191 "f128 args/return values not supported for apple_aarch64 unless LLVM ABI extensions are enabled"
192 );
193 }
194
195 let (rcs, reg_types) = Inst::rc_for_type(param.value_type)?;
196
197 if let ir::ArgumentPurpose::StructReturn = param.purpose {
198 assert!(
199 call_conv != isa::CallConv::Tail,
200 "support for StructReturn parameters is not implemented for the `tail` \
201 calling convention yet",
202 );
203 }
204
205 if let ir::ArgumentPurpose::StructArgument(_) = param.purpose {
206 panic!(
207 "StructArgument parameters are not supported on arm64. \
208 Use regular pointer arguments instead."
209 );
210 }
211
212 if let ir::ArgumentPurpose::StructReturn = param.purpose {
213 // FIXME add assert_eq!(args_or_rets, ArgsOrRets::Args); once
214 // ensure_struct_return_ptr_is_returned is gone.
215 assert!(
216 param.value_type == types::I64,
217 "StructReturn must be a pointer sized integer"
218 );
219 args.push(ABIArg::Slots {
220 slots: smallvec![ABIArgSlot::Reg {
221 reg: xreg(8).to_real_reg().unwrap(),
222 ty: types::I64,
223 extension: param.extension,
224 },],
225 purpose: ir::ArgumentPurpose::StructReturn,
226 });
227 continue;
228 }
229
230 // Handle multi register params
231 //
232 // See AArch64 ABI (https://github.com/ARM-software/abi-aa/blob/2021Q1/aapcs64/aapcs64.rst#642parameter-passing-rules), (Section 6.4.2 Stage C).
233 //
234 // For arguments with alignment of 16 we round up the register number
235 // to the next even value. So we can never allocate for example an i128
236 // to X1 and X2, we have to skip one register and do X2, X3
237 // (Stage C.8)
238 // Note: The Apple ABI deviates a bit here. They don't respect Stage C.8
239 // and will happily allocate a i128 to X1 and X2
240 //
241 // For integer types with alignment of 16 we also have the additional
242 // restriction of passing the lower half in Xn and the upper half in Xn+1
243 // (Stage C.9)
244 //
245 // For examples of how LLVM handles this: https://godbolt.org/z/bhd3vvEfh
246 //
247 // On the Apple ABI it is unspecified if we can spill half the value into the stack
248 // i.e load the lower half into x7 and the upper half into the stack
249 // LLVM does not seem to do this, so we are going to replicate that behaviour
250 let is_multi_reg = rcs.len() >= 2;
251 if is_multi_reg {
252 assert!(
253 rcs.len() == 2,
254 "Unable to handle multi reg params with more than 2 regs"
255 );
256 assert!(
257 rcs == &[RegClass::Int, RegClass::Int],
258 "Unable to handle non i64 regs"
259 );
260
261 let reg_class_space = max_per_class_reg_vals - next_xreg;
262 let reg_space = remaining_reg_vals;
263
264 if reg_space >= 2 && reg_class_space >= 2 {
265 // The aarch64 ABI does not allow us to start a split argument
266 // at an odd numbered register. So we need to skip one register
267 //
268 // TODO: The Fast ABI should probably not skip the register
269 if !is_apple_cc && next_xreg % 2 != 0 {
270 next_xreg += 1;
271 }
272
273 let lower_reg = xreg(next_xreg);
274 let upper_reg = xreg(next_xreg + 1);
275
276 args.push(ABIArg::Slots {
277 slots: smallvec![
278 ABIArgSlot::Reg {
279 reg: lower_reg.to_real_reg().unwrap(),
280 ty: reg_types[0],
281 extension: param.extension,
282 },
283 ABIArgSlot::Reg {
284 reg: upper_reg.to_real_reg().unwrap(),
285 ty: reg_types[1],
286 extension: param.extension,
287 },
288 ],
289 purpose: param.purpose,
290 });
291
292 next_xreg += 2;
293 remaining_reg_vals -= 2;
294 continue;
295 }
296 } else {
297 // Single Register parameters
298 let rc = rcs[0];
299 let next_reg = match rc {
300 RegClass::Int => &mut next_xreg,
301 RegClass::Float => &mut next_vreg,
302 RegClass::Vector => unreachable!(),
303 };
304
305 let push_to_reg = if is_winch_return {
306 // Winch uses the first register to return the last result
307 i == params.len() - 1
308 } else {
309 // Use max_per_class_reg_vals & remaining_reg_vals otherwise
310 *next_reg < max_per_class_reg_vals && remaining_reg_vals > 0
311 };
312
313 if push_to_reg {
314 let reg = match rc {
315 RegClass::Int => xreg(*next_reg),
316 RegClass::Float => vreg(*next_reg),
317 RegClass::Vector => unreachable!(),
318 };
319 // Overlay Z-regs on V-regs for parameter passing.
320 let ty = if param.value_type.is_dynamic_vector() {
321 dynamic_to_fixed(param.value_type)
322 } else {
323 param.value_type
324 };
325 args.push(ABIArg::reg(
326 reg.to_real_reg().unwrap(),
327 ty,
328 param.extension,
329 param.purpose,
330 ));
331 *next_reg += 1;
332 remaining_reg_vals -= 1;
333 continue;
334 }
335 }
336
337 // Spill to the stack
338
339 if args_or_rets == ArgsOrRets::Rets && !flags.enable_multi_ret_implicit_sret() {
340 return Err(crate::CodegenError::Unsupported(
341 "Too many return values to fit in registers. \
342 Use a StructReturn argument instead. (#9510)"
343 .to_owned(),
344 ));
345 }
346
347 // Compute the stack slot's size.
348 let size = (ty_bits(param.value_type) / 8) as u32;
349
350 let size = if is_apple_cc || is_winch_return {
351 // MacOS and Winch aarch64 allows stack slots with
352 // sizes less than 8 bytes. They still need to be
353 // properly aligned on their natural data alignment,
354 // though.
355 size
356 } else {
357 // Every arg takes a minimum slot of 8 bytes. (16-byte stack
358 // alignment happens separately after all args.)
359 core::cmp::max(size, 8)
360 };
361
362 if !is_winch_return {
363 // Align the stack slot.
364 debug_assert!(size.is_power_of_two());
365 next_stack = align_to(next_stack, size);
366 }
367
368 let slots = reg_types
369 .iter()
370 .copied()
371 // Build the stack locations from each slot
372 .scan(next_stack, |next_stack, ty| {
373 let slot_offset = *next_stack as i64;
374 *next_stack += (ty_bits(ty) / 8) as u32;
375
376 Some((ty, slot_offset))
377 })
378 .map(|(ty, offset)| ABIArgSlot::Stack {
379 offset,
380 ty,
381 extension: param.extension,
382 })
383 .collect();
384
385 args.push(ABIArg::Slots {
386 slots,
387 purpose: param.purpose,
388 });
389
390 next_stack += size;
391 }
392
393 let extra_arg = if let Some(ret_area_ptr) = ret_area_ptr {
394 args.push_non_formal(ret_area_ptr);
395 Some(args.args().len() - 1)
396 } else {
397 None
398 };
399
400 if is_winch_return {
401 winch::reverse_stack(args, next_stack, false);
402 }
403
404 next_stack = align_to(next_stack, 16);
405
406 Ok((next_stack, extra_arg))
407 }
408
gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Inst409 fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Inst {
410 Inst::gen_load(into_reg, mem.into(), ty, MemFlags::trusted())
411 }
412
gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Inst413 fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Inst {
414 Inst::gen_store(mem.into(), from_reg, ty, MemFlags::trusted())
415 }
416
gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst417 fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {
418 Inst::gen_move(to_reg, from_reg, ty)
419 }
420
gen_extend( to_reg: Writable<Reg>, from_reg: Reg, signed: bool, from_bits: u8, to_bits: u8, ) -> Inst421 fn gen_extend(
422 to_reg: Writable<Reg>,
423 from_reg: Reg,
424 signed: bool,
425 from_bits: u8,
426 to_bits: u8,
427 ) -> Inst {
428 assert!(from_bits < to_bits);
429 Inst::Extend {
430 rd: to_reg,
431 rn: from_reg,
432 signed,
433 from_bits,
434 to_bits,
435 }
436 }
437
gen_args(args: Vec<ArgPair>) -> Inst438 fn gen_args(args: Vec<ArgPair>) -> Inst {
439 Inst::Args { args }
440 }
441
gen_rets(rets: Vec<RetPair>) -> Inst442 fn gen_rets(rets: Vec<RetPair>) -> Inst {
443 Inst::Rets { rets }
444 }
445
gen_add_imm( _call_conv: isa::CallConv, into_reg: Writable<Reg>, from_reg: Reg, imm: u32, ) -> SmallInstVec<Inst>446 fn gen_add_imm(
447 _call_conv: isa::CallConv,
448 into_reg: Writable<Reg>,
449 from_reg: Reg,
450 imm: u32,
451 ) -> SmallInstVec<Inst> {
452 let imm = imm as u64;
453 let mut insts = SmallVec::new();
454 if let Some(imm12) = Imm12::maybe_from_u64(imm) {
455 insts.push(Inst::AluRRImm12 {
456 alu_op: ALUOp::Add,
457 size: OperandSize::Size64,
458 rd: into_reg,
459 rn: from_reg,
460 imm12,
461 });
462 } else {
463 let scratch2 = writable_tmp2_reg();
464 assert_ne!(scratch2.to_reg(), from_reg);
465 // `gen_add_imm` is only ever called after register allocation has taken place, and as a
466 // result it's ok to reuse the scratch2 register here. If that changes, we'll need to
467 // plumb through a way to allocate temporary virtual registers
468 insts.extend(Inst::load_constant(scratch2, imm));
469 insts.push(Inst::AluRRRExtend {
470 alu_op: ALUOp::Add,
471 size: OperandSize::Size64,
472 rd: into_reg,
473 rn: from_reg,
474 rm: scratch2.to_reg(),
475 extendop: ExtendOp::UXTX,
476 });
477 }
478 insts
479 }
480
gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Inst>481 fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Inst> {
482 let mut insts = SmallVec::new();
483 insts.push(Inst::AluRRRExtend {
484 alu_op: ALUOp::SubS,
485 size: OperandSize::Size64,
486 rd: writable_zero_reg(),
487 rn: stack_reg(),
488 rm: limit_reg,
489 extendop: ExtendOp::UXTX,
490 });
491 insts.push(Inst::TrapIf {
492 trap_code: ir::TrapCode::STACK_OVERFLOW,
493 // Here `Lo` == "less than" when interpreting the two
494 // operands as unsigned integers.
495 kind: CondBrKind::Cond(Cond::Lo),
496 });
497 insts
498 }
499
gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>) -> Inst500 fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>) -> Inst {
501 // FIXME: Do something different for dynamic types?
502 let mem = mem.into();
503 Inst::LoadAddr { rd: into_reg, mem }
504 }
505
get_stacklimit_reg(_call_conv: isa::CallConv) -> Reg506 fn get_stacklimit_reg(_call_conv: isa::CallConv) -> Reg {
507 spilltmp_reg()
508 }
509
gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Inst510 fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Inst {
511 let mem = AMode::RegOffset {
512 rn: base,
513 off: offset as i64,
514 };
515 Inst::gen_load(into_reg, mem, ty, MemFlags::trusted())
516 }
517
gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Inst518 fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Inst {
519 let mem = AMode::RegOffset {
520 rn: base,
521 off: offset as i64,
522 };
523 Inst::gen_store(mem, from_reg, ty, MemFlags::trusted())
524 }
525
gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Inst>526 fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Inst> {
527 if amount == 0 {
528 return SmallVec::new();
529 }
530
531 let (amount, is_sub) = if amount > 0 {
532 (amount as u64, false)
533 } else {
534 (-amount as u64, true)
535 };
536
537 let alu_op = if is_sub { ALUOp::Sub } else { ALUOp::Add };
538
539 let mut ret = SmallVec::new();
540 if let Some(imm12) = Imm12::maybe_from_u64(amount) {
541 let adj_inst = Inst::AluRRImm12 {
542 alu_op,
543 size: OperandSize::Size64,
544 rd: writable_stack_reg(),
545 rn: stack_reg(),
546 imm12,
547 };
548 ret.push(adj_inst);
549 } else {
550 let tmp = writable_spilltmp_reg();
551 // `gen_sp_reg_adjust` is called after regalloc2, so it's acceptable to reuse `tmp` for
552 // intermediates in `load_constant`.
553 let const_inst = Inst::load_constant(tmp, amount);
554 let adj_inst = Inst::AluRRRExtend {
555 alu_op,
556 size: OperandSize::Size64,
557 rd: writable_stack_reg(),
558 rn: stack_reg(),
559 rm: tmp.to_reg(),
560 extendop: ExtendOp::UXTX,
561 };
562 ret.extend(const_inst);
563 ret.push(adj_inst);
564 }
565 ret
566 }
567
gen_prologue_frame_setup( call_conv: isa::CallConv, flags: &settings::Flags, isa_flags: &aarch64_settings::Flags, frame_layout: &FrameLayout, ) -> SmallInstVec<Inst>568 fn gen_prologue_frame_setup(
569 call_conv: isa::CallConv,
570 flags: &settings::Flags,
571 isa_flags: &aarch64_settings::Flags,
572 frame_layout: &FrameLayout,
573 ) -> SmallInstVec<Inst> {
574 let setup_frame = frame_layout.setup_area_size > 0;
575 let mut insts = SmallVec::new();
576
577 match Self::select_api_key(isa_flags, call_conv, setup_frame) {
578 Some(key) => {
579 insts.push(Inst::Paci { key });
580 if flags.unwind_info() {
581 insts.push(Inst::Unwind {
582 inst: UnwindInst::Aarch64SetPointerAuth {
583 return_addresses: true,
584 },
585 });
586 }
587 }
588 None => {
589 if isa_flags.use_bti() {
590 insts.push(Inst::Bti {
591 targets: BranchTargetType::C,
592 });
593 }
594
595 if flags.unwind_info() && call_conv == isa::CallConv::AppleAarch64 {
596 // The macOS unwinder seems to require this.
597 insts.push(Inst::Unwind {
598 inst: UnwindInst::Aarch64SetPointerAuth {
599 return_addresses: false,
600 },
601 });
602 }
603 }
604 }
605
606 if setup_frame {
607 // stp fp (x29), lr (x30), [sp, #-16]!
608 insts.push(Inst::StoreP64 {
609 rt: fp_reg(),
610 rt2: link_reg(),
611 mem: PairAMode::SPPreIndexed {
612 simm7: SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap(),
613 },
614 flags: MemFlags::trusted(),
615 });
616
617 if flags.unwind_info() {
618 insts.push(Inst::Unwind {
619 inst: UnwindInst::PushFrameRegs {
620 offset_upward_to_caller_sp: frame_layout.setup_area_size,
621 },
622 });
623 }
624
625 // mov fp (x29), sp. This uses the ADDI rd, rs, 0 form of `MOV` because
626 // the usual encoding (`ORR`) does not work with SP.
627 insts.push(Inst::AluRRImm12 {
628 alu_op: ALUOp::Add,
629 size: OperandSize::Size64,
630 rd: writable_fp_reg(),
631 rn: stack_reg(),
632 imm12: Imm12 {
633 bits: 0,
634 shift12: false,
635 },
636 });
637 }
638
639 insts
640 }
641
gen_epilogue_frame_restore( call_conv: isa::CallConv, _flags: &settings::Flags, _isa_flags: &aarch64_settings::Flags, frame_layout: &FrameLayout, ) -> SmallInstVec<Inst>642 fn gen_epilogue_frame_restore(
643 call_conv: isa::CallConv,
644 _flags: &settings::Flags,
645 _isa_flags: &aarch64_settings::Flags,
646 frame_layout: &FrameLayout,
647 ) -> SmallInstVec<Inst> {
648 let setup_frame = frame_layout.setup_area_size > 0;
649 let mut insts = SmallVec::new();
650
651 if setup_frame {
652 // N.B.: sp is already adjusted to the appropriate place by the
653 // clobber-restore code (which also frees the fixed frame). Hence, there
654 // is no need for the usual `mov sp, fp` here.
655
656 // `ldp fp, lr, [sp], #16`
657 insts.push(Inst::LoadP64 {
658 rt: writable_fp_reg(),
659 rt2: writable_link_reg(),
660 mem: PairAMode::SPPostIndexed {
661 simm7: SImm7Scaled::maybe_from_i64(16, types::I64).unwrap(),
662 },
663 flags: MemFlags::trusted(),
664 });
665 }
666
667 if call_conv == isa::CallConv::Tail && frame_layout.tail_args_size > 0 {
668 insts.extend(Self::gen_sp_reg_adjust(
669 frame_layout.tail_args_size.try_into().unwrap(),
670 ));
671 }
672
673 insts
674 }
675
gen_return( call_conv: isa::CallConv, isa_flags: &aarch64_settings::Flags, frame_layout: &FrameLayout, ) -> SmallInstVec<Inst>676 fn gen_return(
677 call_conv: isa::CallConv,
678 isa_flags: &aarch64_settings::Flags,
679 frame_layout: &FrameLayout,
680 ) -> SmallInstVec<Inst> {
681 let setup_frame = frame_layout.setup_area_size > 0;
682
683 match Self::select_api_key(isa_flags, call_conv, setup_frame) {
684 Some(key) => {
685 smallvec![Inst::AuthenticatedRet {
686 key,
687 is_hint: !isa_flags.has_pauth(),
688 }]
689 }
690 None => {
691 smallvec![Inst::Ret {}]
692 }
693 }
694 }
695
gen_probestack(_insts: &mut SmallInstVec<Self::I>, _: u32)696 fn gen_probestack(_insts: &mut SmallInstVec<Self::I>, _: u32) {
697 // TODO: implement if we ever require stack probes on an AArch64 host
698 // (unlikely unless Lucet is ported)
699 unimplemented!("Stack probing is unimplemented on AArch64");
700 }
701
gen_inline_probestack( insts: &mut SmallInstVec<Self::I>, _call_conv: isa::CallConv, frame_size: u32, guard_size: u32, )702 fn gen_inline_probestack(
703 insts: &mut SmallInstVec<Self::I>,
704 _call_conv: isa::CallConv,
705 frame_size: u32,
706 guard_size: u32,
707 ) {
708 // The stack probe loop currently takes 6 instructions and each inline
709 // probe takes 2 (ish, these numbers sort of depend on the constants).
710 // Set this to 3 to keep the max size of the probe to 6 instructions.
711 const PROBE_MAX_UNROLL: u32 = 3;
712
713 // Calculate how many probes we need to perform. Round down, as we only
714 // need to probe whole guard_size regions we'd otherwise skip over.
715 let probe_count = frame_size / guard_size;
716 if probe_count == 0 {
717 // No probe necessary
718 } else if probe_count <= PROBE_MAX_UNROLL {
719 Self::gen_probestack_unroll(insts, guard_size, probe_count)
720 } else {
721 Self::gen_probestack_loop(insts, frame_size, guard_size)
722 }
723 }
724
gen_clobber_save( call_conv: isa::CallConv, flags: &settings::Flags, frame_layout: &FrameLayout, ) -> SmallVec<[Inst; 16]>725 fn gen_clobber_save(
726 call_conv: isa::CallConv,
727 flags: &settings::Flags,
728 frame_layout: &FrameLayout,
729 ) -> SmallVec<[Inst; 16]> {
730 let (clobbered_int, clobbered_vec) = frame_layout.clobbered_callee_saves_by_class();
731
732 let mut insts = SmallVec::new();
733 let setup_frame = frame_layout.setup_area_size > 0;
734
735 // When a return_call within this function required more stack arguments than we have
736 // present, resize the incoming argument area of the frame to accommodate those arguments.
737 let incoming_args_diff = frame_layout.tail_args_size - frame_layout.incoming_args_size;
738 if incoming_args_diff > 0 {
739 // Decrement SP to account for the additional space required by a tail call.
740 insts.extend(Self::gen_sp_reg_adjust(-(incoming_args_diff as i32)));
741 if flags.unwind_info() {
742 insts.push(Inst::Unwind {
743 inst: UnwindInst::StackAlloc {
744 size: incoming_args_diff,
745 },
746 });
747 }
748
749 // Move fp and lr down.
750 if setup_frame {
751 // Reload the frame pointer from the stack.
752 insts.push(Inst::ULoad64 {
753 rd: regs::writable_fp_reg(),
754 mem: AMode::SPOffset {
755 off: i64::from(incoming_args_diff),
756 },
757 flags: MemFlags::trusted(),
758 });
759
760 // Store the frame pointer and link register again at the new SP
761 insts.push(Inst::StoreP64 {
762 rt: fp_reg(),
763 rt2: link_reg(),
764 mem: PairAMode::SignedOffset {
765 reg: regs::stack_reg(),
766 simm7: SImm7Scaled::maybe_from_i64(0, types::I64).unwrap(),
767 },
768 flags: MemFlags::trusted(),
769 });
770
771 // Keep the frame pointer in sync
772 insts.push(Self::gen_move(
773 regs::writable_fp_reg(),
774 regs::stack_reg(),
775 types::I64,
776 ));
777 }
778 }
779
780 if flags.unwind_info() && setup_frame {
781 // The *unwind* frame (but not the actual frame) starts at the
782 // clobbers, just below the saved FP/LR pair.
783 insts.push(Inst::Unwind {
784 inst: UnwindInst::DefineNewFrame {
785 offset_downward_to_clobbers: frame_layout.clobber_size,
786 offset_upward_to_caller_sp: frame_layout.setup_area_size,
787 },
788 });
789 }
790
791 // We use pre-indexed addressing modes here, rather than the possibly
792 // more efficient "subtract sp once then used fixed offsets" scheme,
793 // because (i) we cannot necessarily guarantee that the offset of a
794 // clobber-save slot will be within a SImm7Scaled (+504-byte) offset
795 // range of the whole frame including other slots, it is more complex to
796 // conditionally generate a two-stage SP adjustment (clobbers then fixed
797 // frame) otherwise, and generally we just want to maintain simplicity
798 // here for maintainability. Because clobbers are at the top of the
799 // frame, just below FP, all that is necessary is to use the pre-indexed
800 // "push" `[sp, #-16]!` addressing mode.
801 //
802 // `frame_offset` tracks offset above start-of-clobbers for unwind-info
803 // purposes.
804 let mut clobber_offset = frame_layout.clobber_size;
805 let clobber_offset_change = 16;
806 let iter = clobbered_int.chunks_exact(2);
807
808 if let [rd] = iter.remainder() {
809 let rd: Reg = rd.to_reg().into();
810
811 debug_assert_eq!(rd.class(), RegClass::Int);
812 // str rd, [sp, #-16]!
813 insts.push(Inst::Store64 {
814 rd,
815 mem: AMode::SPPreIndexed {
816 simm9: SImm9::maybe_from_i64(-clobber_offset_change).unwrap(),
817 },
818 flags: MemFlags::trusted(),
819 });
820
821 if flags.unwind_info() {
822 clobber_offset -= clobber_offset_change as u32;
823 insts.push(Inst::Unwind {
824 inst: UnwindInst::SaveReg {
825 clobber_offset,
826 reg: rd.to_real_reg().unwrap(),
827 },
828 });
829 }
830 }
831
832 let mut iter = iter.rev();
833
834 while let Some([rt, rt2]) = iter.next() {
835 // .to_reg().into(): Writable<RealReg> --> RealReg --> Reg
836 let rt: Reg = rt.to_reg().into();
837 let rt2: Reg = rt2.to_reg().into();
838
839 debug_assert!(rt.class() == RegClass::Int);
840 debug_assert!(rt2.class() == RegClass::Int);
841
842 // stp rt, rt2, [sp, #-16]!
843 insts.push(Inst::StoreP64 {
844 rt,
845 rt2,
846 mem: PairAMode::SPPreIndexed {
847 simm7: SImm7Scaled::maybe_from_i64(-clobber_offset_change, types::I64).unwrap(),
848 },
849 flags: MemFlags::trusted(),
850 });
851
852 if flags.unwind_info() {
853 clobber_offset -= clobber_offset_change as u32;
854 insts.push(Inst::Unwind {
855 inst: UnwindInst::SaveReg {
856 clobber_offset,
857 reg: rt.to_real_reg().unwrap(),
858 },
859 });
860 insts.push(Inst::Unwind {
861 inst: UnwindInst::SaveReg {
862 clobber_offset: clobber_offset + (clobber_offset_change / 2) as u32,
863 reg: rt2.to_real_reg().unwrap(),
864 },
865 });
866 }
867 }
868
869 if call_conv == isa::CallConv::PreserveAll {
870 // Store full vector registers in PreserveAll convention.
871 for reg in clobbered_vec.iter().rev() {
872 let inst = Inst::FpuStore128 {
873 rd: reg.to_reg().into(),
874 mem: AMode::SPPreIndexed {
875 simm9: SImm9::maybe_from_i64(-clobber_offset_change).unwrap(),
876 },
877 flags: MemFlags::trusted(),
878 };
879 insts.push(inst);
880 // N.B.: no unwind info: we don't have a way to
881 // represent "full register" anyway.
882 }
883 } else {
884 let store_vec_reg_half = |rd| Inst::FpuStore64 {
885 rd,
886 mem: AMode::SPPreIndexed {
887 simm9: SImm9::maybe_from_i64(-clobber_offset_change).unwrap(),
888 },
889 flags: MemFlags::trusted(),
890 };
891 let iter = clobbered_vec.chunks_exact(2);
892
893 if let [rd] = iter.remainder() {
894 let rd: Reg = rd.to_reg().into();
895
896 debug_assert_eq!(rd.class(), RegClass::Float);
897 insts.push(store_vec_reg_half(rd));
898
899 if flags.unwind_info() {
900 clobber_offset -= clobber_offset_change as u32;
901 insts.push(Inst::Unwind {
902 inst: UnwindInst::SaveReg {
903 clobber_offset,
904 reg: rd.to_real_reg().unwrap(),
905 },
906 });
907 }
908 }
909
910 let store_vec_reg_half_pair = |rt, rt2| {
911 let clobber_offset_change = 16;
912
913 (
914 Inst::FpuStoreP64 {
915 rt,
916 rt2,
917 mem: PairAMode::SPPreIndexed {
918 simm7: SImm7Scaled::maybe_from_i64(-clobber_offset_change, F64)
919 .unwrap(),
920 },
921 flags: MemFlags::trusted(),
922 },
923 clobber_offset_change as u32,
924 )
925 };
926 let mut iter = iter.rev();
927
928 while let Some([rt, rt2]) = iter.next() {
929 let rt: Reg = rt.to_reg().into();
930 let rt2: Reg = rt2.to_reg().into();
931
932 debug_assert_eq!(rt.class(), RegClass::Float);
933 debug_assert_eq!(rt2.class(), RegClass::Float);
934
935 let (inst, clobber_offset_change) = store_vec_reg_half_pair(rt, rt2);
936
937 insts.push(inst);
938
939 if flags.unwind_info() {
940 clobber_offset -= clobber_offset_change;
941 insts.push(Inst::Unwind {
942 inst: UnwindInst::SaveReg {
943 clobber_offset,
944 reg: rt.to_real_reg().unwrap(),
945 },
946 });
947 insts.push(Inst::Unwind {
948 inst: UnwindInst::SaveReg {
949 clobber_offset: clobber_offset + clobber_offset_change / 2,
950 reg: rt2.to_real_reg().unwrap(),
951 },
952 });
953 }
954 }
955 }
956
957 // Allocate the fixed frame below the clobbers if necessary.
958 let stack_size = frame_layout.fixed_frame_storage_size + frame_layout.outgoing_args_size;
959 if stack_size > 0 {
960 insts.extend(Self::gen_sp_reg_adjust(-(stack_size as i32)));
961 if flags.unwind_info() {
962 insts.push(Inst::Unwind {
963 inst: UnwindInst::StackAlloc { size: stack_size },
964 });
965 }
966 }
967
968 insts
969 }
970
gen_clobber_restore( call_conv: isa::CallConv, _flags: &settings::Flags, frame_layout: &FrameLayout, ) -> SmallVec<[Inst; 16]>971 fn gen_clobber_restore(
972 call_conv: isa::CallConv,
973 _flags: &settings::Flags,
974 frame_layout: &FrameLayout,
975 ) -> SmallVec<[Inst; 16]> {
976 let mut insts = SmallVec::new();
977 let (clobbered_int, clobbered_vec) = frame_layout.clobbered_callee_saves_by_class();
978
979 // Free the fixed frame if necessary.
980 let stack_size = frame_layout.fixed_frame_storage_size + frame_layout.outgoing_args_size;
981 if stack_size > 0 {
982 insts.extend(Self::gen_sp_reg_adjust(stack_size as i32));
983 }
984
985 if call_conv == isa::CallConv::PreserveAll {
986 for reg in clobbered_vec.iter() {
987 let inst = Inst::FpuLoad128 {
988 rd: reg.map(|r| r.into()),
989 mem: AMode::SPPostIndexed {
990 simm9: SImm9::maybe_from_i64(16).unwrap(),
991 },
992 flags: MemFlags::trusted(),
993 };
994 insts.push(inst);
995 // N.B.: no unwind info; we don't have a way to
996 // represent "full vector register saved" anyway.
997 }
998 } else {
999 let load_vec_reg_half = |rd| Inst::FpuLoad64 {
1000 rd,
1001 mem: AMode::SPPostIndexed {
1002 simm9: SImm9::maybe_from_i64(16).unwrap(),
1003 },
1004 flags: MemFlags::trusted(),
1005 };
1006 let load_vec_reg_half_pair = |rt, rt2| Inst::FpuLoadP64 {
1007 rt,
1008 rt2,
1009 mem: PairAMode::SPPostIndexed {
1010 simm7: SImm7Scaled::maybe_from_i64(16, F64).unwrap(),
1011 },
1012 flags: MemFlags::trusted(),
1013 };
1014
1015 let mut iter = clobbered_vec.chunks_exact(2);
1016
1017 while let Some([rt, rt2]) = iter.next() {
1018 let rt: Writable<Reg> = rt.map(|r| r.into());
1019 let rt2: Writable<Reg> = rt2.map(|r| r.into());
1020
1021 debug_assert_eq!(rt.to_reg().class(), RegClass::Float);
1022 debug_assert_eq!(rt2.to_reg().class(), RegClass::Float);
1023 insts.push(load_vec_reg_half_pair(rt, rt2));
1024 }
1025
1026 debug_assert!(iter.remainder().len() <= 1);
1027
1028 if let [rd] = iter.remainder() {
1029 let rd: Writable<Reg> = rd.map(|r| r.into());
1030
1031 debug_assert_eq!(rd.to_reg().class(), RegClass::Float);
1032 insts.push(load_vec_reg_half(rd));
1033 }
1034 }
1035
1036 let mut iter = clobbered_int.chunks_exact(2);
1037
1038 while let Some([rt, rt2]) = iter.next() {
1039 let rt: Writable<Reg> = rt.map(|r| r.into());
1040 let rt2: Writable<Reg> = rt2.map(|r| r.into());
1041
1042 debug_assert_eq!(rt.to_reg().class(), RegClass::Int);
1043 debug_assert_eq!(rt2.to_reg().class(), RegClass::Int);
1044 // ldp rt, rt2, [sp], #16
1045 insts.push(Inst::LoadP64 {
1046 rt,
1047 rt2,
1048 mem: PairAMode::SPPostIndexed {
1049 simm7: SImm7Scaled::maybe_from_i64(16, I64).unwrap(),
1050 },
1051 flags: MemFlags::trusted(),
1052 });
1053 }
1054
1055 debug_assert!(iter.remainder().len() <= 1);
1056
1057 if let [rd] = iter.remainder() {
1058 let rd: Writable<Reg> = rd.map(|r| r.into());
1059
1060 debug_assert_eq!(rd.to_reg().class(), RegClass::Int);
1061 // ldr rd, [sp], #16
1062 insts.push(Inst::ULoad64 {
1063 rd,
1064 mem: AMode::SPPostIndexed {
1065 simm9: SImm9::maybe_from_i64(16).unwrap(),
1066 },
1067 flags: MemFlags::trusted(),
1068 });
1069 }
1070
1071 insts
1072 }
1073
gen_memcpy<F: FnMut(Type) -> Writable<Reg>>( call_conv: isa::CallConv, dst: Reg, src: Reg, size: usize, mut alloc_tmp: F, ) -> SmallVec<[Self::I; 8]>1074 fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
1075 call_conv: isa::CallConv,
1076 dst: Reg,
1077 src: Reg,
1078 size: usize,
1079 mut alloc_tmp: F,
1080 ) -> SmallVec<[Self::I; 8]> {
1081 let mut insts = SmallVec::new();
1082 let arg0 = writable_xreg(0);
1083 let arg1 = writable_xreg(1);
1084 let arg2 = writable_xreg(2);
1085 let tmp = alloc_tmp(Self::word_type());
1086 insts.extend(Inst::load_constant(tmp, size as u64));
1087 insts.push(Inst::Call {
1088 info: Box::new(CallInfo {
1089 dest: ExternalName::LibCall(LibCall::Memcpy),
1090 uses: smallvec![
1091 CallArgPair {
1092 vreg: dst,
1093 preg: arg0.to_reg()
1094 },
1095 CallArgPair {
1096 vreg: src,
1097 preg: arg1.to_reg()
1098 },
1099 CallArgPair {
1100 vreg: tmp.to_reg(),
1101 preg: arg2.to_reg()
1102 }
1103 ],
1104 defs: smallvec![],
1105 clobbers: Self::get_regs_clobbered_by_call(call_conv, false),
1106 caller_conv: call_conv,
1107 callee_conv: call_conv,
1108 callee_pop_size: 0,
1109 try_call_info: None,
1110 patchable: false,
1111 }),
1112 });
1113 insts
1114 }
1115
get_number_of_spillslots_for_value( rc: RegClass, vector_size: u32, _isa_flags: &Self::F, ) -> u321116 fn get_number_of_spillslots_for_value(
1117 rc: RegClass,
1118 vector_size: u32,
1119 _isa_flags: &Self::F,
1120 ) -> u32 {
1121 assert_eq!(vector_size % 8, 0);
1122 // We allocate in terms of 8-byte slots.
1123 match rc {
1124 RegClass::Int => 1,
1125 RegClass::Float => vector_size / 8,
1126 RegClass::Vector => unreachable!(),
1127 }
1128 }
1129
get_machine_env(flags: &settings::Flags, _call_conv: isa::CallConv) -> &MachineEnv1130 fn get_machine_env(flags: &settings::Flags, _call_conv: isa::CallConv) -> &MachineEnv {
1131 if flags.enable_pinned_reg() {
1132 static MACHINE_ENV: MachineEnv = create_reg_env(true);
1133 &MACHINE_ENV
1134 } else {
1135 static MACHINE_ENV: MachineEnv = create_reg_env(false);
1136 &MACHINE_ENV
1137 }
1138 }
1139
get_regs_clobbered_by_call(call_conv: isa::CallConv, is_exception: bool) -> PRegSet1140 fn get_regs_clobbered_by_call(call_conv: isa::CallConv, is_exception: bool) -> PRegSet {
1141 match (call_conv, is_exception) {
1142 (isa::CallConv::Tail, true) => ALL_CLOBBERS,
1143 (isa::CallConv::Winch, true) => ALL_CLOBBERS,
1144 (isa::CallConv::Winch, false) => WINCH_CLOBBERS,
1145 // Note that "PreserveAll" actually preserves nothing at
1146 // the callsite if used for a `try_call`, because the
1147 // unwinder ABI for `try_call`s is still "no clobbered
1148 // register restores" for this ABI (so as to work with
1149 // Wasmtime).
1150 (isa::CallConv::PreserveAll, true) => ALL_CLOBBERS,
1151 (isa::CallConv::SystemV, _) => DEFAULT_AAPCS_CLOBBERS,
1152 (isa::CallConv::PreserveAll, _) => NO_CLOBBERS,
1153 (_, false) => DEFAULT_AAPCS_CLOBBERS,
1154 (_, true) => panic!("unimplemented clobbers for exn abi of {call_conv:?}"),
1155 }
1156 }
1157
get_ext_mode( call_conv: isa::CallConv, specified: ir::ArgumentExtension, ) -> ir::ArgumentExtension1158 fn get_ext_mode(
1159 call_conv: isa::CallConv,
1160 specified: ir::ArgumentExtension,
1161 ) -> ir::ArgumentExtension {
1162 if call_conv == isa::CallConv::AppleAarch64 {
1163 specified
1164 } else {
1165 ir::ArgumentExtension::None
1166 }
1167 }
1168
compute_frame_layout( call_conv: isa::CallConv, flags: &settings::Flags, sig: &Signature, regs: &[Writable<RealReg>], function_calls: FunctionCalls, incoming_args_size: u32, tail_args_size: u32, stackslots_size: u32, fixed_frame_storage_size: u32, outgoing_args_size: u32, ) -> FrameLayout1169 fn compute_frame_layout(
1170 call_conv: isa::CallConv,
1171 flags: &settings::Flags,
1172 sig: &Signature,
1173 regs: &[Writable<RealReg>],
1174 function_calls: FunctionCalls,
1175 incoming_args_size: u32,
1176 tail_args_size: u32,
1177 stackslots_size: u32,
1178 fixed_frame_storage_size: u32,
1179 outgoing_args_size: u32,
1180 ) -> FrameLayout {
1181 let mut regs: Vec<Writable<RealReg>> = regs
1182 .iter()
1183 .cloned()
1184 .filter(|r| {
1185 is_reg_saved_in_prologue(call_conv, flags.enable_pinned_reg(), sig, r.to_reg())
1186 })
1187 .collect();
1188
1189 // Sort registers for deterministic code output. We can do an unstable
1190 // sort because the registers will be unique (there are no dups).
1191 regs.sort_unstable();
1192
1193 // Compute clobber size.
1194 let clobber_size = compute_clobber_size(call_conv, ®s);
1195
1196 // Compute linkage frame size.
1197 let setup_area_size = if flags.preserve_frame_pointers()
1198 || function_calls != FunctionCalls::None
1199 // The function arguments that are passed on the stack are addressed
1200 // relative to the Frame Pointer.
1201 || incoming_args_size > 0
1202 || clobber_size > 0
1203 || fixed_frame_storage_size > 0
1204 {
1205 16 // FP, LR
1206 } else {
1207 0
1208 };
1209
1210 // Return FrameLayout structure.
1211 FrameLayout {
1212 word_bytes: 8,
1213 incoming_args_size,
1214 tail_args_size,
1215 setup_area_size,
1216 clobber_size,
1217 fixed_frame_storage_size,
1218 stackslots_size,
1219 outgoing_args_size,
1220 clobbered_callee_saves: regs,
1221 function_calls,
1222 }
1223 }
1224
retval_temp_reg(_call_conv_of_callee: isa::CallConv) -> Writable<Reg>1225 fn retval_temp_reg(_call_conv_of_callee: isa::CallConv) -> Writable<Reg> {
1226 // Use x9 as a temp if needed: clobbered, not a
1227 // retval.
1228 regs::writable_xreg(9)
1229 }
1230
exception_payload_regs(call_conv: isa::CallConv) -> &'static [Reg]1231 fn exception_payload_regs(call_conv: isa::CallConv) -> &'static [Reg] {
1232 const PAYLOAD_REGS: &'static [Reg] = &[regs::xreg(0), regs::xreg(1)];
1233 match call_conv {
1234 isa::CallConv::SystemV | isa::CallConv::Tail | isa::CallConv::PreserveAll => {
1235 PAYLOAD_REGS
1236 }
1237 _ => &[],
1238 }
1239 }
1240 }
1241
1242 impl AArch64MachineDeps {
gen_probestack_unroll(insts: &mut SmallInstVec<Inst>, guard_size: u32, probe_count: u32)1243 fn gen_probestack_unroll(insts: &mut SmallInstVec<Inst>, guard_size: u32, probe_count: u32) {
1244 // When manually unrolling adjust the stack pointer and then write a zero
1245 // to the stack at that offset. This generates something like
1246 // `sub sp, sp, #1, lsl #12` followed by `stur wzr, [sp]`.
1247 //
1248 // We do this because valgrind expects us to never write beyond the stack
1249 // pointer and associated redzone.
1250 // See: https://github.com/bytecodealliance/wasmtime/issues/7454
1251 for _ in 0..probe_count {
1252 insts.extend(Self::gen_sp_reg_adjust(-(guard_size as i32)));
1253
1254 insts.push(Inst::gen_store(
1255 AMode::SPOffset { off: 0 },
1256 zero_reg(),
1257 I32,
1258 MemFlags::trusted(),
1259 ));
1260 }
1261
1262 // Restore the stack pointer to its original value
1263 insts.extend(Self::gen_sp_reg_adjust((guard_size * probe_count) as i32));
1264 }
1265
gen_probestack_loop(insts: &mut SmallInstVec<Inst>, frame_size: u32, guard_size: u32)1266 fn gen_probestack_loop(insts: &mut SmallInstVec<Inst>, frame_size: u32, guard_size: u32) {
1267 // The non-unrolled version uses two temporary registers. The
1268 // `start` contains the current offset from sp and counts downwards
1269 // during the loop by increments of `guard_size`. The `end` is
1270 // the size of the frame and where we stop.
1271 //
1272 // Note that this emission is all post-regalloc so it should be ok
1273 // to use the temporary registers here as input/output as the loop
1274 // itself is not allowed to use the registers.
1275 let start = writable_spilltmp_reg();
1276 let end = writable_tmp2_reg();
1277 // `gen_inline_probestack` is called after regalloc2, so it's acceptable to reuse
1278 // `start` and `end` as temporaries in load_constant.
1279 insts.extend(Inst::load_constant(start, 0));
1280 insts.extend(Inst::load_constant(end, frame_size.into()));
1281 insts.push(Inst::StackProbeLoop {
1282 start,
1283 end: end.to_reg(),
1284 step: Imm12::maybe_from_u64(guard_size.into()).unwrap(),
1285 });
1286 }
1287
select_api_key( isa_flags: &aarch64_settings::Flags, call_conv: isa::CallConv, setup_frame: bool, ) -> Option<APIKey>1288 pub fn select_api_key(
1289 isa_flags: &aarch64_settings::Flags,
1290 call_conv: isa::CallConv,
1291 setup_frame: bool,
1292 ) -> Option<APIKey> {
1293 if isa_flags.sign_return_address() && (setup_frame || isa_flags.sign_return_address_all()) {
1294 // The `tail` calling convention uses a zero modifier rather than SP
1295 // because tail calls may happen with a different stack pointer than
1296 // when the function was entered, meaning that it won't be the same when
1297 // the return address is decrypted.
1298 Some(if isa_flags.sign_return_address_with_bkey() {
1299 match call_conv {
1300 isa::CallConv::Tail => APIKey::BZ,
1301 _ => APIKey::BSP,
1302 }
1303 } else {
1304 match call_conv {
1305 isa::CallConv::Tail => APIKey::AZ,
1306 _ => APIKey::ASP,
1307 }
1308 })
1309 } else {
1310 None
1311 }
1312 }
1313 }
1314
1315 /// Is the given register saved in the prologue if clobbered, i.e., is it a
1316 /// callee-save?
is_reg_saved_in_prologue( call_conv: isa::CallConv, enable_pinned_reg: bool, sig: &Signature, r: RealReg, ) -> bool1317 fn is_reg_saved_in_prologue(
1318 call_conv: isa::CallConv,
1319 enable_pinned_reg: bool,
1320 sig: &Signature,
1321 r: RealReg,
1322 ) -> bool {
1323 if call_conv == isa::CallConv::PreserveAll {
1324 return true;
1325 }
1326
1327 // FIXME: We need to inspect whether a function is returning Z or P regs too.
1328 let save_z_regs = sig
1329 .params
1330 .iter()
1331 .filter(|p| p.value_type.is_dynamic_vector())
1332 .count()
1333 != 0;
1334
1335 match r.class() {
1336 RegClass::Int => {
1337 // x19 - x28 inclusive are callee-saves.
1338 // However, x21 is the pinned reg if `enable_pinned_reg`
1339 // is set, and is implicitly globally-allocated, hence not
1340 // callee-saved in prologues.
1341 if enable_pinned_reg && r.hw_enc() == PINNED_REG {
1342 false
1343 } else {
1344 r.hw_enc() >= 19 && r.hw_enc() <= 28
1345 }
1346 }
1347 RegClass::Float => {
1348 // If a subroutine takes at least one argument in scalable vector registers
1349 // or scalable predicate registers, or if it is a function that returns
1350 // results in such registers, it must ensure that the entire contents of
1351 // z8-z23 are preserved across the call. In other cases it need only
1352 // preserve the low 64 bits of z8-z15.
1353 if save_z_regs {
1354 r.hw_enc() >= 8 && r.hw_enc() <= 23
1355 } else {
1356 // v8 - v15 inclusive are callee-saves.
1357 r.hw_enc() >= 8 && r.hw_enc() <= 15
1358 }
1359 }
1360 RegClass::Vector => unreachable!(),
1361 }
1362 }
1363
default_aapcs_clobbers() -> PRegSet1364 const fn default_aapcs_clobbers() -> PRegSet {
1365 PRegSet::empty()
1366 // x0 - x17 inclusive are caller-saves.
1367 .with(xreg_preg(0))
1368 .with(xreg_preg(1))
1369 .with(xreg_preg(2))
1370 .with(xreg_preg(3))
1371 .with(xreg_preg(4))
1372 .with(xreg_preg(5))
1373 .with(xreg_preg(6))
1374 .with(xreg_preg(7))
1375 .with(xreg_preg(8))
1376 .with(xreg_preg(9))
1377 .with(xreg_preg(10))
1378 .with(xreg_preg(11))
1379 .with(xreg_preg(12))
1380 .with(xreg_preg(13))
1381 .with(xreg_preg(14))
1382 .with(xreg_preg(15))
1383 .with(xreg_preg(16))
1384 .with(xreg_preg(17))
1385 // v0 - v7 inclusive and v16 - v31 inclusive are
1386 // caller-saves. The upper 64 bits of v8 - v15 inclusive are
1387 // also caller-saves. However, because we cannot currently
1388 // represent partial registers to regalloc2, we indicate here
1389 // that every vector register is caller-save. Because this
1390 // function is used at *callsites*, approximating in this
1391 // direction (save more than necessary) is conservative and
1392 // thus safe.
1393 //
1394 // Note that we exclude clobbers from a call instruction when
1395 // a call instruction's callee has the same ABI as the caller
1396 // (the current function body); this is safe (anything
1397 // clobbered by callee can be clobbered by caller as well) and
1398 // avoids unnecessary saves of v8-v15 in the prologue even
1399 // though we include them as defs here.
1400 .with(vreg_preg(0))
1401 .with(vreg_preg(1))
1402 .with(vreg_preg(2))
1403 .with(vreg_preg(3))
1404 .with(vreg_preg(4))
1405 .with(vreg_preg(5))
1406 .with(vreg_preg(6))
1407 .with(vreg_preg(7))
1408 .with(vreg_preg(8))
1409 .with(vreg_preg(9))
1410 .with(vreg_preg(10))
1411 .with(vreg_preg(11))
1412 .with(vreg_preg(12))
1413 .with(vreg_preg(13))
1414 .with(vreg_preg(14))
1415 .with(vreg_preg(15))
1416 .with(vreg_preg(16))
1417 .with(vreg_preg(17))
1418 .with(vreg_preg(18))
1419 .with(vreg_preg(19))
1420 .with(vreg_preg(20))
1421 .with(vreg_preg(21))
1422 .with(vreg_preg(22))
1423 .with(vreg_preg(23))
1424 .with(vreg_preg(24))
1425 .with(vreg_preg(25))
1426 .with(vreg_preg(26))
1427 .with(vreg_preg(27))
1428 .with(vreg_preg(28))
1429 .with(vreg_preg(29))
1430 .with(vreg_preg(30))
1431 .with(vreg_preg(31))
1432 }
1433
winch_clobbers() -> PRegSet1434 const fn winch_clobbers() -> PRegSet {
1435 PRegSet::empty()
1436 .with(xreg_preg(0))
1437 .with(xreg_preg(1))
1438 .with(xreg_preg(2))
1439 .with(xreg_preg(3))
1440 .with(xreg_preg(4))
1441 .with(xreg_preg(5))
1442 .with(xreg_preg(6))
1443 .with(xreg_preg(7))
1444 .with(xreg_preg(8))
1445 .with(xreg_preg(9))
1446 .with(xreg_preg(10))
1447 .with(xreg_preg(11))
1448 .with(xreg_preg(12))
1449 .with(xreg_preg(13))
1450 .with(xreg_preg(14))
1451 .with(xreg_preg(15))
1452 .with(xreg_preg(16))
1453 .with(xreg_preg(17))
1454 // x18 is used to carry platform state and is not allocatable by Winch.
1455 //
1456 // x19 - x27 are considered caller-saved in Winch's calling convention.
1457 .with(xreg_preg(19))
1458 .with(xreg_preg(20))
1459 .with(xreg_preg(21))
1460 .with(xreg_preg(22))
1461 .with(xreg_preg(23))
1462 .with(xreg_preg(24))
1463 .with(xreg_preg(25))
1464 .with(xreg_preg(26))
1465 .with(xreg_preg(27))
1466 // x28 is used as the shadow stack pointer and is considered
1467 // callee-saved.
1468 //
1469 // All vregs are considered caller-saved.
1470 .with(vreg_preg(0))
1471 .with(vreg_preg(1))
1472 .with(vreg_preg(2))
1473 .with(vreg_preg(3))
1474 .with(vreg_preg(4))
1475 .with(vreg_preg(5))
1476 .with(vreg_preg(6))
1477 .with(vreg_preg(7))
1478 .with(vreg_preg(8))
1479 .with(vreg_preg(9))
1480 .with(vreg_preg(10))
1481 .with(vreg_preg(11))
1482 .with(vreg_preg(12))
1483 .with(vreg_preg(13))
1484 .with(vreg_preg(14))
1485 .with(vreg_preg(15))
1486 .with(vreg_preg(16))
1487 .with(vreg_preg(17))
1488 .with(vreg_preg(18))
1489 .with(vreg_preg(19))
1490 .with(vreg_preg(20))
1491 .with(vreg_preg(21))
1492 .with(vreg_preg(22))
1493 .with(vreg_preg(23))
1494 .with(vreg_preg(24))
1495 .with(vreg_preg(25))
1496 .with(vreg_preg(26))
1497 .with(vreg_preg(27))
1498 .with(vreg_preg(28))
1499 .with(vreg_preg(29))
1500 .with(vreg_preg(30))
1501 .with(vreg_preg(31))
1502 }
1503
all_clobbers() -> PRegSet1504 const fn all_clobbers() -> PRegSet {
1505 PRegSet::empty()
1506 // integer registers: x0 to x28 inclusive. (x29 is FP, x30 is
1507 // LR, x31 is SP/ZR.)
1508 .with(xreg_preg(0))
1509 .with(xreg_preg(1))
1510 .with(xreg_preg(2))
1511 .with(xreg_preg(3))
1512 .with(xreg_preg(4))
1513 .with(xreg_preg(5))
1514 .with(xreg_preg(6))
1515 .with(xreg_preg(7))
1516 .with(xreg_preg(8))
1517 .with(xreg_preg(9))
1518 .with(xreg_preg(10))
1519 .with(xreg_preg(11))
1520 .with(xreg_preg(12))
1521 .with(xreg_preg(13))
1522 .with(xreg_preg(14))
1523 .with(xreg_preg(15))
1524 .with(xreg_preg(16))
1525 .with(xreg_preg(17))
1526 .with(xreg_preg(18))
1527 .with(xreg_preg(19))
1528 .with(xreg_preg(20))
1529 .with(xreg_preg(21))
1530 .with(xreg_preg(22))
1531 .with(xreg_preg(23))
1532 .with(xreg_preg(24))
1533 .with(xreg_preg(25))
1534 .with(xreg_preg(26))
1535 .with(xreg_preg(27))
1536 .with(xreg_preg(28))
1537 // vector registers: v0 to v31 inclusive.
1538 .with(vreg_preg(0))
1539 .with(vreg_preg(1))
1540 .with(vreg_preg(2))
1541 .with(vreg_preg(3))
1542 .with(vreg_preg(4))
1543 .with(vreg_preg(5))
1544 .with(vreg_preg(6))
1545 .with(vreg_preg(7))
1546 .with(vreg_preg(8))
1547 .with(vreg_preg(9))
1548 .with(vreg_preg(10))
1549 .with(vreg_preg(11))
1550 .with(vreg_preg(12))
1551 .with(vreg_preg(13))
1552 .with(vreg_preg(14))
1553 .with(vreg_preg(15))
1554 .with(vreg_preg(16))
1555 .with(vreg_preg(17))
1556 .with(vreg_preg(18))
1557 .with(vreg_preg(19))
1558 .with(vreg_preg(20))
1559 .with(vreg_preg(21))
1560 .with(vreg_preg(22))
1561 .with(vreg_preg(23))
1562 .with(vreg_preg(24))
1563 .with(vreg_preg(25))
1564 .with(vreg_preg(26))
1565 .with(vreg_preg(27))
1566 .with(vreg_preg(28))
1567 .with(vreg_preg(29))
1568 .with(vreg_preg(30))
1569 .with(vreg_preg(31))
1570 }
1571
1572 const DEFAULT_AAPCS_CLOBBERS: PRegSet = default_aapcs_clobbers();
1573 const WINCH_CLOBBERS: PRegSet = winch_clobbers();
1574 const ALL_CLOBBERS: PRegSet = all_clobbers();
1575 const NO_CLOBBERS: PRegSet = PRegSet::empty();
1576
create_reg_env(enable_pinned_reg: bool) -> MachineEnv1577 const fn create_reg_env(enable_pinned_reg: bool) -> MachineEnv {
1578 const fn preg(r: Reg) -> PReg {
1579 r.to_real_reg().unwrap().preg()
1580 }
1581
1582 let mut env = MachineEnv {
1583 preferred_regs_by_class: [
1584 PRegSet::empty()
1585 .with(preg(xreg(0)))
1586 .with(preg(xreg(1)))
1587 .with(preg(xreg(2)))
1588 .with(preg(xreg(3)))
1589 .with(preg(xreg(4)))
1590 .with(preg(xreg(5)))
1591 .with(preg(xreg(6)))
1592 .with(preg(xreg(7)))
1593 .with(preg(xreg(8)))
1594 .with(preg(xreg(9)))
1595 .with(preg(xreg(10)))
1596 .with(preg(xreg(11)))
1597 .with(preg(xreg(12)))
1598 .with(preg(xreg(13)))
1599 .with(preg(xreg(14)))
1600 .with(preg(xreg(15))),
1601 // x16 and x17 are spilltmp and tmp2 (see above).
1602 // x18 could be used by the platform to carry inter-procedural state;
1603 // conservatively assume so and make it not allocatable.
1604 // x19-28 are callee-saved and so not preferred.
1605 // x21 is the pinned register (if enabled) and not allocatable if so.
1606 // x29 is FP, x30 is LR, x31 is SP/ZR.
1607 PRegSet::empty()
1608 .with(preg(vreg(0)))
1609 .with(preg(vreg(1)))
1610 .with(preg(vreg(2)))
1611 .with(preg(vreg(3)))
1612 .with(preg(vreg(4)))
1613 .with(preg(vreg(5)))
1614 .with(preg(vreg(6)))
1615 .with(preg(vreg(7)))
1616 // v8-15 are callee-saved and so not preferred.
1617 .with(preg(vreg(16)))
1618 .with(preg(vreg(17)))
1619 .with(preg(vreg(18)))
1620 .with(preg(vreg(19)))
1621 .with(preg(vreg(20)))
1622 .with(preg(vreg(21)))
1623 .with(preg(vreg(22)))
1624 .with(preg(vreg(23)))
1625 .with(preg(vreg(24)))
1626 .with(preg(vreg(25)))
1627 .with(preg(vreg(26)))
1628 .with(preg(vreg(27)))
1629 .with(preg(vreg(28)))
1630 .with(preg(vreg(29)))
1631 .with(preg(vreg(30)))
1632 .with(preg(vreg(31))),
1633 // Vector Regclass is unused
1634 PRegSet::empty(),
1635 ],
1636 non_preferred_regs_by_class: [
1637 PRegSet::empty()
1638 .with(preg(xreg(19)))
1639 .with(preg(xreg(20)))
1640 // x21 is pinned reg if enabled; we add to this list below if not.
1641 .with(preg(xreg(22)))
1642 .with(preg(xreg(23)))
1643 .with(preg(xreg(24)))
1644 .with(preg(xreg(25)))
1645 .with(preg(xreg(26)))
1646 .with(preg(xreg(27)))
1647 .with(preg(xreg(28))),
1648 PRegSet::empty()
1649 .with(preg(vreg(8)))
1650 .with(preg(vreg(9)))
1651 .with(preg(vreg(10)))
1652 .with(preg(vreg(11)))
1653 .with(preg(vreg(12)))
1654 .with(preg(vreg(13)))
1655 .with(preg(vreg(14)))
1656 .with(preg(vreg(15))),
1657 // Vector Regclass is unused
1658 PRegSet::empty(),
1659 ],
1660 fixed_stack_slots: vec![],
1661 scratch_by_class: [None, None, None],
1662 };
1663
1664 if !enable_pinned_reg {
1665 debug_assert!(PINNED_REG == 21);
1666 env.non_preferred_regs_by_class[0].add(preg(xreg(PINNED_REG)));
1667 }
1668
1669 env
1670 }
1671