1 //! Assembler library implementation for Aarch64.
2 use super::{address::Address, regs};
3 use crate::CallingConvention;
4 use crate::aarch64::regs::zero;
5 use crate::masm::{
6     DivKind, Extend, ExtendKind, FloatCmpKind, Imm, IntCmpKind, RemKind, RoundingMode, ShiftKind,
7     Signed, TRUSTED_FLAGS, TruncKind,
8 };
9 use crate::{
10     constant_pool::ConstantPool,
11     masm::OperandSize,
12     reg::{Reg, WritableReg, writable},
13 };
14 
15 use cranelift_codegen::PatchRegion;
16 use cranelift_codegen::isa::aarch64::inst::emit::{enc_arith_rrr, enc_move_wide, enc_movk};
17 use cranelift_codegen::isa::aarch64::inst::{
18     ASIMDFPModImm, FpuToIntOp, MoveWideConst, NZCV, UImm5,
19 };
20 use cranelift_codegen::{
21     Final, MachBuffer, MachBufferFinalized, MachInst, MachInstEmit, MachInstEmitState, MachLabel,
22     Writable,
23     ir::{ExternalName, MemFlags, SourceLoc, TrapCode, UserExternalNameRef},
24     isa::aarch64::inst::{
25         self, ALUOp, ALUOp3, AMode, BitOp, BranchTarget, Cond, CondBrKind, ExtendOp,
26         FPULeftShiftImm, FPUOp1, FPUOp2,
27         FPUOpRI::{self, UShr32, UShr64},
28         FPUOpRIMod, FPURightShiftImm, FpuRoundMode, Imm12, ImmLogic, ImmShift, Inst, IntToFpuOp,
29         PairAMode, ScalarSize, VecLanesOp, VecMisc2, VectorSize,
30         emit::{EmitInfo, EmitState},
31     },
32     settings,
33 };
34 use regalloc2::RegClass;
35 use wasmtime_core::math::{f32_cvt_to_int_bounds, f64_cvt_to_int_bounds};
36 
37 impl From<OperandSize> for inst::OperandSize {
38     fn from(size: OperandSize) -> Self {
39         match size {
40             OperandSize::S32 => Self::Size32,
41             OperandSize::S64 => Self::Size64,
42             s => panic!("Invalid operand size {s:?}"),
43         }
44     }
45 }
46 
47 impl From<IntCmpKind> for Cond {
48     fn from(value: IntCmpKind) -> Self {
49         match value {
50             IntCmpKind::Eq => Cond::Eq,
51             IntCmpKind::Ne => Cond::Ne,
52             IntCmpKind::LtS => Cond::Lt,
53             IntCmpKind::LtU => Cond::Lo,
54             IntCmpKind::GtS => Cond::Gt,
55             IntCmpKind::GtU => Cond::Hi,
56             IntCmpKind::LeS => Cond::Le,
57             IntCmpKind::LeU => Cond::Ls,
58             IntCmpKind::GeS => Cond::Ge,
59             IntCmpKind::GeU => Cond::Hs,
60         }
61     }
62 }
63 
64 impl From<FloatCmpKind> for Cond {
65     fn from(value: FloatCmpKind) -> Self {
66         match value {
67             FloatCmpKind::Eq => Cond::Eq,
68             FloatCmpKind::Ne => Cond::Ne,
69             FloatCmpKind::Lt => Cond::Mi,
70             FloatCmpKind::Gt => Cond::Gt,
71             FloatCmpKind::Le => Cond::Ls,
72             FloatCmpKind::Ge => Cond::Ge,
73         }
74     }
75 }
76 
77 impl From<OperandSize> for ScalarSize {
78     fn from(size: OperandSize) -> ScalarSize {
79         match size {
80             OperandSize::S8 => ScalarSize::Size8,
81             OperandSize::S16 => ScalarSize::Size16,
82             OperandSize::S32 => ScalarSize::Size32,
83             OperandSize::S64 => ScalarSize::Size64,
84             OperandSize::S128 => ScalarSize::Size128,
85         }
86     }
87 }
88 
89 impl From<ShiftKind> for ALUOp {
90     fn from(kind: ShiftKind) -> Self {
91         match kind {
92             ShiftKind::Shl => ALUOp::Lsl,
93             ShiftKind::ShrS => ALUOp::Asr,
94             ShiftKind::ShrU => ALUOp::Lsr,
95             ShiftKind::Rotr => ALUOp::Extr,
96             ShiftKind::Rotl => ALUOp::Extr,
97         }
98     }
99 }
100 
101 /// Low level assembler implementation for Aarch64.
102 pub(crate) struct Assembler {
103     /// The machine instruction buffer.
104     buffer: MachBuffer<Inst>,
105     /// Constant emission information.
106     emit_info: EmitInfo,
107     /// Emission state.
108     emit_state: EmitState,
109     /// Constant pool.
110     pool: ConstantPool,
111 }
112 
113 impl Assembler {
114     /// Create a new Aarch64 assembler.
115     pub fn new(shared_flags: settings::Flags) -> Self {
116         Self {
117             buffer: MachBuffer::<Inst>::new(),
118             emit_state: Default::default(),
119             emit_info: EmitInfo::new(shared_flags),
120             pool: ConstantPool::new(),
121         }
122     }
123 }
124 
125 impl Assembler {
126     /// Return the emitted code.
127     pub fn finalize(mut self, loc: Option<SourceLoc>) -> MachBufferFinalized<Final> {
128         let stencil = self
129             .buffer
130             .finish(&self.pool.constants(), self.emit_state.ctrl_plane_mut());
131         stencil.apply_base_srcloc(loc.unwrap_or_default())
132     }
133 
134     fn emit(&mut self, inst: Inst) {
135         self.emit_with_island(inst, Inst::worst_case_size());
136     }
137 
138     fn emit_with_island(&mut self, inst: Inst, needed_space: u32) {
139         if self.buffer.island_needed(needed_space) {
140             let label = self.buffer.get_label();
141             let jmp = Inst::Jump {
142                 dest: BranchTarget::Label(label),
143             };
144             jmp.emit(&mut self.buffer, &self.emit_info, &mut self.emit_state);
145             self.buffer
146                 .emit_island(needed_space, self.emit_state.ctrl_plane_mut());
147             self.buffer
148                 .bind_label(label, self.emit_state.ctrl_plane_mut());
149         }
150         inst.emit(&mut self.buffer, &self.emit_info, &mut self.emit_state);
151     }
152 
153     /// Adds a constant to the constant pool, returning its address.
154     pub fn add_constant(&mut self, constant: &[u8]) -> Address {
155         let handle = self.pool.register(constant, &mut self.buffer);
156         Address::constant(handle)
157     }
158 
159     /// Store a pair of registers.
160     pub fn stp(&mut self, xt1: Reg, xt2: Reg, addr: Address) {
161         let mem: PairAMode = addr.try_into().unwrap();
162         self.emit(Inst::StoreP64 {
163             rt: xt1.into(),
164             rt2: xt2.into(),
165             mem,
166             flags: MemFlags::trusted(),
167         });
168     }
169 
170     /// Store a register.
171     pub fn str(&mut self, reg: Reg, addr: Address, size: OperandSize, flags: MemFlags) {
172         let mem: AMode = addr.try_into().unwrap();
173 
174         use OperandSize::*;
175         let inst = match (reg.is_int(), size) {
176             (_, S8) => Inst::Store8 {
177                 rd: reg.into(),
178                 mem,
179                 flags,
180             },
181             (_, S16) => Inst::Store16 {
182                 rd: reg.into(),
183                 mem,
184                 flags,
185             },
186             (true, S32) => Inst::Store32 {
187                 rd: reg.into(),
188                 mem,
189                 flags,
190             },
191             (false, S32) => Inst::FpuStore32 {
192                 rd: reg.into(),
193                 mem,
194                 flags,
195             },
196             (true, S64) => Inst::Store64 {
197                 rd: reg.into(),
198                 mem,
199                 flags,
200             },
201             (false, S64) => Inst::FpuStore64 {
202                 rd: reg.into(),
203                 mem,
204                 flags,
205             },
206             (_, S128) => Inst::FpuStore128 {
207                 rd: reg.into(),
208                 mem,
209                 flags,
210             },
211         };
212 
213         self.emit(inst);
214     }
215 
216     /// Load a signed register.
217     pub fn sload(&mut self, addr: Address, rd: WritableReg, size: OperandSize, flags: MemFlags) {
218         self.ldr(addr, rd, size, true, flags);
219     }
220 
221     /// Load an unsigned register.
222     pub fn uload(&mut self, addr: Address, rd: WritableReg, size: OperandSize, flags: MemFlags) {
223         self.ldr(addr, rd, size, false, flags);
224     }
225 
226     /// Load address into a register.
227     fn ldr(
228         &mut self,
229         addr: Address,
230         rd: WritableReg,
231         size: OperandSize,
232         signed: bool,
233         flags: MemFlags,
234     ) {
235         use OperandSize::*;
236         let writable_reg = rd.map(Into::into);
237         let mem: AMode = addr.try_into().unwrap();
238 
239         let inst = match (rd.to_reg().is_int(), signed, size) {
240             (_, false, S8) => Inst::ULoad8 {
241                 rd: writable_reg,
242                 mem,
243                 flags,
244             },
245             (_, true, S8) => Inst::SLoad8 {
246                 rd: writable_reg,
247                 mem,
248                 flags,
249             },
250             (_, false, S16) => Inst::ULoad16 {
251                 rd: writable_reg,
252                 mem,
253                 flags,
254             },
255             (_, true, S16) => Inst::SLoad16 {
256                 rd: writable_reg,
257                 mem,
258                 flags,
259             },
260             (true, false, S32) => Inst::ULoad32 {
261                 rd: writable_reg,
262                 mem,
263                 flags,
264             },
265             (false, _, S32) => Inst::FpuLoad32 {
266                 rd: writable_reg,
267                 mem,
268                 flags,
269             },
270             (true, true, S32) => Inst::SLoad32 {
271                 rd: writable_reg,
272                 mem,
273                 flags,
274             },
275             (true, _, S64) => Inst::ULoad64 {
276                 rd: writable_reg,
277                 mem,
278                 flags,
279             },
280             (false, _, S64) => Inst::FpuLoad64 {
281                 rd: writable_reg,
282                 mem,
283                 flags,
284             },
285             (_, _, S128) => Inst::FpuLoad128 {
286                 rd: writable_reg,
287                 mem,
288                 flags,
289             },
290         };
291 
292         self.emit(inst);
293     }
294 
295     /// Load a pair of registers.
296     pub fn ldp(&mut self, xt1: Reg, xt2: Reg, addr: Address) {
297         let writable_xt1 = Writable::from_reg(xt1.into());
298         let writable_xt2 = Writable::from_reg(xt2.into());
299         let mem = addr.try_into().unwrap();
300 
301         self.emit(Inst::LoadP64 {
302             rt: writable_xt1,
303             rt2: writable_xt2,
304             mem,
305             flags: MemFlags::trusted(),
306         });
307     }
308 
309     /// Emit a series of instructions to move an arbitrary 64-bit immediate
310     /// into the destination register.
311     /// The emitted instructions will depend on the destination register class.
312     pub fn mov_ir(&mut self, rd: WritableReg, imm: Imm, size: OperandSize) {
313         match rd.to_reg().class() {
314             RegClass::Int => {
315                 Inst::load_constant(rd.map(Into::into), imm.unwrap_as_u64())
316                     .into_iter()
317                     .for_each(|i| self.emit(i));
318             }
319             RegClass::Float => {
320                 match ASIMDFPModImm::maybe_from_u64(imm.unwrap_as_u64(), size.into()) {
321                     Some(imm) => {
322                         self.emit(Inst::FpuMoveFPImm {
323                             rd: rd.map(Into::into),
324                             imm,
325                             size: size.into(),
326                         });
327                     }
328                     _ => {
329                         let addr = self.add_constant(&imm.to_bytes());
330                         self.uload(addr, rd, size, TRUSTED_FLAGS);
331                     }
332                 }
333             }
334             _ => unreachable!(),
335         }
336     }
337 
338     /// Register to register move.
339     pub fn mov_rr(&mut self, rm: Reg, rd: WritableReg, size: OperandSize) {
340         let writable_rd = rd.map(Into::into);
341         self.emit(Inst::Mov {
342             size: size.into(),
343             rd: writable_rd,
344             rm: rm.into(),
345         });
346     }
347 
348     /// Floating point register to register move.
349     pub fn fmov_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {
350         let writable = rd.map(Into::into);
351         let inst = match size {
352             OperandSize::S32 => Inst::FpuMove32 {
353                 rd: writable,
354                 rn: rn.into(),
355             },
356             OperandSize::S64 => Inst::FpuMove64 {
357                 rd: writable,
358                 rn: rn.into(),
359             },
360             _ => unreachable!(),
361         };
362 
363         self.emit(inst);
364     }
365 
366     pub fn mov_to_fpu(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {
367         let writable_rd = rd.map(Into::into);
368         self.emit(Inst::MovToFpu {
369             size: size.into(),
370             rd: writable_rd,
371             rn: rn.into(),
372         });
373     }
374 
375     pub fn mov_from_vec(&mut self, rn: Reg, rd: WritableReg, idx: u8, size: OperandSize) {
376         self.emit(Inst::MovFromVec {
377             rd: rd.map(Into::into),
378             rn: rn.into(),
379             idx,
380             size: size.into(),
381         });
382     }
383 
384     /// Add immediate and register.
385     pub fn add_ir(&mut self, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize) {
386         self.alu_rri(ALUOp::Add, imm, rn, rd, size);
387     }
388 
389     /// Add immediate and register, setting overflow flags.
390     pub fn adds_ir(&mut self, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize) {
391         self.alu_rri(ALUOp::AddS, imm, rn, rd, size);
392     }
393 
394     /// Add with three registers.
395     pub fn add_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
396         self.alu_rrr_extend(ALUOp::Add, rm, rn, rd, size);
397     }
398 
399     /// Add with three registers, setting overflow flags.
400     pub fn adds_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
401         self.alu_rrr_extend(ALUOp::AddS, rm, rn, rd, size);
402     }
403 
404     /// Add across Vector.
405     pub fn addv(&mut self, rn: Reg, rd: WritableReg, size: VectorSize) {
406         self.emit(Inst::VecLanes {
407             op: VecLanesOp::Addv,
408             rd: rd.map(Into::into),
409             rn: rn.into(),
410             size,
411         });
412     }
413 
414     /// Subtract immediate and register.
415     pub fn sub_ir(&mut self, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize) {
416         self.alu_rri(ALUOp::Sub, imm, rn, rd, size);
417     }
418 
419     /// Subtract immediate and register, setting flags.
420     pub fn subs_ir(&mut self, imm: Imm12, rn: Reg, size: OperandSize) {
421         self.alu_rri(ALUOp::SubS, imm, rn, writable!(regs::zero()), size);
422     }
423 
424     /// Subtract with three registers.
425     pub fn sub_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
426         self.alu_rrr_extend(ALUOp::Sub, rm, rn, rd, size);
427     }
428 
429     /// Subtract with three registers, setting flags.
430     pub fn subs_rrr(&mut self, rm: Reg, rn: Reg, size: OperandSize) {
431         self.alu_rrr_extend(ALUOp::SubS, rm, rn, writable!(regs::zero()), size);
432     }
433 
434     /// Multiply with three registers.
435     pub fn mul_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
436         self.alu_rrrr(ALUOp3::MAdd, rm, rn, rd, regs::zero(), size);
437     }
438 
439     /// Signed/unsigned division with three registers.
440     pub fn div_rrr(
441         &mut self,
442         divisor: Reg,
443         dividend: Reg,
444         dest: Writable<Reg>,
445         kind: DivKind,
446         size: OperandSize,
447     ) {
448         // Check for division by 0.
449         self.trapz(divisor, TrapCode::INTEGER_DIVISION_BY_ZERO, size);
450 
451         // check for overflow
452         if kind == DivKind::Signed {
453             // Check for divisor overflow.
454             self.alu_rri(
455                 ALUOp::AddS,
456                 Imm12::maybe_from_u64(1).expect("1 to fit in 12 bits"),
457                 divisor,
458                 writable!(zero()),
459                 size,
460             );
461 
462             // Check if the dividend is 1.
463             self.emit(Inst::CCmpImm {
464                 size: size.into(),
465                 rn: dividend.into(),
466                 imm: UImm5::maybe_from_u8(1).expect("1 fits in 5 bits"),
467                 nzcv: NZCV::new(false, false, false, false),
468                 cond: Cond::Eq,
469             });
470 
471             // Finally, trap if the previous operation overflowed.
472             self.trapif(Cond::Vs, TrapCode::INTEGER_OVERFLOW);
473         }
474 
475         // `cranelift-codegen` doesn't support emitting sdiv for anything but I64,
476         // we therefore sign-extend the operand.
477         // see: https://github.com/bytecodealliance/wasmtime/issues/9766
478         let size = if size == OperandSize::S32 && kind == DivKind::Signed {
479             self.extend(
480                 divisor,
481                 writable!(divisor),
482                 ExtendKind::Signed(Extend::<Signed>::I64Extend32),
483             );
484             self.extend(
485                 dividend,
486                 writable!(dividend),
487                 ExtendKind::Signed(Extend::<Signed>::I64Extend32),
488             );
489             OperandSize::S64
490         } else {
491             size
492         };
493 
494         let op = match kind {
495             DivKind::Signed => ALUOp::SDiv,
496             DivKind::Unsigned => ALUOp::UDiv,
497         };
498 
499         self.alu_rrr(op, divisor, dividend, dest.map(Into::into), size);
500     }
501 
502     /// Signed/unsigned remainder operation with three registers.
503     pub fn rem_rrr(
504         &mut self,
505         divisor: Reg,
506         dividend: Reg,
507         dest: Writable<Reg>,
508         scratch: WritableReg,
509         kind: RemKind,
510         size: OperandSize,
511     ) {
512         // Check for division by 0
513         self.trapz(divisor, TrapCode::INTEGER_DIVISION_BY_ZERO, size);
514 
515         // `cranelift-codegen` doesn't support emitting sdiv for anything but I64,
516         // we therefore sign-extend the operand.
517         // see: https://github.com/bytecodealliance/wasmtime/issues/9766
518         let size = if size == OperandSize::S32 && kind.is_signed() {
519             self.extend(
520                 divisor,
521                 writable!(divisor),
522                 ExtendKind::Signed(Extend::<Signed>::I64Extend32),
523             );
524             self.extend(
525                 dividend,
526                 writable!(dividend),
527                 ExtendKind::Signed(Extend::<Signed>::I64Extend32),
528             );
529             OperandSize::S64
530         } else {
531             size
532         };
533 
534         let op = match kind {
535             RemKind::Signed => ALUOp::SDiv,
536             RemKind::Unsigned => ALUOp::UDiv,
537         };
538 
539         self.alu_rrr(op, divisor, dividend, scratch, size);
540 
541         self.alu_rrrr(
542             ALUOp3::MSub,
543             scratch.to_reg(),
544             divisor,
545             dest.map(Into::into),
546             dividend,
547             size,
548         );
549     }
550 
551     /// And with three registers.
552     pub fn and_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
553         self.alu_rrr(ALUOp::And, rm, rn, rd, size);
554     }
555 
556     /// And immediate and register.
557     pub fn and_ir(&mut self, imm: ImmLogic, rn: Reg, rd: WritableReg, size: OperandSize) {
558         self.alu_rri_logic(ALUOp::And, imm, rn, rd, size);
559     }
560 
561     /// Or with three registers.
562     pub fn or_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
563         self.alu_rrr(ALUOp::Orr, rm, rn, rd, size);
564     }
565 
566     /// Or immediate and register.
567     pub fn or_ir(&mut self, imm: ImmLogic, rn: Reg, rd: WritableReg, size: OperandSize) {
568         self.alu_rri_logic(ALUOp::Orr, imm, rn, rd, size);
569     }
570 
571     /// Xor with three registers.
572     pub fn xor_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
573         self.alu_rrr(ALUOp::Eor, rm, rn, rd, size);
574     }
575 
576     /// Xor immediate and register.
577     pub fn xor_ir(&mut self, imm: ImmLogic, rn: Reg, rd: WritableReg, size: OperandSize) {
578         self.alu_rri_logic(ALUOp::Eor, imm, rn, rd, size);
579     }
580 
581     /// Shift with three registers.
582     pub fn shift_rrr(
583         &mut self,
584         rm: Reg,
585         rn: Reg,
586         rd: WritableReg,
587         kind: ShiftKind,
588         size: OperandSize,
589     ) {
590         let shift_op: ALUOp = kind.into();
591         // In the case of rotate left, we negate the register containing the
592         // shift value.
593         if kind == ShiftKind::Rotl {
594             self.alu_rrr(ALUOp::Sub, rm, regs::zero(), writable!(rm), size);
595             self.alu_rrr(shift_op, rm, rn, rd, size);
596         } else {
597             self.alu_rrr(shift_op, rm, rn, rd, size);
598         }
599     }
600 
601     /// Shift immediate and register.
602     pub fn shift_ir(
603         &mut self,
604         imm: ImmShift,
605         rn: Reg,
606         rd: WritableReg,
607         kind: ShiftKind,
608         size: OperandSize,
609     ) {
610         let shift_op: ALUOp = kind.into();
611         // In the case of rotate left, we emit rotate right with type_size -
612         // value.
613         if kind == ShiftKind::Rotl {
614             let value_size = size.num_bits();
615             let mut imm_val = value_size.wrapping_sub(imm.value());
616             imm_val &= value_size - 1;
617             let negated_imm = ImmShift::maybe_from_u64(imm_val as u64).unwrap();
618 
619             self.alu_rri_shift(shift_op, negated_imm, rn, rd, size);
620         } else {
621             self.alu_rri_shift(shift_op, imm, rn, rd, size);
622         }
623     }
624 
625     /// Count Leading Zeros.
626     pub fn clz(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {
627         self.bit_rr(BitOp::Clz, rn, rd, size);
628     }
629 
630     /// Reverse Bits reverses the bit order in a register.
631     pub fn rbit(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {
632         self.bit_rr(BitOp::RBit, rn, rd, size);
633     }
634 
635     /// Float add with three registers.
636     pub fn fadd_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
637         self.fpu_rrr(FPUOp2::Add, rm, rn, rd, size);
638     }
639 
640     /// Float sub with three registers.
641     pub fn fsub_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
642         self.fpu_rrr(FPUOp2::Sub, rm, rn, rd, size);
643     }
644 
645     /// Float multiply with three registers.
646     pub fn fmul_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
647         self.fpu_rrr(FPUOp2::Mul, rm, rn, rd, size);
648     }
649 
650     /// Float division with three registers.
651     pub fn fdiv_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
652         self.fpu_rrr(FPUOp2::Div, rm, rn, rd, size);
653     }
654 
655     /// Float max with three registers.
656     pub fn fmax_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
657         self.fpu_rrr(FPUOp2::Max, rm, rn, rd, size);
658     }
659 
660     /// Float min with three registers.
661     pub fn fmin_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
662         self.fpu_rrr(FPUOp2::Min, rm, rn, rd, size);
663     }
664 
665     /// Float neg with two registers.
666     pub fn fneg_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {
667         self.fpu_rr(FPUOp1::Neg, rn, rd, size);
668     }
669 
670     /// Float abs with two registers.
671     pub fn fabs_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {
672         self.fpu_rr(FPUOp1::Abs, rn, rd, size);
673     }
674 
675     /// Float sqrt with two registers.
676     pub fn fsqrt_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {
677         self.fpu_rr(FPUOp1::Sqrt, rn, rd, size);
678     }
679 
680     /// Float round (ceil, trunc, floor) with two registers.
681     pub fn fround_rr(&mut self, rn: Reg, rd: WritableReg, mode: RoundingMode, size: OperandSize) {
682         let fpu_mode = match (mode, size) {
683             (RoundingMode::Nearest, OperandSize::S32) => FpuRoundMode::Nearest32,
684             (RoundingMode::Up, OperandSize::S32) => FpuRoundMode::Plus32,
685             (RoundingMode::Down, OperandSize::S32) => FpuRoundMode::Minus32,
686             (RoundingMode::Zero, OperandSize::S32) => FpuRoundMode::Zero32,
687             (RoundingMode::Nearest, OperandSize::S64) => FpuRoundMode::Nearest64,
688             (RoundingMode::Up, OperandSize::S64) => FpuRoundMode::Plus64,
689             (RoundingMode::Down, OperandSize::S64) => FpuRoundMode::Minus64,
690             (RoundingMode::Zero, OperandSize::S64) => FpuRoundMode::Zero64,
691             (m, o) => panic!("Invalid rounding mode or operand size {m:?}, {o:?}"),
692         };
693         self.fpu_round(fpu_mode, rn, rd)
694     }
695 
696     /// Float unsigned shift right with two registers and an immediate.
697     pub fn fushr_rri(&mut self, rn: Reg, rd: WritableReg, amount: u8, size: OperandSize) {
698         let imm = FPURightShiftImm {
699             amount,
700             lane_size_in_bits: size.num_bits(),
701         };
702         let ushr = match size {
703             OperandSize::S32 => UShr32(imm),
704             OperandSize::S64 => UShr64(imm),
705             _ => unreachable!(),
706         };
707         self.fpu_rri(ushr, rn, rd)
708     }
709 
710     /// Float unsigned shift left and insert with three registers
711     /// and an immediate.
712     pub fn fsli_rri_mod(
713         &mut self,
714         ri: Reg,
715         rn: Reg,
716         rd: WritableReg,
717         amount: u8,
718         size: OperandSize,
719     ) {
720         let imm = FPULeftShiftImm {
721             amount,
722             lane_size_in_bits: size.num_bits(),
723         };
724         let sli = match size {
725             OperandSize::S32 => FPUOpRIMod::Sli32(imm),
726             OperandSize::S64 => FPUOpRIMod::Sli64(imm),
727             _ => unreachable!(),
728         };
729         self.fpu_rri_mod(sli, ri, rn, rd)
730     }
731 
732     /// Float compare.
733     pub fn fcmp(&mut self, rn: Reg, rm: Reg, size: OperandSize) {
734         self.emit(Inst::FpuCmp {
735             size: size.into(),
736             rn: rn.into(),
737             rm: rm.into(),
738         })
739     }
740 
741     /// Convert an signed integer to a float.
742     pub fn cvt_sint_to_float(
743         &mut self,
744         rn: Reg,
745         rd: WritableReg,
746         src_size: OperandSize,
747         dst_size: OperandSize,
748     ) {
749         let op = match (src_size, dst_size) {
750             (OperandSize::S32, OperandSize::S32) => IntToFpuOp::I32ToF32,
751             (OperandSize::S64, OperandSize::S32) => IntToFpuOp::I64ToF32,
752             (OperandSize::S32, OperandSize::S64) => IntToFpuOp::I32ToF64,
753             (OperandSize::S64, OperandSize::S64) => IntToFpuOp::I64ToF64,
754             _ => unreachable!(),
755         };
756 
757         self.emit(Inst::IntToFpu {
758             op,
759             rd: rd.map(Into::into),
760             rn: rn.into(),
761         });
762     }
763 
764     /// Convert an unsigned integer to a float.
765     pub fn cvt_uint_to_float(
766         &mut self,
767         rn: Reg,
768         rd: WritableReg,
769         src_size: OperandSize,
770         dst_size: OperandSize,
771     ) {
772         let op = match (src_size, dst_size) {
773             (OperandSize::S32, OperandSize::S32) => IntToFpuOp::U32ToF32,
774             (OperandSize::S64, OperandSize::S32) => IntToFpuOp::U64ToF32,
775             (OperandSize::S32, OperandSize::S64) => IntToFpuOp::U32ToF64,
776             (OperandSize::S64, OperandSize::S64) => IntToFpuOp::U64ToF64,
777             _ => unreachable!(),
778         };
779 
780         self.emit(Inst::IntToFpu {
781             op,
782             rd: rd.map(Into::into),
783             rn: rn.into(),
784         });
785     }
786 
787     /// Change precision of float.
788     pub fn cvt_float_to_float(
789         &mut self,
790         rn: Reg,
791         rd: WritableReg,
792         src_size: OperandSize,
793         dst_size: OperandSize,
794     ) {
795         let (fpu_op, size) = match (src_size, dst_size) {
796             (OperandSize::S32, OperandSize::S64) => (FPUOp1::Cvt32To64, ScalarSize::Size32),
797             (OperandSize::S64, OperandSize::S32) => (FPUOp1::Cvt64To32, ScalarSize::Size64),
798             _ => unimplemented!(),
799         };
800         self.emit(Inst::FpuRR {
801             fpu_op,
802             size,
803             rd: rd.map(Into::into),
804             rn: rn.into(),
805         });
806     }
807 
808     /// Return instruction.
809     pub fn ret(&mut self) {
810         self.emit(Inst::Ret {});
811     }
812 
813     /// An unconditional branch.
814     pub fn jmp(&mut self, target: MachLabel) {
815         self.emit(Inst::Jump {
816             dest: BranchTarget::Label(target),
817         });
818     }
819 
820     /// A conditional branch.
821     pub fn jmp_if(&mut self, kind: Cond, taken: MachLabel) {
822         self.emit(Inst::CondBr {
823             taken: BranchTarget::Label(taken),
824             not_taken: BranchTarget::ResolvedOffset(4),
825             kind: CondBrKind::Cond(kind),
826         });
827     }
828 
829     /// Emits a jump table sequence.
830     pub fn jmp_table(
831         &mut self,
832         targets: &[MachLabel],
833         default: MachLabel,
834         index: Reg,
835         tmp1: Reg,
836         tmp2: Reg,
837     ) {
838         self.emit_with_island(
839             Inst::JTSequence {
840                 default,
841                 targets: Box::new(targets.to_vec()),
842                 ridx: index.into(),
843                 rtmp1: Writable::from_reg(tmp1.into()),
844                 rtmp2: Writable::from_reg(tmp2.into()),
845             },
846             // number of bytes needed for the jumptable sequence:
847             // 4 bytes per instruction, with 8 instructions base + the size of
848             // the jumptable more.
849             (4 * (8 + targets.len())).try_into().unwrap(),
850         );
851     }
852 
853     /// Conditional Set sets the destination register to 1 if the condition
854     /// is true, and otherwise sets it to 0.
855     pub fn cset(&mut self, rd: WritableReg, cond: Cond) {
856         self.emit(Inst::CSet {
857             rd: rd.map(Into::into),
858             cond,
859         });
860     }
861 
862     /// If the condition is true, `csel` writes rn to rd. If the
863     /// condition is false, it writes rm to rd
864     pub fn csel(&mut self, rn: Reg, rm: Reg, rd: WritableReg, cond: Cond) {
865         self.emit(Inst::CSel {
866             rd: rd.map(Into::into),
867             rn: rn.into(),
868             rm: rm.into(),
869             cond,
870         });
871     }
872 
873     /// If the condition is true, `csel` writes rn to rd. If the
874     /// condition is false, it writes rm to rd
875     pub fn fpu_csel(&mut self, rn: Reg, rm: Reg, rd: WritableReg, cond: Cond, size: OperandSize) {
876         match size {
877             OperandSize::S32 => {
878                 self.emit(Inst::FpuCSel32 {
879                     rd: rd.map(Into::into),
880                     rn: rn.into(),
881                     rm: rm.into(),
882                     cond,
883                 });
884             }
885             OperandSize::S64 => {
886                 self.emit(Inst::FpuCSel64 {
887                     rd: rd.map(Into::into),
888                     rn: rn.into(),
889                     rm: rm.into(),
890                     cond,
891                 });
892             }
893             _ => todo!(),
894         }
895     }
896 
897     /// Population count per byte.
898     pub fn cnt(&mut self, rd: WritableReg) {
899         self.emit(Inst::VecMisc {
900             op: VecMisc2::Cnt,
901             rd: rd.map(Into::into),
902             rn: rd.to_reg().into(),
903             size: VectorSize::Size8x8,
904         });
905     }
906 
907     pub fn extend(&mut self, rn: Reg, rd: WritableReg, kind: ExtendKind) {
908         self.emit(Inst::Extend {
909             rd: rd.map(Into::into),
910             rn: rn.into(),
911             signed: kind.signed(),
912             from_bits: kind.from_bits(),
913             to_bits: kind.to_bits(),
914         })
915     }
916 
917     /// Bitwise AND (shifted register), setting flags.
918     pub fn ands_rr(&mut self, rn: Reg, rm: Reg, size: OperandSize) {
919         self.alu_rrr(ALUOp::AndS, rm, rn, writable!(regs::zero()), size);
920     }
921 
922     /// Permanently Undefined.
923     pub fn udf(&mut self, code: TrapCode) {
924         self.emit(Inst::Udf { trap_code: code });
925     }
926 
927     /// Conditional trap.
928     pub fn trapif(&mut self, cc: Cond, code: TrapCode) {
929         self.emit(Inst::TrapIf {
930             kind: CondBrKind::Cond(cc),
931             trap_code: code,
932         });
933     }
934 
935     /// Trap if `rn` is zero.
936     pub fn trapz(&mut self, rn: Reg, code: TrapCode, size: OperandSize) {
937         self.emit(Inst::TrapIf {
938             kind: CondBrKind::Zero(rn.into(), size.into()),
939             trap_code: code,
940         });
941     }
942 
943     // Helpers for ALU operations.
944 
945     fn alu_rri(&mut self, op: ALUOp, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize) {
946         self.emit(Inst::AluRRImm12 {
947             alu_op: op,
948             size: size.into(),
949             rd: rd.map(Into::into),
950             rn: rn.into(),
951             imm12: imm,
952         });
953     }
954 
955     fn alu_rri_logic(
956         &mut self,
957         op: ALUOp,
958         imm: ImmLogic,
959         rn: Reg,
960         rd: WritableReg,
961         size: OperandSize,
962     ) {
963         self.emit(Inst::AluRRImmLogic {
964             alu_op: op,
965             size: size.into(),
966             rd: rd.map(Into::into),
967             rn: rn.into(),
968             imml: imm,
969         });
970     }
971 
972     fn alu_rri_shift(
973         &mut self,
974         op: ALUOp,
975         imm: ImmShift,
976         rn: Reg,
977         rd: WritableReg,
978         size: OperandSize,
979     ) {
980         self.emit(Inst::AluRRImmShift {
981             alu_op: op,
982             size: size.into(),
983             rd: rd.map(Into::into),
984             rn: rn.into(),
985             immshift: imm,
986         });
987     }
988 
989     fn alu_rrr(&mut self, op: ALUOp, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
990         self.emit(Inst::AluRRR {
991             alu_op: op,
992             size: size.into(),
993             rd: rd.map(Into::into),
994             rn: rn.into(),
995             rm: rm.into(),
996         });
997     }
998 
999     fn alu_rrr_extend(&mut self, op: ALUOp, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
1000         self.emit(Inst::AluRRRExtend {
1001             alu_op: op,
1002             size: size.into(),
1003             rd: rd.map(Into::into),
1004             rn: rn.into(),
1005             rm: rm.into(),
1006             extendop: ExtendOp::UXTX,
1007         });
1008     }
1009 
1010     fn alu_rrrr(
1011         &mut self,
1012         op: ALUOp3,
1013         rm: Reg,
1014         rn: Reg,
1015         rd: WritableReg,
1016         ra: Reg,
1017         size: OperandSize,
1018     ) {
1019         self.emit(Inst::AluRRRR {
1020             alu_op: op,
1021             size: size.into(),
1022             rd: rd.map(Into::into),
1023             rn: rn.into(),
1024             rm: rm.into(),
1025             ra: ra.into(),
1026         });
1027     }
1028 
1029     fn fpu_rrr(&mut self, op: FPUOp2, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
1030         self.emit(Inst::FpuRRR {
1031             fpu_op: op,
1032             size: size.into(),
1033             rd: rd.map(Into::into),
1034             rn: rn.into(),
1035             rm: rm.into(),
1036         });
1037     }
1038 
1039     fn fpu_rri(&mut self, op: FPUOpRI, rn: Reg, rd: WritableReg) {
1040         self.emit(Inst::FpuRRI {
1041             fpu_op: op,
1042             rd: rd.map(Into::into),
1043             rn: rn.into(),
1044         });
1045     }
1046 
1047     fn fpu_rri_mod(&mut self, op: FPUOpRIMod, ri: Reg, rn: Reg, rd: WritableReg) {
1048         self.emit(Inst::FpuRRIMod {
1049             fpu_op: op,
1050             rd: rd.map(Into::into),
1051             ri: ri.into(),
1052             rn: rn.into(),
1053         });
1054     }
1055 
1056     fn fpu_rr(&mut self, op: FPUOp1, rn: Reg, rd: WritableReg, size: OperandSize) {
1057         self.emit(Inst::FpuRR {
1058             fpu_op: op,
1059             size: size.into(),
1060             rd: rd.map(Into::into),
1061             rn: rn.into(),
1062         });
1063     }
1064 
1065     fn fpu_round(&mut self, op: FpuRoundMode, rn: Reg, rd: WritableReg) {
1066         self.emit(Inst::FpuRound {
1067             op,
1068             rd: rd.map(Into::into),
1069             rn: rn.into(),
1070         });
1071     }
1072 
1073     fn bit_rr(&mut self, op: BitOp, rn: Reg, rd: WritableReg, size: OperandSize) {
1074         self.emit(Inst::BitRR {
1075             op,
1076             size: size.into(),
1077             rd: rd.map(Into::into),
1078             rn: rn.into(),
1079         });
1080     }
1081 
    /// Returns a label obtained from the underlying machine code buffer
    /// (delegates to the buffer's own `get_label`).
    pub fn get_label(&mut self) -> MachLabel {
        self.buffer.get_label()
    }
1086 
    /// Get a mutable reference to the underlying machine buffer.
    pub fn buffer_mut(&mut self) -> &mut MachBuffer<Inst> {
        &mut self.buffer
    }
1092 
    /// Get a shared (read-only) reference to the underlying machine buffer.
    pub fn buffer(&self) -> &MachBuffer<Inst> {
        &self.buffer
    }
1097 
1098     /// Emit a direct call to a function defined locally and
1099     /// referenced to by `name`.
1100     pub fn call_with_name(&mut self, name: UserExternalNameRef, call_conv: CallingConvention) {
1101         self.emit(Inst::Call {
1102             info: Box::new(cranelift_codegen::CallInfo::empty(
1103                 ExternalName::user(name),
1104                 call_conv.into(),
1105             )),
1106         })
1107     }
1108 
1109     /// Emit an indirect call to a function whose address is
1110     /// stored the `callee` register.
1111     pub fn call_with_reg(&mut self, callee: Reg, call_conv: CallingConvention) {
1112         self.emit(Inst::CallInd {
1113             info: Box::new(cranelift_codegen::CallInfo::empty(
1114                 callee.into(),
1115                 call_conv.into(),
1116             )),
1117         })
1118     }
1119 
1120     /// Load the min value for an integer of size out_size, as a floating-point
1121     /// of size `in-size`, into register `rd`.
1122     fn min_fp_value(
1123         &mut self,
1124         signed: bool,
1125         in_size: OperandSize,
1126         out_size: OperandSize,
1127         rd: Writable<Reg>,
1128     ) {
1129         match in_size {
1130             OperandSize::S32 => {
1131                 let (min, _) = f32_cvt_to_int_bounds(signed, out_size.num_bits().into());
1132                 self.mov_ir(rd, Imm::f32(min.to_bits()), in_size);
1133             }
1134             OperandSize::S64 => {
1135                 let (min, _) = f64_cvt_to_int_bounds(signed, out_size.num_bits().into());
1136                 self.mov_ir(rd, Imm::f64(min.to_bits()), in_size);
1137             }
1138             s => unreachable!("unsupported floating-point size: {}bit", s.num_bits()),
1139         };
1140     }
1141 
1142     /// Load the max value for an integer of size out_size, as a floating-point
1143     /// of size `in_size`, into register `rd`.
1144     fn max_fp_value(
1145         &mut self,
1146         signed: bool,
1147         in_size: OperandSize,
1148         out_size: OperandSize,
1149         rd: Writable<Reg>,
1150     ) {
1151         match in_size {
1152             OperandSize::S32 => {
1153                 let (_, max) = f32_cvt_to_int_bounds(signed, out_size.num_bits().into());
1154                 self.mov_ir(rd, Imm::f32(max.to_bits()), in_size);
1155             }
1156             OperandSize::S64 => {
1157                 let (_, max) = f64_cvt_to_int_bounds(signed, out_size.num_bits().into());
1158                 self.mov_ir(rd, Imm::f64(max.to_bits()), in_size);
1159             }
1160             s => unreachable!("unsupported floating-point size: {}bit", s.num_bits()),
1161         };
1162     }
1163 
    /// Emit instructions to check if the value in `rn` is NaN, trapping with
    /// `TrapCode::BAD_CONVERSION_TO_INTEGER` when the check fires.
    fn check_nan(&mut self, rn: Reg, size: OperandSize) {
        // Compare `rn` against itself; the trap below is taken on `Cond::Vs`.
        // NOTE(review): presumably this relies on the floating-point compare
        // reporting an unordered (NaN) result through the overflow flag --
        // confirm against the architecture reference.
        self.fcmp(rn, rn, size);
        self.trapif(Cond::Vs, TrapCode::BAD_CONVERSION_TO_INTEGER);
    }
1169 
1170     /// Convert the floating point of size `src_size` stored in `src`, into a integer of size
1171     /// `dst_size`, storing the result in `dst`.
1172     pub fn fpu_to_int(
1173         &mut self,
1174         dst: Writable<Reg>,
1175         src: Reg,
1176         tmp_reg: WritableReg,
1177         src_size: OperandSize,
1178         dst_size: OperandSize,
1179         kind: TruncKind,
1180         signed: bool,
1181     ) {
1182         if kind.is_unchecked() {
1183             // Confusingly, when `kind` is `Unchecked` is when we actually need to perform the checks:
1184             // - check if fp is NaN
1185             // - check bounds
1186             self.check_nan(src, src_size);
1187 
1188             self.min_fp_value(signed, src_size, dst_size, tmp_reg);
1189             self.fcmp(src, tmp_reg.to_reg(), src_size);
1190             self.trapif(Cond::Le, TrapCode::INTEGER_OVERFLOW);
1191 
1192             self.max_fp_value(signed, src_size, dst_size, tmp_reg);
1193             self.fcmp(src, tmp_reg.to_reg(), src_size);
1194             self.trapif(Cond::Ge, TrapCode::INTEGER_OVERFLOW);
1195         }
1196 
1197         self.cvt_fpu_to_int(dst, src, src_size, dst_size, signed)
1198     }
1199 
1200     /// Select and emit the appropriate `fcvt*` instruction
1201     pub fn cvt_fpu_to_int(
1202         &mut self,
1203         dst: Writable<Reg>,
1204         src: Reg,
1205         src_size: OperandSize,
1206         dst_size: OperandSize,
1207         signed: bool,
1208     ) {
1209         let op = match (src_size, dst_size, signed) {
1210             (OperandSize::S32, OperandSize::S32, false) => FpuToIntOp::F32ToU32,
1211             (OperandSize::S32, OperandSize::S32, true) => FpuToIntOp::F32ToI32,
1212             (OperandSize::S32, OperandSize::S64, false) => FpuToIntOp::F32ToU64,
1213             (OperandSize::S32, OperandSize::S64, true) => FpuToIntOp::F32ToI64,
1214             (OperandSize::S64, OperandSize::S32, false) => FpuToIntOp::F64ToU32,
1215             (OperandSize::S64, OperandSize::S32, true) => FpuToIntOp::F64ToI32,
1216             (OperandSize::S64, OperandSize::S64, false) => FpuToIntOp::F64ToU64,
1217             (OperandSize::S64, OperandSize::S64, true) => FpuToIntOp::F64ToI64,
1218             (fsize, int_size, signed) => unimplemented!(
1219                 "unsupported conversion: f{} to {}{}",
1220                 fsize.num_bits(),
1221                 if signed { "i" } else { "u" },
1222                 int_size.num_bits(),
1223             ),
1224         };
1225 
1226         self.emit(Inst::FpuToInt {
1227             op,
1228             rd: dst.map(Into::into),
1229             rn: src.into(),
1230         });
1231     }
1232 }
1233 
/// Captures the region in a [`MachBuffer`] where an add-with-immediate
/// instruction sequence is emitted before its immediate is known, so that the
/// immediate can be patched in later.
pub(crate) struct PatchableAddToReg {
    /// The region to be patched in the [`MachBuffer`]. It contains
    /// space for 3 32-bit instructions, i.e. it's 12 bytes long.
    region: PatchRegion,

    /// The destination register for the add instruction.
    reg: Writable<Reg>,

    /// The temporary register used to hold the immediate value.
    tmp: Writable<Reg>,
}
1247 
1248 impl PatchableAddToReg {
1249     /// Create a new [`PatchableAddToReg`] by capturing a region in the output
1250     /// buffer containing an instruction sequence that loads an immediate into a
1251     /// register `tmp`, then adds it to a register `reg`. The [`MachBuffer`]
1252     /// will have that instruction sequence written to the region, though the
1253     /// immediate loaded into `tmp` will be `0` until the `::finalize` method is
1254     /// called.
1255     pub(crate) fn new(reg: Writable<Reg>, tmp: Writable<Reg>, buf: &mut MachBuffer<Inst>) -> Self {
1256         let insns = Self::add_immediate_instruction_sequence(reg, tmp, 0);
1257         let open = buf.start_patchable();
1258         buf.put_data(&insns);
1259         let region = buf.end_patchable(open);
1260 
1261         Self { region, reg, tmp }
1262     }
1263 
1264     fn add_immediate_instruction_sequence(
1265         reg: Writable<Reg>,
1266         tmp: Writable<Reg>,
1267         imm: i32,
1268     ) -> [u8; 12] {
1269         let imm_hi = imm as u64 & 0xffff_0000;
1270         let imm_hi = MoveWideConst::maybe_from_u64(imm_hi).unwrap();
1271 
1272         let imm_lo = imm as u64 & 0x0000_ffff;
1273         let imm_lo = MoveWideConst::maybe_from_u64(imm_lo).unwrap();
1274 
1275         let size = OperandSize::S64.into();
1276 
1277         let tmp = tmp.map(Into::into);
1278         let rd = reg.map(Into::into);
1279 
1280         // This is "movz to bits 16-31 of 64 bit reg tmp and zero the rest"
1281         let mov_insn = enc_move_wide(inst::MoveWideOp::MovZ, tmp, imm_hi, size);
1282 
1283         // This is "movk to bits 0-15 of 64 bit reg tmp"
1284         let movk_insn = enc_movk(tmp, imm_lo, size);
1285 
1286         // This is "add tmp to rd". The opcodes are somewhat buried in the
1287         // instruction encoder so we just repeat them here.
1288         let add_bits_31_21: u32 = 0b00001011_000 | (size.sf_bit() << 10);
1289         let add_bits_15_10: u32 = 0;
1290         let add_insn = enc_arith_rrr(
1291             add_bits_31_21,
1292             add_bits_15_10,
1293             rd,
1294             rd.to_reg(),
1295             tmp.to_reg(),
1296         );
1297 
1298         let mut buf = [0u8; 12];
1299         buf[0..4].copy_from_slice(&mov_insn.to_le_bytes());
1300         buf[4..8].copy_from_slice(&movk_insn.to_le_bytes());
1301         buf[8..12].copy_from_slice(&add_insn.to_le_bytes());
1302         buf
1303     }
1304 
1305     /// Patch the [`MachBuffer`] with the known constant to be added to the register. The final
1306     /// value is passed in as an i32, but the instruction encoding is fixed when
1307     /// [`PatchableAddToReg::new`] is called.
1308     pub(crate) fn finalize(self, val: i32, buffer: &mut MachBuffer<Inst>) {
1309         let insns = Self::add_immediate_instruction_sequence(self.reg, self.tmp, val);
1310         let slice = self.region.patch(buffer);
1311         assert_eq!(slice.len(), insns.len());
1312         slice.copy_from_slice(&insns);
1313     }
1314 }
1315