1 //! Assembler library implementation for Aarch64.
2 use super::regs;
3 use crate::CallingConvention;
4 use crate::aarch64::regs::zero;
5 use crate::masm::{
6     DivKind, Extend, ExtendKind, FloatCmpKind, Imm, IntCmpKind, RemKind, RoundingMode, ShiftKind,
7     Signed, TRUSTED_FLAGS, TruncKind,
8 };
9 use crate::{
10     constant_pool::ConstantPool,
11     masm::OperandSize,
12     reg::{Reg, WritableReg, writable},
13 };
14 
15 use cranelift_codegen::isa::aarch64;
16 use cranelift_codegen::isa::aarch64::inst::emit::{enc_arith_rrr, enc_move_wide, enc_movk};
17 use cranelift_codegen::isa::aarch64::inst::{
18     ASIMDFPModImm, FpuToIntOp, MoveWideConst, NZCV, UImm5,
19 };
20 use cranelift_codegen::{
21     Final, MachBuffer, MachBufferFinalized, MachInst, MachInstEmit, MachInstEmitState, MachLabel,
22     Writable,
23     ir::{ExternalName, MemFlags, SourceLoc, TrapCode, UserExternalNameRef},
24     isa::aarch64::inst::{
25         self, ALUOp, ALUOp3, AMode, BitOp, BranchTarget, Cond, CondBrKind, ExtendOp,
26         FPULeftShiftImm, FPUOp1, FPUOp2,
27         FPUOpRI::{self, UShr32, UShr64},
28         FPUOpRIMod, FPURightShiftImm, FpuRoundMode, Imm12, ImmLogic, ImmShift, Inst, IntToFpuOp,
29         PairAMode, ScalarSize, VecLanesOp, VecMisc2, VectorSize,
30         emit::{EmitInfo, EmitState},
31     },
32     settings,
33 };
34 use cranelift_codegen::{PatchRegion, VCodeConstant};
35 use regalloc2::RegClass;
36 use wasmtime_core::math::{f32_cvt_to_int_bounds, f64_cvt_to_int_bounds};
37 
38 impl From<OperandSize> for inst::OperandSize {
from(size: OperandSize) -> Self39     fn from(size: OperandSize) -> Self {
40         match size {
41             OperandSize::S32 => Self::Size32,
42             OperandSize::S64 => Self::Size64,
43             s => panic!("Invalid operand size {s:?}"),
44         }
45     }
46 }
47 
48 impl From<IntCmpKind> for Cond {
from(value: IntCmpKind) -> Self49     fn from(value: IntCmpKind) -> Self {
50         match value {
51             IntCmpKind::Eq => Cond::Eq,
52             IntCmpKind::Ne => Cond::Ne,
53             IntCmpKind::LtS => Cond::Lt,
54             IntCmpKind::LtU => Cond::Lo,
55             IntCmpKind::GtS => Cond::Gt,
56             IntCmpKind::GtU => Cond::Hi,
57             IntCmpKind::LeS => Cond::Le,
58             IntCmpKind::LeU => Cond::Ls,
59             IntCmpKind::GeS => Cond::Ge,
60             IntCmpKind::GeU => Cond::Hs,
61         }
62     }
63 }
64 
65 impl From<FloatCmpKind> for Cond {
from(value: FloatCmpKind) -> Self66     fn from(value: FloatCmpKind) -> Self {
67         match value {
68             FloatCmpKind::Eq => Cond::Eq,
69             FloatCmpKind::Ne => Cond::Ne,
70             FloatCmpKind::Lt => Cond::Mi,
71             FloatCmpKind::Gt => Cond::Gt,
72             FloatCmpKind::Le => Cond::Ls,
73             FloatCmpKind::Ge => Cond::Ge,
74         }
75     }
76 }
77 
78 impl From<OperandSize> for ScalarSize {
from(size: OperandSize) -> ScalarSize79     fn from(size: OperandSize) -> ScalarSize {
80         match size {
81             OperandSize::S8 => ScalarSize::Size8,
82             OperandSize::S16 => ScalarSize::Size16,
83             OperandSize::S32 => ScalarSize::Size32,
84             OperandSize::S64 => ScalarSize::Size64,
85             OperandSize::S128 => ScalarSize::Size128,
86         }
87     }
88 }
89 
90 impl From<ShiftKind> for ALUOp {
from(kind: ShiftKind) -> Self91     fn from(kind: ShiftKind) -> Self {
92         match kind {
93             ShiftKind::Shl => ALUOp::Lsl,
94             ShiftKind::ShrS => ALUOp::Asr,
95             ShiftKind::ShrU => ALUOp::Lsr,
96             ShiftKind::Rotr => ALUOp::Extr,
97             ShiftKind::Rotl => ALUOp::Extr,
98         }
99     }
100 }
101 
/// Low level assembler implementation for Aarch64.
///
/// Bundles the Cranelift machine buffer with the emission info/state that
/// `Inst::emit` requires and the constant pool used for out-of-line
/// constants.
pub(crate) struct Assembler {
    /// The machine instruction buffer.
    buffer: MachBuffer<Inst>,
    /// Constant emission information.
    emit_info: EmitInfo,
    /// Emission state.
    emit_state: EmitState,
    /// Constant pool.
    pool: ConstantPool,
}
113 
114 impl Assembler {
115     /// Create a new Aarch64 assembler.
new(shared_flags: settings::Flags, isa_flags: aarch64::settings::Flags) -> Self116     pub fn new(shared_flags: settings::Flags, isa_flags: aarch64::settings::Flags) -> Self {
117         Self {
118             buffer: MachBuffer::<Inst>::new(),
119             emit_state: Default::default(),
120             emit_info: EmitInfo::new(shared_flags, isa_flags),
121             pool: ConstantPool::new(),
122         }
123     }
124 }
125 
126 impl Assembler {
127     /// Return the emitted code.
finalize(mut self, loc: Option<SourceLoc>) -> MachBufferFinalized<Final>128     pub fn finalize(mut self, loc: Option<SourceLoc>) -> MachBufferFinalized<Final> {
129         let stencil = self
130             .buffer
131             .finish(&self.pool.constants(), self.emit_state.ctrl_plane_mut());
132         stencil.apply_base_srcloc(loc.unwrap_or_default())
133     }
134 
emit(&mut self, inst: Inst)135     fn emit(&mut self, inst: Inst) {
136         self.emit_with_island(inst, Inst::worst_case_size());
137     }
138 
emit_with_island(&mut self, inst: Inst, needed_space: u32)139     fn emit_with_island(&mut self, inst: Inst, needed_space: u32) {
140         if self.buffer.island_needed(needed_space) {
141             let label = self.buffer.get_label();
142             let jmp = Inst::Jump {
143                 dest: BranchTarget::Label(label),
144             };
145             jmp.emit(&mut self.buffer, &self.emit_info, &mut self.emit_state);
146             self.buffer
147                 .emit_island(needed_space, self.emit_state.ctrl_plane_mut());
148             self.buffer
149                 .bind_label(label, self.emit_state.ctrl_plane_mut());
150         }
151         inst.emit(&mut self.buffer, &self.emit_info, &mut self.emit_state);
152     }
153 
154     /// Adds a constant to the constant pool, returning its address.
add_constant(&mut self, constant: &[u8]) -> VCodeConstant155     pub fn add_constant(&mut self, constant: &[u8]) -> VCodeConstant {
156         let handle = self.pool.register(constant, &mut self.buffer);
157         handle
158     }
159 
160     /// Store a pair of registers.
stp(&mut self, xt1: Reg, xt2: Reg, mem: PairAMode)161     pub fn stp(&mut self, xt1: Reg, xt2: Reg, mem: PairAMode) {
162         self.emit(Inst::StoreP64 {
163             rt: xt1.into(),
164             rt2: xt2.into(),
165             mem,
166             flags: MemFlags::trusted(),
167         });
168     }
169 
170     /// Store a register.
str(&mut self, reg: Reg, mem: AMode, size: OperandSize, flags: MemFlags)171     pub fn str(&mut self, reg: Reg, mem: AMode, size: OperandSize, flags: MemFlags) {
172         use OperandSize::*;
173         let inst = match (reg.is_int(), size) {
174             (_, S8) => Inst::Store8 {
175                 rd: reg.into(),
176                 mem,
177                 flags,
178             },
179             (_, S16) => Inst::Store16 {
180                 rd: reg.into(),
181                 mem,
182                 flags,
183             },
184             (true, S32) => Inst::Store32 {
185                 rd: reg.into(),
186                 mem,
187                 flags,
188             },
189             (false, S32) => Inst::FpuStore32 {
190                 rd: reg.into(),
191                 mem,
192                 flags,
193             },
194             (true, S64) => Inst::Store64 {
195                 rd: reg.into(),
196                 mem,
197                 flags,
198             },
199             (false, S64) => Inst::FpuStore64 {
200                 rd: reg.into(),
201                 mem,
202                 flags,
203             },
204             (_, S128) => Inst::FpuStore128 {
205                 rd: reg.into(),
206                 mem,
207                 flags,
208             },
209         };
210 
211         self.emit(inst);
212     }
213 
214     /// Load a signed register.
sload(&mut self, mem: AMode, rd: WritableReg, size: OperandSize, flags: MemFlags)215     pub fn sload(&mut self, mem: AMode, rd: WritableReg, size: OperandSize, flags: MemFlags) {
216         self.ldr(mem, rd, size, true, flags);
217     }
218 
219     /// Load an unsigned register.
uload(&mut self, mem: AMode, rd: WritableReg, size: OperandSize, flags: MemFlags)220     pub fn uload(&mut self, mem: AMode, rd: WritableReg, size: OperandSize, flags: MemFlags) {
221         self.ldr(mem, rd, size, false, flags);
222     }
223 
224     /// Load address into a register.
ldr( &mut self, mem: AMode, rd: WritableReg, size: OperandSize, signed: bool, flags: MemFlags, )225     fn ldr(
226         &mut self,
227         mem: AMode,
228         rd: WritableReg,
229         size: OperandSize,
230         signed: bool,
231         flags: MemFlags,
232     ) {
233         use OperandSize::*;
234         let writable_reg = rd.map(Into::into);
235 
236         let inst = match (rd.to_reg().is_int(), signed, size) {
237             (_, false, S8) => Inst::ULoad8 {
238                 rd: writable_reg,
239                 mem,
240                 flags,
241             },
242             (_, true, S8) => Inst::SLoad8 {
243                 rd: writable_reg,
244                 mem,
245                 flags,
246             },
247             (_, false, S16) => Inst::ULoad16 {
248                 rd: writable_reg,
249                 mem,
250                 flags,
251             },
252             (_, true, S16) => Inst::SLoad16 {
253                 rd: writable_reg,
254                 mem,
255                 flags,
256             },
257             (true, false, S32) => Inst::ULoad32 {
258                 rd: writable_reg,
259                 mem,
260                 flags,
261             },
262             (false, _, S32) => Inst::FpuLoad32 {
263                 rd: writable_reg,
264                 mem,
265                 flags,
266             },
267             (true, true, S32) => Inst::SLoad32 {
268                 rd: writable_reg,
269                 mem,
270                 flags,
271             },
272             (true, _, S64) => Inst::ULoad64 {
273                 rd: writable_reg,
274                 mem,
275                 flags,
276             },
277             (false, _, S64) => Inst::FpuLoad64 {
278                 rd: writable_reg,
279                 mem,
280                 flags,
281             },
282             (_, _, S128) => Inst::FpuLoad128 {
283                 rd: writable_reg,
284                 mem,
285                 flags,
286             },
287         };
288 
289         self.emit(inst);
290     }
291 
292     /// Load a pair of registers.
ldp(&mut self, xt1: Reg, xt2: Reg, mem: PairAMode)293     pub fn ldp(&mut self, xt1: Reg, xt2: Reg, mem: PairAMode) {
294         let writable_xt1 = Writable::from_reg(xt1.into());
295         let writable_xt2 = Writable::from_reg(xt2.into());
296 
297         self.emit(Inst::LoadP64 {
298             rt: writable_xt1,
299             rt2: writable_xt2,
300             mem,
301             flags: MemFlags::trusted(),
302         });
303     }
304 
305     /// Emit a series of instructions to move an arbitrary 64-bit immediate
306     /// into the destination register.
307     /// The emitted instructions will depend on the destination register class.
mov_ir(&mut self, rd: WritableReg, imm: Imm, size: OperandSize)308     pub fn mov_ir(&mut self, rd: WritableReg, imm: Imm, size: OperandSize) {
309         match rd.to_reg().class() {
310             RegClass::Int => {
311                 Inst::load_constant(rd.map(Into::into), imm.unwrap_as_u64())
312                     .into_iter()
313                     .for_each(|i| self.emit(i));
314             }
315             RegClass::Float => {
316                 match ASIMDFPModImm::maybe_from_u64(imm.unwrap_as_u64(), size.into()) {
317                     Some(imm) => {
318                         self.emit(Inst::FpuMoveFPImm {
319                             rd: rd.map(Into::into),
320                             imm,
321                             size: size.into(),
322                         });
323                     }
324                     _ => {
325                         let constant = self.add_constant(&imm.to_bytes());
326                         let addr = AMode::Const { addr: constant };
327                         self.uload(addr, rd, size, TRUSTED_FLAGS);
328                     }
329                 }
330             }
331             _ => unreachable!(),
332         }
333     }
334 
335     /// Register to register move.
mov_rr(&mut self, rm: Reg, rd: WritableReg, size: OperandSize)336     pub fn mov_rr(&mut self, rm: Reg, rd: WritableReg, size: OperandSize) {
337         let writable_rd = rd.map(Into::into);
338         self.emit(Inst::Mov {
339             size: size.into(),
340             rd: writable_rd,
341             rm: rm.into(),
342         });
343     }
344 
345     /// Floating point register to register move.
fmov_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize)346     pub fn fmov_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {
347         let writable = rd.map(Into::into);
348         let inst = match size {
349             OperandSize::S32 => Inst::FpuMove32 {
350                 rd: writable,
351                 rn: rn.into(),
352             },
353             OperandSize::S64 => Inst::FpuMove64 {
354                 rd: writable,
355                 rn: rn.into(),
356             },
357             _ => unreachable!(),
358         };
359 
360         self.emit(inst);
361     }
362 
mov_to_fpu(&mut self, rn: Reg, rd: WritableReg, size: OperandSize)363     pub fn mov_to_fpu(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {
364         let writable_rd = rd.map(Into::into);
365         self.emit(Inst::MovToFpu {
366             size: size.into(),
367             rd: writable_rd,
368             rn: rn.into(),
369         });
370     }
371 
mov_from_vec(&mut self, rn: Reg, rd: WritableReg, idx: u8, size: OperandSize)372     pub fn mov_from_vec(&mut self, rn: Reg, rd: WritableReg, idx: u8, size: OperandSize) {
373         self.emit(Inst::MovFromVec {
374             rd: rd.map(Into::into),
375             rn: rn.into(),
376             idx,
377             size: size.into(),
378         });
379     }
380 
381     /// Add immediate and register.
add_ir(&mut self, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize)382     pub fn add_ir(&mut self, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize) {
383         self.alu_rri(ALUOp::Add, imm, rn, rd, size);
384     }
385 
386     /// Add immediate and register, setting overflow flags.
adds_ir(&mut self, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize)387     pub fn adds_ir(&mut self, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize) {
388         self.alu_rri(ALUOp::AddS, imm, rn, rd, size);
389     }
390 
391     /// Add with three registers.
add_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize)392     pub fn add_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
393         self.alu_rrr_extend(ALUOp::Add, rm, rn, rd, size, ExtendOp::UXTX);
394     }
395 
396     /// Add with three registers and explicit extend operation.
add_rrr_with_extend( &mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize, extendop: ExtendOp, )397     pub fn add_rrr_with_extend(
398         &mut self,
399         rm: Reg,
400         rn: Reg,
401         rd: WritableReg,
402         size: OperandSize,
403         extendop: ExtendOp,
404     ) {
405         self.alu_rrr_extend(ALUOp::Add, rm, rn, rd, size, extendop);
406     }
407 
408     /// Add with three registers, setting overflow flags.
adds_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize)409     pub fn adds_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
410         self.alu_rrr_extend(ALUOp::AddS, rm, rn, rd, size, ExtendOp::UXTX);
411     }
412 
413     /// Add across Vector.
addv(&mut self, rn: Reg, rd: WritableReg, size: VectorSize)414     pub fn addv(&mut self, rn: Reg, rd: WritableReg, size: VectorSize) {
415         self.emit(Inst::VecLanes {
416             op: VecLanesOp::Addv,
417             rd: rd.map(Into::into),
418             rn: rn.into(),
419             size,
420         });
421     }
422 
423     /// Subtract immediate and register.
sub_ir(&mut self, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize)424     pub fn sub_ir(&mut self, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize) {
425         self.alu_rri(ALUOp::Sub, imm, rn, rd, size);
426     }
427 
428     /// Subtract immediate and register, setting flags.
subs_ir(&mut self, imm: Imm12, rn: Reg, size: OperandSize)429     pub fn subs_ir(&mut self, imm: Imm12, rn: Reg, size: OperandSize) {
430         self.alu_rri(ALUOp::SubS, imm, rn, writable!(regs::zero()), size);
431     }
432 
433     /// Subtract with three registers.
sub_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize)434     pub fn sub_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
435         self.alu_rrr_extend(ALUOp::Sub, rm, rn, rd, size, ExtendOp::UXTX);
436     }
437 
438     /// Subtract with three registers, setting flags.
subs_rrr(&mut self, rm: Reg, rn: Reg, size: OperandSize)439     pub fn subs_rrr(&mut self, rm: Reg, rn: Reg, size: OperandSize) {
440         self.alu_rrr_extend(
441             ALUOp::SubS,
442             rm,
443             rn,
444             writable!(regs::zero()),
445             size,
446             ExtendOp::UXTX,
447         );
448     }
449 
450     /// Multiply with three registers.
mul_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize)451     pub fn mul_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
452         self.alu_rrrr(ALUOp3::MAdd, rm, rn, rd, regs::zero(), size);
453     }
454 
455     /// Signed/unsigned division with three registers.
div_rrr( &mut self, divisor: Reg, dividend: Reg, dest: Writable<Reg>, kind: DivKind, size: OperandSize, )456     pub fn div_rrr(
457         &mut self,
458         divisor: Reg,
459         dividend: Reg,
460         dest: Writable<Reg>,
461         kind: DivKind,
462         size: OperandSize,
463     ) {
464         // Check for division by 0.
465         self.trapz(divisor, TrapCode::INTEGER_DIVISION_BY_ZERO, size);
466 
467         // check for overflow
468         if kind == DivKind::Signed {
469             // Check for divisor overflow.
470             self.alu_rri(
471                 ALUOp::AddS,
472                 Imm12::maybe_from_u64(1).expect("1 to fit in 12 bits"),
473                 divisor,
474                 writable!(zero()),
475                 size,
476             );
477 
478             // Check if the dividend is 1.
479             self.emit(Inst::CCmpImm {
480                 size: size.into(),
481                 rn: dividend.into(),
482                 imm: UImm5::maybe_from_u8(1).expect("1 fits in 5 bits"),
483                 nzcv: NZCV::new(false, false, false, false),
484                 cond: Cond::Eq,
485             });
486 
487             // Finally, trap if the previous operation overflowed.
488             self.trapif(Cond::Vs, TrapCode::INTEGER_OVERFLOW);
489         }
490 
491         // `cranelift-codegen` doesn't support emitting sdiv for anything but I64,
492         // we therefore sign-extend the operand.
493         // see: https://github.com/bytecodealliance/wasmtime/issues/9766
494         let size = if size == OperandSize::S32 && kind == DivKind::Signed {
495             self.extend(
496                 divisor,
497                 writable!(divisor),
498                 ExtendKind::Signed(Extend::<Signed>::I64Extend32),
499             );
500             self.extend(
501                 dividend,
502                 writable!(dividend),
503                 ExtendKind::Signed(Extend::<Signed>::I64Extend32),
504             );
505             OperandSize::S64
506         } else {
507             size
508         };
509 
510         let op = match kind {
511             DivKind::Signed => ALUOp::SDiv,
512             DivKind::Unsigned => ALUOp::UDiv,
513         };
514 
515         self.alu_rrr(op, divisor, dividend, dest.map(Into::into), size);
516     }
517 
518     /// Signed/unsigned remainder operation with three registers.
rem_rrr( &mut self, divisor: Reg, dividend: Reg, dest: Writable<Reg>, scratch: WritableReg, kind: RemKind, size: OperandSize, )519     pub fn rem_rrr(
520         &mut self,
521         divisor: Reg,
522         dividend: Reg,
523         dest: Writable<Reg>,
524         scratch: WritableReg,
525         kind: RemKind,
526         size: OperandSize,
527     ) {
528         // Check for division by 0
529         self.trapz(divisor, TrapCode::INTEGER_DIVISION_BY_ZERO, size);
530 
531         // `cranelift-codegen` doesn't support emitting sdiv for anything but I64,
532         // we therefore sign-extend the operand.
533         // see: https://github.com/bytecodealliance/wasmtime/issues/9766
534         let size = if size == OperandSize::S32 && kind.is_signed() {
535             self.extend(
536                 divisor,
537                 writable!(divisor),
538                 ExtendKind::Signed(Extend::<Signed>::I64Extend32),
539             );
540             self.extend(
541                 dividend,
542                 writable!(dividend),
543                 ExtendKind::Signed(Extend::<Signed>::I64Extend32),
544             );
545             OperandSize::S64
546         } else {
547             size
548         };
549 
550         let op = match kind {
551             RemKind::Signed => ALUOp::SDiv,
552             RemKind::Unsigned => ALUOp::UDiv,
553         };
554 
555         self.alu_rrr(op, divisor, dividend, scratch, size);
556 
557         self.alu_rrrr(
558             ALUOp3::MSub,
559             scratch.to_reg(),
560             divisor,
561             dest.map(Into::into),
562             dividend,
563             size,
564         );
565     }
566 
567     /// And with three registers.
and_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize)568     pub fn and_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
569         self.alu_rrr(ALUOp::And, rm, rn, rd, size);
570     }
571 
572     /// And immediate and register.
and_ir(&mut self, imm: ImmLogic, rn: Reg, rd: WritableReg, size: OperandSize)573     pub fn and_ir(&mut self, imm: ImmLogic, rn: Reg, rd: WritableReg, size: OperandSize) {
574         self.alu_rri_logic(ALUOp::And, imm, rn, rd, size);
575     }
576 
577     /// Or with three registers.
or_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize)578     pub fn or_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
579         self.alu_rrr(ALUOp::Orr, rm, rn, rd, size);
580     }
581 
582     /// Or immediate and register.
or_ir(&mut self, imm: ImmLogic, rn: Reg, rd: WritableReg, size: OperandSize)583     pub fn or_ir(&mut self, imm: ImmLogic, rn: Reg, rd: WritableReg, size: OperandSize) {
584         self.alu_rri_logic(ALUOp::Orr, imm, rn, rd, size);
585     }
586 
587     /// Xor with three registers.
xor_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize)588     pub fn xor_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
589         self.alu_rrr(ALUOp::Eor, rm, rn, rd, size);
590     }
591 
592     /// Xor immediate and register.
xor_ir(&mut self, imm: ImmLogic, rn: Reg, rd: WritableReg, size: OperandSize)593     pub fn xor_ir(&mut self, imm: ImmLogic, rn: Reg, rd: WritableReg, size: OperandSize) {
594         self.alu_rri_logic(ALUOp::Eor, imm, rn, rd, size);
595     }
596 
597     /// Shift with three registers.
shift_rrr( &mut self, rm: Reg, rn: Reg, rd: WritableReg, kind: ShiftKind, size: OperandSize, )598     pub fn shift_rrr(
599         &mut self,
600         rm: Reg,
601         rn: Reg,
602         rd: WritableReg,
603         kind: ShiftKind,
604         size: OperandSize,
605     ) {
606         let shift_op: ALUOp = kind.into();
607         // In the case of rotate left, we negate the register containing the
608         // shift value.
609         if kind == ShiftKind::Rotl {
610             self.alu_rrr(ALUOp::Sub, rm, regs::zero(), writable!(rm), size);
611             self.alu_rrr(shift_op, rm, rn, rd, size);
612         } else {
613             self.alu_rrr(shift_op, rm, rn, rd, size);
614         }
615     }
616 
617     /// Shift immediate and register.
shift_ir( &mut self, imm: ImmShift, rn: Reg, rd: WritableReg, kind: ShiftKind, size: OperandSize, )618     pub fn shift_ir(
619         &mut self,
620         imm: ImmShift,
621         rn: Reg,
622         rd: WritableReg,
623         kind: ShiftKind,
624         size: OperandSize,
625     ) {
626         let shift_op: ALUOp = kind.into();
627         // In the case of rotate left, we emit rotate right with type_size -
628         // value.
629         if kind == ShiftKind::Rotl {
630             let value_size = size.num_bits();
631             let mut imm_val = value_size.wrapping_sub(imm.value());
632             imm_val &= value_size - 1;
633             let negated_imm = ImmShift::maybe_from_u64(imm_val as u64).unwrap();
634 
635             self.alu_rri_shift(shift_op, negated_imm, rn, rd, size);
636         } else {
637             self.alu_rri_shift(shift_op, imm, rn, rd, size);
638         }
639     }
640 
641     /// Count Leading Zeros.
clz(&mut self, rn: Reg, rd: WritableReg, size: OperandSize)642     pub fn clz(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {
643         self.bit_rr(BitOp::Clz, rn, rd, size);
644     }
645 
646     /// Reverse Bits reverses the bit order in a register.
rbit(&mut self, rn: Reg, rd: WritableReg, size: OperandSize)647     pub fn rbit(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {
648         self.bit_rr(BitOp::RBit, rn, rd, size);
649     }
650 
651     /// Float add with three registers.
fadd_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize)652     pub fn fadd_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
653         self.fpu_rrr(FPUOp2::Add, rm, rn, rd, size);
654     }
655 
656     /// Float sub with three registers.
fsub_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize)657     pub fn fsub_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
658         self.fpu_rrr(FPUOp2::Sub, rm, rn, rd, size);
659     }
660 
661     /// Float multiply with three registers.
fmul_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize)662     pub fn fmul_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
663         self.fpu_rrr(FPUOp2::Mul, rm, rn, rd, size);
664     }
665 
666     /// Float division with three registers.
fdiv_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize)667     pub fn fdiv_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
668         self.fpu_rrr(FPUOp2::Div, rm, rn, rd, size);
669     }
670 
671     /// Float max with three registers.
fmax_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize)672     pub fn fmax_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
673         self.fpu_rrr(FPUOp2::Max, rm, rn, rd, size);
674     }
675 
676     /// Float min with three registers.
fmin_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize)677     pub fn fmin_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
678         self.fpu_rrr(FPUOp2::Min, rm, rn, rd, size);
679     }
680 
681     /// Float neg with two registers.
fneg_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize)682     pub fn fneg_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {
683         self.fpu_rr(FPUOp1::Neg, rn, rd, size);
684     }
685 
686     /// Float abs with two registers.
fabs_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize)687     pub fn fabs_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {
688         self.fpu_rr(FPUOp1::Abs, rn, rd, size);
689     }
690 
691     /// Float sqrt with two registers.
fsqrt_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize)692     pub fn fsqrt_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {
693         self.fpu_rr(FPUOp1::Sqrt, rn, rd, size);
694     }
695 
696     /// Float round (ceil, trunc, floor) with two registers.
fround_rr(&mut self, rn: Reg, rd: WritableReg, mode: RoundingMode, size: OperandSize)697     pub fn fround_rr(&mut self, rn: Reg, rd: WritableReg, mode: RoundingMode, size: OperandSize) {
698         let fpu_mode = match (mode, size) {
699             (RoundingMode::Nearest, OperandSize::S32) => FpuRoundMode::Nearest32,
700             (RoundingMode::Up, OperandSize::S32) => FpuRoundMode::Plus32,
701             (RoundingMode::Down, OperandSize::S32) => FpuRoundMode::Minus32,
702             (RoundingMode::Zero, OperandSize::S32) => FpuRoundMode::Zero32,
703             (RoundingMode::Nearest, OperandSize::S64) => FpuRoundMode::Nearest64,
704             (RoundingMode::Up, OperandSize::S64) => FpuRoundMode::Plus64,
705             (RoundingMode::Down, OperandSize::S64) => FpuRoundMode::Minus64,
706             (RoundingMode::Zero, OperandSize::S64) => FpuRoundMode::Zero64,
707             (m, o) => panic!("Invalid rounding mode or operand size {m:?}, {o:?}"),
708         };
709         self.fpu_round(fpu_mode, rn, rd)
710     }
711 
712     /// Float unsigned shift right with two registers and an immediate.
fushr_rri(&mut self, rn: Reg, rd: WritableReg, amount: u8, size: OperandSize)713     pub fn fushr_rri(&mut self, rn: Reg, rd: WritableReg, amount: u8, size: OperandSize) {
714         let imm = FPURightShiftImm {
715             amount,
716             lane_size_in_bits: size.num_bits(),
717         };
718         let ushr = match size {
719             OperandSize::S32 => UShr32(imm),
720             OperandSize::S64 => UShr64(imm),
721             _ => unreachable!(),
722         };
723         self.fpu_rri(ushr, rn, rd)
724     }
725 
726     /// Float unsigned shift left and insert with three registers
727     /// and an immediate.
fsli_rri_mod( &mut self, ri: Reg, rn: Reg, rd: WritableReg, amount: u8, size: OperandSize, )728     pub fn fsli_rri_mod(
729         &mut self,
730         ri: Reg,
731         rn: Reg,
732         rd: WritableReg,
733         amount: u8,
734         size: OperandSize,
735     ) {
736         let imm = FPULeftShiftImm {
737             amount,
738             lane_size_in_bits: size.num_bits(),
739         };
740         let sli = match size {
741             OperandSize::S32 => FPUOpRIMod::Sli32(imm),
742             OperandSize::S64 => FPUOpRIMod::Sli64(imm),
743             _ => unreachable!(),
744         };
745         self.fpu_rri_mod(sli, ri, rn, rd)
746     }
747 
748     /// Float compare.
fcmp(&mut self, rn: Reg, rm: Reg, size: OperandSize)749     pub fn fcmp(&mut self, rn: Reg, rm: Reg, size: OperandSize) {
750         self.emit(Inst::FpuCmp {
751             size: size.into(),
752             rn: rn.into(),
753             rm: rm.into(),
754         })
755     }
756 
757     /// Convert an signed integer to a float.
cvt_sint_to_float( &mut self, rn: Reg, rd: WritableReg, src_size: OperandSize, dst_size: OperandSize, )758     pub fn cvt_sint_to_float(
759         &mut self,
760         rn: Reg,
761         rd: WritableReg,
762         src_size: OperandSize,
763         dst_size: OperandSize,
764     ) {
765         let op = match (src_size, dst_size) {
766             (OperandSize::S32, OperandSize::S32) => IntToFpuOp::I32ToF32,
767             (OperandSize::S64, OperandSize::S32) => IntToFpuOp::I64ToF32,
768             (OperandSize::S32, OperandSize::S64) => IntToFpuOp::I32ToF64,
769             (OperandSize::S64, OperandSize::S64) => IntToFpuOp::I64ToF64,
770             _ => unreachable!(),
771         };
772 
773         self.emit(Inst::IntToFpu {
774             op,
775             rd: rd.map(Into::into),
776             rn: rn.into(),
777         });
778     }
779 
780     /// Convert an unsigned integer to a float.
cvt_uint_to_float( &mut self, rn: Reg, rd: WritableReg, src_size: OperandSize, dst_size: OperandSize, )781     pub fn cvt_uint_to_float(
782         &mut self,
783         rn: Reg,
784         rd: WritableReg,
785         src_size: OperandSize,
786         dst_size: OperandSize,
787     ) {
788         let op = match (src_size, dst_size) {
789             (OperandSize::S32, OperandSize::S32) => IntToFpuOp::U32ToF32,
790             (OperandSize::S64, OperandSize::S32) => IntToFpuOp::U64ToF32,
791             (OperandSize::S32, OperandSize::S64) => IntToFpuOp::U32ToF64,
792             (OperandSize::S64, OperandSize::S64) => IntToFpuOp::U64ToF64,
793             _ => unreachable!(),
794         };
795 
796         self.emit(Inst::IntToFpu {
797             op,
798             rd: rd.map(Into::into),
799             rn: rn.into(),
800         });
801     }
802 
803     /// Change precision of float.
cvt_float_to_float( &mut self, rn: Reg, rd: WritableReg, src_size: OperandSize, dst_size: OperandSize, )804     pub fn cvt_float_to_float(
805         &mut self,
806         rn: Reg,
807         rd: WritableReg,
808         src_size: OperandSize,
809         dst_size: OperandSize,
810     ) {
811         let (fpu_op, size) = match (src_size, dst_size) {
812             (OperandSize::S32, OperandSize::S64) => (FPUOp1::Cvt32To64, ScalarSize::Size32),
813             (OperandSize::S64, OperandSize::S32) => (FPUOp1::Cvt64To32, ScalarSize::Size64),
814             _ => unimplemented!(),
815         };
816         self.emit(Inst::FpuRR {
817             fpu_op,
818             size,
819             rd: rd.map(Into::into),
820             rn: rn.into(),
821         });
822     }
823 
824     /// Return instruction.
ret(&mut self)825     pub fn ret(&mut self) {
826         self.emit(Inst::Ret {});
827     }
828 
829     /// An unconditional branch.
jmp(&mut self, target: MachLabel)830     pub fn jmp(&mut self, target: MachLabel) {
831         self.emit(Inst::Jump {
832             dest: BranchTarget::Label(target),
833         });
834     }
835 
836     /// A conditional branch.
jmp_if(&mut self, kind: Cond, taken: MachLabel)837     pub fn jmp_if(&mut self, kind: Cond, taken: MachLabel) {
838         self.emit(Inst::CondBr {
839             taken: BranchTarget::Label(taken),
840             not_taken: BranchTarget::ResolvedOffset(4),
841             kind: CondBrKind::Cond(kind),
842         });
843     }
844 
845     /// Emits a jump table sequence.
jmp_table( &mut self, targets: &[MachLabel], default: MachLabel, index: Reg, tmp1: Reg, tmp2: Reg, )846     pub fn jmp_table(
847         &mut self,
848         targets: &[MachLabel],
849         default: MachLabel,
850         index: Reg,
851         tmp1: Reg,
852         tmp2: Reg,
853     ) {
854         self.emit_with_island(
855             Inst::JTSequence {
856                 default,
857                 targets: Box::new(targets.to_vec()),
858                 ridx: index.into(),
859                 rtmp1: Writable::from_reg(tmp1.into()),
860                 rtmp2: Writable::from_reg(tmp2.into()),
861             },
862             // number of bytes needed for the jumptable sequence:
863             // 4 bytes per instruction, with 8 instructions base + the size of
864             // the jumptable more.
865             (4 * (8 + targets.len())).try_into().unwrap(),
866         );
867     }
868 
869     /// Conditional Set sets the destination register to 1 if the condition
870     /// is true, and otherwise sets it to 0.
cset(&mut self, rd: WritableReg, cond: Cond)871     pub fn cset(&mut self, rd: WritableReg, cond: Cond) {
872         self.emit(Inst::CSet {
873             rd: rd.map(Into::into),
874             cond,
875         });
876     }
877 
878     /// If the condition is true, `csel` writes rn to rd. If the
879     /// condition is false, it writes rm to rd
csel(&mut self, rn: Reg, rm: Reg, rd: WritableReg, cond: Cond)880     pub fn csel(&mut self, rn: Reg, rm: Reg, rd: WritableReg, cond: Cond) {
881         self.emit(Inst::CSel {
882             rd: rd.map(Into::into),
883             rn: rn.into(),
884             rm: rm.into(),
885             cond,
886         });
887     }
888 
889     /// If the condition is true, `csel` writes rn to rd. If the
890     /// condition is false, it writes rm to rd
fpu_csel(&mut self, rn: Reg, rm: Reg, rd: WritableReg, cond: Cond, size: OperandSize)891     pub fn fpu_csel(&mut self, rn: Reg, rm: Reg, rd: WritableReg, cond: Cond, size: OperandSize) {
892         match size {
893             OperandSize::S32 => {
894                 self.emit(Inst::FpuCSel32 {
895                     rd: rd.map(Into::into),
896                     rn: rn.into(),
897                     rm: rm.into(),
898                     cond,
899                 });
900             }
901             OperandSize::S64 => {
902                 self.emit(Inst::FpuCSel64 {
903                     rd: rd.map(Into::into),
904                     rn: rn.into(),
905                     rm: rm.into(),
906                     cond,
907                 });
908             }
909             _ => todo!(),
910         }
911     }
912 
913     /// Population count per byte.
cnt(&mut self, rd: WritableReg)914     pub fn cnt(&mut self, rd: WritableReg) {
915         self.emit(Inst::VecMisc {
916             op: VecMisc2::Cnt,
917             rd: rd.map(Into::into),
918             rn: rd.to_reg().into(),
919             size: VectorSize::Size8x8,
920         });
921     }
922 
extend(&mut self, rn: Reg, rd: WritableReg, kind: ExtendKind)923     pub fn extend(&mut self, rn: Reg, rd: WritableReg, kind: ExtendKind) {
924         self.emit(Inst::Extend {
925             rd: rd.map(Into::into),
926             rn: rn.into(),
927             signed: kind.signed(),
928             from_bits: kind.from_bits(),
929             to_bits: kind.to_bits(),
930         })
931     }
932 
933     /// Bitwise AND (shifted register), setting flags.
ands_rr(&mut self, rn: Reg, rm: Reg, size: OperandSize)934     pub fn ands_rr(&mut self, rn: Reg, rm: Reg, size: OperandSize) {
935         self.alu_rrr(ALUOp::AndS, rm, rn, writable!(regs::zero()), size);
936     }
937 
938     /// Permanently Undefined.
udf(&mut self, code: TrapCode)939     pub fn udf(&mut self, code: TrapCode) {
940         self.emit(Inst::Udf { trap_code: code });
941     }
942 
943     /// Conditional trap.
trapif(&mut self, cc: Cond, code: TrapCode)944     pub fn trapif(&mut self, cc: Cond, code: TrapCode) {
945         self.emit(Inst::TrapIf {
946             kind: CondBrKind::Cond(cc),
947             trap_code: code,
948         });
949     }
950 
951     /// Trap if `rn` is zero.
trapz(&mut self, rn: Reg, code: TrapCode, size: OperandSize)952     pub fn trapz(&mut self, rn: Reg, code: TrapCode, size: OperandSize) {
953         self.emit(Inst::TrapIf {
954             kind: CondBrKind::Zero(rn.into(), size.into()),
955             trap_code: code,
956         });
957     }
958 
959     // Helpers for ALU operations.
960 
alu_rri(&mut self, op: ALUOp, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize)961     fn alu_rri(&mut self, op: ALUOp, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize) {
962         self.emit(Inst::AluRRImm12 {
963             alu_op: op,
964             size: size.into(),
965             rd: rd.map(Into::into),
966             rn: rn.into(),
967             imm12: imm,
968         });
969     }
970 
alu_rri_logic( &mut self, op: ALUOp, imm: ImmLogic, rn: Reg, rd: WritableReg, size: OperandSize, )971     fn alu_rri_logic(
972         &mut self,
973         op: ALUOp,
974         imm: ImmLogic,
975         rn: Reg,
976         rd: WritableReg,
977         size: OperandSize,
978     ) {
979         self.emit(Inst::AluRRImmLogic {
980             alu_op: op,
981             size: size.into(),
982             rd: rd.map(Into::into),
983             rn: rn.into(),
984             imml: imm,
985         });
986     }
987 
alu_rri_shift( &mut self, op: ALUOp, imm: ImmShift, rn: Reg, rd: WritableReg, size: OperandSize, )988     fn alu_rri_shift(
989         &mut self,
990         op: ALUOp,
991         imm: ImmShift,
992         rn: Reg,
993         rd: WritableReg,
994         size: OperandSize,
995     ) {
996         self.emit(Inst::AluRRImmShift {
997             alu_op: op,
998             size: size.into(),
999             rd: rd.map(Into::into),
1000             rn: rn.into(),
1001             immshift: imm,
1002         });
1003     }
1004 
alu_rrr(&mut self, op: ALUOp, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize)1005     fn alu_rrr(&mut self, op: ALUOp, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
1006         self.emit(Inst::AluRRR {
1007             alu_op: op,
1008             size: size.into(),
1009             rd: rd.map(Into::into),
1010             rn: rn.into(),
1011             rm: rm.into(),
1012         });
1013     }
1014 
alu_rrr_extend( &mut self, op: ALUOp, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize, extendop: ExtendOp, )1015     fn alu_rrr_extend(
1016         &mut self,
1017         op: ALUOp,
1018         rm: Reg,
1019         rn: Reg,
1020         rd: WritableReg,
1021         size: OperandSize,
1022         extendop: ExtendOp,
1023     ) {
1024         self.emit(Inst::AluRRRExtend {
1025             alu_op: op,
1026             size: size.into(),
1027             rd: rd.map(Into::into),
1028             rn: rn.into(),
1029             rm: rm.into(),
1030             extendop,
1031         });
1032     }
1033 
alu_rrrr( &mut self, op: ALUOp3, rm: Reg, rn: Reg, rd: WritableReg, ra: Reg, size: OperandSize, )1034     fn alu_rrrr(
1035         &mut self,
1036         op: ALUOp3,
1037         rm: Reg,
1038         rn: Reg,
1039         rd: WritableReg,
1040         ra: Reg,
1041         size: OperandSize,
1042     ) {
1043         self.emit(Inst::AluRRRR {
1044             alu_op: op,
1045             size: size.into(),
1046             rd: rd.map(Into::into),
1047             rn: rn.into(),
1048             rm: rm.into(),
1049             ra: ra.into(),
1050         });
1051     }
1052 
fpu_rrr(&mut self, op: FPUOp2, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize)1053     fn fpu_rrr(&mut self, op: FPUOp2, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
1054         self.emit(Inst::FpuRRR {
1055             fpu_op: op,
1056             size: size.into(),
1057             rd: rd.map(Into::into),
1058             rn: rn.into(),
1059             rm: rm.into(),
1060         });
1061     }
1062 
fpu_rri(&mut self, op: FPUOpRI, rn: Reg, rd: WritableReg)1063     fn fpu_rri(&mut self, op: FPUOpRI, rn: Reg, rd: WritableReg) {
1064         self.emit(Inst::FpuRRI {
1065             fpu_op: op,
1066             rd: rd.map(Into::into),
1067             rn: rn.into(),
1068         });
1069     }
1070 
fpu_rri_mod(&mut self, op: FPUOpRIMod, ri: Reg, rn: Reg, rd: WritableReg)1071     fn fpu_rri_mod(&mut self, op: FPUOpRIMod, ri: Reg, rn: Reg, rd: WritableReg) {
1072         self.emit(Inst::FpuRRIMod {
1073             fpu_op: op,
1074             rd: rd.map(Into::into),
1075             ri: ri.into(),
1076             rn: rn.into(),
1077         });
1078     }
1079 
fpu_rr(&mut self, op: FPUOp1, rn: Reg, rd: WritableReg, size: OperandSize)1080     fn fpu_rr(&mut self, op: FPUOp1, rn: Reg, rd: WritableReg, size: OperandSize) {
1081         self.emit(Inst::FpuRR {
1082             fpu_op: op,
1083             size: size.into(),
1084             rd: rd.map(Into::into),
1085             rn: rn.into(),
1086         });
1087     }
1088 
fpu_round(&mut self, op: FpuRoundMode, rn: Reg, rd: WritableReg)1089     fn fpu_round(&mut self, op: FpuRoundMode, rn: Reg, rd: WritableReg) {
1090         self.emit(Inst::FpuRound {
1091             op,
1092             rd: rd.map(Into::into),
1093             rn: rn.into(),
1094         });
1095     }
1096 
bit_rr(&mut self, op: BitOp, rn: Reg, rd: WritableReg, size: OperandSize)1097     fn bit_rr(&mut self, op: BitOp, rn: Reg, rd: WritableReg, size: OperandSize) {
1098         self.emit(Inst::BitRR {
1099             op,
1100             size: size.into(),
1101             rd: rd.map(Into::into),
1102             rn: rn.into(),
1103         });
1104     }
1105 
1106     /// Get a label from the underlying machine code buffer.
get_label(&mut self) -> MachLabel1107     pub fn get_label(&mut self) -> MachLabel {
1108         self.buffer.get_label()
1109     }
1110 
1111     /// Get a mutable reference to underlying
1112     /// machine buffer.
buffer_mut(&mut self) -> &mut MachBuffer<Inst>1113     pub fn buffer_mut(&mut self) -> &mut MachBuffer<Inst> {
1114         &mut self.buffer
1115     }
1116 
1117     /// Get a reference to the underlying machine buffer.
buffer(&self) -> &MachBuffer<Inst>1118     pub fn buffer(&self) -> &MachBuffer<Inst> {
1119         &self.buffer
1120     }
1121 
1122     /// Emit a direct call to a function defined locally and
1123     /// referenced to by `name`.
call_with_name(&mut self, name: UserExternalNameRef, call_conv: CallingConvention)1124     pub fn call_with_name(&mut self, name: UserExternalNameRef, call_conv: CallingConvention) {
1125         self.emit(Inst::Call {
1126             info: Box::new(cranelift_codegen::CallInfo::empty(
1127                 ExternalName::user(name),
1128                 call_conv.into(),
1129             )),
1130         })
1131     }
1132 
1133     /// Emit an indirect call to a function whose address is
1134     /// stored the `callee` register.
call_with_reg(&mut self, callee: Reg, call_conv: CallingConvention)1135     pub fn call_with_reg(&mut self, callee: Reg, call_conv: CallingConvention) {
1136         self.emit(Inst::CallInd {
1137             info: Box::new(cranelift_codegen::CallInfo::empty(
1138                 callee.into(),
1139                 call_conv.into(),
1140             )),
1141         })
1142     }
1143 
1144     /// Load the min value for an integer of size out_size, as a floating-point
1145     /// of size `in-size`, into register `rd`.
min_fp_value( &mut self, signed: bool, in_size: OperandSize, out_size: OperandSize, rd: Writable<Reg>, )1146     fn min_fp_value(
1147         &mut self,
1148         signed: bool,
1149         in_size: OperandSize,
1150         out_size: OperandSize,
1151         rd: Writable<Reg>,
1152     ) {
1153         match in_size {
1154             OperandSize::S32 => {
1155                 let (min, _) = f32_cvt_to_int_bounds(signed, out_size.num_bits().into());
1156                 self.mov_ir(rd, Imm::f32(min.to_bits()), in_size);
1157             }
1158             OperandSize::S64 => {
1159                 let (min, _) = f64_cvt_to_int_bounds(signed, out_size.num_bits().into());
1160                 self.mov_ir(rd, Imm::f64(min.to_bits()), in_size);
1161             }
1162             s => unreachable!("unsupported floating-point size: {}bit", s.num_bits()),
1163         };
1164     }
1165 
1166     /// Load the max value for an integer of size out_size, as a floating-point
1167     /// of size `in_size`, into register `rd`.
max_fp_value( &mut self, signed: bool, in_size: OperandSize, out_size: OperandSize, rd: Writable<Reg>, )1168     fn max_fp_value(
1169         &mut self,
1170         signed: bool,
1171         in_size: OperandSize,
1172         out_size: OperandSize,
1173         rd: Writable<Reg>,
1174     ) {
1175         match in_size {
1176             OperandSize::S32 => {
1177                 let (_, max) = f32_cvt_to_int_bounds(signed, out_size.num_bits().into());
1178                 self.mov_ir(rd, Imm::f32(max.to_bits()), in_size);
1179             }
1180             OperandSize::S64 => {
1181                 let (_, max) = f64_cvt_to_int_bounds(signed, out_size.num_bits().into());
1182                 self.mov_ir(rd, Imm::f64(max.to_bits()), in_size);
1183             }
1184             s => unreachable!("unsupported floating-point size: {}bit", s.num_bits()),
1185         };
1186     }
1187 
1188     /// Emit instructions to check if the value in `rn` is NaN.
check_nan(&mut self, rn: Reg, size: OperandSize)1189     fn check_nan(&mut self, rn: Reg, size: OperandSize) {
1190         self.fcmp(rn, rn, size);
1191         self.trapif(Cond::Vs, TrapCode::BAD_CONVERSION_TO_INTEGER);
1192     }
1193 
1194     /// Convert the floating point of size `src_size` stored in `src`, into a integer of size
1195     /// `dst_size`, storing the result in `dst`.
fpu_to_int( &mut self, dst: Writable<Reg>, src: Reg, tmp_reg: WritableReg, src_size: OperandSize, dst_size: OperandSize, kind: TruncKind, signed: bool, )1196     pub fn fpu_to_int(
1197         &mut self,
1198         dst: Writable<Reg>,
1199         src: Reg,
1200         tmp_reg: WritableReg,
1201         src_size: OperandSize,
1202         dst_size: OperandSize,
1203         kind: TruncKind,
1204         signed: bool,
1205     ) {
1206         if kind.is_unchecked() {
1207             // Confusingly, when `kind` is `Unchecked` is when we actually need to perform the checks:
1208             // - check if fp is NaN
1209             // - check bounds
1210             self.check_nan(src, src_size);
1211 
1212             self.min_fp_value(signed, src_size, dst_size, tmp_reg);
1213             self.fcmp(src, tmp_reg.to_reg(), src_size);
1214             self.trapif(Cond::Le, TrapCode::INTEGER_OVERFLOW);
1215 
1216             self.max_fp_value(signed, src_size, dst_size, tmp_reg);
1217             self.fcmp(src, tmp_reg.to_reg(), src_size);
1218             self.trapif(Cond::Ge, TrapCode::INTEGER_OVERFLOW);
1219         }
1220 
1221         self.cvt_fpu_to_int(dst, src, src_size, dst_size, signed)
1222     }
1223 
1224     /// Select and emit the appropriate `fcvt*` instruction
cvt_fpu_to_int( &mut self, dst: Writable<Reg>, src: Reg, src_size: OperandSize, dst_size: OperandSize, signed: bool, )1225     pub fn cvt_fpu_to_int(
1226         &mut self,
1227         dst: Writable<Reg>,
1228         src: Reg,
1229         src_size: OperandSize,
1230         dst_size: OperandSize,
1231         signed: bool,
1232     ) {
1233         let op = match (src_size, dst_size, signed) {
1234             (OperandSize::S32, OperandSize::S32, false) => FpuToIntOp::F32ToU32,
1235             (OperandSize::S32, OperandSize::S32, true) => FpuToIntOp::F32ToI32,
1236             (OperandSize::S32, OperandSize::S64, false) => FpuToIntOp::F32ToU64,
1237             (OperandSize::S32, OperandSize::S64, true) => FpuToIntOp::F32ToI64,
1238             (OperandSize::S64, OperandSize::S32, false) => FpuToIntOp::F64ToU32,
1239             (OperandSize::S64, OperandSize::S32, true) => FpuToIntOp::F64ToI32,
1240             (OperandSize::S64, OperandSize::S64, false) => FpuToIntOp::F64ToU64,
1241             (OperandSize::S64, OperandSize::S64, true) => FpuToIntOp::F64ToI64,
1242             (fsize, int_size, signed) => unimplemented!(
1243                 "unsupported conversion: f{} to {}{}",
1244                 fsize.num_bits(),
1245                 if signed { "i" } else { "u" },
1246                 int_size.num_bits(),
1247             ),
1248         };
1249 
1250         self.emit(Inst::FpuToInt {
1251             op,
1252             rd: dst.map(Into::into),
1253             rn: src.into(),
1254         });
1255     }
1256 }
1257 
/// Captures the region in a [`MachBuffer`] where an add-with-immediate
/// instruction sequence is emitted before its immediate is known.
///
/// The value is created with [`PatchableAddToReg::new`], which writes a
/// placeholder sequence, and consumed by [`PatchableAddToReg::finalize`],
/// which overwrites that sequence once the immediate is available.
pub(crate) struct PatchableAddToReg {
    /// The region to be patched in the [`MachBuffer`]. It contains
    /// space for 3 32-bit instructions, i.e. it's 12 bytes long.
    region: PatchRegion,

    /// The destination register for the add instruction.
    reg: Writable<Reg>,

    /// The temporary register used to hold the immediate value.
    tmp: Writable<Reg>,
}
1271 
1272 impl PatchableAddToReg {
1273     /// Create a new [`PatchableAddToReg`] by capturing a region in the output
1274     /// buffer containing an instruction sequence that loads an immediate into a
1275     /// register `tmp`, then adds it to a register `reg`. The [`MachBuffer`]
1276     /// will have that instruction sequence written to the region, though the
1277     /// immediate loaded into `tmp` will be `0` until the `::finalize` method is
1278     /// called.
new(reg: Writable<Reg>, tmp: Writable<Reg>, buf: &mut MachBuffer<Inst>) -> Self1279     pub(crate) fn new(reg: Writable<Reg>, tmp: Writable<Reg>, buf: &mut MachBuffer<Inst>) -> Self {
1280         let insns = Self::add_immediate_instruction_sequence(reg, tmp, 0);
1281         let open = buf.start_patchable();
1282         buf.put_data(&insns);
1283         let region = buf.end_patchable(open);
1284 
1285         Self { region, reg, tmp }
1286     }
1287 
add_immediate_instruction_sequence( reg: Writable<Reg>, tmp: Writable<Reg>, imm: i32, ) -> [u8; 12]1288     fn add_immediate_instruction_sequence(
1289         reg: Writable<Reg>,
1290         tmp: Writable<Reg>,
1291         imm: i32,
1292     ) -> [u8; 12] {
1293         let imm_hi = imm as u64 & 0xffff_0000;
1294         let imm_hi = MoveWideConst::maybe_from_u64(imm_hi).unwrap();
1295 
1296         let imm_lo = imm as u64 & 0x0000_ffff;
1297         let imm_lo = MoveWideConst::maybe_from_u64(imm_lo).unwrap();
1298 
1299         let size = OperandSize::S64.into();
1300 
1301         let tmp = tmp.map(Into::into);
1302         let rd = reg.map(Into::into);
1303 
1304         // This is "movz to bits 16-31 of 64 bit reg tmp and zero the rest"
1305         let mov_insn = enc_move_wide(inst::MoveWideOp::MovZ, tmp, imm_hi, size);
1306 
1307         // This is "movk to bits 0-15 of 64 bit reg tmp"
1308         let movk_insn = enc_movk(tmp, imm_lo, size);
1309 
1310         // This is "add tmp to rd". The opcodes are somewhat buried in the
1311         // instruction encoder so we just repeat them here.
1312         let add_bits_31_21: u32 = 0b00001011_000 | (size.sf_bit() << 10);
1313         let add_bits_15_10: u32 = 0;
1314         let add_insn = enc_arith_rrr(
1315             add_bits_31_21,
1316             add_bits_15_10,
1317             rd,
1318             rd.to_reg(),
1319             tmp.to_reg(),
1320         );
1321 
1322         let mut buf = [0u8; 12];
1323         buf[0..4].copy_from_slice(&mov_insn.to_le_bytes());
1324         buf[4..8].copy_from_slice(&movk_insn.to_le_bytes());
1325         buf[8..12].copy_from_slice(&add_insn.to_le_bytes());
1326         buf
1327     }
1328 
1329     /// Patch the [`MachBuffer`] with the known constant to be added to the register. The final
1330     /// value is passed in as an i32, but the instruction encoding is fixed when
1331     /// [`PatchableAddToReg::new`] is called.
finalize(self, val: i32, buffer: &mut MachBuffer<Inst>)1332     pub(crate) fn finalize(self, val: i32, buffer: &mut MachBuffer<Inst>) {
1333         let insns = Self::add_immediate_instruction_sequence(self.reg, self.tmp, val);
1334         let slice = self.region.patch(buffer);
1335         assert_eq!(slice.len(), insns.len());
1336         slice.copy_from_slice(&insns);
1337     }
1338 }
1339