//! Assembler library implementation for Aarch64. use super::{address::Address, regs}; use crate::CallingConvention; use crate::aarch64::regs::zero; use crate::masm::{ DivKind, Extend, ExtendKind, FloatCmpKind, Imm, IntCmpKind, RemKind, RoundingMode, ShiftKind, Signed, TRUSTED_FLAGS, TruncKind, }; use crate::{ constant_pool::ConstantPool, masm::OperandSize, reg::{Reg, WritableReg, writable}, }; use cranelift_codegen::PatchRegion; use cranelift_codegen::isa::aarch64::inst::emit::{enc_arith_rrr, enc_move_wide, enc_movk}; use cranelift_codegen::isa::aarch64::inst::{ ASIMDFPModImm, FpuToIntOp, MoveWideConst, NZCV, UImm5, }; use cranelift_codegen::{ Final, MachBuffer, MachBufferFinalized, MachInst, MachInstEmit, MachInstEmitState, MachLabel, Writable, ir::{ExternalName, MemFlags, SourceLoc, TrapCode, UserExternalNameRef}, isa::aarch64::inst::{ self, ALUOp, ALUOp3, AMode, BitOp, BranchTarget, Cond, CondBrKind, ExtendOp, FPULeftShiftImm, FPUOp1, FPUOp2, FPUOpRI::{self, UShr32, UShr64}, FPUOpRIMod, FPURightShiftImm, FpuRoundMode, Imm12, ImmLogic, ImmShift, Inst, IntToFpuOp, PairAMode, ScalarSize, VecLanesOp, VecMisc2, VectorSize, emit::{EmitInfo, EmitState}, }, settings, }; use regalloc2::RegClass; use wasmtime_core::math::{f32_cvt_to_int_bounds, f64_cvt_to_int_bounds}; impl From for inst::OperandSize { fn from(size: OperandSize) -> Self { match size { OperandSize::S32 => Self::Size32, OperandSize::S64 => Self::Size64, s => panic!("Invalid operand size {s:?}"), } } } impl From for Cond { fn from(value: IntCmpKind) -> Self { match value { IntCmpKind::Eq => Cond::Eq, IntCmpKind::Ne => Cond::Ne, IntCmpKind::LtS => Cond::Lt, IntCmpKind::LtU => Cond::Lo, IntCmpKind::GtS => Cond::Gt, IntCmpKind::GtU => Cond::Hi, IntCmpKind::LeS => Cond::Le, IntCmpKind::LeU => Cond::Ls, IntCmpKind::GeS => Cond::Ge, IntCmpKind::GeU => Cond::Hs, } } } impl From for Cond { fn from(value: FloatCmpKind) -> Self { match value { FloatCmpKind::Eq => Cond::Eq, FloatCmpKind::Ne => Cond::Ne, 
FloatCmpKind::Lt => Cond::Mi, FloatCmpKind::Gt => Cond::Gt, FloatCmpKind::Le => Cond::Ls, FloatCmpKind::Ge => Cond::Ge, } } } impl From for ScalarSize { fn from(size: OperandSize) -> ScalarSize { match size { OperandSize::S8 => ScalarSize::Size8, OperandSize::S16 => ScalarSize::Size16, OperandSize::S32 => ScalarSize::Size32, OperandSize::S64 => ScalarSize::Size64, OperandSize::S128 => ScalarSize::Size128, } } } impl From for ALUOp { fn from(kind: ShiftKind) -> Self { match kind { ShiftKind::Shl => ALUOp::Lsl, ShiftKind::ShrS => ALUOp::Asr, ShiftKind::ShrU => ALUOp::Lsr, ShiftKind::Rotr => ALUOp::Extr, ShiftKind::Rotl => ALUOp::Extr, } } } /// Low level assembler implementation for Aarch64. pub(crate) struct Assembler { /// The machine instruction buffer. buffer: MachBuffer, /// Constant emission information. emit_info: EmitInfo, /// Emission state. emit_state: EmitState, /// Constant pool. pool: ConstantPool, } impl Assembler { /// Create a new Aarch64 assembler. pub fn new(shared_flags: settings::Flags) -> Self { Self { buffer: MachBuffer::::new(), emit_state: Default::default(), emit_info: EmitInfo::new(shared_flags), pool: ConstantPool::new(), } } } impl Assembler { /// Return the emitted code. 
pub fn finalize(mut self, loc: Option<SourceLoc>) -> MachBufferFinalized<Final> {
    let stencil = self
        .buffer
        .finish(&self.pool.constants(), self.emit_state.ctrl_plane_mut());
    // A missing source location falls back to the default one.
    stencil.apply_base_srcloc(loc.unwrap_or_default())
}

/// Emit `inst`, reserving room for a worst-case sized instruction.
fn emit(&mut self, inst: Inst) {
    self.emit_with_island(inst, Inst::worst_case_size());
}

/// Emit `inst`, first emitting an island when the buffer reports that one is
/// needed before `needed_space` more bytes are written.
fn emit_with_island(&mut self, inst: Inst, needed_space: u32) {
    if self.buffer.island_needed(needed_space) {
        // Jump over the island so that straight-line execution never runs
        // into its contents.
        let label = self.buffer.get_label();
        let jmp = Inst::Jump {
            dest: BranchTarget::Label(label),
        };
        jmp.emit(&mut self.buffer, &self.emit_info, &mut self.emit_state);
        self.buffer
            .emit_island(needed_space, self.emit_state.ctrl_plane_mut());
        self.buffer
            .bind_label(label, self.emit_state.ctrl_plane_mut());
    }
    inst.emit(&mut self.buffer, &self.emit_info, &mut self.emit_state);
}

/// Adds a constant to the constant pool, returning its address.
pub fn add_constant(&mut self, constant: &[u8]) -> Address {
    let handle = self.pool.register(constant, &mut self.buffer);
    Address::constant(handle)
}

/// Store a pair of 64-bit registers.
///
/// # Panics
///
/// Panics if `addr` cannot be converted into a pair addressing mode.
pub fn stp(&mut self, xt1: Reg, xt2: Reg, addr: Address) {
    let mem: PairAMode = addr.try_into().unwrap();
    self.emit(Inst::StoreP64 {
        rt: xt1.into(),
        rt2: xt2.into(),
        mem,
        flags: MemFlags::trusted(),
    });
}

/// Store a register.
///
/// 8-bit and 16-bit stores use the integer store instructions regardless of
/// the register class; 32-bit and 64-bit stores pick the integer or
/// floating-point form based on the class of `reg`; 128-bit stores always
/// use the floating-point form.
///
/// # Panics
///
/// Panics if `addr` cannot be converted into an addressing mode.
pub fn str(&mut self, reg: Reg, addr: Address, size: OperandSize, flags: MemFlags) {
    let mem: AMode = addr.try_into().unwrap();

    use OperandSize::*;
    let inst = match (reg.is_int(), size) {
        (_, S8) => Inst::Store8 {
            rd: reg.into(),
            mem,
            flags,
        },
        (_, S16) => Inst::Store16 {
            rd: reg.into(),
            mem,
            flags,
        },
        (true, S32) => Inst::Store32 {
            rd: reg.into(),
            mem,
            flags,
        },
        (false, S32) => Inst::FpuStore32 {
            rd: reg.into(),
            mem,
            flags,
        },
        (true, S64) => Inst::Store64 {
            rd: reg.into(),
            mem,
            flags,
        },
        (false, S64) => Inst::FpuStore64 {
            rd: reg.into(),
            mem,
            flags,
        },
        (_, S128) => Inst::FpuStore128 {
            rd: reg.into(),
            mem,
            flags,
        },
    };

    self.emit(inst);
}

/// Load a signed register.
pub fn sload(&mut self, addr: Address, rd: WritableReg, size: OperandSize, flags: MemFlags) {
    let signed = true;
    self.ldr(addr, rd, size, signed, flags);
}

/// Load an unsigned register.
pub fn uload(&mut self, addr: Address, rd: WritableReg, size: OperandSize, flags: MemFlags) {
    let signed = false;
    self.ldr(addr, rd, size, signed, flags);
}

/// Load the value at `addr` into `rd`, selecting the load instruction from
/// the operand size, the requested signedness and the class of `rd`.
///
/// # Panics
///
/// Panics if `addr` cannot be converted into an addressing mode.
fn ldr(
    &mut self,
    addr: Address,
    rd: WritableReg,
    size: OperandSize,
    signed: bool,
    flags: MemFlags,
) {
    use OperandSize::*;

    let is_int = rd.to_reg().is_int();
    let rd = rd.map(Into::into);
    let mem: AMode = addr.try_into().unwrap();

    let inst = match size {
        // Sub-word loads only look at the signedness.
        S8 if signed => Inst::SLoad8 { rd, mem, flags },
        S8 => Inst::ULoad8 { rd, mem, flags },
        S16 if signed => Inst::SLoad16 { rd, mem, flags },
        S16 => Inst::ULoad16 { rd, mem, flags },
        // Float destinations ignore the signedness.
        S32 if !is_int => Inst::FpuLoad32 { rd, mem, flags },
        S32 if signed => Inst::SLoad32 { rd, mem, flags },
        S32 => Inst::ULoad32 { rd, mem, flags },
        // A full-width load needs no extension, so signedness is irrelevant.
        S64 if is_int => Inst::ULoad64 { rd, mem, flags },
        S64 => Inst::FpuLoad64 { rd, mem, flags },
        S128 => Inst::FpuLoad128 { rd, mem, flags },
    };

    self.emit(inst);
}

/// Load a pair of registers.
pub fn ldp(&mut self, xt1: Reg, xt2: Reg, addr: Address) {
    let mem = addr.try_into().unwrap();
    let rt = Writable::from_reg(xt1.into());
    let rt2 = Writable::from_reg(xt2.into());
    self.emit(Inst::LoadP64 {
        rt,
        rt2,
        mem,
        flags: MemFlags::trusted(),
    });
}

/// Emit a series of instructions to move an arbitrary 64-bit immediate
/// into the destination register.
/// The emitted instructions will depend on the destination register class.
pub fn mov_ir(&mut self, rd: WritableReg, imm: Imm, size: OperandSize) {
    match rd.to_reg().class() {
        RegClass::Int => {
            let sequence = Inst::load_constant(rd.map(Into::into), imm.unwrap_as_u64());
            for inst in sequence {
                self.emit(inst);
            }
        }
        RegClass::Float => {
            // Prefer an immediate move when the bit pattern is encodable;
            // otherwise go through the constant pool.
            if let Some(fp_imm) = ASIMDFPModImm::maybe_from_u64(imm.unwrap_as_u64(), size.into())
            {
                self.emit(Inst::FpuMoveFPImm {
                    rd: rd.map(Into::into),
                    imm: fp_imm,
                    size: size.into(),
                });
            } else {
                let addr = self.add_constant(&imm.to_bytes());
                self.uload(addr, rd, size, TRUSTED_FLAGS);
            }
        }
        _ => unreachable!(),
    }
}

/// Move the contents of register `rm` into register `rd`.
pub fn mov_rr(&mut self, rm: Reg, rd: WritableReg, size: OperandSize) {
    let inst = Inst::Mov {
        size: size.into(),
        rd: rd.map(Into::into),
        rm: rm.into(),
    };
    self.emit(inst);
}

/// Move between floating point registers (32-bit or 64-bit only).
pub fn fmov_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {
    let rd = rd.map(Into::into);
    self.emit(match size {
        OperandSize::S32 => Inst::FpuMove32 { rd, rn: rn.into() },
        OperandSize::S64 => Inst::FpuMove64 { rd, rn: rn.into() },
        _ => unreachable!(),
    });
}

/// Emit a `MovToFpu` instruction from `rn` into `rd`.
pub fn mov_to_fpu(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {
    let inst = Inst::MovToFpu {
        size: size.into(),
        rd: rd.map(Into::into),
        rn: rn.into(),
    };
    self.emit(inst);
}

/// Emit a `MovFromVec` instruction reading lane `idx` of `rn` into `rd`.
pub fn mov_from_vec(&mut self, rn: Reg, rd: WritableReg, idx: u8, size: OperandSize) {
    let inst = Inst::MovFromVec {
        rd: rd.map(Into::into),
        rn: rn.into(),
        idx,
        size: size.into(),
    };
    self.emit(inst);
}

/// Add immediate and register.
pub fn add_ir(&mut self, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize) {
    let op = ALUOp::Add;
    self.alu_rri(op, imm, rn, rd, size);
}

/// Add immediate and register, setting overflow flags.
pub fn adds_ir(&mut self, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize) {
    let op = ALUOp::AddS;
    self.alu_rri(op, imm, rn, rd, size);
}

/// Add with three registers.
pub fn add_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) { self.alu_rrr_extend(ALUOp::Add, rm, rn, rd, size); } /// Add with three registers, setting overflow flags. pub fn adds_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) { self.alu_rrr_extend(ALUOp::AddS, rm, rn, rd, size); } /// Add across Vector. pub fn addv(&mut self, rn: Reg, rd: WritableReg, size: VectorSize) { self.emit(Inst::VecLanes { op: VecLanesOp::Addv, rd: rd.map(Into::into), rn: rn.into(), size, }); } /// Subtract immediate and register. pub fn sub_ir(&mut self, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize) { self.alu_rri(ALUOp::Sub, imm, rn, rd, size); } /// Subtract immediate and register, setting flags. pub fn subs_ir(&mut self, imm: Imm12, rn: Reg, size: OperandSize) { self.alu_rri(ALUOp::SubS, imm, rn, writable!(regs::zero()), size); } /// Subtract with three registers. pub fn sub_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) { self.alu_rrr_extend(ALUOp::Sub, rm, rn, rd, size); } /// Subtract with three registers, setting flags. pub fn subs_rrr(&mut self, rm: Reg, rn: Reg, size: OperandSize) { self.alu_rrr_extend(ALUOp::SubS, rm, rn, writable!(regs::zero()), size); } /// Multiply with three registers. pub fn mul_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) { self.alu_rrrr(ALUOp3::MAdd, rm, rn, rd, regs::zero(), size); } /// Signed/unsigned division with three registers. pub fn div_rrr( &mut self, divisor: Reg, dividend: Reg, dest: Writable, kind: DivKind, size: OperandSize, ) { // Check for division by 0. self.trapz(divisor, TrapCode::INTEGER_DIVISION_BY_ZERO, size); // check for overflow if kind == DivKind::Signed { // Check for divisor overflow. self.alu_rri( ALUOp::AddS, Imm12::maybe_from_u64(1).expect("1 to fit in 12 bits"), divisor, writable!(zero()), size, ); // Check if the dividend is 1. 
self.emit(Inst::CCmpImm { size: size.into(), rn: dividend.into(), imm: UImm5::maybe_from_u8(1).expect("1 fits in 5 bits"), nzcv: NZCV::new(false, false, false, false), cond: Cond::Eq, }); // Finally, trap if the previous operation overflowed. self.trapif(Cond::Vs, TrapCode::INTEGER_OVERFLOW); } // `cranelift-codegen` doesn't support emitting sdiv for anything but I64, // we therefore sign-extend the operand. // see: https://github.com/bytecodealliance/wasmtime/issues/9766 let size = if size == OperandSize::S32 && kind == DivKind::Signed { self.extend( divisor, writable!(divisor), ExtendKind::Signed(Extend::::I64Extend32), ); self.extend( dividend, writable!(dividend), ExtendKind::Signed(Extend::::I64Extend32), ); OperandSize::S64 } else { size }; let op = match kind { DivKind::Signed => ALUOp::SDiv, DivKind::Unsigned => ALUOp::UDiv, }; self.alu_rrr(op, divisor, dividend, dest.map(Into::into), size); } /// Signed/unsigned remainder operation with three registers. pub fn rem_rrr( &mut self, divisor: Reg, dividend: Reg, dest: Writable, scratch: WritableReg, kind: RemKind, size: OperandSize, ) { // Check for division by 0 self.trapz(divisor, TrapCode::INTEGER_DIVISION_BY_ZERO, size); // `cranelift-codegen` doesn't support emitting sdiv for anything but I64, // we therefore sign-extend the operand. // see: https://github.com/bytecodealliance/wasmtime/issues/9766 let size = if size == OperandSize::S32 && kind.is_signed() { self.extend( divisor, writable!(divisor), ExtendKind::Signed(Extend::::I64Extend32), ); self.extend( dividend, writable!(dividend), ExtendKind::Signed(Extend::::I64Extend32), ); OperandSize::S64 } else { size }; let op = match kind { RemKind::Signed => ALUOp::SDiv, RemKind::Unsigned => ALUOp::UDiv, }; self.alu_rrr(op, divisor, dividend, scratch, size); self.alu_rrrr( ALUOp3::MSub, scratch.to_reg(), divisor, dest.map(Into::into), dividend, size, ); } /// And with three registers. 
pub fn and_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) { self.alu_rrr(ALUOp::And, rm, rn, rd, size); } /// And immediate and register. pub fn and_ir(&mut self, imm: ImmLogic, rn: Reg, rd: WritableReg, size: OperandSize) { self.alu_rri_logic(ALUOp::And, imm, rn, rd, size); } /// Or with three registers. pub fn or_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) { self.alu_rrr(ALUOp::Orr, rm, rn, rd, size); } /// Or immediate and register. pub fn or_ir(&mut self, imm: ImmLogic, rn: Reg, rd: WritableReg, size: OperandSize) { self.alu_rri_logic(ALUOp::Orr, imm, rn, rd, size); } /// Xor with three registers. pub fn xor_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) { self.alu_rrr(ALUOp::Eor, rm, rn, rd, size); } /// Xor immediate and register. pub fn xor_ir(&mut self, imm: ImmLogic, rn: Reg, rd: WritableReg, size: OperandSize) { self.alu_rri_logic(ALUOp::Eor, imm, rn, rd, size); } /// Shift with three registers. pub fn shift_rrr( &mut self, rm: Reg, rn: Reg, rd: WritableReg, kind: ShiftKind, size: OperandSize, ) { let shift_op: ALUOp = kind.into(); // In the case of rotate left, we negate the register containing the // shift value. if kind == ShiftKind::Rotl { self.alu_rrr(ALUOp::Sub, rm, regs::zero(), writable!(rm), size); self.alu_rrr(shift_op, rm, rn, rd, size); } else { self.alu_rrr(shift_op, rm, rn, rd, size); } } /// Shift immediate and register. pub fn shift_ir( &mut self, imm: ImmShift, rn: Reg, rd: WritableReg, kind: ShiftKind, size: OperandSize, ) { let shift_op: ALUOp = kind.into(); // In the case of rotate left, we emit rotate right with type_size - // value. 
if kind == ShiftKind::Rotl { let value_size = size.num_bits(); let mut imm_val = value_size.wrapping_sub(imm.value()); imm_val &= value_size - 1; let negated_imm = ImmShift::maybe_from_u64(imm_val as u64).unwrap(); self.alu_rri_shift(shift_op, negated_imm, rn, rd, size); } else { self.alu_rri_shift(shift_op, imm, rn, rd, size); } } /// Count Leading Zeros. pub fn clz(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) { self.bit_rr(BitOp::Clz, rn, rd, size); } /// Reverse Bits reverses the bit order in a register. pub fn rbit(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) { self.bit_rr(BitOp::RBit, rn, rd, size); } /// Float add with three registers. pub fn fadd_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) { self.fpu_rrr(FPUOp2::Add, rm, rn, rd, size); } /// Float sub with three registers. pub fn fsub_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) { self.fpu_rrr(FPUOp2::Sub, rm, rn, rd, size); } /// Float multiply with three registers. pub fn fmul_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) { self.fpu_rrr(FPUOp2::Mul, rm, rn, rd, size); } /// Float division with three registers. pub fn fdiv_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) { self.fpu_rrr(FPUOp2::Div, rm, rn, rd, size); } /// Float max with three registers. pub fn fmax_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) { self.fpu_rrr(FPUOp2::Max, rm, rn, rd, size); } /// Float min with three registers. pub fn fmin_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) { self.fpu_rrr(FPUOp2::Min, rm, rn, rd, size); } /// Float neg with two registers. pub fn fneg_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) { self.fpu_rr(FPUOp1::Neg, rn, rd, size); } /// Float abs with two registers. pub fn fabs_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) { self.fpu_rr(FPUOp1::Abs, rn, rd, size); } /// Float sqrt with two registers. 
pub fn fsqrt_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {
    let op = FPUOp1::Sqrt;
    self.fpu_rr(op, rn, rd, size);
}

/// Round a float to an integral value using the given rounding mode.
///
/// # Panics
///
/// Panics when `size` is neither 32 nor 64 bits.
pub fn fround_rr(&mut self, rn: Reg, rd: WritableReg, mode: RoundingMode, size: OperandSize) {
    let op = match (mode, size) {
        // Single precision.
        (RoundingMode::Nearest, OperandSize::S32) => FpuRoundMode::Nearest32,
        (RoundingMode::Zero, OperandSize::S32) => FpuRoundMode::Zero32,
        (RoundingMode::Down, OperandSize::S32) => FpuRoundMode::Minus32,
        (RoundingMode::Up, OperandSize::S32) => FpuRoundMode::Plus32,
        // Double precision.
        (RoundingMode::Nearest, OperandSize::S64) => FpuRoundMode::Nearest64,
        (RoundingMode::Zero, OperandSize::S64) => FpuRoundMode::Zero64,
        (RoundingMode::Down, OperandSize::S64) => FpuRoundMode::Minus64,
        (RoundingMode::Up, OperandSize::S64) => FpuRoundMode::Plus64,
        (m, o) => panic!("Invalid rounding mode or operand size {m:?}, {o:?}"),
    };
    self.fpu_round(op, rn, rd)
}

/// Unsigned right shift of a float register by an immediate amount.
pub fn fushr_rri(&mut self, rn: Reg, rd: WritableReg, amount: u8, size: OperandSize) {
    let shift = FPURightShiftImm {
        amount,
        lane_size_in_bits: size.num_bits(),
    };
    let op = match size {
        OperandSize::S32 => UShr32(shift),
        OperandSize::S64 => UShr64(shift),
        _ => unreachable!(),
    };
    self.fpu_rri(op, rn, rd)
}

/// Shift-left-and-insert on float registers with an immediate amount.
pub fn fsli_rri_mod(
    &mut self,
    ri: Reg,
    rn: Reg,
    rd: WritableReg,
    amount: u8,
    size: OperandSize,
) {
    let shift = FPULeftShiftImm {
        amount,
        lane_size_in_bits: size.num_bits(),
    };
    let op = match size {
        OperandSize::S32 => FPUOpRIMod::Sli32(shift),
        OperandSize::S64 => FPUOpRIMod::Sli64(shift),
        _ => unreachable!(),
    };
    self.fpu_rri_mod(op, ri, rn, rd)
}

/// Compare two float registers, setting the condition flags.
pub fn fcmp(&mut self, rn: Reg, rm: Reg, size: OperandSize) {
    let inst = Inst::FpuCmp {
        size: size.into(),
        rn: rn.into(),
        rm: rm.into(),
    };
    self.emit(inst)
}

/// Convert a signed integer to a float.
pub fn cvt_sint_to_float(
    &mut self,
    rn: Reg,
    rd: WritableReg,
    src_size: OperandSize,
    dst_size: OperandSize,
) {
    use OperandSize::{S32, S64};

    let op = match (src_size, dst_size) {
        (S32, S32) => IntToFpuOp::I32ToF32,
        (S32, S64) => IntToFpuOp::I32ToF64,
        (S64, S32) => IntToFpuOp::I64ToF32,
        (S64, S64) => IntToFpuOp::I64ToF64,
        _ => unreachable!(),
    };

    let inst = Inst::IntToFpu {
        op,
        rd: rd.map(Into::into),
        rn: rn.into(),
    };
    self.emit(inst);
}

/// Convert an unsigned integer to a float.
pub fn cvt_uint_to_float(
    &mut self,
    rn: Reg,
    rd: WritableReg,
    src_size: OperandSize,
    dst_size: OperandSize,
) {
    use OperandSize::{S32, S64};

    let op = match (src_size, dst_size) {
        (S32, S32) => IntToFpuOp::U32ToF32,
        (S32, S64) => IntToFpuOp::U32ToF64,
        (S64, S32) => IntToFpuOp::U64ToF32,
        (S64, S64) => IntToFpuOp::U64ToF64,
        _ => unreachable!(),
    };

    let inst = Inst::IntToFpu {
        op,
        rd: rd.map(Into::into),
        rn: rn.into(),
    };
    self.emit(inst);
}

/// Convert a float between single and double precision.
///
/// Only the 32-to-64 and 64-to-32 bit conversions are implemented.
pub fn cvt_float_to_float(
    &mut self,
    rn: Reg,
    rd: WritableReg,
    src_size: OperandSize,
    dst_size: OperandSize,
) {
    use OperandSize::{S32, S64};

    // The scalar size passed along is the size of the source operand.
    let (fpu_op, size) = match (src_size, dst_size) {
        (S64, S32) => (FPUOp1::Cvt64To32, ScalarSize::Size64),
        (S32, S64) => (FPUOp1::Cvt32To64, ScalarSize::Size32),
        _ => unimplemented!(),
    };

    let inst = Inst::FpuRR {
        fpu_op,
        size,
        rd: rd.map(Into::into),
        rn: rn.into(),
    };
    self.emit(inst);
}

/// Emit a return instruction.
pub fn ret(&mut self) {
    let inst = Inst::Ret {};
    self.emit(inst);
}

/// Emit an unconditional branch to `target`.
pub fn jmp(&mut self, target: MachLabel) {
    let dest = BranchTarget::Label(target);
    self.emit(Inst::Jump { dest });
}

/// Emit a conditional branch to `taken` when condition `kind` holds.
pub fn jmp_if(&mut self, kind: Cond, taken: MachLabel) {
    let taken = BranchTarget::Label(taken);
    // When the branch is not taken, continue at a resolved offset of 4.
    let not_taken = BranchTarget::ResolvedOffset(4);
    self.emit(Inst::CondBr {
        taken,
        not_taken,
        kind: CondBrKind::Cond(kind),
    });
}

/// Emits a jump table sequence.
pub fn jmp_table( &mut self, targets: &[MachLabel], default: MachLabel, index: Reg, tmp1: Reg, tmp2: Reg, ) { self.emit_with_island( Inst::JTSequence { default, targets: Box::new(targets.to_vec()), ridx: index.into(), rtmp1: Writable::from_reg(tmp1.into()), rtmp2: Writable::from_reg(tmp2.into()), }, // number of bytes needed for the jumptable sequence: // 4 bytes per instruction, with 8 instructions base + the size of // the jumptable more. (4 * (8 + targets.len())).try_into().unwrap(), ); } /// Conditional Set sets the destination register to 1 if the condition /// is true, and otherwise sets it to 0. pub fn cset(&mut self, rd: WritableReg, cond: Cond) { self.emit(Inst::CSet { rd: rd.map(Into::into), cond, }); } /// If the condition is true, `csel` writes rn to rd. If the /// condition is false, it writes rm to rd pub fn csel(&mut self, rn: Reg, rm: Reg, rd: WritableReg, cond: Cond) { self.emit(Inst::CSel { rd: rd.map(Into::into), rn: rn.into(), rm: rm.into(), cond, }); } /// If the condition is true, `csel` writes rn to rd. If the /// condition is false, it writes rm to rd pub fn fpu_csel(&mut self, rn: Reg, rm: Reg, rd: WritableReg, cond: Cond, size: OperandSize) { match size { OperandSize::S32 => { self.emit(Inst::FpuCSel32 { rd: rd.map(Into::into), rn: rn.into(), rm: rm.into(), cond, }); } OperandSize::S64 => { self.emit(Inst::FpuCSel64 { rd: rd.map(Into::into), rn: rn.into(), rm: rm.into(), cond, }); } _ => todo!(), } } /// Population count per byte. pub fn cnt(&mut self, rd: WritableReg) { self.emit(Inst::VecMisc { op: VecMisc2::Cnt, rd: rd.map(Into::into), rn: rd.to_reg().into(), size: VectorSize::Size8x8, }); } pub fn extend(&mut self, rn: Reg, rd: WritableReg, kind: ExtendKind) { self.emit(Inst::Extend { rd: rd.map(Into::into), rn: rn.into(), signed: kind.signed(), from_bits: kind.from_bits(), to_bits: kind.to_bits(), }) } /// Bitwise AND (shifted register), setting flags. 
pub fn ands_rr(&mut self, rn: Reg, rm: Reg, size: OperandSize) { self.alu_rrr(ALUOp::AndS, rm, rn, writable!(regs::zero()), size); } /// Permanently Undefined. pub fn udf(&mut self, code: TrapCode) { self.emit(Inst::Udf { trap_code: code }); } /// Conditional trap. pub fn trapif(&mut self, cc: Cond, code: TrapCode) { self.emit(Inst::TrapIf { kind: CondBrKind::Cond(cc), trap_code: code, }); } /// Trap if `rn` is zero. pub fn trapz(&mut self, rn: Reg, code: TrapCode, size: OperandSize) { self.emit(Inst::TrapIf { kind: CondBrKind::Zero(rn.into(), size.into()), trap_code: code, }); } // Helpers for ALU operations. fn alu_rri(&mut self, op: ALUOp, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize) { self.emit(Inst::AluRRImm12 { alu_op: op, size: size.into(), rd: rd.map(Into::into), rn: rn.into(), imm12: imm, }); } fn alu_rri_logic( &mut self, op: ALUOp, imm: ImmLogic, rn: Reg, rd: WritableReg, size: OperandSize, ) { self.emit(Inst::AluRRImmLogic { alu_op: op, size: size.into(), rd: rd.map(Into::into), rn: rn.into(), imml: imm, }); } fn alu_rri_shift( &mut self, op: ALUOp, imm: ImmShift, rn: Reg, rd: WritableReg, size: OperandSize, ) { self.emit(Inst::AluRRImmShift { alu_op: op, size: size.into(), rd: rd.map(Into::into), rn: rn.into(), immshift: imm, }); } fn alu_rrr(&mut self, op: ALUOp, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) { self.emit(Inst::AluRRR { alu_op: op, size: size.into(), rd: rd.map(Into::into), rn: rn.into(), rm: rm.into(), }); } fn alu_rrr_extend(&mut self, op: ALUOp, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) { self.emit(Inst::AluRRRExtend { alu_op: op, size: size.into(), rd: rd.map(Into::into), rn: rn.into(), rm: rm.into(), extendop: ExtendOp::UXTX, }); } fn alu_rrrr( &mut self, op: ALUOp3, rm: Reg, rn: Reg, rd: WritableReg, ra: Reg, size: OperandSize, ) { self.emit(Inst::AluRRRR { alu_op: op, size: size.into(), rd: rd.map(Into::into), rn: rn.into(), rm: rm.into(), ra: ra.into(), }); } fn fpu_rrr(&mut self, op: FPUOp2, rm: 
Reg, rn: Reg, rd: WritableReg, size: OperandSize) { self.emit(Inst::FpuRRR { fpu_op: op, size: size.into(), rd: rd.map(Into::into), rn: rn.into(), rm: rm.into(), }); } fn fpu_rri(&mut self, op: FPUOpRI, rn: Reg, rd: WritableReg) { self.emit(Inst::FpuRRI { fpu_op: op, rd: rd.map(Into::into), rn: rn.into(), }); } fn fpu_rri_mod(&mut self, op: FPUOpRIMod, ri: Reg, rn: Reg, rd: WritableReg) { self.emit(Inst::FpuRRIMod { fpu_op: op, rd: rd.map(Into::into), ri: ri.into(), rn: rn.into(), }); } fn fpu_rr(&mut self, op: FPUOp1, rn: Reg, rd: WritableReg, size: OperandSize) { self.emit(Inst::FpuRR { fpu_op: op, size: size.into(), rd: rd.map(Into::into), rn: rn.into(), }); } fn fpu_round(&mut self, op: FpuRoundMode, rn: Reg, rd: WritableReg) { self.emit(Inst::FpuRound { op, rd: rd.map(Into::into), rn: rn.into(), }); } fn bit_rr(&mut self, op: BitOp, rn: Reg, rd: WritableReg, size: OperandSize) { self.emit(Inst::BitRR { op, size: size.into(), rd: rd.map(Into::into), rn: rn.into(), }); } /// Get a label from the underlying machine code buffer. pub fn get_label(&mut self) -> MachLabel { self.buffer.get_label() } /// Get a mutable reference to underlying /// machine buffer. pub fn buffer_mut(&mut self) -> &mut MachBuffer { &mut self.buffer } /// Get a reference to the underlying machine buffer. pub fn buffer(&self) -> &MachBuffer { &self.buffer } /// Emit a direct call to a function defined locally and /// referenced to by `name`. pub fn call_with_name(&mut self, name: UserExternalNameRef, call_conv: CallingConvention) { self.emit(Inst::Call { info: Box::new(cranelift_codegen::CallInfo::empty( ExternalName::user(name), call_conv.into(), )), }) } /// Emit an indirect call to a function whose address is /// stored the `callee` register. 
pub fn call_with_reg(&mut self, callee: Reg, call_conv: CallingConvention) {
    self.emit(Inst::CallInd {
        info: Box::new(cranelift_codegen::CallInfo::empty(
            callee.into(),
            call_conv.into(),
        )),
    })
}

/// Load the min value for an integer of size `out_size`, as a floating-point
/// of size `in_size`, into register `rd`.
///
/// The value is the lower bound returned by `f32_cvt_to_int_bounds` /
/// `f64_cvt_to_int_bounds` for the given signedness.
fn min_fp_value(
    &mut self,
    signed: bool,
    in_size: OperandSize,
    out_size: OperandSize,
    rd: Writable<Reg>,
) {
    match in_size {
        OperandSize::S32 => {
            let (min, _) = f32_cvt_to_int_bounds(signed, out_size.num_bits().into());
            self.mov_ir(rd, Imm::f32(min.to_bits()), in_size);
        }
        OperandSize::S64 => {
            let (min, _) = f64_cvt_to_int_bounds(signed, out_size.num_bits().into());
            self.mov_ir(rd, Imm::f64(min.to_bits()), in_size);
        }
        s => unreachable!("unsupported floating-point size: {}bit", s.num_bits()),
    };
}

/// Load the max value for an integer of size `out_size`, as a floating-point
/// of size `in_size`, into register `rd`.
///
/// The value is the upper bound returned by `f32_cvt_to_int_bounds` /
/// `f64_cvt_to_int_bounds` for the given signedness.
fn max_fp_value(
    &mut self,
    signed: bool,
    in_size: OperandSize,
    out_size: OperandSize,
    rd: Writable<Reg>,
) {
    match in_size {
        OperandSize::S32 => {
            let (_, max) = f32_cvt_to_int_bounds(signed, out_size.num_bits().into());
            self.mov_ir(rd, Imm::f32(max.to_bits()), in_size);
        }
        OperandSize::S64 => {
            let (_, max) = f64_cvt_to_int_bounds(signed, out_size.num_bits().into());
            self.mov_ir(rd, Imm::f64(max.to_bits()), in_size);
        }
        s => unreachable!("unsupported floating-point size: {}bit", s.num_bits()),
    };
}

/// Emit instructions that trap with `BAD_CONVERSION_TO_INTEGER` if the value
/// in `rn` is NaN.
fn check_nan(&mut self, rn: Reg, size: OperandSize) {
    // Comparing a value with itself is unordered only for NaN, and an
    // unordered comparison sets the V flag.
    self.fcmp(rn, rn, size);
    self.trapif(Cond::Vs, TrapCode::BAD_CONVERSION_TO_INTEGER);
}

/// Convert the floating point of size `src_size` stored in `src`, into an integer of size
/// `dst_size`, storing the result in `dst`.
pub fn fpu_to_int( &mut self, dst: Writable, src: Reg, tmp_reg: WritableReg, src_size: OperandSize, dst_size: OperandSize, kind: TruncKind, signed: bool, ) { if kind.is_unchecked() { // Confusingly, when `kind` is `Unchecked` is when we actually need to perform the checks: // - check if fp is NaN // - check bounds self.check_nan(src, src_size); self.min_fp_value(signed, src_size, dst_size, tmp_reg); self.fcmp(src, tmp_reg.to_reg(), src_size); self.trapif(Cond::Le, TrapCode::INTEGER_OVERFLOW); self.max_fp_value(signed, src_size, dst_size, tmp_reg); self.fcmp(src, tmp_reg.to_reg(), src_size); self.trapif(Cond::Ge, TrapCode::INTEGER_OVERFLOW); } self.cvt_fpu_to_int(dst, src, src_size, dst_size, signed) } /// Select and emit the appropriate `fcvt*` instruction pub fn cvt_fpu_to_int( &mut self, dst: Writable, src: Reg, src_size: OperandSize, dst_size: OperandSize, signed: bool, ) { let op = match (src_size, dst_size, signed) { (OperandSize::S32, OperandSize::S32, false) => FpuToIntOp::F32ToU32, (OperandSize::S32, OperandSize::S32, true) => FpuToIntOp::F32ToI32, (OperandSize::S32, OperandSize::S64, false) => FpuToIntOp::F32ToU64, (OperandSize::S32, OperandSize::S64, true) => FpuToIntOp::F32ToI64, (OperandSize::S64, OperandSize::S32, false) => FpuToIntOp::F64ToU32, (OperandSize::S64, OperandSize::S32, true) => FpuToIntOp::F64ToI32, (OperandSize::S64, OperandSize::S64, false) => FpuToIntOp::F64ToU64, (OperandSize::S64, OperandSize::S64, true) => FpuToIntOp::F64ToI64, (fsize, int_size, signed) => unimplemented!( "unsupported conversion: f{} to {}{}", fsize.num_bits(), if signed { "i" } else { "u" }, int_size.num_bits(), ), }; self.emit(Inst::FpuToInt { op, rd: dst.map(Into::into), rn: src.into(), }); } } /// Captures the region in a MachBuffer where an add-with-immediate instruction would be emitted, /// but the immediate is not yet known. pub(crate) struct PatchableAddToReg { /// The region to be patched in the [`MachBuffer`]. 
It contains /// space for 3 32-bit instructions, i.e. it's 12 bytes long. region: PatchRegion, // The destination register for the add instruction. reg: Writable, // The temporary register used to hold the immediate value. tmp: Writable, } impl PatchableAddToReg { /// Create a new [`PatchableAddToReg`] by capturing a region in the output /// buffer containing an instruction sequence that loads an immediate into a /// register `tmp`, then adds it to a register `reg`. The [`MachBuffer`] /// will have that instruction sequence written to the region, though the /// immediate loaded into `tmp` will be `0` until the `::finalize` method is /// called. pub(crate) fn new(reg: Writable, tmp: Writable, buf: &mut MachBuffer) -> Self { let insns = Self::add_immediate_instruction_sequence(reg, tmp, 0); let open = buf.start_patchable(); buf.put_data(&insns); let region = buf.end_patchable(open); Self { region, reg, tmp } } fn add_immediate_instruction_sequence( reg: Writable, tmp: Writable, imm: i32, ) -> [u8; 12] { let imm_hi = imm as u64 & 0xffff_0000; let imm_hi = MoveWideConst::maybe_from_u64(imm_hi).unwrap(); let imm_lo = imm as u64 & 0x0000_ffff; let imm_lo = MoveWideConst::maybe_from_u64(imm_lo).unwrap(); let size = OperandSize::S64.into(); let tmp = tmp.map(Into::into); let rd = reg.map(Into::into); // This is "movz to bits 16-31 of 64 bit reg tmp and zero the rest" let mov_insn = enc_move_wide(inst::MoveWideOp::MovZ, tmp, imm_hi, size); // This is "movk to bits 0-15 of 64 bit reg tmp" let movk_insn = enc_movk(tmp, imm_lo, size); // This is "add tmp to rd". The opcodes are somewhat buried in the // instruction encoder so we just repeat them here. 
let add_bits_31_21: u32 = 0b00001011_000 | (size.sf_bit() << 10); let add_bits_15_10: u32 = 0; let add_insn = enc_arith_rrr( add_bits_31_21, add_bits_15_10, rd, rd.to_reg(), tmp.to_reg(), ); let mut buf = [0u8; 12]; buf[0..4].copy_from_slice(&mov_insn.to_le_bytes()); buf[4..8].copy_from_slice(&movk_insn.to_le_bytes()); buf[8..12].copy_from_slice(&add_insn.to_le_bytes()); buf } /// Patch the [`MachBuffer`] with the known constant to be added to the register. The final /// value is passed in as an i32, but the instruction encoding is fixed when /// [`PatchableAddToReg::new`] is called. pub(crate) fn finalize(self, val: i32, buffer: &mut MachBuffer) { let insns = Self::add_immediate_instruction_sequence(self.reg, self.tmp, val); let slice = self.region.patch(buffer); assert_eq!(slice.len(), insns.len()); slice.copy_from_slice(&insns); } }