1 //! Assembler library implementation for Aarch64. 2 use super::regs; 3 use crate::CallingConvention; 4 use crate::aarch64::regs::zero; 5 use crate::masm::{ 6 DivKind, Extend, ExtendKind, FloatCmpKind, Imm, IntCmpKind, RemKind, RoundingMode, ShiftKind, 7 Signed, TRUSTED_FLAGS, TruncKind, 8 }; 9 use crate::{ 10 constant_pool::ConstantPool, 11 masm::OperandSize, 12 reg::{Reg, WritableReg, writable}, 13 }; 14 15 use cranelift_codegen::isa::aarch64; 16 use cranelift_codegen::isa::aarch64::inst::emit::{enc_arith_rrr, enc_move_wide, enc_movk}; 17 use cranelift_codegen::isa::aarch64::inst::{ 18 ASIMDFPModImm, FpuToIntOp, MoveWideConst, NZCV, UImm5, 19 }; 20 use cranelift_codegen::{ 21 Final, MachBuffer, MachBufferFinalized, MachInst, MachInstEmit, MachInstEmitState, MachLabel, 22 Writable, 23 ir::{ExternalName, MemFlags, SourceLoc, TrapCode, UserExternalNameRef}, 24 isa::aarch64::inst::{ 25 self, ALUOp, ALUOp3, AMode, BitOp, BranchTarget, Cond, CondBrKind, ExtendOp, 26 FPULeftShiftImm, FPUOp1, FPUOp2, 27 FPUOpRI::{self, UShr32, UShr64}, 28 FPUOpRIMod, FPURightShiftImm, FpuRoundMode, Imm12, ImmLogic, ImmShift, Inst, IntToFpuOp, 29 PairAMode, ScalarSize, VecLanesOp, VecMisc2, VectorSize, 30 emit::{EmitInfo, EmitState}, 31 }, 32 settings, 33 }; 34 use cranelift_codegen::{PatchRegion, VCodeConstant}; 35 use regalloc2::RegClass; 36 use wasmtime_core::math::{f32_cvt_to_int_bounds, f64_cvt_to_int_bounds}; 37 38 impl From<OperandSize> for inst::OperandSize { from(size: OperandSize) -> Self39 fn from(size: OperandSize) -> Self { 40 match size { 41 OperandSize::S32 => Self::Size32, 42 OperandSize::S64 => Self::Size64, 43 s => panic!("Invalid operand size {s:?}"), 44 } 45 } 46 } 47 48 impl From<IntCmpKind> for Cond { from(value: IntCmpKind) -> Self49 fn from(value: IntCmpKind) -> Self { 50 match value { 51 IntCmpKind::Eq => Cond::Eq, 52 IntCmpKind::Ne => Cond::Ne, 53 IntCmpKind::LtS => Cond::Lt, 54 IntCmpKind::LtU => Cond::Lo, 55 IntCmpKind::GtS => Cond::Gt, 56 IntCmpKind::GtU => 
Cond::Hi, 57 IntCmpKind::LeS => Cond::Le, 58 IntCmpKind::LeU => Cond::Ls, 59 IntCmpKind::GeS => Cond::Ge, 60 IntCmpKind::GeU => Cond::Hs, 61 } 62 } 63 } 64 65 impl From<FloatCmpKind> for Cond { from(value: FloatCmpKind) -> Self66 fn from(value: FloatCmpKind) -> Self { 67 match value { 68 FloatCmpKind::Eq => Cond::Eq, 69 FloatCmpKind::Ne => Cond::Ne, 70 FloatCmpKind::Lt => Cond::Mi, 71 FloatCmpKind::Gt => Cond::Gt, 72 FloatCmpKind::Le => Cond::Ls, 73 FloatCmpKind::Ge => Cond::Ge, 74 } 75 } 76 } 77 78 impl From<OperandSize> for ScalarSize { from(size: OperandSize) -> ScalarSize79 fn from(size: OperandSize) -> ScalarSize { 80 match size { 81 OperandSize::S8 => ScalarSize::Size8, 82 OperandSize::S16 => ScalarSize::Size16, 83 OperandSize::S32 => ScalarSize::Size32, 84 OperandSize::S64 => ScalarSize::Size64, 85 OperandSize::S128 => ScalarSize::Size128, 86 } 87 } 88 } 89 90 impl From<ShiftKind> for ALUOp { from(kind: ShiftKind) -> Self91 fn from(kind: ShiftKind) -> Self { 92 match kind { 93 ShiftKind::Shl => ALUOp::Lsl, 94 ShiftKind::ShrS => ALUOp::Asr, 95 ShiftKind::ShrU => ALUOp::Lsr, 96 ShiftKind::Rotr => ALUOp::Extr, 97 ShiftKind::Rotl => ALUOp::Extr, 98 } 99 } 100 } 101 102 /// Low level assembler implementation for Aarch64. 103 pub(crate) struct Assembler { 104 /// The machine instruction buffer. 105 buffer: MachBuffer<Inst>, 106 /// Constant emission information. 107 emit_info: EmitInfo, 108 /// Emission state. 109 emit_state: EmitState, 110 /// Constant pool. 111 pool: ConstantPool, 112 } 113 114 impl Assembler { 115 /// Create a new Aarch64 assembler. new(shared_flags: settings::Flags, isa_flags: aarch64::settings::Flags) -> Self116 pub fn new(shared_flags: settings::Flags, isa_flags: aarch64::settings::Flags) -> Self { 117 Self { 118 buffer: MachBuffer::<Inst>::new(), 119 emit_state: Default::default(), 120 emit_info: EmitInfo::new(shared_flags, isa_flags), 121 pool: ConstantPool::new(), 122 } 123 } 124 } 125 126 impl Assembler { 127 /// Return the emitted code. 
finalize(mut self, loc: Option<SourceLoc>) -> MachBufferFinalized<Final>128 pub fn finalize(mut self, loc: Option<SourceLoc>) -> MachBufferFinalized<Final> { 129 let stencil = self 130 .buffer 131 .finish(&self.pool.constants(), self.emit_state.ctrl_plane_mut()); 132 stencil.apply_base_srcloc(loc.unwrap_or_default()) 133 } 134 emit(&mut self, inst: Inst)135 fn emit(&mut self, inst: Inst) { 136 self.emit_with_island(inst, Inst::worst_case_size()); 137 } 138 emit_with_island(&mut self, inst: Inst, needed_space: u32)139 fn emit_with_island(&mut self, inst: Inst, needed_space: u32) { 140 if self.buffer.island_needed(needed_space) { 141 let label = self.buffer.get_label(); 142 let jmp = Inst::Jump { 143 dest: BranchTarget::Label(label), 144 }; 145 jmp.emit(&mut self.buffer, &self.emit_info, &mut self.emit_state); 146 self.buffer 147 .emit_island(needed_space, self.emit_state.ctrl_plane_mut()); 148 self.buffer 149 .bind_label(label, self.emit_state.ctrl_plane_mut()); 150 } 151 inst.emit(&mut self.buffer, &self.emit_info, &mut self.emit_state); 152 } 153 154 /// Adds a constant to the constant pool, returning its address. add_constant(&mut self, constant: &[u8]) -> VCodeConstant155 pub fn add_constant(&mut self, constant: &[u8]) -> VCodeConstant { 156 let handle = self.pool.register(constant, &mut self.buffer); 157 handle 158 } 159 160 /// Store a pair of registers. stp(&mut self, xt1: Reg, xt2: Reg, mem: PairAMode)161 pub fn stp(&mut self, xt1: Reg, xt2: Reg, mem: PairAMode) { 162 self.emit(Inst::StoreP64 { 163 rt: xt1.into(), 164 rt2: xt2.into(), 165 mem, 166 flags: MemFlags::trusted(), 167 }); 168 } 169 170 /// Store a register. 
str(&mut self, reg: Reg, mem: AMode, size: OperandSize, flags: MemFlags)171 pub fn str(&mut self, reg: Reg, mem: AMode, size: OperandSize, flags: MemFlags) { 172 use OperandSize::*; 173 let inst = match (reg.is_int(), size) { 174 (_, S8) => Inst::Store8 { 175 rd: reg.into(), 176 mem, 177 flags, 178 }, 179 (_, S16) => Inst::Store16 { 180 rd: reg.into(), 181 mem, 182 flags, 183 }, 184 (true, S32) => Inst::Store32 { 185 rd: reg.into(), 186 mem, 187 flags, 188 }, 189 (false, S32) => Inst::FpuStore32 { 190 rd: reg.into(), 191 mem, 192 flags, 193 }, 194 (true, S64) => Inst::Store64 { 195 rd: reg.into(), 196 mem, 197 flags, 198 }, 199 (false, S64) => Inst::FpuStore64 { 200 rd: reg.into(), 201 mem, 202 flags, 203 }, 204 (_, S128) => Inst::FpuStore128 { 205 rd: reg.into(), 206 mem, 207 flags, 208 }, 209 }; 210 211 self.emit(inst); 212 } 213 214 /// Load a signed register. sload(&mut self, mem: AMode, rd: WritableReg, size: OperandSize, flags: MemFlags)215 pub fn sload(&mut self, mem: AMode, rd: WritableReg, size: OperandSize, flags: MemFlags) { 216 self.ldr(mem, rd, size, true, flags); 217 } 218 219 /// Load an unsigned register. uload(&mut self, mem: AMode, rd: WritableReg, size: OperandSize, flags: MemFlags)220 pub fn uload(&mut self, mem: AMode, rd: WritableReg, size: OperandSize, flags: MemFlags) { 221 self.ldr(mem, rd, size, false, flags); 222 } 223 224 /// Load address into a register. 
ldr( &mut self, mem: AMode, rd: WritableReg, size: OperandSize, signed: bool, flags: MemFlags, )225 fn ldr( 226 &mut self, 227 mem: AMode, 228 rd: WritableReg, 229 size: OperandSize, 230 signed: bool, 231 flags: MemFlags, 232 ) { 233 use OperandSize::*; 234 let writable_reg = rd.map(Into::into); 235 236 let inst = match (rd.to_reg().is_int(), signed, size) { 237 (_, false, S8) => Inst::ULoad8 { 238 rd: writable_reg, 239 mem, 240 flags, 241 }, 242 (_, true, S8) => Inst::SLoad8 { 243 rd: writable_reg, 244 mem, 245 flags, 246 }, 247 (_, false, S16) => Inst::ULoad16 { 248 rd: writable_reg, 249 mem, 250 flags, 251 }, 252 (_, true, S16) => Inst::SLoad16 { 253 rd: writable_reg, 254 mem, 255 flags, 256 }, 257 (true, false, S32) => Inst::ULoad32 { 258 rd: writable_reg, 259 mem, 260 flags, 261 }, 262 (false, _, S32) => Inst::FpuLoad32 { 263 rd: writable_reg, 264 mem, 265 flags, 266 }, 267 (true, true, S32) => Inst::SLoad32 { 268 rd: writable_reg, 269 mem, 270 flags, 271 }, 272 (true, _, S64) => Inst::ULoad64 { 273 rd: writable_reg, 274 mem, 275 flags, 276 }, 277 (false, _, S64) => Inst::FpuLoad64 { 278 rd: writable_reg, 279 mem, 280 flags, 281 }, 282 (_, _, S128) => Inst::FpuLoad128 { 283 rd: writable_reg, 284 mem, 285 flags, 286 }, 287 }; 288 289 self.emit(inst); 290 } 291 292 /// Load a pair of registers. ldp(&mut self, xt1: Reg, xt2: Reg, mem: PairAMode)293 pub fn ldp(&mut self, xt1: Reg, xt2: Reg, mem: PairAMode) { 294 let writable_xt1 = Writable::from_reg(xt1.into()); 295 let writable_xt2 = Writable::from_reg(xt2.into()); 296 297 self.emit(Inst::LoadP64 { 298 rt: writable_xt1, 299 rt2: writable_xt2, 300 mem, 301 flags: MemFlags::trusted(), 302 }); 303 } 304 305 /// Emit a series of instructions to move an arbitrary 64-bit immediate 306 /// into the destination register. 307 /// The emitted instructions will depend on the destination register class. 
mov_ir(&mut self, rd: WritableReg, imm: Imm, size: OperandSize)308 pub fn mov_ir(&mut self, rd: WritableReg, imm: Imm, size: OperandSize) { 309 match rd.to_reg().class() { 310 RegClass::Int => { 311 Inst::load_constant(rd.map(Into::into), imm.unwrap_as_u64()) 312 .into_iter() 313 .for_each(|i| self.emit(i)); 314 } 315 RegClass::Float => { 316 match ASIMDFPModImm::maybe_from_u64(imm.unwrap_as_u64(), size.into()) { 317 Some(imm) => { 318 self.emit(Inst::FpuMoveFPImm { 319 rd: rd.map(Into::into), 320 imm, 321 size: size.into(), 322 }); 323 } 324 _ => { 325 let constant = self.add_constant(&imm.to_bytes()); 326 let addr = AMode::Const { addr: constant }; 327 self.uload(addr, rd, size, TRUSTED_FLAGS); 328 } 329 } 330 } 331 _ => unreachable!(), 332 } 333 } 334 335 /// Register to register move. mov_rr(&mut self, rm: Reg, rd: WritableReg, size: OperandSize)336 pub fn mov_rr(&mut self, rm: Reg, rd: WritableReg, size: OperandSize) { 337 let writable_rd = rd.map(Into::into); 338 self.emit(Inst::Mov { 339 size: size.into(), 340 rd: writable_rd, 341 rm: rm.into(), 342 }); 343 } 344 345 /// Floating point register to register move. 
fmov_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize)346 pub fn fmov_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) { 347 let writable = rd.map(Into::into); 348 let inst = match size { 349 OperandSize::S32 => Inst::FpuMove32 { 350 rd: writable, 351 rn: rn.into(), 352 }, 353 OperandSize::S64 => Inst::FpuMove64 { 354 rd: writable, 355 rn: rn.into(), 356 }, 357 _ => unreachable!(), 358 }; 359 360 self.emit(inst); 361 } 362 mov_to_fpu(&mut self, rn: Reg, rd: WritableReg, size: OperandSize)363 pub fn mov_to_fpu(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) { 364 let writable_rd = rd.map(Into::into); 365 self.emit(Inst::MovToFpu { 366 size: size.into(), 367 rd: writable_rd, 368 rn: rn.into(), 369 }); 370 } 371 mov_from_vec(&mut self, rn: Reg, rd: WritableReg, idx: u8, size: OperandSize)372 pub fn mov_from_vec(&mut self, rn: Reg, rd: WritableReg, idx: u8, size: OperandSize) { 373 self.emit(Inst::MovFromVec { 374 rd: rd.map(Into::into), 375 rn: rn.into(), 376 idx, 377 size: size.into(), 378 }); 379 } 380 381 /// Add immediate and register. add_ir(&mut self, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize)382 pub fn add_ir(&mut self, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize) { 383 self.alu_rri(ALUOp::Add, imm, rn, rd, size); 384 } 385 386 /// Add immediate and register, setting overflow flags. adds_ir(&mut self, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize)387 pub fn adds_ir(&mut self, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize) { 388 self.alu_rri(ALUOp::AddS, imm, rn, rd, size); 389 } 390 391 /// Add with three registers. add_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize)392 pub fn add_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) { 393 self.alu_rrr_extend(ALUOp::Add, rm, rn, rd, size, ExtendOp::UXTX); 394 } 395 396 /// Add with three registers and explicit extend operation. 
add_rrr_with_extend( &mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize, extendop: ExtendOp, )397 pub fn add_rrr_with_extend( 398 &mut self, 399 rm: Reg, 400 rn: Reg, 401 rd: WritableReg, 402 size: OperandSize, 403 extendop: ExtendOp, 404 ) { 405 self.alu_rrr_extend(ALUOp::Add, rm, rn, rd, size, extendop); 406 } 407 408 /// Add with three registers, setting overflow flags. adds_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize)409 pub fn adds_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) { 410 self.alu_rrr_extend(ALUOp::AddS, rm, rn, rd, size, ExtendOp::UXTX); 411 } 412 413 /// Add across Vector. addv(&mut self, rn: Reg, rd: WritableReg, size: VectorSize)414 pub fn addv(&mut self, rn: Reg, rd: WritableReg, size: VectorSize) { 415 self.emit(Inst::VecLanes { 416 op: VecLanesOp::Addv, 417 rd: rd.map(Into::into), 418 rn: rn.into(), 419 size, 420 }); 421 } 422 423 /// Subtract immediate and register. sub_ir(&mut self, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize)424 pub fn sub_ir(&mut self, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize) { 425 self.alu_rri(ALUOp::Sub, imm, rn, rd, size); 426 } 427 428 /// Subtract immediate and register, setting flags. subs_ir(&mut self, imm: Imm12, rn: Reg, size: OperandSize)429 pub fn subs_ir(&mut self, imm: Imm12, rn: Reg, size: OperandSize) { 430 self.alu_rri(ALUOp::SubS, imm, rn, writable!(regs::zero()), size); 431 } 432 433 /// Subtract with three registers. sub_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize)434 pub fn sub_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) { 435 self.alu_rrr_extend(ALUOp::Sub, rm, rn, rd, size, ExtendOp::UXTX); 436 } 437 438 /// Subtract with three registers, setting flags. 
subs_rrr(&mut self, rm: Reg, rn: Reg, size: OperandSize)439 pub fn subs_rrr(&mut self, rm: Reg, rn: Reg, size: OperandSize) { 440 self.alu_rrr_extend( 441 ALUOp::SubS, 442 rm, 443 rn, 444 writable!(regs::zero()), 445 size, 446 ExtendOp::UXTX, 447 ); 448 } 449 450 /// Multiply with three registers. mul_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize)451 pub fn mul_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) { 452 self.alu_rrrr(ALUOp3::MAdd, rm, rn, rd, regs::zero(), size); 453 } 454 455 /// Signed/unsigned division with three registers. div_rrr( &mut self, divisor: Reg, dividend: Reg, dest: Writable<Reg>, kind: DivKind, size: OperandSize, )456 pub fn div_rrr( 457 &mut self, 458 divisor: Reg, 459 dividend: Reg, 460 dest: Writable<Reg>, 461 kind: DivKind, 462 size: OperandSize, 463 ) { 464 // Check for division by 0. 465 self.trapz(divisor, TrapCode::INTEGER_DIVISION_BY_ZERO, size); 466 467 // check for overflow 468 if kind == DivKind::Signed { 469 // Check for divisor overflow. 470 self.alu_rri( 471 ALUOp::AddS, 472 Imm12::maybe_from_u64(1).expect("1 to fit in 12 bits"), 473 divisor, 474 writable!(zero()), 475 size, 476 ); 477 478 // Check if the dividend is 1. 479 self.emit(Inst::CCmpImm { 480 size: size.into(), 481 rn: dividend.into(), 482 imm: UImm5::maybe_from_u8(1).expect("1 fits in 5 bits"), 483 nzcv: NZCV::new(false, false, false, false), 484 cond: Cond::Eq, 485 }); 486 487 // Finally, trap if the previous operation overflowed. 488 self.trapif(Cond::Vs, TrapCode::INTEGER_OVERFLOW); 489 } 490 491 // `cranelift-codegen` doesn't support emitting sdiv for anything but I64, 492 // we therefore sign-extend the operand. 
493 // see: https://github.com/bytecodealliance/wasmtime/issues/9766 494 let size = if size == OperandSize::S32 && kind == DivKind::Signed { 495 self.extend( 496 divisor, 497 writable!(divisor), 498 ExtendKind::Signed(Extend::<Signed>::I64Extend32), 499 ); 500 self.extend( 501 dividend, 502 writable!(dividend), 503 ExtendKind::Signed(Extend::<Signed>::I64Extend32), 504 ); 505 OperandSize::S64 506 } else { 507 size 508 }; 509 510 let op = match kind { 511 DivKind::Signed => ALUOp::SDiv, 512 DivKind::Unsigned => ALUOp::UDiv, 513 }; 514 515 self.alu_rrr(op, divisor, dividend, dest.map(Into::into), size); 516 } 517 518 /// Signed/unsigned remainder operation with three registers. rem_rrr( &mut self, divisor: Reg, dividend: Reg, dest: Writable<Reg>, scratch: WritableReg, kind: RemKind, size: OperandSize, )519 pub fn rem_rrr( 520 &mut self, 521 divisor: Reg, 522 dividend: Reg, 523 dest: Writable<Reg>, 524 scratch: WritableReg, 525 kind: RemKind, 526 size: OperandSize, 527 ) { 528 // Check for division by 0 529 self.trapz(divisor, TrapCode::INTEGER_DIVISION_BY_ZERO, size); 530 531 // `cranelift-codegen` doesn't support emitting sdiv for anything but I64, 532 // we therefore sign-extend the operand. 533 // see: https://github.com/bytecodealliance/wasmtime/issues/9766 534 let size = if size == OperandSize::S32 && kind.is_signed() { 535 self.extend( 536 divisor, 537 writable!(divisor), 538 ExtendKind::Signed(Extend::<Signed>::I64Extend32), 539 ); 540 self.extend( 541 dividend, 542 writable!(dividend), 543 ExtendKind::Signed(Extend::<Signed>::I64Extend32), 544 ); 545 OperandSize::S64 546 } else { 547 size 548 }; 549 550 let op = match kind { 551 RemKind::Signed => ALUOp::SDiv, 552 RemKind::Unsigned => ALUOp::UDiv, 553 }; 554 555 self.alu_rrr(op, divisor, dividend, scratch, size); 556 557 self.alu_rrrr( 558 ALUOp3::MSub, 559 scratch.to_reg(), 560 divisor, 561 dest.map(Into::into), 562 dividend, 563 size, 564 ); 565 } 566 567 /// And with three registers. 
and_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize)568 pub fn and_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) { 569 self.alu_rrr(ALUOp::And, rm, rn, rd, size); 570 } 571 572 /// And immediate and register. and_ir(&mut self, imm: ImmLogic, rn: Reg, rd: WritableReg, size: OperandSize)573 pub fn and_ir(&mut self, imm: ImmLogic, rn: Reg, rd: WritableReg, size: OperandSize) { 574 self.alu_rri_logic(ALUOp::And, imm, rn, rd, size); 575 } 576 577 /// Or with three registers. or_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize)578 pub fn or_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) { 579 self.alu_rrr(ALUOp::Orr, rm, rn, rd, size); 580 } 581 582 /// Or immediate and register. or_ir(&mut self, imm: ImmLogic, rn: Reg, rd: WritableReg, size: OperandSize)583 pub fn or_ir(&mut self, imm: ImmLogic, rn: Reg, rd: WritableReg, size: OperandSize) { 584 self.alu_rri_logic(ALUOp::Orr, imm, rn, rd, size); 585 } 586 587 /// Xor with three registers. xor_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize)588 pub fn xor_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) { 589 self.alu_rrr(ALUOp::Eor, rm, rn, rd, size); 590 } 591 592 /// Xor immediate and register. xor_ir(&mut self, imm: ImmLogic, rn: Reg, rd: WritableReg, size: OperandSize)593 pub fn xor_ir(&mut self, imm: ImmLogic, rn: Reg, rd: WritableReg, size: OperandSize) { 594 self.alu_rri_logic(ALUOp::Eor, imm, rn, rd, size); 595 } 596 597 /// Shift with three registers. shift_rrr( &mut self, rm: Reg, rn: Reg, rd: WritableReg, kind: ShiftKind, size: OperandSize, )598 pub fn shift_rrr( 599 &mut self, 600 rm: Reg, 601 rn: Reg, 602 rd: WritableReg, 603 kind: ShiftKind, 604 size: OperandSize, 605 ) { 606 let shift_op: ALUOp = kind.into(); 607 // In the case of rotate left, we negate the register containing the 608 // shift value. 
609 if kind == ShiftKind::Rotl { 610 self.alu_rrr(ALUOp::Sub, rm, regs::zero(), writable!(rm), size); 611 self.alu_rrr(shift_op, rm, rn, rd, size); 612 } else { 613 self.alu_rrr(shift_op, rm, rn, rd, size); 614 } 615 } 616 617 /// Shift immediate and register. shift_ir( &mut self, imm: ImmShift, rn: Reg, rd: WritableReg, kind: ShiftKind, size: OperandSize, )618 pub fn shift_ir( 619 &mut self, 620 imm: ImmShift, 621 rn: Reg, 622 rd: WritableReg, 623 kind: ShiftKind, 624 size: OperandSize, 625 ) { 626 let shift_op: ALUOp = kind.into(); 627 // In the case of rotate left, we emit rotate right with type_size - 628 // value. 629 if kind == ShiftKind::Rotl { 630 let value_size = size.num_bits(); 631 let mut imm_val = value_size.wrapping_sub(imm.value()); 632 imm_val &= value_size - 1; 633 let negated_imm = ImmShift::maybe_from_u64(imm_val as u64).unwrap(); 634 635 self.alu_rri_shift(shift_op, negated_imm, rn, rd, size); 636 } else { 637 self.alu_rri_shift(shift_op, imm, rn, rd, size); 638 } 639 } 640 641 /// Count Leading Zeros. clz(&mut self, rn: Reg, rd: WritableReg, size: OperandSize)642 pub fn clz(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) { 643 self.bit_rr(BitOp::Clz, rn, rd, size); 644 } 645 646 /// Reverse Bits reverses the bit order in a register. rbit(&mut self, rn: Reg, rd: WritableReg, size: OperandSize)647 pub fn rbit(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) { 648 self.bit_rr(BitOp::RBit, rn, rd, size); 649 } 650 651 /// Float add with three registers. fadd_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize)652 pub fn fadd_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) { 653 self.fpu_rrr(FPUOp2::Add, rm, rn, rd, size); 654 } 655 656 /// Float sub with three registers. 
fsub_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize)657 pub fn fsub_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) { 658 self.fpu_rrr(FPUOp2::Sub, rm, rn, rd, size); 659 } 660 661 /// Float multiply with three registers. fmul_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize)662 pub fn fmul_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) { 663 self.fpu_rrr(FPUOp2::Mul, rm, rn, rd, size); 664 } 665 666 /// Float division with three registers. fdiv_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize)667 pub fn fdiv_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) { 668 self.fpu_rrr(FPUOp2::Div, rm, rn, rd, size); 669 } 670 671 /// Float max with three registers. fmax_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize)672 pub fn fmax_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) { 673 self.fpu_rrr(FPUOp2::Max, rm, rn, rd, size); 674 } 675 676 /// Float min with three registers. fmin_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize)677 pub fn fmin_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) { 678 self.fpu_rrr(FPUOp2::Min, rm, rn, rd, size); 679 } 680 681 /// Float neg with two registers. fneg_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize)682 pub fn fneg_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) { 683 self.fpu_rr(FPUOp1::Neg, rn, rd, size); 684 } 685 686 /// Float abs with two registers. fabs_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize)687 pub fn fabs_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) { 688 self.fpu_rr(FPUOp1::Abs, rn, rd, size); 689 } 690 691 /// Float sqrt with two registers. 
fsqrt_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize)692 pub fn fsqrt_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) { 693 self.fpu_rr(FPUOp1::Sqrt, rn, rd, size); 694 } 695 696 /// Float round (ceil, trunc, floor) with two registers. fround_rr(&mut self, rn: Reg, rd: WritableReg, mode: RoundingMode, size: OperandSize)697 pub fn fround_rr(&mut self, rn: Reg, rd: WritableReg, mode: RoundingMode, size: OperandSize) { 698 let fpu_mode = match (mode, size) { 699 (RoundingMode::Nearest, OperandSize::S32) => FpuRoundMode::Nearest32, 700 (RoundingMode::Up, OperandSize::S32) => FpuRoundMode::Plus32, 701 (RoundingMode::Down, OperandSize::S32) => FpuRoundMode::Minus32, 702 (RoundingMode::Zero, OperandSize::S32) => FpuRoundMode::Zero32, 703 (RoundingMode::Nearest, OperandSize::S64) => FpuRoundMode::Nearest64, 704 (RoundingMode::Up, OperandSize::S64) => FpuRoundMode::Plus64, 705 (RoundingMode::Down, OperandSize::S64) => FpuRoundMode::Minus64, 706 (RoundingMode::Zero, OperandSize::S64) => FpuRoundMode::Zero64, 707 (m, o) => panic!("Invalid rounding mode or operand size {m:?}, {o:?}"), 708 }; 709 self.fpu_round(fpu_mode, rn, rd) 710 } 711 712 /// Float unsigned shift right with two registers and an immediate. fushr_rri(&mut self, rn: Reg, rd: WritableReg, amount: u8, size: OperandSize)713 pub fn fushr_rri(&mut self, rn: Reg, rd: WritableReg, amount: u8, size: OperandSize) { 714 let imm = FPURightShiftImm { 715 amount, 716 lane_size_in_bits: size.num_bits(), 717 }; 718 let ushr = match size { 719 OperandSize::S32 => UShr32(imm), 720 OperandSize::S64 => UShr64(imm), 721 _ => unreachable!(), 722 }; 723 self.fpu_rri(ushr, rn, rd) 724 } 725 726 /// Float unsigned shift left and insert with three registers 727 /// and an immediate. 
fsli_rri_mod( &mut self, ri: Reg, rn: Reg, rd: WritableReg, amount: u8, size: OperandSize, )728 pub fn fsli_rri_mod( 729 &mut self, 730 ri: Reg, 731 rn: Reg, 732 rd: WritableReg, 733 amount: u8, 734 size: OperandSize, 735 ) { 736 let imm = FPULeftShiftImm { 737 amount, 738 lane_size_in_bits: size.num_bits(), 739 }; 740 let sli = match size { 741 OperandSize::S32 => FPUOpRIMod::Sli32(imm), 742 OperandSize::S64 => FPUOpRIMod::Sli64(imm), 743 _ => unreachable!(), 744 }; 745 self.fpu_rri_mod(sli, ri, rn, rd) 746 } 747 748 /// Float compare. fcmp(&mut self, rn: Reg, rm: Reg, size: OperandSize)749 pub fn fcmp(&mut self, rn: Reg, rm: Reg, size: OperandSize) { 750 self.emit(Inst::FpuCmp { 751 size: size.into(), 752 rn: rn.into(), 753 rm: rm.into(), 754 }) 755 } 756 757 /// Convert an signed integer to a float. cvt_sint_to_float( &mut self, rn: Reg, rd: WritableReg, src_size: OperandSize, dst_size: OperandSize, )758 pub fn cvt_sint_to_float( 759 &mut self, 760 rn: Reg, 761 rd: WritableReg, 762 src_size: OperandSize, 763 dst_size: OperandSize, 764 ) { 765 let op = match (src_size, dst_size) { 766 (OperandSize::S32, OperandSize::S32) => IntToFpuOp::I32ToF32, 767 (OperandSize::S64, OperandSize::S32) => IntToFpuOp::I64ToF32, 768 (OperandSize::S32, OperandSize::S64) => IntToFpuOp::I32ToF64, 769 (OperandSize::S64, OperandSize::S64) => IntToFpuOp::I64ToF64, 770 _ => unreachable!(), 771 }; 772 773 self.emit(Inst::IntToFpu { 774 op, 775 rd: rd.map(Into::into), 776 rn: rn.into(), 777 }); 778 } 779 780 /// Convert an unsigned integer to a float. 
cvt_uint_to_float( &mut self, rn: Reg, rd: WritableReg, src_size: OperandSize, dst_size: OperandSize, )781 pub fn cvt_uint_to_float( 782 &mut self, 783 rn: Reg, 784 rd: WritableReg, 785 src_size: OperandSize, 786 dst_size: OperandSize, 787 ) { 788 let op = match (src_size, dst_size) { 789 (OperandSize::S32, OperandSize::S32) => IntToFpuOp::U32ToF32, 790 (OperandSize::S64, OperandSize::S32) => IntToFpuOp::U64ToF32, 791 (OperandSize::S32, OperandSize::S64) => IntToFpuOp::U32ToF64, 792 (OperandSize::S64, OperandSize::S64) => IntToFpuOp::U64ToF64, 793 _ => unreachable!(), 794 }; 795 796 self.emit(Inst::IntToFpu { 797 op, 798 rd: rd.map(Into::into), 799 rn: rn.into(), 800 }); 801 } 802 803 /// Change precision of float. cvt_float_to_float( &mut self, rn: Reg, rd: WritableReg, src_size: OperandSize, dst_size: OperandSize, )804 pub fn cvt_float_to_float( 805 &mut self, 806 rn: Reg, 807 rd: WritableReg, 808 src_size: OperandSize, 809 dst_size: OperandSize, 810 ) { 811 let (fpu_op, size) = match (src_size, dst_size) { 812 (OperandSize::S32, OperandSize::S64) => (FPUOp1::Cvt32To64, ScalarSize::Size32), 813 (OperandSize::S64, OperandSize::S32) => (FPUOp1::Cvt64To32, ScalarSize::Size64), 814 _ => unimplemented!(), 815 }; 816 self.emit(Inst::FpuRR { 817 fpu_op, 818 size, 819 rd: rd.map(Into::into), 820 rn: rn.into(), 821 }); 822 } 823 824 /// Return instruction. ret(&mut self)825 pub fn ret(&mut self) { 826 self.emit(Inst::Ret {}); 827 } 828 829 /// An unconditional branch. jmp(&mut self, target: MachLabel)830 pub fn jmp(&mut self, target: MachLabel) { 831 self.emit(Inst::Jump { 832 dest: BranchTarget::Label(target), 833 }); 834 } 835 836 /// A conditional branch. jmp_if(&mut self, kind: Cond, taken: MachLabel)837 pub fn jmp_if(&mut self, kind: Cond, taken: MachLabel) { 838 self.emit(Inst::CondBr { 839 taken: BranchTarget::Label(taken), 840 not_taken: BranchTarget::ResolvedOffset(4), 841 kind: CondBrKind::Cond(kind), 842 }); 843 } 844 845 /// Emits a jump table sequence. 
jmp_table( &mut self, targets: &[MachLabel], default: MachLabel, index: Reg, tmp1: Reg, tmp2: Reg, )846 pub fn jmp_table( 847 &mut self, 848 targets: &[MachLabel], 849 default: MachLabel, 850 index: Reg, 851 tmp1: Reg, 852 tmp2: Reg, 853 ) { 854 self.emit_with_island( 855 Inst::JTSequence { 856 default, 857 targets: Box::new(targets.to_vec()), 858 ridx: index.into(), 859 rtmp1: Writable::from_reg(tmp1.into()), 860 rtmp2: Writable::from_reg(tmp2.into()), 861 }, 862 // number of bytes needed for the jumptable sequence: 863 // 4 bytes per instruction, with 8 instructions base + the size of 864 // the jumptable more. 865 (4 * (8 + targets.len())).try_into().unwrap(), 866 ); 867 } 868 869 /// Conditional Set sets the destination register to 1 if the condition 870 /// is true, and otherwise sets it to 0. cset(&mut self, rd: WritableReg, cond: Cond)871 pub fn cset(&mut self, rd: WritableReg, cond: Cond) { 872 self.emit(Inst::CSet { 873 rd: rd.map(Into::into), 874 cond, 875 }); 876 } 877 878 /// If the condition is true, `csel` writes rn to rd. If the 879 /// condition is false, it writes rm to rd csel(&mut self, rn: Reg, rm: Reg, rd: WritableReg, cond: Cond)880 pub fn csel(&mut self, rn: Reg, rm: Reg, rd: WritableReg, cond: Cond) { 881 self.emit(Inst::CSel { 882 rd: rd.map(Into::into), 883 rn: rn.into(), 884 rm: rm.into(), 885 cond, 886 }); 887 } 888 889 /// If the condition is true, `csel` writes rn to rd. 
If the 890 /// condition is false, it writes rm to rd fpu_csel(&mut self, rn: Reg, rm: Reg, rd: WritableReg, cond: Cond, size: OperandSize)891 pub fn fpu_csel(&mut self, rn: Reg, rm: Reg, rd: WritableReg, cond: Cond, size: OperandSize) { 892 match size { 893 OperandSize::S32 => { 894 self.emit(Inst::FpuCSel32 { 895 rd: rd.map(Into::into), 896 rn: rn.into(), 897 rm: rm.into(), 898 cond, 899 }); 900 } 901 OperandSize::S64 => { 902 self.emit(Inst::FpuCSel64 { 903 rd: rd.map(Into::into), 904 rn: rn.into(), 905 rm: rm.into(), 906 cond, 907 }); 908 } 909 _ => todo!(), 910 } 911 } 912 913 /// Population count per byte. cnt(&mut self, rd: WritableReg)914 pub fn cnt(&mut self, rd: WritableReg) { 915 self.emit(Inst::VecMisc { 916 op: VecMisc2::Cnt, 917 rd: rd.map(Into::into), 918 rn: rd.to_reg().into(), 919 size: VectorSize::Size8x8, 920 }); 921 } 922 extend(&mut self, rn: Reg, rd: WritableReg, kind: ExtendKind)923 pub fn extend(&mut self, rn: Reg, rd: WritableReg, kind: ExtendKind) { 924 self.emit(Inst::Extend { 925 rd: rd.map(Into::into), 926 rn: rn.into(), 927 signed: kind.signed(), 928 from_bits: kind.from_bits(), 929 to_bits: kind.to_bits(), 930 }) 931 } 932 933 /// Bitwise AND (shifted register), setting flags. ands_rr(&mut self, rn: Reg, rm: Reg, size: OperandSize)934 pub fn ands_rr(&mut self, rn: Reg, rm: Reg, size: OperandSize) { 935 self.alu_rrr(ALUOp::AndS, rm, rn, writable!(regs::zero()), size); 936 } 937 938 /// Permanently Undefined. udf(&mut self, code: TrapCode)939 pub fn udf(&mut self, code: TrapCode) { 940 self.emit(Inst::Udf { trap_code: code }); 941 } 942 943 /// Conditional trap. trapif(&mut self, cc: Cond, code: TrapCode)944 pub fn trapif(&mut self, cc: Cond, code: TrapCode) { 945 self.emit(Inst::TrapIf { 946 kind: CondBrKind::Cond(cc), 947 trap_code: code, 948 }); 949 } 950 951 /// Trap if `rn` is zero. 
trapz(&mut self, rn: Reg, code: TrapCode, size: OperandSize)952 pub fn trapz(&mut self, rn: Reg, code: TrapCode, size: OperandSize) { 953 self.emit(Inst::TrapIf { 954 kind: CondBrKind::Zero(rn.into(), size.into()), 955 trap_code: code, 956 }); 957 } 958 959 // Helpers for ALU operations. 960 alu_rri(&mut self, op: ALUOp, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize)961 fn alu_rri(&mut self, op: ALUOp, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize) { 962 self.emit(Inst::AluRRImm12 { 963 alu_op: op, 964 size: size.into(), 965 rd: rd.map(Into::into), 966 rn: rn.into(), 967 imm12: imm, 968 }); 969 } 970 alu_rri_logic( &mut self, op: ALUOp, imm: ImmLogic, rn: Reg, rd: WritableReg, size: OperandSize, )971 fn alu_rri_logic( 972 &mut self, 973 op: ALUOp, 974 imm: ImmLogic, 975 rn: Reg, 976 rd: WritableReg, 977 size: OperandSize, 978 ) { 979 self.emit(Inst::AluRRImmLogic { 980 alu_op: op, 981 size: size.into(), 982 rd: rd.map(Into::into), 983 rn: rn.into(), 984 imml: imm, 985 }); 986 } 987 alu_rri_shift( &mut self, op: ALUOp, imm: ImmShift, rn: Reg, rd: WritableReg, size: OperandSize, )988 fn alu_rri_shift( 989 &mut self, 990 op: ALUOp, 991 imm: ImmShift, 992 rn: Reg, 993 rd: WritableReg, 994 size: OperandSize, 995 ) { 996 self.emit(Inst::AluRRImmShift { 997 alu_op: op, 998 size: size.into(), 999 rd: rd.map(Into::into), 1000 rn: rn.into(), 1001 immshift: imm, 1002 }); 1003 } 1004 alu_rrr(&mut self, op: ALUOp, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize)1005 fn alu_rrr(&mut self, op: ALUOp, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) { 1006 self.emit(Inst::AluRRR { 1007 alu_op: op, 1008 size: size.into(), 1009 rd: rd.map(Into::into), 1010 rn: rn.into(), 1011 rm: rm.into(), 1012 }); 1013 } 1014 alu_rrr_extend( &mut self, op: ALUOp, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize, extendop: ExtendOp, )1015 fn alu_rrr_extend( 1016 &mut self, 1017 op: ALUOp, 1018 rm: Reg, 1019 rn: Reg, 1020 rd: WritableReg, 1021 size: OperandSize, 
1022 extendop: ExtendOp, 1023 ) { 1024 self.emit(Inst::AluRRRExtend { 1025 alu_op: op, 1026 size: size.into(), 1027 rd: rd.map(Into::into), 1028 rn: rn.into(), 1029 rm: rm.into(), 1030 extendop, 1031 }); 1032 } 1033 alu_rrrr( &mut self, op: ALUOp3, rm: Reg, rn: Reg, rd: WritableReg, ra: Reg, size: OperandSize, )1034 fn alu_rrrr( 1035 &mut self, 1036 op: ALUOp3, 1037 rm: Reg, 1038 rn: Reg, 1039 rd: WritableReg, 1040 ra: Reg, 1041 size: OperandSize, 1042 ) { 1043 self.emit(Inst::AluRRRR { 1044 alu_op: op, 1045 size: size.into(), 1046 rd: rd.map(Into::into), 1047 rn: rn.into(), 1048 rm: rm.into(), 1049 ra: ra.into(), 1050 }); 1051 } 1052 fpu_rrr(&mut self, op: FPUOp2, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize)1053 fn fpu_rrr(&mut self, op: FPUOp2, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) { 1054 self.emit(Inst::FpuRRR { 1055 fpu_op: op, 1056 size: size.into(), 1057 rd: rd.map(Into::into), 1058 rn: rn.into(), 1059 rm: rm.into(), 1060 }); 1061 } 1062 fpu_rri(&mut self, op: FPUOpRI, rn: Reg, rd: WritableReg)1063 fn fpu_rri(&mut self, op: FPUOpRI, rn: Reg, rd: WritableReg) { 1064 self.emit(Inst::FpuRRI { 1065 fpu_op: op, 1066 rd: rd.map(Into::into), 1067 rn: rn.into(), 1068 }); 1069 } 1070 fpu_rri_mod(&mut self, op: FPUOpRIMod, ri: Reg, rn: Reg, rd: WritableReg)1071 fn fpu_rri_mod(&mut self, op: FPUOpRIMod, ri: Reg, rn: Reg, rd: WritableReg) { 1072 self.emit(Inst::FpuRRIMod { 1073 fpu_op: op, 1074 rd: rd.map(Into::into), 1075 ri: ri.into(), 1076 rn: rn.into(), 1077 }); 1078 } 1079 fpu_rr(&mut self, op: FPUOp1, rn: Reg, rd: WritableReg, size: OperandSize)1080 fn fpu_rr(&mut self, op: FPUOp1, rn: Reg, rd: WritableReg, size: OperandSize) { 1081 self.emit(Inst::FpuRR { 1082 fpu_op: op, 1083 size: size.into(), 1084 rd: rd.map(Into::into), 1085 rn: rn.into(), 1086 }); 1087 } 1088 fpu_round(&mut self, op: FpuRoundMode, rn: Reg, rd: WritableReg)1089 fn fpu_round(&mut self, op: FpuRoundMode, rn: Reg, rd: WritableReg) { 1090 self.emit(Inst::FpuRound { 1091 
op, 1092 rd: rd.map(Into::into), 1093 rn: rn.into(), 1094 }); 1095 } 1096 bit_rr(&mut self, op: BitOp, rn: Reg, rd: WritableReg, size: OperandSize)1097 fn bit_rr(&mut self, op: BitOp, rn: Reg, rd: WritableReg, size: OperandSize) { 1098 self.emit(Inst::BitRR { 1099 op, 1100 size: size.into(), 1101 rd: rd.map(Into::into), 1102 rn: rn.into(), 1103 }); 1104 } 1105 1106 /// Get a label from the underlying machine code buffer. get_label(&mut self) -> MachLabel1107 pub fn get_label(&mut self) -> MachLabel { 1108 self.buffer.get_label() 1109 } 1110 1111 /// Get a mutable reference to underlying 1112 /// machine buffer. buffer_mut(&mut self) -> &mut MachBuffer<Inst>1113 pub fn buffer_mut(&mut self) -> &mut MachBuffer<Inst> { 1114 &mut self.buffer 1115 } 1116 1117 /// Get a reference to the underlying machine buffer. buffer(&self) -> &MachBuffer<Inst>1118 pub fn buffer(&self) -> &MachBuffer<Inst> { 1119 &self.buffer 1120 } 1121 1122 /// Emit a direct call to a function defined locally and 1123 /// referenced to by `name`. call_with_name(&mut self, name: UserExternalNameRef, call_conv: CallingConvention)1124 pub fn call_with_name(&mut self, name: UserExternalNameRef, call_conv: CallingConvention) { 1125 self.emit(Inst::Call { 1126 info: Box::new(cranelift_codegen::CallInfo::empty( 1127 ExternalName::user(name), 1128 call_conv.into(), 1129 )), 1130 }) 1131 } 1132 1133 /// Emit an indirect call to a function whose address is 1134 /// stored the `callee` register. call_with_reg(&mut self, callee: Reg, call_conv: CallingConvention)1135 pub fn call_with_reg(&mut self, callee: Reg, call_conv: CallingConvention) { 1136 self.emit(Inst::CallInd { 1137 info: Box::new(cranelift_codegen::CallInfo::empty( 1138 callee.into(), 1139 call_conv.into(), 1140 )), 1141 }) 1142 } 1143 1144 /// Load the min value for an integer of size out_size, as a floating-point 1145 /// of size `in-size`, into register `rd`. 
min_fp_value( &mut self, signed: bool, in_size: OperandSize, out_size: OperandSize, rd: Writable<Reg>, )1146 fn min_fp_value( 1147 &mut self, 1148 signed: bool, 1149 in_size: OperandSize, 1150 out_size: OperandSize, 1151 rd: Writable<Reg>, 1152 ) { 1153 match in_size { 1154 OperandSize::S32 => { 1155 let (min, _) = f32_cvt_to_int_bounds(signed, out_size.num_bits().into()); 1156 self.mov_ir(rd, Imm::f32(min.to_bits()), in_size); 1157 } 1158 OperandSize::S64 => { 1159 let (min, _) = f64_cvt_to_int_bounds(signed, out_size.num_bits().into()); 1160 self.mov_ir(rd, Imm::f64(min.to_bits()), in_size); 1161 } 1162 s => unreachable!("unsupported floating-point size: {}bit", s.num_bits()), 1163 }; 1164 } 1165 1166 /// Load the max value for an integer of size out_size, as a floating-point 1167 /// of size `in_size`, into register `rd`. max_fp_value( &mut self, signed: bool, in_size: OperandSize, out_size: OperandSize, rd: Writable<Reg>, )1168 fn max_fp_value( 1169 &mut self, 1170 signed: bool, 1171 in_size: OperandSize, 1172 out_size: OperandSize, 1173 rd: Writable<Reg>, 1174 ) { 1175 match in_size { 1176 OperandSize::S32 => { 1177 let (_, max) = f32_cvt_to_int_bounds(signed, out_size.num_bits().into()); 1178 self.mov_ir(rd, Imm::f32(max.to_bits()), in_size); 1179 } 1180 OperandSize::S64 => { 1181 let (_, max) = f64_cvt_to_int_bounds(signed, out_size.num_bits().into()); 1182 self.mov_ir(rd, Imm::f64(max.to_bits()), in_size); 1183 } 1184 s => unreachable!("unsupported floating-point size: {}bit", s.num_bits()), 1185 }; 1186 } 1187 1188 /// Emit instructions to check if the value in `rn` is NaN. check_nan(&mut self, rn: Reg, size: OperandSize)1189 fn check_nan(&mut self, rn: Reg, size: OperandSize) { 1190 self.fcmp(rn, rn, size); 1191 self.trapif(Cond::Vs, TrapCode::BAD_CONVERSION_TO_INTEGER); 1192 } 1193 1194 /// Convert the floating point of size `src_size` stored in `src`, into a integer of size 1195 /// `dst_size`, storing the result in `dst`. 
pub fn fpu_to_int(
        &mut self,
        dst: Writable<Reg>,
        src: Reg,
        tmp_reg: WritableReg,
        src_size: OperandSize,
        dst_size: OperandSize,
        kind: TruncKind,
        signed: bool,
    ) {
        if kind.is_unchecked() {
            // Despite the name, the `Unchecked` kind is the one for which the
            // guards are emitted here: first a NaN check, then both range
            // checks.
            self.check_nan(src, src_size);

            let scratch = tmp_reg.to_reg();

            // Trap when `src` compares less than or equal to the lower bound.
            self.min_fp_value(signed, src_size, dst_size, tmp_reg);
            self.fcmp(src, scratch, src_size);
            self.trapif(Cond::Le, TrapCode::INTEGER_OVERFLOW);

            // Trap when `src` compares greater than or equal to the upper bound.
            self.max_fp_value(signed, src_size, dst_size, tmp_reg);
            self.fcmp(src, scratch, src_size);
            self.trapif(Cond::Ge, TrapCode::INTEGER_OVERFLOW);
        }

        self.cvt_fpu_to_int(dst, src, src_size, dst_size, signed);
    }

    /// Select and emit the appropriate `fcvt*` instruction for converting the
    /// float of size `src_size` in `src` to an integer of size `dst_size` in
    /// `dst`.
    ///
    /// Only 32-bit and 64-bit sizes are supported on either side; any other
    /// combination hits `unimplemented!`.
    pub fn cvt_fpu_to_int(
        &mut self,
        dst: Writable<Reg>,
        src: Reg,
        src_size: OperandSize,
        dst_size: OperandSize,
        signed: bool,
    ) {
        let op = match (src_size, dst_size, signed) {
            // Signed destinations.
            (OperandSize::S32, OperandSize::S32, true) => FpuToIntOp::F32ToI32,
            (OperandSize::S32, OperandSize::S64, true) => FpuToIntOp::F32ToI64,
            (OperandSize::S64, OperandSize::S32, true) => FpuToIntOp::F64ToI32,
            (OperandSize::S64, OperandSize::S64, true) => FpuToIntOp::F64ToI64,
            // Unsigned destinations.
            (OperandSize::S32, OperandSize::S32, false) => FpuToIntOp::F32ToU32,
            (OperandSize::S32, OperandSize::S64, false) => FpuToIntOp::F32ToU64,
            (OperandSize::S64, OperandSize::S32, false) => FpuToIntOp::F64ToU32,
            (OperandSize::S64, OperandSize::S64, false) => FpuToIntOp::F64ToU64,
            (float_size, int_size, is_signed) => unimplemented!(
                "unsupported conversion: f{} to {}{}",
                float_size.num_bits(),
                if is_signed { "i" } else { "u" },
                int_size.num_bits(),
            ),
        };

        let convert = Inst::FpuToInt {
            op,
            rd: dst.map(Into::into),
            rn: src.into(),
        };
        self.emit(convert);
    }
}

/// Captures the region in a MachBuffer where an add-with-immediate instruction would be emitted,
/// but the immediate is not yet known.
pub(crate) struct PatchableAddToReg {
    /// The region to be patched in the [`MachBuffer`]. It contains
    /// space for 3 32-bit instructions, i.e. it's 12 bytes long.
    region: PatchRegion,

    /// The destination register for the add instruction.
    reg: Writable<Reg>,

    /// The temporary register used to hold the immediate value.
    tmp: Writable<Reg>,
}

impl PatchableAddToReg {
    /// Create a new [`PatchableAddToReg`] by capturing a region in the output
    /// buffer containing an instruction sequence that loads an immediate into a
    /// register `tmp`, then adds it to a register `reg`. The [`MachBuffer`]
    /// will have that instruction sequence written to the region, though the
    /// immediate loaded into `tmp` will be `0` until the `::finalize` method is
    /// called.
new(reg: Writable<Reg>, tmp: Writable<Reg>, buf: &mut MachBuffer<Inst>) -> Self1279 pub(crate) fn new(reg: Writable<Reg>, tmp: Writable<Reg>, buf: &mut MachBuffer<Inst>) -> Self { 1280 let insns = Self::add_immediate_instruction_sequence(reg, tmp, 0); 1281 let open = buf.start_patchable(); 1282 buf.put_data(&insns); 1283 let region = buf.end_patchable(open); 1284 1285 Self { region, reg, tmp } 1286 } 1287 add_immediate_instruction_sequence( reg: Writable<Reg>, tmp: Writable<Reg>, imm: i32, ) -> [u8; 12]1288 fn add_immediate_instruction_sequence( 1289 reg: Writable<Reg>, 1290 tmp: Writable<Reg>, 1291 imm: i32, 1292 ) -> [u8; 12] { 1293 let imm_hi = imm as u64 & 0xffff_0000; 1294 let imm_hi = MoveWideConst::maybe_from_u64(imm_hi).unwrap(); 1295 1296 let imm_lo = imm as u64 & 0x0000_ffff; 1297 let imm_lo = MoveWideConst::maybe_from_u64(imm_lo).unwrap(); 1298 1299 let size = OperandSize::S64.into(); 1300 1301 let tmp = tmp.map(Into::into); 1302 let rd = reg.map(Into::into); 1303 1304 // This is "movz to bits 16-31 of 64 bit reg tmp and zero the rest" 1305 let mov_insn = enc_move_wide(inst::MoveWideOp::MovZ, tmp, imm_hi, size); 1306 1307 // This is "movk to bits 0-15 of 64 bit reg tmp" 1308 let movk_insn = enc_movk(tmp, imm_lo, size); 1309 1310 // This is "add tmp to rd". The opcodes are somewhat buried in the 1311 // instruction encoder so we just repeat them here. 1312 let add_bits_31_21: u32 = 0b00001011_000 | (size.sf_bit() << 10); 1313 let add_bits_15_10: u32 = 0; 1314 let add_insn = enc_arith_rrr( 1315 add_bits_31_21, 1316 add_bits_15_10, 1317 rd, 1318 rd.to_reg(), 1319 tmp.to_reg(), 1320 ); 1321 1322 let mut buf = [0u8; 12]; 1323 buf[0..4].copy_from_slice(&mov_insn.to_le_bytes()); 1324 buf[4..8].copy_from_slice(&movk_insn.to_le_bytes()); 1325 buf[8..12].copy_from_slice(&add_insn.to_le_bytes()); 1326 buf 1327 } 1328 1329 /// Patch the [`MachBuffer`] with the known constant to be added to the register. 
The final 1330 /// value is passed in as an i32, but the instruction encoding is fixed when 1331 /// [`PatchableAddToReg::new`] is called. finalize(self, val: i32, buffer: &mut MachBuffer<Inst>)1332 pub(crate) fn finalize(self, val: i32, buffer: &mut MachBuffer<Inst>) { 1333 let insns = Self::add_immediate_instruction_sequence(self.reg, self.tmp, val); 1334 let slice = self.region.patch(buffer); 1335 assert_eq!(slice.len(), insns.len()); 1336 slice.copy_from_slice(&insns); 1337 } 1338 } 1339