1 //! Interpretation of pulley bytecode. 2 3 use crate::decode::*; 4 use crate::encode::Encode; 5 use crate::imms::*; 6 use crate::profile::{ExecutingPc, ExecutingPcRef}; 7 use crate::regs::*; 8 use alloc::string::ToString; 9 use alloc::vec::Vec; 10 use core::fmt; 11 use core::mem; 12 use core::ops::ControlFlow; 13 use core::ops::{Index, IndexMut}; 14 use core::ptr::NonNull; 15 use pulley_macros::interp_disable_if_cfg; 16 use wasmtime_core::math::{WasmFloat, f32_cvt_to_int_bounds, f64_cvt_to_int_bounds}; 17 18 mod debug; 19 #[cfg(all(not(pulley_tail_calls), not(pulley_assume_llvm_makes_tail_calls)))] 20 mod match_loop; 21 #[cfg(any(pulley_tail_calls, pulley_assume_llvm_makes_tail_calls))] 22 mod tail_loop; 23 24 const DEFAULT_STACK_SIZE: usize = 1 << 20; // 1 MiB 25 26 /// A virtual machine for interpreting Pulley bytecode. 27 pub struct Vm { 28 state: MachineState, 29 executing_pc: ExecutingPc, 30 } 31 32 impl Default for Vm { 33 fn default() -> Self { 34 Vm::new() 35 } 36 } 37 38 impl Vm { 39 /// Create a new virtual machine with the default stack size. 40 pub fn new() -> Self { 41 Self::with_stack(DEFAULT_STACK_SIZE) 42 } 43 44 /// Create a new virtual machine with the given stack. 45 pub fn with_stack(stack_size: usize) -> Self { 46 Self { 47 state: MachineState::with_stack(stack_size), 48 executing_pc: ExecutingPc::default(), 49 } 50 } 51 52 /// Get a shared reference to this VM's machine state. 53 pub fn state(&self) -> &MachineState { 54 &self.state 55 } 56 57 /// Get an exclusive reference to this VM's machine state. 58 pub fn state_mut(&mut self) -> &mut MachineState { 59 &mut self.state 60 } 61 62 /// Call a bytecode function. 63 /// 64 /// The given `func` must point to the beginning of a valid Pulley bytecode 65 /// function. 66 /// 67 /// The given `args` must match the number and type of arguments that 68 /// function expects. 69 /// 70 /// The given `rets` must match the function's actual return types. 
71 /// 72 /// Returns either the resulting values, or the PC at which a trap was 73 /// raised. 74 pub unsafe fn call<'a, T>( 75 &'a mut self, 76 func: NonNull<u8>, 77 args: &[Val], 78 rets: T, 79 ) -> DoneReason<impl Iterator<Item = Val> + use<'a, T>> 80 where 81 T: IntoIterator<Item = RegType> + 'a, 82 { 83 unsafe { 84 let lr = self.call_start(args); 85 86 match self.call_run(func) { 87 DoneReason::ReturnToHost(()) => DoneReason::ReturnToHost(self.call_end(lr, rets)), 88 DoneReason::Trap { pc, kind } => DoneReason::Trap { pc, kind }, 89 DoneReason::CallIndirectHost { id, resume } => { 90 DoneReason::CallIndirectHost { id, resume } 91 } 92 } 93 } 94 } 95 96 /// Performs the initial part of [`Vm::call`] in setting up the `args` 97 /// provided in registers according to Pulley's ABI. 98 /// 99 /// # Return 100 /// 101 /// Returns the old `lr` register value. The current `lr` value is replaced 102 /// with a sentinel that triggers a return to the host when returned-to. 103 /// 104 /// # Unsafety 105 /// 106 /// All the same unsafety as `call` and additionally, you must 107 /// invoke `call_run` and then `call_end` after calling `call_start`. 108 /// If you don't want to wrangle these invocations, use `call` instead 109 /// of `call_{start,run,end}`. 110 pub unsafe fn call_start<'a>(&'a mut self, args: &[Val]) -> *mut u8 { 111 // NB: make sure this method stays in sync with 112 // `PulleyMachineDeps::compute_arg_locs`! 
113 114 let mut x_args = (0..15).map(|x| unsafe { XReg::new_unchecked(x) }); 115 let mut f_args = (0..16).map(|f| unsafe { FReg::new_unchecked(f) }); 116 #[cfg(not(pulley_disable_interp_simd))] 117 let mut v_args = (0..16).map(|v| unsafe { VReg::new_unchecked(v) }); 118 119 for arg in args { 120 match arg { 121 Val::XReg(val) => match x_args.next() { 122 Some(reg) => self.state[reg] = *val, 123 None => todo!("stack slots"), 124 }, 125 Val::FReg(val) => match f_args.next() { 126 Some(reg) => self.state[reg] = *val, 127 None => todo!("stack slots"), 128 }, 129 #[cfg(not(pulley_disable_interp_simd))] 130 Val::VReg(val) => match v_args.next() { 131 Some(reg) => self.state[reg] = *val, 132 None => todo!("stack slots"), 133 }, 134 } 135 } 136 137 mem::replace(&mut self.state.lr, HOST_RETURN_ADDR) 138 } 139 140 /// Peforms the internal part of [`Vm::call`] where bytecode is actually 141 /// executed. 142 /// 143 /// # Unsafety 144 /// 145 /// In addition to all the invariants documented for `call`, you 146 /// may only invoke `call_run` after invoking `call_start` to 147 /// initialize this call's arguments. 148 pub unsafe fn call_run(&mut self, pc: NonNull<u8>) -> DoneReason<()> { 149 self.state.debug_assert_done_reason_none(); 150 let interpreter = Interpreter { 151 state: &mut self.state, 152 pc: unsafe { UnsafeBytecodeStream::new(pc) }, 153 executing_pc: self.executing_pc.as_ref(), 154 }; 155 let done = interpreter.run(); 156 self.state.done_decode(done) 157 } 158 159 /// Peforms the tail end of [`Vm::call`] by returning the values as 160 /// determined by `rets` according to Pulley's ABI. 161 /// 162 /// The `old_ret` value should have been provided from `call_start` 163 /// previously. 164 /// 165 /// # Unsafety 166 /// 167 /// In addition to the invariants documented for `call`, this may 168 /// only be called after `call_run`. 
169 pub unsafe fn call_end<'a>( 170 &'a mut self, 171 old_ret: *mut u8, 172 rets: impl IntoIterator<Item = RegType> + 'a, 173 ) -> impl Iterator<Item = Val> + 'a { 174 self.state.lr = old_ret; 175 // NB: make sure this method stays in sync with 176 // `PulleyMachineDeps::compute_arg_locs`! 177 178 let mut x_rets = (0..15).map(|x| unsafe { XReg::new_unchecked(x) }); 179 let mut f_rets = (0..16).map(|f| unsafe { FReg::new_unchecked(f) }); 180 #[cfg(not(pulley_disable_interp_simd))] 181 let mut v_rets = (0..16).map(|v| unsafe { VReg::new_unchecked(v) }); 182 183 rets.into_iter().map(move |ty| match ty { 184 RegType::XReg => match x_rets.next() { 185 Some(reg) => Val::XReg(self.state[reg]), 186 None => todo!("stack slots"), 187 }, 188 RegType::FReg => match f_rets.next() { 189 Some(reg) => Val::FReg(self.state[reg]), 190 None => todo!("stack slots"), 191 }, 192 #[cfg(not(pulley_disable_interp_simd))] 193 RegType::VReg => match v_rets.next() { 194 Some(reg) => Val::VReg(self.state[reg]), 195 None => todo!("stack slots"), 196 }, 197 #[cfg(pulley_disable_interp_simd)] 198 RegType::VReg => panic!("simd support disabled at compile time"), 199 }) 200 } 201 202 /// Returns the current `fp` register value. 203 pub fn fp(&self) -> *mut u8 { 204 self.state.fp 205 } 206 207 /// Returns the current `lr` register value. 208 pub fn lr(&self) -> *mut u8 { 209 self.state.lr 210 } 211 212 /// Sets the current `fp` register value. 213 pub unsafe fn set_fp(&mut self, fp: *mut u8) { 214 self.state.fp = fp; 215 } 216 217 /// Sets the current `lr` register value. 218 pub unsafe fn set_lr(&mut self, lr: *mut u8) { 219 self.state.lr = lr; 220 } 221 222 /// Gets a handle to the currently executing program counter for this 223 /// interpreter which can be read from other threads. 224 // 225 // Note that despite this field still existing with `not(feature = 226 // "profile")` it's hidden from the public API in that scenario as it has no 227 // methods anyway. 
228 #[cfg(feature = "profile")] 229 pub fn executing_pc(&self) -> &ExecutingPc { 230 &self.executing_pc 231 } 232 } 233 234 impl Drop for Vm { 235 fn drop(&mut self) { 236 self.executing_pc.set_done(); 237 } 238 } 239 240 /// The type of a register in the Pulley machine state. 241 #[derive(Clone, Copy, Debug)] 242 pub enum RegType { 243 /// An `x` register: integers. 244 XReg, 245 246 /// An `f` register: floats. 247 FReg, 248 249 /// A `v` register: vectors. 250 VReg, 251 } 252 253 /// A value that can be stored in a register. 254 #[derive(Clone, Copy, Debug)] 255 pub enum Val { 256 /// An `x` register value: integers. 257 XReg(XRegVal), 258 259 /// An `f` register value: floats. 260 FReg(FRegVal), 261 262 /// A `v` register value: vectors. 263 #[cfg(not(pulley_disable_interp_simd))] 264 VReg(VRegVal), 265 } 266 267 impl fmt::LowerHex for Val { 268 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 269 match self { 270 Val::XReg(v) => fmt::LowerHex::fmt(v, f), 271 Val::FReg(v) => fmt::LowerHex::fmt(v, f), 272 #[cfg(not(pulley_disable_interp_simd))] 273 Val::VReg(v) => fmt::LowerHex::fmt(v, f), 274 } 275 } 276 } 277 278 impl From<XRegVal> for Val { 279 fn from(value: XRegVal) -> Self { 280 Val::XReg(value) 281 } 282 } 283 284 impl From<u64> for Val { 285 fn from(value: u64) -> Self { 286 XRegVal::new_u64(value).into() 287 } 288 } 289 290 impl From<u32> for Val { 291 fn from(value: u32) -> Self { 292 XRegVal::new_u32(value).into() 293 } 294 } 295 296 impl From<i64> for Val { 297 fn from(value: i64) -> Self { 298 XRegVal::new_i64(value).into() 299 } 300 } 301 302 impl From<i32> for Val { 303 fn from(value: i32) -> Self { 304 XRegVal::new_i32(value).into() 305 } 306 } 307 308 impl<T> From<*mut T> for Val { 309 fn from(value: *mut T) -> Self { 310 XRegVal::new_ptr(value).into() 311 } 312 } 313 314 impl From<FRegVal> for Val { 315 fn from(value: FRegVal) -> Self { 316 Val::FReg(value) 317 } 318 } 319 320 impl From<f64> for Val { 321 fn from(value: f64) -> Self { 
322 FRegVal::new_f64(value).into() 323 } 324 } 325 326 impl From<f32> for Val { 327 fn from(value: f32) -> Self { 328 FRegVal::new_f32(value).into() 329 } 330 } 331 332 #[cfg(not(pulley_disable_interp_simd))] 333 impl From<VRegVal> for Val { 334 fn from(value: VRegVal) -> Self { 335 Val::VReg(value) 336 } 337 } 338 339 /// An `x` register value: integers. 340 #[derive(Copy, Clone)] 341 pub struct XRegVal(XRegUnion); 342 343 impl PartialEq for XRegVal { 344 fn eq(&self, other: &Self) -> bool { 345 self.get_u64() == other.get_u64() 346 } 347 } 348 349 impl Eq for XRegVal {} 350 351 impl fmt::Debug for XRegVal { 352 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 353 f.debug_struct("XRegVal") 354 .field("as_u64", &self.get_u64()) 355 .finish() 356 } 357 } 358 359 impl fmt::LowerHex for XRegVal { 360 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 361 fmt::LowerHex::fmt(&self.get_u64(), f) 362 } 363 } 364 365 /// Contents of an "x" register, or a general-purpose register. 366 /// 367 /// This is represented as a Rust `union` to make it easier to access typed 368 /// views of this, notably the `ptr` field which enables preserving a bit of 369 /// provenance for Rust for values stored as a pointer and read as a pointer. 370 /// 371 /// Note that the actual in-memory representation of this value is handled 372 /// carefully at this time. Pulley bytecode exposes the ability to store a 373 /// 32-bit result into a register and then read the 64-bit contents of the 374 /// register. This leaves us with the question of what to do with the upper bits 375 /// of the register when the 32-bit result is generated. Possibilities for 376 /// handling this are: 377 /// 378 /// 1. Do nothing, just store the 32-bit value. The problem with this approach 379 /// means that the "upper bits" are now endianness-dependent. That means that 380 /// the state of the register is now platform-dependent. 381 /// 2. Sign or zero-extend. 
This restores platform-independent behavior but 382 /// requires an extra store on 32-bit platforms because they can probably 383 /// only store 32-bits at a time. 384 /// 3. Always store the values in this union as little-endian. This means that 385 /// big-endian platforms have to do a byte-swap but otherwise it has 386 /// platform-independent behavior. 387 /// 388 /// This union chooses route (3) at this time where the values here are always 389 /// stored in little-endian form (even the `ptr` field). That guarantees 390 /// cross-platform behavior while also minimizing the amount of data stored on 391 /// writes. 392 /// 393 /// In the future we may wish to benchmark this and possibly change this. 394 /// Technically Cranelift-generated bytecode should never rely on the upper bits 395 /// of a register if it didn't previously write them so this in theory doesn't 396 /// actually matter for Cranelift or wasm semantics. The only cost right now is 397 /// to big-endian platforms though and it's not certain how crucial performance 398 /// will be there. 399 /// 400 /// One final note is that this notably contrasts with native CPUs where 401 /// native ISAs like RISC-V specifically define the entire register on every 402 /// instruction, even if only the low half contains a significant result. Pulley 403 /// is unlikely to become out-of-order within the CPU itself as it's interpreted 404 /// meaning that severing data-dependencies with previous operations is 405 /// hypothesized to not be too important. If this is ever a problem though it 406 /// could increase the likelihood we go for route (2) above instead (or maybe 407 /// even (1)). 408 #[derive(Copy, Clone)] 409 union XRegUnion { 410 i32: i32, 411 u32: u32, 412 i64: i64, 413 u64: u64, 414 415 // Note that this is intentionally `usize` and not an actual pointer like 416 // `*mut u8`. 
The reason for this is that provenance is required in Rust for 417 // pointers but Cranelift has no pointer type and thus no concept of 418 // provenance. That means that at-rest it's not known whether the value has 419 // provenance or not and basically means that Pulley is required to use 420 // "permissive provenance" in Rust as opposed to strict provenance. 421 // 422 // That's more-or-less a long-winded way of saying that storage of a pointer 423 // in this value is done with `.expose_provenance()` and reading a pointer 424 // uses `with_exposed_provenance_mut(..)`. 425 ptr: usize, 426 } 427 428 impl Default for XRegVal { 429 fn default() -> Self { 430 Self(unsafe { mem::zeroed() }) 431 } 432 } 433 434 #[expect(missing_docs, reason = "self-describing methods")] 435 impl XRegVal { 436 pub fn new_i32(x: i32) -> Self { 437 let mut val = XRegVal::default(); 438 val.set_i32(x); 439 val 440 } 441 442 pub fn new_u32(x: u32) -> Self { 443 let mut val = XRegVal::default(); 444 val.set_u32(x); 445 val 446 } 447 448 pub fn new_i64(x: i64) -> Self { 449 let mut val = XRegVal::default(); 450 val.set_i64(x); 451 val 452 } 453 454 pub fn new_u64(x: u64) -> Self { 455 let mut val = XRegVal::default(); 456 val.set_u64(x); 457 val 458 } 459 460 pub fn new_ptr<T>(ptr: *mut T) -> Self { 461 let mut val = XRegVal::default(); 462 val.set_ptr(ptr); 463 val 464 } 465 466 pub fn get_i32(&self) -> i32 { 467 let x = unsafe { self.0.i32 }; 468 i32::from_le(x) 469 } 470 471 pub fn get_u32(&self) -> u32 { 472 let x = unsafe { self.0.u32 }; 473 u32::from_le(x) 474 } 475 476 pub fn get_i64(&self) -> i64 { 477 let x = unsafe { self.0.i64 }; 478 i64::from_le(x) 479 } 480 481 pub fn get_u64(&self) -> u64 { 482 let x = unsafe { self.0.u64 }; 483 u64::from_le(x) 484 } 485 486 pub fn get_ptr<T>(&self) -> *mut T { 487 let ptr = unsafe { self.0.ptr }; 488 core::ptr::with_exposed_provenance_mut(usize::from_le(ptr)) 489 } 490 491 pub fn set_i32(&mut self, x: i32) { 492 self.0.i32 = x.to_le(); 493 } 
494 495 pub fn set_u32(&mut self, x: u32) { 496 self.0.u32 = x.to_le(); 497 } 498 499 pub fn set_i64(&mut self, x: i64) { 500 self.0.i64 = x.to_le(); 501 } 502 503 pub fn set_u64(&mut self, x: u64) { 504 self.0.u64 = x.to_le(); 505 } 506 507 pub fn set_ptr<T>(&mut self, ptr: *mut T) { 508 self.0.ptr = ptr.expose_provenance().to_le(); 509 } 510 } 511 512 /// An `f` register value: floats. 513 #[derive(Copy, Clone)] 514 pub struct FRegVal(FRegUnion); 515 516 impl fmt::Debug for FRegVal { 517 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 518 f.debug_struct("FRegVal") 519 .field("as_f32", &self.get_f32()) 520 .field("as_f64", &self.get_f64()) 521 .finish() 522 } 523 } 524 525 impl fmt::LowerHex for FRegVal { 526 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 527 fmt::LowerHex::fmt(&self.get_f64().to_bits(), f) 528 } 529 } 530 531 // NB: like `XRegUnion` values here are always little-endian, see the 532 // documentation above for more details. 533 #[derive(Copy, Clone)] 534 union FRegUnion { 535 f32: u32, 536 f64: u64, 537 } 538 539 impl Default for FRegVal { 540 fn default() -> Self { 541 Self(unsafe { mem::zeroed() }) 542 } 543 } 544 545 #[expect(missing_docs, reason = "self-describing methods")] 546 impl FRegVal { 547 pub fn new_f32(f: f32) -> Self { 548 let mut val = Self::default(); 549 val.set_f32(f); 550 val 551 } 552 553 pub fn new_f64(f: f64) -> Self { 554 let mut val = Self::default(); 555 val.set_f64(f); 556 val 557 } 558 559 pub fn get_f32(&self) -> f32 { 560 let val = unsafe { self.0.f32 }; 561 f32::from_le_bytes(val.to_ne_bytes()) 562 } 563 564 pub fn get_f64(&self) -> f64 { 565 let val = unsafe { self.0.f64 }; 566 f64::from_le_bytes(val.to_ne_bytes()) 567 } 568 569 pub fn set_f32(&mut self, val: f32) { 570 self.0.f32 = u32::from_ne_bytes(val.to_le_bytes()); 571 } 572 573 pub fn set_f64(&mut self, val: f64) { 574 self.0.f64 = u64::from_ne_bytes(val.to_le_bytes()); 575 } 576 } 577 578 /// A `v` register value: vectors. 
579 #[derive(Copy, Clone)] 580 #[cfg(not(pulley_disable_interp_simd))] 581 pub struct VRegVal(VRegUnion); 582 583 #[cfg(not(pulley_disable_interp_simd))] 584 impl fmt::Debug for VRegVal { 585 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 586 f.debug_struct("VRegVal") 587 .field("as_u128", &unsafe { self.0.u128 }) 588 .finish() 589 } 590 } 591 592 #[cfg(not(pulley_disable_interp_simd))] 593 impl fmt::LowerHex for VRegVal { 594 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 595 fmt::LowerHex::fmt(unsafe { &self.0.u128 }, f) 596 } 597 } 598 599 /// 128-bit vector registers. 600 /// 601 /// This register is always stored in little-endian order and has different 602 /// constraints than `XRegVal` and `FRegVal` above. Notably all fields of this 603 /// union are the same width so all bits are always defined. Note that 604 /// little-endian is required though so bitcasts between different shapes of 605 /// vectors works. This union cannot be stored in big-endian. 606 #[derive(Copy, Clone)] 607 #[repr(align(16))] 608 #[cfg(not(pulley_disable_interp_simd))] 609 union VRegUnion { 610 u128: u128, 611 i8x16: [i8; 16], 612 i16x8: [i16; 8], 613 i32x4: [i32; 4], 614 i64x2: [i64; 2], 615 u8x16: [u8; 16], 616 u16x8: [u16; 8], 617 u32x4: [u32; 4], 618 u64x2: [u64; 2], 619 // Note that these are `u32` and `u64`, not f32/f64. That's only because 620 // f32/f64 don't have `.to_le()` and `::from_le()` so need to go through the 621 // bits anyway. 
622 f32x4: [u32; 4], 623 f64x2: [u64; 2], 624 } 625 626 #[cfg(not(pulley_disable_interp_simd))] 627 impl Default for VRegVal { 628 fn default() -> Self { 629 Self(unsafe { mem::zeroed() }) 630 } 631 } 632 633 #[expect(missing_docs, reason = "self-describing methods")] 634 #[cfg(not(pulley_disable_interp_simd))] 635 impl VRegVal { 636 pub fn new_u128(i: u128) -> Self { 637 let mut val = Self::default(); 638 val.set_u128(i); 639 val 640 } 641 642 pub fn get_u128(&self) -> u128 { 643 let val = unsafe { self.0.u128 }; 644 u128::from_le(val) 645 } 646 647 pub fn set_u128(&mut self, val: u128) { 648 self.0.u128 = val.to_le(); 649 } 650 651 fn get_i8x16(&self) -> [i8; 16] { 652 let val = unsafe { self.0.i8x16 }; 653 val.map(|e| i8::from_le(e)) 654 } 655 656 fn set_i8x16(&mut self, val: [i8; 16]) { 657 self.0.i8x16 = val.map(|e| e.to_le()); 658 } 659 660 fn get_u8x16(&self) -> [u8; 16] { 661 let val = unsafe { self.0.u8x16 }; 662 val.map(|e| u8::from_le(e)) 663 } 664 665 fn set_u8x16(&mut self, val: [u8; 16]) { 666 self.0.u8x16 = val.map(|e| e.to_le()); 667 } 668 669 fn get_i16x8(&self) -> [i16; 8] { 670 let val = unsafe { self.0.i16x8 }; 671 val.map(|e| i16::from_le(e)) 672 } 673 674 fn set_i16x8(&mut self, val: [i16; 8]) { 675 self.0.i16x8 = val.map(|e| e.to_le()); 676 } 677 678 fn get_u16x8(&self) -> [u16; 8] { 679 let val = unsafe { self.0.u16x8 }; 680 val.map(|e| u16::from_le(e)) 681 } 682 683 fn set_u16x8(&mut self, val: [u16; 8]) { 684 self.0.u16x8 = val.map(|e| e.to_le()); 685 } 686 687 fn get_i32x4(&self) -> [i32; 4] { 688 let val = unsafe { self.0.i32x4 }; 689 val.map(|e| i32::from_le(e)) 690 } 691 692 fn set_i32x4(&mut self, val: [i32; 4]) { 693 self.0.i32x4 = val.map(|e| e.to_le()); 694 } 695 696 fn get_u32x4(&self) -> [u32; 4] { 697 let val = unsafe { self.0.u32x4 }; 698 val.map(|e| u32::from_le(e)) 699 } 700 701 fn set_u32x4(&mut self, val: [u32; 4]) { 702 self.0.u32x4 = val.map(|e| e.to_le()); 703 } 704 705 fn get_i64x2(&self) -> [i64; 2] { 706 let val = 
unsafe { self.0.i64x2 }; 707 val.map(|e| i64::from_le(e)) 708 } 709 710 fn set_i64x2(&mut self, val: [i64; 2]) { 711 self.0.i64x2 = val.map(|e| e.to_le()); 712 } 713 714 fn get_u64x2(&self) -> [u64; 2] { 715 let val = unsafe { self.0.u64x2 }; 716 val.map(|e| u64::from_le(e)) 717 } 718 719 fn set_u64x2(&mut self, val: [u64; 2]) { 720 self.0.u64x2 = val.map(|e| e.to_le()); 721 } 722 723 fn get_f64x2(&self) -> [f64; 2] { 724 let val = unsafe { self.0.f64x2 }; 725 val.map(|e| f64::from_bits(u64::from_le(e))) 726 } 727 728 fn set_f64x2(&mut self, val: [f64; 2]) { 729 self.0.f64x2 = val.map(|e| e.to_bits().to_le()); 730 } 731 732 fn get_f32x4(&self) -> [f32; 4] { 733 let val = unsafe { self.0.f32x4 }; 734 val.map(|e| f32::from_bits(u32::from_le(e))) 735 } 736 737 fn set_f32x4(&mut self, val: [f32; 4]) { 738 self.0.f32x4 = val.map(|e| e.to_bits().to_le()); 739 } 740 } 741 742 /// The machine state for a Pulley virtual machine: the various registers and 743 /// stack. 744 pub struct MachineState { 745 x_regs: [XRegVal; XReg::RANGE.end as usize], 746 f_regs: [FRegVal; FReg::RANGE.end as usize], 747 #[cfg(not(pulley_disable_interp_simd))] 748 v_regs: [VRegVal; VReg::RANGE.end as usize], 749 fp: *mut u8, 750 lr: *mut u8, 751 stack: Stack, 752 done_reason: Option<DoneReason<()>>, 753 } 754 755 unsafe impl Send for MachineState {} 756 unsafe impl Sync for MachineState {} 757 758 /// Helper structure to store the state of the Pulley stack. 759 /// 760 /// The Pulley stack notably needs to be a 16-byte aligned allocation on the 761 /// host to ensure that addresses handed out are indeed 16-byte aligned. This is 762 /// done with a custom `Vec<T>` internally where `T` has size and align of 16. 763 /// This is manually done with a helper `Align16` type below. 764 struct Stack { 765 storage: Vec<Align16>, 766 } 767 768 /// Helper type used with `Stack` above. 769 #[derive(Copy, Clone)] 770 #[repr(align(16))] 771 struct Align16 { 772 // Just here to give the structure a size of 16. 
The alignment is always 16 773 // regardless of what the host platform's alignment of u128 is. 774 _unused: u128, 775 } 776 777 impl Stack { 778 /// Creates a new stack which will have a byte size of at least `size`. 779 /// 780 /// The allocated stack might be slightly larger due to rounding necessary. 781 fn new(size: usize) -> Stack { 782 Stack { 783 // Round up `size` to the nearest multiple of 16. Note that the 784 // stack is also allocated here but not initialized, and that's 785 // intentional as pulley bytecode should always initialize the stack 786 // before use. 787 storage: Vec::with_capacity((size + 15) / 16), 788 } 789 } 790 791 /// Returns a pointer to the top of the stack (the highest address). 792 /// 793 /// Note that the returned pointer has provenance for the entire stack 794 /// allocation, however, not just the top. 795 fn top(&mut self) -> *mut u8 { 796 let len = self.len(); 797 unsafe { self.base().add(len) } 798 } 799 800 /// Returns a pointer to the base of the stack (the lowest address). 801 /// 802 /// Note that the returned pointer has provenance for the entire stack 803 /// allocation, however, not just the top. 804 fn base(&mut self) -> *mut u8 { 805 self.storage.as_mut_ptr().cast::<u8>() 806 } 807 808 /// Returns the length, in bytes, of this stack allocation. 
809 fn len(&self) -> usize { 810 self.storage.capacity() * mem::size_of::<Align16>() 811 } 812 } 813 814 impl fmt::Debug for MachineState { 815 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 816 let MachineState { 817 x_regs, 818 f_regs, 819 #[cfg(not(pulley_disable_interp_simd))] 820 v_regs, 821 stack: _, 822 done_reason: _, 823 fp: _, 824 lr: _, 825 } = self; 826 827 struct RegMap<'a, R>(&'a [R], fn(u8) -> alloc::string::String); 828 829 impl<R: fmt::Debug> fmt::Debug for RegMap<'_, R> { 830 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 831 let mut f = f.debug_map(); 832 for (i, r) in self.0.iter().enumerate() { 833 f.entry(&(self.1)(i as u8), r); 834 } 835 f.finish() 836 } 837 } 838 839 let mut f = f.debug_struct("MachineState"); 840 841 f.field( 842 "x_regs", 843 &RegMap(x_regs, |i| XReg::new(i).unwrap().to_string()), 844 ) 845 .field( 846 "f_regs", 847 &RegMap(f_regs, |i| FReg::new(i).unwrap().to_string()), 848 ); 849 #[cfg(not(pulley_disable_interp_simd))] 850 f.field( 851 "v_regs", 852 &RegMap(v_regs, |i| VReg::new(i).unwrap().to_string()), 853 ); 854 f.finish_non_exhaustive() 855 } 856 } 857 858 macro_rules! 
index_reg { 859 ($reg_ty:ty,$value_ty:ty,$field:ident) => { 860 impl Index<$reg_ty> for Vm { 861 type Output = $value_ty; 862 863 fn index(&self, reg: $reg_ty) -> &Self::Output { 864 &self.state[reg] 865 } 866 } 867 868 impl IndexMut<$reg_ty> for Vm { 869 fn index_mut(&mut self, reg: $reg_ty) -> &mut Self::Output { 870 &mut self.state[reg] 871 } 872 } 873 874 impl Index<$reg_ty> for MachineState { 875 type Output = $value_ty; 876 877 fn index(&self, reg: $reg_ty) -> &Self::Output { 878 &self.$field[reg.index()] 879 } 880 } 881 882 impl IndexMut<$reg_ty> for MachineState { 883 fn index_mut(&mut self, reg: $reg_ty) -> &mut Self::Output { 884 &mut self.$field[reg.index()] 885 } 886 } 887 }; 888 } 889 890 index_reg!(XReg, XRegVal, x_regs); 891 index_reg!(FReg, FRegVal, f_regs); 892 #[cfg(not(pulley_disable_interp_simd))] 893 index_reg!(VReg, VRegVal, v_regs); 894 895 /// Sentinel return address that signals the end of the call stack. 896 const HOST_RETURN_ADDR: *mut u8 = usize::MAX as *mut u8; 897 898 impl MachineState { 899 fn with_stack(stack_size: usize) -> Self { 900 let mut state = Self { 901 x_regs: [Default::default(); XReg::RANGE.end as usize], 902 f_regs: Default::default(), 903 #[cfg(not(pulley_disable_interp_simd))] 904 v_regs: Default::default(), 905 stack: Stack::new(stack_size), 906 done_reason: None, 907 fp: HOST_RETURN_ADDR, 908 lr: HOST_RETURN_ADDR, 909 }; 910 911 let sp = state.stack.top(); 912 state[XReg::sp] = XRegVal::new_ptr(sp); 913 914 state 915 } 916 } 917 918 /// Inner private module to prevent creation of the `Done` structure outside of 919 /// this module. 920 mod done { 921 use super::{Encode, Interpreter, MachineState}; 922 use core::ops::ControlFlow; 923 use core::ptr::NonNull; 924 925 /// Zero-sized sentinel indicating that pulley execution has halted. 926 /// 927 /// The reason for halting is stored in `MachineState`. 
928 #[derive(Copy, Clone, Debug, PartialEq, Eq)] 929 pub struct Done { 930 _priv: (), 931 } 932 933 /// Reason that the pulley interpreter has ceased execution. 934 pub enum DoneReason<T> { 935 /// A trap happened at this bytecode instruction. 936 Trap { 937 /// Which instruction is raising this trap. 938 pc: NonNull<u8>, 939 /// The kind of trap being raised, if known. 940 kind: Option<TrapKind>, 941 }, 942 /// The `call_indirect_host` instruction was executed. 943 CallIndirectHost { 944 /// The payload of `call_indirect_host`. 945 id: u8, 946 /// Where to resume execution after the host has finished. 947 resume: NonNull<u8>, 948 }, 949 /// Pulley has finished and the provided value is being returned. 950 ReturnToHost(T), 951 } 952 953 /// Stored within `DoneReason::Trap`. 954 #[expect(missing_docs, reason = "self-describing variants")] 955 pub enum TrapKind { 956 DivideByZero, 957 IntegerOverflow, 958 BadConversionToInteger, 959 MemoryOutOfBounds, 960 DisabledOpcode, 961 StackOverflow, 962 } 963 964 impl MachineState { 965 pub(super) fn debug_assert_done_reason_none(&mut self) { 966 debug_assert!(self.done_reason.is_none()); 967 } 968 969 pub(super) fn done_decode(&mut self, Done { _priv }: Done) -> DoneReason<()> { 970 self.done_reason.take().unwrap() 971 } 972 } 973 974 impl Interpreter<'_> { 975 /// Finishes execution by recording `DoneReason::Trap`. 976 /// 977 /// This method takes an `I` generic parameter indicating which 978 /// instruction is executing this function and generating a trap. That's 979 /// used to go backwards from the current `pc` which is just beyond the 980 /// instruction to point to the instruction itself in the trap metadata 981 /// returned from the interpreter. 982 #[cold] 983 pub fn done_trap<I: Encode>(&mut self) -> ControlFlow<Done> { 984 self.done_trap_kind::<I>(None) 985 } 986 987 /// Same as `done_trap` but with an explicit `TrapKind`. 
988 #[cold] 989 pub fn done_trap_kind<I: Encode>(&mut self, kind: Option<TrapKind>) -> ControlFlow<Done> { 990 let pc = self.current_pc::<I>(); 991 self.state.done_reason = Some(DoneReason::Trap { pc, kind }); 992 ControlFlow::Break(Done { _priv: () }) 993 } 994 995 /// Finishes execution by recording `DoneReason::CallIndirectHost`. 996 #[cold] 997 pub fn done_call_indirect_host(&mut self, id: u8) -> ControlFlow<Done> { 998 self.state.done_reason = Some(DoneReason::CallIndirectHost { 999 id, 1000 resume: self.pc.as_ptr(), 1001 }); 1002 ControlFlow::Break(Done { _priv: () }) 1003 } 1004 1005 /// Finishes execution by recording `DoneReason::ReturnToHost`. 1006 #[cold] 1007 pub fn done_return_to_host(&mut self) -> ControlFlow<Done> { 1008 self.state.done_reason = Some(DoneReason::ReturnToHost(())); 1009 ControlFlow::Break(Done { _priv: () }) 1010 } 1011 } 1012 } 1013 1014 use done::Done; 1015 pub use done::{DoneReason, TrapKind}; 1016 1017 struct Interpreter<'a> { 1018 state: &'a mut MachineState, 1019 pc: UnsafeBytecodeStream, 1020 executing_pc: ExecutingPcRef<'a>, 1021 } 1022 1023 impl Interpreter<'_> { 1024 /// Calculates the `offset` for the current instruction `I`. 1025 #[inline] 1026 fn pc_rel<I: Encode>(&mut self, offset: PcRelOffset) -> NonNull<u8> { 1027 let offset = isize::try_from(i32::from(offset)).unwrap(); 1028 unsafe { self.current_pc::<I>().offset(offset) } 1029 } 1030 1031 /// Performs a relative jump of `offset` bytes from the current instruction. 1032 /// 1033 /// This will jump from the start of the current instruction, identified by 1034 /// `I`, `offset` bytes away. Note that the `self.pc` at the start of this 1035 /// function actually points to the instruction after this one so `I` is 1036 /// necessary to go back to ourselves after which we then go `offset` away. 
1037 #[inline] 1038 fn pc_rel_jump<I: Encode>(&mut self, offset: PcRelOffset) -> ControlFlow<Done> { 1039 let new_pc = self.pc_rel::<I>(offset); 1040 self.pc = unsafe { UnsafeBytecodeStream::new(new_pc) }; 1041 ControlFlow::Continue(()) 1042 } 1043 1044 /// Returns the PC of the current instruction where `I` is the static type 1045 /// representing the current instruction. 1046 fn current_pc<I: Encode>(&self) -> NonNull<u8> { 1047 unsafe { self.pc.offset(-isize::from(I::WIDTH)).as_ptr() } 1048 } 1049 1050 /// `sp -= size_of::<T>(); *sp = val;` 1051 /// 1052 /// Note that `I` is the instruction which is pushing data to use if a trap 1053 /// is generated. 1054 #[must_use] 1055 fn push<I: Encode, T>(&mut self, val: T) -> ControlFlow<Done> { 1056 let new_sp = self.state[XReg::sp].get_ptr::<T>().wrapping_sub(1); 1057 self.set_sp::<I>(new_sp.cast())?; 1058 unsafe { 1059 new_sp.write_unaligned(val); 1060 } 1061 ControlFlow::Continue(()) 1062 } 1063 1064 /// `ret = *sp; sp -= size_of::<T>()` 1065 fn pop<T>(&mut self) -> T { 1066 let sp = self.state[XReg::sp].get_ptr::<T>(); 1067 let val = unsafe { sp.read_unaligned() }; 1068 self.set_sp_unchecked(sp.wrapping_add(1)); 1069 val 1070 } 1071 1072 /// Sets the stack pointer to the `sp` provided. 1073 /// 1074 /// Returns a trap if this would result in stack overflow, or if `sp` is 1075 /// beneath the base pointer of `self.state.stack`. 1076 /// 1077 /// The `I` parameter here is the instruction that is setting the stack 1078 /// pointer and is used to calculate this instruction's own `pc` if this 1079 /// instruction traps. 
1080 #[must_use] 1081 fn set_sp<I: Encode>(&mut self, sp: *mut u8) -> ControlFlow<Done> { 1082 let sp_raw = sp as usize; 1083 let base_raw = self.state.stack.base() as usize; 1084 if sp_raw < base_raw { 1085 return self.done_trap_kind::<I>(Some(TrapKind::StackOverflow)); 1086 } 1087 self.set_sp_unchecked(sp); 1088 ControlFlow::Continue(()) 1089 } 1090 1091 /// Same as `set_sp` but does not check to see if `sp` is in-bounds. Should 1092 /// only be used with stack increment operations such as `pop`. 1093 fn set_sp_unchecked<T>(&mut self, sp: *mut T) { 1094 if cfg!(debug_assertions) { 1095 let sp_raw = sp as usize; 1096 let base = self.state.stack.base() as usize; 1097 let end = base + self.state.stack.len(); 1098 assert!(base <= sp_raw && sp_raw <= end); 1099 } 1100 self.state[XReg::sp].set_ptr(sp); 1101 } 1102 1103 /// Loads a value of `T` using native-endian byte ordering from the `addr` 1104 /// specified. 1105 /// 1106 /// The `I` type parameter is the instruction issuing this load which is 1107 /// used in case of traps to calculate the trapping pc. 1108 /// 1109 /// Returns `ControlFlow::Break` if a trap happens or 1110 /// `ControlFlow::Continue` if the value was loaded successfully. 1111 /// 1112 /// # Unsafety 1113 /// 1114 /// Safety of this method relies on the safety of the original bytecode 1115 /// itself and correctly annotating both `T` and `I`. 1116 #[must_use] 1117 unsafe fn load_ne<T, I: Encode>(&mut self, addr: impl AddressingMode) -> ControlFlow<Done, T> { 1118 unsafe { addr.load_ne::<T, I>(self) } 1119 } 1120 1121 /// Stores a `val` to the `addr` specified. 1122 /// 1123 /// The `I` type parameter is the instruction issuing this store which is 1124 /// used in case of traps to calculate the trapping pc. 1125 /// 1126 /// Returns `ControlFlow::Break` if a trap happens or 1127 /// `ControlFlow::Continue` if the value was stored successfully. 
1128 /// 1129 /// # Unsafety 1130 /// 1131 /// Safety of this method relies on the safety of the original bytecode 1132 /// itself and correctly annotating both `T` and `I`. 1133 #[must_use] 1134 unsafe fn store_ne<T, I: Encode>( 1135 &mut self, 1136 addr: impl AddressingMode, 1137 val: T, 1138 ) -> ControlFlow<Done> { 1139 unsafe { addr.store_ne::<T, I>(self, val) } 1140 } 1141 1142 fn check_xnn_from_f32<I: Encode>( 1143 &mut self, 1144 val: f32, 1145 (lo, hi): (f32, f32), 1146 ) -> ControlFlow<Done> { 1147 self.check_xnn_from_f64::<I>(val.into(), (lo.into(), hi.into())) 1148 } 1149 1150 fn check_xnn_from_f64<I: Encode>( 1151 &mut self, 1152 val: f64, 1153 (lo, hi): (f64, f64), 1154 ) -> ControlFlow<Done> { 1155 if val != val { 1156 return self.done_trap_kind::<I>(Some(TrapKind::BadConversionToInteger)); 1157 } 1158 let val = val.wasm_trunc(); 1159 if val <= lo || val >= hi { 1160 return self.done_trap_kind::<I>(Some(TrapKind::IntegerOverflow)); 1161 } 1162 ControlFlow::Continue(()) 1163 } 1164 1165 #[cfg(not(pulley_disable_interp_simd))] 1166 fn get_i128(&self, lo: XReg, hi: XReg) -> i128 { 1167 let lo = self.state[lo].get_u64(); 1168 let hi = self.state[hi].get_i64(); 1169 i128::from(lo) | (i128::from(hi) << 64) 1170 } 1171 1172 #[cfg(not(pulley_disable_interp_simd))] 1173 fn set_i128(&mut self, lo: XReg, hi: XReg, val: i128) { 1174 self.state[lo].set_u64(val as u64); 1175 self.state[hi].set_u64((val >> 64) as u64); 1176 } 1177 1178 fn record_executing_pc_for_profiling(&mut self) { 1179 // Note that this is a no-op if `feature = "profile"` is disabled. 1180 self.executing_pc.record(self.pc.as_ptr().as_ptr() as usize); 1181 } 1182 } 1183 1184 /// Helper trait to encompass the various addressing modes of Pulley. 1185 trait AddressingMode: Sized { 1186 /// Calculates the native host address `*mut T` corresponding to this 1187 /// addressing mode. 
///
    /// # Safety
    ///
    /// Relies on the original bytecode being safe to execute as this will
    /// otherwise perform unsafe byte offsets for example which requires the
    /// original bytecode to be correct.
    #[must_use]
    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T>;

    /// Loads a value of `T` from this address, using native-endian byte order.
    ///
    /// For more information see [`Interpreter::load_ne`].
    #[must_use]
    unsafe fn load_ne<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, T> {
        // `?` forwards a trap raised while computing the address; the read
        // itself makes no alignment assumption.
        let ret = unsafe { self.addr::<T, I>(i)?.read_unaligned() };
        ControlFlow::Continue(ret)
    }

    /// Stores a `val` to this address, using native-endian byte order.
    ///
    /// For more information see [`Interpreter::store_ne`].
    #[must_use]
    unsafe fn store_ne<T, I: Encode>(self, i: &mut Interpreter<'_>, val: T) -> ControlFlow<Done> {
        unsafe {
            // As with loads: trap propagation via `?`, then an unaligned write.
            self.addr::<T, I>(i)?.write_unaligned(val);
        }
        ControlFlow::Continue(())
    }
}

/// Address is the pointer held in register `addr` plus the signed byte
/// `offset`; this mode never traps.
impl AddressingMode for AddrO32 {
    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
        // Note that this addressing mode cannot return `ControlFlow::Break`
        // which is intentional. It's expected that LLVM optimizes away any
        // branches callers have.
        unsafe {
            ControlFlow::Continue(
                i.state[self.addr]
                    .get_ptr::<T>()
                    .byte_offset(self.offset as isize),
            )
        }
    }
}

/// Same as `AddrO32` except that a null pointer in register `addr` traps.
impl AddressingMode for AddrZ {
    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
        // This addressing mode defines loading/storing to the null address as
        // a trap, but all other addresses are allowed.
let host_addr = i.state[self.addr].get_ptr::<T>();
        // The null test is applied to the base register, before `offset` is
        // added.
        if host_addr.is_null() {
            // `done_trap_kind` always yields `Break`, so `?` returns from this
            // function and the `unreachable!()` below is never executed.
            i.done_trap_kind::<I>(Some(TrapKind::MemoryOutOfBounds))?;
            unreachable!();
        }
        unsafe {
            let addr = host_addr.byte_offset(self.offset as isize);
            ControlFlow::Continue(addr)
        }
    }
}

/// Bounds-checked access: `host_heap_base + wasm_addr + offset`, trapping
/// with `MemoryOutOfBounds` when the access would exceed the bound held in
/// register `host_heap_bound`.
impl AddressingMode for AddrG32 {
    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
        // Test if `bound - offset - T` is less than the wasm address to
        // generate a trap. It's a guarantee of this instruction that these
        // subtractions don't overflow.
        let bound = i.state[self.host_heap_bound].get_u64() as usize;
        let offset = usize::from(self.offset);
        // The wasm address is a 32-bit value, zero-extended to `usize`.
        let wasm_addr = i.state[self.wasm_addr].get_u32() as usize;
        if wasm_addr > bound - offset - size_of::<T>() {
            // Always `Break`; see the note in `AddrZ` about `unreachable!()`.
            i.done_trap_kind::<I>(Some(TrapKind::MemoryOutOfBounds))?;
            unreachable!();
        }
        unsafe {
            let addr = i.state[self.host_heap_base]
                .get_ptr::<T>()
                .byte_add(wasm_addr)
                .byte_add(offset);
            ControlFlow::Continue(addr)
        }
    }
}

/// Variant of `AddrG32` whose bound is read from memory rather than from a
/// register.
impl AddressingMode for AddrG32Bne {
    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
        // Same as `AddrG32` above except that the bound is loaded from memory.
let bound = unsafe {
            // The bound lives at `host_heap_bound_addr + host_heap_bound_offset`
            // and is read as a native `usize`.
            *i.state[self.host_heap_bound_addr]
                .get_ptr::<usize>()
                .byte_add(usize::from(self.host_heap_bound_offset))
        };
        let wasm_addr = i.state[self.wasm_addr].get_u32() as usize;
        let offset = usize::from(self.offset);
        if wasm_addr > bound - offset - size_of::<T>() {
            // `done_trap_kind` always yields `Break`, so `?` returns here and
            // `unreachable!()` is never executed.
            i.done_trap_kind::<I>(Some(TrapKind::MemoryOutOfBounds))?;
            unreachable!();
        }
        unsafe {
            let addr = i.state[self.host_heap_base]
                .get_ptr::<T>()
                .byte_add(wasm_addr)
                .byte_add(offset);
            ControlFlow::Continue(addr)
        }
    }
}

/// Exercises `push`/`pop` against a 16-byte stack: four `i32` values fit, any
/// further push must report a trap, and pops return values in LIFO order.
#[test]
fn simple_push_pop() {
    let mut state = MachineState::with_stack(16);
    let pc = ExecutingPc::default();
    unsafe {
        let mut bytecode = [0; 10];
        let mut i = Interpreter {
            state: &mut state,
            // this isn't actually read so just manufacture a dummy one
            pc: UnsafeBytecodeStream::new(NonNull::new(bytecode.as_mut_ptr().offset(4)).unwrap()),
            executing_pc: pc.as_ref(),
        };
        // A single push/pop round-trips.
        assert!(i.push::<crate::Ret, _>(0_i32).is_continue());
        assert_eq!(i.pop::<i32>(), 0_i32);
        // Fill the stack: 4 * 4 bytes == 16 bytes.
        assert!(i.push::<crate::Ret, _>(1_i32).is_continue());
        assert!(i.push::<crate::Ret, _>(2_i32).is_continue());
        assert!(i.push::<crate::Ret, _>(3_i32).is_continue());
        assert!(i.push::<crate::Ret, _>(4_i32).is_continue());
        // The stack is full; further pushes break and leave `sp` untouched.
        assert!(i.push::<crate::Ret, _>(5_i32).is_break());
        assert!(i.push::<crate::Ret, _>(6_i32).is_break());
        // The failed pushes wrote nothing, so the original values pop in LIFO
        // order.
        assert_eq!(i.pop::<i32>(), 4_i32);
        assert_eq!(i.pop::<i32>(), 3_i32);
        assert_eq!(i.pop::<i32>(), 2_i32);
        assert_eq!(i.pop::<i32>(), 1_i32);
    }
}

// Generates conditional-branch handlers that compare a register against an
// immediate operand.
macro_rules!
br_if_imm {
    // Each entry names the handler (`$snake`), the immediate's type (`$imm`),
    // the opcode type used to locate the branch source (`$camel`), the
    // comparison operator (`$op`) and the register getter (`$get`).
    ($(
        fn $snake:ident(&mut self, a: XReg, b: $imm:ident, offset: PcRelOffset)
            = $camel:ident / $op:tt / $get:ident;
    )*) => {$(
        fn $snake(&mut self, a: XReg, b: $imm, offset: PcRelOffset) -> ControlFlow<Done> {
            let a = self.state[a].$get();
            // The immediate is widened with `.into()` to the getter's type.
            if a $op b.into() {
                self.pc_rel_jump::<crate::$camel>(offset)
            } else {
                ControlFlow::Continue(())
            }
        }
    )*};
}

impl OpVisitor for Interpreter<'_> {
    type BytecodeStream = UnsafeBytecodeStream;
    type Return = ControlFlow<Done>;

    /// Gives the decoder access to the bytecode cursor.
    fn bytecode(&mut self) -> &mut UnsafeBytecodeStream {
        &mut self.pc
    }

    /// Does nothing.
    fn nop(&mut self) -> ControlFlow<Done> {
        ControlFlow::Continue(())
    }

    /// Jumps to the address in `lr`, or finishes execution when `lr` holds the
    /// `HOST_RETURN_ADDR` sentinel.
    fn ret(&mut self) -> ControlFlow<Done> {
        let lr = self.state.lr;
        if lr == HOST_RETURN_ADDR {
            self.done_return_to_host()
        } else {
            // NOTE(review): `new_unchecked` assumes `lr` is non-null here —
            // presumably guaranteed by valid bytecode; confirm.
            self.pc = unsafe { UnsafeBytecodeStream::new(NonNull::new_unchecked(lr)) };
            ControlFlow::Continue(())
        }
    }

    /// Saves the address of the following instruction in `lr`, then jumps
    /// `offset` bytes relative to this instruction.
    fn call(&mut self, offset: PcRelOffset) -> ControlFlow<Done> {
        // `self.pc` already points past this instruction.
        let return_addr = self.pc.as_ptr();
        self.state.lr = return_addr.as_ptr();
        self.pc_rel_jump::<crate::Call>(offset)
    }

    /// Like `call`, additionally copying `arg1` into `x0`.
    fn call1(&mut self, arg1: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let return_addr = self.pc.as_ptr();
        self.state.lr = return_addr.as_ptr();
        self.state[XReg::x0] = self.state[arg1];
        self.pc_rel_jump::<crate::Call1>(offset)
    }

    /// Like `call`, additionally copying `arg1`/`arg2` into `x0`/`x1`.
    fn call2(&mut self, arg1: XReg, arg2: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let return_addr = self.pc.as_ptr();
        self.state.lr = return_addr.as_ptr();
        // Read every source before writing any destination so an argument
        // that lives in `x0`/`x1` is not clobbered first.
        let (x0, x1) = (self.state[arg1], self.state[arg2]);
        self.state[XReg::x0] = x0;
        self.state[XReg::x1] = x1;
        self.pc_rel_jump::<crate::Call2>(offset)
    }

    /// Like `call`, additionally copying `arg1..arg3` into `x0..x2`.
    fn call3(
        &mut self,
        arg1: XReg,
        arg2: XReg,
        arg3: XReg,
        offset: PcRelOffset,
    ) ->
ControlFlow<Done> {
        // `self.pc` already points past this instruction: that is the return
        // address.
        let return_addr = self.pc.as_ptr();
        self.state.lr = return_addr.as_ptr();
        // Read every source before writing any destination so an argument
        // that lives in `x0..x2` is not clobbered first.
        let (x0, x1, x2) = (self.state[arg1], self.state[arg2], self.state[arg3]);
        self.state[XReg::x0] = x0;
        self.state[XReg::x1] = x1;
        self.state[XReg::x2] = x2;
        self.pc_rel_jump::<crate::Call3>(offset)
    }

    /// Saves the return address in `lr`, copies `arg1..arg4` into `x0..x3`
    /// and jumps `offset` bytes relative to this instruction.
    fn call4(
        &mut self,
        arg1: XReg,
        arg2: XReg,
        arg3: XReg,
        arg4: XReg,
        offset: PcRelOffset,
    ) -> ControlFlow<Done> {
        let return_addr = self.pc.as_ptr();
        self.state.lr = return_addr.as_ptr();
        // All sources are read before any destination is written.
        let (x0, x1, x2, x3) = (
            self.state[arg1],
            self.state[arg2],
            self.state[arg3],
            self.state[arg4],
        );
        self.state[XReg::x0] = x0;
        self.state[XReg::x1] = x1;
        self.state[XReg::x2] = x2;
        self.state[XReg::x3] = x3;
        self.pc_rel_jump::<crate::Call4>(offset)
    }

    /// Saves the return address in `lr` and jumps to the address held in
    /// register `dst`.
    fn call_indirect(&mut self, dst: XReg) -> ControlFlow<Done> {
        let return_addr = self.pc.as_ptr();
        self.state.lr = return_addr.as_ptr();
        // SAFETY: part of the unsafe contract of the interpreter is only valid
        // bytecode is interpreted, so the jump destination is part of the validity
        // of the bytecode itself.
unsafe {
            self.pc = UnsafeBytecodeStream::new(NonNull::new_unchecked(self.state[dst].get_ptr()));
        }
        ControlFlow::Continue(())
    }

    /// Unconditionally jumps `offset` bytes relative to this instruction.
    fn jump(&mut self, offset: PcRelOffset) -> ControlFlow<Done> {
        self.pc_rel_jump::<crate::Jump>(offset)
    }

    /// Unconditionally jumps to the address held in register `reg`.
    fn xjump(&mut self, reg: XReg) -> ControlFlow<Done> {
        // NOTE(review): `new_unchecked` assumes the register is non-null —
        // presumably guaranteed by valid bytecode; confirm.
        unsafe {
            self.pc = UnsafeBytecodeStream::new(NonNull::new_unchecked(self.state[reg].get_ptr()));
        }
        ControlFlow::Continue(())
    }

    /// Branches if the low 32 bits of `cond` are non-zero.
    fn br_if32(&mut self, cond: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let cond = self.state[cond].get_u32();
        if cond != 0 {
            self.pc_rel_jump::<crate::BrIf>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    /// Branches if the low 32 bits of `cond` are zero.
    fn br_if_not32(&mut self, cond: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let cond = self.state[cond].get_u32();
        if cond == 0 {
            self.pc_rel_jump::<crate::BrIfNot>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    /// Branches if `a == b` (32-bit).
    fn br_if_xeq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u32();
        let b = self.state[b].get_u32();
        if a == b {
            self.pc_rel_jump::<crate::BrIfXeq32>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    /// Branches if `a != b` (32-bit).
    fn br_if_xneq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u32();
        let b = self.state[b].get_u32();
        if a != b {
            self.pc_rel_jump::<crate::BrIfXneq32>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    /// Branches if `a < b` (32-bit, signed).
    fn br_if_xslt32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_i32();
        let b = self.state[b].get_i32();
        if a < b {
            self.pc_rel_jump::<crate::BrIfXslt32>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    /// Branches if `a <= b` (32-bit, signed).
    fn br_if_xslteq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) ->
ControlFlow<Done> {
        // Signed 32-bit views of both registers.
        let a = self.state[a].get_i32();
        let b = self.state[b].get_i32();
        if a <= b {
            self.pc_rel_jump::<crate::BrIfXslteq32>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    /// Branches if `a < b` (32-bit, unsigned).
    fn br_if_xult32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u32();
        let b = self.state[b].get_u32();
        if a < b {
            self.pc_rel_jump::<crate::BrIfXult32>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    /// Branches if `a <= b` (32-bit, unsigned).
    fn br_if_xulteq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u32();
        let b = self.state[b].get_u32();
        if a <= b {
            self.pc_rel_jump::<crate::BrIfXulteq32>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    /// Branches if `a == b` (64-bit).
    fn br_if_xeq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u64();
        let b = self.state[b].get_u64();
        if a == b {
            self.pc_rel_jump::<crate::BrIfXeq64>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    /// Branches if `a != b` (64-bit).
    fn br_if_xneq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u64();
        let b = self.state[b].get_u64();
        if a != b {
            self.pc_rel_jump::<crate::BrIfXneq64>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    /// Branches if `a < b` (64-bit, signed).
    fn br_if_xslt64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_i64();
        let b = self.state[b].get_i64();
        if a < b {
            self.pc_rel_jump::<crate::BrIfXslt64>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    /// Branches if `a <= b` (64-bit, signed).
    fn br_if_xslteq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_i64();
        let b = self.state[b].get_i64();
        if a <= b {
            self.pc_rel_jump::<crate::BrIfXslteq64>(offset)
        } else {
ControlFlow::Continue(())
        }
    }

    /// Branches if `a < b` (64-bit, unsigned).
    fn br_if_xult64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u64();
        let b = self.state[b].get_u64();
        if a < b {
            self.pc_rel_jump::<crate::BrIfXult64>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    /// Branches if `a <= b` (64-bit, unsigned).
    fn br_if_xulteq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u64();
        let b = self.state[b].get_u64();
        if a <= b {
            self.pc_rel_jump::<crate::BrIfXulteq64>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    // Register-vs-immediate branches. Each entry is
    // `opcode type / comparison / register getter`; the immediate is widened
    // to the getter's type before comparing.
    br_if_imm! {
        // 32-bit equality and inequality.
        fn br_if_xeq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXeq32I8 / == / get_i32;
        fn br_if_xeq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXeq32I32 / == / get_i32;
        fn br_if_xneq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXneq32I8 / != / get_i32;
        fn br_if_xneq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXneq32I32 / != / get_i32;

        // 32-bit signed ordering.
        fn br_if_xslt32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXslt32I8 / < / get_i32;
        fn br_if_xslt32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXslt32I32 / < / get_i32;
        fn br_if_xsgt32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXsgt32I8 / > / get_i32;
        fn br_if_xsgt32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXsgt32I32 / > / get_i32;
        fn br_if_xslteq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXslteq32I8 / <= / get_i32;
        fn br_if_xslteq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXslteq32I32 / <= / get_i32;
        fn br_if_xsgteq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXsgteq32I8 / >= / get_i32;
        fn br_if_xsgteq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXsgteq32I32 / >= /
get_i32;

        // 32-bit unsigned ordering.
        fn br_if_xult32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXult32U8 / < / get_u32;
        fn br_if_xult32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXult32U32 / < / get_u32;
        fn br_if_xugt32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXugt32U8 / > / get_u32;
        fn br_if_xugt32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXugt32U32 / > / get_u32;
        fn br_if_xulteq32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXulteq32U8 / <= / get_u32;
        fn br_if_xulteq32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXulteq32U32 / <= / get_u32;
        fn br_if_xugteq32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXugteq32U8 / >= / get_u32;
        fn br_if_xugteq32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXugteq32U32 / >= / get_u32;

        // 64-bit equality and inequality; the immediate is sign-extended.
        fn br_if_xeq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXeq64I8 / == / get_i64;
        fn br_if_xeq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXeq64I32 / == / get_i64;
        fn br_if_xneq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXneq64I8 / != / get_i64;
        fn br_if_xneq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXneq64I32 / != / get_i64;

        // 64-bit signed ordering.
        fn br_if_xslt64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXslt64I8 / < / get_i64;
        fn br_if_xslt64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXslt64I32 / < / get_i64;
        fn br_if_xsgt64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXsgt64I8 / > / get_i64;
        fn br_if_xsgt64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXsgt64I32 / > / get_i64;
        fn br_if_xslteq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXslteq64I8 / <= / get_i64;
        fn br_if_xslteq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXslteq64I32 / <= /
get_i64;
        fn br_if_xsgteq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXsgteq64I8 / >= / get_i64;
        fn br_if_xsgteq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXsgteq64I32 / >= / get_i64;

        // 64-bit unsigned ordering; the immediate is zero-extended.
        fn br_if_xult64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXult64U8 / < / get_u64;
        fn br_if_xult64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXult64U32 / < / get_u64;
        fn br_if_xugt64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXugt64U8 / > / get_u64;
        fn br_if_xugt64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXugt64U32 / > / get_u64;
        fn br_if_xulteq64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXulteq64U8 / <= / get_u64;
        fn br_if_xulteq64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXulteq64U32 / <= / get_u64;
        fn br_if_xugteq64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXugteq64U8 / >= / get_u64;
        fn br_if_xugteq64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXugteq64U32 / >= / get_u64;
    }

    /// Copies the whole of register `src` into `dst`.
    fn xmov(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
        let val = self.state[src];
        self.state[dst] = val;
        ControlFlow::Continue(())
    }

    /// Sets `dst` to the 8-bit immediate, sign-extended to 64 bits.
    fn xconst8(&mut self, dst: XReg, imm: i8) -> ControlFlow<Done> {
        self.state[dst].set_i64(i64::from(imm));
        ControlFlow::Continue(())
    }

    /// Sets `dst` to 0.
    fn xzero(&mut self, dst: XReg) -> ControlFlow<Done> {
        self.state[dst].set_i64(0);
        ControlFlow::Continue(())
    }

    /// Sets `dst` to 1.
    fn xone(&mut self, dst: XReg) -> ControlFlow<Done> {
        self.state[dst].set_i64(1);
        ControlFlow::Continue(())
    }

    /// Sets `dst` to the 16-bit immediate, sign-extended to 64 bits.
    fn xconst16(&mut self, dst: XReg, imm: i16) -> ControlFlow<Done> {
        self.state[dst].set_i64(i64::from(imm));
        ControlFlow::Continue(())
    }

    /// Sets `dst` to the 32-bit immediate, sign-extended to 64 bits.
    fn xconst32(&mut self, dst: XReg, imm: i32) -> ControlFlow<Done> {
self.state[dst].set_i64(i64::from(imm));
        ControlFlow::Continue(())
    }

    /// Sets `dst` to the 64-bit immediate.
    fn xconst64(&mut self, dst: XReg, imm: i64) -> ControlFlow<Done> {
        self.state[dst].set_i64(imm);
        ControlFlow::Continue(())
    }

    /// `dst = src1 + src2` (32-bit, wrapping).
    fn xadd32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u32(a.wrapping_add(b));
        ControlFlow::Continue(())
    }

    /// `dst = src1 + imm` (32-bit, wrapping) with a zero-extended 8-bit
    /// immediate.
    fn xadd32_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
        self.xadd32_u32(dst, src1, src2.into())
    }

    /// `dst = src1 + imm` (32-bit, wrapping).
    fn xadd32_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
        let a = self.state[src1].get_u32();
        self.state[dst].set_u32(a.wrapping_add(src2));
        ControlFlow::Continue(())
    }

    /// `dst = src1 + src2` (64-bit, wrapping).
    fn xadd64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64();
        let b = self.state[operands.src2].get_u64();
        self.state[operands.dst].set_u64(a.wrapping_add(b));
        ControlFlow::Continue(())
    }

    /// `dst = src1 + imm` (64-bit, wrapping) with a zero-extended 8-bit
    /// immediate.
    fn xadd64_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
        self.xadd64_u32(dst, src1, src2.into())
    }

    /// `dst = src1 + imm` (64-bit, wrapping) with a zero-extended 32-bit
    /// immediate.
    fn xadd64_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
        let a = self.state[src1].get_u64();
        self.state[dst].set_u64(a.wrapping_add(src2.into()));
        ControlFlow::Continue(())
    }

    /// `dst = src1 * src2 + src3` (32-bit, wrapping).
    fn xmadd32(&mut self, dst: XReg, src1: XReg, src2: XReg, src3: XReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_u32();
        let b = self.state[src2].get_u32();
        let c = self.state[src3].get_u32();
        self.state[dst].set_u32(a.wrapping_mul(b).wrapping_add(c));
        ControlFlow::Continue(())
    }

    /// `dst = src1 * src2 + src3` (64-bit, wrapping).
    fn xmadd64(&mut self, dst: XReg, src1: XReg, src2: XReg, src3: XReg) -> ControlFlow<Done> {
        let a =
self.state[src1].get_u64();
        let b = self.state[src2].get_u64();
        let c = self.state[src3].get_u64();
        // Multiply first, then add; both steps wrap on overflow.
        self.state[dst].set_u64(a.wrapping_mul(b).wrapping_add(c));
        ControlFlow::Continue(())
    }

    /// `dst = src1 - src2` (32-bit, wrapping).
    fn xsub32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u32(a.wrapping_sub(b));
        ControlFlow::Continue(())
    }

    /// `dst = src1 - imm` (32-bit, wrapping) with a zero-extended 8-bit
    /// immediate.
    fn xsub32_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
        self.xsub32_u32(dst, src1, src2.into())
    }

    /// `dst = src1 - imm` (32-bit, wrapping).
    fn xsub32_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
        let a = self.state[src1].get_u32();
        self.state[dst].set_u32(a.wrapping_sub(src2));
        ControlFlow::Continue(())
    }

    /// `dst = src1 - src2` (64-bit, wrapping).
    fn xsub64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64();
        let b = self.state[operands.src2].get_u64();
        self.state[operands.dst].set_u64(a.wrapping_sub(b));
        ControlFlow::Continue(())
    }

    /// `dst = src1 - imm` (64-bit, wrapping) with a zero-extended 8-bit
    /// immediate.
    fn xsub64_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
        self.xsub64_u32(dst, src1, src2.into())
    }

    /// `dst = src1 - imm` (64-bit, wrapping) with a zero-extended 32-bit
    /// immediate.
    fn xsub64_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
        let a = self.state[src1].get_u64();
        self.state[dst].set_u64(a.wrapping_sub(src2.into()));
        ControlFlow::Continue(())
    }

    /// `dst = src1 * src2` (32-bit, wrapping).
    fn xmul32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u32(a.wrapping_mul(b));
        ControlFlow::Continue(())
    }

    /// `dst = src1 * imm` (32-bit, wrapping) with a sign-extended 8-bit
    /// immediate.
    fn xmul32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
        self.xmul32_s32(dst, src1, src2.into())
    }

    /// `dst = src1 * imm` (32-bit, wrapping) with a signed 32-bit immediate.
    fn xmul32_s32(&mut self, dst: XReg, src1: XReg,
src2: i32) -> ControlFlow<Done> {
        // Signed 32-bit multiply; overflow wraps.
        let a = self.state[src1].get_i32();
        self.state[dst].set_i32(a.wrapping_mul(src2));
        ControlFlow::Continue(())
    }

    /// `dst = src1 * src2` (64-bit, wrapping).
    fn xmul64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64();
        let b = self.state[operands.src2].get_u64();
        self.state[operands.dst].set_u64(a.wrapping_mul(b));
        ControlFlow::Continue(())
    }

    /// `dst = src1 * imm` (64-bit, wrapping) with a sign-extended 8-bit
    /// immediate.
    fn xmul64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
        self.xmul64_s32(dst, src1, src2.into())
    }

    /// `dst = src1 * imm` (64-bit, wrapping) with a sign-extended 32-bit
    /// immediate.
    fn xmul64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
        let a = self.state[src1].get_i64();
        self.state[dst].set_i64(a.wrapping_mul(src2.into()));
        ControlFlow::Continue(())
    }

    /// `dst = src1 << src2` (32-bit). `wrapping_shl` reduces the shift amount
    /// modulo the bit width.
    fn xshl32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u32(a.wrapping_shl(b));
        ControlFlow::Continue(())
    }

    /// `dst = src1 >> src2` (32-bit, logical). The shift amount is reduced
    /// modulo the bit width.
    fn xshr32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u32(a.wrapping_shr(b));
        ControlFlow::Continue(())
    }

    /// `dst = src1 >> src2` (32-bit, arithmetic). The shift amount is reduced
    /// modulo the bit width.
    fn xshr32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        // Shifting the signed view makes this an arithmetic shift.
        let a = self.state[operands.src1].get_i32();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_i32(a.wrapping_shr(b));
        ControlFlow::Continue(())
    }

    /// `dst = src1 << src2` (64-bit). The shift amount is taken from the low
    /// 32 bits of `src2` and reduced modulo the bit width.
    fn xshl64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u64(a.wrapping_shl(b));
        ControlFlow::Continue(())
    }

    /// `dst = src1 >> src2` (64-bit, logical).
    fn xshr64_u(&mut self, operands:
BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64();
        // Only the low 32 bits of `src2` are consulted; `wrapping_shr` then
        // reduces the amount modulo the bit width.
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u64(a.wrapping_shr(b));
        ControlFlow::Continue(())
    }

    /// `dst = src1 >> src2` (64-bit, arithmetic).
    fn xshr64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        // Shifting the signed view makes this an arithmetic shift.
        let a = self.state[operands.src1].get_i64();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_i64(a.wrapping_shr(b));
        ControlFlow::Continue(())
    }

    /// `dst = src1 << imm` (32-bit) with a `U6` immediate shift amount.
    fn xshl32_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32();
        let b = u32::from(u8::from(operands.src2));
        self.state[operands.dst].set_u32(a.wrapping_shl(b));
        ControlFlow::Continue(())
    }

    /// `dst = src1 >> imm` (32-bit, logical) with a `U6` immediate shift
    /// amount.
    fn xshr32_u_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32();
        let b = u32::from(u8::from(operands.src2));
        self.state[operands.dst].set_u32(a.wrapping_shr(b));
        ControlFlow::Continue(())
    }

    /// `dst = src1 >> imm` (32-bit, arithmetic) with a `U6` immediate shift
    /// amount.
    fn xshr32_s_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i32();
        let b = u32::from(u8::from(operands.src2));
        self.state[operands.dst].set_i32(a.wrapping_shr(b));
        ControlFlow::Continue(())
    }

    /// `dst = src1 << imm` (64-bit) with a `U6` immediate shift amount.
    fn xshl64_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64();
        let b = u32::from(u8::from(operands.src2));
        self.state[operands.dst].set_u64(a.wrapping_shl(b));
        ControlFlow::Continue(())
    }

    /// `dst = src1 >> imm` (64-bit, logical) with a `U6` immediate shift
    /// amount.
    fn xshr64_u_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64();
        let b = u32::from(u8::from(operands.src2));
        self.state[operands.dst].set_u64(a.wrapping_shr(b));
        ControlFlow::Continue(())
}

    /// `dst = src1 >> imm` (64-bit, arithmetic) with a `U6` immediate shift
    /// amount.
    fn xshr64_s_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i64();
        let b = u32::from(u8::from(operands.src2));
        self.state[operands.dst].set_i64(a.wrapping_shr(b));
        ControlFlow::Continue(())
    }

    /// `dst = -src` (32-bit, wrapping).
    fn xneg32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i32();
        self.state[dst].set_i32(a.wrapping_neg());
        ControlFlow::Continue(())
    }

    /// `dst = -src` (64-bit, wrapping).
    fn xneg64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i64();
        self.state[dst].set_i64(a.wrapping_neg());
        ControlFlow::Continue(())
    }

    /// `dst = (src1 == src2) as u32` (64-bit operands).
    fn xeq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64();
        let b = self.state[operands.src2].get_u64();
        self.state[operands.dst].set_u32(u32::from(a == b));
        ControlFlow::Continue(())
    }

    /// `dst = (src1 != src2) as u32` (64-bit operands).
    fn xneq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64();
        let b = self.state[operands.src2].get_u64();
        self.state[operands.dst].set_u32(u32::from(a != b));
        ControlFlow::Continue(())
    }

    /// `dst = (src1 < src2) as u32` (64-bit, signed).
    fn xslt64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i64();
        let b = self.state[operands.src2].get_i64();
        self.state[operands.dst].set_u32(u32::from(a < b));
        ControlFlow::Continue(())
    }

    /// `dst = (src1 <= src2) as u32` (64-bit, signed).
    fn xslteq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i64();
        let b = self.state[operands.src2].get_i64();
        self.state[operands.dst].set_u32(u32::from(a <= b));
        ControlFlow::Continue(())
    }

    /// `dst = (src1 < src2) as u32` (64-bit, unsigned).
    fn xult64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64();
        let b =
self.state[operands.src2].get_u64();
        // The boolean result is stored as 0 or 1 in the low 32 bits.
        self.state[operands.dst].set_u32(u32::from(a < b));
        ControlFlow::Continue(())
    }

    /// `dst = (src1 <= src2) as u32` (64-bit, unsigned).
    fn xulteq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64();
        let b = self.state[operands.src2].get_u64();
        self.state[operands.dst].set_u32(u32::from(a <= b));
        ControlFlow::Continue(())
    }

    /// `dst = (src1 == src2) as u32` (32-bit operands).
    fn xeq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u32(u32::from(a == b));
        ControlFlow::Continue(())
    }

    /// `dst = (src1 != src2) as u32` (32-bit operands).
    fn xneq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u32(u32::from(a != b));
        ControlFlow::Continue(())
    }

    /// `dst = (src1 < src2) as u32` (32-bit, signed).
    fn xslt32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i32();
        let b = self.state[operands.src2].get_i32();
        self.state[operands.dst].set_u32(u32::from(a < b));
        ControlFlow::Continue(())
    }

    /// `dst = (src1 <= src2) as u32` (32-bit, signed).
    fn xslteq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i32();
        let b = self.state[operands.src2].get_i32();
        self.state[operands.dst].set_u32(u32::from(a <= b));
        ControlFlow::Continue(())
    }

    /// `dst = (src1 < src2) as u32` (32-bit, unsigned).
    fn xult32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u32(u32::from(a < b));
        ControlFlow::Continue(())
    }

    /// `dst = (src1 <= src2) as u32` (32-bit, unsigned).
    fn xulteq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32();
        let b =
self.state[operands.src2].get_u32(); 2000 self.state[operands.dst].set_u32(u32::from(a <= b)); 2001 ControlFlow::Continue(()) 2002 } 2003 2004 fn push_frame(&mut self) -> ControlFlow<Done> { 2005 self.push::<crate::PushFrame, _>(self.state.lr)?; 2006 self.push::<crate::PushFrame, _>(self.state.fp)?; 2007 self.state.fp = self.state[XReg::sp].get_ptr(); 2008 ControlFlow::Continue(()) 2009 } 2010 2011 #[inline] 2012 fn push_frame_save(&mut self, amt: u16, regs: UpperRegSet<XReg>) -> ControlFlow<Done> { 2013 // Decrement the stack pointer `amt` bytes plus 2 pointers more for 2014 // fp/lr. 2015 let ptr_size = size_of::<usize>(); 2016 let full_amt = usize::from(amt) + 2 * ptr_size; 2017 let new_sp = self.state[XReg::sp].get_ptr::<u8>().wrapping_sub(full_amt); 2018 self.set_sp::<crate::PushFrameSave>(new_sp)?; 2019 2020 unsafe { 2021 // Emulate `push_frame` by placing `lr` and `fp` onto the stack, in 2022 // that order, at the top of the allocated area. 2023 self.store_ne::<_, crate::PushFrameSave>( 2024 AddrO32 { 2025 addr: XReg::sp, 2026 offset: (full_amt - 1 * ptr_size) as i32, 2027 }, 2028 self.state.lr, 2029 )?; 2030 self.store_ne::<_, crate::PushFrameSave>( 2031 AddrO32 { 2032 addr: XReg::sp, 2033 offset: (full_amt - 2 * ptr_size) as i32, 2034 }, 2035 self.state.fp, 2036 )?; 2037 2038 // Set `fp` to the top of our frame, where `fp` is stored. 2039 let mut offset = amt as i32; 2040 self.state.fp = self.state[XReg::sp] 2041 .get_ptr::<u8>() 2042 .byte_offset(offset as isize); 2043 2044 // Next save any registers in `regs` to the stack. 
2045 for reg in regs { 2046 offset -= 8; 2047 self.store_ne::<_, crate::PushFrameSave>( 2048 AddrO32 { 2049 addr: XReg::sp, 2050 offset, 2051 }, 2052 self.state[reg].get_u64(), 2053 )?; 2054 } 2055 } 2056 ControlFlow::Continue(()) 2057 } 2058 2059 fn pop_frame_restore(&mut self, amt: u16, regs: UpperRegSet<XReg>) -> ControlFlow<Done> { 2060 // Restore all registers in `regs`, followed by the normal `pop_frame` 2061 // opcode below to restore fp/lr. 2062 unsafe { 2063 let mut offset = i32::from(amt); 2064 for reg in regs { 2065 offset -= 8; 2066 let val = self.load_ne::<_, crate::PopFrameRestore>(AddrO32 { 2067 addr: XReg::sp, 2068 offset, 2069 })?; 2070 self.state[reg].set_u64(val); 2071 } 2072 } 2073 self.pop_frame() 2074 } 2075 2076 fn pop_frame(&mut self) -> ControlFlow<Done> { 2077 self.set_sp_unchecked(self.state.fp); 2078 let fp = self.pop(); 2079 let lr = self.pop(); 2080 self.state.fp = fp; 2081 self.state.lr = lr; 2082 ControlFlow::Continue(()) 2083 } 2084 2085 fn br_table32(&mut self, idx: XReg, amt: u32) -> ControlFlow<Done> { 2086 let idx = self.state[idx].get_u32().min(amt - 1) as isize; 2087 // SAFETY: part of the contract of the interpreter is only dealing with 2088 // valid bytecode, so this offset should be safe. 2089 self.pc = unsafe { self.pc.offset(idx * 4) }; 2090 2091 // Decode the `PcRelOffset` without tampering with `self.pc` as the 2092 // jump is relative to `self.pc`. 
2093 let mut tmp = self.pc; 2094 let Ok(rel) = PcRelOffset::decode(&mut tmp); 2095 let offset = isize::try_from(i32::from(rel)).unwrap(); 2096 self.pc = unsafe { self.pc.offset(offset) }; 2097 ControlFlow::Continue(()) 2098 } 2099 2100 fn stack_alloc32(&mut self, amt: u32) -> ControlFlow<Done> { 2101 let amt = usize::try_from(amt).unwrap(); 2102 let new_sp = self.state[XReg::sp].get_ptr::<u8>().wrapping_sub(amt); 2103 self.set_sp::<crate::StackAlloc32>(new_sp)?; 2104 ControlFlow::Continue(()) 2105 } 2106 2107 fn stack_free32(&mut self, amt: u32) -> ControlFlow<Done> { 2108 let amt = usize::try_from(amt).unwrap(); 2109 let new_sp = self.state[XReg::sp].get_ptr::<u8>().wrapping_add(amt); 2110 self.set_sp_unchecked(new_sp); 2111 ControlFlow::Continue(()) 2112 } 2113 2114 fn zext8(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> { 2115 let src = self.state[src].get_u64() as u8; 2116 self.state[dst].set_u64(src.into()); 2117 ControlFlow::Continue(()) 2118 } 2119 2120 fn zext16(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> { 2121 let src = self.state[src].get_u64() as u16; 2122 self.state[dst].set_u64(src.into()); 2123 ControlFlow::Continue(()) 2124 } 2125 2126 fn zext32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> { 2127 let src = self.state[src].get_u64() as u32; 2128 self.state[dst].set_u64(src.into()); 2129 ControlFlow::Continue(()) 2130 } 2131 2132 fn sext8(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> { 2133 let src = self.state[src].get_i64() as i8; 2134 self.state[dst].set_i64(src.into()); 2135 ControlFlow::Continue(()) 2136 } 2137 2138 fn sext16(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> { 2139 let src = self.state[src].get_i64() as i16; 2140 self.state[dst].set_i64(src.into()); 2141 ControlFlow::Continue(()) 2142 } 2143 2144 fn sext32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> { 2145 let src = self.state[src].get_i64() as i32; 2146 self.state[dst].set_i64(src.into()); 2147 ControlFlow::Continue(()) 2148 } 2149 
2150 fn xdiv32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> { 2151 let a = self.state[operands.src1].get_i32(); 2152 let b = self.state[operands.src2].get_i32(); 2153 match a.checked_div(b) { 2154 Some(result) => { 2155 self.state[operands.dst].set_i32(result); 2156 ControlFlow::Continue(()) 2157 } 2158 None => { 2159 let kind = if b == 0 { 2160 TrapKind::DivideByZero 2161 } else { 2162 TrapKind::IntegerOverflow 2163 }; 2164 self.done_trap_kind::<crate::XDiv32S>(Some(kind)) 2165 } 2166 } 2167 } 2168 2169 fn xdiv64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> { 2170 let a = self.state[operands.src1].get_i64(); 2171 let b = self.state[operands.src2].get_i64(); 2172 match a.checked_div(b) { 2173 Some(result) => { 2174 self.state[operands.dst].set_i64(result); 2175 ControlFlow::Continue(()) 2176 } 2177 None => { 2178 let kind = if b == 0 { 2179 TrapKind::DivideByZero 2180 } else { 2181 TrapKind::IntegerOverflow 2182 }; 2183 self.done_trap_kind::<crate::XDiv64S>(Some(kind)) 2184 } 2185 } 2186 } 2187 2188 fn xdiv32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> { 2189 let a = self.state[operands.src1].get_u32(); 2190 let b = self.state[operands.src2].get_u32(); 2191 match a.checked_div(b) { 2192 Some(result) => { 2193 self.state[operands.dst].set_u32(result); 2194 ControlFlow::Continue(()) 2195 } 2196 None => self.done_trap_kind::<crate::XDiv64U>(Some(TrapKind::DivideByZero)), 2197 } 2198 } 2199 2200 fn xdiv64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> { 2201 let a = self.state[operands.src1].get_u64(); 2202 let b = self.state[operands.src2].get_u64(); 2203 match a.checked_div(b) { 2204 Some(result) => { 2205 self.state[operands.dst].set_u64(result); 2206 ControlFlow::Continue(()) 2207 } 2208 None => self.done_trap_kind::<crate::XDiv64U>(Some(TrapKind::DivideByZero)), 2209 } 2210 } 2211 2212 fn xrem32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> { 2213 let a = 
self.state[operands.src1].get_i32(); 2214 let b = self.state[operands.src2].get_i32(); 2215 let result = if a == i32::MIN && b == -1 { 2216 Some(0) 2217 } else { 2218 a.checked_rem(b) 2219 }; 2220 match result { 2221 Some(result) => { 2222 self.state[operands.dst].set_i32(result); 2223 ControlFlow::Continue(()) 2224 } 2225 None => self.done_trap_kind::<crate::XRem32S>(Some(TrapKind::DivideByZero)), 2226 } 2227 } 2228 2229 fn xrem64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> { 2230 let a = self.state[operands.src1].get_i64(); 2231 let b = self.state[operands.src2].get_i64(); 2232 let result = if a == i64::MIN && b == -1 { 2233 Some(0) 2234 } else { 2235 a.checked_rem(b) 2236 }; 2237 match result { 2238 Some(result) => { 2239 self.state[operands.dst].set_i64(result); 2240 ControlFlow::Continue(()) 2241 } 2242 None => self.done_trap_kind::<crate::XRem64S>(Some(TrapKind::DivideByZero)), 2243 } 2244 } 2245 2246 fn xrem32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> { 2247 let a = self.state[operands.src1].get_u32(); 2248 let b = self.state[operands.src2].get_u32(); 2249 match a.checked_rem(b) { 2250 Some(result) => { 2251 self.state[operands.dst].set_u32(result); 2252 ControlFlow::Continue(()) 2253 } 2254 None => self.done_trap_kind::<crate::XRem32U>(Some(TrapKind::DivideByZero)), 2255 } 2256 } 2257 2258 fn xrem64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> { 2259 let a = self.state[operands.src1].get_u64(); 2260 let b = self.state[operands.src2].get_u64(); 2261 match a.checked_rem(b) { 2262 Some(result) => { 2263 self.state[operands.dst].set_u64(result); 2264 ControlFlow::Continue(()) 2265 } 2266 None => self.done_trap_kind::<crate::XRem64U>(Some(TrapKind::DivideByZero)), 2267 } 2268 } 2269 2270 fn xband32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> { 2271 let a = self.state[operands.src1].get_u32(); 2272 let b = self.state[operands.src2].get_u32(); 2273 
self.state[operands.dst].set_u32(a & b); 2274 ControlFlow::Continue(()) 2275 } 2276 2277 fn xband32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> { 2278 self.xband32_s32(dst, src1, src2.into()) 2279 } 2280 2281 fn xband32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> { 2282 let a = self.state[src1].get_i32(); 2283 self.state[dst].set_i32(a & src2); 2284 ControlFlow::Continue(()) 2285 } 2286 2287 fn xband64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> { 2288 let a = self.state[operands.src1].get_u64(); 2289 let b = self.state[operands.src2].get_u64(); 2290 self.state[operands.dst].set_u64(a & b); 2291 ControlFlow::Continue(()) 2292 } 2293 2294 fn xband64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> { 2295 self.xband64_s32(dst, src1, src2.into()) 2296 } 2297 2298 fn xband64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> { 2299 let a = self.state[src1].get_i64(); 2300 self.state[dst].set_i64(a & i64::from(src2)); 2301 ControlFlow::Continue(()) 2302 } 2303 2304 fn xbor32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> { 2305 let a = self.state[operands.src1].get_u32(); 2306 let b = self.state[operands.src2].get_u32(); 2307 self.state[operands.dst].set_u32(a | b); 2308 ControlFlow::Continue(()) 2309 } 2310 2311 fn xbor32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> { 2312 self.xbor32_s32(dst, src1, src2.into()) 2313 } 2314 2315 fn xbor32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> { 2316 let a = self.state[src1].get_i32(); 2317 self.state[dst].set_i32(a | src2); 2318 ControlFlow::Continue(()) 2319 } 2320 2321 fn xbor64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> { 2322 let a = self.state[operands.src1].get_u64(); 2323 let b = self.state[operands.src2].get_u64(); 2324 self.state[operands.dst].set_u64(a | b); 2325 ControlFlow::Continue(()) 2326 } 2327 2328 fn xbor64_s8(&mut self, 
dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> { 2329 self.xbor64_s32(dst, src1, src2.into()) 2330 } 2331 2332 fn xbor64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> { 2333 let a = self.state[src1].get_i64(); 2334 self.state[dst].set_i64(a | i64::from(src2)); 2335 ControlFlow::Continue(()) 2336 } 2337 2338 fn xbxor32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> { 2339 let a = self.state[operands.src1].get_u32(); 2340 let b = self.state[operands.src2].get_u32(); 2341 self.state[operands.dst].set_u32(a ^ b); 2342 ControlFlow::Continue(()) 2343 } 2344 2345 fn xbxor32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> { 2346 self.xbxor32_s32(dst, src1, src2.into()) 2347 } 2348 2349 fn xbxor32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> { 2350 let a = self.state[src1].get_i32(); 2351 self.state[dst].set_i32(a ^ src2); 2352 ControlFlow::Continue(()) 2353 } 2354 2355 fn xbxor64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> { 2356 let a = self.state[operands.src1].get_u64(); 2357 let b = self.state[operands.src2].get_u64(); 2358 self.state[operands.dst].set_u64(a ^ b); 2359 ControlFlow::Continue(()) 2360 } 2361 2362 fn xbxor64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> { 2363 self.xbxor64_s32(dst, src1, src2.into()) 2364 } 2365 2366 fn xbxor64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> { 2367 let a = self.state[src1].get_i64(); 2368 self.state[dst].set_i64(a ^ i64::from(src2)); 2369 ControlFlow::Continue(()) 2370 } 2371 2372 fn xbnot32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> { 2373 let a = self.state[src].get_u32(); 2374 self.state[dst].set_u32(!a); 2375 ControlFlow::Continue(()) 2376 } 2377 2378 fn xbnot64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> { 2379 let a = self.state[src].get_u64(); 2380 self.state[dst].set_u64(!a); 2381 ControlFlow::Continue(()) 2382 } 2383 2384 fn xmin32_u(&mut 
self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> { 2385 let a = self.state[operands.src1].get_u32(); 2386 let b = self.state[operands.src2].get_u32(); 2387 self.state[operands.dst].set_u32(a.min(b)); 2388 ControlFlow::Continue(()) 2389 } 2390 2391 fn xmin32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> { 2392 let a = self.state[operands.src1].get_i32(); 2393 let b = self.state[operands.src2].get_i32(); 2394 self.state[operands.dst].set_i32(a.min(b)); 2395 ControlFlow::Continue(()) 2396 } 2397 2398 fn xmax32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> { 2399 let a = self.state[operands.src1].get_u32(); 2400 let b = self.state[operands.src2].get_u32(); 2401 self.state[operands.dst].set_u32(a.max(b)); 2402 ControlFlow::Continue(()) 2403 } 2404 2405 fn xmax32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> { 2406 let a = self.state[operands.src1].get_i32(); 2407 let b = self.state[operands.src2].get_i32(); 2408 self.state[operands.dst].set_i32(a.max(b)); 2409 ControlFlow::Continue(()) 2410 } 2411 2412 fn xmin64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> { 2413 let a = self.state[operands.src1].get_u64(); 2414 let b = self.state[operands.src2].get_u64(); 2415 self.state[operands.dst].set_u64(a.min(b)); 2416 ControlFlow::Continue(()) 2417 } 2418 2419 fn xmin64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> { 2420 let a = self.state[operands.src1].get_i64(); 2421 let b = self.state[operands.src2].get_i64(); 2422 self.state[operands.dst].set_i64(a.min(b)); 2423 ControlFlow::Continue(()) 2424 } 2425 2426 fn xmax64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> { 2427 let a = self.state[operands.src1].get_u64(); 2428 let b = self.state[operands.src2].get_u64(); 2429 self.state[operands.dst].set_u64(a.max(b)); 2430 ControlFlow::Continue(()) 2431 } 2432 2433 fn xmax64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> { 2434 let a = 
self.state[operands.src1].get_i64(); 2435 let b = self.state[operands.src2].get_i64(); 2436 self.state[operands.dst].set_i64(a.max(b)); 2437 ControlFlow::Continue(()) 2438 } 2439 2440 fn xctz32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> { 2441 let a = self.state[src].get_u32(); 2442 self.state[dst].set_u32(a.trailing_zeros()); 2443 ControlFlow::Continue(()) 2444 } 2445 2446 fn xctz64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> { 2447 let a = self.state[src].get_u64(); 2448 self.state[dst].set_u64(a.trailing_zeros().into()); 2449 ControlFlow::Continue(()) 2450 } 2451 2452 fn xclz32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> { 2453 let a = self.state[src].get_u32(); 2454 self.state[dst].set_u32(a.leading_zeros()); 2455 ControlFlow::Continue(()) 2456 } 2457 2458 fn xclz64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> { 2459 let a = self.state[src].get_u64(); 2460 self.state[dst].set_u64(a.leading_zeros().into()); 2461 ControlFlow::Continue(()) 2462 } 2463 2464 fn xpopcnt32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> { 2465 let a = self.state[src].get_u32(); 2466 self.state[dst].set_u32(a.count_ones()); 2467 ControlFlow::Continue(()) 2468 } 2469 2470 fn xpopcnt64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> { 2471 let a = self.state[src].get_u64(); 2472 self.state[dst].set_u64(a.count_ones().into()); 2473 ControlFlow::Continue(()) 2474 } 2475 2476 fn xrotl32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> { 2477 let a = self.state[operands.src1].get_u32(); 2478 let b = self.state[operands.src2].get_u32(); 2479 self.state[operands.dst].set_u32(a.rotate_left(b)); 2480 ControlFlow::Continue(()) 2481 } 2482 2483 fn xrotl64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> { 2484 let a = self.state[operands.src1].get_u64(); 2485 let b = self.state[operands.src2].get_u32(); 2486 self.state[operands.dst].set_u64(a.rotate_left(b)); 2487 ControlFlow::Continue(()) 2488 } 2489 2490 fn 
xrotr32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> { 2491 let a = self.state[operands.src1].get_u32(); 2492 let b = self.state[operands.src2].get_u32(); 2493 self.state[operands.dst].set_u32(a.rotate_right(b)); 2494 ControlFlow::Continue(()) 2495 } 2496 2497 fn xrotr64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> { 2498 let a = self.state[operands.src1].get_u64(); 2499 let b = self.state[operands.src2].get_u32(); 2500 self.state[operands.dst].set_u64(a.rotate_right(b)); 2501 ControlFlow::Continue(()) 2502 } 2503 2504 fn xselect32( 2505 &mut self, 2506 dst: XReg, 2507 cond: XReg, 2508 if_nonzero: XReg, 2509 if_zero: XReg, 2510 ) -> ControlFlow<Done> { 2511 let result = if self.state[cond].get_u32() != 0 { 2512 self.state[if_nonzero].get_u32() 2513 } else { 2514 self.state[if_zero].get_u32() 2515 }; 2516 self.state[dst].set_u32(result); 2517 ControlFlow::Continue(()) 2518 } 2519 2520 fn xselect64( 2521 &mut self, 2522 dst: XReg, 2523 cond: XReg, 2524 if_nonzero: XReg, 2525 if_zero: XReg, 2526 ) -> ControlFlow<Done> { 2527 let result = if self.state[cond].get_u32() != 0 { 2528 self.state[if_nonzero].get_u64() 2529 } else { 2530 self.state[if_zero].get_u64() 2531 }; 2532 self.state[dst].set_u64(result); 2533 ControlFlow::Continue(()) 2534 } 2535 2536 fn xabs32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> { 2537 let a = self.state[src].get_i32(); 2538 self.state[dst].set_i32(a.wrapping_abs()); 2539 ControlFlow::Continue(()) 2540 } 2541 2542 fn xabs64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> { 2543 let a = self.state[src].get_i64(); 2544 self.state[dst].set_i64(a.wrapping_abs()); 2545 ControlFlow::Continue(()) 2546 } 2547 2548 // ========================================================================= 2549 // o32 addressing modes 2550 2551 fn xload8_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> { 2552 let result = unsafe { self.load_ne::<u8, crate::XLoad8U32O32>(addr)? 
}; 2553 self.state[dst].set_u32(result.into()); 2554 ControlFlow::Continue(()) 2555 } 2556 2557 fn xload8_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> { 2558 let result = unsafe { self.load_ne::<i8, crate::XLoad8S32O32>(addr)? }; 2559 self.state[dst].set_i32(result.into()); 2560 ControlFlow::Continue(()) 2561 } 2562 2563 fn xload16le_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> { 2564 let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32O32>(addr)? }; 2565 self.state[dst].set_u32(u16::from_le(result).into()); 2566 ControlFlow::Continue(()) 2567 } 2568 2569 fn xload16le_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> { 2570 let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32O32>(addr)? }; 2571 self.state[dst].set_i32(i16::from_le(result).into()); 2572 ControlFlow::Continue(()) 2573 } 2574 2575 fn xload32le_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> { 2576 let result = unsafe { self.load_ne::<i32, crate::XLoad32LeO32>(addr)? }; 2577 self.state[dst].set_i32(i32::from_le(result)); 2578 ControlFlow::Continue(()) 2579 } 2580 2581 fn xload64le_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> { 2582 let result = unsafe { self.load_ne::<i64, crate::XLoad64LeO32>(addr)? 
}; 2583 self.state[dst].set_i64(i64::from_le(result)); 2584 ControlFlow::Continue(()) 2585 } 2586 2587 fn xstore8_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> { 2588 let val = self.state[val].get_u32() as u8; 2589 unsafe { 2590 self.store_ne::<u8, crate::XStore8O32>(addr, val)?; 2591 } 2592 ControlFlow::Continue(()) 2593 } 2594 2595 fn xstore16le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> { 2596 let val = self.state[val].get_u32() as u16; 2597 unsafe { 2598 self.store_ne::<u16, crate::XStore16LeO32>(addr, val.to_le())?; 2599 } 2600 ControlFlow::Continue(()) 2601 } 2602 2603 fn xstore32le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> { 2604 let val = self.state[val].get_u32(); 2605 unsafe { 2606 self.store_ne::<u32, crate::XStore32LeO32>(addr, val.to_le())?; 2607 } 2608 ControlFlow::Continue(()) 2609 } 2610 2611 fn xstore64le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> { 2612 let val = self.state[val].get_u64(); 2613 unsafe { 2614 self.store_ne::<u64, crate::XStore64LeO32>(addr, val.to_le())?; 2615 } 2616 ControlFlow::Continue(()) 2617 } 2618 2619 // ========================================================================= 2620 // g32 addressing modes 2621 2622 fn xload8_u32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> { 2623 let result = unsafe { self.load_ne::<u8, crate::XLoad8U32G32>(addr)? }; 2624 self.state[dst].set_u32(result.into()); 2625 ControlFlow::Continue(()) 2626 } 2627 2628 fn xload8_s32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> { 2629 let result = unsafe { self.load_ne::<i8, crate::XLoad8S32G32>(addr)? }; 2630 self.state[dst].set_i32(result.into()); 2631 ControlFlow::Continue(()) 2632 } 2633 2634 fn xload16le_u32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> { 2635 let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32G32>(addr)? 
}; 2636 self.state[dst].set_u32(u16::from_le(result).into()); 2637 ControlFlow::Continue(()) 2638 } 2639 2640 fn xload16le_s32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> { 2641 let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32G32>(addr)? }; 2642 self.state[dst].set_i32(i16::from_le(result).into()); 2643 ControlFlow::Continue(()) 2644 } 2645 2646 fn xload32le_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> { 2647 let result = unsafe { self.load_ne::<i32, crate::XLoad32LeG32>(addr)? }; 2648 self.state[dst].set_i32(i32::from_le(result)); 2649 ControlFlow::Continue(()) 2650 } 2651 2652 fn xload64le_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> { 2653 let result = unsafe { self.load_ne::<i64, crate::XLoad64LeG32>(addr)? }; 2654 self.state[dst].set_i64(i64::from_le(result)); 2655 ControlFlow::Continue(()) 2656 } 2657 2658 fn xstore8_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> { 2659 let val = self.state[val].get_u32() as u8; 2660 unsafe { 2661 self.store_ne::<u8, crate::XStore8G32>(addr, val)?; 2662 } 2663 ControlFlow::Continue(()) 2664 } 2665 2666 fn xstore16le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> { 2667 let val = self.state[val].get_u32() as u16; 2668 unsafe { 2669 self.store_ne::<u16, crate::XStore16LeG32>(addr, val.to_le())?; 2670 } 2671 ControlFlow::Continue(()) 2672 } 2673 2674 fn xstore32le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> { 2675 let val = self.state[val].get_u32(); 2676 unsafe { 2677 self.store_ne::<u32, crate::XStore32LeG32>(addr, val.to_le())?; 2678 } 2679 ControlFlow::Continue(()) 2680 } 2681 2682 fn xstore64le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> { 2683 let val = self.state[val].get_u64(); 2684 unsafe { 2685 self.store_ne::<u64, crate::XStore64LeG32>(addr, val.to_le())?; 2686 } 2687 ControlFlow::Continue(()) 2688 } 2689 2690 // ========================================================================= 
2691 // z addressing modes 2692 2693 fn xload8_u32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> { 2694 let result = unsafe { self.load_ne::<u8, crate::XLoad8U32Z>(addr)? }; 2695 self.state[dst].set_u32(result.into()); 2696 ControlFlow::Continue(()) 2697 } 2698 2699 fn xload8_s32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> { 2700 let result = unsafe { self.load_ne::<i8, crate::XLoad8S32Z>(addr)? }; 2701 self.state[dst].set_i32(result.into()); 2702 ControlFlow::Continue(()) 2703 } 2704 2705 fn xload16le_u32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> { 2706 let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32Z>(addr)? }; 2707 self.state[dst].set_u32(u16::from_le(result).into()); 2708 ControlFlow::Continue(()) 2709 } 2710 2711 fn xload16le_s32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> { 2712 let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32Z>(addr)? }; 2713 self.state[dst].set_i32(i16::from_le(result).into()); 2714 ControlFlow::Continue(()) 2715 } 2716 2717 fn xload32le_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> { 2718 let result = unsafe { self.load_ne::<i32, crate::XLoad32LeZ>(addr)? }; 2719 self.state[dst].set_i32(i32::from_le(result)); 2720 ControlFlow::Continue(()) 2721 } 2722 2723 fn xload64le_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> { 2724 let result = unsafe { self.load_ne::<i64, crate::XLoad64LeZ>(addr)? 
}; 2725 self.state[dst].set_i64(i64::from_le(result)); 2726 ControlFlow::Continue(()) 2727 } 2728 2729 fn xstore8_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> { 2730 let val = self.state[val].get_u32() as u8; 2731 unsafe { 2732 self.store_ne::<u8, crate::XStore8Z>(addr, val)?; 2733 } 2734 ControlFlow::Continue(()) 2735 } 2736 2737 fn xstore16le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> { 2738 let val = self.state[val].get_u32() as u16; 2739 unsafe { 2740 self.store_ne::<u16, crate::XStore16LeZ>(addr, val.to_le())?; 2741 } 2742 ControlFlow::Continue(()) 2743 } 2744 2745 fn xstore32le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> { 2746 let val = self.state[val].get_u32(); 2747 unsafe { 2748 self.store_ne::<u32, crate::XStore32LeZ>(addr, val.to_le())?; 2749 } 2750 ControlFlow::Continue(()) 2751 } 2752 2753 fn xstore64le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> { 2754 let val = self.state[val].get_u64(); 2755 unsafe { 2756 self.store_ne::<u64, crate::XStore64LeZ>(addr, val.to_le())?; 2757 } 2758 ControlFlow::Continue(()) 2759 } 2760 2761 // ========================================================================= 2762 // g32bne addressing modes 2763 2764 fn xload8_u32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> { 2765 let result = unsafe { self.load_ne::<u8, crate::XLoad8U32G32Bne>(addr)? }; 2766 self.state[dst].set_u32(result.into()); 2767 ControlFlow::Continue(()) 2768 } 2769 2770 fn xload8_s32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> { 2771 let result = unsafe { self.load_ne::<i8, crate::XLoad8S32G32Bne>(addr)? }; 2772 self.state[dst].set_i32(result.into()); 2773 ControlFlow::Continue(()) 2774 } 2775 2776 fn xload16le_u32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> { 2777 let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32G32Bne>(addr)? 
}; 2778 self.state[dst].set_u32(u16::from_le(result).into()); 2779 ControlFlow::Continue(()) 2780 } 2781 2782 fn xload16le_s32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> { 2783 let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32G32Bne>(addr)? }; 2784 self.state[dst].set_i32(i16::from_le(result).into()); 2785 ControlFlow::Continue(()) 2786 } 2787 2788 fn xload32le_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> { 2789 let result = unsafe { self.load_ne::<i32, crate::XLoad32LeG32Bne>(addr)? }; 2790 self.state[dst].set_i32(i32::from_le(result)); 2791 ControlFlow::Continue(()) 2792 } 2793 2794 fn xload64le_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> { 2795 let result = unsafe { self.load_ne::<i64, crate::XLoad64LeG32Bne>(addr)? }; 2796 self.state[dst].set_i64(i64::from_le(result)); 2797 ControlFlow::Continue(()) 2798 } 2799 2800 fn xstore8_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> { 2801 let val = self.state[val].get_u32() as u8; 2802 unsafe { 2803 self.store_ne::<u8, crate::XStore8G32Bne>(addr, val)?; 2804 } 2805 ControlFlow::Continue(()) 2806 } 2807 2808 fn xstore16le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> { 2809 let val = self.state[val].get_u32() as u16; 2810 unsafe { 2811 self.store_ne::<u16, crate::XStore16LeG32Bne>(addr, val.to_le())?; 2812 } 2813 ControlFlow::Continue(()) 2814 } 2815 2816 fn xstore32le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> { 2817 let val = self.state[val].get_u32(); 2818 unsafe { 2819 self.store_ne::<u32, crate::XStore32LeG32Bne>(addr, val.to_le())?; 2820 } 2821 ControlFlow::Continue(()) 2822 } 2823 2824 fn xstore64le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> { 2825 let val = self.state[val].get_u64(); 2826 unsafe { 2827 self.store_ne::<u64, crate::XStore64LeG32Bne>(addr, val.to_le())?; 2828 } 2829 ControlFlow::Continue(()) 2830 } 2831 } 2832 2833 impl 
ExtendedOpVisitor for Interpreter<'_> { 2834 fn trap(&mut self) -> ControlFlow<Done> { 2835 self.done_trap::<crate::Trap>() 2836 } 2837 2838 fn call_indirect_host(&mut self, id: u8) -> ControlFlow<Done> { 2839 self.done_call_indirect_host(id) 2840 } 2841 2842 fn xpcadd(&mut self, dst: XReg, offset: PcRelOffset) -> ControlFlow<Done> { 2843 let pc = self.pc_rel::<crate::Xpcadd>(offset); 2844 self.state[dst].set_ptr(pc.as_ptr()); 2845 ControlFlow::Continue(()) 2846 } 2847 2848 fn bswap32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> { 2849 let src = self.state[src].get_u32(); 2850 self.state[dst].set_u32(src.swap_bytes()); 2851 ControlFlow::Continue(()) 2852 } 2853 2854 fn bswap64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> { 2855 let src = self.state[src].get_u64(); 2856 self.state[dst].set_u64(src.swap_bytes()); 2857 ControlFlow::Continue(()) 2858 } 2859 2860 fn xbmask32(&mut self, dst: XReg, src: XReg) -> Self::Return { 2861 let a = self.state[src].get_u32(); 2862 if a == 0 { 2863 self.state[dst].set_u32(0); 2864 } else { 2865 self.state[dst].set_i32(-1); 2866 } 2867 ControlFlow::Continue(()) 2868 } 2869 2870 fn xbmask64(&mut self, dst: XReg, src: XReg) -> Self::Return { 2871 let a = self.state[src].get_u64(); 2872 if a == 0 { 2873 self.state[dst].set_u64(0); 2874 } else { 2875 self.state[dst].set_i64(-1); 2876 } 2877 ControlFlow::Continue(()) 2878 } 2879 2880 fn xadd32_uoverflow_trap(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> { 2881 let a = self.state[operands.src1].get_u32(); 2882 let b = self.state[operands.src2].get_u32(); 2883 match a.checked_add(b) { 2884 Some(c) => { 2885 self.state[operands.dst].set_u32(c); 2886 ControlFlow::Continue(()) 2887 } 2888 None => self.done_trap::<crate::Xadd32UoverflowTrap>(), 2889 } 2890 } 2891 2892 fn xadd64_uoverflow_trap(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> { 2893 let a = self.state[operands.src1].get_u64(); 2894 let b = self.state[operands.src2].get_u64(); 
2895 match a.checked_add(b) { 2896 Some(c) => { 2897 self.state[operands.dst].set_u64(c); 2898 ControlFlow::Continue(()) 2899 } 2900 None => self.done_trap::<crate::Xadd64UoverflowTrap>(), 2901 } 2902 } 2903 2904 fn xmulhi64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> { 2905 let a = self.state[operands.src1].get_i64(); 2906 let b = self.state[operands.src2].get_i64(); 2907 let result = ((i128::from(a) * i128::from(b)) >> 64) as i64; 2908 self.state[operands.dst].set_i64(result); 2909 ControlFlow::Continue(()) 2910 } 2911 2912 fn xmulhi64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> { 2913 let a = self.state[operands.src1].get_u64(); 2914 let b = self.state[operands.src2].get_u64(); 2915 let result = ((u128::from(a) * u128::from(b)) >> 64) as u64; 2916 self.state[operands.dst].set_u64(result); 2917 ControlFlow::Continue(()) 2918 } 2919 2920 // ========================================================================= 2921 // o32 addressing modes for big-endian X-registers 2922 2923 fn xload16be_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> { 2924 let result = unsafe { self.load_ne::<u16, crate::XLoad16BeU32O32>(addr)? }; 2925 self.state[dst].set_u32(u16::from_be(result).into()); 2926 ControlFlow::Continue(()) 2927 } 2928 2929 fn xload16be_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> { 2930 let result = unsafe { self.load_ne::<i16, crate::XLoad16BeS32O32>(addr)? }; 2931 self.state[dst].set_i32(i16::from_be(result).into()); 2932 ControlFlow::Continue(()) 2933 } 2934 2935 fn xload32be_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> { 2936 let result = unsafe { self.load_ne::<i32, crate::XLoad32BeO32>(addr)? }; 2937 self.state[dst].set_i32(i32::from_be(result)); 2938 ControlFlow::Continue(()) 2939 } 2940 2941 fn xload64be_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> { 2942 let result = unsafe { self.load_ne::<i64, crate::XLoad64BeO32>(addr)? 
};
        self.state[dst].set_i64(i64::from_be(result));
        ControlFlow::Continue(())
    }

    /// Truncates the `u32` in `val` to 16 bits and stores it at `addr` in
    /// big-endian byte order. A failed store short-circuits via `?`.
    fn xstore16be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
        let low16 = self.state[val].get_u32() as u16;
        let encoded = low16.to_be();
        unsafe {
            self.store_ne::<u16, crate::XStore16BeO32>(addr, encoded)?;
        }
        ControlFlow::Continue(())
    }

    /// Stores the `u32` in `val` at `addr` in big-endian byte order.
    fn xstore32be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
        let encoded = self.state[val].get_u32().to_be();
        unsafe {
            self.store_ne::<u32, crate::XStore32BeO32>(addr, encoded)?;
        }
        ControlFlow::Continue(())
    }

    /// Stores the `u64` in `val` at `addr` in big-endian byte order.
    fn xstore64be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
        let encoded = self.state[val].get_u64().to_be();
        unsafe {
            self.store_ne::<u64, crate::XStore64BeO32>(addr, encoded)?;
        }
        ControlFlow::Continue(())
    }

    // =========================================================================
    // Little-endian F-register accesses through `o32` addresses

    /// Loads 32 bits from `addr`, decodes them from little-endian, and writes
    /// them to `dst` reinterpreted as an `f32`.
    fn fload32le_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
        let raw = unsafe { self.load_ne::<u32, crate::Fload32LeO32>(addr) }?;
        let bits = u32::from_le(raw);
        self.state[dst].set_f32(f32::from_bits(bits));
        ControlFlow::Continue(())
    }

    /// Loads 64 bits from `addr`, decodes them from little-endian, and writes
    /// them to `dst` reinterpreted as an `f64`.
    fn fload64le_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u64, crate::Fload64LeO32>(addr)?
};
        self.state[dst].set_f64(f64::from_bits(u64::from_le(val)));
        ControlFlow::Continue(())
    }

    /// Stores the bit pattern of the `f32` in `src` at `addr` in little-endian
    /// byte order. A failed store short-circuits via `?`.
    fn fstore32le_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
        let encoded = self.state[src].get_f32().to_bits().to_le();
        unsafe {
            self.store_ne::<u32, crate::Fstore32LeO32>(addr, encoded)?;
        }
        ControlFlow::Continue(())
    }

    /// Stores the bit pattern of the `f64` in `src` at `addr` in little-endian
    /// byte order. A failed store short-circuits via `?`.
    fn fstore64le_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
        let encoded = self.state[src].get_f64().to_bits().to_le();
        unsafe {
            self.store_ne::<u64, crate::Fstore64LeO32>(addr, encoded)?;
        }
        ControlFlow::Continue(())
    }

    // =========================================================================
    // Big-endian F-register accesses through `o32` addresses

    /// Loads 32 bits from `addr`, decodes them from big-endian, and writes
    /// them to `dst` reinterpreted as an `f32`.
    fn fload32be_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
        let raw = unsafe { self.load_ne::<u32, crate::Fload32BeO32>(addr) }?;
        let bits = u32::from_be(raw);
        self.state[dst].set_f32(f32::from_bits(bits));
        ControlFlow::Continue(())
    }

    /// Loads 64 bits from `addr`, decodes them from big-endian, and writes
    /// them to `dst` reinterpreted as an `f64`.
    fn fload64be_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u64, crate::Fload64BeO32>(addr)?
};
        self.state[dst].set_f64(f64::from_bits(u64::from_be(val)));
        ControlFlow::Continue(())
    }

    /// Stores the bit pattern of the `f32` in `src` at `addr` in big-endian
    /// byte order. A failed store short-circuits via `?`.
    fn fstore32be_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
        let encoded = self.state[src].get_f32().to_bits().to_be();
        unsafe {
            self.store_ne::<u32, crate::Fstore32BeO32>(addr, encoded)?;
        }
        ControlFlow::Continue(())
    }

    /// Stores the bit pattern of the `f64` in `src` at `addr` in big-endian
    /// byte order. A failed store short-circuits via `?`.
    fn fstore64be_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
        let encoded = self.state[src].get_f64().to_bits().to_be();
        unsafe {
            self.store_ne::<u64, crate::Fstore64BeO32>(addr, encoded)?;
        }
        ControlFlow::Continue(())
    }

    // =========================================================================
    // Little-endian F-register accesses through `z` addresses

    /// Loads 32 bits from `addr`, decodes them from little-endian, and writes
    /// them to `dst` reinterpreted as an `f32`.
    fn fload32le_z(&mut self, dst: FReg, addr: AddrZ) -> ControlFlow<Done> {
        let raw = unsafe { self.load_ne::<u32, crate::Fload32LeZ>(addr) }?;
        let bits = u32::from_le(raw);
        self.state[dst].set_f32(f32::from_bits(bits));
        ControlFlow::Continue(())
    }

    /// Loads 64 bits from `addr`, decodes them from little-endian, and writes
    /// them to `dst` reinterpreted as an `f64`.
    fn fload64le_z(&mut self, dst: FReg, addr: AddrZ) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u64, crate::Fload64LeZ>(addr)?
};
        self.state[dst].set_f64(f64::from_bits(u64::from_le(val)));
        ControlFlow::Continue(())
    }

    /// Stores the bit pattern of the `f32` in `src` at `addr` in little-endian
    /// byte order. A failed store short-circuits via `?`.
    fn fstore32le_z(&mut self, addr: AddrZ, src: FReg) -> ControlFlow<Done> {
        let encoded = self.state[src].get_f32().to_bits().to_le();
        unsafe {
            self.store_ne::<u32, crate::Fstore32LeZ>(addr, encoded)?;
        }
        ControlFlow::Continue(())
    }

    /// Stores the bit pattern of the `f64` in `src` at `addr` in little-endian
    /// byte order. A failed store short-circuits via `?`.
    fn fstore64le_z(&mut self, addr: AddrZ, src: FReg) -> ControlFlow<Done> {
        let encoded = self.state[src].get_f64().to_bits().to_le();
        unsafe {
            self.store_ne::<u64, crate::Fstore64LeZ>(addr, encoded)?;
        }
        ControlFlow::Continue(())
    }

    // =========================================================================
    // Little-endian F-register accesses through `g32` addresses

    /// Loads 32 bits from `addr`, decodes them from little-endian, and writes
    /// them to `dst` reinterpreted as an `f32`.
    fn fload32le_g32(&mut self, dst: FReg, addr: AddrG32) -> ControlFlow<Done> {
        let raw = unsafe { self.load_ne::<u32, crate::Fload32LeG32>(addr) }?;
        let bits = u32::from_le(raw);
        self.state[dst].set_f32(f32::from_bits(bits));
        ControlFlow::Continue(())
    }

    /// Loads 64 bits from `addr`, decodes them from little-endian, and writes
    /// them to `dst` reinterpreted as an `f64`.
    fn fload64le_g32(&mut self, dst: FReg, addr: AddrG32) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u64, crate::Fload64LeG32>(addr)?
};
        self.state[dst].set_f64(f64::from_bits(u64::from_le(val)));
        ControlFlow::Continue(())
    }

    /// Stores the bit pattern of the `f32` in `src` at `addr` in little-endian
    /// byte order. A failed store short-circuits via `?`.
    fn fstore32le_g32(&mut self, addr: AddrG32, src: FReg) -> ControlFlow<Done> {
        let encoded = self.state[src].get_f32().to_bits().to_le();
        unsafe {
            self.store_ne::<u32, crate::Fstore32LeG32>(addr, encoded)?;
        }
        ControlFlow::Continue(())
    }

    /// Stores the bit pattern of the `f64` in `src` at `addr` in little-endian
    /// byte order. A failed store short-circuits via `?`.
    fn fstore64le_g32(&mut self, addr: AddrG32, src: FReg) -> ControlFlow<Done> {
        let encoded = self.state[src].get_f64().to_bits().to_le();
        unsafe {
            self.store_ne::<u64, crate::Fstore64LeG32>(addr, encoded)?;
        }
        ControlFlow::Continue(())
    }

    // =========================================================================
    // Little-endian V-register accesses through `o32` addresses

    /// Loads a `u128` from `addr`, decodes it from little-endian, and writes it
    /// to `dst`. A failed load short-circuits via `?`.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload128le_o32(&mut self, dst: VReg, addr: AddrO32) -> ControlFlow<Done> {
        let raw = unsafe { self.load_ne::<u128, crate::VLoad128O32>(addr) }?;
        let decoded = u128::from_le(raw);
        self.state[dst].set_u128(decoded);
        ControlFlow::Continue(())
    }

    /// Stores the `u128` in `src` at `addr` in little-endian byte order.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vstore128le_o32(&mut self, addr: AddrO32, src: VReg) -> ControlFlow<Done> {
        let encoded = self.state[src].get_u128().to_le();
        unsafe {
            self.store_ne::<u128, crate::Vstore128LeO32>(addr, encoded)?;
        }
        ControlFlow::Continue(())
    }

    // =========================================================================
    // Little-endian V-register accesses through `z` addresses

    /// Loads a `u128` from `addr`, decodes it from little-endian, and writes it
    /// to `dst`. A failed load short-circuits via `?`.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload128le_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u128, crate::VLoad128Z>(addr)?
};
        self.state[dst].set_u128(u128::from_le(val));
        ControlFlow::Continue(())
    }

    /// Stores the `u128` in `src` at `addr` in little-endian byte order.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vstore128le_z(&mut self, addr: AddrZ, src: VReg) -> ControlFlow<Done> {
        let encoded = self.state[src].get_u128().to_le();
        unsafe {
            self.store_ne::<u128, crate::Vstore128LeZ>(addr, encoded)?;
        }
        ControlFlow::Continue(())
    }

    // =========================================================================
    // Little-endian V-register accesses through `g32` addresses

    /// Loads a `u128` from `addr`, decodes it from little-endian, and writes it
    /// to `dst`. A failed load short-circuits via `?`.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload128le_g32(&mut self, dst: VReg, addr: AddrG32) -> ControlFlow<Done> {
        let raw = unsafe { self.load_ne::<u128, crate::VLoad128G32>(addr) }?;
        let decoded = u128::from_le(raw);
        self.state[dst].set_u128(decoded);
        ControlFlow::Continue(())
    }

    /// Stores the `u128` in `src` at `addr` in little-endian byte order.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vstore128le_g32(&mut self, addr: AddrG32, src: VReg) -> ControlFlow<Done> {
        let encoded = self.state[src].get_u128().to_le();
        unsafe {
            self.store_ne::<u128, crate::Vstore128LeG32>(addr, encoded)?;
        }
        ControlFlow::Continue(())
    }

    /// Copies the machine state's `fp` field into `dst` as a pointer.
    fn xmov_fp(&mut self, dst: XReg) -> ControlFlow<Done> {
        // Read the field first so the mutable borrow for `dst` does not overlap it.
        let frame_ptr = self.state.fp;
        self.state[dst].set_ptr(frame_ptr);
        ControlFlow::Continue(())
    }

    /// Copies the machine state's `lr` field into `dst` as a pointer.
    fn xmov_lr(&mut self, dst: XReg) -> ControlFlow<Done> {
        // Read the field first so the mutable borrow for `dst` does not overlap it.
        let link = self.state.lr;
        self.state[dst].set_ptr(link);
        ControlFlow::Continue(())
    }

    /// Copies the whole F-register `src` into `dst`.
    fn fmov(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let copied = self.state[src];
        self.state[dst] = copied;
        ControlFlow::Continue(())
    }

    /// Copies the whole V-register `src` into `dst`.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmov(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let copied = self.state[src];
        self.state[dst] = copied;
        ControlFlow::Continue(())
    }

    /// Writes the `f32` whose bit pattern is `bits` into `dst`.
    fn fconst32(&mut self, dst: FReg, bits: u32) -> ControlFlow<Done> {
self.state[dst].set_f32(f32::from_bits(bits));
        ControlFlow::Continue(())
    }

    /// Writes the `f64` whose bit pattern is `bits` into `dst`.
    fn fconst64(&mut self, dst: FReg, bits: u64) -> ControlFlow<Done> {
        let constant = f64::from_bits(bits);
        self.state[dst].set_f64(constant);
        ControlFlow::Continue(())
    }

    /// Writes the raw bit pattern of the `f32` in `src` to `dst` as a `u32`.
    fn bitcast_int_from_float_32(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let bits = self.state[src].get_f32().to_bits();
        self.state[dst].set_u32(bits);
        ControlFlow::Continue(())
    }

    /// Writes the raw bit pattern of the `f64` in `src` to `dst` as a `u64`.
    fn bitcast_int_from_float_64(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let bits = self.state[src].get_f64().to_bits();
        self.state[dst].set_u64(bits);
        ControlFlow::Continue(())
    }

    /// Reinterprets the `u32` in `src` as an `f32` bit pattern and writes it to `dst`.
    fn bitcast_float_from_int_32(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let float = f32::from_bits(self.state[src].get_u32());
        self.state[dst].set_f32(float);
        ControlFlow::Continue(())
    }

    /// Reinterprets the `u64` in `src` as an `f64` bit pattern and writes it to `dst`.
    fn bitcast_float_from_int_64(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let float = f64::from_bits(self.state[src].get_u64());
        self.state[dst].set_f64(float);
        ControlFlow::Continue(())
    }

    /// Writes 1 to `dst` when the two `f32` operands compare equal, else 0.
    fn feq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let lhs = self.state[src1].get_f32();
        let rhs = self.state[src2].get_f32();
        let equal = lhs == rhs;
        self.state[dst].set_u32(u32::from(equal));
        ControlFlow::Continue(())
    }

    /// Writes 1 to `dst` when the two `f32` operands compare unequal, else 0.
    fn fneq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let lhs = self.state[src1].get_f32();
        let rhs = self.state[src2].get_f32();
        let differs = lhs != rhs;
        self.state[dst].set_u32(u32::from(differs));
        ControlFlow::Continue(())
    }

    /// Writes 1 to `dst` when the first `f32` operand is less than the second, else 0.
    fn flt32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let lhs = self.state[src1].get_f32();
        let rhs = self.state[src2].get_f32();
        let less = lhs < rhs;
        self.state[dst].set_u32(u32::from(less));
        ControlFlow::Continue(())
    }

    /// Writes 1 to `dst` when the first `f32` operand is less than or equal to
    /// the second, else 0.
    fn flteq32(&mut self, dst: XReg, src1: FReg, src2: FReg) ->
ControlFlow<Done> {
        let a = self.state[src1].get_f32();
        let b = self.state[src2].get_f32();
        self.state[dst].set_u32(u32::from(a <= b));
        ControlFlow::Continue(())
    }

    /// Writes 1 to `dst` when the two `f64` operands compare equal, else 0.
    fn feq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let lhs = self.state[src1].get_f64();
        let rhs = self.state[src2].get_f64();
        let equal = lhs == rhs;
        self.state[dst].set_u32(u32::from(equal));
        ControlFlow::Continue(())
    }

    /// Writes 1 to `dst` when the two `f64` operands compare unequal, else 0.
    fn fneq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let lhs = self.state[src1].get_f64();
        let rhs = self.state[src2].get_f64();
        let differs = lhs != rhs;
        self.state[dst].set_u32(u32::from(differs));
        ControlFlow::Continue(())
    }

    /// Writes 1 to `dst` when the first `f64` operand is less than the second, else 0.
    fn flt64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let lhs = self.state[src1].get_f64();
        let rhs = self.state[src2].get_f64();
        let less = lhs < rhs;
        self.state[dst].set_u32(u32::from(less));
        ControlFlow::Continue(())
    }

    /// Writes 1 to `dst` when the first `f64` operand is less than or equal to
    /// the second, else 0.
    fn flteq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let lhs = self.state[src1].get_f64();
        let rhs = self.state[src2].get_f64();
        let less_or_equal = lhs <= rhs;
        self.state[dst].set_u32(u32::from(less_or_equal));
        ControlFlow::Continue(())
    }

    /// Copies the `f32` from `if_nonzero` into `dst` when the `u32` in `cond`
    /// is non-zero, and from `if_zero` otherwise.
    fn fselect32(
        &mut self,
        dst: FReg,
        cond: XReg,
        if_nonzero: FReg,
        if_zero: FReg,
    ) -> ControlFlow<Done> {
        // Pick the source register first; only the chosen one is read.
        let chosen = if self.state[cond].get_u32() != 0 {
            if_nonzero
        } else {
            if_zero
        };
        let value = self.state[chosen].get_f32();
        self.state[dst].set_f32(value);
        ControlFlow::Continue(())
    }

    /// Copies the `f64` from `if_nonzero` into `dst` when the `u32` in `cond`
    /// is non-zero, and from `if_zero` otherwise.
    fn fselect64(
        &mut self,
        dst: FReg,
        cond: XReg,
        if_nonzero: FReg,
        if_zero: FReg,
    ) -> ControlFlow<Done> {
        // Pick the source register first; only the chosen one is read.
        let chosen = if self.state[cond].get_u32() != 0 {
            if_nonzero
        } else {
            if_zero
        };
        let value = self.state[chosen].get_f64();
        self.state[dst].set_f64(value);
        ControlFlow::Continue(())
    }

    /// Converts the `i32` in `src` to `f32` with an `as` cast and writes it to `dst`.
    fn f32_from_x32_s(&mut
self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i32();
        self.state[dst].set_f32(a as f32);
        ControlFlow::Continue(())
    }

    /// Converts the `u32` in `src` to `f32` with an `as` cast and writes it to `dst`.
    fn f32_from_x32_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let converted = self.state[src].get_u32() as f32;
        self.state[dst].set_f32(converted);
        ControlFlow::Continue(())
    }

    /// Converts the `i64` in `src` to `f32` with an `as` cast and writes it to `dst`.
    fn f32_from_x64_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let converted = self.state[src].get_i64() as f32;
        self.state[dst].set_f32(converted);
        ControlFlow::Continue(())
    }

    /// Converts the `u64` in `src` to `f32` with an `as` cast and writes it to `dst`.
    fn f32_from_x64_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let converted = self.state[src].get_u64() as f32;
        self.state[dst].set_f32(converted);
        ControlFlow::Continue(())
    }

    /// Converts the `i32` in `src` to `f64` with an `as` cast and writes it to `dst`.
    fn f64_from_x32_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let converted = self.state[src].get_i32() as f64;
        self.state[dst].set_f64(converted);
        ControlFlow::Continue(())
    }

    /// Converts the `u32` in `src` to `f64` with an `as` cast and writes it to `dst`.
    fn f64_from_x32_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let converted = self.state[src].get_u32() as f64;
        self.state[dst].set_f64(converted);
        ControlFlow::Continue(())
    }

    /// Converts the `i64` in `src` to `f64` with an `as` cast and writes it to `dst`.
    fn f64_from_x64_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let converted = self.state[src].get_i64() as f64;
        self.state[dst].set_f64(converted);
        ControlFlow::Continue(())
    }

    /// Converts the `u64` in `src` to `f64` with an `as` cast and writes it to `dst`.
    fn f64_from_x64_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let converted = self.state[src].get_u64() as f64;
        self.state[dst].set_f64(converted);
        ControlFlow::Continue(())
    }

    /// Converts the `f32` in `src` to `i32`. The value is first validated by
    /// `check_xnn_from_f32` against `f32_cvt_to_int_bounds(true, 32)`; a failed
    /// check short-circuits via `?` before `dst` is written.
    fn x32_from_f32_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let input = self.state[src].get_f32();
        let bounds = f32_cvt_to_int_bounds(true, 32);
        self.check_xnn_from_f32::<crate::X32FromF32S>(input, bounds)?;
        self.state[dst].set_i32(input as i32);
        ControlFlow::Continue(())
    }

    /// Converts the `f32` in `src` to `u32`. The value is first validated by
    /// `check_xnn_from_f32` against `f32_cvt_to_int_bounds(false, 32)`; a failed
    /// check short-circuits via `?` before `dst` is written.
    fn x32_from_f32_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
self.check_xnn_from_f32::<crate::X32FromF32U>(a, f32_cvt_to_int_bounds(false, 32))?;
        self.state[dst].set_u32(a as u32);
        ControlFlow::Continue(())
    }

    /// Converts the `f32` in `src` to `i64`. The value is first validated by
    /// `check_xnn_from_f32` against `f32_cvt_to_int_bounds(true, 64)`; a failed
    /// check short-circuits via `?` before `dst` is written.
    fn x64_from_f32_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let input = self.state[src].get_f32();
        let bounds = f32_cvt_to_int_bounds(true, 64);
        self.check_xnn_from_f32::<crate::X64FromF32S>(input, bounds)?;
        self.state[dst].set_i64(input as i64);
        ControlFlow::Continue(())
    }

    /// Converts the `f32` in `src` to `u64`. The value is first validated by
    /// `check_xnn_from_f32` against `f32_cvt_to_int_bounds(false, 64)`; a failed
    /// check short-circuits via `?` before `dst` is written.
    fn x64_from_f32_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let input = self.state[src].get_f32();
        let bounds = f32_cvt_to_int_bounds(false, 64);
        self.check_xnn_from_f32::<crate::X64FromF32U>(input, bounds)?;
        self.state[dst].set_u64(input as u64);
        ControlFlow::Continue(())
    }

    /// Converts the `f64` in `src` to `i32`. The value is first validated by
    /// `check_xnn_from_f64` against `f64_cvt_to_int_bounds(true, 32)`; a failed
    /// check short-circuits via `?` before `dst` is written.
    fn x32_from_f64_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let input = self.state[src].get_f64();
        let bounds = f64_cvt_to_int_bounds(true, 32);
        self.check_xnn_from_f64::<crate::X32FromF64S>(input, bounds)?;
        self.state[dst].set_i32(input as i32);
        ControlFlow::Continue(())
    }

    /// Converts the `f64` in `src` to `u32`. The value is first validated by
    /// `check_xnn_from_f64` against `f64_cvt_to_int_bounds(false, 32)`; a failed
    /// check short-circuits via `?` before `dst` is written.
    fn x32_from_f64_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let input = self.state[src].get_f64();
        let bounds = f64_cvt_to_int_bounds(false, 32);
        self.check_xnn_from_f64::<crate::X32FromF64U>(input, bounds)?;
        self.state[dst].set_u32(input as u32);
        ControlFlow::Continue(())
    }

    /// Converts the `f64` in `src` to `i64`. The value is first validated by
    /// `check_xnn_from_f64` against `f64_cvt_to_int_bounds(true, 64)`; a failed
    /// check short-circuits via `?` before `dst` is written.
    fn x64_from_f64_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let input = self.state[src].get_f64();
        let bounds = f64_cvt_to_int_bounds(true, 64);
        self.check_xnn_from_f64::<crate::X64FromF64S>(input, bounds)?;
        self.state[dst].set_i64(input as i64);
        ControlFlow::Continue(())
    }

    /// Converts the `f64` in `src` to `u64`. The value is first validated by
    /// `check_xnn_from_f64` against `f64_cvt_to_int_bounds(false, 64)`; a failed
    /// check short-circuits via `?` before `dst` is written.
    fn x64_from_f64_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let input = self.state[src].get_f64();
        let bounds = f64_cvt_to_int_bounds(false, 64);
        self.check_xnn_from_f64::<crate::X64FromF64U>(input, bounds)?;
        self.state[dst].set_u64(input as u64);
        ControlFlow::Continue(())
    }

    /// Converts the `f32` in `src` to `i32` with a plain `as` cast (which in
    /// Rust saturates at the integer bounds and maps NaN to 0); no check is made.
    fn x32_from_f32_s_sat(&mut self, dst: XReg, src: FReg) ->
ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_i32(a as i32);
        ControlFlow::Continue(())
    }

    // The `_sat` conversions below rely on Rust's float-to-integer `as` cast,
    // which saturates at the target type's bounds and turns NaN into 0.

    /// Saturating conversion of the `f32` in `src` to `u32`, written to `dst`.
    fn x32_from_f32_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let saturated = self.state[src].get_f32() as u32;
        self.state[dst].set_u32(saturated);
        ControlFlow::Continue(())
    }

    /// Saturating conversion of the `f32` in `src` to `i64`, written to `dst`.
    fn x64_from_f32_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let saturated = self.state[src].get_f32() as i64;
        self.state[dst].set_i64(saturated);
        ControlFlow::Continue(())
    }

    /// Saturating conversion of the `f32` in `src` to `u64`, written to `dst`.
    fn x64_from_f32_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let saturated = self.state[src].get_f32() as u64;
        self.state[dst].set_u64(saturated);
        ControlFlow::Continue(())
    }

    /// Saturating conversion of the `f64` in `src` to `i32`, written to `dst`.
    fn x32_from_f64_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let saturated = self.state[src].get_f64() as i32;
        self.state[dst].set_i32(saturated);
        ControlFlow::Continue(())
    }

    /// Saturating conversion of the `f64` in `src` to `u32`, written to `dst`.
    fn x32_from_f64_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let saturated = self.state[src].get_f64() as u32;
        self.state[dst].set_u32(saturated);
        ControlFlow::Continue(())
    }

    /// Saturating conversion of the `f64` in `src` to `i64`, written to `dst`.
    fn x64_from_f64_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let saturated = self.state[src].get_f64() as i64;
        self.state[dst].set_i64(saturated);
        ControlFlow::Continue(())
    }

    /// Saturating conversion of the `f64` in `src` to `u64`, written to `dst`.
    fn x64_from_f64_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let saturated = self.state[src].get_f64() as u64;
        self.state[dst].set_u64(saturated);
        ControlFlow::Continue(())
    }

    /// Narrows the `f64` in `src` to `f32` with an `as` cast and writes it to `dst`.
    fn f32_from_f64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let narrowed = self.state[src].get_f64() as f32;
        self.state[dst].set_f32(narrowed);
        ControlFlow::Continue(())
    }

    /// Widens the `f32` in `src` to `f64` and writes it to `dst`.
    fn f64_from_f32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let widened = f64::from(self.state[src].get_f32());
        self.state[dst].set_f64(widened);
        ControlFlow::Continue(())
    }

    fn
fcopysign32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32();
        let b = self.state[operands.src2].get_f32();
        self.state[operands.dst].set_f32(a.wasm_copysign(b));
        ControlFlow::Continue(())
    }

    /// Writes `wasm_copysign(src1, src2)` for the two `f64` operands to `dst`.
    fn fcopysign64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let magnitude = self.state[operands.src1].get_f64();
        let sign_source = self.state[operands.src2].get_f64();
        let combined = magnitude.wasm_copysign(sign_source);
        self.state[operands.dst].set_f64(combined);
        ControlFlow::Continue(())
    }

    /// Writes the `f32` sum `src1 + src2` to `dst`.
    fn fadd32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let lhs = self.state[operands.src1].get_f32();
        let rhs = self.state[operands.src2].get_f32();
        let sum = lhs + rhs;
        self.state[operands.dst].set_f32(sum);
        ControlFlow::Continue(())
    }

    /// Writes the `f32` difference `src1 - src2` to `dst`.
    fn fsub32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let lhs = self.state[operands.src1].get_f32();
        let rhs = self.state[operands.src2].get_f32();
        let difference = lhs - rhs;
        self.state[operands.dst].set_f32(difference);
        ControlFlow::Continue(())
    }

    /// Lane-wise `f32` subtraction `src1 - src2` over four lanes.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsubf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut lanes = self.state[operands.src1].get_f32x4();
        let rhs = self.state[operands.src2].get_f32x4();
        lanes
            .iter_mut()
            .zip(rhs)
            .for_each(|(lane, subtrahend)| *lane -= subtrahend);
        self.state[operands.dst].set_f32x4(lanes);
        ControlFlow::Continue(())
    }

    /// Writes the `f32` product `src1 * src2` to `dst`.
    fn fmul32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let lhs = self.state[operands.src1].get_f32();
        let rhs = self.state[operands.src2].get_f32();
        let product = lhs * rhs;
        self.state[operands.dst].set_f32(product);
        ControlFlow::Continue(())
    }

    /// Lane-wise `f32` multiplication `src1 * src2` over four lanes.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmulf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f32x4();
        let b =
self.state[operands.src2].get_f32x4();
        for (a, b) in a.iter_mut().zip(b) {
            *a = *a * b;
        }
        self.state[operands.dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    /// Writes the `f32` quotient `src1 / src2` to `dst`.
    fn fdiv32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let dividend = self.state[operands.src1].get_f32();
        let divisor = self.state[operands.src2].get_f32();
        let quotient = dividend / divisor;
        self.state[operands.dst].set_f32(quotient);
        ControlFlow::Continue(())
    }

    /// Lane-wise `f32` division `src1 / src2` over four lanes.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vdivf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        // Divide in place on a copy of the first operand's lanes.
        let mut quotient = self.state[operands.src1].get_f32x4();
        let divisor = self.state[operands.src2].get_f32x4();
        for (q, d) in quotient.iter_mut().zip(divisor) {
            *q /= d;
        }
        self.state[operands.dst].set_f32x4(quotient);
        ControlFlow::Continue(())
    }

    /// Lane-wise `f64` division `src1 / src2` over two lanes.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vdivf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        // Divide in place on a copy of the first operand's lanes.
        let mut quotient = self.state[operands.src1].get_f64x2();
        let divisor = self.state[operands.src2].get_f64x2();
        for (q, d) in quotient.iter_mut().zip(divisor) {
            *q /= d;
        }
        self.state[operands.dst].set_f64x2(quotient);
        ControlFlow::Continue(())
    }

    /// Writes `wasm_maximum(src1, src2)` for the two `f32` operands to `dst`.
    fn fmaximum32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let lhs = self.state[operands.src1].get_f32();
        let rhs = self.state[operands.src2].get_f32();
        let largest = lhs.wasm_maximum(rhs);
        self.state[operands.dst].set_f32(largest);
        ControlFlow::Continue(())
    }

    /// Writes `wasm_minimum(src1, src2)` for the two `f32` operands to `dst`.
    fn fminimum32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let lhs = self.state[operands.src1].get_f32();
        let rhs = self.state[operands.src2].get_f32();
        let smallest = lhs.wasm_minimum(rhs);
        self.state[operands.dst].set_f32(smallest);
        ControlFlow::Continue(())
    }

    /// Applies `wasm_trunc` to the `f32` in `src` and writes the result to `dst`.
    fn ftrunc32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a
= self.state[src].get_f32();
        self.state[dst].set_f32(a.wasm_trunc());
        ControlFlow::Continue(())
    }

    /// Applies `wasm_trunc` to each of the four `f32` lanes of `src`.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vtrunc32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let truncated = self.state[src].get_f32x4().map(|lane| lane.wasm_trunc());
        self.state[dst].set_f32x4(truncated);
        ControlFlow::Continue(())
    }

    /// Applies `wasm_trunc` to each of the two `f64` lanes of `src`.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vtrunc64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let truncated = self.state[src].get_f64x2().map(|lane| lane.wasm_trunc());
        self.state[dst].set_f64x2(truncated);
        ControlFlow::Continue(())
    }

    /// Applies `wasm_floor` to the `f32` in `src` and writes the result to `dst`.
    fn ffloor32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let floored = self.state[src].get_f32().wasm_floor();
        self.state[dst].set_f32(floored);
        ControlFlow::Continue(())
    }

    /// Applies `wasm_floor` to each of the four `f32` lanes of `src`.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vfloor32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let floored = self.state[src].get_f32x4().map(|lane| lane.wasm_floor());
        self.state[dst].set_f32x4(floored);
        ControlFlow::Continue(())
    }

    /// Applies `wasm_floor` to each of the two `f64` lanes of `src`.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vfloor64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let floored = self.state[src].get_f64x2().map(|lane| lane.wasm_floor());
        self.state[dst].set_f64x2(floored);
        ControlFlow::Continue(())
    }

    /// Applies `wasm_ceil` to the `f32` in `src` and writes the result to `dst`.
    fn fceil32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let ceiled = self.state[src].get_f32().wasm_ceil();
        self.state[dst].set_f32(ceiled);
        ControlFlow::Continue(())
    }

    /// Applies `wasm_ceil` to each of the four `f32` lanes of `src`.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vceil32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a =
self.state[src].get_f32x4();
        for elem in a.iter_mut() {
            *elem = elem.wasm_ceil();
        }
        self.state[dst].set_f32x4(a);

        ControlFlow::Continue(())
    }

    /// Applies `wasm_ceil` to each of the two `f64` lanes of `src`.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vceil64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let ceiled = self.state[src].get_f64x2().map(|lane| lane.wasm_ceil());
        self.state[dst].set_f64x2(ceiled);
        ControlFlow::Continue(())
    }

    /// Applies `wasm_nearest` to the `f32` in `src` and writes the result to `dst`.
    fn fnearest32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let rounded = self.state[src].get_f32().wasm_nearest();
        self.state[dst].set_f32(rounded);
        ControlFlow::Continue(())
    }

    /// Applies `wasm_nearest` to each of the four `f32` lanes of `src`.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vnearest32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let rounded = self.state[src].get_f32x4().map(|lane| lane.wasm_nearest());
        self.state[dst].set_f32x4(rounded);
        ControlFlow::Continue(())
    }

    /// Applies `wasm_nearest` to each of the two `f64` lanes of `src`.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vnearest64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let rounded = self.state[src].get_f64x2().map(|lane| lane.wasm_nearest());
        self.state[dst].set_f64x2(rounded);
        ControlFlow::Continue(())
    }

    /// Applies `wasm_sqrt` to the `f32` in `src` and writes the result to `dst`.
    fn fsqrt32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let root = self.state[src].get_f32().wasm_sqrt();
        self.state[dst].set_f32(root);
        ControlFlow::Continue(())
    }

    /// Applies `wasm_sqrt` to each of the four `f32` lanes of `src`.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsqrt32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let roots = self.state[src].get_f32x4().map(|lane| lane.wasm_sqrt());
        self.state[dst].set_f32x4(roots);
        ControlFlow::Continue(())
    }

    /// Applies `wasm_sqrt` to each of the two `f64` lanes of `src`.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsqrt64x2(&mut self,
dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f64x2();
        for elem in a.iter_mut() {
            *elem = elem.wasm_sqrt();
        }
        self.state[dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

    /// Negates the `f32` in `src` and writes the result to `dst`.
    fn fneg32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let negated = -self.state[src].get_f32();
        self.state[dst].set_f32(negated);
        ControlFlow::Continue(())
    }

    /// Negates each of the four `f32` lanes of `src`.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vnegf32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let negated = self.state[src].get_f32x4().map(|lane| -lane);
        self.state[dst].set_f32x4(negated);
        ControlFlow::Continue(())
    }

    /// Applies `wasm_abs` to the `f32` in `src` and writes the result to `dst`.
    fn fabs32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let magnitude = self.state[src].get_f32().wasm_abs();
        self.state[dst].set_f32(magnitude);
        ControlFlow::Continue(())
    }

    /// Writes the `f64` sum `src1 + src2` to `dst`.
    fn fadd64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let lhs = self.state[operands.src1].get_f64();
        let rhs = self.state[operands.src2].get_f64();
        let sum = lhs + rhs;
        self.state[operands.dst].set_f64(sum);
        ControlFlow::Continue(())
    }

    /// Writes the `f64` difference `src1 - src2` to `dst`.
    fn fsub64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let lhs = self.state[operands.src1].get_f64();
        let rhs = self.state[operands.src2].get_f64();
        let difference = lhs - rhs;
        self.state[operands.dst].set_f64(difference);
        ControlFlow::Continue(())
    }

    /// Writes the `f64` product `src1 * src2` to `dst`.
    fn fmul64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let lhs = self.state[operands.src1].get_f64();
        let rhs = self.state[operands.src2].get_f64();
        let product = lhs * rhs;
        self.state[operands.dst].set_f64(product);
        ControlFlow::Continue(())
    }

    /// Writes the `f64` quotient `src1 / src2` to `dst`.
    fn fdiv64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64();
        let b = self.state[operands.src2].get_f64();
        self.state[operands.dst].set_f64(a / b);
ControlFlow::Continue(())
    }

    /// Writes `wasm_maximum(src1, src2)` for the two `f64` operands to `dst`.
    fn fmaximum64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let lhs = self.state[operands.src1].get_f64();
        let rhs = self.state[operands.src2].get_f64();
        let largest = lhs.wasm_maximum(rhs);
        self.state[operands.dst].set_f64(largest);
        ControlFlow::Continue(())
    }

    /// Writes `wasm_minimum(src1, src2)` for the two `f64` operands to `dst`.
    fn fminimum64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let lhs = self.state[operands.src1].get_f64();
        let rhs = self.state[operands.src2].get_f64();
        let smallest = lhs.wasm_minimum(rhs);
        self.state[operands.dst].set_f64(smallest);
        ControlFlow::Continue(())
    }

    /// Applies `wasm_trunc` to the `f64` in `src` and writes the result to `dst`.
    fn ftrunc64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let truncated = self.state[src].get_f64().wasm_trunc();
        self.state[dst].set_f64(truncated);
        ControlFlow::Continue(())
    }

    /// Applies `wasm_floor` to the `f64` in `src` and writes the result to `dst`.
    fn ffloor64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let floored = self.state[src].get_f64().wasm_floor();
        self.state[dst].set_f64(floored);
        ControlFlow::Continue(())
    }

    /// Applies `wasm_ceil` to the `f64` in `src` and writes the result to `dst`.
    fn fceil64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let ceiled = self.state[src].get_f64().wasm_ceil();
        self.state[dst].set_f64(ceiled);
        ControlFlow::Continue(())
    }

    /// Applies `wasm_nearest` to the `f64` in `src` and writes the result to `dst`.
    fn fnearest64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let rounded = self.state[src].get_f64().wasm_nearest();
        self.state[dst].set_f64(rounded);
        ControlFlow::Continue(())
    }

    /// Applies `wasm_sqrt` to the `f64` in `src` and writes the result to `dst`.
    fn fsqrt64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let root = self.state[src].get_f64().wasm_sqrt();
        self.state[dst].set_f64(root);
        ControlFlow::Continue(())
    }

    /// Negates the `f64` in `src` and writes the result to `dst`.
    fn fneg64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let negated = -self.state[src].get_f64();
        self.state[dst].set_f64(negated);
        ControlFlow::Continue(())
    }

    /// Applies `wasm_abs` to the `f64` in `src` and writes the result to `dst`.
    fn fabs64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_f64(a.wasm_abs());
        ControlFlow::Continue(())
}

    /// Lane-wise wrapping addition over sixteen `i8` lanes.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddi8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut sum = self.state[operands.src1].get_i8x16();
        let rhs = self.state[operands.src2].get_i8x16();
        sum.iter_mut()
            .zip(rhs)
            .for_each(|(lane, addend)| *lane = lane.wrapping_add(addend));
        self.state[operands.dst].set_i8x16(sum);
        ControlFlow::Continue(())
    }

    /// Lane-wise wrapping addition over eight `i16` lanes.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut sum = self.state[operands.src1].get_i16x8();
        let rhs = self.state[operands.src2].get_i16x8();
        sum.iter_mut()
            .zip(rhs)
            .for_each(|(lane, addend)| *lane = lane.wrapping_add(addend));
        self.state[operands.dst].set_i16x8(sum);
        ControlFlow::Continue(())
    }

    /// Lane-wise wrapping addition over four `i32` lanes.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddi32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut sum = self.state[operands.src1].get_i32x4();
        let rhs = self.state[operands.src2].get_i32x4();
        sum.iter_mut()
            .zip(rhs)
            .for_each(|(lane, addend)| *lane = lane.wrapping_add(addend));
        self.state[operands.dst].set_i32x4(sum);
        ControlFlow::Continue(())
    }

    /// Lane-wise wrapping addition over two `i64` lanes.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddi64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut sum = self.state[operands.src1].get_i64x2();
        let rhs = self.state[operands.src2].get_i64x2();
        sum.iter_mut()
            .zip(rhs)
            .for_each(|(lane, addend)| *lane = lane.wrapping_add(addend));
        self.state[operands.dst].set_i64x2(sum);
        ControlFlow::Continue(())
    }

    /// Lane-wise `f32` addition over four lanes.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        for (a, b) in a.iter_mut().zip(b) {
            *a += b;
        }
self.state[operands.dst].set_f32x4(a); 3859 ControlFlow::Continue(()) 3860 } 3861 3862 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 3863 fn vaddf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 3864 let mut a = self.state[operands.src1].get_f64x2(); 3865 let b = self.state[operands.src2].get_f64x2(); 3866 for (a, b) in a.iter_mut().zip(b) { 3867 *a += b; 3868 } 3869 self.state[operands.dst].set_f64x2(a); 3870 ControlFlow::Continue(()) 3871 } 3872 3873 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 3874 fn vaddi8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 3875 let mut a = self.state[operands.src1].get_i8x16(); 3876 let b = self.state[operands.src2].get_i8x16(); 3877 for (a, b) in a.iter_mut().zip(b) { 3878 *a = (*a).saturating_add(b); 3879 } 3880 self.state[operands.dst].set_i8x16(a); 3881 ControlFlow::Continue(()) 3882 } 3883 3884 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 3885 fn vaddu8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 3886 let mut a = self.state[operands.src1].get_u8x16(); 3887 let b = self.state[operands.src2].get_u8x16(); 3888 for (a, b) in a.iter_mut().zip(b) { 3889 *a = (*a).saturating_add(b); 3890 } 3891 self.state[operands.dst].set_u8x16(a); 3892 ControlFlow::Continue(()) 3893 } 3894 3895 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 3896 fn vaddi16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 3897 let mut a = self.state[operands.src1].get_i16x8(); 3898 let b = self.state[operands.src2].get_i16x8(); 3899 for (a, b) in a.iter_mut().zip(b) { 3900 *a = (*a).saturating_add(b); 3901 } 3902 self.state[operands.dst].set_i16x8(a); 3903 ControlFlow::Continue(()) 3904 } 3905 3906 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 3907 fn vaddu16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 3908 let mut a = self.state[operands.src1].get_u16x8(); 3909 let b = 
self.state[operands.src2].get_u16x8(); 3910 for (a, b) in a.iter_mut().zip(b) { 3911 *a = (*a).saturating_add(b); 3912 } 3913 self.state[operands.dst].set_u16x8(a); 3914 ControlFlow::Continue(()) 3915 } 3916 3917 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 3918 fn vaddpairwisei16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 3919 let a = self.state[operands.src1].get_i16x8(); 3920 let b = self.state[operands.src2].get_i16x8(); 3921 let mut result = [0i16; 8]; 3922 let half = result.len() / 2; 3923 for i in 0..half { 3924 result[i] = a[2 * i].wrapping_add(a[2 * i + 1]); 3925 result[i + half] = b[2 * i].wrapping_add(b[2 * i + 1]); 3926 } 3927 self.state[operands.dst].set_i16x8(result); 3928 ControlFlow::Continue(()) 3929 } 3930 3931 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 3932 fn vaddpairwisei32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 3933 let a = self.state[operands.src1].get_i32x4(); 3934 let b = self.state[operands.src2].get_i32x4(); 3935 let mut result = [0i32; 4]; 3936 result[0] = a[0].wrapping_add(a[1]); 3937 result[1] = a[2].wrapping_add(a[3]); 3938 result[2] = b[0].wrapping_add(b[1]); 3939 result[3] = b[2].wrapping_add(b[3]); 3940 self.state[operands.dst].set_i32x4(result); 3941 ControlFlow::Continue(()) 3942 } 3943 3944 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 3945 fn vshli8x16(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> { 3946 let a = self.state[operands.src1].get_i8x16(); 3947 let b = self.state[operands.src2].get_u32(); 3948 self.state[operands.dst].set_i8x16(a.map(|a| a.wrapping_shl(b))); 3949 ControlFlow::Continue(()) 3950 } 3951 3952 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 3953 fn vshli16x8(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> { 3954 let a = self.state[operands.src1].get_i16x8(); 3955 let b = self.state[operands.src2].get_u32(); 3956 self.state[operands.dst].set_i16x8(a.map(|a| 
a.wrapping_shl(b))); 3957 ControlFlow::Continue(()) 3958 } 3959 3960 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 3961 fn vshli32x4(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> { 3962 let a = self.state[operands.src1].get_i32x4(); 3963 let b = self.state[operands.src2].get_u32(); 3964 self.state[operands.dst].set_i32x4(a.map(|a| a.wrapping_shl(b))); 3965 ControlFlow::Continue(()) 3966 } 3967 3968 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 3969 fn vshli64x2(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> { 3970 let a = self.state[operands.src1].get_i64x2(); 3971 let b = self.state[operands.src2].get_u32(); 3972 self.state[operands.dst].set_i64x2(a.map(|a| a.wrapping_shl(b))); 3973 ControlFlow::Continue(()) 3974 } 3975 3976 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 3977 fn vshri8x16_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> { 3978 let a = self.state[operands.src1].get_i8x16(); 3979 let b = self.state[operands.src2].get_u32(); 3980 self.state[operands.dst].set_i8x16(a.map(|a| a.wrapping_shr(b))); 3981 ControlFlow::Continue(()) 3982 } 3983 3984 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 3985 fn vshri16x8_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> { 3986 let a = self.state[operands.src1].get_i16x8(); 3987 let b = self.state[operands.src2].get_u32(); 3988 self.state[operands.dst].set_i16x8(a.map(|a| a.wrapping_shr(b))); 3989 ControlFlow::Continue(()) 3990 } 3991 3992 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 3993 fn vshri32x4_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> { 3994 let a = self.state[operands.src1].get_i32x4(); 3995 let b = self.state[operands.src2].get_u32(); 3996 self.state[operands.dst].set_i32x4(a.map(|a| a.wrapping_shr(b))); 3997 ControlFlow::Continue(()) 3998 } 3999 4000 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4001 fn 
vshri64x2_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> { 4002 let a = self.state[operands.src1].get_i64x2(); 4003 let b = self.state[operands.src2].get_u32(); 4004 self.state[operands.dst].set_i64x2(a.map(|a| a.wrapping_shr(b))); 4005 ControlFlow::Continue(()) 4006 } 4007 4008 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4009 fn vshri8x16_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> { 4010 let a = self.state[operands.src1].get_u8x16(); 4011 let b = self.state[operands.src2].get_u32(); 4012 self.state[operands.dst].set_u8x16(a.map(|a| a.wrapping_shr(b))); 4013 ControlFlow::Continue(()) 4014 } 4015 4016 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4017 fn vshri16x8_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> { 4018 let a = self.state[operands.src1].get_u16x8(); 4019 let b = self.state[operands.src2].get_u32(); 4020 self.state[operands.dst].set_u16x8(a.map(|a| a.wrapping_shr(b))); 4021 ControlFlow::Continue(()) 4022 } 4023 4024 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4025 fn vshri32x4_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> { 4026 let a = self.state[operands.src1].get_u32x4(); 4027 let b = self.state[operands.src2].get_u32(); 4028 self.state[operands.dst].set_u32x4(a.map(|a| a.wrapping_shr(b))); 4029 ControlFlow::Continue(()) 4030 } 4031 4032 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4033 fn vshri64x2_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> { 4034 let a = self.state[operands.src1].get_u64x2(); 4035 let b = self.state[operands.src2].get_u32(); 4036 self.state[operands.dst].set_u64x2(a.map(|a| a.wrapping_shr(b))); 4037 ControlFlow::Continue(()) 4038 } 4039 4040 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4041 fn vconst128(&mut self, dst: VReg, val: u128) -> ControlFlow<Done> { 4042 self.state[dst].set_u128(val); 4043 ControlFlow::Continue(()) 4044 } 
4045 4046 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4047 fn vsplatx8(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> { 4048 let val = self.state[src].get_u32() as u8; 4049 self.state[dst].set_u8x16([val; 16]); 4050 ControlFlow::Continue(()) 4051 } 4052 4053 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4054 fn vsplatx16(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> { 4055 let val = self.state[src].get_u32() as u16; 4056 self.state[dst].set_u16x8([val; 8]); 4057 ControlFlow::Continue(()) 4058 } 4059 4060 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4061 fn vsplatx32(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> { 4062 let val = self.state[src].get_u32(); 4063 self.state[dst].set_u32x4([val; 4]); 4064 ControlFlow::Continue(()) 4065 } 4066 4067 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4068 fn vsplatx64(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> { 4069 let val = self.state[src].get_u64(); 4070 self.state[dst].set_u64x2([val; 2]); 4071 ControlFlow::Continue(()) 4072 } 4073 4074 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4075 fn vsplatf32(&mut self, dst: VReg, src: FReg) -> ControlFlow<Done> { 4076 let val = self.state[src].get_f32(); 4077 self.state[dst].set_f32x4([val; 4]); 4078 ControlFlow::Continue(()) 4079 } 4080 4081 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4082 fn vsplatf64(&mut self, dst: VReg, src: FReg) -> ControlFlow<Done> { 4083 let val = self.state[src].get_f64(); 4084 self.state[dst].set_f64x2([val; 2]); 4085 ControlFlow::Continue(()) 4086 } 4087 4088 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4089 fn vload8x8_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> { 4090 let val = unsafe { self.load_ne::<[i8; 8], crate::VLoad8x8SZ>(addr)? 
}; 4091 self.state[dst].set_i16x8(val.map(|i| i.into())); 4092 ControlFlow::Continue(()) 4093 } 4094 4095 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4096 fn vload8x8_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> { 4097 let val = unsafe { self.load_ne::<[u8; 8], crate::VLoad8x8UZ>(addr)? }; 4098 self.state[dst].set_u16x8(val.map(|i| i.into())); 4099 ControlFlow::Continue(()) 4100 } 4101 4102 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4103 fn vload16x4le_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> { 4104 let val = unsafe { self.load_ne::<[i16; 4], crate::VLoad16x4LeSZ>(addr)? }; 4105 self.state[dst].set_i32x4(val.map(|i| i16::from_le(i).into())); 4106 ControlFlow::Continue(()) 4107 } 4108 4109 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4110 fn vload16x4le_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> { 4111 let val = unsafe { self.load_ne::<[u16; 4], crate::VLoad16x4LeUZ>(addr)? }; 4112 self.state[dst].set_u32x4(val.map(|i| u16::from_le(i).into())); 4113 ControlFlow::Continue(()) 4114 } 4115 4116 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4117 fn vload32x2le_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> { 4118 let val = unsafe { self.load_ne::<[i32; 2], crate::VLoad32x2LeSZ>(addr)? }; 4119 self.state[dst].set_i64x2(val.map(|i| i32::from_le(i).into())); 4120 ControlFlow::Continue(()) 4121 } 4122 4123 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4124 fn vload32x2le_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> { 4125 let val = unsafe { self.load_ne::<[u32; 2], crate::VLoad32x2LeUZ>(addr)? 
}; 4126 self.state[dst].set_u64x2(val.map(|i| u32::from_le(i).into())); 4127 ControlFlow::Continue(()) 4128 } 4129 4130 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4131 fn vband128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 4132 let a = self.state[operands.src1].get_u128(); 4133 let b = self.state[operands.src2].get_u128(); 4134 self.state[operands.dst].set_u128(a & b); 4135 ControlFlow::Continue(()) 4136 } 4137 4138 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4139 fn vbor128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 4140 let a = self.state[operands.src1].get_u128(); 4141 let b = self.state[operands.src2].get_u128(); 4142 self.state[operands.dst].set_u128(a | b); 4143 ControlFlow::Continue(()) 4144 } 4145 4146 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4147 fn vbxor128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 4148 let a = self.state[operands.src1].get_u128(); 4149 let b = self.state[operands.src2].get_u128(); 4150 self.state[operands.dst].set_u128(a ^ b); 4151 ControlFlow::Continue(()) 4152 } 4153 4154 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4155 fn vbnot128(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> { 4156 let a = self.state[src].get_u128(); 4157 self.state[dst].set_u128(!a); 4158 ControlFlow::Continue(()) 4159 } 4160 4161 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4162 fn vbitselect128(&mut self, dst: VReg, c: VReg, x: VReg, y: VReg) -> ControlFlow<Done> { 4163 let c = self.state[c].get_u128(); 4164 let x = self.state[x].get_u128(); 4165 let y = self.state[y].get_u128(); 4166 self.state[dst].set_u128((c & x) | (!c & y)); 4167 ControlFlow::Continue(()) 4168 } 4169 4170 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4171 fn vbitmask8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> { 4172 let a = self.state[src].get_u8x16(); 4173 let mut result = 0; 4174 for item in a.iter().rev() { 4175 result <<= 1; 4176 result |= 
(*item >> 7) as u32; 4177 } 4178 self.state[dst].set_u32(result); 4179 ControlFlow::Continue(()) 4180 } 4181 4182 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4183 fn vbitmask16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> { 4184 let a = self.state[src].get_u16x8(); 4185 let mut result = 0; 4186 for item in a.iter().rev() { 4187 result <<= 1; 4188 result |= (*item >> 15) as u32; 4189 } 4190 self.state[dst].set_u32(result); 4191 ControlFlow::Continue(()) 4192 } 4193 4194 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4195 fn vbitmask32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> { 4196 let a = self.state[src].get_u32x4(); 4197 let mut result = 0; 4198 for item in a.iter().rev() { 4199 result <<= 1; 4200 result |= *item >> 31; 4201 } 4202 self.state[dst].set_u32(result); 4203 ControlFlow::Continue(()) 4204 } 4205 4206 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4207 fn vbitmask64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> { 4208 let a = self.state[src].get_u64x2(); 4209 let mut result = 0; 4210 for item in a.iter().rev() { 4211 result <<= 1; 4212 result |= (*item >> 63) as u32; 4213 } 4214 self.state[dst].set_u32(result); 4215 ControlFlow::Continue(()) 4216 } 4217 4218 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4219 fn valltrue8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> { 4220 let a = self.state[src].get_u8x16(); 4221 let result = a.iter().all(|a| *a != 0); 4222 self.state[dst].set_u32(u32::from(result)); 4223 ControlFlow::Continue(()) 4224 } 4225 4226 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4227 fn valltrue16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> { 4228 let a = self.state[src].get_u16x8(); 4229 let result = a.iter().all(|a| *a != 0); 4230 self.state[dst].set_u32(u32::from(result)); 4231 ControlFlow::Continue(()) 4232 } 4233 4234 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4235 fn valltrue32x4(&mut self, dst: XReg, src: VReg) -> 
ControlFlow<Done> { 4236 let a = self.state[src].get_u32x4(); 4237 let result = a.iter().all(|a| *a != 0); 4238 self.state[dst].set_u32(u32::from(result)); 4239 ControlFlow::Continue(()) 4240 } 4241 4242 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4243 fn valltrue64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> { 4244 let a = self.state[src].get_u64x2(); 4245 let result = a.iter().all(|a| *a != 0); 4246 self.state[dst].set_u32(u32::from(result)); 4247 ControlFlow::Continue(()) 4248 } 4249 4250 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4251 fn vanytrue8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> { 4252 let a = self.state[src].get_u8x16(); 4253 let result = a.iter().any(|a| *a != 0); 4254 self.state[dst].set_u32(u32::from(result)); 4255 ControlFlow::Continue(()) 4256 } 4257 4258 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4259 fn vanytrue16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> { 4260 let a = self.state[src].get_u16x8(); 4261 let result = a.iter().any(|a| *a != 0); 4262 self.state[dst].set_u32(u32::from(result)); 4263 ControlFlow::Continue(()) 4264 } 4265 4266 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4267 fn vanytrue32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> { 4268 let a = self.state[src].get_u32x4(); 4269 let result = a.iter().any(|a| *a != 0); 4270 self.state[dst].set_u32(u32::from(result)); 4271 ControlFlow::Continue(()) 4272 } 4273 4274 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4275 fn vanytrue64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> { 4276 let a = self.state[src].get_u64x2(); 4277 let result = a.iter().any(|a| *a != 0); 4278 self.state[dst].set_u32(u32::from(result)); 4279 ControlFlow::Continue(()) 4280 } 4281 4282 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4283 fn vf32x4_from_i32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> { 4284 let a = self.state[src].get_i32x4(); 4285 self.state[dst].set_f32x4(a.map(|i| i as 
f32)); 4286 ControlFlow::Continue(()) 4287 } 4288 4289 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4290 fn vf32x4_from_i32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> { 4291 let a = self.state[src].get_u32x4(); 4292 self.state[dst].set_f32x4(a.map(|i| i as f32)); 4293 ControlFlow::Continue(()) 4294 } 4295 4296 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4297 fn vf64x2_from_i64x2_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> { 4298 let a = self.state[src].get_i64x2(); 4299 self.state[dst].set_f64x2(a.map(|i| i as f64)); 4300 ControlFlow::Continue(()) 4301 } 4302 4303 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4304 fn vf64x2_from_i64x2_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> { 4305 let a = self.state[src].get_u64x2(); 4306 self.state[dst].set_f64x2(a.map(|i| i as f64)); 4307 ControlFlow::Continue(()) 4308 } 4309 4310 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4311 fn vi32x4_from_f32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> { 4312 let a = self.state[src].get_f32x4(); 4313 self.state[dst].set_i32x4(a.map(|f| f as i32)); 4314 ControlFlow::Continue(()) 4315 } 4316 4317 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4318 fn vi32x4_from_f32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> { 4319 let a = self.state[src].get_f32x4(); 4320 self.state[dst].set_u32x4(a.map(|f| f as u32)); 4321 ControlFlow::Continue(()) 4322 } 4323 4324 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4325 fn vi64x2_from_f64x2_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> { 4326 let a = self.state[src].get_f64x2(); 4327 self.state[dst].set_i64x2(a.map(|f| f as i64)); 4328 ControlFlow::Continue(()) 4329 } 4330 4331 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4332 fn vi64x2_from_f64x2_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> { 4333 let a = self.state[src].get_f64x2(); 4334 self.state[dst].set_u64x2(a.map(|f| f as u64)); 4335 ControlFlow::Continue(()) 
4336 } 4337 4338 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4339 fn vwidenlow8x16_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> { 4340 let a = *self.state[src].get_i8x16().first_chunk().unwrap(); 4341 self.state[dst].set_i16x8(a.map(|i| i.into())); 4342 ControlFlow::Continue(()) 4343 } 4344 4345 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4346 fn vwidenlow8x16_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> { 4347 let a = *self.state[src].get_u8x16().first_chunk().unwrap(); 4348 self.state[dst].set_u16x8(a.map(|i| i.into())); 4349 ControlFlow::Continue(()) 4350 } 4351 4352 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4353 fn vwidenlow16x8_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> { 4354 let a = *self.state[src].get_i16x8().first_chunk().unwrap(); 4355 self.state[dst].set_i32x4(a.map(|i| i.into())); 4356 ControlFlow::Continue(()) 4357 } 4358 4359 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4360 fn vwidenlow16x8_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> { 4361 let a = *self.state[src].get_u16x8().first_chunk().unwrap(); 4362 self.state[dst].set_u32x4(a.map(|i| i.into())); 4363 ControlFlow::Continue(()) 4364 } 4365 4366 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4367 fn vwidenlow32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> { 4368 let a = *self.state[src].get_i32x4().first_chunk().unwrap(); 4369 self.state[dst].set_i64x2(a.map(|i| i.into())); 4370 ControlFlow::Continue(()) 4371 } 4372 4373 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4374 fn vwidenlow32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> { 4375 let a = *self.state[src].get_u32x4().first_chunk().unwrap(); 4376 self.state[dst].set_u64x2(a.map(|i| i.into())); 4377 ControlFlow::Continue(()) 4378 } 4379 4380 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4381 fn vwidenhigh8x16_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> { 4382 let a = 
*self.state[src].get_i8x16().last_chunk().unwrap(); 4383 self.state[dst].set_i16x8(a.map(|i| i.into())); 4384 ControlFlow::Continue(()) 4385 } 4386 4387 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4388 fn vwidenhigh8x16_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> { 4389 let a = *self.state[src].get_u8x16().last_chunk().unwrap(); 4390 self.state[dst].set_u16x8(a.map(|i| i.into())); 4391 ControlFlow::Continue(()) 4392 } 4393 4394 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4395 fn vwidenhigh16x8_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> { 4396 let a = *self.state[src].get_i16x8().last_chunk().unwrap(); 4397 self.state[dst].set_i32x4(a.map(|i| i.into())); 4398 ControlFlow::Continue(()) 4399 } 4400 4401 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4402 fn vwidenhigh16x8_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> { 4403 let a = *self.state[src].get_u16x8().last_chunk().unwrap(); 4404 self.state[dst].set_u32x4(a.map(|i| i.into())); 4405 ControlFlow::Continue(()) 4406 } 4407 4408 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4409 fn vwidenhigh32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> { 4410 let a = *self.state[src].get_i32x4().last_chunk().unwrap(); 4411 self.state[dst].set_i64x2(a.map(|i| i.into())); 4412 ControlFlow::Continue(()) 4413 } 4414 4415 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4416 fn vwidenhigh32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> { 4417 let a = *self.state[src].get_u32x4().last_chunk().unwrap(); 4418 self.state[dst].set_u64x2(a.map(|i| i.into())); 4419 ControlFlow::Continue(()) 4420 } 4421 4422 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4423 fn vnarrow16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 4424 let a = self.state[operands.src1].get_i16x8(); 4425 let b = self.state[operands.src2].get_i16x8(); 4426 let mut result = [0; 16]; 4427 for (i, d) in a.iter().chain(&b).zip(&mut result) { 4428 *d = 
(*i) 4429 .try_into() 4430 .unwrap_or(if *i < 0 { i8::MIN } else { i8::MAX }); 4431 } 4432 self.state[operands.dst].set_i8x16(result); 4433 ControlFlow::Continue(()) 4434 } 4435 4436 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4437 fn vnarrow16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 4438 let a = self.state[operands.src1].get_i16x8(); 4439 let b = self.state[operands.src2].get_i16x8(); 4440 let mut result = [0; 16]; 4441 for (i, d) in a.iter().chain(&b).zip(&mut result) { 4442 *d = (*i) 4443 .try_into() 4444 .unwrap_or(if *i < 0 { u8::MIN } else { u8::MAX }); 4445 } 4446 self.state[operands.dst].set_u8x16(result); 4447 ControlFlow::Continue(()) 4448 } 4449 4450 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4451 fn vnarrow32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 4452 let a = self.state[operands.src1].get_i32x4(); 4453 let b = self.state[operands.src2].get_i32x4(); 4454 let mut result = [0; 8]; 4455 for (i, d) in a.iter().chain(&b).zip(&mut result) { 4456 *d = (*i) 4457 .try_into() 4458 .unwrap_or(if *i < 0 { i16::MIN } else { i16::MAX }); 4459 } 4460 self.state[operands.dst].set_i16x8(result); 4461 ControlFlow::Continue(()) 4462 } 4463 4464 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4465 fn vnarrow32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 4466 let a = self.state[operands.src1].get_i32x4(); 4467 let b = self.state[operands.src2].get_i32x4(); 4468 let mut result = [0; 8]; 4469 for (i, d) in a.iter().chain(&b).zip(&mut result) { 4470 *d = (*i) 4471 .try_into() 4472 .unwrap_or(if *i < 0 { u16::MIN } else { u16::MAX }); 4473 } 4474 self.state[operands.dst].set_u16x8(result); 4475 ControlFlow::Continue(()) 4476 } 4477 4478 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4479 fn vnarrow64x2_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 4480 let a = self.state[operands.src1].get_i64x2(); 4481 let b = 
self.state[operands.src2].get_i64x2(); 4482 let mut result = [0; 4]; 4483 for (i, d) in a.iter().chain(&b).zip(&mut result) { 4484 *d = (*i) 4485 .try_into() 4486 .unwrap_or(if *i < 0 { i32::MIN } else { i32::MAX }); 4487 } 4488 self.state[operands.dst].set_i32x4(result); 4489 ControlFlow::Continue(()) 4490 } 4491 4492 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4493 fn vnarrow64x2_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 4494 let a = self.state[operands.src1].get_i64x2(); 4495 let b = self.state[operands.src2].get_i64x2(); 4496 let mut result = [0; 4]; 4497 for (i, d) in a.iter().chain(&b).zip(&mut result) { 4498 *d = (*i) 4499 .try_into() 4500 .unwrap_or(if *i < 0 { u32::MIN } else { u32::MAX }); 4501 } 4502 self.state[operands.dst].set_u32x4(result); 4503 ControlFlow::Continue(()) 4504 } 4505 4506 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4507 fn vunarrow64x2_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 4508 let a = self.state[operands.src1].get_u64x2(); 4509 let b = self.state[operands.src2].get_u64x2(); 4510 let mut result = [0; 4]; 4511 for (i, d) in a.iter().chain(&b).zip(&mut result) { 4512 *d = (*i).try_into().unwrap_or(u32::MAX); 4513 } 4514 self.state[operands.dst].set_u32x4(result); 4515 ControlFlow::Continue(()) 4516 } 4517 4518 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4519 fn vfpromotelow(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> { 4520 let a = self.state[src].get_f32x4(); 4521 self.state[dst].set_f64x2([a[0].into(), a[1].into()]); 4522 ControlFlow::Continue(()) 4523 } 4524 4525 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4526 fn vfdemote(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> { 4527 let a = self.state[src].get_f64x2(); 4528 self.state[dst].set_f32x4([a[0] as f32, a[1] as f32, 0.0, 0.0]); 4529 ControlFlow::Continue(()) 4530 } 4531 4532 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4533 fn vsubi8x16(&mut self, operands: 
BinaryOperands<VReg>) -> ControlFlow<Done> { 4534 let mut a = self.state[operands.src1].get_i8x16(); 4535 let b = self.state[operands.src2].get_i8x16(); 4536 for (a, b) in a.iter_mut().zip(b) { 4537 *a = a.wrapping_sub(b); 4538 } 4539 self.state[operands.dst].set_i8x16(a); 4540 ControlFlow::Continue(()) 4541 } 4542 4543 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4544 fn vsubi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 4545 let mut a = self.state[operands.src1].get_i16x8(); 4546 let b = self.state[operands.src2].get_i16x8(); 4547 for (a, b) in a.iter_mut().zip(b) { 4548 *a = a.wrapping_sub(b); 4549 } 4550 self.state[operands.dst].set_i16x8(a); 4551 ControlFlow::Continue(()) 4552 } 4553 4554 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4555 fn vsubi32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 4556 let mut a = self.state[operands.src1].get_i32x4(); 4557 let b = self.state[operands.src2].get_i32x4(); 4558 for (a, b) in a.iter_mut().zip(b) { 4559 *a = a.wrapping_sub(b); 4560 } 4561 self.state[operands.dst].set_i32x4(a); 4562 ControlFlow::Continue(()) 4563 } 4564 4565 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4566 fn vsubi64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 4567 let mut a = self.state[operands.src1].get_i64x2(); 4568 let b = self.state[operands.src2].get_i64x2(); 4569 for (a, b) in a.iter_mut().zip(b) { 4570 *a = a.wrapping_sub(b); 4571 } 4572 self.state[operands.dst].set_i64x2(a); 4573 ControlFlow::Continue(()) 4574 } 4575 4576 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4577 fn vsubi8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 4578 let mut a = self.state[operands.src1].get_i8x16(); 4579 let b = self.state[operands.src2].get_i8x16(); 4580 for (a, b) in a.iter_mut().zip(b) { 4581 *a = a.saturating_sub(b); 4582 } 4583 self.state[operands.dst].set_i8x16(a); 4584 ControlFlow::Continue(()) 4585 } 4586 4587 
// Lane-wise saturating subtraction of two u8x16 vectors.
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
fn vsubu8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
    let mut lanes = self.state[operands.src1].get_u8x16();
    let rhs = self.state[operands.src2].get_u8x16();
    lanes
        .iter_mut()
        .zip(rhs)
        .for_each(|(x, y)| *x = x.saturating_sub(y));
    self.state[operands.dst].set_u8x16(lanes);
    ControlFlow::Continue(())
}

// Lane-wise saturating subtraction of two i16x8 vectors.
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
fn vsubi16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
    let mut lanes = self.state[operands.src1].get_i16x8();
    let rhs = self.state[operands.src2].get_i16x8();
    lanes
        .iter_mut()
        .zip(rhs)
        .for_each(|(x, y)| *x = x.saturating_sub(y));
    self.state[operands.dst].set_i16x8(lanes);
    ControlFlow::Continue(())
}

// Lane-wise saturating subtraction of two u16x8 vectors.
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
fn vsubu16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
    let mut lanes = self.state[operands.src1].get_u16x8();
    let rhs = self.state[operands.src2].get_u16x8();
    lanes
        .iter_mut()
        .zip(rhs)
        .for_each(|(x, y)| *x = x.saturating_sub(y));
    self.state[operands.dst].set_u16x8(lanes);
    ControlFlow::Continue(())
}

// Lane-wise subtraction of two f64x2 vectors.
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
fn vsubf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
    let mut lanes = self.state[operands.src1].get_f64x2();
    let rhs = self.state[operands.src2].get_f64x2();
    lanes.iter_mut().zip(rhs).for_each(|(x, y)| *x -= y);
    self.state[operands.dst].set_f64x2(lanes);
    ControlFlow::Continue(())
}

// Lane-wise wrapping multiplication of two i8x16 vectors.
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
fn vmuli8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
    let mut lanes = self.state[operands.src1].get_i8x16();
    let rhs = self.state[operands.src2].get_i8x16();
    lanes
        .iter_mut()
        .zip(rhs)
        .for_each(|(x, y)| *x = x.wrapping_mul(y));
    self.state[operands.dst].set_i8x16(lanes);
    ControlFlow::Continue(())
}

// Lane-wise wrapping multiplication of two i16x8 vectors.
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
fn vmuli16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
    let mut lanes = self.state[operands.src1].get_i16x8();
    let rhs = self.state[operands.src2].get_i16x8();
    lanes
        .iter_mut()
        .zip(rhs)
        .for_each(|(x, y)| *x = x.wrapping_mul(y));
    self.state[operands.dst].set_i16x8(lanes);
    ControlFlow::Continue(())
}

// Lane-wise wrapping multiplication of two i32x4 vectors.
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
fn vmuli32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
    let mut lanes = self.state[operands.src1].get_i32x4();
    let rhs = self.state[operands.src2].get_i32x4();
    lanes
        .iter_mut()
        .zip(rhs)
        .for_each(|(x, y)| *x = x.wrapping_mul(y));
    self.state[operands.dst].set_i32x4(lanes);
    ControlFlow::Continue(())
}

// Lane-wise wrapping multiplication of two i64x2 vectors.
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
fn vmuli64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
    let mut lanes = self.state[operands.src1].get_i64x2();
    let rhs = self.state[operands.src2].get_i64x2();
    lanes
        .iter_mut()
        .zip(rhs)
        .for_each(|(x, y)| *x = x.wrapping_mul(y));
    self.state[operands.dst].set_i64x2(lanes);
    ControlFlow::Continue(())
}

// Lane-wise multiplication of two f64x2 vectors.
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
fn vmulf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
    let mut lanes = self.state[operands.src1].get_f64x2();
    let rhs = self.state[operands.src2].get_f64x2();
    lanes.iter_mut().zip(rhs).for_each(|(x, y)| *x *= y);
    self.state[operands.dst].set_f64x2(lanes);
    ControlFlow::Continue(())
}

// Per i16 lane: widen both operands to i32, multiply, add the rounding
// constant `1 << 14`, shift right by 15, then clamp back into the i16 range.
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
fn vqmulrsi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
    const MIN: i32 = i16::MIN as i32;
    const MAX: i32 = i16::MAX as i32;
    let lhs = self.state[operands.src1].get_i16x8();
    let rhs = self.state[operands.src2].get_i16x8();
    let mut out = [0i16; 8];
    for ((slot, x), y) in out.iter_mut().zip(lhs).zip(rhs) {
        let rounded = (i32::from(x) * i32::from(y) + (1 << 14)) >> 15;
        *slot = rounded.clamp(MIN, MAX) as i16;
    }
    self.state[operands.dst].set_i16x8(out);
    ControlFlow::Continue(())
}

// Replaces each u8 lane with the number of set bits it contains.
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
fn vpopcnt8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
    let lanes = self.state[src].get_u8x16();
    let counts = lanes.map(|lane| lane.count_ones() as u8);
    self.state[dst].set_u8x16(counts);
    ControlFlow::Continue(())
}

// Extracts u8 lane `lane` and zero-extends it into a u32 X register.
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
fn xextractv8x16(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
    let lanes = self.state[src].get_u8x16();
    let index = usize::from(lane);
    // NOTE(review): `lane` is not bounds-checked here; presumably the bytecode
    // producer guarantees it is < 16 — confirm.
    let picked = unsafe { *lanes.get_unchecked(index) };
    self.state[dst].set_u32(u32::from(picked));
    ControlFlow::Continue(())
}

// Extracts u16 lane `lane` and zero-extends it into a u32 X register.
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
fn xextractv16x8(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
    let lanes = self.state[src].get_u16x8();
    let index = usize::from(lane);
    // NOTE(review): `lane` is not bounds-checked here; presumably the bytecode
    // producer guarantees it is < 8 — confirm.
    let picked = unsafe { *lanes.get_unchecked(index) };
    self.state[dst].set_u32(u32::from(picked));
    ControlFlow::Continue(())
}

// Extracts u32 lane `lane` into an X register.
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
fn xextractv32x4(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
    let lanes = self.state[src].get_u32x4();
    let index = usize::from(lane);
    // NOTE(review): `lane` is not bounds-checked here; presumably the bytecode
    // producer guarantees it is < 4 — confirm.
    let picked = unsafe { *lanes.get_unchecked(index) };
    self.state[dst].set_u32(picked);
    ControlFlow::Continue(())
}

// Extracts u64 lane `lane` into an X register.
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
fn xextractv64x2(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
    let lanes = self.state[src].get_u64x2();
    let index = usize::from(lane);
    // NOTE(review): `lane` is not bounds-checked here; presumably the bytecode
    // producer guarantees it is < 2 — confirm.
    let picked = unsafe { *lanes.get_unchecked(index) };
    self.state[dst].set_u64(picked);
    ControlFlow::Continue(())
}

#[interp_disable_if_cfg(pulley_disable_interp_simd)]
fn fextractv32x4(&mut self, dst: FReg, src: VReg, lane: u8) -> ControlFlow<Done> {
    let a = unsafe
{ *self.state[src].get_f32x4().get_unchecked(usize::from(lane)) }; 4738 self.state[dst].set_f32(a); 4739 ControlFlow::Continue(()) 4740 } 4741 4742 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4743 fn fextractv64x2(&mut self, dst: FReg, src: VReg, lane: u8) -> ControlFlow<Done> { 4744 let a = unsafe { *self.state[src].get_f64x2().get_unchecked(usize::from(lane)) }; 4745 self.state[dst].set_f64(a); 4746 ControlFlow::Continue(()) 4747 } 4748 4749 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4750 fn vinsertx8( 4751 &mut self, 4752 operands: BinaryOperands<VReg, VReg, XReg>, 4753 lane: u8, 4754 ) -> ControlFlow<Done> { 4755 let mut a = self.state[operands.src1].get_u8x16(); 4756 let b = self.state[operands.src2].get_u32() as u8; 4757 unsafe { 4758 *a.get_unchecked_mut(usize::from(lane)) = b; 4759 } 4760 self.state[operands.dst].set_u8x16(a); 4761 ControlFlow::Continue(()) 4762 } 4763 4764 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4765 fn vinsertx16( 4766 &mut self, 4767 operands: BinaryOperands<VReg, VReg, XReg>, 4768 lane: u8, 4769 ) -> ControlFlow<Done> { 4770 let mut a = self.state[operands.src1].get_u16x8(); 4771 let b = self.state[operands.src2].get_u32() as u16; 4772 unsafe { 4773 *a.get_unchecked_mut(usize::from(lane)) = b; 4774 } 4775 self.state[operands.dst].set_u16x8(a); 4776 ControlFlow::Continue(()) 4777 } 4778 4779 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4780 fn vinsertx32( 4781 &mut self, 4782 operands: BinaryOperands<VReg, VReg, XReg>, 4783 lane: u8, 4784 ) -> ControlFlow<Done> { 4785 let mut a = self.state[operands.src1].get_u32x4(); 4786 let b = self.state[operands.src2].get_u32(); 4787 unsafe { 4788 *a.get_unchecked_mut(usize::from(lane)) = b; 4789 } 4790 self.state[operands.dst].set_u32x4(a); 4791 ControlFlow::Continue(()) 4792 } 4793 4794 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4795 fn vinsertx64( 4796 &mut self, 4797 operands: BinaryOperands<VReg, VReg, XReg>, 4798 lane: u8, 4799 ) -> 
ControlFlow<Done> { 4800 let mut a = self.state[operands.src1].get_u64x2(); 4801 let b = self.state[operands.src2].get_u64(); 4802 unsafe { 4803 *a.get_unchecked_mut(usize::from(lane)) = b; 4804 } 4805 self.state[operands.dst].set_u64x2(a); 4806 ControlFlow::Continue(()) 4807 } 4808 4809 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4810 fn vinsertf32( 4811 &mut self, 4812 operands: BinaryOperands<VReg, VReg, FReg>, 4813 lane: u8, 4814 ) -> ControlFlow<Done> { 4815 let mut a = self.state[operands.src1].get_f32x4(); 4816 let b = self.state[operands.src2].get_f32(); 4817 unsafe { 4818 *a.get_unchecked_mut(usize::from(lane)) = b; 4819 } 4820 self.state[operands.dst].set_f32x4(a); 4821 ControlFlow::Continue(()) 4822 } 4823 4824 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4825 fn vinsertf64( 4826 &mut self, 4827 operands: BinaryOperands<VReg, VReg, FReg>, 4828 lane: u8, 4829 ) -> ControlFlow<Done> { 4830 let mut a = self.state[operands.src1].get_f64x2(); 4831 let b = self.state[operands.src2].get_f64(); 4832 unsafe { 4833 *a.get_unchecked_mut(usize::from(lane)) = b; 4834 } 4835 self.state[operands.dst].set_f64x2(a); 4836 ControlFlow::Continue(()) 4837 } 4838 4839 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4840 fn veq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 4841 let a = self.state[operands.src1].get_u8x16(); 4842 let b = self.state[operands.src2].get_u8x16(); 4843 let mut c = [0; 16]; 4844 for ((a, b), c) in a.iter().zip(&b).zip(&mut c) { 4845 *c = if a == b { u8::MAX } else { 0 }; 4846 } 4847 self.state[operands.dst].set_u8x16(c); 4848 ControlFlow::Continue(()) 4849 } 4850 4851 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4852 fn vneq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 4853 let a = self.state[operands.src1].get_u8x16(); 4854 let b = self.state[operands.src2].get_u8x16(); 4855 let mut c = [0; 16]; 4856 for ((a, b), c) in a.iter().zip(&b).zip(&mut c) { 4857 *c = if a != b { 
u8::MAX } else { 0 }; 4858 } 4859 self.state[operands.dst].set_u8x16(c); 4860 ControlFlow::Continue(()) 4861 } 4862 4863 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4864 fn vslt8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 4865 let a = self.state[operands.src1].get_i8x16(); 4866 let b = self.state[operands.src2].get_i8x16(); 4867 let mut c = [0; 16]; 4868 for ((a, b), c) in a.iter().zip(&b).zip(&mut c) { 4869 *c = if a < b { u8::MAX } else { 0 }; 4870 } 4871 self.state[operands.dst].set_u8x16(c); 4872 ControlFlow::Continue(()) 4873 } 4874 4875 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4876 fn vslteq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 4877 let a = self.state[operands.src1].get_i8x16(); 4878 let b = self.state[operands.src2].get_i8x16(); 4879 let mut c = [0; 16]; 4880 for ((a, b), c) in a.iter().zip(&b).zip(&mut c) { 4881 *c = if a <= b { u8::MAX } else { 0 }; 4882 } 4883 self.state[operands.dst].set_u8x16(c); 4884 ControlFlow::Continue(()) 4885 } 4886 4887 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4888 fn vult8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 4889 let a = self.state[operands.src1].get_u8x16(); 4890 let b = self.state[operands.src2].get_u8x16(); 4891 let mut c = [0; 16]; 4892 for ((a, b), c) in a.iter().zip(&b).zip(&mut c) { 4893 *c = if a < b { u8::MAX } else { 0 }; 4894 } 4895 self.state[operands.dst].set_u8x16(c); 4896 ControlFlow::Continue(()) 4897 } 4898 4899 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4900 fn vulteq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 4901 let a = self.state[operands.src1].get_u8x16(); 4902 let b = self.state[operands.src2].get_u8x16(); 4903 let mut c = [0; 16]; 4904 for ((a, b), c) in a.iter().zip(&b).zip(&mut c) { 4905 *c = if a <= b { u8::MAX } else { 0 }; 4906 } 4907 self.state[operands.dst].set_u8x16(c); 4908 ControlFlow::Continue(()) 4909 } 4910 4911 
#[interp_disable_if_cfg(pulley_disable_interp_simd)] 4912 fn veq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 4913 let a = self.state[operands.src1].get_u16x8(); 4914 let b = self.state[operands.src2].get_u16x8(); 4915 let mut c = [0; 8]; 4916 for ((a, b), c) in a.iter().zip(&b).zip(&mut c) { 4917 *c = if a == b { u16::MAX } else { 0 }; 4918 } 4919 self.state[operands.dst].set_u16x8(c); 4920 ControlFlow::Continue(()) 4921 } 4922 4923 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4924 fn vneq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 4925 let a = self.state[operands.src1].get_u16x8(); 4926 let b = self.state[operands.src2].get_u16x8(); 4927 let mut c = [0; 8]; 4928 for ((a, b), c) in a.iter().zip(&b).zip(&mut c) { 4929 *c = if a != b { u16::MAX } else { 0 }; 4930 } 4931 self.state[operands.dst].set_u16x8(c); 4932 ControlFlow::Continue(()) 4933 } 4934 4935 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4936 fn vslt16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 4937 let a = self.state[operands.src1].get_i16x8(); 4938 let b = self.state[operands.src2].get_i16x8(); 4939 let mut c = [0; 8]; 4940 for ((a, b), c) in a.iter().zip(&b).zip(&mut c) { 4941 *c = if a < b { u16::MAX } else { 0 }; 4942 } 4943 self.state[operands.dst].set_u16x8(c); 4944 ControlFlow::Continue(()) 4945 } 4946 4947 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4948 fn vslteq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 4949 let a = self.state[operands.src1].get_i16x8(); 4950 let b = self.state[operands.src2].get_i16x8(); 4951 let mut c = [0; 8]; 4952 for ((a, b), c) in a.iter().zip(&b).zip(&mut c) { 4953 *c = if a <= b { u16::MAX } else { 0 }; 4954 } 4955 self.state[operands.dst].set_u16x8(c); 4956 ControlFlow::Continue(()) 4957 } 4958 4959 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4960 fn vult16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 4961 
let a = self.state[operands.src1].get_u16x8(); 4962 let b = self.state[operands.src2].get_u16x8(); 4963 let mut c = [0; 8]; 4964 for ((a, b), c) in a.iter().zip(&b).zip(&mut c) { 4965 *c = if a < b { u16::MAX } else { 0 }; 4966 } 4967 self.state[operands.dst].set_u16x8(c); 4968 ControlFlow::Continue(()) 4969 } 4970 4971 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4972 fn vulteq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 4973 let a = self.state[operands.src1].get_u16x8(); 4974 let b = self.state[operands.src2].get_u16x8(); 4975 let mut c = [0; 8]; 4976 for ((a, b), c) in a.iter().zip(&b).zip(&mut c) { 4977 *c = if a <= b { u16::MAX } else { 0 }; 4978 } 4979 self.state[operands.dst].set_u16x8(c); 4980 ControlFlow::Continue(()) 4981 } 4982 4983 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4984 fn veq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 4985 let a = self.state[operands.src1].get_u32x4(); 4986 let b = self.state[operands.src2].get_u32x4(); 4987 let mut c = [0; 4]; 4988 for ((a, b), c) in a.iter().zip(&b).zip(&mut c) { 4989 *c = if a == b { u32::MAX } else { 0 }; 4990 } 4991 self.state[operands.dst].set_u32x4(c); 4992 ControlFlow::Continue(()) 4993 } 4994 4995 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 4996 fn vneq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 4997 let a = self.state[operands.src1].get_u32x4(); 4998 let b = self.state[operands.src2].get_u32x4(); 4999 let mut c = [0; 4]; 5000 for ((a, b), c) in a.iter().zip(&b).zip(&mut c) { 5001 *c = if a != b { u32::MAX } else { 0 }; 5002 } 5003 self.state[operands.dst].set_u32x4(c); 5004 ControlFlow::Continue(()) 5005 } 5006 5007 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5008 fn vslt32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 5009 let a = self.state[operands.src1].get_i32x4(); 5010 let b = self.state[operands.src2].get_i32x4(); 5011 let mut c = [0; 4]; 5012 for ((a, b), 
c) in a.iter().zip(&b).zip(&mut c) { 5013 *c = if a < b { u32::MAX } else { 0 }; 5014 } 5015 self.state[operands.dst].set_u32x4(c); 5016 ControlFlow::Continue(()) 5017 } 5018 5019 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5020 fn vslteq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 5021 let a = self.state[operands.src1].get_i32x4(); 5022 let b = self.state[operands.src2].get_i32x4(); 5023 let mut c = [0; 4]; 5024 for ((a, b), c) in a.iter().zip(&b).zip(&mut c) { 5025 *c = if a <= b { u32::MAX } else { 0 }; 5026 } 5027 self.state[operands.dst].set_u32x4(c); 5028 ControlFlow::Continue(()) 5029 } 5030 5031 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5032 fn vult32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 5033 let a = self.state[operands.src1].get_u32x4(); 5034 let b = self.state[operands.src2].get_u32x4(); 5035 let mut c = [0; 4]; 5036 for ((a, b), c) in a.iter().zip(&b).zip(&mut c) { 5037 *c = if a < b { u32::MAX } else { 0 }; 5038 } 5039 self.state[operands.dst].set_u32x4(c); 5040 ControlFlow::Continue(()) 5041 } 5042 5043 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5044 fn vulteq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 5045 let a = self.state[operands.src1].get_u32x4(); 5046 let b = self.state[operands.src2].get_u32x4(); 5047 let mut c = [0; 4]; 5048 for ((a, b), c) in a.iter().zip(&b).zip(&mut c) { 5049 *c = if a <= b { u32::MAX } else { 0 }; 5050 } 5051 self.state[operands.dst].set_u32x4(c); 5052 ControlFlow::Continue(()) 5053 } 5054 5055 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5056 fn veq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 5057 let a = self.state[operands.src1].get_u64x2(); 5058 let b = self.state[operands.src2].get_u64x2(); 5059 let mut c = [0; 2]; 5060 for ((a, b), c) in a.iter().zip(&b).zip(&mut c) { 5061 *c = if a == b { u64::MAX } else { 0 }; 5062 } 5063 self.state[operands.dst].set_u64x2(c); 5064 
ControlFlow::Continue(()) 5065 } 5066 5067 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5068 fn vneq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 5069 let a = self.state[operands.src1].get_u64x2(); 5070 let b = self.state[operands.src2].get_u64x2(); 5071 let mut c = [0; 2]; 5072 for ((a, b), c) in a.iter().zip(&b).zip(&mut c) { 5073 *c = if a != b { u64::MAX } else { 0 }; 5074 } 5075 self.state[operands.dst].set_u64x2(c); 5076 ControlFlow::Continue(()) 5077 } 5078 5079 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5080 fn vslt64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 5081 let a = self.state[operands.src1].get_i64x2(); 5082 let b = self.state[operands.src2].get_i64x2(); 5083 let mut c = [0; 2]; 5084 for ((a, b), c) in a.iter().zip(&b).zip(&mut c) { 5085 *c = if a < b { u64::MAX } else { 0 }; 5086 } 5087 self.state[operands.dst].set_u64x2(c); 5088 ControlFlow::Continue(()) 5089 } 5090 5091 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5092 fn vslteq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 5093 let a = self.state[operands.src1].get_i64x2(); 5094 let b = self.state[operands.src2].get_i64x2(); 5095 let mut c = [0; 2]; 5096 for ((a, b), c) in a.iter().zip(&b).zip(&mut c) { 5097 *c = if a <= b { u64::MAX } else { 0 }; 5098 } 5099 self.state[operands.dst].set_u64x2(c); 5100 ControlFlow::Continue(()) 5101 } 5102 5103 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5104 fn vult64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 5105 let a = self.state[operands.src1].get_u64x2(); 5106 let b = self.state[operands.src2].get_u64x2(); 5107 let mut c = [0; 2]; 5108 for ((a, b), c) in a.iter().zip(&b).zip(&mut c) { 5109 *c = if a < b { u64::MAX } else { 0 }; 5110 } 5111 self.state[operands.dst].set_u64x2(c); 5112 ControlFlow::Continue(()) 5113 } 5114 5115 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5116 fn vulteq64x2(&mut self, operands: 
BinaryOperands<VReg>) -> ControlFlow<Done> { 5117 let a = self.state[operands.src1].get_u64x2(); 5118 let b = self.state[operands.src2].get_u64x2(); 5119 let mut c = [0; 2]; 5120 for ((a, b), c) in a.iter().zip(&b).zip(&mut c) { 5121 *c = if a <= b { u64::MAX } else { 0 }; 5122 } 5123 self.state[operands.dst].set_u64x2(c); 5124 ControlFlow::Continue(()) 5125 } 5126 5127 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5128 fn vneg8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> { 5129 let a = self.state[src].get_i8x16(); 5130 self.state[dst].set_i8x16(a.map(|i| i.wrapping_neg())); 5131 ControlFlow::Continue(()) 5132 } 5133 5134 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5135 fn vneg16x8(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> { 5136 let a = self.state[src].get_i16x8(); 5137 self.state[dst].set_i16x8(a.map(|i| i.wrapping_neg())); 5138 ControlFlow::Continue(()) 5139 } 5140 5141 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5142 fn vneg32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> { 5143 let a = self.state[src].get_i32x4(); 5144 self.state[dst].set_i32x4(a.map(|i| i.wrapping_neg())); 5145 ControlFlow::Continue(()) 5146 } 5147 5148 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5149 fn vneg64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> { 5150 let a = self.state[src].get_i64x2(); 5151 self.state[dst].set_i64x2(a.map(|i| i.wrapping_neg())); 5152 ControlFlow::Continue(()) 5153 } 5154 5155 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5156 fn vnegf64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> { 5157 let a = self.state[src].get_f64x2(); 5158 self.state[dst].set_f64x2(a.map(|i| -i)); 5159 ControlFlow::Continue(()) 5160 } 5161 5162 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5163 fn vmin8x16_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 5164 let mut a = self.state[operands.src1].get_i8x16(); 5165 let b = self.state[operands.src2].get_i8x16(); 5166 for 
(a, b) in a.iter_mut().zip(&b) { 5167 *a = (*a).min(*b); 5168 } 5169 self.state[operands.dst].set_i8x16(a); 5170 ControlFlow::Continue(()) 5171 } 5172 5173 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5174 fn vmin8x16_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 5175 let mut a = self.state[operands.src1].get_u8x16(); 5176 let b = self.state[operands.src2].get_u8x16(); 5177 for (a, b) in a.iter_mut().zip(&b) { 5178 *a = (*a).min(*b); 5179 } 5180 self.state[operands.dst].set_u8x16(a); 5181 ControlFlow::Continue(()) 5182 } 5183 5184 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5185 fn vmin16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 5186 let mut a = self.state[operands.src1].get_i16x8(); 5187 let b = self.state[operands.src2].get_i16x8(); 5188 for (a, b) in a.iter_mut().zip(&b) { 5189 *a = (*a).min(*b); 5190 } 5191 self.state[operands.dst].set_i16x8(a); 5192 ControlFlow::Continue(()) 5193 } 5194 5195 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5196 fn vmin16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 5197 let mut a = self.state[operands.src1].get_u16x8(); 5198 let b = self.state[operands.src2].get_u16x8(); 5199 for (a, b) in a.iter_mut().zip(&b) { 5200 *a = (*a).min(*b); 5201 } 5202 self.state[operands.dst].set_u16x8(a); 5203 ControlFlow::Continue(()) 5204 } 5205 5206 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5207 fn vmin32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 5208 let mut a = self.state[operands.src1].get_i32x4(); 5209 let b = self.state[operands.src2].get_i32x4(); 5210 for (a, b) in a.iter_mut().zip(&b) { 5211 *a = (*a).min(*b); 5212 } 5213 self.state[operands.dst].set_i32x4(a); 5214 ControlFlow::Continue(()) 5215 } 5216 5217 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5218 fn vmin32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 5219 let mut a = self.state[operands.src1].get_u32x4(); 5220 
let b = self.state[operands.src2].get_u32x4(); 5221 for (a, b) in a.iter_mut().zip(&b) { 5222 *a = (*a).min(*b); 5223 } 5224 self.state[operands.dst].set_u32x4(a); 5225 ControlFlow::Continue(()) 5226 } 5227 5228 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5229 fn vmax8x16_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 5230 let mut a = self.state[operands.src1].get_i8x16(); 5231 let b = self.state[operands.src2].get_i8x16(); 5232 for (a, b) in a.iter_mut().zip(&b) { 5233 *a = (*a).max(*b); 5234 } 5235 self.state[operands.dst].set_i8x16(a); 5236 ControlFlow::Continue(()) 5237 } 5238 5239 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5240 fn vmax8x16_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 5241 let mut a = self.state[operands.src1].get_u8x16(); 5242 let b = self.state[operands.src2].get_u8x16(); 5243 for (a, b) in a.iter_mut().zip(&b) { 5244 *a = (*a).max(*b); 5245 } 5246 self.state[operands.dst].set_u8x16(a); 5247 ControlFlow::Continue(()) 5248 } 5249 5250 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5251 fn vmax16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 5252 let mut a = self.state[operands.src1].get_i16x8(); 5253 let b = self.state[operands.src2].get_i16x8(); 5254 for (a, b) in a.iter_mut().zip(&b) { 5255 *a = (*a).max(*b); 5256 } 5257 self.state[operands.dst].set_i16x8(a); 5258 ControlFlow::Continue(()) 5259 } 5260 5261 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5262 fn vmax16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 5263 let mut a = self.state[operands.src1].get_u16x8(); 5264 let b = self.state[operands.src2].get_u16x8(); 5265 for (a, b) in a.iter_mut().zip(&b) { 5266 *a = (*a).max(*b); 5267 } 5268 self.state[operands.dst].set_u16x8(a); 5269 ControlFlow::Continue(()) 5270 } 5271 5272 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5273 fn vmax32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 5274 
let mut a = self.state[operands.src1].get_i32x4(); 5275 let b = self.state[operands.src2].get_i32x4(); 5276 for (a, b) in a.iter_mut().zip(&b) { 5277 *a = (*a).max(*b); 5278 } 5279 self.state[operands.dst].set_i32x4(a); 5280 ControlFlow::Continue(()) 5281 } 5282 5283 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5284 fn vmax32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 5285 let mut a = self.state[operands.src1].get_u32x4(); 5286 let b = self.state[operands.src2].get_u32x4(); 5287 for (a, b) in a.iter_mut().zip(&b) { 5288 *a = (*a).max(*b); 5289 } 5290 self.state[operands.dst].set_u32x4(a); 5291 ControlFlow::Continue(()) 5292 } 5293 5294 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5295 fn vabs8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> { 5296 let a = self.state[src].get_i8x16(); 5297 self.state[dst].set_i8x16(a.map(|i| i.wrapping_abs())); 5298 ControlFlow::Continue(()) 5299 } 5300 5301 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5302 fn vabs16x8(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> { 5303 let a = self.state[src].get_i16x8(); 5304 self.state[dst].set_i16x8(a.map(|i| i.wrapping_abs())); 5305 ControlFlow::Continue(()) 5306 } 5307 5308 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5309 fn vabs32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> { 5310 let a = self.state[src].get_i32x4(); 5311 self.state[dst].set_i32x4(a.map(|i| i.wrapping_abs())); 5312 ControlFlow::Continue(()) 5313 } 5314 5315 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5316 fn vabs64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> { 5317 let a = self.state[src].get_i64x2(); 5318 self.state[dst].set_i64x2(a.map(|i| i.wrapping_abs())); 5319 ControlFlow::Continue(()) 5320 } 5321 5322 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5323 fn vabsf32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> { 5324 let a = self.state[src].get_f32x4(); 5325 self.state[dst].set_f32x4(a.map(|i| 
i.wasm_abs())); 5326 ControlFlow::Continue(()) 5327 } 5328 5329 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5330 fn vabsf64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> { 5331 let a = self.state[src].get_f64x2(); 5332 self.state[dst].set_f64x2(a.map(|i| i.wasm_abs())); 5333 ControlFlow::Continue(()) 5334 } 5335 5336 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5337 fn vmaximumf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 5338 let mut a = self.state[operands.src1].get_f32x4(); 5339 let b = self.state[operands.src2].get_f32x4(); 5340 for (a, b) in a.iter_mut().zip(&b) { 5341 *a = a.wasm_maximum(*b); 5342 } 5343 self.state[operands.dst].set_f32x4(a); 5344 ControlFlow::Continue(()) 5345 } 5346 5347 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5348 fn vmaximumf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 5349 let mut a = self.state[operands.src1].get_f64x2(); 5350 let b = self.state[operands.src2].get_f64x2(); 5351 for (a, b) in a.iter_mut().zip(&b) { 5352 *a = a.wasm_maximum(*b); 5353 } 5354 self.state[operands.dst].set_f64x2(a); 5355 ControlFlow::Continue(()) 5356 } 5357 5358 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5359 fn vminimumf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 5360 let mut a = self.state[operands.src1].get_f32x4(); 5361 let b = self.state[operands.src2].get_f32x4(); 5362 for (a, b) in a.iter_mut().zip(&b) { 5363 *a = a.wasm_minimum(*b); 5364 } 5365 self.state[operands.dst].set_f32x4(a); 5366 ControlFlow::Continue(()) 5367 } 5368 5369 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5370 fn vminimumf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 5371 let mut a = self.state[operands.src1].get_f64x2(); 5372 let b = self.state[operands.src2].get_f64x2(); 5373 for (a, b) in a.iter_mut().zip(&b) { 5374 *a = a.wasm_minimum(*b); 5375 } 5376 self.state[operands.dst].set_f64x2(a); 5377 
ControlFlow::Continue(()) 5378 } 5379 5380 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5381 fn vshuffle(&mut self, dst: VReg, src1: VReg, src2: VReg, mask: u128) -> ControlFlow<Done> { 5382 let a = self.state[src1].get_u8x16(); 5383 let b = self.state[src2].get_u8x16(); 5384 let result = mask.to_le_bytes().map(|m| { 5385 if m < 16 { 5386 a[m as usize] 5387 } else { 5388 b[m as usize - 16] 5389 } 5390 }); 5391 self.state[dst].set_u8x16(result); 5392 ControlFlow::Continue(()) 5393 } 5394 5395 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5396 fn vswizzlei8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 5397 let src1 = self.state[operands.src1].get_i8x16(); 5398 let src2 = self.state[operands.src2].get_i8x16(); 5399 let mut dst = [0i8; 16]; 5400 for (i, &idx) in src2.iter().enumerate() { 5401 if (idx as usize) < 16 { 5402 dst[i] = src1[idx as usize]; 5403 } else { 5404 dst[i] = 0 5405 } 5406 } 5407 self.state[operands.dst].set_i8x16(dst); 5408 ControlFlow::Continue(()) 5409 } 5410 5411 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5412 fn vavground8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 5413 let mut a = self.state[operands.src1].get_u8x16(); 5414 let b = self.state[operands.src2].get_u8x16(); 5415 for (a, b) in a.iter_mut().zip(&b) { 5416 // use wider precision to avoid overflow 5417 *a = ((u32::from(*a) + u32::from(*b) + 1) / 2) as u8; 5418 } 5419 self.state[operands.dst].set_u8x16(a); 5420 ControlFlow::Continue(()) 5421 } 5422 5423 #[interp_disable_if_cfg(pulley_disable_interp_simd)] 5424 fn vavground16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> { 5425 let mut a = self.state[operands.src1].get_u16x8(); 5426 let b = self.state[operands.src2].get_u16x8(); 5427 for (a, b) in a.iter_mut().zip(&b) { 5428 // use wider precision to avoid overflow 5429 *a = ((u32::from(*a) + u32::from(*b) + 1) / 2) as u16; 5430 } 5431 self.state[operands.dst].set_u16x8(a); 5432 
ControlFlow::Continue(())
    }

    /// Lane-wise `==` over two `f32x4` operands, producing a `u32x4` mask.
    ///
    /// Each result lane is `u32::MAX` when the matching lanes of `src1` and
    /// `src2` compare equal, and `0` otherwise. The mask is written to `dst`.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn veqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let lhs = self.state[operands.src1].get_f32x4();
        let rhs = self.state[operands.src2].get_f32x4();
        let mut mask = [0u32; 4];
        lhs.iter()
            .zip(rhs.iter())
            .zip(mask.iter_mut())
            .for_each(|((x, y), lane)| *lane = if x == y { u32::MAX } else { 0 });
        self.state[operands.dst].set_u32x4(mask);
        ControlFlow::Continue(())
    }

    /// Lane-wise `!=` over two `f32x4` operands, producing a `u32x4` mask.
    ///
    /// Each result lane is `u32::MAX` when the matching lanes of `src1` and
    /// `src2` compare unequal, and `0` otherwise. The mask is written to `dst`.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let lhs = self.state[operands.src1].get_f32x4();
        let rhs = self.state[operands.src2].get_f32x4();
        let mut mask = [0u32; 4];
        lhs.iter()
            .zip(rhs.iter())
            .zip(mask.iter_mut())
            .for_each(|((x, y), lane)| *lane = if x != y { u32::MAX } else { 0 });
        self.state[operands.dst].set_u32x4(mask);
        ControlFlow::Continue(())
    }

    /// Lane-wise `<` over two `f32x4` operands, producing a `u32x4` mask.
    ///
    /// Each result lane is `u32::MAX` when the lane of `src1` is less than the
    /// matching lane of `src2`, and `0` otherwise. The mask is written to `dst`.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vltf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let lhs = self.state[operands.src1].get_f32x4();
        let rhs = self.state[operands.src2].get_f32x4();
        let mut mask = [0u32; 4];
        lhs.iter()
            .zip(rhs.iter())
            .zip(mask.iter_mut())
            .for_each(|((x, y), lane)| *lane = if x < y { u32::MAX } else { 0 });
        self.state[operands.dst].set_u32x4(mask);
        ControlFlow::Continue(())
    }

    /// Lane-wise `<=` over two `f32x4` operands, producing a `u32x4` mask.
    ///
    /// Each result lane is `u32::MAX` when the lane of `src1` is less than or
    /// equal to the matching lane of `src2`, and `0` otherwise. The mask is
    /// written to `dst`.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vlteqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let lhs = self.state[operands.src1].get_f32x4();
        let rhs = self.state[operands.src2].get_f32x4();
        let mut mask = [0u32; 4];
        lhs.iter()
            .zip(rhs.iter())
            .zip(mask.iter_mut())
            .for_each(|((x, y), lane)| *lane = if x <= y { u32::MAX } else { 0 });
        self.state[operands.dst].set_u32x4(mask);
        ControlFlow::Continue(())
    }

    /// Lane-wise `==` over two `f64x2` operands, producing a `u64x2` mask.
    ///
    /// Each result lane is `u64::MAX` when the matching lanes of `src1` and
    /// `src2` compare equal, and `0` otherwise. The mask is written to `dst`.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn veqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let lhs = self.state[operands.src1].get_f64x2();
        let rhs = self.state[operands.src2].get_f64x2();
        let mut mask = [0u64; 2];
        lhs.iter()
            .zip(rhs.iter())
            .zip(mask.iter_mut())
            .for_each(|((x, y), lane)| *lane = if x == y { u64::MAX } else { 0 });
        self.state[operands.dst].set_u64x2(mask);
        ControlFlow::Continue(())
    }

    /// Lane-wise `!=` over two `f64x2` operands, producing a `u64x2` mask.
    ///
    /// Each result lane is `u64::MAX` when the matching lanes of `src1` and
    /// `src2` compare unequal, and `0` otherwise. The mask is written to `dst`.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let lhs = self.state[operands.src1].get_f64x2();
        let rhs = self.state[operands.src2].get_f64x2();
        let mut mask = [0u64; 2];
        lhs.iter()
            .zip(rhs.iter())
            .zip(mask.iter_mut())
            .for_each(|((x, y), lane)| *lane = if x != y { u64::MAX } else { 0 });
        self.state[operands.dst].set_u64x2(mask);
        ControlFlow::Continue(())
    }

    /// Lane-wise `<` over two `f64x2` operands, producing a `u64x2` mask.
    ///
    /// Each result lane is `u64::MAX` when the lane of `src1` is less than the
    /// matching lane of `src2`, and `0` otherwise. The mask is written to `dst`.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vltf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let lhs = self.state[operands.src1].get_f64x2();
        let rhs = self.state[operands.src2].get_f64x2();
        let mut mask = [0u64; 2];
        lhs.iter()
            .zip(rhs.iter())
            .zip(mask.iter_mut())
            .for_each(|((x, y), lane)| *lane = if x < y { u64::MAX } else { 0 });
        self.state[operands.dst].set_u64x2(mask);
        ControlFlow::Continue(())
    }

    /// Lane-wise `<=` over two `f64x2` operands, producing a `u64x2` mask.
    ///
    /// Each result lane is `u64::MAX` when the lane of `src1` is less than or
    /// equal to the matching lane of `src2`, and `0` otherwise. The mask is
    /// written to `dst`.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vlteqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let lhs = self.state[operands.src1].get_f64x2();
        let rhs = self.state[operands.src2].get_f64x2();
        let mut mask = [0u64; 2];
        lhs.iter()
            .zip(rhs.iter())
            .zip(mask.iter_mut())
            .for_each(|((x, y), lane)| *lane = if x <= y { u64::MAX } else { 0 });
        self.state[operands.dst].set_u64x2(mask);
        ControlFlow::Continue(())
    }

    /// Lane-wise multiply-add over `f32x4` registers.
    ///
    /// Every lane of `a` is replaced with `a.wasm_mul_add(b, c)` for the
    /// matching lanes of `b` and `c`; the updated vector is stored in `dst`.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vfma32x4(&mut self, dst: VReg, a: VReg, b: VReg, c: VReg) -> ControlFlow<Done> {
        let mut acc = self.state[a].get_f32x4();
        let factors = self.state[b].get_f32x4();
        let addends = self.state[c].get_f32x4();
        acc.iter_mut()
            .zip(factors)
            .zip(addends)
            .for_each(|((lane, factor), addend)| *lane = lane.wasm_mul_add(factor, addend));
        self.state[dst].set_f32x4(acc);
        ControlFlow::Continue(())
    }

    /// Lane-wise multiply-add over `f64x2` registers.
    ///
    /// Every lane of `a` is replaced with `a.wasm_mul_add(b, c)` for the
    /// matching lanes of `b` and `c`; the updated vector is stored in `dst`.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vfma64x2(&mut self, dst: VReg, a: VReg, b: VReg, c: VReg) -> ControlFlow<Done> {
        let mut acc = self.state[a].get_f64x2();
        let factors = self.state[b].get_f64x2();
        let addends = self.state[c].get_f64x2();
        acc.iter_mut()
            .zip(factors)
            .zip(addends)
            .for_each(|((lane, factor), addend)| *lane = lane.wasm_mul_add(factor, addend));
        self.state[dst].set_f64x2(acc);
        ControlFlow::Continue(())
    }

    /// Copies one of two vector registers into `dst` based on a scalar
    /// condition.
    ///
    /// `if_zero` is chosen when the `u32` value of `cond` is `0`; any other
    /// value chooses `if_nonzero`. Only the chosen register is read.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vselect(
        &mut self,
        dst: VReg,
        cond: XReg,
        if_nonzero: VReg,
        if_zero: VReg,
    ) -> ControlFlow<Done> {
        let source = if self.state[cond].get_u32() == 0 {
            if_zero
        } else {
            if_nonzero
        };
        // Copy the value out first so the read of `state` ends before the write.
        let chosen = self.state[source];
        self.state[dst] = chosen;
        ControlFlow::Continue(())
    }

    /// 128-bit wrapping addition over register pairs.
    ///
    /// Both operands are fetched with `get_i128` from their `(lo, hi)` pairs,
    /// added with wrap-around on overflow, and the sum is stored through
    /// `set_i128` into `(dst_lo, dst_hi)`.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn xadd128(
        &mut self,
        dst_lo: XReg,
        dst_hi: XReg,
        lhs_lo: XReg,
        lhs_hi: XReg,
        rhs_lo: XReg,
        rhs_hi: XReg,
    ) -> ControlFlow<Done> {
        let augend = self.get_i128(lhs_lo, lhs_hi);
        let addend = self.get_i128(rhs_lo, rhs_hi);
        self.set_i128(dst_lo, dst_hi, augend.wrapping_add(addend));
        ControlFlow::Continue(())
    }

    /// 128-bit wrapping subtraction over register pairs.
    ///
    /// Both operands are fetched with `get_i128` from their `(lo, hi)` pairs,
    /// the right-hand value is subtracted from the left-hand one with
    /// wrap-around on overflow, and the difference is stored through
    /// `set_i128` into `(dst_lo, dst_hi)`.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn xsub128(
        &mut self,
        dst_lo: XReg,
        dst_hi: XReg,
        lhs_lo: XReg,
        lhs_hi: XReg,
        rhs_lo: XReg,
        rhs_hi: XReg,
    ) -> ControlFlow<Done> {
        let minuend = self.get_i128(lhs_lo, lhs_hi);
        let subtrahend = self.get_i128(rhs_lo, rhs_hi);
        self.set_i128(dst_lo, dst_hi, minuend.wrapping_sub(subtrahend));
        ControlFlow::Continue(())
    }

/// Signed widening multiply: 64 x 64 -> 128 bits.
    ///
    /// The `i64` values of `lhs` and `rhs` are sign-extended to `i128`,
    /// multiplied, and the product is stored through `set_i128` into
    /// `(dst_lo, dst_hi)`.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn xwidemul64_s(
        &mut self,
        dst_lo: XReg,
        dst_hi: XReg,
        lhs: XReg,
        rhs: XReg,
    ) -> ControlFlow<Done> {
        let wide_lhs = i128::from(self.state[lhs].get_i64());
        let wide_rhs = i128::from(self.state[rhs].get_i64());
        let product = wide_lhs.wrapping_mul(wide_rhs);
        self.set_i128(dst_lo, dst_hi, product);
        ControlFlow::Continue(())
    }

    /// Unsigned widening multiply: 64 x 64 -> 128 bits.
    ///
    /// The `u64` values of `lhs` and `rhs` are zero-extended to `u128` and
    /// multiplied. The product's bits are then reinterpreted as `i128` so it
    /// can be stored through `set_i128` into `(dst_lo, dst_hi)`.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn xwidemul64_u(
        &mut self,
        dst_lo: XReg,
        dst_hi: XReg,
        lhs: XReg,
        rhs: XReg,
    ) -> ControlFlow<Done> {
        let wide_lhs = u128::from(self.state[lhs].get_u64());
        let wide_rhs = u128::from(self.state[rhs].get_u64());
        let product = wide_lhs.wrapping_mul(wide_rhs);
        // Bit-for-bit reinterpretation; `set_i128` is handed a signed value.
        self.set_i128(dst_lo, dst_hi, product as i128);
        ControlFlow::Continue(())
    }
}