xref: /wasmtime-44.0.1/pulley/src/interp.rs (revision 2283e84f)
1 //! Interpretation of pulley bytecode.
2 
3 use crate::decode::*;
4 use crate::encode::Encode;
5 use crate::imms::*;
6 use crate::profile::{ExecutingPc, ExecutingPcRef};
7 use crate::regs::*;
8 use alloc::string::ToString;
9 use core::fmt;
10 use core::mem;
11 use core::ops::ControlFlow;
12 use core::ops::{Index, IndexMut};
13 use core::ptr::NonNull;
14 use pulley_macros::interp_disable_if_cfg;
15 use wasmtime_core::alloc::TryVec;
16 use wasmtime_core::error::OutOfMemory;
17 use wasmtime_core::math::{WasmFloat, f32_cvt_to_int_bounds, f64_cvt_to_int_bounds};
18 
19 mod debug;
20 #[cfg(all(not(pulley_tail_calls), not(pulley_assume_llvm_makes_tail_calls)))]
21 mod match_loop;
22 #[cfg(any(pulley_tail_calls, pulley_assume_llvm_makes_tail_calls))]
23 mod tail_loop;
24 
/// Stack size, in bytes, used when a `Vm` is created via `Vm::new`: 1 MiB.
const DEFAULT_STACK_SIZE: usize = 1024 * 1024;
26 
/// A virtual machine for interpreting Pulley bytecode.
///
/// Bundles the machine state (registers and stack) with a handle tracking the
/// currently executing program counter.
pub struct Vm {
    /// Registers, `fp`/`lr`, and the stack used while running bytecode.
    state: MachineState,
    /// Handle for the currently executing PC; passed to the interpreter in
    /// `call_run` and marked as done when the `Vm` is dropped.
    executing_pc: ExecutingPc,
}
32 
33 impl Vm {
34     /// Create a new virtual machine with the default stack size.
new() -> Result<Self, OutOfMemory>35     pub fn new() -> Result<Self, OutOfMemory> {
36         Self::with_stack(DEFAULT_STACK_SIZE)
37     }
38 
39     /// Create a new virtual machine with the given stack.
with_stack(stack_size: usize) -> Result<Self, OutOfMemory>40     pub fn with_stack(stack_size: usize) -> Result<Self, OutOfMemory> {
41         Ok(Self {
42             state: MachineState::with_stack(stack_size)?,
43             executing_pc: ExecutingPc::default(),
44         })
45     }
46 
47     /// Get a shared reference to this VM's machine state.
state(&self) -> &MachineState48     pub fn state(&self) -> &MachineState {
49         &self.state
50     }
51 
52     /// Get an exclusive reference to this VM's machine state.
state_mut(&mut self) -> &mut MachineState53     pub fn state_mut(&mut self) -> &mut MachineState {
54         &mut self.state
55     }
56 
57     /// Call a bytecode function.
58     ///
59     /// The given `func` must point to the beginning of a valid Pulley bytecode
60     /// function.
61     ///
62     /// The given `args` must match the number and type of arguments that
63     /// function expects.
64     ///
65     /// The given `rets` must match the function's actual return types.
66     ///
67     /// Returns either the resulting values, or the PC at which a trap was
68     /// raised.
call<'a, T>( &'a mut self, func: NonNull<u8>, args: &[Val], rets: T, ) -> DoneReason<impl Iterator<Item = Val> + use<'a, T>> where T: IntoIterator<Item = RegType> + 'a,69     pub unsafe fn call<'a, T>(
70         &'a mut self,
71         func: NonNull<u8>,
72         args: &[Val],
73         rets: T,
74     ) -> DoneReason<impl Iterator<Item = Val> + use<'a, T>>
75     where
76         T: IntoIterator<Item = RegType> + 'a,
77     {
78         unsafe {
79             let lr = self.call_start(args);
80 
81             match self.call_run(func) {
82                 DoneReason::ReturnToHost(()) => DoneReason::ReturnToHost(self.call_end(lr, rets)),
83                 DoneReason::Trap { pc, kind } => DoneReason::Trap { pc, kind },
84                 DoneReason::CallIndirectHost { id, resume } => {
85                     DoneReason::CallIndirectHost { id, resume }
86                 }
87             }
88         }
89     }
90 
91     /// Performs the initial part of [`Vm::call`] in setting up the `args`
92     /// provided in registers according to Pulley's ABI.
93     ///
94     /// # Return
95     ///
96     /// Returns the old `lr` register value. The current `lr` value is replaced
97     /// with a sentinel that triggers a return to the host when returned-to.
98     ///
99     /// # Unsafety
100     ///
101     /// All the same unsafety as `call` and additionally, you must
102     /// invoke `call_run` and then `call_end` after calling `call_start`.
103     /// If you don't want to wrangle these invocations, use `call` instead
104     /// of `call_{start,run,end}`.
call_start<'a>(&'a mut self, args: &[Val]) -> *mut u8105     pub unsafe fn call_start<'a>(&'a mut self, args: &[Val]) -> *mut u8 {
106         // NB: make sure this method stays in sync with
107         // `PulleyMachineDeps::compute_arg_locs`!
108 
109         let mut x_args = (0..15).map(|x| unsafe { XReg::new_unchecked(x) });
110         let mut f_args = (0..16).map(|f| unsafe { FReg::new_unchecked(f) });
111         #[cfg(not(pulley_disable_interp_simd))]
112         let mut v_args = (0..16).map(|v| unsafe { VReg::new_unchecked(v) });
113 
114         for arg in args {
115             match arg {
116                 Val::XReg(val) => match x_args.next() {
117                     Some(reg) => self.state[reg] = *val,
118                     None => todo!("stack slots"),
119                 },
120                 Val::FReg(val) => match f_args.next() {
121                     Some(reg) => self.state[reg] = *val,
122                     None => todo!("stack slots"),
123                 },
124                 #[cfg(not(pulley_disable_interp_simd))]
125                 Val::VReg(val) => match v_args.next() {
126                     Some(reg) => self.state[reg] = *val,
127                     None => todo!("stack slots"),
128                 },
129             }
130         }
131 
132         mem::replace(&mut self.state.lr, HOST_RETURN_ADDR)
133     }
134 
135     /// Performs the internal part of [`Vm::call`] where bytecode is actually
136     /// executed.
137     ///
138     /// # Unsafety
139     ///
140     /// In addition to all the invariants documented for `call`, you
141     /// may only invoke `call_run` after invoking `call_start` to
142     /// initialize this call's arguments.
call_run(&mut self, pc: NonNull<u8>) -> DoneReason<()>143     pub unsafe fn call_run(&mut self, pc: NonNull<u8>) -> DoneReason<()> {
144         self.state.debug_assert_done_reason_none();
145         let interpreter = Interpreter {
146             state: &mut self.state,
147             pc: unsafe { UnsafeBytecodeStream::new(pc) },
148             executing_pc: self.executing_pc.as_ref(),
149         };
150         let done = interpreter.run();
151         self.state.done_decode(done)
152     }
153 
154     /// Performs the tail end of [`Vm::call`] by returning the values as
155     /// determined by `rets` according to Pulley's ABI.
156     ///
157     /// The `old_ret` value should have been provided from `call_start`
158     /// previously.
159     ///
160     /// # Unsafety
161     ///
162     /// In addition to the invariants documented for `call`, this may
163     /// only be called after `call_run`.
call_end<'a>( &'a mut self, old_ret: *mut u8, rets: impl IntoIterator<Item = RegType> + 'a, ) -> impl Iterator<Item = Val> + 'a164     pub unsafe fn call_end<'a>(
165         &'a mut self,
166         old_ret: *mut u8,
167         rets: impl IntoIterator<Item = RegType> + 'a,
168     ) -> impl Iterator<Item = Val> + 'a {
169         self.state.lr = old_ret;
170         // NB: make sure this method stays in sync with
171         // `PulleyMachineDeps::compute_arg_locs`!
172 
173         let mut x_rets = (0..15).map(|x| unsafe { XReg::new_unchecked(x) });
174         let mut f_rets = (0..16).map(|f| unsafe { FReg::new_unchecked(f) });
175         #[cfg(not(pulley_disable_interp_simd))]
176         let mut v_rets = (0..16).map(|v| unsafe { VReg::new_unchecked(v) });
177 
178         rets.into_iter().map(move |ty| match ty {
179             RegType::XReg => match x_rets.next() {
180                 Some(reg) => Val::XReg(self.state[reg]),
181                 None => todo!("stack slots"),
182             },
183             RegType::FReg => match f_rets.next() {
184                 Some(reg) => Val::FReg(self.state[reg]),
185                 None => todo!("stack slots"),
186             },
187             #[cfg(not(pulley_disable_interp_simd))]
188             RegType::VReg => match v_rets.next() {
189                 Some(reg) => Val::VReg(self.state[reg]),
190                 None => todo!("stack slots"),
191             },
192             #[cfg(pulley_disable_interp_simd)]
193             RegType::VReg => panic!("simd support disabled at compile time"),
194         })
195     }
196 
197     /// Returns the current `fp` register value.
fp(&self) -> *mut u8198     pub fn fp(&self) -> *mut u8 {
199         self.state.fp
200     }
201 
202     /// Returns the current `lr` register value.
lr(&self) -> *mut u8203     pub fn lr(&self) -> *mut u8 {
204         self.state.lr
205     }
206 
207     /// Sets the current `fp` register value.
set_fp(&mut self, fp: *mut u8)208     pub unsafe fn set_fp(&mut self, fp: *mut u8) {
209         self.state.fp = fp;
210     }
211 
212     /// Sets the current `lr` register value.
set_lr(&mut self, lr: *mut u8)213     pub unsafe fn set_lr(&mut self, lr: *mut u8) {
214         self.state.lr = lr;
215     }
216 
217     /// Gets a handle to the currently executing program counter for this
218     /// interpreter which can be read from other threads.
219     //
220     // Note that despite this field still existing with `not(feature =
221     // "profile")` it's hidden from the public API in that scenario as it has no
222     // methods anyway.
223     #[cfg(feature = "profile")]
executing_pc(&self) -> &ExecutingPc224     pub fn executing_pc(&self) -> &ExecutingPc {
225         &self.executing_pc
226     }
227 }
228 
impl Drop for Vm {
    /// Marks the executing-PC handle as done when the VM is dropped.
    fn drop(&mut self) {
        self.executing_pc.set_done();
    }
}
234 
/// The type of a register in the Pulley machine state.
///
/// `Vm::call` and `Vm::call_end` take a sequence of these to decide which
/// register class each return value is read from.
#[derive(Clone, Copy, Debug)]
pub enum RegType {
    /// An `x` register: integers.
    XReg,

    /// An `f` register: floats.
    FReg,

    /// A `v` register: vectors.
    ///
    /// The variant always exists, but `Vm::call_end` panics on it when SIMD
    /// support is disabled at compile time.
    VReg,
}
247 
/// A value that can be stored in a register.
///
/// Each variant carries the payload for one register class.
#[derive(Clone, Copy, Debug)]
pub enum Val {
    /// An `x` register value: integers.
    XReg(XRegVal),

    /// An `f` register value: floats.
    FReg(FRegVal),

    /// A `v` register value: vectors.
    ///
    /// Only present when SIMD support is compiled in.
    #[cfg(not(pulley_disable_interp_simd))]
    VReg(VRegVal),
}
261 
262 impl fmt::LowerHex for Val {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result263     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
264         match self {
265             Val::XReg(v) => fmt::LowerHex::fmt(v, f),
266             Val::FReg(v) => fmt::LowerHex::fmt(v, f),
267             #[cfg(not(pulley_disable_interp_simd))]
268             Val::VReg(v) => fmt::LowerHex::fmt(v, f),
269         }
270     }
271 }
272 
273 impl From<XRegVal> for Val {
from(value: XRegVal) -> Self274     fn from(value: XRegVal) -> Self {
275         Val::XReg(value)
276     }
277 }
278 
279 impl From<u64> for Val {
from(value: u64) -> Self280     fn from(value: u64) -> Self {
281         XRegVal::new_u64(value).into()
282     }
283 }
284 
285 impl From<u32> for Val {
from(value: u32) -> Self286     fn from(value: u32) -> Self {
287         XRegVal::new_u32(value).into()
288     }
289 }
290 
291 impl From<i64> for Val {
from(value: i64) -> Self292     fn from(value: i64) -> Self {
293         XRegVal::new_i64(value).into()
294     }
295 }
296 
297 impl From<i32> for Val {
from(value: i32) -> Self298     fn from(value: i32) -> Self {
299         XRegVal::new_i32(value).into()
300     }
301 }
302 
303 impl<T> From<*mut T> for Val {
from(value: *mut T) -> Self304     fn from(value: *mut T) -> Self {
305         XRegVal::new_ptr(value).into()
306     }
307 }
308 
309 impl From<FRegVal> for Val {
from(value: FRegVal) -> Self310     fn from(value: FRegVal) -> Self {
311         Val::FReg(value)
312     }
313 }
314 
315 impl From<f64> for Val {
from(value: f64) -> Self316     fn from(value: f64) -> Self {
317         FRegVal::new_f64(value).into()
318     }
319 }
320 
321 impl From<f32> for Val {
from(value: f32) -> Self322     fn from(value: f32) -> Self {
323         FRegVal::new_f32(value).into()
324     }
325 }
326 
327 #[cfg(not(pulley_disable_interp_simd))]
328 impl From<VRegVal> for Val {
from(value: VRegVal) -> Self329     fn from(value: VRegVal) -> Self {
330         Val::VReg(value)
331     }
332 }
333 
/// An `x` register value: integers.
///
/// Thin wrapper around `XRegUnion`; all access goes through the typed
/// `get_*`/`set_*` methods, which handle the little-endian storage format.
#[derive(Copy, Clone)]
pub struct XRegVal(XRegUnion);
337 
338 impl PartialEq for XRegVal {
eq(&self, other: &Self) -> bool339     fn eq(&self, other: &Self) -> bool {
340         self.get_u64() == other.get_u64()
341     }
342 }
343 
// Equality compares the `u64` view (see `PartialEq` above), which is a total
// equivalence relation, so `Eq` holds.
impl Eq for XRegVal {}
345 
346 impl fmt::Debug for XRegVal {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result347     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
348         f.debug_struct("XRegVal")
349             .field("as_u64", &self.get_u64())
350             .finish()
351     }
352 }
353 
354 impl fmt::LowerHex for XRegVal {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result355     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
356         fmt::LowerHex::fmt(&self.get_u64(), f)
357     }
358 }
359 
/// Contents of an "x" register, or a general-purpose register.
///
/// This is represented as a Rust `union` to make it easier to access typed
/// views of this, notably the `ptr` field which enables preserving a bit of
/// provenance for Rust for values stored as a pointer and read as a pointer.
///
/// Note that the actual in-memory representation of this value is handled
/// carefully at this time. Pulley bytecode exposes the ability to store a
/// 32-bit result into a register and then read the 64-bit contents of the
/// register. This leaves us with the question of what to do with the upper bits
/// of the register when the 32-bit result is generated. Possibilities for
/// handling this are:
///
/// 1. Do nothing, just store the 32-bit value. The problem with this approach
///    is that the "upper bits" are now endianness-dependent. That means that
///    the state of the register is now platform-dependent.
/// 2. Sign or zero-extend. This restores platform-independent behavior but
///    requires an extra store on 32-bit platforms because they can probably
///    only store 32 bits at a time.
/// 3. Always store the values in this union as little-endian. This means that
///    big-endian platforms have to do a byte-swap but otherwise it has
///    platform-independent behavior.
///
/// This union chooses route (3) at this time where the values here are always
/// stored in little-endian form (even the `ptr` field). That guarantees
/// cross-platform behavior while also minimizing the amount of data stored on
/// writes.
///
/// In the future we may wish to benchmark this and possibly change this.
/// Technically Cranelift-generated bytecode should never rely on the upper bits
/// of a register if it didn't previously write them so this in theory doesn't
/// actually matter for Cranelift or wasm semantics. The only cost right now is
/// to big-endian platforms though and it's not certain how crucial performance
/// will be there.
///
/// One final note is that this notably contrasts with native CPUs where
/// native ISAs like RISC-V specifically define the entire register on every
/// instruction, even if only the low half contains a significant result. Pulley
/// is unlikely to become out-of-order within the CPU itself as it's interpreted,
/// meaning that severing data-dependencies with previous operations is
/// hypothesized to not be too important. If this is ever a problem though it
/// could increase the likelihood we go for route (2) above instead (or maybe
/// even (1)).
#[derive(Copy, Clone)]
union XRegUnion {
    i32: i32,
    u32: u32,
    i64: i64,
    u64: u64,

    // Note that this is intentionally `usize` and not an actual pointer like
    // `*mut u8`. The reason for this is that provenance is required in Rust for
    // pointers but Cranelift has no pointer type and thus no concept of
    // provenance. That means that at-rest it's not known whether the value has
    // provenance or not and basically means that Pulley is required to use
    // "permissive provenance" in Rust as opposed to strict provenance.
    //
    // That's more-or-less a long-winded way of saying that storage of a pointer
    // in this value is done with `.expose_provenance()` and reading a pointer
    // uses `with_exposed_provenance_mut(..)`.
    ptr: usize,
}
422 
423 impl Default for XRegVal {
default() -> Self424     fn default() -> Self {
425         Self(unsafe { mem::zeroed() })
426     }
427 }
428 
429 #[expect(missing_docs, reason = "self-describing methods")]
430 impl XRegVal {
new_i32(x: i32) -> Self431     pub fn new_i32(x: i32) -> Self {
432         let mut val = XRegVal::default();
433         val.set_i32(x);
434         val
435     }
436 
new_u32(x: u32) -> Self437     pub fn new_u32(x: u32) -> Self {
438         let mut val = XRegVal::default();
439         val.set_u32(x);
440         val
441     }
442 
new_i64(x: i64) -> Self443     pub fn new_i64(x: i64) -> Self {
444         let mut val = XRegVal::default();
445         val.set_i64(x);
446         val
447     }
448 
new_u64(x: u64) -> Self449     pub fn new_u64(x: u64) -> Self {
450         let mut val = XRegVal::default();
451         val.set_u64(x);
452         val
453     }
454 
new_ptr<T>(ptr: *mut T) -> Self455     pub fn new_ptr<T>(ptr: *mut T) -> Self {
456         let mut val = XRegVal::default();
457         val.set_ptr(ptr);
458         val
459     }
460 
get_i32(&self) -> i32461     pub fn get_i32(&self) -> i32 {
462         let x = unsafe { self.0.i32 };
463         i32::from_le(x)
464     }
465 
get_u32(&self) -> u32466     pub fn get_u32(&self) -> u32 {
467         let x = unsafe { self.0.u32 };
468         u32::from_le(x)
469     }
470 
get_i64(&self) -> i64471     pub fn get_i64(&self) -> i64 {
472         let x = unsafe { self.0.i64 };
473         i64::from_le(x)
474     }
475 
get_u64(&self) -> u64476     pub fn get_u64(&self) -> u64 {
477         let x = unsafe { self.0.u64 };
478         u64::from_le(x)
479     }
480 
get_ptr<T>(&self) -> *mut T481     pub fn get_ptr<T>(&self) -> *mut T {
482         let ptr = unsafe { self.0.ptr };
483         core::ptr::with_exposed_provenance_mut(usize::from_le(ptr))
484     }
485 
set_i32(&mut self, x: i32)486     pub fn set_i32(&mut self, x: i32) {
487         self.0.i32 = x.to_le();
488     }
489 
set_u32(&mut self, x: u32)490     pub fn set_u32(&mut self, x: u32) {
491         self.0.u32 = x.to_le();
492     }
493 
set_i64(&mut self, x: i64)494     pub fn set_i64(&mut self, x: i64) {
495         self.0.i64 = x.to_le();
496     }
497 
set_u64(&mut self, x: u64)498     pub fn set_u64(&mut self, x: u64) {
499         self.0.u64 = x.to_le();
500     }
501 
set_ptr<T>(&mut self, ptr: *mut T)502     pub fn set_ptr<T>(&mut self, ptr: *mut T) {
503         self.0.ptr = ptr.expose_provenance().to_le();
504     }
505 }
506 
/// An `f` register value: floats.
///
/// Thin wrapper around `FRegUnion`; values are read and written through the
/// typed `get_*`/`set_*` methods.
#[derive(Copy, Clone)]
pub struct FRegVal(FRegUnion);
510 
511 impl fmt::Debug for FRegVal {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result512     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
513         f.debug_struct("FRegVal")
514             .field("as_f32", &self.get_f32())
515             .field("as_f64", &self.get_f64())
516             .finish()
517     }
518 }
519 
520 impl fmt::LowerHex for FRegVal {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result521     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
522         fmt::LowerHex::fmt(&self.get_f64().to_bits(), f)
523     }
524 }
525 
// Storage for an `f` register.
//
// NB: like `XRegUnion`, values here are always little-endian; see the
// documentation above for more details.
//
// The fields hold the floats' bit patterns as integers rather than as
// `f32`/`f64`.
#[derive(Copy, Clone)]
union FRegUnion {
    f32: u32,
    f64: u64,
}
533 
534 impl Default for FRegVal {
default() -> Self535     fn default() -> Self {
536         Self(unsafe { mem::zeroed() })
537     }
538 }
539 
540 #[expect(missing_docs, reason = "self-describing methods")]
541 impl FRegVal {
new_f32(f: f32) -> Self542     pub fn new_f32(f: f32) -> Self {
543         let mut val = Self::default();
544         val.set_f32(f);
545         val
546     }
547 
new_f64(f: f64) -> Self548     pub fn new_f64(f: f64) -> Self {
549         let mut val = Self::default();
550         val.set_f64(f);
551         val
552     }
553 
get_f32(&self) -> f32554     pub fn get_f32(&self) -> f32 {
555         let val = unsafe { self.0.f32 };
556         f32::from_le_bytes(val.to_ne_bytes())
557     }
558 
get_f64(&self) -> f64559     pub fn get_f64(&self) -> f64 {
560         let val = unsafe { self.0.f64 };
561         f64::from_le_bytes(val.to_ne_bytes())
562     }
563 
set_f32(&mut self, val: f32)564     pub fn set_f32(&mut self, val: f32) {
565         self.0.f32 = u32::from_ne_bytes(val.to_le_bytes());
566     }
567 
set_f64(&mut self, val: f64)568     pub fn set_f64(&mut self, val: f64) {
569         self.0.f64 = u64::from_ne_bytes(val.to_le_bytes());
570     }
571 }
572 
/// A `v` register value: vectors.
///
/// Thin wrapper around `VRegUnion`; only compiled when SIMD support is
/// enabled.
#[derive(Copy, Clone)]
#[cfg(not(pulley_disable_interp_simd))]
pub struct VRegVal(VRegUnion);
577 
578 #[cfg(not(pulley_disable_interp_simd))]
579 impl fmt::Debug for VRegVal {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result580     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
581         f.debug_struct("VRegVal")
582             .field("as_u128", &unsafe { self.0.u128 })
583             .finish()
584     }
585 }
586 
587 #[cfg(not(pulley_disable_interp_simd))]
588 impl fmt::LowerHex for VRegVal {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result589     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
590         fmt::LowerHex::fmt(unsafe { &self.0.u128 }, f)
591     }
592 }
593 
/// 128-bit vector registers.
///
/// This register is always stored in little-endian order and has different
/// constraints than `XRegVal` and `FRegVal` above. Notably all fields of this
/// union are the same width so all bits are always defined. Note that
/// little-endian is required though so that bitcasts between different shapes
/// of vectors work. This union cannot be stored in big-endian.
#[derive(Copy, Clone)]
#[repr(align(16))]
#[cfg(not(pulley_disable_interp_simd))]
union VRegUnion {
    u128: u128,
    i8x16: [i8; 16],
    i16x8: [i16; 8],
    i32x4: [i32; 4],
    i64x2: [i64; 2],
    u8x16: [u8; 16],
    u16x8: [u16; 8],
    u32x4: [u32; 4],
    u64x2: [u64; 2],
    // Note that these are `u32` and `u64`, not f32/f64. That's only because
    // f32/f64 don't have `.to_le()` and `::from_le()`, so the lanes need to go
    // through their bit patterns anyway.
    f32x4: [u32; 4],
    f64x2: [u64; 2],
}
620 
621 #[cfg(not(pulley_disable_interp_simd))]
622 impl Default for VRegVal {
default() -> Self623     fn default() -> Self {
624         Self(unsafe { mem::zeroed() })
625     }
626 }
627 
628 #[expect(missing_docs, reason = "self-describing methods")]
629 #[cfg(not(pulley_disable_interp_simd))]
630 impl VRegVal {
new_u128(i: u128) -> Self631     pub fn new_u128(i: u128) -> Self {
632         let mut val = Self::default();
633         val.set_u128(i);
634         val
635     }
636 
get_u128(&self) -> u128637     pub fn get_u128(&self) -> u128 {
638         let val = unsafe { self.0.u128 };
639         u128::from_le(val)
640     }
641 
set_u128(&mut self, val: u128)642     pub fn set_u128(&mut self, val: u128) {
643         self.0.u128 = val.to_le();
644     }
645 
get_i8x16(&self) -> [i8; 16]646     fn get_i8x16(&self) -> [i8; 16] {
647         let val = unsafe { self.0.i8x16 };
648         val.map(|e| i8::from_le(e))
649     }
650 
set_i8x16(&mut self, val: [i8; 16])651     fn set_i8x16(&mut self, val: [i8; 16]) {
652         self.0.i8x16 = val.map(|e| e.to_le());
653     }
654 
get_u8x16(&self) -> [u8; 16]655     fn get_u8x16(&self) -> [u8; 16] {
656         let val = unsafe { self.0.u8x16 };
657         val.map(|e| u8::from_le(e))
658     }
659 
set_u8x16(&mut self, val: [u8; 16])660     fn set_u8x16(&mut self, val: [u8; 16]) {
661         self.0.u8x16 = val.map(|e| e.to_le());
662     }
663 
get_i16x8(&self) -> [i16; 8]664     fn get_i16x8(&self) -> [i16; 8] {
665         let val = unsafe { self.0.i16x8 };
666         val.map(|e| i16::from_le(e))
667     }
668 
set_i16x8(&mut self, val: [i16; 8])669     fn set_i16x8(&mut self, val: [i16; 8]) {
670         self.0.i16x8 = val.map(|e| e.to_le());
671     }
672 
get_u16x8(&self) -> [u16; 8]673     fn get_u16x8(&self) -> [u16; 8] {
674         let val = unsafe { self.0.u16x8 };
675         val.map(|e| u16::from_le(e))
676     }
677 
set_u16x8(&mut self, val: [u16; 8])678     fn set_u16x8(&mut self, val: [u16; 8]) {
679         self.0.u16x8 = val.map(|e| e.to_le());
680     }
681 
get_i32x4(&self) -> [i32; 4]682     fn get_i32x4(&self) -> [i32; 4] {
683         let val = unsafe { self.0.i32x4 };
684         val.map(|e| i32::from_le(e))
685     }
686 
set_i32x4(&mut self, val: [i32; 4])687     fn set_i32x4(&mut self, val: [i32; 4]) {
688         self.0.i32x4 = val.map(|e| e.to_le());
689     }
690 
get_u32x4(&self) -> [u32; 4]691     fn get_u32x4(&self) -> [u32; 4] {
692         let val = unsafe { self.0.u32x4 };
693         val.map(|e| u32::from_le(e))
694     }
695 
set_u32x4(&mut self, val: [u32; 4])696     fn set_u32x4(&mut self, val: [u32; 4]) {
697         self.0.u32x4 = val.map(|e| e.to_le());
698     }
699 
get_i64x2(&self) -> [i64; 2]700     fn get_i64x2(&self) -> [i64; 2] {
701         let val = unsafe { self.0.i64x2 };
702         val.map(|e| i64::from_le(e))
703     }
704 
set_i64x2(&mut self, val: [i64; 2])705     fn set_i64x2(&mut self, val: [i64; 2]) {
706         self.0.i64x2 = val.map(|e| e.to_le());
707     }
708 
get_u64x2(&self) -> [u64; 2]709     fn get_u64x2(&self) -> [u64; 2] {
710         let val = unsafe { self.0.u64x2 };
711         val.map(|e| u64::from_le(e))
712     }
713 
set_u64x2(&mut self, val: [u64; 2])714     fn set_u64x2(&mut self, val: [u64; 2]) {
715         self.0.u64x2 = val.map(|e| e.to_le());
716     }
717 
get_f64x2(&self) -> [f64; 2]718     fn get_f64x2(&self) -> [f64; 2] {
719         let val = unsafe { self.0.f64x2 };
720         val.map(|e| f64::from_bits(u64::from_le(e)))
721     }
722 
set_f64x2(&mut self, val: [f64; 2])723     fn set_f64x2(&mut self, val: [f64; 2]) {
724         self.0.f64x2 = val.map(|e| e.to_bits().to_le());
725     }
726 
get_f32x4(&self) -> [f32; 4]727     fn get_f32x4(&self) -> [f32; 4] {
728         let val = unsafe { self.0.f32x4 };
729         val.map(|e| f32::from_bits(u32::from_le(e)))
730     }
731 
set_f32x4(&mut self, val: [f32; 4])732     fn set_f32x4(&mut self, val: [f32; 4]) {
733         self.0.f32x4 = val.map(|e| e.to_bits().to_le());
734     }
735 }
736 
/// The machine state for a Pulley virtual machine: the various registers and
/// stack.
pub struct MachineState {
    /// Storage for the `x` (integer) registers, indexed by `XReg`.
    x_regs: [XRegVal; XReg::RANGE.end as usize],
    /// Storage for the `f` (float) registers, indexed by `FReg`.
    f_regs: [FRegVal; FReg::RANGE.end as usize],
    /// Storage for the `v` (vector) registers, indexed by `VReg`; only
    /// present when SIMD support is compiled in.
    #[cfg(not(pulley_disable_interp_simd))]
    v_regs: [VRegVal; VReg::RANGE.end as usize],
    /// The `fp` register. Initialized to `HOST_RETURN_ADDR`.
    fp: *mut u8,
    /// The `lr` register. Initialized to `HOST_RETURN_ADDR`, and swapped to
    /// that sentinel again by `Vm::call_start` for the duration of a call.
    lr: *mut u8,
    /// Backing allocation for the stack; `sp` initially points at its top.
    stack: Stack,
    /// NOTE(review): presumably records why execution stopped until
    /// `done_decode` consumes it -- its writers are not visible here, confirm.
    done_reason: Option<DoneReason<()>>,
}
749 
// The raw pointer fields (`fp`, `lr`) prevent `Send`/`Sync` from being derived
// automatically, so they are asserted manually here.
//
// NOTE(review): no SAFETY justification accompanies these impls; presumably
// the pointers only refer to the state's own stack or to sentinel values and
// the state is never mutated through a shared reference -- TODO confirm and
// document.
unsafe impl Send for MachineState {}
unsafe impl Sync for MachineState {}
752 
/// Helper structure to store the state of the Pulley stack.
///
/// The Pulley stack notably needs to be a 16-byte aligned allocation on the
/// host to ensure that addresses handed out are indeed 16-byte aligned. This is
/// done with a custom `Vec<T>` internally where `T` has size and align of 16.
/// This is manually done with a helper `Align16` type below.
struct Stack {
    /// Backing allocation. Only its capacity is used (see `Stack::new` and
    /// `Stack::len`).
    storage: TryVec<Align16>,
}
762 
/// Helper type used with `Stack` above.
///
/// Serves as the element type of the stack's backing vector, giving the
/// allocation 16-byte alignment and 16-byte granularity.
#[derive(Copy, Clone)]
#[repr(align(16))]
struct Align16 {
    // Just here to give the structure a size of 16. The alignment is always 16
    // regardless of what the host platform's alignment of u128 is.
    _unused: u128,
}
771 
772 impl Stack {
773     /// Creates a new stack which will have a byte size of at least `size`.
774     ///
775     /// The allocated stack might be slightly larger due to rounding necessary.
new(size: usize) -> Result<Stack, OutOfMemory>776     fn new(size: usize) -> Result<Stack, OutOfMemory> {
777         let mut storage = TryVec::new();
778         // Round up `size` to the nearest multiple of 16. Note that the
779         // stack is also allocated here but not initialized, and that's
780         // intentional as pulley bytecode should always initialize the stack
781         // before use.
782         storage.reserve_exact(size.checked_next_multiple_of(16).unwrap_or(usize::MAX) / 16)?;
783         Ok(Stack { storage })
784     }
785 
786     /// Returns a pointer to the top of the stack (the highest address).
787     ///
788     /// Note that the returned pointer has provenance for the entire stack
789     /// allocation, however, not just the top.
top(&mut self) -> *mut u8790     fn top(&mut self) -> *mut u8 {
791         let len = self.len();
792         unsafe { self.base().add(len) }
793     }
794 
795     /// Returns a pointer to the base of the stack (the lowest address).
796     ///
797     /// Note that the returned pointer has provenance for the entire stack
798     /// allocation, however, not just the top.
base(&mut self) -> *mut u8799     fn base(&mut self) -> *mut u8 {
800         self.storage.as_mut_ptr().cast::<u8>()
801     }
802 
803     /// Returns the length, in bytes, of this stack allocation.
len(&self) -> usize804     fn len(&self) -> usize {
805         self.storage.capacity() * mem::size_of::<Align16>()
806     }
807 }
808 
809 impl fmt::Debug for MachineState {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result810     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
811         let MachineState {
812             x_regs,
813             f_regs,
814             #[cfg(not(pulley_disable_interp_simd))]
815             v_regs,
816             stack: _,
817             done_reason: _,
818             fp: _,
819             lr: _,
820         } = self;
821 
822         struct RegMap<'a, R>(&'a [R], fn(u8) -> alloc::string::String);
823 
824         impl<R: fmt::Debug> fmt::Debug for RegMap<'_, R> {
825             fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
826                 let mut f = f.debug_map();
827                 for (i, r) in self.0.iter().enumerate() {
828                     f.entry(&(self.1)(i as u8), r);
829                 }
830                 f.finish()
831             }
832         }
833 
834         let mut f = f.debug_struct("MachineState");
835 
836         f.field(
837             "x_regs",
838             &RegMap(x_regs, |i| XReg::new(i).unwrap().to_string()),
839         )
840         .field(
841             "f_regs",
842             &RegMap(f_regs, |i| FReg::new(i).unwrap().to_string()),
843         );
844         #[cfg(not(pulley_disable_interp_simd))]
845         f.field(
846             "v_regs",
847             &RegMap(v_regs, |i| VReg::new(i).unwrap().to_string()),
848         );
849         f.finish_non_exhaustive()
850     }
851 }
852 
// Generates `Index`/`IndexMut` implementations that let both `Vm` and
// `MachineState` be indexed by a register type.
//
// * `$reg_ty`   - the register type used as the index (e.g. `XReg`).
// * `$value_ty` - the value type stored for that register (e.g. `XRegVal`).
// * `$field`    - the `MachineState` array field holding those values.
//
// The `Vm` impls simply forward to the `MachineState` impls, which index the
// array with `reg.index()`.
macro_rules! index_reg {
    ($reg_ty:ty,$value_ty:ty,$field:ident) => {
        impl Index<$reg_ty> for Vm {
            type Output = $value_ty;

            fn index(&self, reg: $reg_ty) -> &Self::Output {
                &self.state[reg]
            }
        }

        impl IndexMut<$reg_ty> for Vm {
            fn index_mut(&mut self, reg: $reg_ty) -> &mut Self::Output {
                &mut self.state[reg]
            }
        }

        impl Index<$reg_ty> for MachineState {
            type Output = $value_ty;

            fn index(&self, reg: $reg_ty) -> &Self::Output {
                &self.$field[reg.index()]
            }
        }

        impl IndexMut<$reg_ty> for MachineState {
            fn index_mut(&mut self, reg: $reg_ty) -> &mut Self::Output {
                &mut self.$field[reg.index()]
            }
        }
    };
}
884 
885 index_reg!(XReg, XRegVal, x_regs);
886 index_reg!(FReg, FRegVal, f_regs);
887 #[cfg(not(pulley_disable_interp_simd))]
888 index_reg!(VReg, VRegVal, v_regs);
889 
/// Sentinel return address that signals the end of the call stack.
///
/// A freshly created `MachineState` has both `fp` and `lr` set to this value,
/// and the `ret` instruction hands control back to the host when `lr` equals
/// it. The value is the all-ones address (`usize::MAX`).
const HOST_RETURN_ADDR: *mut u8 = usize::MAX as *mut u8;
892 
893 impl MachineState {
with_stack(stack_size: usize) -> Result<Self, OutOfMemory>894     fn with_stack(stack_size: usize) -> Result<Self, OutOfMemory> {
895         let mut state = Self {
896             x_regs: [Default::default(); XReg::RANGE.end as usize],
897             f_regs: Default::default(),
898             #[cfg(not(pulley_disable_interp_simd))]
899             v_regs: Default::default(),
900             stack: Stack::new(stack_size)?,
901             done_reason: None,
902             fp: HOST_RETURN_ADDR,
903             lr: HOST_RETURN_ADDR,
904         };
905 
906         let sp = state.stack.top();
907         state[XReg::sp] = XRegVal::new_ptr(sp);
908 
909         Ok(state)
910     }
911 }
912 
913 /// Inner private module to prevent creation of the `Done` structure outside of
914 /// this module.
915 mod done {
916     use super::{Encode, Interpreter, MachineState};
917     use core::ops::ControlFlow;
918     use core::ptr::NonNull;
919 
    /// Zero-sized sentinel indicating that pulley execution has halted.
    ///
    /// The reason for halting is stored in `MachineState` and is retrieved
    /// with `MachineState::done_decode`. The private field keeps code outside
    /// this module from constructing a `Done` on its own.
    #[derive(Copy, Clone, Debug, PartialEq, Eq)]
    pub struct Done {
        _priv: (),
    }
927 
    /// Reason that the pulley interpreter has ceased execution.
    ///
    /// `T` is the payload carried by the `ReturnToHost` variant.
    pub enum DoneReason<T> {
        /// A trap happened at this bytecode instruction.
        Trap {
            /// Which instruction is raising this trap.
            pc: NonNull<u8>,
            /// The kind of trap being raised, if known.
            kind: Option<TrapKind>,
        },
        /// The `call_indirect_host` instruction was executed.
        CallIndirectHost {
            /// The payload of `call_indirect_host`.
            id: u8,
            /// Where to resume execution after the host has finished.
            resume: NonNull<u8>,
        },
        /// Pulley has finished and the provided value is being returned.
        ReturnToHost(T),
    }
947 
    /// The category of a trap, stored within `DoneReason::Trap`.
    // Variants intentionally carry no doc comments: the `expect` below relies
    // on the `missing_docs` lint firing for them.
    #[expect(missing_docs, reason = "self-describing variants")]
    pub enum TrapKind {
        DivideByZero,
        IntegerOverflow,
        BadConversionToInteger,
        MemoryOutOfBounds,
        DisabledOpcode,
        StackOverflow,
    }
958 
959     impl MachineState {
debug_assert_done_reason_none(&mut self)960         pub(super) fn debug_assert_done_reason_none(&mut self) {
961             debug_assert!(self.done_reason.is_none());
962         }
963 
964         pub(super) fn done_decode(&mut self, Done { _priv }: Done) -> DoneReason<()> {
965             self.done_reason.take().unwrap()
966         }
967     }
968 
969     impl Interpreter<'_> {
970         /// Finishes execution by recording `DoneReason::Trap`.
971         ///
972         /// This method takes an `I` generic parameter indicating which
973         /// instruction is executing this function and generating a trap. That's
974         /// used to go backwards from the current `pc` which is just beyond the
975         /// instruction to point to the instruction itself in the trap metadata
976         /// returned from the interpreter.
977         #[cold]
done_trap<I: Encode>(&mut self) -> ControlFlow<Done>978         pub fn done_trap<I: Encode>(&mut self) -> ControlFlow<Done> {
979             self.done_trap_kind::<I>(None)
980         }
981 
982         /// Same as `done_trap` but with an explicit `TrapKind`.
983         #[cold]
done_trap_kind<I: Encode>(&mut self, kind: Option<TrapKind>) -> ControlFlow<Done>984         pub fn done_trap_kind<I: Encode>(&mut self, kind: Option<TrapKind>) -> ControlFlow<Done> {
985             let pc = self.current_pc::<I>();
986             self.state.done_reason = Some(DoneReason::Trap { pc, kind });
987             ControlFlow::Break(Done { _priv: () })
988         }
989 
990         /// Finishes execution by recording `DoneReason::CallIndirectHost`.
991         #[cold]
done_call_indirect_host(&mut self, id: u8) -> ControlFlow<Done>992         pub fn done_call_indirect_host(&mut self, id: u8) -> ControlFlow<Done> {
993             self.state.done_reason = Some(DoneReason::CallIndirectHost {
994                 id,
995                 resume: self.pc.as_ptr(),
996             });
997             ControlFlow::Break(Done { _priv: () })
998         }
999 
1000         /// Finishes execution by recording `DoneReason::ReturnToHost`.
1001         #[cold]
done_return_to_host(&mut self) -> ControlFlow<Done>1002         pub fn done_return_to_host(&mut self) -> ControlFlow<Done> {
1003             self.state.done_reason = Some(DoneReason::ReturnToHost(()));
1004             ControlFlow::Break(Done { _priv: () })
1005         }
1006     }
1007 }
1008 
1009 use done::Done;
1010 pub use done::{DoneReason, TrapKind};
1011 
/// Execution context that runs bytecode against a borrowed `MachineState`.
struct Interpreter<'a> {
    /// Registers, stack, and done reason that instructions read and update.
    state: &'a mut MachineState,
    /// Position in the bytecode. While an instruction handler runs this
    /// points just past the instruction being executed.
    pc: UnsafeBytecodeStream,
    /// Sink that `record_executing_pc_for_profiling` reports the current pc
    /// to.
    executing_pc: ExecutingPcRef<'a>,
}
1017 
1018 impl Interpreter<'_> {
1019     /// Calculates the `offset` for the current instruction `I`.
1020     #[inline]
pc_rel<I: Encode>(&mut self, offset: PcRelOffset) -> NonNull<u8>1021     fn pc_rel<I: Encode>(&mut self, offset: PcRelOffset) -> NonNull<u8> {
1022         let offset = isize::try_from(i32::from(offset)).unwrap();
1023         unsafe { self.current_pc::<I>().offset(offset) }
1024     }
1025 
1026     /// Performs a relative jump of `offset` bytes from the current instruction.
1027     ///
1028     /// This will jump from the start of the current instruction, identified by
1029     /// `I`, `offset` bytes away. Note that the `self.pc` at the start of this
1030     /// function actually points to the instruction after this one so `I` is
1031     /// necessary to go back to ourselves after which we then go `offset` away.
1032     #[inline]
pc_rel_jump<I: Encode>(&mut self, offset: PcRelOffset) -> ControlFlow<Done>1033     fn pc_rel_jump<I: Encode>(&mut self, offset: PcRelOffset) -> ControlFlow<Done> {
1034         let new_pc = self.pc_rel::<I>(offset);
1035         self.pc = unsafe { UnsafeBytecodeStream::new(new_pc) };
1036         ControlFlow::Continue(())
1037     }
1038 
1039     /// Returns the PC of the current instruction where `I` is the static type
1040     /// representing the current instruction.
current_pc<I: Encode>(&self) -> NonNull<u8>1041     fn current_pc<I: Encode>(&self) -> NonNull<u8> {
1042         unsafe { self.pc.offset(-isize::from(I::WIDTH)).as_ptr() }
1043     }
1044 
1045     /// `sp -= size_of::<T>(); *sp = val;`
1046     ///
1047     /// Note that `I` is the instruction which is pushing data to use if a trap
1048     /// is generated.
1049     #[must_use]
push<I: Encode, T>(&mut self, val: T) -> ControlFlow<Done>1050     fn push<I: Encode, T>(&mut self, val: T) -> ControlFlow<Done> {
1051         let new_sp = self.state[XReg::sp].get_ptr::<T>().wrapping_sub(1);
1052         self.set_sp::<I>(new_sp.cast())?;
1053         unsafe {
1054             new_sp.write_unaligned(val);
1055         }
1056         ControlFlow::Continue(())
1057     }
1058 
1059     /// `ret = *sp; sp -= size_of::<T>()`
pop<T>(&mut self) -> T1060     fn pop<T>(&mut self) -> T {
1061         let sp = self.state[XReg::sp].get_ptr::<T>();
1062         let val = unsafe { sp.read_unaligned() };
1063         self.set_sp_unchecked(sp.wrapping_add(1));
1064         val
1065     }
1066 
1067     /// Sets the stack pointer to the `sp` provided.
1068     ///
1069     /// Returns a trap if this would result in stack overflow, or if `sp` is
1070     /// beneath the base pointer of `self.state.stack`.
1071     ///
1072     /// The `I` parameter here is the instruction that is setting the stack
1073     /// pointer and is used to calculate this instruction's own `pc` if this
1074     /// instruction traps.
1075     #[must_use]
set_sp<I: Encode>(&mut self, sp: *mut u8) -> ControlFlow<Done>1076     fn set_sp<I: Encode>(&mut self, sp: *mut u8) -> ControlFlow<Done> {
1077         let sp_raw = sp as usize;
1078         let base_raw = self.state.stack.base() as usize;
1079         if sp_raw < base_raw {
1080             return self.done_trap_kind::<I>(Some(TrapKind::StackOverflow));
1081         }
1082         self.set_sp_unchecked(sp);
1083         ControlFlow::Continue(())
1084     }
1085 
1086     /// Same as `set_sp` but does not check to see if `sp` is in-bounds. Should
1087     /// only be used with stack increment operations such as `pop`.
set_sp_unchecked<T>(&mut self, sp: *mut T)1088     fn set_sp_unchecked<T>(&mut self, sp: *mut T) {
1089         if cfg!(debug_assertions) {
1090             let sp_raw = sp as usize;
1091             let base = self.state.stack.base() as usize;
1092             let end = base + self.state.stack.len();
1093             assert!(base <= sp_raw && sp_raw <= end);
1094         }
1095         self.state[XReg::sp].set_ptr(sp);
1096     }
1097 
1098     /// Loads a value of `T` using native-endian byte ordering from the `addr`
1099     /// specified.
1100     ///
1101     /// The `I` type parameter is the instruction issuing this load which is
1102     /// used in case of traps to calculate the trapping pc.
1103     ///
1104     /// Returns `ControlFlow::Break` if a trap happens or
1105     /// `ControlFlow::Continue` if the value was loaded successfully.
1106     ///
1107     /// # Unsafety
1108     ///
1109     /// Safety of this method relies on the safety of the original bytecode
1110     /// itself and correctly annotating both `T` and `I`.
1111     #[must_use]
load_ne<T, I: Encode>(&mut self, addr: impl AddressingMode) -> ControlFlow<Done, T>1112     unsafe fn load_ne<T, I: Encode>(&mut self, addr: impl AddressingMode) -> ControlFlow<Done, T> {
1113         unsafe { addr.load_ne::<T, I>(self) }
1114     }
1115 
1116     /// Stores a `val` to the `addr` specified.
1117     ///
1118     /// The `I` type parameter is the instruction issuing this store which is
1119     /// used in case of traps to calculate the trapping pc.
1120     ///
1121     /// Returns `ControlFlow::Break` if a trap happens or
1122     /// `ControlFlow::Continue` if the value was stored successfully.
1123     ///
1124     /// # Unsafety
1125     ///
1126     /// Safety of this method relies on the safety of the original bytecode
1127     /// itself and correctly annotating both `T` and `I`.
1128     #[must_use]
store_ne<T, I: Encode>( &mut self, addr: impl AddressingMode, val: T, ) -> ControlFlow<Done>1129     unsafe fn store_ne<T, I: Encode>(
1130         &mut self,
1131         addr: impl AddressingMode,
1132         val: T,
1133     ) -> ControlFlow<Done> {
1134         unsafe { addr.store_ne::<T, I>(self, val) }
1135     }
1136 
check_xnn_from_f32<I: Encode>( &mut self, val: f32, (lo, hi): (f32, f32), ) -> ControlFlow<Done>1137     fn check_xnn_from_f32<I: Encode>(
1138         &mut self,
1139         val: f32,
1140         (lo, hi): (f32, f32),
1141     ) -> ControlFlow<Done> {
1142         self.check_xnn_from_f64::<I>(val.into(), (lo.into(), hi.into()))
1143     }
1144 
check_xnn_from_f64<I: Encode>( &mut self, val: f64, (lo, hi): (f64, f64), ) -> ControlFlow<Done>1145     fn check_xnn_from_f64<I: Encode>(
1146         &mut self,
1147         val: f64,
1148         (lo, hi): (f64, f64),
1149     ) -> ControlFlow<Done> {
1150         if val != val {
1151             return self.done_trap_kind::<I>(Some(TrapKind::BadConversionToInteger));
1152         }
1153         let val = val.wasm_trunc();
1154         if val <= lo || val >= hi {
1155             return self.done_trap_kind::<I>(Some(TrapKind::IntegerOverflow));
1156         }
1157         ControlFlow::Continue(())
1158     }
1159 
1160     #[cfg(not(pulley_disable_interp_simd))]
get_i128(&self, lo: XReg, hi: XReg) -> i1281161     fn get_i128(&self, lo: XReg, hi: XReg) -> i128 {
1162         let lo = self.state[lo].get_u64();
1163         let hi = self.state[hi].get_i64();
1164         i128::from(lo) | (i128::from(hi) << 64)
1165     }
1166 
1167     #[cfg(not(pulley_disable_interp_simd))]
set_i128(&mut self, lo: XReg, hi: XReg, val: i128)1168     fn set_i128(&mut self, lo: XReg, hi: XReg, val: i128) {
1169         self.state[lo].set_u64(val as u64);
1170         self.state[hi].set_u64((val >> 64) as u64);
1171     }
1172 
record_executing_pc_for_profiling(&mut self)1173     fn record_executing_pc_for_profiling(&mut self) {
1174         // Note that this is a no-op if `feature = "profile"` is disabled.
1175         self.executing_pc.record(self.pc.as_ptr().as_ptr() as usize);
1176     }
1177 }
1178 
1179 /// Helper trait to encompass the various addressing modes of Pulley.
1180 trait AddressingMode: Sized {
1181     /// Calculates the native host address `*mut T` corresponding to this
1182     /// addressing mode.
1183     ///
1184     /// # Safety
1185     ///
1186     /// Relies on the original bytecode being safe to execute as this will
1187     /// otherwise perform unsafe byte offsets for example which requires the
1188     /// original bytecode to be correct.
1189     #[must_use]
addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T>1190     unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T>;
1191 
1192     /// Loads a value of `T` from this address, using native-endian byte order.
1193     ///
1194     /// For more information see [`Interpreter::load_ne`].
1195     #[must_use]
load_ne<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, T>1196     unsafe fn load_ne<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, T> {
1197         let ret = unsafe { self.addr::<T, I>(i)?.read_unaligned() };
1198         ControlFlow::Continue(ret)
1199     }
1200 
1201     /// Stores a `val` to this address, using native-endian byte order.
1202     ///
1203     /// For more information see [`Interpreter::store_ne`].
1204     #[must_use]
store_ne<T, I: Encode>(self, i: &mut Interpreter<'_>, val: T) -> ControlFlow<Done>1205     unsafe fn store_ne<T, I: Encode>(self, i: &mut Interpreter<'_>, val: T) -> ControlFlow<Done> {
1206         unsafe {
1207             self.addr::<T, I>(i)?.write_unaligned(val);
1208         }
1209         ControlFlow::Continue(())
1210     }
1211 }
1212 
1213 impl AddressingMode for AddrO32 {
addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T>1214     unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
1215         // Note that this addressing mode cannot return `ControlFlow::Break`
1216         // which is intentional. It's expected that LLVM optimizes away any
1217         // branches callers have.
1218         unsafe {
1219             ControlFlow::Continue(
1220                 i.state[self.addr]
1221                     .get_ptr::<T>()
1222                     .byte_offset(self.offset as isize),
1223             )
1224         }
1225     }
1226 }
1227 
1228 impl AddressingMode for AddrZ {
addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T>1229     unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
1230         // This addressing mode defines loading/storing to the null address as
1231         // a trap, but all other addresses are allowed.
1232         let host_addr = i.state[self.addr].get_ptr::<T>();
1233         if host_addr.is_null() {
1234             i.done_trap_kind::<I>(Some(TrapKind::MemoryOutOfBounds))?;
1235             unreachable!();
1236         }
1237         unsafe {
1238             let addr = host_addr.byte_offset(self.offset as isize);
1239             ControlFlow::Continue(addr)
1240         }
1241     }
1242 }
1243 
1244 impl AddressingMode for AddrG32 {
addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T>1245     unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
1246         // Test if `bound - offset - T` is less than the wasm address to
1247         // generate a trap. It's a guarantee of this instruction that these
1248         // subtractions don't overflow.
1249         let bound = i.state[self.host_heap_bound].get_u64() as usize;
1250         let offset = usize::from(self.offset);
1251         let wasm_addr = i.state[self.wasm_addr].get_u32() as usize;
1252         if wasm_addr > bound - offset - size_of::<T>() {
1253             i.done_trap_kind::<I>(Some(TrapKind::MemoryOutOfBounds))?;
1254             unreachable!();
1255         }
1256         unsafe {
1257             let addr = i.state[self.host_heap_base]
1258                 .get_ptr::<T>()
1259                 .byte_add(wasm_addr)
1260                 .byte_add(offset);
1261             ControlFlow::Continue(addr)
1262         }
1263     }
1264 }
1265 
1266 impl AddressingMode for AddrG32Bne {
addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T>1267     unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
1268         // Same as `AddrG32` above except that the bound is loaded from memory.
1269         let bound = unsafe {
1270             *i.state[self.host_heap_bound_addr]
1271                 .get_ptr::<usize>()
1272                 .byte_add(usize::from(self.host_heap_bound_offset))
1273         };
1274         let wasm_addr = i.state[self.wasm_addr].get_u32() as usize;
1275         let offset = usize::from(self.offset);
1276         if wasm_addr > bound - offset - size_of::<T>() {
1277             i.done_trap_kind::<I>(Some(TrapKind::MemoryOutOfBounds))?;
1278             unreachable!();
1279         }
1280         unsafe {
1281             let addr = i.state[self.host_heap_base]
1282                 .get_ptr::<T>()
1283                 .byte_add(wasm_addr)
1284                 .byte_add(offset);
1285             ControlFlow::Continue(addr)
1286         }
1287     }
1288 }
1289 
/// Pushes and pops `i32`s on a 16-byte stack: four values fit, further
/// pushes break, and the four stored values pop back in reverse order.
#[test]
fn simple_push_pop() {
    let mut state = MachineState::with_stack(16).unwrap();
    let pc = ExecutingPc::default();
    unsafe {
        let mut bytecode = [0u8; 10];
        // this isn't actually read so just manufacture a dummy one
        let dummy_pc = NonNull::new(bytecode.as_mut_ptr().add(4)).unwrap();
        let mut i = Interpreter {
            state: &mut state,
            pc: UnsafeBytecodeStream::new(dummy_pc),
            executing_pc: pc.as_ref(),
        };

        // A single push/pop round trip.
        assert!(i.push::<crate::Ret, _>(0_i32).is_continue());
        assert_eq!(i.pop::<i32>(), 0_i32);

        // Fill the stack completely.
        for val in 1_i32..=4 {
            assert!(i.push::<crate::Ret, _>(val).is_continue());
        }
        // Anything more overflows and leaves the stack untouched.
        for val in [5_i32, 6] {
            assert!(i.push::<crate::Ret, _>(val).is_break());
        }
        for expected in (1_i32..=4).rev() {
            assert_eq!(i.pop::<i32>(), expected);
        }
    }
}
1316 
/// Generates compare-against-immediate branch handlers.
///
/// Each entry names the handler (`$method`), the immediate's type (`$imm_ty`),
/// the instruction type used for pc-relative jumps (`$insn`), the comparison
/// operator (`$cmp`), and the register getter (`$getter`). The generated
/// handler jumps by `offset` when `reg $cmp imm` holds.
macro_rules! br_if_imm {
    ($(
        fn $method:ident(&mut self, a: XReg, b: $imm_ty:ident, offset: PcRelOffset)
            = $insn:ident / $cmp:tt / $getter:ident;
    )*) => {$(
        fn $method(&mut self, a: XReg, b: $imm_ty, offset: PcRelOffset) -> ControlFlow<Done> {
            let a = self.state[a].$getter();
            let taken = a $cmp b.into();
            if taken {
                return self.pc_rel_jump::<crate::$insn>(offset);
            }
            ControlFlow::Continue(())
        }
    )*};
}
1332 
1333 impl OpVisitor for Interpreter<'_> {
1334     type BytecodeStream = UnsafeBytecodeStream;
1335     type Return = ControlFlow<Done>;
1336 
    /// Gives mutable access to the interpreter's bytecode stream (`pc`).
    fn bytecode(&mut self) -> &mut UnsafeBytecodeStream {
        &mut self.pc
    }
1340 
    /// Does nothing and continues with the next instruction.
    fn nop(&mut self) -> ControlFlow<Done> {
        ControlFlow::Continue(())
    }
1344 
ret(&mut self) -> ControlFlow<Done>1345     fn ret(&mut self) -> ControlFlow<Done> {
1346         let lr = self.state.lr;
1347         if lr == HOST_RETURN_ADDR {
1348             self.done_return_to_host()
1349         } else {
1350             self.pc = unsafe { UnsafeBytecodeStream::new(NonNull::new_unchecked(lr)) };
1351             ControlFlow::Continue(())
1352         }
1353     }
1354 
call(&mut self, offset: PcRelOffset) -> ControlFlow<Done>1355     fn call(&mut self, offset: PcRelOffset) -> ControlFlow<Done> {
1356         let return_addr = self.pc.as_ptr();
1357         self.state.lr = return_addr.as_ptr();
1358         self.pc_rel_jump::<crate::Call>(offset)
1359     }
1360 
call1(&mut self, arg1: XReg, offset: PcRelOffset) -> ControlFlow<Done>1361     fn call1(&mut self, arg1: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1362         let return_addr = self.pc.as_ptr();
1363         self.state.lr = return_addr.as_ptr();
1364         self.state[XReg::x0] = self.state[arg1];
1365         self.pc_rel_jump::<crate::Call1>(offset)
1366     }
1367 
call2(&mut self, arg1: XReg, arg2: XReg, offset: PcRelOffset) -> ControlFlow<Done>1368     fn call2(&mut self, arg1: XReg, arg2: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1369         let return_addr = self.pc.as_ptr();
1370         self.state.lr = return_addr.as_ptr();
1371         let (x0, x1) = (self.state[arg1], self.state[arg2]);
1372         self.state[XReg::x0] = x0;
1373         self.state[XReg::x1] = x1;
1374         self.pc_rel_jump::<crate::Call2>(offset)
1375     }
1376 
call3( &mut self, arg1: XReg, arg2: XReg, arg3: XReg, offset: PcRelOffset, ) -> ControlFlow<Done>1377     fn call3(
1378         &mut self,
1379         arg1: XReg,
1380         arg2: XReg,
1381         arg3: XReg,
1382         offset: PcRelOffset,
1383     ) -> ControlFlow<Done> {
1384         let return_addr = self.pc.as_ptr();
1385         self.state.lr = return_addr.as_ptr();
1386         let (x0, x1, x2) = (self.state[arg1], self.state[arg2], self.state[arg3]);
1387         self.state[XReg::x0] = x0;
1388         self.state[XReg::x1] = x1;
1389         self.state[XReg::x2] = x2;
1390         self.pc_rel_jump::<crate::Call3>(offset)
1391     }
1392 
call4( &mut self, arg1: XReg, arg2: XReg, arg3: XReg, arg4: XReg, offset: PcRelOffset, ) -> ControlFlow<Done>1393     fn call4(
1394         &mut self,
1395         arg1: XReg,
1396         arg2: XReg,
1397         arg3: XReg,
1398         arg4: XReg,
1399         offset: PcRelOffset,
1400     ) -> ControlFlow<Done> {
1401         let return_addr = self.pc.as_ptr();
1402         self.state.lr = return_addr.as_ptr();
1403         let (x0, x1, x2, x3) = (
1404             self.state[arg1],
1405             self.state[arg2],
1406             self.state[arg3],
1407             self.state[arg4],
1408         );
1409         self.state[XReg::x0] = x0;
1410         self.state[XReg::x1] = x1;
1411         self.state[XReg::x2] = x2;
1412         self.state[XReg::x3] = x3;
1413         self.pc_rel_jump::<crate::Call4>(offset)
1414     }
1415 
call_indirect(&mut self, dst: XReg) -> ControlFlow<Done>1416     fn call_indirect(&mut self, dst: XReg) -> ControlFlow<Done> {
1417         let return_addr = self.pc.as_ptr();
1418         self.state.lr = return_addr.as_ptr();
1419         // SAFETY: part of the unsafe contract of the interpreter is only valid
1420         // bytecode is interpreted, so the jump destination is part of the validity
1421         // of the bytecode itself.
1422         unsafe {
1423             self.pc = UnsafeBytecodeStream::new(NonNull::new_unchecked(self.state[dst].get_ptr()));
1424         }
1425         ControlFlow::Continue(())
1426     }
1427 
    /// Unconditionally jumps `offset` bytes from the start of this
    /// instruction.
    fn jump(&mut self, offset: PcRelOffset) -> ControlFlow<Done> {
        self.pc_rel_jump::<crate::Jump>(offset)
    }
1431 
xjump(&mut self, reg: XReg) -> ControlFlow<Done>1432     fn xjump(&mut self, reg: XReg) -> ControlFlow<Done> {
1433         unsafe {
1434             self.pc = UnsafeBytecodeStream::new(NonNull::new_unchecked(self.state[reg].get_ptr()));
1435         }
1436         ControlFlow::Continue(())
1437     }
1438 
br_if32(&mut self, cond: XReg, offset: PcRelOffset) -> ControlFlow<Done>1439     fn br_if32(&mut self, cond: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1440         let cond = self.state[cond].get_u32();
1441         if cond != 0 {
1442             self.pc_rel_jump::<crate::BrIf>(offset)
1443         } else {
1444             ControlFlow::Continue(())
1445         }
1446     }
1447 
br_if_not32(&mut self, cond: XReg, offset: PcRelOffset) -> ControlFlow<Done>1448     fn br_if_not32(&mut self, cond: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1449         let cond = self.state[cond].get_u32();
1450         if cond == 0 {
1451             self.pc_rel_jump::<crate::BrIfNot>(offset)
1452         } else {
1453             ControlFlow::Continue(())
1454         }
1455     }
1456 
br_if_xeq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done>1457     fn br_if_xeq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1458         let a = self.state[a].get_u32();
1459         let b = self.state[b].get_u32();
1460         if a == b {
1461             self.pc_rel_jump::<crate::BrIfXeq32>(offset)
1462         } else {
1463             ControlFlow::Continue(())
1464         }
1465     }
1466 
br_if_xneq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done>1467     fn br_if_xneq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1468         let a = self.state[a].get_u32();
1469         let b = self.state[b].get_u32();
1470         if a != b {
1471             self.pc_rel_jump::<crate::BrIfXneq32>(offset)
1472         } else {
1473             ControlFlow::Continue(())
1474         }
1475     }
1476 
br_if_xslt32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done>1477     fn br_if_xslt32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1478         let a = self.state[a].get_i32();
1479         let b = self.state[b].get_i32();
1480         if a < b {
1481             self.pc_rel_jump::<crate::BrIfXslt32>(offset)
1482         } else {
1483             ControlFlow::Continue(())
1484         }
1485     }
1486 
br_if_xslteq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done>1487     fn br_if_xslteq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1488         let a = self.state[a].get_i32();
1489         let b = self.state[b].get_i32();
1490         if a <= b {
1491             self.pc_rel_jump::<crate::BrIfXslteq32>(offset)
1492         } else {
1493             ControlFlow::Continue(())
1494         }
1495     }
1496 
br_if_xult32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done>1497     fn br_if_xult32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1498         let a = self.state[a].get_u32();
1499         let b = self.state[b].get_u32();
1500         if a < b {
1501             self.pc_rel_jump::<crate::BrIfXult32>(offset)
1502         } else {
1503             ControlFlow::Continue(())
1504         }
1505     }
1506 
br_if_xulteq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done>1507     fn br_if_xulteq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1508         let a = self.state[a].get_u32();
1509         let b = self.state[b].get_u32();
1510         if a <= b {
1511             self.pc_rel_jump::<crate::BrIfXulteq32>(offset)
1512         } else {
1513             ControlFlow::Continue(())
1514         }
1515     }
1516 
br_if_xeq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done>1517     fn br_if_xeq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1518         let a = self.state[a].get_u64();
1519         let b = self.state[b].get_u64();
1520         if a == b {
1521             self.pc_rel_jump::<crate::BrIfXeq64>(offset)
1522         } else {
1523             ControlFlow::Continue(())
1524         }
1525     }
1526 
br_if_xneq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done>1527     fn br_if_xneq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1528         let a = self.state[a].get_u64();
1529         let b = self.state[b].get_u64();
1530         if a != b {
1531             self.pc_rel_jump::<crate::BrIfXneq64>(offset)
1532         } else {
1533             ControlFlow::Continue(())
1534         }
1535     }
1536 
br_if_xslt64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done>1537     fn br_if_xslt64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1538         let a = self.state[a].get_i64();
1539         let b = self.state[b].get_i64();
1540         if a < b {
1541             self.pc_rel_jump::<crate::BrIfXslt64>(offset)
1542         } else {
1543             ControlFlow::Continue(())
1544         }
1545     }
1546 
br_if_xslteq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done>1547     fn br_if_xslteq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1548         let a = self.state[a].get_i64();
1549         let b = self.state[b].get_i64();
1550         if a <= b {
1551             self.pc_rel_jump::<crate::BrIfXslteq64>(offset)
1552         } else {
1553             ControlFlow::Continue(())
1554         }
1555     }
1556 
br_if_xult64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done>1557     fn br_if_xult64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1558         let a = self.state[a].get_u64();
1559         let b = self.state[b].get_u64();
1560         if a < b {
1561             self.pc_rel_jump::<crate::BrIfXult64>(offset)
1562         } else {
1563             ControlFlow::Continue(())
1564         }
1565     }
1566 
br_if_xulteq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done>1567     fn br_if_xulteq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1568         let a = self.state[a].get_u64();
1569         let b = self.state[b].get_u64();
1570         if a <= b {
1571             self.pc_rel_jump::<crate::BrIfXulteq64>(offset)
1572         } else {
1573             ControlFlow::Continue(())
1574         }
1575     }
1576 
// Compare-register-against-immediate conditional branches, declared through
// the `br_if_imm!` macro. Each entry gives the method signature followed by
// `= <opcode type> / <comparison operator> / <register getter>`.
// NOTE(review): the macro definition is outside this view; presumably each
// entry expands to a method that reads `a` with the getter, compares it to the
// immediate `b` with the operator, and takes the relative jump when the
// comparison holds — confirm against the `br_if_imm!` definition.
1577     br_if_imm! {
             // 32-bit equality / inequality (register read with `get_i32`).
1578         fn br_if_xeq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1579             = BrIfXeq32I8 / == / get_i32;
1580         fn br_if_xeq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1581             = BrIfXeq32I32 / == / get_i32;
1582         fn br_if_xneq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1583             = BrIfXneq32I8 / != / get_i32;
1584         fn br_if_xneq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1585             = BrIfXneq32I32 / != / get_i32;
1586 
             // 32-bit signed ordering (register read with `get_i32`).
1587         fn br_if_xslt32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1588             = BrIfXslt32I8 / < / get_i32;
1589         fn br_if_xslt32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1590             = BrIfXslt32I32 / < / get_i32;
1591         fn br_if_xsgt32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1592             = BrIfXsgt32I8 / > / get_i32;
1593         fn br_if_xsgt32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1594             = BrIfXsgt32I32 / > / get_i32;
1595         fn br_if_xslteq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1596             = BrIfXslteq32I8 / <= / get_i32;
1597         fn br_if_xslteq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1598             = BrIfXslteq32I32 / <= / get_i32;
1599         fn br_if_xsgteq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1600             = BrIfXsgteq32I8 / >= / get_i32;
1601         fn br_if_xsgteq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1602             = BrIfXsgteq32I32 / >= / get_i32;
1603 
             // 32-bit unsigned ordering (register read with `get_u32`).
1604         fn br_if_xult32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1605             = BrIfXult32U8 / < / get_u32;
1606         fn br_if_xult32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1607             = BrIfXult32U32 / < / get_u32;
1608         fn br_if_xugt32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1609             = BrIfXugt32U8 / > / get_u32;
1610         fn br_if_xugt32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1611             = BrIfXugt32U32 / > / get_u32;
1612         fn br_if_xulteq32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1613             = BrIfXulteq32U8 / <= / get_u32;
1614         fn br_if_xulteq32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1615             = BrIfXulteq32U32 / <= / get_u32;
1616         fn br_if_xugteq32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1617             = BrIfXugteq32U8 / >= / get_u32;
1618         fn br_if_xugteq32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1619             = BrIfXugteq32U32 / >= / get_u32;
1620 
             // 64-bit equality / inequality (register read with `get_i64`).
1621         fn br_if_xeq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1622             = BrIfXeq64I8 / == / get_i64;
1623         fn br_if_xeq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1624             = BrIfXeq64I32 / == / get_i64;
1625         fn br_if_xneq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1626             = BrIfXneq64I8 / != / get_i64;
1627         fn br_if_xneq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1628             = BrIfXneq64I32 / != / get_i64;
1629 
             // 64-bit signed ordering (register read with `get_i64`).
1630         fn br_if_xslt64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1631             = BrIfXslt64I8 / < / get_i64;
1632         fn br_if_xslt64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1633             = BrIfXslt64I32 / < / get_i64;
1634         fn br_if_xsgt64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1635             = BrIfXsgt64I8 / > / get_i64;
1636         fn br_if_xsgt64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1637             = BrIfXsgt64I32 / > / get_i64;
1638         fn br_if_xslteq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1639             = BrIfXslteq64I8 / <= / get_i64;
1640         fn br_if_xslteq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1641             = BrIfXslteq64I32 / <= / get_i64;
1642         fn br_if_xsgteq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1643             = BrIfXsgteq64I8 / >= / get_i64;
1644         fn br_if_xsgteq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1645             = BrIfXsgteq64I32 / >= / get_i64;
1646 
             // 64-bit unsigned ordering (register read with `get_u64`).
1647         fn br_if_xult64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1648             = BrIfXult64U8 / < / get_u64;
1649         fn br_if_xult64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1650             = BrIfXult64U32 / < / get_u64;
1651         fn br_if_xugt64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1652             = BrIfXugt64U8 / > / get_u64;
1653         fn br_if_xugt64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1654             = BrIfXugt64U32 / > / get_u64;
1655         fn br_if_xulteq64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1656             = BrIfXulteq64U8 / <= / get_u64;
1657         fn br_if_xulteq64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1658             = BrIfXulteq64U32 / <= / get_u64;
1659         fn br_if_xugteq64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1660             = BrIfXugteq64U8 / >= / get_u64;
1661         fn br_if_xugteq64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1662             = BrIfXugteq64U32 / >= / get_u64;
1663     }
1664 
xmov(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done>1665     fn xmov(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
1666         let val = self.state[src];
1667         self.state[dst] = val;
1668         ControlFlow::Continue(())
1669     }
1670 
xconst8(&mut self, dst: XReg, imm: i8) -> ControlFlow<Done>1671     fn xconst8(&mut self, dst: XReg, imm: i8) -> ControlFlow<Done> {
1672         self.state[dst].set_i64(i64::from(imm));
1673         ControlFlow::Continue(())
1674     }
1675 
xzero(&mut self, dst: XReg) -> ControlFlow<Done>1676     fn xzero(&mut self, dst: XReg) -> ControlFlow<Done> {
1677         self.state[dst].set_i64(0);
1678         ControlFlow::Continue(())
1679     }
1680 
xone(&mut self, dst: XReg) -> ControlFlow<Done>1681     fn xone(&mut self, dst: XReg) -> ControlFlow<Done> {
1682         self.state[dst].set_i64(1);
1683         ControlFlow::Continue(())
1684     }
1685 
xconst16(&mut self, dst: XReg, imm: i16) -> ControlFlow<Done>1686     fn xconst16(&mut self, dst: XReg, imm: i16) -> ControlFlow<Done> {
1687         self.state[dst].set_i64(i64::from(imm));
1688         ControlFlow::Continue(())
1689     }
1690 
xconst32(&mut self, dst: XReg, imm: i32) -> ControlFlow<Done>1691     fn xconst32(&mut self, dst: XReg, imm: i32) -> ControlFlow<Done> {
1692         self.state[dst].set_i64(i64::from(imm));
1693         ControlFlow::Continue(())
1694     }
1695 
xconst64(&mut self, dst: XReg, imm: i64) -> ControlFlow<Done>1696     fn xconst64(&mut self, dst: XReg, imm: i64) -> ControlFlow<Done> {
1697         self.state[dst].set_i64(imm);
1698         ControlFlow::Continue(())
1699     }
1700 
xadd32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>1701     fn xadd32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1702         let a = self.state[operands.src1].get_u32();
1703         let b = self.state[operands.src2].get_u32();
1704         self.state[operands.dst].set_u32(a.wrapping_add(b));
1705         ControlFlow::Continue(())
1706     }
1707 
xadd32_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done>1708     fn xadd32_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
1709         self.xadd32_u32(dst, src1, src2.into())
1710     }
1711 
xadd32_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done>1712     fn xadd32_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1713         let a = self.state[src1].get_u32();
1714         self.state[dst].set_u32(a.wrapping_add(src2));
1715         ControlFlow::Continue(())
1716     }
1717 
xadd64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>1718     fn xadd64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1719         let a = self.state[operands.src1].get_u64();
1720         let b = self.state[operands.src2].get_u64();
1721         self.state[operands.dst].set_u64(a.wrapping_add(b));
1722         ControlFlow::Continue(())
1723     }
1724 
xadd64_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done>1725     fn xadd64_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
1726         self.xadd64_u32(dst, src1, src2.into())
1727     }
1728 
xadd64_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done>1729     fn xadd64_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1730         let a = self.state[src1].get_u64();
1731         self.state[dst].set_u64(a.wrapping_add(src2.into()));
1732         ControlFlow::Continue(())
1733     }
1734 
xmadd32(&mut self, dst: XReg, src1: XReg, src2: XReg, src3: XReg) -> ControlFlow<Done>1735     fn xmadd32(&mut self, dst: XReg, src1: XReg, src2: XReg, src3: XReg) -> ControlFlow<Done> {
1736         let a = self.state[src1].get_u32();
1737         let b = self.state[src2].get_u32();
1738         let c = self.state[src3].get_u32();
1739         self.state[dst].set_u32(a.wrapping_mul(b).wrapping_add(c));
1740         ControlFlow::Continue(())
1741     }
1742 
xmadd64(&mut self, dst: XReg, src1: XReg, src2: XReg, src3: XReg) -> ControlFlow<Done>1743     fn xmadd64(&mut self, dst: XReg, src1: XReg, src2: XReg, src3: XReg) -> ControlFlow<Done> {
1744         let a = self.state[src1].get_u64();
1745         let b = self.state[src2].get_u64();
1746         let c = self.state[src3].get_u64();
1747         self.state[dst].set_u64(a.wrapping_mul(b).wrapping_add(c));
1748         ControlFlow::Continue(())
1749     }
1750 
xsub32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>1751     fn xsub32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1752         let a = self.state[operands.src1].get_u32();
1753         let b = self.state[operands.src2].get_u32();
1754         self.state[operands.dst].set_u32(a.wrapping_sub(b));
1755         ControlFlow::Continue(())
1756     }
1757 
xsub32_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done>1758     fn xsub32_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
1759         self.xsub32_u32(dst, src1, src2.into())
1760     }
1761 
xsub32_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done>1762     fn xsub32_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1763         let a = self.state[src1].get_u32();
1764         self.state[dst].set_u32(a.wrapping_sub(src2));
1765         ControlFlow::Continue(())
1766     }
1767 
xsub64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>1768     fn xsub64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1769         let a = self.state[operands.src1].get_u64();
1770         let b = self.state[operands.src2].get_u64();
1771         self.state[operands.dst].set_u64(a.wrapping_sub(b));
1772         ControlFlow::Continue(())
1773     }
1774 
xsub64_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done>1775     fn xsub64_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
1776         self.xsub64_u32(dst, src1, src2.into())
1777     }
1778 
xsub64_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done>1779     fn xsub64_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1780         let a = self.state[src1].get_u64();
1781         self.state[dst].set_u64(a.wrapping_sub(src2.into()));
1782         ControlFlow::Continue(())
1783     }
1784 
xmul32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>1785     fn xmul32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1786         let a = self.state[operands.src1].get_u32();
1787         let b = self.state[operands.src2].get_u32();
1788         self.state[operands.dst].set_u32(a.wrapping_mul(b));
1789         ControlFlow::Continue(())
1790     }
1791 
xmul32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done>1792     fn xmul32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
1793         self.xmul32_s32(dst, src1, src2.into())
1794     }
1795 
xmul32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done>1796     fn xmul32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
1797         let a = self.state[src1].get_i32();
1798         self.state[dst].set_i32(a.wrapping_mul(src2));
1799         ControlFlow::Continue(())
1800     }
1801 
xmul64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>1802     fn xmul64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1803         let a = self.state[operands.src1].get_u64();
1804         let b = self.state[operands.src2].get_u64();
1805         self.state[operands.dst].set_u64(a.wrapping_mul(b));
1806         ControlFlow::Continue(())
1807     }
1808 
xmul64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done>1809     fn xmul64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
1810         self.xmul64_s32(dst, src1, src2.into())
1811     }
1812 
xmul64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done>1813     fn xmul64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
1814         let a = self.state[src1].get_i64();
1815         self.state[dst].set_i64(a.wrapping_mul(src2.into()));
1816         ControlFlow::Continue(())
1817     }
1818 
xshl32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>1819     fn xshl32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1820         let a = self.state[operands.src1].get_u32();
1821         let b = self.state[operands.src2].get_u32();
1822         self.state[operands.dst].set_u32(a.wrapping_shl(b));
1823         ControlFlow::Continue(())
1824     }
1825 
xshr32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>1826     fn xshr32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1827         let a = self.state[operands.src1].get_u32();
1828         let b = self.state[operands.src2].get_u32();
1829         self.state[operands.dst].set_u32(a.wrapping_shr(b));
1830         ControlFlow::Continue(())
1831     }
1832 
xshr32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>1833     fn xshr32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1834         let a = self.state[operands.src1].get_i32();
1835         let b = self.state[operands.src2].get_u32();
1836         self.state[operands.dst].set_i32(a.wrapping_shr(b));
1837         ControlFlow::Continue(())
1838     }
1839 
xshl64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>1840     fn xshl64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1841         let a = self.state[operands.src1].get_u64();
1842         let b = self.state[operands.src2].get_u32();
1843         self.state[operands.dst].set_u64(a.wrapping_shl(b));
1844         ControlFlow::Continue(())
1845     }
1846 
xshr64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>1847     fn xshr64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1848         let a = self.state[operands.src1].get_u64();
1849         let b = self.state[operands.src2].get_u32();
1850         self.state[operands.dst].set_u64(a.wrapping_shr(b));
1851         ControlFlow::Continue(())
1852     }
1853 
xshr64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>1854     fn xshr64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1855         let a = self.state[operands.src1].get_i64();
1856         let b = self.state[operands.src2].get_u32();
1857         self.state[operands.dst].set_i64(a.wrapping_shr(b));
1858         ControlFlow::Continue(())
1859     }
1860 
xshl32_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done>1861     fn xshl32_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1862         let a = self.state[operands.src1].get_u32();
1863         let b = u32::from(u8::from(operands.src2));
1864         self.state[operands.dst].set_u32(a.wrapping_shl(b));
1865         ControlFlow::Continue(())
1866     }
1867 
xshr32_u_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done>1868     fn xshr32_u_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1869         let a = self.state[operands.src1].get_u32();
1870         let b = u32::from(u8::from(operands.src2));
1871         self.state[operands.dst].set_u32(a.wrapping_shr(b));
1872         ControlFlow::Continue(())
1873     }
1874 
xshr32_s_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done>1875     fn xshr32_s_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1876         let a = self.state[operands.src1].get_i32();
1877         let b = u32::from(u8::from(operands.src2));
1878         self.state[operands.dst].set_i32(a.wrapping_shr(b));
1879         ControlFlow::Continue(())
1880     }
1881 
xshl64_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done>1882     fn xshl64_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1883         let a = self.state[operands.src1].get_u64();
1884         let b = u32::from(u8::from(operands.src2));
1885         self.state[operands.dst].set_u64(a.wrapping_shl(b));
1886         ControlFlow::Continue(())
1887     }
1888 
xshr64_u_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done>1889     fn xshr64_u_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1890         let a = self.state[operands.src1].get_u64();
1891         let b = u32::from(u8::from(operands.src2));
1892         self.state[operands.dst].set_u64(a.wrapping_shr(b));
1893         ControlFlow::Continue(())
1894     }
1895 
xshr64_s_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done>1896     fn xshr64_s_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1897         let a = self.state[operands.src1].get_i64();
1898         let b = u32::from(u8::from(operands.src2));
1899         self.state[operands.dst].set_i64(a.wrapping_shr(b));
1900         ControlFlow::Continue(())
1901     }
1902 
xneg32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done>1903     fn xneg32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
1904         let a = self.state[src].get_i32();
1905         self.state[dst].set_i32(a.wrapping_neg());
1906         ControlFlow::Continue(())
1907     }
1908 
xneg64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done>1909     fn xneg64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
1910         let a = self.state[src].get_i64();
1911         self.state[dst].set_i64(a.wrapping_neg());
1912         ControlFlow::Continue(())
1913     }
1914 
xeq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>1915     fn xeq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1916         let a = self.state[operands.src1].get_u64();
1917         let b = self.state[operands.src2].get_u64();
1918         self.state[operands.dst].set_u32(u32::from(a == b));
1919         ControlFlow::Continue(())
1920     }
1921 
xneq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>1922     fn xneq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1923         let a = self.state[operands.src1].get_u64();
1924         let b = self.state[operands.src2].get_u64();
1925         self.state[operands.dst].set_u32(u32::from(a != b));
1926         ControlFlow::Continue(())
1927     }
1928 
xslt64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>1929     fn xslt64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1930         let a = self.state[operands.src1].get_i64();
1931         let b = self.state[operands.src2].get_i64();
1932         self.state[operands.dst].set_u32(u32::from(a < b));
1933         ControlFlow::Continue(())
1934     }
1935 
xslteq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>1936     fn xslteq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1937         let a = self.state[operands.src1].get_i64();
1938         let b = self.state[operands.src2].get_i64();
1939         self.state[operands.dst].set_u32(u32::from(a <= b));
1940         ControlFlow::Continue(())
1941     }
1942 
xult64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>1943     fn xult64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1944         let a = self.state[operands.src1].get_u64();
1945         let b = self.state[operands.src2].get_u64();
1946         self.state[operands.dst].set_u32(u32::from(a < b));
1947         ControlFlow::Continue(())
1948     }
1949 
xulteq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>1950     fn xulteq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1951         let a = self.state[operands.src1].get_u64();
1952         let b = self.state[operands.src2].get_u64();
1953         self.state[operands.dst].set_u32(u32::from(a <= b));
1954         ControlFlow::Continue(())
1955     }
1956 
xeq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>1957     fn xeq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1958         let a = self.state[operands.src1].get_u32();
1959         let b = self.state[operands.src2].get_u32();
1960         self.state[operands.dst].set_u32(u32::from(a == b));
1961         ControlFlow::Continue(())
1962     }
1963 
xneq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>1964     fn xneq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1965         let a = self.state[operands.src1].get_u32();
1966         let b = self.state[operands.src2].get_u32();
1967         self.state[operands.dst].set_u32(u32::from(a != b));
1968         ControlFlow::Continue(())
1969     }
1970 
xslt32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>1971     fn xslt32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1972         let a = self.state[operands.src1].get_i32();
1973         let b = self.state[operands.src2].get_i32();
1974         self.state[operands.dst].set_u32(u32::from(a < b));
1975         ControlFlow::Continue(())
1976     }
1977 
xslteq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>1978     fn xslteq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1979         let a = self.state[operands.src1].get_i32();
1980         let b = self.state[operands.src2].get_i32();
1981         self.state[operands.dst].set_u32(u32::from(a <= b));
1982         ControlFlow::Continue(())
1983     }
1984 
xult32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>1985     fn xult32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1986         let a = self.state[operands.src1].get_u32();
1987         let b = self.state[operands.src2].get_u32();
1988         self.state[operands.dst].set_u32(u32::from(a < b));
1989         ControlFlow::Continue(())
1990     }
1991 
xulteq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>1992     fn xulteq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1993         let a = self.state[operands.src1].get_u32();
1994         let b = self.state[operands.src2].get_u32();
1995         self.state[operands.dst].set_u32(u32::from(a <= b));
1996         ControlFlow::Continue(())
1997     }
1998 
push_frame(&mut self) -> ControlFlow<Done>1999     fn push_frame(&mut self) -> ControlFlow<Done> {
2000         self.push::<crate::PushFrame, _>(self.state.lr)?;
2001         self.push::<crate::PushFrame, _>(self.state.fp)?;
2002         self.state.fp = self.state[XReg::sp].get_ptr();
2003         ControlFlow::Continue(())
2004     }
2005 
2006     #[inline]
push_frame_save(&mut self, amt: u16, regs: UpperRegSet<XReg>) -> ControlFlow<Done>2007     fn push_frame_save(&mut self, amt: u16, regs: UpperRegSet<XReg>) -> ControlFlow<Done> {
2008         // Decrement the stack pointer `amt` bytes plus 2 pointers more for
2009         // fp/lr.
2010         let ptr_size = size_of::<usize>();
2011         let full_amt = usize::from(amt) + 2 * ptr_size;
2012         let new_sp = self.state[XReg::sp].get_ptr::<u8>().wrapping_sub(full_amt);
2013         self.set_sp::<crate::PushFrameSave>(new_sp)?;
2014 
2015         unsafe {
2016             // Emulate `push_frame` by placing `lr` and `fp` onto the stack, in
2017             // that order, at the top of the allocated area.
2018             self.store_ne::<_, crate::PushFrameSave>(
2019                 AddrO32 {
2020                     addr: XReg::sp,
2021                     offset: (full_amt - 1 * ptr_size) as i32,
2022                 },
2023                 self.state.lr,
2024             )?;
2025             self.store_ne::<_, crate::PushFrameSave>(
2026                 AddrO32 {
2027                     addr: XReg::sp,
2028                     offset: (full_amt - 2 * ptr_size) as i32,
2029                 },
2030                 self.state.fp,
2031             )?;
2032 
2033             // Set `fp` to the top of our frame, where `fp` is stored.
2034             let mut offset = amt as i32;
2035             self.state.fp = self.state[XReg::sp]
2036                 .get_ptr::<u8>()
2037                 .byte_offset(offset as isize);
2038 
2039             // Next save any registers in `regs` to the stack.
2040             for reg in regs {
2041                 offset -= 8;
2042                 self.store_ne::<_, crate::PushFrameSave>(
2043                     AddrO32 {
2044                         addr: XReg::sp,
2045                         offset,
2046                     },
2047                     self.state[reg].get_u64(),
2048                 )?;
2049             }
2050         }
2051         ControlFlow::Continue(())
2052     }
2053 
pop_frame_restore(&mut self, amt: u16, regs: UpperRegSet<XReg>) -> ControlFlow<Done>2054     fn pop_frame_restore(&mut self, amt: u16, regs: UpperRegSet<XReg>) -> ControlFlow<Done> {
2055         // Restore all registers in `regs`, followed by the normal `pop_frame`
2056         // opcode below to restore fp/lr.
2057         unsafe {
2058             let mut offset = i32::from(amt);
2059             for reg in regs {
2060                 offset -= 8;
2061                 let val = self.load_ne::<_, crate::PopFrameRestore>(AddrO32 {
2062                     addr: XReg::sp,
2063                     offset,
2064                 })?;
2065                 self.state[reg].set_u64(val);
2066             }
2067         }
2068         self.pop_frame()
2069     }
2070 
pop_frame(&mut self) -> ControlFlow<Done>2071     fn pop_frame(&mut self) -> ControlFlow<Done> {
2072         self.set_sp_unchecked(self.state.fp);
2073         let fp = self.pop();
2074         let lr = self.pop();
2075         self.state.fp = fp;
2076         self.state.lr = lr;
2077         ControlFlow::Continue(())
2078     }
2079 
br_table32(&mut self, idx: XReg, amt: u32) -> ControlFlow<Done>2080     fn br_table32(&mut self, idx: XReg, amt: u32) -> ControlFlow<Done> {
2081         let idx = self.state[idx].get_u32().min(amt - 1) as isize;
2082         // SAFETY: part of the contract of the interpreter is only dealing with
2083         // valid bytecode, so this offset should be safe.
2084         self.pc = unsafe { self.pc.offset(idx * 4) };
2085 
2086         // Decode the `PcRelOffset` without tampering with `self.pc` as the
2087         // jump is relative to `self.pc`.
2088         let mut tmp = self.pc;
2089         let Ok(rel) = PcRelOffset::decode(&mut tmp);
2090         let offset = isize::try_from(i32::from(rel)).unwrap();
2091         self.pc = unsafe { self.pc.offset(offset) };
2092         ControlFlow::Continue(())
2093     }
2094 
stack_alloc32(&mut self, amt: u32) -> ControlFlow<Done>2095     fn stack_alloc32(&mut self, amt: u32) -> ControlFlow<Done> {
2096         let amt = usize::try_from(amt).unwrap();
2097         let new_sp = self.state[XReg::sp].get_ptr::<u8>().wrapping_sub(amt);
2098         self.set_sp::<crate::StackAlloc32>(new_sp)?;
2099         ControlFlow::Continue(())
2100     }
2101 
stack_free32(&mut self, amt: u32) -> ControlFlow<Done>2102     fn stack_free32(&mut self, amt: u32) -> ControlFlow<Done> {
2103         let amt = usize::try_from(amt).unwrap();
2104         let new_sp = self.state[XReg::sp].get_ptr::<u8>().wrapping_add(amt);
2105         self.set_sp_unchecked(new_sp);
2106         ControlFlow::Continue(())
2107     }
2108 
zext8(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done>2109     fn zext8(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2110         let src = self.state[src].get_u64() as u8;
2111         self.state[dst].set_u64(src.into());
2112         ControlFlow::Continue(())
2113     }
2114 
zext16(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done>2115     fn zext16(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2116         let src = self.state[src].get_u64() as u16;
2117         self.state[dst].set_u64(src.into());
2118         ControlFlow::Continue(())
2119     }
2120 
zext32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done>2121     fn zext32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2122         let src = self.state[src].get_u64() as u32;
2123         self.state[dst].set_u64(src.into());
2124         ControlFlow::Continue(())
2125     }
2126 
sext8(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done>2127     fn sext8(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2128         let src = self.state[src].get_i64() as i8;
2129         self.state[dst].set_i64(src.into());
2130         ControlFlow::Continue(())
2131     }
2132 
sext16(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done>2133     fn sext16(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2134         let src = self.state[src].get_i64() as i16;
2135         self.state[dst].set_i64(src.into());
2136         ControlFlow::Continue(())
2137     }
2138 
sext32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done>2139     fn sext32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2140         let src = self.state[src].get_i64() as i32;
2141         self.state[dst].set_i64(src.into());
2142         ControlFlow::Continue(())
2143     }
2144 
xdiv32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>2145     fn xdiv32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2146         let a = self.state[operands.src1].get_i32();
2147         let b = self.state[operands.src2].get_i32();
2148         match a.checked_div(b) {
2149             Some(result) => {
2150                 self.state[operands.dst].set_i32(result);
2151                 ControlFlow::Continue(())
2152             }
2153             None => {
2154                 let kind = if b == 0 {
2155                     TrapKind::DivideByZero
2156                 } else {
2157                     TrapKind::IntegerOverflow
2158                 };
2159                 self.done_trap_kind::<crate::XDiv32S>(Some(kind))
2160             }
2161         }
2162     }
2163 
xdiv64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>2164     fn xdiv64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2165         let a = self.state[operands.src1].get_i64();
2166         let b = self.state[operands.src2].get_i64();
2167         match a.checked_div(b) {
2168             Some(result) => {
2169                 self.state[operands.dst].set_i64(result);
2170                 ControlFlow::Continue(())
2171             }
2172             None => {
2173                 let kind = if b == 0 {
2174                     TrapKind::DivideByZero
2175                 } else {
2176                     TrapKind::IntegerOverflow
2177                 };
2178                 self.done_trap_kind::<crate::XDiv64S>(Some(kind))
2179             }
2180         }
2181     }
2182 
xdiv32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>2183     fn xdiv32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2184         let a = self.state[operands.src1].get_u32();
2185         let b = self.state[operands.src2].get_u32();
2186         match a.checked_div(b) {
2187             Some(result) => {
2188                 self.state[operands.dst].set_u32(result);
2189                 ControlFlow::Continue(())
2190             }
2191             None => self.done_trap_kind::<crate::XDiv32U>(Some(TrapKind::DivideByZero)),
2192         }
2193     }
2194 
xdiv64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>2195     fn xdiv64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2196         let a = self.state[operands.src1].get_u64();
2197         let b = self.state[operands.src2].get_u64();
2198         match a.checked_div(b) {
2199             Some(result) => {
2200                 self.state[operands.dst].set_u64(result);
2201                 ControlFlow::Continue(())
2202             }
2203             None => self.done_trap_kind::<crate::XDiv64U>(Some(TrapKind::DivideByZero)),
2204         }
2205     }
2206 
xrem32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>2207     fn xrem32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2208         let a = self.state[operands.src1].get_i32();
2209         let b = self.state[operands.src2].get_i32();
2210         let result = if a == i32::MIN && b == -1 {
2211             Some(0)
2212         } else {
2213             a.checked_rem(b)
2214         };
2215         match result {
2216             Some(result) => {
2217                 self.state[operands.dst].set_i32(result);
2218                 ControlFlow::Continue(())
2219             }
2220             None => self.done_trap_kind::<crate::XRem32S>(Some(TrapKind::DivideByZero)),
2221         }
2222     }
2223 
xrem64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>2224     fn xrem64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2225         let a = self.state[operands.src1].get_i64();
2226         let b = self.state[operands.src2].get_i64();
2227         let result = if a == i64::MIN && b == -1 {
2228             Some(0)
2229         } else {
2230             a.checked_rem(b)
2231         };
2232         match result {
2233             Some(result) => {
2234                 self.state[operands.dst].set_i64(result);
2235                 ControlFlow::Continue(())
2236             }
2237             None => self.done_trap_kind::<crate::XRem64S>(Some(TrapKind::DivideByZero)),
2238         }
2239     }
2240 
xrem32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>2241     fn xrem32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2242         let a = self.state[operands.src1].get_u32();
2243         let b = self.state[operands.src2].get_u32();
2244         match a.checked_rem(b) {
2245             Some(result) => {
2246                 self.state[operands.dst].set_u32(result);
2247                 ControlFlow::Continue(())
2248             }
2249             None => self.done_trap_kind::<crate::XRem32U>(Some(TrapKind::DivideByZero)),
2250         }
2251     }
2252 
xrem64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>2253     fn xrem64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2254         let a = self.state[operands.src1].get_u64();
2255         let b = self.state[operands.src2].get_u64();
2256         match a.checked_rem(b) {
2257             Some(result) => {
2258                 self.state[operands.dst].set_u64(result);
2259                 ControlFlow::Continue(())
2260             }
2261             None => self.done_trap_kind::<crate::XRem64U>(Some(TrapKind::DivideByZero)),
2262         }
2263     }
2264 
xband32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>2265     fn xband32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2266         let a = self.state[operands.src1].get_u32();
2267         let b = self.state[operands.src2].get_u32();
2268         self.state[operands.dst].set_u32(a & b);
2269         ControlFlow::Continue(())
2270     }
2271 
xband32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done>2272     fn xband32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2273         self.xband32_s32(dst, src1, src2.into())
2274     }
2275 
xband32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done>2276     fn xband32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2277         let a = self.state[src1].get_i32();
2278         self.state[dst].set_i32(a & src2);
2279         ControlFlow::Continue(())
2280     }
2281 
xband64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>2282     fn xband64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2283         let a = self.state[operands.src1].get_u64();
2284         let b = self.state[operands.src2].get_u64();
2285         self.state[operands.dst].set_u64(a & b);
2286         ControlFlow::Continue(())
2287     }
2288 
xband64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done>2289     fn xband64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2290         self.xband64_s32(dst, src1, src2.into())
2291     }
2292 
xband64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done>2293     fn xband64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2294         let a = self.state[src1].get_i64();
2295         self.state[dst].set_i64(a & i64::from(src2));
2296         ControlFlow::Continue(())
2297     }
2298 
xbor32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>2299     fn xbor32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2300         let a = self.state[operands.src1].get_u32();
2301         let b = self.state[operands.src2].get_u32();
2302         self.state[operands.dst].set_u32(a | b);
2303         ControlFlow::Continue(())
2304     }
2305 
xbor32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done>2306     fn xbor32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2307         self.xbor32_s32(dst, src1, src2.into())
2308     }
2309 
xbor32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done>2310     fn xbor32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2311         let a = self.state[src1].get_i32();
2312         self.state[dst].set_i32(a | src2);
2313         ControlFlow::Continue(())
2314     }
2315 
xbor64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>2316     fn xbor64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2317         let a = self.state[operands.src1].get_u64();
2318         let b = self.state[operands.src2].get_u64();
2319         self.state[operands.dst].set_u64(a | b);
2320         ControlFlow::Continue(())
2321     }
2322 
xbor64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done>2323     fn xbor64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2324         self.xbor64_s32(dst, src1, src2.into())
2325     }
2326 
xbor64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done>2327     fn xbor64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2328         let a = self.state[src1].get_i64();
2329         self.state[dst].set_i64(a | i64::from(src2));
2330         ControlFlow::Continue(())
2331     }
2332 
xbxor32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>2333     fn xbxor32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2334         let a = self.state[operands.src1].get_u32();
2335         let b = self.state[operands.src2].get_u32();
2336         self.state[operands.dst].set_u32(a ^ b);
2337         ControlFlow::Continue(())
2338     }
2339 
xbxor32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done>2340     fn xbxor32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2341         self.xbxor32_s32(dst, src1, src2.into())
2342     }
2343 
xbxor32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done>2344     fn xbxor32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2345         let a = self.state[src1].get_i32();
2346         self.state[dst].set_i32(a ^ src2);
2347         ControlFlow::Continue(())
2348     }
2349 
xbxor64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>2350     fn xbxor64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2351         let a = self.state[operands.src1].get_u64();
2352         let b = self.state[operands.src2].get_u64();
2353         self.state[operands.dst].set_u64(a ^ b);
2354         ControlFlow::Continue(())
2355     }
2356 
xbxor64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done>2357     fn xbxor64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2358         self.xbxor64_s32(dst, src1, src2.into())
2359     }
2360 
xbxor64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done>2361     fn xbxor64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2362         let a = self.state[src1].get_i64();
2363         self.state[dst].set_i64(a ^ i64::from(src2));
2364         ControlFlow::Continue(())
2365     }
2366 
xbnot32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done>2367     fn xbnot32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2368         let a = self.state[src].get_u32();
2369         self.state[dst].set_u32(!a);
2370         ControlFlow::Continue(())
2371     }
2372 
xbnot64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done>2373     fn xbnot64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2374         let a = self.state[src].get_u64();
2375         self.state[dst].set_u64(!a);
2376         ControlFlow::Continue(())
2377     }
2378 
xmin32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>2379     fn xmin32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2380         let a = self.state[operands.src1].get_u32();
2381         let b = self.state[operands.src2].get_u32();
2382         self.state[operands.dst].set_u32(a.min(b));
2383         ControlFlow::Continue(())
2384     }
2385 
xmin32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>2386     fn xmin32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2387         let a = self.state[operands.src1].get_i32();
2388         let b = self.state[operands.src2].get_i32();
2389         self.state[operands.dst].set_i32(a.min(b));
2390         ControlFlow::Continue(())
2391     }
2392 
xmax32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>2393     fn xmax32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2394         let a = self.state[operands.src1].get_u32();
2395         let b = self.state[operands.src2].get_u32();
2396         self.state[operands.dst].set_u32(a.max(b));
2397         ControlFlow::Continue(())
2398     }
2399 
xmax32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>2400     fn xmax32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2401         let a = self.state[operands.src1].get_i32();
2402         let b = self.state[operands.src2].get_i32();
2403         self.state[operands.dst].set_i32(a.max(b));
2404         ControlFlow::Continue(())
2405     }
2406 
xmin64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>2407     fn xmin64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2408         let a = self.state[operands.src1].get_u64();
2409         let b = self.state[operands.src2].get_u64();
2410         self.state[operands.dst].set_u64(a.min(b));
2411         ControlFlow::Continue(())
2412     }
2413 
xmin64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>2414     fn xmin64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2415         let a = self.state[operands.src1].get_i64();
2416         let b = self.state[operands.src2].get_i64();
2417         self.state[operands.dst].set_i64(a.min(b));
2418         ControlFlow::Continue(())
2419     }
2420 
xmax64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>2421     fn xmax64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2422         let a = self.state[operands.src1].get_u64();
2423         let b = self.state[operands.src2].get_u64();
2424         self.state[operands.dst].set_u64(a.max(b));
2425         ControlFlow::Continue(())
2426     }
2427 
xmax64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>2428     fn xmax64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2429         let a = self.state[operands.src1].get_i64();
2430         let b = self.state[operands.src2].get_i64();
2431         self.state[operands.dst].set_i64(a.max(b));
2432         ControlFlow::Continue(())
2433     }
2434 
xctz32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done>2435     fn xctz32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2436         let a = self.state[src].get_u32();
2437         self.state[dst].set_u32(a.trailing_zeros());
2438         ControlFlow::Continue(())
2439     }
2440 
xctz64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done>2441     fn xctz64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2442         let a = self.state[src].get_u64();
2443         self.state[dst].set_u64(a.trailing_zeros().into());
2444         ControlFlow::Continue(())
2445     }
2446 
xclz32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done>2447     fn xclz32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2448         let a = self.state[src].get_u32();
2449         self.state[dst].set_u32(a.leading_zeros());
2450         ControlFlow::Continue(())
2451     }
2452 
xclz64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done>2453     fn xclz64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2454         let a = self.state[src].get_u64();
2455         self.state[dst].set_u64(a.leading_zeros().into());
2456         ControlFlow::Continue(())
2457     }
2458 
xpopcnt32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done>2459     fn xpopcnt32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2460         let a = self.state[src].get_u32();
2461         self.state[dst].set_u32(a.count_ones());
2462         ControlFlow::Continue(())
2463     }
2464 
xpopcnt64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done>2465     fn xpopcnt64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2466         let a = self.state[src].get_u64();
2467         self.state[dst].set_u64(a.count_ones().into());
2468         ControlFlow::Continue(())
2469     }
2470 
xrotl32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>2471     fn xrotl32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2472         let a = self.state[operands.src1].get_u32();
2473         let b = self.state[operands.src2].get_u32();
2474         self.state[operands.dst].set_u32(a.rotate_left(b));
2475         ControlFlow::Continue(())
2476     }
2477 
xrotl64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>2478     fn xrotl64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2479         let a = self.state[operands.src1].get_u64();
2480         let b = self.state[operands.src2].get_u32();
2481         self.state[operands.dst].set_u64(a.rotate_left(b));
2482         ControlFlow::Continue(())
2483     }
2484 
xrotr32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>2485     fn xrotr32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2486         let a = self.state[operands.src1].get_u32();
2487         let b = self.state[operands.src2].get_u32();
2488         self.state[operands.dst].set_u32(a.rotate_right(b));
2489         ControlFlow::Continue(())
2490     }
2491 
xrotr64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>2492     fn xrotr64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2493         let a = self.state[operands.src1].get_u64();
2494         let b = self.state[operands.src2].get_u32();
2495         self.state[operands.dst].set_u64(a.rotate_right(b));
2496         ControlFlow::Continue(())
2497     }
2498 
xselect32( &mut self, dst: XReg, cond: XReg, if_nonzero: XReg, if_zero: XReg, ) -> ControlFlow<Done>2499     fn xselect32(
2500         &mut self,
2501         dst: XReg,
2502         cond: XReg,
2503         if_nonzero: XReg,
2504         if_zero: XReg,
2505     ) -> ControlFlow<Done> {
2506         let result = if self.state[cond].get_u32() != 0 {
2507             self.state[if_nonzero].get_u32()
2508         } else {
2509             self.state[if_zero].get_u32()
2510         };
2511         self.state[dst].set_u32(result);
2512         ControlFlow::Continue(())
2513     }
2514 
xselect64( &mut self, dst: XReg, cond: XReg, if_nonzero: XReg, if_zero: XReg, ) -> ControlFlow<Done>2515     fn xselect64(
2516         &mut self,
2517         dst: XReg,
2518         cond: XReg,
2519         if_nonzero: XReg,
2520         if_zero: XReg,
2521     ) -> ControlFlow<Done> {
2522         let result = if self.state[cond].get_u32() != 0 {
2523             self.state[if_nonzero].get_u64()
2524         } else {
2525             self.state[if_zero].get_u64()
2526         };
2527         self.state[dst].set_u64(result);
2528         ControlFlow::Continue(())
2529     }
2530 
xabs32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done>2531     fn xabs32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2532         let a = self.state[src].get_i32();
2533         self.state[dst].set_i32(a.wrapping_abs());
2534         ControlFlow::Continue(())
2535     }
2536 
xabs64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done>2537     fn xabs64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2538         let a = self.state[src].get_i64();
2539         self.state[dst].set_i64(a.wrapping_abs());
2540         ControlFlow::Continue(())
2541     }
2542 
2543     // =========================================================================
2544     // o32 addressing modes
2545 
xload8_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done>2546     fn xload8_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2547         let result = unsafe { self.load_ne::<u8, crate::XLoad8U32O32>(addr)? };
2548         self.state[dst].set_u32(result.into());
2549         ControlFlow::Continue(())
2550     }
2551 
xload8_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done>2552     fn xload8_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2553         let result = unsafe { self.load_ne::<i8, crate::XLoad8S32O32>(addr)? };
2554         self.state[dst].set_i32(result.into());
2555         ControlFlow::Continue(())
2556     }
2557 
xload16le_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done>2558     fn xload16le_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2559         let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32O32>(addr)? };
2560         self.state[dst].set_u32(u16::from_le(result).into());
2561         ControlFlow::Continue(())
2562     }
2563 
xload16le_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done>2564     fn xload16le_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2565         let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32O32>(addr)? };
2566         self.state[dst].set_i32(i16::from_le(result).into());
2567         ControlFlow::Continue(())
2568     }
2569 
xload32le_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done>2570     fn xload32le_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2571         let result = unsafe { self.load_ne::<i32, crate::XLoad32LeO32>(addr)? };
2572         self.state[dst].set_i32(i32::from_le(result));
2573         ControlFlow::Continue(())
2574     }
2575 
xload64le_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done>2576     fn xload64le_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2577         let result = unsafe { self.load_ne::<i64, crate::XLoad64LeO32>(addr)? };
2578         self.state[dst].set_i64(i64::from_le(result));
2579         ControlFlow::Continue(())
2580     }
2581 
xstore8_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done>2582     fn xstore8_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2583         let val = self.state[val].get_u32() as u8;
2584         unsafe {
2585             self.store_ne::<u8, crate::XStore8O32>(addr, val)?;
2586         }
2587         ControlFlow::Continue(())
2588     }
2589 
xstore16le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done>2590     fn xstore16le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2591         let val = self.state[val].get_u32() as u16;
2592         unsafe {
2593             self.store_ne::<u16, crate::XStore16LeO32>(addr, val.to_le())?;
2594         }
2595         ControlFlow::Continue(())
2596     }
2597 
xstore32le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done>2598     fn xstore32le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2599         let val = self.state[val].get_u32();
2600         unsafe {
2601             self.store_ne::<u32, crate::XStore32LeO32>(addr, val.to_le())?;
2602         }
2603         ControlFlow::Continue(())
2604     }
2605 
xstore64le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done>2606     fn xstore64le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2607         let val = self.state[val].get_u64();
2608         unsafe {
2609             self.store_ne::<u64, crate::XStore64LeO32>(addr, val.to_le())?;
2610         }
2611         ControlFlow::Continue(())
2612     }
2613 
2614     // =========================================================================
2615     // g32 addressing modes
2616 
xload8_u32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done>2617     fn xload8_u32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2618         let result = unsafe { self.load_ne::<u8, crate::XLoad8U32G32>(addr)? };
2619         self.state[dst].set_u32(result.into());
2620         ControlFlow::Continue(())
2621     }
2622 
xload8_s32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done>2623     fn xload8_s32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2624         let result = unsafe { self.load_ne::<i8, crate::XLoad8S32G32>(addr)? };
2625         self.state[dst].set_i32(result.into());
2626         ControlFlow::Continue(())
2627     }
2628 
xload16le_u32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done>2629     fn xload16le_u32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2630         let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32G32>(addr)? };
2631         self.state[dst].set_u32(u16::from_le(result).into());
2632         ControlFlow::Continue(())
2633     }
2634 
xload16le_s32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done>2635     fn xload16le_s32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2636         let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32G32>(addr)? };
2637         self.state[dst].set_i32(i16::from_le(result).into());
2638         ControlFlow::Continue(())
2639     }
2640 
xload32le_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done>2641     fn xload32le_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2642         let result = unsafe { self.load_ne::<i32, crate::XLoad32LeG32>(addr)? };
2643         self.state[dst].set_i32(i32::from_le(result));
2644         ControlFlow::Continue(())
2645     }
2646 
xload64le_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done>2647     fn xload64le_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2648         let result = unsafe { self.load_ne::<i64, crate::XLoad64LeG32>(addr)? };
2649         self.state[dst].set_i64(i64::from_le(result));
2650         ControlFlow::Continue(())
2651     }
2652 
xstore8_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done>2653     fn xstore8_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2654         let val = self.state[val].get_u32() as u8;
2655         unsafe {
2656             self.store_ne::<u8, crate::XStore8G32>(addr, val)?;
2657         }
2658         ControlFlow::Continue(())
2659     }
2660 
xstore16le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done>2661     fn xstore16le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2662         let val = self.state[val].get_u32() as u16;
2663         unsafe {
2664             self.store_ne::<u16, crate::XStore16LeG32>(addr, val.to_le())?;
2665         }
2666         ControlFlow::Continue(())
2667     }
2668 
xstore32le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done>2669     fn xstore32le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2670         let val = self.state[val].get_u32();
2671         unsafe {
2672             self.store_ne::<u32, crate::XStore32LeG32>(addr, val.to_le())?;
2673         }
2674         ControlFlow::Continue(())
2675     }
2676 
xstore64le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done>2677     fn xstore64le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2678         let val = self.state[val].get_u64();
2679         unsafe {
2680             self.store_ne::<u64, crate::XStore64LeG32>(addr, val.to_le())?;
2681         }
2682         ControlFlow::Continue(())
2683     }
2684 
2685     // =========================================================================
2686     // z addressing modes
2687 
xload8_u32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done>2688     fn xload8_u32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2689         let result = unsafe { self.load_ne::<u8, crate::XLoad8U32Z>(addr)? };
2690         self.state[dst].set_u32(result.into());
2691         ControlFlow::Continue(())
2692     }
2693 
xload8_s32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done>2694     fn xload8_s32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2695         let result = unsafe { self.load_ne::<i8, crate::XLoad8S32Z>(addr)? };
2696         self.state[dst].set_i32(result.into());
2697         ControlFlow::Continue(())
2698     }
2699 
xload16le_u32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done>2700     fn xload16le_u32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2701         let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32Z>(addr)? };
2702         self.state[dst].set_u32(u16::from_le(result).into());
2703         ControlFlow::Continue(())
2704     }
2705 
xload16le_s32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done>2706     fn xload16le_s32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2707         let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32Z>(addr)? };
2708         self.state[dst].set_i32(i16::from_le(result).into());
2709         ControlFlow::Continue(())
2710     }
2711 
xload32le_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done>2712     fn xload32le_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2713         let result = unsafe { self.load_ne::<i32, crate::XLoad32LeZ>(addr)? };
2714         self.state[dst].set_i32(i32::from_le(result));
2715         ControlFlow::Continue(())
2716     }
2717 
xload64le_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done>2718     fn xload64le_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2719         let result = unsafe { self.load_ne::<i64, crate::XLoad64LeZ>(addr)? };
2720         self.state[dst].set_i64(i64::from_le(result));
2721         ControlFlow::Continue(())
2722     }
2723 
xstore8_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done>2724     fn xstore8_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2725         let val = self.state[val].get_u32() as u8;
2726         unsafe {
2727             self.store_ne::<u8, crate::XStore8Z>(addr, val)?;
2728         }
2729         ControlFlow::Continue(())
2730     }
2731 
xstore16le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done>2732     fn xstore16le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2733         let val = self.state[val].get_u32() as u16;
2734         unsafe {
2735             self.store_ne::<u16, crate::XStore16LeZ>(addr, val.to_le())?;
2736         }
2737         ControlFlow::Continue(())
2738     }
2739 
xstore32le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done>2740     fn xstore32le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2741         let val = self.state[val].get_u32();
2742         unsafe {
2743             self.store_ne::<u32, crate::XStore32LeZ>(addr, val.to_le())?;
2744         }
2745         ControlFlow::Continue(())
2746     }
2747 
xstore64le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done>2748     fn xstore64le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2749         let val = self.state[val].get_u64();
2750         unsafe {
2751             self.store_ne::<u64, crate::XStore64LeZ>(addr, val.to_le())?;
2752         }
2753         ControlFlow::Continue(())
2754     }
2755 
2756     // =========================================================================
2757     // g32bne addressing modes
2758 
xload8_u32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done>2759     fn xload8_u32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2760         let result = unsafe { self.load_ne::<u8, crate::XLoad8U32G32Bne>(addr)? };
2761         self.state[dst].set_u32(result.into());
2762         ControlFlow::Continue(())
2763     }
2764 
xload8_s32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done>2765     fn xload8_s32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2766         let result = unsafe { self.load_ne::<i8, crate::XLoad8S32G32Bne>(addr)? };
2767         self.state[dst].set_i32(result.into());
2768         ControlFlow::Continue(())
2769     }
2770 
xload16le_u32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done>2771     fn xload16le_u32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2772         let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32G32Bne>(addr)? };
2773         self.state[dst].set_u32(u16::from_le(result).into());
2774         ControlFlow::Continue(())
2775     }
2776 
xload16le_s32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done>2777     fn xload16le_s32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2778         let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32G32Bne>(addr)? };
2779         self.state[dst].set_i32(i16::from_le(result).into());
2780         ControlFlow::Continue(())
2781     }
2782 
xload32le_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done>2783     fn xload32le_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2784         let result = unsafe { self.load_ne::<i32, crate::XLoad32LeG32Bne>(addr)? };
2785         self.state[dst].set_i32(i32::from_le(result));
2786         ControlFlow::Continue(())
2787     }
2788 
xload64le_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done>2789     fn xload64le_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2790         let result = unsafe { self.load_ne::<i64, crate::XLoad64LeG32Bne>(addr)? };
2791         self.state[dst].set_i64(i64::from_le(result));
2792         ControlFlow::Continue(())
2793     }
2794 
xstore8_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done>2795     fn xstore8_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2796         let val = self.state[val].get_u32() as u8;
2797         unsafe {
2798             self.store_ne::<u8, crate::XStore8G32Bne>(addr, val)?;
2799         }
2800         ControlFlow::Continue(())
2801     }
2802 
xstore16le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done>2803     fn xstore16le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2804         let val = self.state[val].get_u32() as u16;
2805         unsafe {
2806             self.store_ne::<u16, crate::XStore16LeG32Bne>(addr, val.to_le())?;
2807         }
2808         ControlFlow::Continue(())
2809     }
2810 
xstore32le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done>2811     fn xstore32le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2812         let val = self.state[val].get_u32();
2813         unsafe {
2814             self.store_ne::<u32, crate::XStore32LeG32Bne>(addr, val.to_le())?;
2815         }
2816         ControlFlow::Continue(())
2817     }
2818 
xstore64le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done>2819     fn xstore64le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2820         let val = self.state[val].get_u64();
2821         unsafe {
2822             self.store_ne::<u64, crate::XStore64LeG32Bne>(addr, val.to_le())?;
2823         }
2824         ControlFlow::Continue(())
2825     }
2826 }
2827 
2828 impl ExtendedOpVisitor for Interpreter<'_> {
    /// Handles the `Trap` opcode by delegating to `done_trap`, parameterized
    /// on `crate::Trap`, and returning whatever control-flow value it yields.
    fn trap(&mut self) -> ControlFlow<Done> {
        self.done_trap::<crate::Trap>()
    }
2832 
    /// Handles the `call_indirect_host` opcode by forwarding `id` to
    /// `done_call_indirect_host` and returning its control-flow value.
    fn call_indirect_host(&mut self, id: u8) -> ControlFlow<Done> {
        self.done_call_indirect_host(id)
    }
2836 
xpcadd(&mut self, dst: XReg, offset: PcRelOffset) -> ControlFlow<Done>2837     fn xpcadd(&mut self, dst: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
2838         let pc = self.pc_rel::<crate::Xpcadd>(offset);
2839         self.state[dst].set_ptr(pc.as_ptr());
2840         ControlFlow::Continue(())
2841     }
2842 
bswap32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done>2843     fn bswap32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2844         let src = self.state[src].get_u32();
2845         self.state[dst].set_u32(src.swap_bytes());
2846         ControlFlow::Continue(())
2847     }
2848 
bswap64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done>2849     fn bswap64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2850         let src = self.state[src].get_u64();
2851         self.state[dst].set_u64(src.swap_bytes());
2852         ControlFlow::Continue(())
2853     }
2854 
xbmask32(&mut self, dst: XReg, src: XReg) -> Self::Return2855     fn xbmask32(&mut self, dst: XReg, src: XReg) -> Self::Return {
2856         let a = self.state[src].get_u32();
2857         if a == 0 {
2858             self.state[dst].set_u32(0);
2859         } else {
2860             self.state[dst].set_i32(-1);
2861         }
2862         ControlFlow::Continue(())
2863     }
2864 
xbmask64(&mut self, dst: XReg, src: XReg) -> Self::Return2865     fn xbmask64(&mut self, dst: XReg, src: XReg) -> Self::Return {
2866         let a = self.state[src].get_u64();
2867         if a == 0 {
2868             self.state[dst].set_u64(0);
2869         } else {
2870             self.state[dst].set_i64(-1);
2871         }
2872         ControlFlow::Continue(())
2873     }
2874 
xadd32_uoverflow_trap(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>2875     fn xadd32_uoverflow_trap(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2876         let a = self.state[operands.src1].get_u32();
2877         let b = self.state[operands.src2].get_u32();
2878         match a.checked_add(b) {
2879             Some(c) => {
2880                 self.state[operands.dst].set_u32(c);
2881                 ControlFlow::Continue(())
2882             }
2883             None => self.done_trap::<crate::Xadd32UoverflowTrap>(),
2884         }
2885     }
2886 
xadd64_uoverflow_trap(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>2887     fn xadd64_uoverflow_trap(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2888         let a = self.state[operands.src1].get_u64();
2889         let b = self.state[operands.src2].get_u64();
2890         match a.checked_add(b) {
2891             Some(c) => {
2892                 self.state[operands.dst].set_u64(c);
2893                 ControlFlow::Continue(())
2894             }
2895             None => self.done_trap::<crate::Xadd64UoverflowTrap>(),
2896         }
2897     }
2898 
xmulhi64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>2899     fn xmulhi64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2900         let a = self.state[operands.src1].get_i64();
2901         let b = self.state[operands.src2].get_i64();
2902         let result = ((i128::from(a) * i128::from(b)) >> 64) as i64;
2903         self.state[operands.dst].set_i64(result);
2904         ControlFlow::Continue(())
2905     }
2906 
xmulhi64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done>2907     fn xmulhi64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2908         let a = self.state[operands.src1].get_u64();
2909         let b = self.state[operands.src2].get_u64();
2910         let result = ((u128::from(a) * u128::from(b)) >> 64) as u64;
2911         self.state[operands.dst].set_u64(result);
2912         ControlFlow::Continue(())
2913     }
2914 
2915     // =========================================================================
2916     // o32 addressing modes for big-endian X-registers
2917 
xload16be_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done>2918     fn xload16be_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2919         let result = unsafe { self.load_ne::<u16, crate::XLoad16BeU32O32>(addr)? };
2920         self.state[dst].set_u32(u16::from_be(result).into());
2921         ControlFlow::Continue(())
2922     }
2923 
xload16be_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done>2924     fn xload16be_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2925         let result = unsafe { self.load_ne::<i16, crate::XLoad16BeS32O32>(addr)? };
2926         self.state[dst].set_i32(i16::from_be(result).into());
2927         ControlFlow::Continue(())
2928     }
2929 
xload32be_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done>2930     fn xload32be_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2931         let result = unsafe { self.load_ne::<i32, crate::XLoad32BeO32>(addr)? };
2932         self.state[dst].set_i32(i32::from_be(result));
2933         ControlFlow::Continue(())
2934     }
2935 
xload64be_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done>2936     fn xload64be_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2937         let result = unsafe { self.load_ne::<i64, crate::XLoad64BeO32>(addr)? };
2938         self.state[dst].set_i64(i64::from_be(result));
2939         ControlFlow::Continue(())
2940     }
2941 
xstore16be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done>2942     fn xstore16be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2943         let val = self.state[val].get_u32() as u16;
2944         unsafe {
2945             self.store_ne::<u16, crate::XStore16BeO32>(addr, val.to_be())?;
2946         }
2947         ControlFlow::Continue(())
2948     }
2949 
xstore32be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done>2950     fn xstore32be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2951         let val = self.state[val].get_u32();
2952         unsafe {
2953             self.store_ne::<u32, crate::XStore32BeO32>(addr, val.to_be())?;
2954         }
2955         ControlFlow::Continue(())
2956     }
2957 
xstore64be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done>2958     fn xstore64be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2959         let val = self.state[val].get_u64();
2960         unsafe {
2961             self.store_ne::<u64, crate::XStore64BeO32>(addr, val.to_be())?;
2962         }
2963         ControlFlow::Continue(())
2964     }
2965 
2966     // =========================================================================
2967     // o32 addressing modes for little-endian F-registers
2968 
fload32le_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done>2969     fn fload32le_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
2970         let val = unsafe { self.load_ne::<u32, crate::Fload32LeO32>(addr)? };
2971         self.state[dst].set_f32(f32::from_bits(u32::from_le(val)));
2972         ControlFlow::Continue(())
2973     }
2974 
fload64le_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done>2975     fn fload64le_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
2976         let val = unsafe { self.load_ne::<u64, crate::Fload64LeO32>(addr)? };
2977         self.state[dst].set_f64(f64::from_bits(u64::from_le(val)));
2978         ControlFlow::Continue(())
2979     }
2980 
fstore32le_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done>2981     fn fstore32le_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
2982         let val = self.state[src].get_f32();
2983         unsafe {
2984             self.store_ne::<u32, crate::Fstore32LeO32>(addr, val.to_bits().to_le())?;
2985         }
2986         ControlFlow::Continue(())
2987     }
2988 
fstore64le_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done>2989     fn fstore64le_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
2990         let val = self.state[src].get_f64();
2991         unsafe {
2992             self.store_ne::<u64, crate::Fstore64LeO32>(addr, val.to_bits().to_le())?;
2993         }
2994         ControlFlow::Continue(())
2995     }
2996 
2997     // =========================================================================
2998     // o32 addressing modes for big-endian F-registers
2999 
fload32be_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done>3000     fn fload32be_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
3001         let val = unsafe { self.load_ne::<u32, crate::Fload32BeO32>(addr)? };
3002         self.state[dst].set_f32(f32::from_bits(u32::from_be(val)));
3003         ControlFlow::Continue(())
3004     }
3005 
fload64be_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done>3006     fn fload64be_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
3007         let val = unsafe { self.load_ne::<u64, crate::Fload64BeO32>(addr)? };
3008         self.state[dst].set_f64(f64::from_bits(u64::from_be(val)));
3009         ControlFlow::Continue(())
3010     }
3011 
fstore32be_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done>3012     fn fstore32be_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
3013         let val = self.state[src].get_f32();
3014         unsafe {
3015             self.store_ne::<u32, crate::Fstore32BeO32>(addr, val.to_bits().to_be())?;
3016         }
3017         ControlFlow::Continue(())
3018     }
3019 
fstore64be_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done>3020     fn fstore64be_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
3021         let val = self.state[src].get_f64();
3022         unsafe {
3023             self.store_ne::<u64, crate::Fstore64BeO32>(addr, val.to_bits().to_be())?;
3024         }
3025         ControlFlow::Continue(())
3026     }
3027 
3028     // =========================================================================
3029     // z addressing modes for little-endian F-registers
3030 
fload32le_z(&mut self, dst: FReg, addr: AddrZ) -> ControlFlow<Done>3031     fn fload32le_z(&mut self, dst: FReg, addr: AddrZ) -> ControlFlow<Done> {
3032         let val = unsafe { self.load_ne::<u32, crate::Fload32LeZ>(addr)? };
3033         self.state[dst].set_f32(f32::from_bits(u32::from_le(val)));
3034         ControlFlow::Continue(())
3035     }
3036 
fload64le_z(&mut self, dst: FReg, addr: AddrZ) -> ControlFlow<Done>3037     fn fload64le_z(&mut self, dst: FReg, addr: AddrZ) -> ControlFlow<Done> {
3038         let val = unsafe { self.load_ne::<u64, crate::Fload64LeZ>(addr)? };
3039         self.state[dst].set_f64(f64::from_bits(u64::from_le(val)));
3040         ControlFlow::Continue(())
3041     }
3042 
fstore32le_z(&mut self, addr: AddrZ, src: FReg) -> ControlFlow<Done>3043     fn fstore32le_z(&mut self, addr: AddrZ, src: FReg) -> ControlFlow<Done> {
3044         let val = self.state[src].get_f32();
3045         unsafe {
3046             self.store_ne::<u32, crate::Fstore32LeZ>(addr, val.to_bits().to_le())?;
3047         }
3048         ControlFlow::Continue(())
3049     }
3050 
fstore64le_z(&mut self, addr: AddrZ, src: FReg) -> ControlFlow<Done>3051     fn fstore64le_z(&mut self, addr: AddrZ, src: FReg) -> ControlFlow<Done> {
3052         let val = self.state[src].get_f64();
3053         unsafe {
3054             self.store_ne::<u64, crate::Fstore64LeZ>(addr, val.to_bits().to_le())?;
3055         }
3056         ControlFlow::Continue(())
3057     }
3058 
3059     // =========================================================================
3060     // g32 addressing modes for little-endian F-registers
3061 
fload32le_g32(&mut self, dst: FReg, addr: AddrG32) -> ControlFlow<Done>3062     fn fload32le_g32(&mut self, dst: FReg, addr: AddrG32) -> ControlFlow<Done> {
3063         let val = unsafe { self.load_ne::<u32, crate::Fload32LeG32>(addr)? };
3064         self.state[dst].set_f32(f32::from_bits(u32::from_le(val)));
3065         ControlFlow::Continue(())
3066     }
3067 
fload64le_g32(&mut self, dst: FReg, addr: AddrG32) -> ControlFlow<Done>3068     fn fload64le_g32(&mut self, dst: FReg, addr: AddrG32) -> ControlFlow<Done> {
3069         let val = unsafe { self.load_ne::<u64, crate::Fload64LeG32>(addr)? };
3070         self.state[dst].set_f64(f64::from_bits(u64::from_le(val)));
3071         ControlFlow::Continue(())
3072     }
3073 
fstore32le_g32(&mut self, addr: AddrG32, src: FReg) -> ControlFlow<Done>3074     fn fstore32le_g32(&mut self, addr: AddrG32, src: FReg) -> ControlFlow<Done> {
3075         let val = self.state[src].get_f32();
3076         unsafe {
3077             self.store_ne::<u32, crate::Fstore32LeG32>(addr, val.to_bits().to_le())?;
3078         }
3079         ControlFlow::Continue(())
3080     }
3081 
fstore64le_g32(&mut self, addr: AddrG32, src: FReg) -> ControlFlow<Done>3082     fn fstore64le_g32(&mut self, addr: AddrG32, src: FReg) -> ControlFlow<Done> {
3083         let val = self.state[src].get_f64();
3084         unsafe {
3085             self.store_ne::<u64, crate::Fstore64LeG32>(addr, val.to_bits().to_le())?;
3086         }
3087         ControlFlow::Continue(())
3088     }
3089 
3090     // =========================================================================
3091     // o32 addressing modes for little-endian V-registers
3092 
3093     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vload128le_o32(&mut self, dst: VReg, addr: AddrO32) -> ControlFlow<Done>3094     fn vload128le_o32(&mut self, dst: VReg, addr: AddrO32) -> ControlFlow<Done> {
3095         let val = unsafe { self.load_ne::<u128, crate::VLoad128O32>(addr)? };
3096         self.state[dst].set_u128(u128::from_le(val));
3097         ControlFlow::Continue(())
3098     }
3099 
3100     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vstore128le_o32(&mut self, addr: AddrO32, src: VReg) -> ControlFlow<Done>3101     fn vstore128le_o32(&mut self, addr: AddrO32, src: VReg) -> ControlFlow<Done> {
3102         let val = self.state[src].get_u128();
3103         unsafe {
3104             self.store_ne::<u128, crate::Vstore128LeO32>(addr, val.to_le())?;
3105         }
3106         ControlFlow::Continue(())
3107     }
3108 
3109     // =========================================================================
3110     // z addressing modes for little-endian V-registers
3111 
3112     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vload128le_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done>3113     fn vload128le_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
3114         let val = unsafe { self.load_ne::<u128, crate::VLoad128Z>(addr)? };
3115         self.state[dst].set_u128(u128::from_le(val));
3116         ControlFlow::Continue(())
3117     }
3118 
3119     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vstore128le_z(&mut self, addr: AddrZ, src: VReg) -> ControlFlow<Done>3120     fn vstore128le_z(&mut self, addr: AddrZ, src: VReg) -> ControlFlow<Done> {
3121         let val = self.state[src].get_u128();
3122         unsafe {
3123             self.store_ne::<u128, crate::Vstore128LeZ>(addr, val.to_le())?;
3124         }
3125         ControlFlow::Continue(())
3126     }
3127 
3128     // =========================================================================
3129     // g32 addressing modes for little-endian V-registers
3130 
3131     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vload128le_g32(&mut self, dst: VReg, addr: AddrG32) -> ControlFlow<Done>3132     fn vload128le_g32(&mut self, dst: VReg, addr: AddrG32) -> ControlFlow<Done> {
3133         let val = unsafe { self.load_ne::<u128, crate::VLoad128G32>(addr)? };
3134         self.state[dst].set_u128(u128::from_le(val));
3135         ControlFlow::Continue(())
3136     }
3137 
3138     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vstore128le_g32(&mut self, addr: AddrG32, src: VReg) -> ControlFlow<Done>3139     fn vstore128le_g32(&mut self, addr: AddrG32, src: VReg) -> ControlFlow<Done> {
3140         let val = self.state[src].get_u128();
3141         unsafe {
3142             self.store_ne::<u128, crate::Vstore128LeG32>(addr, val.to_le())?;
3143         }
3144         ControlFlow::Continue(())
3145     }
3146 
xmov_fp(&mut self, dst: XReg) -> ControlFlow<Done>3147     fn xmov_fp(&mut self, dst: XReg) -> ControlFlow<Done> {
3148         let fp = self.state.fp;
3149         self.state[dst].set_ptr(fp);
3150         ControlFlow::Continue(())
3151     }
3152 
xmov_lr(&mut self, dst: XReg) -> ControlFlow<Done>3153     fn xmov_lr(&mut self, dst: XReg) -> ControlFlow<Done> {
3154         let lr = self.state.lr;
3155         self.state[dst].set_ptr(lr);
3156         ControlFlow::Continue(())
3157     }
3158 
fmov(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done>3159     fn fmov(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3160         let val = self.state[src];
3161         self.state[dst] = val;
3162         ControlFlow::Continue(())
3163     }
3164 
3165     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vmov(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>3166     fn vmov(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3167         let val = self.state[src];
3168         self.state[dst] = val;
3169         ControlFlow::Continue(())
3170     }
3171 
fconst32(&mut self, dst: FReg, bits: u32) -> ControlFlow<Done>3172     fn fconst32(&mut self, dst: FReg, bits: u32) -> ControlFlow<Done> {
3173         self.state[dst].set_f32(f32::from_bits(bits));
3174         ControlFlow::Continue(())
3175     }
3176 
fconst64(&mut self, dst: FReg, bits: u64) -> ControlFlow<Done>3177     fn fconst64(&mut self, dst: FReg, bits: u64) -> ControlFlow<Done> {
3178         self.state[dst].set_f64(f64::from_bits(bits));
3179         ControlFlow::Continue(())
3180     }
3181 
bitcast_int_from_float_32(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done>3182     fn bitcast_int_from_float_32(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3183         let val = self.state[src].get_f32();
3184         self.state[dst].set_u32(val.to_bits());
3185         ControlFlow::Continue(())
3186     }
3187 
bitcast_int_from_float_64(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done>3188     fn bitcast_int_from_float_64(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3189         let val = self.state[src].get_f64();
3190         self.state[dst].set_u64(val.to_bits());
3191         ControlFlow::Continue(())
3192     }
3193 
bitcast_float_from_int_32(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done>3194     fn bitcast_float_from_int_32(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3195         let val = self.state[src].get_u32();
3196         self.state[dst].set_f32(f32::from_bits(val));
3197         ControlFlow::Continue(())
3198     }
3199 
bitcast_float_from_int_64(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done>3200     fn bitcast_float_from_int_64(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3201         let val = self.state[src].get_u64();
3202         self.state[dst].set_f64(f64::from_bits(val));
3203         ControlFlow::Continue(())
3204     }
3205 
feq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done>3206     fn feq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3207         let a = self.state[src1].get_f32();
3208         let b = self.state[src2].get_f32();
3209         self.state[dst].set_u32(u32::from(a == b));
3210         ControlFlow::Continue(())
3211     }
3212 
fneq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done>3213     fn fneq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3214         let a = self.state[src1].get_f32();
3215         let b = self.state[src2].get_f32();
3216         self.state[dst].set_u32(u32::from(a != b));
3217         ControlFlow::Continue(())
3218     }
3219 
flt32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done>3220     fn flt32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3221         let a = self.state[src1].get_f32();
3222         let b = self.state[src2].get_f32();
3223         self.state[dst].set_u32(u32::from(a < b));
3224         ControlFlow::Continue(())
3225     }
3226 
flteq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done>3227     fn flteq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3228         let a = self.state[src1].get_f32();
3229         let b = self.state[src2].get_f32();
3230         self.state[dst].set_u32(u32::from(a <= b));
3231         ControlFlow::Continue(())
3232     }
3233 
feq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done>3234     fn feq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3235         let a = self.state[src1].get_f64();
3236         let b = self.state[src2].get_f64();
3237         self.state[dst].set_u32(u32::from(a == b));
3238         ControlFlow::Continue(())
3239     }
3240 
fneq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done>3241     fn fneq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3242         let a = self.state[src1].get_f64();
3243         let b = self.state[src2].get_f64();
3244         self.state[dst].set_u32(u32::from(a != b));
3245         ControlFlow::Continue(())
3246     }
3247 
flt64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done>3248     fn flt64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3249         let a = self.state[src1].get_f64();
3250         let b = self.state[src2].get_f64();
3251         self.state[dst].set_u32(u32::from(a < b));
3252         ControlFlow::Continue(())
3253     }
3254 
flteq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done>3255     fn flteq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3256         let a = self.state[src1].get_f64();
3257         let b = self.state[src2].get_f64();
3258         self.state[dst].set_u32(u32::from(a <= b));
3259         ControlFlow::Continue(())
3260     }
3261 
fselect32( &mut self, dst: FReg, cond: XReg, if_nonzero: FReg, if_zero: FReg, ) -> ControlFlow<Done>3262     fn fselect32(
3263         &mut self,
3264         dst: FReg,
3265         cond: XReg,
3266         if_nonzero: FReg,
3267         if_zero: FReg,
3268     ) -> ControlFlow<Done> {
3269         let result = if self.state[cond].get_u32() != 0 {
3270             self.state[if_nonzero].get_f32()
3271         } else {
3272             self.state[if_zero].get_f32()
3273         };
3274         self.state[dst].set_f32(result);
3275         ControlFlow::Continue(())
3276     }
3277 
fselect64( &mut self, dst: FReg, cond: XReg, if_nonzero: FReg, if_zero: FReg, ) -> ControlFlow<Done>3278     fn fselect64(
3279         &mut self,
3280         dst: FReg,
3281         cond: XReg,
3282         if_nonzero: FReg,
3283         if_zero: FReg,
3284     ) -> ControlFlow<Done> {
3285         let result = if self.state[cond].get_u32() != 0 {
3286             self.state[if_nonzero].get_f64()
3287         } else {
3288             self.state[if_zero].get_f64()
3289         };
3290         self.state[dst].set_f64(result);
3291         ControlFlow::Continue(())
3292     }
3293 
f32_from_x32_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done>3294     fn f32_from_x32_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3295         let a = self.state[src].get_i32();
3296         self.state[dst].set_f32(a as f32);
3297         ControlFlow::Continue(())
3298     }
3299 
f32_from_x32_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done>3300     fn f32_from_x32_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3301         let a = self.state[src].get_u32();
3302         self.state[dst].set_f32(a as f32);
3303         ControlFlow::Continue(())
3304     }
3305 
f32_from_x64_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done>3306     fn f32_from_x64_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3307         let a = self.state[src].get_i64();
3308         self.state[dst].set_f32(a as f32);
3309         ControlFlow::Continue(())
3310     }
3311 
f32_from_x64_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done>3312     fn f32_from_x64_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3313         let a = self.state[src].get_u64();
3314         self.state[dst].set_f32(a as f32);
3315         ControlFlow::Continue(())
3316     }
3317 
f64_from_x32_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done>3318     fn f64_from_x32_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3319         let a = self.state[src].get_i32();
3320         self.state[dst].set_f64(a as f64);
3321         ControlFlow::Continue(())
3322     }
3323 
f64_from_x32_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done>3324     fn f64_from_x32_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3325         let a = self.state[src].get_u32();
3326         self.state[dst].set_f64(a as f64);
3327         ControlFlow::Continue(())
3328     }
3329 
f64_from_x64_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done>3330     fn f64_from_x64_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3331         let a = self.state[src].get_i64();
3332         self.state[dst].set_f64(a as f64);
3333         ControlFlow::Continue(())
3334     }
3335 
f64_from_x64_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done>3336     fn f64_from_x64_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3337         let a = self.state[src].get_u64();
3338         self.state[dst].set_f64(a as f64);
3339         ControlFlow::Continue(())
3340     }
3341 
x32_from_f32_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done>3342     fn x32_from_f32_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3343         let a = self.state[src].get_f32();
3344         self.check_xnn_from_f32::<crate::X32FromF32S>(a, f32_cvt_to_int_bounds(true, 32))?;
3345         self.state[dst].set_i32(a as i32);
3346         ControlFlow::Continue(())
3347     }
3348 
x32_from_f32_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done>3349     fn x32_from_f32_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3350         let a = self.state[src].get_f32();
3351         self.check_xnn_from_f32::<crate::X32FromF32U>(a, f32_cvt_to_int_bounds(false, 32))?;
3352         self.state[dst].set_u32(a as u32);
3353         ControlFlow::Continue(())
3354     }
3355 
x64_from_f32_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done>3356     fn x64_from_f32_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3357         let a = self.state[src].get_f32();
3358         self.check_xnn_from_f32::<crate::X64FromF32S>(a, f32_cvt_to_int_bounds(true, 64))?;
3359         self.state[dst].set_i64(a as i64);
3360         ControlFlow::Continue(())
3361     }
3362 
x64_from_f32_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done>3363     fn x64_from_f32_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3364         let a = self.state[src].get_f32();
3365         self.check_xnn_from_f32::<crate::X64FromF32U>(a, f32_cvt_to_int_bounds(false, 64))?;
3366         self.state[dst].set_u64(a as u64);
3367         ControlFlow::Continue(())
3368     }
3369 
x32_from_f64_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done>3370     fn x32_from_f64_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3371         let a = self.state[src].get_f64();
3372         self.check_xnn_from_f64::<crate::X32FromF64S>(a, f64_cvt_to_int_bounds(true, 32))?;
3373         self.state[dst].set_i32(a as i32);
3374         ControlFlow::Continue(())
3375     }
3376 
x32_from_f64_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done>3377     fn x32_from_f64_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3378         let a = self.state[src].get_f64();
3379         self.check_xnn_from_f64::<crate::X32FromF64U>(a, f64_cvt_to_int_bounds(false, 32))?;
3380         self.state[dst].set_u32(a as u32);
3381         ControlFlow::Continue(())
3382     }
3383 
x64_from_f64_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done>3384     fn x64_from_f64_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3385         let a = self.state[src].get_f64();
3386         self.check_xnn_from_f64::<crate::X64FromF64S>(a, f64_cvt_to_int_bounds(true, 64))?;
3387         self.state[dst].set_i64(a as i64);
3388         ControlFlow::Continue(())
3389     }
3390 
x64_from_f64_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done>3391     fn x64_from_f64_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3392         let a = self.state[src].get_f64();
3393         self.check_xnn_from_f64::<crate::X64FromF64U>(a, f64_cvt_to_int_bounds(false, 64))?;
3394         self.state[dst].set_u64(a as u64);
3395         ControlFlow::Continue(())
3396     }
3397 
x32_from_f32_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done>3398     fn x32_from_f32_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3399         let a = self.state[src].get_f32();
3400         self.state[dst].set_i32(a as i32);
3401         ControlFlow::Continue(())
3402     }
3403 
x32_from_f32_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done>3404     fn x32_from_f32_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3405         let a = self.state[src].get_f32();
3406         self.state[dst].set_u32(a as u32);
3407         ControlFlow::Continue(())
3408     }
3409 
x64_from_f32_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done>3410     fn x64_from_f32_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3411         let a = self.state[src].get_f32();
3412         self.state[dst].set_i64(a as i64);
3413         ControlFlow::Continue(())
3414     }
3415 
x64_from_f32_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done>3416     fn x64_from_f32_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3417         let a = self.state[src].get_f32();
3418         self.state[dst].set_u64(a as u64);
3419         ControlFlow::Continue(())
3420     }
3421 
x32_from_f64_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done>3422     fn x32_from_f64_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3423         let a = self.state[src].get_f64();
3424         self.state[dst].set_i32(a as i32);
3425         ControlFlow::Continue(())
3426     }
3427 
x32_from_f64_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done>3428     fn x32_from_f64_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3429         let a = self.state[src].get_f64();
3430         self.state[dst].set_u32(a as u32);
3431         ControlFlow::Continue(())
3432     }
3433 
x64_from_f64_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done>3434     fn x64_from_f64_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3435         let a = self.state[src].get_f64();
3436         self.state[dst].set_i64(a as i64);
3437         ControlFlow::Continue(())
3438     }
3439 
x64_from_f64_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done>3440     fn x64_from_f64_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3441         let a = self.state[src].get_f64();
3442         self.state[dst].set_u64(a as u64);
3443         ControlFlow::Continue(())
3444     }
3445 
f32_from_f64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done>3446     fn f32_from_f64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3447         let a = self.state[src].get_f64();
3448         self.state[dst].set_f32(a as f32);
3449         ControlFlow::Continue(())
3450     }
3451 
f64_from_f32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done>3452     fn f64_from_f32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3453         let a = self.state[src].get_f32();
3454         self.state[dst].set_f64(a.into());
3455         ControlFlow::Continue(())
3456     }
3457 
fcopysign32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done>3458     fn fcopysign32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3459         let a = self.state[operands.src1].get_f32();
3460         let b = self.state[operands.src2].get_f32();
3461         self.state[operands.dst].set_f32(a.wasm_copysign(b));
3462         ControlFlow::Continue(())
3463     }
3464 
fcopysign64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done>3465     fn fcopysign64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3466         let a = self.state[operands.src1].get_f64();
3467         let b = self.state[operands.src2].get_f64();
3468         self.state[operands.dst].set_f64(a.wasm_copysign(b));
3469         ControlFlow::Continue(())
3470     }
3471 
fadd32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done>3472     fn fadd32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3473         let a = self.state[operands.src1].get_f32();
3474         let b = self.state[operands.src2].get_f32();
3475         self.state[operands.dst].set_f32(a + b);
3476         ControlFlow::Continue(())
3477     }
3478 
fsub32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done>3479     fn fsub32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3480         let a = self.state[operands.src1].get_f32();
3481         let b = self.state[operands.src2].get_f32();
3482         self.state[operands.dst].set_f32(a - b);
3483         ControlFlow::Continue(())
3484     }
3485 
3486     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vsubf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>3487     fn vsubf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3488         let mut a = self.state[operands.src1].get_f32x4();
3489         let b = self.state[operands.src2].get_f32x4();
3490         for (a, b) in a.iter_mut().zip(b) {
3491             *a = *a - b;
3492         }
3493         self.state[operands.dst].set_f32x4(a);
3494         ControlFlow::Continue(())
3495     }
3496 
fmul32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done>3497     fn fmul32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3498         let a = self.state[operands.src1].get_f32();
3499         let b = self.state[operands.src2].get_f32();
3500         self.state[operands.dst].set_f32(a * b);
3501         ControlFlow::Continue(())
3502     }
3503 
3504     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vmulf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>3505     fn vmulf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3506         let mut a = self.state[operands.src1].get_f32x4();
3507         let b = self.state[operands.src2].get_f32x4();
3508         for (a, b) in a.iter_mut().zip(b) {
3509             *a = *a * b;
3510         }
3511         self.state[operands.dst].set_f32x4(a);
3512         ControlFlow::Continue(())
3513     }
3514 
fdiv32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done>3515     fn fdiv32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3516         let a = self.state[operands.src1].get_f32();
3517         let b = self.state[operands.src2].get_f32();
3518         self.state[operands.dst].set_f32(a / b);
3519         ControlFlow::Continue(())
3520     }
3521 
3522     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vdivf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>3523     fn vdivf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3524         let a = self.state[operands.src1].get_f32x4();
3525         let b = self.state[operands.src2].get_f32x4();
3526         let mut result = [0.0f32; 4];
3527 
3528         for i in 0..4 {
3529             result[i] = a[i] / b[i];
3530         }
3531 
3532         self.state[operands.dst].set_f32x4(result);
3533         ControlFlow::Continue(())
3534     }
3535 
3536     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vdivf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>3537     fn vdivf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3538         let a = self.state[operands.src1].get_f64x2();
3539         let b = self.state[operands.src2].get_f64x2();
3540         let mut result = [0.0f64; 2];
3541 
3542         for i in 0..2 {
3543             result[i] = a[i] / b[i];
3544         }
3545 
3546         self.state[operands.dst].set_f64x2(result);
3547         ControlFlow::Continue(())
3548     }
3549 
fmaximum32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done>3550     fn fmaximum32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3551         let a = self.state[operands.src1].get_f32();
3552         let b = self.state[operands.src2].get_f32();
3553         self.state[operands.dst].set_f32(a.wasm_maximum(b));
3554         ControlFlow::Continue(())
3555     }
3556 
fminimum32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done>3557     fn fminimum32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3558         let a = self.state[operands.src1].get_f32();
3559         let b = self.state[operands.src2].get_f32();
3560         self.state[operands.dst].set_f32(a.wasm_minimum(b));
3561         ControlFlow::Continue(())
3562     }
3563 
ftrunc32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done>3564     fn ftrunc32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3565         let a = self.state[src].get_f32();
3566         self.state[dst].set_f32(a.wasm_trunc());
3567         ControlFlow::Continue(())
3568     }
3569 
3570     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vtrunc32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>3571     fn vtrunc32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3572         let mut a = self.state[src].get_f32x4();
3573         for elem in a.iter_mut() {
3574             *elem = elem.wasm_trunc();
3575         }
3576         self.state[dst].set_f32x4(a);
3577         ControlFlow::Continue(())
3578     }
3579 
3580     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vtrunc64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>3581     fn vtrunc64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3582         let mut a = self.state[src].get_f64x2();
3583         for elem in a.iter_mut() {
3584             *elem = elem.wasm_trunc();
3585         }
3586         self.state[dst].set_f64x2(a);
3587         ControlFlow::Continue(())
3588     }
3589 
ffloor32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done>3590     fn ffloor32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3591         let a = self.state[src].get_f32();
3592         self.state[dst].set_f32(a.wasm_floor());
3593         ControlFlow::Continue(())
3594     }
3595 
3596     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vfloor32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>3597     fn vfloor32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3598         let mut a = self.state[src].get_f32x4();
3599         for elem in a.iter_mut() {
3600             *elem = elem.wasm_floor();
3601         }
3602         self.state[dst].set_f32x4(a);
3603         ControlFlow::Continue(())
3604     }
3605 
3606     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vfloor64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>3607     fn vfloor64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3608         let mut a = self.state[src].get_f64x2();
3609         for elem in a.iter_mut() {
3610             *elem = elem.wasm_floor();
3611         }
3612         self.state[dst].set_f64x2(a);
3613         ControlFlow::Continue(())
3614     }
3615 
fceil32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done>3616     fn fceil32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3617         let a = self.state[src].get_f32();
3618         self.state[dst].set_f32(a.wasm_ceil());
3619         ControlFlow::Continue(())
3620     }
3621 
3622     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vceil32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>3623     fn vceil32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3624         let mut a = self.state[src].get_f32x4();
3625         for elem in a.iter_mut() {
3626             *elem = elem.wasm_ceil();
3627         }
3628         self.state[dst].set_f32x4(a);
3629 
3630         ControlFlow::Continue(())
3631     }
3632 
3633     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vceil64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>3634     fn vceil64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3635         let mut a = self.state[src].get_f64x2();
3636         for elem in a.iter_mut() {
3637             *elem = elem.wasm_ceil();
3638         }
3639         self.state[dst].set_f64x2(a);
3640 
3641         ControlFlow::Continue(())
3642     }
3643 
fnearest32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done>3644     fn fnearest32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3645         let a = self.state[src].get_f32();
3646         self.state[dst].set_f32(a.wasm_nearest());
3647         ControlFlow::Continue(())
3648     }
3649 
3650     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vnearest32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>3651     fn vnearest32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3652         let mut a = self.state[src].get_f32x4();
3653         for elem in a.iter_mut() {
3654             *elem = elem.wasm_nearest();
3655         }
3656         self.state[dst].set_f32x4(a);
3657         ControlFlow::Continue(())
3658     }
3659 
3660     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vnearest64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>3661     fn vnearest64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3662         let mut a = self.state[src].get_f64x2();
3663         for elem in a.iter_mut() {
3664             *elem = elem.wasm_nearest();
3665         }
3666         self.state[dst].set_f64x2(a);
3667         ControlFlow::Continue(())
3668     }
3669 
fsqrt32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done>3670     fn fsqrt32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3671         let a = self.state[src].get_f32();
3672         self.state[dst].set_f32(a.wasm_sqrt());
3673         ControlFlow::Continue(())
3674     }
3675 
3676     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vsqrt32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>3677     fn vsqrt32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3678         let mut a = self.state[src].get_f32x4();
3679         for elem in a.iter_mut() {
3680             *elem = elem.wasm_sqrt();
3681         }
3682         self.state[dst].set_f32x4(a);
3683         ControlFlow::Continue(())
3684     }
3685 
3686     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vsqrt64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>3687     fn vsqrt64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3688         let mut a = self.state[src].get_f64x2();
3689         for elem in a.iter_mut() {
3690             *elem = elem.wasm_sqrt();
3691         }
3692         self.state[dst].set_f64x2(a);
3693         ControlFlow::Continue(())
3694     }
3695 
fneg32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done>3696     fn fneg32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3697         let a = self.state[src].get_f32();
3698         self.state[dst].set_f32(-a);
3699         ControlFlow::Continue(())
3700     }
3701 
3702     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vnegf32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>3703     fn vnegf32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3704         let mut a = self.state[src].get_f32x4();
3705         for elem in a.iter_mut() {
3706             *elem = -*elem;
3707         }
3708         self.state[dst].set_f32x4(a);
3709         ControlFlow::Continue(())
3710     }
3711 
fabs32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done>3712     fn fabs32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3713         let a = self.state[src].get_f32();
3714         self.state[dst].set_f32(a.wasm_abs());
3715         ControlFlow::Continue(())
3716     }
3717 
fadd64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done>3718     fn fadd64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3719         let a = self.state[operands.src1].get_f64();
3720         let b = self.state[operands.src2].get_f64();
3721         self.state[operands.dst].set_f64(a + b);
3722         ControlFlow::Continue(())
3723     }
3724 
fsub64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done>3725     fn fsub64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3726         let a = self.state[operands.src1].get_f64();
3727         let b = self.state[operands.src2].get_f64();
3728         self.state[operands.dst].set_f64(a - b);
3729         ControlFlow::Continue(())
3730     }
3731 
fmul64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done>3732     fn fmul64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3733         let a = self.state[operands.src1].get_f64();
3734         let b = self.state[operands.src2].get_f64();
3735         self.state[operands.dst].set_f64(a * b);
3736         ControlFlow::Continue(())
3737     }
3738 
fdiv64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done>3739     fn fdiv64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3740         let a = self.state[operands.src1].get_f64();
3741         let b = self.state[operands.src2].get_f64();
3742         self.state[operands.dst].set_f64(a / b);
3743         ControlFlow::Continue(())
3744     }
3745 
fmaximum64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done>3746     fn fmaximum64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3747         let a = self.state[operands.src1].get_f64();
3748         let b = self.state[operands.src2].get_f64();
3749         self.state[operands.dst].set_f64(a.wasm_maximum(b));
3750         ControlFlow::Continue(())
3751     }
3752 
fminimum64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done>3753     fn fminimum64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3754         let a = self.state[operands.src1].get_f64();
3755         let b = self.state[operands.src2].get_f64();
3756         self.state[operands.dst].set_f64(a.wasm_minimum(b));
3757         ControlFlow::Continue(())
3758     }
3759 
ftrunc64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done>3760     fn ftrunc64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3761         let a = self.state[src].get_f64();
3762         self.state[dst].set_f64(a.wasm_trunc());
3763         ControlFlow::Continue(())
3764     }
3765 
ffloor64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done>3766     fn ffloor64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3767         let a = self.state[src].get_f64();
3768         self.state[dst].set_f64(a.wasm_floor());
3769         ControlFlow::Continue(())
3770     }
3771 
fceil64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done>3772     fn fceil64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3773         let a = self.state[src].get_f64();
3774         self.state[dst].set_f64(a.wasm_ceil());
3775         ControlFlow::Continue(())
3776     }
3777 
fnearest64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done>3778     fn fnearest64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3779         let a = self.state[src].get_f64();
3780         self.state[dst].set_f64(a.wasm_nearest());
3781         ControlFlow::Continue(())
3782     }
3783 
fsqrt64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done>3784     fn fsqrt64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3785         let a = self.state[src].get_f64();
3786         self.state[dst].set_f64(a.wasm_sqrt());
3787         ControlFlow::Continue(())
3788     }
3789 
fneg64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done>3790     fn fneg64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3791         let a = self.state[src].get_f64();
3792         self.state[dst].set_f64(-a);
3793         ControlFlow::Continue(())
3794     }
3795 
fabs64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done>3796     fn fabs64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3797         let a = self.state[src].get_f64();
3798         self.state[dst].set_f64(a.wasm_abs());
3799         ControlFlow::Continue(())
3800     }
3801 
3802     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vaddi8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>3803     fn vaddi8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3804         let mut a = self.state[operands.src1].get_i8x16();
3805         let b = self.state[operands.src2].get_i8x16();
3806         for (a, b) in a.iter_mut().zip(b) {
3807             *a = a.wrapping_add(b);
3808         }
3809         self.state[operands.dst].set_i8x16(a);
3810         ControlFlow::Continue(())
3811     }
3812 
3813     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vaddi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>3814     fn vaddi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3815         let mut a = self.state[operands.src1].get_i16x8();
3816         let b = self.state[operands.src2].get_i16x8();
3817         for (a, b) in a.iter_mut().zip(b) {
3818             *a = a.wrapping_add(b);
3819         }
3820         self.state[operands.dst].set_i16x8(a);
3821         ControlFlow::Continue(())
3822     }
3823 
3824     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vaddi32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>3825     fn vaddi32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3826         let mut a = self.state[operands.src1].get_i32x4();
3827         let b = self.state[operands.src2].get_i32x4();
3828         for (a, b) in a.iter_mut().zip(b) {
3829             *a = a.wrapping_add(b);
3830         }
3831         self.state[operands.dst].set_i32x4(a);
3832         ControlFlow::Continue(())
3833     }
3834 
3835     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vaddi64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>3836     fn vaddi64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3837         let mut a = self.state[operands.src1].get_i64x2();
3838         let b = self.state[operands.src2].get_i64x2();
3839         for (a, b) in a.iter_mut().zip(b) {
3840             *a = a.wrapping_add(b);
3841         }
3842         self.state[operands.dst].set_i64x2(a);
3843         ControlFlow::Continue(())
3844     }
3845 
3846     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vaddf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>3847     fn vaddf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3848         let mut a = self.state[operands.src1].get_f32x4();
3849         let b = self.state[operands.src2].get_f32x4();
3850         for (a, b) in a.iter_mut().zip(b) {
3851             *a += b;
3852         }
3853         self.state[operands.dst].set_f32x4(a);
3854         ControlFlow::Continue(())
3855     }
3856 
3857     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vaddf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>3858     fn vaddf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3859         let mut a = self.state[operands.src1].get_f64x2();
3860         let b = self.state[operands.src2].get_f64x2();
3861         for (a, b) in a.iter_mut().zip(b) {
3862             *a += b;
3863         }
3864         self.state[operands.dst].set_f64x2(a);
3865         ControlFlow::Continue(())
3866     }
3867 
3868     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vaddi8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>3869     fn vaddi8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3870         let mut a = self.state[operands.src1].get_i8x16();
3871         let b = self.state[operands.src2].get_i8x16();
3872         for (a, b) in a.iter_mut().zip(b) {
3873             *a = (*a).saturating_add(b);
3874         }
3875         self.state[operands.dst].set_i8x16(a);
3876         ControlFlow::Continue(())
3877     }
3878 
3879     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vaddu8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>3880     fn vaddu8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3881         let mut a = self.state[operands.src1].get_u8x16();
3882         let b = self.state[operands.src2].get_u8x16();
3883         for (a, b) in a.iter_mut().zip(b) {
3884             *a = (*a).saturating_add(b);
3885         }
3886         self.state[operands.dst].set_u8x16(a);
3887         ControlFlow::Continue(())
3888     }
3889 
3890     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vaddi16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>3891     fn vaddi16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3892         let mut a = self.state[operands.src1].get_i16x8();
3893         let b = self.state[operands.src2].get_i16x8();
3894         for (a, b) in a.iter_mut().zip(b) {
3895             *a = (*a).saturating_add(b);
3896         }
3897         self.state[operands.dst].set_i16x8(a);
3898         ControlFlow::Continue(())
3899     }
3900 
3901     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vaddu16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>3902     fn vaddu16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3903         let mut a = self.state[operands.src1].get_u16x8();
3904         let b = self.state[operands.src2].get_u16x8();
3905         for (a, b) in a.iter_mut().zip(b) {
3906             *a = (*a).saturating_add(b);
3907         }
3908         self.state[operands.dst].set_u16x8(a);
3909         ControlFlow::Continue(())
3910     }
3911 
3912     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vaddpairwisei16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>3913     fn vaddpairwisei16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3914         let a = self.state[operands.src1].get_i16x8();
3915         let b = self.state[operands.src2].get_i16x8();
3916         let mut result = [0i16; 8];
3917         let half = result.len() / 2;
3918         for i in 0..half {
3919             result[i] = a[2 * i].wrapping_add(a[2 * i + 1]);
3920             result[i + half] = b[2 * i].wrapping_add(b[2 * i + 1]);
3921         }
3922         self.state[operands.dst].set_i16x8(result);
3923         ControlFlow::Continue(())
3924     }
3925 
3926     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vaddpairwisei32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>3927     fn vaddpairwisei32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3928         let a = self.state[operands.src1].get_i32x4();
3929         let b = self.state[operands.src2].get_i32x4();
3930         let mut result = [0i32; 4];
3931         result[0] = a[0].wrapping_add(a[1]);
3932         result[1] = a[2].wrapping_add(a[3]);
3933         result[2] = b[0].wrapping_add(b[1]);
3934         result[3] = b[2].wrapping_add(b[3]);
3935         self.state[operands.dst].set_i32x4(result);
3936         ControlFlow::Continue(())
3937     }
3938 
3939     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vshli8x16(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done>3940     fn vshli8x16(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3941         let a = self.state[operands.src1].get_i8x16();
3942         let b = self.state[operands.src2].get_u32();
3943         self.state[operands.dst].set_i8x16(a.map(|a| a.wrapping_shl(b)));
3944         ControlFlow::Continue(())
3945     }
3946 
3947     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vshli16x8(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done>3948     fn vshli16x8(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3949         let a = self.state[operands.src1].get_i16x8();
3950         let b = self.state[operands.src2].get_u32();
3951         self.state[operands.dst].set_i16x8(a.map(|a| a.wrapping_shl(b)));
3952         ControlFlow::Continue(())
3953     }
3954 
3955     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vshli32x4(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done>3956     fn vshli32x4(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3957         let a = self.state[operands.src1].get_i32x4();
3958         let b = self.state[operands.src2].get_u32();
3959         self.state[operands.dst].set_i32x4(a.map(|a| a.wrapping_shl(b)));
3960         ControlFlow::Continue(())
3961     }
3962 
3963     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vshli64x2(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done>3964     fn vshli64x2(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3965         let a = self.state[operands.src1].get_i64x2();
3966         let b = self.state[operands.src2].get_u32();
3967         self.state[operands.dst].set_i64x2(a.map(|a| a.wrapping_shl(b)));
3968         ControlFlow::Continue(())
3969     }
3970 
3971     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vshri8x16_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done>3972     fn vshri8x16_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3973         let a = self.state[operands.src1].get_i8x16();
3974         let b = self.state[operands.src2].get_u32();
3975         self.state[operands.dst].set_i8x16(a.map(|a| a.wrapping_shr(b)));
3976         ControlFlow::Continue(())
3977     }
3978 
3979     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vshri16x8_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done>3980     fn vshri16x8_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3981         let a = self.state[operands.src1].get_i16x8();
3982         let b = self.state[operands.src2].get_u32();
3983         self.state[operands.dst].set_i16x8(a.map(|a| a.wrapping_shr(b)));
3984         ControlFlow::Continue(())
3985     }
3986 
3987     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vshri32x4_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done>3988     fn vshri32x4_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3989         let a = self.state[operands.src1].get_i32x4();
3990         let b = self.state[operands.src2].get_u32();
3991         self.state[operands.dst].set_i32x4(a.map(|a| a.wrapping_shr(b)));
3992         ControlFlow::Continue(())
3993     }
3994 
3995     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vshri64x2_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done>3996     fn vshri64x2_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3997         let a = self.state[operands.src1].get_i64x2();
3998         let b = self.state[operands.src2].get_u32();
3999         self.state[operands.dst].set_i64x2(a.map(|a| a.wrapping_shr(b)));
4000         ControlFlow::Continue(())
4001     }
4002 
4003     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vshri8x16_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done>4004     fn vshri8x16_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
4005         let a = self.state[operands.src1].get_u8x16();
4006         let b = self.state[operands.src2].get_u32();
4007         self.state[operands.dst].set_u8x16(a.map(|a| a.wrapping_shr(b)));
4008         ControlFlow::Continue(())
4009     }
4010 
4011     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vshri16x8_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done>4012     fn vshri16x8_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
4013         let a = self.state[operands.src1].get_u16x8();
4014         let b = self.state[operands.src2].get_u32();
4015         self.state[operands.dst].set_u16x8(a.map(|a| a.wrapping_shr(b)));
4016         ControlFlow::Continue(())
4017     }
4018 
4019     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vshri32x4_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done>4020     fn vshri32x4_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
4021         let a = self.state[operands.src1].get_u32x4();
4022         let b = self.state[operands.src2].get_u32();
4023         self.state[operands.dst].set_u32x4(a.map(|a| a.wrapping_shr(b)));
4024         ControlFlow::Continue(())
4025     }
4026 
4027     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vshri64x2_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done>4028     fn vshri64x2_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
4029         let a = self.state[operands.src1].get_u64x2();
4030         let b = self.state[operands.src2].get_u32();
4031         self.state[operands.dst].set_u64x2(a.map(|a| a.wrapping_shr(b)));
4032         ControlFlow::Continue(())
4033     }
4034 
4035     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vconst128(&mut self, dst: VReg, val: u128) -> ControlFlow<Done>4036     fn vconst128(&mut self, dst: VReg, val: u128) -> ControlFlow<Done> {
4037         self.state[dst].set_u128(val);
4038         ControlFlow::Continue(())
4039     }
4040 
4041     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vsplatx8(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done>4042     fn vsplatx8(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
4043         let val = self.state[src].get_u32() as u8;
4044         self.state[dst].set_u8x16([val; 16]);
4045         ControlFlow::Continue(())
4046     }
4047 
4048     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vsplatx16(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done>4049     fn vsplatx16(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
4050         let val = self.state[src].get_u32() as u16;
4051         self.state[dst].set_u16x8([val; 8]);
4052         ControlFlow::Continue(())
4053     }
4054 
4055     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vsplatx32(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done>4056     fn vsplatx32(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
4057         let val = self.state[src].get_u32();
4058         self.state[dst].set_u32x4([val; 4]);
4059         ControlFlow::Continue(())
4060     }
4061 
4062     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vsplatx64(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done>4063     fn vsplatx64(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
4064         let val = self.state[src].get_u64();
4065         self.state[dst].set_u64x2([val; 2]);
4066         ControlFlow::Continue(())
4067     }
4068 
4069     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vsplatf32(&mut self, dst: VReg, src: FReg) -> ControlFlow<Done>4070     fn vsplatf32(&mut self, dst: VReg, src: FReg) -> ControlFlow<Done> {
4071         let val = self.state[src].get_f32();
4072         self.state[dst].set_f32x4([val; 4]);
4073         ControlFlow::Continue(())
4074     }
4075 
4076     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vsplatf64(&mut self, dst: VReg, src: FReg) -> ControlFlow<Done>4077     fn vsplatf64(&mut self, dst: VReg, src: FReg) -> ControlFlow<Done> {
4078         let val = self.state[src].get_f64();
4079         self.state[dst].set_f64x2([val; 2]);
4080         ControlFlow::Continue(())
4081     }
4082 
4083     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vload8x8_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done>4084     fn vload8x8_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
4085         let val = unsafe { self.load_ne::<[i8; 8], crate::VLoad8x8SZ>(addr)? };
4086         self.state[dst].set_i16x8(val.map(|i| i.into()));
4087         ControlFlow::Continue(())
4088     }
4089 
4090     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vload8x8_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done>4091     fn vload8x8_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
4092         let val = unsafe { self.load_ne::<[u8; 8], crate::VLoad8x8UZ>(addr)? };
4093         self.state[dst].set_u16x8(val.map(|i| i.into()));
4094         ControlFlow::Continue(())
4095     }
4096 
4097     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vload16x4le_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done>4098     fn vload16x4le_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
4099         let val = unsafe { self.load_ne::<[i16; 4], crate::VLoad16x4LeSZ>(addr)? };
4100         self.state[dst].set_i32x4(val.map(|i| i16::from_le(i).into()));
4101         ControlFlow::Continue(())
4102     }
4103 
4104     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vload16x4le_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done>4105     fn vload16x4le_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
4106         let val = unsafe { self.load_ne::<[u16; 4], crate::VLoad16x4LeUZ>(addr)? };
4107         self.state[dst].set_u32x4(val.map(|i| u16::from_le(i).into()));
4108         ControlFlow::Continue(())
4109     }
4110 
4111     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vload32x2le_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done>4112     fn vload32x2le_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
4113         let val = unsafe { self.load_ne::<[i32; 2], crate::VLoad32x2LeSZ>(addr)? };
4114         self.state[dst].set_i64x2(val.map(|i| i32::from_le(i).into()));
4115         ControlFlow::Continue(())
4116     }
4117 
4118     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vload32x2le_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done>4119     fn vload32x2le_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
4120         let val = unsafe { self.load_ne::<[u32; 2], crate::VLoad32x2LeUZ>(addr)? };
4121         self.state[dst].set_u64x2(val.map(|i| u32::from_le(i).into()));
4122         ControlFlow::Continue(())
4123     }
4124 
4125     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vband128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4126     fn vband128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4127         let a = self.state[operands.src1].get_u128();
4128         let b = self.state[operands.src2].get_u128();
4129         self.state[operands.dst].set_u128(a & b);
4130         ControlFlow::Continue(())
4131     }
4132 
4133     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vbor128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4134     fn vbor128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4135         let a = self.state[operands.src1].get_u128();
4136         let b = self.state[operands.src2].get_u128();
4137         self.state[operands.dst].set_u128(a | b);
4138         ControlFlow::Continue(())
4139     }
4140 
4141     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vbxor128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4142     fn vbxor128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4143         let a = self.state[operands.src1].get_u128();
4144         let b = self.state[operands.src2].get_u128();
4145         self.state[operands.dst].set_u128(a ^ b);
4146         ControlFlow::Continue(())
4147     }
4148 
4149     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vbnot128(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>4150     fn vbnot128(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4151         let a = self.state[src].get_u128();
4152         self.state[dst].set_u128(!a);
4153         ControlFlow::Continue(())
4154     }
4155 
4156     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vbitselect128(&mut self, dst: VReg, c: VReg, x: VReg, y: VReg) -> ControlFlow<Done>4157     fn vbitselect128(&mut self, dst: VReg, c: VReg, x: VReg, y: VReg) -> ControlFlow<Done> {
4158         let c = self.state[c].get_u128();
4159         let x = self.state[x].get_u128();
4160         let y = self.state[y].get_u128();
4161         self.state[dst].set_u128((c & x) | (!c & y));
4162         ControlFlow::Continue(())
4163     }
4164 
4165     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vbitmask8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done>4166     fn vbitmask8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4167         let a = self.state[src].get_u8x16();
4168         let mut result = 0;
4169         for item in a.iter().rev() {
4170             result <<= 1;
4171             result |= (*item >> 7) as u32;
4172         }
4173         self.state[dst].set_u32(result);
4174         ControlFlow::Continue(())
4175     }
4176 
4177     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vbitmask16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done>4178     fn vbitmask16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4179         let a = self.state[src].get_u16x8();
4180         let mut result = 0;
4181         for item in a.iter().rev() {
4182             result <<= 1;
4183             result |= (*item >> 15) as u32;
4184         }
4185         self.state[dst].set_u32(result);
4186         ControlFlow::Continue(())
4187     }
4188 
4189     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vbitmask32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done>4190     fn vbitmask32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4191         let a = self.state[src].get_u32x4();
4192         let mut result = 0;
4193         for item in a.iter().rev() {
4194             result <<= 1;
4195             result |= *item >> 31;
4196         }
4197         self.state[dst].set_u32(result);
4198         ControlFlow::Continue(())
4199     }
4200 
4201     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vbitmask64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done>4202     fn vbitmask64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4203         let a = self.state[src].get_u64x2();
4204         let mut result = 0;
4205         for item in a.iter().rev() {
4206             result <<= 1;
4207             result |= (*item >> 63) as u32;
4208         }
4209         self.state[dst].set_u32(result);
4210         ControlFlow::Continue(())
4211     }
4212 
4213     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
valltrue8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done>4214     fn valltrue8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4215         let a = self.state[src].get_u8x16();
4216         let result = a.iter().all(|a| *a != 0);
4217         self.state[dst].set_u32(u32::from(result));
4218         ControlFlow::Continue(())
4219     }
4220 
4221     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
valltrue16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done>4222     fn valltrue16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4223         let a = self.state[src].get_u16x8();
4224         let result = a.iter().all(|a| *a != 0);
4225         self.state[dst].set_u32(u32::from(result));
4226         ControlFlow::Continue(())
4227     }
4228 
4229     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
valltrue32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done>4230     fn valltrue32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4231         let a = self.state[src].get_u32x4();
4232         let result = a.iter().all(|a| *a != 0);
4233         self.state[dst].set_u32(u32::from(result));
4234         ControlFlow::Continue(())
4235     }
4236 
4237     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
valltrue64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done>4238     fn valltrue64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4239         let a = self.state[src].get_u64x2();
4240         let result = a.iter().all(|a| *a != 0);
4241         self.state[dst].set_u32(u32::from(result));
4242         ControlFlow::Continue(())
4243     }
4244 
4245     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vanytrue8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done>4246     fn vanytrue8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4247         let a = self.state[src].get_u8x16();
4248         let result = a.iter().any(|a| *a != 0);
4249         self.state[dst].set_u32(u32::from(result));
4250         ControlFlow::Continue(())
4251     }
4252 
4253     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vanytrue16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done>4254     fn vanytrue16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4255         let a = self.state[src].get_u16x8();
4256         let result = a.iter().any(|a| *a != 0);
4257         self.state[dst].set_u32(u32::from(result));
4258         ControlFlow::Continue(())
4259     }
4260 
4261     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vanytrue32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done>4262     fn vanytrue32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4263         let a = self.state[src].get_u32x4();
4264         let result = a.iter().any(|a| *a != 0);
4265         self.state[dst].set_u32(u32::from(result));
4266         ControlFlow::Continue(())
4267     }
4268 
4269     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vanytrue64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done>4270     fn vanytrue64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4271         let a = self.state[src].get_u64x2();
4272         let result = a.iter().any(|a| *a != 0);
4273         self.state[dst].set_u32(u32::from(result));
4274         ControlFlow::Continue(())
4275     }
4276 
4277     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vf32x4_from_i32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>4278     fn vf32x4_from_i32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4279         let a = self.state[src].get_i32x4();
4280         self.state[dst].set_f32x4(a.map(|i| i as f32));
4281         ControlFlow::Continue(())
4282     }
4283 
4284     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vf32x4_from_i32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>4285     fn vf32x4_from_i32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4286         let a = self.state[src].get_u32x4();
4287         self.state[dst].set_f32x4(a.map(|i| i as f32));
4288         ControlFlow::Continue(())
4289     }
4290 
4291     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vf64x2_from_i64x2_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>4292     fn vf64x2_from_i64x2_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4293         let a = self.state[src].get_i64x2();
4294         self.state[dst].set_f64x2(a.map(|i| i as f64));
4295         ControlFlow::Continue(())
4296     }
4297 
4298     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vf64x2_from_i64x2_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>4299     fn vf64x2_from_i64x2_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4300         let a = self.state[src].get_u64x2();
4301         self.state[dst].set_f64x2(a.map(|i| i as f64));
4302         ControlFlow::Continue(())
4303     }
4304 
4305     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vi32x4_from_f32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>4306     fn vi32x4_from_f32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4307         let a = self.state[src].get_f32x4();
4308         self.state[dst].set_i32x4(a.map(|f| f as i32));
4309         ControlFlow::Continue(())
4310     }
4311 
4312     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vi32x4_from_f32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>4313     fn vi32x4_from_f32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4314         let a = self.state[src].get_f32x4();
4315         self.state[dst].set_u32x4(a.map(|f| f as u32));
4316         ControlFlow::Continue(())
4317     }
4318 
4319     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vi64x2_from_f64x2_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>4320     fn vi64x2_from_f64x2_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4321         let a = self.state[src].get_f64x2();
4322         self.state[dst].set_i64x2(a.map(|f| f as i64));
4323         ControlFlow::Continue(())
4324     }
4325 
4326     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vi64x2_from_f64x2_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>4327     fn vi64x2_from_f64x2_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4328         let a = self.state[src].get_f64x2();
4329         self.state[dst].set_u64x2(a.map(|f| f as u64));
4330         ControlFlow::Continue(())
4331     }
4332 
4333     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vwidenlow8x16_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>4334     fn vwidenlow8x16_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4335         let a = *self.state[src].get_i8x16().first_chunk().unwrap();
4336         self.state[dst].set_i16x8(a.map(|i| i.into()));
4337         ControlFlow::Continue(())
4338     }
4339 
4340     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vwidenlow8x16_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>4341     fn vwidenlow8x16_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4342         let a = *self.state[src].get_u8x16().first_chunk().unwrap();
4343         self.state[dst].set_u16x8(a.map(|i| i.into()));
4344         ControlFlow::Continue(())
4345     }
4346 
4347     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vwidenlow16x8_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>4348     fn vwidenlow16x8_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4349         let a = *self.state[src].get_i16x8().first_chunk().unwrap();
4350         self.state[dst].set_i32x4(a.map(|i| i.into()));
4351         ControlFlow::Continue(())
4352     }
4353 
4354     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vwidenlow16x8_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>4355     fn vwidenlow16x8_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4356         let a = *self.state[src].get_u16x8().first_chunk().unwrap();
4357         self.state[dst].set_u32x4(a.map(|i| i.into()));
4358         ControlFlow::Continue(())
4359     }
4360 
4361     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vwidenlow32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>4362     fn vwidenlow32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4363         let a = *self.state[src].get_i32x4().first_chunk().unwrap();
4364         self.state[dst].set_i64x2(a.map(|i| i.into()));
4365         ControlFlow::Continue(())
4366     }
4367 
4368     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vwidenlow32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>4369     fn vwidenlow32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4370         let a = *self.state[src].get_u32x4().first_chunk().unwrap();
4371         self.state[dst].set_u64x2(a.map(|i| i.into()));
4372         ControlFlow::Continue(())
4373     }
4374 
4375     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vwidenhigh8x16_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>4376     fn vwidenhigh8x16_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4377         let a = *self.state[src].get_i8x16().last_chunk().unwrap();
4378         self.state[dst].set_i16x8(a.map(|i| i.into()));
4379         ControlFlow::Continue(())
4380     }
4381 
4382     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vwidenhigh8x16_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>4383     fn vwidenhigh8x16_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4384         let a = *self.state[src].get_u8x16().last_chunk().unwrap();
4385         self.state[dst].set_u16x8(a.map(|i| i.into()));
4386         ControlFlow::Continue(())
4387     }
4388 
4389     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vwidenhigh16x8_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>4390     fn vwidenhigh16x8_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4391         let a = *self.state[src].get_i16x8().last_chunk().unwrap();
4392         self.state[dst].set_i32x4(a.map(|i| i.into()));
4393         ControlFlow::Continue(())
4394     }
4395 
4396     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vwidenhigh16x8_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>4397     fn vwidenhigh16x8_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4398         let a = *self.state[src].get_u16x8().last_chunk().unwrap();
4399         self.state[dst].set_u32x4(a.map(|i| i.into()));
4400         ControlFlow::Continue(())
4401     }
4402 
4403     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vwidenhigh32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>4404     fn vwidenhigh32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4405         let a = *self.state[src].get_i32x4().last_chunk().unwrap();
4406         self.state[dst].set_i64x2(a.map(|i| i.into()));
4407         ControlFlow::Continue(())
4408     }
4409 
4410     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vwidenhigh32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>4411     fn vwidenhigh32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4412         let a = *self.state[src].get_u32x4().last_chunk().unwrap();
4413         self.state[dst].set_u64x2(a.map(|i| i.into()));
4414         ControlFlow::Continue(())
4415     }
4416 
4417     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vnarrow16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4418     fn vnarrow16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4419         let a = self.state[operands.src1].get_i16x8();
4420         let b = self.state[operands.src2].get_i16x8();
4421         let mut result = [0; 16];
4422         for (i, d) in a.iter().chain(&b).zip(&mut result) {
4423             *d = (*i)
4424                 .try_into()
4425                 .unwrap_or(if *i < 0 { i8::MIN } else { i8::MAX });
4426         }
4427         self.state[operands.dst].set_i8x16(result);
4428         ControlFlow::Continue(())
4429     }
4430 
4431     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vnarrow16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4432     fn vnarrow16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4433         let a = self.state[operands.src1].get_i16x8();
4434         let b = self.state[operands.src2].get_i16x8();
4435         let mut result = [0; 16];
4436         for (i, d) in a.iter().chain(&b).zip(&mut result) {
4437             *d = (*i)
4438                 .try_into()
4439                 .unwrap_or(if *i < 0 { u8::MIN } else { u8::MAX });
4440         }
4441         self.state[operands.dst].set_u8x16(result);
4442         ControlFlow::Continue(())
4443     }
4444 
4445     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vnarrow32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4446     fn vnarrow32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4447         let a = self.state[operands.src1].get_i32x4();
4448         let b = self.state[operands.src2].get_i32x4();
4449         let mut result = [0; 8];
4450         for (i, d) in a.iter().chain(&b).zip(&mut result) {
4451             *d = (*i)
4452                 .try_into()
4453                 .unwrap_or(if *i < 0 { i16::MIN } else { i16::MAX });
4454         }
4455         self.state[operands.dst].set_i16x8(result);
4456         ControlFlow::Continue(())
4457     }
4458 
4459     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vnarrow32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4460     fn vnarrow32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4461         let a = self.state[operands.src1].get_i32x4();
4462         let b = self.state[operands.src2].get_i32x4();
4463         let mut result = [0; 8];
4464         for (i, d) in a.iter().chain(&b).zip(&mut result) {
4465             *d = (*i)
4466                 .try_into()
4467                 .unwrap_or(if *i < 0 { u16::MIN } else { u16::MAX });
4468         }
4469         self.state[operands.dst].set_u16x8(result);
4470         ControlFlow::Continue(())
4471     }
4472 
4473     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vnarrow64x2_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4474     fn vnarrow64x2_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4475         let a = self.state[operands.src1].get_i64x2();
4476         let b = self.state[operands.src2].get_i64x2();
4477         let mut result = [0; 4];
4478         for (i, d) in a.iter().chain(&b).zip(&mut result) {
4479             *d = (*i)
4480                 .try_into()
4481                 .unwrap_or(if *i < 0 { i32::MIN } else { i32::MAX });
4482         }
4483         self.state[operands.dst].set_i32x4(result);
4484         ControlFlow::Continue(())
4485     }
4486 
4487     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vnarrow64x2_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4488     fn vnarrow64x2_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4489         let a = self.state[operands.src1].get_i64x2();
4490         let b = self.state[operands.src2].get_i64x2();
4491         let mut result = [0; 4];
4492         for (i, d) in a.iter().chain(&b).zip(&mut result) {
4493             *d = (*i)
4494                 .try_into()
4495                 .unwrap_or(if *i < 0 { u32::MIN } else { u32::MAX });
4496         }
4497         self.state[operands.dst].set_u32x4(result);
4498         ControlFlow::Continue(())
4499     }
4500 
4501     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vunarrow64x2_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4502     fn vunarrow64x2_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4503         let a = self.state[operands.src1].get_u64x2();
4504         let b = self.state[operands.src2].get_u64x2();
4505         let mut result = [0; 4];
4506         for (i, d) in a.iter().chain(&b).zip(&mut result) {
4507             *d = (*i).try_into().unwrap_or(u32::MAX);
4508         }
4509         self.state[operands.dst].set_u32x4(result);
4510         ControlFlow::Continue(())
4511     }
4512 
4513     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vfpromotelow(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>4514     fn vfpromotelow(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4515         let a = self.state[src].get_f32x4();
4516         self.state[dst].set_f64x2([a[0].into(), a[1].into()]);
4517         ControlFlow::Continue(())
4518     }
4519 
4520     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vfdemote(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>4521     fn vfdemote(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4522         let a = self.state[src].get_f64x2();
4523         self.state[dst].set_f32x4([a[0] as f32, a[1] as f32, 0.0, 0.0]);
4524         ControlFlow::Continue(())
4525     }
4526 
4527     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vsubi8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4528     fn vsubi8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4529         let mut a = self.state[operands.src1].get_i8x16();
4530         let b = self.state[operands.src2].get_i8x16();
4531         for (a, b) in a.iter_mut().zip(b) {
4532             *a = a.wrapping_sub(b);
4533         }
4534         self.state[operands.dst].set_i8x16(a);
4535         ControlFlow::Continue(())
4536     }
4537 
4538     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vsubi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4539     fn vsubi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4540         let mut a = self.state[operands.src1].get_i16x8();
4541         let b = self.state[operands.src2].get_i16x8();
4542         for (a, b) in a.iter_mut().zip(b) {
4543             *a = a.wrapping_sub(b);
4544         }
4545         self.state[operands.dst].set_i16x8(a);
4546         ControlFlow::Continue(())
4547     }
4548 
4549     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vsubi32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4550     fn vsubi32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4551         let mut a = self.state[operands.src1].get_i32x4();
4552         let b = self.state[operands.src2].get_i32x4();
4553         for (a, b) in a.iter_mut().zip(b) {
4554             *a = a.wrapping_sub(b);
4555         }
4556         self.state[operands.dst].set_i32x4(a);
4557         ControlFlow::Continue(())
4558     }
4559 
4560     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vsubi64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4561     fn vsubi64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4562         let mut a = self.state[operands.src1].get_i64x2();
4563         let b = self.state[operands.src2].get_i64x2();
4564         for (a, b) in a.iter_mut().zip(b) {
4565             *a = a.wrapping_sub(b);
4566         }
4567         self.state[operands.dst].set_i64x2(a);
4568         ControlFlow::Continue(())
4569     }
4570 
4571     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vsubi8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4572     fn vsubi8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4573         let mut a = self.state[operands.src1].get_i8x16();
4574         let b = self.state[operands.src2].get_i8x16();
4575         for (a, b) in a.iter_mut().zip(b) {
4576             *a = a.saturating_sub(b);
4577         }
4578         self.state[operands.dst].set_i8x16(a);
4579         ControlFlow::Continue(())
4580     }
4581 
4582     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vsubu8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4583     fn vsubu8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4584         let mut a = self.state[operands.src1].get_u8x16();
4585         let b = self.state[operands.src2].get_u8x16();
4586         for (a, b) in a.iter_mut().zip(b) {
4587             *a = a.saturating_sub(b);
4588         }
4589         self.state[operands.dst].set_u8x16(a);
4590         ControlFlow::Continue(())
4591     }
4592 
4593     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vsubi16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4594     fn vsubi16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4595         let mut a = self.state[operands.src1].get_i16x8();
4596         let b = self.state[operands.src2].get_i16x8();
4597         for (a, b) in a.iter_mut().zip(b) {
4598             *a = a.saturating_sub(b);
4599         }
4600         self.state[operands.dst].set_i16x8(a);
4601         ControlFlow::Continue(())
4602     }
4603 
4604     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vsubu16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4605     fn vsubu16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4606         let mut a = self.state[operands.src1].get_u16x8();
4607         let b = self.state[operands.src2].get_u16x8();
4608         for (a, b) in a.iter_mut().zip(b) {
4609             *a = a.saturating_sub(b);
4610         }
4611         self.state[operands.dst].set_u16x8(a);
4612         ControlFlow::Continue(())
4613     }
4614 
4615     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vsubf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4616     fn vsubf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4617         let mut a = self.state[operands.src1].get_f64x2();
4618         let b = self.state[operands.src2].get_f64x2();
4619         for (a, b) in a.iter_mut().zip(b) {
4620             *a = *a - b;
4621         }
4622         self.state[operands.dst].set_f64x2(a);
4623         ControlFlow::Continue(())
4624     }
4625 
4626     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vmuli8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4627     fn vmuli8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4628         let mut a = self.state[operands.src1].get_i8x16();
4629         let b = self.state[operands.src2].get_i8x16();
4630         for (a, b) in a.iter_mut().zip(b) {
4631             *a = a.wrapping_mul(b);
4632         }
4633         self.state[operands.dst].set_i8x16(a);
4634         ControlFlow::Continue(())
4635     }
4636 
4637     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vmuli16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4638     fn vmuli16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4639         let mut a = self.state[operands.src1].get_i16x8();
4640         let b = self.state[operands.src2].get_i16x8();
4641         for (a, b) in a.iter_mut().zip(b) {
4642             *a = a.wrapping_mul(b);
4643         }
4644         self.state[operands.dst].set_i16x8(a);
4645         ControlFlow::Continue(())
4646     }
4647 
4648     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vmuli32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4649     fn vmuli32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4650         let mut a = self.state[operands.src1].get_i32x4();
4651         let b = self.state[operands.src2].get_i32x4();
4652         for (a, b) in a.iter_mut().zip(b) {
4653             *a = a.wrapping_mul(b);
4654         }
4655         self.state[operands.dst].set_i32x4(a);
4656         ControlFlow::Continue(())
4657     }
4658 
4659     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vmuli64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4660     fn vmuli64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4661         let mut a = self.state[operands.src1].get_i64x2();
4662         let b = self.state[operands.src2].get_i64x2();
4663         for (a, b) in a.iter_mut().zip(b) {
4664             *a = a.wrapping_mul(b);
4665         }
4666         self.state[operands.dst].set_i64x2(a);
4667         ControlFlow::Continue(())
4668     }
4669 
4670     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vmulf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4671     fn vmulf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4672         let mut a = self.state[operands.src1].get_f64x2();
4673         let b = self.state[operands.src2].get_f64x2();
4674         for (a, b) in a.iter_mut().zip(b) {
4675             *a = *a * b;
4676         }
4677         self.state[operands.dst].set_f64x2(a);
4678         ControlFlow::Continue(())
4679     }
4680 
4681     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vqmulrsi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4682     fn vqmulrsi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4683         let mut a = self.state[operands.src1].get_i16x8();
4684         let b = self.state[operands.src2].get_i16x8();
4685         const MIN: i32 = i16::MIN as i32;
4686         const MAX: i32 = i16::MAX as i32;
4687         for (a, b) in a.iter_mut().zip(b) {
4688             let r = (i32::from(*a) * i32::from(b) + (1 << 14)) >> 15;
4689             *a = r.clamp(MIN, MAX) as i16;
4690         }
4691         self.state[operands.dst].set_i16x8(a);
4692         ControlFlow::Continue(())
4693     }
4694 
4695     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vpopcnt8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>4696     fn vpopcnt8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4697         let a = self.state[src].get_u8x16();
4698         self.state[dst].set_u8x16(a.map(|i| i.count_ones() as u8));
4699         ControlFlow::Continue(())
4700     }
4701 
4702     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
xextractv8x16(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done>4703     fn xextractv8x16(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4704         let a = unsafe { *self.state[src].get_u8x16().get_unchecked(usize::from(lane)) };
4705         self.state[dst].set_u32(u32::from(a));
4706         ControlFlow::Continue(())
4707     }
4708 
4709     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
xextractv16x8(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done>4710     fn xextractv16x8(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4711         let a = unsafe { *self.state[src].get_u16x8().get_unchecked(usize::from(lane)) };
4712         self.state[dst].set_u32(u32::from(a));
4713         ControlFlow::Continue(())
4714     }
4715 
4716     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
xextractv32x4(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done>4717     fn xextractv32x4(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4718         let a = unsafe { *self.state[src].get_u32x4().get_unchecked(usize::from(lane)) };
4719         self.state[dst].set_u32(a);
4720         ControlFlow::Continue(())
4721     }
4722 
4723     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
xextractv64x2(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done>4724     fn xextractv64x2(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4725         let a = unsafe { *self.state[src].get_u64x2().get_unchecked(usize::from(lane)) };
4726         self.state[dst].set_u64(a);
4727         ControlFlow::Continue(())
4728     }
4729 
4730     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
fextractv32x4(&mut self, dst: FReg, src: VReg, lane: u8) -> ControlFlow<Done>4731     fn fextractv32x4(&mut self, dst: FReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4732         let a = unsafe { *self.state[src].get_f32x4().get_unchecked(usize::from(lane)) };
4733         self.state[dst].set_f32(a);
4734         ControlFlow::Continue(())
4735     }
4736 
4737     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
fextractv64x2(&mut self, dst: FReg, src: VReg, lane: u8) -> ControlFlow<Done>4738     fn fextractv64x2(&mut self, dst: FReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4739         let a = unsafe { *self.state[src].get_f64x2().get_unchecked(usize::from(lane)) };
4740         self.state[dst].set_f64(a);
4741         ControlFlow::Continue(())
4742     }
4743 
4744     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vinsertx8( &mut self, operands: BinaryOperands<VReg, VReg, XReg>, lane: u8, ) -> ControlFlow<Done>4745     fn vinsertx8(
4746         &mut self,
4747         operands: BinaryOperands<VReg, VReg, XReg>,
4748         lane: u8,
4749     ) -> ControlFlow<Done> {
4750         let mut a = self.state[operands.src1].get_u8x16();
4751         let b = self.state[operands.src2].get_u32() as u8;
4752         unsafe {
4753             *a.get_unchecked_mut(usize::from(lane)) = b;
4754         }
4755         self.state[operands.dst].set_u8x16(a);
4756         ControlFlow::Continue(())
4757     }
4758 
4759     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vinsertx16( &mut self, operands: BinaryOperands<VReg, VReg, XReg>, lane: u8, ) -> ControlFlow<Done>4760     fn vinsertx16(
4761         &mut self,
4762         operands: BinaryOperands<VReg, VReg, XReg>,
4763         lane: u8,
4764     ) -> ControlFlow<Done> {
4765         let mut a = self.state[operands.src1].get_u16x8();
4766         let b = self.state[operands.src2].get_u32() as u16;
4767         unsafe {
4768             *a.get_unchecked_mut(usize::from(lane)) = b;
4769         }
4770         self.state[operands.dst].set_u16x8(a);
4771         ControlFlow::Continue(())
4772     }
4773 
4774     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vinsertx32( &mut self, operands: BinaryOperands<VReg, VReg, XReg>, lane: u8, ) -> ControlFlow<Done>4775     fn vinsertx32(
4776         &mut self,
4777         operands: BinaryOperands<VReg, VReg, XReg>,
4778         lane: u8,
4779     ) -> ControlFlow<Done> {
4780         let mut a = self.state[operands.src1].get_u32x4();
4781         let b = self.state[operands.src2].get_u32();
4782         unsafe {
4783             *a.get_unchecked_mut(usize::from(lane)) = b;
4784         }
4785         self.state[operands.dst].set_u32x4(a);
4786         ControlFlow::Continue(())
4787     }
4788 
4789     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vinsertx64( &mut self, operands: BinaryOperands<VReg, VReg, XReg>, lane: u8, ) -> ControlFlow<Done>4790     fn vinsertx64(
4791         &mut self,
4792         operands: BinaryOperands<VReg, VReg, XReg>,
4793         lane: u8,
4794     ) -> ControlFlow<Done> {
4795         let mut a = self.state[operands.src1].get_u64x2();
4796         let b = self.state[operands.src2].get_u64();
4797         unsafe {
4798             *a.get_unchecked_mut(usize::from(lane)) = b;
4799         }
4800         self.state[operands.dst].set_u64x2(a);
4801         ControlFlow::Continue(())
4802     }
4803 
4804     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vinsertf32( &mut self, operands: BinaryOperands<VReg, VReg, FReg>, lane: u8, ) -> ControlFlow<Done>4805     fn vinsertf32(
4806         &mut self,
4807         operands: BinaryOperands<VReg, VReg, FReg>,
4808         lane: u8,
4809     ) -> ControlFlow<Done> {
4810         let mut a = self.state[operands.src1].get_f32x4();
4811         let b = self.state[operands.src2].get_f32();
4812         unsafe {
4813             *a.get_unchecked_mut(usize::from(lane)) = b;
4814         }
4815         self.state[operands.dst].set_f32x4(a);
4816         ControlFlow::Continue(())
4817     }
4818 
4819     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vinsertf64( &mut self, operands: BinaryOperands<VReg, VReg, FReg>, lane: u8, ) -> ControlFlow<Done>4820     fn vinsertf64(
4821         &mut self,
4822         operands: BinaryOperands<VReg, VReg, FReg>,
4823         lane: u8,
4824     ) -> ControlFlow<Done> {
4825         let mut a = self.state[operands.src1].get_f64x2();
4826         let b = self.state[operands.src2].get_f64();
4827         unsafe {
4828             *a.get_unchecked_mut(usize::from(lane)) = b;
4829         }
4830         self.state[operands.dst].set_f64x2(a);
4831         ControlFlow::Continue(())
4832     }
4833 
4834     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
veq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4835     fn veq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4836         let a = self.state[operands.src1].get_u8x16();
4837         let b = self.state[operands.src2].get_u8x16();
4838         let mut c = [0; 16];
4839         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4840             *c = if a == b { u8::MAX } else { 0 };
4841         }
4842         self.state[operands.dst].set_u8x16(c);
4843         ControlFlow::Continue(())
4844     }
4845 
4846     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vneq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4847     fn vneq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4848         let a = self.state[operands.src1].get_u8x16();
4849         let b = self.state[operands.src2].get_u8x16();
4850         let mut c = [0; 16];
4851         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4852             *c = if a != b { u8::MAX } else { 0 };
4853         }
4854         self.state[operands.dst].set_u8x16(c);
4855         ControlFlow::Continue(())
4856     }
4857 
4858     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vslt8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4859     fn vslt8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4860         let a = self.state[operands.src1].get_i8x16();
4861         let b = self.state[operands.src2].get_i8x16();
4862         let mut c = [0; 16];
4863         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4864             *c = if a < b { u8::MAX } else { 0 };
4865         }
4866         self.state[operands.dst].set_u8x16(c);
4867         ControlFlow::Continue(())
4868     }
4869 
4870     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vslteq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4871     fn vslteq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4872         let a = self.state[operands.src1].get_i8x16();
4873         let b = self.state[operands.src2].get_i8x16();
4874         let mut c = [0; 16];
4875         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4876             *c = if a <= b { u8::MAX } else { 0 };
4877         }
4878         self.state[operands.dst].set_u8x16(c);
4879         ControlFlow::Continue(())
4880     }
4881 
4882     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vult8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4883     fn vult8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4884         let a = self.state[operands.src1].get_u8x16();
4885         let b = self.state[operands.src2].get_u8x16();
4886         let mut c = [0; 16];
4887         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4888             *c = if a < b { u8::MAX } else { 0 };
4889         }
4890         self.state[operands.dst].set_u8x16(c);
4891         ControlFlow::Continue(())
4892     }
4893 
4894     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vulteq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4895     fn vulteq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4896         let a = self.state[operands.src1].get_u8x16();
4897         let b = self.state[operands.src2].get_u8x16();
4898         let mut c = [0; 16];
4899         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4900             *c = if a <= b { u8::MAX } else { 0 };
4901         }
4902         self.state[operands.dst].set_u8x16(c);
4903         ControlFlow::Continue(())
4904     }
4905 
4906     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
veq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4907     fn veq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4908         let a = self.state[operands.src1].get_u16x8();
4909         let b = self.state[operands.src2].get_u16x8();
4910         let mut c = [0; 8];
4911         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4912             *c = if a == b { u16::MAX } else { 0 };
4913         }
4914         self.state[operands.dst].set_u16x8(c);
4915         ControlFlow::Continue(())
4916     }
4917 
4918     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vneq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4919     fn vneq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4920         let a = self.state[operands.src1].get_u16x8();
4921         let b = self.state[operands.src2].get_u16x8();
4922         let mut c = [0; 8];
4923         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4924             *c = if a != b { u16::MAX } else { 0 };
4925         }
4926         self.state[operands.dst].set_u16x8(c);
4927         ControlFlow::Continue(())
4928     }
4929 
4930     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vslt16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4931     fn vslt16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4932         let a = self.state[operands.src1].get_i16x8();
4933         let b = self.state[operands.src2].get_i16x8();
4934         let mut c = [0; 8];
4935         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4936             *c = if a < b { u16::MAX } else { 0 };
4937         }
4938         self.state[operands.dst].set_u16x8(c);
4939         ControlFlow::Continue(())
4940     }
4941 
4942     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vslteq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4943     fn vslteq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4944         let a = self.state[operands.src1].get_i16x8();
4945         let b = self.state[operands.src2].get_i16x8();
4946         let mut c = [0; 8];
4947         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4948             *c = if a <= b { u16::MAX } else { 0 };
4949         }
4950         self.state[operands.dst].set_u16x8(c);
4951         ControlFlow::Continue(())
4952     }
4953 
4954     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vult16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4955     fn vult16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4956         let a = self.state[operands.src1].get_u16x8();
4957         let b = self.state[operands.src2].get_u16x8();
4958         let mut c = [0; 8];
4959         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4960             *c = if a < b { u16::MAX } else { 0 };
4961         }
4962         self.state[operands.dst].set_u16x8(c);
4963         ControlFlow::Continue(())
4964     }
4965 
4966     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vulteq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4967     fn vulteq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4968         let a = self.state[operands.src1].get_u16x8();
4969         let b = self.state[operands.src2].get_u16x8();
4970         let mut c = [0; 8];
4971         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4972             *c = if a <= b { u16::MAX } else { 0 };
4973         }
4974         self.state[operands.dst].set_u16x8(c);
4975         ControlFlow::Continue(())
4976     }
4977 
4978     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
veq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4979     fn veq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4980         let a = self.state[operands.src1].get_u32x4();
4981         let b = self.state[operands.src2].get_u32x4();
4982         let mut c = [0; 4];
4983         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4984             *c = if a == b { u32::MAX } else { 0 };
4985         }
4986         self.state[operands.dst].set_u32x4(c);
4987         ControlFlow::Continue(())
4988     }
4989 
4990     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vneq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>4991     fn vneq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4992         let a = self.state[operands.src1].get_u32x4();
4993         let b = self.state[operands.src2].get_u32x4();
4994         let mut c = [0; 4];
4995         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4996             *c = if a != b { u32::MAX } else { 0 };
4997         }
4998         self.state[operands.dst].set_u32x4(c);
4999         ControlFlow::Continue(())
5000     }
5001 
5002     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vslt32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>5003     fn vslt32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5004         let a = self.state[operands.src1].get_i32x4();
5005         let b = self.state[operands.src2].get_i32x4();
5006         let mut c = [0; 4];
5007         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5008             *c = if a < b { u32::MAX } else { 0 };
5009         }
5010         self.state[operands.dst].set_u32x4(c);
5011         ControlFlow::Continue(())
5012     }
5013 
5014     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vslteq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>5015     fn vslteq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5016         let a = self.state[operands.src1].get_i32x4();
5017         let b = self.state[operands.src2].get_i32x4();
5018         let mut c = [0; 4];
5019         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5020             *c = if a <= b { u32::MAX } else { 0 };
5021         }
5022         self.state[operands.dst].set_u32x4(c);
5023         ControlFlow::Continue(())
5024     }
5025 
5026     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vult32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>5027     fn vult32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5028         let a = self.state[operands.src1].get_u32x4();
5029         let b = self.state[operands.src2].get_u32x4();
5030         let mut c = [0; 4];
5031         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5032             *c = if a < b { u32::MAX } else { 0 };
5033         }
5034         self.state[operands.dst].set_u32x4(c);
5035         ControlFlow::Continue(())
5036     }
5037 
5038     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vulteq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>5039     fn vulteq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5040         let a = self.state[operands.src1].get_u32x4();
5041         let b = self.state[operands.src2].get_u32x4();
5042         let mut c = [0; 4];
5043         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5044             *c = if a <= b { u32::MAX } else { 0 };
5045         }
5046         self.state[operands.dst].set_u32x4(c);
5047         ControlFlow::Continue(())
5048     }
5049 
5050     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
veq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>5051     fn veq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5052         let a = self.state[operands.src1].get_u64x2();
5053         let b = self.state[operands.src2].get_u64x2();
5054         let mut c = [0; 2];
5055         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5056             *c = if a == b { u64::MAX } else { 0 };
5057         }
5058         self.state[operands.dst].set_u64x2(c);
5059         ControlFlow::Continue(())
5060     }
5061 
5062     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vneq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>5063     fn vneq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5064         let a = self.state[operands.src1].get_u64x2();
5065         let b = self.state[operands.src2].get_u64x2();
5066         let mut c = [0; 2];
5067         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5068             *c = if a != b { u64::MAX } else { 0 };
5069         }
5070         self.state[operands.dst].set_u64x2(c);
5071         ControlFlow::Continue(())
5072     }
5073 
5074     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vslt64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>5075     fn vslt64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5076         let a = self.state[operands.src1].get_i64x2();
5077         let b = self.state[operands.src2].get_i64x2();
5078         let mut c = [0; 2];
5079         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5080             *c = if a < b { u64::MAX } else { 0 };
5081         }
5082         self.state[operands.dst].set_u64x2(c);
5083         ControlFlow::Continue(())
5084     }
5085 
5086     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vslteq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>5087     fn vslteq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5088         let a = self.state[operands.src1].get_i64x2();
5089         let b = self.state[operands.src2].get_i64x2();
5090         let mut c = [0; 2];
5091         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5092             *c = if a <= b { u64::MAX } else { 0 };
5093         }
5094         self.state[operands.dst].set_u64x2(c);
5095         ControlFlow::Continue(())
5096     }
5097 
5098     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vult64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>5099     fn vult64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5100         let a = self.state[operands.src1].get_u64x2();
5101         let b = self.state[operands.src2].get_u64x2();
5102         let mut c = [0; 2];
5103         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5104             *c = if a < b { u64::MAX } else { 0 };
5105         }
5106         self.state[operands.dst].set_u64x2(c);
5107         ControlFlow::Continue(())
5108     }
5109 
5110     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vulteq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>5111     fn vulteq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5112         let a = self.state[operands.src1].get_u64x2();
5113         let b = self.state[operands.src2].get_u64x2();
5114         let mut c = [0; 2];
5115         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5116             *c = if a <= b { u64::MAX } else { 0 };
5117         }
5118         self.state[operands.dst].set_u64x2(c);
5119         ControlFlow::Continue(())
5120     }
5121 
5122     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vneg8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>5123     fn vneg8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5124         let a = self.state[src].get_i8x16();
5125         self.state[dst].set_i8x16(a.map(|i| i.wrapping_neg()));
5126         ControlFlow::Continue(())
5127     }
5128 
5129     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vneg16x8(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>5130     fn vneg16x8(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5131         let a = self.state[src].get_i16x8();
5132         self.state[dst].set_i16x8(a.map(|i| i.wrapping_neg()));
5133         ControlFlow::Continue(())
5134     }
5135 
5136     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vneg32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>5137     fn vneg32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5138         let a = self.state[src].get_i32x4();
5139         self.state[dst].set_i32x4(a.map(|i| i.wrapping_neg()));
5140         ControlFlow::Continue(())
5141     }
5142 
5143     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vneg64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>5144     fn vneg64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5145         let a = self.state[src].get_i64x2();
5146         self.state[dst].set_i64x2(a.map(|i| i.wrapping_neg()));
5147         ControlFlow::Continue(())
5148     }
5149 
5150     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vnegf64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>5151     fn vnegf64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5152         let a = self.state[src].get_f64x2();
5153         self.state[dst].set_f64x2(a.map(|i| -i));
5154         ControlFlow::Continue(())
5155     }
5156 
5157     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vmin8x16_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>5158     fn vmin8x16_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5159         let mut a = self.state[operands.src1].get_i8x16();
5160         let b = self.state[operands.src2].get_i8x16();
5161         for (a, b) in a.iter_mut().zip(&b) {
5162             *a = (*a).min(*b);
5163         }
5164         self.state[operands.dst].set_i8x16(a);
5165         ControlFlow::Continue(())
5166     }
5167 
5168     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vmin8x16_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>5169     fn vmin8x16_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5170         let mut a = self.state[operands.src1].get_u8x16();
5171         let b = self.state[operands.src2].get_u8x16();
5172         for (a, b) in a.iter_mut().zip(&b) {
5173             *a = (*a).min(*b);
5174         }
5175         self.state[operands.dst].set_u8x16(a);
5176         ControlFlow::Continue(())
5177     }
5178 
5179     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vmin16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>5180     fn vmin16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5181         let mut a = self.state[operands.src1].get_i16x8();
5182         let b = self.state[operands.src2].get_i16x8();
5183         for (a, b) in a.iter_mut().zip(&b) {
5184             *a = (*a).min(*b);
5185         }
5186         self.state[operands.dst].set_i16x8(a);
5187         ControlFlow::Continue(())
5188     }
5189 
5190     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vmin16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>5191     fn vmin16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5192         let mut a = self.state[operands.src1].get_u16x8();
5193         let b = self.state[operands.src2].get_u16x8();
5194         for (a, b) in a.iter_mut().zip(&b) {
5195             *a = (*a).min(*b);
5196         }
5197         self.state[operands.dst].set_u16x8(a);
5198         ControlFlow::Continue(())
5199     }
5200 
5201     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vmin32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>5202     fn vmin32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5203         let mut a = self.state[operands.src1].get_i32x4();
5204         let b = self.state[operands.src2].get_i32x4();
5205         for (a, b) in a.iter_mut().zip(&b) {
5206             *a = (*a).min(*b);
5207         }
5208         self.state[operands.dst].set_i32x4(a);
5209         ControlFlow::Continue(())
5210     }
5211 
5212     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vmin32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>5213     fn vmin32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5214         let mut a = self.state[operands.src1].get_u32x4();
5215         let b = self.state[operands.src2].get_u32x4();
5216         for (a, b) in a.iter_mut().zip(&b) {
5217             *a = (*a).min(*b);
5218         }
5219         self.state[operands.dst].set_u32x4(a);
5220         ControlFlow::Continue(())
5221     }
5222 
5223     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vmax8x16_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>5224     fn vmax8x16_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5225         let mut a = self.state[operands.src1].get_i8x16();
5226         let b = self.state[operands.src2].get_i8x16();
5227         for (a, b) in a.iter_mut().zip(&b) {
5228             *a = (*a).max(*b);
5229         }
5230         self.state[operands.dst].set_i8x16(a);
5231         ControlFlow::Continue(())
5232     }
5233 
5234     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vmax8x16_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>5235     fn vmax8x16_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5236         let mut a = self.state[operands.src1].get_u8x16();
5237         let b = self.state[operands.src2].get_u8x16();
5238         for (a, b) in a.iter_mut().zip(&b) {
5239             *a = (*a).max(*b);
5240         }
5241         self.state[operands.dst].set_u8x16(a);
5242         ControlFlow::Continue(())
5243     }
5244 
5245     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vmax16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>5246     fn vmax16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5247         let mut a = self.state[operands.src1].get_i16x8();
5248         let b = self.state[operands.src2].get_i16x8();
5249         for (a, b) in a.iter_mut().zip(&b) {
5250             *a = (*a).max(*b);
5251         }
5252         self.state[operands.dst].set_i16x8(a);
5253         ControlFlow::Continue(())
5254     }
5255 
5256     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vmax16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>5257     fn vmax16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5258         let mut a = self.state[operands.src1].get_u16x8();
5259         let b = self.state[operands.src2].get_u16x8();
5260         for (a, b) in a.iter_mut().zip(&b) {
5261             *a = (*a).max(*b);
5262         }
5263         self.state[operands.dst].set_u16x8(a);
5264         ControlFlow::Continue(())
5265     }
5266 
5267     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vmax32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>5268     fn vmax32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5269         let mut a = self.state[operands.src1].get_i32x4();
5270         let b = self.state[operands.src2].get_i32x4();
5271         for (a, b) in a.iter_mut().zip(&b) {
5272             *a = (*a).max(*b);
5273         }
5274         self.state[operands.dst].set_i32x4(a);
5275         ControlFlow::Continue(())
5276     }
5277 
5278     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vmax32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>5279     fn vmax32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5280         let mut a = self.state[operands.src1].get_u32x4();
5281         let b = self.state[operands.src2].get_u32x4();
5282         for (a, b) in a.iter_mut().zip(&b) {
5283             *a = (*a).max(*b);
5284         }
5285         self.state[operands.dst].set_u32x4(a);
5286         ControlFlow::Continue(())
5287     }
5288 
5289     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vabs8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>5290     fn vabs8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5291         let a = self.state[src].get_i8x16();
5292         self.state[dst].set_i8x16(a.map(|i| i.wrapping_abs()));
5293         ControlFlow::Continue(())
5294     }
5295 
5296     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vabs16x8(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>5297     fn vabs16x8(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5298         let a = self.state[src].get_i16x8();
5299         self.state[dst].set_i16x8(a.map(|i| i.wrapping_abs()));
5300         ControlFlow::Continue(())
5301     }
5302 
5303     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vabs32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>5304     fn vabs32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5305         let a = self.state[src].get_i32x4();
5306         self.state[dst].set_i32x4(a.map(|i| i.wrapping_abs()));
5307         ControlFlow::Continue(())
5308     }
5309 
5310     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vabs64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>5311     fn vabs64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5312         let a = self.state[src].get_i64x2();
5313         self.state[dst].set_i64x2(a.map(|i| i.wrapping_abs()));
5314         ControlFlow::Continue(())
5315     }
5316 
5317     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vabsf32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>5318     fn vabsf32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5319         let a = self.state[src].get_f32x4();
5320         self.state[dst].set_f32x4(a.map(|i| i.wasm_abs()));
5321         ControlFlow::Continue(())
5322     }
5323 
5324     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vabsf64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done>5325     fn vabsf64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5326         let a = self.state[src].get_f64x2();
5327         self.state[dst].set_f64x2(a.map(|i| i.wasm_abs()));
5328         ControlFlow::Continue(())
5329     }
5330 
5331     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vmaximumf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>5332     fn vmaximumf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5333         let mut a = self.state[operands.src1].get_f32x4();
5334         let b = self.state[operands.src2].get_f32x4();
5335         for (a, b) in a.iter_mut().zip(&b) {
5336             *a = a.wasm_maximum(*b);
5337         }
5338         self.state[operands.dst].set_f32x4(a);
5339         ControlFlow::Continue(())
5340     }
5341 
5342     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vmaximumf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>5343     fn vmaximumf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5344         let mut a = self.state[operands.src1].get_f64x2();
5345         let b = self.state[operands.src2].get_f64x2();
5346         for (a, b) in a.iter_mut().zip(&b) {
5347             *a = a.wasm_maximum(*b);
5348         }
5349         self.state[operands.dst].set_f64x2(a);
5350         ControlFlow::Continue(())
5351     }
5352 
5353     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vminimumf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>5354     fn vminimumf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5355         let mut a = self.state[operands.src1].get_f32x4();
5356         let b = self.state[operands.src2].get_f32x4();
5357         for (a, b) in a.iter_mut().zip(&b) {
5358             *a = a.wasm_minimum(*b);
5359         }
5360         self.state[operands.dst].set_f32x4(a);
5361         ControlFlow::Continue(())
5362     }
5363 
5364     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vminimumf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>5365     fn vminimumf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5366         let mut a = self.state[operands.src1].get_f64x2();
5367         let b = self.state[operands.src2].get_f64x2();
5368         for (a, b) in a.iter_mut().zip(&b) {
5369             *a = a.wasm_minimum(*b);
5370         }
5371         self.state[operands.dst].set_f64x2(a);
5372         ControlFlow::Continue(())
5373     }
5374 
5375     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vshuffle(&mut self, dst: VReg, src1: VReg, src2: VReg, mask: u128) -> ControlFlow<Done>5376     fn vshuffle(&mut self, dst: VReg, src1: VReg, src2: VReg, mask: u128) -> ControlFlow<Done> {
5377         let a = self.state[src1].get_u8x16();
5378         let b = self.state[src2].get_u8x16();
5379         let result = mask.to_le_bytes().map(|m| {
5380             if m < 16 {
5381                 a[m as usize]
5382             } else {
5383                 b[m as usize - 16]
5384             }
5385         });
5386         self.state[dst].set_u8x16(result);
5387         ControlFlow::Continue(())
5388     }
5389 
5390     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vswizzlei8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>5391     fn vswizzlei8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5392         let src1 = self.state[operands.src1].get_i8x16();
5393         let src2 = self.state[operands.src2].get_i8x16();
5394         let mut dst = [0i8; 16];
5395         for (i, &idx) in src2.iter().enumerate() {
5396             if (idx as usize) < 16 {
5397                 dst[i] = src1[idx as usize];
5398             } else {
5399                 dst[i] = 0
5400             }
5401         }
5402         self.state[operands.dst].set_i8x16(dst);
5403         ControlFlow::Continue(())
5404     }
5405 
5406     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vavground8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>5407     fn vavground8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5408         let mut a = self.state[operands.src1].get_u8x16();
5409         let b = self.state[operands.src2].get_u8x16();
5410         for (a, b) in a.iter_mut().zip(&b) {
5411             // use wider precision to avoid overflow
5412             *a = ((u32::from(*a) + u32::from(*b) + 1) / 2) as u8;
5413         }
5414         self.state[operands.dst].set_u8x16(a);
5415         ControlFlow::Continue(())
5416     }
5417 
5418     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vavground16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>5419     fn vavground16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5420         let mut a = self.state[operands.src1].get_u16x8();
5421         let b = self.state[operands.src2].get_u16x8();
5422         for (a, b) in a.iter_mut().zip(&b) {
5423             // use wider precision to avoid overflow
5424             *a = ((u32::from(*a) + u32::from(*b) + 1) / 2) as u16;
5425         }
5426         self.state[operands.dst].set_u16x8(a);
5427         ControlFlow::Continue(())
5428     }
5429 
5430     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
veqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>5431     fn veqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5432         let a = self.state[operands.src1].get_f32x4();
5433         let b = self.state[operands.src2].get_f32x4();
5434         let mut c = [0; 4];
5435         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5436             *c = if a == b { u32::MAX } else { 0 };
5437         }
5438         self.state[operands.dst].set_u32x4(c);
5439         ControlFlow::Continue(())
5440     }
5441 
5442     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vneqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>5443     fn vneqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5444         let a = self.state[operands.src1].get_f32x4();
5445         let b = self.state[operands.src2].get_f32x4();
5446         let mut c = [0; 4];
5447         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5448             *c = if a != b { u32::MAX } else { 0 };
5449         }
5450         self.state[operands.dst].set_u32x4(c);
5451         ControlFlow::Continue(())
5452     }
5453 
5454     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vltf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>5455     fn vltf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5456         let a = self.state[operands.src1].get_f32x4();
5457         let b = self.state[operands.src2].get_f32x4();
5458         let mut c = [0; 4];
5459         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5460             *c = if a < b { u32::MAX } else { 0 };
5461         }
5462         self.state[operands.dst].set_u32x4(c);
5463         ControlFlow::Continue(())
5464     }
5465 
5466     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vlteqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>5467     fn vlteqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5468         let a = self.state[operands.src1].get_f32x4();
5469         let b = self.state[operands.src2].get_f32x4();
5470         let mut c = [0; 4];
5471         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5472             *c = if a <= b { u32::MAX } else { 0 };
5473         }
5474         self.state[operands.dst].set_u32x4(c);
5475         ControlFlow::Continue(())
5476     }
5477 
5478     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
veqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>5479     fn veqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5480         let a = self.state[operands.src1].get_f64x2();
5481         let b = self.state[operands.src2].get_f64x2();
5482         let mut c = [0; 2];
5483         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5484             *c = if a == b { u64::MAX } else { 0 };
5485         }
5486         self.state[operands.dst].set_u64x2(c);
5487         ControlFlow::Continue(())
5488     }
5489 
5490     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vneqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>5491     fn vneqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5492         let a = self.state[operands.src1].get_f64x2();
5493         let b = self.state[operands.src2].get_f64x2();
5494         let mut c = [0; 2];
5495         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5496             *c = if a != b { u64::MAX } else { 0 };
5497         }
5498         self.state[operands.dst].set_u64x2(c);
5499         ControlFlow::Continue(())
5500     }
5501 
5502     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vltf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>5503     fn vltf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5504         let a = self.state[operands.src1].get_f64x2();
5505         let b = self.state[operands.src2].get_f64x2();
5506         let mut c = [0; 2];
5507         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5508             *c = if a < b { u64::MAX } else { 0 };
5509         }
5510         self.state[operands.dst].set_u64x2(c);
5511         ControlFlow::Continue(())
5512     }
5513 
5514     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vlteqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done>5515     fn vlteqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5516         let a = self.state[operands.src1].get_f64x2();
5517         let b = self.state[operands.src2].get_f64x2();
5518         let mut c = [0; 2];
5519         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5520             *c = if a <= b { u64::MAX } else { 0 };
5521         }
5522         self.state[operands.dst].set_u64x2(c);
5523         ControlFlow::Continue(())
5524     }
5525 
5526     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vfma32x4(&mut self, dst: VReg, a: VReg, b: VReg, c: VReg) -> ControlFlow<Done>5527     fn vfma32x4(&mut self, dst: VReg, a: VReg, b: VReg, c: VReg) -> ControlFlow<Done> {
5528         let mut a = self.state[a].get_f32x4();
5529         let b = self.state[b].get_f32x4();
5530         let c = self.state[c].get_f32x4();
5531         for ((a, b), c) in a.iter_mut().zip(b).zip(c) {
5532             *a = a.wasm_mul_add(b, c);
5533         }
5534         self.state[dst].set_f32x4(a);
5535         ControlFlow::Continue(())
5536     }
5537 
5538     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vfma64x2(&mut self, dst: VReg, a: VReg, b: VReg, c: VReg) -> ControlFlow<Done>5539     fn vfma64x2(&mut self, dst: VReg, a: VReg, b: VReg, c: VReg) -> ControlFlow<Done> {
5540         let mut a = self.state[a].get_f64x2();
5541         let b = self.state[b].get_f64x2();
5542         let c = self.state[c].get_f64x2();
5543         for ((a, b), c) in a.iter_mut().zip(b).zip(c) {
5544             *a = a.wasm_mul_add(b, c);
5545         }
5546         self.state[dst].set_f64x2(a);
5547         ControlFlow::Continue(())
5548     }
5549 
5550     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
vselect( &mut self, dst: VReg, cond: XReg, if_nonzero: VReg, if_zero: VReg, ) -> ControlFlow<Done>5551     fn vselect(
5552         &mut self,
5553         dst: VReg,
5554         cond: XReg,
5555         if_nonzero: VReg,
5556         if_zero: VReg,
5557     ) -> ControlFlow<Done> {
5558         let result = if self.state[cond].get_u32() != 0 {
5559             self.state[if_nonzero]
5560         } else {
5561             self.state[if_zero]
5562         };
5563         self.state[dst] = result;
5564         ControlFlow::Continue(())
5565     }
5566 
5567     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
xadd128( &mut self, dst_lo: XReg, dst_hi: XReg, lhs_lo: XReg, lhs_hi: XReg, rhs_lo: XReg, rhs_hi: XReg, ) -> ControlFlow<Done>5568     fn xadd128(
5569         &mut self,
5570         dst_lo: XReg,
5571         dst_hi: XReg,
5572         lhs_lo: XReg,
5573         lhs_hi: XReg,
5574         rhs_lo: XReg,
5575         rhs_hi: XReg,
5576     ) -> ControlFlow<Done> {
5577         let lhs = self.get_i128(lhs_lo, lhs_hi);
5578         let rhs = self.get_i128(rhs_lo, rhs_hi);
5579         let result = lhs.wrapping_add(rhs);
5580         self.set_i128(dst_lo, dst_hi, result);
5581         ControlFlow::Continue(())
5582     }
5583 
5584     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
xsub128( &mut self, dst_lo: XReg, dst_hi: XReg, lhs_lo: XReg, lhs_hi: XReg, rhs_lo: XReg, rhs_hi: XReg, ) -> ControlFlow<Done>5585     fn xsub128(
5586         &mut self,
5587         dst_lo: XReg,
5588         dst_hi: XReg,
5589         lhs_lo: XReg,
5590         lhs_hi: XReg,
5591         rhs_lo: XReg,
5592         rhs_hi: XReg,
5593     ) -> ControlFlow<Done> {
5594         let lhs = self.get_i128(lhs_lo, lhs_hi);
5595         let rhs = self.get_i128(rhs_lo, rhs_hi);
5596         let result = lhs.wrapping_sub(rhs);
5597         self.set_i128(dst_lo, dst_hi, result);
5598         ControlFlow::Continue(())
5599     }
5600 
5601     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
xwidemul64_s( &mut self, dst_lo: XReg, dst_hi: XReg, lhs: XReg, rhs: XReg, ) -> ControlFlow<Done>5602     fn xwidemul64_s(
5603         &mut self,
5604         dst_lo: XReg,
5605         dst_hi: XReg,
5606         lhs: XReg,
5607         rhs: XReg,
5608     ) -> ControlFlow<Done> {
5609         let lhs = self.state[lhs].get_i64();
5610         let rhs = self.state[rhs].get_i64();
5611         let result = i128::from(lhs).wrapping_mul(i128::from(rhs));
5612         self.set_i128(dst_lo, dst_hi, result);
5613         ControlFlow::Continue(())
5614     }
5615 
5616     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
xwidemul64_u( &mut self, dst_lo: XReg, dst_hi: XReg, lhs: XReg, rhs: XReg, ) -> ControlFlow<Done>5617     fn xwidemul64_u(
5618         &mut self,
5619         dst_lo: XReg,
5620         dst_hi: XReg,
5621         lhs: XReg,
5622         rhs: XReg,
5623     ) -> ControlFlow<Done> {
5624         let lhs = self.state[lhs].get_u64();
5625         let rhs = self.state[rhs].get_u64();
5626         let result = u128::from(lhs).wrapping_mul(u128::from(rhs));
5627         self.set_i128(dst_lo, dst_hi, result as i128);
5628         ControlFlow::Continue(())
5629     }
5630 }
5631