xref: /wasmtime-44.0.1/pulley/src/interp.rs (revision bda02c19)
1 //! Interpretation of pulley bytecode.
2 
3 use crate::decode::*;
4 use crate::encode::Encode;
5 use crate::imms::*;
6 use crate::profile::{ExecutingPc, ExecutingPcRef};
7 use crate::regs::*;
8 use alloc::string::ToString;
9 use alloc::vec::Vec;
10 use core::fmt;
11 use core::mem;
12 use core::ops::ControlFlow;
13 use core::ops::{Index, IndexMut};
14 use core::ptr::NonNull;
15 use pulley_macros::interp_disable_if_cfg;
16 use wasmtime_core::math::{WasmFloat, f32_cvt_to_int_bounds, f64_cvt_to_int_bounds};
17 
18 mod debug;
19 #[cfg(all(not(pulley_tail_calls), not(pulley_assume_llvm_makes_tail_calls)))]
20 mod match_loop;
21 #[cfg(any(pulley_tail_calls, pulley_assume_llvm_makes_tail_calls))]
22 mod tail_loop;
23 
/// Stack size, in bytes, requested by [`Vm::new`] when no explicit size is
/// given.
const DEFAULT_STACK_SIZE: usize = 1 << 20; // 1 MiB
25 
/// A virtual machine for interpreting Pulley bytecode.
///
/// A `Vm` owns the interpreter's machine state (registers and stack) together
/// with a handle describing the currently executing program counter.
pub struct Vm {
    // Registers, stack and the recorded reason the last run stopped.
    state: MachineState,
    // Handle to the program counter being executed; it is marked as done when
    // the `Vm` is dropped.
    executing_pc: ExecutingPc,
}
31 
32 impl Default for Vm {
33     fn default() -> Self {
34         Vm::new()
35     }
36 }
37 
38 impl Vm {
39     /// Create a new virtual machine with the default stack size.
40     pub fn new() -> Self {
41         Self::with_stack(DEFAULT_STACK_SIZE)
42     }
43 
44     /// Create a new virtual machine with the given stack.
45     pub fn with_stack(stack_size: usize) -> Self {
46         Self {
47             state: MachineState::with_stack(stack_size),
48             executing_pc: ExecutingPc::default(),
49         }
50     }
51 
52     /// Get a shared reference to this VM's machine state.
53     pub fn state(&self) -> &MachineState {
54         &self.state
55     }
56 
57     /// Get an exclusive reference to this VM's machine state.
58     pub fn state_mut(&mut self) -> &mut MachineState {
59         &mut self.state
60     }
61 
62     /// Call a bytecode function.
63     ///
64     /// The given `func` must point to the beginning of a valid Pulley bytecode
65     /// function.
66     ///
67     /// The given `args` must match the number and type of arguments that
68     /// function expects.
69     ///
70     /// The given `rets` must match the function's actual return types.
71     ///
72     /// Returns either the resulting values, or the PC at which a trap was
73     /// raised.
74     pub unsafe fn call<'a, T>(
75         &'a mut self,
76         func: NonNull<u8>,
77         args: &[Val],
78         rets: T,
79     ) -> DoneReason<impl Iterator<Item = Val> + use<'a, T>>
80     where
81         T: IntoIterator<Item = RegType> + 'a,
82     {
83         unsafe {
84             let lr = self.call_start(args);
85 
86             match self.call_run(func) {
87                 DoneReason::ReturnToHost(()) => DoneReason::ReturnToHost(self.call_end(lr, rets)),
88                 DoneReason::Trap { pc, kind } => DoneReason::Trap { pc, kind },
89                 DoneReason::CallIndirectHost { id, resume } => {
90                     DoneReason::CallIndirectHost { id, resume }
91                 }
92             }
93         }
94     }
95 
96     /// Performs the initial part of [`Vm::call`] in setting up the `args`
97     /// provided in registers according to Pulley's ABI.
98     ///
99     /// # Return
100     ///
101     /// Returns the old `lr` register value. The current `lr` value is replaced
102     /// with a sentinel that triggers a return to the host when returned-to.
103     ///
104     /// # Unsafety
105     ///
106     /// All the same unsafety as `call` and additionally, you must
107     /// invoke `call_run` and then `call_end` after calling `call_start`.
108     /// If you don't want to wrangle these invocations, use `call` instead
109     /// of `call_{start,run,end}`.
110     pub unsafe fn call_start<'a>(&'a mut self, args: &[Val]) -> *mut u8 {
111         // NB: make sure this method stays in sync with
112         // `PulleyMachineDeps::compute_arg_locs`!
113 
114         let mut x_args = (0..15).map(|x| unsafe { XReg::new_unchecked(x) });
115         let mut f_args = (0..16).map(|f| unsafe { FReg::new_unchecked(f) });
116         #[cfg(not(pulley_disable_interp_simd))]
117         let mut v_args = (0..16).map(|v| unsafe { VReg::new_unchecked(v) });
118 
119         for arg in args {
120             match arg {
121                 Val::XReg(val) => match x_args.next() {
122                     Some(reg) => self.state[reg] = *val,
123                     None => todo!("stack slots"),
124                 },
125                 Val::FReg(val) => match f_args.next() {
126                     Some(reg) => self.state[reg] = *val,
127                     None => todo!("stack slots"),
128                 },
129                 #[cfg(not(pulley_disable_interp_simd))]
130                 Val::VReg(val) => match v_args.next() {
131                     Some(reg) => self.state[reg] = *val,
132                     None => todo!("stack slots"),
133                 },
134             }
135         }
136 
137         mem::replace(&mut self.state.lr, HOST_RETURN_ADDR)
138     }
139 
140     /// Peforms the internal part of [`Vm::call`] where bytecode is actually
141     /// executed.
142     ///
143     /// # Unsafety
144     ///
145     /// In addition to all the invariants documented for `call`, you
146     /// may only invoke `call_run` after invoking `call_start` to
147     /// initialize this call's arguments.
148     pub unsafe fn call_run(&mut self, pc: NonNull<u8>) -> DoneReason<()> {
149         self.state.debug_assert_done_reason_none();
150         let interpreter = Interpreter {
151             state: &mut self.state,
152             pc: unsafe { UnsafeBytecodeStream::new(pc) },
153             executing_pc: self.executing_pc.as_ref(),
154         };
155         let done = interpreter.run();
156         self.state.done_decode(done)
157     }
158 
159     /// Peforms the tail end of [`Vm::call`] by returning the values as
160     /// determined by `rets` according to Pulley's ABI.
161     ///
162     /// The `old_ret` value should have been provided from `call_start`
163     /// previously.
164     ///
165     /// # Unsafety
166     ///
167     /// In addition to the invariants documented for `call`, this may
168     /// only be called after `call_run`.
169     pub unsafe fn call_end<'a>(
170         &'a mut self,
171         old_ret: *mut u8,
172         rets: impl IntoIterator<Item = RegType> + 'a,
173     ) -> impl Iterator<Item = Val> + 'a {
174         self.state.lr = old_ret;
175         // NB: make sure this method stays in sync with
176         // `PulleyMachineDeps::compute_arg_locs`!
177 
178         let mut x_rets = (0..15).map(|x| unsafe { XReg::new_unchecked(x) });
179         let mut f_rets = (0..16).map(|f| unsafe { FReg::new_unchecked(f) });
180         #[cfg(not(pulley_disable_interp_simd))]
181         let mut v_rets = (0..16).map(|v| unsafe { VReg::new_unchecked(v) });
182 
183         rets.into_iter().map(move |ty| match ty {
184             RegType::XReg => match x_rets.next() {
185                 Some(reg) => Val::XReg(self.state[reg]),
186                 None => todo!("stack slots"),
187             },
188             RegType::FReg => match f_rets.next() {
189                 Some(reg) => Val::FReg(self.state[reg]),
190                 None => todo!("stack slots"),
191             },
192             #[cfg(not(pulley_disable_interp_simd))]
193             RegType::VReg => match v_rets.next() {
194                 Some(reg) => Val::VReg(self.state[reg]),
195                 None => todo!("stack slots"),
196             },
197             #[cfg(pulley_disable_interp_simd)]
198             RegType::VReg => panic!("simd support disabled at compile time"),
199         })
200     }
201 
202     /// Returns the current `fp` register value.
203     pub fn fp(&self) -> *mut u8 {
204         self.state.fp
205     }
206 
207     /// Returns the current `lr` register value.
208     pub fn lr(&self) -> *mut u8 {
209         self.state.lr
210     }
211 
212     /// Sets the current `fp` register value.
213     pub unsafe fn set_fp(&mut self, fp: *mut u8) {
214         self.state.fp = fp;
215     }
216 
217     /// Sets the current `lr` register value.
218     pub unsafe fn set_lr(&mut self, lr: *mut u8) {
219         self.state.lr = lr;
220     }
221 
222     /// Gets a handle to the currently executing program counter for this
223     /// interpreter which can be read from other threads.
224     //
225     // Note that despite this field still existing with `not(feature =
226     // "profile")` it's hidden from the public API in that scenario as it has no
227     // methods anyway.
228     #[cfg(feature = "profile")]
229     pub fn executing_pc(&self) -> &ExecutingPc {
230         &self.executing_pc
231     }
232 }
233 
impl Drop for Vm {
    /// Marks the executing-PC handle as done when the VM goes away.
    fn drop(&mut self) {
        // NOTE(review): presumably this lets other holders of the handle
        // observe that the VM is gone — confirm in `crate::profile`.
        self.executing_pc.set_done();
    }
}
239 
/// The type of a register in the Pulley machine state.
///
/// [`Vm::call_end`] consumes a sequence of these to decide which register
/// class each return value is read from.
#[derive(Clone, Copy, Debug)]
pub enum RegType {
    /// An `x` register: integers.
    XReg,

    /// An `f` register: floats.
    FReg,

    /// A `v` register: vectors.
    VReg,
}
252 
/// A value that can be stored in a register.
///
/// Used for the arguments passed to and the results yielded by [`Vm::call`].
#[derive(Clone, Copy, Debug)]
pub enum Val {
    /// An `x` register value: integers.
    XReg(XRegVal),

    /// An `f` register value: floats.
    FReg(FRegVal),

    /// A `v` register value: vectors.
    ///
    /// Only present when SIMD support is compiled into the interpreter.
    #[cfg(not(pulley_disable_interp_simd))]
    VReg(VRegVal),
}
266 
267 impl fmt::LowerHex for Val {
268     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
269         match self {
270             Val::XReg(v) => fmt::LowerHex::fmt(v, f),
271             Val::FReg(v) => fmt::LowerHex::fmt(v, f),
272             #[cfg(not(pulley_disable_interp_simd))]
273             Val::VReg(v) => fmt::LowerHex::fmt(v, f),
274         }
275     }
276 }
277 
278 impl From<XRegVal> for Val {
279     fn from(value: XRegVal) -> Self {
280         Val::XReg(value)
281     }
282 }
283 
284 impl From<u64> for Val {
285     fn from(value: u64) -> Self {
286         XRegVal::new_u64(value).into()
287     }
288 }
289 
290 impl From<u32> for Val {
291     fn from(value: u32) -> Self {
292         XRegVal::new_u32(value).into()
293     }
294 }
295 
296 impl From<i64> for Val {
297     fn from(value: i64) -> Self {
298         XRegVal::new_i64(value).into()
299     }
300 }
301 
302 impl From<i32> for Val {
303     fn from(value: i32) -> Self {
304         XRegVal::new_i32(value).into()
305     }
306 }
307 
308 impl<T> From<*mut T> for Val {
309     fn from(value: *mut T) -> Self {
310         XRegVal::new_ptr(value).into()
311     }
312 }
313 
314 impl From<FRegVal> for Val {
315     fn from(value: FRegVal) -> Self {
316         Val::FReg(value)
317     }
318 }
319 
320 impl From<f64> for Val {
321     fn from(value: f64) -> Self {
322         FRegVal::new_f64(value).into()
323     }
324 }
325 
326 impl From<f32> for Val {
327     fn from(value: f32) -> Self {
328         FRegVal::new_f32(value).into()
329     }
330 }
331 
332 #[cfg(not(pulley_disable_interp_simd))]
333 impl From<VRegVal> for Val {
334     fn from(value: VRegVal) -> Self {
335         Val::VReg(value)
336     }
337 }
338 
/// An `x` register value: integers.
///
/// Thin wrapper over the private `XRegUnion`; values are read and written
/// through the typed `get_*`/`set_*` accessors.
#[derive(Copy, Clone)]
pub struct XRegVal(XRegUnion);
342 
343 impl PartialEq for XRegVal {
344     fn eq(&self, other: &Self) -> bool {
345         self.get_u64() == other.get_u64()
346     }
347 }
348 
// Equality compares `u64` values (see `PartialEq` above), so it is a total
// equivalence relation.
impl Eq for XRegVal {}
350 
351 impl fmt::Debug for XRegVal {
352     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
353         f.debug_struct("XRegVal")
354             .field("as_u64", &self.get_u64())
355             .finish()
356     }
357 }
358 
359 impl fmt::LowerHex for XRegVal {
360     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
361         fmt::LowerHex::fmt(&self.get_u64(), f)
362     }
363 }
364 
365 /// Contents of an "x" register, or a general-purpose register.
366 ///
367 /// This is represented as a Rust `union` to make it easier to access typed
368 /// views of this, notably the `ptr` field which enables preserving a bit of
369 /// provenance for Rust for values stored as a pointer and read as a pointer.
370 ///
371 /// Note that the actual in-memory representation of this value is handled
372 /// carefully at this time. Pulley bytecode exposes the ability to store a
373 /// 32-bit result into a register and then read the 64-bit contents of the
374 /// register. This leaves us with the question of what to do with the upper bits
375 /// of the register when the 32-bit result is generated. Possibilities for
376 /// handling this are:
377 ///
/// 1. Do nothing, just store the 32-bit value. The problem with this approach
///    is that the "upper bits" are now endianness-dependent. That means that
///    the state of the register is now platform-dependent.
381 /// 2. Sign or zero-extend. This restores platform-independent behavior but
382 ///    requires an extra store on 32-bit platforms because they can probably
383 ///    only store 32-bits at a time.
384 /// 3. Always store the values in this union as little-endian. This means that
385 ///    big-endian platforms have to do a byte-swap but otherwise it has
386 ///    platform-independent behavior.
387 ///
388 /// This union chooses route (3) at this time where the values here are always
389 /// stored in little-endian form (even the `ptr` field). That guarantees
390 /// cross-platform behavior while also minimizing the amount of data stored on
391 /// writes.
392 ///
393 /// In the future we may wish to benchmark this and possibly change this.
394 /// Technically Cranelift-generated bytecode should never rely on the upper bits
395 /// of a register if it didn't previously write them so this in theory doesn't
396 /// actually matter for Cranelift or wasm semantics. The only cost right now is
397 /// to big-endian platforms though and it's not certain how crucial performance
398 /// will be there.
399 ///
400 /// One final note is that this notably contrasts with native CPUs where
401 /// native ISAs like RISC-V specifically define the entire register on every
402 /// instruction, even if only the low half contains a significant result. Pulley
403 /// is unlikely to become out-of-order within the CPU itself as it's interpreted
404 /// meaning that severing data-dependencies with previous operations is
405 /// hypothesized to not be too important. If this is ever a problem though it
406 /// could increase the likelihood we go for route (2) above instead (or maybe
407 /// even (1)).
#[derive(Copy, Clone)]
union XRegUnion {
    // 32-bit views; the accessors store and load these in little-endian form.
    i32: i32,
    u32: u32,
    // 64-bit views; likewise little-endian.
    i64: i64,
    u64: u64,

    // Note that this is intentionally `usize` and not an actual pointer like
    // `*mut u8`. The reason for this is that provenance is required in Rust for
    // pointers but Cranelift has no pointer type and thus no concept of
    // provenance. That means that at-rest it's not known whether the value has
    // provenance or not and basically means that Pulley is required to use
    // "permissive provenance" in Rust as opposed to strict provenance.
    //
    // That's more-or-less a long-winded way of saying that storage of a pointer
    // in this value is done with `.expose_provenance()` and reading a pointer
    // uses `with_exposed_provenance_mut(..)`.
    ptr: usize,
}
427 
428 impl Default for XRegVal {
429     fn default() -> Self {
430         Self(unsafe { mem::zeroed() })
431     }
432 }
433 
434 #[expect(missing_docs, reason = "self-describing methods")]
435 impl XRegVal {
436     pub fn new_i32(x: i32) -> Self {
437         let mut val = XRegVal::default();
438         val.set_i32(x);
439         val
440     }
441 
442     pub fn new_u32(x: u32) -> Self {
443         let mut val = XRegVal::default();
444         val.set_u32(x);
445         val
446     }
447 
448     pub fn new_i64(x: i64) -> Self {
449         let mut val = XRegVal::default();
450         val.set_i64(x);
451         val
452     }
453 
454     pub fn new_u64(x: u64) -> Self {
455         let mut val = XRegVal::default();
456         val.set_u64(x);
457         val
458     }
459 
460     pub fn new_ptr<T>(ptr: *mut T) -> Self {
461         let mut val = XRegVal::default();
462         val.set_ptr(ptr);
463         val
464     }
465 
466     pub fn get_i32(&self) -> i32 {
467         let x = unsafe { self.0.i32 };
468         i32::from_le(x)
469     }
470 
471     pub fn get_u32(&self) -> u32 {
472         let x = unsafe { self.0.u32 };
473         u32::from_le(x)
474     }
475 
476     pub fn get_i64(&self) -> i64 {
477         let x = unsafe { self.0.i64 };
478         i64::from_le(x)
479     }
480 
481     pub fn get_u64(&self) -> u64 {
482         let x = unsafe { self.0.u64 };
483         u64::from_le(x)
484     }
485 
486     pub fn get_ptr<T>(&self) -> *mut T {
487         let ptr = unsafe { self.0.ptr };
488         core::ptr::with_exposed_provenance_mut(usize::from_le(ptr))
489     }
490 
491     pub fn set_i32(&mut self, x: i32) {
492         self.0.i32 = x.to_le();
493     }
494 
495     pub fn set_u32(&mut self, x: u32) {
496         self.0.u32 = x.to_le();
497     }
498 
499     pub fn set_i64(&mut self, x: i64) {
500         self.0.i64 = x.to_le();
501     }
502 
503     pub fn set_u64(&mut self, x: u64) {
504         self.0.u64 = x.to_le();
505     }
506 
507     pub fn set_ptr<T>(&mut self, ptr: *mut T) {
508         self.0.ptr = ptr.expose_provenance().to_le();
509     }
510 }
511 
/// An `f` register value: floats.
///
/// Holds either a 32-bit or a 64-bit float, stored as raw little-endian bits.
#[derive(Copy, Clone)]
pub struct FRegVal(FRegUnion);

impl fmt::Debug for FRegVal {
    /// Shows both the `f32` and the `f64` interpretation of the register.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let (single, double) = (self.get_f32(), self.get_f64());
        let mut out = f.debug_struct("FRegVal");
        out.field("as_f32", &single);
        out.field("as_f64", &double);
        out.finish()
    }
}

impl fmt::LowerHex for FRegVal {
    /// Formats the bits of the 64-bit view in lowercase hexadecimal.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let bits: u64 = self.get_f64().to_bits();
        fmt::LowerHex::fmt(&bits, f)
    }
}

// NB: like `XRegUnion` values here are always little-endian, see the
// documentation above for more details.
#[derive(Copy, Clone)]
union FRegUnion {
    // Raw bits of an `f32`, little-endian.
    f32: u32,
    // Raw bits of an `f64`, little-endian.
    f64: u64,
}

impl Default for FRegVal {
    /// Produces a register value whose bytes are all zero.
    fn default() -> Self {
        // SAFETY: both union fields are plain integers, for which the all-zero
        // bit pattern is a valid value.
        let zeroed: FRegUnion = unsafe { mem::zeroed() };
        FRegVal(zeroed)
    }
}

#[expect(missing_docs, reason = "self-describing methods")]
impl FRegVal {
    pub fn new_f32(f: f32) -> Self {
        let mut reg = FRegVal::default();
        reg.set_f32(f);
        reg
    }

    pub fn new_f64(f: f64) -> Self {
        let mut reg = FRegVal::default();
        reg.set_f64(f);
        reg
    }

    pub fn get_f32(&self) -> f32 {
        // Stored bits are little-endian; decode them to native order first.
        let stored = unsafe { self.0.f32 };
        f32::from_bits(u32::from_le(stored))
    }

    pub fn get_f64(&self) -> f64 {
        let stored = unsafe { self.0.f64 };
        f64::from_bits(u64::from_le(stored))
    }

    pub fn set_f32(&mut self, val: f32) {
        // Only the 32-bit view is written; the other bytes are left as-is.
        self.0.f32 = val.to_bits().to_le();
    }

    pub fn set_f64(&mut self, val: f64) {
        self.0.f64 = val.to_bits().to_le();
    }
}
577 
/// A `v` register value: vectors.
///
/// Wraps the private 128-bit `VRegUnion`; only available when SIMD support is
/// compiled into the interpreter.
#[derive(Copy, Clone)]
#[cfg(not(pulley_disable_interp_simd))]
pub struct VRegVal(VRegUnion);
582 
583 #[cfg(not(pulley_disable_interp_simd))]
584 impl fmt::Debug for VRegVal {
585     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
586         f.debug_struct("VRegVal")
587             .field("as_u128", &unsafe { self.0.u128 })
588             .finish()
589     }
590 }
591 
592 #[cfg(not(pulley_disable_interp_simd))]
593 impl fmt::LowerHex for VRegVal {
594     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
595         fmt::LowerHex::fmt(unsafe { &self.0.u128 }, f)
596     }
597 }
598 
/// 128-bit vector registers.
///
/// This register is always stored in little-endian order and has different
/// constraints than `XRegVal` and `FRegVal` above. Notably all fields of this
/// union are the same width so all bits are always defined. Note that
/// little-endian is required though so bitcasts between different shapes of
/// vectors works. This union cannot be stored in big-endian.
#[derive(Copy, Clone)]
#[repr(align(16))]
#[cfg(not(pulley_disable_interp_simd))]
union VRegUnion {
    // The whole register as one 128-bit integer.
    u128: u128,
    // Signed integer lanes.
    i8x16: [i8; 16],
    i16x8: [i16; 8],
    i32x4: [i32; 4],
    i64x2: [i64; 2],
    // Unsigned integer lanes.
    u8x16: [u8; 16],
    u16x8: [u16; 8],
    u32x4: [u32; 4],
    u64x2: [u64; 2],
    // Note that these are `u32` and `u64`, not f32/f64. That's only because
    // f32/f64 don't have `.to_le()` and `::from_le()` so need to go through the
    // bits anyway.
    f32x4: [u32; 4],
    f64x2: [u64; 2],
}
625 
626 #[cfg(not(pulley_disable_interp_simd))]
627 impl Default for VRegVal {
628     fn default() -> Self {
629         Self(unsafe { mem::zeroed() })
630     }
631 }
632 
633 #[expect(missing_docs, reason = "self-describing methods")]
634 #[cfg(not(pulley_disable_interp_simd))]
635 impl VRegVal {
636     pub fn new_u128(i: u128) -> Self {
637         let mut val = Self::default();
638         val.set_u128(i);
639         val
640     }
641 
642     pub fn get_u128(&self) -> u128 {
643         let val = unsafe { self.0.u128 };
644         u128::from_le(val)
645     }
646 
647     pub fn set_u128(&mut self, val: u128) {
648         self.0.u128 = val.to_le();
649     }
650 
651     fn get_i8x16(&self) -> [i8; 16] {
652         let val = unsafe { self.0.i8x16 };
653         val.map(|e| i8::from_le(e))
654     }
655 
656     fn set_i8x16(&mut self, val: [i8; 16]) {
657         self.0.i8x16 = val.map(|e| e.to_le());
658     }
659 
660     fn get_u8x16(&self) -> [u8; 16] {
661         let val = unsafe { self.0.u8x16 };
662         val.map(|e| u8::from_le(e))
663     }
664 
665     fn set_u8x16(&mut self, val: [u8; 16]) {
666         self.0.u8x16 = val.map(|e| e.to_le());
667     }
668 
669     fn get_i16x8(&self) -> [i16; 8] {
670         let val = unsafe { self.0.i16x8 };
671         val.map(|e| i16::from_le(e))
672     }
673 
674     fn set_i16x8(&mut self, val: [i16; 8]) {
675         self.0.i16x8 = val.map(|e| e.to_le());
676     }
677 
678     fn get_u16x8(&self) -> [u16; 8] {
679         let val = unsafe { self.0.u16x8 };
680         val.map(|e| u16::from_le(e))
681     }
682 
683     fn set_u16x8(&mut self, val: [u16; 8]) {
684         self.0.u16x8 = val.map(|e| e.to_le());
685     }
686 
687     fn get_i32x4(&self) -> [i32; 4] {
688         let val = unsafe { self.0.i32x4 };
689         val.map(|e| i32::from_le(e))
690     }
691 
692     fn set_i32x4(&mut self, val: [i32; 4]) {
693         self.0.i32x4 = val.map(|e| e.to_le());
694     }
695 
696     fn get_u32x4(&self) -> [u32; 4] {
697         let val = unsafe { self.0.u32x4 };
698         val.map(|e| u32::from_le(e))
699     }
700 
701     fn set_u32x4(&mut self, val: [u32; 4]) {
702         self.0.u32x4 = val.map(|e| e.to_le());
703     }
704 
705     fn get_i64x2(&self) -> [i64; 2] {
706         let val = unsafe { self.0.i64x2 };
707         val.map(|e| i64::from_le(e))
708     }
709 
710     fn set_i64x2(&mut self, val: [i64; 2]) {
711         self.0.i64x2 = val.map(|e| e.to_le());
712     }
713 
714     fn get_u64x2(&self) -> [u64; 2] {
715         let val = unsafe { self.0.u64x2 };
716         val.map(|e| u64::from_le(e))
717     }
718 
719     fn set_u64x2(&mut self, val: [u64; 2]) {
720         self.0.u64x2 = val.map(|e| e.to_le());
721     }
722 
723     fn get_f64x2(&self) -> [f64; 2] {
724         let val = unsafe { self.0.f64x2 };
725         val.map(|e| f64::from_bits(u64::from_le(e)))
726     }
727 
728     fn set_f64x2(&mut self, val: [f64; 2]) {
729         self.0.f64x2 = val.map(|e| e.to_bits().to_le());
730     }
731 
732     fn get_f32x4(&self) -> [f32; 4] {
733         let val = unsafe { self.0.f32x4 };
734         val.map(|e| f32::from_bits(u32::from_le(e)))
735     }
736 
737     fn set_f32x4(&mut self, val: [f32; 4]) {
738         self.0.f32x4 = val.map(|e| e.to_bits().to_le());
739     }
740 }
741 
/// The machine state for a Pulley virtual machine: the various registers and
/// stack.
pub struct MachineState {
    // Integer (`x`) register file, indexed by `XReg`.
    x_regs: [XRegVal; XReg::RANGE.end as usize],
    // Float (`f`) register file, indexed by `FReg`.
    f_regs: [FRegVal; FReg::RANGE.end as usize],
    // Vector (`v`) register file, indexed by `VReg`; compiled out when SIMD
    // support is disabled.
    #[cfg(not(pulley_disable_interp_simd))]
    v_regs: [VRegVal; VReg::RANGE.end as usize],
    // Frame pointer register; initialized to `HOST_RETURN_ADDR`.
    fp: *mut u8,
    // Link register; initialized to `HOST_RETURN_ADDR` and reset to it by
    // `Vm::call_start`.
    lr: *mut u8,
    // Backing allocation of the interpreter stack.
    stack: Stack,
    // Why the interpreter last stopped; set by the `done_*` helpers and taken
    // by `done_decode`.
    done_reason: Option<DoneReason<()>>,
}
754 
// `MachineState` holds raw pointers (`fp`, `lr`), so `Send`/`Sync` are not
// derived automatically and are asserted by hand here.
// NOTE(review): the soundness argument is not stated in this part of the
// file — confirm before relying on it.
unsafe impl Send for MachineState {}
unsafe impl Sync for MachineState {}
757 
/// Helper structure to store the state of the Pulley stack.
///
/// The Pulley stack notably needs to be a 16-byte aligned allocation on the
/// host to ensure that addresses handed out are indeed 16-byte aligned. This is
/// done with a custom `Vec<T>` internally where `T` has size and align of 16.
/// This is manually done with a helper `Align16` type below.
struct Stack {
    // Only the capacity of this vector is used; its length stays zero and the
    // spare capacity is the stack memory.
    storage: Vec<Align16>,
}

/// Helper type used with `Stack` above.
#[derive(Copy, Clone)]
#[repr(align(16))]
struct Align16 {
    // Just here to give the structure a size of 16. The alignment is always 16
    // regardless of what the host platform's alignment of u128 is.
    _unused: u128,
}

impl Stack {
    /// Creates a new stack which will have a byte size of at least `size`.
    ///
    /// The allocated stack might be slightly larger due to rounding necessary.
    ///
    /// # Panics
    ///
    /// Panics if the requested capacity cannot be represented by `Vec`
    /// (capacity overflow).
    fn new(size: usize) -> Stack {
        // Round `size` up to a whole number of 16-byte units. `div_ceil` is
        // used instead of `(size + 15) / 16` because the addition overflows
        // for sizes within 15 bytes of `usize::MAX`, which in builds without
        // overflow checks would silently produce a near-empty stack.
        let units = size.div_ceil(mem::size_of::<Align16>());
        Stack {
            // Note that the stack is allocated here but not initialized, and
            // that's intentional as pulley bytecode should always initialize
            // the stack before use.
            storage: Vec::with_capacity(units),
        }
    }

    /// Returns a pointer to the top of the stack (the highest address).
    ///
    /// Note that the returned pointer has provenance for the entire stack
    /// allocation, however, not just the top.
    fn top(&mut self) -> *mut u8 {
        let len = self.len();
        // SAFETY: `len` is the byte size of the allocation behind `base()`,
        // so the result is the one-past-the-end pointer of that allocation.
        unsafe { self.base().add(len) }
    }

    /// Returns a pointer to the base of the stack (the lowest address).
    ///
    /// Note that the returned pointer has provenance for the entire stack
    /// allocation, however, not just the base.
    fn base(&mut self) -> *mut u8 {
        self.storage.as_mut_ptr().cast::<u8>()
    }

    /// Returns the length, in bytes, of this stack allocation.
    fn len(&self) -> usize {
        self.storage.capacity() * mem::size_of::<Align16>()
    }
}
813 
814 impl fmt::Debug for MachineState {
815     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
816         let MachineState {
817             x_regs,
818             f_regs,
819             #[cfg(not(pulley_disable_interp_simd))]
820             v_regs,
821             stack: _,
822             done_reason: _,
823             fp: _,
824             lr: _,
825         } = self;
826 
827         struct RegMap<'a, R>(&'a [R], fn(u8) -> alloc::string::String);
828 
829         impl<R: fmt::Debug> fmt::Debug for RegMap<'_, R> {
830             fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
831                 let mut f = f.debug_map();
832                 for (i, r) in self.0.iter().enumerate() {
833                     f.entry(&(self.1)(i as u8), r);
834                 }
835                 f.finish()
836             }
837         }
838 
839         let mut f = f.debug_struct("MachineState");
840 
841         f.field(
842             "x_regs",
843             &RegMap(x_regs, |i| XReg::new(i).unwrap().to_string()),
844         )
845         .field(
846             "f_regs",
847             &RegMap(f_regs, |i| FReg::new(i).unwrap().to_string()),
848         );
849         #[cfg(not(pulley_disable_interp_simd))]
850         f.field(
851             "v_regs",
852             &RegMap(v_regs, |i| VReg::new(i).unwrap().to_string()),
853         );
854         f.finish_non_exhaustive()
855     }
856 }
857 
// Generates `Index`/`IndexMut` impls keyed by a register type.
//
// Arguments: the register type used as the index, the value type stored for
// that register, and the `MachineState` field holding the register file.
// `MachineState` indexes the field directly via `reg.index()`; `Vm` forwards
// to its `state`.
macro_rules! index_reg {
    ($reg_ty:ty,$value_ty:ty,$field:ident) => {
        impl Index<$reg_ty> for Vm {
            type Output = $value_ty;

            fn index(&self, reg: $reg_ty) -> &Self::Output {
                &self.state[reg]
            }
        }

        impl IndexMut<$reg_ty> for Vm {
            fn index_mut(&mut self, reg: $reg_ty) -> &mut Self::Output {
                &mut self.state[reg]
            }
        }

        impl Index<$reg_ty> for MachineState {
            type Output = $value_ty;

            fn index(&self, reg: $reg_ty) -> &Self::Output {
                &self.$field[reg.index()]
            }
        }

        impl IndexMut<$reg_ty> for MachineState {
            fn index_mut(&mut self, reg: $reg_ty) -> &mut Self::Output {
                &mut self.$field[reg.index()]
            }
        }
    };
}

// One instantiation per register class; the vector one is compiled out when
// SIMD support is disabled.
index_reg!(XReg, XRegVal, x_regs);
index_reg!(FReg, FRegVal, f_regs);
#[cfg(not(pulley_disable_interp_simd))]
index_reg!(VReg, VRegVal, v_regs);
894 
/// Sentinel return address that signals the end of the call stack.
///
/// `fp` and `lr` start out with this value, and `Vm::call_start` stores it in
/// `lr` so that returning to it hands control back to the host.
const HOST_RETURN_ADDR: *mut u8 = usize::MAX as *mut u8;
897 
898 impl MachineState {
899     fn with_stack(stack_size: usize) -> Self {
900         let mut state = Self {
901             x_regs: [Default::default(); XReg::RANGE.end as usize],
902             f_regs: Default::default(),
903             #[cfg(not(pulley_disable_interp_simd))]
904             v_regs: Default::default(),
905             stack: Stack::new(stack_size),
906             done_reason: None,
907             fp: HOST_RETURN_ADDR,
908             lr: HOST_RETURN_ADDR,
909         };
910 
911         let sp = state.stack.top();
912         state[XReg::sp] = XRegVal::new_ptr(sp);
913 
914         state
915     }
916 }
917 
918 /// Inner private module to prevent creation of the `Done` structure outside of
919 /// this module.
920 mod done {
921     use super::{Encode, Interpreter, MachineState};
922     use core::ops::ControlFlow;
923     use core::ptr::NonNull;
924 
    /// Zero-sized sentinel indicating that pulley execution has halted.
    ///
    /// The reason for halting is stored in `MachineState`.
    #[derive(Copy, Clone, Debug, PartialEq, Eq)]
    pub struct Done {
        // Private field: keeps `Done` from being constructed outside of this
        // module.
        _priv: (),
    }
932 
    /// Reason that the pulley interpreter has ceased execution.
    ///
    /// The type parameter `T` is the payload carried when control returns to
    /// the host normally.
    pub enum DoneReason<T> {
        /// A trap happened at this bytecode instruction.
        Trap {
            /// Which instruction is raising this trap.
            pc: NonNull<u8>,
            /// The kind of trap being raised, if known.
            kind: Option<TrapKind>,
        },
        /// The `call_indirect_host` instruction was executed.
        CallIndirectHost {
            /// The payload of `call_indirect_host`.
            id: u8,
            /// Where to resume execution after the host has finished.
            resume: NonNull<u8>,
        },
        /// Pulley has finished and the provided value is being returned.
        ReturnToHost(T),
    }
952 
953     /// Stored within `DoneReason::Trap`.
954     #[expect(missing_docs, reason = "self-describing variants")]
955     pub enum TrapKind {
956         DivideByZero,
957         IntegerOverflow,
958         BadConversionToInteger,
959         MemoryOutOfBounds,
960         DisabledOpcode,
961         StackOverflow,
962     }
963 
964     impl MachineState {
965         pub(super) fn debug_assert_done_reason_none(&mut self) {
966             debug_assert!(self.done_reason.is_none());
967         }
968 
969         pub(super) fn done_decode(&mut self, Done { _priv }: Done) -> DoneReason<()> {
970             self.done_reason.take().unwrap()
971         }
972     }
973 
974     impl Interpreter<'_> {
975         /// Finishes execution by recording `DoneReason::Trap`.
976         ///
977         /// This method takes an `I` generic parameter indicating which
978         /// instruction is executing this function and generating a trap. That's
979         /// used to go backwards from the current `pc` which is just beyond the
980         /// instruction to point to the instruction itself in the trap metadata
981         /// returned from the interpreter.
982         #[cold]
983         pub fn done_trap<I: Encode>(&mut self) -> ControlFlow<Done> {
984             self.done_trap_kind::<I>(None)
985         }
986 
987         /// Same as `done_trap` but with an explicit `TrapKind`.
988         #[cold]
989         pub fn done_trap_kind<I: Encode>(&mut self, kind: Option<TrapKind>) -> ControlFlow<Done> {
990             let pc = self.current_pc::<I>();
991             self.state.done_reason = Some(DoneReason::Trap { pc, kind });
992             ControlFlow::Break(Done { _priv: () })
993         }
994 
995         /// Finishes execution by recording `DoneReason::CallIndirectHost`.
996         #[cold]
997         pub fn done_call_indirect_host(&mut self, id: u8) -> ControlFlow<Done> {
998             self.state.done_reason = Some(DoneReason::CallIndirectHost {
999                 id,
1000                 resume: self.pc.as_ptr(),
1001             });
1002             ControlFlow::Break(Done { _priv: () })
1003         }
1004 
1005         /// Finishes execution by recording `DoneReason::ReturnToHost`.
1006         #[cold]
1007         pub fn done_return_to_host(&mut self) -> ControlFlow<Done> {
1008             self.state.done_reason = Some(DoneReason::ReturnToHost(()));
1009             ControlFlow::Break(Done { _priv: () })
1010         }
1011     }
1012 }
1013 
1014 use done::Done;
1015 pub use done::{DoneReason, TrapKind};
1016 
/// Working state of the interpreter while instructions are being executed.
struct Interpreter<'a> {
    /// Borrowed machine state: registers, the stack and the recorded
    /// `done_reason`.
    state: &'a mut MachineState,
    /// Bytecode cursor. While an instruction handler runs it points just past
    /// that instruction.
    pc: UnsafeBytecodeStream,
    /// Receives the executing pc when `record_executing_pc_for_profiling` is
    /// called.
    executing_pc: ExecutingPcRef<'a>,
}
1022 
1023 impl Interpreter<'_> {
1024     /// Calculates the `offset` for the current instruction `I`.
1025     #[inline]
1026     fn pc_rel<I: Encode>(&mut self, offset: PcRelOffset) -> NonNull<u8> {
1027         let offset = isize::try_from(i32::from(offset)).unwrap();
1028         unsafe { self.current_pc::<I>().offset(offset) }
1029     }
1030 
1031     /// Performs a relative jump of `offset` bytes from the current instruction.
1032     ///
1033     /// This will jump from the start of the current instruction, identified by
1034     /// `I`, `offset` bytes away. Note that the `self.pc` at the start of this
1035     /// function actually points to the instruction after this one so `I` is
1036     /// necessary to go back to ourselves after which we then go `offset` away.
1037     #[inline]
1038     fn pc_rel_jump<I: Encode>(&mut self, offset: PcRelOffset) -> ControlFlow<Done> {
1039         let new_pc = self.pc_rel::<I>(offset);
1040         self.pc = unsafe { UnsafeBytecodeStream::new(new_pc) };
1041         ControlFlow::Continue(())
1042     }
1043 
1044     /// Returns the PC of the current instruction where `I` is the static type
1045     /// representing the current instruction.
1046     fn current_pc<I: Encode>(&self) -> NonNull<u8> {
1047         unsafe { self.pc.offset(-isize::from(I::WIDTH)).as_ptr() }
1048     }
1049 
1050     /// `sp -= size_of::<T>(); *sp = val;`
1051     ///
1052     /// Note that `I` is the instruction which is pushing data to use if a trap
1053     /// is generated.
1054     #[must_use]
1055     fn push<I: Encode, T>(&mut self, val: T) -> ControlFlow<Done> {
1056         let new_sp = self.state[XReg::sp].get_ptr::<T>().wrapping_sub(1);
1057         self.set_sp::<I>(new_sp.cast())?;
1058         unsafe {
1059             new_sp.write_unaligned(val);
1060         }
1061         ControlFlow::Continue(())
1062     }
1063 
1064     /// `ret = *sp; sp -= size_of::<T>()`
1065     fn pop<T>(&mut self) -> T {
1066         let sp = self.state[XReg::sp].get_ptr::<T>();
1067         let val = unsafe { sp.read_unaligned() };
1068         self.set_sp_unchecked(sp.wrapping_add(1));
1069         val
1070     }
1071 
1072     /// Sets the stack pointer to the `sp` provided.
1073     ///
1074     /// Returns a trap if this would result in stack overflow, or if `sp` is
1075     /// beneath the base pointer of `self.state.stack`.
1076     ///
1077     /// The `I` parameter here is the instruction that is setting the stack
1078     /// pointer and is used to calculate this instruction's own `pc` if this
1079     /// instruction traps.
1080     #[must_use]
1081     fn set_sp<I: Encode>(&mut self, sp: *mut u8) -> ControlFlow<Done> {
1082         let sp_raw = sp as usize;
1083         let base_raw = self.state.stack.base() as usize;
1084         if sp_raw < base_raw {
1085             return self.done_trap_kind::<I>(Some(TrapKind::StackOverflow));
1086         }
1087         self.set_sp_unchecked(sp);
1088         ControlFlow::Continue(())
1089     }
1090 
1091     /// Same as `set_sp` but does not check to see if `sp` is in-bounds. Should
1092     /// only be used with stack increment operations such as `pop`.
1093     fn set_sp_unchecked<T>(&mut self, sp: *mut T) {
1094         if cfg!(debug_assertions) {
1095             let sp_raw = sp as usize;
1096             let base = self.state.stack.base() as usize;
1097             let end = base + self.state.stack.len();
1098             assert!(base <= sp_raw && sp_raw <= end);
1099         }
1100         self.state[XReg::sp].set_ptr(sp);
1101     }
1102 
1103     /// Loads a value of `T` using native-endian byte ordering from the `addr`
1104     /// specified.
1105     ///
1106     /// The `I` type parameter is the instruction issuing this load which is
1107     /// used in case of traps to calculate the trapping pc.
1108     ///
1109     /// Returns `ControlFlow::Break` if a trap happens or
1110     /// `ControlFlow::Continue` if the value was loaded successfully.
1111     ///
1112     /// # Unsafety
1113     ///
1114     /// Safety of this method relies on the safety of the original bytecode
1115     /// itself and correctly annotating both `T` and `I`.
1116     #[must_use]
1117     unsafe fn load_ne<T, I: Encode>(&mut self, addr: impl AddressingMode) -> ControlFlow<Done, T> {
1118         unsafe { addr.load_ne::<T, I>(self) }
1119     }
1120 
1121     /// Stores a `val` to the `addr` specified.
1122     ///
1123     /// The `I` type parameter is the instruction issuing this store which is
1124     /// used in case of traps to calculate the trapping pc.
1125     ///
1126     /// Returns `ControlFlow::Break` if a trap happens or
1127     /// `ControlFlow::Continue` if the value was stored successfully.
1128     ///
1129     /// # Unsafety
1130     ///
1131     /// Safety of this method relies on the safety of the original bytecode
1132     /// itself and correctly annotating both `T` and `I`.
1133     #[must_use]
1134     unsafe fn store_ne<T, I: Encode>(
1135         &mut self,
1136         addr: impl AddressingMode,
1137         val: T,
1138     ) -> ControlFlow<Done> {
1139         unsafe { addr.store_ne::<T, I>(self, val) }
1140     }
1141 
1142     fn check_xnn_from_f32<I: Encode>(
1143         &mut self,
1144         val: f32,
1145         (lo, hi): (f32, f32),
1146     ) -> ControlFlow<Done> {
1147         self.check_xnn_from_f64::<I>(val.into(), (lo.into(), hi.into()))
1148     }
1149 
1150     fn check_xnn_from_f64<I: Encode>(
1151         &mut self,
1152         val: f64,
1153         (lo, hi): (f64, f64),
1154     ) -> ControlFlow<Done> {
1155         if val != val {
1156             return self.done_trap_kind::<I>(Some(TrapKind::BadConversionToInteger));
1157         }
1158         let val = val.wasm_trunc();
1159         if val <= lo || val >= hi {
1160             return self.done_trap_kind::<I>(Some(TrapKind::IntegerOverflow));
1161         }
1162         ControlFlow::Continue(())
1163     }
1164 
1165     #[cfg(not(pulley_disable_interp_simd))]
1166     fn get_i128(&self, lo: XReg, hi: XReg) -> i128 {
1167         let lo = self.state[lo].get_u64();
1168         let hi = self.state[hi].get_i64();
1169         i128::from(lo) | (i128::from(hi) << 64)
1170     }
1171 
1172     #[cfg(not(pulley_disable_interp_simd))]
1173     fn set_i128(&mut self, lo: XReg, hi: XReg, val: i128) {
1174         self.state[lo].set_u64(val as u64);
1175         self.state[hi].set_u64((val >> 64) as u64);
1176     }
1177 
1178     fn record_executing_pc_for_profiling(&mut self) {
1179         // Note that this is a no-op if `feature = "profile"` is disabled.
1180         self.executing_pc.record(self.pc.as_ptr().as_ptr() as usize);
1181     }
1182 }
1183 
1184 /// Helper trait to encompass the various addressing modes of Pulley.
1185 trait AddressingMode: Sized {
1186     /// Calculates the native host address `*mut T` corresponding to this
1187     /// addressing mode.
1188     ///
1189     /// # Safety
1190     ///
1191     /// Relies on the original bytecode being safe to execute as this will
1192     /// otherwise perform unsafe byte offsets for example which requires the
1193     /// original bytecode to be correct.
1194     #[must_use]
1195     unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T>;
1196 
1197     /// Loads a value of `T` from this address, using native-endian byte order.
1198     ///
1199     /// For more information see [`Interpreter::load_ne`].
1200     #[must_use]
1201     unsafe fn load_ne<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, T> {
1202         let ret = unsafe { self.addr::<T, I>(i)?.read_unaligned() };
1203         ControlFlow::Continue(ret)
1204     }
1205 
1206     /// Stores a `val` to this address, using native-endian byte order.
1207     ///
1208     /// For more information see [`Interpreter::store_ne`].
1209     #[must_use]
1210     unsafe fn store_ne<T, I: Encode>(self, i: &mut Interpreter<'_>, val: T) -> ControlFlow<Done> {
1211         unsafe {
1212             self.addr::<T, I>(i)?.write_unaligned(val);
1213         }
1214         ControlFlow::Continue(())
1215     }
1216 }
1217 
1218 impl AddressingMode for AddrO32 {
1219     unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
1220         // Note that this addressing mode cannot return `ControlFlow::Break`
1221         // which is intentional. It's expected that LLVM optimizes away any
1222         // branches callers have.
1223         unsafe {
1224             ControlFlow::Continue(
1225                 i.state[self.addr]
1226                     .get_ptr::<T>()
1227                     .byte_offset(self.offset as isize),
1228             )
1229         }
1230     }
1231 }
1232 
1233 impl AddressingMode for AddrZ {
1234     unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
1235         // This addressing mode defines loading/storing to the null address as
1236         // a trap, but all other addresses are allowed.
1237         let host_addr = i.state[self.addr].get_ptr::<T>();
1238         if host_addr.is_null() {
1239             i.done_trap_kind::<I>(Some(TrapKind::MemoryOutOfBounds))?;
1240             unreachable!();
1241         }
1242         unsafe {
1243             let addr = host_addr.byte_offset(self.offset as isize);
1244             ControlFlow::Continue(addr)
1245         }
1246     }
1247 }
1248 
1249 impl AddressingMode for AddrG32 {
1250     unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
1251         // Test if `bound - offset - T` is less than the wasm address to
1252         // generate a trap. It's a guarantee of this instruction that these
1253         // subtractions don't overflow.
1254         let bound = i.state[self.host_heap_bound].get_u64() as usize;
1255         let offset = usize::from(self.offset);
1256         let wasm_addr = i.state[self.wasm_addr].get_u32() as usize;
1257         if wasm_addr > bound - offset - size_of::<T>() {
1258             i.done_trap_kind::<I>(Some(TrapKind::MemoryOutOfBounds))?;
1259             unreachable!();
1260         }
1261         unsafe {
1262             let addr = i.state[self.host_heap_base]
1263                 .get_ptr::<T>()
1264                 .byte_add(wasm_addr)
1265                 .byte_add(offset);
1266             ControlFlow::Continue(addr)
1267         }
1268     }
1269 }
1270 
1271 impl AddressingMode for AddrG32Bne {
1272     unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
1273         // Same as `AddrG32` above except that the bound is loaded from memory.
1274         let bound = unsafe {
1275             *i.state[self.host_heap_bound_addr]
1276                 .get_ptr::<usize>()
1277                 .byte_add(usize::from(self.host_heap_bound_offset))
1278         };
1279         let wasm_addr = i.state[self.wasm_addr].get_u32() as usize;
1280         let offset = usize::from(self.offset);
1281         if wasm_addr > bound - offset - size_of::<T>() {
1282             i.done_trap_kind::<I>(Some(TrapKind::MemoryOutOfBounds))?;
1283             unreachable!();
1284         }
1285         unsafe {
1286             let addr = i.state[self.host_heap_base]
1287                 .get_ptr::<T>()
1288                 .byte_add(wasm_addr)
1289                 .byte_add(offset);
1290             ControlFlow::Continue(addr)
1291         }
1292     }
1293 }
1294 
#[test]
fn simple_push_pop() {
    let mut state = MachineState::with_stack(16);
    let pc = ExecutingPc::default();
    unsafe {
        let mut bytecode = [0; 10];
        // The bytecode is never actually read, so a made-up pc is enough.
        let dummy_pc = NonNull::new(bytecode.as_mut_ptr().offset(4)).unwrap();
        let mut i = Interpreter {
            state: &mut state,
            pc: UnsafeBytecodeStream::new(dummy_pc),
            executing_pc: pc.as_ref(),
        };

        // A single value round-trips.
        assert!(i.push::<crate::Ret, _>(0_i32).is_continue());
        assert_eq!(i.pop::<i32>(), 0_i32);

        // Four `i32`s fill the 16-byte stack ...
        for val in 1_i32..=4 {
            assert!(i.push::<crate::Ret, _>(val).is_continue());
        }
        // ... so any further push must trap.
        for val in [5_i32, 6_i32] {
            assert!(i.push::<crate::Ret, _>(val).is_break());
        }
        // The values that did fit come back in reverse order.
        for expected in (1_i32..=4).rev() {
            assert_eq!(i.pop::<i32>(), expected);
        }
    }
}
1321 
// Generates compare-with-immediate branch handlers. Each entry names the
// handler, the immediate's type, the opcode type used for the pc-relative
// jump, the comparison operator, and the register getter for the left-hand
// side. The immediate is converted with `.into()` before comparing.
macro_rules! br_if_imm {
    ($(
        fn $method:ident(&mut self, a: XReg, b: $imm_ty:ident, offset: PcRelOffset)
            = $opcode:ident / $cmp:tt / $getter:ident;
    )*) => {$(
        fn $method(&mut self, a: XReg, b: $imm_ty, offset: PcRelOffset) -> ControlFlow<Done> {
            let lhs = self.state[a].$getter();
            if lhs $cmp b.into() {
                return self.pc_rel_jump::<crate::$opcode>(offset);
            }
            ControlFlow::Continue(())
        }
    )*};
}
1337 
1338 impl OpVisitor for Interpreter<'_> {
1339     type BytecodeStream = UnsafeBytecodeStream;
1340     type Return = ControlFlow<Done>;
1341 
    // Exposes the interpreter's own bytecode cursor (`self.pc`) as the
    // `OpVisitor` bytecode stream.
    fn bytecode(&mut self) -> &mut UnsafeBytecodeStream {
        &mut self.pc
    }
1345 
    // Leaves the machine state untouched and simply signals `Continue`.
    fn nop(&mut self) -> ControlFlow<Done> {
        ControlFlow::Continue(())
    }
1349 
1350     fn ret(&mut self) -> ControlFlow<Done> {
1351         let lr = self.state.lr;
1352         if lr == HOST_RETURN_ADDR {
1353             self.done_return_to_host()
1354         } else {
1355             self.pc = unsafe { UnsafeBytecodeStream::new(NonNull::new_unchecked(lr)) };
1356             ControlFlow::Continue(())
1357         }
1358     }
1359 
1360     fn call(&mut self, offset: PcRelOffset) -> ControlFlow<Done> {
1361         let return_addr = self.pc.as_ptr();
1362         self.state.lr = return_addr.as_ptr();
1363         self.pc_rel_jump::<crate::Call>(offset)
1364     }
1365 
1366     fn call1(&mut self, arg1: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1367         let return_addr = self.pc.as_ptr();
1368         self.state.lr = return_addr.as_ptr();
1369         self.state[XReg::x0] = self.state[arg1];
1370         self.pc_rel_jump::<crate::Call1>(offset)
1371     }
1372 
1373     fn call2(&mut self, arg1: XReg, arg2: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1374         let return_addr = self.pc.as_ptr();
1375         self.state.lr = return_addr.as_ptr();
1376         let (x0, x1) = (self.state[arg1], self.state[arg2]);
1377         self.state[XReg::x0] = x0;
1378         self.state[XReg::x1] = x1;
1379         self.pc_rel_jump::<crate::Call2>(offset)
1380     }
1381 
1382     fn call3(
1383         &mut self,
1384         arg1: XReg,
1385         arg2: XReg,
1386         arg3: XReg,
1387         offset: PcRelOffset,
1388     ) -> ControlFlow<Done> {
1389         let return_addr = self.pc.as_ptr();
1390         self.state.lr = return_addr.as_ptr();
1391         let (x0, x1, x2) = (self.state[arg1], self.state[arg2], self.state[arg3]);
1392         self.state[XReg::x0] = x0;
1393         self.state[XReg::x1] = x1;
1394         self.state[XReg::x2] = x2;
1395         self.pc_rel_jump::<crate::Call3>(offset)
1396     }
1397 
1398     fn call4(
1399         &mut self,
1400         arg1: XReg,
1401         arg2: XReg,
1402         arg3: XReg,
1403         arg4: XReg,
1404         offset: PcRelOffset,
1405     ) -> ControlFlow<Done> {
1406         let return_addr = self.pc.as_ptr();
1407         self.state.lr = return_addr.as_ptr();
1408         let (x0, x1, x2, x3) = (
1409             self.state[arg1],
1410             self.state[arg2],
1411             self.state[arg3],
1412             self.state[arg4],
1413         );
1414         self.state[XReg::x0] = x0;
1415         self.state[XReg::x1] = x1;
1416         self.state[XReg::x2] = x2;
1417         self.state[XReg::x3] = x3;
1418         self.pc_rel_jump::<crate::Call4>(offset)
1419     }
1420 
1421     fn call_indirect(&mut self, dst: XReg) -> ControlFlow<Done> {
1422         let return_addr = self.pc.as_ptr();
1423         self.state.lr = return_addr.as_ptr();
1424         // SAFETY: part of the unsafe contract of the interpreter is only valid
1425         // bytecode is interpreted, so the jump destination is part of the validity
1426         // of the bytecode itself.
1427         unsafe {
1428             self.pc = UnsafeBytecodeStream::new(NonNull::new_unchecked(self.state[dst].get_ptr()));
1429         }
1430         ControlFlow::Continue(())
1431     }
1432 
    // Unconditional pc-relative jump; `offset` is applied by `pc_rel_jump`
    // relative to this `Jump` instruction.
    fn jump(&mut self, offset: PcRelOffset) -> ControlFlow<Done> {
        self.pc_rel_jump::<crate::Jump>(offset)
    }
1436 
1437     fn xjump(&mut self, reg: XReg) -> ControlFlow<Done> {
1438         unsafe {
1439             self.pc = UnsafeBytecodeStream::new(NonNull::new_unchecked(self.state[reg].get_ptr()));
1440         }
1441         ControlFlow::Continue(())
1442     }
1443 
1444     fn br_if32(&mut self, cond: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1445         let cond = self.state[cond].get_u32();
1446         if cond != 0 {
1447             self.pc_rel_jump::<crate::BrIf>(offset)
1448         } else {
1449             ControlFlow::Continue(())
1450         }
1451     }
1452 
1453     fn br_if_not32(&mut self, cond: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1454         let cond = self.state[cond].get_u32();
1455         if cond == 0 {
1456             self.pc_rel_jump::<crate::BrIfNot>(offset)
1457         } else {
1458             ControlFlow::Continue(())
1459         }
1460     }
1461 
1462     fn br_if_xeq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1463         let a = self.state[a].get_u32();
1464         let b = self.state[b].get_u32();
1465         if a == b {
1466             self.pc_rel_jump::<crate::BrIfXeq32>(offset)
1467         } else {
1468             ControlFlow::Continue(())
1469         }
1470     }
1471 
1472     fn br_if_xneq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1473         let a = self.state[a].get_u32();
1474         let b = self.state[b].get_u32();
1475         if a != b {
1476             self.pc_rel_jump::<crate::BrIfXneq32>(offset)
1477         } else {
1478             ControlFlow::Continue(())
1479         }
1480     }
1481 
1482     fn br_if_xslt32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1483         let a = self.state[a].get_i32();
1484         let b = self.state[b].get_i32();
1485         if a < b {
1486             self.pc_rel_jump::<crate::BrIfXslt32>(offset)
1487         } else {
1488             ControlFlow::Continue(())
1489         }
1490     }
1491 
1492     fn br_if_xslteq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1493         let a = self.state[a].get_i32();
1494         let b = self.state[b].get_i32();
1495         if a <= b {
1496             self.pc_rel_jump::<crate::BrIfXslteq32>(offset)
1497         } else {
1498             ControlFlow::Continue(())
1499         }
1500     }
1501 
1502     fn br_if_xult32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1503         let a = self.state[a].get_u32();
1504         let b = self.state[b].get_u32();
1505         if a < b {
1506             self.pc_rel_jump::<crate::BrIfXult32>(offset)
1507         } else {
1508             ControlFlow::Continue(())
1509         }
1510     }
1511 
1512     fn br_if_xulteq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1513         let a = self.state[a].get_u32();
1514         let b = self.state[b].get_u32();
1515         if a <= b {
1516             self.pc_rel_jump::<crate::BrIfXulteq32>(offset)
1517         } else {
1518             ControlFlow::Continue(())
1519         }
1520     }
1521 
1522     fn br_if_xeq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1523         let a = self.state[a].get_u64();
1524         let b = self.state[b].get_u64();
1525         if a == b {
1526             self.pc_rel_jump::<crate::BrIfXeq64>(offset)
1527         } else {
1528             ControlFlow::Continue(())
1529         }
1530     }
1531 
1532     fn br_if_xneq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1533         let a = self.state[a].get_u64();
1534         let b = self.state[b].get_u64();
1535         if a != b {
1536             self.pc_rel_jump::<crate::BrIfXneq64>(offset)
1537         } else {
1538             ControlFlow::Continue(())
1539         }
1540     }
1541 
1542     fn br_if_xslt64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1543         let a = self.state[a].get_i64();
1544         let b = self.state[b].get_i64();
1545         if a < b {
1546             self.pc_rel_jump::<crate::BrIfXslt64>(offset)
1547         } else {
1548             ControlFlow::Continue(())
1549         }
1550     }
1551 
1552     fn br_if_xslteq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1553         let a = self.state[a].get_i64();
1554         let b = self.state[b].get_i64();
1555         if a <= b {
1556             self.pc_rel_jump::<crate::BrIfXslteq64>(offset)
1557         } else {
1558             ControlFlow::Continue(())
1559         }
1560     }
1561 
1562     fn br_if_xult64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1563         let a = self.state[a].get_u64();
1564         let b = self.state[b].get_u64();
1565         if a < b {
1566             self.pc_rel_jump::<crate::BrIfXult64>(offset)
1567         } else {
1568             ControlFlow::Continue(())
1569         }
1570     }
1571 
1572     fn br_if_xulteq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1573         let a = self.state[a].get_u64();
1574         let b = self.state[b].get_u64();
1575         if a <= b {
1576             self.pc_rel_jump::<crate::BrIfXulteq64>(offset)
1577         } else {
1578             ControlFlow::Continue(())
1579         }
1580     }
1581 
    // Compare-with-immediate branches, generated by `br_if_imm!`. Each entry
    // reads register `a` with the listed getter, compares it to the immediate
    // `b` (converted with `.into()`) using the listed operator, and on success
    // jumps relative to the listed opcode.
    br_if_imm! {
        // 32-bit equality/inequality against `i8`/`i32` immediates.
        fn br_if_xeq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXeq32I8 / == / get_i32;
        fn br_if_xeq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXeq32I32 / == / get_i32;
        fn br_if_xneq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXneq32I8 / != / get_i32;
        fn br_if_xneq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXneq32I32 / != / get_i32;

        // 32-bit signed ordering comparisons.
        fn br_if_xslt32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXslt32I8 / < / get_i32;
        fn br_if_xslt32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXslt32I32 / < / get_i32;
        fn br_if_xsgt32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXsgt32I8 / > / get_i32;
        fn br_if_xsgt32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXsgt32I32 / > / get_i32;
        fn br_if_xslteq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXslteq32I8 / <= / get_i32;
        fn br_if_xslteq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXslteq32I32 / <= / get_i32;
        fn br_if_xsgteq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXsgteq32I8 / >= / get_i32;
        fn br_if_xsgteq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXsgteq32I32 / >= / get_i32;

        // 32-bit unsigned ordering comparisons.
        fn br_if_xult32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXult32U8 / < / get_u32;
        fn br_if_xult32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXult32U32 / < / get_u32;
        fn br_if_xugt32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXugt32U8 / > / get_u32;
        fn br_if_xugt32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXugt32U32 / > / get_u32;
        fn br_if_xulteq32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXulteq32U8 / <= / get_u32;
        fn br_if_xulteq32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXulteq32U32 / <= / get_u32;
        fn br_if_xugteq32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXugteq32U8 / >= / get_u32;
        fn br_if_xugteq32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXugteq32U32 / >= / get_u32;

        // 64-bit equality/inequality against `i8`/`i32` immediates.
        fn br_if_xeq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXeq64I8 / == / get_i64;
        fn br_if_xeq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXeq64I32 / == / get_i64;
        fn br_if_xneq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXneq64I8 / != / get_i64;
        fn br_if_xneq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXneq64I32 / != / get_i64;

        // 64-bit signed ordering comparisons.
        fn br_if_xslt64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXslt64I8 / < / get_i64;
        fn br_if_xslt64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXslt64I32 / < / get_i64;
        fn br_if_xsgt64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXsgt64I8 / > / get_i64;
        fn br_if_xsgt64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXsgt64I32 / > / get_i64;
        fn br_if_xslteq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXslteq64I8 / <= / get_i64;
        fn br_if_xslteq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXslteq64I32 / <= / get_i64;
        fn br_if_xsgteq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXsgteq64I8 / >= / get_i64;
        fn br_if_xsgteq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXsgteq64I32 / >= / get_i64;

        // 64-bit unsigned ordering comparisons.
        fn br_if_xult64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXult64U8 / < / get_u64;
        fn br_if_xult64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXult64U32 / < / get_u64;
        fn br_if_xugt64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXugt64U8 / > / get_u64;
        fn br_if_xugt64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXugt64U32 / > / get_u64;
        fn br_if_xulteq64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXulteq64U8 / <= / get_u64;
        fn br_if_xulteq64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXulteq64U32 / <= / get_u64;
        fn br_if_xugteq64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXugteq64U8 / >= / get_u64;
        fn br_if_xugteq64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXugteq64U32 / >= / get_u64;
    }
1669 
1670     fn xmov(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
1671         let val = self.state[src];
1672         self.state[dst] = val;
1673         ControlFlow::Continue(())
1674     }
1675 
1676     fn xconst8(&mut self, dst: XReg, imm: i8) -> ControlFlow<Done> {
1677         self.state[dst].set_i64(i64::from(imm));
1678         ControlFlow::Continue(())
1679     }
1680 
1681     fn xzero(&mut self, dst: XReg) -> ControlFlow<Done> {
1682         self.state[dst].set_i64(0);
1683         ControlFlow::Continue(())
1684     }
1685 
1686     fn xone(&mut self, dst: XReg) -> ControlFlow<Done> {
1687         self.state[dst].set_i64(1);
1688         ControlFlow::Continue(())
1689     }
1690 
1691     fn xconst16(&mut self, dst: XReg, imm: i16) -> ControlFlow<Done> {
1692         self.state[dst].set_i64(i64::from(imm));
1693         ControlFlow::Continue(())
1694     }
1695 
1696     fn xconst32(&mut self, dst: XReg, imm: i32) -> ControlFlow<Done> {
1697         self.state[dst].set_i64(i64::from(imm));
1698         ControlFlow::Continue(())
1699     }
1700 
1701     fn xconst64(&mut self, dst: XReg, imm: i64) -> ControlFlow<Done> {
1702         self.state[dst].set_i64(imm);
1703         ControlFlow::Continue(())
1704     }
1705 
1706     fn xadd32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1707         let a = self.state[operands.src1].get_u32();
1708         let b = self.state[operands.src2].get_u32();
1709         self.state[operands.dst].set_u32(a.wrapping_add(b));
1710         ControlFlow::Continue(())
1711     }
1712 
1713     fn xadd32_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
1714         self.xadd32_u32(dst, src1, src2.into())
1715     }
1716 
1717     fn xadd32_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1718         let a = self.state[src1].get_u32();
1719         self.state[dst].set_u32(a.wrapping_add(src2));
1720         ControlFlow::Continue(())
1721     }
1722 
1723     fn xadd64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1724         let a = self.state[operands.src1].get_u64();
1725         let b = self.state[operands.src2].get_u64();
1726         self.state[operands.dst].set_u64(a.wrapping_add(b));
1727         ControlFlow::Continue(())
1728     }
1729 
1730     fn xadd64_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
1731         self.xadd64_u32(dst, src1, src2.into())
1732     }
1733 
1734     fn xadd64_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1735         let a = self.state[src1].get_u64();
1736         self.state[dst].set_u64(a.wrapping_add(src2.into()));
1737         ControlFlow::Continue(())
1738     }
1739 
1740     fn xmadd32(&mut self, dst: XReg, src1: XReg, src2: XReg, src3: XReg) -> ControlFlow<Done> {
1741         let a = self.state[src1].get_u32();
1742         let b = self.state[src2].get_u32();
1743         let c = self.state[src3].get_u32();
1744         self.state[dst].set_u32(a.wrapping_mul(b).wrapping_add(c));
1745         ControlFlow::Continue(())
1746     }
1747 
1748     fn xmadd64(&mut self, dst: XReg, src1: XReg, src2: XReg, src3: XReg) -> ControlFlow<Done> {
1749         let a = self.state[src1].get_u64();
1750         let b = self.state[src2].get_u64();
1751         let c = self.state[src3].get_u64();
1752         self.state[dst].set_u64(a.wrapping_mul(b).wrapping_add(c));
1753         ControlFlow::Continue(())
1754     }
1755 
1756     fn xsub32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1757         let a = self.state[operands.src1].get_u32();
1758         let b = self.state[operands.src2].get_u32();
1759         self.state[operands.dst].set_u32(a.wrapping_sub(b));
1760         ControlFlow::Continue(())
1761     }
1762 
1763     fn xsub32_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
1764         self.xsub32_u32(dst, src1, src2.into())
1765     }
1766 
1767     fn xsub32_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1768         let a = self.state[src1].get_u32();
1769         self.state[dst].set_u32(a.wrapping_sub(src2));
1770         ControlFlow::Continue(())
1771     }
1772 
1773     fn xsub64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1774         let a = self.state[operands.src1].get_u64();
1775         let b = self.state[operands.src2].get_u64();
1776         self.state[operands.dst].set_u64(a.wrapping_sub(b));
1777         ControlFlow::Continue(())
1778     }
1779 
1780     fn xsub64_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
1781         self.xsub64_u32(dst, src1, src2.into())
1782     }
1783 
1784     fn xsub64_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1785         let a = self.state[src1].get_u64();
1786         self.state[dst].set_u64(a.wrapping_sub(src2.into()));
1787         ControlFlow::Continue(())
1788     }
1789 
1790     fn xmul32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1791         let a = self.state[operands.src1].get_u32();
1792         let b = self.state[operands.src2].get_u32();
1793         self.state[operands.dst].set_u32(a.wrapping_mul(b));
1794         ControlFlow::Continue(())
1795     }
1796 
1797     fn xmul32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
1798         self.xmul32_s32(dst, src1, src2.into())
1799     }
1800 
1801     fn xmul32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
1802         let a = self.state[src1].get_i32();
1803         self.state[dst].set_i32(a.wrapping_mul(src2));
1804         ControlFlow::Continue(())
1805     }
1806 
1807     fn xmul64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1808         let a = self.state[operands.src1].get_u64();
1809         let b = self.state[operands.src2].get_u64();
1810         self.state[operands.dst].set_u64(a.wrapping_mul(b));
1811         ControlFlow::Continue(())
1812     }
1813 
1814     fn xmul64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
1815         self.xmul64_s32(dst, src1, src2.into())
1816     }
1817 
1818     fn xmul64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
1819         let a = self.state[src1].get_i64();
1820         self.state[dst].set_i64(a.wrapping_mul(src2.into()));
1821         ControlFlow::Continue(())
1822     }
1823 
1824     fn xshl32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1825         let a = self.state[operands.src1].get_u32();
1826         let b = self.state[operands.src2].get_u32();
1827         self.state[operands.dst].set_u32(a.wrapping_shl(b));
1828         ControlFlow::Continue(())
1829     }
1830 
1831     fn xshr32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1832         let a = self.state[operands.src1].get_u32();
1833         let b = self.state[operands.src2].get_u32();
1834         self.state[operands.dst].set_u32(a.wrapping_shr(b));
1835         ControlFlow::Continue(())
1836     }
1837 
1838     fn xshr32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1839         let a = self.state[operands.src1].get_i32();
1840         let b = self.state[operands.src2].get_u32();
1841         self.state[operands.dst].set_i32(a.wrapping_shr(b));
1842         ControlFlow::Continue(())
1843     }
1844 
1845     fn xshl64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1846         let a = self.state[operands.src1].get_u64();
1847         let b = self.state[operands.src2].get_u32();
1848         self.state[operands.dst].set_u64(a.wrapping_shl(b));
1849         ControlFlow::Continue(())
1850     }
1851 
1852     fn xshr64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1853         let a = self.state[operands.src1].get_u64();
1854         let b = self.state[operands.src2].get_u32();
1855         self.state[operands.dst].set_u64(a.wrapping_shr(b));
1856         ControlFlow::Continue(())
1857     }
1858 
1859     fn xshr64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1860         let a = self.state[operands.src1].get_i64();
1861         let b = self.state[operands.src2].get_u32();
1862         self.state[operands.dst].set_i64(a.wrapping_shr(b));
1863         ControlFlow::Continue(())
1864     }
1865 
1866     fn xshl32_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1867         let a = self.state[operands.src1].get_u32();
1868         let b = u32::from(u8::from(operands.src2));
1869         self.state[operands.dst].set_u32(a.wrapping_shl(b));
1870         ControlFlow::Continue(())
1871     }
1872 
1873     fn xshr32_u_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1874         let a = self.state[operands.src1].get_u32();
1875         let b = u32::from(u8::from(operands.src2));
1876         self.state[operands.dst].set_u32(a.wrapping_shr(b));
1877         ControlFlow::Continue(())
1878     }
1879 
1880     fn xshr32_s_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1881         let a = self.state[operands.src1].get_i32();
1882         let b = u32::from(u8::from(operands.src2));
1883         self.state[operands.dst].set_i32(a.wrapping_shr(b));
1884         ControlFlow::Continue(())
1885     }
1886 
1887     fn xshl64_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1888         let a = self.state[operands.src1].get_u64();
1889         let b = u32::from(u8::from(operands.src2));
1890         self.state[operands.dst].set_u64(a.wrapping_shl(b));
1891         ControlFlow::Continue(())
1892     }
1893 
1894     fn xshr64_u_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1895         let a = self.state[operands.src1].get_u64();
1896         let b = u32::from(u8::from(operands.src2));
1897         self.state[operands.dst].set_u64(a.wrapping_shr(b));
1898         ControlFlow::Continue(())
1899     }
1900 
1901     fn xshr64_s_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1902         let a = self.state[operands.src1].get_i64();
1903         let b = u32::from(u8::from(operands.src2));
1904         self.state[operands.dst].set_i64(a.wrapping_shr(b));
1905         ControlFlow::Continue(())
1906     }
1907 
1908     fn xneg32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
1909         let a = self.state[src].get_i32();
1910         self.state[dst].set_i32(a.wrapping_neg());
1911         ControlFlow::Continue(())
1912     }
1913 
1914     fn xneg64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
1915         let a = self.state[src].get_i64();
1916         self.state[dst].set_i64(a.wrapping_neg());
1917         ControlFlow::Continue(())
1918     }
1919 
1920     fn xeq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1921         let a = self.state[operands.src1].get_u64();
1922         let b = self.state[operands.src2].get_u64();
1923         self.state[operands.dst].set_u32(u32::from(a == b));
1924         ControlFlow::Continue(())
1925     }
1926 
1927     fn xneq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1928         let a = self.state[operands.src1].get_u64();
1929         let b = self.state[operands.src2].get_u64();
1930         self.state[operands.dst].set_u32(u32::from(a != b));
1931         ControlFlow::Continue(())
1932     }
1933 
1934     fn xslt64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1935         let a = self.state[operands.src1].get_i64();
1936         let b = self.state[operands.src2].get_i64();
1937         self.state[operands.dst].set_u32(u32::from(a < b));
1938         ControlFlow::Continue(())
1939     }
1940 
1941     fn xslteq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1942         let a = self.state[operands.src1].get_i64();
1943         let b = self.state[operands.src2].get_i64();
1944         self.state[operands.dst].set_u32(u32::from(a <= b));
1945         ControlFlow::Continue(())
1946     }
1947 
1948     fn xult64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1949         let a = self.state[operands.src1].get_u64();
1950         let b = self.state[operands.src2].get_u64();
1951         self.state[operands.dst].set_u32(u32::from(a < b));
1952         ControlFlow::Continue(())
1953     }
1954 
1955     fn xulteq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1956         let a = self.state[operands.src1].get_u64();
1957         let b = self.state[operands.src2].get_u64();
1958         self.state[operands.dst].set_u32(u32::from(a <= b));
1959         ControlFlow::Continue(())
1960     }
1961 
1962     fn xeq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1963         let a = self.state[operands.src1].get_u32();
1964         let b = self.state[operands.src2].get_u32();
1965         self.state[operands.dst].set_u32(u32::from(a == b));
1966         ControlFlow::Continue(())
1967     }
1968 
1969     fn xneq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1970         let a = self.state[operands.src1].get_u32();
1971         let b = self.state[operands.src2].get_u32();
1972         self.state[operands.dst].set_u32(u32::from(a != b));
1973         ControlFlow::Continue(())
1974     }
1975 
1976     fn xslt32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1977         let a = self.state[operands.src1].get_i32();
1978         let b = self.state[operands.src2].get_i32();
1979         self.state[operands.dst].set_u32(u32::from(a < b));
1980         ControlFlow::Continue(())
1981     }
1982 
1983     fn xslteq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1984         let a = self.state[operands.src1].get_i32();
1985         let b = self.state[operands.src2].get_i32();
1986         self.state[operands.dst].set_u32(u32::from(a <= b));
1987         ControlFlow::Continue(())
1988     }
1989 
1990     fn xult32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1991         let a = self.state[operands.src1].get_u32();
1992         let b = self.state[operands.src2].get_u32();
1993         self.state[operands.dst].set_u32(u32::from(a < b));
1994         ControlFlow::Continue(())
1995     }
1996 
1997     fn xulteq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1998         let a = self.state[operands.src1].get_u32();
1999         let b = self.state[operands.src2].get_u32();
2000         self.state[operands.dst].set_u32(u32::from(a <= b));
2001         ControlFlow::Continue(())
2002     }
2003 
2004     fn push_frame(&mut self) -> ControlFlow<Done> {
2005         self.push::<crate::PushFrame, _>(self.state.lr)?;
2006         self.push::<crate::PushFrame, _>(self.state.fp)?;
2007         self.state.fp = self.state[XReg::sp].get_ptr();
2008         ControlFlow::Continue(())
2009     }
2010 
    /// Combined frame setup: allocates `amt` bytes plus two pointer-sized
    /// slots on the stack, stores `lr` and `fp` in those two slots, points
    /// `fp` at the slot holding the old `fp`, and saves every register in
    /// `regs` as a 64-bit value just below that.
    ///
    /// Breaks out early if `set_sp` or any of the stores break.
    #[inline]
    fn push_frame_save(&mut self, amt: u16, regs: UpperRegSet<XReg>) -> ControlFlow<Done> {
        // Decrement the stack pointer `amt` bytes plus 2 pointers more for
        // fp/lr.
        let ptr_size = size_of::<usize>();
        let full_amt = usize::from(amt) + 2 * ptr_size;
        let new_sp = self.state[XReg::sp].get_ptr::<u8>().wrapping_sub(full_amt);
        self.set_sp::<crate::PushFrameSave>(new_sp)?;

        unsafe {
            // Emulate `push_frame` by placing `lr` and `fp` onto the stack, in
            // that order, at the top of the allocated area.
            self.store_ne::<_, crate::PushFrameSave>(
                AddrO32 {
                    addr: XReg::sp,
                    // Highest pointer-sized slot of the new allocation.
                    offset: (full_amt - 1 * ptr_size) as i32,
                },
                self.state.lr,
            )?;
            self.store_ne::<_, crate::PushFrameSave>(
                AddrO32 {
                    addr: XReg::sp,
                    // One pointer below `lr`; this equals `sp + amt`.
                    offset: (full_amt - 2 * ptr_size) as i32,
                },
                self.state.fp,
            )?;

            // Set `fp` to the top of our frame, where `fp` is stored.
            let mut offset = amt as i32;
            self.state.fp = self.state[XReg::sp]
                .get_ptr::<u8>()
                .byte_offset(offset as isize);

            // Next save any registers in `regs` to the stack.
            // Each register takes an 8-byte slot, walking downwards from
            // `sp + amt`, in the iteration order of `regs`.
            for reg in regs {
                offset -= 8;
                self.store_ne::<_, crate::PushFrameSave>(
                    AddrO32 {
                        addr: XReg::sp,
                        offset,
                    },
                    self.state[reg].get_u64(),
                )?;
            }
        }
        ControlFlow::Continue(())
    }
2058 
2059     fn pop_frame_restore(&mut self, amt: u16, regs: UpperRegSet<XReg>) -> ControlFlow<Done> {
2060         // Restore all registers in `regs`, followed by the normal `pop_frame`
2061         // opcode below to restore fp/lr.
2062         unsafe {
2063             let mut offset = i32::from(amt);
2064             for reg in regs {
2065                 offset -= 8;
2066                 let val = self.load_ne::<_, crate::PopFrameRestore>(AddrO32 {
2067                     addr: XReg::sp,
2068                     offset,
2069                 })?;
2070                 self.state[reg].set_u64(val);
2071             }
2072         }
2073         self.pop_frame()
2074     }
2075 
2076     fn pop_frame(&mut self) -> ControlFlow<Done> {
2077         self.set_sp_unchecked(self.state.fp);
2078         let fp = self.pop();
2079         let lr = self.pop();
2080         self.state.fp = fp;
2081         self.state.lr = lr;
2082         ControlFlow::Continue(())
2083     }
2084 
    /// Jump-table branch: selects one of `amt` 4-byte `PcRelOffset` entries
    /// using the `u32` value of register `idx` (clamped to the last entry) and
    /// jumps relative to the position of the selected entry.
    ///
    /// NOTE(review): presumably `self.pc` points at the first table entry on
    /// entry — confirm against the decoder.
    fn br_table32(&mut self, idx: XReg, amt: u32) -> ControlFlow<Done> {
        // Out-of-range indices are clamped to the final entry (`amt - 1`).
        // NOTE(review): `amt - 1` would underflow for `amt == 0`; assumes
        // valid bytecode always has at least one entry — confirm.
        let idx = self.state[idx].get_u32().min(amt - 1) as isize;
        // SAFETY: part of the contract of the interpreter is only dealing with
        // valid bytecode, so this offset should be safe.
        self.pc = unsafe { self.pc.offset(idx * 4) };

        // Decode the `PcRelOffset` without tampering with `self.pc` as the
        // jump is relative to `self.pc`.
        let mut tmp = self.pc;
        let Ok(rel) = PcRelOffset::decode(&mut tmp);
        let offset = isize::try_from(i32::from(rel)).unwrap();
        self.pc = unsafe { self.pc.offset(offset) };
        ControlFlow::Continue(())
    }
2099 
2100     fn stack_alloc32(&mut self, amt: u32) -> ControlFlow<Done> {
2101         let amt = usize::try_from(amt).unwrap();
2102         let new_sp = self.state[XReg::sp].get_ptr::<u8>().wrapping_sub(amt);
2103         self.set_sp::<crate::StackAlloc32>(new_sp)?;
2104         ControlFlow::Continue(())
2105     }
2106 
2107     fn stack_free32(&mut self, amt: u32) -> ControlFlow<Done> {
2108         let amt = usize::try_from(amt).unwrap();
2109         let new_sp = self.state[XReg::sp].get_ptr::<u8>().wrapping_add(amt);
2110         self.set_sp_unchecked(new_sp);
2111         ControlFlow::Continue(())
2112     }
2113 
2114     fn zext8(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2115         let src = self.state[src].get_u64() as u8;
2116         self.state[dst].set_u64(src.into());
2117         ControlFlow::Continue(())
2118     }
2119 
2120     fn zext16(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2121         let src = self.state[src].get_u64() as u16;
2122         self.state[dst].set_u64(src.into());
2123         ControlFlow::Continue(())
2124     }
2125 
2126     fn zext32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2127         let src = self.state[src].get_u64() as u32;
2128         self.state[dst].set_u64(src.into());
2129         ControlFlow::Continue(())
2130     }
2131 
2132     fn sext8(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2133         let src = self.state[src].get_i64() as i8;
2134         self.state[dst].set_i64(src.into());
2135         ControlFlow::Continue(())
2136     }
2137 
2138     fn sext16(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2139         let src = self.state[src].get_i64() as i16;
2140         self.state[dst].set_i64(src.into());
2141         ControlFlow::Continue(())
2142     }
2143 
2144     fn sext32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2145         let src = self.state[src].get_i64() as i32;
2146         self.state[dst].set_i64(src.into());
2147         ControlFlow::Continue(())
2148     }
2149 
2150     fn xdiv32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2151         let a = self.state[operands.src1].get_i32();
2152         let b = self.state[operands.src2].get_i32();
2153         match a.checked_div(b) {
2154             Some(result) => {
2155                 self.state[operands.dst].set_i32(result);
2156                 ControlFlow::Continue(())
2157             }
2158             None => {
2159                 let kind = if b == 0 {
2160                     TrapKind::DivideByZero
2161                 } else {
2162                     TrapKind::IntegerOverflow
2163                 };
2164                 self.done_trap_kind::<crate::XDiv32S>(Some(kind))
2165             }
2166         }
2167     }
2168 
2169     fn xdiv64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2170         let a = self.state[operands.src1].get_i64();
2171         let b = self.state[operands.src2].get_i64();
2172         match a.checked_div(b) {
2173             Some(result) => {
2174                 self.state[operands.dst].set_i64(result);
2175                 ControlFlow::Continue(())
2176             }
2177             None => {
2178                 let kind = if b == 0 {
2179                     TrapKind::DivideByZero
2180                 } else {
2181                     TrapKind::IntegerOverflow
2182                 };
2183                 self.done_trap_kind::<crate::XDiv64S>(Some(kind))
2184             }
2185         }
2186     }
2187 
2188     fn xdiv32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2189         let a = self.state[operands.src1].get_u32();
2190         let b = self.state[operands.src2].get_u32();
2191         match a.checked_div(b) {
2192             Some(result) => {
2193                 self.state[operands.dst].set_u32(result);
2194                 ControlFlow::Continue(())
2195             }
2196             None => self.done_trap_kind::<crate::XDiv64U>(Some(TrapKind::DivideByZero)),
2197         }
2198     }
2199 
2200     fn xdiv64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2201         let a = self.state[operands.src1].get_u64();
2202         let b = self.state[operands.src2].get_u64();
2203         match a.checked_div(b) {
2204             Some(result) => {
2205                 self.state[operands.dst].set_u64(result);
2206                 ControlFlow::Continue(())
2207             }
2208             None => self.done_trap_kind::<crate::XDiv64U>(Some(TrapKind::DivideByZero)),
2209         }
2210     }
2211 
2212     fn xrem32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2213         let a = self.state[operands.src1].get_i32();
2214         let b = self.state[operands.src2].get_i32();
2215         let result = if a == i32::MIN && b == -1 {
2216             Some(0)
2217         } else {
2218             a.checked_rem(b)
2219         };
2220         match result {
2221             Some(result) => {
2222                 self.state[operands.dst].set_i32(result);
2223                 ControlFlow::Continue(())
2224             }
2225             None => self.done_trap_kind::<crate::XRem32S>(Some(TrapKind::DivideByZero)),
2226         }
2227     }
2228 
2229     fn xrem64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2230         let a = self.state[operands.src1].get_i64();
2231         let b = self.state[operands.src2].get_i64();
2232         let result = if a == i64::MIN && b == -1 {
2233             Some(0)
2234         } else {
2235             a.checked_rem(b)
2236         };
2237         match result {
2238             Some(result) => {
2239                 self.state[operands.dst].set_i64(result);
2240                 ControlFlow::Continue(())
2241             }
2242             None => self.done_trap_kind::<crate::XRem64S>(Some(TrapKind::DivideByZero)),
2243         }
2244     }
2245 
2246     fn xrem32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2247         let a = self.state[operands.src1].get_u32();
2248         let b = self.state[operands.src2].get_u32();
2249         match a.checked_rem(b) {
2250             Some(result) => {
2251                 self.state[operands.dst].set_u32(result);
2252                 ControlFlow::Continue(())
2253             }
2254             None => self.done_trap_kind::<crate::XRem32U>(Some(TrapKind::DivideByZero)),
2255         }
2256     }
2257 
2258     fn xrem64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2259         let a = self.state[operands.src1].get_u64();
2260         let b = self.state[operands.src2].get_u64();
2261         match a.checked_rem(b) {
2262             Some(result) => {
2263                 self.state[operands.dst].set_u64(result);
2264                 ControlFlow::Continue(())
2265             }
2266             None => self.done_trap_kind::<crate::XRem64U>(Some(TrapKind::DivideByZero)),
2267         }
2268     }
2269 
2270     fn xband32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2271         let a = self.state[operands.src1].get_u32();
2272         let b = self.state[operands.src2].get_u32();
2273         self.state[operands.dst].set_u32(a & b);
2274         ControlFlow::Continue(())
2275     }
2276 
2277     fn xband32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2278         self.xband32_s32(dst, src1, src2.into())
2279     }
2280 
2281     fn xband32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2282         let a = self.state[src1].get_i32();
2283         self.state[dst].set_i32(a & src2);
2284         ControlFlow::Continue(())
2285     }
2286 
2287     fn xband64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2288         let a = self.state[operands.src1].get_u64();
2289         let b = self.state[operands.src2].get_u64();
2290         self.state[operands.dst].set_u64(a & b);
2291         ControlFlow::Continue(())
2292     }
2293 
2294     fn xband64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2295         self.xband64_s32(dst, src1, src2.into())
2296     }
2297 
2298     fn xband64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2299         let a = self.state[src1].get_i64();
2300         self.state[dst].set_i64(a & i64::from(src2));
2301         ControlFlow::Continue(())
2302     }
2303 
2304     fn xbor32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2305         let a = self.state[operands.src1].get_u32();
2306         let b = self.state[operands.src2].get_u32();
2307         self.state[operands.dst].set_u32(a | b);
2308         ControlFlow::Continue(())
2309     }
2310 
2311     fn xbor32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2312         self.xbor32_s32(dst, src1, src2.into())
2313     }
2314 
2315     fn xbor32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2316         let a = self.state[src1].get_i32();
2317         self.state[dst].set_i32(a | src2);
2318         ControlFlow::Continue(())
2319     }
2320 
2321     fn xbor64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2322         let a = self.state[operands.src1].get_u64();
2323         let b = self.state[operands.src2].get_u64();
2324         self.state[operands.dst].set_u64(a | b);
2325         ControlFlow::Continue(())
2326     }
2327 
2328     fn xbor64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2329         self.xbor64_s32(dst, src1, src2.into())
2330     }
2331 
2332     fn xbor64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2333         let a = self.state[src1].get_i64();
2334         self.state[dst].set_i64(a | i64::from(src2));
2335         ControlFlow::Continue(())
2336     }
2337 
2338     fn xbxor32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2339         let a = self.state[operands.src1].get_u32();
2340         let b = self.state[operands.src2].get_u32();
2341         self.state[operands.dst].set_u32(a ^ b);
2342         ControlFlow::Continue(())
2343     }
2344 
2345     fn xbxor32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2346         self.xbxor32_s32(dst, src1, src2.into())
2347     }
2348 
2349     fn xbxor32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2350         let a = self.state[src1].get_i32();
2351         self.state[dst].set_i32(a ^ src2);
2352         ControlFlow::Continue(())
2353     }
2354 
2355     fn xbxor64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2356         let a = self.state[operands.src1].get_u64();
2357         let b = self.state[operands.src2].get_u64();
2358         self.state[operands.dst].set_u64(a ^ b);
2359         ControlFlow::Continue(())
2360     }
2361 
2362     fn xbxor64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2363         self.xbxor64_s32(dst, src1, src2.into())
2364     }
2365 
2366     fn xbxor64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2367         let a = self.state[src1].get_i64();
2368         self.state[dst].set_i64(a ^ i64::from(src2));
2369         ControlFlow::Continue(())
2370     }
2371 
2372     fn xbnot32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2373         let a = self.state[src].get_u32();
2374         self.state[dst].set_u32(!a);
2375         ControlFlow::Continue(())
2376     }
2377 
2378     fn xbnot64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2379         let a = self.state[src].get_u64();
2380         self.state[dst].set_u64(!a);
2381         ControlFlow::Continue(())
2382     }
2383 
2384     fn xmin32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2385         let a = self.state[operands.src1].get_u32();
2386         let b = self.state[operands.src2].get_u32();
2387         self.state[operands.dst].set_u32(a.min(b));
2388         ControlFlow::Continue(())
2389     }
2390 
2391     fn xmin32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2392         let a = self.state[operands.src1].get_i32();
2393         let b = self.state[operands.src2].get_i32();
2394         self.state[operands.dst].set_i32(a.min(b));
2395         ControlFlow::Continue(())
2396     }
2397 
2398     fn xmax32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2399         let a = self.state[operands.src1].get_u32();
2400         let b = self.state[operands.src2].get_u32();
2401         self.state[operands.dst].set_u32(a.max(b));
2402         ControlFlow::Continue(())
2403     }
2404 
2405     fn xmax32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2406         let a = self.state[operands.src1].get_i32();
2407         let b = self.state[operands.src2].get_i32();
2408         self.state[operands.dst].set_i32(a.max(b));
2409         ControlFlow::Continue(())
2410     }
2411 
2412     fn xmin64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2413         let a = self.state[operands.src1].get_u64();
2414         let b = self.state[operands.src2].get_u64();
2415         self.state[operands.dst].set_u64(a.min(b));
2416         ControlFlow::Continue(())
2417     }
2418 
2419     fn xmin64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2420         let a = self.state[operands.src1].get_i64();
2421         let b = self.state[operands.src2].get_i64();
2422         self.state[operands.dst].set_i64(a.min(b));
2423         ControlFlow::Continue(())
2424     }
2425 
2426     fn xmax64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2427         let a = self.state[operands.src1].get_u64();
2428         let b = self.state[operands.src2].get_u64();
2429         self.state[operands.dst].set_u64(a.max(b));
2430         ControlFlow::Continue(())
2431     }
2432 
2433     fn xmax64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2434         let a = self.state[operands.src1].get_i64();
2435         let b = self.state[operands.src2].get_i64();
2436         self.state[operands.dst].set_i64(a.max(b));
2437         ControlFlow::Continue(())
2438     }
2439 
2440     fn xctz32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2441         let a = self.state[src].get_u32();
2442         self.state[dst].set_u32(a.trailing_zeros());
2443         ControlFlow::Continue(())
2444     }
2445 
2446     fn xctz64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2447         let a = self.state[src].get_u64();
2448         self.state[dst].set_u64(a.trailing_zeros().into());
2449         ControlFlow::Continue(())
2450     }
2451 
2452     fn xclz32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2453         let a = self.state[src].get_u32();
2454         self.state[dst].set_u32(a.leading_zeros());
2455         ControlFlow::Continue(())
2456     }
2457 
2458     fn xclz64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2459         let a = self.state[src].get_u64();
2460         self.state[dst].set_u64(a.leading_zeros().into());
2461         ControlFlow::Continue(())
2462     }
2463 
2464     fn xpopcnt32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2465         let a = self.state[src].get_u32();
2466         self.state[dst].set_u32(a.count_ones());
2467         ControlFlow::Continue(())
2468     }
2469 
2470     fn xpopcnt64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2471         let a = self.state[src].get_u64();
2472         self.state[dst].set_u64(a.count_ones().into());
2473         ControlFlow::Continue(())
2474     }
2475 
2476     fn xrotl32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2477         let a = self.state[operands.src1].get_u32();
2478         let b = self.state[operands.src2].get_u32();
2479         self.state[operands.dst].set_u32(a.rotate_left(b));
2480         ControlFlow::Continue(())
2481     }
2482 
2483     fn xrotl64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2484         let a = self.state[operands.src1].get_u64();
2485         let b = self.state[operands.src2].get_u32();
2486         self.state[operands.dst].set_u64(a.rotate_left(b));
2487         ControlFlow::Continue(())
2488     }
2489 
2490     fn xrotr32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2491         let a = self.state[operands.src1].get_u32();
2492         let b = self.state[operands.src2].get_u32();
2493         self.state[operands.dst].set_u32(a.rotate_right(b));
2494         ControlFlow::Continue(())
2495     }
2496 
2497     fn xrotr64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2498         let a = self.state[operands.src1].get_u64();
2499         let b = self.state[operands.src2].get_u32();
2500         self.state[operands.dst].set_u64(a.rotate_right(b));
2501         ControlFlow::Continue(())
2502     }
2503 
2504     fn xselect32(
2505         &mut self,
2506         dst: XReg,
2507         cond: XReg,
2508         if_nonzero: XReg,
2509         if_zero: XReg,
2510     ) -> ControlFlow<Done> {
2511         let result = if self.state[cond].get_u32() != 0 {
2512             self.state[if_nonzero].get_u32()
2513         } else {
2514             self.state[if_zero].get_u32()
2515         };
2516         self.state[dst].set_u32(result);
2517         ControlFlow::Continue(())
2518     }
2519 
2520     fn xselect64(
2521         &mut self,
2522         dst: XReg,
2523         cond: XReg,
2524         if_nonzero: XReg,
2525         if_zero: XReg,
2526     ) -> ControlFlow<Done> {
2527         let result = if self.state[cond].get_u32() != 0 {
2528             self.state[if_nonzero].get_u64()
2529         } else {
2530             self.state[if_zero].get_u64()
2531         };
2532         self.state[dst].set_u64(result);
2533         ControlFlow::Continue(())
2534     }
2535 
2536     fn xabs32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2537         let a = self.state[src].get_i32();
2538         self.state[dst].set_i32(a.wrapping_abs());
2539         ControlFlow::Continue(())
2540     }
2541 
2542     fn xabs64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2543         let a = self.state[src].get_i64();
2544         self.state[dst].set_i64(a.wrapping_abs());
2545         ControlFlow::Continue(())
2546     }
2547 
2548     // =========================================================================
2549     // o32 addressing modes
2550 
2551     fn xload8_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2552         let result = unsafe { self.load_ne::<u8, crate::XLoad8U32O32>(addr)? };
2553         self.state[dst].set_u32(result.into());
2554         ControlFlow::Continue(())
2555     }
2556 
2557     fn xload8_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2558         let result = unsafe { self.load_ne::<i8, crate::XLoad8S32O32>(addr)? };
2559         self.state[dst].set_i32(result.into());
2560         ControlFlow::Continue(())
2561     }
2562 
2563     fn xload16le_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2564         let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32O32>(addr)? };
2565         self.state[dst].set_u32(u16::from_le(result).into());
2566         ControlFlow::Continue(())
2567     }
2568 
2569     fn xload16le_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2570         let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32O32>(addr)? };
2571         self.state[dst].set_i32(i16::from_le(result).into());
2572         ControlFlow::Continue(())
2573     }
2574 
2575     fn xload32le_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2576         let result = unsafe { self.load_ne::<i32, crate::XLoad32LeO32>(addr)? };
2577         self.state[dst].set_i32(i32::from_le(result));
2578         ControlFlow::Continue(())
2579     }
2580 
2581     fn xload64le_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2582         let result = unsafe { self.load_ne::<i64, crate::XLoad64LeO32>(addr)? };
2583         self.state[dst].set_i64(i64::from_le(result));
2584         ControlFlow::Continue(())
2585     }
2586 
2587     fn xstore8_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2588         let val = self.state[val].get_u32() as u8;
2589         unsafe {
2590             self.store_ne::<u8, crate::XStore8O32>(addr, val)?;
2591         }
2592         ControlFlow::Continue(())
2593     }
2594 
2595     fn xstore16le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2596         let val = self.state[val].get_u32() as u16;
2597         unsafe {
2598             self.store_ne::<u16, crate::XStore16LeO32>(addr, val.to_le())?;
2599         }
2600         ControlFlow::Continue(())
2601     }
2602 
2603     fn xstore32le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2604         let val = self.state[val].get_u32();
2605         unsafe {
2606             self.store_ne::<u32, crate::XStore32LeO32>(addr, val.to_le())?;
2607         }
2608         ControlFlow::Continue(())
2609     }
2610 
2611     fn xstore64le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2612         let val = self.state[val].get_u64();
2613         unsafe {
2614             self.store_ne::<u64, crate::XStore64LeO32>(addr, val.to_le())?;
2615         }
2616         ControlFlow::Continue(())
2617     }
2618 
2619     // =========================================================================
2620     // g32 addressing modes
2621 
2622     fn xload8_u32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2623         let result = unsafe { self.load_ne::<u8, crate::XLoad8U32G32>(addr)? };
2624         self.state[dst].set_u32(result.into());
2625         ControlFlow::Continue(())
2626     }
2627 
2628     fn xload8_s32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2629         let result = unsafe { self.load_ne::<i8, crate::XLoad8S32G32>(addr)? };
2630         self.state[dst].set_i32(result.into());
2631         ControlFlow::Continue(())
2632     }
2633 
2634     fn xload16le_u32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2635         let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32G32>(addr)? };
2636         self.state[dst].set_u32(u16::from_le(result).into());
2637         ControlFlow::Continue(())
2638     }
2639 
2640     fn xload16le_s32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2641         let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32G32>(addr)? };
2642         self.state[dst].set_i32(i16::from_le(result).into());
2643         ControlFlow::Continue(())
2644     }
2645 
2646     fn xload32le_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2647         let result = unsafe { self.load_ne::<i32, crate::XLoad32LeG32>(addr)? };
2648         self.state[dst].set_i32(i32::from_le(result));
2649         ControlFlow::Continue(())
2650     }
2651 
2652     fn xload64le_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2653         let result = unsafe { self.load_ne::<i64, crate::XLoad64LeG32>(addr)? };
2654         self.state[dst].set_i64(i64::from_le(result));
2655         ControlFlow::Continue(())
2656     }
2657 
2658     fn xstore8_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2659         let val = self.state[val].get_u32() as u8;
2660         unsafe {
2661             self.store_ne::<u8, crate::XStore8G32>(addr, val)?;
2662         }
2663         ControlFlow::Continue(())
2664     }
2665 
2666     fn xstore16le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2667         let val = self.state[val].get_u32() as u16;
2668         unsafe {
2669             self.store_ne::<u16, crate::XStore16LeG32>(addr, val.to_le())?;
2670         }
2671         ControlFlow::Continue(())
2672     }
2673 
2674     fn xstore32le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2675         let val = self.state[val].get_u32();
2676         unsafe {
2677             self.store_ne::<u32, crate::XStore32LeG32>(addr, val.to_le())?;
2678         }
2679         ControlFlow::Continue(())
2680     }
2681 
2682     fn xstore64le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2683         let val = self.state[val].get_u64();
2684         unsafe {
2685             self.store_ne::<u64, crate::XStore64LeG32>(addr, val.to_le())?;
2686         }
2687         ControlFlow::Continue(())
2688     }
2689 
2690     // =========================================================================
2691     // z addressing modes
2692 
2693     fn xload8_u32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2694         let result = unsafe { self.load_ne::<u8, crate::XLoad8U32Z>(addr)? };
2695         self.state[dst].set_u32(result.into());
2696         ControlFlow::Continue(())
2697     }
2698 
2699     fn xload8_s32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2700         let result = unsafe { self.load_ne::<i8, crate::XLoad8S32Z>(addr)? };
2701         self.state[dst].set_i32(result.into());
2702         ControlFlow::Continue(())
2703     }
2704 
2705     fn xload16le_u32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2706         let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32Z>(addr)? };
2707         self.state[dst].set_u32(u16::from_le(result).into());
2708         ControlFlow::Continue(())
2709     }
2710 
2711     fn xload16le_s32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2712         let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32Z>(addr)? };
2713         self.state[dst].set_i32(i16::from_le(result).into());
2714         ControlFlow::Continue(())
2715     }
2716 
2717     fn xload32le_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2718         let result = unsafe { self.load_ne::<i32, crate::XLoad32LeZ>(addr)? };
2719         self.state[dst].set_i32(i32::from_le(result));
2720         ControlFlow::Continue(())
2721     }
2722 
2723     fn xload64le_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2724         let result = unsafe { self.load_ne::<i64, crate::XLoad64LeZ>(addr)? };
2725         self.state[dst].set_i64(i64::from_le(result));
2726         ControlFlow::Continue(())
2727     }
2728 
2729     fn xstore8_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2730         let val = self.state[val].get_u32() as u8;
2731         unsafe {
2732             self.store_ne::<u8, crate::XStore8Z>(addr, val)?;
2733         }
2734         ControlFlow::Continue(())
2735     }
2736 
2737     fn xstore16le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2738         let val = self.state[val].get_u32() as u16;
2739         unsafe {
2740             self.store_ne::<u16, crate::XStore16LeZ>(addr, val.to_le())?;
2741         }
2742         ControlFlow::Continue(())
2743     }
2744 
2745     fn xstore32le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2746         let val = self.state[val].get_u32();
2747         unsafe {
2748             self.store_ne::<u32, crate::XStore32LeZ>(addr, val.to_le())?;
2749         }
2750         ControlFlow::Continue(())
2751     }
2752 
2753     fn xstore64le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2754         let val = self.state[val].get_u64();
2755         unsafe {
2756             self.store_ne::<u64, crate::XStore64LeZ>(addr, val.to_le())?;
2757         }
2758         ControlFlow::Continue(())
2759     }
2760 
2761     // =========================================================================
2762     // g32bne addressing modes
2763 
2764     fn xload8_u32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2765         let result = unsafe { self.load_ne::<u8, crate::XLoad8U32G32Bne>(addr)? };
2766         self.state[dst].set_u32(result.into());
2767         ControlFlow::Continue(())
2768     }
2769 
2770     fn xload8_s32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2771         let result = unsafe { self.load_ne::<i8, crate::XLoad8S32G32Bne>(addr)? };
2772         self.state[dst].set_i32(result.into());
2773         ControlFlow::Continue(())
2774     }
2775 
2776     fn xload16le_u32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2777         let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32G32Bne>(addr)? };
2778         self.state[dst].set_u32(u16::from_le(result).into());
2779         ControlFlow::Continue(())
2780     }
2781 
2782     fn xload16le_s32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2783         let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32G32Bne>(addr)? };
2784         self.state[dst].set_i32(i16::from_le(result).into());
2785         ControlFlow::Continue(())
2786     }
2787 
2788     fn xload32le_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2789         let result = unsafe { self.load_ne::<i32, crate::XLoad32LeG32Bne>(addr)? };
2790         self.state[dst].set_i32(i32::from_le(result));
2791         ControlFlow::Continue(())
2792     }
2793 
2794     fn xload64le_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2795         let result = unsafe { self.load_ne::<i64, crate::XLoad64LeG32Bne>(addr)? };
2796         self.state[dst].set_i64(i64::from_le(result));
2797         ControlFlow::Continue(())
2798     }
2799 
2800     fn xstore8_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2801         let val = self.state[val].get_u32() as u8;
2802         unsafe {
2803             self.store_ne::<u8, crate::XStore8G32Bne>(addr, val)?;
2804         }
2805         ControlFlow::Continue(())
2806     }
2807 
2808     fn xstore16le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2809         let val = self.state[val].get_u32() as u16;
2810         unsafe {
2811             self.store_ne::<u16, crate::XStore16LeG32Bne>(addr, val.to_le())?;
2812         }
2813         ControlFlow::Continue(())
2814     }
2815 
2816     fn xstore32le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2817         let val = self.state[val].get_u32();
2818         unsafe {
2819             self.store_ne::<u32, crate::XStore32LeG32Bne>(addr, val.to_le())?;
2820         }
2821         ControlFlow::Continue(())
2822     }
2823 
2824     fn xstore64le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2825         let val = self.state[val].get_u64();
2826         unsafe {
2827             self.store_ne::<u64, crate::XStore64LeG32Bne>(addr, val.to_le())?;
2828         }
2829         ControlFlow::Continue(())
2830     }
2831 }
2832 
2833 impl ExtendedOpVisitor for Interpreter<'_> {
    /// Handles the `trap` opcode by delegating to `done_trap`, tagged with the
    /// `crate::Trap` opcode type, and returning whatever it produces.
    fn trap(&mut self) -> ControlFlow<Done> {
        self.done_trap::<crate::Trap>()
    }
2837 
    /// Handles the `call_indirect_host` opcode by forwarding `id` to
    /// `done_call_indirect_host` and returning whatever it produces.
    fn call_indirect_host(&mut self, id: u8) -> ControlFlow<Done> {
        self.done_call_indirect_host(id)
    }
2841 
2842     fn xpcadd(&mut self, dst: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
2843         let pc = self.pc_rel::<crate::Xpcadd>(offset);
2844         self.state[dst].set_ptr(pc.as_ptr());
2845         ControlFlow::Continue(())
2846     }
2847 
2848     fn bswap32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2849         let src = self.state[src].get_u32();
2850         self.state[dst].set_u32(src.swap_bytes());
2851         ControlFlow::Continue(())
2852     }
2853 
2854     fn bswap64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2855         let src = self.state[src].get_u64();
2856         self.state[dst].set_u64(src.swap_bytes());
2857         ControlFlow::Continue(())
2858     }
2859 
2860     fn xbmask32(&mut self, dst: XReg, src: XReg) -> Self::Return {
2861         let a = self.state[src].get_u32();
2862         if a == 0 {
2863             self.state[dst].set_u32(0);
2864         } else {
2865             self.state[dst].set_i32(-1);
2866         }
2867         ControlFlow::Continue(())
2868     }
2869 
2870     fn xbmask64(&mut self, dst: XReg, src: XReg) -> Self::Return {
2871         let a = self.state[src].get_u64();
2872         if a == 0 {
2873             self.state[dst].set_u64(0);
2874         } else {
2875             self.state[dst].set_i64(-1);
2876         }
2877         ControlFlow::Continue(())
2878     }
2879 
2880     fn xadd32_uoverflow_trap(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2881         let a = self.state[operands.src1].get_u32();
2882         let b = self.state[operands.src2].get_u32();
2883         match a.checked_add(b) {
2884             Some(c) => {
2885                 self.state[operands.dst].set_u32(c);
2886                 ControlFlow::Continue(())
2887             }
2888             None => self.done_trap::<crate::Xadd32UoverflowTrap>(),
2889         }
2890     }
2891 
2892     fn xadd64_uoverflow_trap(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2893         let a = self.state[operands.src1].get_u64();
2894         let b = self.state[operands.src2].get_u64();
2895         match a.checked_add(b) {
2896             Some(c) => {
2897                 self.state[operands.dst].set_u64(c);
2898                 ControlFlow::Continue(())
2899             }
2900             None => self.done_trap::<crate::Xadd64UoverflowTrap>(),
2901         }
2902     }
2903 
2904     fn xmulhi64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2905         let a = self.state[operands.src1].get_i64();
2906         let b = self.state[operands.src2].get_i64();
2907         let result = ((i128::from(a) * i128::from(b)) >> 64) as i64;
2908         self.state[operands.dst].set_i64(result);
2909         ControlFlow::Continue(())
2910     }
2911 
2912     fn xmulhi64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2913         let a = self.state[operands.src1].get_u64();
2914         let b = self.state[operands.src2].get_u64();
2915         let result = ((u128::from(a) * u128::from(b)) >> 64) as u64;
2916         self.state[operands.dst].set_u64(result);
2917         ControlFlow::Continue(())
2918     }
2919 
2920     // =========================================================================
2921     // o32 addressing modes for big-endian X-registers
2922 
2923     fn xload16be_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2924         let result = unsafe { self.load_ne::<u16, crate::XLoad16BeU32O32>(addr)? };
2925         self.state[dst].set_u32(u16::from_be(result).into());
2926         ControlFlow::Continue(())
2927     }
2928 
2929     fn xload16be_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2930         let result = unsafe { self.load_ne::<i16, crate::XLoad16BeS32O32>(addr)? };
2931         self.state[dst].set_i32(i16::from_be(result).into());
2932         ControlFlow::Continue(())
2933     }
2934 
2935     fn xload32be_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2936         let result = unsafe { self.load_ne::<i32, crate::XLoad32BeO32>(addr)? };
2937         self.state[dst].set_i32(i32::from_be(result));
2938         ControlFlow::Continue(())
2939     }
2940 
2941     fn xload64be_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2942         let result = unsafe { self.load_ne::<i64, crate::XLoad64BeO32>(addr)? };
2943         self.state[dst].set_i64(i64::from_be(result));
2944         ControlFlow::Continue(())
2945     }
2946 
2947     fn xstore16be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2948         let val = self.state[val].get_u32() as u16;
2949         unsafe {
2950             self.store_ne::<u16, crate::XStore16BeO32>(addr, val.to_be())?;
2951         }
2952         ControlFlow::Continue(())
2953     }
2954 
2955     fn xstore32be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2956         let val = self.state[val].get_u32();
2957         unsafe {
2958             self.store_ne::<u32, crate::XStore32BeO32>(addr, val.to_be())?;
2959         }
2960         ControlFlow::Continue(())
2961     }
2962 
2963     fn xstore64be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2964         let val = self.state[val].get_u64();
2965         unsafe {
2966             self.store_ne::<u64, crate::XStore64BeO32>(addr, val.to_be())?;
2967         }
2968         ControlFlow::Continue(())
2969     }
2970 
2971     // =========================================================================
2972     // o32 addressing modes for little-endian F-registers
2973 
2974     fn fload32le_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
2975         let val = unsafe { self.load_ne::<u32, crate::Fload32LeO32>(addr)? };
2976         self.state[dst].set_f32(f32::from_bits(u32::from_le(val)));
2977         ControlFlow::Continue(())
2978     }
2979 
2980     fn fload64le_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
2981         let val = unsafe { self.load_ne::<u64, crate::Fload64LeO32>(addr)? };
2982         self.state[dst].set_f64(f64::from_bits(u64::from_le(val)));
2983         ControlFlow::Continue(())
2984     }
2985 
2986     fn fstore32le_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
2987         let val = self.state[src].get_f32();
2988         unsafe {
2989             self.store_ne::<u32, crate::Fstore32LeO32>(addr, val.to_bits().to_le())?;
2990         }
2991         ControlFlow::Continue(())
2992     }
2993 
2994     fn fstore64le_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
2995         let val = self.state[src].get_f64();
2996         unsafe {
2997             self.store_ne::<u64, crate::Fstore64LeO32>(addr, val.to_bits().to_le())?;
2998         }
2999         ControlFlow::Continue(())
3000     }
3001 
3002     // =========================================================================
3003     // o32 addressing modes for big-endian F-registers
3004 
3005     fn fload32be_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
3006         let val = unsafe { self.load_ne::<u32, crate::Fload32BeO32>(addr)? };
3007         self.state[dst].set_f32(f32::from_bits(u32::from_be(val)));
3008         ControlFlow::Continue(())
3009     }
3010 
3011     fn fload64be_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
3012         let val = unsafe { self.load_ne::<u64, crate::Fload64BeO32>(addr)? };
3013         self.state[dst].set_f64(f64::from_bits(u64::from_be(val)));
3014         ControlFlow::Continue(())
3015     }
3016 
3017     fn fstore32be_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
3018         let val = self.state[src].get_f32();
3019         unsafe {
3020             self.store_ne::<u32, crate::Fstore32BeO32>(addr, val.to_bits().to_be())?;
3021         }
3022         ControlFlow::Continue(())
3023     }
3024 
3025     fn fstore64be_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
3026         let val = self.state[src].get_f64();
3027         unsafe {
3028             self.store_ne::<u64, crate::Fstore64BeO32>(addr, val.to_bits().to_be())?;
3029         }
3030         ControlFlow::Continue(())
3031     }
3032 
3033     // =========================================================================
3034     // z addressing modes for little-endian F-registers
3035 
3036     fn fload32le_z(&mut self, dst: FReg, addr: AddrZ) -> ControlFlow<Done> {
3037         let val = unsafe { self.load_ne::<u32, crate::Fload32LeZ>(addr)? };
3038         self.state[dst].set_f32(f32::from_bits(u32::from_le(val)));
3039         ControlFlow::Continue(())
3040     }
3041 
3042     fn fload64le_z(&mut self, dst: FReg, addr: AddrZ) -> ControlFlow<Done> {
3043         let val = unsafe { self.load_ne::<u64, crate::Fload64LeZ>(addr)? };
3044         self.state[dst].set_f64(f64::from_bits(u64::from_le(val)));
3045         ControlFlow::Continue(())
3046     }
3047 
3048     fn fstore32le_z(&mut self, addr: AddrZ, src: FReg) -> ControlFlow<Done> {
3049         let val = self.state[src].get_f32();
3050         unsafe {
3051             self.store_ne::<u32, crate::Fstore32LeZ>(addr, val.to_bits().to_le())?;
3052         }
3053         ControlFlow::Continue(())
3054     }
3055 
3056     fn fstore64le_z(&mut self, addr: AddrZ, src: FReg) -> ControlFlow<Done> {
3057         let val = self.state[src].get_f64();
3058         unsafe {
3059             self.store_ne::<u64, crate::Fstore64LeZ>(addr, val.to_bits().to_le())?;
3060         }
3061         ControlFlow::Continue(())
3062     }
3063 
3064     // =========================================================================
3065     // g32 addressing modes for little-endian F-registers
3066 
3067     fn fload32le_g32(&mut self, dst: FReg, addr: AddrG32) -> ControlFlow<Done> {
3068         let val = unsafe { self.load_ne::<u32, crate::Fload32LeG32>(addr)? };
3069         self.state[dst].set_f32(f32::from_bits(u32::from_le(val)));
3070         ControlFlow::Continue(())
3071     }
3072 
3073     fn fload64le_g32(&mut self, dst: FReg, addr: AddrG32) -> ControlFlow<Done> {
3074         let val = unsafe { self.load_ne::<u64, crate::Fload64LeG32>(addr)? };
3075         self.state[dst].set_f64(f64::from_bits(u64::from_le(val)));
3076         ControlFlow::Continue(())
3077     }
3078 
3079     fn fstore32le_g32(&mut self, addr: AddrG32, src: FReg) -> ControlFlow<Done> {
3080         let val = self.state[src].get_f32();
3081         unsafe {
3082             self.store_ne::<u32, crate::Fstore32LeG32>(addr, val.to_bits().to_le())?;
3083         }
3084         ControlFlow::Continue(())
3085     }
3086 
3087     fn fstore64le_g32(&mut self, addr: AddrG32, src: FReg) -> ControlFlow<Done> {
3088         let val = self.state[src].get_f64();
3089         unsafe {
3090             self.store_ne::<u64, crate::Fstore64LeG32>(addr, val.to_bits().to_le())?;
3091         }
3092         ControlFlow::Continue(())
3093     }
3094 
3095     // =========================================================================
3096     // o32 addressing modes for little-endian V-registers
3097 
3098     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3099     fn vload128le_o32(&mut self, dst: VReg, addr: AddrO32) -> ControlFlow<Done> {
3100         let val = unsafe { self.load_ne::<u128, crate::VLoad128O32>(addr)? };
3101         self.state[dst].set_u128(u128::from_le(val));
3102         ControlFlow::Continue(())
3103     }
3104 
3105     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3106     fn vstore128le_o32(&mut self, addr: AddrO32, src: VReg) -> ControlFlow<Done> {
3107         let val = self.state[src].get_u128();
3108         unsafe {
3109             self.store_ne::<u128, crate::Vstore128LeO32>(addr, val.to_le())?;
3110         }
3111         ControlFlow::Continue(())
3112     }
3113 
3114     // =========================================================================
3115     // z addressing modes for little-endian V-registers
3116 
3117     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3118     fn vload128le_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
3119         let val = unsafe { self.load_ne::<u128, crate::VLoad128Z>(addr)? };
3120         self.state[dst].set_u128(u128::from_le(val));
3121         ControlFlow::Continue(())
3122     }
3123 
3124     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3125     fn vstore128le_z(&mut self, addr: AddrZ, src: VReg) -> ControlFlow<Done> {
3126         let val = self.state[src].get_u128();
3127         unsafe {
3128             self.store_ne::<u128, crate::Vstore128LeZ>(addr, val.to_le())?;
3129         }
3130         ControlFlow::Continue(())
3131     }
3132 
3133     // =========================================================================
3134     // g32 addressing modes for little-endian V-registers
3135 
3136     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3137     fn vload128le_g32(&mut self, dst: VReg, addr: AddrG32) -> ControlFlow<Done> {
3138         let val = unsafe { self.load_ne::<u128, crate::VLoad128G32>(addr)? };
3139         self.state[dst].set_u128(u128::from_le(val));
3140         ControlFlow::Continue(())
3141     }
3142 
3143     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3144     fn vstore128le_g32(&mut self, addr: AddrG32, src: VReg) -> ControlFlow<Done> {
3145         let val = self.state[src].get_u128();
3146         unsafe {
3147             self.store_ne::<u128, crate::Vstore128LeG32>(addr, val.to_le())?;
3148         }
3149         ControlFlow::Continue(())
3150     }
3151 
3152     fn xmov_fp(&mut self, dst: XReg) -> ControlFlow<Done> {
3153         let fp = self.state.fp;
3154         self.state[dst].set_ptr(fp);
3155         ControlFlow::Continue(())
3156     }
3157 
3158     fn xmov_lr(&mut self, dst: XReg) -> ControlFlow<Done> {
3159         let lr = self.state.lr;
3160         self.state[dst].set_ptr(lr);
3161         ControlFlow::Continue(())
3162     }
3163 
3164     fn fmov(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3165         let val = self.state[src];
3166         self.state[dst] = val;
3167         ControlFlow::Continue(())
3168     }
3169 
3170     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3171     fn vmov(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3172         let val = self.state[src];
3173         self.state[dst] = val;
3174         ControlFlow::Continue(())
3175     }
3176 
3177     fn fconst32(&mut self, dst: FReg, bits: u32) -> ControlFlow<Done> {
3178         self.state[dst].set_f32(f32::from_bits(bits));
3179         ControlFlow::Continue(())
3180     }
3181 
3182     fn fconst64(&mut self, dst: FReg, bits: u64) -> ControlFlow<Done> {
3183         self.state[dst].set_f64(f64::from_bits(bits));
3184         ControlFlow::Continue(())
3185     }
3186 
3187     fn bitcast_int_from_float_32(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3188         let val = self.state[src].get_f32();
3189         self.state[dst].set_u32(val.to_bits());
3190         ControlFlow::Continue(())
3191     }
3192 
3193     fn bitcast_int_from_float_64(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3194         let val = self.state[src].get_f64();
3195         self.state[dst].set_u64(val.to_bits());
3196         ControlFlow::Continue(())
3197     }
3198 
3199     fn bitcast_float_from_int_32(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3200         let val = self.state[src].get_u32();
3201         self.state[dst].set_f32(f32::from_bits(val));
3202         ControlFlow::Continue(())
3203     }
3204 
3205     fn bitcast_float_from_int_64(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3206         let val = self.state[src].get_u64();
3207         self.state[dst].set_f64(f64::from_bits(val));
3208         ControlFlow::Continue(())
3209     }
3210 
3211     fn feq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3212         let a = self.state[src1].get_f32();
3213         let b = self.state[src2].get_f32();
3214         self.state[dst].set_u32(u32::from(a == b));
3215         ControlFlow::Continue(())
3216     }
3217 
3218     fn fneq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3219         let a = self.state[src1].get_f32();
3220         let b = self.state[src2].get_f32();
3221         self.state[dst].set_u32(u32::from(a != b));
3222         ControlFlow::Continue(())
3223     }
3224 
3225     fn flt32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3226         let a = self.state[src1].get_f32();
3227         let b = self.state[src2].get_f32();
3228         self.state[dst].set_u32(u32::from(a < b));
3229         ControlFlow::Continue(())
3230     }
3231 
3232     fn flteq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3233         let a = self.state[src1].get_f32();
3234         let b = self.state[src2].get_f32();
3235         self.state[dst].set_u32(u32::from(a <= b));
3236         ControlFlow::Continue(())
3237     }
3238 
3239     fn feq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3240         let a = self.state[src1].get_f64();
3241         let b = self.state[src2].get_f64();
3242         self.state[dst].set_u32(u32::from(a == b));
3243         ControlFlow::Continue(())
3244     }
3245 
3246     fn fneq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3247         let a = self.state[src1].get_f64();
3248         let b = self.state[src2].get_f64();
3249         self.state[dst].set_u32(u32::from(a != b));
3250         ControlFlow::Continue(())
3251     }
3252 
3253     fn flt64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3254         let a = self.state[src1].get_f64();
3255         let b = self.state[src2].get_f64();
3256         self.state[dst].set_u32(u32::from(a < b));
3257         ControlFlow::Continue(())
3258     }
3259 
3260     fn flteq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3261         let a = self.state[src1].get_f64();
3262         let b = self.state[src2].get_f64();
3263         self.state[dst].set_u32(u32::from(a <= b));
3264         ControlFlow::Continue(())
3265     }
3266 
3267     fn fselect32(
3268         &mut self,
3269         dst: FReg,
3270         cond: XReg,
3271         if_nonzero: FReg,
3272         if_zero: FReg,
3273     ) -> ControlFlow<Done> {
3274         let result = if self.state[cond].get_u32() != 0 {
3275             self.state[if_nonzero].get_f32()
3276         } else {
3277             self.state[if_zero].get_f32()
3278         };
3279         self.state[dst].set_f32(result);
3280         ControlFlow::Continue(())
3281     }
3282 
3283     fn fselect64(
3284         &mut self,
3285         dst: FReg,
3286         cond: XReg,
3287         if_nonzero: FReg,
3288         if_zero: FReg,
3289     ) -> ControlFlow<Done> {
3290         let result = if self.state[cond].get_u32() != 0 {
3291             self.state[if_nonzero].get_f64()
3292         } else {
3293             self.state[if_zero].get_f64()
3294         };
3295         self.state[dst].set_f64(result);
3296         ControlFlow::Continue(())
3297     }
3298 
3299     fn f32_from_x32_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3300         let a = self.state[src].get_i32();
3301         self.state[dst].set_f32(a as f32);
3302         ControlFlow::Continue(())
3303     }
3304 
3305     fn f32_from_x32_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3306         let a = self.state[src].get_u32();
3307         self.state[dst].set_f32(a as f32);
3308         ControlFlow::Continue(())
3309     }
3310 
3311     fn f32_from_x64_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3312         let a = self.state[src].get_i64();
3313         self.state[dst].set_f32(a as f32);
3314         ControlFlow::Continue(())
3315     }
3316 
3317     fn f32_from_x64_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3318         let a = self.state[src].get_u64();
3319         self.state[dst].set_f32(a as f32);
3320         ControlFlow::Continue(())
3321     }
3322 
3323     fn f64_from_x32_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3324         let a = self.state[src].get_i32();
3325         self.state[dst].set_f64(a as f64);
3326         ControlFlow::Continue(())
3327     }
3328 
3329     fn f64_from_x32_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3330         let a = self.state[src].get_u32();
3331         self.state[dst].set_f64(a as f64);
3332         ControlFlow::Continue(())
3333     }
3334 
3335     fn f64_from_x64_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3336         let a = self.state[src].get_i64();
3337         self.state[dst].set_f64(a as f64);
3338         ControlFlow::Continue(())
3339     }
3340 
3341     fn f64_from_x64_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3342         let a = self.state[src].get_u64();
3343         self.state[dst].set_f64(a as f64);
3344         ControlFlow::Continue(())
3345     }
3346 
3347     fn x32_from_f32_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3348         let a = self.state[src].get_f32();
3349         self.check_xnn_from_f32::<crate::X32FromF32S>(a, f32_cvt_to_int_bounds(true, 32))?;
3350         self.state[dst].set_i32(a as i32);
3351         ControlFlow::Continue(())
3352     }
3353 
3354     fn x32_from_f32_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3355         let a = self.state[src].get_f32();
3356         self.check_xnn_from_f32::<crate::X32FromF32U>(a, f32_cvt_to_int_bounds(false, 32))?;
3357         self.state[dst].set_u32(a as u32);
3358         ControlFlow::Continue(())
3359     }
3360 
3361     fn x64_from_f32_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3362         let a = self.state[src].get_f32();
3363         self.check_xnn_from_f32::<crate::X64FromF32S>(a, f32_cvt_to_int_bounds(true, 64))?;
3364         self.state[dst].set_i64(a as i64);
3365         ControlFlow::Continue(())
3366     }
3367 
3368     fn x64_from_f32_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3369         let a = self.state[src].get_f32();
3370         self.check_xnn_from_f32::<crate::X64FromF32U>(a, f32_cvt_to_int_bounds(false, 64))?;
3371         self.state[dst].set_u64(a as u64);
3372         ControlFlow::Continue(())
3373     }
3374 
3375     fn x32_from_f64_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3376         let a = self.state[src].get_f64();
3377         self.check_xnn_from_f64::<crate::X32FromF64S>(a, f64_cvt_to_int_bounds(true, 32))?;
3378         self.state[dst].set_i32(a as i32);
3379         ControlFlow::Continue(())
3380     }
3381 
3382     fn x32_from_f64_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3383         let a = self.state[src].get_f64();
3384         self.check_xnn_from_f64::<crate::X32FromF64U>(a, f64_cvt_to_int_bounds(false, 32))?;
3385         self.state[dst].set_u32(a as u32);
3386         ControlFlow::Continue(())
3387     }
3388 
3389     fn x64_from_f64_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3390         let a = self.state[src].get_f64();
3391         self.check_xnn_from_f64::<crate::X64FromF64S>(a, f64_cvt_to_int_bounds(true, 64))?;
3392         self.state[dst].set_i64(a as i64);
3393         ControlFlow::Continue(())
3394     }
3395 
3396     fn x64_from_f64_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3397         let a = self.state[src].get_f64();
3398         self.check_xnn_from_f64::<crate::X64FromF64U>(a, f64_cvt_to_int_bounds(false, 64))?;
3399         self.state[dst].set_u64(a as u64);
3400         ControlFlow::Continue(())
3401     }
3402 
3403     fn x32_from_f32_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3404         let a = self.state[src].get_f32();
3405         self.state[dst].set_i32(a as i32);
3406         ControlFlow::Continue(())
3407     }
3408 
3409     fn x32_from_f32_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3410         let a = self.state[src].get_f32();
3411         self.state[dst].set_u32(a as u32);
3412         ControlFlow::Continue(())
3413     }
3414 
3415     fn x64_from_f32_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3416         let a = self.state[src].get_f32();
3417         self.state[dst].set_i64(a as i64);
3418         ControlFlow::Continue(())
3419     }
3420 
3421     fn x64_from_f32_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3422         let a = self.state[src].get_f32();
3423         self.state[dst].set_u64(a as u64);
3424         ControlFlow::Continue(())
3425     }
3426 
3427     fn x32_from_f64_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3428         let a = self.state[src].get_f64();
3429         self.state[dst].set_i32(a as i32);
3430         ControlFlow::Continue(())
3431     }
3432 
3433     fn x32_from_f64_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3434         let a = self.state[src].get_f64();
3435         self.state[dst].set_u32(a as u32);
3436         ControlFlow::Continue(())
3437     }
3438 
3439     fn x64_from_f64_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3440         let a = self.state[src].get_f64();
3441         self.state[dst].set_i64(a as i64);
3442         ControlFlow::Continue(())
3443     }
3444 
3445     fn x64_from_f64_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3446         let a = self.state[src].get_f64();
3447         self.state[dst].set_u64(a as u64);
3448         ControlFlow::Continue(())
3449     }
3450 
3451     fn f32_from_f64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3452         let a = self.state[src].get_f64();
3453         self.state[dst].set_f32(a as f32);
3454         ControlFlow::Continue(())
3455     }
3456 
3457     fn f64_from_f32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3458         let a = self.state[src].get_f32();
3459         self.state[dst].set_f64(a.into());
3460         ControlFlow::Continue(())
3461     }
3462 
3463     fn fcopysign32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3464         let a = self.state[operands.src1].get_f32();
3465         let b = self.state[operands.src2].get_f32();
3466         self.state[operands.dst].set_f32(a.wasm_copysign(b));
3467         ControlFlow::Continue(())
3468     }
3469 
3470     fn fcopysign64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3471         let a = self.state[operands.src1].get_f64();
3472         let b = self.state[operands.src2].get_f64();
3473         self.state[operands.dst].set_f64(a.wasm_copysign(b));
3474         ControlFlow::Continue(())
3475     }
3476 
3477     fn fadd32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3478         let a = self.state[operands.src1].get_f32();
3479         let b = self.state[operands.src2].get_f32();
3480         self.state[operands.dst].set_f32(a + b);
3481         ControlFlow::Continue(())
3482     }
3483 
3484     fn fsub32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3485         let a = self.state[operands.src1].get_f32();
3486         let b = self.state[operands.src2].get_f32();
3487         self.state[operands.dst].set_f32(a - b);
3488         ControlFlow::Continue(())
3489     }
3490 
3491     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3492     fn vsubf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3493         let mut a = self.state[operands.src1].get_f32x4();
3494         let b = self.state[operands.src2].get_f32x4();
3495         for (a, b) in a.iter_mut().zip(b) {
3496             *a = *a - b;
3497         }
3498         self.state[operands.dst].set_f32x4(a);
3499         ControlFlow::Continue(())
3500     }
3501 
3502     fn fmul32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3503         let a = self.state[operands.src1].get_f32();
3504         let b = self.state[operands.src2].get_f32();
3505         self.state[operands.dst].set_f32(a * b);
3506         ControlFlow::Continue(())
3507     }
3508 
3509     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3510     fn vmulf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3511         let mut a = self.state[operands.src1].get_f32x4();
3512         let b = self.state[operands.src2].get_f32x4();
3513         for (a, b) in a.iter_mut().zip(b) {
3514             *a = *a * b;
3515         }
3516         self.state[operands.dst].set_f32x4(a);
3517         ControlFlow::Continue(())
3518     }
3519 
3520     fn fdiv32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3521         let a = self.state[operands.src1].get_f32();
3522         let b = self.state[operands.src2].get_f32();
3523         self.state[operands.dst].set_f32(a / b);
3524         ControlFlow::Continue(())
3525     }
3526 
3527     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3528     fn vdivf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3529         let a = self.state[operands.src1].get_f32x4();
3530         let b = self.state[operands.src2].get_f32x4();
3531         let mut result = [0.0f32; 4];
3532 
3533         for i in 0..4 {
3534             result[i] = a[i] / b[i];
3535         }
3536 
3537         self.state[operands.dst].set_f32x4(result);
3538         ControlFlow::Continue(())
3539     }
3540 
3541     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3542     fn vdivf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3543         let a = self.state[operands.src1].get_f64x2();
3544         let b = self.state[operands.src2].get_f64x2();
3545         let mut result = [0.0f64; 2];
3546 
3547         for i in 0..2 {
3548             result[i] = a[i] / b[i];
3549         }
3550 
3551         self.state[operands.dst].set_f64x2(result);
3552         ControlFlow::Continue(())
3553     }
3554 
3555     fn fmaximum32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3556         let a = self.state[operands.src1].get_f32();
3557         let b = self.state[operands.src2].get_f32();
3558         self.state[operands.dst].set_f32(a.wasm_maximum(b));
3559         ControlFlow::Continue(())
3560     }
3561 
3562     fn fminimum32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3563         let a = self.state[operands.src1].get_f32();
3564         let b = self.state[operands.src2].get_f32();
3565         self.state[operands.dst].set_f32(a.wasm_minimum(b));
3566         ControlFlow::Continue(())
3567     }
3568 
3569     fn ftrunc32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3570         let a = self.state[src].get_f32();
3571         self.state[dst].set_f32(a.wasm_trunc());
3572         ControlFlow::Continue(())
3573     }
3574 
3575     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3576     fn vtrunc32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3577         let mut a = self.state[src].get_f32x4();
3578         for elem in a.iter_mut() {
3579             *elem = elem.wasm_trunc();
3580         }
3581         self.state[dst].set_f32x4(a);
3582         ControlFlow::Continue(())
3583     }
3584 
3585     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3586     fn vtrunc64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3587         let mut a = self.state[src].get_f64x2();
3588         for elem in a.iter_mut() {
3589             *elem = elem.wasm_trunc();
3590         }
3591         self.state[dst].set_f64x2(a);
3592         ControlFlow::Continue(())
3593     }
3594 
3595     fn ffloor32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3596         let a = self.state[src].get_f32();
3597         self.state[dst].set_f32(a.wasm_floor());
3598         ControlFlow::Continue(())
3599     }
3600 
3601     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3602     fn vfloor32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3603         let mut a = self.state[src].get_f32x4();
3604         for elem in a.iter_mut() {
3605             *elem = elem.wasm_floor();
3606         }
3607         self.state[dst].set_f32x4(a);
3608         ControlFlow::Continue(())
3609     }
3610 
3611     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3612     fn vfloor64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3613         let mut a = self.state[src].get_f64x2();
3614         for elem in a.iter_mut() {
3615             *elem = elem.wasm_floor();
3616         }
3617         self.state[dst].set_f64x2(a);
3618         ControlFlow::Continue(())
3619     }
3620 
3621     fn fceil32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3622         let a = self.state[src].get_f32();
3623         self.state[dst].set_f32(a.wasm_ceil());
3624         ControlFlow::Continue(())
3625     }
3626 
3627     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3628     fn vceil32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3629         let mut a = self.state[src].get_f32x4();
3630         for elem in a.iter_mut() {
3631             *elem = elem.wasm_ceil();
3632         }
3633         self.state[dst].set_f32x4(a);
3634 
3635         ControlFlow::Continue(())
3636     }
3637 
3638     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3639     fn vceil64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3640         let mut a = self.state[src].get_f64x2();
3641         for elem in a.iter_mut() {
3642             *elem = elem.wasm_ceil();
3643         }
3644         self.state[dst].set_f64x2(a);
3645 
3646         ControlFlow::Continue(())
3647     }
3648 
3649     fn fnearest32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3650         let a = self.state[src].get_f32();
3651         self.state[dst].set_f32(a.wasm_nearest());
3652         ControlFlow::Continue(())
3653     }
3654 
3655     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3656     fn vnearest32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3657         let mut a = self.state[src].get_f32x4();
3658         for elem in a.iter_mut() {
3659             *elem = elem.wasm_nearest();
3660         }
3661         self.state[dst].set_f32x4(a);
3662         ControlFlow::Continue(())
3663     }
3664 
3665     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3666     fn vnearest64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3667         let mut a = self.state[src].get_f64x2();
3668         for elem in a.iter_mut() {
3669             *elem = elem.wasm_nearest();
3670         }
3671         self.state[dst].set_f64x2(a);
3672         ControlFlow::Continue(())
3673     }
3674 
3675     fn fsqrt32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3676         let a = self.state[src].get_f32();
3677         self.state[dst].set_f32(a.wasm_sqrt());
3678         ControlFlow::Continue(())
3679     }
3680 
3681     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3682     fn vsqrt32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3683         let mut a = self.state[src].get_f32x4();
3684         for elem in a.iter_mut() {
3685             *elem = elem.wasm_sqrt();
3686         }
3687         self.state[dst].set_f32x4(a);
3688         ControlFlow::Continue(())
3689     }
3690 
3691     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3692     fn vsqrt64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3693         let mut a = self.state[src].get_f64x2();
3694         for elem in a.iter_mut() {
3695             *elem = elem.wasm_sqrt();
3696         }
3697         self.state[dst].set_f64x2(a);
3698         ControlFlow::Continue(())
3699     }
3700 
3701     fn fneg32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3702         let a = self.state[src].get_f32();
3703         self.state[dst].set_f32(-a);
3704         ControlFlow::Continue(())
3705     }
3706 
3707     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3708     fn vnegf32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3709         let mut a = self.state[src].get_f32x4();
3710         for elem in a.iter_mut() {
3711             *elem = -*elem;
3712         }
3713         self.state[dst].set_f32x4(a);
3714         ControlFlow::Continue(())
3715     }
3716 
3717     fn fabs32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3718         let a = self.state[src].get_f32();
3719         self.state[dst].set_f32(a.wasm_abs());
3720         ControlFlow::Continue(())
3721     }
3722 
3723     fn fadd64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3724         let a = self.state[operands.src1].get_f64();
3725         let b = self.state[operands.src2].get_f64();
3726         self.state[operands.dst].set_f64(a + b);
3727         ControlFlow::Continue(())
3728     }
3729 
3730     fn fsub64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3731         let a = self.state[operands.src1].get_f64();
3732         let b = self.state[operands.src2].get_f64();
3733         self.state[operands.dst].set_f64(a - b);
3734         ControlFlow::Continue(())
3735     }
3736 
3737     fn fmul64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3738         let a = self.state[operands.src1].get_f64();
3739         let b = self.state[operands.src2].get_f64();
3740         self.state[operands.dst].set_f64(a * b);
3741         ControlFlow::Continue(())
3742     }
3743 
3744     fn fdiv64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3745         let a = self.state[operands.src1].get_f64();
3746         let b = self.state[operands.src2].get_f64();
3747         self.state[operands.dst].set_f64(a / b);
3748         ControlFlow::Continue(())
3749     }
3750 
3751     fn fmaximum64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3752         let a = self.state[operands.src1].get_f64();
3753         let b = self.state[operands.src2].get_f64();
3754         self.state[operands.dst].set_f64(a.wasm_maximum(b));
3755         ControlFlow::Continue(())
3756     }
3757 
3758     fn fminimum64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3759         let a = self.state[operands.src1].get_f64();
3760         let b = self.state[operands.src2].get_f64();
3761         self.state[operands.dst].set_f64(a.wasm_minimum(b));
3762         ControlFlow::Continue(())
3763     }
3764 
3765     fn ftrunc64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3766         let a = self.state[src].get_f64();
3767         self.state[dst].set_f64(a.wasm_trunc());
3768         ControlFlow::Continue(())
3769     }
3770 
3771     fn ffloor64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3772         let a = self.state[src].get_f64();
3773         self.state[dst].set_f64(a.wasm_floor());
3774         ControlFlow::Continue(())
3775     }
3776 
3777     fn fceil64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3778         let a = self.state[src].get_f64();
3779         self.state[dst].set_f64(a.wasm_ceil());
3780         ControlFlow::Continue(())
3781     }
3782 
3783     fn fnearest64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3784         let a = self.state[src].get_f64();
3785         self.state[dst].set_f64(a.wasm_nearest());
3786         ControlFlow::Continue(())
3787     }
3788 
3789     fn fsqrt64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3790         let a = self.state[src].get_f64();
3791         self.state[dst].set_f64(a.wasm_sqrt());
3792         ControlFlow::Continue(())
3793     }
3794 
3795     fn fneg64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3796         let a = self.state[src].get_f64();
3797         self.state[dst].set_f64(-a);
3798         ControlFlow::Continue(())
3799     }
3800 
3801     fn fabs64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3802         let a = self.state[src].get_f64();
3803         self.state[dst].set_f64(a.wasm_abs());
3804         ControlFlow::Continue(())
3805     }
3806 
3807     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3808     fn vaddi8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3809         let mut a = self.state[operands.src1].get_i8x16();
3810         let b = self.state[operands.src2].get_i8x16();
3811         for (a, b) in a.iter_mut().zip(b) {
3812             *a = a.wrapping_add(b);
3813         }
3814         self.state[operands.dst].set_i8x16(a);
3815         ControlFlow::Continue(())
3816     }
3817 
3818     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3819     fn vaddi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3820         let mut a = self.state[operands.src1].get_i16x8();
3821         let b = self.state[operands.src2].get_i16x8();
3822         for (a, b) in a.iter_mut().zip(b) {
3823             *a = a.wrapping_add(b);
3824         }
3825         self.state[operands.dst].set_i16x8(a);
3826         ControlFlow::Continue(())
3827     }
3828 
3829     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3830     fn vaddi32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3831         let mut a = self.state[operands.src1].get_i32x4();
3832         let b = self.state[operands.src2].get_i32x4();
3833         for (a, b) in a.iter_mut().zip(b) {
3834             *a = a.wrapping_add(b);
3835         }
3836         self.state[operands.dst].set_i32x4(a);
3837         ControlFlow::Continue(())
3838     }
3839 
3840     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3841     fn vaddi64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3842         let mut a = self.state[operands.src1].get_i64x2();
3843         let b = self.state[operands.src2].get_i64x2();
3844         for (a, b) in a.iter_mut().zip(b) {
3845             *a = a.wrapping_add(b);
3846         }
3847         self.state[operands.dst].set_i64x2(a);
3848         ControlFlow::Continue(())
3849     }
3850 
3851     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3852     fn vaddf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3853         let mut a = self.state[operands.src1].get_f32x4();
3854         let b = self.state[operands.src2].get_f32x4();
3855         for (a, b) in a.iter_mut().zip(b) {
3856             *a += b;
3857         }
3858         self.state[operands.dst].set_f32x4(a);
3859         ControlFlow::Continue(())
3860     }
3861 
3862     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3863     fn vaddf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3864         let mut a = self.state[operands.src1].get_f64x2();
3865         let b = self.state[operands.src2].get_f64x2();
3866         for (a, b) in a.iter_mut().zip(b) {
3867             *a += b;
3868         }
3869         self.state[operands.dst].set_f64x2(a);
3870         ControlFlow::Continue(())
3871     }
3872 
3873     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3874     fn vaddi8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3875         let mut a = self.state[operands.src1].get_i8x16();
3876         let b = self.state[operands.src2].get_i8x16();
3877         for (a, b) in a.iter_mut().zip(b) {
3878             *a = (*a).saturating_add(b);
3879         }
3880         self.state[operands.dst].set_i8x16(a);
3881         ControlFlow::Continue(())
3882     }
3883 
3884     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3885     fn vaddu8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3886         let mut a = self.state[operands.src1].get_u8x16();
3887         let b = self.state[operands.src2].get_u8x16();
3888         for (a, b) in a.iter_mut().zip(b) {
3889             *a = (*a).saturating_add(b);
3890         }
3891         self.state[operands.dst].set_u8x16(a);
3892         ControlFlow::Continue(())
3893     }
3894 
3895     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3896     fn vaddi16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3897         let mut a = self.state[operands.src1].get_i16x8();
3898         let b = self.state[operands.src2].get_i16x8();
3899         for (a, b) in a.iter_mut().zip(b) {
3900             *a = (*a).saturating_add(b);
3901         }
3902         self.state[operands.dst].set_i16x8(a);
3903         ControlFlow::Continue(())
3904     }
3905 
3906     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3907     fn vaddu16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3908         let mut a = self.state[operands.src1].get_u16x8();
3909         let b = self.state[operands.src2].get_u16x8();
3910         for (a, b) in a.iter_mut().zip(b) {
3911             *a = (*a).saturating_add(b);
3912         }
3913         self.state[operands.dst].set_u16x8(a);
3914         ControlFlow::Continue(())
3915     }
3916 
3917     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3918     fn vaddpairwisei16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3919         let a = self.state[operands.src1].get_i16x8();
3920         let b = self.state[operands.src2].get_i16x8();
3921         let mut result = [0i16; 8];
3922         let half = result.len() / 2;
3923         for i in 0..half {
3924             result[i] = a[2 * i].wrapping_add(a[2 * i + 1]);
3925             result[i + half] = b[2 * i].wrapping_add(b[2 * i + 1]);
3926         }
3927         self.state[operands.dst].set_i16x8(result);
3928         ControlFlow::Continue(())
3929     }
3930 
3931     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3932     fn vaddpairwisei32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3933         let a = self.state[operands.src1].get_i32x4();
3934         let b = self.state[operands.src2].get_i32x4();
3935         let mut result = [0i32; 4];
3936         result[0] = a[0].wrapping_add(a[1]);
3937         result[1] = a[2].wrapping_add(a[3]);
3938         result[2] = b[0].wrapping_add(b[1]);
3939         result[3] = b[2].wrapping_add(b[3]);
3940         self.state[operands.dst].set_i32x4(result);
3941         ControlFlow::Continue(())
3942     }
3943 
3944     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3945     fn vshli8x16(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3946         let a = self.state[operands.src1].get_i8x16();
3947         let b = self.state[operands.src2].get_u32();
3948         self.state[operands.dst].set_i8x16(a.map(|a| a.wrapping_shl(b)));
3949         ControlFlow::Continue(())
3950     }
3951 
3952     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3953     fn vshli16x8(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3954         let a = self.state[operands.src1].get_i16x8();
3955         let b = self.state[operands.src2].get_u32();
3956         self.state[operands.dst].set_i16x8(a.map(|a| a.wrapping_shl(b)));
3957         ControlFlow::Continue(())
3958     }
3959 
3960     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3961     fn vshli32x4(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3962         let a = self.state[operands.src1].get_i32x4();
3963         let b = self.state[operands.src2].get_u32();
3964         self.state[operands.dst].set_i32x4(a.map(|a| a.wrapping_shl(b)));
3965         ControlFlow::Continue(())
3966     }
3967 
3968     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3969     fn vshli64x2(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3970         let a = self.state[operands.src1].get_i64x2();
3971         let b = self.state[operands.src2].get_u32();
3972         self.state[operands.dst].set_i64x2(a.map(|a| a.wrapping_shl(b)));
3973         ControlFlow::Continue(())
3974     }
3975 
3976     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3977     fn vshri8x16_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3978         let a = self.state[operands.src1].get_i8x16();
3979         let b = self.state[operands.src2].get_u32();
3980         self.state[operands.dst].set_i8x16(a.map(|a| a.wrapping_shr(b)));
3981         ControlFlow::Continue(())
3982     }
3983 
3984     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3985     fn vshri16x8_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3986         let a = self.state[operands.src1].get_i16x8();
3987         let b = self.state[operands.src2].get_u32();
3988         self.state[operands.dst].set_i16x8(a.map(|a| a.wrapping_shr(b)));
3989         ControlFlow::Continue(())
3990     }
3991 
3992     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3993     fn vshri32x4_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3994         let a = self.state[operands.src1].get_i32x4();
3995         let b = self.state[operands.src2].get_u32();
3996         self.state[operands.dst].set_i32x4(a.map(|a| a.wrapping_shr(b)));
3997         ControlFlow::Continue(())
3998     }
3999 
4000     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4001     fn vshri64x2_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
4002         let a = self.state[operands.src1].get_i64x2();
4003         let b = self.state[operands.src2].get_u32();
4004         self.state[operands.dst].set_i64x2(a.map(|a| a.wrapping_shr(b)));
4005         ControlFlow::Continue(())
4006     }
4007 
4008     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4009     fn vshri8x16_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
4010         let a = self.state[operands.src1].get_u8x16();
4011         let b = self.state[operands.src2].get_u32();
4012         self.state[operands.dst].set_u8x16(a.map(|a| a.wrapping_shr(b)));
4013         ControlFlow::Continue(())
4014     }
4015 
4016     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4017     fn vshri16x8_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
4018         let a = self.state[operands.src1].get_u16x8();
4019         let b = self.state[operands.src2].get_u32();
4020         self.state[operands.dst].set_u16x8(a.map(|a| a.wrapping_shr(b)));
4021         ControlFlow::Continue(())
4022     }
4023 
4024     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4025     fn vshri32x4_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
4026         let a = self.state[operands.src1].get_u32x4();
4027         let b = self.state[operands.src2].get_u32();
4028         self.state[operands.dst].set_u32x4(a.map(|a| a.wrapping_shr(b)));
4029         ControlFlow::Continue(())
4030     }
4031 
4032     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4033     fn vshri64x2_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
4034         let a = self.state[operands.src1].get_u64x2();
4035         let b = self.state[operands.src2].get_u32();
4036         self.state[operands.dst].set_u64x2(a.map(|a| a.wrapping_shr(b)));
4037         ControlFlow::Continue(())
4038     }
4039 
4040     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4041     fn vconst128(&mut self, dst: VReg, val: u128) -> ControlFlow<Done> {
4042         self.state[dst].set_u128(val);
4043         ControlFlow::Continue(())
4044     }
4045 
4046     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4047     fn vsplatx8(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
4048         let val = self.state[src].get_u32() as u8;
4049         self.state[dst].set_u8x16([val; 16]);
4050         ControlFlow::Continue(())
4051     }
4052 
4053     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4054     fn vsplatx16(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
4055         let val = self.state[src].get_u32() as u16;
4056         self.state[dst].set_u16x8([val; 8]);
4057         ControlFlow::Continue(())
4058     }
4059 
4060     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4061     fn vsplatx32(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
4062         let val = self.state[src].get_u32();
4063         self.state[dst].set_u32x4([val; 4]);
4064         ControlFlow::Continue(())
4065     }
4066 
4067     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4068     fn vsplatx64(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
4069         let val = self.state[src].get_u64();
4070         self.state[dst].set_u64x2([val; 2]);
4071         ControlFlow::Continue(())
4072     }
4073 
4074     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4075     fn vsplatf32(&mut self, dst: VReg, src: FReg) -> ControlFlow<Done> {
4076         let val = self.state[src].get_f32();
4077         self.state[dst].set_f32x4([val; 4]);
4078         ControlFlow::Continue(())
4079     }
4080 
4081     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4082     fn vsplatf64(&mut self, dst: VReg, src: FReg) -> ControlFlow<Done> {
4083         let val = self.state[src].get_f64();
4084         self.state[dst].set_f64x2([val; 2]);
4085         ControlFlow::Continue(())
4086     }
4087 
4088     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4089     fn vload8x8_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
4090         let val = unsafe { self.load_ne::<[i8; 8], crate::VLoad8x8SZ>(addr)? };
4091         self.state[dst].set_i16x8(val.map(|i| i.into()));
4092         ControlFlow::Continue(())
4093     }
4094 
4095     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4096     fn vload8x8_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
4097         let val = unsafe { self.load_ne::<[u8; 8], crate::VLoad8x8UZ>(addr)? };
4098         self.state[dst].set_u16x8(val.map(|i| i.into()));
4099         ControlFlow::Continue(())
4100     }
4101 
4102     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4103     fn vload16x4le_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
4104         let val = unsafe { self.load_ne::<[i16; 4], crate::VLoad16x4LeSZ>(addr)? };
4105         self.state[dst].set_i32x4(val.map(|i| i16::from_le(i).into()));
4106         ControlFlow::Continue(())
4107     }
4108 
4109     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4110     fn vload16x4le_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
4111         let val = unsafe { self.load_ne::<[u16; 4], crate::VLoad16x4LeUZ>(addr)? };
4112         self.state[dst].set_u32x4(val.map(|i| u16::from_le(i).into()));
4113         ControlFlow::Continue(())
4114     }
4115 
4116     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4117     fn vload32x2le_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
4118         let val = unsafe { self.load_ne::<[i32; 2], crate::VLoad32x2LeSZ>(addr)? };
4119         self.state[dst].set_i64x2(val.map(|i| i32::from_le(i).into()));
4120         ControlFlow::Continue(())
4121     }
4122 
4123     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4124     fn vload32x2le_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
4125         let val = unsafe { self.load_ne::<[u32; 2], crate::VLoad32x2LeUZ>(addr)? };
4126         self.state[dst].set_u64x2(val.map(|i| u32::from_le(i).into()));
4127         ControlFlow::Continue(())
4128     }
4129 
4130     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4131     fn vband128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4132         let a = self.state[operands.src1].get_u128();
4133         let b = self.state[operands.src2].get_u128();
4134         self.state[operands.dst].set_u128(a & b);
4135         ControlFlow::Continue(())
4136     }
4137 
4138     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4139     fn vbor128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4140         let a = self.state[operands.src1].get_u128();
4141         let b = self.state[operands.src2].get_u128();
4142         self.state[operands.dst].set_u128(a | b);
4143         ControlFlow::Continue(())
4144     }
4145 
4146     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4147     fn vbxor128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4148         let a = self.state[operands.src1].get_u128();
4149         let b = self.state[operands.src2].get_u128();
4150         self.state[operands.dst].set_u128(a ^ b);
4151         ControlFlow::Continue(())
4152     }
4153 
4154     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4155     fn vbnot128(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4156         let a = self.state[src].get_u128();
4157         self.state[dst].set_u128(!a);
4158         ControlFlow::Continue(())
4159     }
4160 
4161     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4162     fn vbitselect128(&mut self, dst: VReg, c: VReg, x: VReg, y: VReg) -> ControlFlow<Done> {
4163         let c = self.state[c].get_u128();
4164         let x = self.state[x].get_u128();
4165         let y = self.state[y].get_u128();
4166         self.state[dst].set_u128((c & x) | (!c & y));
4167         ControlFlow::Continue(())
4168     }
4169 
4170     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4171     fn vbitmask8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4172         let a = self.state[src].get_u8x16();
4173         let mut result = 0;
4174         for item in a.iter().rev() {
4175             result <<= 1;
4176             result |= (*item >> 7) as u32;
4177         }
4178         self.state[dst].set_u32(result);
4179         ControlFlow::Continue(())
4180     }
4181 
4182     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4183     fn vbitmask16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4184         let a = self.state[src].get_u16x8();
4185         let mut result = 0;
4186         for item in a.iter().rev() {
4187             result <<= 1;
4188             result |= (*item >> 15) as u32;
4189         }
4190         self.state[dst].set_u32(result);
4191         ControlFlow::Continue(())
4192     }
4193 
4194     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4195     fn vbitmask32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4196         let a = self.state[src].get_u32x4();
4197         let mut result = 0;
4198         for item in a.iter().rev() {
4199             result <<= 1;
4200             result |= *item >> 31;
4201         }
4202         self.state[dst].set_u32(result);
4203         ControlFlow::Continue(())
4204     }
4205 
4206     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4207     fn vbitmask64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4208         let a = self.state[src].get_u64x2();
4209         let mut result = 0;
4210         for item in a.iter().rev() {
4211             result <<= 1;
4212             result |= (*item >> 63) as u32;
4213         }
4214         self.state[dst].set_u32(result);
4215         ControlFlow::Continue(())
4216     }
4217 
4218     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4219     fn valltrue8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4220         let a = self.state[src].get_u8x16();
4221         let result = a.iter().all(|a| *a != 0);
4222         self.state[dst].set_u32(u32::from(result));
4223         ControlFlow::Continue(())
4224     }
4225 
4226     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4227     fn valltrue16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4228         let a = self.state[src].get_u16x8();
4229         let result = a.iter().all(|a| *a != 0);
4230         self.state[dst].set_u32(u32::from(result));
4231         ControlFlow::Continue(())
4232     }
4233 
4234     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4235     fn valltrue32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4236         let a = self.state[src].get_u32x4();
4237         let result = a.iter().all(|a| *a != 0);
4238         self.state[dst].set_u32(u32::from(result));
4239         ControlFlow::Continue(())
4240     }
4241 
4242     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4243     fn valltrue64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4244         let a = self.state[src].get_u64x2();
4245         let result = a.iter().all(|a| *a != 0);
4246         self.state[dst].set_u32(u32::from(result));
4247         ControlFlow::Continue(())
4248     }
4249 
4250     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4251     fn vanytrue8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4252         let a = self.state[src].get_u8x16();
4253         let result = a.iter().any(|a| *a != 0);
4254         self.state[dst].set_u32(u32::from(result));
4255         ControlFlow::Continue(())
4256     }
4257 
4258     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4259     fn vanytrue16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4260         let a = self.state[src].get_u16x8();
4261         let result = a.iter().any(|a| *a != 0);
4262         self.state[dst].set_u32(u32::from(result));
4263         ControlFlow::Continue(())
4264     }
4265 
4266     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4267     fn vanytrue32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4268         let a = self.state[src].get_u32x4();
4269         let result = a.iter().any(|a| *a != 0);
4270         self.state[dst].set_u32(u32::from(result));
4271         ControlFlow::Continue(())
4272     }
4273 
4274     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4275     fn vanytrue64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4276         let a = self.state[src].get_u64x2();
4277         let result = a.iter().any(|a| *a != 0);
4278         self.state[dst].set_u32(u32::from(result));
4279         ControlFlow::Continue(())
4280     }
4281 
4282     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4283     fn vf32x4_from_i32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4284         let a = self.state[src].get_i32x4();
4285         self.state[dst].set_f32x4(a.map(|i| i as f32));
4286         ControlFlow::Continue(())
4287     }
4288 
4289     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4290     fn vf32x4_from_i32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4291         let a = self.state[src].get_u32x4();
4292         self.state[dst].set_f32x4(a.map(|i| i as f32));
4293         ControlFlow::Continue(())
4294     }
4295 
4296     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4297     fn vf64x2_from_i64x2_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4298         let a = self.state[src].get_i64x2();
4299         self.state[dst].set_f64x2(a.map(|i| i as f64));
4300         ControlFlow::Continue(())
4301     }
4302 
4303     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4304     fn vf64x2_from_i64x2_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4305         let a = self.state[src].get_u64x2();
4306         self.state[dst].set_f64x2(a.map(|i| i as f64));
4307         ControlFlow::Continue(())
4308     }
4309 
4310     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4311     fn vi32x4_from_f32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4312         let a = self.state[src].get_f32x4();
4313         self.state[dst].set_i32x4(a.map(|f| f as i32));
4314         ControlFlow::Continue(())
4315     }
4316 
4317     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4318     fn vi32x4_from_f32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4319         let a = self.state[src].get_f32x4();
4320         self.state[dst].set_u32x4(a.map(|f| f as u32));
4321         ControlFlow::Continue(())
4322     }
4323 
4324     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4325     fn vi64x2_from_f64x2_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4326         let a = self.state[src].get_f64x2();
4327         self.state[dst].set_i64x2(a.map(|f| f as i64));
4328         ControlFlow::Continue(())
4329     }
4330 
4331     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4332     fn vi64x2_from_f64x2_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4333         let a = self.state[src].get_f64x2();
4334         self.state[dst].set_u64x2(a.map(|f| f as u64));
4335         ControlFlow::Continue(())
4336     }
4337 
4338     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4339     fn vwidenlow8x16_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4340         let a = *self.state[src].get_i8x16().first_chunk().unwrap();
4341         self.state[dst].set_i16x8(a.map(|i| i.into()));
4342         ControlFlow::Continue(())
4343     }
4344 
4345     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4346     fn vwidenlow8x16_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4347         let a = *self.state[src].get_u8x16().first_chunk().unwrap();
4348         self.state[dst].set_u16x8(a.map(|i| i.into()));
4349         ControlFlow::Continue(())
4350     }
4351 
4352     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4353     fn vwidenlow16x8_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4354         let a = *self.state[src].get_i16x8().first_chunk().unwrap();
4355         self.state[dst].set_i32x4(a.map(|i| i.into()));
4356         ControlFlow::Continue(())
4357     }
4358 
4359     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4360     fn vwidenlow16x8_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4361         let a = *self.state[src].get_u16x8().first_chunk().unwrap();
4362         self.state[dst].set_u32x4(a.map(|i| i.into()));
4363         ControlFlow::Continue(())
4364     }
4365 
4366     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4367     fn vwidenlow32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4368         let a = *self.state[src].get_i32x4().first_chunk().unwrap();
4369         self.state[dst].set_i64x2(a.map(|i| i.into()));
4370         ControlFlow::Continue(())
4371     }
4372 
4373     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4374     fn vwidenlow32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4375         let a = *self.state[src].get_u32x4().first_chunk().unwrap();
4376         self.state[dst].set_u64x2(a.map(|i| i.into()));
4377         ControlFlow::Continue(())
4378     }
4379 
4380     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4381     fn vwidenhigh8x16_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4382         let a = *self.state[src].get_i8x16().last_chunk().unwrap();
4383         self.state[dst].set_i16x8(a.map(|i| i.into()));
4384         ControlFlow::Continue(())
4385     }
4386 
4387     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4388     fn vwidenhigh8x16_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4389         let a = *self.state[src].get_u8x16().last_chunk().unwrap();
4390         self.state[dst].set_u16x8(a.map(|i| i.into()));
4391         ControlFlow::Continue(())
4392     }
4393 
4394     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4395     fn vwidenhigh16x8_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4396         let a = *self.state[src].get_i16x8().last_chunk().unwrap();
4397         self.state[dst].set_i32x4(a.map(|i| i.into()));
4398         ControlFlow::Continue(())
4399     }
4400 
4401     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4402     fn vwidenhigh16x8_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4403         let a = *self.state[src].get_u16x8().last_chunk().unwrap();
4404         self.state[dst].set_u32x4(a.map(|i| i.into()));
4405         ControlFlow::Continue(())
4406     }
4407 
4408     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4409     fn vwidenhigh32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4410         let a = *self.state[src].get_i32x4().last_chunk().unwrap();
4411         self.state[dst].set_i64x2(a.map(|i| i.into()));
4412         ControlFlow::Continue(())
4413     }
4414 
4415     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4416     fn vwidenhigh32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4417         let a = *self.state[src].get_u32x4().last_chunk().unwrap();
4418         self.state[dst].set_u64x2(a.map(|i| i.into()));
4419         ControlFlow::Continue(())
4420     }
4421 
4422     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4423     fn vnarrow16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4424         let a = self.state[operands.src1].get_i16x8();
4425         let b = self.state[operands.src2].get_i16x8();
4426         let mut result = [0; 16];
4427         for (i, d) in a.iter().chain(&b).zip(&mut result) {
4428             *d = (*i)
4429                 .try_into()
4430                 .unwrap_or(if *i < 0 { i8::MIN } else { i8::MAX });
4431         }
4432         self.state[operands.dst].set_i8x16(result);
4433         ControlFlow::Continue(())
4434     }
4435 
4436     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4437     fn vnarrow16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4438         let a = self.state[operands.src1].get_i16x8();
4439         let b = self.state[operands.src2].get_i16x8();
4440         let mut result = [0; 16];
4441         for (i, d) in a.iter().chain(&b).zip(&mut result) {
4442             *d = (*i)
4443                 .try_into()
4444                 .unwrap_or(if *i < 0 { u8::MIN } else { u8::MAX });
4445         }
4446         self.state[operands.dst].set_u8x16(result);
4447         ControlFlow::Continue(())
4448     }
4449 
4450     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4451     fn vnarrow32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4452         let a = self.state[operands.src1].get_i32x4();
4453         let b = self.state[operands.src2].get_i32x4();
4454         let mut result = [0; 8];
4455         for (i, d) in a.iter().chain(&b).zip(&mut result) {
4456             *d = (*i)
4457                 .try_into()
4458                 .unwrap_or(if *i < 0 { i16::MIN } else { i16::MAX });
4459         }
4460         self.state[operands.dst].set_i16x8(result);
4461         ControlFlow::Continue(())
4462     }
4463 
4464     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4465     fn vnarrow32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4466         let a = self.state[operands.src1].get_i32x4();
4467         let b = self.state[operands.src2].get_i32x4();
4468         let mut result = [0; 8];
4469         for (i, d) in a.iter().chain(&b).zip(&mut result) {
4470             *d = (*i)
4471                 .try_into()
4472                 .unwrap_or(if *i < 0 { u16::MIN } else { u16::MAX });
4473         }
4474         self.state[operands.dst].set_u16x8(result);
4475         ControlFlow::Continue(())
4476     }
4477 
4478     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4479     fn vnarrow64x2_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4480         let a = self.state[operands.src1].get_i64x2();
4481         let b = self.state[operands.src2].get_i64x2();
4482         let mut result = [0; 4];
4483         for (i, d) in a.iter().chain(&b).zip(&mut result) {
4484             *d = (*i)
4485                 .try_into()
4486                 .unwrap_or(if *i < 0 { i32::MIN } else { i32::MAX });
4487         }
4488         self.state[operands.dst].set_i32x4(result);
4489         ControlFlow::Continue(())
4490     }
4491 
4492     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4493     fn vnarrow64x2_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4494         let a = self.state[operands.src1].get_i64x2();
4495         let b = self.state[operands.src2].get_i64x2();
4496         let mut result = [0; 4];
4497         for (i, d) in a.iter().chain(&b).zip(&mut result) {
4498             *d = (*i)
4499                 .try_into()
4500                 .unwrap_or(if *i < 0 { u32::MIN } else { u32::MAX });
4501         }
4502         self.state[operands.dst].set_u32x4(result);
4503         ControlFlow::Continue(())
4504     }
4505 
4506     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4507     fn vunarrow64x2_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4508         let a = self.state[operands.src1].get_u64x2();
4509         let b = self.state[operands.src2].get_u64x2();
4510         let mut result = [0; 4];
4511         for (i, d) in a.iter().chain(&b).zip(&mut result) {
4512             *d = (*i).try_into().unwrap_or(u32::MAX);
4513         }
4514         self.state[operands.dst].set_u32x4(result);
4515         ControlFlow::Continue(())
4516     }
4517 
4518     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4519     fn vfpromotelow(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4520         let a = self.state[src].get_f32x4();
4521         self.state[dst].set_f64x2([a[0].into(), a[1].into()]);
4522         ControlFlow::Continue(())
4523     }
4524 
4525     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4526     fn vfdemote(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4527         let a = self.state[src].get_f64x2();
4528         self.state[dst].set_f32x4([a[0] as f32, a[1] as f32, 0.0, 0.0]);
4529         ControlFlow::Continue(())
4530     }
4531 
4532     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4533     fn vsubi8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4534         let mut a = self.state[operands.src1].get_i8x16();
4535         let b = self.state[operands.src2].get_i8x16();
4536         for (a, b) in a.iter_mut().zip(b) {
4537             *a = a.wrapping_sub(b);
4538         }
4539         self.state[operands.dst].set_i8x16(a);
4540         ControlFlow::Continue(())
4541     }
4542 
4543     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4544     fn vsubi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4545         let mut a = self.state[operands.src1].get_i16x8();
4546         let b = self.state[operands.src2].get_i16x8();
4547         for (a, b) in a.iter_mut().zip(b) {
4548             *a = a.wrapping_sub(b);
4549         }
4550         self.state[operands.dst].set_i16x8(a);
4551         ControlFlow::Continue(())
4552     }
4553 
4554     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4555     fn vsubi32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4556         let mut a = self.state[operands.src1].get_i32x4();
4557         let b = self.state[operands.src2].get_i32x4();
4558         for (a, b) in a.iter_mut().zip(b) {
4559             *a = a.wrapping_sub(b);
4560         }
4561         self.state[operands.dst].set_i32x4(a);
4562         ControlFlow::Continue(())
4563     }
4564 
4565     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4566     fn vsubi64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4567         let mut a = self.state[operands.src1].get_i64x2();
4568         let b = self.state[operands.src2].get_i64x2();
4569         for (a, b) in a.iter_mut().zip(b) {
4570             *a = a.wrapping_sub(b);
4571         }
4572         self.state[operands.dst].set_i64x2(a);
4573         ControlFlow::Continue(())
4574     }
4575 
4576     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4577     fn vsubi8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4578         let mut a = self.state[operands.src1].get_i8x16();
4579         let b = self.state[operands.src2].get_i8x16();
4580         for (a, b) in a.iter_mut().zip(b) {
4581             *a = a.saturating_sub(b);
4582         }
4583         self.state[operands.dst].set_i8x16(a);
4584         ControlFlow::Continue(())
4585     }
4586 
4587     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4588     fn vsubu8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4589         let mut a = self.state[operands.src1].get_u8x16();
4590         let b = self.state[operands.src2].get_u8x16();
4591         for (a, b) in a.iter_mut().zip(b) {
4592             *a = a.saturating_sub(b);
4593         }
4594         self.state[operands.dst].set_u8x16(a);
4595         ControlFlow::Continue(())
4596     }
4597 
4598     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4599     fn vsubi16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4600         let mut a = self.state[operands.src1].get_i16x8();
4601         let b = self.state[operands.src2].get_i16x8();
4602         for (a, b) in a.iter_mut().zip(b) {
4603             *a = a.saturating_sub(b);
4604         }
4605         self.state[operands.dst].set_i16x8(a);
4606         ControlFlow::Continue(())
4607     }
4608 
4609     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4610     fn vsubu16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4611         let mut a = self.state[operands.src1].get_u16x8();
4612         let b = self.state[operands.src2].get_u16x8();
4613         for (a, b) in a.iter_mut().zip(b) {
4614             *a = a.saturating_sub(b);
4615         }
4616         self.state[operands.dst].set_u16x8(a);
4617         ControlFlow::Continue(())
4618     }
4619 
4620     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4621     fn vsubf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4622         let mut a = self.state[operands.src1].get_f64x2();
4623         let b = self.state[operands.src2].get_f64x2();
4624         for (a, b) in a.iter_mut().zip(b) {
4625             *a = *a - b;
4626         }
4627         self.state[operands.dst].set_f64x2(a);
4628         ControlFlow::Continue(())
4629     }
4630 
4631     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4632     fn vmuli8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4633         let mut a = self.state[operands.src1].get_i8x16();
4634         let b = self.state[operands.src2].get_i8x16();
4635         for (a, b) in a.iter_mut().zip(b) {
4636             *a = a.wrapping_mul(b);
4637         }
4638         self.state[operands.dst].set_i8x16(a);
4639         ControlFlow::Continue(())
4640     }
4641 
4642     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4643     fn vmuli16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4644         let mut a = self.state[operands.src1].get_i16x8();
4645         let b = self.state[operands.src2].get_i16x8();
4646         for (a, b) in a.iter_mut().zip(b) {
4647             *a = a.wrapping_mul(b);
4648         }
4649         self.state[operands.dst].set_i16x8(a);
4650         ControlFlow::Continue(())
4651     }
4652 
4653     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4654     fn vmuli32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4655         let mut a = self.state[operands.src1].get_i32x4();
4656         let b = self.state[operands.src2].get_i32x4();
4657         for (a, b) in a.iter_mut().zip(b) {
4658             *a = a.wrapping_mul(b);
4659         }
4660         self.state[operands.dst].set_i32x4(a);
4661         ControlFlow::Continue(())
4662     }
4663 
4664     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4665     fn vmuli64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4666         let mut a = self.state[operands.src1].get_i64x2();
4667         let b = self.state[operands.src2].get_i64x2();
4668         for (a, b) in a.iter_mut().zip(b) {
4669             *a = a.wrapping_mul(b);
4670         }
4671         self.state[operands.dst].set_i64x2(a);
4672         ControlFlow::Continue(())
4673     }
4674 
4675     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4676     fn vmulf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4677         let mut a = self.state[operands.src1].get_f64x2();
4678         let b = self.state[operands.src2].get_f64x2();
4679         for (a, b) in a.iter_mut().zip(b) {
4680             *a = *a * b;
4681         }
4682         self.state[operands.dst].set_f64x2(a);
4683         ControlFlow::Continue(())
4684     }
4685 
4686     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4687     fn vqmulrsi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4688         let mut a = self.state[operands.src1].get_i16x8();
4689         let b = self.state[operands.src2].get_i16x8();
4690         const MIN: i32 = i16::MIN as i32;
4691         const MAX: i32 = i16::MAX as i32;
4692         for (a, b) in a.iter_mut().zip(b) {
4693             let r = (i32::from(*a) * i32::from(b) + (1 << 14)) >> 15;
4694             *a = r.clamp(MIN, MAX) as i16;
4695         }
4696         self.state[operands.dst].set_i16x8(a);
4697         ControlFlow::Continue(())
4698     }
4699 
4700     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4701     fn vpopcnt8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4702         let a = self.state[src].get_u8x16();
4703         self.state[dst].set_u8x16(a.map(|i| i.count_ones() as u8));
4704         ControlFlow::Continue(())
4705     }
4706 
4707     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4708     fn xextractv8x16(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4709         let a = unsafe { *self.state[src].get_u8x16().get_unchecked(usize::from(lane)) };
4710         self.state[dst].set_u32(u32::from(a));
4711         ControlFlow::Continue(())
4712     }
4713 
4714     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4715     fn xextractv16x8(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4716         let a = unsafe { *self.state[src].get_u16x8().get_unchecked(usize::from(lane)) };
4717         self.state[dst].set_u32(u32::from(a));
4718         ControlFlow::Continue(())
4719     }
4720 
4721     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4722     fn xextractv32x4(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4723         let a = unsafe { *self.state[src].get_u32x4().get_unchecked(usize::from(lane)) };
4724         self.state[dst].set_u32(a);
4725         ControlFlow::Continue(())
4726     }
4727 
4728     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4729     fn xextractv64x2(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4730         let a = unsafe { *self.state[src].get_u64x2().get_unchecked(usize::from(lane)) };
4731         self.state[dst].set_u64(a);
4732         ControlFlow::Continue(())
4733     }
4734 
4735     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4736     fn fextractv32x4(&mut self, dst: FReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4737         let a = unsafe { *self.state[src].get_f32x4().get_unchecked(usize::from(lane)) };
4738         self.state[dst].set_f32(a);
4739         ControlFlow::Continue(())
4740     }
4741 
4742     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4743     fn fextractv64x2(&mut self, dst: FReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4744         let a = unsafe { *self.state[src].get_f64x2().get_unchecked(usize::from(lane)) };
4745         self.state[dst].set_f64(a);
4746         ControlFlow::Continue(())
4747     }
4748 
4749     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4750     fn vinsertx8(
4751         &mut self,
4752         operands: BinaryOperands<VReg, VReg, XReg>,
4753         lane: u8,
4754     ) -> ControlFlow<Done> {
4755         let mut a = self.state[operands.src1].get_u8x16();
4756         let b = self.state[operands.src2].get_u32() as u8;
4757         unsafe {
4758             *a.get_unchecked_mut(usize::from(lane)) = b;
4759         }
4760         self.state[operands.dst].set_u8x16(a);
4761         ControlFlow::Continue(())
4762     }
4763 
4764     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4765     fn vinsertx16(
4766         &mut self,
4767         operands: BinaryOperands<VReg, VReg, XReg>,
4768         lane: u8,
4769     ) -> ControlFlow<Done> {
4770         let mut a = self.state[operands.src1].get_u16x8();
4771         let b = self.state[operands.src2].get_u32() as u16;
4772         unsafe {
4773             *a.get_unchecked_mut(usize::from(lane)) = b;
4774         }
4775         self.state[operands.dst].set_u16x8(a);
4776         ControlFlow::Continue(())
4777     }
4778 
4779     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4780     fn vinsertx32(
4781         &mut self,
4782         operands: BinaryOperands<VReg, VReg, XReg>,
4783         lane: u8,
4784     ) -> ControlFlow<Done> {
4785         let mut a = self.state[operands.src1].get_u32x4();
4786         let b = self.state[operands.src2].get_u32();
4787         unsafe {
4788             *a.get_unchecked_mut(usize::from(lane)) = b;
4789         }
4790         self.state[operands.dst].set_u32x4(a);
4791         ControlFlow::Continue(())
4792     }
4793 
4794     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4795     fn vinsertx64(
4796         &mut self,
4797         operands: BinaryOperands<VReg, VReg, XReg>,
4798         lane: u8,
4799     ) -> ControlFlow<Done> {
4800         let mut a = self.state[operands.src1].get_u64x2();
4801         let b = self.state[operands.src2].get_u64();
4802         unsafe {
4803             *a.get_unchecked_mut(usize::from(lane)) = b;
4804         }
4805         self.state[operands.dst].set_u64x2(a);
4806         ControlFlow::Continue(())
4807     }
4808 
4809     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4810     fn vinsertf32(
4811         &mut self,
4812         operands: BinaryOperands<VReg, VReg, FReg>,
4813         lane: u8,
4814     ) -> ControlFlow<Done> {
4815         let mut a = self.state[operands.src1].get_f32x4();
4816         let b = self.state[operands.src2].get_f32();
4817         unsafe {
4818             *a.get_unchecked_mut(usize::from(lane)) = b;
4819         }
4820         self.state[operands.dst].set_f32x4(a);
4821         ControlFlow::Continue(())
4822     }
4823 
4824     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4825     fn vinsertf64(
4826         &mut self,
4827         operands: BinaryOperands<VReg, VReg, FReg>,
4828         lane: u8,
4829     ) -> ControlFlow<Done> {
4830         let mut a = self.state[operands.src1].get_f64x2();
4831         let b = self.state[operands.src2].get_f64();
4832         unsafe {
4833             *a.get_unchecked_mut(usize::from(lane)) = b;
4834         }
4835         self.state[operands.dst].set_f64x2(a);
4836         ControlFlow::Continue(())
4837     }
4838 
4839     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4840     fn veq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4841         let a = self.state[operands.src1].get_u8x16();
4842         let b = self.state[operands.src2].get_u8x16();
4843         let mut c = [0; 16];
4844         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4845             *c = if a == b { u8::MAX } else { 0 };
4846         }
4847         self.state[operands.dst].set_u8x16(c);
4848         ControlFlow::Continue(())
4849     }
4850 
4851     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4852     fn vneq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4853         let a = self.state[operands.src1].get_u8x16();
4854         let b = self.state[operands.src2].get_u8x16();
4855         let mut c = [0; 16];
4856         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4857             *c = if a != b { u8::MAX } else { 0 };
4858         }
4859         self.state[operands.dst].set_u8x16(c);
4860         ControlFlow::Continue(())
4861     }
4862 
4863     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4864     fn vslt8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4865         let a = self.state[operands.src1].get_i8x16();
4866         let b = self.state[operands.src2].get_i8x16();
4867         let mut c = [0; 16];
4868         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4869             *c = if a < b { u8::MAX } else { 0 };
4870         }
4871         self.state[operands.dst].set_u8x16(c);
4872         ControlFlow::Continue(())
4873     }
4874 
4875     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4876     fn vslteq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4877         let a = self.state[operands.src1].get_i8x16();
4878         let b = self.state[operands.src2].get_i8x16();
4879         let mut c = [0; 16];
4880         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4881             *c = if a <= b { u8::MAX } else { 0 };
4882         }
4883         self.state[operands.dst].set_u8x16(c);
4884         ControlFlow::Continue(())
4885     }
4886 
4887     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4888     fn vult8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4889         let a = self.state[operands.src1].get_u8x16();
4890         let b = self.state[operands.src2].get_u8x16();
4891         let mut c = [0; 16];
4892         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4893             *c = if a < b { u8::MAX } else { 0 };
4894         }
4895         self.state[operands.dst].set_u8x16(c);
4896         ControlFlow::Continue(())
4897     }
4898 
4899     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4900     fn vulteq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4901         let a = self.state[operands.src1].get_u8x16();
4902         let b = self.state[operands.src2].get_u8x16();
4903         let mut c = [0; 16];
4904         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4905             *c = if a <= b { u8::MAX } else { 0 };
4906         }
4907         self.state[operands.dst].set_u8x16(c);
4908         ControlFlow::Continue(())
4909     }
4910 
4911     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4912     fn veq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4913         let a = self.state[operands.src1].get_u16x8();
4914         let b = self.state[operands.src2].get_u16x8();
4915         let mut c = [0; 8];
4916         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4917             *c = if a == b { u16::MAX } else { 0 };
4918         }
4919         self.state[operands.dst].set_u16x8(c);
4920         ControlFlow::Continue(())
4921     }
4922 
4923     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4924     fn vneq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4925         let a = self.state[operands.src1].get_u16x8();
4926         let b = self.state[operands.src2].get_u16x8();
4927         let mut c = [0; 8];
4928         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4929             *c = if a != b { u16::MAX } else { 0 };
4930         }
4931         self.state[operands.dst].set_u16x8(c);
4932         ControlFlow::Continue(())
4933     }
4934 
4935     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4936     fn vslt16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4937         let a = self.state[operands.src1].get_i16x8();
4938         let b = self.state[operands.src2].get_i16x8();
4939         let mut c = [0; 8];
4940         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4941             *c = if a < b { u16::MAX } else { 0 };
4942         }
4943         self.state[operands.dst].set_u16x8(c);
4944         ControlFlow::Continue(())
4945     }
4946 
4947     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4948     fn vslteq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4949         let a = self.state[operands.src1].get_i16x8();
4950         let b = self.state[operands.src2].get_i16x8();
4951         let mut c = [0; 8];
4952         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4953             *c = if a <= b { u16::MAX } else { 0 };
4954         }
4955         self.state[operands.dst].set_u16x8(c);
4956         ControlFlow::Continue(())
4957     }
4958 
4959     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4960     fn vult16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4961         let a = self.state[operands.src1].get_u16x8();
4962         let b = self.state[operands.src2].get_u16x8();
4963         let mut c = [0; 8];
4964         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4965             *c = if a < b { u16::MAX } else { 0 };
4966         }
4967         self.state[operands.dst].set_u16x8(c);
4968         ControlFlow::Continue(())
4969     }
4970 
4971     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4972     fn vulteq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4973         let a = self.state[operands.src1].get_u16x8();
4974         let b = self.state[operands.src2].get_u16x8();
4975         let mut c = [0; 8];
4976         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4977             *c = if a <= b { u16::MAX } else { 0 };
4978         }
4979         self.state[operands.dst].set_u16x8(c);
4980         ControlFlow::Continue(())
4981     }
4982 
4983     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4984     fn veq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4985         let a = self.state[operands.src1].get_u32x4();
4986         let b = self.state[operands.src2].get_u32x4();
4987         let mut c = [0; 4];
4988         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4989             *c = if a == b { u32::MAX } else { 0 };
4990         }
4991         self.state[operands.dst].set_u32x4(c);
4992         ControlFlow::Continue(())
4993     }
4994 
4995     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4996     fn vneq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4997         let a = self.state[operands.src1].get_u32x4();
4998         let b = self.state[operands.src2].get_u32x4();
4999         let mut c = [0; 4];
5000         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5001             *c = if a != b { u32::MAX } else { 0 };
5002         }
5003         self.state[operands.dst].set_u32x4(c);
5004         ControlFlow::Continue(())
5005     }
5006 
5007     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5008     fn vslt32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5009         let a = self.state[operands.src1].get_i32x4();
5010         let b = self.state[operands.src2].get_i32x4();
5011         let mut c = [0; 4];
5012         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5013             *c = if a < b { u32::MAX } else { 0 };
5014         }
5015         self.state[operands.dst].set_u32x4(c);
5016         ControlFlow::Continue(())
5017     }
5018 
5019     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5020     fn vslteq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5021         let a = self.state[operands.src1].get_i32x4();
5022         let b = self.state[operands.src2].get_i32x4();
5023         let mut c = [0; 4];
5024         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5025             *c = if a <= b { u32::MAX } else { 0 };
5026         }
5027         self.state[operands.dst].set_u32x4(c);
5028         ControlFlow::Continue(())
5029     }
5030 
5031     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5032     fn vult32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5033         let a = self.state[operands.src1].get_u32x4();
5034         let b = self.state[operands.src2].get_u32x4();
5035         let mut c = [0; 4];
5036         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5037             *c = if a < b { u32::MAX } else { 0 };
5038         }
5039         self.state[operands.dst].set_u32x4(c);
5040         ControlFlow::Continue(())
5041     }
5042 
5043     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5044     fn vulteq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5045         let a = self.state[operands.src1].get_u32x4();
5046         let b = self.state[operands.src2].get_u32x4();
5047         let mut c = [0; 4];
5048         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5049             *c = if a <= b { u32::MAX } else { 0 };
5050         }
5051         self.state[operands.dst].set_u32x4(c);
5052         ControlFlow::Continue(())
5053     }
5054 
5055     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5056     fn veq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5057         let a = self.state[operands.src1].get_u64x2();
5058         let b = self.state[operands.src2].get_u64x2();
5059         let mut c = [0; 2];
5060         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5061             *c = if a == b { u64::MAX } else { 0 };
5062         }
5063         self.state[operands.dst].set_u64x2(c);
5064         ControlFlow::Continue(())
5065     }
5066 
5067     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5068     fn vneq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5069         let a = self.state[operands.src1].get_u64x2();
5070         let b = self.state[operands.src2].get_u64x2();
5071         let mut c = [0; 2];
5072         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5073             *c = if a != b { u64::MAX } else { 0 };
5074         }
5075         self.state[operands.dst].set_u64x2(c);
5076         ControlFlow::Continue(())
5077     }
5078 
5079     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5080     fn vslt64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5081         let a = self.state[operands.src1].get_i64x2();
5082         let b = self.state[operands.src2].get_i64x2();
5083         let mut c = [0; 2];
5084         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5085             *c = if a < b { u64::MAX } else { 0 };
5086         }
5087         self.state[operands.dst].set_u64x2(c);
5088         ControlFlow::Continue(())
5089     }
5090 
5091     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5092     fn vslteq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5093         let a = self.state[operands.src1].get_i64x2();
5094         let b = self.state[operands.src2].get_i64x2();
5095         let mut c = [0; 2];
5096         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5097             *c = if a <= b { u64::MAX } else { 0 };
5098         }
5099         self.state[operands.dst].set_u64x2(c);
5100         ControlFlow::Continue(())
5101     }
5102 
5103     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5104     fn vult64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5105         let a = self.state[operands.src1].get_u64x2();
5106         let b = self.state[operands.src2].get_u64x2();
5107         let mut c = [0; 2];
5108         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5109             *c = if a < b { u64::MAX } else { 0 };
5110         }
5111         self.state[operands.dst].set_u64x2(c);
5112         ControlFlow::Continue(())
5113     }
5114 
5115     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5116     fn vulteq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5117         let a = self.state[operands.src1].get_u64x2();
5118         let b = self.state[operands.src2].get_u64x2();
5119         let mut c = [0; 2];
5120         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5121             *c = if a <= b { u64::MAX } else { 0 };
5122         }
5123         self.state[operands.dst].set_u64x2(c);
5124         ControlFlow::Continue(())
5125     }
5126 
5127     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5128     fn vneg8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5129         let a = self.state[src].get_i8x16();
5130         self.state[dst].set_i8x16(a.map(|i| i.wrapping_neg()));
5131         ControlFlow::Continue(())
5132     }
5133 
5134     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5135     fn vneg16x8(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5136         let a = self.state[src].get_i16x8();
5137         self.state[dst].set_i16x8(a.map(|i| i.wrapping_neg()));
5138         ControlFlow::Continue(())
5139     }
5140 
5141     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5142     fn vneg32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5143         let a = self.state[src].get_i32x4();
5144         self.state[dst].set_i32x4(a.map(|i| i.wrapping_neg()));
5145         ControlFlow::Continue(())
5146     }
5147 
5148     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5149     fn vneg64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5150         let a = self.state[src].get_i64x2();
5151         self.state[dst].set_i64x2(a.map(|i| i.wrapping_neg()));
5152         ControlFlow::Continue(())
5153     }
5154 
5155     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5156     fn vnegf64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5157         let a = self.state[src].get_f64x2();
5158         self.state[dst].set_f64x2(a.map(|i| -i));
5159         ControlFlow::Continue(())
5160     }
5161 
5162     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5163     fn vmin8x16_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5164         let mut a = self.state[operands.src1].get_i8x16();
5165         let b = self.state[operands.src2].get_i8x16();
5166         for (a, b) in a.iter_mut().zip(&b) {
5167             *a = (*a).min(*b);
5168         }
5169         self.state[operands.dst].set_i8x16(a);
5170         ControlFlow::Continue(())
5171     }
5172 
5173     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5174     fn vmin8x16_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5175         let mut a = self.state[operands.src1].get_u8x16();
5176         let b = self.state[operands.src2].get_u8x16();
5177         for (a, b) in a.iter_mut().zip(&b) {
5178             *a = (*a).min(*b);
5179         }
5180         self.state[operands.dst].set_u8x16(a);
5181         ControlFlow::Continue(())
5182     }
5183 
5184     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5185     fn vmin16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5186         let mut a = self.state[operands.src1].get_i16x8();
5187         let b = self.state[operands.src2].get_i16x8();
5188         for (a, b) in a.iter_mut().zip(&b) {
5189             *a = (*a).min(*b);
5190         }
5191         self.state[operands.dst].set_i16x8(a);
5192         ControlFlow::Continue(())
5193     }
5194 
5195     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5196     fn vmin16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5197         let mut a = self.state[operands.src1].get_u16x8();
5198         let b = self.state[operands.src2].get_u16x8();
5199         for (a, b) in a.iter_mut().zip(&b) {
5200             *a = (*a).min(*b);
5201         }
5202         self.state[operands.dst].set_u16x8(a);
5203         ControlFlow::Continue(())
5204     }
5205 
5206     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5207     fn vmin32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5208         let mut a = self.state[operands.src1].get_i32x4();
5209         let b = self.state[operands.src2].get_i32x4();
5210         for (a, b) in a.iter_mut().zip(&b) {
5211             *a = (*a).min(*b);
5212         }
5213         self.state[operands.dst].set_i32x4(a);
5214         ControlFlow::Continue(())
5215     }
5216 
5217     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5218     fn vmin32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5219         let mut a = self.state[operands.src1].get_u32x4();
5220         let b = self.state[operands.src2].get_u32x4();
5221         for (a, b) in a.iter_mut().zip(&b) {
5222             *a = (*a).min(*b);
5223         }
5224         self.state[operands.dst].set_u32x4(a);
5225         ControlFlow::Continue(())
5226     }
5227 
5228     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5229     fn vmax8x16_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5230         let mut a = self.state[operands.src1].get_i8x16();
5231         let b = self.state[operands.src2].get_i8x16();
5232         for (a, b) in a.iter_mut().zip(&b) {
5233             *a = (*a).max(*b);
5234         }
5235         self.state[operands.dst].set_i8x16(a);
5236         ControlFlow::Continue(())
5237     }
5238 
5239     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5240     fn vmax8x16_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5241         let mut a = self.state[operands.src1].get_u8x16();
5242         let b = self.state[operands.src2].get_u8x16();
5243         for (a, b) in a.iter_mut().zip(&b) {
5244             *a = (*a).max(*b);
5245         }
5246         self.state[operands.dst].set_u8x16(a);
5247         ControlFlow::Continue(())
5248     }
5249 
5250     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5251     fn vmax16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5252         let mut a = self.state[operands.src1].get_i16x8();
5253         let b = self.state[operands.src2].get_i16x8();
5254         for (a, b) in a.iter_mut().zip(&b) {
5255             *a = (*a).max(*b);
5256         }
5257         self.state[operands.dst].set_i16x8(a);
5258         ControlFlow::Continue(())
5259     }
5260 
5261     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5262     fn vmax16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5263         let mut a = self.state[operands.src1].get_u16x8();
5264         let b = self.state[operands.src2].get_u16x8();
5265         for (a, b) in a.iter_mut().zip(&b) {
5266             *a = (*a).max(*b);
5267         }
5268         self.state[operands.dst].set_u16x8(a);
5269         ControlFlow::Continue(())
5270     }
5271 
5272     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5273     fn vmax32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5274         let mut a = self.state[operands.src1].get_i32x4();
5275         let b = self.state[operands.src2].get_i32x4();
5276         for (a, b) in a.iter_mut().zip(&b) {
5277             *a = (*a).max(*b);
5278         }
5279         self.state[operands.dst].set_i32x4(a);
5280         ControlFlow::Continue(())
5281     }
5282 
5283     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5284     fn vmax32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5285         let mut a = self.state[operands.src1].get_u32x4();
5286         let b = self.state[operands.src2].get_u32x4();
5287         for (a, b) in a.iter_mut().zip(&b) {
5288             *a = (*a).max(*b);
5289         }
5290         self.state[operands.dst].set_u32x4(a);
5291         ControlFlow::Continue(())
5292     }
5293 
5294     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5295     fn vabs8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5296         let a = self.state[src].get_i8x16();
5297         self.state[dst].set_i8x16(a.map(|i| i.wrapping_abs()));
5298         ControlFlow::Continue(())
5299     }
5300 
5301     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5302     fn vabs16x8(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5303         let a = self.state[src].get_i16x8();
5304         self.state[dst].set_i16x8(a.map(|i| i.wrapping_abs()));
5305         ControlFlow::Continue(())
5306     }
5307 
5308     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5309     fn vabs32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5310         let a = self.state[src].get_i32x4();
5311         self.state[dst].set_i32x4(a.map(|i| i.wrapping_abs()));
5312         ControlFlow::Continue(())
5313     }
5314 
5315     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5316     fn vabs64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5317         let a = self.state[src].get_i64x2();
5318         self.state[dst].set_i64x2(a.map(|i| i.wrapping_abs()));
5319         ControlFlow::Continue(())
5320     }
5321 
5322     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5323     fn vabsf32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5324         let a = self.state[src].get_f32x4();
5325         self.state[dst].set_f32x4(a.map(|i| i.wasm_abs()));
5326         ControlFlow::Continue(())
5327     }
5328 
5329     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5330     fn vabsf64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5331         let a = self.state[src].get_f64x2();
5332         self.state[dst].set_f64x2(a.map(|i| i.wasm_abs()));
5333         ControlFlow::Continue(())
5334     }
5335 
5336     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5337     fn vmaximumf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5338         let mut a = self.state[operands.src1].get_f32x4();
5339         let b = self.state[operands.src2].get_f32x4();
5340         for (a, b) in a.iter_mut().zip(&b) {
5341             *a = a.wasm_maximum(*b);
5342         }
5343         self.state[operands.dst].set_f32x4(a);
5344         ControlFlow::Continue(())
5345     }
5346 
5347     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5348     fn vmaximumf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5349         let mut a = self.state[operands.src1].get_f64x2();
5350         let b = self.state[operands.src2].get_f64x2();
5351         for (a, b) in a.iter_mut().zip(&b) {
5352             *a = a.wasm_maximum(*b);
5353         }
5354         self.state[operands.dst].set_f64x2(a);
5355         ControlFlow::Continue(())
5356     }
5357 
5358     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5359     fn vminimumf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5360         let mut a = self.state[operands.src1].get_f32x4();
5361         let b = self.state[operands.src2].get_f32x4();
5362         for (a, b) in a.iter_mut().zip(&b) {
5363             *a = a.wasm_minimum(*b);
5364         }
5365         self.state[operands.dst].set_f32x4(a);
5366         ControlFlow::Continue(())
5367     }
5368 
5369     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5370     fn vminimumf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5371         let mut a = self.state[operands.src1].get_f64x2();
5372         let b = self.state[operands.src2].get_f64x2();
5373         for (a, b) in a.iter_mut().zip(&b) {
5374             *a = a.wasm_minimum(*b);
5375         }
5376         self.state[operands.dst].set_f64x2(a);
5377         ControlFlow::Continue(())
5378     }
5379 
5380     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5381     fn vshuffle(&mut self, dst: VReg, src1: VReg, src2: VReg, mask: u128) -> ControlFlow<Done> {
5382         let a = self.state[src1].get_u8x16();
5383         let b = self.state[src2].get_u8x16();
5384         let result = mask.to_le_bytes().map(|m| {
5385             if m < 16 {
5386                 a[m as usize]
5387             } else {
5388                 b[m as usize - 16]
5389             }
5390         });
5391         self.state[dst].set_u8x16(result);
5392         ControlFlow::Continue(())
5393     }
5394 
5395     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5396     fn vswizzlei8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5397         let src1 = self.state[operands.src1].get_i8x16();
5398         let src2 = self.state[operands.src2].get_i8x16();
5399         let mut dst = [0i8; 16];
5400         for (i, &idx) in src2.iter().enumerate() {
5401             if (idx as usize) < 16 {
5402                 dst[i] = src1[idx as usize];
5403             } else {
5404                 dst[i] = 0
5405             }
5406         }
5407         self.state[operands.dst].set_i8x16(dst);
5408         ControlFlow::Continue(())
5409     }
5410 
5411     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5412     fn vavground8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5413         let mut a = self.state[operands.src1].get_u8x16();
5414         let b = self.state[operands.src2].get_u8x16();
5415         for (a, b) in a.iter_mut().zip(&b) {
5416             // use wider precision to avoid overflow
5417             *a = ((u32::from(*a) + u32::from(*b) + 1) / 2) as u8;
5418         }
5419         self.state[operands.dst].set_u8x16(a);
5420         ControlFlow::Continue(())
5421     }
5422 
5423     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5424     fn vavground16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5425         let mut a = self.state[operands.src1].get_u16x8();
5426         let b = self.state[operands.src2].get_u16x8();
5427         for (a, b) in a.iter_mut().zip(&b) {
5428             // use wider precision to avoid overflow
5429             *a = ((u32::from(*a) + u32::from(*b) + 1) / 2) as u16;
5430         }
5431         self.state[operands.dst].set_u16x8(a);
5432         ControlFlow::Continue(())
5433     }
5434 
5435     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5436     fn veqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5437         let a = self.state[operands.src1].get_f32x4();
5438         let b = self.state[operands.src2].get_f32x4();
5439         let mut c = [0; 4];
5440         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5441             *c = if a == b { u32::MAX } else { 0 };
5442         }
5443         self.state[operands.dst].set_u32x4(c);
5444         ControlFlow::Continue(())
5445     }
5446 
5447     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5448     fn vneqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5449         let a = self.state[operands.src1].get_f32x4();
5450         let b = self.state[operands.src2].get_f32x4();
5451         let mut c = [0; 4];
5452         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5453             *c = if a != b { u32::MAX } else { 0 };
5454         }
5455         self.state[operands.dst].set_u32x4(c);
5456         ControlFlow::Continue(())
5457     }
5458 
5459     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5460     fn vltf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5461         let a = self.state[operands.src1].get_f32x4();
5462         let b = self.state[operands.src2].get_f32x4();
5463         let mut c = [0; 4];
5464         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5465             *c = if a < b { u32::MAX } else { 0 };
5466         }
5467         self.state[operands.dst].set_u32x4(c);
5468         ControlFlow::Continue(())
5469     }
5470 
5471     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5472     fn vlteqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5473         let a = self.state[operands.src1].get_f32x4();
5474         let b = self.state[operands.src2].get_f32x4();
5475         let mut c = [0; 4];
5476         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5477             *c = if a <= b { u32::MAX } else { 0 };
5478         }
5479         self.state[operands.dst].set_u32x4(c);
5480         ControlFlow::Continue(())
5481     }
5482 
5483     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5484     fn veqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5485         let a = self.state[operands.src1].get_f64x2();
5486         let b = self.state[operands.src2].get_f64x2();
5487         let mut c = [0; 2];
5488         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5489             *c = if a == b { u64::MAX } else { 0 };
5490         }
5491         self.state[operands.dst].set_u64x2(c);
5492         ControlFlow::Continue(())
5493     }
5494 
5495     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5496     fn vneqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5497         let a = self.state[operands.src1].get_f64x2();
5498         let b = self.state[operands.src2].get_f64x2();
5499         let mut c = [0; 2];
5500         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5501             *c = if a != b { u64::MAX } else { 0 };
5502         }
5503         self.state[operands.dst].set_u64x2(c);
5504         ControlFlow::Continue(())
5505     }
5506 
5507     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5508     fn vltf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5509         let a = self.state[operands.src1].get_f64x2();
5510         let b = self.state[operands.src2].get_f64x2();
5511         let mut c = [0; 2];
5512         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5513             *c = if a < b { u64::MAX } else { 0 };
5514         }
5515         self.state[operands.dst].set_u64x2(c);
5516         ControlFlow::Continue(())
5517     }
5518 
5519     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5520     fn vlteqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5521         let a = self.state[operands.src1].get_f64x2();
5522         let b = self.state[operands.src2].get_f64x2();
5523         let mut c = [0; 2];
5524         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5525             *c = if a <= b { u64::MAX } else { 0 };
5526         }
5527         self.state[operands.dst].set_u64x2(c);
5528         ControlFlow::Continue(())
5529     }
5530 
5531     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5532     fn vfma32x4(&mut self, dst: VReg, a: VReg, b: VReg, c: VReg) -> ControlFlow<Done> {
5533         let mut a = self.state[a].get_f32x4();
5534         let b = self.state[b].get_f32x4();
5535         let c = self.state[c].get_f32x4();
5536         for ((a, b), c) in a.iter_mut().zip(b).zip(c) {
5537             *a = a.wasm_mul_add(b, c);
5538         }
5539         self.state[dst].set_f32x4(a);
5540         ControlFlow::Continue(())
5541     }
5542 
5543     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5544     fn vfma64x2(&mut self, dst: VReg, a: VReg, b: VReg, c: VReg) -> ControlFlow<Done> {
5545         let mut a = self.state[a].get_f64x2();
5546         let b = self.state[b].get_f64x2();
5547         let c = self.state[c].get_f64x2();
5548         for ((a, b), c) in a.iter_mut().zip(b).zip(c) {
5549             *a = a.wasm_mul_add(b, c);
5550         }
5551         self.state[dst].set_f64x2(a);
5552         ControlFlow::Continue(())
5553     }
5554 
5555     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5556     fn vselect(
5557         &mut self,
5558         dst: VReg,
5559         cond: XReg,
5560         if_nonzero: VReg,
5561         if_zero: VReg,
5562     ) -> ControlFlow<Done> {
5563         let result = if self.state[cond].get_u32() != 0 {
5564             self.state[if_nonzero]
5565         } else {
5566             self.state[if_zero]
5567         };
5568         self.state[dst] = result;
5569         ControlFlow::Continue(())
5570     }
5571 
5572     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5573     fn xadd128(
5574         &mut self,
5575         dst_lo: XReg,
5576         dst_hi: XReg,
5577         lhs_lo: XReg,
5578         lhs_hi: XReg,
5579         rhs_lo: XReg,
5580         rhs_hi: XReg,
5581     ) -> ControlFlow<Done> {
5582         let lhs = self.get_i128(lhs_lo, lhs_hi);
5583         let rhs = self.get_i128(rhs_lo, rhs_hi);
5584         let result = lhs.wrapping_add(rhs);
5585         self.set_i128(dst_lo, dst_hi, result);
5586         ControlFlow::Continue(())
5587     }
5588 
5589     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5590     fn xsub128(
5591         &mut self,
5592         dst_lo: XReg,
5593         dst_hi: XReg,
5594         lhs_lo: XReg,
5595         lhs_hi: XReg,
5596         rhs_lo: XReg,
5597         rhs_hi: XReg,
5598     ) -> ControlFlow<Done> {
5599         let lhs = self.get_i128(lhs_lo, lhs_hi);
5600         let rhs = self.get_i128(rhs_lo, rhs_hi);
5601         let result = lhs.wrapping_sub(rhs);
5602         self.set_i128(dst_lo, dst_hi, result);
5603         ControlFlow::Continue(())
5604     }
5605 
5606     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5607     fn xwidemul64_s(
5608         &mut self,
5609         dst_lo: XReg,
5610         dst_hi: XReg,
5611         lhs: XReg,
5612         rhs: XReg,
5613     ) -> ControlFlow<Done> {
5614         let lhs = self.state[lhs].get_i64();
5615         let rhs = self.state[rhs].get_i64();
5616         let result = i128::from(lhs).wrapping_mul(i128::from(rhs));
5617         self.set_i128(dst_lo, dst_hi, result);
5618         ControlFlow::Continue(())
5619     }
5620 
5621     #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5622     fn xwidemul64_u(
5623         &mut self,
5624         dst_lo: XReg,
5625         dst_hi: XReg,
5626         lhs: XReg,
5627         rhs: XReg,
5628     ) -> ControlFlow<Done> {
5629         let lhs = self.state[lhs].get_u64();
5630         let rhs = self.state[rhs].get_u64();
5631         let result = u128::from(lhs).wrapping_mul(u128::from(rhs));
5632         self.set_i128(dst_lo, dst_hi, result as i128);
5633         ControlFlow::Continue(())
5634     }
5635 }
5636