xref: /wasmtime-44.0.1/pulley/src/lib.rs (revision fe6f7a40)
1 //! The pulley bytecode for fast interpreters.
2 
3 #![cfg_attr(docsrs, feature(doc_cfg))]
4 #![cfg_attr(pulley_tail_calls, feature(explicit_tail_calls))]
5 #![cfg_attr(pulley_tail_calls, allow(incomplete_features, unstable_features))]
6 #![deny(missing_docs)]
7 #![no_std]
8 
9 #[cfg(feature = "std")]
10 #[macro_use]
11 extern crate std;
12 
13 #[cfg(feature = "decode")]
14 extern crate alloc;
15 
16 /// Calls the given macro with each opcode.
17 ///
18 /// # Instruction Guidelines
19 ///
20 /// We're inventing an instruction set here which naturally brings a whole set
21 /// of design questions. Note that this is explicitly intended to be only ever
22 /// used for Pulley where there are a different set of design constraints than
23 /// other instruction sets (e.g. general-purpose CPU ISAs). Some examples of
24 /// constraints for Pulley are:
25 ///
26 /// * Instructions must be portable to many architectures.
27 /// * The Pulley ISA is mostly target-independent as the compilation target is
28 ///   currently only parameterized on pointer width and endianness.
29 /// * Pulley instructions should be a balance of time-to-decode and code size. For
30 ///   example super fancy bit-packing tricks might be tough to decode in
31 ///   software but might be worthwhile if it's quite common and greatly reduces
32 ///   the size of bytecode. There's not a hard-and-fast answer here, but a
33 ///   balance to be made.
34 /// * Many "macro ops" are present to reduce the size of compiled bytecode so
35 ///   there is a wide set of duplicate functionality between opcodes (and this
36 ///   is expected).
37 ///
38 /// Given all this it's also useful to have a set of guidelines used to name and
39 /// develop Pulley instructions. As of the time of this writing it's still
40 /// pretty early days for Pulley so some of these guidelines may change over
41 /// time. Additionally instructions don't necessarily all follow these
42 /// conventions and that may also change over time. With that in mind, here's a
43 /// rough set of guidelines:
44 ///
45 /// * Most instructions are prefixed with `x`, `f`, or `v`, indicating which
46 ///   type of register they're operating on. (e.g. `xadd32` operates on the `x`
47 ///   integer registers and `fadd32` operates on the `f` float registers).
48 ///
49 /// * Most instructions are suffixed or otherwise contain the bit width they're
50 ///   operating on. For example `xadd32` is a 32-bit addition.
51 ///
52 /// * If an instruction operates on signed or unsigned data (such as division
53 ///   and remainder), then the instruction is suffixed with `_s` or `_u`.
54 ///
55 /// * Instructions operate on either 32 or 64-bit parts of a register.
56 ///   Instructions modifying only 32-bits of a register always modify the "low"
57 ///   part of a register and leave the upper part unmodified. This is intended
58 ///   to help 32-bit platforms where if most operations are 32-bit there's no
59 ///   need for extra instructions to sign or zero extend and modify the upper
60 ///   half of the register.
61 ///
62 /// * Binops use `BinaryOperands<T>` for the destination and argument registers.
63 ///
64 /// * Instructions operating on memory contain a few pieces of information:
65 ///
66 ///   ```text
67 ///   xload16le_u32_o32
68 ///   │└─┬┘└┤└┤ └┬┘ └┬┘
69 ///   │  │  │ │  │   ▼
70 ///   │  │  │ │  │   addressing mode
71 ///   │  │  │ │  ▼
72 ///   │  │  │ │  width of register modified + sign-extension (optional)
73 ///   │  │  │ ▼
74 ///   │  │  │ endianness of the operation (le/be)
75 ///   │  │  ▼
76 ///   │  │  bit-width of the operation
77 ///   │  ▼
78 ///   │  what's happening (load/store)
79 ///   ▼
80 ///   register being operated on (x/f/v)
81 ///   ```
82 ///
83 /// More guidelines might get added here over time, and if you have any
84 /// questions feel free to raise them and we can try to add them here as well!
85 #[macro_export]
86 macro_rules! for_each_op {
87     ( $macro:ident ) => {
88         $macro! {
89             /// No-operation.
90             nop = Nop;
91 
92             /// Transfer control to the address in the `lr` register.
93             ret = Ret;
94 
95             /// Transfer control to the PC at the given offset and set the `lr`
96             /// register to the PC just after this instruction.
97             ///
98             /// This instruction generally assumes that the Pulley ABI is being
99             /// respected where arguments are in argument registers (starting at
100             /// x0 for integer arguments) and results are in result registers.
101             /// This instruction itself assumes that all arguments are already in
102             /// their registers. Subsequent instructions below enable moving
103             /// arguments into the correct registers as part of the same call
104             /// instruction.
105             call = Call { offset: PcRelOffset };
106             /// Like `call`, but also `x0 = arg1`
107             call1 = Call1 { arg1: XReg, offset: PcRelOffset };
108             /// Like `call`, but also `x0, x1 = arg1, arg2`
109             call2 = Call2 { arg1: XReg, arg2: XReg, offset: PcRelOffset };
110             /// Like `call`, but also `x0, x1, x2 = arg1, arg2, arg3`
111             call3 = Call3 { arg1: XReg, arg2: XReg, arg3: XReg, offset: PcRelOffset };
112             /// Like `call`, but also `x0, x1, x2, x3 = arg1, arg2, arg3, arg4`
113             call4 = Call4 { arg1: XReg, arg2: XReg, arg3: XReg, arg4: XReg, offset: PcRelOffset };
114 
115             /// Transfer control to the PC in `reg` and set `lr` to the PC just
116             /// after this instruction.
117             call_indirect = CallIndirect { reg: XReg };
118 
119             /// Unconditionally transfer control to the PC at the given offset.
120             jump = Jump { offset: PcRelOffset };
121 
122             /// Unconditionally transfer control to the PC at specified
123             /// register.
124             xjump = XJump { reg: XReg };
125 
126             /// Conditionally transfer control to the given PC offset if
127             /// `low32(cond)` contains a non-zero value.
128             br_if32 = BrIf { cond: XReg, offset: PcRelOffset };
129 
130             /// Conditionally transfer control to the given PC offset if
131             /// `low32(cond)` contains a zero value.
132             br_if_not32 = BrIfNot { cond: XReg, offset: PcRelOffset };
133 
134             /// Branch if `a == b`.
135             br_if_xeq32 = BrIfXeq32 { a: XReg, b: XReg, offset: PcRelOffset };
136             /// Branch if `a != b`.
137             br_if_xneq32 = BrIfXneq32 { a: XReg, b: XReg, offset: PcRelOffset };
138             /// Branch if signed `a < b`.
139             br_if_xslt32 = BrIfXslt32 { a: XReg, b: XReg, offset: PcRelOffset };
140             /// Branch if signed `a <= b`.
141             br_if_xslteq32 = BrIfXslteq32 { a: XReg, b: XReg, offset: PcRelOffset };
142             /// Branch if unsigned `a < b`.
143             br_if_xult32 = BrIfXult32 { a: XReg, b: XReg, offset: PcRelOffset };
144             /// Branch if unsigned `a <= b`.
145             br_if_xulteq32 = BrIfXulteq32 { a: XReg, b: XReg, offset: PcRelOffset };
146             /// Branch if `a == b`.
147             br_if_xeq64 = BrIfXeq64 { a: XReg, b: XReg, offset: PcRelOffset };
148             /// Branch if `a != b`.
149             br_if_xneq64 = BrIfXneq64 { a: XReg, b: XReg, offset: PcRelOffset };
150             /// Branch if signed `a < b`.
151             br_if_xslt64 = BrIfXslt64 { a: XReg, b: XReg, offset: PcRelOffset };
152             /// Branch if signed `a <= b`.
153             br_if_xslteq64 = BrIfXslteq64 { a: XReg, b: XReg, offset: PcRelOffset };
154             /// Branch if unsigned `a < b`.
155             br_if_xult64 = BrIfXult64 { a: XReg, b: XReg, offset: PcRelOffset };
156             /// Branch if unsigned `a <= b`.
157             br_if_xulteq64 = BrIfXulteq64 { a: XReg, b: XReg, offset: PcRelOffset };
158 
159             /// Branch if `a == b`.
160             br_if_xeq32_i8 = BrIfXeq32I8 { a: XReg, b: i8, offset: PcRelOffset };
161             /// Branch if `a == b`.
162             br_if_xeq32_i32 = BrIfXeq32I32 { a: XReg, b: i32, offset: PcRelOffset };
163             /// Branch if `a != b`.
164             br_if_xneq32_i8 = BrIfXneq32I8 { a: XReg, b: i8, offset: PcRelOffset };
165             /// Branch if `a != b`.
166             br_if_xneq32_i32 = BrIfXneq32I32 { a: XReg, b: i32, offset: PcRelOffset };
167             /// Branch if signed `a < b`.
168             br_if_xslt32_i8 = BrIfXslt32I8 { a: XReg, b: i8, offset: PcRelOffset };
169             /// Branch if signed `a < b`.
170             br_if_xslt32_i32 = BrIfXslt32I32 { a: XReg, b: i32, offset: PcRelOffset };
171             /// Branch if signed `a > b`.
172             br_if_xsgt32_i8 = BrIfXsgt32I8 { a: XReg, b: i8, offset: PcRelOffset };
173             /// Branch if signed `a > b`.
174             br_if_xsgt32_i32 = BrIfXsgt32I32 { a: XReg, b: i32, offset: PcRelOffset };
175             /// Branch if signed `a <= b`.
176             br_if_xslteq32_i8 = BrIfXslteq32I8 { a: XReg, b: i8, offset: PcRelOffset };
177             /// Branch if signed `a <= b`.
178             br_if_xslteq32_i32 = BrIfXslteq32I32 { a: XReg, b: i32, offset: PcRelOffset };
179             /// Branch if signed `a >= b`.
180             br_if_xsgteq32_i8 = BrIfXsgteq32I8 { a: XReg, b: i8, offset: PcRelOffset };
181             /// Branch if signed `a >= b`.
182             br_if_xsgteq32_i32 = BrIfXsgteq32I32 { a: XReg, b: i32, offset: PcRelOffset };
183             /// Branch if unsigned `a < b`.
184             br_if_xult32_u8 = BrIfXult32U8 { a: XReg, b: u8, offset: PcRelOffset };
185             /// Branch if unsigned `a < b`.
186             br_if_xult32_u32 = BrIfXult32U32 { a: XReg, b: u32, offset: PcRelOffset };
187             /// Branch if unsigned `a <= b`.
188             br_if_xulteq32_u8 = BrIfXulteq32U8 { a: XReg, b: u8, offset: PcRelOffset };
189             /// Branch if unsigned `a <= b`.
190             br_if_xulteq32_u32 = BrIfXulteq32U32 { a: XReg, b: u32, offset: PcRelOffset };
191             /// Branch if unsigned `a > b`.
192             br_if_xugt32_u8 = BrIfXugt32U8 { a: XReg, b: u8, offset: PcRelOffset };
193             /// Branch if unsigned `a > b`.
194             br_if_xugt32_u32 = BrIfXugt32U32 { a: XReg, b: u32, offset: PcRelOffset };
195             /// Branch if unsigned `a >= b`.
196             br_if_xugteq32_u8 = BrIfXugteq32U8 { a: XReg, b: u8, offset: PcRelOffset };
197             /// Branch if unsigned `a >= b`.
198             br_if_xugteq32_u32 = BrIfXugteq32U32 { a: XReg, b: u32, offset: PcRelOffset };
199 
200             /// Branch if `a == b`.
201             br_if_xeq64_i8 = BrIfXeq64I8 { a: XReg, b: i8, offset: PcRelOffset };
202             /// Branch if `a == b`.
203             br_if_xeq64_i32 = BrIfXeq64I32 { a: XReg, b: i32, offset: PcRelOffset };
204             /// Branch if `a != b`.
205             br_if_xneq64_i8 = BrIfXneq64I8 { a: XReg, b: i8, offset: PcRelOffset };
206             /// Branch if `a != b`.
207             br_if_xneq64_i32 = BrIfXneq64I32 { a: XReg, b: i32, offset: PcRelOffset };
208             /// Branch if signed `a < b`.
209             br_if_xslt64_i8 = BrIfXslt64I8 { a: XReg, b: i8, offset: PcRelOffset };
210             /// Branch if signed `a < b`.
211             br_if_xslt64_i32 = BrIfXslt64I32 { a: XReg, b: i32, offset: PcRelOffset };
212             /// Branch if signed `a > b`.
213             br_if_xsgt64_i8 = BrIfXsgt64I8 { a: XReg, b: i8, offset: PcRelOffset };
214             /// Branch if signed `a > b`.
215             br_if_xsgt64_i32 = BrIfXsgt64I32 { a: XReg, b: i32, offset: PcRelOffset };
216             /// Branch if signed `a <= b`.
217             br_if_xslteq64_i8 = BrIfXslteq64I8 { a: XReg, b: i8, offset: PcRelOffset };
218             /// Branch if signed `a <= b`.
219             br_if_xslteq64_i32 = BrIfXslteq64I32 { a: XReg, b: i32, offset: PcRelOffset };
220             /// Branch if signed `a >= b`.
221             br_if_xsgteq64_i8 = BrIfXsgteq64I8 { a: XReg, b: i8, offset: PcRelOffset };
222             /// Branch if signed `a >= b`.
223             br_if_xsgteq64_i32 = BrIfXsgteq64I32 { a: XReg, b: i32, offset: PcRelOffset };
224             /// Branch if unsigned `a < b`.
225             br_if_xult64_u8 = BrIfXult64U8 { a: XReg, b: u8, offset: PcRelOffset };
226             /// Branch if unsigned `a < b`.
227             br_if_xult64_u32 = BrIfXult64U32 { a: XReg, b: u32, offset: PcRelOffset };
228             /// Branch if unsigned `a <= b`.
229             br_if_xulteq64_u8 = BrIfXulteq64U8 { a: XReg, b: u8, offset: PcRelOffset };
230             /// Branch if unsigned `a <= b`.
231             br_if_xulteq64_u32 = BrIfXulteq64U32 { a: XReg, b: u32, offset: PcRelOffset };
232             /// Branch if unsigned `a > b`.
233             br_if_xugt64_u8 = BrIfXugt64U8 { a: XReg, b: u8, offset: PcRelOffset };
234             /// Branch if unsigned `a > b`.
235             br_if_xugt64_u32 = BrIfXugt64U32 { a: XReg, b: u32, offset: PcRelOffset };
236             /// Branch if unsigned `a >= b`.
237             br_if_xugteq64_u8 = BrIfXugteq64U8 { a: XReg, b: u8, offset: PcRelOffset };
238             /// Branch if unsigned `a >= b`.
239             br_if_xugteq64_u32 = BrIfXugteq64U32 { a: XReg, b: u32, offset: PcRelOffset };
240 
241             /// Branch to the label indicated by `low32(idx)`.
242             ///
243             /// After this instruction are `amt` instances of `PcRelOffset`
244             /// and the `idx` selects which one will be branched to. The value
245             /// of `idx` is clamped to `amt - 1` (e.g. the last offset is the
246             /// "default" one).
247             br_table32 = BrTable32 { idx: XReg, amt: u32 };
248 
249             /// Move between `x` registers.
250             xmov = Xmov { dst: XReg, src: XReg };
251 
252             /// Set `dst = 0`
253             xzero = Xzero { dst: XReg };
254             /// Set `dst = 1`
255             xone = Xone { dst: XReg };
256             /// Set `dst = sign_extend(imm8)`.
257             xconst8 = Xconst8 { dst: XReg, imm: i8 };
258             /// Set `dst = sign_extend(imm16)`.
259             xconst16 = Xconst16 { dst: XReg, imm: i16 };
260             /// Set `dst = sign_extend(imm32)`.
261             xconst32 = Xconst32 { dst: XReg, imm: i32 };
262             /// Set `dst = imm64`.
263             xconst64 = Xconst64 { dst: XReg, imm: i64 };
264 
265             /// 32-bit wrapping addition: `low32(dst) = low32(src1) + low32(src2)`.
266             ///
267             /// The upper 32-bits of `dst` are unmodified.
268             xadd32 = Xadd32 { operands: BinaryOperands<XReg> };
269             /// Same as `xadd32` but `src2` is a zero-extended 8-bit immediate.
270             xadd32_u8 = Xadd32U8 { dst: XReg, src1: XReg, src2: u8 };
271             /// Same as `xadd32` but `src2` is a 32-bit immediate.
272             xadd32_u32 = Xadd32U32 { dst: XReg, src1: XReg, src2: u32 };
273 
274             /// 64-bit wrapping addition: `dst = src1 + src2`.
275             xadd64 = Xadd64 { operands: BinaryOperands<XReg> };
276             /// Same as `xadd64` but `src2` is a zero-extended 8-bit immediate.
277             xadd64_u8 = Xadd64U8 { dst: XReg, src1: XReg, src2: u8 };
278             /// Same as `xadd64` but `src2` is a zero-extended 32-bit immediate.
279             xadd64_u32 = Xadd64U32 { dst: XReg, src1: XReg, src2: u32 };
280 
281             /// `low32(dst) = low32(src1) * low32(src2) + low32(src3)`
282             xmadd32 = Xmadd32 { dst: XReg, src1: XReg, src2: XReg, src3: XReg };
283             /// `dst = src1 * src2 + src3`
284             xmadd64 = Xmadd64 { dst: XReg, src1: XReg, src2: XReg, src3: XReg };
285 
286             /// 32-bit wrapping subtraction: `low32(dst) = low32(src1) - low32(src2)`.
287             ///
288             /// The upper 32-bits of `dst` are unmodified.
289             xsub32 = Xsub32 { operands: BinaryOperands<XReg> };
290             /// Same as `xsub32` but `src2` is a zero-extended 8-bit immediate.
291             xsub32_u8 = Xsub32U8 { dst: XReg, src1: XReg, src2: u8 };
292             /// Same as `xsub32` but `src2` is a 32-bit immediate.
293             xsub32_u32 = Xsub32U32 { dst: XReg, src1: XReg, src2: u32 };
294 
295             /// 64-bit wrapping subtraction: `dst = src1 - src2`.
296             xsub64 = Xsub64 { operands: BinaryOperands<XReg> };
297             /// Same as `xsub64` but `src2` is a zero-extended 8-bit immediate.
298             xsub64_u8 = Xsub64U8 { dst: XReg, src1: XReg, src2: u8 };
299             /// Same as `xsub64` but `src2` is a zero-extended 32-bit immediate.
300             xsub64_u32 = Xsub64U32 { dst: XReg, src1: XReg, src2: u32 };
301 
302             /// `low32(dst) = low32(src1) * low32(src2)`
303             xmul32 = XMul32 { operands: BinaryOperands<XReg> };
304             /// Same as `xmul32` but `src2` is a sign-extended 8-bit immediate.
305             xmul32_s8 = Xmul32S8 { dst: XReg, src1: XReg, src2: i8 };
306             /// Same as `xmul32` but `src2` is a sign-extended 32-bit immediate.
307             xmul32_s32 = Xmul32S32 { dst: XReg, src1: XReg, src2: i32 };
308 
309             /// `dst = src1 * src2`
310             xmul64 = XMul64 { operands: BinaryOperands<XReg> };
311             /// Same as `xmul64` but `src2` is a sign-extended 8-bit immediate.
312             xmul64_s8 = Xmul64S8 { dst: XReg, src1: XReg, src2: i8 };
313             /// Same as `xmul64` but `src2` is a sign-extended 32-bit immediate.
314             xmul64_s32 = Xmul64S32 { dst: XReg, src1: XReg, src2: i32 };
315 
316             /// `low32(dst) = trailing_zeros(low32(src))`
317             xctz32 = Xctz32 { dst: XReg, src: XReg };
318             /// `dst = trailing_zeros(src)`
319             xctz64 = Xctz64 { dst: XReg, src: XReg };
320 
321             /// `low32(dst) = leading_zeros(low32(src))`
322             xclz32 = Xclz32 { dst: XReg, src: XReg };
323             /// `dst = leading_zeros(src)`
324             xclz64 = Xclz64 { dst: XReg, src: XReg };
325 
326             /// `low32(dst) = count_ones(low32(src))`
327             xpopcnt32 = Xpopcnt32 { dst: XReg, src: XReg };
328             /// `dst = count_ones(src)`
329             xpopcnt64 = Xpopcnt64 { dst: XReg, src: XReg };
330 
331             /// `low32(dst) = rotate_left(low32(src1), low32(src2))`
332             xrotl32 = Xrotl32 { operands: BinaryOperands<XReg> };
333             /// `dst = rotate_left(src1, src2)`
334             xrotl64 = Xrotl64 { operands: BinaryOperands<XReg> };
335 
336             /// `low32(dst) = rotate_right(low32(src1), low32(src2))`
337             xrotr32 = Xrotr32 { operands: BinaryOperands<XReg> };
338             /// `dst = rotate_right(src1, src2)`
339             xrotr64 = Xrotr64 { operands: BinaryOperands<XReg> };
340 
341             /// `low32(dst) = low32(src1) << low5(src2)`
342             xshl32 = Xshl32 { operands: BinaryOperands<XReg> };
343             /// `low32(dst) = low32(src1) >> low5(src2)` (signed)
344             xshr32_s = Xshr32S { operands: BinaryOperands<XReg> };
345             /// `low32(dst) = low32(src1) >> low5(src2)` (unsigned)
346             xshr32_u = Xshr32U { operands: BinaryOperands<XReg> };
347             /// `dst = src1 << low6(src2)`
348             xshl64 = Xshl64 { operands: BinaryOperands<XReg> };
349             /// `dst = src1 >> low6(src2)` (signed)
350             xshr64_s = Xshr64S { operands: BinaryOperands<XReg> };
351             /// `dst = src1 >> low6(src2)` (unsigned)
352             xshr64_u = Xshr64U { operands: BinaryOperands<XReg> };
353 
354             /// `low32(dst) = low32(src1) << low5(src2)`
355             xshl32_u6 = Xshl32U6 { operands: BinaryOperands<XReg, XReg, U6> };
356             /// `low32(dst) = low32(src1) >> low5(src2)` (signed)
357             xshr32_s_u6 = Xshr32SU6 { operands: BinaryOperands<XReg, XReg, U6> };
358             /// `low32(dst) = low32(src1) >> low5(src2)` (unsigned)
359             xshr32_u_u6 = Xshr32UU6 { operands: BinaryOperands<XReg, XReg, U6> };
360             /// `dst = src1 << low6(src2)`
361             xshl64_u6 = Xshl64U6 { operands: BinaryOperands<XReg, XReg, U6> };
362             /// `dst = src1 >> low6(src2)` (signed)
363             xshr64_s_u6 = Xshr64SU6 { operands: BinaryOperands<XReg, XReg, U6> };
364             /// `dst = src1 >> low6(src2)` (unsigned)
365             xshr64_u_u6 = Xshr64UU6 { operands: BinaryOperands<XReg, XReg, U6> };
366 
367             /// `low32(dst) = -low32(src)`
368             xneg32 = Xneg32 { dst: XReg, src: XReg };
369             /// `dst = -src`
370             xneg64 = Xneg64 { dst: XReg, src: XReg };
371 
372             /// `low32(dst) = src1 == src2`
373             xeq64 = Xeq64 { operands: BinaryOperands<XReg> };
374             /// `low32(dst) = src1 != src2`
375             xneq64 = Xneq64 { operands: BinaryOperands<XReg> };
376             /// `low32(dst) = src1 < src2` (signed)
377             xslt64 = Xslt64 { operands: BinaryOperands<XReg> };
378             /// `low32(dst) = src1 <= src2` (signed)
379             xslteq64 = Xslteq64 { operands: BinaryOperands<XReg> };
380             /// `low32(dst) = src1 < src2` (unsigned)
381             xult64 = Xult64 { operands: BinaryOperands<XReg> };
382             /// `low32(dst) = src1 <= src2` (unsigned)
383             xulteq64 = Xulteq64 { operands: BinaryOperands<XReg> };
384             /// `low32(dst) = low32(src1) == low32(src2)`
385             xeq32 = Xeq32 { operands: BinaryOperands<XReg> };
386             /// `low32(dst) = low32(src1) != low32(src2)`
387             xneq32 = Xneq32 { operands: BinaryOperands<XReg> };
388             /// `low32(dst) = low32(src1) < low32(src2)` (signed)
389             xslt32 = Xslt32 { operands: BinaryOperands<XReg> };
390             /// `low32(dst) = low32(src1) <= low32(src2)` (signed)
391             xslteq32 = Xslteq32 { operands: BinaryOperands<XReg> };
392             /// `low32(dst) = low32(src1) < low32(src2)` (unsigned)
393             xult32 = Xult32 { operands: BinaryOperands<XReg> };
394             /// `low32(dst) = low32(src1) <= low32(src2)` (unsigned)
395             xulteq32 = Xulteq32 { operands: BinaryOperands<XReg> };
396 
397             // Loads/stores with various addressing modes. Note that each style
398             // of addressing mode is split to its own suite of instructions to
399             // simplify the implementation of each opcode and avoid internal
400             // branching when using one addressing mode vs another.
401             //
402             // Note that big-endian, float, and vector loads are deferred to
403             // the "extended" opcode set below.
404 
405             /// `low32(dst) = zext_8_32(*addr)`
406             xload8_u32_o32 = XLoad8U32O32 { dst: XReg, addr: AddrO32 };
407             /// `low32(dst) = sext_8_32(*addr)`
408             xload8_s32_o32 = XLoad8S32O32 { dst: XReg, addr: AddrO32 };
409             /// `low32(dst) = zext_16_32(*addr)`
410             xload16le_u32_o32 = XLoad16LeU32O32 { dst: XReg, addr: AddrO32 };
411             /// `low32(dst) = sext_16_32(*addr)`
412             xload16le_s32_o32 = XLoad16LeS32O32 { dst: XReg, addr: AddrO32 };
413             /// `low32(dst) = *addr`
414             xload32le_o32 = XLoad32LeO32 { dst: XReg, addr: AddrO32 };
415             /// `dst = *addr`
416             xload64le_o32 = XLoad64LeO32 { dst: XReg, addr: AddrO32 };
417             /// `*addr = low8(src)`
418             xstore8_o32 = XStore8O32 { addr: AddrO32, src: XReg };
419             /// `*addr = low16(src)`
420             xstore16le_o32 = XStore16LeO32 { addr: AddrO32, src: XReg };
421             /// `*addr = low32(src)`
422             xstore32le_o32 = XStore32LeO32 { addr: AddrO32, src: XReg };
423             /// `*addr = src`
424             xstore64le_o32 = XStore64LeO32 { addr: AddrO32, src: XReg };
425 
426             /// `low32(dst) = zext_8_32(*addr)`
427             xload8_u32_z = XLoad8U32Z { dst: XReg, addr: AddrZ };
428             /// `low32(dst) = sext_8_32(*addr)`
429             xload8_s32_z = XLoad8S32Z { dst: XReg, addr: AddrZ };
430             /// `low32(dst) = zext_16_32(*addr)`
431             xload16le_u32_z = XLoad16LeU32Z { dst: XReg, addr: AddrZ };
432             /// `low32(dst) = sext_16_32(*addr)`
433             xload16le_s32_z = XLoad16LeS32Z { dst: XReg, addr: AddrZ };
434             /// `low32(dst) = *addr`
435             xload32le_z = XLoad32LeZ { dst: XReg, addr: AddrZ };
436             /// `dst = *addr`
437             xload64le_z = XLoad64LeZ { dst: XReg, addr: AddrZ };
438             /// `*addr = low8(src)`
439             xstore8_z = XStore8Z { addr: AddrZ, src: XReg };
440             /// `*addr = low16(src)`
441             xstore16le_z = XStore16LeZ { addr: AddrZ, src: XReg };
442             /// `*addr = low32(src)`
443             xstore32le_z = XStore32LeZ { addr: AddrZ, src: XReg };
444             /// `*addr = src`
445             xstore64le_z = XStore64LeZ { addr: AddrZ, src: XReg };
446 
447             /// `low32(dst) = zext_8_32(*addr)`
448             xload8_u32_g32 = XLoad8U32G32 { dst: XReg, addr: AddrG32 };
449             /// `low32(dst) = sext_8_32(*addr)`
450             xload8_s32_g32 = XLoad8S32G32 { dst: XReg, addr: AddrG32 };
451             /// `low32(dst) = zext_16_32(*addr)`
452             xload16le_u32_g32 = XLoad16LeU32G32 { dst: XReg, addr: AddrG32 };
453             /// `low32(dst) = sext_16_32(*addr)`
454             xload16le_s32_g32 = XLoad16LeS32G32 { dst: XReg, addr: AddrG32 };
455             /// `low32(dst) = *addr`
456             xload32le_g32 = XLoad32LeG32 { dst: XReg, addr: AddrG32 };
457             /// `dst = *addr`
458             xload64le_g32 = XLoad64LeG32 { dst: XReg, addr: AddrG32 };
459             /// `*addr = low8(src)`
460             xstore8_g32 = XStore8G32 { addr: AddrG32, src: XReg };
461             /// `*addr = low16(src)`
462             xstore16le_g32 = XStore16LeG32 { addr: AddrG32, src: XReg };
463             /// `*addr = low32(src)`
464             xstore32le_g32 = XStore32LeG32 { addr: AddrG32, src: XReg };
465             /// `*addr = src`
466             xstore64le_g32 = XStore64LeG32 { addr: AddrG32, src: XReg };
467 
468             /// `low32(dst) = zext_8_32(*addr)`
469             xload8_u32_g32bne = XLoad8U32G32Bne { dst: XReg, addr: AddrG32Bne };
470             /// `low32(dst) = sext_8_32(*addr)`
471             xload8_s32_g32bne = XLoad8S32G32Bne { dst: XReg, addr: AddrG32Bne };
472             /// `low32(dst) = zext_16_32(*addr)`
473             xload16le_u32_g32bne = XLoad16LeU32G32Bne { dst: XReg, addr: AddrG32Bne };
474             /// `low32(dst) = sext_16_32(*addr)`
475             xload16le_s32_g32bne = XLoad16LeS32G32Bne { dst: XReg, addr: AddrG32Bne };
476             /// `low32(dst) = *addr`
477             xload32le_g32bne = XLoad32LeG32Bne { dst: XReg, addr: AddrG32Bne };
478             /// `dst = *addr`
479             xload64le_g32bne = XLoad64LeG32Bne { dst: XReg, addr: AddrG32Bne };
480             /// `*addr = low8(src)`
481             xstore8_g32bne = XStore8G32Bne { addr: AddrG32Bne, src: XReg };
482             /// `*addr = low16(src)`
483             xstore16le_g32bne = XStore16LeG32Bne { addr: AddrG32Bne, src: XReg };
484             /// `*addr = low32(src)`
485             xstore32le_g32bne = XStore32LeG32Bne { addr: AddrG32Bne, src: XReg };
486             /// `*addr = src`
487             xstore64le_g32bne = XStore64LeG32Bne { addr: AddrG32Bne, src: XReg };
488 
489 
490             /// `push lr; push fp; fp = sp`
491             push_frame = PushFrame ;
492             /// `sp = fp; pop fp; pop lr`
493             pop_frame = PopFrame ;
494 
495             /// Macro-instruction to enter a function, allocate some stack, and
496             /// then save some registers.
497             ///
498             /// This is equivalent to `push_frame`, `stack_alloc32 amt`, then
499             /// saving all of `regs` to the top of the stack just allocated.
500             push_frame_save = PushFrameSave { amt: u16, regs: UpperRegSet<XReg> };
501             /// Inverse of `push_frame_save`. Restores `regs` from the top of
502             /// the stack, then runs `stack_free32 amt`, then runs `pop_frame`.
503             pop_frame_restore = PopFrameRestore { amt: u16, regs: UpperRegSet<XReg> };
504 
505             /// `sp = sp.checked_sub(amt)`
506             stack_alloc32 = StackAlloc32 { amt: u32 };
507 
508             /// `sp = sp + amt`
509             stack_free32 = StackFree32 { amt: u32 };
510 
511             /// `dst = zext(low8(src))`
512             zext8 = Zext8 { dst: XReg, src: XReg };
513             /// `dst = zext(low16(src))`
514             zext16 = Zext16 { dst: XReg, src: XReg };
515             /// `dst = zext(low32(src))`
516             zext32 = Zext32 { dst: XReg, src: XReg };
517             /// `dst = sext(low8(src))`
518             sext8 = Sext8 { dst: XReg, src: XReg };
519             /// `dst = sext(low16(src))`
520             sext16 = Sext16 { dst: XReg, src: XReg };
521             /// `dst = sext(low32(src))`
522             sext32 = Sext32 { dst: XReg, src: XReg };
523 
524             /// `low32(dst) = |low32(src)|`
525             xabs32 = XAbs32 { dst: XReg, src: XReg };
526             /// `dst = |src|`
527             xabs64 = XAbs64 { dst: XReg, src: XReg };
528 
529             /// `low32(dst) = low32(src1) / low32(src2)` (signed)
530             xdiv32_s = XDiv32S { operands: BinaryOperands<XReg> };
531 
532             /// `dst = src1 / src2` (signed)
533             xdiv64_s = XDiv64S { operands: BinaryOperands<XReg> };
534 
535             /// `low32(dst) = low32(src1) / low32(src2)` (unsigned)
536             xdiv32_u = XDiv32U { operands: BinaryOperands<XReg> };
537 
538             /// `dst = src1 / src2` (unsigned)
539             xdiv64_u = XDiv64U { operands: BinaryOperands<XReg> };
540 
541             /// `low32(dst) = low32(src1) % low32(src2)` (signed)
542             xrem32_s = XRem32S { operands: BinaryOperands<XReg> };
543 
544             /// `dst = src1 % src2` (signed)
545             xrem64_s = XRem64S { operands: BinaryOperands<XReg> };
546 
547             /// `low32(dst) = low32(src1) % low32(src2)` (unsigned)
548             xrem32_u = XRem32U { operands: BinaryOperands<XReg> };
549 
550             /// `dst = src1 % src2` (unsigned)
551             xrem64_u = XRem64U { operands: BinaryOperands<XReg> };
552 
553             /// `low32(dst) = low32(src1) & low32(src2)`
554             xband32 = XBand32 { operands: BinaryOperands<XReg> };
555             /// Same as `xband32` but `src2` is a sign-extended 8-bit immediate.
556             xband32_s8 = Xband32S8 { dst: XReg, src1: XReg, src2: i8 };
557             /// Same as `xband32` but `src2` is a sign-extended 32-bit immediate.
558             xband32_s32 = Xband32S32 { dst: XReg, src1: XReg, src2: i32 };
559             /// `dst = src1 & src2`
560             xband64 = XBand64 { operands: BinaryOperands<XReg> };
561             /// Same as `xband64` but `src2` is a sign-extended 8-bit immediate.
562             xband64_s8 = Xband64S8 { dst: XReg, src1: XReg, src2: i8 };
563             /// Same as `xband64` but `src2` is a sign-extended 32-bit immediate.
564             xband64_s32 = Xband64S32 { dst: XReg, src1: XReg, src2: i32 };
565             /// `low32(dst) = low32(src1) | low32(src2)`
566             xbor32 = XBor32 { operands: BinaryOperands<XReg> };
567             /// Same as `xbor32` but `src2` is a sign-extended 8-bit immediate.
568             xbor32_s8 = Xbor32S8 { dst: XReg, src1: XReg, src2: i8 };
569             /// Same as `xbor32` but `src2` is a sign-extended 32-bit immediate.
570             xbor32_s32 = Xbor32S32 { dst: XReg, src1: XReg, src2: i32 };
571             /// `dst = src1 | src2`
572             xbor64 = XBor64 { operands: BinaryOperands<XReg> };
573             /// Same as `xbor64` but `src2` is a sign-extended 8-bit immediate.
574             xbor64_s8 = Xbor64S8 { dst: XReg, src1: XReg, src2: i8 };
575             /// Same as `xbor64` but `src2` is a sign-extended 32-bit immediate.
576             xbor64_s32 = Xbor64S32 { dst: XReg, src1: XReg, src2: i32 };
577 
578             /// `low32(dst) = low32(src1) ^ low32(src2)`
579             xbxor32 = XBxor32 { operands: BinaryOperands<XReg> };
580             /// Same as `xbxor32` but `src2` is a sign-extended 8-bit immediate.
581             xbxor32_s8 = Xbxor32S8 { dst: XReg, src1: XReg, src2: i8 };
582             /// Same as `xbxor32` but `src2` is a sign-extended 32-bit immediate.
583             xbxor32_s32 = Xbxor32S32 { dst: XReg, src1: XReg, src2: i32 };
584             /// `dst = src1 ^ src2`
585             xbxor64 = XBxor64 { operands: BinaryOperands<XReg> };
586             /// Same as `xbxor64` but `src2` is a sign-extended 8-bit immediate.
587             xbxor64_s8 = Xbxor64S8 { dst: XReg, src1: XReg, src2: i8 };
588             /// Same as `xbxor64` but `src2` is a sign-extended 32-bit immediate.
589             xbxor64_s32 = Xbxor64S32 { dst: XReg, src1: XReg, src2: i32 };
590 
591             /// `low32(dst) = !low32(src)`
592             xbnot32 = XBnot32 { dst: XReg, src: XReg };
593             /// `dst = !src`
594             xbnot64 = XBnot64 { dst: XReg, src: XReg };
595 
596             /// `low32(dst) = min(low32(src1), low32(src2))` (unsigned)
597             xmin32_u = Xmin32U { operands: BinaryOperands<XReg> };
598             /// `low32(dst) = min(low32(src1), low32(src2))` (signed)
599             xmin32_s = Xmin32S { operands: BinaryOperands<XReg> };
600             /// `low32(dst) = max(low32(src1), low32(src2))` (unsigned)
601             xmax32_u = Xmax32U { operands: BinaryOperands<XReg> };
602             /// `low32(dst) = max(low32(src1), low32(src2))` (signed)
603             xmax32_s = Xmax32S { operands: BinaryOperands<XReg> };
604             /// `dst = min(src1, src2)` (unsigned)
605             xmin64_u = Xmin64U { operands: BinaryOperands<XReg> };
606             /// `dst = min(src1, src2)` (signed)
607             xmin64_s = Xmin64S { operands: BinaryOperands<XReg> };
608             /// `dst = max(src1, src2)` (unsigned)
609             xmax64_u = Xmax64U { operands: BinaryOperands<XReg> };
610             /// `dst = max(src1, src2)` (signed)
611             xmax64_s = Xmax64S { operands: BinaryOperands<XReg> };
612 
613             /// `low32(dst) = low32(cond) ? low32(if_nonzero) : low32(if_zero)`
614             xselect32 = XSelect32 { dst: XReg, cond: XReg, if_nonzero: XReg, if_zero: XReg };
615             /// `dst = low32(cond) ? if_nonzero : if_zero`
616             xselect64 = XSelect64 { dst: XReg, cond: XReg, if_nonzero: XReg, if_zero: XReg };
617         }
618     };
619 }
620 
621 /// Calls the given macro with each extended opcode.
622 #[macro_export]
623 macro_rules! for_each_extended_op {
624     ( $macro:ident ) => {
625         $macro! {
626             /// Raise a trap.
627             trap = Trap;
628 
629             /// A special opcode to halt interpreter execution and yield control
630             /// back to the host.
631             ///
632             /// This opcode results in `DoneReason::CallIndirectHost` where the
633             /// `id` here is shepherded along to the embedder. It's up to the
634             /// embedder to determine what to do with the `id` and the current
635             /// state of registers and the stack.
636             ///
637             /// In Wasmtime this is used to implement interpreter-to-host calls.
638             /// This is modeled as a `call` instruction where the first
639             /// parameter is the native function pointer to invoke and all
640             /// remaining parameters for the native function are in following
641             /// parameter positions (e.g. `x1`, `x2`, ...). The results of the
642             /// host call are then stored in `x0`.
643             ///
644             /// Handling this in Wasmtime is done through a "relocation" which
645             /// is resolved at link-time when raw bytecode from Cranelift is
646             /// assembled into the final object that Wasmtime will interpret.
647             call_indirect_host = CallIndirectHost { id: u8 };
648 
649             /// Adds `offset` to the pc of this instruction and stores it in
650             /// `dst`.
651             xpcadd = Xpcadd { dst: XReg, offset: PcRelOffset };
652 
653             /// Gets the special "fp" register and moves it into `dst`.
654             xmov_fp = XmovFp { dst: XReg };
655 
656             /// Gets the special "lr" register and moves it into `dst`.
657             xmov_lr = XmovLr { dst: XReg };
658 
659             /// `dst = byteswap(low32(src))`
660             bswap32 = Bswap32 { dst: XReg, src: XReg };
661             /// `dst = byteswap(src)`
662             bswap64 = Bswap64 { dst: XReg, src: XReg };
663 
664             /// 32-bit checked unsigned addition: `low32(dst) = low32(src1) +
665             /// low32(src2)`.
666             ///
667             /// The upper 32-bits of `dst` are unmodified. Traps if the addition
668             /// overflows.
669             xadd32_uoverflow_trap = Xadd32UoverflowTrap { operands: BinaryOperands<XReg> };
670 
671             /// 64-bit checked unsigned addition: `dst = src1 + src2`.
672             xadd64_uoverflow_trap = Xadd64UoverflowTrap { operands: BinaryOperands<XReg> };
673 
674             /// `dst = high64(src1 * src2)` (signed)
675             xmulhi64_s = XMulHi64S { operands: BinaryOperands<XReg> };
676             /// `dst = high64(src1 * src2)` (unsigned)
677             xmulhi64_u = XMulHi64U { operands: BinaryOperands<XReg> };
678 
679             /// `low32(dst) = if low32(src) == 0 { 0 } else { -1 }`
680             xbmask32 = Xbmask32 { dst: XReg, src: XReg };
681             /// `dst = if src == 0 { 0 } else { -1 }`
682             xbmask64 = Xbmask64 { dst: XReg, src: XReg };
683 
684             // Big-endian loads/stores of X-registers using the "o32"
685             // addressing mode
686 
687             /// `low32(dst) = zext(*addr)`
688             xload16be_u32_o32 = XLoad16BeU32O32 { dst: XReg, addr: AddrO32 };
689             /// `low32(dst) = sext(*addr)`
690             xload16be_s32_o32 = XLoad16BeS32O32 { dst: XReg, addr: AddrO32 };
691             /// `low32(dst) = zext(*addr)`
692             xload32be_o32 = XLoad32BeO32 { dst: XReg, addr: AddrO32 };
693             /// `dst = *addr`
694             xload64be_o32 = XLoad64BeO32 { dst: XReg, addr: AddrO32 };
695             /// `*addr = low16(src)`
696             xstore16be_o32 = XStore16BeO32 { addr: AddrO32, src: XReg };
697             /// `*addr = low32(src)`
698             xstore32be_o32 = XStore32BeO32 { addr: AddrO32, src: XReg };
699             /// `*addr = low64(src)`
700             xstore64be_o32 = XStore64BeO32 { addr: AddrO32, src: XReg };
701 
702             // Big and little endian float loads/stores. Note that the "Z"
703             // addressing mode only has little-endian variants.
704 
705             /// `low32(dst) = zext(*addr)`
706             fload32be_o32 = Fload32BeO32 { dst: FReg, addr: AddrO32 };
707             /// `dst = *addr`
708             fload64be_o32 = Fload64BeO32 { dst: FReg, addr: AddrO32 };
709             /// `*addr = low32(src)`
710             fstore32be_o32 = Fstore32BeO32 { addr: AddrO32, src: FReg };
711             /// `*addr = src`
712             fstore64be_o32 = Fstore64BeO32 { addr: AddrO32, src: FReg };
713 
714             /// `low32(dst) = zext(*addr)`
715             fload32le_o32 = Fload32LeO32 { dst: FReg, addr: AddrO32 };
716             /// `dst = *addr`
717             fload64le_o32 = Fload64LeO32 { dst: FReg, addr: AddrO32 };
718             /// `*addr = low32(src)`
719             fstore32le_o32 = Fstore32LeO32 { addr: AddrO32, src: FReg };
720             /// `*addr = src`
721             fstore64le_o32 = Fstore64LeO32 { addr: AddrO32, src: FReg };
722 
723             /// `low32(dst) = zext(*addr)`
724             fload32le_z = Fload32LeZ { dst: FReg, addr: AddrZ };
725             /// `dst = *addr`
726             fload64le_z = Fload64LeZ { dst: FReg, addr: AddrZ };
727             /// `*addr = low32(src)`
728             fstore32le_z = Fstore32LeZ { addr: AddrZ, src: FReg };
729             /// `*addr = src`
730             fstore64le_z = Fstore64LeZ { addr: AddrZ, src: FReg };
731 
732             /// `low32(dst) = zext(*addr)`
733             fload32le_g32 = Fload32LeG32 { dst: FReg, addr: AddrG32 };
734             /// `dst = *addr`
735             fload64le_g32 = Fload64LeG32 { dst: FReg, addr: AddrG32 };
736             /// `*addr = low32(src)`
737             fstore32le_g32 = Fstore32LeG32 { addr: AddrG32, src: FReg };
738             /// `*addr = src`
739             fstore64le_g32 = Fstore64LeG32 { addr: AddrG32, src: FReg };
740 
741             // Vector loads/stores. Note that big-endian variants are all
742             // omitted.
743 
744             /// `dst = *addr`
745             vload128le_o32 = VLoad128O32 { dst: VReg, addr: AddrO32 };
746             /// `*addr = src`
747             vstore128le_o32 = Vstore128LeO32 { addr: AddrO32, src: VReg };
748             /// `dst = *(ptr + offset)`
749             vload128le_z = VLoad128Z { dst: VReg, addr: AddrZ };
750             /// `*(ptr + offset) = src`
751             vstore128le_z = Vstore128LeZ { addr: AddrZ, src: VReg };
752             /// `dst = *(ptr + offset)`
753             vload128le_g32 = VLoad128G32 { dst: VReg, addr: AddrG32 };
754             /// `*(ptr + offset) = src`
755             vstore128le_g32 = Vstore128LeG32 { addr: AddrG32, src: VReg };
756 
757             /// Move between `f` registers.
758             fmov = Fmov { dst: FReg, src: FReg };
759             /// Move between `v` registers.
760             vmov = Vmov { dst: VReg, src: VReg };
761 
762             /// `low32(dst) = bitcast low32(src) as i32`
763             bitcast_int_from_float_32 = BitcastIntFromFloat32 { dst: XReg, src: FReg };
764             /// `dst = bitcast src as i64`
765             bitcast_int_from_float_64 = BitcastIntFromFloat64 { dst: XReg, src: FReg };
766             /// `low32(dst) = bitcast low32(src) as f32`
767             bitcast_float_from_int_32 = BitcastFloatFromInt32 { dst: FReg, src: XReg };
768             /// `dst = bitcast src as f64`
769             bitcast_float_from_int_64 = BitcastFloatFromInt64 { dst: FReg, src: XReg };
770 
771             /// `low32(dst) = bits`
772             fconst32 = FConst32 { dst: FReg, bits: u32 };
773             /// `dst = bits`
774             fconst64 = FConst64 { dst: FReg, bits: u64 };
775 
776             /// `low32(dst) = zext(src1 == src2)`
777             feq32 = Feq32 { dst: XReg, src1: FReg, src2: FReg };
778             /// `low32(dst) = zext(src1 != src2)`
779             fneq32 = Fneq32 { dst: XReg, src1: FReg, src2: FReg };
780             /// `low32(dst) = zext(src1 < src2)`
781             flt32 = Flt32 { dst: XReg, src1: FReg, src2: FReg };
782             /// `low32(dst) = zext(src1 <= src2)`
783             flteq32 = Flteq32 { dst: XReg, src1: FReg, src2: FReg };
784             /// `low32(dst) = zext(src1 == src2)`
785             feq64 = Feq64 { dst: XReg, src1: FReg, src2: FReg };
786             /// `low32(dst) = zext(src1 != src2)`
787             fneq64 = Fneq64 { dst: XReg, src1: FReg, src2: FReg };
788             /// `low32(dst) = zext(src1 < src2)`
789             flt64 = Flt64 { dst: XReg, src1: FReg, src2: FReg };
790             /// `low32(dst) = zext(src1 <= src2)`
791             flteq64 = Flteq64 { dst: XReg, src1: FReg, src2: FReg };
792 
793             /// `low32(dst) = low32(cond) ? low32(if_nonzero) : low32(if_zero)`
794             fselect32 = FSelect32 { dst: FReg, cond: XReg, if_nonzero: FReg, if_zero: FReg };
795             /// `dst = low32(cond) ? if_nonzero : if_zero`
796             fselect64 = FSelect64 { dst: FReg, cond: XReg, if_nonzero: FReg, if_zero: FReg };
797 
798             /// `low32(dst) = demote(src)`
799             f32_from_f64 = F32FromF64 { dst: FReg, src: FReg };
800             /// `dst = promote(low32(src))`
801             f64_from_f32 = F64FromF32 { dst: FReg, src: FReg };
802 
803             /// `low32(dst) = checked_f32_from_signed(low32(src))`
804             f32_from_x32_s = F32FromX32S { dst: FReg, src: XReg };
805             /// `low32(dst) = checked_f32_from_unsigned(low32(src))`
806             f32_from_x32_u = F32FromX32U { dst: FReg, src: XReg };
807             /// `low32(dst) = checked_f32_from_signed(src)`
808             f32_from_x64_s = F32FromX64S { dst: FReg, src: XReg };
809             /// `low32(dst) = checked_f32_from_unsigned(src)`
810             f32_from_x64_u = F32FromX64U { dst: FReg, src: XReg };
811             /// `dst = checked_f64_from_signed(low32(src))`
812             f64_from_x32_s = F64FromX32S { dst: FReg, src: XReg };
813             /// `dst = checked_f64_from_unsigned(low32(src))`
814             f64_from_x32_u = F64FromX32U { dst: FReg, src: XReg };
815             /// `dst = checked_f64_from_signed(src)`
816             f64_from_x64_s = F64FromX64S { dst: FReg, src: XReg };
817             /// `dst = checked_f64_from_unsigned(src)`
818             f64_from_x64_u = F64FromX64U { dst: FReg, src: XReg };
819 
820             /// `low32(dst) = checked_signed_from_f32(low32(src))`
821             x32_from_f32_s = X32FromF32S { dst: XReg, src: FReg };
822             /// `low32(dst) = checked_unsigned_from_f32(low32(src))`
823             x32_from_f32_u = X32FromF32U { dst: XReg, src: FReg };
824             /// `low32(dst) = checked_signed_from_f64(src)`
825             x32_from_f64_s = X32FromF64S { dst: XReg, src: FReg };
826             /// `low32(dst) = checked_unsigned_from_f64(src)`
827             x32_from_f64_u = X32FromF64U { dst: XReg, src: FReg };
828             /// `dst = checked_signed_from_f32(low32(src))`
829             x64_from_f32_s = X64FromF32S { dst: XReg, src: FReg };
830             /// `dst = checked_unsigned_from_f32(low32(src))`
831             x64_from_f32_u = X64FromF32U { dst: XReg, src: FReg };
832             /// `dst = checked_signed_from_f64(src)`
833             x64_from_f64_s = X64FromF64S { dst: XReg, src: FReg };
834             /// `dst = checked_unsigned_from_f64(src)`
835             x64_from_f64_u = X64FromF64U { dst: XReg, src: FReg };
836 
837             /// `low32(dst) = saturating_signed_from_f32(low32(src))`
838             x32_from_f32_s_sat = X32FromF32SSat { dst: XReg, src: FReg };
839             /// `low32(dst) = saturating_unsigned_from_f32(low32(src))`
840             x32_from_f32_u_sat = X32FromF32USat { dst: XReg, src: FReg };
841             /// `low32(dst) = saturating_signed_from_f64(src)`
842             x32_from_f64_s_sat = X32FromF64SSat { dst: XReg, src: FReg };
843             /// `low32(dst) = saturating_unsigned_from_f64(src)`
844             x32_from_f64_u_sat = X32FromF64USat { dst: XReg, src: FReg };
845             /// `dst = saturating_signed_from_f32(low32(src))`
846             x64_from_f32_s_sat = X64FromF32SSat { dst: XReg, src: FReg };
847             /// `dst = saturating_unsigned_from_f32(low32(src))`
848             x64_from_f32_u_sat = X64FromF32USat { dst: XReg, src: FReg };
849             /// `dst = saturating_signed_from_f64(src)`
850             x64_from_f64_s_sat = X64FromF64SSat { dst: XReg, src: FReg };
851             /// `dst = saturating_unsigned_from_f64(src)`
852             x64_from_f64_u_sat = X64FromF64USat { dst: XReg, src: FReg };
853 
854             /// `low32(dst) = copysign(low32(src1), low32(src2))`
855             fcopysign32 = FCopySign32 { operands: BinaryOperands<FReg> };
856             /// `dst = copysign(src1, src2)`
857             fcopysign64 = FCopySign64 { operands: BinaryOperands<FReg> };
858 
859             /// `low32(dst) = low32(src1) + low32(src2)`
860             fadd32 = Fadd32 { operands: BinaryOperands<FReg> };
861             /// `low32(dst) = low32(src1) - low32(src2)`
862             fsub32 = Fsub32 { operands: BinaryOperands<FReg> };
863             /// `low128(dst) = low128(src1) - low128(src2)`
864             vsubf32x4 = Vsubf32x4 { operands: BinaryOperands<VReg> };
865             /// `low32(dst) = low32(src1) * low32(src2)`
866             fmul32 = Fmul32 { operands: BinaryOperands<FReg> };
867             /// `low128(dst) = low128(src1) * low128(src2)`
868             vmulf32x4 = Vmulf32x4 { operands: BinaryOperands<VReg> };
869             /// `low32(dst) = low32(src1) / low32(src2)`
870             fdiv32 = Fdiv32 { operands: BinaryOperands<FReg> };
871             /// `low128(dst) = low128(src1) / low128(src2)`
872             vdivf32x4 = Vdivf32x4 { operands: BinaryOperands<VReg> };
873             /// `low32(dst) = ieee_maximum(low32(src1), low32(src2))`
874             fmaximum32 = Fmaximum32 { operands: BinaryOperands<FReg> };
875             /// `low32(dst) = ieee_minimum(low32(src1), low32(src2))`
876             fminimum32 = Fminimum32 { operands: BinaryOperands<FReg> };
877             /// `low32(dst) = ieee_trunc(low32(src))`
878             ftrunc32 = Ftrunc32 { dst: FReg, src: FReg };
879             /// `low128(dst) = ieee_trunc(low128(src))`
880             vtrunc32x4 = Vtrunc32x4 { dst: VReg, src: VReg };
881             /// `low128(dst) = ieee_trunc(low128(src))`
882             vtrunc64x2 = Vtrunc64x2 { dst: VReg, src: VReg };
883             /// `low32(dst) = ieee_floor(low32(src))`
884             ffloor32 = Ffloor32 { dst: FReg, src: FReg };
885             /// `low128(dst) = ieee_floor(low128(src))`
886             vfloor32x4 = Vfloor32x4 { dst: VReg, src: VReg };
887             /// `low128(dst) = ieee_floor(low128(src))`
888             vfloor64x2 = Vfloor64x2 { dst: VReg, src: VReg };
889             /// `low32(dst) = ieee_ceil(low32(src))`
890             fceil32 = Fceil32 { dst: FReg, src: FReg };
891             /// `low128(dst) = ieee_ceil(low128(src))`
892             vceil32x4 = Vceil32x4 { dst: VReg, src: VReg };
893             /// `low128(dst) = ieee_ceil(low128(src))`
894             vceil64x2 = Vceil64x2 { dst: VReg, src: VReg };
895             /// `low32(dst) = ieee_nearest(low32(src))`
896             fnearest32 = Fnearest32 { dst: FReg, src: FReg };
897             /// `low32(dst) = ieee_sqrt(low32(src))`
898             fsqrt32 = Fsqrt32 { dst: FReg, src: FReg };
899             /// `low128(dst) = ieee_sqrt(low128(src))`
900             vsqrt32x4 = Vsqrt32x4 { dst: VReg, src: VReg };
901             /// `low128(dst) = ieee_sqrt(low128(src))`
902             vsqrt64x2 = Vsqrt64x2 { dst: VReg, src: VReg };
903             /// `low32(dst) = -low32(src)`
904             fneg32 = Fneg32 { dst: FReg, src: FReg };
905             /// `low128(dst) = -low128(src)`
906             vnegf32x4 = Vnegf32x4 { dst: VReg, src: VReg };
907             /// `low32(dst) = |low32(src)|`
908             fabs32 = Fabs32 { dst: FReg, src: FReg };
909 
910             /// `dst = src1 + src2`
911             fadd64 = Fadd64 { operands: BinaryOperands<FReg> };
912             /// `dst = src1 - src2`
913             fsub64 = Fsub64 { operands: BinaryOperands<FReg> };
914             /// `dst = src1 * src2`
915             fmul64 = Fmul64 { operands: BinaryOperands<FReg> };
916             /// `dst = src1 / src2`
917             fdiv64 = Fdiv64 { operands: BinaryOperands<FReg> };
918             /// `dst = src1 / src2`
919             vdivf64x2 = VDivF64x2 { operands: BinaryOperands<VReg> };
920             /// `dst = ieee_maximum(src1, src2)`
921             fmaximum64 = Fmaximum64 { operands: BinaryOperands<FReg> };
922             /// `dst = ieee_minimum(src1, src2)`
923             fminimum64 = Fminimum64 { operands: BinaryOperands<FReg> };
924             /// `dst = ieee_trunc(src)`
925             ftrunc64 = Ftrunc64 { dst: FReg, src: FReg };
926             /// `dst = ieee_floor(src)`
927             ffloor64 = Ffloor64 { dst: FReg, src: FReg };
928             /// `dst = ieee_ceil(src)`
929             fceil64 = Fceil64 { dst: FReg, src: FReg };
930             /// `dst = ieee_nearest(src)`
931             fnearest64 = Fnearest64 { dst: FReg, src: FReg };
932             /// `low128(dst) = ieee_nearest(low128(src))`
933             vnearest32x4 = Vnearest32x4 { dst: VReg, src: VReg };
934             /// `low128(dst) = ieee_nearest(low128(src))`
935             vnearest64x2 = Vnearest64x2 { dst: VReg, src: VReg };
936             /// `dst = ieee_sqrt(src)`
937             fsqrt64 = Fsqrt64 { dst: FReg, src: FReg };
938             /// `dst = -src`
939             fneg64 = Fneg64 { dst: FReg, src: FReg };
940             /// `dst = |src|`
941             fabs64 = Fabs64 { dst: FReg, src: FReg };
942 
943             /// `dst = imm`
944             vconst128 = Vconst128 { dst: VReg, imm: u128 };
945 
946             /// `dst = src1 + src2`
947             vaddi8x16 = VAddI8x16 { operands: BinaryOperands<VReg> };
948             /// `dst = src1 + src2`
949             vaddi16x8 = VAddI16x8 { operands: BinaryOperands<VReg> };
950             /// `dst = src1 + src2`
951             vaddi32x4 = VAddI32x4 { operands: BinaryOperands<VReg> };
952             /// `dst = src1 + src2`
953             vaddi64x2 = VAddI64x2 { operands: BinaryOperands<VReg> };
954             /// `dst = src1 + src2`
955             vaddf32x4 = VAddF32x4 { operands: BinaryOperands<VReg> };
956             /// `dst = src1 + src2`
957             vaddf64x2 = VAddF64x2 { operands: BinaryOperands<VReg> };
958 
959             /// `dst = saturating_add(src1, src2)`
960             vaddi8x16_sat = VAddI8x16Sat { operands: BinaryOperands<VReg> };
961             /// `dst = saturating_add(src1, src2)`
962             vaddu8x16_sat = VAddU8x16Sat { operands: BinaryOperands<VReg> };
963             /// `dst = saturating_add(src1, src2)`
964             vaddi16x8_sat = VAddI16x8Sat { operands: BinaryOperands<VReg> };
965             /// `dst = saturating_add(src1, src2)`
966             vaddu16x8_sat = VAddU16x8Sat { operands: BinaryOperands<VReg> };
967 
968             /// `dst = [src1[0] + src1[1], ..., src2[6] + src2[7]]`
969             vaddpairwisei16x8_s = VAddpairwiseI16x8S { operands: BinaryOperands<VReg> };
970             /// `dst = [src1[0] + src1[1], ..., src2[2] + src2[3]]`
971             vaddpairwisei32x4_s = VAddpairwiseI32x4S { operands: BinaryOperands<VReg> };
972 
973             /// `dst = src1 << src2`
974             vshli8x16 = VShlI8x16 { operands: BinaryOperands<VReg, VReg, XReg> };
975             /// `dst = src1 << src2`
976             vshli16x8 = VShlI16x8 { operands: BinaryOperands<VReg, VReg, XReg> };
977             /// `dst = src1 << src2`
978             vshli32x4 = VShlI32x4 { operands: BinaryOperands<VReg, VReg, XReg> };
979             /// `dst = src1 << src2`
980             vshli64x2 = VShlI64x2 { operands: BinaryOperands<VReg, VReg, XReg> };
981             /// `dst = src1 >> src2` (signed)
982             vshri8x16_s = VShrI8x16S { operands: BinaryOperands<VReg, VReg, XReg> };
983             /// `dst = src1 >> src2` (signed)
984             vshri16x8_s = VShrI16x8S { operands: BinaryOperands<VReg, VReg, XReg> };
985             /// `dst = src1 >> src2` (signed)
986             vshri32x4_s = VShrI32x4S { operands: BinaryOperands<VReg, VReg, XReg> };
987             /// `dst = src1 >> src2` (signed)
988             vshri64x2_s = VShrI64x2S { operands: BinaryOperands<VReg, VReg, XReg> };
989             /// `dst = src1 >> src2` (unsigned)
990             vshri8x16_u = VShrI8x16U { operands: BinaryOperands<VReg, VReg, XReg> };
991             /// `dst = src1 >> src2` (unsigned)
992             vshri16x8_u = VShrI16x8U { operands: BinaryOperands<VReg, VReg, XReg> };
993             /// `dst = src1 >> src2` (unsigned)
994             vshri32x4_u = VShrI32x4U { operands: BinaryOperands<VReg, VReg, XReg> };
995             /// `dst = src1 >> src2` (unsigned)
996             vshri64x2_u = VShrI64x2U { operands: BinaryOperands<VReg, VReg, XReg> };
997 
998             /// `dst = splat(low8(src))`
999             vsplatx8 = VSplatX8 { dst: VReg, src: XReg };
1000             /// `dst = splat(low16(src))`
1001             vsplatx16 = VSplatX16 { dst: VReg, src: XReg };
1002             /// `dst = splat(low32(src))`
1003             vsplatx32 = VSplatX32 { dst: VReg, src: XReg };
1004             /// `dst = splat(src)`
1005             vsplatx64 = VSplatX64 { dst: VReg, src: XReg };
1006             /// `dst = splat(low32(src))`
1007             vsplatf32 = VSplatF32 { dst: VReg, src: FReg };
1008             /// `dst = splat(src)`
1009             vsplatf64 = VSplatF64 { dst: VReg, src: FReg };
1010 
1011             /// Load the 64-bit source as i8x8 and sign-extend to i16x8.
1012             vload8x8_s_z = VLoad8x8SZ { dst: VReg, addr: AddrZ };
1013             /// Load the 64-bit source as u8x8 and zero-extend to i16x8.
1014             vload8x8_u_z = VLoad8x8UZ { dst: VReg, addr: AddrZ };
1015             /// Load the 64-bit source as i16x4 and sign-extend to i32x4.
1016             vload16x4le_s_z = VLoad16x4LeSZ { dst: VReg, addr: AddrZ };
1017             /// Load the 64-bit source as u16x4 and zero-extend to i32x4.
1018             vload16x4le_u_z = VLoad16x4LeUZ { dst: VReg, addr: AddrZ };
1019             /// Load the 64-bit source as i32x2 and sign-extend to i64x2.
1020             vload32x2le_s_z = VLoad32x2LeSZ { dst: VReg, addr: AddrZ };
1021             /// Load the 64-bit source as u32x2 and zero-extend to i64x2.
1022             vload32x2le_u_z = VLoad32x2LeUZ { dst: VReg, addr: AddrZ };
1023 
1024             /// `dst = src1 & src2`
1025             vband128 = VBand128 { operands: BinaryOperands<VReg> };
1026             /// `dst = src1 | src2`
1027             vbor128 = VBor128 { operands: BinaryOperands<VReg> };
1028             /// `dst = src1 ^ src2`
1029             vbxor128 = VBxor128 { operands: BinaryOperands<VReg> };
1030             /// `dst = !src`
1031             vbnot128 = VBnot128 { dst: VReg, src: VReg };
1032             /// `dst = (c & x) | (!c & y)`
1033             vbitselect128 = VBitselect128 { dst: VReg, c: VReg, x: VReg, y: VReg };
1034             /// Collect high bits of each lane into the low 32-bits of the
1035             /// destination.
1036             vbitmask8x16 = Vbitmask8x16 { dst: XReg, src: VReg };
1037             /// Collect high bits of each lane into the low 32-bits of the
1038             /// destination.
1039             vbitmask16x8 = Vbitmask16x8 { dst: XReg, src: VReg };
1040             /// Collect high bits of each lane into the low 32-bits of the
1041             /// destination.
1042             vbitmask32x4 = Vbitmask32x4 { dst: XReg, src: VReg };
1043             /// Collect high bits of each lane into the low 32-bits of the
1044             /// destination.
1045             vbitmask64x2 = Vbitmask64x2 { dst: XReg, src: VReg };
1046             /// Store whether all lanes are nonzero in `dst`.
1047             valltrue8x16 = Valltrue8x16 { dst: XReg, src: VReg };
1048             /// Store whether all lanes are nonzero in `dst`.
1049             valltrue16x8 = Valltrue16x8 { dst: XReg, src: VReg };
1050             /// Store whether all lanes are nonzero in `dst`.
1051             valltrue32x4 = Valltrue32x4 { dst: XReg, src: VReg };
1052             /// Store whether all lanes are nonzero in `dst`.
1053             valltrue64x2 = Valltrue64x2 { dst: XReg, src: VReg };
1054             /// Store whether any lanes are nonzero in `dst`.
1055             vanytrue8x16 = Vanytrue8x16 { dst: XReg, src: VReg };
1056             /// Store whether any lanes are nonzero in `dst`.
1057             vanytrue16x8 = Vanytrue16x8 { dst: XReg, src: VReg };
1058             /// Store whether any lanes are nonzero in `dst`.
1059             vanytrue32x4 = Vanytrue32x4 { dst: XReg, src: VReg };
1060             /// Store whether any lanes are nonzero in `dst`.
1061             vanytrue64x2 = Vanytrue64x2 { dst: XReg, src: VReg };
1062 
1063             /// Int-to-float conversion (same as `f32_from_x32_s`)
1064             vf32x4_from_i32x4_s = VF32x4FromI32x4S { dst: VReg, src: VReg };
1065             /// Int-to-float conversion (same as `f32_from_x32_u`)
1066             vf32x4_from_i32x4_u = VF32x4FromI32x4U { dst: VReg, src: VReg };
1067             /// Int-to-float conversion (same as `f64_from_x64_s`)
1068             vf64x2_from_i64x2_s = VF64x2FromI64x2S { dst: VReg, src: VReg };
1069             /// Int-to-float conversion (same as `f64_from_x64_u`)
1070             vf64x2_from_i64x2_u = VF64x2FromI64x2U { dst: VReg, src: VReg };
1071             /// Float-to-int conversion (same as `x32_from_f32_s`)
1072             vi32x4_from_f32x4_s = VI32x4FromF32x4S { dst: VReg, src: VReg };
1073             /// Float-to-int conversion (same as `x32_from_f32_u`)
1074             vi32x4_from_f32x4_u = VI32x4FromF32x4U { dst: VReg, src: VReg };
1075             /// Float-to-int conversion (same as `x64_from_f64_s`)
1076             vi64x2_from_f64x2_s = VI64x2FromF64x2S { dst: VReg, src: VReg };
1077             /// Float-to-int conversion (same as `x64_from_f64_u`)
1078             vi64x2_from_f64x2_u = VI64x2FromF64x2U { dst: VReg, src: VReg };
1079 
1080             /// Widens the low lanes of the input vector, as signed, to twice
1081             /// the width.
1082             vwidenlow8x16_s = VWidenLow8x16S { dst: VReg, src: VReg };
1083             /// Widens the low lanes of the input vector, as unsigned, to twice
1084             /// the width.
1085             vwidenlow8x16_u = VWidenLow8x16U { dst: VReg, src: VReg };
1086             /// Widens the low lanes of the input vector, as signed, to twice
1087             /// the width.
1088             vwidenlow16x8_s = VWidenLow16x8S { dst: VReg, src: VReg };
1089             /// Widens the low lanes of the input vector, as unsigned, to twice
1090             /// the width.
1091             vwidenlow16x8_u = VWidenLow16x8U { dst: VReg, src: VReg };
1092             /// Widens the low lanes of the input vector, as signed, to twice
1093             /// the width.
1094             vwidenlow32x4_s = VWidenLow32x4S { dst: VReg, src: VReg };
1095             /// Widens the low lanes of the input vector, as unsigned, to twice
1096             /// the width.
1097             vwidenlow32x4_u = VWidenLow32x4U { dst: VReg, src: VReg };
1098             /// Widens the high lanes of the input vector, as signed, to twice
1099             /// the width.
1100             vwidenhigh8x16_s = VWidenHigh8x16S { dst: VReg, src: VReg };
1101             /// Widens the high lanes of the input vector, as unsigned, to twice
1102             /// the width.
1103             vwidenhigh8x16_u = VWidenHigh8x16U { dst: VReg, src: VReg };
1104             /// Widens the high lanes of the input vector, as signed, to twice
1105             /// the width.
1106             vwidenhigh16x8_s = VWidenHigh16x8S { dst: VReg, src: VReg };
1107             /// Widens the high lanes of the input vector, as unsigned, to twice
1108             /// the width.
1109             vwidenhigh16x8_u = VWidenHigh16x8U { dst: VReg, src: VReg };
1110             /// Widens the high lanes of the input vector, as signed, to twice
1111             /// the width.
1112             vwidenhigh32x4_s = VWidenHigh32x4S { dst: VReg, src: VReg };
1113             /// Widens the high lanes of the input vector, as unsigned, to twice
1114             /// the width.
1115             vwidenhigh32x4_u = VWidenHigh32x4U { dst: VReg, src: VReg };
1116 
1117             /// Narrows the two 16x8 vectors, assuming all input lanes are
1118             /// signed, to half the width. Narrowing is signed and saturating.
1119             vnarrow16x8_s = Vnarrow16x8S { operands: BinaryOperands<VReg> };
1120             /// Narrows the two 16x8 vectors, assuming all input lanes are
1121             /// signed, to half the width. Narrowing is unsigned and saturating.
1122             vnarrow16x8_u = Vnarrow16x8U { operands: BinaryOperands<VReg> };
1123             /// Narrows the two 32x4 vectors, assuming all input lanes are
1124             /// signed, to half the width. Narrowing is signed and saturating.
1125             vnarrow32x4_s = Vnarrow32x4S { operands: BinaryOperands<VReg> };
1126             /// Narrows the two 32x4 vectors, assuming all input lanes are
1127             /// signed, to half the width. Narrowing is unsigned and saturating.
1128             vnarrow32x4_u = Vnarrow32x4U { operands: BinaryOperands<VReg> };
1129             /// Narrows the two 64x2 vectors, assuming all input lanes are
1130             /// signed, to half the width. Narrowing is signed and saturating.
1131             vnarrow64x2_s = Vnarrow64x2S { operands: BinaryOperands<VReg> };
1132             /// Narrows the two 64x2 vectors, assuming all input lanes are
1133             /// signed, to half the width. Narrowing is unsigned and saturating.
1134             vnarrow64x2_u = Vnarrow64x2U { operands: BinaryOperands<VReg> };
1135             /// Narrows the two 64x2 vectors, assuming all input lanes are
1136             /// unsigned, to half the width. Narrowing is unsigned and saturating.
1137             vunarrow64x2_u = Vunarrow64x2U { operands: BinaryOperands<VReg> };
1138             /// Promotes the low two lanes of the f32x4 input to f64x2.
1139             vfpromotelow = VFpromoteLow { dst: VReg, src: VReg };
1140             /// Demotes the two f64x2 lanes to f32x2 and then extends with two
1141             /// more zero lanes.
1142             vfdemote = VFdemote { dst: VReg, src: VReg };
1143 
1144             /// `dst = src1 - src2`
1145             vsubi8x16 = VSubI8x16 { operands: BinaryOperands<VReg> };
1146             /// `dst = src1 - src2`
1147             vsubi16x8 = VSubI16x8 { operands: BinaryOperands<VReg> };
1148             /// `dst = src1 - src2`
1149             vsubi32x4 = VSubI32x4 { operands: BinaryOperands<VReg> };
1150             /// `dst = src1 - src2`
1151             vsubi64x2 = VSubI64x2 { operands: BinaryOperands<VReg> };
1152             /// `dst = src1 - src2`
1153             vsubf64x2 = VSubF64x2 { operands: BinaryOperands<VReg> };
1154 
1155             /// `dst = saturating_sub(src1, src2)`
1156             vsubi8x16_sat = VSubI8x16Sat { operands: BinaryOperands<VReg> };
1157             /// `dst = saturating_sub(src1, src2)`
1158             vsubu8x16_sat = VSubU8x16Sat { operands: BinaryOperands<VReg> };
1159             /// `dst = saturating_sub(src1, src2)`
1160             vsubi16x8_sat = VSubI16x8Sat { operands: BinaryOperands<VReg> };
1161             /// `dst = saturating_sub(src1, src2)`
1162             vsubu16x8_sat = VSubU16x8Sat { operands: BinaryOperands<VReg> };
1163 
1164             /// `dst = src1 * src2`
1165             vmuli8x16 = VMulI8x16 { operands: BinaryOperands<VReg> };
1166             /// `dst = src1 * src2`
1167             vmuli16x8 = VMulI16x8 { operands: BinaryOperands<VReg> };
1168             /// `dst = src1 * src2`
1169             vmuli32x4 = VMulI32x4 { operands: BinaryOperands<VReg> };
1170             /// `dst = src1 * src2`
1171             vmuli64x2 = VMulI64x2 { operands: BinaryOperands<VReg> };
1172             /// `dst = src1 * src2`
1173             vmulf64x2 = VMulF64x2 { operands: BinaryOperands<VReg> };
1174 
1175             /// `dst = signed_saturate(src1 * src2 + (1 << (Q - 1)) >> Q)`
1176             vqmulrsi16x8 = VQmulrsI16x8 { operands: BinaryOperands<VReg> };
1177 
1178             /// `dst = count_ones(src)`
1179             vpopcnt8x16 = VPopcnt8x16 { dst: VReg, src: VReg };
1180 
1181             /// `low32(dst) = zext(src[lane])`
1182             xextractv8x16 = XExtractV8x16 { dst: XReg, src: VReg, lane: u8 };
1183             /// `low32(dst) = zext(src[lane])`
1184             xextractv16x8 = XExtractV16x8 { dst: XReg, src: VReg, lane: u8 };
1185             /// `low32(dst) = src[lane]`
1186             xextractv32x4 = XExtractV32x4 { dst: XReg, src: VReg, lane: u8 };
1187             /// `dst = src[lane]`
1188             xextractv64x2 = XExtractV64x2 { dst: XReg, src: VReg, lane: u8 };
1189             /// `low32(dst) = src[lane]`
1190             fextractv32x4 = FExtractV32x4 { dst: FReg, src: VReg, lane: u8 };
1191             /// `dst = src[lane]`
1192             fextractv64x2 = FExtractV64x2 { dst: FReg, src: VReg, lane: u8 };
1193 
1194             /// `dst = src1; dst[lane] = src2`
1195             vinsertx8 = VInsertX8 { operands: BinaryOperands<VReg, VReg, XReg>, lane: u8 };
1196             /// `dst = src1; dst[lane] = src2`
1197             vinsertx16 = VInsertX16 { operands: BinaryOperands<VReg, VReg, XReg>, lane: u8 };
1198             /// `dst = src1; dst[lane] = src2`
1199             vinsertx32 = VInsertX32 { operands: BinaryOperands<VReg, VReg, XReg>, lane: u8 };
1200             /// `dst = src1; dst[lane] = src2`
1201             vinsertx64 = VInsertX64 { operands: BinaryOperands<VReg, VReg, XReg>, lane: u8 };
1202             /// `dst = src1; dst[lane] = src2`
1203             vinsertf32 = VInsertF32 { operands: BinaryOperands<VReg, VReg, FReg>, lane: u8 };
1204             /// `dst = src1; dst[lane] = src2`
1205             vinsertf64 = VInsertF64 { operands: BinaryOperands<VReg, VReg, FReg>, lane: u8 };
1206 
1207             /// `dst = src == dst`
1208             veq8x16 = Veq8x16 { operands: BinaryOperands<VReg> };
1209             /// `dst = src != dst`
1210             vneq8x16 = Vneq8x16 { operands: BinaryOperands<VReg> };
1211             /// `dst = src < dst` (signed)
1212             vslt8x16 = Vslt8x16 { operands: BinaryOperands<VReg> };
1213             /// `dst = src <= dst` (signed)
1214             vslteq8x16 = Vslteq8x16 { operands: BinaryOperands<VReg> };
1215             /// `dst = src < dst` (unsigned)
1216             vult8x16 = Vult8x16 { operands: BinaryOperands<VReg> };
1217             /// `dst = src <= dst` (unsigned)
1218             vulteq8x16 = Vulteq8x16 { operands: BinaryOperands<VReg> };
1219             /// `dst = src == dst`
1220             veq16x8 = Veq16x8 { operands: BinaryOperands<VReg> };
1221             /// `dst = src != dst`
1222             vneq16x8 = Vneq16x8 { operands: BinaryOperands<VReg> };
1223             /// `dst = src < dst` (signed)
1224             vslt16x8 = Vslt16x8 { operands: BinaryOperands<VReg> };
1225             /// `dst = src <= dst` (signed)
1226             vslteq16x8 = Vslteq16x8 { operands: BinaryOperands<VReg> };
1227             /// `dst = src < dst` (unsigned)
1228             vult16x8 = Vult16x8 { operands: BinaryOperands<VReg> };
1229             /// `dst = src <= dst` (unsigned)
1230             vulteq16x8 = Vulteq16x8 { operands: BinaryOperands<VReg> };
1231             /// `dst = src == dst`
1232             veq32x4 = Veq32x4 { operands: BinaryOperands<VReg> };
1233             /// `dst = src != dst`
1234             vneq32x4 = Vneq32x4 { operands: BinaryOperands<VReg> };
1235             /// `dst = src < dst` (signed)
1236             vslt32x4 = Vslt32x4 { operands: BinaryOperands<VReg> };
1237             /// `dst = src <= dst` (signed)
1238             vslteq32x4 = Vslteq32x4 { operands: BinaryOperands<VReg> };
1239             /// `dst = src < dst` (unsigned)
1240             vult32x4 = Vult32x4 { operands: BinaryOperands<VReg> };
1241             /// `dst = src <= dst` (unsigned)
1242             vulteq32x4 = Vulteq32x4 { operands: BinaryOperands<VReg> };
1243             /// `dst = src == dst`
1244             veq64x2 = Veq64x2 { operands: BinaryOperands<VReg> };
1245             /// `dst = src != dst`
1246             vneq64x2 = Vneq64x2 { operands: BinaryOperands<VReg> };
1247             /// `dst = src < dst` (signed)
1248             vslt64x2 = Vslt64x2 { operands: BinaryOperands<VReg> };
1249             /// `dst = src <= dst` (signed)
1250             vslteq64x2 = Vslteq64x2 { operands: BinaryOperands<VReg> };
1251             /// `dst = src < dst` (unsigned)
1252             vult64x2 = Vult64x2 { operands: BinaryOperands<VReg> };
1253             /// `dst = src <= dst` (unsigned)
1254             vulteq64x2 = Vulteq64x2 { operands: BinaryOperands<VReg> };
1255 
1256             /// `dst = -src`
1257             vneg8x16 = Vneg8x16 { dst: VReg, src: VReg };
1258             /// `dst = -src`
1259             vneg16x8 = Vneg16x8 { dst: VReg, src: VReg };
1260             /// `dst = -src`
1261             vneg32x4 = Vneg32x4 { dst: VReg, src: VReg };
1262             /// `dst = -src`
1263             vneg64x2 = Vneg64x2 { dst: VReg, src: VReg };
1264             /// `dst = -src`
1265             vnegf64x2 = VnegF64x2 { dst: VReg, src: VReg };
1266 
1267             /// `dst = min(src1, src2)` (signed)
1268             vmin8x16_s = Vmin8x16S { operands: BinaryOperands<VReg> };
1269             /// `dst = min(src1, src2)` (unsigned)
1270             vmin8x16_u = Vmin8x16U { operands: BinaryOperands<VReg> };
1271             /// `dst = min(src1, src2)` (signed)
1272             vmin16x8_s = Vmin16x8S { operands: BinaryOperands<VReg> };
1273             /// `dst = min(src1, src2)` (unsigned)
1274             vmin16x8_u = Vmin16x8U { operands: BinaryOperands<VReg> };
1275             /// `dst = max(src1, src2)` (signed)
1276             vmax8x16_s = Vmax8x16S { operands: BinaryOperands<VReg> };
1277             /// `dst = max(src1, src2)` (unsigned)
1278             vmax8x16_u = Vmax8x16U { operands: BinaryOperands<VReg> };
1279             /// `dst = max(src1, src2)` (signed)
1280             vmax16x8_s = Vmax16x8S { operands: BinaryOperands<VReg> };
1281             /// `dst = max(src1, src2)` (unsigned)
1282             vmax16x8_u = Vmax16x8U { operands: BinaryOperands<VReg> };
1283 
1284             /// `dst = min(src1, src2)` (signed)
1285             vmin32x4_s = Vmin32x4S { operands: BinaryOperands<VReg> };
1286             /// `dst = min(src1, src2)` (unsigned)
1287             vmin32x4_u = Vmin32x4U { operands: BinaryOperands<VReg> };
1288             /// `dst = max(src1, src2)` (signed)
1289             vmax32x4_s = Vmax32x4S { operands: BinaryOperands<VReg> };
1290             /// `dst = max(src1, src2)` (unsigned)
1291             vmax32x4_u = Vmax32x4U { operands: BinaryOperands<VReg> };
1292 
1293             /// `dst = |src|`
1294             vabs8x16 = Vabs8x16 { dst: VReg, src: VReg };
1295             /// `dst = |src|`
1296             vabs16x8 = Vabs16x8 { dst: VReg, src: VReg };
1297             /// `dst = |src|`
1298             vabs32x4 = Vabs32x4 { dst: VReg, src: VReg };
1299             /// `dst = |src|`
1300             vabs64x2 = Vabs64x2 { dst: VReg, src: VReg };
1301 
1302             /// `dst = |src|`
1303             vabsf32x4 = Vabsf32x4 { dst: VReg, src: VReg };
1304             /// `dst = |src|`
1305             vabsf64x2 = Vabsf64x2 { dst: VReg, src: VReg };
1306             /// `dst = ieee_maximum(src1, src2)`
1307             vmaximumf32x4 = Vmaximumf32x4 { operands: BinaryOperands<VReg> };
1308             /// `dst = ieee_maximum(src1, src2)`
1309             vmaximumf64x2 = Vmaximumf64x2 { operands: BinaryOperands<VReg> };
1310             /// `dst = ieee_minimum(src1, src2)`
1311             vminimumf32x4 = Vminimumf32x4 { operands: BinaryOperands<VReg> };
1312             /// `dst = ieee_minimum(src1, src2)`
1313             vminimumf64x2 = Vminimumf64x2 { operands: BinaryOperands<VReg> };
1314 
1315             /// `dst = shuffle(src1, src2, mask)`
1316             vshuffle = VShuffle { dst: VReg, src1: VReg, src2: VReg, mask: u128 };
1317 
1318             /// `dst = swizzle(src1, src2)`
1319             vswizzlei8x16 = Vswizzlei8x16 { operands: BinaryOperands<VReg> };
1320 
1321             /// `dst = (src1 + src2 + 1) // 2`
1322             vavground8x16 = Vavground8x16 { operands: BinaryOperands<VReg> };
1323             /// `dst = (src1 + src2 + 1) // 2`
1324             vavground16x8 = Vavground16x8 { operands: BinaryOperands<VReg> };
1325 
1326             /// `dst = src == dst`
1327             veqf32x4 = VeqF32x4 { operands: BinaryOperands<VReg> };
1328             /// `dst = src != dst`
1329             vneqf32x4 = VneqF32x4 { operands: BinaryOperands<VReg> };
1330             /// `dst = src < dst`
1331             vltf32x4 = VltF32x4 { operands: BinaryOperands<VReg> };
1332             /// `dst = src <= dst`
1333             vlteqf32x4 = VlteqF32x4 { operands: BinaryOperands<VReg> };
1334             /// `dst = src == dst`
1335             veqf64x2 = VeqF64x2 { operands: BinaryOperands<VReg> };
1336             /// `dst = src != dst`
1337             vneqf64x2 = VneqF64x2 { operands: BinaryOperands<VReg> };
1338             /// `dst = src < dst`
1339             vltf64x2 = VltF64x2 { operands: BinaryOperands<VReg> };
1340             /// `dst = src <= dst`
1341             vlteqf64x2 = VlteqF64x2 { operands: BinaryOperands<VReg> };
1342 
1343             /// `dst = ieee_fma(a, b, c)`
1344             vfma32x4 = Vfma32x4 { dst: VReg, a: VReg, b: VReg, c: VReg };
1345             /// `dst = ieee_fma(a, b, c)`
1346             vfma64x2 = Vfma64x2 { dst: VReg, a: VReg, b: VReg, c: VReg };
1347 
1348             /// `dst = low32(cond) ? if_nonzero : if_zero`
1349             vselect = Vselect { dst: VReg, cond: XReg, if_nonzero: VReg, if_zero: VReg };
1350 
1351             /// `dst_hi:dst_lo = lhs_hi:lhs_lo + rhs_hi:rhs_lo`
1352             xadd128 = Xadd128 {
1353                 dst_lo: XReg,
1354                 dst_hi: XReg,
1355                 lhs_lo: XReg,
1356                 lhs_hi: XReg,
1357                 rhs_lo: XReg,
1358                 rhs_hi: XReg
1359             };
1360             /// `dst_hi:dst_lo = lhs_hi:lhs_lo - rhs_hi:rhs_lo`
1361             xsub128 = Xsub128 {
1362                 dst_lo: XReg,
1363                 dst_hi: XReg,
1364                 lhs_lo: XReg,
1365                 lhs_hi: XReg,
1366                 rhs_lo: XReg,
1367                 rhs_hi: XReg
1368             };
1369             /// `dst_hi:dst_lo = sext(lhs) * sext(rhs)`
1370             xwidemul64_s = Xwidemul64S {
1371                 dst_lo: XReg,
1372                 dst_hi: XReg,
1373                 lhs: XReg,
1374                 rhs: XReg
1375             };
1376             /// `dst_hi:dst_lo = zext(lhs) * zext(rhs)`
1377             xwidemul64_u = Xwidemul64U {
1378                 dst_lo: XReg,
1379                 dst_hi: XReg,
1380                 lhs: XReg,
1381                 rhs: XReg
1382             };
1383         }
1384     };
1385 }
1386 
1387 #[cfg(feature = "decode")]
1388 pub mod decode;
1389 #[cfg(feature = "disas")]
1390 pub mod disas;
1391 #[cfg(feature = "encode")]
1392 pub mod encode;
1393 #[cfg(feature = "interp")]
1394 pub mod interp;
1395 #[cfg(all(feature = "profile", feature = "interp"))]
1396 pub mod profile;
1397 #[cfg(all(not(feature = "profile"), feature = "interp"))]
1398 mod profile_disabled;
1399 #[cfg(all(not(feature = "profile"), feature = "interp"))]
1400 use profile_disabled as profile;
1401 
1402 pub mod regs;
1403 pub use regs::*;
1404 
1405 pub mod imms;
1406 pub use imms::*;
1407 
1408 pub mod op;
1409 pub use op::*;
1410 
1411 pub mod opcode;
1412 pub use opcode::*;
1413 
1414 #[cfg(feature = "decode")]
/// Marks a code path that must never execute.
///
/// When `debug_assertions` are enabled this panics through `unreachable!()`,
/// so a violated assumption is reported loudly instead of silently
/// miscompiling. Without debug assertions it instead calls
/// [`core::hint::unreachable_unchecked`].
///
/// # Safety
///
/// The caller must guarantee that this call can never actually be reached.
/// Reaching it in a build without debug assertions is undefined behavior.
pub(crate) unsafe fn unreachable_unchecked() -> ! {
    // Checked flavor: diverge with a panic so the bug is observable.
    #[cfg(debug_assertions)]
    unreachable!();

    // SAFETY: this function's own contract requires the caller to prove the
    // call site is unreachable, which is exactly the precondition of
    // `core::hint::unreachable_unchecked`.
    #[cfg(not(debug_assertions))]
    unsafe {
        core::hint::unreachable_unchecked()
    }
}
1424