// xref: /wasmtime-44.0.1/winch/codegen/src/masm.rs (revision 39e910be)
use crate::Result;
use crate::abi::{self, LocalSlot, align_to};
use crate::codegen::{CodeGenContext, Emission, FuncEnv};
use crate::isa::{
    CallingConvention,
    reg::{Reg, RegClass, WritableReg, writable},
};
use cranelift_codegen::{
    Final, MachBufferFinalized, MachLabel,
    binemit::CodeOffset,
    ir::{Endianness, MemFlags, RelSourceLoc, SourceLoc, UserExternalNameRef},
};
use std::{fmt::Debug, ops::Range};
use wasmtime_environ::{PtrSize, WasmHeapType, WasmRefType, WasmValType};

pub(crate) use cranelift_codegen::ir::TrapCode;

/// Division kind.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub(crate) enum DivKind {
    /// Signed division.
    Signed,
    /// Unsigned division.
    Unsigned,
}

/// Represents the `memory.atomic.wait*` kind.
#[derive(Debug, Clone, Copy)]
pub(crate) enum AtomicWaitKind {
    /// The 32-bit wait variant.
    Wait32,
    /// The 64-bit wait variant.
    Wait64,
}

/// Remainder kind.
#[derive(Debug, Copy, Clone)]
pub(crate) enum RemKind {
    /// Signed remainder.
    Signed,
    /// Unsigned remainder.
    Unsigned,
}

impl RemKind {
    /// Returns `true` if this is the [`RemKind::Signed`] kind.
    pub fn is_signed(&self) -> bool {
        matches!(self, Self::Signed)
    }
}

48 /// Kinds of vector min operation supported by WebAssembly.
49 pub(crate) enum V128MinKind {
50     /// 4 lanes of 32-bit floats.
51     F32x4,
52     /// 2 lanes of 64-bit floats.
53     F64x2,
54     /// 16 lanes of signed 8-bit integers.
55     I8x16S,
56     /// 16 lanes of unsigned 8-bit integers.
57     I8x16U,
58     /// 8 lanes of signed 16-bit integers.
59     I16x8S,
60     /// 8 lanes of unsigned 16-bit integers.
61     I16x8U,
62     /// 4 lanes of signed 32-bit integers.
63     I32x4S,
64     /// 4 lanes of unsigned 32-bit integers.
65     I32x4U,
66 }
67 
68 impl V128MinKind {
69     /// The size of each lane.
lane_size(&self) -> OperandSize70     pub(crate) fn lane_size(&self) -> OperandSize {
71         match self {
72             Self::F32x4 | Self::I32x4S | Self::I32x4U => OperandSize::S32,
73             Self::F64x2 => OperandSize::S64,
74             Self::I8x16S | Self::I8x16U => OperandSize::S8,
75             Self::I16x8S | Self::I16x8U => OperandSize::S16,
76         }
77     }
78 }
79 
80 /// Kinds of vector max operation supported by WebAssembly.
81 pub(crate) enum V128MaxKind {
82     /// 4 lanes of 32-bit floats.
83     F32x4,
84     /// 2 lanes of 64-bit floats.
85     F64x2,
86     /// 16 lanes of signed 8-bit integers.
87     I8x16S,
88     /// 16 lanes of unsigned 8-bit integers.
89     I8x16U,
90     /// 8 lanes of signed 16-bit integers.
91     I16x8S,
92     /// 8 lanes of unsigned 16-bit integers.
93     I16x8U,
94     /// 4 lanes of signed 32-bit integers.
95     I32x4S,
96     /// 4 lanes of unsigned 32-bit integers.
97     I32x4U,
98 }
99 
100 impl V128MaxKind {
101     /// The size of each lane.
lane_size(&self) -> OperandSize102     pub(crate) fn lane_size(&self) -> OperandSize {
103         match self {
104             Self::F32x4 | Self::I32x4S | Self::I32x4U => OperandSize::S32,
105             Self::F64x2 => OperandSize::S64,
106             Self::I8x16S | Self::I8x16U => OperandSize::S8,
107             Self::I16x8S | Self::I16x8U => OperandSize::S16,
108         }
109     }
110 }
111 
/// Kinds of wide multiplication.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub(crate) enum MulWideKind {
    /// Signed multiplication.
    Signed,
    /// Unsigned multiplication.
    Unsigned,
}

/// Type of operation for a read-modify-write instruction.
#[derive(Debug, Clone, Copy)]
pub(crate) enum RmwOp {
    /// Addition.
    Add,
    /// Subtraction.
    Sub,
    /// Exchange.
    Xchg,
    /// Bitwise and.
    And,
    /// Bitwise or.
    Or,
    /// Bitwise exclusive or.
    Xor,
}

/// The direction to perform the memory move.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub(crate) enum MemMoveDirection {
    /// From high memory addresses to low memory addresses.
    /// Invariant: the source location is closer to the FP than the destination
    /// location, which will be closer to the SP.
    HighToLow,
    /// From low memory addresses to high memory addresses.
    /// Invariant: the source location is closer to the SP than the destination
    /// location, which will be closer to the FP.
    LowToHigh,
}

/// Classifies how to treat float-to-int conversions.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub(crate) enum TruncKind {
    /// Saturating conversion. If the source value is greater than the maximum
    /// value of the destination type, the result is clamped to the
    /// destination maximum value.
    Checked,
    /// An exception is raised if the source value is greater than the maximum
    /// value of the destination type.
    Unchecked,
}

impl TruncKind {
    /// Returns `true` if the trunc kind is [`Checked`].
    ///
    /// [`Checked`]: TruncKind::Checked
    #[must_use]
    pub(crate) fn is_checked(&self) -> bool {
        matches!(self, Self::Checked)
    }

    /// Returns `true` if the trunc kind is [`Unchecked`].
    ///
    /// [`Unchecked`]: TruncKind::Unchecked
    #[must_use]
    pub(crate) fn is_unchecked(&self) -> bool {
        matches!(self, Self::Unchecked)
    }
}

/// Representation of the stack pointer offset.
#[derive(Copy, Clone, Eq, PartialEq, Debug, PartialOrd, Ord, Default)]
pub struct SPOffset(u32);

impl SPOffset {
    /// Creates an offset from its raw `u32` value.
    pub fn from_u32(offs: u32) -> Self {
        Self(offs)
    }

    /// Returns the raw `u32` value of the offset.
    pub fn as_u32(&self) -> u32 {
        self.0
    }
}

182 /// A stack slot.
183 #[derive(Debug, Clone, Copy, Eq, PartialEq)]
184 pub struct StackSlot {
185     /// The location of the slot, relative to the stack pointer.
186     pub offset: SPOffset,
187     /// The size of the slot, in bytes.
188     pub size: u32,
189 }
190 
191 impl StackSlot {
new(offs: SPOffset, size: u32) -> Self192     pub fn new(offs: SPOffset, size: u32) -> Self {
193         Self { offset: offs, size }
194     }
195 }
196 
197 pub trait ScratchType {
198     /// Derive the register class from the scratch register type.
reg_class() -> RegClass199     fn reg_class() -> RegClass;
200 }
201 
202 /// A scratch register type of integer class.
203 pub struct IntScratch;
204 /// A scratch register type of floating point class.
205 pub struct FloatScratch;
206 
207 impl ScratchType for IntScratch {
reg_class() -> RegClass208     fn reg_class() -> RegClass {
209         RegClass::Int
210     }
211 }
212 
213 impl ScratchType for FloatScratch {
reg_class() -> RegClass214     fn reg_class() -> RegClass {
215         RegClass::Float
216     }
217 }
218 
219 /// A scratch register scope.
220 #[derive(Debug, Clone, Copy)]
221 pub struct Scratch(Reg);
222 
223 impl Scratch {
new(r: Reg) -> Self224     pub fn new(r: Reg) -> Self {
225         Self(r)
226     }
227 
228     #[inline]
inner(&self) -> Reg229     pub fn inner(&self) -> Reg {
230         self.0
231     }
232 
233     #[inline]
writable(&self) -> WritableReg234     pub fn writable(&self) -> WritableReg {
235         writable!(self.0)
236     }
237 }
238 
/// Kinds of integer binary comparison in WebAssembly. The [`MacroAssembler`]
/// implementation for each ISA is responsible for emitting the correct
/// sequence of instructions when lowering to machine code.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub(crate) enum IntCmpKind {
    /// Equal.
    Eq,
    /// Not equal.
    Ne,
    /// Signed less than.
    LtS,
    /// Unsigned less than.
    LtU,
    /// Signed greater than.
    GtS,
    /// Unsigned greater than.
    GtU,
    /// Signed less than or equal.
    LeS,
    /// Unsigned less than or equal.
    LeU,
    /// Signed greater than or equal.
    GeS,
    /// Unsigned greater than or equal.
    GeU,
}

/// Kinds of float binary comparison in WebAssembly. The [`MacroAssembler`]
/// implementation for each ISA is responsible for emitting the correct
/// sequence of instructions when lowering code.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub(crate) enum FloatCmpKind {
    /// Equal.
    Eq,
    /// Not equal.
    Ne,
    /// Less than.
    Lt,
    /// Greater than.
    Gt,
    /// Less than or equal.
    Le,
    /// Greater than or equal.
    Ge,
}

/// Kinds of shifts in WebAssembly. The [`MacroAssembler`] implementation for
/// each ISA is responsible for emitting the correct sequence of instructions
/// when lowering to machine code.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub(crate) enum ShiftKind {
    /// Left shift.
    Shl,
    /// Signed right shift.
    ShrS,
    /// Unsigned right shift.
    ShrU,
    /// Left rotate.
    Rotl,
    /// Right rotate.
    Rotr,
}

302 /// Kinds of extends in WebAssembly. Each MacroAssembler implementation
303 /// is responsible for emitting the correct sequence of instructions when
304 /// lowering to machine code.
305 #[derive(Copy, Clone)]
306 pub(crate) enum ExtendKind {
307     Signed(Extend<Signed>),
308     Unsigned(Extend<Zero>),
309 }
310 
/// Marker for sign extension. An empty enum, so it has no values.
#[derive(Copy, Clone)]
pub(crate) enum Signed {}
/// Marker for zero extension. An empty enum, so it has no values.
#[derive(Copy, Clone)]
pub(crate) enum Zero {}

/// Implemented by the extend markers, [`Signed`] and [`Zero`].
pub(crate) trait ExtendType {}

impl ExtendType for Signed {}
impl ExtendType for Zero {}

321 #[derive(Copy, Clone)]
322 pub(crate) enum Extend<T: ExtendType> {
323     /// 8 to 32 bit extend.
324     I32Extend8,
325     /// 16 to 32 bit extend.
326     I32Extend16,
327     /// 8 to 64 bit extend.
328     I64Extend8,
329     /// 16 to 64 bit extend.
330     I64Extend16,
331     /// 32 to 64 bit extend.
332     I64Extend32,
333 
334     /// Variant to hold the kind of extend marker.
335     ///
336     /// This is `Signed` or `Zero`, that are empty enums, which means that this variant cannot be
337     /// constructed.
338     __Kind(T),
339 }
340 
341 impl From<Extend<Zero>> for ExtendKind {
from(value: Extend<Zero>) -> Self342     fn from(value: Extend<Zero>) -> Self {
343         ExtendKind::Unsigned(value)
344     }
345 }
346 
347 impl<T: ExtendType> Extend<T> {
from_size(&self) -> OperandSize348     pub fn from_size(&self) -> OperandSize {
349         match self {
350             Extend::I32Extend8 | Extend::I64Extend8 => OperandSize::S8,
351             Extend::I32Extend16 | Extend::I64Extend16 => OperandSize::S16,
352             Extend::I64Extend32 => OperandSize::S32,
353             Extend::__Kind(_) => unreachable!(),
354         }
355     }
356 
to_size(&self) -> OperandSize357     pub fn to_size(&self) -> OperandSize {
358         match self {
359             Extend::I32Extend8 | Extend::I32Extend16 => OperandSize::S32,
360             Extend::I64Extend8 | Extend::I64Extend16 | Extend::I64Extend32 => OperandSize::S64,
361             Extend::__Kind(_) => unreachable!(),
362         }
363     }
364 
from_bits(&self) -> u8365     pub fn from_bits(&self) -> u8 {
366         self.from_size().num_bits()
367     }
368 
to_bits(&self) -> u8369     pub fn to_bits(&self) -> u8 {
370         self.to_size().num_bits()
371     }
372 }
373 
374 impl From<Extend<Signed>> for ExtendKind {
from(value: Extend<Signed>) -> Self375     fn from(value: Extend<Signed>) -> Self {
376         ExtendKind::Signed(value)
377     }
378 }
379 
380 impl ExtendKind {
signed(&self) -> bool381     pub fn signed(&self) -> bool {
382         match self {
383             Self::Signed(_) => true,
384             _ => false,
385         }
386     }
387 
from_bits(&self) -> u8388     pub fn from_bits(&self) -> u8 {
389         match self {
390             Self::Signed(s) => s.from_bits(),
391             Self::Unsigned(u) => u.from_bits(),
392         }
393     }
394 
to_bits(&self) -> u8395     pub fn to_bits(&self) -> u8 {
396         match self {
397             Self::Signed(s) => s.to_bits(),
398             Self::Unsigned(u) => u.to_bits(),
399         }
400     }
401 }
402 
/// Kinds of vector load and extends in WebAssembly. Each MacroAssembler
/// implementation is responsible for emitting the correct sequence of
/// instructions when lowering to machine code.
#[derive(Debug, Copy, Clone)]
pub(crate) enum V128LoadExtendKind {
    /// Sign extends eight 8 bit integers to eight 16 bit lanes.
    E8x8S,
    /// Zero extends eight 8 bit integers to eight 16 bit lanes.
    E8x8U,
    /// Sign extends four 16 bit integers to four 32 bit lanes.
    E16x4S,
    /// Zero extends four 16 bit integers to four 32 bit lanes.
    E16x4U,
    /// Sign extends two 32 bit integers to two 64 bit lanes.
    E32x2S,
    /// Zero extends two 32 bit integers to two 64 bit lanes.
    E32x2U,
}

/// Kinds of splat loads supported by WebAssembly.
#[derive(Debug, Clone, Copy)]
pub(crate) enum SplatLoadKind {
    /// 8 bits.
    S8,
    /// 16 bits.
    S16,
    /// 32 bits.
    S32,
    /// 64 bits.
    S64,
}

434 /// Kinds of splat supported by WebAssembly.
435 #[derive(Copy, Debug, Clone, Eq, PartialEq)]
436 pub(crate) enum SplatKind {
437     /// 8 bit integer.
438     I8x16,
439     /// 16 bit integer.
440     I16x8,
441     /// 32 bit integer.
442     I32x4,
443     /// 64 bit integer.
444     I64x2,
445     /// 32 bit float.
446     F32x4,
447     /// 64 bit float.
448     F64x2,
449 }
450 
451 impl SplatKind {
452     /// The lane size to use for different kinds of splats.
lane_size(&self) -> OperandSize453     pub(crate) fn lane_size(&self) -> OperandSize {
454         match self {
455             SplatKind::I8x16 => OperandSize::S8,
456             SplatKind::I16x8 => OperandSize::S16,
457             SplatKind::I32x4 | SplatKind::F32x4 => OperandSize::S32,
458             SplatKind::I64x2 | SplatKind::F64x2 => OperandSize::S64,
459         }
460     }
461 }
462 
463 /// Kinds of extract lane supported by WebAssembly.
464 #[derive(Copy, Debug, Clone, Eq, PartialEq)]
465 pub(crate) enum ExtractLaneKind {
466     /// 16 lanes of 8-bit integers sign extended to 32-bits.
467     I8x16S,
468     /// 16 lanes of 8-bit integers zero extended to 32-bits.
469     I8x16U,
470     /// 8 lanes of 16-bit integers sign extended to 32-bits.
471     I16x8S,
472     /// 8 lanes of 16-bit integers zero extended to 32-bits.
473     I16x8U,
474     /// 4 lanes of 32-bit integers.
475     I32x4,
476     /// 2 lanes of 64-bit integers.
477     I64x2,
478     /// 4 lanes of 32-bit floats.
479     F32x4,
480     /// 2 lanes of 64-bit floats.
481     F64x2,
482 }
483 
484 impl ExtractLaneKind {
485     /// The lane size to use for different kinds of extract lane kinds.
lane_size(&self) -> OperandSize486     pub(crate) fn lane_size(&self) -> OperandSize {
487         match self {
488             ExtractLaneKind::I8x16S | ExtractLaneKind::I8x16U => OperandSize::S8,
489             ExtractLaneKind::I16x8S | ExtractLaneKind::I16x8U => OperandSize::S16,
490             ExtractLaneKind::I32x4 | ExtractLaneKind::F32x4 => OperandSize::S32,
491             ExtractLaneKind::I64x2 | ExtractLaneKind::F64x2 => OperandSize::S64,
492         }
493     }
494 }
495 
496 impl From<ExtractLaneKind> for Extend<Signed> {
from(value: ExtractLaneKind) -> Self497     fn from(value: ExtractLaneKind) -> Self {
498         match value {
499             ExtractLaneKind::I8x16S => Extend::I32Extend8,
500             ExtractLaneKind::I16x8S => Extend::I32Extend16,
501             _ => unimplemented!(),
502         }
503     }
504 }
505 
506 /// Kinds of replace lane supported by WebAssembly.
507 pub(crate) enum ReplaceLaneKind {
508     /// 16 lanes of 8 bit integers.
509     I8x16,
510     /// 8 lanes of 16 bit integers.
511     I16x8,
512     /// 4 lanes of 32 bit integers.
513     I32x4,
514     /// 2 lanes of 64 bit integers.
515     I64x2,
516     /// 4 lanes of 32 bit floats.
517     F32x4,
518     /// 2 lanes of 64 bit floats.
519     F64x2,
520 }
521 
522 impl ReplaceLaneKind {
523     /// The lane size to use for different kinds of replace lane kinds.
lane_size(&self) -> OperandSize524     pub(crate) fn lane_size(&self) -> OperandSize {
525         match self {
526             ReplaceLaneKind::I8x16 => OperandSize::S8,
527             ReplaceLaneKind::I16x8 => OperandSize::S16,
528             ReplaceLaneKind::I32x4 => OperandSize::S32,
529             ReplaceLaneKind::I64x2 => OperandSize::S64,
530             ReplaceLaneKind::F32x4 => OperandSize::S32,
531             ReplaceLaneKind::F64x2 => OperandSize::S64,
532         }
533     }
534 }
535 
536 /// Kinds of behavior supported by Wasm loads.
537 pub(crate) enum LoadKind {
538     /// Load the entire bytes of the operand size without any modifications.
539     Operand(OperandSize),
540     /// Atomic load, with optional scalar extend.
541     Atomic(OperandSize, Option<ExtendKind>),
542     /// Duplicate value into vector lanes.
543     Splat(SplatLoadKind),
544     /// Scalar (non-vector) extend.
545     ScalarExtend(ExtendKind),
546     /// Vector extend.
547     VectorExtend(V128LoadExtendKind),
548     /// Load content into select lane.
549     VectorLane(LaneSelector),
550     /// Load a single element into the lowest bits of a vector and initialize
551     /// all other bits to zero.
552     VectorZero(OperandSize),
553 }
554 
555 impl LoadKind {
556     /// Returns the [`OperandSize`] used in the load operation.
derive_operand_size(&self) -> OperandSize557     pub(crate) fn derive_operand_size(&self) -> OperandSize {
558         match self {
559             Self::ScalarExtend(extend) | Self::Atomic(_, Some(extend)) => {
560                 Self::operand_size_for_scalar(extend)
561             }
562             Self::VectorExtend(_) => OperandSize::S64,
563             Self::Splat(kind) => Self::operand_size_for_splat(kind),
564             Self::Operand(size)
565             | Self::Atomic(size, None)
566             | Self::VectorLane(LaneSelector { size, .. })
567             | Self::VectorZero(size) => *size,
568         }
569     }
570 
vector_lane(lane: u8, size: OperandSize) -> Self571     pub fn vector_lane(lane: u8, size: OperandSize) -> Self {
572         Self::VectorLane(LaneSelector { lane, size })
573     }
574 
operand_size_for_scalar(extend_kind: &ExtendKind) -> OperandSize575     fn operand_size_for_scalar(extend_kind: &ExtendKind) -> OperandSize {
576         match extend_kind {
577             ExtendKind::Signed(s) => s.from_size(),
578             ExtendKind::Unsigned(u) => u.from_size(),
579         }
580     }
581 
operand_size_for_splat(kind: &SplatLoadKind) -> OperandSize582     fn operand_size_for_splat(kind: &SplatLoadKind) -> OperandSize {
583         match kind {
584             SplatLoadKind::S8 => OperandSize::S8,
585             SplatLoadKind::S16 => OperandSize::S16,
586             SplatLoadKind::S32 => OperandSize::S32,
587             SplatLoadKind::S64 => OperandSize::S64,
588         }
589     }
590 
is_atomic(&self) -> bool591     pub(crate) fn is_atomic(&self) -> bool {
592         matches!(self, Self::Atomic(_, _))
593     }
594 }
595 
596 /// Kinds of behavior supported by Wasm loads.
597 #[derive(Copy, Clone)]
598 pub enum StoreKind {
599     /// Store the entire bytes of the operand size without any modifications.
600     Operand(OperandSize),
601     /// Store the entire bytes of the operand size without any modifications, atomically.
602     Atomic(OperandSize),
603     /// Store the content of selected lane.
604     VectorLane(LaneSelector),
605 }
606 
607 impl StoreKind {
vector_lane(lane: u8, size: OperandSize) -> Self608     pub fn vector_lane(lane: u8, size: OperandSize) -> Self {
609         Self::VectorLane(LaneSelector { lane, size })
610     }
611 }
612 
613 #[derive(Copy, Clone)]
614 pub struct LaneSelector {
615     pub lane: u8,
616     pub size: OperandSize,
617 }
618 
619 /// Types of vector integer to float conversions supported by WebAssembly.
620 pub(crate) enum V128ConvertKind {
621     /// 4 lanes of signed 32-bit integers to 4 lanes of 32-bit floats.
622     I32x4S,
623     /// 4 lanes of unsigned 32-bit integers to 4 lanes of 32-bit floats.
624     I32x4U,
625     /// 4 lanes of signed 32-bit integers to low bits of 2 lanes of 64-bit
626     /// floats.
627     I32x4LowS,
628     /// 4 lanes of unsigned 32-bit integers to low bits of 2 lanes of 64-bit
629     /// floats.
630     I32x4LowU,
631 }
632 
633 impl V128ConvertKind {
src_lane_size(&self) -> OperandSize634     pub(crate) fn src_lane_size(&self) -> OperandSize {
635         match self {
636             V128ConvertKind::I32x4S
637             | V128ConvertKind::I32x4U
638             | V128ConvertKind::I32x4LowS
639             | V128ConvertKind::I32x4LowU => OperandSize::S32,
640         }
641     }
642 
dst_lane_size(&self) -> OperandSize643     pub(crate) fn dst_lane_size(&self) -> OperandSize {
644         match self {
645             V128ConvertKind::I32x4S | V128ConvertKind::I32x4U => OperandSize::S32,
646             V128ConvertKind::I32x4LowS | V128ConvertKind::I32x4LowU => OperandSize::S64,
647         }
648     }
649 }
650 
651 /// Kinds of vector narrowing operations supported by WebAssembly.
652 pub(crate) enum V128NarrowKind {
653     /// Narrow 8 lanes of 16-bit integers to 16 lanes of 8-bit integers using
654     /// signed saturation.
655     I16x8S,
656     /// Narrow 8 lanes of 16-bit integers to 16 lanes of 8-bit integers using
657     /// unsigned saturation.
658     I16x8U,
659     /// Narrow 4 lanes of 32-bit integers to 8 lanes of 16-bit integers using
660     /// signed saturation.
661     I32x4S,
662     /// Narrow 4 lanes of 32-bit integers to 8 lanes of 16-bit integers using
663     /// unsigned saturation.
664     I32x4U,
665 }
666 
667 impl V128NarrowKind {
668     /// Return the size of the destination lanes.
dst_lane_size(&self) -> OperandSize669     pub(crate) fn dst_lane_size(&self) -> OperandSize {
670         match self {
671             Self::I16x8S | Self::I16x8U => OperandSize::S8,
672             Self::I32x4S | Self::I32x4U => OperandSize::S16,
673         }
674     }
675 }
676 
677 /// Kinds of vector extending operations supported by WebAssembly.
678 #[derive(Debug, Copy, Clone)]
679 pub(crate) enum V128ExtendKind {
680     /// Low half of i8x16 sign extended.
681     LowI8x16S,
682     /// High half of i8x16 sign extended.
683     HighI8x16S,
684     /// Low half of i8x16 zero extended.
685     LowI8x16U,
686     /// High half of i8x16 zero extended.
687     HighI8x16U,
688     /// Low half of i16x8 sign extended.
689     LowI16x8S,
690     /// High half of i16x8 sign extended.
691     HighI16x8S,
692     /// Low half of i16x8 zero extended.
693     LowI16x8U,
694     /// High half of i16x8 zero extended.
695     HighI16x8U,
696     /// Low half of i32x4 sign extended.
697     LowI32x4S,
698     /// High half of i32x4 sign extended.
699     HighI32x4S,
700     /// Low half of i32x4 zero extended.
701     LowI32x4U,
702     /// High half of i32x4 zero extended.
703     HighI32x4U,
704 }
705 
706 impl V128ExtendKind {
707     /// The size of the source's lanes.
src_lane_size(&self) -> OperandSize708     pub(crate) fn src_lane_size(&self) -> OperandSize {
709         match self {
710             Self::LowI8x16S | Self::LowI8x16U | Self::HighI8x16S | Self::HighI8x16U => {
711                 OperandSize::S8
712             }
713             Self::LowI16x8S | Self::LowI16x8U | Self::HighI16x8S | Self::HighI16x8U => {
714                 OperandSize::S16
715             }
716             Self::LowI32x4S | Self::LowI32x4U | Self::HighI32x4S | Self::HighI32x4U => {
717                 OperandSize::S32
718             }
719         }
720     }
721 }
722 
723 /// Kinds of vector equalities and non-equalities supported by WebAssembly.
724 pub(crate) enum VectorEqualityKind {
725     /// 16 lanes of 8 bit integers.
726     I8x16,
727     /// 8 lanes of 16 bit integers.
728     I16x8,
729     /// 4 lanes of 32 bit integers.
730     I32x4,
731     /// 2 lanes of 64 bit integers.
732     I64x2,
733     /// 4 lanes of 32 bit floats.
734     F32x4,
735     /// 2 lanes of 64 bit floats.
736     F64x2,
737 }
738 
739 impl VectorEqualityKind {
740     /// Get the lane size to use.
lane_size(&self) -> OperandSize741     pub(crate) fn lane_size(&self) -> OperandSize {
742         match self {
743             Self::I8x16 => OperandSize::S8,
744             Self::I16x8 => OperandSize::S16,
745             Self::I32x4 | Self::F32x4 => OperandSize::S32,
746             Self::I64x2 | Self::F64x2 => OperandSize::S64,
747         }
748     }
749 }
750 
751 /// Kinds of vector comparisons supported by WebAssembly.
752 pub(crate) enum VectorCompareKind {
753     /// 16 lanes of signed 8 bit integers.
754     I8x16S,
755     /// 16 lanes of unsigned 8 bit integers.
756     I8x16U,
757     /// 8 lanes of signed 16 bit integers.
758     I16x8S,
759     /// 8 lanes of unsigned 16 bit integers.
760     I16x8U,
761     /// 4 lanes of signed 32 bit integers.
762     I32x4S,
763     /// 4 lanes of unsigned 32 bit integers.
764     I32x4U,
765     /// 2 lanes of signed 64 bit integers.
766     I64x2S,
767     /// 4 lanes of 32 bit floats.
768     F32x4,
769     /// 2 lanes of 64 bit floats.
770     F64x2,
771 }
772 
773 impl VectorCompareKind {
774     /// Get the lane size to use.
lane_size(&self) -> OperandSize775     pub(crate) fn lane_size(&self) -> OperandSize {
776         match self {
777             Self::I8x16S | Self::I8x16U => OperandSize::S8,
778             Self::I16x8S | Self::I16x8U => OperandSize::S16,
779             Self::I32x4S | Self::I32x4U | Self::F32x4 => OperandSize::S32,
780             Self::I64x2S | Self::F64x2 => OperandSize::S64,
781         }
782     }
783 }
784 
785 /// Kinds of vector absolute operations supported by WebAssembly.
786 #[derive(Copy, Debug, Clone, Eq, PartialEq)]
787 pub(crate) enum V128AbsKind {
788     /// 8 bit integers.
789     I8x16,
790     /// 16 bit integers.
791     I16x8,
792     /// 32 bit integers.
793     I32x4,
794     /// 64 bit integers.
795     I64x2,
796     /// 32 bit floats.
797     F32x4,
798     /// 64 bit floats.
799     F64x2,
800 }
801 
802 impl V128AbsKind {
803     /// The lane size to use.
lane_size(&self) -> OperandSize804     pub(crate) fn lane_size(&self) -> OperandSize {
805         match self {
806             Self::I8x16 => OperandSize::S8,
807             Self::I16x8 => OperandSize::S16,
808             Self::I32x4 | Self::F32x4 => OperandSize::S32,
809             Self::I64x2 | Self::F64x2 => OperandSize::S64,
810         }
811     }
812 }
813 
814 /// Kinds of truncation for vectors supported by WebAssembly.
815 pub(crate) enum V128TruncKind {
816     /// Truncates 4 lanes of 32-bit floats to nearest integral value.
817     F32x4,
818     /// Truncates 2 lanes of 64-bit floats to nearest integral value.
819     F64x2,
820     /// Integers from signed F32x4.
821     I32x4FromF32x4S,
822     /// Integers from unsigned F32x4.
823     I32x4FromF32x4U,
824     /// Integers from signed F64x2.
825     I32x4FromF64x2SZero,
826     /// Integers from unsigned F64x2.
827     I32x4FromF64x2UZero,
828 }
829 
830 impl V128TruncKind {
831     /// The size of the source lanes.
src_lane_size(&self) -> OperandSize832     pub(crate) fn src_lane_size(&self) -> OperandSize {
833         match self {
834             V128TruncKind::F32x4
835             | V128TruncKind::I32x4FromF32x4S
836             | V128TruncKind::I32x4FromF32x4U => OperandSize::S32,
837             V128TruncKind::F64x2
838             | V128TruncKind::I32x4FromF64x2SZero
839             | V128TruncKind::I32x4FromF64x2UZero => OperandSize::S64,
840         }
841     }
842 
843     /// The size of the destination lanes.
dst_lane_size(&self) -> OperandSize844     pub(crate) fn dst_lane_size(&self) -> OperandSize {
845         if let V128TruncKind::F64x2 = self {
846             OperandSize::S64
847         } else {
848             OperandSize::S32
849         }
850     }
851 }
852 
/// Kinds of vector addition supported by WebAssembly.
#[derive(Debug, Clone, Copy)]
pub(crate) enum V128AddKind {
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
    /// 16 lanes of 8-bit integers wrapping.
    I8x16,
    /// 16 lanes of 8-bit integers signed saturating.
    I8x16SatS,
    /// 16 lanes of 8-bit integers unsigned saturating.
    I8x16SatU,
    /// 8 lanes of 16-bit integers wrapping.
    I16x8,
    /// 8 lanes of 16-bit integers signed saturating.
    I16x8SatS,
    /// 8 lanes of 16-bit integers unsigned saturating.
    I16x8SatU,
    /// 4 lanes of 32-bit integers wrapping.
    I32x4,
    /// 2 lanes of 64-bit integers wrapping.
    I64x2,
}

/// Kinds of vector subtraction supported by WebAssembly.
#[derive(Debug, Clone, Copy)]
pub(crate) enum V128SubKind {
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
    /// 16 lanes of 8-bit integers wrapping.
    I8x16,
    /// 16 lanes of 8-bit integers signed saturating.
    I8x16SatS,
    /// 16 lanes of 8-bit integers unsigned saturating.
    I8x16SatU,
    /// 8 lanes of 16-bit integers wrapping.
    I16x8,
    /// 8 lanes of 16-bit integers signed saturating.
    I16x8SatS,
    /// 8 lanes of 16-bit integers unsigned saturating.
    I16x8SatU,
    /// 4 lanes of 32-bit integers wrapping.
    I32x4,
    /// 2 lanes of 64-bit integers wrapping.
    I64x2,
}

901 impl From<V128NegKind> for V128SubKind {
from(value: V128NegKind) -> Self902     fn from(value: V128NegKind) -> Self {
903         match value {
904             V128NegKind::I8x16 => Self::I8x16,
905             V128NegKind::I16x8 => Self::I16x8,
906             V128NegKind::I32x4 => Self::I32x4,
907             V128NegKind::I64x2 => Self::I64x2,
908             V128NegKind::F32x4 | V128NegKind::F64x2 => unimplemented!(),
909         }
910     }
911 }
912 
/// Kinds of vector multiplication supported by WebAssembly.
#[derive(Debug, Clone, Copy)]
pub(crate) enum V128MulKind {
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
    /// 8 lanes of 16-bit integers.
    I16x8,
    /// 4 lanes of 32-bit integers.
    I32x4,
    /// 2 lanes of 64-bit integers.
    I64x2,
}

927 /// Kinds of vector negation supported by WebAssembly.
928 #[derive(Copy, Clone)]
929 pub(crate) enum V128NegKind {
930     /// 4 lanes of 32-bit floats.
931     F32x4,
932     /// 2 lanes of 64-bit floats.
933     F64x2,
934     /// 16 lanes of 8-bit integers.
935     I8x16,
936     /// 8 lanes of 16-bit integers.
937     I16x8,
938     /// 4 lanes of 32-bit integers.
939     I32x4,
940     /// 2 lanes of 64-bit integers.
941     I64x2,
942 }
943 
944 impl V128NegKind {
945     /// The size of the lanes.
lane_size(&self) -> OperandSize946     pub(crate) fn lane_size(&self) -> OperandSize {
947         match self {
948             Self::F32x4 | Self::I32x4 => OperandSize::S32,
949             Self::F64x2 | Self::I64x2 => OperandSize::S64,
950             Self::I8x16 => OperandSize::S8,
951             Self::I16x8 => OperandSize::S16,
952         }
953     }
954 }
955 
/// Kinds of extended pairwise addition supported by WebAssembly.
#[derive(Debug, Clone, Copy)]
pub(crate) enum V128ExtAddKind {
    /// 16 lanes of signed 8-bit integers.
    I8x16S,
    /// 16 lanes of unsigned 8-bit integers.
    I8x16U,
    /// 8 lanes of signed 16-bit integers.
    I16x8S,
    /// 8 lanes of unsigned 16-bit integers.
    I16x8U,
}

/// Kinds of vector extended multiplication supported by WebAssembly.
#[derive(Debug, Clone, Copy)]
pub(crate) enum V128ExtMulKind {
    /// Low half of i8x16, signed.
    LowI8x16S,
    /// High half of i8x16, signed.
    HighI8x16S,
    /// Low half of i8x16, unsigned.
    LowI8x16U,
    /// High half of i8x16, unsigned.
    HighI8x16U,
    /// Low half of i16x8, signed.
    LowI16x8S,
    /// High half of i16x8, signed.
    HighI16x8S,
    /// Low half of i16x8, unsigned.
    LowI16x8U,
    /// High half of i16x8, unsigned.
    HighI16x8U,
    /// Low half of i32x4, signed.
    LowI32x4S,
    /// High half of i32x4, signed.
    HighI32x4S,
    /// Low half of i32x4, unsigned.
    LowI32x4U,
    /// High half of i32x4, unsigned.
    HighI32x4U,
}

985 impl From<V128ExtMulKind> for V128ExtendKind {
from(value: V128ExtMulKind) -> Self986     fn from(value: V128ExtMulKind) -> Self {
987         match value {
988             V128ExtMulKind::LowI8x16S => Self::LowI8x16S,
989             V128ExtMulKind::HighI8x16S => Self::HighI8x16S,
990             V128ExtMulKind::LowI8x16U => Self::LowI8x16U,
991             V128ExtMulKind::HighI8x16U => Self::HighI8x16U,
992             V128ExtMulKind::LowI16x8S => Self::LowI16x8S,
993             V128ExtMulKind::HighI16x8S => Self::HighI16x8S,
994             V128ExtMulKind::LowI16x8U => Self::LowI16x8U,
995             V128ExtMulKind::HighI16x8U => Self::HighI16x8U,
996             V128ExtMulKind::LowI32x4S => Self::LowI32x4S,
997             V128ExtMulKind::HighI32x4S => Self::HighI32x4S,
998             V128ExtMulKind::LowI32x4U => Self::LowI32x4U,
999             V128ExtMulKind::HighI32x4U => Self::HighI32x4U,
1000         }
1001     }
1002 }
1003 
1004 impl From<V128ExtMulKind> for V128MulKind {
from(value: V128ExtMulKind) -> Self1005     fn from(value: V128ExtMulKind) -> Self {
1006         match value {
1007             V128ExtMulKind::LowI8x16S
1008             | V128ExtMulKind::HighI8x16S
1009             | V128ExtMulKind::LowI8x16U
1010             | V128ExtMulKind::HighI8x16U => Self::I16x8,
1011             V128ExtMulKind::LowI16x8S
1012             | V128ExtMulKind::HighI16x8S
1013             | V128ExtMulKind::LowI16x8U
1014             | V128ExtMulKind::HighI16x8U => Self::I32x4,
1015             V128ExtMulKind::LowI32x4S
1016             | V128ExtMulKind::HighI32x4S
1017             | V128ExtMulKind::LowI32x4U
1018             | V128ExtMulKind::HighI32x4U => Self::I64x2,
1019         }
1020     }
1021 }
1022 
/// Operand size, in bits.
///
/// The associated methods expose the same size as a bit count, a byte count
/// and the binary logarithm of the bit count.
#[derive(Copy, Debug, Clone, Eq, PartialEq)]
pub(crate) enum OperandSize {
    /// 8 bits.
    S8,
    /// 16 bits.
    S16,
    /// 32 bits.
    S32,
    /// 64 bits.
    S64,
    /// 128 bits.
    S128,
}
1037 
1038 impl OperandSize {
1039     /// The number of bits in the operand.
num_bits(&self) -> u81040     pub fn num_bits(&self) -> u8 {
1041         match self {
1042             OperandSize::S8 => 8,
1043             OperandSize::S16 => 16,
1044             OperandSize::S32 => 32,
1045             OperandSize::S64 => 64,
1046             OperandSize::S128 => 128,
1047         }
1048     }
1049 
1050     /// The number of bytes in the operand.
bytes(&self) -> u321051     pub fn bytes(&self) -> u32 {
1052         match self {
1053             Self::S8 => 1,
1054             Self::S16 => 2,
1055             Self::S32 => 4,
1056             Self::S64 => 8,
1057             Self::S128 => 16,
1058         }
1059     }
1060 
1061     /// The binary logarithm of the number of bits in the operand.
log2(&self) -> u81062     pub fn log2(&self) -> u8 {
1063         match self {
1064             OperandSize::S8 => 3,
1065             OperandSize::S16 => 4,
1066             OperandSize::S32 => 5,
1067             OperandSize::S64 => 6,
1068             OperandSize::S128 => 7,
1069         }
1070     }
1071 
1072     /// Create an [`OperandSize`]  from the given number of bytes.
from_bytes(bytes: u8) -> Self1073     pub fn from_bytes(bytes: u8) -> Self {
1074         use OperandSize::*;
1075         match bytes {
1076             4 => S32,
1077             8 => S64,
1078             16 => S128,
1079             _ => panic!("Invalid bytes {bytes} for OperandSize"),
1080         }
1081     }
1082 
extend_to<T: ExtendType>(&self, to: Self) -> Option<Extend<T>>1083     pub fn extend_to<T: ExtendType>(&self, to: Self) -> Option<Extend<T>> {
1084         match to {
1085             OperandSize::S32 => match self {
1086                 OperandSize::S8 => Some(Extend::I32Extend8),
1087                 OperandSize::S16 => Some(Extend::I32Extend16),
1088                 _ => None,
1089             },
1090             OperandSize::S64 => match self {
1091                 OperandSize::S8 => Some(Extend::I64Extend8),
1092                 OperandSize::S16 => Some(Extend::I64Extend16),
1093                 OperandSize::S32 => Some(Extend::I64Extend32),
1094                 _ => None,
1095             },
1096             _ => None,
1097         }
1098     }
1099 
1100     /// The number of bits in the mantissa.
1101     ///
1102     /// Only implemented for floats.
mantissa_bits(&self) -> u81103     pub fn mantissa_bits(&self) -> u8 {
1104         match self {
1105             Self::S32 => 8,
1106             Self::S64 => 11,
1107             _ => unimplemented!(),
1108         }
1109     }
1110 }
1111 
/// An abstraction over a register or immediate.
///
/// Used as the operand type of instructions that accept either form, e.g.
/// the `rhs` of the integer arithmetic methods of the `MacroAssembler`
/// trait.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(crate) enum RegImm {
    /// A register.
    Reg(Reg),
    /// A tagged immediate argument.
    Imm(Imm),
}
1120 
/// A tagged representation of an immediate.
///
/// Integer immediates are stored as unsigned bit patterns and float
/// immediates as their raw bits; the tag records the value type.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(crate) enum Imm {
    /// I32 immediate.
    I32(u32),
    /// I64 immediate.
    I64(u64),
    /// F32 immediate.
    F32(u32),
    /// F64 immediate.
    F64(u64),
    /// V128 immediate.
    V128(i128),
}
1135 
1136 impl Imm {
1137     /// Create a new I64 immediate.
i64(val: i64) -> Self1138     pub fn i64(val: i64) -> Self {
1139         Self::I64(val as u64)
1140     }
1141 
1142     /// Create a new I32 immediate.
i32(val: i32) -> Self1143     pub fn i32(val: i32) -> Self {
1144         Self::I32(val as u32)
1145     }
1146 
1147     /// Create a new F32 immediate.
f32(bits: u32) -> Self1148     pub fn f32(bits: u32) -> Self {
1149         Self::F32(bits)
1150     }
1151 
1152     /// Create a new F64 immediate.
f64(bits: u64) -> Self1153     pub fn f64(bits: u64) -> Self {
1154         Self::F64(bits)
1155     }
1156 
1157     /// Create a new V128 immediate.
v128(bits: i128) -> Self1158     pub fn v128(bits: i128) -> Self {
1159         Self::V128(bits)
1160     }
1161 
1162     /// Convert the immediate to i32, if possible.
to_i32(&self) -> Option<i32>1163     pub fn to_i32(&self) -> Option<i32> {
1164         match self {
1165             Self::I32(v) => Some(*v as i32),
1166             Self::I64(v) => i32::try_from(*v as i64).ok(),
1167             _ => None,
1168         }
1169     }
1170 
1171     /// Unwraps the underlying integer value as u64.
1172     /// # Panics
1173     /// This function panics if the underlying value can't be represented
1174     /// as u64.
unwrap_as_u64(&self) -> u641175     pub fn unwrap_as_u64(&self) -> u64 {
1176         match self {
1177             Self::I32(v) => *v as u64,
1178             Self::I64(v) => *v,
1179             Self::F32(v) => *v as u64,
1180             Self::F64(v) => *v,
1181             _ => unreachable!(),
1182         }
1183     }
1184 
1185     /// Get the operand size of the immediate.
size(&self) -> OperandSize1186     pub fn size(&self) -> OperandSize {
1187         match self {
1188             Self::I32(_) | Self::F32(_) => OperandSize::S32,
1189             Self::I64(_) | Self::F64(_) => OperandSize::S64,
1190             Self::V128(_) => OperandSize::S128,
1191         }
1192     }
1193 
1194     /// Get a little endian representation of the immediate.
1195     ///
1196     /// This method heap allocates and is intended to be used when adding
1197     /// values to the constant pool.
to_bytes(&self) -> Vec<u8>1198     pub fn to_bytes(&self) -> Vec<u8> {
1199         match self {
1200             Imm::I32(n) => n.to_le_bytes().to_vec(),
1201             Imm::I64(n) => n.to_le_bytes().to_vec(),
1202             Imm::F32(n) => n.to_le_bytes().to_vec(),
1203             Imm::F64(n) => n.to_le_bytes().to_vec(),
1204             Imm::V128(n) => n.to_le_bytes().to_vec(),
1205         }
1206     }
1207 }
1208 
/// The location of the [VMContext] used for function calls.
///
/// Values of this type are the elements of a [`ContextArgs`].
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub(crate) enum VMContextLoc {
    /// Dynamic, stored in the given register.
    Reg(Reg),
    /// The pinned [VMContext] register.
    Pinned,
    /// A different VMContext is loaded at the provided offset from the current
    /// VMContext.
    OffsetFromPinned(u32),
}
1220 
/// The maximum number of context arguments currently used across the compiler.
///
/// This is the array length of [`ContextArgs::CalleeAndCallerVMContext`].
pub(crate) const MAX_CONTEXT_ARGS: usize = 2;
1223 
/// Out-of-band special purpose arguments used for function call emission.
///
/// We cannot rely on the value stack for these values given that inserting
/// register or memory values at arbitrary locations of the value stack has the
/// potential to break the stack ordering principle, which states that older
/// values must always precede newer values, effectively simulating the order of
/// values in the machine stack.
///
/// The [ContextArgs] are meant to be resolved at every callsite; in some cases
/// it might be possible to construct it early on, but given that it might
/// contain allocatable registers, it's preferred to construct it in
/// [FnCall::emit].
#[derive(Clone, Debug)]
pub(crate) enum ContextArgs {
    /// A single context argument is required; the current pinned [VMContext]
    /// register must be passed as the first argument of the function call.
    VMContext([VMContextLoc; 1]),
    /// The callee and caller context arguments are required. In this case, the
    /// callee context argument is usually stored into an allocatable register
    /// and the caller is always the current pinned [VMContext] pointer.
    CalleeAndCallerVMContext([VMContextLoc; MAX_CONTEXT_ARGS]),
}
1245 
1246 impl ContextArgs {
1247     /// Construct a [ContextArgs] declaring the usage of the pinned [VMContext]
1248     /// register as both the caller and callee context arguments.
pinned_callee_and_caller_vmctx() -> Self1249     pub fn pinned_callee_and_caller_vmctx() -> Self {
1250         Self::CalleeAndCallerVMContext([VMContextLoc::Pinned, VMContextLoc::Pinned])
1251     }
1252 
1253     /// Construct a [ContextArgs] that declares the usage of the pinned
1254     /// [VMContext] register as the only context argument.
pinned_vmctx() -> Self1255     pub fn pinned_vmctx() -> Self {
1256         Self::VMContext([VMContextLoc::Pinned])
1257     }
1258 
1259     /// Construct a [ContextArgs] that declares the usage of a [VMContext] loaded
1260     /// indirectly from the pinned [VMContext] register as the only context
1261     /// argument.
offset_from_pinned_vmctx(offset: u32) -> Self1262     pub fn offset_from_pinned_vmctx(offset: u32) -> Self {
1263         Self::VMContext([VMContextLoc::OffsetFromPinned(offset)])
1264     }
1265 
1266     /// Construct a [ContextArgs] that declares a dynamic callee context and the
1267     /// pinned [VMContext] register as the context arguments.
with_callee_and_pinned_caller(callee_vmctx: Reg) -> Self1268     pub fn with_callee_and_pinned_caller(callee_vmctx: Reg) -> Self {
1269         Self::CalleeAndCallerVMContext([VMContextLoc::Reg(callee_vmctx), VMContextLoc::Pinned])
1270     }
1271 
1272     /// Get the length of the [ContextArgs].
len(&self) -> usize1273     pub fn len(&self) -> usize {
1274         self.as_slice().len()
1275     }
1276 
1277     /// Get a slice of the context arguments.
as_slice(&self) -> &[VMContextLoc]1278     pub fn as_slice(&self) -> &[VMContextLoc] {
1279         match self {
1280             Self::VMContext(a) => a.as_slice(),
1281             Self::CalleeAndCallerVMContext(a) => a.as_slice(),
1282         }
1283     }
1284 }
1285 
/// The kind of callee of a function call, as returned by the callback passed
/// to `MacroAssembler::call`.
#[derive(Copy, Clone, Debug)]
pub(crate) enum CalleeKind {
    /// A function call to a raw address.
    Indirect(Reg),
    /// A function call to a local function.
    Direct(UserExternalNameRef),
}
1293 
1294 impl CalleeKind {
1295     /// Creates a callee kind from a register.
indirect(reg: Reg) -> Self1296     pub fn indirect(reg: Reg) -> Self {
1297         Self::Indirect(reg)
1298     }
1299 
1300     /// Creates a direct callee kind from a function name.
direct(name: UserExternalNameRef) -> Self1301     pub fn direct(name: UserExternalNameRef) -> Self {
1302         Self::Direct(name)
1303     }
1304 }
1305 
1306 impl RegImm {
1307     /// Register constructor.
reg(r: Reg) -> Self1308     pub fn reg(r: Reg) -> Self {
1309         RegImm::Reg(r)
1310     }
1311 
1312     /// I64 immediate constructor.
i64(val: i64) -> Self1313     pub fn i64(val: i64) -> Self {
1314         RegImm::Imm(Imm::i64(val))
1315     }
1316 
1317     /// I32 immediate constructor.
i32(val: i32) -> Self1318     pub fn i32(val: i32) -> Self {
1319         RegImm::Imm(Imm::i32(val))
1320     }
1321 
1322     /// F32 immediate, stored using its bits representation.
f32(bits: u32) -> Self1323     pub fn f32(bits: u32) -> Self {
1324         RegImm::Imm(Imm::f32(bits))
1325     }
1326 
1327     /// F64 immediate, stored using its bits representation.
f64(bits: u64) -> Self1328     pub fn f64(bits: u64) -> Self {
1329         RegImm::Imm(Imm::f64(bits))
1330     }
1331 
1332     /// V128 immediate.
v128(bits: i128) -> Self1333     pub fn v128(bits: i128) -> Self {
1334         RegImm::Imm(Imm::v128(bits))
1335     }
1336 }
1337 
1338 impl From<Reg> for RegImm {
from(r: Reg) -> Self1339     fn from(r: Reg) -> Self {
1340         Self::Reg(r)
1341     }
1342 }
1343 
/// Rounding mode passed to `MacroAssembler::float_round`.
///
/// NOTE(review): the variants presumably correspond to the WebAssembly
/// `nearest`, `ceil`, `floor` and `trunc` operators respectively — confirm
/// against the ISA implementations.
#[derive(Debug)]
pub enum RoundingMode {
    /// Round to the nearest value.
    Nearest,
    /// Round up.
    Up,
    /// Round down.
    Down,
    /// Round towards zero.
    Zero,
}
1351 
/// Memory flags for trusted loads/stores.
///
/// Equal to `MemFlags::trusted()`.
pub const TRUSTED_FLAGS: MemFlags = MemFlags::trusted();
1354 
/// Flags used for WebAssembly loads / stores.
///
/// Untrusted by default, so we don't set `no_trap`: the flags start from
/// `MemFlags::new()` rather than `MemFlags::trusted()`.
/// We also ensure that the endianness is the right one for WebAssembly
/// (little endian).
pub const UNTRUSTED_FLAGS: MemFlags = MemFlags::new().with_endianness(Endianness::Little);
1359 
1360 /// Generic MacroAssembler interface used by the code generation.
1361 ///
/// The MacroAssembler trait aims to expose an interface, high-level enough,
/// so that each ISA can provide its own lowering to machine code. For example,
/// for WebAssembly operators that don't have a direct mapping to a machine
/// instruction, the interface defines a signature matching the WebAssembly
/// operator, allowing each implementation to lower such operator entirely.
/// This approach attributes more responsibility to the MacroAssembler, but frees
/// the caller from having to assemble the right sequence of
/// instructions at the operator callsite.
1370 ///
1371 /// The interface defaults to a three-argument form for binary operations;
1372 /// this allows a natural mapping to instructions for RISC architectures,
1373 /// that use three-argument form.
1374 /// This approach allows for a more general interface that can be restricted
1375 /// where needed, in the case of architectures that use a two-argument form.
1376 
1377 pub(crate) trait MacroAssembler {
1378     /// The addressing mode.
1379     type Address: Copy + Debug;
1380 
1381     /// The pointer representation of the target ISA,
1382     /// used to access information from [`VMOffsets`].
1383     type Ptr: PtrSize;
1384 
1385     /// The ABI details of the target.
1386     type ABI: abi::ABI;
1387 
1388     /// Emit the function prologue.
prologue(&mut self, vmctx: Reg) -> Result<()>1389     fn prologue(&mut self, vmctx: Reg) -> Result<()> {
1390         self.frame_setup()?;
1391         self.check_stack(vmctx)
1392     }
1393 
1394     /// Generate the frame setup sequence.
frame_setup(&mut self) -> Result<()>1395     fn frame_setup(&mut self) -> Result<()>;
1396 
1397     /// Generate the frame restore sequence.
frame_restore(&mut self) -> Result<()>1398     fn frame_restore(&mut self) -> Result<()>;
1399 
1400     /// Emit a stack check.
check_stack(&mut self, vmctx: Reg) -> Result<()>1401     fn check_stack(&mut self, vmctx: Reg) -> Result<()>;
1402 
    /// Emit the function epilogue.
    ///
    /// The default implementation only emits the frame restore sequence
    /// ([`Self::frame_restore`]).
    fn epilogue(&mut self) -> Result<()> {
        self.frame_restore()
    }
1407 
1408     /// Reserve stack space.
reserve_stack(&mut self, bytes: u32) -> Result<()>1409     fn reserve_stack(&mut self, bytes: u32) -> Result<()>;
1410 
1411     /// Free stack space.
free_stack(&mut self, bytes: u32) -> Result<()>1412     fn free_stack(&mut self, bytes: u32) -> Result<()>;
1413 
    /// Reset the stack pointer to the given offset.
1415     ///
1416     /// Used to reset the stack pointer to a given offset
1417     /// when dealing with unreachable code.
reset_stack_pointer(&mut self, offset: SPOffset) -> Result<()>1418     fn reset_stack_pointer(&mut self, offset: SPOffset) -> Result<()>;
1419 
1420     /// Get the address of a local slot.
local_address(&mut self, local: &LocalSlot) -> Result<Self::Address>1421     fn local_address(&mut self, local: &LocalSlot) -> Result<Self::Address>;
1422 
    /// Constructs an address with an offset that is relative to the
    /// current position of the stack pointer (e.g. `[sp + (sp_offset -
    /// offset)]`).
address_from_sp(&self, offset: SPOffset) -> Result<Self::Address>1426     fn address_from_sp(&self, offset: SPOffset) -> Result<Self::Address>;
1427 
    /// Constructs an address with an offset that is absolute to the
    /// current position of the stack pointer (e.g. `[sp + offset]`).
address_at_sp(&self, offset: SPOffset) -> Result<Self::Address>1430     fn address_at_sp(&self, offset: SPOffset) -> Result<Self::Address>;
1431 
1432     /// Alias for [`Self::address_at_reg`] using the VMContext register as
1433     /// a base. The VMContext register is derived from the ABI type that is
1434     /// associated to the MacroAssembler.
address_at_vmctx(&self, offset: u32) -> Result<Self::Address>1435     fn address_at_vmctx(&self, offset: u32) -> Result<Self::Address>;
1436 
1437     /// Construct an address that is absolute to the current position
1438     /// of the given register.
address_at_reg(&self, reg: Reg, offset: u32) -> Result<Self::Address>1439     fn address_at_reg(&self, reg: Reg, offset: u32) -> Result<Self::Address>;
1440 
1441     /// Emit a function call to either a local or external function.
call( &mut self, stack_args_size: u32, f: impl FnMut(&mut Self) -> Result<(CalleeKind, CallingConvention)>, ) -> Result<u32>1442     fn call(
1443         &mut self,
1444         stack_args_size: u32,
1445         f: impl FnMut(&mut Self) -> Result<(CalleeKind, CallingConvention)>,
1446     ) -> Result<u32>;
1447 
1448     /// Acquire a scratch register and execute the given callback.
with_scratch<T: ScratchType, R>(&mut self, f: impl FnOnce(&mut Self, Scratch) -> R) -> R1449     fn with_scratch<T: ScratchType, R>(&mut self, f: impl FnOnce(&mut Self, Scratch) -> R) -> R;
1450 
1451     /// Convenience wrapper over [`Self::with_scratch`], derives the register class
1452     /// for a particular Wasm value type.
with_scratch_for<R>( &mut self, ty: WasmValType, f: impl FnOnce(&mut Self, Scratch) -> R, ) -> R1453     fn with_scratch_for<R>(
1454         &mut self,
1455         ty: WasmValType,
1456         f: impl FnOnce(&mut Self, Scratch) -> R,
1457     ) -> R {
1458         match ty {
1459             WasmValType::I32
1460             | WasmValType::I64
1461             | WasmValType::Ref(WasmRefType {
1462                 heap_type: WasmHeapType::Func,
1463                 ..
1464             }) => self.with_scratch::<IntScratch, _>(f),
1465             WasmValType::F32 | WasmValType::F64 | WasmValType::V128 => {
1466                 self.with_scratch::<FloatScratch, _>(f)
1467             }
1468             _ => unimplemented!(),
1469         }
1470     }
1471 
1472     /// Get stack pointer offset.
sp_offset(&self) -> Result<SPOffset>1473     fn sp_offset(&self) -> Result<SPOffset>;
1474 
1475     /// Perform a stack store.
store(&mut self, src: RegImm, dst: Self::Address, size: OperandSize) -> Result<()>1476     fn store(&mut self, src: RegImm, dst: Self::Address, size: OperandSize) -> Result<()>;
1477 
1478     /// Alias for `MacroAssembler::store` with the operand size corresponding
1479     /// to the pointer size of the target.
store_ptr(&mut self, src: Reg, dst: Self::Address) -> Result<()>1480     fn store_ptr(&mut self, src: Reg, dst: Self::Address) -> Result<()>;
1481 
    /// Perform a WebAssembly store.
    /// A WebAssembly store introduces several additional invariants compared to
    /// [Self::store], more precisely, it can implicitly trap, in certain
    /// circumstances, even if explicit bounds checks are elided, in that sense,
    /// we consider this type of store as untrusted. It can also differ with
    /// regards to the endianness depending on the target ISA. For this reason,
    /// [Self::wasm_store], should be explicitly used when emitting WebAssembly
    /// stores.
wasm_store(&mut self, src: Reg, dst: Self::Address, store_kind: StoreKind) -> Result<()>1490     fn wasm_store(&mut self, src: Reg, dst: Self::Address, store_kind: StoreKind) -> Result<()>;
1491 
1492     /// Perform a zero-extended stack load.
load(&mut self, src: Self::Address, dst: WritableReg, size: OperandSize) -> Result<()>1493     fn load(&mut self, src: Self::Address, dst: WritableReg, size: OperandSize) -> Result<()>;
1494 
1495     /// Perform a WebAssembly load.
1496     /// A WebAssembly load introduces several additional invariants compared to
1497     /// [Self::load], more precisely, it can implicitly trap, in certain
1498     /// circumstances, even if explicit bounds checks are elided, in that sense,
1499     /// we consider this type of load as untrusted. It can also differ with
1500     /// regards to the endianness depending on the target ISA. For this reason,
1501     /// [Self::wasm_load], should be explicitly used when emitting WebAssembly
1502     /// loads.
wasm_load(&mut self, src: Self::Address, dst: WritableReg, kind: LoadKind) -> Result<()>1503     fn wasm_load(&mut self, src: Self::Address, dst: WritableReg, kind: LoadKind) -> Result<()>;
1504 
1505     /// Alias for `MacroAssembler::load` with the operand size corresponding
1506     /// to the pointer size of the target.
load_ptr(&mut self, src: Self::Address, dst: WritableReg) -> Result<()>1507     fn load_ptr(&mut self, src: Self::Address, dst: WritableReg) -> Result<()>;
1508 
1509     /// Computes the effective address and stores the result in the destination
1510     /// register.
compute_addr( &mut self, _src: Self::Address, _dst: WritableReg, _size: OperandSize, ) -> Result<()>1511     fn compute_addr(
1512         &mut self,
1513         _src: Self::Address,
1514         _dst: WritableReg,
1515         _size: OperandSize,
1516     ) -> Result<()>;
1517 
1518     /// Pop a value from the machine stack into the given register.
pop(&mut self, dst: WritableReg, size: OperandSize) -> Result<()>1519     fn pop(&mut self, dst: WritableReg, size: OperandSize) -> Result<()>;
1520 
1521     /// Perform a move.
mov(&mut self, dst: WritableReg, src: RegImm, size: OperandSize) -> Result<()>1522     fn mov(&mut self, dst: WritableReg, src: RegImm, size: OperandSize) -> Result<()>;
1523 
1524     /// Perform a conditional move.
cmov(&mut self, dst: WritableReg, src: Reg, cc: IntCmpKind, size: OperandSize) -> Result<()>1525     fn cmov(&mut self, dst: WritableReg, src: Reg, cc: IntCmpKind, size: OperandSize)
1526     -> Result<()>;
1527 
    /// Performs a memory move of `bytes` bytes between two stack locations,
    /// from `src` to `dst`, both given as stack pointer offsets.
    ///
    /// Bytes are moved one machine word (`ABI::word_bytes()`) at a time through
    /// an integer scratch register. If a remainder is left over, it must be
    /// exactly half a word and is moved with a single narrower load/store.
    ///
    /// `direction` selects the order in which the words are visited:
    /// * `LowToHigh` requires `dst < src` and starts `bytes` below the given
    ///   offsets, advancing the offsets upwards before each word is moved.
    /// * `HighToLow` requires `dst > src` and starts at the given offsets,
    ///   decreasing them after each word is moved.
    ///
    /// The ordering requirements and the 4-byte granularity of `bytes` are
    /// only checked with `debug_assert!`.
    ///
    /// # Errors
    ///
    /// Propagates any error returned while constructing addresses or emitting
    /// the loads and stores.
    fn memmove(
        &mut self,
        src: SPOffset,
        dst: SPOffset,
        bytes: u32,
        direction: MemMoveDirection,
    ) -> Result<()> {
        // The direction must be consistent with the relative position of the
        // two offsets.
        match direction {
            MemMoveDirection::LowToHigh => debug_assert!(dst.as_u32() < src.as_u32()),
            MemMoveDirection::HighToLow => debug_assert!(dst.as_u32() > src.as_u32()),
        }
        // At least 4 byte aligned.
        debug_assert!(bytes % 4 == 0);
        // Number of bytes that still have to be moved.
        let mut remaining = bytes;
        let word_bytes = <Self::ABI as abi::ABI>::word_bytes();

        let word_bytes = word_bytes as u32;

        // Running offsets; initialized per direction below and reused for the
        // trailing half-word move.
        let mut dst_offs;
        let mut src_offs;
        match direction {
            MemMoveDirection::LowToHigh => {
                // Start `bytes` below the given offsets; each iteration bumps
                // the offsets by one word *before* moving that word.
                dst_offs = dst.as_u32() - bytes;
                src_offs = src.as_u32() - bytes;
                self.with_scratch::<IntScratch, _>(|masm, scratch| {
                    while remaining >= word_bytes {
                        remaining -= word_bytes;
                        dst_offs += word_bytes;
                        src_offs += word_bytes;

                        masm.load_ptr(
                            masm.address_from_sp(SPOffset::from_u32(src_offs))?,
                            scratch.writable(),
                        )?;
                        masm.store_ptr(
                            scratch.inner(),
                            masm.address_from_sp(SPOffset::from_u32(dst_offs))?,
                        )?;
                    }
                    wasmtime_environ::error::Ok(())
                })?;
            }
            MemMoveDirection::HighToLow => {
                // Go from the end to the beginning to handle overlapping addresses.
                src_offs = src.as_u32();
                dst_offs = dst.as_u32();
                self.with_scratch::<IntScratch, _>(|masm, scratch| {
                    while remaining >= word_bytes {
                        masm.load_ptr(
                            masm.address_from_sp(SPOffset::from_u32(src_offs))?,
                            scratch.writable(),
                        )?;
                        masm.store_ptr(
                            scratch.inner(),
                            masm.address_from_sp(SPOffset::from_u32(dst_offs))?,
                        )?;

                        // Here the offsets are adjusted *after* the move.
                        remaining -= word_bytes;
                        src_offs -= word_bytes;
                        dst_offs -= word_bytes;
                    }
                    wasmtime_environ::error::Ok(())
                })?;
            }
        }

        // Whatever is left is smaller than a word; it is expected to be exactly
        // half a word and is moved with a half-word sized load/store.
        if remaining > 0 {
            let half_word = word_bytes / 2;
            let ptr_size = OperandSize::from_bytes(half_word as u8);
            debug_assert!(remaining == half_word);
            // Need to move the offsets ahead in the `LowToHigh` case to
            // compensate for the initial subtraction of `bytes`.
            if direction == MemMoveDirection::LowToHigh {
                dst_offs += half_word;
                src_offs += half_word;
            }

            self.with_scratch::<IntScratch, _>(|masm, scratch| {
                masm.load(
                    masm.address_from_sp(SPOffset::from_u32(src_offs))?,
                    scratch.writable(),
                    ptr_size,
                )?;
                masm.store(
                    scratch.inner().into(),
                    masm.address_from_sp(SPOffset::from_u32(dst_offs))?,
                    ptr_size,
                )?;
                wasmtime_environ::error::Ok(())
            })?;
        }
        Ok(())
    }
1623 
1624     /// Perform add operation.
add(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>1625     fn add(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;
1626 
1627     /// Perform add with unsigned extension.
add_uextend( &mut self, dst: WritableReg, lhs: Reg, rhs: Reg, from_size: OperandSize, size: OperandSize, ) -> Result<()>1628     fn add_uextend(
1629         &mut self,
1630         dst: WritableReg,
1631         lhs: Reg,
1632         rhs: Reg,
1633         from_size: OperandSize,
1634         size: OperandSize,
1635     ) -> Result<()>;
1636 
1637     /// Perform a checked unsigned integer addition, emitting the provided trap
1638     /// if the addition overflows.
1639     ///
1640     /// Note: This only accepts immediate operands. For register operands with
1641     /// proper extension, use add_uextend with manual overflow checking.
checked_uadd( &mut self, dst: WritableReg, lhs: Reg, rhs: Imm, size: OperandSize, trap: TrapCode, ) -> Result<()>1642     fn checked_uadd(
1643         &mut self,
1644         dst: WritableReg,
1645         lhs: Reg,
1646         rhs: Imm,
1647         size: OperandSize,
1648         trap: TrapCode,
1649     ) -> Result<()>;
1650 
1651     /// Perform subtraction operation.
sub(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>1652     fn sub(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;
1653 
1654     /// Perform multiplication operation.
mul(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>1655     fn mul(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;
1656 
1657     /// Perform a floating point add operation.
float_add(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>1658     fn float_add(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
1659 
1660     /// Perform a floating point subtraction operation.
float_sub(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>1661     fn float_sub(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
1662 
1663     /// Perform a floating point multiply operation.
float_mul(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>1664     fn float_mul(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
1665 
1666     /// Perform a floating point divide operation.
float_div(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>1667     fn float_div(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
1668 
1669     /// Perform a floating point minimum operation. In x86, this will emit
1670     /// multiple instructions.
float_min(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>1671     fn float_min(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
1672 
1673     /// Perform a floating point maximum operation. In x86, this will emit
1674     /// multiple instructions.
float_max(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>1675     fn float_max(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
1676 
1677     /// Perform a floating point copysign operation. In x86, this will emit
1678     /// multiple instructions.
float_copysign( &mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize, ) -> Result<()>1679     fn float_copysign(
1680         &mut self,
1681         dst: WritableReg,
1682         lhs: Reg,
1683         rhs: Reg,
1684         size: OperandSize,
1685     ) -> Result<()>;
1686 
1687     /// Perform a floating point abs operation.
float_abs(&mut self, dst: WritableReg, size: OperandSize) -> Result<()>1688     fn float_abs(&mut self, dst: WritableReg, size: OperandSize) -> Result<()>;
1689 
1690     /// Perform a floating point negation operation.
float_neg(&mut self, dst: WritableReg, size: OperandSize) -> Result<()>1691     fn float_neg(&mut self, dst: WritableReg, size: OperandSize) -> Result<()>;
1692 
1693     /// Perform a floating point rounding operation using the given rounding `mode`.
float_round< F: FnMut(&mut FuncEnv<Self::Ptr>, &mut CodeGenContext<Emission>, &mut Self) -> Result<()>, >( &mut self, mode: RoundingMode, env: &mut FuncEnv<Self::Ptr>, context: &mut CodeGenContext<Emission>, size: OperandSize, fallback: F, ) -> Result<()>1694     fn float_round<
1695         F: FnMut(&mut FuncEnv<Self::Ptr>, &mut CodeGenContext<Emission>, &mut Self) -> Result<()>,
1696     >(
1697         &mut self,
1698         mode: RoundingMode,
1699         env: &mut FuncEnv<Self::Ptr>,
1700         context: &mut CodeGenContext<Emission>,
1701         size: OperandSize,
1702         fallback: F,
1703     ) -> Result<()>;
1704 
1705     /// Perform a floating point square root operation.
float_sqrt(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>1706     fn float_sqrt(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>;
1707 
1708     /// Perform logical and operation.
and(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>1709     fn and(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;
1710 
1711     /// Perform logical or operation.
or(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>1712     fn or(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;
1713 
1714     /// Perform logical exclusive or operation.
xor(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>1715     fn xor(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;
1716 
1717     /// Perform a shift operation between a register and an immediate.
shift_ir( &mut self, dst: WritableReg, imm: Imm, lhs: Reg, kind: ShiftKind, size: OperandSize, ) -> Result<()>1718     fn shift_ir(
1719         &mut self,
1720         dst: WritableReg,
1721         imm: Imm,
1722         lhs: Reg,
1723         kind: ShiftKind,
1724         size: OperandSize,
1725     ) -> Result<()>;
1726 
1727     /// Perform a shift operation between two registers.
1728     /// This case is special in that some architectures have specific expectations
1729     /// regarding the location of the instruction arguments. To free the
1730     /// caller from having to deal with the architecture specific constraints
1731     /// we give this function access to the code generation context, allowing
1732     /// each implementation to decide the lowering path.
shift( &mut self, context: &mut CodeGenContext<Emission>, kind: ShiftKind, size: OperandSize, ) -> Result<()>1733     fn shift(
1734         &mut self,
1735         context: &mut CodeGenContext<Emission>,
1736         kind: ShiftKind,
1737         size: OperandSize,
1738     ) -> Result<()>;
1739 
1740     /// Perform division operation.
1741     /// Division is special in that some architectures have specific
1742     /// expectations regarding the location of the instruction
1743     /// arguments and regarding the location of the quotient /
1744     /// remainder. To free the caller from having to deal with the
1745     /// architecture specific constraints we give this function access
1746     /// to the code generation context, allowing each implementation
1747     /// to decide the lowering path.  For cases in which division is an
1748     /// unconstrained binary operation, the caller can decide to use
1749     /// the `CodeGenContext::i32_binop` or `CodeGenContext::i64_binop`
1750     /// functions.
div( &mut self, context: &mut CodeGenContext<Emission>, kind: DivKind, size: OperandSize, ) -> Result<()>1751     fn div(
1752         &mut self,
1753         context: &mut CodeGenContext<Emission>,
1754         kind: DivKind,
1755         size: OperandSize,
1756     ) -> Result<()>;
1757 
1758     /// Calculate remainder.
rem( &mut self, context: &mut CodeGenContext<Emission>, kind: RemKind, size: OperandSize, ) -> Result<()>1759     fn rem(
1760         &mut self,
1761         context: &mut CodeGenContext<Emission>,
1762         kind: RemKind,
1763         size: OperandSize,
1764     ) -> Result<()>;
1765 
1766     /// Compares `src1` against `src2` for the side effect of setting processor
1767     /// flags.
1768     ///
1769     /// Note that `src1` is the left-hand-side of the comparison and `src2` is
1770     /// the right-hand-side, so if testing `a < b` then `src1 == a` and
1771     /// `src2 == b`
cmp(&mut self, src1: Reg, src2: RegImm, size: OperandSize) -> Result<()>1772     fn cmp(&mut self, src1: Reg, src2: RegImm, size: OperandSize) -> Result<()>;
1773 
1774     /// Compare src and dst and put the result in dst.
1775     /// This function will potentially emit a series of instructions.
1776     ///
1777     /// The initial value in `dst` is the left-hand-side of the comparison and
1778     /// the initial value in `src` is the right-hand-side of the comparison.
1779     /// That means for `a < b` then `dst == a` and `src == b`.
cmp_with_set( &mut self, dst: WritableReg, src: RegImm, kind: IntCmpKind, size: OperandSize, ) -> Result<()>1780     fn cmp_with_set(
1781         &mut self,
1782         dst: WritableReg,
1783         src: RegImm,
1784         kind: IntCmpKind,
1785         size: OperandSize,
1786     ) -> Result<()>;
1787 
1788     /// Compare floats in src1 and src2 and put the result in dst.
1789     /// In x86, this will emit multiple instructions.
float_cmp_with_set( &mut self, dst: WritableReg, src1: Reg, src2: Reg, kind: FloatCmpKind, size: OperandSize, ) -> Result<()>1790     fn float_cmp_with_set(
1791         &mut self,
1792         dst: WritableReg,
1793         src1: Reg,
1794         src2: Reg,
1795         kind: FloatCmpKind,
1796         size: OperandSize,
1797     ) -> Result<()>;
1798 
1799     /// Count the number of leading zeroes in src and put the result in dst.
1800     /// In x64, this will emit multiple instructions if the `has_lzcnt` flag is
1801     /// false.
clz(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>1802     fn clz(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>;
1803 
1804     /// Count the number of trailing zeroes in src and put the result in dst.
1805     /// In x64, this will emit multiple instructions if the `has_tzcnt` flag is
1806     /// false.
ctz(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>1807     fn ctz(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>;
1808 
1809     /// Push the register to the stack, returning the stack slot metadata.
1810     // NB
1811     // The stack alignment should not be assumed after any call to `push`,
1812     // unless explicitly aligned otherwise.  Typically, stack alignment is
1813     // maintained at call sites and during the execution of
1814     // epilogues.
push(&mut self, src: Reg, size: OperandSize) -> Result<StackSlot>1815     fn push(&mut self, src: Reg, size: OperandSize) -> Result<StackSlot>;
1816 
1817     /// Finalize the assembly and return the result.
finalize(self, base: Option<SourceLoc>) -> Result<MachBufferFinalized<Final>>1818     fn finalize(self, base: Option<SourceLoc>) -> Result<MachBufferFinalized<Final>>;
1819 
1820     /// Zero a particular register.
zero(&mut self, reg: WritableReg) -> Result<()>1821     fn zero(&mut self, reg: WritableReg) -> Result<()>;
1822 
1823     /// Count the number of 1 bits in src and put the result in dst. In x64,
1824     /// this will emit multiple instructions if the `has_popcnt` flag is false.
popcnt(&mut self, context: &mut CodeGenContext<Emission>, size: OperandSize) -> Result<()>1825     fn popcnt(&mut self, context: &mut CodeGenContext<Emission>, size: OperandSize) -> Result<()>;
1826 
1827     /// Converts an i64 to an i32 by discarding the high 32 bits.
wrap(&mut self, dst: WritableReg, src: Reg) -> Result<()>1828     fn wrap(&mut self, dst: WritableReg, src: Reg) -> Result<()>;
1829 
1830     /// Extends an integer of a given size to a larger size.
extend(&mut self, dst: WritableReg, src: Reg, kind: ExtendKind) -> Result<()>1831     fn extend(&mut self, dst: WritableReg, src: Reg, kind: ExtendKind) -> Result<()>;
1832 
1833     /// Emits one or more instructions to perform a signed truncation of a
1834     /// float into an integer.
signed_truncate( &mut self, dst: WritableReg, src: Reg, src_size: OperandSize, dst_size: OperandSize, kind: TruncKind, ) -> Result<()>1835     fn signed_truncate(
1836         &mut self,
1837         dst: WritableReg,
1838         src: Reg,
1839         src_size: OperandSize,
1840         dst_size: OperandSize,
1841         kind: TruncKind,
1842     ) -> Result<()>;
1843 
1844     /// Emits one or more instructions to perform an unsigned truncation of a
1845     /// float into an integer.
unsigned_truncate( &mut self, context: &mut CodeGenContext<Emission>, src_size: OperandSize, dst_size: OperandSize, kind: TruncKind, ) -> Result<()>1846     fn unsigned_truncate(
1847         &mut self,
1848         context: &mut CodeGenContext<Emission>,
1849         src_size: OperandSize,
1850         dst_size: OperandSize,
1851         kind: TruncKind,
1852     ) -> Result<()>;
1853 
1854     /// Emits one or more instructions to perform a signed convert of an
1855     /// integer into a float.
signed_convert( &mut self, dst: WritableReg, src: Reg, src_size: OperandSize, dst_size: OperandSize, ) -> Result<()>1856     fn signed_convert(
1857         &mut self,
1858         dst: WritableReg,
1859         src: Reg,
1860         src_size: OperandSize,
1861         dst_size: OperandSize,
1862     ) -> Result<()>;
1863 
1864     /// Emits one or more instructions to perform an unsigned convert of an
1865     /// integer into a float.
unsigned_convert( &mut self, dst: WritableReg, src: Reg, tmp_gpr: Reg, src_size: OperandSize, dst_size: OperandSize, ) -> Result<()>1866     fn unsigned_convert(
1867         &mut self,
1868         dst: WritableReg,
1869         src: Reg,
1870         tmp_gpr: Reg,
1871         src_size: OperandSize,
1872         dst_size: OperandSize,
1873     ) -> Result<()>;
1874 
1875     /// Reinterpret a float as an integer.
reinterpret_float_as_int( &mut self, dst: WritableReg, src: Reg, size: OperandSize, ) -> Result<()>1876     fn reinterpret_float_as_int(
1877         &mut self,
1878         dst: WritableReg,
1879         src: Reg,
1880         size: OperandSize,
1881     ) -> Result<()>;
1882 
1883     /// Reinterpret an integer as a float.
reinterpret_int_as_float( &mut self, dst: WritableReg, src: Reg, size: OperandSize, ) -> Result<()>1884     fn reinterpret_int_as_float(
1885         &mut self,
1886         dst: WritableReg,
1887         src: Reg,
1888         size: OperandSize,
1889     ) -> Result<()>;
1890 
1891     /// Demote an f64 to an f32.
demote(&mut self, dst: WritableReg, src: Reg) -> Result<()>1892     fn demote(&mut self, dst: WritableReg, src: Reg) -> Result<()>;
1893 
1894     /// Promote an f32 to an f64.
promote(&mut self, dst: WritableReg, src: Reg) -> Result<()>1895     fn promote(&mut self, dst: WritableReg, src: Reg) -> Result<()>;
1896 
1897     /// Zero a given memory range.
1898     ///
1899     /// The default implementation divides the given memory range
1900     /// into word-sized slots. Then it unrolls a series of store
1901     /// instructions, effectively assigning zero to each slot.
zero_mem_range(&mut self, mem: &Range<u32>) -> Result<()>1902     fn zero_mem_range(&mut self, mem: &Range<u32>) -> Result<()> {
1903         let word_size = <Self::ABI as abi::ABI>::word_bytes() as u32;
1904         if mem.is_empty() {
1905             return Ok(());
1906         }
1907 
1908         let start = if mem.start % word_size == 0 {
1909             mem.start
1910         } else {
1911             // Ensure that the start of the range is at least 4-byte aligned.
1912             assert!(mem.start % 4 == 0);
1913             let start = align_to(mem.start, word_size);
1914             let addr: Self::Address = self.local_address(&LocalSlot::i32(start))?;
1915             self.store(RegImm::i32(0), addr, OperandSize::S32)?;
1916             // Ensure that the new start of the range, is word-size aligned.
1917             assert!(start % word_size == 0);
1918             start
1919         };
1920 
1921         let end = align_to(mem.end, word_size);
1922         let slots = (end - start) / word_size;
1923 
1924         if slots == 1 {
1925             let slot = LocalSlot::i64(start + word_size);
1926             let addr: Self::Address = self.local_address(&slot)?;
1927             self.store(RegImm::i64(0), addr, OperandSize::S64)?;
1928         } else {
1929             // TODO
1930             // Add an upper bound to this generation;
1931             // given a considerably large amount of slots
1932             // this will be inefficient.
1933             self.with_scratch::<IntScratch, _>(|masm, scratch| {
1934                 masm.zero(scratch.writable())?;
1935                 let zero = RegImm::reg(scratch.inner());
1936 
1937                 for step in (start..end).step_by(word_size as usize) {
1938                     let slot = LocalSlot::i64(step + word_size);
1939                     let addr: Self::Address = masm.local_address(&slot)?;
1940                     masm.store(zero, addr, OperandSize::S64)?;
1941                 }
1942                 wasmtime_environ::error::Ok(())
1943             })?;
1944         }
1945 
1946         Ok(())
1947     }
1948 
1949     /// Generate a label.
get_label(&mut self) -> Result<MachLabel>1950     fn get_label(&mut self) -> Result<MachLabel>;
1951 
1952     /// Bind the given label at the current code offset.
bind(&mut self, label: MachLabel) -> Result<()>1953     fn bind(&mut self, label: MachLabel) -> Result<()>;
1954 
1955     /// Conditional branch.
1956     ///
1957     /// Performs a comparison between the two operands,
1958     /// and immediately after emits a jump to the given
1959     /// label destination if the condition is met.
branch( &mut self, kind: IntCmpKind, lhs: Reg, rhs: RegImm, taken: MachLabel, size: OperandSize, ) -> Result<()>1960     fn branch(
1961         &mut self,
1962         kind: IntCmpKind,
1963         lhs: Reg,
1964         rhs: RegImm,
1965         taken: MachLabel,
1966         size: OperandSize,
1967     ) -> Result<()>;
1968 
1969     /// Emits an unconditional jump to the given label.
jmp(&mut self, target: MachLabel) -> Result<()>1970     fn jmp(&mut self, target: MachLabel) -> Result<()>;
1971 
1972     /// Emits a jump table sequence. The default label is specified as
1973     /// the last element of the targets slice.
jmp_table(&mut self, targets: &[MachLabel], index: Reg, tmp: Reg) -> Result<()>1974     fn jmp_table(&mut self, targets: &[MachLabel], index: Reg, tmp: Reg) -> Result<()>;
1975 
1976     /// Emit an unreachable code trap.
unreachable(&mut self) -> Result<()>1977     fn unreachable(&mut self) -> Result<()>;
1978 
1979     /// Emit an unconditional trap.
trap(&mut self, code: TrapCode) -> Result<()>1980     fn trap(&mut self, code: TrapCode) -> Result<()>;
1981 
1982     /// Traps if the condition code is met.
trapif(&mut self, cc: IntCmpKind, code: TrapCode) -> Result<()>1983     fn trapif(&mut self, cc: IntCmpKind, code: TrapCode) -> Result<()>;
1984 
1985     /// Trap if the source register is zero.
trapz(&mut self, src: Reg, code: TrapCode) -> Result<()>1986     fn trapz(&mut self, src: Reg, code: TrapCode) -> Result<()>;
1987 
1988     /// Ensures that the stack pointer is correctly positioned before an unconditional
1989     /// jump according to the requirements of the destination target.
ensure_sp_for_jump(&mut self, target: SPOffset) -> Result<()>1990     fn ensure_sp_for_jump(&mut self, target: SPOffset) -> Result<()> {
1991         let bytes = self
1992             .sp_offset()?
1993             .as_u32()
1994             .checked_sub(target.as_u32())
1995             .unwrap_or(0);
1996 
1997         if bytes > 0 {
1998             self.free_stack(bytes)?;
1999         }
2000 
2001         Ok(())
2002     }
2003 
2004     /// Mark the start of a source location returning the machine code offset
2005     /// and the relative source code location.
start_source_loc(&mut self, loc: RelSourceLoc) -> Result<(CodeOffset, RelSourceLoc)>2006     fn start_source_loc(&mut self, loc: RelSourceLoc) -> Result<(CodeOffset, RelSourceLoc)>;
2007 
2008     /// Mark the end of a source location.
end_source_loc(&mut self) -> Result<()>2009     fn end_source_loc(&mut self) -> Result<()>;
2010 
2011     /// The current offset, in bytes from the beginning of the function.
current_code_offset(&self) -> Result<CodeOffset>2012     fn current_code_offset(&self) -> Result<CodeOffset>;
2013 
2014     /// Performs a 128-bit addition
add128( &mut self, dst_lo: WritableReg, dst_hi: WritableReg, lhs_lo: Reg, lhs_hi: Reg, rhs_lo: Reg, rhs_hi: Reg, ) -> Result<()>2015     fn add128(
2016         &mut self,
2017         dst_lo: WritableReg,
2018         dst_hi: WritableReg,
2019         lhs_lo: Reg,
2020         lhs_hi: Reg,
2021         rhs_lo: Reg,
2022         rhs_hi: Reg,
2023     ) -> Result<()>;
2024 
2025     /// Performs a 128-bit subtraction
sub128( &mut self, dst_lo: WritableReg, dst_hi: WritableReg, lhs_lo: Reg, lhs_hi: Reg, rhs_lo: Reg, rhs_hi: Reg, ) -> Result<()>2026     fn sub128(
2027         &mut self,
2028         dst_lo: WritableReg,
2029         dst_hi: WritableReg,
2030         lhs_lo: Reg,
2031         lhs_hi: Reg,
2032         rhs_lo: Reg,
2033         rhs_hi: Reg,
2034     ) -> Result<()>;
2035 
2036     /// Performs a widening multiplication from two 64-bit operands into a
2037     /// 128-bit result.
2038     ///
2039     /// Note that some platforms require special handling of registers in this
2040     /// instruction (e.g. x64) so full access to `CodeGenContext` is provided.
mul_wide(&mut self, context: &mut CodeGenContext<Emission>, kind: MulWideKind) -> Result<()>2041     fn mul_wide(&mut self, context: &mut CodeGenContext<Emission>, kind: MulWideKind)
2042     -> Result<()>;
2043 
2044     /// Takes the value in a src operand and replicates it across lanes of
2045     /// `size` in a destination result.
splat(&mut self, context: &mut CodeGenContext<Emission>, size: SplatKind) -> Result<()>2046     fn splat(&mut self, context: &mut CodeGenContext<Emission>, size: SplatKind) -> Result<()>;
2047 
2048     /// Performs a shuffle between two 128-bit vectors into a 128-bit result
2049     /// using lanes as a mask to select which indexes to copy.
shuffle(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, lanes: [u8; 16]) -> Result<()>2050     fn shuffle(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, lanes: [u8; 16]) -> Result<()>;
2051 
2052     /// Performs a swizzle between two 128-bit vectors into a 128-bit result.
swizzle(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg) -> Result<()>2053     fn swizzle(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg) -> Result<()>;
2054 
2055     /// Performs the RMW `op` operation on the passed `addr`.
2056     ///
2057     /// The value *before* the operation was performed is written back to the `operand` register.
atomic_rmw( &mut self, context: &mut CodeGenContext<Emission>, addr: Self::Address, size: OperandSize, op: RmwOp, flags: MemFlags, extend: Option<Extend<Zero>>, ) -> Result<()>2058     fn atomic_rmw(
2059         &mut self,
2060         context: &mut CodeGenContext<Emission>,
2061         addr: Self::Address,
2062         size: OperandSize,
2063         op: RmwOp,
2064         flags: MemFlags,
2065         extend: Option<Extend<Zero>>,
2066     ) -> Result<()>;
2067 
2068     /// Extracts the scalar value from `src` in `lane` to `dst`.
extract_lane( &mut self, src: Reg, dst: WritableReg, lane: u8, kind: ExtractLaneKind, ) -> Result<()>2069     fn extract_lane(
2070         &mut self,
2071         src: Reg,
2072         dst: WritableReg,
2073         lane: u8,
2074         kind: ExtractLaneKind,
2075     ) -> Result<()>;
2076 
2077     /// Replaces the value in `lane` in `dst` with the value in `src`.
replace_lane( &mut self, src: RegImm, dst: WritableReg, lane: u8, kind: ReplaceLaneKind, ) -> Result<()>2078     fn replace_lane(
2079         &mut self,
2080         src: RegImm,
2081         dst: WritableReg,
2082         lane: u8,
2083         kind: ReplaceLaneKind,
2084     ) -> Result<()>;
2085 
2086     /// Perform an atomic CAS (compare-and-swap) operation with the value at `addr`, and `expected`
2087     /// and `replacement` (at the top of the context's stack).
2088     ///
2089     /// This method takes the `CodeGenContext` as an argument to accommodate architectures that
2090     /// expect parameters in specific registers. The context stack contains the `replacement`,
2091     /// and `expected` values in that order. The implementer is expected to push the value at
2092     /// `addr` before the update to the context's stack before returning.
atomic_cas( &mut self, context: &mut CodeGenContext<Emission>, addr: Self::Address, size: OperandSize, flags: MemFlags, extend: Option<Extend<Zero>>, ) -> Result<()>2093     fn atomic_cas(
2094         &mut self,
2095         context: &mut CodeGenContext<Emission>,
2096         addr: Self::Address,
2097         size: OperandSize,
2098         flags: MemFlags,
2099         extend: Option<Extend<Zero>>,
2100     ) -> Result<()>;
2101 
2102     /// Compares vector registers `lhs` and `rhs` for equality and puts the
2103     /// vector of results in `dst`.
v128_eq( &mut self, dst: WritableReg, lhs: Reg, rhs: Reg, kind: VectorEqualityKind, ) -> Result<()>2104     fn v128_eq(
2105         &mut self,
2106         dst: WritableReg,
2107         lhs: Reg,
2108         rhs: Reg,
2109         kind: VectorEqualityKind,
2110     ) -> Result<()>;
2111 
2112     /// Compares vector registers `lhs` and `rhs` for inequality and puts the
2113     /// vector of results in `dst`.
v128_ne( &mut self, dst: WritableReg, lhs: Reg, rhs: Reg, kind: VectorEqualityKind, ) -> Result<()>2114     fn v128_ne(
2115         &mut self,
2116         dst: WritableReg,
2117         lhs: Reg,
2118         rhs: Reg,
2119         kind: VectorEqualityKind,
2120     ) -> Result<()>;
2121 
2122     /// Performs a less than comparison with vector registers `lhs` and `rhs`
2123     /// and puts the vector of results in `dst`.
v128_lt( &mut self, dst: WritableReg, lhs: Reg, rhs: Reg, kind: VectorCompareKind, ) -> Result<()>2124     fn v128_lt(
2125         &mut self,
2126         dst: WritableReg,
2127         lhs: Reg,
2128         rhs: Reg,
2129         kind: VectorCompareKind,
2130     ) -> Result<()>;
2131 
2132     /// Performs a less than or equal comparison with vector registers `lhs`
2133     /// and `rhs` and puts the vector of results in `dst`.
v128_le( &mut self, dst: WritableReg, lhs: Reg, rhs: Reg, kind: VectorCompareKind, ) -> Result<()>2134     fn v128_le(
2135         &mut self,
2136         dst: WritableReg,
2137         lhs: Reg,
2138         rhs: Reg,
2139         kind: VectorCompareKind,
2140     ) -> Result<()>;
2141 
2142     /// Performs a greater than comparison with vector registers `lhs` and
2143     /// `rhs` and puts the vector of results in `dst`.
v128_gt( &mut self, dst: WritableReg, lhs: Reg, rhs: Reg, kind: VectorCompareKind, ) -> Result<()>2144     fn v128_gt(
2145         &mut self,
2146         dst: WritableReg,
2147         lhs: Reg,
2148         rhs: Reg,
2149         kind: VectorCompareKind,
2150     ) -> Result<()>;
2151 
2152     /// Performs a greater than or equal comparison with vector registers `lhs`
2153     /// and `rhs` and puts the vector of results in `dst`.
v128_ge( &mut self, dst: WritableReg, lhs: Reg, rhs: Reg, kind: VectorCompareKind, ) -> Result<()>2154     fn v128_ge(
2155         &mut self,
2156         dst: WritableReg,
2157         lhs: Reg,
2158         rhs: Reg,
2159         kind: VectorCompareKind,
2160     ) -> Result<()>;
2161 
2162     /// Emit a memory fence.
fence(&mut self) -> Result<()>2163     fn fence(&mut self) -> Result<()>;
2164 
2165     /// Perform a logical `not` operation on the 128bits vector value in `dst`.
v128_not(&mut self, dst: WritableReg) -> Result<()>2166     fn v128_not(&mut self, dst: WritableReg) -> Result<()>;
2167 
2168     /// Perform a logical `and` operation on `src1` and `src2`, both 128-bit vector values, writing
2169     /// the result to `dst`.
v128_and(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>2170     fn v128_and(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;
2171 
2172     /// Perform a logical `and_not` operation on `src1` and `src2`, both 128-bit vector values, writing
2173     /// the result to `dst`.
2174     ///
2175     /// `and_not` is not commutative: dst = !src1 & src2.
v128_and_not(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>2176     fn v128_and_not(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;
2177 
2178     /// Perform a logical `or` operation on `src1` and `src2`, both 128-bit vector values, writing
2179     /// the result to `dst`.
v128_or(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>2180     fn v128_or(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;
2181 
2182     /// Perform a logical `xor` operation on `src1` and `src2`, both 128-bit vector values, writing
2183     /// the result to `dst`.
v128_xor(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>2184     fn v128_xor(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;
2185 
2186     /// Given two 128bits vectors `src1` and `src2`, and a 128bits bitmask `mask`, selects bits
2187     /// from `src1` when mask is 1, and from `src2` when mask is 0.
2188     ///
2189     /// This is equivalent to: `v128.or(v128.and(src1, mask), v128.and(src2, v128.not(mask)))`.
v128_bitselect(&mut self, src1: Reg, src2: Reg, mask: Reg, dst: WritableReg) -> Result<()>2190     fn v128_bitselect(&mut self, src1: Reg, src2: Reg, mask: Reg, dst: WritableReg) -> Result<()>;
2191 
2192     /// If any bit in `src` is 1, set `dst` to 1, or 0 otherwise.
v128_any_true(&mut self, src: Reg, dst: WritableReg) -> Result<()>2193     fn v128_any_true(&mut self, src: Reg, dst: WritableReg) -> Result<()>;
2194 
2195     /// Convert vector of integers to vector of floating points.
v128_convert(&mut self, src: Reg, dst: WritableReg, kind: V128ConvertKind) -> Result<()>2196     fn v128_convert(&mut self, src: Reg, dst: WritableReg, kind: V128ConvertKind) -> Result<()>;
2197 
2198     /// Convert two input vectors into a smaller lane vector by narrowing each
2199     /// lane.
v128_narrow( &mut self, src1: Reg, src2: Reg, dst: WritableReg, kind: V128NarrowKind, ) -> Result<()>2200     fn v128_narrow(
2201         &mut self,
2202         src1: Reg,
2203         src2: Reg,
2204         dst: WritableReg,
2205         kind: V128NarrowKind,
2206     ) -> Result<()>;
2207 
2208     /// Converts a vector containing two 64-bit floating point lanes to two
2209     /// 32-bit floating point lanes and setting the two higher lanes to 0.
v128_demote(&mut self, src: Reg, dst: WritableReg) -> Result<()>2210     fn v128_demote(&mut self, src: Reg, dst: WritableReg) -> Result<()>;
2211 
2212     /// Converts a vector containing four 32-bit floating point lanes to two
2213     /// 64-bit floating point lanes. Only the two lower lanes are converted.
v128_promote(&mut self, src: Reg, dst: WritableReg) -> Result<()>2214     fn v128_promote(&mut self, src: Reg, dst: WritableReg) -> Result<()>;
2215 
2216     /// Converts low or high half of the smaller lane vector to a larger lane
2217     /// vector.
v128_extend(&mut self, src: Reg, dst: WritableReg, kind: V128ExtendKind) -> Result<()>2218     fn v128_extend(&mut self, src: Reg, dst: WritableReg, kind: V128ExtendKind) -> Result<()>;
2219 
2220     /// Perform a vector add between `lhs` and `rhs`, placing the result in
2221     /// `dst`.
v128_add(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, kind: V128AddKind) -> Result<()>2222     fn v128_add(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, kind: V128AddKind) -> Result<()>;
2223 
2224     /// Perform a vector sub between `lhs` and `rhs`, placing the result in `dst`.
v128_sub(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, kind: V128SubKind) -> Result<()>2225     fn v128_sub(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, kind: V128SubKind) -> Result<()>;
2226 
2227     /// Perform a vector lane-wise mul between `lhs` and `rhs`, placing the result in `dst`.
v128_mul(&mut self, context: &mut CodeGenContext<Emission>, kind: V128MulKind) -> Result<()>2228     fn v128_mul(&mut self, context: &mut CodeGenContext<Emission>, kind: V128MulKind)
2229     -> Result<()>;
2230 
2231     /// Perform an absolute operation on a vector.
v128_abs(&mut self, src: Reg, dst: WritableReg, kind: V128AbsKind) -> Result<()>2232     fn v128_abs(&mut self, src: Reg, dst: WritableReg, kind: V128AbsKind) -> Result<()>;
2233 
2234     /// Vectorized negate of the content of `op`.
v128_neg(&mut self, op: WritableReg, kind: V128NegKind) -> Result<()>2235     fn v128_neg(&mut self, op: WritableReg, kind: V128NegKind) -> Result<()>;
2236 
2237     /// Perform the shift operation specified by `kind`, by the shift amount specified by the 32-bit
2238     /// integer at the top of the stack, on the 128-bit vector specified by the second value
2239     /// from the top of the stack, interpreted as packed integers of size `lane_width`.
2240     ///
2241     /// The shift amount is taken modulo `lane_width`.
v128_shift( &mut self, context: &mut CodeGenContext<Emission>, lane_width: OperandSize, kind: ShiftKind, ) -> Result<()>2242     fn v128_shift(
2243         &mut self,
2244         context: &mut CodeGenContext<Emission>,
2245         lane_width: OperandSize,
2246         kind: ShiftKind,
2247     ) -> Result<()>;
2248 
2249     /// Perform a saturating integer q-format rounding multiplication.
v128_q15mulr_sat_s( &mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize, ) -> Result<()>2250     fn v128_q15mulr_sat_s(
2251         &mut self,
2252         lhs: Reg,
2253         rhs: Reg,
2254         dst: WritableReg,
2255         size: OperandSize,
2256     ) -> Result<()>;
2257 
2258     /// Sets `dst` to 1 if all lanes in `src` are non-zero, sets `dst` to 0
2259     /// otherwise.
v128_all_true(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>2260     fn v128_all_true(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2261 
2262     /// Extracts the high bit of each lane in `src` and produces a scalar mask
2263     /// with all bits concatenated in `dst`.
v128_bitmask(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>2264     fn v128_bitmask(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2265 
2266     /// Lane-wise truncation operation.
2267     ///
2268     /// If using an integer kind of truncation, then this performs a lane-wise
2269     /// saturating conversion from float to integer using the IEEE
2270     /// `convertToIntegerTowardZero` function. If any input lane is NaN, the
2271     /// resulting lane is 0. If the rounded integer value of a lane is outside
2272     /// the range of the destination type, the result is saturated to the
2273     /// nearest representable integer value.
         ///
         /// `kind` selects which truncation is performed; see [`V128TruncKind`].
         ///
         /// NOTE(review): no source or destination register is passed, so the operand is
         /// presumably taken from, and the result returned through, `context`; confirm against
         /// the implementors.
v128_trunc( &mut self, context: &mut CodeGenContext<Emission>, kind: V128TruncKind, ) -> Result<()>2274     fn v128_trunc(
2275         &mut self,
2276         context: &mut CodeGenContext<Emission>,
2277         kind: V128TruncKind,
2278     ) -> Result<()>;
2279 
2280     /// Perform a lane-wise `min` operation between `src1` and `src2`.
         ///
         /// `kind` fixes how the two vectors are interpreted: the number of lanes and whether each
         /// lane is a float, a signed integer or an unsigned integer (see [`V128MinKind`]). The
         /// per-lane minimum is expected in `dst`.
v128_min(&mut self, src1: Reg, src2: Reg, dst: WritableReg, kind: V128MinKind) -> Result<()>2281     fn v128_min(&mut self, src1: Reg, src2: Reg, dst: WritableReg, kind: V128MinKind)
2282     -> Result<()>;
2283 
2284     /// Perform a lane-wise `max` operation between `src1` and `src2`.
         ///
         /// The per-lane maximum is expected in `dst`.
         ///
         /// NOTE(review): `kind` ([`V128MaxKind`]) presumably fixes the lane count and element type
         /// in the same way [`V128MinKind`] does for `v128_min`; confirm against the enum.
v128_max(&mut self, src1: Reg, src2: Reg, dst: WritableReg, kind: V128MaxKind) -> Result<()>2285     fn v128_max(&mut self, src1: Reg, src2: Reg, dst: WritableReg, kind: V128MaxKind)
2286     -> Result<()>;
2287 
2288     /// Perform the lane-wise integer extended multiplication, producing a result twice as wide as
2289     /// the inputs. This is equivalent to an extend followed by a multiply.
2290     ///
2291     /// The extension to be performed is inferred from the `lane_width` and the `kind` of extmul,
2292     /// e.g., if `lane_width` is `S16`, and `kind` is `LowSigned`, then we sign-extend the lower
2293     /// 8 bits of the 16-bit lanes.
         ///
         /// NOTE(review): this signature has no `lane_width` parameter, so the lane width is
         /// presumably carried by `kind` ([`V128ExtMulKind`]) itself and the example above may be
         /// stale; check it against the enum's current variants.
         ///
         /// NOTE(review): no operand registers are passed, so the inputs are presumably taken
         /// from, and the result returned through, `context`; confirm against the implementors.
v128_extmul( &mut self, context: &mut CodeGenContext<Emission>, kind: V128ExtMulKind, ) -> Result<()>2294     fn v128_extmul(
2295         &mut self,
2296         context: &mut CodeGenContext<Emission>,
2297         kind: V128ExtMulKind,
2298     ) -> Result<()>;
2299 
2300     /// Perform the lane-wise integer extended pairwise addition producing extended results (twice
2301     /// wider results than the inputs).
         ///
         /// `src` is the input vector and the sums are expected in `dst`.
         ///
         /// NOTE(review): `kind` ([`V128ExtAddKind`]) presumably selects the input lane width and
         /// whether the extension is signed or unsigned; confirm against the enum.
v128_extadd_pairwise( &mut self, src: Reg, dst: WritableReg, kind: V128ExtAddKind, ) -> Result<()>2302     fn v128_extadd_pairwise(
2303         &mut self,
2304         src: Reg,
2305         dst: WritableReg,
2306         kind: V128ExtAddKind,
2307     ) -> Result<()>;
2308 
2309     /// Lane-wise multiply signed 16-bit integers in `lhs` and `rhs` and add
2310     /// adjacent pairs of the 32-bit results.
         ///
         /// The sums are expected in `dst`. There is no size parameter: the lane widths are fixed
         /// by the operation as described above (16-bit inputs, 32-bit products).
v128_dot(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg) -> Result<()>2311     fn v128_dot(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg) -> Result<()>;
2312 
2313     /// Count the number of bits set in each lane.
         ///
         /// NOTE(review): the signature carries neither operand registers nor a lane size, so the
         /// operand is presumably taken from, and the result returned through, `context`, and the
         /// lane width is fixed by the implementor; confirm against the implementors.
v128_popcnt(&mut self, context: &mut CodeGenContext<Emission>) -> Result<()>2314     fn v128_popcnt(&mut self, context: &mut CodeGenContext<Emission>) -> Result<()>;
2315 
2316     /// Lane-wise rounding average of vectors of integers in `lhs` and `rhs`
2317     /// and put the results in `dst`.
         ///
         /// NOTE(review): `size` is presumably the width of each integer lane; whether the lanes
         /// are treated as signed or unsigned is not stated here; confirm against the implementors.
v128_avgr(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>2318     fn v128_avgr(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2319 
2320     /// Lane-wise IEEE division on vectors of floats.
         ///
         /// The quotients are expected in `dst`.
         ///
         /// NOTE(review): going by the parameter names each lane computes `lhs / rhs`, and `size`
         /// presumably selects the float lane width; confirm against the implementors.
v128_div(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>2321     fn v128_div(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2322 
2323     /// Lane-wise IEEE square root of vector of floats.
         ///
         /// `src` is the input vector and the roots are expected in `dst`.
         ///
         /// NOTE(review): `size` presumably selects the float lane width; confirm against the
         /// implementors.
v128_sqrt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>2324     fn v128_sqrt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2325 
2326     /// Lane-wise ceiling of vector of floats.
         ///
         /// `src` is the input vector and the rounded lanes are expected in `dst`.
         ///
         /// NOTE(review): `size` presumably selects the float lane width; confirm against the
         /// implementors.
v128_ceil(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>2327     fn v128_ceil(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2328 
2329     /// Lane-wise flooring of vector of floats.
         ///
         /// `src` is the input vector and the rounded lanes are expected in `dst`.
         ///
         /// NOTE(review): `size` presumably selects the float lane width; confirm against the
         /// implementors.
v128_floor(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>2330     fn v128_floor(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2331 
2332     /// Lane-wise rounding to nearest integer for vector of floats.
         ///
         /// `src` is the input vector and the rounded lanes are expected in `dst`.
         ///
         /// NOTE(review): `size` presumably selects the float lane width, and the tie-breaking rule
         /// is not stated here; confirm against the implementors.
v128_nearest(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>2333     fn v128_nearest(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2334 
2335     /// Lane-wise minimum value defined as `rhs < lhs ? rhs : lhs`.
         ///
         /// Per that definition the `lhs` lane is the result whenever the comparison is false. The
         /// selected lanes are expected in `dst`.
         ///
         /// NOTE(review): `size` presumably selects the lane width; confirm against the
         /// implementors.
v128_pmin(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>2336     fn v128_pmin(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2337 
2338     /// Lane-wise maximum value defined as `lhs < rhs ? rhs : lhs`.
         ///
         /// Per that definition the `lhs` lane is the result whenever the comparison is false. The
         /// selected lanes are expected in `dst`.
         ///
         /// NOTE(review): `size` presumably selects the lane width; confirm against the
         /// implementors.
v128_pmax(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>2339     fn v128_pmax(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2340 }
2341