xref: /wasmtime-44.0.1/winch/codegen/src/masm.rs (revision fee9be21)
1 use crate::abi::{self, LocalSlot, align_to};
2 use crate::codegen::{CodeGenContext, Emission, FuncEnv};
3 use crate::isa::{
4     CallingConvention,
5     reg::{Reg, RegClass, WritableReg, writable},
6 };
7 use anyhow::Result;
8 use cranelift_codegen::{
9     Final, MachBufferFinalized, MachLabel,
10     binemit::CodeOffset,
11     ir::{Endianness, MemFlags, RelSourceLoc, SourceLoc, UserExternalNameRef},
12 };
13 use std::{fmt::Debug, ops::Range};
14 use wasmtime_environ::{PtrSize, WasmHeapType, WasmRefType, WasmValType};
15 
16 pub(crate) use cranelift_codegen::ir::TrapCode;
17 
/// Signedness of an integer division.
#[derive(PartialEq, Eq)]
pub(crate) enum DivKind {
    /// The operands are divided as signed integers.
    Signed,
    /// The operands are divided as unsigned integers.
    Unsigned,
}
25 
/// Selects which flavor of `memory.atomic.wait*` is being emitted.
#[derive(Clone, Copy, Debug)]
pub(crate) enum AtomicWaitKind {
    /// The `wait32` flavor.
    Wait32,
    /// The `wait64` flavor.
    Wait64,
}
32 
/// Signedness of an integer remainder operation.
#[derive(Clone, Copy)]
pub(crate) enum RemKind {
    /// The remainder is computed on signed operands.
    Signed,
    /// The remainder is computed on unsigned operands.
    Unsigned,
}

impl RemKind {
    /// Returns `true` for [`RemKind::Signed`] and `false` otherwise.
    pub fn is_signed(&self) -> bool {
        match *self {
            Self::Signed => true,
            Self::Unsigned => false,
        }
    }
}
47 
48 /// Kinds of vector min operation supported by WebAssembly.
49 pub(crate) enum V128MinKind {
50     /// 4 lanes of 32-bit floats.
51     F32x4,
52     /// 2 lanes of 64-bit floats.
53     F64x2,
54     /// 16 lanes of signed 8-bit integers.
55     I8x16S,
56     /// 16 lanes of unsigned 8-bit integers.
57     I8x16U,
58     /// 8 lanes of signed 16-bit integers.
59     I16x8S,
60     /// 8 lanes of unsigned 16-bit integers.
61     I16x8U,
62     /// 4 lanes of signed 32-bit integers.
63     I32x4S,
64     /// 4 lanes of unsigned 32-bit integers.
65     I32x4U,
66 }
67 
68 impl V128MinKind {
69     /// The size of each lane.
70     pub(crate) fn lane_size(&self) -> OperandSize {
71         match self {
72             Self::F32x4 | Self::I32x4S | Self::I32x4U => OperandSize::S32,
73             Self::F64x2 => OperandSize::S64,
74             Self::I8x16S | Self::I8x16U => OperandSize::S8,
75             Self::I16x8S | Self::I16x8U => OperandSize::S16,
76         }
77     }
78 }
79 
80 /// Kinds of vector max operation supported by WebAssembly.
81 pub(crate) enum V128MaxKind {
82     /// 4 lanes of 32-bit floats.
83     F32x4,
84     /// 2 lanes of 64-bit floats.
85     F64x2,
86     /// 16 lanes of signed 8-bit integers.
87     I8x16S,
88     /// 16 lanes of unsigned 8-bit integers.
89     I8x16U,
90     /// 8 lanes of signed 16-bit integers.
91     I16x8S,
92     /// 8 lanes of unsigned 16-bit integers.
93     I16x8U,
94     /// 4 lanes of signed 32-bit integers.
95     I32x4S,
96     /// 4 lanes of unsigned 32-bit integers.
97     I32x4U,
98 }
99 
100 impl V128MaxKind {
101     /// The size of each lane.
102     pub(crate) fn lane_size(&self) -> OperandSize {
103         match self {
104             Self::F32x4 | Self::I32x4S | Self::I32x4U => OperandSize::S32,
105             Self::F64x2 => OperandSize::S64,
106             Self::I8x16S | Self::I8x16U => OperandSize::S8,
107             Self::I16x8S | Self::I16x8U => OperandSize::S16,
108         }
109     }
110 }
111 
/// Signedness of a widening multiplication.
#[derive(PartialEq, Eq)]
pub(crate) enum MulWideKind {
    /// The operands are treated as signed.
    Signed,
    /// The operands are treated as unsigned.
    Unsigned,
}
117 
/// The operation carried out by a read-modify-write instruction.
pub(crate) enum RmwOp {
    /// Addition.
    Add,
    /// Subtraction.
    Sub,
    /// Exchange.
    Xchg,
    /// Bitwise and.
    And,
    /// Bitwise or.
    Or,
    /// Bitwise exclusive or.
    Xor,
}
127 
/// Which way a memory move proceeds through the address space.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) enum MemMoveDirection {
    /// The move goes from high memory addresses toward low memory addresses.
    ///
    /// Invariant: the source location is closer to the FP than the
    /// destination location, and the destination is closer to the SP.
    HighToLow,
    /// The move goes from low memory addresses toward high memory addresses.
    ///
    /// Invariant: the source location is closer to the SP than the
    /// destination location, and the destination is closer to the FP.
    LowToHigh,
}
140 
/// Describes how a float-to-int conversion handles out-of-range inputs.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum TruncKind {
    /// Saturating conversion: when the source value exceeds the maximum value
    /// of the destination type, the result is clamped to the destination
    /// maximum value.
    Checked,
    /// When the source value exceeds the maximum value of the destination
    /// type, an exception is raised.
    Unchecked,
}

impl TruncKind {
    /// Returns `true` if the trunc kind is [`TruncKind::Checked`].
    pub(crate) fn is_checked(&self) -> bool {
        matches!(self, Self::Checked)
    }

    /// Returns `true` if the trunc kind is [`TruncKind::Unchecked`].
    #[must_use]
    pub(crate) fn is_unchecked(&self) -> bool {
        *self == Self::Unchecked
    }
}
167 
/// A stack pointer offset, stored as a `u32`.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
pub struct SPOffset(u32);

impl SPOffset {
    /// Wraps a raw `u32` offset.
    pub fn from_u32(offs: u32) -> Self {
        SPOffset(offs)
    }

    /// Returns the raw `u32` offset.
    pub fn as_u32(&self) -> u32 {
        let Self(raw) = *self;
        raw
    }
}
181 
182 /// A stack slot.
183 #[derive(Debug, Clone, Copy, Eq, PartialEq)]
184 pub struct StackSlot {
185     /// The location of the slot, relative to the stack pointer.
186     pub offset: SPOffset,
187     /// The size of the slot, in bytes.
188     pub size: u32,
189 }
190 
191 impl StackSlot {
192     pub fn new(offs: SPOffset, size: u32) -> Self {
193         Self { offset: offs, size }
194     }
195 }
196 
197 pub trait ScratchType {
198     /// Derive the register class from the scratch register type.
199     fn reg_class() -> RegClass;
200 }
201 
202 /// A scratch register type of integer class.
203 pub struct IntScratch;
204 /// A scratch register type of floating point class.
205 pub struct FloatScratch;
206 
207 impl ScratchType for IntScratch {
208     fn reg_class() -> RegClass {
209         RegClass::Int
210     }
211 }
212 
213 impl ScratchType for FloatScratch {
214     fn reg_class() -> RegClass {
215         RegClass::Float
216     }
217 }
218 
219 /// A scratch register scope.
220 #[derive(Debug, Clone, Copy)]
221 pub struct Scratch(Reg);
222 
223 impl Scratch {
224     pub fn new(r: Reg) -> Self {
225         Self(r)
226     }
227 
228     #[inline]
229     pub fn inner(&self) -> Reg {
230         self.0
231     }
232 
233     #[inline]
234     pub fn writable(&self) -> WritableReg {
235         writable!(self.0)
236     }
237 }
238 
/// The integer binary comparisons available in WebAssembly.
///
/// Each ISA's [`MacroAssembler`] implementation is in charge of emitting the
/// right instruction sequence when lowering these to machine code.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum IntCmpKind {
    /// `==`.
    Eq,
    /// `!=`.
    Ne,
    /// `<`, signed.
    LtS,
    /// `<`, unsigned.
    LtU,
    /// `>`, signed.
    GtS,
    /// `>`, unsigned.
    GtU,
    /// `<=`, signed.
    LeS,
    /// `<=`, unsigned.
    LeU,
    /// `>=`, signed.
    GeS,
    /// `>=`, unsigned.
    GeU,
}
265 
/// The float binary comparisons available in WebAssembly.
///
/// Each ISA's [`MacroAssembler`] implementation is in charge of emitting the
/// right instruction sequence when lowering these to machine code.
#[derive(Debug)]
pub(crate) enum FloatCmpKind {
    /// `==`.
    Eq,
    /// `!=`.
    Ne,
    /// `<`.
    Lt,
    /// `>`.
    Gt,
    /// `<=`.
    Le,
    /// `>=`.
    Ge,
}
284 
/// The shift and rotate operations available in WebAssembly.
///
/// The [`masm`] implementation for each ISA is in charge of emitting the
/// right instruction sequence when lowering these to machine code.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum ShiftKind {
    /// Shift left.
    Shl,
    /// Shift right, signed.
    ShrS,
    /// Shift right, unsigned.
    ShrU,
    /// Rotate left.
    Rotl,
    /// Rotate right.
    Rotr,
}
301 
302 /// Kinds of extends in WebAssembly. Each MacroAssembler implementation
303 /// is responsible for emitting the correct sequence of instructions when
304 /// lowering to machine code.
305 #[derive(Copy, Clone)]
306 pub(crate) enum ExtendKind {
307     Signed(Extend<Signed>),
308     Unsigned(Extend<Zero>),
309 }
310 
/// Marker for sign extension. It is an empty enum, so no value of this type
/// can be constructed.
#[derive(Clone, Copy)]
pub(crate) enum Signed {}

/// Marker for zero extension. It is an empty enum, so no value of this type
/// can be constructed.
#[derive(Clone, Copy)]
pub(crate) enum Zero {}

/// Implemented by the marker types that may parameterize an [`Extend`].
pub(crate) trait ExtendType {}

impl ExtendType for Zero {}
impl ExtendType for Signed {}
320 
321 #[derive(Copy, Clone)]
322 pub(crate) enum Extend<T: ExtendType> {
323     /// 8 to 32 bit extend.
324     I32Extend8,
325     /// 16 to 32 bit extend.
326     I32Extend16,
327     /// 8 to 64 bit extend.
328     I64Extend8,
329     /// 16 to 64 bit extend.
330     I64Extend16,
331     /// 32 to 64 bit extend.
332     I64Extend32,
333 
334     /// Variant to hold the kind of extend marker.
335     ///
336     /// This is `Signed` or `Zero`, that are empty enums, which means that this variant cannot be
337     /// constructed.
338     __Kind(T),
339 }
340 
341 impl From<Extend<Zero>> for ExtendKind {
342     fn from(value: Extend<Zero>) -> Self {
343         ExtendKind::Unsigned(value)
344     }
345 }
346 
347 impl<T: ExtendType> Extend<T> {
348     pub fn from_size(&self) -> OperandSize {
349         match self {
350             Extend::I32Extend8 | Extend::I64Extend8 => OperandSize::S8,
351             Extend::I32Extend16 | Extend::I64Extend16 => OperandSize::S16,
352             Extend::I64Extend32 => OperandSize::S32,
353             Extend::__Kind(_) => unreachable!(),
354         }
355     }
356 
357     pub fn to_size(&self) -> OperandSize {
358         match self {
359             Extend::I32Extend8 | Extend::I32Extend16 => OperandSize::S32,
360             Extend::I64Extend8 | Extend::I64Extend16 | Extend::I64Extend32 => OperandSize::S64,
361             Extend::__Kind(_) => unreachable!(),
362         }
363     }
364 
365     pub fn from_bits(&self) -> u8 {
366         self.from_size().num_bits()
367     }
368 
369     pub fn to_bits(&self) -> u8 {
370         self.to_size().num_bits()
371     }
372 }
373 
374 impl From<Extend<Signed>> for ExtendKind {
375     fn from(value: Extend<Signed>) -> Self {
376         ExtendKind::Signed(value)
377     }
378 }
379 
380 impl ExtendKind {
381     pub fn signed(&self) -> bool {
382         match self {
383             Self::Signed(_) => true,
384             _ => false,
385         }
386     }
387 
388     pub fn from_bits(&self) -> u8 {
389         match self {
390             Self::Signed(s) => s.from_bits(),
391             Self::Unsigned(u) => u.from_bits(),
392         }
393     }
394 
395     pub fn to_bits(&self) -> u8 {
396         match self {
397             Self::Signed(s) => s.to_bits(),
398             Self::Unsigned(u) => u.to_bits(),
399         }
400     }
401 }
402 
/// The vector load-and-extend operations available in WebAssembly.
///
/// Each MacroAssembler implementation is in charge of emitting the right
/// instruction sequence when lowering these to machine code.
#[derive(Clone, Copy)]
pub(crate) enum V128LoadExtendKind {
    /// Eight 8-bit integers, sign extended into eight 16-bit lanes.
    E8x8S,
    /// Eight 8-bit integers, zero extended into eight 16-bit lanes.
    E8x8U,
    /// Four 16-bit integers, sign extended into four 32-bit lanes.
    E16x4S,
    /// Four 16-bit integers, zero extended into four 32-bit lanes.
    E16x4U,
    /// Two 32-bit integers, sign extended into two 64-bit lanes.
    E32x2S,
    /// Two 32-bit integers, zero extended into two 64-bit lanes.
    E32x2U,
}
421 
/// The widths for which WebAssembly defines a splat load.
pub(crate) enum SplatLoadKind {
    /// An 8-bit splat load.
    S8,
    /// A 16-bit splat load.
    S16,
    /// A 32-bit splat load.
    S32,
    /// A 64-bit splat load.
    S64,
}
433 
434 /// Kinds of splat supported by WebAssembly.
435 #[derive(Copy, Debug, Clone, Eq, PartialEq)]
436 pub(crate) enum SplatKind {
437     /// 8 bit integer.
438     I8x16,
439     /// 16 bit integer.
440     I16x8,
441     /// 32 bit integer.
442     I32x4,
443     /// 64 bit integer.
444     I64x2,
445     /// 32 bit float.
446     F32x4,
447     /// 64 bit float.
448     F64x2,
449 }
450 
451 impl SplatKind {
452     /// The lane size to use for different kinds of splats.
453     pub(crate) fn lane_size(&self) -> OperandSize {
454         match self {
455             SplatKind::I8x16 => OperandSize::S8,
456             SplatKind::I16x8 => OperandSize::S16,
457             SplatKind::I32x4 | SplatKind::F32x4 => OperandSize::S32,
458             SplatKind::I64x2 | SplatKind::F64x2 => OperandSize::S64,
459         }
460     }
461 }
462 
463 /// Kinds of extract lane supported by WebAssembly.
464 #[derive(Copy, Debug, Clone, Eq, PartialEq)]
465 pub(crate) enum ExtractLaneKind {
466     /// 16 lanes of 8-bit integers sign extended to 32-bits.
467     I8x16S,
468     /// 16 lanes of 8-bit integers zero extended to 32-bits.
469     I8x16U,
470     /// 8 lanes of 16-bit integers sign extended to 32-bits.
471     I16x8S,
472     /// 8 lanes of 16-bit integers zero extended to 32-bits.
473     I16x8U,
474     /// 4 lanes of 32-bit integers.
475     I32x4,
476     /// 2 lanes of 64-bit integers.
477     I64x2,
478     /// 4 lanes of 32-bit floats.
479     F32x4,
480     /// 2 lanes of 64-bit floats.
481     F64x2,
482 }
483 
484 impl ExtractLaneKind {
485     /// The lane size to use for different kinds of extract lane kinds.
486     pub(crate) fn lane_size(&self) -> OperandSize {
487         match self {
488             ExtractLaneKind::I8x16S | ExtractLaneKind::I8x16U => OperandSize::S8,
489             ExtractLaneKind::I16x8S | ExtractLaneKind::I16x8U => OperandSize::S16,
490             ExtractLaneKind::I32x4 | ExtractLaneKind::F32x4 => OperandSize::S32,
491             ExtractLaneKind::I64x2 | ExtractLaneKind::F64x2 => OperandSize::S64,
492         }
493     }
494 }
495 
496 impl From<ExtractLaneKind> for Extend<Signed> {
497     fn from(value: ExtractLaneKind) -> Self {
498         match value {
499             ExtractLaneKind::I8x16S => Extend::I32Extend8,
500             ExtractLaneKind::I16x8S => Extend::I32Extend16,
501             _ => unimplemented!(),
502         }
503     }
504 }
505 
506 /// Kinds of replace lane supported by WebAssembly.
507 pub(crate) enum ReplaceLaneKind {
508     /// 16 lanes of 8 bit integers.
509     I8x16,
510     /// 8 lanes of 16 bit integers.
511     I16x8,
512     /// 4 lanes of 32 bit integers.
513     I32x4,
514     /// 2 lanes of 64 bit integers.
515     I64x2,
516     /// 4 lanes of 32 bit floats.
517     F32x4,
518     /// 2 lanes of 64 bit floats.
519     F64x2,
520 }
521 
522 impl ReplaceLaneKind {
523     /// The lane size to use for different kinds of replace lane kinds.
524     pub(crate) fn lane_size(&self) -> OperandSize {
525         match self {
526             ReplaceLaneKind::I8x16 => OperandSize::S8,
527             ReplaceLaneKind::I16x8 => OperandSize::S16,
528             ReplaceLaneKind::I32x4 => OperandSize::S32,
529             ReplaceLaneKind::I64x2 => OperandSize::S64,
530             ReplaceLaneKind::F32x4 => OperandSize::S32,
531             ReplaceLaneKind::F64x2 => OperandSize::S64,
532         }
533     }
534 }
535 
536 /// Kinds of behavior supported by Wasm loads.
537 pub(crate) enum LoadKind {
538     /// Load the entire bytes of the operand size without any modifications.
539     Operand(OperandSize),
540     /// Atomic load, with optional scalar extend.
541     Atomic(OperandSize, Option<ExtendKind>),
542     /// Duplicate value into vector lanes.
543     Splat(SplatLoadKind),
544     /// Scalar (non-vector) extend.
545     ScalarExtend(ExtendKind),
546     /// Vector extend.
547     VectorExtend(V128LoadExtendKind),
548     /// Load content into select lane.
549     VectorLane(LaneSelector),
550     /// Load a single element into the lowest bits of a vector and initialize
551     /// all other bits to zero.
552     VectorZero(OperandSize),
553 }
554 
555 impl LoadKind {
556     /// Returns the [`OperandSize`] used in the load operation.
557     pub(crate) fn derive_operand_size(&self) -> OperandSize {
558         match self {
559             Self::ScalarExtend(extend) | Self::Atomic(_, Some(extend)) => {
560                 Self::operand_size_for_scalar(extend)
561             }
562             Self::VectorExtend(_) => OperandSize::S64,
563             Self::Splat(kind) => Self::operand_size_for_splat(kind),
564             Self::Operand(size)
565             | Self::Atomic(size, None)
566             | Self::VectorLane(LaneSelector { size, .. })
567             | Self::VectorZero(size) => *size,
568         }
569     }
570 
571     pub fn vector_lane(lane: u8, size: OperandSize) -> Self {
572         Self::VectorLane(LaneSelector { lane, size })
573     }
574 
575     fn operand_size_for_scalar(extend_kind: &ExtendKind) -> OperandSize {
576         match extend_kind {
577             ExtendKind::Signed(s) => s.from_size(),
578             ExtendKind::Unsigned(u) => u.from_size(),
579         }
580     }
581 
582     fn operand_size_for_splat(kind: &SplatLoadKind) -> OperandSize {
583         match kind {
584             SplatLoadKind::S8 => OperandSize::S8,
585             SplatLoadKind::S16 => OperandSize::S16,
586             SplatLoadKind::S32 => OperandSize::S32,
587             SplatLoadKind::S64 => OperandSize::S64,
588         }
589     }
590 
591     pub(crate) fn is_atomic(&self) -> bool {
592         matches!(self, Self::Atomic(_, _))
593     }
594 }
595 
596 /// Kinds of behavior supported by Wasm loads.
597 #[derive(Copy, Clone)]
598 pub enum StoreKind {
599     /// Store the entire bytes of the operand size without any modifications.
600     Operand(OperandSize),
601     /// Store the entire bytes of the operand size without any modifications, atomically.
602     Atomic(OperandSize),
603     /// Store the content of selected lane.
604     VectorLane(LaneSelector),
605 }
606 
607 impl StoreKind {
608     pub fn vector_lane(lane: u8, size: OperandSize) -> Self {
609         Self::VectorLane(LaneSelector { lane, size })
610     }
611 }
612 
613 #[derive(Copy, Clone)]
614 pub struct LaneSelector {
615     pub lane: u8,
616     pub size: OperandSize,
617 }
618 
619 /// Types of vector integer to float conversions supported by WebAssembly.
620 pub(crate) enum V128ConvertKind {
621     /// 4 lanes of signed 32-bit integers to 4 lanes of 32-bit floats.
622     I32x4S,
623     /// 4 lanes of unsigned 32-bit integers to 4 lanes of 32-bit floats.
624     I32x4U,
625     /// 4 lanes of signed 32-bit integers to low bits of 2 lanes of 64-bit
626     /// floats.
627     I32x4LowS,
628     /// 4 lanes of unsigned 32-bit integers to low bits of 2 lanes of 64-bit
629     /// floats.
630     I32x4LowU,
631 }
632 
633 impl V128ConvertKind {
634     pub(crate) fn src_lane_size(&self) -> OperandSize {
635         match self {
636             V128ConvertKind::I32x4S
637             | V128ConvertKind::I32x4U
638             | V128ConvertKind::I32x4LowS
639             | V128ConvertKind::I32x4LowU => OperandSize::S32,
640         }
641     }
642 
643     pub(crate) fn dst_lane_size(&self) -> OperandSize {
644         match self {
645             V128ConvertKind::I32x4S | V128ConvertKind::I32x4U => OperandSize::S32,
646             V128ConvertKind::I32x4LowS | V128ConvertKind::I32x4LowU => OperandSize::S64,
647         }
648     }
649 }
650 
651 /// Kinds of vector narrowing operations supported by WebAssembly.
652 pub(crate) enum V128NarrowKind {
653     /// Narrow 8 lanes of 16-bit integers to 16 lanes of 8-bit integers using
654     /// signed saturation.
655     I16x8S,
656     /// Narrow 8 lanes of 16-bit integers to 16 lanes of 8-bit integers using
657     /// unsigned saturation.
658     I16x8U,
659     /// Narrow 4 lanes of 32-bit integers to 8 lanes of 16-bit integers using
660     /// signed saturation.
661     I32x4S,
662     /// Narrow 4 lanes of 32-bit integers to 8 lanes of 16-bit integers using
663     /// unsigned saturation.
664     I32x4U,
665 }
666 
667 impl V128NarrowKind {
668     /// Return the size of the destination lanes.
669     pub(crate) fn dst_lane_size(&self) -> OperandSize {
670         match self {
671             Self::I16x8S | Self::I16x8U => OperandSize::S8,
672             Self::I32x4S | Self::I32x4U => OperandSize::S16,
673         }
674     }
675 }
676 
677 /// Kinds of vector extending operations supported by WebAssembly.
678 #[derive(Debug, Copy, Clone)]
679 pub(crate) enum V128ExtendKind {
680     /// Low half of i8x16 sign extended.
681     LowI8x16S,
682     /// High half of i8x16 sign extended.
683     HighI8x16S,
684     /// Low half of i8x16 zero extended.
685     LowI8x16U,
686     /// High half of i8x16 zero extended.
687     HighI8x16U,
688     /// Low half of i16x8 sign extended.
689     LowI16x8S,
690     /// High half of i16x8 sign extended.
691     HighI16x8S,
692     /// Low half of i16x8 zero extended.
693     LowI16x8U,
694     /// High half of i16x8 zero extended.
695     HighI16x8U,
696     /// Low half of i32x4 sign extended.
697     LowI32x4S,
698     /// High half of i32x4 sign extended.
699     HighI32x4S,
700     /// Low half of i32x4 zero extended.
701     LowI32x4U,
702     /// High half of i32x4 zero extended.
703     HighI32x4U,
704 }
705 
706 impl V128ExtendKind {
707     /// The size of the source's lanes.
708     pub(crate) fn src_lane_size(&self) -> OperandSize {
709         match self {
710             Self::LowI8x16S | Self::LowI8x16U | Self::HighI8x16S | Self::HighI8x16U => {
711                 OperandSize::S8
712             }
713             Self::LowI16x8S | Self::LowI16x8U | Self::HighI16x8S | Self::HighI16x8U => {
714                 OperandSize::S16
715             }
716             Self::LowI32x4S | Self::LowI32x4U | Self::HighI32x4S | Self::HighI32x4U => {
717                 OperandSize::S32
718             }
719         }
720     }
721 }
722 
723 /// Kinds of vector equalities and non-equalities supported by WebAssembly.
724 pub(crate) enum VectorEqualityKind {
725     /// 16 lanes of 8 bit integers.
726     I8x16,
727     /// 8 lanes of 16 bit integers.
728     I16x8,
729     /// 4 lanes of 32 bit integers.
730     I32x4,
731     /// 2 lanes of 64 bit integers.
732     I64x2,
733     /// 4 lanes of 32 bit floats.
734     F32x4,
735     /// 2 lanes of 64 bit floats.
736     F64x2,
737 }
738 
739 impl VectorEqualityKind {
740     /// Get the lane size to use.
741     pub(crate) fn lane_size(&self) -> OperandSize {
742         match self {
743             Self::I8x16 => OperandSize::S8,
744             Self::I16x8 => OperandSize::S16,
745             Self::I32x4 | Self::F32x4 => OperandSize::S32,
746             Self::I64x2 | Self::F64x2 => OperandSize::S64,
747         }
748     }
749 }
750 
751 /// Kinds of vector comparisons supported by WebAssembly.
752 pub(crate) enum VectorCompareKind {
753     /// 16 lanes of signed 8 bit integers.
754     I8x16S,
755     /// 16 lanes of unsigned 8 bit integers.
756     I8x16U,
757     /// 8 lanes of signed 16 bit integers.
758     I16x8S,
759     /// 8 lanes of unsigned 16 bit integers.
760     I16x8U,
761     /// 4 lanes of signed 32 bit integers.
762     I32x4S,
763     /// 4 lanes of unsigned 32 bit integers.
764     I32x4U,
765     /// 2 lanes of signed 64 bit integers.
766     I64x2S,
767     /// 4 lanes of 32 bit floats.
768     F32x4,
769     /// 2 lanes of 64 bit floats.
770     F64x2,
771 }
772 
773 impl VectorCompareKind {
774     /// Get the lane size to use.
775     pub(crate) fn lane_size(&self) -> OperandSize {
776         match self {
777             Self::I8x16S | Self::I8x16U => OperandSize::S8,
778             Self::I16x8S | Self::I16x8U => OperandSize::S16,
779             Self::I32x4S | Self::I32x4U | Self::F32x4 => OperandSize::S32,
780             Self::I64x2S | Self::F64x2 => OperandSize::S64,
781         }
782     }
783 }
784 
785 /// Kinds of vector absolute operations supported by WebAssembly.
786 #[derive(Copy, Debug, Clone, Eq, PartialEq)]
787 pub(crate) enum V128AbsKind {
788     /// 8 bit integers.
789     I8x16,
790     /// 16 bit integers.
791     I16x8,
792     /// 32 bit integers.
793     I32x4,
794     /// 64 bit integers.
795     I64x2,
796     /// 32 bit floats.
797     F32x4,
798     /// 64 bit floats.
799     F64x2,
800 }
801 
802 impl V128AbsKind {
803     /// The lane size to use.
804     pub(crate) fn lane_size(&self) -> OperandSize {
805         match self {
806             Self::I8x16 => OperandSize::S8,
807             Self::I16x8 => OperandSize::S16,
808             Self::I32x4 | Self::F32x4 => OperandSize::S32,
809             Self::I64x2 | Self::F64x2 => OperandSize::S64,
810         }
811     }
812 }
813 
814 /// Kinds of truncation for vectors supported by WebAssembly.
815 pub(crate) enum V128TruncKind {
816     /// Truncates 4 lanes of 32-bit floats to nearest integral value.
817     F32x4,
818     /// Truncates 2 lanes of 64-bit floats to nearest integral value.
819     F64x2,
820     /// Integers from signed F32x4.
821     I32x4FromF32x4S,
822     /// Integers from unsigned F32x4.
823     I32x4FromF32x4U,
824     /// Integers from signed F64x2.
825     I32x4FromF64x2SZero,
826     /// Integers from unsigned F64x2.
827     I32x4FromF64x2UZero,
828 }
829 
830 impl V128TruncKind {
831     /// The size of the source lanes.
832     pub(crate) fn src_lane_size(&self) -> OperandSize {
833         match self {
834             V128TruncKind::F32x4
835             | V128TruncKind::I32x4FromF32x4S
836             | V128TruncKind::I32x4FromF32x4U => OperandSize::S32,
837             V128TruncKind::F64x2
838             | V128TruncKind::I32x4FromF64x2SZero
839             | V128TruncKind::I32x4FromF64x2UZero => OperandSize::S64,
840         }
841     }
842 
843     /// The size of the destination lanes.
844     pub(crate) fn dst_lane_size(&self) -> OperandSize {
845         if let V128TruncKind::F64x2 = self {
846             OperandSize::S64
847         } else {
848             OperandSize::S32
849         }
850     }
851 }
852 
/// The vector additions available in WebAssembly.
pub(crate) enum V128AddKind {
    /// Four 32-bit float lanes.
    F32x4,
    /// Two 64-bit float lanes.
    F64x2,
    /// Sixteen 8-bit integer lanes, wrapping.
    I8x16,
    /// Sixteen 8-bit integer lanes, signed saturating.
    I8x16SatS,
    /// Sixteen 8-bit integer lanes, unsigned saturating.
    I8x16SatU,
    /// Eight 16-bit integer lanes, wrapping.
    I16x8,
    /// Eight 16-bit integer lanes, signed saturating.
    I16x8SatS,
    /// Eight 16-bit integer lanes, unsigned saturating.
    I16x8SatU,
    /// Four 32-bit integer lanes, wrapping.
    I32x4,
    /// Two 64-bit integer lanes, wrapping.
    I64x2,
}
876 
/// The vector subtractions available in WebAssembly.
pub(crate) enum V128SubKind {
    /// Four 32-bit float lanes.
    F32x4,
    /// Two 64-bit float lanes.
    F64x2,
    /// Sixteen 8-bit integer lanes, wrapping.
    I8x16,
    /// Sixteen 8-bit integer lanes, signed saturating.
    I8x16SatS,
    /// Sixteen 8-bit integer lanes, unsigned saturating.
    I8x16SatU,
    /// Eight 16-bit integer lanes, wrapping.
    I16x8,
    /// Eight 16-bit integer lanes, signed saturating.
    I16x8SatS,
    /// Eight 16-bit integer lanes, unsigned saturating.
    I16x8SatU,
    /// Four 32-bit integer lanes, wrapping.
    I32x4,
    /// Two 64-bit integer lanes, wrapping.
    I64x2,
}
900 
901 impl From<V128NegKind> for V128SubKind {
902     fn from(value: V128NegKind) -> Self {
903         match value {
904             V128NegKind::I8x16 => Self::I8x16,
905             V128NegKind::I16x8 => Self::I16x8,
906             V128NegKind::I32x4 => Self::I32x4,
907             V128NegKind::I64x2 => Self::I64x2,
908             V128NegKind::F32x4 | V128NegKind::F64x2 => unimplemented!(),
909         }
910     }
911 }
912 
/// The vector multiplications available in WebAssembly.
pub(crate) enum V128MulKind {
    /// Four 32-bit float lanes.
    F32x4,
    /// Two 64-bit float lanes.
    F64x2,
    /// Eight 16-bit integer lanes.
    I16x8,
    /// Four 32-bit integer lanes.
    I32x4,
    /// Two 64-bit integer lanes.
    I64x2,
}
926 
927 /// Kinds of vector negation supported by WebAssembly.
928 #[derive(Copy, Clone)]
929 pub(crate) enum V128NegKind {
930     /// 4 lanes of 32-bit floats.
931     F32x4,
932     /// 2 lanes of 64-bit floats.
933     F64x2,
934     /// 16 lanes of 8-bit integers.
935     I8x16,
936     /// 8 lanes of 16-bit integers.
937     I16x8,
938     /// 4 lanes of 32-bit integers.
939     I32x4,
940     /// 2 lanes of 64-bit integers.
941     I64x2,
942 }
943 
944 impl V128NegKind {
945     /// The size of the lanes.
946     pub(crate) fn lane_size(&self) -> OperandSize {
947         match self {
948             Self::F32x4 | Self::I32x4 => OperandSize::S32,
949             Self::F64x2 | Self::I64x2 => OperandSize::S64,
950             Self::I8x16 => OperandSize::S8,
951             Self::I16x8 => OperandSize::S16,
952         }
953     }
954 }
955 
/// The extended pairwise additions available in WebAssembly.
pub(crate) enum V128ExtAddKind {
    /// Sixteen signed 8-bit integer lanes.
    I8x16S,
    /// Sixteen unsigned 8-bit integer lanes.
    I8x16U,
    /// Eight signed 16-bit integer lanes.
    I16x8S,
    /// Eight unsigned 16-bit integer lanes.
    I16x8U,
}
967 
/// The vector extended multiplications available in WebAssembly.
///
/// The variants carry the same names as those of [`V128ExtendKind`].
#[derive(Clone, Copy, Debug)]
pub(crate) enum V128ExtMulKind {
    /// Low half of i8x16, signed.
    LowI8x16S,
    /// High half of i8x16, signed.
    HighI8x16S,
    /// Low half of i8x16, unsigned.
    LowI8x16U,
    /// High half of i8x16, unsigned.
    HighI8x16U,
    /// Low half of i16x8, signed.
    LowI16x8S,
    /// High half of i16x8, signed.
    HighI16x8S,
    /// Low half of i16x8, unsigned.
    LowI16x8U,
    /// High half of i16x8, unsigned.
    HighI16x8U,
    /// Low half of i32x4, signed.
    LowI32x4S,
    /// High half of i32x4, signed.
    HighI32x4S,
    /// Low half of i32x4, unsigned.
    LowI32x4U,
    /// High half of i32x4, unsigned.
    HighI32x4U,
}
984 
985 impl From<V128ExtMulKind> for V128ExtendKind {
986     fn from(value: V128ExtMulKind) -> Self {
987         match value {
988             V128ExtMulKind::LowI8x16S => Self::LowI8x16S,
989             V128ExtMulKind::HighI8x16S => Self::HighI8x16S,
990             V128ExtMulKind::LowI8x16U => Self::LowI8x16U,
991             V128ExtMulKind::HighI8x16U => Self::HighI8x16U,
992             V128ExtMulKind::LowI16x8S => Self::LowI16x8S,
993             V128ExtMulKind::HighI16x8S => Self::HighI16x8S,
994             V128ExtMulKind::LowI16x8U => Self::LowI16x8U,
995             V128ExtMulKind::HighI16x8U => Self::HighI16x8U,
996             V128ExtMulKind::LowI32x4S => Self::LowI32x4S,
997             V128ExtMulKind::HighI32x4S => Self::HighI32x4S,
998             V128ExtMulKind::LowI32x4U => Self::LowI32x4U,
999             V128ExtMulKind::HighI32x4U => Self::HighI32x4U,
1000         }
1001     }
1002 }
1003 
1004 impl From<V128ExtMulKind> for V128MulKind {
1005     fn from(value: V128ExtMulKind) -> Self {
1006         match value {
1007             V128ExtMulKind::LowI8x16S
1008             | V128ExtMulKind::HighI8x16S
1009             | V128ExtMulKind::LowI8x16U
1010             | V128ExtMulKind::HighI8x16U => Self::I16x8,
1011             V128ExtMulKind::LowI16x8S
1012             | V128ExtMulKind::HighI16x8S
1013             | V128ExtMulKind::LowI16x8U
1014             | V128ExtMulKind::HighI16x8U => Self::I32x4,
1015             V128ExtMulKind::LowI32x4S
1016             | V128ExtMulKind::HighI32x4S
1017             | V128ExtMulKind::LowI32x4U
1018             | V128ExtMulKind::HighI32x4U => Self::I64x2,
1019         }
1020     }
1021 }
1022 
1023 /// Operand size, in bits.
1024 #[derive(Copy, Debug, Clone, Eq, PartialEq)]
1025 pub(crate) enum OperandSize {
1026     /// 8 bits.
1027     S8,
1028     /// 16 bits.
1029     S16,
1030     /// 32 bits.
1031     S32,
1032     /// 64 bits.
1033     S64,
1034     /// 128 bits.
1035     S128,
1036 }
1037 
1038 impl OperandSize {
1039     /// The number of bits in the operand.
1040     pub fn num_bits(&self) -> u8 {
1041         match self {
1042             OperandSize::S8 => 8,
1043             OperandSize::S16 => 16,
1044             OperandSize::S32 => 32,
1045             OperandSize::S64 => 64,
1046             OperandSize::S128 => 128,
1047         }
1048     }
1049 
1050     /// The number of bytes in the operand.
1051     pub fn bytes(&self) -> u32 {
1052         match self {
1053             Self::S8 => 1,
1054             Self::S16 => 2,
1055             Self::S32 => 4,
1056             Self::S64 => 8,
1057             Self::S128 => 16,
1058         }
1059     }
1060 
1061     /// The binary logarithm of the number of bits in the operand.
1062     pub fn log2(&self) -> u8 {
1063         match self {
1064             OperandSize::S8 => 3,
1065             OperandSize::S16 => 4,
1066             OperandSize::S32 => 5,
1067             OperandSize::S64 => 6,
1068             OperandSize::S128 => 7,
1069         }
1070     }
1071 
1072     /// Create an [`OperandSize`]  from the given number of bytes.
1073     pub fn from_bytes(bytes: u8) -> Self {
1074         use OperandSize::*;
1075         match bytes {
1076             4 => S32,
1077             8 => S64,
1078             16 => S128,
1079             _ => panic!("Invalid bytes {bytes} for OperandSize"),
1080         }
1081     }
1082 
1083     pub fn extend_to<T: ExtendType>(&self, to: Self) -> Option<Extend<T>> {
1084         match to {
1085             OperandSize::S32 => match self {
1086                 OperandSize::S8 => Some(Extend::I32Extend8),
1087                 OperandSize::S16 => Some(Extend::I32Extend16),
1088                 _ => None,
1089             },
1090             OperandSize::S64 => match self {
1091                 OperandSize::S8 => Some(Extend::I64Extend8),
1092                 OperandSize::S16 => Some(Extend::I64Extend16),
1093                 OperandSize::S32 => Some(Extend::I64Extend32),
1094                 _ => None,
1095             },
1096             _ => None,
1097         }
1098     }
1099 
1100     /// The number of bits in the mantissa.
1101     ///
1102     /// Only implemented for floats.
1103     pub fn mantissa_bits(&self) -> u8 {
1104         match self {
1105             Self::S32 => 8,
1106             Self::S64 => 11,
1107             _ => unimplemented!(),
1108         }
1109     }
1110 }
1111 
/// An abstraction over a register or immediate.
///
/// Used as the operand type wherever an instruction can take either form.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(crate) enum RegImm {
    /// A register.
    Reg(Reg),
    /// A tagged immediate argument.
    Imm(Imm),
}
1120 
1121 /// An tagged representation of an immediate.
1122 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
1123 pub(crate) enum Imm {
1124     /// I32 immediate.
1125     I32(u32),
1126     /// I64 immediate.
1127     I64(u64),
1128     /// F32 immediate.
1129     F32(u32),
1130     /// F64 immediate.
1131     F64(u64),
1132     /// V128 immediate.
1133     V128(i128),
1134 }
1135 
1136 impl Imm {
1137     /// Create a new I64 immediate.
1138     pub fn i64(val: i64) -> Self {
1139         Self::I64(val as u64)
1140     }
1141 
1142     /// Create a new I32 immediate.
1143     pub fn i32(val: i32) -> Self {
1144         Self::I32(val as u32)
1145     }
1146 
1147     /// Create a new F32 immediate.
1148     pub fn f32(bits: u32) -> Self {
1149         Self::F32(bits)
1150     }
1151 
1152     /// Create a new F64 immediate.
1153     pub fn f64(bits: u64) -> Self {
1154         Self::F64(bits)
1155     }
1156 
1157     /// Create a new V128 immediate.
1158     pub fn v128(bits: i128) -> Self {
1159         Self::V128(bits)
1160     }
1161 
1162     /// Convert the immediate to i32, if possible.
1163     pub fn to_i32(&self) -> Option<i32> {
1164         match self {
1165             Self::I32(v) => Some(*v as i32),
1166             Self::I64(v) => i32::try_from(*v as i64).ok(),
1167             _ => None,
1168         }
1169     }
1170 
1171     /// Unwraps the underlying integer value as u64.
1172     /// # Panics
1173     /// This function panics if the underlying value can't be represented
1174     /// as u64.
1175     pub fn unwrap_as_u64(&self) -> u64 {
1176         match self {
1177             Self::I32(v) => *v as u64,
1178             Self::I64(v) => *v,
1179             Self::F32(v) => *v as u64,
1180             Self::F64(v) => *v,
1181             _ => unreachable!(),
1182         }
1183     }
1184 
1185     /// Get the operand size of the immediate.
1186     pub fn size(&self) -> OperandSize {
1187         match self {
1188             Self::I32(_) | Self::F32(_) => OperandSize::S32,
1189             Self::I64(_) | Self::F64(_) => OperandSize::S64,
1190             Self::V128(_) => OperandSize::S128,
1191         }
1192     }
1193 
1194     /// Get a little endian representation of the immediate.
1195     ///
1196     /// This method heap allocates and is intended to be used when adding
1197     /// values to the constant pool.
1198     pub fn to_bytes(&self) -> Vec<u8> {
1199         match self {
1200             Imm::I32(n) => n.to_le_bytes().to_vec(),
1201             Imm::I64(n) => n.to_le_bytes().to_vec(),
1202             Imm::F32(n) => n.to_le_bytes().to_vec(),
1203             Imm::F64(n) => n.to_le_bytes().to_vec(),
1204             Imm::V128(n) => n.to_le_bytes().to_vec(),
1205         }
1206     }
1207 }
1208 
/// The location of the [VMContext] used for function calls.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub(crate) enum VMContextLoc {
    /// Dynamic, stored in the given register.
    Reg(Reg),
    /// The pinned [VMContext] register.
    Pinned,
    /// A different VMContext is loaded at the provided offset from the current
    /// VMContext.
    OffsetFromPinned(u32),
}
1220 
/// The maximum number of context arguments currently used across the compiler.
///
/// This is also the length of the array carried by
/// [`ContextArgs::CalleeAndCallerVMContext`].
pub(crate) const MAX_CONTEXT_ARGS: usize = 2;
1223 
/// Out-of-band special purpose arguments used for function call emission.
///
/// We cannot rely on the value stack for these values given that inserting
/// register or memory values at arbitrary locations of the value stack has the
/// potential to break the stack ordering principle, which states that older
/// values must always precede newer values, effectively simulating the order of
/// values in the machine stack.
///
/// The [ContextArgs] are meant to be resolved at every callsite; in some cases
/// it might be possible to construct it early on, but given that it might
/// contain allocatable registers, it's preferred to construct it in
/// [FnCall::emit].
#[derive(Clone, Debug)]
pub(crate) enum ContextArgs {
    /// A single context argument is required; the current pinned [VMContext]
    /// register must be passed as the first argument of the function call.
    VMContext([VMContextLoc; 1]),
    /// The callee and caller context arguments are required. In this case, the
    /// callee context argument is usually stored into an allocatable register
    /// and the caller is always the current pinned [VMContext] pointer.
    CalleeAndCallerVMContext([VMContextLoc; MAX_CONTEXT_ARGS]),
}
1245 
1246 impl ContextArgs {
1247     /// Construct a [ContextArgs] declaring the usage of the pinned [VMContext]
1248     /// register as both the caller and callee context arguments.
1249     pub fn pinned_callee_and_caller_vmctx() -> Self {
1250         Self::CalleeAndCallerVMContext([VMContextLoc::Pinned, VMContextLoc::Pinned])
1251     }
1252 
1253     /// Construct a [ContextArgs] that declares the usage of the pinned
1254     /// [VMContext] register as the only context argument.
1255     pub fn pinned_vmctx() -> Self {
1256         Self::VMContext([VMContextLoc::Pinned])
1257     }
1258 
1259     /// Construct a [ContextArgs] that declares the usage of a [VMContext] loaded
1260     /// indirectly from the pinned [VMContext] register as the only context
1261     /// argument.
1262     pub fn offset_from_pinned_vmctx(offset: u32) -> Self {
1263         Self::VMContext([VMContextLoc::OffsetFromPinned(offset)])
1264     }
1265 
1266     /// Construct a [ContextArgs] that declares a dynamic callee context and the
1267     /// pinned [VMContext] register as the context arguments.
1268     pub fn with_callee_and_pinned_caller(callee_vmctx: Reg) -> Self {
1269         Self::CalleeAndCallerVMContext([VMContextLoc::Reg(callee_vmctx), VMContextLoc::Pinned])
1270     }
1271 
1272     /// Get the length of the [ContextArgs].
1273     pub fn len(&self) -> usize {
1274         self.as_slice().len()
1275     }
1276 
1277     /// Get a slice of the context arguments.
1278     pub fn as_slice(&self) -> &[VMContextLoc] {
1279         match self {
1280             Self::VMContext(a) => a.as_slice(),
1281             Self::CalleeAndCallerVMContext(a) => a.as_slice(),
1282         }
1283     }
1284 }
1285 
/// The kind of callee targeted by a function call.
#[derive(Copy, Clone, Debug)]
pub(crate) enum CalleeKind {
    /// A function call to a raw address.
    Indirect(Reg),
    /// A function call to a local function.
    Direct(UserExternalNameRef),
}
1293 
impl CalleeKind {
    /// Creates a callee kind from a register.
    ///
    /// The result is a [`CalleeKind::Indirect`] wrapping `reg`.
    pub fn indirect(reg: Reg) -> Self {
        Self::Indirect(reg)
    }

    /// Creates a direct callee kind from a function name.
    ///
    /// The result is a [`CalleeKind::Direct`] wrapping `name`.
    pub fn direct(name: UserExternalNameRef) -> Self {
        Self::Direct(name)
    }
}
1305 
1306 impl RegImm {
1307     /// Register constructor.
1308     pub fn reg(r: Reg) -> Self {
1309         RegImm::Reg(r)
1310     }
1311 
1312     /// I64 immediate constructor.
1313     pub fn i64(val: i64) -> Self {
1314         RegImm::Imm(Imm::i64(val))
1315     }
1316 
1317     /// I32 immediate constructor.
1318     pub fn i32(val: i32) -> Self {
1319         RegImm::Imm(Imm::i32(val))
1320     }
1321 
1322     /// F32 immediate, stored using its bits representation.
1323     pub fn f32(bits: u32) -> Self {
1324         RegImm::Imm(Imm::f32(bits))
1325     }
1326 
1327     /// F64 immediate, stored using its bits representation.
1328     pub fn f64(bits: u64) -> Self {
1329         RegImm::Imm(Imm::f64(bits))
1330     }
1331 
1332     /// V128 immediate.
1333     pub fn v128(bits: i128) -> Self {
1334         RegImm::Imm(Imm::v128(bits))
1335     }
1336 }
1337 
impl From<Reg> for RegImm {
    /// Wraps the register as [`RegImm::Reg`], so a `Reg` can be passed with
    /// `.into()` wherever a `RegImm` is expected.
    fn from(r: Reg) -> Self {
        Self::Reg(r)
    }
}
1343 
/// The rounding mode requested from `MacroAssembler::float_round`.
// NOTE(review): the variants presumably correspond to the WebAssembly
// `nearest`, `ceil`, `floor` and `trunc` operators respectively -- confirm
// against the ISA implementations.
#[derive(Debug)]
pub enum RoundingMode {
    /// Round to the nearest value.
    Nearest,
    /// Round up.
    Up,
    /// Round down.
    Down,
    /// Round towards zero.
    Zero,
}
1351 
/// Memory flags for trusted loads/stores.
///
/// Built from `MemFlags::trusted()`.
// NOTE(review): per the Cranelift documentation, `trusted()` marks the access
// as aligned and non-trapping -- confirm for the Cranelift version in use.
pub const TRUSTED_FLAGS: MemFlags = MemFlags::trusted();
1354 
/// Flags used for WebAssembly loads / stores.
///
/// These accesses are untrusted, so the flags start from `MemFlags::new()`
/// and `no_trap` is not set. The endianness is explicitly set to little
/// endian, which is the one required by WebAssembly.
pub const UNTRUSTED_FLAGS: MemFlags = MemFlags::new().with_endianness(Endianness::Little);
1359 
1360 /// Generic MacroAssembler interface used by the code generation.
1361 ///
1362 /// The MacroAssembler trait aims to expose an interface, high-level enough,
1363 /// so that each ISA can provide its own lowering to machine code. For example,
/// for WebAssembly operators that don't have a direct mapping to a machine
/// instruction, the interface defines a signature matching the WebAssembly
/// operator, allowing each implementation to lower such operator entirely.
/// This approach attributes more responsibility to the MacroAssembler, but frees
/// the caller from having to assemble the right sequence of
/// instructions at the operator callsite.
1370 ///
1371 /// The interface defaults to a three-argument form for binary operations;
1372 /// this allows a natural mapping to instructions for RISC architectures,
1373 /// that use three-argument form.
1374 /// This approach allows for a more general interface that can be restricted
1375 /// where needed, in the case of architectures that use a two-argument form.
1376 
1377 pub(crate) trait MacroAssembler {
1378     /// The addressing mode.
1379     type Address: Copy + Debug;
1380 
1381     /// The pointer representation of the target ISA,
1382     /// used to access information from [`VMOffsets`].
1383     type Ptr: PtrSize;
1384 
1385     /// The ABI details of the target.
1386     type ABI: abi::ABI;
1387 
1388     /// Emit the function prologue.
1389     fn prologue(&mut self, vmctx: Reg) -> Result<()> {
1390         self.frame_setup()?;
1391         self.check_stack(vmctx)
1392     }
1393 
1394     /// Generate the frame setup sequence.
1395     fn frame_setup(&mut self) -> Result<()>;
1396 
1397     /// Generate the frame restore sequence.
1398     fn frame_restore(&mut self) -> Result<()>;
1399 
1400     /// Emit a stack check.
1401     fn check_stack(&mut self, vmctx: Reg) -> Result<()>;
1402 
    /// Emit the function epilogue.
    ///
    /// The default implementation only restores the frame through
    /// [`Self::frame_restore`].
    fn epilogue(&mut self) -> Result<()> {
        self.frame_restore()
    }
1407 
1408     /// Reserve stack space.
1409     fn reserve_stack(&mut self, bytes: u32) -> Result<()>;
1410 
1411     /// Free stack space.
1412     fn free_stack(&mut self, bytes: u32) -> Result<()>;
1413 
    /// Reset the stack pointer to the given offset.
    ///
    /// Used to reset the stack pointer to a given offset
    /// when dealing with unreachable code.
    fn reset_stack_pointer(&mut self, offset: SPOffset) -> Result<()>;
1419 
1420     /// Get the address of a local slot.
1421     fn local_address(&mut self, local: &LocalSlot) -> Result<Self::Address>;
1422 
    /// Constructs an address with an offset that is relative to the
    /// current position of the stack pointer (e.g. `[sp + (sp_offset -
    /// offset)]`).
    fn address_from_sp(&self, offset: SPOffset) -> Result<Self::Address>;
1427 
    /// Constructs an address with an offset that is absolute to the
    /// current position of the stack pointer (e.g. `[sp + offset]`).
    fn address_at_sp(&self, offset: SPOffset) -> Result<Self::Address>;
1431 
1432     /// Alias for [`Self::address_at_reg`] using the VMContext register as
1433     /// a base. The VMContext register is derived from the ABI type that is
1434     /// associated to the MacroAssembler.
1435     fn address_at_vmctx(&self, offset: u32) -> Result<Self::Address>;
1436 
1437     /// Construct an address that is absolute to the current position
1438     /// of the given register.
1439     fn address_at_reg(&self, reg: Reg, offset: u32) -> Result<Self::Address>;
1440 
1441     /// Emit a function call to either a local or external function.
1442     fn call(
1443         &mut self,
1444         stack_args_size: u32,
1445         f: impl FnMut(&mut Self) -> Result<(CalleeKind, CallingConvention)>,
1446     ) -> Result<u32>;
1447 
1448     /// Acquire a scratch register and execute the given callback.
1449     fn with_scratch<T: ScratchType, R>(&mut self, f: impl FnOnce(&mut Self, Scratch) -> R) -> R;
1450 
1451     /// Convenience wrapper over [`Self::with_scratch`], derives the register class
1452     /// for a particular Wasm value type.
1453     fn with_scratch_for<R>(
1454         &mut self,
1455         ty: WasmValType,
1456         f: impl FnOnce(&mut Self, Scratch) -> R,
1457     ) -> R {
1458         match ty {
1459             WasmValType::I32
1460             | WasmValType::I64
1461             | WasmValType::Ref(WasmRefType {
1462                 heap_type: WasmHeapType::Func,
1463                 ..
1464             }) => self.with_scratch::<IntScratch, _>(f),
1465             WasmValType::F32 | WasmValType::F64 | WasmValType::V128 => {
1466                 self.with_scratch::<FloatScratch, _>(f)
1467             }
1468             _ => unimplemented!(),
1469         }
1470     }
1471 
1472     /// Get stack pointer offset.
1473     fn sp_offset(&self) -> Result<SPOffset>;
1474 
1475     /// Perform a stack store.
1476     fn store(&mut self, src: RegImm, dst: Self::Address, size: OperandSize) -> Result<()>;
1477 
1478     /// Alias for `MacroAssembler::store` with the operand size corresponding
1479     /// to the pointer size of the target.
1480     fn store_ptr(&mut self, src: Reg, dst: Self::Address) -> Result<()>;
1481 
    /// Perform a WebAssembly store.
    /// A WebAssembly store introduces several additional invariants compared to
    /// [Self::store], more precisely, it can implicitly trap, in certain
    /// circumstances, even if explicit bounds checks are elided, in that sense,
    /// we consider this type of store as untrusted. It can also differ with
    /// regards to the endianness depending on the target ISA. For this reason,
    /// [Self::wasm_store], should be explicitly used when emitting WebAssembly
    /// stores.
    fn wasm_store(&mut self, src: Reg, dst: Self::Address, store_kind: StoreKind) -> Result<()>;
1491 
1492     /// Perform a zero-extended stack load.
1493     fn load(&mut self, src: Self::Address, dst: WritableReg, size: OperandSize) -> Result<()>;
1494 
1495     /// Perform a WebAssembly load.
1496     /// A WebAssembly load introduces several additional invariants compared to
1497     /// [Self::load], more precisely, it can implicitly trap, in certain
1498     /// circumstances, even if explicit bounds checks are elided, in that sense,
1499     /// we consider this type of load as untrusted. It can also differ with
1500     /// regards to the endianness depending on the target ISA. For this reason,
1501     /// [Self::wasm_load], should be explicitly used when emitting WebAssembly
1502     /// loads.
1503     fn wasm_load(&mut self, src: Self::Address, dst: WritableReg, kind: LoadKind) -> Result<()>;
1504 
1505     /// Alias for `MacroAssembler::load` with the operand size corresponding
1506     /// to the pointer size of the target.
1507     fn load_ptr(&mut self, src: Self::Address, dst: WritableReg) -> Result<()>;
1508 
1509     /// Computes the effective address and stores the result in the destination
1510     /// register.
1511     fn compute_addr(
1512         &mut self,
1513         _src: Self::Address,
1514         _dst: WritableReg,
1515         _size: OperandSize,
1516     ) -> Result<()>;
1517 
1518     /// Pop a value from the machine stack into the given register.
1519     fn pop(&mut self, dst: WritableReg, size: OperandSize) -> Result<()>;
1520 
1521     /// Perform a move.
1522     fn mov(&mut self, dst: WritableReg, src: RegImm, size: OperandSize) -> Result<()>;
1523 
1524     /// Perform a conditional move.
1525     fn cmov(&mut self, dst: WritableReg, src: Reg, cc: IntCmpKind, size: OperandSize)
1526     -> Result<()>;
1527 
1528     /// Performs a memory move of bytes from src to dest.
1529     /// Bytes are moved in blocks of 8 bytes, where possible.
1530     fn memmove(
1531         &mut self,
1532         src: SPOffset,
1533         dst: SPOffset,
1534         bytes: u32,
1535         direction: MemMoveDirection,
1536     ) -> Result<()> {
1537         match direction {
1538             MemMoveDirection::LowToHigh => debug_assert!(dst.as_u32() < src.as_u32()),
1539             MemMoveDirection::HighToLow => debug_assert!(dst.as_u32() > src.as_u32()),
1540         }
1541         // At least 4 byte aligned.
1542         debug_assert!(bytes % 4 == 0);
1543         let mut remaining = bytes;
1544         let word_bytes = <Self::ABI as abi::ABI>::word_bytes();
1545 
1546         let word_bytes = word_bytes as u32;
1547 
1548         let mut dst_offs;
1549         let mut src_offs;
1550         match direction {
1551             MemMoveDirection::LowToHigh => {
1552                 dst_offs = dst.as_u32() - bytes;
1553                 src_offs = src.as_u32() - bytes;
1554                 self.with_scratch::<IntScratch, _>(|masm, scratch| {
1555                     while remaining >= word_bytes {
1556                         remaining -= word_bytes;
1557                         dst_offs += word_bytes;
1558                         src_offs += word_bytes;
1559 
1560                         masm.load_ptr(
1561                             masm.address_from_sp(SPOffset::from_u32(src_offs))?,
1562                             scratch.writable(),
1563                         )?;
1564                         masm.store_ptr(
1565                             scratch.inner(),
1566                             masm.address_from_sp(SPOffset::from_u32(dst_offs))?,
1567                         )?;
1568                     }
1569                     anyhow::Ok(())
1570                 })?;
1571             }
1572             MemMoveDirection::HighToLow => {
1573                 // Go from the end to the beginning to handle overlapping addresses.
1574                 src_offs = src.as_u32();
1575                 dst_offs = dst.as_u32();
1576                 self.with_scratch::<IntScratch, _>(|masm, scratch| {
1577                     while remaining >= word_bytes {
1578                         masm.load_ptr(
1579                             masm.address_from_sp(SPOffset::from_u32(src_offs))?,
1580                             scratch.writable(),
1581                         )?;
1582                         masm.store_ptr(
1583                             scratch.inner(),
1584                             masm.address_from_sp(SPOffset::from_u32(dst_offs))?,
1585                         )?;
1586 
1587                         remaining -= word_bytes;
1588                         src_offs -= word_bytes;
1589                         dst_offs -= word_bytes;
1590                     }
1591                     anyhow::Ok(())
1592                 })?;
1593             }
1594         }
1595 
1596         if remaining > 0 {
1597             let half_word = word_bytes / 2;
1598             let ptr_size = OperandSize::from_bytes(half_word as u8);
1599             debug_assert!(remaining == half_word);
1600             // Need to move the offsets ahead in the `LowToHigh` case to
1601             // compensate for the initial subtraction of `bytes`.
1602             if direction == MemMoveDirection::LowToHigh {
1603                 dst_offs += half_word;
1604                 src_offs += half_word;
1605             }
1606 
1607             self.with_scratch::<IntScratch, _>(|masm, scratch| {
1608                 masm.load(
1609                     masm.address_from_sp(SPOffset::from_u32(src_offs))?,
1610                     scratch.writable(),
1611                     ptr_size,
1612                 )?;
1613                 masm.store(
1614                     scratch.inner().into(),
1615                     masm.address_from_sp(SPOffset::from_u32(dst_offs))?,
1616                     ptr_size,
1617                 )?;
1618                 anyhow::Ok(())
1619             })?;
1620         }
1621         Ok(())
1622     }
1623 
1624     /// Perform add operation.
1625     fn add(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;
1626 
1627     /// Perform a checked unsigned integer addition, emitting the provided trap
1628     /// if the addition overflows.
1629     fn checked_uadd(
1630         &mut self,
1631         dst: WritableReg,
1632         lhs: Reg,
1633         rhs: RegImm,
1634         size: OperandSize,
1635         trap: TrapCode,
1636     ) -> Result<()>;
1637 
1638     /// Perform subtraction operation.
1639     fn sub(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;
1640 
1641     /// Perform multiplication operation.
1642     fn mul(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;
1643 
1644     /// Perform a floating point add operation.
1645     fn float_add(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
1646 
1647     /// Perform a floating point subtraction operation.
1648     fn float_sub(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
1649 
1650     /// Perform a floating point multiply operation.
1651     fn float_mul(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
1652 
1653     /// Perform a floating point divide operation.
1654     fn float_div(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
1655 
1656     /// Perform a floating point minimum operation. In x86, this will emit
1657     /// multiple instructions.
1658     fn float_min(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
1659 
1660     /// Perform a floating point maximum operation. In x86, this will emit
1661     /// multiple instructions.
1662     fn float_max(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
1663 
1664     /// Perform a floating point copysign operation. In x86, this will emit
1665     /// multiple instructions.
1666     fn float_copysign(
1667         &mut self,
1668         dst: WritableReg,
1669         lhs: Reg,
1670         rhs: Reg,
1671         size: OperandSize,
1672     ) -> Result<()>;
1673 
1674     /// Perform a floating point abs operation.
1675     fn float_abs(&mut self, dst: WritableReg, size: OperandSize) -> Result<()>;
1676 
1677     /// Perform a floating point negation operation.
1678     fn float_neg(&mut self, dst: WritableReg, size: OperandSize) -> Result<()>;
1679 
    /// Perform a floating point rounding operation using the given rounding
    /// `mode`.
    // NOTE(review): `fallback` is presumably invoked when the target cannot
    // perform the rounding natively (e.g. to emit a libcall) -- confirm
    // against the ISA implementations.
    fn float_round<
        F: FnMut(&mut FuncEnv<Self::Ptr>, &mut CodeGenContext<Emission>, &mut Self) -> Result<()>,
    >(
        &mut self,
        mode: RoundingMode,
        env: &mut FuncEnv<Self::Ptr>,
        context: &mut CodeGenContext<Emission>,
        size: OperandSize,
        fallback: F,
    ) -> Result<()>;
1691 
1692     /// Perform a floating point square root operation.
1693     fn float_sqrt(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>;
1694 
1695     /// Perform logical and operation.
1696     fn and(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;
1697 
1698     /// Perform logical or operation.
1699     fn or(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;
1700 
1701     /// Perform logical exclusive or operation.
1702     fn xor(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;
1703 
1704     /// Perform a shift operation between a register and an immediate.
1705     fn shift_ir(
1706         &mut self,
1707         dst: WritableReg,
1708         imm: Imm,
1709         lhs: Reg,
1710         kind: ShiftKind,
1711         size: OperandSize,
1712     ) -> Result<()>;
1713 
1714     /// Perform a shift operation between two registers.
1715     /// This case is special in that some architectures have specific expectations
1716     /// regarding the location of the instruction arguments. To free the
1717     /// caller from having to deal with the architecture specific constraints
1718     /// we give this function access to the code generation context, allowing
1719     /// each implementation to decide the lowering path.
1720     fn shift(
1721         &mut self,
1722         context: &mut CodeGenContext<Emission>,
1723         kind: ShiftKind,
1724         size: OperandSize,
1725     ) -> Result<()>;
1726 
    /// Perform division operation.
    /// Division is special in that some architectures have specific
    /// expectations regarding the location of the instruction
    /// arguments and regarding the location of the quotient /
    /// remainder. To free the caller from having to deal with the
    /// architecture specific constraints we give this function access
    /// to the code generation context, allowing each implementation
    /// to decide the lowering path.  For cases in which division is an
    /// unconstrained binary operation, the caller can decide to use
    /// the `CodeGenContext::i32_binop` or `CodeGenContext::i64_binop`
    /// functions.
    fn div(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        kind: DivKind,
        size: OperandSize,
    ) -> Result<()>;
1744 
1745     /// Calculate remainder.
1746     fn rem(
1747         &mut self,
1748         context: &mut CodeGenContext<Emission>,
1749         kind: RemKind,
1750         size: OperandSize,
1751     ) -> Result<()>;
1752 
1753     /// Compares `src1` against `src2` for the side effect of setting processor
1754     /// flags.
1755     ///
1756     /// Note that `src1` is the left-hand-side of the comparison and `src2` is
1757     /// the right-hand-side, so if testing `a < b` then `src1 == a` and
1758     /// `src2 == b`
1759     fn cmp(&mut self, src1: Reg, src2: RegImm, size: OperandSize) -> Result<()>;
1760 
1761     /// Compare src and dst and put the result in dst.
1762     /// This function will potentially emit a series of instructions.
1763     ///
1764     /// The initial value in `dst` is the left-hand-side of the comparison and
1765     /// the initial value in `src` is the right-hand-side of the comparison.
1766     /// That means for `a < b` then `dst == a` and `src == b`.
1767     fn cmp_with_set(
1768         &mut self,
1769         dst: WritableReg,
1770         src: RegImm,
1771         kind: IntCmpKind,
1772         size: OperandSize,
1773     ) -> Result<()>;
1774 
1775     /// Compare floats in src1 and src2 and put the result in dst.
1776     /// In x86, this will emit multiple instructions.
1777     fn float_cmp_with_set(
1778         &mut self,
1779         dst: WritableReg,
1780         src1: Reg,
1781         src2: Reg,
1782         kind: FloatCmpKind,
1783         size: OperandSize,
1784     ) -> Result<()>;
1785 
1786     /// Count the number of leading zeroes in src and put the result in dst.
1787     /// In x64, this will emit multiple instructions if the `has_lzcnt` flag is
1788     /// false.
1789     fn clz(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>;
1790 
    /// Count the number of trailing zeroes in src and put the result in dst.
    /// In x64, this will emit multiple instructions if the `has_tzcnt` flag is
    /// false.
    fn ctz(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>;
1795 
1796     /// Push the register to the stack, returning the stack slot metadata.
1797     // NB
1798     // The stack alignment should not be assumed after any call to `push`,
1799     // unless explicitly aligned otherwise.  Typically, stack alignment is
1800     // maintained at call sites and during the execution of
1801     // epilogues.
1802     fn push(&mut self, src: Reg, size: OperandSize) -> Result<StackSlot>;
1803 
1804     /// Finalize the assembly and return the result.
1805     fn finalize(self, base: Option<SourceLoc>) -> Result<MachBufferFinalized<Final>>;
1806 
1807     /// Zero a particular register.
1808     fn zero(&mut self, reg: WritableReg) -> Result<()>;
1809 
1810     /// Count the number of 1 bits in src and put the result in dst. In x64,
1811     /// this will emit multiple instructions if the `has_popcnt` flag is false.
1812     fn popcnt(&mut self, context: &mut CodeGenContext<Emission>, size: OperandSize) -> Result<()>;
1813 
1814     /// Converts an i64 to an i32 by discarding the high 32 bits.
1815     fn wrap(&mut self, dst: WritableReg, src: Reg) -> Result<()>;
1816 
1817     /// Extends an integer of a given size to a larger size.
1818     fn extend(&mut self, dst: WritableReg, src: Reg, kind: ExtendKind) -> Result<()>;
1819 
1820     /// Emits one or more instructions to perform a signed truncation of a
1821     /// float into an integer.
1822     fn signed_truncate(
1823         &mut self,
1824         dst: WritableReg,
1825         src: Reg,
1826         src_size: OperandSize,
1827         dst_size: OperandSize,
1828         kind: TruncKind,
1829     ) -> Result<()>;
1830 
1831     /// Emits one or more instructions to perform an unsigned truncation of a
1832     /// float into an integer.
1833     fn unsigned_truncate(
1834         &mut self,
1835         context: &mut CodeGenContext<Emission>,
1836         src_size: OperandSize,
1837         dst_size: OperandSize,
1838         kind: TruncKind,
1839     ) -> Result<()>;
1840 
1841     /// Emits one or more instructions to perform a signed convert of an
1842     /// integer into a float.
1843     fn signed_convert(
1844         &mut self,
1845         dst: WritableReg,
1846         src: Reg,
1847         src_size: OperandSize,
1848         dst_size: OperandSize,
1849     ) -> Result<()>;
1850 
1851     /// Emits one or more instructions to perform an unsigned convert of an
1852     /// integer into a float.
1853     fn unsigned_convert(
1854         &mut self,
1855         dst: WritableReg,
1856         src: Reg,
1857         tmp_gpr: Reg,
1858         src_size: OperandSize,
1859         dst_size: OperandSize,
1860     ) -> Result<()>;
1861 
1862     /// Reinterpret a float as an integer.
1863     fn reinterpret_float_as_int(
1864         &mut self,
1865         dst: WritableReg,
1866         src: Reg,
1867         size: OperandSize,
1868     ) -> Result<()>;
1869 
1870     /// Reinterpret an integer as a float.
1871     fn reinterpret_int_as_float(
1872         &mut self,
1873         dst: WritableReg,
1874         src: Reg,
1875         size: OperandSize,
1876     ) -> Result<()>;
1877 
1878     /// Demote an f64 to an f32.
1879     fn demote(&mut self, dst: WritableReg, src: Reg) -> Result<()>;
1880 
1881     /// Promote an f32 to an f64.
1882     fn promote(&mut self, dst: WritableReg, src: Reg) -> Result<()>;
1883 
1884     /// Zero a given memory range.
1885     ///
1886     /// The default implementation divides the given memory range
1887     /// into word-sized slots. Then it unrolls a series of store
1888     /// instructions, effectively assigning zero to each slot.
1889     fn zero_mem_range(&mut self, mem: &Range<u32>) -> Result<()> {
1890         let word_size = <Self::ABI as abi::ABI>::word_bytes() as u32;
1891         if mem.is_empty() {
1892             return Ok(());
1893         }
1894 
1895         let start = if mem.start % word_size == 0 {
1896             mem.start
1897         } else {
1898             // Ensure that the start of the range is at least 4-byte aligned.
1899             assert!(mem.start % 4 == 0);
1900             let start = align_to(mem.start, word_size);
1901             let addr: Self::Address = self.local_address(&LocalSlot::i32(start))?;
1902             self.store(RegImm::i32(0), addr, OperandSize::S32)?;
1903             // Ensure that the new start of the range, is word-size aligned.
1904             assert!(start % word_size == 0);
1905             start
1906         };
1907 
1908         let end = align_to(mem.end, word_size);
1909         let slots = (end - start) / word_size;
1910 
1911         if slots == 1 {
1912             let slot = LocalSlot::i64(start + word_size);
1913             let addr: Self::Address = self.local_address(&slot)?;
1914             self.store(RegImm::i64(0), addr, OperandSize::S64)?;
1915         } else {
1916             // TODO
1917             // Add an upper bound to this generation;
1918             // given a considerably large amount of slots
1919             // this will be inefficient.
1920             self.with_scratch::<IntScratch, _>(|masm, scratch| {
1921                 masm.zero(scratch.writable())?;
1922                 let zero = RegImm::reg(scratch.inner());
1923 
1924                 for step in (start..end).step_by(word_size as usize) {
1925                     let slot = LocalSlot::i64(step + word_size);
1926                     let addr: Self::Address = masm.local_address(&slot)?;
1927                     masm.store(zero, addr, OperandSize::S64)?;
1928                 }
1929                 anyhow::Ok(())
1930             })?;
1931         }
1932 
1933         Ok(())
1934     }
1935 
1936     /// Generate a label.
1937     fn get_label(&mut self) -> Result<MachLabel>;
1938 
1939     /// Bind the given label at the current code offset.
1940     fn bind(&mut self, label: MachLabel) -> Result<()>;
1941 
1942     /// Conditional branch.
1943     ///
1944     /// Performs a comparison between the two operands,
1945     /// and immediately after emits a jump to the given
1946     /// label destination if the condition is met.
1947     fn branch(
1948         &mut self,
1949         kind: IntCmpKind,
1950         lhs: Reg,
1951         rhs: RegImm,
1952         taken: MachLabel,
1953         size: OperandSize,
1954     ) -> Result<()>;
1955 
1956     /// Emits an unconditional jump to the given label.
1957     fn jmp(&mut self, target: MachLabel) -> Result<()>;
1958 
1959     /// Emits a jump table sequence. The default label is specified as
1960     /// the last element of the targets slice.
1961     fn jmp_table(&mut self, targets: &[MachLabel], index: Reg, tmp: Reg) -> Result<()>;
1962 
1963     /// Emit an unreachable code trap.
1964     fn unreachable(&mut self) -> Result<()>;
1965 
1966     /// Emit an unconditional trap.
1967     fn trap(&mut self, code: TrapCode) -> Result<()>;
1968 
1969     /// Traps if the condition code is met.
1970     fn trapif(&mut self, cc: IntCmpKind, code: TrapCode) -> Result<()>;
1971 
1972     /// Trap if the source register is zero.
1973     fn trapz(&mut self, src: Reg, code: TrapCode) -> Result<()>;
1974 
1975     /// Ensures that the stack pointer is correctly positioned before an unconditional
1976     /// jump according to the requirements of the destination target.
1977     fn ensure_sp_for_jump(&mut self, target: SPOffset) -> Result<()> {
1978         let bytes = self
1979             .sp_offset()?
1980             .as_u32()
1981             .checked_sub(target.as_u32())
1982             .unwrap_or(0);
1983 
1984         if bytes > 0 {
1985             self.free_stack(bytes)?;
1986         }
1987 
1988         Ok(())
1989     }
1990 
1991     /// Mark the start of a source location returning the machine code offset
1992     /// and the relative source code location.
1993     fn start_source_loc(&mut self, loc: RelSourceLoc) -> Result<(CodeOffset, RelSourceLoc)>;
1994 
1995     /// Mark the end of a source location.
1996     fn end_source_loc(&mut self) -> Result<()>;
1997 
1998     /// The current offset, in bytes from the beginning of the function.
1999     fn current_code_offset(&self) -> Result<CodeOffset>;
2000 
2001     /// Performs a 128-bit addition
2002     fn add128(
2003         &mut self,
2004         dst_lo: WritableReg,
2005         dst_hi: WritableReg,
2006         lhs_lo: Reg,
2007         lhs_hi: Reg,
2008         rhs_lo: Reg,
2009         rhs_hi: Reg,
2010     ) -> Result<()>;
2011 
2012     /// Performs a 128-bit subtraction
2013     fn sub128(
2014         &mut self,
2015         dst_lo: WritableReg,
2016         dst_hi: WritableReg,
2017         lhs_lo: Reg,
2018         lhs_hi: Reg,
2019         rhs_lo: Reg,
2020         rhs_hi: Reg,
2021     ) -> Result<()>;
2022 
2023     /// Performs a widening multiplication from two 64-bit operands into a
2024     /// 128-bit result.
2025     ///
2026     /// Note that some platforms require special handling of registers in this
2027     /// instruction (e.g. x64) so full access to `CodeGenContext` is provided.
2028     fn mul_wide(&mut self, context: &mut CodeGenContext<Emission>, kind: MulWideKind)
2029     -> Result<()>;
2030 
2031     /// Takes the value in a src operand and replicates it across lanes of
2032     /// `size` in a destination result.
2033     fn splat(&mut self, context: &mut CodeGenContext<Emission>, size: SplatKind) -> Result<()>;
2034 
2035     /// Performs a shuffle between two 128-bit vectors into a 128-bit result
2036     /// using lanes as a mask to select which indexes to copy.
2037     fn shuffle(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, lanes: [u8; 16]) -> Result<()>;
2038 
2039     /// Performs a swizzle between two 128-bit vectors into a 128-bit result.
2040     fn swizzle(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg) -> Result<()>;
2041 
2042     /// Performs the RMW `op` operation on the passed `addr`.
2043     ///
2044     /// The value *before* the operation was performed is written back to the `operand` register.
2045     fn atomic_rmw(
2046         &mut self,
2047         context: &mut CodeGenContext<Emission>,
2048         addr: Self::Address,
2049         size: OperandSize,
2050         op: RmwOp,
2051         flags: MemFlags,
2052         extend: Option<Extend<Zero>>,
2053     ) -> Result<()>;
2054 
2055     /// Extracts the scalar value from `src` in `lane` to `dst`.
2056     fn extract_lane(
2057         &mut self,
2058         src: Reg,
2059         dst: WritableReg,
2060         lane: u8,
2061         kind: ExtractLaneKind,
2062     ) -> Result<()>;
2063 
2064     /// Replaces the value in `lane` in `dst` with the value in `src`.
2065     fn replace_lane(
2066         &mut self,
2067         src: RegImm,
2068         dst: WritableReg,
2069         lane: u8,
2070         kind: ReplaceLaneKind,
2071     ) -> Result<()>;
2072 
2073     /// Perform an atomic CAS (compare-and-swap) operation with the value at `addr`, and `expected`
2074     /// and `replacement` (at the top of the context's stack).
2075     ///
2076     /// This method takes the `CodeGenContext` as an argument to accommodate architectures that
2077     /// expect parameters in specific registers. The context stack contains the `replacement`,
2078     /// and `expected` values in that order. The implementer is expected to push the value at
2079     /// `addr` before the update to the context's stack before returning.
2080     fn atomic_cas(
2081         &mut self,
2082         context: &mut CodeGenContext<Emission>,
2083         addr: Self::Address,
2084         size: OperandSize,
2085         flags: MemFlags,
2086         extend: Option<Extend<Zero>>,
2087     ) -> Result<()>;
2088 
2089     /// Compares vector registers `lhs` and `rhs` for equality and puts the
2090     /// vector of results in `dst`.
2091     fn v128_eq(
2092         &mut self,
2093         dst: WritableReg,
2094         lhs: Reg,
2095         rhs: Reg,
2096         kind: VectorEqualityKind,
2097     ) -> Result<()>;
2098 
2099     /// Compares vector registers `lhs` and `rhs` for inequality and puts the
2100     /// vector of results in `dst`.
2101     fn v128_ne(
2102         &mut self,
2103         dst: WritableReg,
2104         lhs: Reg,
2105         rhs: Reg,
2106         kind: VectorEqualityKind,
2107     ) -> Result<()>;
2108 
2109     /// Performs a less than comparison with vector registers `lhs` and `rhs`
2110     /// and puts the vector of results in `dst`.
2111     fn v128_lt(
2112         &mut self,
2113         dst: WritableReg,
2114         lhs: Reg,
2115         rhs: Reg,
2116         kind: VectorCompareKind,
2117     ) -> Result<()>;
2118 
2119     /// Performs a less than or equal comparison with vector registers `lhs`
2120     /// and `rhs` and puts the vector of results in `dst`.
2121     fn v128_le(
2122         &mut self,
2123         dst: WritableReg,
2124         lhs: Reg,
2125         rhs: Reg,
2126         kind: VectorCompareKind,
2127     ) -> Result<()>;
2128 
2129     /// Performs a greater than comparison with vector registers `lhs` and
2130     /// `rhs` and puts the vector of results in `dst`.
2131     fn v128_gt(
2132         &mut self,
2133         dst: WritableReg,
2134         lhs: Reg,
2135         rhs: Reg,
2136         kind: VectorCompareKind,
2137     ) -> Result<()>;
2138 
2139     /// Performs a greater than or equal comparison with vector registers `lhs`
2140     /// and `rhs` and puts the vector of results in `dst`.
2141     fn v128_ge(
2142         &mut self,
2143         dst: WritableReg,
2144         lhs: Reg,
2145         rhs: Reg,
2146         kind: VectorCompareKind,
2147     ) -> Result<()>;
2148 
2149     /// Emit a memory fence.
2150     fn fence(&mut self) -> Result<()>;
2151 
2152     /// Perform a logical `not` operation on the 128bits vector value in `dst`.
2153     fn v128_not(&mut self, dst: WritableReg) -> Result<()>;
2154 
2155     /// Perform a logical `and` operation on `src1` and `src2`, both 128bits vector values, writing
2156     /// the result to `dst`.
2157     fn v128_and(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;
2158 
2159     /// Perform a logical `and_not` operation on `src1` and `src2`, both 128bits vector values, writing
2160     /// the result to `dst`.
2161     ///
2162     /// `and_not` is not commutative: dst = !src1 & src2.
2163     fn v128_and_not(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;
2164 
2165     /// Perform a logical `or` operation on `src1` and `src2`, both 128bits vector values, writing
2166     /// the result to `dst`.
2167     fn v128_or(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;
2168 
2169     /// Perform a logical `xor` operation on `src1` and `src2`, both 128bits vector values, writing
2170     /// the result to `dst`.
2171     fn v128_xor(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;
2172 
2173     /// Given two 128bits vectors `src1` and `src2`, and a 128bits bitmask `mask`, selects bits
2174     /// from `src1` when mask is 1, and from `src2` when mask is 0.
2175     ///
2176     /// This is equivalent to: `v128.or(v128.and(src1, mask), v128.and(src2, v128.not(mask)))`.
2177     fn v128_bitselect(&mut self, src1: Reg, src2: Reg, mask: Reg, dst: WritableReg) -> Result<()>;
2178 
2179     /// If any bit in `src` is 1, set `dst` to 1, or 0 otherwise.
2180     fn v128_any_true(&mut self, src: Reg, dst: WritableReg) -> Result<()>;
2181 
2182     /// Convert vector of integers to vector of floating points.
2183     fn v128_convert(&mut self, src: Reg, dst: WritableReg, kind: V128ConvertKind) -> Result<()>;
2184 
2185     /// Convert two input vectors into a smaller lane vector by narrowing each
2186     /// lane.
2187     fn v128_narrow(
2188         &mut self,
2189         src1: Reg,
2190         src2: Reg,
2191         dst: WritableReg,
2192         kind: V128NarrowKind,
2193     ) -> Result<()>;
2194 
2195     /// Converts a vector containing two 64-bit floating point lanes to two
2196     /// 32-bit floating point lanes and setting the two higher lanes to 0.
2197     fn v128_demote(&mut self, src: Reg, dst: WritableReg) -> Result<()>;
2198 
2199     /// Converts a vector containing four 32-bit floating point lanes to two
2200     /// 64-bit floating point lanes. Only the two lower lanes are converted.
2201     fn v128_promote(&mut self, src: Reg, dst: WritableReg) -> Result<()>;
2202 
2203     /// Converts low or high half of the smaller lane vector to a larger lane
2204     /// vector.
2205     fn v128_extend(&mut self, src: Reg, dst: WritableReg, kind: V128ExtendKind) -> Result<()>;
2206 
2207     /// Perform a vector add between `lhs` and `rhs`, placing the result in
2208     /// `dst`.
2209     fn v128_add(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, kind: V128AddKind) -> Result<()>;
2210 
2211     /// Perform a vector sub between `lhs` and `rhs`, placing the result in `dst`.
2212     fn v128_sub(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, kind: V128SubKind) -> Result<()>;
2213 
2214     /// Perform a vector lane-wise mul between `lhs` and `rhs`, placing the result in `dst`.
2215     fn v128_mul(&mut self, context: &mut CodeGenContext<Emission>, kind: V128MulKind)
2216     -> Result<()>;
2217 
2218     /// Perform an absolute operation on a vector.
2219     fn v128_abs(&mut self, src: Reg, dst: WritableReg, kind: V128AbsKind) -> Result<()>;
2220 
2221     /// Vectorized negate of the content of `op`.
2222     fn v128_neg(&mut self, op: WritableReg, kind: V128NegKind) -> Result<()>;
2223 
2224     /// Perform the shift operation specified by `kind`, by the shift amount specified by the 32-bit
2225     /// integer at the top of the stack, on the 128-bit vector specified by the second value
2226     /// from the top of the stack, interpreted as packed integers of size `lane_width`.
2227     ///
2228     /// The shift amount is taken modulo `lane_width`.
2229     fn v128_shift(
2230         &mut self,
2231         context: &mut CodeGenContext<Emission>,
2232         lane_width: OperandSize,
2233         kind: ShiftKind,
2234     ) -> Result<()>;
2235 
2236     /// Perform a saturating integer q-format rounding multiplication.
2237     fn v128_q15mulr_sat_s(
2238         &mut self,
2239         lhs: Reg,
2240         rhs: Reg,
2241         dst: WritableReg,
2242         size: OperandSize,
2243     ) -> Result<()>;
2244 
2245     /// Sets `dst` to 1 if all lanes in `src` are non-zero, sets `dst` to 0
2246     /// otherwise.
2247     fn v128_all_true(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2248 
2249     /// Extracts the high bit of each lane in `src` and produces a scalar mask
2250     /// with all bits concatenated in `dst`.
2251     fn v128_bitmask(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2252 
2253     /// Lanewise truncation operation.
2254     ///
2255     /// If using an integer kind of truncation, then this performs a lane-wise
2256     /// saturating conversion from float to integer using the IEEE
2257     /// `convertToIntegerTowardZero` function. If any input lane is NaN, the
2258     /// resulting lane is 0. If the rounded integer value of a lane is outside
2259     /// the range of the destination type, the result is saturated to the
2260     /// nearest representable integer value.
2261     fn v128_trunc(
2262         &mut self,
2263         context: &mut CodeGenContext<Emission>,
2264         kind: V128TruncKind,
2265     ) -> Result<()>;
2266 
2267     /// Perform a lane-wise `min` operation between `src1` and `src2`.
2268     fn v128_min(&mut self, src1: Reg, src2: Reg, dst: WritableReg, kind: V128MinKind)
2269     -> Result<()>;
2270 
2271     /// Perform a lane-wise `max` operation between `src1` and `src2`.
2272     fn v128_max(&mut self, src1: Reg, src2: Reg, dst: WritableReg, kind: V128MaxKind)
2273     -> Result<()>;
2274 
2275     /// Perform the lane-wise integer extended multiplication producing twice wider result than the
2276     /// inputs. This is equivalent to an extend followed by a multiply.
2277     ///
2278     /// The extension to be performed is inferred from the `lane_width` and the `kind` of extmul,
2279     /// e.g, if `lane_width` is `S16`, and `kind` is `LowSigned`, then we sign-extend the lower
2280     /// 8bits of the 16bits lanes.
2281     fn v128_extmul(
2282         &mut self,
2283         context: &mut CodeGenContext<Emission>,
2284         kind: V128ExtMulKind,
2285     ) -> Result<()>;
2286 
2287     /// Perform the lane-wise integer extended pairwise addition producing extended results (twice
2288     /// wider results than the inputs).
2289     fn v128_extadd_pairwise(
2290         &mut self,
2291         src: Reg,
2292         dst: WritableReg,
2293         kind: V128ExtAddKind,
2294     ) -> Result<()>;
2295 
2296     /// Lane-wise multiply signed 16-bit integers in `lhs` and `rhs` and add
2297     /// adjacent pairs of the 32-bit results.
2298     fn v128_dot(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg) -> Result<()>;
2299 
2300     /// Count the number of bits set in each lane.
2301     fn v128_popcnt(&mut self, context: &mut CodeGenContext<Emission>) -> Result<()>;
2302 
2303     /// Lane-wise rounding average of vectors of integers in `lhs` and `rhs`
2304     /// and put the results in `dst`.
2305     fn v128_avgr(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2306 
2307     /// Lane-wise IEEE division on vectors of floats.
2308     fn v128_div(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2309 
2310     /// Lane-wise IEEE square root of vector of floats.
2311     fn v128_sqrt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2312 
2313     /// Lane-wise ceiling of vector of floats.
2314     fn v128_ceil(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2315 
2316     /// Lane-wise flooring of vector of floats.
2317     fn v128_floor(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2318 
2319     /// Lane-wise rounding to nearest integer for vector of floats.
2320     fn v128_nearest(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2321 
2322     /// Lane-wise minimum value defined as `rhs < lhs ? rhs : lhs`.
2323     fn v128_pmin(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2324 
2325     /// Lane-wise maximum value defined as `lhs < rhs ? rhs : lhs`.
2326     fn v128_pmax(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2327 }
2328