xref: /wasmtime-44.0.1/winch/codegen/src/isa/x64/asm.rs (revision def5998e)
1 //! Assembler library implementation for x64.
2 
3 use crate::{
4     constant_pool::ConstantPool,
5     isa::{CallingConvention, reg::Reg},
6     masm::{
7         DivKind, Extend, ExtendKind, ExtendType, IntCmpKind, MulWideKind, OperandSize, RemKind,
8         RoundingMode, ShiftKind, Signed, V128ExtendKind, V128LoadExtendKind, Zero,
9     },
10     reg::writable,
11 };
12 use cranelift_codegen::{
13     CallInfo, Final, MachBuffer, MachBufferFinalized, MachInst, MachInstEmit, MachInstEmitState,
14     MachLabel, PatchRegion, Writable,
15     ir::{ExternalName, MemFlags, SourceLoc, TrapCode, Type, UserExternalNameRef, types},
16     isa::{
17         unwind::UnwindInst,
18         x64::{
19             AtomicRmwSeqOp, EmitInfo, EmitState, Inst,
20             args::{
21                 self, Amode, CC, ExtMode, FromWritableReg, Gpr, GprMem, GprMemImm, RegMem,
22                 RegMemImm, SyntheticAmode, WritableGpr, WritableXmm, Xmm, XmmMem, XmmMemImm,
23             },
24             external::{PairedGpr, PairedXmm},
25             settings as x64_settings,
26         },
27     },
28     settings,
29 };
30 
31 use crate::reg::WritableReg;
32 use cranelift_assembler_x64 as asm;
33 
34 use super::address::Address;
35 use smallvec::SmallVec;
36 
37 // Conversions between winch-codegen x64 types and cranelift-codegen x64 types.
38 
39 impl From<Reg> for RegMemImm {
from(reg: Reg) -> Self40     fn from(reg: Reg) -> Self {
41         RegMemImm::reg(reg.into())
42     }
43 }
44 
45 impl From<Reg> for RegMem {
from(value: Reg) -> Self46     fn from(value: Reg) -> Self {
47         RegMem::Reg { reg: value.into() }
48     }
49 }
50 
51 impl From<Reg> for WritableGpr {
from(reg: Reg) -> Self52     fn from(reg: Reg) -> Self {
53         let writable = Writable::from_reg(reg.into());
54         WritableGpr::from_writable_reg(writable).expect("valid writable gpr")
55     }
56 }
57 
58 impl From<Reg> for WritableXmm {
from(reg: Reg) -> Self59     fn from(reg: Reg) -> Self {
60         let writable = Writable::from_reg(reg.into());
61         WritableXmm::from_writable_reg(writable).expect("valid writable xmm")
62     }
63 }
64 
65 /// Convert a writable GPR register to the read-write pair expected by
66 /// `cranelift-codegen`.
pair_gpr(reg: WritableReg) -> PairedGpr67 fn pair_gpr(reg: WritableReg) -> PairedGpr {
68     assert!(reg.to_reg().is_int());
69     let read = Gpr::unwrap_new(reg.to_reg().into());
70     let write = WritableGpr::from_reg(reg.to_reg().into());
71     PairedGpr { read, write }
72 }
73 
74 impl From<Reg> for asm::Gpr<Gpr> {
from(reg: Reg) -> Self75     fn from(reg: Reg) -> Self {
76         asm::Gpr::new(reg.into())
77     }
78 }
79 
80 impl From<Reg> for asm::GprMem<Gpr, Gpr> {
from(reg: Reg) -> Self81     fn from(reg: Reg) -> Self {
82         asm::GprMem::Gpr(reg.into())
83     }
84 }
85 
86 /// Convert a writable XMM register to the read-write pair expected by
87 /// `cranelift-codegen`.
pair_xmm(reg: WritableReg) -> PairedXmm88 fn pair_xmm(reg: WritableReg) -> PairedXmm {
89     assert!(reg.to_reg().is_float());
90     let read = Xmm::unwrap_new(reg.to_reg().into());
91     let write = WritableXmm::from_reg(reg.to_reg().into());
92     PairedXmm { read, write }
93 }
94 
95 impl From<Reg> for asm::Xmm<Xmm> {
from(reg: Reg) -> Self96     fn from(reg: Reg) -> Self {
97         asm::Xmm::new(reg.into())
98     }
99 }
100 
101 impl From<Reg> for asm::XmmMem<Xmm, Gpr> {
from(reg: Reg) -> Self102     fn from(reg: Reg) -> Self {
103         asm::XmmMem::Xmm(reg.into())
104     }
105 }
106 
107 impl From<Reg> for Gpr {
from(reg: Reg) -> Self108     fn from(reg: Reg) -> Self {
109         Gpr::unwrap_new(reg.into())
110     }
111 }
112 
113 impl From<Reg> for GprMem {
from(value: Reg) -> Self114     fn from(value: Reg) -> Self {
115         GprMem::unwrap_new(value.into())
116     }
117 }
118 
119 impl From<Reg> for GprMemImm {
from(reg: Reg) -> Self120     fn from(reg: Reg) -> Self {
121         GprMemImm::unwrap_new(reg.into())
122     }
123 }
124 
125 impl From<Reg> for Xmm {
from(reg: Reg) -> Self126     fn from(reg: Reg) -> Self {
127         Xmm::unwrap_new(reg.into())
128     }
129 }
130 
131 impl From<Reg> for XmmMem {
from(value: Reg) -> Self132     fn from(value: Reg) -> Self {
133         XmmMem::unwrap_new(value.into())
134     }
135 }
136 
137 impl From<Reg> for XmmMemImm {
from(value: Reg) -> Self138     fn from(value: Reg) -> Self {
139         XmmMemImm::unwrap_new(value.into())
140     }
141 }
142 
143 impl From<OperandSize> for args::OperandSize {
from(size: OperandSize) -> Self144     fn from(size: OperandSize) -> Self {
145         match size {
146             OperandSize::S8 => Self::Size8,
147             OperandSize::S16 => Self::Size16,
148             OperandSize::S32 => Self::Size32,
149             OperandSize::S64 => Self::Size64,
150             s => panic!("Invalid operand size {s:?}"),
151         }
152     }
153 }
154 
155 impl From<IntCmpKind> for CC {
from(value: IntCmpKind) -> Self156     fn from(value: IntCmpKind) -> Self {
157         match value {
158             IntCmpKind::Eq => CC::Z,
159             IntCmpKind::Ne => CC::NZ,
160             IntCmpKind::LtS => CC::L,
161             IntCmpKind::LtU => CC::B,
162             IntCmpKind::GtS => CC::NLE,
163             IntCmpKind::GtU => CC::NBE,
164             IntCmpKind::LeS => CC::LE,
165             IntCmpKind::LeU => CC::BE,
166             IntCmpKind::GeS => CC::NL,
167             IntCmpKind::GeU => CC::NB,
168         }
169     }
170 }
171 
172 impl<T: ExtendType> From<Extend<T>> for ExtMode {
from(value: Extend<T>) -> Self173     fn from(value: Extend<T>) -> Self {
174         match value {
175             Extend::I32Extend8 => ExtMode::BL,
176             Extend::I32Extend16 => ExtMode::WL,
177             Extend::I64Extend8 => ExtMode::BQ,
178             Extend::I64Extend16 => ExtMode::WQ,
179             Extend::I64Extend32 => ExtMode::LQ,
180             Extend::__Kind(_) => unreachable!(),
181         }
182     }
183 }
184 
185 impl From<ExtendKind> for ExtMode {
from(value: ExtendKind) -> Self186     fn from(value: ExtendKind) -> Self {
187         match value {
188             ExtendKind::Signed(s) => s.into(),
189             ExtendKind::Unsigned(u) => u.into(),
190         }
191     }
192 }
193 
/// Kinds of extends supported by `vpmov`.
///
/// Each variant is named `E<source lane bits>x<lane count><S|U>`, where the
/// suffix selects sign (`S`) or zero (`U`) extension.
pub(super) enum VpmovKind {
    /// Sign extends 8 lanes of 8-bit integers to 8 lanes of 16-bit integers.
    E8x8S,
    /// Zero extends 8 lanes of 8-bit integers to 8 lanes of 16-bit integers.
    E8x8U,
    /// Sign extends 4 lanes of 16-bit integers to 4 lanes of 32-bit integers.
    E16x4S,
    /// Zero extends 4 lanes of 16-bit integers to 4 lanes of 32-bit integers.
    E16x4U,
    /// Sign extends 2 lanes of 32-bit integers to 2 lanes of 64-bit integers.
    E32x2S,
    /// Zero extends 2 lanes of 32-bit integers to 2 lanes of 64-bit integers.
    E32x2U,
}
209 
210 impl From<V128LoadExtendKind> for VpmovKind {
from(value: V128LoadExtendKind) -> Self211     fn from(value: V128LoadExtendKind) -> Self {
212         match value {
213             V128LoadExtendKind::E8x8S => Self::E8x8S,
214             V128LoadExtendKind::E8x8U => Self::E8x8U,
215             V128LoadExtendKind::E16x4S => Self::E16x4S,
216             V128LoadExtendKind::E16x4U => Self::E16x4U,
217             V128LoadExtendKind::E32x2S => Self::E32x2S,
218             V128LoadExtendKind::E32x2U => Self::E32x2U,
219         }
220     }
221 }
222 
223 impl From<V128ExtendKind> for VpmovKind {
from(value: V128ExtendKind) -> Self224     fn from(value: V128ExtendKind) -> Self {
225         match value {
226             V128ExtendKind::LowI8x16S | V128ExtendKind::HighI8x16S => Self::E8x8S,
227             V128ExtendKind::LowI8x16U => Self::E8x8U,
228             V128ExtendKind::LowI16x8S | V128ExtendKind::HighI16x8S => Self::E16x4S,
229             V128ExtendKind::LowI16x8U => Self::E16x4U,
230             V128ExtendKind::LowI32x4S | V128ExtendKind::HighI32x4S => Self::E32x2S,
231             V128ExtendKind::LowI32x4U => Self::E32x2U,
232             _ => unimplemented!(),
233         }
234     }
235 }
236 
/// Kinds of comparisons supported by `vcmp`.
pub(super) enum VcmpKind {
    /// Equal comparison.
    Eq,
    /// Not equal comparison.
    Ne,
    /// Less than comparison.
    Lt,
    /// Less than or equal comparison.
    Le,
    /// Unordered comparison. Sets result to all 1s if either source operand is
    /// NaN.
    Unord,
}
251 
/// Kinds of conversions supported by `vcvt`.
pub(super) enum VcvtKind {
    /// Converts 32-bit integers to 32-bit floats.
    I32ToF32,
    /// Converts doubleword integers to double precision floats.
    I32ToF64,
    /// Converts double precision floats to single precision floats.
    F64ToF32,
    /// Converts double precision floats to 32-bit integers.
    F64ToI32,
    /// Converts single precision floats to double precision floats.
    F32ToF64,
    /// Converts single precision floats to 32-bit integers.
    F32ToI32,
}
267 
/// Modes supported by `vround`.
pub(crate) enum VroundMode {
    /// Rounds toward nearest (ties to even).
    TowardNearest,
    /// Rounds toward negative infinity.
    TowardNegativeInfinity,
    /// Rounds toward positive infinity.
    TowardPositiveInfinity,
    /// Rounds toward zero.
    TowardZero,
}
279 
/// Low level assembler implementation for x64.
///
/// Instructions are emitted into the owned machine buffer, and constants are
/// registered through the owned constant pool.
pub(crate) struct Assembler {
    /// The machine instruction buffer.
    buffer: MachBuffer<Inst>,
    /// Constant emission information.
    emit_info: EmitInfo,
    /// Emission state.
    emit_state: EmitState,
    /// x64 flags.
    isa_flags: x64_settings::Flags,
    /// Constant pool.
    pool: ConstantPool,
}
293 
294 impl Assembler {
295     /// Create a new x64 assembler.
new(shared_flags: settings::Flags, isa_flags: x64_settings::Flags) -> Self296     pub fn new(shared_flags: settings::Flags, isa_flags: x64_settings::Flags) -> Self {
297         Self {
298             buffer: MachBuffer::<Inst>::new(),
299             emit_state: Default::default(),
300             emit_info: EmitInfo::new(shared_flags, isa_flags.clone()),
301             pool: ConstantPool::new(),
302             isa_flags,
303         }
304     }
305 
306     /// Get a mutable reference to underlying
307     /// machine buffer.
buffer_mut(&mut self) -> &mut MachBuffer<Inst>308     pub fn buffer_mut(&mut self) -> &mut MachBuffer<Inst> {
309         &mut self.buffer
310     }
311 
312     /// Get a reference to the underlying machine buffer.
buffer(&self) -> &MachBuffer<Inst>313     pub fn buffer(&self) -> &MachBuffer<Inst> {
314         &self.buffer
315     }
316 
317     /// Adds a constant to the constant pool and returns its address.
add_constant(&mut self, constant: &[u8]) -> Address318     pub fn add_constant(&mut self, constant: &[u8]) -> Address {
319         let handle = self.pool.register(constant, &mut self.buffer);
320         Address::constant(handle)
321     }
322 
323     /// Load a floating point constant, using the constant pool.
load_fp_const(&mut self, dst: WritableReg, constant: &[u8], size: OperandSize)324     pub fn load_fp_const(&mut self, dst: WritableReg, constant: &[u8], size: OperandSize) {
325         let addr = self.add_constant(constant);
326         self.xmm_mov_mr(&addr, dst, size, MemFlags::trusted());
327     }
328 
329     /// Return the emitted code.
finalize(mut self, loc: Option<SourceLoc>) -> MachBufferFinalized<Final>330     pub fn finalize(mut self, loc: Option<SourceLoc>) -> MachBufferFinalized<Final> {
331         let stencil = self
332             .buffer
333             .finish(&self.pool.constants(), self.emit_state.ctrl_plane_mut());
334         stencil.apply_base_srcloc(loc.unwrap_or_default())
335     }
336 
emit(&mut self, inst: Inst)337     fn emit(&mut self, inst: Inst) {
338         inst.emit(&mut self.buffer, &self.emit_info, &mut self.emit_state);
339     }
340 
to_synthetic_amode(addr: &Address, memflags: MemFlags) -> SyntheticAmode341     fn to_synthetic_amode(addr: &Address, memflags: MemFlags) -> SyntheticAmode {
342         match *addr {
343             Address::Offset { base, offset } => {
344                 let amode = Amode::imm_reg(offset as i32, base.into()).with_flags(memflags);
345                 SyntheticAmode::real(amode)
346             }
347             Address::Const(c) => SyntheticAmode::ConstantOffset(c),
348             Address::ImmRegRegShift {
349                 simm32,
350                 base,
351                 index,
352                 shift,
353             } => SyntheticAmode::Real(Amode::ImmRegRegShift {
354                 simm32,
355                 base: base.into(),
356                 index: index.into(),
357                 shift,
358                 flags: memflags,
359             }),
360         }
361     }
362 
363     /// Emit an unwind instruction.
unwind_inst(&mut self, inst: UnwindInst)364     pub fn unwind_inst(&mut self, inst: UnwindInst) {
365         self.emit(Inst::Unwind { inst })
366     }
367 
368     /// Push register.
push_r(&mut self, reg: Reg)369     pub fn push_r(&mut self, reg: Reg) {
370         let inst = asm::inst::pushq_o::new(reg).into();
371         self.emit(Inst::External { inst });
372     }
373 
374     /// Pop to register.
pop_r(&mut self, dst: WritableReg)375     pub fn pop_r(&mut self, dst: WritableReg) {
376         let writable: WritableGpr = dst.map(Into::into);
377         let inst = asm::inst::popq_o::new(writable).into();
378         self.emit(Inst::External { inst });
379     }
380 
381     /// Return instruction.
ret(&mut self)382     pub fn ret(&mut self) {
383         let inst = asm::inst::retq_zo::new().into();
384         self.emit(Inst::External { inst });
385     }
386 
387     /// Register-to-register move.
mov_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)388     pub fn mov_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
389         let dst: WritableGpr = dst.map(|r| r.into());
390         let inst = match size {
391             OperandSize::S8 => asm::inst::movb_mr::new(dst, src).into(),
392             OperandSize::S16 => asm::inst::movw_mr::new(dst, src).into(),
393             OperandSize::S32 => asm::inst::movl_mr::new(dst, src).into(),
394             OperandSize::S64 => asm::inst::movq_mr::new(dst, src).into(),
395             _ => unreachable!(),
396         };
397         self.emit(Inst::External { inst });
398     }
399 
400     /// Register-to-memory move.
mov_rm(&mut self, src: Reg, addr: &Address, size: OperandSize, flags: MemFlags)401     pub fn mov_rm(&mut self, src: Reg, addr: &Address, size: OperandSize, flags: MemFlags) {
402         assert!(addr.is_offset());
403         let dst = Self::to_synthetic_amode(addr, flags);
404         let inst = match size {
405             OperandSize::S8 => asm::inst::movb_mr::new(dst, src).into(),
406             OperandSize::S16 => asm::inst::movw_mr::new(dst, src).into(),
407             OperandSize::S32 => asm::inst::movl_mr::new(dst, src).into(),
408             OperandSize::S64 => asm::inst::movq_mr::new(dst, src).into(),
409             _ => unreachable!(),
410         };
411         self.emit(Inst::External { inst });
412     }
413 
414     /// Immediate-to-memory move.
mov_im(&mut self, src: i32, addr: &Address, size: OperandSize, flags: MemFlags)415     pub fn mov_im(&mut self, src: i32, addr: &Address, size: OperandSize, flags: MemFlags) {
416         assert!(addr.is_offset());
417         let dst = Self::to_synthetic_amode(addr, flags);
418         let inst = match size {
419             OperandSize::S8 => {
420                 let src = i8::try_from(src).unwrap();
421                 asm::inst::movb_mi::new(dst, src.cast_unsigned()).into()
422             }
423             OperandSize::S16 => {
424                 let src = i16::try_from(src).unwrap();
425                 asm::inst::movw_mi::new(dst, src.cast_unsigned()).into()
426             }
427             OperandSize::S32 => asm::inst::movl_mi::new(dst, src.cast_unsigned()).into(),
428             OperandSize::S64 => asm::inst::movq_mi_sxl::new(dst, src).into(),
429             _ => unreachable!(),
430         };
431         self.emit(Inst::External { inst });
432     }
433 
434     /// Immediate-to-register move.
mov_ir(&mut self, imm: u64, dst: WritableReg, size: OperandSize)435     pub fn mov_ir(&mut self, imm: u64, dst: WritableReg, size: OperandSize) {
436         self.emit(Inst::imm(size.into(), imm, dst.map(Into::into)));
437     }
438 
439     /// Zero-extend memory-to-register load.
movzx_mr( &mut self, addr: &Address, dst: WritableReg, ext: Option<Extend<Zero>>, memflags: MemFlags, )440     pub fn movzx_mr(
441         &mut self,
442         addr: &Address,
443         dst: WritableReg,
444         ext: Option<Extend<Zero>>,
445         memflags: MemFlags,
446     ) {
447         let src = Self::to_synthetic_amode(addr, memflags);
448 
449         if let Some(ext) = ext {
450             let dst = WritableGpr::from_reg(dst.to_reg().into());
451             let inst = match ext.into() {
452                 ExtMode::BL => asm::inst::movzbl_rm::new(dst, src).into(),
453                 ExtMode::BQ => asm::inst::movzbq_rm::new(dst, src).into(),
454                 ExtMode::WL => asm::inst::movzwl_rm::new(dst, src).into(),
455                 ExtMode::WQ => asm::inst::movzwq_rm::new(dst, src).into(),
456                 ExtMode::LQ => {
457                     // This instruction selection may seem strange but is
458                     // correct in 64-bit mode: section 3.4.1.1 of the Intel
459                     // manual says that "32-bit operands generate a 32-bit
460                     // result, zero-extended to a 64-bit result in the
461                     // destination general-purpose register." This is applicable
462                     // beyond `mov` but we use this fact to zero-extend `src`
463                     // into `dst`.
464                     asm::inst::movl_rm::new(dst, src).into()
465                 }
466             };
467             self.emit(Inst::External { inst });
468         } else {
469             let dst = WritableGpr::from_reg(dst.to_reg().into());
470             let inst = asm::inst::movq_rm::new(dst, src).into();
471             self.emit(Inst::External { inst });
472         }
473     }
474 
475     // Sign-extend memory-to-register load.
movsx_mr( &mut self, addr: &Address, dst: WritableReg, ext: Extend<Signed>, memflags: MemFlags, )476     pub fn movsx_mr(
477         &mut self,
478         addr: &Address,
479         dst: WritableReg,
480         ext: Extend<Signed>,
481         memflags: MemFlags,
482     ) {
483         let src = Self::to_synthetic_amode(addr, memflags);
484         let dst = WritableGpr::from_reg(dst.to_reg().into());
485         let inst = match ext.into() {
486             ExtMode::BL => asm::inst::movsbl_rm::new(dst, src).into(),
487             ExtMode::BQ => asm::inst::movsbq_rm::new(dst, src).into(),
488             ExtMode::WL => asm::inst::movswl_rm::new(dst, src).into(),
489             ExtMode::WQ => asm::inst::movswq_rm::new(dst, src).into(),
490             ExtMode::LQ => asm::inst::movslq_rm::new(dst, src).into(),
491         };
492         self.emit(Inst::External { inst });
493     }
494 
495     /// Register-to-register move with zero extension.
movzx_rr(&mut self, src: Reg, dst: WritableReg, kind: Extend<Zero>)496     pub fn movzx_rr(&mut self, src: Reg, dst: WritableReg, kind: Extend<Zero>) {
497         let dst = WritableGpr::from_reg(dst.to_reg().into());
498         let inst = match kind.into() {
499             ExtMode::BL => asm::inst::movzbl_rm::new(dst, src).into(),
500             ExtMode::BQ => asm::inst::movzbq_rm::new(dst, src).into(),
501             ExtMode::WL => asm::inst::movzwl_rm::new(dst, src).into(),
502             ExtMode::WQ => asm::inst::movzwq_rm::new(dst, src).into(),
503             ExtMode::LQ => {
504                 // This instruction selection may seem strange but is correct in
505                 // 64-bit mode: section 3.4.1.1 of the Intel manual says that
506                 // "32-bit operands generate a 32-bit result, zero-extended to a
507                 // 64-bit result in the destination general-purpose register."
508                 // This is applicable beyond `mov` but we use this fact to
509                 // zero-extend `src` into `dst`.
510                 asm::inst::movl_rm::new(dst, src).into()
511             }
512         };
513         self.emit(Inst::External { inst });
514     }
515 
516     /// Register-to-register move with sign extension.
movsx_rr(&mut self, src: Reg, dst: WritableReg, kind: Extend<Signed>)517     pub fn movsx_rr(&mut self, src: Reg, dst: WritableReg, kind: Extend<Signed>) {
518         let dst = WritableGpr::from_reg(dst.to_reg().into());
519         let inst = match kind.into() {
520             ExtMode::BL => asm::inst::movsbl_rm::new(dst, src).into(),
521             ExtMode::BQ => asm::inst::movsbq_rm::new(dst, src).into(),
522             ExtMode::WL => asm::inst::movswl_rm::new(dst, src).into(),
523             ExtMode::WQ => asm::inst::movswq_rm::new(dst, src).into(),
524             ExtMode::LQ => asm::inst::movslq_rm::new(dst, src).into(),
525         };
526         self.emit(Inst::External { inst });
527     }
528 
529     /// Integer register conditional move.
cmov(&mut self, src: Reg, dst: WritableReg, cc: IntCmpKind, size: OperandSize)530     pub fn cmov(&mut self, src: Reg, dst: WritableReg, cc: IntCmpKind, size: OperandSize) {
531         use IntCmpKind::*;
532         use OperandSize::*;
533 
534         let dst: WritableGpr = dst.map(Into::into);
535         let inst = match size {
536             S8 | S16 | S32 => match cc {
537                 Eq => asm::inst::cmovel_rm::new(dst, src).into(),
538                 Ne => asm::inst::cmovnel_rm::new(dst, src).into(),
539                 LtS => asm::inst::cmovll_rm::new(dst, src).into(),
540                 LtU => asm::inst::cmovbl_rm::new(dst, src).into(),
541                 GtS => asm::inst::cmovgl_rm::new(dst, src).into(),
542                 GtU => asm::inst::cmoval_rm::new(dst, src).into(),
543                 LeS => asm::inst::cmovlel_rm::new(dst, src).into(),
544                 LeU => asm::inst::cmovbel_rm::new(dst, src).into(),
545                 GeS => asm::inst::cmovgel_rm::new(dst, src).into(),
546                 GeU => asm::inst::cmovael_rm::new(dst, src).into(),
547             },
548             S64 => match cc {
549                 Eq => asm::inst::cmoveq_rm::new(dst, src).into(),
550                 Ne => asm::inst::cmovneq_rm::new(dst, src).into(),
551                 LtS => asm::inst::cmovlq_rm::new(dst, src).into(),
552                 LtU => asm::inst::cmovbq_rm::new(dst, src).into(),
553                 GtS => asm::inst::cmovgq_rm::new(dst, src).into(),
554                 GtU => asm::inst::cmovaq_rm::new(dst, src).into(),
555                 LeS => asm::inst::cmovleq_rm::new(dst, src).into(),
556                 LeU => asm::inst::cmovbeq_rm::new(dst, src).into(),
557                 GeS => asm::inst::cmovgeq_rm::new(dst, src).into(),
558                 GeU => asm::inst::cmovaeq_rm::new(dst, src).into(),
559             },
560             _ => unreachable!(),
561         };
562         self.emit(Inst::External { inst });
563     }
564 
565     /// Single and double precision floating point
566     /// register-to-register move.
xmm_mov_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)567     pub fn xmm_mov_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
568         let ty = match size {
569             OperandSize::S32 => types::F32,
570             OperandSize::S64 => types::F64,
571             OperandSize::S128 => types::I32X4,
572             OperandSize::S8 | OperandSize::S16 => unreachable!(),
573         };
574         self.emit(Inst::gen_move(dst.map(|r| r.into()), src.into(), ty));
575     }
576 
577     /// Single and double precision floating point load.
xmm_mov_mr( &mut self, src: &Address, dst: WritableReg, size: OperandSize, flags: MemFlags, )578     pub fn xmm_mov_mr(
579         &mut self,
580         src: &Address,
581         dst: WritableReg,
582         size: OperandSize,
583         flags: MemFlags,
584     ) {
585         use OperandSize::*;
586 
587         assert!(dst.to_reg().is_float());
588 
589         let src = Self::to_synthetic_amode(src, flags);
590         let dst: WritableXmm = dst.map(|r| r.into());
591         let inst = match size {
592             S32 => asm::inst::movss_a_m::new(dst, src).into(),
593             S64 => asm::inst::movsd_a_m::new(dst, src).into(),
594             S128 => asm::inst::movdqu_a::new(dst, src).into(),
595             S8 | S16 => unreachable!(),
596         };
597         self.emit(Inst::External { inst });
598     }
599 
600     /// Vector load and extend.
xmm_vpmov_mr( &mut self, src: &Address, dst: WritableReg, kind: VpmovKind, flags: MemFlags, )601     pub fn xmm_vpmov_mr(
602         &mut self,
603         src: &Address,
604         dst: WritableReg,
605         kind: VpmovKind,
606         flags: MemFlags,
607     ) {
608         assert!(dst.to_reg().is_float());
609         let src = Self::to_synthetic_amode(src, flags);
610         let dst: WritableXmm = dst.map(|r| r.into());
611         let inst = match kind {
612             VpmovKind::E8x8S => asm::inst::vpmovsxbw_a::new(dst, src).into(),
613             VpmovKind::E8x8U => asm::inst::vpmovzxbw_a::new(dst, src).into(),
614             VpmovKind::E16x4S => asm::inst::vpmovsxwd_a::new(dst, src).into(),
615             VpmovKind::E16x4U => asm::inst::vpmovzxwd_a::new(dst, src).into(),
616             VpmovKind::E32x2S => asm::inst::vpmovsxdq_a::new(dst, src).into(),
617             VpmovKind::E32x2U => asm::inst::vpmovzxdq_a::new(dst, src).into(),
618         };
619         self.emit(Inst::External { inst });
620     }
621 
622     /// Extends vector of integers in `src` and puts results in `dst`.
xmm_vpmov_rr(&mut self, src: Reg, dst: WritableReg, kind: VpmovKind)623     pub fn xmm_vpmov_rr(&mut self, src: Reg, dst: WritableReg, kind: VpmovKind) {
624         let dst: WritableXmm = dst.map(|r| r.into());
625         let inst = match kind {
626             VpmovKind::E8x8S => asm::inst::vpmovsxbw_a::new(dst, src).into(),
627             VpmovKind::E8x8U => asm::inst::vpmovzxbw_a::new(dst, src).into(),
628             VpmovKind::E16x4S => asm::inst::vpmovsxwd_a::new(dst, src).into(),
629             VpmovKind::E16x4U => asm::inst::vpmovzxwd_a::new(dst, src).into(),
630             VpmovKind::E32x2S => asm::inst::vpmovsxdq_a::new(dst, src).into(),
631             VpmovKind::E32x2U => asm::inst::vpmovzxdq_a::new(dst, src).into(),
632         };
633         self.emit(Inst::External { inst });
634     }
635 
636     /// Vector load and broadcast.
xmm_vpbroadcast_mr( &mut self, src: &Address, dst: WritableReg, size: OperandSize, flags: MemFlags, )637     pub fn xmm_vpbroadcast_mr(
638         &mut self,
639         src: &Address,
640         dst: WritableReg,
641         size: OperandSize,
642         flags: MemFlags,
643     ) {
644         assert!(dst.to_reg().is_float());
645         let src = Self::to_synthetic_amode(src, flags);
646         let dst: WritableXmm = dst.map(|r| r.into());
647         let inst = match size {
648             OperandSize::S8 => asm::inst::vpbroadcastb_a::new(dst, src).into(),
649             OperandSize::S16 => asm::inst::vpbroadcastw_a::new(dst, src).into(),
650             OperandSize::S32 => asm::inst::vpbroadcastd_a::new(dst, src).into(),
651             _ => unimplemented!(),
652         };
653         self.emit(Inst::External { inst });
654     }
655 
656     /// Value in `src` is broadcast into lanes of `size` in `dst`.
xmm_vpbroadcast_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)657     pub fn xmm_vpbroadcast_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
658         assert!(src.is_float() && dst.to_reg().is_float());
659         let dst: WritableXmm = dst.map(|r| r.into());
660         let inst = match size {
661             OperandSize::S8 => asm::inst::vpbroadcastb_a::new(dst, src).into(),
662             OperandSize::S16 => asm::inst::vpbroadcastw_a::new(dst, src).into(),
663             OperandSize::S32 => asm::inst::vpbroadcastd_a::new(dst, src).into(),
664             _ => unimplemented!(),
665         };
666         self.emit(Inst::External { inst });
667     }
668 
669     /// Memory to register shuffle of bytes in vector.
xmm_vpshuf_mr( &mut self, src: &Address, dst: WritableReg, mask: u8, size: OperandSize, flags: MemFlags, )670     pub fn xmm_vpshuf_mr(
671         &mut self,
672         src: &Address,
673         dst: WritableReg,
674         mask: u8,
675         size: OperandSize,
676         flags: MemFlags,
677     ) {
678         let dst: WritableXmm = dst.map(|r| r.into());
679         let src = Self::to_synthetic_amode(src, flags);
680         let inst = match size {
681             OperandSize::S32 => asm::inst::vpshufd_a::new(dst, src, mask).into(),
682             _ => unimplemented!(),
683         };
684         self.emit(Inst::External { inst });
685     }
686 
687     /// Register to register shuffle of bytes in vector.
xmm_vpshuf_rr(&mut self, src: Reg, dst: WritableReg, mask: u8, size: OperandSize)688     pub fn xmm_vpshuf_rr(&mut self, src: Reg, dst: WritableReg, mask: u8, size: OperandSize) {
689         let dst: WritableXmm = dst.map(|r| r.into());
690 
691         let inst = match size {
692             OperandSize::S16 => asm::inst::vpshuflw_a::new(dst, src, mask).into(),
693             OperandSize::S32 => asm::inst::vpshufd_a::new(dst, src, mask).into(),
694             _ => unimplemented!(),
695         };
696 
697         self.emit(Inst::External { inst });
698     }
699 
700     /// Single and double precision floating point store.
xmm_mov_rm(&mut self, src: Reg, dst: &Address, size: OperandSize, flags: MemFlags)701     pub fn xmm_mov_rm(&mut self, src: Reg, dst: &Address, size: OperandSize, flags: MemFlags) {
702         use OperandSize::*;
703 
704         assert!(src.is_float());
705 
706         let dst = Self::to_synthetic_amode(dst, flags);
707         let src: Xmm = src.into();
708         let inst = match size {
709             S32 => asm::inst::movss_c_m::new(dst, src).into(),
710             S64 => asm::inst::movsd_c_m::new(dst, src).into(),
711             S128 => asm::inst::movdqu_b::new(dst, src).into(),
712             S16 | S8 => unreachable!(),
713         };
714         self.emit(Inst::External { inst })
715     }
716 
717     /// Floating point register conditional move.
xmm_cmov(&mut self, src: Reg, dst: WritableReg, cc: IntCmpKind, size: OperandSize)718     pub fn xmm_cmov(&mut self, src: Reg, dst: WritableReg, cc: IntCmpKind, size: OperandSize) {
719         let dst: WritableXmm = dst.map(Into::into);
720         let ty = match size {
721             OperandSize::S32 => types::F32,
722             OperandSize::S64 => types::F64,
723             // Move the entire 128 bits via movdqa.
724             OperandSize::S128 => types::I32X4,
725             OperandSize::S8 | OperandSize::S16 => unreachable!(),
726         };
727 
728         self.emit(Inst::XmmCmove {
729             ty,
730             cc: cc.into(),
731             consequent: Xmm::unwrap_new(src.into()),
732             alternative: dst.to_reg(),
733             dst,
734         })
735     }
736 
737     /// Subtract register and register
sub_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)738     pub fn sub_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
739         let dst = pair_gpr(dst);
740         let inst = match size {
741             OperandSize::S8 => asm::inst::subb_rm::new(dst, src).into(),
742             OperandSize::S16 => asm::inst::subw_rm::new(dst, src).into(),
743             OperandSize::S32 => asm::inst::subl_rm::new(dst, src).into(),
744             OperandSize::S64 => asm::inst::subq_rm::new(dst, src).into(),
745             OperandSize::S128 => unimplemented!(),
746         };
747         self.emit(Inst::External { inst });
748     }
749 
750     /// Subtract immediate register.
sub_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize)751     pub fn sub_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {
752         let dst = pair_gpr(dst);
753         let inst = match size {
754             OperandSize::S8 => asm::inst::subb_mi::new(dst, u8::try_from(imm).unwrap()).into(),
755             OperandSize::S16 => asm::inst::subw_mi::new(dst, u16::try_from(imm).unwrap()).into(),
756             OperandSize::S32 => asm::inst::subl_mi::new(dst, imm as u32).into(),
757             OperandSize::S64 => asm::inst::subq_mi_sxl::new(dst, imm).into(),
758             OperandSize::S128 => unimplemented!(),
759         };
760         self.emit(Inst::External { inst });
761     }
762 
763     /// "and" two registers.
and_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)764     pub fn and_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
765         let dst = pair_gpr(dst);
766         let inst = match size {
767             OperandSize::S8 => asm::inst::andb_rm::new(dst, src).into(),
768             OperandSize::S16 => asm::inst::andw_rm::new(dst, src).into(),
769             OperandSize::S32 => asm::inst::andl_rm::new(dst, src).into(),
770             OperandSize::S64 => asm::inst::andq_rm::new(dst, src).into(),
771             OperandSize::S128 => unimplemented!(),
772         };
773         self.emit(Inst::External { inst });
774     }
775 
and_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize)776     pub fn and_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {
777         let dst = pair_gpr(dst);
778         let inst = match size {
779             OperandSize::S8 => asm::inst::andb_mi::new(dst, u8::try_from(imm).unwrap()).into(),
780             OperandSize::S16 => asm::inst::andw_mi::new(dst, u16::try_from(imm).unwrap()).into(),
781             OperandSize::S32 => asm::inst::andl_mi::new(dst, imm as u32).into(),
782             OperandSize::S64 => asm::inst::andq_mi_sxl::new(dst, imm).into(),
783             OperandSize::S128 => unimplemented!(),
784         };
785         self.emit(Inst::External { inst });
786     }
787 
788     /// "and" two float registers.
xmm_and_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)789     pub fn xmm_and_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
790         let dst = pair_xmm(dst);
791         let inst = match size {
792             OperandSize::S32 => asm::inst::andps_a::new(dst, src).into(),
793             OperandSize::S64 => asm::inst::andpd_a::new(dst, src).into(),
794             OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
795         };
796         self.emit(Inst::External { inst });
797     }
798 
799     /// "and not" two float registers.
xmm_andn_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)800     pub fn xmm_andn_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
801         let dst = pair_xmm(dst);
802         let inst = match size {
803             OperandSize::S32 => asm::inst::andnps_a::new(dst, src).into(),
804             OperandSize::S64 => asm::inst::andnpd_a::new(dst, src).into(),
805             OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
806         };
807         self.emit(Inst::External { inst });
808     }
809 
gpr_to_xmm(&mut self, src: Reg, dst: WritableReg, size: OperandSize)810     pub fn gpr_to_xmm(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
811         let dst: WritableXmm = dst.map(|r| r.into());
812         let inst = match size {
813             OperandSize::S32 => asm::inst::movd_a::new(dst, src).into(),
814             OperandSize::S64 => asm::inst::movq_a::new(dst, src).into(),
815             OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
816         };
817 
818         self.emit(Inst::External { inst });
819     }
820 
xmm_to_gpr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)821     pub fn xmm_to_gpr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
822         let dst: WritableGpr = dst.map(Into::into);
823         let src: Xmm = src.into();
824         let inst = match size {
825             OperandSize::S32 => asm::inst::movd_b::new(dst, src).into(),
826             OperandSize::S64 => asm::inst::movq_b::new(dst, src).into(),
827             OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
828         };
829 
830         self.emit(Inst::External { inst })
831     }
832 
833     /// Convert float to signed int.
cvt_float_to_sint_seq( &mut self, src: Reg, dst: WritableReg, tmp_gpr: Reg, tmp_xmm: Reg, src_size: OperandSize, dst_size: OperandSize, saturating: bool, )834     pub fn cvt_float_to_sint_seq(
835         &mut self,
836         src: Reg,
837         dst: WritableReg,
838         tmp_gpr: Reg,
839         tmp_xmm: Reg,
840         src_size: OperandSize,
841         dst_size: OperandSize,
842         saturating: bool,
843     ) {
844         self.emit(Inst::CvtFloatToSintSeq {
845             dst_size: dst_size.into(),
846             src_size: src_size.into(),
847             is_saturating: saturating,
848             src: src.into(),
849             dst: dst.map(Into::into),
850             tmp_gpr: tmp_gpr.into(),
851             tmp_xmm: tmp_xmm.into(),
852         });
853     }
854 
855     /// Convert float to unsigned int.
cvt_float_to_uint_seq( &mut self, src: Reg, dst: WritableReg, tmp_gpr: Reg, tmp_xmm: Reg, tmp_xmm2: Reg, src_size: OperandSize, dst_size: OperandSize, saturating: bool, )856     pub fn cvt_float_to_uint_seq(
857         &mut self,
858         src: Reg,
859         dst: WritableReg,
860         tmp_gpr: Reg,
861         tmp_xmm: Reg,
862         tmp_xmm2: Reg,
863         src_size: OperandSize,
864         dst_size: OperandSize,
865         saturating: bool,
866     ) {
867         self.emit(Inst::CvtFloatToUintSeq {
868             dst_size: dst_size.into(),
869             src_size: src_size.into(),
870             is_saturating: saturating,
871             src: src.into(),
872             dst: dst.map(Into::into),
873             tmp_gpr: tmp_gpr.into(),
874             tmp_xmm: tmp_xmm.into(),
875             tmp_xmm2: tmp_xmm2.into(),
876         });
877     }
878 
879     /// Convert signed int to float.
cvt_sint_to_float( &mut self, src: Reg, dst: WritableReg, src_size: OperandSize, dst_size: OperandSize, )880     pub fn cvt_sint_to_float(
881         &mut self,
882         src: Reg,
883         dst: WritableReg,
884         src_size: OperandSize,
885         dst_size: OperandSize,
886     ) {
887         use OperandSize::*;
888         let dst = pair_xmm(dst);
889         let inst = match (src_size, dst_size) {
890             (S32, S32) => asm::inst::cvtsi2ssl_a::new(dst, src).into(),
891             (S32, S64) => asm::inst::cvtsi2sdl_a::new(dst, src).into(),
892             (S64, S32) => asm::inst::cvtsi2ssq_a::new(dst, src).into(),
893             (S64, S64) => asm::inst::cvtsi2sdq_a::new(dst, src).into(),
894             _ => unreachable!(),
895         };
896         self.emit(Inst::External { inst });
897     }
898 
899     /// Convert unsigned 64-bit int to float.
cvt_uint64_to_float_seq( &mut self, src: Reg, dst: WritableReg, tmp_gpr1: Reg, tmp_gpr2: Reg, dst_size: OperandSize, )900     pub fn cvt_uint64_to_float_seq(
901         &mut self,
902         src: Reg,
903         dst: WritableReg,
904         tmp_gpr1: Reg,
905         tmp_gpr2: Reg,
906         dst_size: OperandSize,
907     ) {
908         self.emit(Inst::CvtUint64ToFloatSeq {
909             dst_size: dst_size.into(),
910             src: src.into(),
911             dst: dst.map(Into::into),
912             tmp_gpr1: tmp_gpr1.into(),
913             tmp_gpr2: tmp_gpr2.into(),
914         });
915     }
916 
917     /// Change precision of float.
cvt_float_to_float( &mut self, src: Reg, dst: WritableReg, src_size: OperandSize, dst_size: OperandSize, )918     pub fn cvt_float_to_float(
919         &mut self,
920         src: Reg,
921         dst: WritableReg,
922         src_size: OperandSize,
923         dst_size: OperandSize,
924     ) {
925         use OperandSize::*;
926         let dst = pair_xmm(dst);
927         let inst = match (src_size, dst_size) {
928             (S32, S64) => asm::inst::cvtss2sd_a::new(dst, src).into(),
929             (S64, S32) => asm::inst::cvtsd2ss_a::new(dst, src).into(),
930             _ => unimplemented!(),
931         };
932         self.emit(Inst::External { inst });
933     }
934 
or_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)935     pub fn or_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
936         let dst = pair_gpr(dst);
937         let inst = match size {
938             OperandSize::S8 => asm::inst::orb_rm::new(dst, src).into(),
939             OperandSize::S16 => asm::inst::orw_rm::new(dst, src).into(),
940             OperandSize::S32 => asm::inst::orl_rm::new(dst, src).into(),
941             OperandSize::S64 => asm::inst::orq_rm::new(dst, src).into(),
942             OperandSize::S128 => unimplemented!(),
943         };
944         self.emit(Inst::External { inst });
945     }
946 
or_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize)947     pub fn or_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {
948         let dst = pair_gpr(dst);
949         let inst = match size {
950             OperandSize::S8 => asm::inst::orb_mi::new(dst, u8::try_from(imm).unwrap()).into(),
951             OperandSize::S16 => asm::inst::orw_mi::new(dst, u16::try_from(imm).unwrap()).into(),
952             OperandSize::S32 => asm::inst::orl_mi::new(dst, imm as u32).into(),
953             OperandSize::S64 => asm::inst::orq_mi_sxl::new(dst, imm).into(),
954             OperandSize::S128 => unimplemented!(),
955         };
956         self.emit(Inst::External { inst });
957     }
958 
xmm_or_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)959     pub fn xmm_or_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
960         let dst = pair_xmm(dst);
961         let inst = match size {
962             OperandSize::S32 => asm::inst::orps_a::new(dst, src).into(),
963             OperandSize::S64 => asm::inst::orpd_a::new(dst, src).into(),
964             OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
965         };
966         self.emit(Inst::External { inst });
967     }
968 
969     /// Logical exclusive or with registers.
xor_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)970     pub fn xor_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
971         let dst = pair_gpr(dst);
972         let inst = match size {
973             OperandSize::S8 => asm::inst::xorb_rm::new(dst, src).into(),
974             OperandSize::S16 => asm::inst::xorw_rm::new(dst, src).into(),
975             OperandSize::S32 => asm::inst::xorl_rm::new(dst, src).into(),
976             OperandSize::S64 => asm::inst::xorq_rm::new(dst, src).into(),
977             OperandSize::S128 => unimplemented!(),
978         };
979         self.emit(Inst::External { inst });
980     }
981 
xor_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize)982     pub fn xor_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {
983         let dst = pair_gpr(dst);
984         let inst = match size {
985             OperandSize::S8 => asm::inst::xorb_mi::new(dst, u8::try_from(imm).unwrap()).into(),
986             OperandSize::S16 => asm::inst::xorw_mi::new(dst, u16::try_from(imm).unwrap()).into(),
987             OperandSize::S32 => asm::inst::xorl_mi::new(dst, imm as u32).into(),
988             OperandSize::S64 => asm::inst::xorq_mi_sxl::new(dst, imm).into(),
989             OperandSize::S128 => unimplemented!(),
990         };
991         self.emit(Inst::External { inst });
992     }
993 
994     /// Logical exclusive or with float registers.
xmm_xor_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)995     pub fn xmm_xor_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
996         let dst = pair_xmm(dst);
997         let inst = match size {
998             OperandSize::S32 => asm::inst::xorps_a::new(dst, src).into(),
999             OperandSize::S64 => asm::inst::xorpd_a::new(dst, src).into(),
1000             OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
1001         };
1002         self.emit(Inst::External { inst });
1003     }
1004 
1005     /// Shift with register and register.
shift_rr(&mut self, src: Reg, dst: WritableReg, kind: ShiftKind, size: OperandSize)1006     pub fn shift_rr(&mut self, src: Reg, dst: WritableReg, kind: ShiftKind, size: OperandSize) {
1007         let dst = pair_gpr(dst);
1008         let src: Gpr = src.into();
1009         let inst = match (kind, size) {
1010             (ShiftKind::Shl, OperandSize::S32) => asm::inst::shll_mc::new(dst, src).into(),
1011             (ShiftKind::Shl, OperandSize::S64) => asm::inst::shlq_mc::new(dst, src).into(),
1012             (ShiftKind::Shl, _) => todo!(),
1013             (ShiftKind::ShrS, OperandSize::S32) => asm::inst::sarl_mc::new(dst, src).into(),
1014             (ShiftKind::ShrS, OperandSize::S64) => asm::inst::sarq_mc::new(dst, src).into(),
1015             (ShiftKind::ShrS, _) => todo!(),
1016             (ShiftKind::ShrU, OperandSize::S32) => asm::inst::shrl_mc::new(dst, src).into(),
1017             (ShiftKind::ShrU, OperandSize::S64) => asm::inst::shrq_mc::new(dst, src).into(),
1018             (ShiftKind::ShrU, _) => todo!(),
1019             (ShiftKind::Rotl, OperandSize::S32) => asm::inst::roll_mc::new(dst, src).into(),
1020             (ShiftKind::Rotl, OperandSize::S64) => asm::inst::rolq_mc::new(dst, src).into(),
1021             (ShiftKind::Rotl, _) => todo!(),
1022             (ShiftKind::Rotr, OperandSize::S32) => asm::inst::rorl_mc::new(dst, src).into(),
1023             (ShiftKind::Rotr, OperandSize::S64) => asm::inst::rorq_mc::new(dst, src).into(),
1024             (ShiftKind::Rotr, _) => todo!(),
1025         };
1026         self.emit(Inst::External { inst });
1027     }
1028 
1029     /// Shift with immediate and register.
shift_ir(&mut self, imm: u8, dst: WritableReg, kind: ShiftKind, size: OperandSize)1030     pub fn shift_ir(&mut self, imm: u8, dst: WritableReg, kind: ShiftKind, size: OperandSize) {
1031         let dst = pair_gpr(dst);
1032         let inst = match (kind, size) {
1033             (ShiftKind::Shl, OperandSize::S32) => asm::inst::shll_mi::new(dst, imm).into(),
1034             (ShiftKind::Shl, OperandSize::S64) => asm::inst::shlq_mi::new(dst, imm).into(),
1035             (ShiftKind::Shl, _) => todo!(),
1036             (ShiftKind::ShrS, OperandSize::S32) => asm::inst::sarl_mi::new(dst, imm).into(),
1037             (ShiftKind::ShrS, OperandSize::S64) => asm::inst::sarq_mi::new(dst, imm).into(),
1038             (ShiftKind::ShrS, _) => todo!(),
1039             (ShiftKind::ShrU, OperandSize::S32) => asm::inst::shrl_mi::new(dst, imm).into(),
1040             (ShiftKind::ShrU, OperandSize::S64) => asm::inst::shrq_mi::new(dst, imm).into(),
1041             (ShiftKind::ShrU, _) => todo!(),
1042             (ShiftKind::Rotl, OperandSize::S32) => asm::inst::roll_mi::new(dst, imm).into(),
1043             (ShiftKind::Rotl, OperandSize::S64) => asm::inst::rolq_mi::new(dst, imm).into(),
1044             (ShiftKind::Rotl, _) => todo!(),
1045             (ShiftKind::Rotr, OperandSize::S32) => asm::inst::rorl_mi::new(dst, imm).into(),
1046             (ShiftKind::Rotr, OperandSize::S64) => asm::inst::rorq_mi::new(dst, imm).into(),
1047             (ShiftKind::Rotr, _) => todo!(),
1048         };
1049         self.emit(Inst::External { inst });
1050     }
1051 
1052     /// Signed/unsigned division.
1053     ///
1054     /// Emits a sequence of instructions to ensure the correctness of
1055     /// the division invariants.  This function assumes that the
1056     /// caller has correctly allocated the dividend as `(rdx:rax)` and
1057     /// accounted for the quotient to be stored in `rax`.
div(&mut self, divisor: Reg, dst: (Reg, Reg), kind: DivKind, size: OperandSize)1058     pub fn div(&mut self, divisor: Reg, dst: (Reg, Reg), kind: DivKind, size: OperandSize) {
1059         let trap = match kind {
1060             // Signed division has two trapping conditions, integer overflow and
1061             // divide-by-zero. Check for divide-by-zero explicitly and let the
1062             // hardware detect overflow.
1063             DivKind::Signed => {
1064                 self.cmp_ir(divisor, 0, size);
1065                 self.emit(Inst::TrapIf {
1066                     cc: CC::Z,
1067                     trap_code: TrapCode::INTEGER_DIVISION_BY_ZERO,
1068                 });
1069 
1070                 // Sign-extend the dividend with tailor-made instructoins for
1071                 // just this operation.
1072                 let ext_dst: WritableGpr = dst.1.into();
1073                 let ext_src: Gpr = dst.0.into();
1074                 let inst = match size {
1075                     OperandSize::S32 => asm::inst::cltd_zo::new(ext_dst, ext_src).into(),
1076                     OperandSize::S64 => asm::inst::cqto_zo::new(ext_dst, ext_src).into(),
1077                     _ => unimplemented!(),
1078                 };
1079                 self.emit(Inst::External { inst });
1080                 TrapCode::INTEGER_OVERFLOW
1081             }
1082 
1083             // Unsigned division only traps in one case, on divide-by-zero, so
1084             // defer that to the trap opcode.
1085             //
1086             // The divisor_hi reg is initialized with zero through an
1087             // xor-against-itself op.
1088             DivKind::Unsigned => {
1089                 self.xor_rr(dst.1, writable!(dst.1), size);
1090                 TrapCode::INTEGER_DIVISION_BY_ZERO
1091             }
1092         };
1093         let dst0 = pair_gpr(writable!(dst.0));
1094         let dst1 = pair_gpr(writable!(dst.1));
1095         let inst = match (kind, size) {
1096             (DivKind::Signed, OperandSize::S32) => {
1097                 asm::inst::idivl_m::new(dst0, dst1, divisor, trap).into()
1098             }
1099             (DivKind::Unsigned, OperandSize::S32) => {
1100                 asm::inst::divl_m::new(dst0, dst1, divisor, trap).into()
1101             }
1102             (DivKind::Signed, OperandSize::S64) => {
1103                 asm::inst::idivq_m::new(dst0, dst1, divisor, trap).into()
1104             }
1105             (DivKind::Unsigned, OperandSize::S64) => {
1106                 asm::inst::divq_m::new(dst0, dst1, divisor, trap).into()
1107             }
1108             _ => todo!(),
1109         };
1110         self.emit(Inst::External { inst });
1111     }
1112 
1113     /// Signed/unsigned remainder.
1114     ///
1115     /// Emits a sequence of instructions to ensure the correctness of the
1116     /// division invariants and ultimately calculate the remainder.
1117     /// This function assumes that the
1118     /// caller has correctly allocated the dividend as `(rdx:rax)` and
1119     /// accounted for the remainder to be stored in `rdx`.
rem(&mut self, divisor: Reg, dst: (Reg, Reg), kind: RemKind, size: OperandSize)1120     pub fn rem(&mut self, divisor: Reg, dst: (Reg, Reg), kind: RemKind, size: OperandSize) {
1121         match kind {
1122             // Signed remainder goes through a pseudo-instruction which has
1123             // some internal branching. The `dividend_hi`, or `rdx`, is
1124             // initialized here with a `SignExtendData` instruction.
1125             RemKind::Signed => {
1126                 let ext_dst: WritableGpr = dst.1.into();
1127 
1128                 // Initialize `dividend_hi`, or `rdx`, with a tailor-made
1129                 // instruction for this operation.
1130                 let ext_src: Gpr = dst.0.into();
1131                 let inst = match size {
1132                     OperandSize::S32 => asm::inst::cltd_zo::new(ext_dst, ext_src).into(),
1133                     OperandSize::S64 => asm::inst::cqto_zo::new(ext_dst, ext_src).into(),
1134                     _ => unimplemented!(),
1135                 };
1136                 self.emit(Inst::External { inst });
1137                 self.emit(Inst::CheckedSRemSeq {
1138                     size: size.into(),
1139                     divisor: divisor.into(),
1140                     dividend_lo: dst.0.into(),
1141                     dividend_hi: dst.1.into(),
1142                     dst_quotient: dst.0.into(),
1143                     dst_remainder: dst.1.into(),
1144                 });
1145             }
1146 
1147             // Unsigned remainder initializes `dividend_hi` with zero and
1148             // then executes a normal `div` instruction.
1149             RemKind::Unsigned => {
1150                 self.xor_rr(dst.1, writable!(dst.1), size);
1151                 let dst0 = pair_gpr(writable!(dst.0));
1152                 let dst1 = pair_gpr(writable!(dst.1));
1153                 let trap = TrapCode::INTEGER_DIVISION_BY_ZERO;
1154                 let inst = match size {
1155                     OperandSize::S32 => asm::inst::divl_m::new(dst0, dst1, divisor, trap).into(),
1156                     OperandSize::S64 => asm::inst::divq_m::new(dst0, dst1, divisor, trap).into(),
1157                     _ => todo!(),
1158                 };
1159                 self.emit(Inst::External { inst });
1160             }
1161         }
1162     }
1163 
1164     /// Multiply immediate and register.
mul_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize)1165     pub fn mul_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {
1166         use OperandSize::*;
1167         let src = dst.to_reg();
1168         let dst: WritableGpr = dst.to_reg().into();
1169         let inst = match size {
1170             S16 => asm::inst::imulw_rmi::new(dst, src, u16::try_from(imm).unwrap()).into(),
1171             S32 => asm::inst::imull_rmi::new(dst, src, imm as u32).into(),
1172             S64 => asm::inst::imulq_rmi_sxl::new(dst, src, imm).into(),
1173             S8 | S128 => unimplemented!(),
1174         };
1175         self.emit(Inst::External { inst });
1176     }
1177 
1178     /// Multiply register and register.
mul_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)1179     pub fn mul_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1180         use OperandSize::*;
1181         let dst = pair_gpr(dst);
1182         let inst = match size {
1183             S16 => asm::inst::imulw_rm::new(dst, src).into(),
1184             S32 => asm::inst::imull_rm::new(dst, src).into(),
1185             S64 => asm::inst::imulq_rm::new(dst, src).into(),
1186             S8 | S128 => unimplemented!(),
1187         };
1188         self.emit(Inst::External { inst });
1189     }
1190 
1191     /// Add immediate and register.
add_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize)1192     pub fn add_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {
1193         let dst = pair_gpr(dst);
1194         let inst = match size {
1195             OperandSize::S8 => asm::inst::addb_mi::new(dst, u8::try_from(imm).unwrap()).into(),
1196             OperandSize::S16 => asm::inst::addw_mi::new(dst, u16::try_from(imm).unwrap()).into(),
1197             OperandSize::S32 => asm::inst::addl_mi::new(dst, imm as u32).into(),
1198             OperandSize::S64 => asm::inst::addq_mi_sxl::new(dst, imm).into(),
1199             OperandSize::S128 => unimplemented!(),
1200         };
1201         self.emit(Inst::External { inst });
1202     }
1203 
1204     /// Add register and register.
add_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)1205     pub fn add_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1206         let dst = pair_gpr(dst);
1207         let inst = match size {
1208             OperandSize::S8 => asm::inst::addb_rm::new(dst, src).into(),
1209             OperandSize::S16 => asm::inst::addw_rm::new(dst, src).into(),
1210             OperandSize::S32 => asm::inst::addl_rm::new(dst, src).into(),
1211             OperandSize::S64 => asm::inst::addq_rm::new(dst, src).into(),
1212             OperandSize::S128 => unimplemented!(),
1213         };
1214         self.emit(Inst::External { inst });
1215     }
1216 
lock_xadd( &mut self, addr: Address, dst: WritableReg, size: OperandSize, flags: MemFlags, )1217     pub fn lock_xadd(
1218         &mut self,
1219         addr: Address,
1220         dst: WritableReg,
1221         size: OperandSize,
1222         flags: MemFlags,
1223     ) {
1224         assert!(addr.is_offset());
1225         let mem = Self::to_synthetic_amode(&addr, flags);
1226         let dst = pair_gpr(dst);
1227         let inst = match size {
1228             OperandSize::S8 => asm::inst::lock_xaddb_mr::new(mem, dst).into(),
1229             OperandSize::S16 => asm::inst::lock_xaddw_mr::new(mem, dst).into(),
1230             OperandSize::S32 => asm::inst::lock_xaddl_mr::new(mem, dst).into(),
1231             OperandSize::S64 => asm::inst::lock_xaddq_mr::new(mem, dst).into(),
1232             OperandSize::S128 => unimplemented!(),
1233         };
1234 
1235         self.emit(Inst::External { inst });
1236     }
1237 
atomic_rmw_seq( &mut self, addr: Address, operand: Reg, dst: WritableReg, temp: WritableReg, size: OperandSize, flags: MemFlags, op: AtomicRmwSeqOp, )1238     pub fn atomic_rmw_seq(
1239         &mut self,
1240         addr: Address,
1241         operand: Reg,
1242         dst: WritableReg,
1243         temp: WritableReg,
1244         size: OperandSize,
1245         flags: MemFlags,
1246         op: AtomicRmwSeqOp,
1247     ) {
1248         assert!(addr.is_offset());
1249         let mem = Self::to_synthetic_amode(&addr, flags);
1250         self.emit(Inst::AtomicRmwSeq {
1251             ty: Type::int_with_byte_size(size.bytes() as _).unwrap(),
1252             mem,
1253             operand: operand.into(),
1254             temp: temp.map(Into::into),
1255             dst_old: dst.map(Into::into),
1256             op,
1257         });
1258     }
1259 
xchg(&mut self, addr: Address, dst: WritableReg, size: OperandSize, flags: MemFlags)1260     pub fn xchg(&mut self, addr: Address, dst: WritableReg, size: OperandSize, flags: MemFlags) {
1261         assert!(addr.is_offset());
1262         let mem = Self::to_synthetic_amode(&addr, flags);
1263         let dst = pair_gpr(dst);
1264         let inst = match size {
1265             OperandSize::S8 => asm::inst::xchgb_rm::new(dst, mem).into(),
1266             OperandSize::S16 => asm::inst::xchgw_rm::new(dst, mem).into(),
1267             OperandSize::S32 => asm::inst::xchgl_rm::new(dst, mem).into(),
1268             OperandSize::S64 => asm::inst::xchgq_rm::new(dst, mem).into(),
1269             OperandSize::S128 => unimplemented!(),
1270         };
1271 
1272         self.emit(Inst::External { inst });
1273     }
cmpxchg( &mut self, addr: Address, replacement: Reg, dst: WritableReg, size: OperandSize, flags: MemFlags, )1274     pub fn cmpxchg(
1275         &mut self,
1276         addr: Address,
1277         replacement: Reg,
1278         dst: WritableReg,
1279         size: OperandSize,
1280         flags: MemFlags,
1281     ) {
1282         assert!(addr.is_offset());
1283         let mem = Self::to_synthetic_amode(&addr, flags);
1284         let dst = pair_gpr(dst);
1285         let inst = match size {
1286             OperandSize::S8 => asm::inst::lock_cmpxchgb_mr::new(mem, replacement, dst).into(),
1287             OperandSize::S16 => asm::inst::lock_cmpxchgw_mr::new(mem, replacement, dst).into(),
1288             OperandSize::S32 => asm::inst::lock_cmpxchgl_mr::new(mem, replacement, dst).into(),
1289             OperandSize::S64 => asm::inst::lock_cmpxchgq_mr::new(mem, replacement, dst).into(),
1290             OperandSize::S128 => unimplemented!(),
1291         };
1292 
1293         self.emit(Inst::External { inst });
1294     }
1295 
cmp_ir(&mut self, src1: Reg, imm: i32, size: OperandSize)1296     pub fn cmp_ir(&mut self, src1: Reg, imm: i32, size: OperandSize) {
1297         let inst = match size {
1298             OperandSize::S8 => {
1299                 let imm = i8::try_from(imm).unwrap();
1300                 asm::inst::cmpb_mi::new(src1, imm.cast_unsigned()).into()
1301             }
1302             OperandSize::S16 => match i8::try_from(imm) {
1303                 Ok(imm8) => asm::inst::cmpw_mi_sxb::new(src1, imm8).into(),
1304                 Err(_) => {
1305                     asm::inst::cmpw_mi::new(src1, i16::try_from(imm).unwrap().cast_unsigned())
1306                         .into()
1307                 }
1308             },
1309             OperandSize::S32 => match i8::try_from(imm) {
1310                 Ok(imm8) => asm::inst::cmpl_mi_sxb::new(src1, imm8).into(),
1311                 Err(_) => asm::inst::cmpl_mi::new(src1, imm.cast_unsigned()).into(),
1312             },
1313             OperandSize::S64 => match i8::try_from(imm) {
1314                 Ok(imm8) => asm::inst::cmpq_mi_sxb::new(src1, imm8).into(),
1315                 Err(_) => asm::inst::cmpq_mi::new(src1, imm).into(),
1316             },
1317             OperandSize::S128 => unimplemented!(),
1318         };
1319 
1320         self.emit(Inst::External { inst });
1321     }
1322 
cmp_rr(&mut self, src1: Reg, src2: Reg, size: OperandSize)1323     pub fn cmp_rr(&mut self, src1: Reg, src2: Reg, size: OperandSize) {
1324         let inst = match size {
1325             OperandSize::S8 => asm::inst::cmpb_rm::new(src1, src2).into(),
1326             OperandSize::S16 => asm::inst::cmpw_rm::new(src1, src2).into(),
1327             OperandSize::S32 => asm::inst::cmpl_rm::new(src1, src2).into(),
1328             OperandSize::S64 => asm::inst::cmpq_rm::new(src1, src2).into(),
1329             OperandSize::S128 => unimplemented!(),
1330         };
1331 
1332         self.emit(Inst::External { inst });
1333     }
1334 
1335     /// Compares values in src1 and src2 and sets ZF, PF, and CF flags in EFLAGS
1336     /// register.
ucomis(&mut self, src1: Reg, src2: Reg, size: OperandSize)1337     pub fn ucomis(&mut self, src1: Reg, src2: Reg, size: OperandSize) {
1338         let inst = match size {
1339             OperandSize::S32 => asm::inst::ucomiss_a::new(src1, src2).into(),
1340             OperandSize::S64 => asm::inst::ucomisd_a::new(src1, src2).into(),
1341             OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
1342         };
1343         self.emit(Inst::External { inst });
1344     }
1345 
popcnt(&mut self, src: Reg, dst: WritableReg, size: OperandSize)1346     pub fn popcnt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1347         assert!(
1348             self.isa_flags.has_popcnt() && self.isa_flags.has_sse42(),
1349             "Requires has_popcnt and has_sse42 flags"
1350         );
1351         let dst = WritableGpr::from_reg(dst.to_reg().into());
1352         let inst = match size {
1353             OperandSize::S16 => asm::inst::popcntw_rm::new(dst, src).into(),
1354             OperandSize::S32 => asm::inst::popcntl_rm::new(dst, src).into(),
1355             OperandSize::S64 => asm::inst::popcntq_rm::new(dst, src).into(),
1356             OperandSize::S8 | OperandSize::S128 => unreachable!(),
1357         };
1358         self.emit(Inst::External { inst });
1359     }
1360 
1361     /// Emit a test instruction with two register operands.
test_rr(&mut self, src1: Reg, src2: Reg, size: OperandSize)1362     pub fn test_rr(&mut self, src1: Reg, src2: Reg, size: OperandSize) {
1363         let inst = match size {
1364             OperandSize::S8 => asm::inst::testb_mr::new(src1, src2).into(),
1365             OperandSize::S16 => asm::inst::testw_mr::new(src1, src2).into(),
1366             OperandSize::S32 => asm::inst::testl_mr::new(src1, src2).into(),
1367             OperandSize::S64 => asm::inst::testq_mr::new(src1, src2).into(),
1368             OperandSize::S128 => unimplemented!(),
1369         };
1370 
1371         self.emit(Inst::External { inst });
1372     }
1373 
1374     /// Set value in dst to `0` or `1` based on flags in status register and
1375     /// [`CmpKind`].
setcc(&mut self, kind: IntCmpKind, dst: WritableReg)1376     pub fn setcc(&mut self, kind: IntCmpKind, dst: WritableReg) {
1377         self.setcc_impl(kind.into(), dst);
1378     }
1379 
1380     /// Set value in dst to `1` if parity flag in status register is set, `0`
1381     /// otherwise.
setp(&mut self, dst: WritableReg)1382     pub fn setp(&mut self, dst: WritableReg) {
1383         self.setcc_impl(CC::P, dst);
1384     }
1385 
1386     /// Set value in dst to `1` if parity flag in status register is not set,
1387     /// `0` otherwise.
setnp(&mut self, dst: WritableReg)1388     pub fn setnp(&mut self, dst: WritableReg) {
1389         self.setcc_impl(CC::NP, dst);
1390     }
1391 
setcc_impl(&mut self, cc: CC, dst: WritableReg)1392     fn setcc_impl(&mut self, cc: CC, dst: WritableReg) {
1393         // Clear the dst register or bits 1 to 31 may be incorrectly set.
1394         // Don't use xor since it updates the status register.
1395         let dst: WritableGpr = dst.map(Into::into);
1396         let inst = asm::inst::movl_oi::new(dst, 0).into();
1397         self.emit(Inst::External { inst });
1398 
1399         // Copy correct bit from status register into dst register.
1400         //
1401         // Note that some of these mnemonics don't match exactly and that's
1402         // intentional as there are multiple mnemonics for the same encoding in
1403         // some cases and the assembler picked ones that match Capstone rather
1404         // than Cranelift.
1405         let inst = match cc {
1406             CC::O => asm::inst::seto_m::new(dst).into(),
1407             CC::NO => asm::inst::setno_m::new(dst).into(),
1408             CC::B => asm::inst::setb_m::new(dst).into(),
1409             CC::NB => asm::inst::setae_m::new(dst).into(), //  nb == ae
1410             CC::Z => asm::inst::sete_m::new(dst).into(),   //   z ==  e
1411             CC::NZ => asm::inst::setne_m::new(dst).into(), //  nz == ne
1412             CC::BE => asm::inst::setbe_m::new(dst).into(),
1413             CC::NBE => asm::inst::seta_m::new(dst).into(), // nbe ==  a
1414             CC::S => asm::inst::sets_m::new(dst).into(),
1415             CC::NS => asm::inst::setns_m::new(dst).into(),
1416             CC::L => asm::inst::setl_m::new(dst).into(),
1417             CC::NL => asm::inst::setge_m::new(dst).into(), //  nl == ge
1418             CC::LE => asm::inst::setle_m::new(dst).into(),
1419             CC::NLE => asm::inst::setg_m::new(dst).into(), // nle ==  g
1420             CC::P => asm::inst::setp_m::new(dst).into(),
1421             CC::NP => asm::inst::setnp_m::new(dst).into(),
1422         };
1423         self.emit(Inst::External { inst });
1424     }
1425 
1426     /// Store the count of leading zeroes in src in dst.
1427     /// Requires `has_lzcnt` flag.
lzcnt(&mut self, src: Reg, dst: WritableReg, size: OperandSize)1428     pub fn lzcnt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1429         assert!(self.isa_flags.has_lzcnt(), "Requires has_lzcnt flag");
1430         let dst = WritableGpr::from_reg(dst.to_reg().into());
1431         let inst = match size {
1432             OperandSize::S16 => asm::inst::lzcntw_rm::new(dst, src).into(),
1433             OperandSize::S32 => asm::inst::lzcntl_rm::new(dst, src).into(),
1434             OperandSize::S64 => asm::inst::lzcntq_rm::new(dst, src).into(),
1435             OperandSize::S8 | OperandSize::S128 => unreachable!(),
1436         };
1437         self.emit(Inst::External { inst });
1438     }
1439 
1440     /// Store the count of trailing zeroes in src in dst.
1441     /// Requires `has_bmi1` flag.
tzcnt(&mut self, src: Reg, dst: WritableReg, size: OperandSize)1442     pub fn tzcnt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1443         assert!(self.isa_flags.has_bmi1(), "Requires has_bmi1 flag");
1444         let dst = WritableGpr::from_reg(dst.to_reg().into());
1445         let inst = match size {
1446             OperandSize::S16 => asm::inst::tzcntw_a::new(dst, src).into(),
1447             OperandSize::S32 => asm::inst::tzcntl_a::new(dst, src).into(),
1448             OperandSize::S64 => asm::inst::tzcntq_a::new(dst, src).into(),
1449             OperandSize::S8 | OperandSize::S128 => unreachable!(),
1450         };
1451         self.emit(Inst::External { inst });
1452     }
1453 
1454     /// Stores position of the most significant bit set in src in dst.
1455     /// Zero flag is set if src is equal to 0.
bsr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)1456     pub fn bsr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1457         let dst: WritableGpr = WritableGpr::from_reg(dst.to_reg().into());
1458         let inst = match size {
1459             OperandSize::S16 => asm::inst::bsrw_rm::new(dst, src).into(),
1460             OperandSize::S32 => asm::inst::bsrl_rm::new(dst, src).into(),
1461             OperandSize::S64 => asm::inst::bsrq_rm::new(dst, src).into(),
1462             OperandSize::S8 | OperandSize::S128 => unreachable!(),
1463         };
1464         self.emit(Inst::External { inst });
1465     }
1466 
1467     /// Performs integer negation on `src` and places result in `dst`.
neg(&mut self, read: Reg, write: WritableReg, size: OperandSize)1468     pub fn neg(&mut self, read: Reg, write: WritableReg, size: OperandSize) {
1469         let gpr = PairedGpr {
1470             read: read.into(),
1471             write: WritableGpr::from_reg(write.to_reg().into()),
1472         };
1473         let inst = match size {
1474             OperandSize::S8 => asm::inst::negb_m::new(gpr).into(),
1475             OperandSize::S16 => asm::inst::negw_m::new(gpr).into(),
1476             OperandSize::S32 => asm::inst::negl_m::new(gpr).into(),
1477             OperandSize::S64 => asm::inst::negq_m::new(gpr).into(),
1478             OperandSize::S128 => unreachable!(),
1479         };
1480         self.emit(Inst::External { inst });
1481     }
1482 
1483     /// Stores position of the least significant bit set in src in dst.
1484     /// Zero flag is set if src is equal to 0.
bsf(&mut self, src: Reg, dst: WritableReg, size: OperandSize)1485     pub fn bsf(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1486         let dst: WritableGpr = WritableGpr::from_reg(dst.to_reg().into());
1487         let inst = match size {
1488             OperandSize::S16 => asm::inst::bsfw_rm::new(dst, src).into(),
1489             OperandSize::S32 => asm::inst::bsfl_rm::new(dst, src).into(),
1490             OperandSize::S64 => asm::inst::bsfq_rm::new(dst, src).into(),
1491             OperandSize::S8 | OperandSize::S128 => unreachable!(),
1492         };
1493         self.emit(Inst::External { inst });
1494     }
1495 
1496     /// Performs float addition on src and dst and places result in dst.
xmm_add_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)1497     pub fn xmm_add_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1498         let dst = pair_xmm(dst);
1499         let inst = match size {
1500             OperandSize::S32 => asm::inst::addss_a::new(dst, src).into(),
1501             OperandSize::S64 => asm::inst::addsd_a::new(dst, src).into(),
1502             OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
1503         };
1504         self.emit(Inst::External { inst });
1505     }
1506 
1507     /// Performs float subtraction on src and dst and places result in dst.
xmm_sub_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)1508     pub fn xmm_sub_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1509         let dst = pair_xmm(dst);
1510         let inst = match size {
1511             OperandSize::S32 => asm::inst::subss_a::new(dst, src).into(),
1512             OperandSize::S64 => asm::inst::subsd_a::new(dst, src).into(),
1513             OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
1514         };
1515         self.emit(Inst::External { inst });
1516     }
1517 
1518     /// Performs float multiplication on src and dst and places result in dst.
xmm_mul_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)1519     pub fn xmm_mul_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1520         use OperandSize::*;
1521         let dst = pair_xmm(dst);
1522         let inst = match size {
1523             S32 => asm::inst::mulss_a::new(dst, src).into(),
1524             S64 => asm::inst::mulsd_a::new(dst, src).into(),
1525             S8 | S16 | S128 => unreachable!(),
1526         };
1527         self.emit(Inst::External { inst });
1528     }
1529 
1530     /// Performs float division on src and dst and places result in dst.
xmm_div_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)1531     pub fn xmm_div_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1532         let dst = pair_xmm(dst);
1533         let inst = match size {
1534             OperandSize::S32 => asm::inst::divss_a::new(dst, src).into(),
1535             OperandSize::S64 => asm::inst::divsd_a::new(dst, src).into(),
1536             OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
1537         };
1538         self.emit(Inst::External { inst });
1539     }
1540 
1541     /// Minimum for src and dst XMM registers with results put in dst.
xmm_min_seq(&mut self, src: Reg, dst: WritableReg, size: OperandSize)1542     pub fn xmm_min_seq(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1543         self.emit(Inst::XmmMinMaxSeq {
1544             size: size.into(),
1545             is_min: true,
1546             lhs: src.into(),
1547             rhs: dst.to_reg().into(),
1548             dst: dst.map(Into::into),
1549         });
1550     }
1551 
1552     /// Maximum for src and dst XMM registers with results put in dst.
xmm_max_seq(&mut self, src: Reg, dst: WritableReg, size: OperandSize)1553     pub fn xmm_max_seq(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1554         self.emit(Inst::XmmMinMaxSeq {
1555             size: size.into(),
1556             is_min: false,
1557             lhs: src.into(),
1558             rhs: dst.to_reg().into(),
1559             dst: dst.map(Into::into),
1560         });
1561     }
1562 
1563     /// Perform rounding operation on float register src and place results in
1564     /// float register dst.
xmm_rounds_rr( &mut self, src: Reg, dst: WritableReg, mode: RoundingMode, size: OperandSize, )1565     pub fn xmm_rounds_rr(
1566         &mut self,
1567         src: Reg,
1568         dst: WritableReg,
1569         mode: RoundingMode,
1570         size: OperandSize,
1571     ) {
1572         let dst = dst.map(|r| r.into());
1573 
1574         let imm: u8 = match mode {
1575             RoundingMode::Nearest => 0x00,
1576             RoundingMode::Down => 0x01,
1577             RoundingMode::Up => 0x02,
1578             RoundingMode::Zero => 0x03,
1579         };
1580 
1581         let inst = match size {
1582             OperandSize::S32 => asm::inst::roundss_rmi::new(dst, src, imm).into(),
1583             OperandSize::S64 => asm::inst::roundsd_rmi::new(dst, src, imm).into(),
1584             OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
1585         };
1586 
1587         self.emit(Inst::External { inst });
1588     }
1589 
sqrt(&mut self, src: Reg, dst: WritableReg, size: OperandSize)1590     pub fn sqrt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1591         use OperandSize::*;
1592         let dst = pair_xmm(dst);
1593         let inst = match size {
1594             S32 => asm::inst::sqrtss_a::new(dst, src).into(),
1595             S64 => asm::inst::sqrtsd_a::new(dst, src).into(),
1596             S8 | S16 | S128 => unimplemented!(),
1597         };
1598         self.emit(Inst::External { inst });
1599     }
1600 
1601     /// Emit a call to an unknown location through a register.
call_with_reg(&mut self, cc: CallingConvention, callee: Reg)1602     pub fn call_with_reg(&mut self, cc: CallingConvention, callee: Reg) {
1603         self.emit(Inst::CallUnknown {
1604             info: Box::new(CallInfo::empty(RegMem::reg(callee.into()), cc.into())),
1605         });
1606     }
1607 
1608     /// Emit a call to a locally defined function through an index.
call_with_name(&mut self, cc: CallingConvention, name: UserExternalNameRef)1609     pub fn call_with_name(&mut self, cc: CallingConvention, name: UserExternalNameRef) {
1610         self.emit(Inst::CallKnown {
1611             info: Box::new(CallInfo::empty(ExternalName::user(name), cc.into())),
1612         });
1613     }
1614 
1615     /// Emits a conditional jump to the given label.
jmp_if(&mut self, cc: impl Into<CC>, taken: MachLabel)1616     pub fn jmp_if(&mut self, cc: impl Into<CC>, taken: MachLabel) {
1617         self.emit(Inst::WinchJmpIf {
1618             cc: cc.into(),
1619             taken,
1620         });
1621     }
1622 
1623     /// Performs an unconditional jump to the given label.
jmp(&mut self, target: MachLabel)1624     pub fn jmp(&mut self, target: MachLabel) {
1625         self.emit(Inst::JmpKnown { dst: target });
1626     }
1627 
1628     /// Emits a jump table sequence.
jmp_table( &mut self, targets: SmallVec<[MachLabel; 4]>, default: MachLabel, index: Reg, tmp1: Reg, tmp2: Reg, )1629     pub fn jmp_table(
1630         &mut self,
1631         targets: SmallVec<[MachLabel; 4]>,
1632         default: MachLabel,
1633         index: Reg,
1634         tmp1: Reg,
1635         tmp2: Reg,
1636     ) {
1637         self.emit(Inst::JmpTableSeq {
1638             idx: index.into(),
1639             tmp1: Writable::from_reg(tmp1.into()),
1640             tmp2: Writable::from_reg(tmp2.into()),
1641             default_target: default,
1642             targets: Box::new(targets.to_vec()),
1643         })
1644     }
1645 
1646     /// Emit a trap instruction.
trap(&mut self, code: TrapCode)1647     pub fn trap(&mut self, code: TrapCode) {
1648         let inst = asm::inst::ud2_zo::new(code).into();
1649         self.emit(Inst::External { inst });
1650     }
1651 
1652     /// Conditional trap.
trapif(&mut self, cc: impl Into<CC>, trap_code: TrapCode)1653     pub fn trapif(&mut self, cc: impl Into<CC>, trap_code: TrapCode) {
1654         self.emit(Inst::TrapIf {
1655             cc: cc.into(),
1656             trap_code,
1657         });
1658     }
1659 
1660     /// Load effective address.
lea(&mut self, addr: &Address, dst: WritableReg, size: OperandSize)1661     pub fn lea(&mut self, addr: &Address, dst: WritableReg, size: OperandSize) {
1662         let addr = Self::to_synthetic_amode(addr, MemFlags::trusted());
1663         let dst: WritableGpr = dst.map(Into::into);
1664         let inst = match size {
1665             OperandSize::S16 => asm::inst::leaw_rm::new(dst, addr).into(),
1666             OperandSize::S32 => asm::inst::leal_rm::new(dst, addr).into(),
1667             OperandSize::S64 => asm::inst::leaq_rm::new(dst, addr).into(),
1668             OperandSize::S8 | OperandSize::S128 => unimplemented!(),
1669         };
1670         self.emit(Inst::External { inst });
1671     }
1672 
adc_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)1673     pub fn adc_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1674         let dst = pair_gpr(dst);
1675         let inst = match size {
1676             OperandSize::S8 => asm::inst::adcb_rm::new(dst, src).into(),
1677             OperandSize::S16 => asm::inst::adcw_rm::new(dst, src).into(),
1678             OperandSize::S32 => asm::inst::adcl_rm::new(dst, src).into(),
1679             OperandSize::S64 => asm::inst::adcq_rm::new(dst, src).into(),
1680             OperandSize::S128 => unimplemented!(),
1681         };
1682         self.emit(Inst::External { inst });
1683     }
1684 
sbb_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)1685     pub fn sbb_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1686         let dst = pair_gpr(dst);
1687         let inst = match size {
1688             OperandSize::S8 => asm::inst::sbbb_rm::new(dst, src).into(),
1689             OperandSize::S16 => asm::inst::sbbw_rm::new(dst, src).into(),
1690             OperandSize::S32 => asm::inst::sbbl_rm::new(dst, src).into(),
1691             OperandSize::S64 => asm::inst::sbbq_rm::new(dst, src).into(),
1692             OperandSize::S128 => unimplemented!(),
1693         };
1694         self.emit(Inst::External { inst });
1695     }
1696 
mul_wide( &mut self, dst_lo: WritableReg, dst_hi: WritableReg, lhs: Reg, rhs: Reg, kind: MulWideKind, size: OperandSize, )1697     pub fn mul_wide(
1698         &mut self,
1699         dst_lo: WritableReg,
1700         dst_hi: WritableReg,
1701         lhs: Reg,
1702         rhs: Reg,
1703         kind: MulWideKind,
1704         size: OperandSize,
1705     ) {
1706         use MulWideKind::*;
1707         use OperandSize::*;
1708         let rax = asm::Fixed(PairedGpr {
1709             read: lhs.into(),
1710             write: WritableGpr::from_reg(dst_lo.to_reg().into()),
1711         });
1712         let rdx = asm::Fixed(dst_hi.to_reg().into());
1713         if size == S8 {
1714             // For `mulb` and `imulb`, both the high and low bits are written to
1715             // RAX.
1716             assert_eq!(dst_lo, dst_hi);
1717         }
1718         let inst = match (size, kind) {
1719             (S8, Unsigned) => asm::inst::mulb_m::new(rax, rhs).into(),
1720             (S8, Signed) => asm::inst::imulb_m::new(rax, rhs).into(),
1721             (S16, Unsigned) => asm::inst::mulw_m::new(rax, rdx, rhs).into(),
1722             (S16, Signed) => asm::inst::imulw_m::new(rax, rdx, rhs).into(),
1723             (S32, Unsigned) => asm::inst::mull_m::new(rax, rdx, rhs).into(),
1724             (S32, Signed) => asm::inst::imull_m::new(rax, rdx, rhs).into(),
1725             (S64, Unsigned) => asm::inst::mulq_m::new(rax, rdx, rhs).into(),
1726             (S64, Signed) => asm::inst::imulq_m::new(rax, rdx, rhs).into(),
1727             (S128, _) => unimplemented!(),
1728         };
1729         self.emit(Inst::External { inst });
1730     }
1731 
1732     /// Shuffles bytes in `src` according to contents of `mask` and puts
1733     /// result in `dst`.
xmm_vpshufb_rrm(&mut self, dst: WritableReg, src: Reg, mask: &Address)1734     pub fn xmm_vpshufb_rrm(&mut self, dst: WritableReg, src: Reg, mask: &Address) {
1735         let dst: WritableXmm = dst.map(|r| r.into());
1736         let mask = Self::to_synthetic_amode(mask, MemFlags::trusted());
1737         let inst = asm::inst::vpshufb_b::new(dst, src, mask).into();
1738         self.emit(Inst::External { inst });
1739     }
1740 
1741     /// Shuffles bytes in `src` according to contents of `mask` and puts
1742     /// result in `dst`.
xmm_vpshufb_rrr(&mut self, dst: WritableReg, src: Reg, mask: Reg)1743     pub fn xmm_vpshufb_rrr(&mut self, dst: WritableReg, src: Reg, mask: Reg) {
1744         let dst: WritableXmm = dst.map(|r| r.into());
1745         let inst = asm::inst::vpshufb_b::new(dst, src, mask).into();
1746         self.emit(Inst::External { inst });
1747     }
1748 
1749     /// Add unsigned integers with unsigned saturation.
1750     ///
1751     /// Adds the src operands but when an individual byte result is larger than
1752     /// an unsigned byte integer, 0xFF is written instead.
xmm_vpaddus_rrm( &mut self, dst: WritableReg, src1: Reg, src2: &Address, size: OperandSize, )1753     pub fn xmm_vpaddus_rrm(
1754         &mut self,
1755         dst: WritableReg,
1756         src1: Reg,
1757         src2: &Address,
1758         size: OperandSize,
1759     ) {
1760         let dst: WritableXmm = dst.map(|r| r.into());
1761         let src2 = Self::to_synthetic_amode(src2, MemFlags::trusted());
1762         let inst = match size {
1763             OperandSize::S8 => asm::inst::vpaddusb_b::new(dst, src1, src2).into(),
1764             OperandSize::S32 => asm::inst::vpaddusw_b::new(dst, src1, src2).into(),
1765             _ => unimplemented!(),
1766         };
1767         self.emit(Inst::External { inst });
1768     }
1769 
1770     /// Add unsigned integers with unsigned saturation.
1771     ///
1772     /// Adds the src operands but when an individual byte result is larger than
1773     /// an unsigned byte integer, 0xFF is written instead.
xmm_vpaddus_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, size: OperandSize)1774     pub fn xmm_vpaddus_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, size: OperandSize) {
1775         let dst: WritableXmm = dst.map(|r| r.into());
1776         let inst = match size {
1777             OperandSize::S8 => asm::inst::vpaddusb_b::new(dst, src1, src2).into(),
1778             OperandSize::S16 => asm::inst::vpaddusw_b::new(dst, src1, src2).into(),
1779             _ => unimplemented!(),
1780         };
1781         self.emit(Inst::External { inst });
1782     }
1783 
1784     /// Add signed integers.
xmm_vpadds_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, size: OperandSize)1785     pub fn xmm_vpadds_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, size: OperandSize) {
1786         let dst: WritableXmm = dst.map(|r| r.into());
1787         let inst = match size {
1788             OperandSize::S8 => asm::inst::vpaddsb_b::new(dst, src1, src2).into(),
1789             OperandSize::S16 => asm::inst::vpaddsw_b::new(dst, src1, src2).into(),
1790             _ => unimplemented!(),
1791         };
1792         self.emit(Inst::External { inst });
1793     }
1794 
xmm_vpadd_rmr( &mut self, src1: Reg, src2: &Address, dst: WritableReg, size: OperandSize, )1795     pub fn xmm_vpadd_rmr(
1796         &mut self,
1797         src1: Reg,
1798         src2: &Address,
1799         dst: WritableReg,
1800         size: OperandSize,
1801     ) {
1802         let dst: WritableXmm = dst.map(|r| r.into());
1803         let address = Self::to_synthetic_amode(src2, MemFlags::trusted());
1804         let inst = match size {
1805             OperandSize::S8 => asm::inst::vpaddb_b::new(dst, src1, address).into(),
1806             OperandSize::S16 => asm::inst::vpaddw_b::new(dst, src1, address).into(),
1807             OperandSize::S32 => asm::inst::vpaddd_b::new(dst, src1, address).into(),
1808             _ => unimplemented!(),
1809         };
1810         self.emit(Inst::External { inst });
1811     }
1812 
1813     /// Adds vectors of integers in `src1` and `src2` and puts the results in
1814     /// `dst`.
xmm_vpadd_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)1815     pub fn xmm_vpadd_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
1816         let dst: WritableXmm = dst.map(|r| r.into());
1817         let inst = match size {
1818             OperandSize::S8 => asm::inst::vpaddb_b::new(dst, src1, src2).into(),
1819             OperandSize::S16 => asm::inst::vpaddw_b::new(dst, src1, src2).into(),
1820             OperandSize::S32 => asm::inst::vpaddd_b::new(dst, src1, src2).into(),
1821             OperandSize::S64 => asm::inst::vpaddq_b::new(dst, src1, src2).into(),
1822             _ => unimplemented!(),
1823         };
1824         self.emit(Inst::External { inst });
1825     }
1826 
mfence(&mut self)1827     pub fn mfence(&mut self) {
1828         self.emit(Inst::External {
1829             inst: asm::inst::mfence_zo::new().into(),
1830         });
1831     }
1832 
1833     /// Extract a value from `src` into `addr` determined by `lane`.
xmm_vpextr_rm( &mut self, addr: &Address, src: Reg, lane: u8, size: OperandSize, flags: MemFlags, )1834     pub(crate) fn xmm_vpextr_rm(
1835         &mut self,
1836         addr: &Address,
1837         src: Reg,
1838         lane: u8,
1839         size: OperandSize,
1840         flags: MemFlags,
1841     ) {
1842         assert!(addr.is_offset());
1843         let dst = Self::to_synthetic_amode(addr, flags);
1844         let inst = match size {
1845             OperandSize::S8 => asm::inst::vpextrb_a::new(dst, src, lane).into(),
1846             OperandSize::S16 => asm::inst::vpextrw_b::new(dst, src, lane).into(),
1847             OperandSize::S32 => asm::inst::vpextrd_a::new(dst, src, lane).into(),
1848             OperandSize::S64 => asm::inst::vpextrq_a::new(dst, src, lane).into(),
1849             _ => unimplemented!(),
1850         };
1851         self.emit(Inst::External { inst });
1852     }
1853 
1854     /// Extract a value from `src` into `dst` (zero extended) determined by `lane`.
xmm_vpextr_rr(&mut self, dst: WritableReg, src: Reg, lane: u8, size: OperandSize)1855     pub fn xmm_vpextr_rr(&mut self, dst: WritableReg, src: Reg, lane: u8, size: OperandSize) {
1856         let dst: WritableGpr = dst.map(|r| r.into());
1857         let inst = match size {
1858             OperandSize::S8 => asm::inst::vpextrb_a::new(dst, src, lane).into(),
1859             OperandSize::S16 => asm::inst::vpextrw_a::new(dst, src, lane).into(),
1860             OperandSize::S32 => asm::inst::vpextrd_a::new(dst, src, lane).into(),
1861             OperandSize::S64 => asm::inst::vpextrq_a::new(dst, src, lane).into(),
1862             _ => unimplemented!(),
1863         };
1864         self.emit(Inst::External { inst });
1865     }
1866 
1867     /// Copy value from `src2`, merge into `src1`, and put result in `dst` at
1868     /// the location specified in `count`.
xmm_vpinsr_rrm( &mut self, dst: WritableReg, src1: Reg, src2: &Address, count: u8, size: OperandSize, )1869     pub fn xmm_vpinsr_rrm(
1870         &mut self,
1871         dst: WritableReg,
1872         src1: Reg,
1873         src2: &Address,
1874         count: u8,
1875         size: OperandSize,
1876     ) {
1877         let src2 = Self::to_synthetic_amode(src2, MemFlags::trusted());
1878         let dst: WritableXmm = dst.map(|r| r.into());
1879 
1880         let inst = match size {
1881             OperandSize::S8 => asm::inst::vpinsrb_b::new(dst, src1, src2, count).into(),
1882             OperandSize::S16 => asm::inst::vpinsrw_b::new(dst, src1, src2, count).into(),
1883             OperandSize::S32 => asm::inst::vpinsrd_b::new(dst, src1, src2, count).into(),
1884             OperandSize::S64 => asm::inst::vpinsrq_b::new(dst, src1, src2, count).into(),
1885             OperandSize::S128 => unreachable!(),
1886         };
1887         self.emit(Inst::External { inst });
1888     }
1889 
1890     /// Copy value from `src2`, merge into `src1`, and put result in `dst` at
1891     /// the location specified in `count`.
xmm_vpinsr_rrr( &mut self, dst: WritableReg, src1: Reg, src2: Reg, count: u8, size: OperandSize, )1892     pub fn xmm_vpinsr_rrr(
1893         &mut self,
1894         dst: WritableReg,
1895         src1: Reg,
1896         src2: Reg,
1897         count: u8,
1898         size: OperandSize,
1899     ) {
1900         let dst: WritableXmm = dst.map(|r| r.into());
1901         let inst = match size {
1902             OperandSize::S8 => asm::inst::vpinsrb_b::new(dst, src1, src2, count).into(),
1903             OperandSize::S16 => asm::inst::vpinsrw_b::new(dst, src1, src2, count).into(),
1904             OperandSize::S32 => asm::inst::vpinsrd_b::new(dst, src1, src2, count).into(),
1905             OperandSize::S64 => asm::inst::vpinsrq_b::new(dst, src1, src2, count).into(),
1906             OperandSize::S128 => unreachable!(),
1907         };
1908         self.emit(Inst::External { inst });
1909     }
1910 
1911     /// Copy a 32-bit float in `src2`, merge into `src1`, and put result in `dst`.
xmm_vinsertps_rrm(&mut self, dst: WritableReg, src1: Reg, address: &Address, imm: u8)1912     pub fn xmm_vinsertps_rrm(&mut self, dst: WritableReg, src1: Reg, address: &Address, imm: u8) {
1913         let dst: WritableXmm = dst.map(|r| r.into());
1914         let address = Self::to_synthetic_amode(address, MemFlags::trusted());
1915         let inst = asm::inst::vinsertps_b::new(dst, src1, address, imm).into();
1916         self.emit(Inst::External { inst });
1917     }
1918 
1919     /// Copy a 32-bit float in `src2`, merge into `src1`, and put result in `dst`.
xmm_vinsertps_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, imm: u8)1920     pub fn xmm_vinsertps_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, imm: u8) {
1921         let dst: WritableXmm = dst.map(|r| r.into());
1922         let inst = asm::inst::vinsertps_b::new(dst, src1, src2, imm).into();
1923         self.emit(Inst::External { inst });
1924     }
1925 
1926     /// Moves lower 64-bit float from `src2` into lower 64-bits of `dst` and the
1927     /// upper 64-bits in `src1` into the upper 64-bits of `dst`.
xmm_vmovsd_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg)1928     pub fn xmm_vmovsd_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg) {
1929         let dst: WritableXmm = dst.map(|r| r.into());
1930         let inst = asm::inst::vmovsd_b::new(dst, src1, src2).into();
1931         self.emit(Inst::External { inst });
1932     }
1933 
1934     /// Moves 64-bit float from `src` into lower 64-bits of `dst`.
1935     /// Zeroes out the upper 64 bits of `dst`.
xmm_vmovsd_rm(&mut self, dst: WritableReg, src: &Address)1936     pub fn xmm_vmovsd_rm(&mut self, dst: WritableReg, src: &Address) {
1937         let src = Self::to_synthetic_amode(src, MemFlags::trusted());
1938         let dst: WritableXmm = dst.map(|r| r.into());
1939         let inst = asm::inst::vmovsd_d::new(dst, src).into();
1940         self.emit(Inst::External { inst });
1941     }
1942 
1943     /// Moves two 32-bit floats from `src2` to the upper 64-bits of `dst`.
1944     /// Copies two 32-bit floats from the lower 64-bits of `src1` to lower
1945     /// 64-bits of `dst`.
xmm_vmovlhps_rrm(&mut self, dst: WritableReg, src1: Reg, src2: &Address)1946     pub fn xmm_vmovlhps_rrm(&mut self, dst: WritableReg, src1: Reg, src2: &Address) {
1947         let src2 = Self::to_synthetic_amode(src2, MemFlags::trusted());
1948         let dst: WritableXmm = dst.map(|r| r.into());
1949         let inst = asm::inst::vmovhps_b::new(dst, src1, src2).into();
1950         self.emit(Inst::External { inst });
1951     }
1952 
1953     /// Moves two 32-bit floats from the lower 64-bits of `src2` to the upper
1954     /// 64-bits of `dst`. Copies two 32-bit floats from the lower 64-bits of
1955     /// `src1` to lower 64-bits of `dst`.
xmm_vmovlhps_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg)1956     pub fn xmm_vmovlhps_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg) {
1957         let dst: WritableXmm = dst.map(|r| r.into());
1958         let inst = asm::inst::vmovlhps_rvm::new(dst, src1, src2).into();
1959         self.emit(Inst::External { inst });
1960     }
1961 
1962     /// Move unaligned packed integer values from address `src` to `dst`.
xmm_vmovdqu_mr(&mut self, src: &Address, dst: WritableReg, flags: MemFlags)1963     pub fn xmm_vmovdqu_mr(&mut self, src: &Address, dst: WritableReg, flags: MemFlags) {
1964         let src = Self::to_synthetic_amode(src, flags);
1965         let dst: WritableXmm = dst.map(|r| r.into());
1966         let inst = asm::inst::vmovdqu_a::new(dst, src).into();
1967         self.emit(Inst::External { inst });
1968     }
1969 
1970     /// Move integer from `src` to xmm register `dst` using an AVX instruction.
avx_gpr_to_xmm(&mut self, src: Reg, dst: WritableReg, size: OperandSize)1971     pub fn avx_gpr_to_xmm(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1972         let dst: WritableXmm = dst.map(|r| r.into());
1973         let inst = match size {
1974             OperandSize::S32 => asm::inst::vmovd_a::new(dst, src).into(),
1975             OperandSize::S64 => asm::inst::vmovq_a::new(dst, src).into(),
1976             _ => unreachable!(),
1977         };
1978 
1979         self.emit(Inst::External { inst });
1980     }
1981 
xmm_vptest(&mut self, src1: Reg, src2: Reg)1982     pub fn xmm_vptest(&mut self, src1: Reg, src2: Reg) {
1983         let inst = asm::inst::vptest_rm::new(src1, src2).into();
1984         self.emit(Inst::External { inst });
1985     }
1986 
1987     /// Converts vector of integers into vector of floating values.
xmm_vcvt_rr(&mut self, src: Reg, dst: WritableReg, kind: VcvtKind)1988     pub fn xmm_vcvt_rr(&mut self, src: Reg, dst: WritableReg, kind: VcvtKind) {
1989         let dst: WritableXmm = dst.map(|x| x.into());
1990         let inst = match kind {
1991             VcvtKind::I32ToF32 => asm::inst::vcvtdq2ps_a::new(dst, src).into(),
1992             VcvtKind::I32ToF64 => asm::inst::vcvtdq2pd_a::new(dst, src).into(),
1993             VcvtKind::F64ToF32 => asm::inst::vcvtpd2ps_a::new(dst, src).into(),
1994             VcvtKind::F64ToI32 => asm::inst::vcvttpd2dq_a::new(dst, src).into(),
1995             VcvtKind::F32ToF64 => asm::inst::vcvtps2pd_a::new(dst, src).into(),
1996             VcvtKind::F32ToI32 => asm::inst::vcvttps2dq_a::new(dst, src).into(),
1997         };
1998         self.emit(Inst::External { inst });
1999     }
2000 
2001     /// Subtract floats in vector `src1` to floats in vector `src2`.
xmm_vsubp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2002     pub fn xmm_vsubp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2003         let dst: WritableXmm = dst.map(|r| r.into());
2004         let inst = match size {
2005             OperandSize::S32 => asm::inst::vsubps_b::new(dst, src1, src2).into(),
2006             OperandSize::S64 => asm::inst::vsubpd_b::new(dst, src1, src2).into(),
2007             _ => unimplemented!(),
2008         };
2009         self.emit(Inst::External { inst });
2010     }
2011 
2012     /// Subtract integers in vector `src1` from integers in vector `src2`.
xmm_vpsub_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2013     pub fn xmm_vpsub_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2014         let dst: WritableXmm = dst.map(|r| r.into());
2015         let inst = match size {
2016             OperandSize::S8 => asm::inst::vpsubb_b::new(dst, src1, src2).into(),
2017             OperandSize::S16 => asm::inst::vpsubw_b::new(dst, src1, src2).into(),
2018             OperandSize::S32 => asm::inst::vpsubd_b::new(dst, src1, src2).into(),
2019             OperandSize::S64 => asm::inst::vpsubq_b::new(dst, src1, src2).into(),
2020             _ => unimplemented!(),
2021         };
2022         self.emit(Inst::External { inst });
2023     }
2024 
2025     /// Subtract unsigned integers with unsigned saturation.
xmm_vpsubus_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, size: OperandSize)2026     pub fn xmm_vpsubus_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, size: OperandSize) {
2027         let dst: WritableXmm = dst.map(|r| r.into());
2028         let inst = match size {
2029             OperandSize::S8 => asm::inst::vpsubusb_b::new(dst, src1, src2).into(),
2030             OperandSize::S16 => asm::inst::vpsubusw_b::new(dst, src1, src2).into(),
2031             _ => unimplemented!(),
2032         };
2033         self.emit(Inst::External { inst });
2034     }
2035 
2036     /// Subtract signed integers with signed saturation.
xmm_vpsubs_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, size: OperandSize)2037     pub fn xmm_vpsubs_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, size: OperandSize) {
2038         let dst: WritableXmm = dst.map(|r| r.into());
2039         let inst = match size {
2040             OperandSize::S8 => asm::inst::vpsubsb_b::new(dst, src1, src2).into(),
2041             OperandSize::S16 => asm::inst::vpsubsw_b::new(dst, src1, src2).into(),
2042             _ => unimplemented!(),
2043         };
2044         self.emit(Inst::External { inst });
2045     }
2046 
2047     /// Add floats in vector `src1` to floats in vector `src2`.
xmm_vaddp_rrm( &mut self, src1: Reg, src2: &Address, dst: WritableReg, size: OperandSize, )2048     pub fn xmm_vaddp_rrm(
2049         &mut self,
2050         src1: Reg,
2051         src2: &Address,
2052         dst: WritableReg,
2053         size: OperandSize,
2054     ) {
2055         let dst: WritableXmm = dst.map(|r| r.into());
2056         let address = Self::to_synthetic_amode(src2, MemFlags::trusted());
2057         let inst = match size {
2058             OperandSize::S32 => asm::inst::vaddps_b::new(dst, src1, address).into(),
2059             OperandSize::S64 => asm::inst::vaddpd_b::new(dst, src1, address).into(),
2060             _ => unimplemented!(),
2061         };
2062         self.emit(Inst::External { inst });
2063     }
2064 
2065     /// Add floats in vector `src1` to floats in vector `src2`.
xmm_vaddp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2066     pub fn xmm_vaddp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2067         let dst: WritableXmm = dst.map(|r| r.into());
2068         let inst = match size {
2069             OperandSize::S32 => asm::inst::vaddps_b::new(dst, src1, src2).into(),
2070             OperandSize::S64 => asm::inst::vaddpd_b::new(dst, src1, src2).into(),
2071             _ => unimplemented!(),
2072         };
2073         self.emit(Inst::External { inst });
2074     }
2075 
2076     /// Compare vector register `lhs` with a vector of integers in `rhs` for
2077     /// equality between packed integers and write the resulting vector into
2078     /// `dst`.
xmm_vpcmpeq_rrm( &mut self, dst: WritableReg, lhs: Reg, address: &Address, size: OperandSize, )2079     pub fn xmm_vpcmpeq_rrm(
2080         &mut self,
2081         dst: WritableReg,
2082         lhs: Reg,
2083         address: &Address,
2084         size: OperandSize,
2085     ) {
2086         let dst: WritableXmm = dst.map(|r| r.into());
2087         let address = Self::to_synthetic_amode(address, MemFlags::trusted());
2088         let inst = match size {
2089             OperandSize::S8 => asm::inst::vpcmpeqb_b::new(dst, lhs, address).into(),
2090             OperandSize::S16 => asm::inst::vpcmpeqw_b::new(dst, lhs, address).into(),
2091             OperandSize::S32 => asm::inst::vpcmpeqd_b::new(dst, lhs, address).into(),
2092             OperandSize::S64 => asm::inst::vpcmpeqq_b::new(dst, lhs, address).into(),
2093             _ => unimplemented!(),
2094         };
2095         self.emit(Inst::External { inst });
2096     }
2097 
2098     /// Compare vector registers `lhs` and `rhs` for equality between packed
2099     /// integers and write the resulting vector into `dst`.
xmm_vpcmpeq_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize)2100     pub fn xmm_vpcmpeq_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
2101         let dst: WritableXmm = dst.map(|r| r.into());
2102         let inst = match size {
2103             OperandSize::S8 => asm::inst::vpcmpeqb_b::new(dst, lhs, rhs).into(),
2104             OperandSize::S16 => asm::inst::vpcmpeqw_b::new(dst, lhs, rhs).into(),
2105             OperandSize::S32 => asm::inst::vpcmpeqd_b::new(dst, lhs, rhs).into(),
2106             OperandSize::S64 => asm::inst::vpcmpeqq_b::new(dst, lhs, rhs).into(),
2107             _ => unimplemented!(),
2108         };
2109         self.emit(Inst::External { inst });
2110     }
2111 
2112     /// Performs a greater than comparison with vectors of signed integers in
2113     /// `lhs` and `rhs` and puts the results in `dst`.
xmm_vpcmpgt_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize)2114     pub fn xmm_vpcmpgt_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
2115         let dst: WritableXmm = dst.map(|r| r.into());
2116         let inst = match size {
2117             OperandSize::S8 => asm::inst::vpcmpgtb_b::new(dst, lhs, rhs).into(),
2118             OperandSize::S16 => asm::inst::vpcmpgtw_b::new(dst, lhs, rhs).into(),
2119             OperandSize::S32 => asm::inst::vpcmpgtd_b::new(dst, lhs, rhs).into(),
2120             OperandSize::S64 => asm::inst::vpcmpgtq_b::new(dst, lhs, rhs).into(),
2121             _ => unimplemented!(),
2122         };
2123         self.emit(Inst::External { inst });
2124     }
2125 
2126     /// Performs a max operation with vectors of signed integers in `lhs` and
2127     /// `rhs` and puts the results in `dst`.
xmm_vpmaxs_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize)2128     pub fn xmm_vpmaxs_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
2129         let dst: WritableXmm = dst.map(|r| r.into());
2130         let inst = match size {
2131             OperandSize::S8 => asm::inst::vpmaxsb_b::new(dst, lhs, rhs).into(),
2132             OperandSize::S16 => asm::inst::vpmaxsw_b::new(dst, lhs, rhs).into(),
2133             OperandSize::S32 => asm::inst::vpmaxsd_b::new(dst, lhs, rhs).into(),
2134             _ => unimplemented!(),
2135         };
2136         self.emit(Inst::External { inst });
2137     }
2138 
2139     /// Performs a max operation with vectors of unsigned integers in `lhs` and
2140     /// `rhs` and puts the results in `dst`.
xmm_vpmaxu_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize)2141     pub fn xmm_vpmaxu_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
2142         let dst: WritableXmm = dst.map(|r| r.into());
2143         let inst = match size {
2144             OperandSize::S8 => asm::inst::vpmaxub_b::new(dst, lhs, rhs).into(),
2145             OperandSize::S16 => asm::inst::vpmaxuw_b::new(dst, lhs, rhs).into(),
2146             OperandSize::S32 => asm::inst::vpmaxud_b::new(dst, lhs, rhs).into(),
2147             _ => unimplemented!(),
2148         };
2149         self.emit(Inst::External { inst });
2150     }
2151 
2152     /// Performs a min operation with vectors of signed integers in `lhs` and
2153     /// `rhs` and puts the results in `dst`.
xmm_vpmins_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize)2154     pub fn xmm_vpmins_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
2155         let dst: WritableXmm = dst.map(|r| r.into());
2156         let inst = match size {
2157             OperandSize::S8 => asm::inst::vpminsb_b::new(dst, lhs, rhs).into(),
2158             OperandSize::S16 => asm::inst::vpminsw_b::new(dst, lhs, rhs).into(),
2159             OperandSize::S32 => asm::inst::vpminsd_b::new(dst, lhs, rhs).into(),
2160             _ => unimplemented!(),
2161         };
2162         self.emit(Inst::External { inst });
2163     }
2164 
2165     /// Performs a min operation with vectors of unsigned integers in `lhs` and
2166     /// `rhs` and puts the results in `dst`.
xmm_vpminu_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize)2167     pub fn xmm_vpminu_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
2168         let dst: WritableXmm = dst.map(|r| r.into());
2169         let inst = match size {
2170             OperandSize::S8 => asm::inst::vpminub_b::new(dst, lhs, rhs).into(),
2171             OperandSize::S16 => asm::inst::vpminuw_b::new(dst, lhs, rhs).into(),
2172             OperandSize::S32 => asm::inst::vpminud_b::new(dst, lhs, rhs).into(),
2173             _ => unimplemented!(),
2174         };
2175         self.emit(Inst::External { inst });
2176     }
2177 
2178     /// Performs a comparison operation between vectors of floats in `lhs` and
2179     /// `rhs` and puts the results in `dst`.
xmm_vcmpp_rrr( &mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize, kind: VcmpKind, )2180     pub fn xmm_vcmpp_rrr(
2181         &mut self,
2182         dst: WritableReg,
2183         lhs: Reg,
2184         rhs: Reg,
2185         size: OperandSize,
2186         kind: VcmpKind,
2187     ) {
2188         let dst: WritableXmm = dst.map(|r| r.into());
2189         let imm = match kind {
2190             VcmpKind::Eq => 0,
2191             VcmpKind::Lt => 1,
2192             VcmpKind::Le => 2,
2193             VcmpKind::Unord => 3,
2194             VcmpKind::Ne => 4,
2195         };
2196         let inst = match size {
2197             OperandSize::S32 => asm::inst::vcmpps_b::new(dst, lhs, rhs, imm).into(),
2198             OperandSize::S64 => asm::inst::vcmppd_b::new(dst, lhs, rhs, imm).into(),
2199             _ => unimplemented!(),
2200         };
2201         self.emit(Inst::External { inst });
2202     }
2203 
2204     /// Performs a subtraction on two vectors of floats and puts the results in
2205     /// `dst`.
xmm_vsub_rrm(&mut self, src1: Reg, src2: &Address, dst: WritableReg, size: OperandSize)2206     pub fn xmm_vsub_rrm(&mut self, src1: Reg, src2: &Address, dst: WritableReg, size: OperandSize) {
2207         let dst: WritableXmm = dst.map(|r| r.into());
2208         let address = Self::to_synthetic_amode(src2, MemFlags::trusted());
2209         let inst = match size {
2210             OperandSize::S64 => asm::inst::vsubpd_b::new(dst, src1, address).into(),
2211             _ => unimplemented!(),
2212         };
2213         self.emit(Inst::External { inst });
2214     }
2215 
2216     /// Performs a subtraction on two vectors of floats and puts the results in
2217     /// `dst`.
xmm_vsub_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2218     pub fn xmm_vsub_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2219         let dst: WritableXmm = dst.map(|r| r.into());
2220         let inst = match size {
2221             OperandSize::S32 => asm::inst::vsubps_b::new(dst, src1, src2).into(),
2222             OperandSize::S64 => asm::inst::vsubpd_b::new(dst, src1, src2).into(),
2223             _ => unimplemented!(),
2224         };
2225         self.emit(Inst::External { inst });
2226     }
2227 
2228     /// Converts a vector of signed integers into a vector of narrower integers
2229     /// using saturation to handle overflow.
xmm_vpackss_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2230     pub fn xmm_vpackss_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2231         let dst: WritableXmm = dst.map(|r| r.into());
2232         let inst = match size {
2233             OperandSize::S8 => asm::inst::vpacksswb_b::new(dst, src1, src2).into(),
2234             OperandSize::S16 => asm::inst::vpackssdw_b::new(dst, src1, src2).into(),
2235             _ => unimplemented!(),
2236         };
2237         self.emit(Inst::External { inst });
2238     }
2239 
2240     /// Converts a vector of unsigned integers into a vector of narrower
2241     /// integers using saturation to handle overflow.
xmm_vpackus_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2242     pub fn xmm_vpackus_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2243         let dst: WritableXmm = dst.map(|r| r.into());
2244         let inst = match size {
2245             OperandSize::S8 => asm::inst::vpackuswb_b::new(dst, src1, src2).into(),
2246             OperandSize::S16 => asm::inst::vpackusdw_b::new(dst, src1, src2).into(),
2247             _ => unimplemented!(),
2248         };
2249         self.emit(Inst::External { inst });
2250     }
2251 
2252     /// Concatenates `src1` and `src2` and shifts right by `imm` and puts
2253     /// result in `dst`.
xmm_vpalignr_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, imm: u8)2254     pub fn xmm_vpalignr_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, imm: u8) {
2255         let dst: WritableXmm = dst.map(|r| r.into());
2256         let inst = asm::inst::vpalignr_b::new(dst, src1, src2, imm).into();
2257         self.emit(Inst::External { inst });
2258     }
2259 
2260     /// Takes the lower lanes of vectors of floats in `src1` and `src2` and
2261     /// interleaves them in `dst`.
xmm_vunpcklp_rrm( &mut self, src1: Reg, src2: &Address, dst: WritableReg, size: OperandSize, )2262     pub fn xmm_vunpcklp_rrm(
2263         &mut self,
2264         src1: Reg,
2265         src2: &Address,
2266         dst: WritableReg,
2267         size: OperandSize,
2268     ) {
2269         let dst: WritableXmm = dst.map(|r| r.into());
2270         let address = Self::to_synthetic_amode(src2, MemFlags::trusted());
2271         let inst = match size {
2272             OperandSize::S32 => asm::inst::vunpcklps_b::new(dst, src1, address).into(),
2273             _ => unimplemented!(),
2274         };
2275         self.emit(Inst::External { inst });
2276     }
2277 
2278     /// Unpacks and interleaves high order data of floats in `src1` and `src2`
2279     /// and puts the results in `dst`.
xmm_vunpckhp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2280     pub fn xmm_vunpckhp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2281         let dst: WritableXmm = dst.map(|r| r.into());
2282         let inst = match size {
2283             OperandSize::S32 => asm::inst::vunpckhps_b::new(dst, src1, src2).into(),
2284             _ => unimplemented!(),
2285         };
2286         self.emit(Inst::External { inst });
2287     }
2288 
2289     /// Unpacks and interleaves the lower lanes of vectors of integers in `src1`
2290     /// and `src2` and puts the results in `dst`.
xmm_vpunpckl_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2291     pub fn xmm_vpunpckl_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2292         let dst: WritableXmm = dst.map(|r| r.into());
2293         let inst = match size {
2294             OperandSize::S8 => asm::inst::vpunpcklbw_b::new(dst, src1, src2).into(),
2295             OperandSize::S16 => asm::inst::vpunpcklwd_b::new(dst, src1, src2).into(),
2296             _ => unimplemented!(),
2297         };
2298         self.emit(Inst::External { inst });
2299     }
2300 
2301     /// Unpacks and interleaves the higher lanes of vectors of integers in
2302     /// `src1` and `src2` and puts the results in `dst`.
xmm_vpunpckh_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2303     pub fn xmm_vpunpckh_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2304         let dst: WritableXmm = dst.map(|r| r.into());
2305         let inst = match size {
2306             OperandSize::S8 => asm::inst::vpunpckhbw_b::new(dst, src1, src2).into(),
2307             OperandSize::S16 => asm::inst::vpunpckhwd_b::new(dst, src1, src2).into(),
2308             _ => unimplemented!(),
2309         };
2310         self.emit(Inst::External { inst });
2311     }
2312 
vpmullq(&mut self, src1: Reg, src2: Reg, dst: WritableReg)2313     pub(crate) fn vpmullq(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
2314         let dst: WritableXmm = dst.map(|r| r.into());
2315         let inst = asm::inst::vpmullq_c::new(dst, src1, src2).into();
2316         self.emit(Inst::External { inst });
2317     }
2318 
2319     /// Creates a mask made up of the most significant bit of each byte of
2320     /// `src` and stores the result in `dst`.
xmm_vpmovmsk_rr( &mut self, src: Reg, dst: WritableReg, src_size: OperandSize, dst_size: OperandSize, )2321     pub fn xmm_vpmovmsk_rr(
2322         &mut self,
2323         src: Reg,
2324         dst: WritableReg,
2325         src_size: OperandSize,
2326         dst_size: OperandSize,
2327     ) {
2328         assert_eq!(dst_size, OperandSize::S32);
2329         let dst: WritableGpr = dst.map(|r| r.into());
2330         let inst = match src_size {
2331             OperandSize::S8 => asm::inst::vpmovmskb_rm::new(dst, src).into(),
2332             _ => unimplemented!(),
2333         };
2334 
2335         self.emit(Inst::External { inst });
2336     }
2337 
2338     /// Creates a mask made up of the most significant bit of each byte of
2339     /// in `src` and stores the result in `dst`.
xmm_vmovskp_rr( &mut self, src: Reg, dst: WritableReg, src_size: OperandSize, dst_size: OperandSize, )2340     pub fn xmm_vmovskp_rr(
2341         &mut self,
2342         src: Reg,
2343         dst: WritableReg,
2344         src_size: OperandSize,
2345         dst_size: OperandSize,
2346     ) {
2347         assert_eq!(dst_size, OperandSize::S32);
2348         let dst: WritableGpr = dst.map(|r| r.into());
2349         let inst = match src_size {
2350             OperandSize::S32 => asm::inst::vmovmskps_rm::new(dst, src).into(),
2351             OperandSize::S64 => asm::inst::vmovmskpd_rm::new(dst, src).into(),
2352             _ => unimplemented!(),
2353         };
2354 
2355         self.emit(Inst::External { inst });
2356     }
2357 
2358     /// Compute the absolute value of elements in vector `src` and put the
2359     /// results in `dst`.
xmm_vpabs_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)2360     pub fn xmm_vpabs_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
2361         let dst: WritableXmm = dst.map(|r| r.into());
2362         let inst = match size {
2363             OperandSize::S8 => asm::inst::vpabsb_a::new(dst, src).into(),
2364             OperandSize::S16 => asm::inst::vpabsw_a::new(dst, src).into(),
2365             OperandSize::S32 => asm::inst::vpabsd_a::new(dst, src).into(),
2366             _ => unimplemented!(),
2367         };
2368         self.emit(Inst::External { inst });
2369     }
2370 
2371     /// Arithmetically (sign preserving) right shift on vector in `src` by
2372     /// `amount` with result written to `dst`.
xmm_vpsra_rrr(&mut self, src: Reg, amount: Reg, dst: WritableReg, size: OperandSize)2373     pub fn xmm_vpsra_rrr(&mut self, src: Reg, amount: Reg, dst: WritableReg, size: OperandSize) {
2374         let dst: WritableXmm = dst.map(|r| r.into());
2375         let inst = match size {
2376             OperandSize::S16 => asm::inst::vpsraw_c::new(dst, src, amount).into(),
2377             OperandSize::S32 => asm::inst::vpsrad_c::new(dst, src, amount).into(),
2378             _ => unimplemented!(),
2379         };
2380         self.emit(Inst::External { inst });
2381     }
2382 
2383     /// Arithmetically (sign preserving) right shift on vector in `src` by
2384     /// `imm` with result written to `dst`.
xmm_vpsra_rri(&mut self, src: Reg, dst: WritableReg, imm: u32, size: OperandSize)2385     pub fn xmm_vpsra_rri(&mut self, src: Reg, dst: WritableReg, imm: u32, size: OperandSize) {
2386         let dst: WritableXmm = dst.map(|r| r.into());
2387         let imm = u8::try_from(imm).expect("immediate must fit in 8 bits");
2388         let inst = match size {
2389             OperandSize::S32 => asm::inst::vpsrad_d::new(dst, src, imm).into(),
2390             _ => unimplemented!(),
2391         };
2392         self.emit(Inst::External { inst });
2393     }
2394 
2395     /// Shift vector data left by `imm`.
xmm_vpsll_rri(&mut self, src: Reg, dst: WritableReg, imm: u32, size: OperandSize)2396     pub fn xmm_vpsll_rri(&mut self, src: Reg, dst: WritableReg, imm: u32, size: OperandSize) {
2397         let dst: WritableXmm = dst.map(|r| r.into());
2398         let imm = u8::try_from(imm).expect("immediate must fit in 8 bits");
2399         let inst = match size {
2400             OperandSize::S32 => asm::inst::vpslld_d::new(dst, src, imm).into(),
2401             OperandSize::S64 => asm::inst::vpsllq_d::new(dst, src, imm).into(),
2402             _ => unimplemented!(),
2403         };
2404         self.emit(Inst::External { inst });
2405     }
2406 
2407     /// Shift vector data left by `amount`.
xmm_vpsll_rrr(&mut self, src: Reg, amount: Reg, dst: WritableReg, size: OperandSize)2408     pub fn xmm_vpsll_rrr(&mut self, src: Reg, amount: Reg, dst: WritableReg, size: OperandSize) {
2409         let dst: WritableXmm = dst.map(|r| r.into());
2410         let inst = match size {
2411             OperandSize::S16 => asm::inst::vpsllw_c::new(dst, src, amount).into(),
2412             OperandSize::S32 => asm::inst::vpslld_c::new(dst, src, amount).into(),
2413             OperandSize::S64 => asm::inst::vpsllq_c::new(dst, src, amount).into(),
2414             _ => unimplemented!(),
2415         };
2416         self.emit(Inst::External { inst });
2417     }
2418 
2419     /// Shift vector data right by `imm`.
xmm_vpsrl_rri(&mut self, src: Reg, dst: WritableReg, imm: u32, size: OperandSize)2420     pub fn xmm_vpsrl_rri(&mut self, src: Reg, dst: WritableReg, imm: u32, size: OperandSize) {
2421         let dst: WritableXmm = dst.map(|r| r.into());
2422         let imm = u8::try_from(imm).expect("immediate must fit in 8 bits");
2423         let inst = match size {
2424             OperandSize::S16 => asm::inst::vpsrlw_d::new(dst, src, imm).into(),
2425             OperandSize::S32 => asm::inst::vpsrld_d::new(dst, src, imm).into(),
2426             OperandSize::S64 => asm::inst::vpsrlq_d::new(dst, src, imm).into(),
2427             _ => unimplemented!(),
2428         };
2429         self.emit(Inst::External { inst });
2430     }
2431 
2432     /// Shift vector data right by `amount`.
xmm_vpsrl_rrr(&mut self, src: Reg, amount: Reg, dst: WritableReg, size: OperandSize)2433     pub fn xmm_vpsrl_rrr(&mut self, src: Reg, amount: Reg, dst: WritableReg, size: OperandSize) {
2434         let dst: WritableXmm = dst.map(|r| r.into());
2435         let inst = match size {
2436             OperandSize::S16 => asm::inst::vpsrlw_c::new(dst, src, amount).into(),
2437             OperandSize::S32 => asm::inst::vpsrld_c::new(dst, src, amount).into(),
2438             OperandSize::S64 => asm::inst::vpsrlq_c::new(dst, src, amount).into(),
2439             _ => unimplemented!(),
2440         };
2441         self.emit(Inst::External { inst });
2442     }
2443 
2444     /// Perform an `and` operation on vectors of floats in `src1` and `src2`
2445     /// and put the results in `dst`.
xmm_vandp_rrm( &mut self, src1: Reg, src2: &Address, dst: WritableReg, size: OperandSize, )2446     pub fn xmm_vandp_rrm(
2447         &mut self,
2448         src1: Reg,
2449         src2: &Address,
2450         dst: WritableReg,
2451         size: OperandSize,
2452     ) {
2453         let dst: WritableXmm = dst.map(|r| r.into());
2454         let address = Self::to_synthetic_amode(src2, MemFlags::trusted());
2455         let inst = match size {
2456             OperandSize::S32 => asm::inst::vandps_b::new(dst, src1, address).into(),
2457             OperandSize::S64 => asm::inst::vandpd_b::new(dst, src1, address).into(),
2458             _ => unimplemented!(),
2459         };
2460         self.emit(Inst::External { inst });
2461     }
2462 
2463     /// Perform an `and` operation on vectors of floats in `src1` and `src2`
2464     /// and put the results in `dst`.
xmm_vandp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2465     pub fn xmm_vandp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2466         let dst: WritableXmm = dst.map(|r| r.into());
2467         let inst = match size {
2468             OperandSize::S32 => asm::inst::vandps_b::new(dst, src1, src2).into(),
2469             OperandSize::S64 => asm::inst::vandpd_b::new(dst, src1, src2).into(),
2470             _ => unimplemented!(),
2471         };
2472         self.emit(Inst::External { inst });
2473     }
2474 
2475     /// Performs a bitwise `and` operation on the vectors in `src1` and `src2`
2476     /// and stores the results in `dst`.
xmm_vpand_rrm(&mut self, src1: Reg, src2: &Address, dst: WritableReg)2477     pub fn xmm_vpand_rrm(&mut self, src1: Reg, src2: &Address, dst: WritableReg) {
2478         let dst: WritableXmm = dst.map(|r| r.into());
2479         let address = Self::to_synthetic_amode(&src2, MemFlags::trusted());
2480         let inst = asm::inst::vpand_b::new(dst, src1, address).into();
2481         self.emit(Inst::External { inst });
2482     }
2483 
2484     /// Performs a bitwise `and` operation on the vectors in `src1` and `src2`
2485     /// and stores the results in `dst`.
xmm_vpand_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg)2486     pub fn xmm_vpand_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
2487         let dst: WritableXmm = dst.map(|r| r.into());
2488         let inst = asm::inst::vpand_b::new(dst, src1, src2).into();
2489         self.emit(Inst::External { inst });
2490     }
2491 
2492     /// Perform an `and not` operation on vectors of floats in `src1` and
2493     /// `src2` and put the results in `dst`.
xmm_vandnp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2494     pub fn xmm_vandnp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2495         let dst: WritableXmm = dst.map(|r| r.into());
2496         let inst = match size {
2497             OperandSize::S32 => asm::inst::vandnps_b::new(dst, src1, src2).into(),
2498             OperandSize::S64 => asm::inst::vandnpd_b::new(dst, src1, src2).into(),
2499             _ => unimplemented!(),
2500         };
2501         self.emit(Inst::External { inst });
2502     }
2503 
2504     /// Perform an `and not` operation on vectors in `src1` and `src2` and put
2505     /// the results in `dst`.
xmm_vpandn_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg)2506     pub fn xmm_vpandn_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
2507         let dst: WritableXmm = dst.map(|r| r.into());
2508         let inst = asm::inst::vpandn_b::new(dst, src1, src2).into();
2509         self.emit(Inst::External { inst });
2510     }
2511 
2512     /// Perform an or operation for the vectors of floats in `src1` and `src2`
2513     /// and put the results in `dst`.
xmm_vorp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2514     pub fn xmm_vorp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2515         let dst: WritableXmm = dst.map(|r| r.into());
2516         let inst = match size {
2517             OperandSize::S32 => asm::inst::vorps_b::new(dst, src1, src2).into(),
2518             OperandSize::S64 => asm::inst::vorpd_b::new(dst, src1, src2).into(),
2519             _ => unimplemented!(),
2520         };
2521         self.emit(Inst::External { inst });
2522     }
2523 
2524     /// Bitwise OR of `src1` and `src2`.
xmm_vpor_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg)2525     pub fn xmm_vpor_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg) {
2526         let dst: WritableXmm = dst.map(|r| r.into());
2527         let inst = asm::inst::vpor_b::new(dst, src1, src2).into();
2528         self.emit(Inst::External { inst });
2529     }
2530 
2531     /// Bitwise logical xor of vectors of floats in `src1` and `src2` and puts
2532     /// the results in `dst`.
xmm_vxorp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2533     pub fn xmm_vxorp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2534         let dst: WritableXmm = dst.map(|r| r.into());
2535         let inst = match size {
2536             OperandSize::S32 => asm::inst::vxorps_b::new(dst, src1, src2).into(),
2537             OperandSize::S64 => asm::inst::vxorpd_b::new(dst, src1, src2).into(),
2538             _ => unimplemented!(),
2539         };
2540         self.emit(Inst::External { inst });
2541     }
2542 
2543     /// Perform a logical on vector in `src` and in `address` and put the
2544     /// results in `dst`.
xmm_vpxor_rmr(&mut self, src: Reg, address: &Address, dst: WritableReg)2545     pub fn xmm_vpxor_rmr(&mut self, src: Reg, address: &Address, dst: WritableReg) {
2546         let dst: WritableXmm = dst.map(|r| r.into());
2547         let address = Self::to_synthetic_amode(address, MemFlags::trusted());
2548         let inst = asm::inst::vpxor_b::new(dst, src, address).into();
2549         self.emit(Inst::External { inst });
2550     }
2551 
2552     /// Perform a logical on vectors in `src1` and `src2` and put the results in
2553     /// `dst`.
xmm_vpxor_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg)2554     pub fn xmm_vpxor_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
2555         let dst: WritableXmm = dst.map(|r| r.into());
2556         let inst = asm::inst::vpxor_b::new(dst, src1, src2).into();
2557         self.emit(Inst::External { inst });
2558     }
2559 
2560     /// Perform a max operation across two vectors of floats and put the
2561     /// results in `dst`.
xmm_vmaxp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2562     pub fn xmm_vmaxp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2563         let dst: WritableXmm = dst.map(|r| r.into());
2564         let inst = match size {
2565             OperandSize::S32 => asm::inst::vmaxps_b::new(dst, src1, src2).into(),
2566             OperandSize::S64 => asm::inst::vmaxpd_b::new(dst, src1, src2).into(),
2567             _ => unimplemented!(),
2568         };
2569         self.emit(Inst::External { inst });
2570     }
2571 
2572     // Perform a min operation across two vectors of floats and put the
2573     // results in `dst`.
xmm_vminp_rrm( &mut self, src1: Reg, src2: &Address, dst: WritableReg, size: OperandSize, )2574     pub fn xmm_vminp_rrm(
2575         &mut self,
2576         src1: Reg,
2577         src2: &Address,
2578         dst: WritableReg,
2579         size: OperandSize,
2580     ) {
2581         let dst: WritableXmm = dst.map(|r| r.into());
2582         let address = Self::to_synthetic_amode(src2, MemFlags::trusted());
2583         let inst = match size {
2584             OperandSize::S32 => asm::inst::vminps_b::new(dst, src1, address).into(),
2585             OperandSize::S64 => asm::inst::vminpd_b::new(dst, src1, address).into(),
2586             _ => unimplemented!(),
2587         };
2588         self.emit(Inst::External { inst });
2589     }
2590 
2591     // Perform a min operation across two vectors of floats and put the
2592     // results in `dst`.
xmm_vminp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2593     pub fn xmm_vminp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2594         let dst: WritableXmm = dst.map(|r| r.into());
2595         let inst = match size {
2596             OperandSize::S32 => asm::inst::vminps_b::new(dst, src1, src2).into(),
2597             OperandSize::S64 => asm::inst::vminpd_b::new(dst, src1, src2).into(),
2598             _ => unimplemented!(),
2599         };
2600         self.emit(Inst::External { inst });
2601     }
2602 
2603     // Round a vector of floats.
xmm_vroundp_rri( &mut self, src: Reg, dst: WritableReg, mode: VroundMode, size: OperandSize, )2604     pub fn xmm_vroundp_rri(
2605         &mut self,
2606         src: Reg,
2607         dst: WritableReg,
2608         mode: VroundMode,
2609         size: OperandSize,
2610     ) {
2611         let dst: WritableXmm = dst.map(|r| r.into());
2612         let imm = match mode {
2613             VroundMode::TowardNearest => 0,
2614             VroundMode::TowardNegativeInfinity => 1,
2615             VroundMode::TowardPositiveInfinity => 2,
2616             VroundMode::TowardZero => 3,
2617         };
2618 
2619         let inst = match size {
2620             OperandSize::S32 => asm::inst::vroundps_rmi::new(dst, src, imm).into(),
2621             OperandSize::S64 => asm::inst::vroundpd_rmi::new(dst, src, imm).into(),
2622             _ => unimplemented!(),
2623         };
2624 
2625         self.emit(Inst::External { inst });
2626     }
2627 
2628     /// Shuffle of vectors of floats.
xmm_vshufp_rrri( &mut self, src1: Reg, src2: Reg, dst: WritableReg, imm: u8, size: OperandSize, )2629     pub fn xmm_vshufp_rrri(
2630         &mut self,
2631         src1: Reg,
2632         src2: Reg,
2633         dst: WritableReg,
2634         imm: u8,
2635         size: OperandSize,
2636     ) {
2637         let dst: WritableXmm = dst.map(|r| r.into());
2638         let inst = match size {
2639             OperandSize::S32 => asm::inst::vshufps_b::new(dst, src1, src2, imm).into(),
2640             _ => unimplemented!(),
2641         };
2642         self.emit(Inst::External { inst });
2643     }
2644 
2645     /// Each lane in `src1` is multiplied by the corresponding lane in `src2`
2646     /// producing intermediate 32-bit operands. Each intermediate 32-bit
2647     /// operand is truncated to 18 most significant bits. Rounding is performed
2648     /// by adding 1 to the least significant bit of the 18-bit intermediate
2649     /// result. The 16 bits immediately to the right of the most significant
2650     /// bit of each 18-bit intermediate result is placed in each lane of `dst`.
xmm_vpmulhrs_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2651     pub fn xmm_vpmulhrs_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2652         let dst: WritableXmm = dst.map(|r| r.into());
2653         let inst = match size {
2654             OperandSize::S16 => asm::inst::vpmulhrsw_b::new(dst, src1, src2).into(),
2655             _ => unimplemented!(),
2656         };
2657         self.emit(Inst::External { inst });
2658     }
2659 
xmm_vpmuldq_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg)2660     pub fn xmm_vpmuldq_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
2661         let dst: WritableXmm = dst.map(|r| r.into());
2662         let inst = asm::inst::vpmuldq_b::new(dst, src1, src2).into();
2663         self.emit(Inst::External { inst });
2664     }
2665 
xmm_vpmuludq_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg)2666     pub fn xmm_vpmuludq_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
2667         let dst: WritableXmm = dst.map(|r| r.into());
2668         let inst = asm::inst::vpmuludq_b::new(dst, src1, src2).into();
2669         self.emit(Inst::External { inst });
2670     }
2671 
xmm_vpmull_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2672     pub fn xmm_vpmull_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2673         let dst: WritableXmm = dst.map(|r| r.into());
2674         let inst = match size {
2675             OperandSize::S16 => asm::inst::vpmullw_b::new(dst, src1, src2).into(),
2676             OperandSize::S32 => asm::inst::vpmulld_b::new(dst, src1, src2).into(),
2677             _ => unimplemented!(),
2678         };
2679         self.emit(Inst::External { inst });
2680     }
2681 
xmm_vmulp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2682     pub fn xmm_vmulp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2683         let dst: WritableXmm = dst.map(|r| r.into());
2684         let inst = match size {
2685             OperandSize::S32 => asm::inst::vmulps_b::new(dst, src1, src2).into(),
2686             OperandSize::S64 => asm::inst::vmulpd_b::new(dst, src1, src2).into(),
2687             _ => unimplemented!(),
2688         };
2689         self.emit(Inst::External { inst });
2690     }
2691 
2692     /// Perform an average operation for the vector of unsigned integers in
2693     /// `src1` and `src2` and put the results in `dst`.
xmm_vpavg_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2694     pub fn xmm_vpavg_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2695         let dst: WritableXmm = dst.map(|r| r.into());
2696         let inst = match size {
2697             OperandSize::S8 => asm::inst::vpavgb_b::new(dst, src1, src2).into(),
2698             OperandSize::S16 => asm::inst::vpavgw_b::new(dst, src1, src2).into(),
2699             _ => unimplemented!(),
2700         };
2701         self.emit(Inst::External { inst });
2702     }
2703 
2704     /// Divide the vector of floats in `src1` by the vector of floats in `src2`
2705     /// and put the results in `dst`.
xmm_vdivp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2706     pub fn xmm_vdivp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2707         let dst: WritableXmm = dst.map(|r| r.into());
2708         let inst = match size {
2709             OperandSize::S32 => asm::inst::vdivps_b::new(dst, src1, src2).into(),
2710             OperandSize::S64 => asm::inst::vdivpd_b::new(dst, src1, src2).into(),
2711             _ => unimplemented!(),
2712         };
2713         self.emit(Inst::External { inst });
2714     }
2715 
2716     /// Compute square roots of vector of floats in `src` and put the results
2717     /// in `dst`.
xmm_vsqrtp_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)2718     pub fn xmm_vsqrtp_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
2719         let dst: WritableXmm = dst.map(|r| r.into());
2720         let inst = match size {
2721             OperandSize::S32 => asm::inst::vsqrtps_b::new(dst, src).into(),
2722             OperandSize::S64 => asm::inst::vsqrtpd_b::new(dst, src).into(),
2723             _ => unimplemented!(),
2724         };
2725         self.emit(Inst::External { inst });
2726     }
2727 
2728     /// Multiply and add packed signed and unsigned bytes.
xmm_vpmaddubsw_rmr(&mut self, src: Reg, address: &Address, dst: WritableReg)2729     pub fn xmm_vpmaddubsw_rmr(&mut self, src: Reg, address: &Address, dst: WritableReg) {
2730         let dst: WritableXmm = dst.map(|r| r.into());
2731         let address = Self::to_synthetic_amode(address, MemFlags::trusted());
2732         let inst = asm::inst::vpmaddubsw_b::new(dst, src, address).into();
2733         self.emit(Inst::External { inst });
2734     }
2735 
2736     /// Multiply and add packed signed and unsigned bytes.
xmm_vpmaddubsw_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg)2737     pub fn xmm_vpmaddubsw_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
2738         let dst: WritableXmm = dst.map(|r| r.into());
2739         let inst = asm::inst::vpmaddubsw_b::new(dst, src1, src2).into();
2740         self.emit(Inst::External { inst });
2741     }
2742 
2743     /// Multiple and add packed integers.
xmm_vpmaddwd_rmr(&mut self, src: Reg, address: &Address, dst: WritableReg)2744     pub fn xmm_vpmaddwd_rmr(&mut self, src: Reg, address: &Address, dst: WritableReg) {
2745         let dst: WritableXmm = dst.map(|r| r.into());
2746         let address = Self::to_synthetic_amode(address, MemFlags::trusted());
2747         let inst = asm::inst::vpmaddwd_b::new(dst, src, address).into();
2748         self.emit(Inst::External { inst });
2749     }
2750 
2751     /// Multiple and add packed integers.
xmm_vpmaddwd_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg)2752     pub fn xmm_vpmaddwd_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
2753         let dst: WritableXmm = dst.map(|r| r.into());
2754         let inst = asm::inst::vpmaddwd_b::new(dst, src1, src2).into();
2755         self.emit(Inst::External { inst });
2756     }
2757 }
2758 
2759 /// Captures the region in a MachBuffer where an add-with-immediate instruction would be emitted,
2760 /// but the immediate is not yet known. Currently, this implementation expects a 32-bit immediate,
2761 /// so 8 and 16 bit operand sizes are not supported.
2762 pub(crate) struct PatchableAddToReg {
2763     /// The region to be patched in the [`MachBuffer`]. It must contain a valid add instruction
2764     /// sequence, accepting a 32-bit immediate.
2765     region: PatchRegion,
2766 
2767     /// The offset into the patchable region where the patchable constant begins.
2768     constant_offset: usize,
2769 }
2770 
2771 impl PatchableAddToReg {
2772     /// Create a new [`PatchableAddToReg`] by capturing a region in the output buffer where the
2773     /// add-with-immediate occurs. The [`MachBuffer`] will have and add-with-immediate instruction
2774     /// present in that region, though it will add `0` until the `::finalize` method is called.
2775     ///
2776     /// Currently this implementation expects to be able to patch a 32-bit immediate, which means
2777     /// that 8 and 16-bit addition cannot be supported.
new(reg: Reg, size: OperandSize, asm: &mut Assembler) -> Self2778     pub(crate) fn new(reg: Reg, size: OperandSize, asm: &mut Assembler) -> Self {
2779         let open = asm.buffer_mut().start_patchable();
2780         let start = asm.buffer().cur_offset();
2781 
2782         // Emit the opcode and register use for the add instruction.
2783         let reg = pair_gpr(Writable::from_reg(reg));
2784         let inst = match size {
2785             OperandSize::S32 => asm::inst::addl_mi::new(reg, 0_u32).into(),
2786             OperandSize::S64 => asm::inst::addq_mi_sxl::new(reg, 0_i32).into(),
2787             _ => {
2788                 panic!(
2789                     "{}-bit addition is not supported, please see the comment on PatchableAddToReg::new",
2790                     size.num_bits(),
2791                 )
2792             }
2793         };
2794         asm.emit(Inst::External { inst });
2795 
2796         // The offset to the constant is the width of what was just emitted
2797         // minus 4, the width of the 32-bit immediate.
2798         let constant_offset = usize::try_from(asm.buffer().cur_offset() - start - 4).unwrap();
2799 
2800         let region = asm.buffer_mut().end_patchable(open);
2801 
2802         Self {
2803             region,
2804             constant_offset,
2805         }
2806     }
2807 
2808     /// Patch the [`MachBuffer`] with the known constant to be added to the register. The final
2809     /// value is passed in as an i32, but the instruction encoding is fixed when
2810     /// [`PatchableAddToReg::new`] is called.
finalize(self, val: i32, buffer: &mut MachBuffer<Inst>)2811     pub(crate) fn finalize(self, val: i32, buffer: &mut MachBuffer<Inst>) {
2812         let slice = self.region.patch(buffer);
2813         debug_assert_eq!(slice.len(), self.constant_offset + 4);
2814         slice[self.constant_offset..].copy_from_slice(val.to_le_bytes().as_slice());
2815     }
2816 }
2817