1 //! Assembler library implementation for x64.
2
3 use crate::{
4 constant_pool::ConstantPool,
5 isa::{CallingConvention, reg::Reg},
6 masm::{
7 DivKind, Extend, ExtendKind, ExtendType, IntCmpKind, MulWideKind, OperandSize, RemKind,
8 RoundingMode, ShiftKind, Signed, V128ExtendKind, V128LoadExtendKind, Zero,
9 },
10 reg::writable,
11 };
12 use cranelift_codegen::{
13 CallInfo, Final, MachBuffer, MachBufferFinalized, MachInst, MachInstEmit, MachInstEmitState,
14 MachLabel, PatchRegion, Writable,
15 ir::{ExternalName, MemFlags, SourceLoc, TrapCode, Type, UserExternalNameRef, types},
16 isa::{
17 unwind::UnwindInst,
18 x64::{
19 AtomicRmwSeqOp, EmitInfo, EmitState, Inst,
20 args::{
21 self, Amode, CC, ExtMode, FromWritableReg, Gpr, GprMem, GprMemImm, RegMem,
22 RegMemImm, SyntheticAmode, WritableGpr, WritableXmm, Xmm, XmmMem, XmmMemImm,
23 },
24 external::{PairedGpr, PairedXmm},
25 settings as x64_settings,
26 },
27 },
28 settings,
29 };
30
31 use crate::reg::WritableReg;
32 use cranelift_assembler_x64 as asm;
33
34 use super::address::Address;
35 use smallvec::SmallVec;
36
37 // Conversions between winch-codegen x64 types and cranelift-codegen x64 types.
38
39 impl From<Reg> for RegMemImm {
from(reg: Reg) -> Self40 fn from(reg: Reg) -> Self {
41 RegMemImm::reg(reg.into())
42 }
43 }
44
45 impl From<Reg> for RegMem {
from(value: Reg) -> Self46 fn from(value: Reg) -> Self {
47 RegMem::Reg { reg: value.into() }
48 }
49 }
50
51 impl From<Reg> for WritableGpr {
from(reg: Reg) -> Self52 fn from(reg: Reg) -> Self {
53 let writable = Writable::from_reg(reg.into());
54 WritableGpr::from_writable_reg(writable).expect("valid writable gpr")
55 }
56 }
57
58 impl From<Reg> for WritableXmm {
from(reg: Reg) -> Self59 fn from(reg: Reg) -> Self {
60 let writable = Writable::from_reg(reg.into());
61 WritableXmm::from_writable_reg(writable).expect("valid writable xmm")
62 }
63 }
64
65 /// Convert a writable GPR register to the read-write pair expected by
66 /// `cranelift-codegen`.
pair_gpr(reg: WritableReg) -> PairedGpr67 fn pair_gpr(reg: WritableReg) -> PairedGpr {
68 assert!(reg.to_reg().is_int());
69 let read = Gpr::unwrap_new(reg.to_reg().into());
70 let write = WritableGpr::from_reg(reg.to_reg().into());
71 PairedGpr { read, write }
72 }
73
74 impl From<Reg> for asm::Gpr<Gpr> {
from(reg: Reg) -> Self75 fn from(reg: Reg) -> Self {
76 asm::Gpr::new(reg.into())
77 }
78 }
79
80 impl From<Reg> for asm::GprMem<Gpr, Gpr> {
from(reg: Reg) -> Self81 fn from(reg: Reg) -> Self {
82 asm::GprMem::Gpr(reg.into())
83 }
84 }
85
86 /// Convert a writable XMM register to the read-write pair expected by
87 /// `cranelift-codegen`.
pair_xmm(reg: WritableReg) -> PairedXmm88 fn pair_xmm(reg: WritableReg) -> PairedXmm {
89 assert!(reg.to_reg().is_float());
90 let read = Xmm::unwrap_new(reg.to_reg().into());
91 let write = WritableXmm::from_reg(reg.to_reg().into());
92 PairedXmm { read, write }
93 }
94
95 impl From<Reg> for asm::Xmm<Xmm> {
from(reg: Reg) -> Self96 fn from(reg: Reg) -> Self {
97 asm::Xmm::new(reg.into())
98 }
99 }
100
101 impl From<Reg> for asm::XmmMem<Xmm, Gpr> {
from(reg: Reg) -> Self102 fn from(reg: Reg) -> Self {
103 asm::XmmMem::Xmm(reg.into())
104 }
105 }
106
107 impl From<Reg> for Gpr {
from(reg: Reg) -> Self108 fn from(reg: Reg) -> Self {
109 Gpr::unwrap_new(reg.into())
110 }
111 }
112
113 impl From<Reg> for GprMem {
from(value: Reg) -> Self114 fn from(value: Reg) -> Self {
115 GprMem::unwrap_new(value.into())
116 }
117 }
118
119 impl From<Reg> for GprMemImm {
from(reg: Reg) -> Self120 fn from(reg: Reg) -> Self {
121 GprMemImm::unwrap_new(reg.into())
122 }
123 }
124
125 impl From<Reg> for Xmm {
from(reg: Reg) -> Self126 fn from(reg: Reg) -> Self {
127 Xmm::unwrap_new(reg.into())
128 }
129 }
130
131 impl From<Reg> for XmmMem {
from(value: Reg) -> Self132 fn from(value: Reg) -> Self {
133 XmmMem::unwrap_new(value.into())
134 }
135 }
136
137 impl From<Reg> for XmmMemImm {
from(value: Reg) -> Self138 fn from(value: Reg) -> Self {
139 XmmMemImm::unwrap_new(value.into())
140 }
141 }
142
143 impl From<OperandSize> for args::OperandSize {
from(size: OperandSize) -> Self144 fn from(size: OperandSize) -> Self {
145 match size {
146 OperandSize::S8 => Self::Size8,
147 OperandSize::S16 => Self::Size16,
148 OperandSize::S32 => Self::Size32,
149 OperandSize::S64 => Self::Size64,
150 s => panic!("Invalid operand size {s:?}"),
151 }
152 }
153 }
154
155 impl From<IntCmpKind> for CC {
from(value: IntCmpKind) -> Self156 fn from(value: IntCmpKind) -> Self {
157 match value {
158 IntCmpKind::Eq => CC::Z,
159 IntCmpKind::Ne => CC::NZ,
160 IntCmpKind::LtS => CC::L,
161 IntCmpKind::LtU => CC::B,
162 IntCmpKind::GtS => CC::NLE,
163 IntCmpKind::GtU => CC::NBE,
164 IntCmpKind::LeS => CC::LE,
165 IntCmpKind::LeU => CC::BE,
166 IntCmpKind::GeS => CC::NL,
167 IntCmpKind::GeU => CC::NB,
168 }
169 }
170 }
171
172 impl<T: ExtendType> From<Extend<T>> for ExtMode {
from(value: Extend<T>) -> Self173 fn from(value: Extend<T>) -> Self {
174 match value {
175 Extend::I32Extend8 => ExtMode::BL,
176 Extend::I32Extend16 => ExtMode::WL,
177 Extend::I64Extend8 => ExtMode::BQ,
178 Extend::I64Extend16 => ExtMode::WQ,
179 Extend::I64Extend32 => ExtMode::LQ,
180 Extend::__Kind(_) => unreachable!(),
181 }
182 }
183 }
184
185 impl From<ExtendKind> for ExtMode {
from(value: ExtendKind) -> Self186 fn from(value: ExtendKind) -> Self {
187 match value {
188 ExtendKind::Signed(s) => s.into(),
189 ExtendKind::Unsigned(u) => u.into(),
190 }
191 }
192 }
193
194 /// Kinds of extends supported by `vpmov`.
195 pub(super) enum VpmovKind {
196 /// Sign extends 8 lanes of 8-bit integers to 8 lanes of 16-bit integers.
197 E8x8S,
198 /// Zero extends 8 lanes of 8-bit integers to 8 lanes of 16-bit integers.
199 E8x8U,
200 /// Sign extends 4 lanes of 16-bit integers to 4 lanes of 32-bit integers.
201 E16x4S,
202 /// Zero extends 4 lanes of 16-bit integers to 4 lanes of 32-bit integers.
203 E16x4U,
204 /// Sign extends 2 lanes of 32-bit integers to 2 lanes of 64-bit integers.
205 E32x2S,
206 /// Zero extends 2 lanes of 32-bit integers to 2 lanes of 64-bit integers.
207 E32x2U,
208 }
209
210 impl From<V128LoadExtendKind> for VpmovKind {
from(value: V128LoadExtendKind) -> Self211 fn from(value: V128LoadExtendKind) -> Self {
212 match value {
213 V128LoadExtendKind::E8x8S => Self::E8x8S,
214 V128LoadExtendKind::E8x8U => Self::E8x8U,
215 V128LoadExtendKind::E16x4S => Self::E16x4S,
216 V128LoadExtendKind::E16x4U => Self::E16x4U,
217 V128LoadExtendKind::E32x2S => Self::E32x2S,
218 V128LoadExtendKind::E32x2U => Self::E32x2U,
219 }
220 }
221 }
222
223 impl From<V128ExtendKind> for VpmovKind {
from(value: V128ExtendKind) -> Self224 fn from(value: V128ExtendKind) -> Self {
225 match value {
226 V128ExtendKind::LowI8x16S | V128ExtendKind::HighI8x16S => Self::E8x8S,
227 V128ExtendKind::LowI8x16U => Self::E8x8U,
228 V128ExtendKind::LowI16x8S | V128ExtendKind::HighI16x8S => Self::E16x4S,
229 V128ExtendKind::LowI16x8U => Self::E16x4U,
230 V128ExtendKind::LowI32x4S | V128ExtendKind::HighI32x4S => Self::E32x2S,
231 V128ExtendKind::LowI32x4U => Self::E32x2U,
232 _ => unimplemented!(),
233 }
234 }
235 }
236
237 /// Kinds of comparisons supported by `vcmp`.
238 pub(super) enum VcmpKind {
239 /// Equal comparison.
240 Eq,
241 /// Not equal comparison.
242 Ne,
243 /// Less than comparison.
244 Lt,
245 /// Less than or equal comparison.
246 Le,
247 /// Unordered comparison. Sets result to all 1s if either source operand is
248 /// NaN.
249 Unord,
250 }
251
252 /// Kinds of conversions supported by `vcvt`.
253 pub(super) enum VcvtKind {
254 /// Converts 32-bit integers to 32-bit floats.
255 I32ToF32,
256 /// Converts doubleword integers to double precision floats.
257 I32ToF64,
258 /// Converts double precision floats to single precision floats.
259 F64ToF32,
260 // Converts double precision floats to 32-bit integers.
261 F64ToI32,
262 /// Converts single precision floats to double precision floats.
263 F32ToF64,
264 /// Converts single precision floats to 32-bit integers.
265 F32ToI32,
266 }
267
/// Rounding modes that `vround` can apply.
pub(crate) enum VroundMode {
    /// Round to the nearest value, with ties going to even.
    TowardNearest,
    /// Round down, toward negative infinity.
    TowardNegativeInfinity,
    /// Round up, toward positive infinity.
    TowardPositiveInfinity,
    /// Round toward zero.
    TowardZero,
}
279
280 /// Low level assembler implementation for x64.
281 pub(crate) struct Assembler {
282 /// The machine instruction buffer.
283 buffer: MachBuffer<Inst>,
284 /// Constant emission information.
285 emit_info: EmitInfo,
286 /// Emission state.
287 emit_state: EmitState,
288 /// x64 flags.
289 isa_flags: x64_settings::Flags,
290 /// Constant pool.
291 pool: ConstantPool,
292 }
293
294 impl Assembler {
295 /// Create a new x64 assembler.
new(shared_flags: settings::Flags, isa_flags: x64_settings::Flags) -> Self296 pub fn new(shared_flags: settings::Flags, isa_flags: x64_settings::Flags) -> Self {
297 Self {
298 buffer: MachBuffer::<Inst>::new(),
299 emit_state: Default::default(),
300 emit_info: EmitInfo::new(shared_flags, isa_flags.clone()),
301 pool: ConstantPool::new(),
302 isa_flags,
303 }
304 }
305
306 /// Get a mutable reference to underlying
307 /// machine buffer.
buffer_mut(&mut self) -> &mut MachBuffer<Inst>308 pub fn buffer_mut(&mut self) -> &mut MachBuffer<Inst> {
309 &mut self.buffer
310 }
311
312 /// Get a reference to the underlying machine buffer.
buffer(&self) -> &MachBuffer<Inst>313 pub fn buffer(&self) -> &MachBuffer<Inst> {
314 &self.buffer
315 }
316
317 /// Adds a constant to the constant pool and returns its address.
add_constant(&mut self, constant: &[u8]) -> Address318 pub fn add_constant(&mut self, constant: &[u8]) -> Address {
319 let handle = self.pool.register(constant, &mut self.buffer);
320 Address::constant(handle)
321 }
322
323 /// Load a floating point constant, using the constant pool.
load_fp_const(&mut self, dst: WritableReg, constant: &[u8], size: OperandSize)324 pub fn load_fp_const(&mut self, dst: WritableReg, constant: &[u8], size: OperandSize) {
325 let addr = self.add_constant(constant);
326 self.xmm_mov_mr(&addr, dst, size, MemFlags::trusted());
327 }
328
329 /// Return the emitted code.
finalize(mut self, loc: Option<SourceLoc>) -> MachBufferFinalized<Final>330 pub fn finalize(mut self, loc: Option<SourceLoc>) -> MachBufferFinalized<Final> {
331 let stencil = self
332 .buffer
333 .finish(&self.pool.constants(), self.emit_state.ctrl_plane_mut());
334 stencil.apply_base_srcloc(loc.unwrap_or_default())
335 }
336
emit(&mut self, inst: Inst)337 fn emit(&mut self, inst: Inst) {
338 inst.emit(&mut self.buffer, &self.emit_info, &mut self.emit_state);
339 }
340
to_synthetic_amode(addr: &Address, memflags: MemFlags) -> SyntheticAmode341 fn to_synthetic_amode(addr: &Address, memflags: MemFlags) -> SyntheticAmode {
342 match *addr {
343 Address::Offset { base, offset } => {
344 let amode = Amode::imm_reg(offset as i32, base.into()).with_flags(memflags);
345 SyntheticAmode::real(amode)
346 }
347 Address::Const(c) => SyntheticAmode::ConstantOffset(c),
348 Address::ImmRegRegShift {
349 simm32,
350 base,
351 index,
352 shift,
353 } => SyntheticAmode::Real(Amode::ImmRegRegShift {
354 simm32,
355 base: base.into(),
356 index: index.into(),
357 shift,
358 flags: memflags,
359 }),
360 }
361 }
362
363 /// Emit an unwind instruction.
unwind_inst(&mut self, inst: UnwindInst)364 pub fn unwind_inst(&mut self, inst: UnwindInst) {
365 self.emit(Inst::Unwind { inst })
366 }
367
368 /// Push register.
push_r(&mut self, reg: Reg)369 pub fn push_r(&mut self, reg: Reg) {
370 let inst = asm::inst::pushq_o::new(reg).into();
371 self.emit(Inst::External { inst });
372 }
373
374 /// Pop to register.
pop_r(&mut self, dst: WritableReg)375 pub fn pop_r(&mut self, dst: WritableReg) {
376 let writable: WritableGpr = dst.map(Into::into);
377 let inst = asm::inst::popq_o::new(writable).into();
378 self.emit(Inst::External { inst });
379 }
380
381 /// Return instruction.
ret(&mut self)382 pub fn ret(&mut self) {
383 let inst = asm::inst::retq_zo::new().into();
384 self.emit(Inst::External { inst });
385 }
386
387 /// Register-to-register move.
mov_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)388 pub fn mov_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
389 let dst: WritableGpr = dst.map(|r| r.into());
390 let inst = match size {
391 OperandSize::S8 => asm::inst::movb_mr::new(dst, src).into(),
392 OperandSize::S16 => asm::inst::movw_mr::new(dst, src).into(),
393 OperandSize::S32 => asm::inst::movl_mr::new(dst, src).into(),
394 OperandSize::S64 => asm::inst::movq_mr::new(dst, src).into(),
395 _ => unreachable!(),
396 };
397 self.emit(Inst::External { inst });
398 }
399
400 /// Register-to-memory move.
mov_rm(&mut self, src: Reg, addr: &Address, size: OperandSize, flags: MemFlags)401 pub fn mov_rm(&mut self, src: Reg, addr: &Address, size: OperandSize, flags: MemFlags) {
402 assert!(addr.is_offset());
403 let dst = Self::to_synthetic_amode(addr, flags);
404 let inst = match size {
405 OperandSize::S8 => asm::inst::movb_mr::new(dst, src).into(),
406 OperandSize::S16 => asm::inst::movw_mr::new(dst, src).into(),
407 OperandSize::S32 => asm::inst::movl_mr::new(dst, src).into(),
408 OperandSize::S64 => asm::inst::movq_mr::new(dst, src).into(),
409 _ => unreachable!(),
410 };
411 self.emit(Inst::External { inst });
412 }
413
414 /// Immediate-to-memory move.
mov_im(&mut self, src: i32, addr: &Address, size: OperandSize, flags: MemFlags)415 pub fn mov_im(&mut self, src: i32, addr: &Address, size: OperandSize, flags: MemFlags) {
416 assert!(addr.is_offset());
417 let dst = Self::to_synthetic_amode(addr, flags);
418 let inst = match size {
419 OperandSize::S8 => {
420 let src = i8::try_from(src).unwrap();
421 asm::inst::movb_mi::new(dst, src.cast_unsigned()).into()
422 }
423 OperandSize::S16 => {
424 let src = i16::try_from(src).unwrap();
425 asm::inst::movw_mi::new(dst, src.cast_unsigned()).into()
426 }
427 OperandSize::S32 => asm::inst::movl_mi::new(dst, src.cast_unsigned()).into(),
428 OperandSize::S64 => asm::inst::movq_mi_sxl::new(dst, src).into(),
429 _ => unreachable!(),
430 };
431 self.emit(Inst::External { inst });
432 }
433
434 /// Immediate-to-register move.
mov_ir(&mut self, imm: u64, dst: WritableReg, size: OperandSize)435 pub fn mov_ir(&mut self, imm: u64, dst: WritableReg, size: OperandSize) {
436 self.emit(Inst::imm(size.into(), imm, dst.map(Into::into)));
437 }
438
439 /// Zero-extend memory-to-register load.
movzx_mr( &mut self, addr: &Address, dst: WritableReg, ext: Option<Extend<Zero>>, memflags: MemFlags, )440 pub fn movzx_mr(
441 &mut self,
442 addr: &Address,
443 dst: WritableReg,
444 ext: Option<Extend<Zero>>,
445 memflags: MemFlags,
446 ) {
447 let src = Self::to_synthetic_amode(addr, memflags);
448
449 if let Some(ext) = ext {
450 let dst = WritableGpr::from_reg(dst.to_reg().into());
451 let inst = match ext.into() {
452 ExtMode::BL => asm::inst::movzbl_rm::new(dst, src).into(),
453 ExtMode::BQ => asm::inst::movzbq_rm::new(dst, src).into(),
454 ExtMode::WL => asm::inst::movzwl_rm::new(dst, src).into(),
455 ExtMode::WQ => asm::inst::movzwq_rm::new(dst, src).into(),
456 ExtMode::LQ => {
457 // This instruction selection may seem strange but is
458 // correct in 64-bit mode: section 3.4.1.1 of the Intel
459 // manual says that "32-bit operands generate a 32-bit
460 // result, zero-extended to a 64-bit result in the
461 // destination general-purpose register." This is applicable
462 // beyond `mov` but we use this fact to zero-extend `src`
463 // into `dst`.
464 asm::inst::movl_rm::new(dst, src).into()
465 }
466 };
467 self.emit(Inst::External { inst });
468 } else {
469 let dst = WritableGpr::from_reg(dst.to_reg().into());
470 let inst = asm::inst::movq_rm::new(dst, src).into();
471 self.emit(Inst::External { inst });
472 }
473 }
474
475 // Sign-extend memory-to-register load.
movsx_mr( &mut self, addr: &Address, dst: WritableReg, ext: Extend<Signed>, memflags: MemFlags, )476 pub fn movsx_mr(
477 &mut self,
478 addr: &Address,
479 dst: WritableReg,
480 ext: Extend<Signed>,
481 memflags: MemFlags,
482 ) {
483 let src = Self::to_synthetic_amode(addr, memflags);
484 let dst = WritableGpr::from_reg(dst.to_reg().into());
485 let inst = match ext.into() {
486 ExtMode::BL => asm::inst::movsbl_rm::new(dst, src).into(),
487 ExtMode::BQ => asm::inst::movsbq_rm::new(dst, src).into(),
488 ExtMode::WL => asm::inst::movswl_rm::new(dst, src).into(),
489 ExtMode::WQ => asm::inst::movswq_rm::new(dst, src).into(),
490 ExtMode::LQ => asm::inst::movslq_rm::new(dst, src).into(),
491 };
492 self.emit(Inst::External { inst });
493 }
494
495 /// Register-to-register move with zero extension.
movzx_rr(&mut self, src: Reg, dst: WritableReg, kind: Extend<Zero>)496 pub fn movzx_rr(&mut self, src: Reg, dst: WritableReg, kind: Extend<Zero>) {
497 let dst = WritableGpr::from_reg(dst.to_reg().into());
498 let inst = match kind.into() {
499 ExtMode::BL => asm::inst::movzbl_rm::new(dst, src).into(),
500 ExtMode::BQ => asm::inst::movzbq_rm::new(dst, src).into(),
501 ExtMode::WL => asm::inst::movzwl_rm::new(dst, src).into(),
502 ExtMode::WQ => asm::inst::movzwq_rm::new(dst, src).into(),
503 ExtMode::LQ => {
504 // This instruction selection may seem strange but is correct in
505 // 64-bit mode: section 3.4.1.1 of the Intel manual says that
506 // "32-bit operands generate a 32-bit result, zero-extended to a
507 // 64-bit result in the destination general-purpose register."
508 // This is applicable beyond `mov` but we use this fact to
509 // zero-extend `src` into `dst`.
510 asm::inst::movl_rm::new(dst, src).into()
511 }
512 };
513 self.emit(Inst::External { inst });
514 }
515
516 /// Register-to-register move with sign extension.
movsx_rr(&mut self, src: Reg, dst: WritableReg, kind: Extend<Signed>)517 pub fn movsx_rr(&mut self, src: Reg, dst: WritableReg, kind: Extend<Signed>) {
518 let dst = WritableGpr::from_reg(dst.to_reg().into());
519 let inst = match kind.into() {
520 ExtMode::BL => asm::inst::movsbl_rm::new(dst, src).into(),
521 ExtMode::BQ => asm::inst::movsbq_rm::new(dst, src).into(),
522 ExtMode::WL => asm::inst::movswl_rm::new(dst, src).into(),
523 ExtMode::WQ => asm::inst::movswq_rm::new(dst, src).into(),
524 ExtMode::LQ => asm::inst::movslq_rm::new(dst, src).into(),
525 };
526 self.emit(Inst::External { inst });
527 }
528
529 /// Integer register conditional move.
cmov(&mut self, src: Reg, dst: WritableReg, cc: IntCmpKind, size: OperandSize)530 pub fn cmov(&mut self, src: Reg, dst: WritableReg, cc: IntCmpKind, size: OperandSize) {
531 use IntCmpKind::*;
532 use OperandSize::*;
533
534 let dst: WritableGpr = dst.map(Into::into);
535 let inst = match size {
536 S8 | S16 | S32 => match cc {
537 Eq => asm::inst::cmovel_rm::new(dst, src).into(),
538 Ne => asm::inst::cmovnel_rm::new(dst, src).into(),
539 LtS => asm::inst::cmovll_rm::new(dst, src).into(),
540 LtU => asm::inst::cmovbl_rm::new(dst, src).into(),
541 GtS => asm::inst::cmovgl_rm::new(dst, src).into(),
542 GtU => asm::inst::cmoval_rm::new(dst, src).into(),
543 LeS => asm::inst::cmovlel_rm::new(dst, src).into(),
544 LeU => asm::inst::cmovbel_rm::new(dst, src).into(),
545 GeS => asm::inst::cmovgel_rm::new(dst, src).into(),
546 GeU => asm::inst::cmovael_rm::new(dst, src).into(),
547 },
548 S64 => match cc {
549 Eq => asm::inst::cmoveq_rm::new(dst, src).into(),
550 Ne => asm::inst::cmovneq_rm::new(dst, src).into(),
551 LtS => asm::inst::cmovlq_rm::new(dst, src).into(),
552 LtU => asm::inst::cmovbq_rm::new(dst, src).into(),
553 GtS => asm::inst::cmovgq_rm::new(dst, src).into(),
554 GtU => asm::inst::cmovaq_rm::new(dst, src).into(),
555 LeS => asm::inst::cmovleq_rm::new(dst, src).into(),
556 LeU => asm::inst::cmovbeq_rm::new(dst, src).into(),
557 GeS => asm::inst::cmovgeq_rm::new(dst, src).into(),
558 GeU => asm::inst::cmovaeq_rm::new(dst, src).into(),
559 },
560 _ => unreachable!(),
561 };
562 self.emit(Inst::External { inst });
563 }
564
565 /// Single and double precision floating point
566 /// register-to-register move.
xmm_mov_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)567 pub fn xmm_mov_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
568 let ty = match size {
569 OperandSize::S32 => types::F32,
570 OperandSize::S64 => types::F64,
571 OperandSize::S128 => types::I32X4,
572 OperandSize::S8 | OperandSize::S16 => unreachable!(),
573 };
574 self.emit(Inst::gen_move(dst.map(|r| r.into()), src.into(), ty));
575 }
576
577 /// Single and double precision floating point load.
xmm_mov_mr( &mut self, src: &Address, dst: WritableReg, size: OperandSize, flags: MemFlags, )578 pub fn xmm_mov_mr(
579 &mut self,
580 src: &Address,
581 dst: WritableReg,
582 size: OperandSize,
583 flags: MemFlags,
584 ) {
585 use OperandSize::*;
586
587 assert!(dst.to_reg().is_float());
588
589 let src = Self::to_synthetic_amode(src, flags);
590 let dst: WritableXmm = dst.map(|r| r.into());
591 let inst = match size {
592 S32 => asm::inst::movss_a_m::new(dst, src).into(),
593 S64 => asm::inst::movsd_a_m::new(dst, src).into(),
594 S128 => asm::inst::movdqu_a::new(dst, src).into(),
595 S8 | S16 => unreachable!(),
596 };
597 self.emit(Inst::External { inst });
598 }
599
600 /// Vector load and extend.
xmm_vpmov_mr( &mut self, src: &Address, dst: WritableReg, kind: VpmovKind, flags: MemFlags, )601 pub fn xmm_vpmov_mr(
602 &mut self,
603 src: &Address,
604 dst: WritableReg,
605 kind: VpmovKind,
606 flags: MemFlags,
607 ) {
608 assert!(dst.to_reg().is_float());
609 let src = Self::to_synthetic_amode(src, flags);
610 let dst: WritableXmm = dst.map(|r| r.into());
611 let inst = match kind {
612 VpmovKind::E8x8S => asm::inst::vpmovsxbw_a::new(dst, src).into(),
613 VpmovKind::E8x8U => asm::inst::vpmovzxbw_a::new(dst, src).into(),
614 VpmovKind::E16x4S => asm::inst::vpmovsxwd_a::new(dst, src).into(),
615 VpmovKind::E16x4U => asm::inst::vpmovzxwd_a::new(dst, src).into(),
616 VpmovKind::E32x2S => asm::inst::vpmovsxdq_a::new(dst, src).into(),
617 VpmovKind::E32x2U => asm::inst::vpmovzxdq_a::new(dst, src).into(),
618 };
619 self.emit(Inst::External { inst });
620 }
621
622 /// Extends vector of integers in `src` and puts results in `dst`.
xmm_vpmov_rr(&mut self, src: Reg, dst: WritableReg, kind: VpmovKind)623 pub fn xmm_vpmov_rr(&mut self, src: Reg, dst: WritableReg, kind: VpmovKind) {
624 let dst: WritableXmm = dst.map(|r| r.into());
625 let inst = match kind {
626 VpmovKind::E8x8S => asm::inst::vpmovsxbw_a::new(dst, src).into(),
627 VpmovKind::E8x8U => asm::inst::vpmovzxbw_a::new(dst, src).into(),
628 VpmovKind::E16x4S => asm::inst::vpmovsxwd_a::new(dst, src).into(),
629 VpmovKind::E16x4U => asm::inst::vpmovzxwd_a::new(dst, src).into(),
630 VpmovKind::E32x2S => asm::inst::vpmovsxdq_a::new(dst, src).into(),
631 VpmovKind::E32x2U => asm::inst::vpmovzxdq_a::new(dst, src).into(),
632 };
633 self.emit(Inst::External { inst });
634 }
635
636 /// Vector load and broadcast.
xmm_vpbroadcast_mr( &mut self, src: &Address, dst: WritableReg, size: OperandSize, flags: MemFlags, )637 pub fn xmm_vpbroadcast_mr(
638 &mut self,
639 src: &Address,
640 dst: WritableReg,
641 size: OperandSize,
642 flags: MemFlags,
643 ) {
644 assert!(dst.to_reg().is_float());
645 let src = Self::to_synthetic_amode(src, flags);
646 let dst: WritableXmm = dst.map(|r| r.into());
647 let inst = match size {
648 OperandSize::S8 => asm::inst::vpbroadcastb_a::new(dst, src).into(),
649 OperandSize::S16 => asm::inst::vpbroadcastw_a::new(dst, src).into(),
650 OperandSize::S32 => asm::inst::vpbroadcastd_a::new(dst, src).into(),
651 _ => unimplemented!(),
652 };
653 self.emit(Inst::External { inst });
654 }
655
656 /// Value in `src` is broadcast into lanes of `size` in `dst`.
xmm_vpbroadcast_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)657 pub fn xmm_vpbroadcast_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
658 assert!(src.is_float() && dst.to_reg().is_float());
659 let dst: WritableXmm = dst.map(|r| r.into());
660 let inst = match size {
661 OperandSize::S8 => asm::inst::vpbroadcastb_a::new(dst, src).into(),
662 OperandSize::S16 => asm::inst::vpbroadcastw_a::new(dst, src).into(),
663 OperandSize::S32 => asm::inst::vpbroadcastd_a::new(dst, src).into(),
664 _ => unimplemented!(),
665 };
666 self.emit(Inst::External { inst });
667 }
668
669 /// Memory to register shuffle of bytes in vector.
xmm_vpshuf_mr( &mut self, src: &Address, dst: WritableReg, mask: u8, size: OperandSize, flags: MemFlags, )670 pub fn xmm_vpshuf_mr(
671 &mut self,
672 src: &Address,
673 dst: WritableReg,
674 mask: u8,
675 size: OperandSize,
676 flags: MemFlags,
677 ) {
678 let dst: WritableXmm = dst.map(|r| r.into());
679 let src = Self::to_synthetic_amode(src, flags);
680 let inst = match size {
681 OperandSize::S32 => asm::inst::vpshufd_a::new(dst, src, mask).into(),
682 _ => unimplemented!(),
683 };
684 self.emit(Inst::External { inst });
685 }
686
687 /// Register to register shuffle of bytes in vector.
xmm_vpshuf_rr(&mut self, src: Reg, dst: WritableReg, mask: u8, size: OperandSize)688 pub fn xmm_vpshuf_rr(&mut self, src: Reg, dst: WritableReg, mask: u8, size: OperandSize) {
689 let dst: WritableXmm = dst.map(|r| r.into());
690
691 let inst = match size {
692 OperandSize::S16 => asm::inst::vpshuflw_a::new(dst, src, mask).into(),
693 OperandSize::S32 => asm::inst::vpshufd_a::new(dst, src, mask).into(),
694 _ => unimplemented!(),
695 };
696
697 self.emit(Inst::External { inst });
698 }
699
700 /// Single and double precision floating point store.
xmm_mov_rm(&mut self, src: Reg, dst: &Address, size: OperandSize, flags: MemFlags)701 pub fn xmm_mov_rm(&mut self, src: Reg, dst: &Address, size: OperandSize, flags: MemFlags) {
702 use OperandSize::*;
703
704 assert!(src.is_float());
705
706 let dst = Self::to_synthetic_amode(dst, flags);
707 let src: Xmm = src.into();
708 let inst = match size {
709 S32 => asm::inst::movss_c_m::new(dst, src).into(),
710 S64 => asm::inst::movsd_c_m::new(dst, src).into(),
711 S128 => asm::inst::movdqu_b::new(dst, src).into(),
712 S16 | S8 => unreachable!(),
713 };
714 self.emit(Inst::External { inst })
715 }
716
717 /// Floating point register conditional move.
xmm_cmov(&mut self, src: Reg, dst: WritableReg, cc: IntCmpKind, size: OperandSize)718 pub fn xmm_cmov(&mut self, src: Reg, dst: WritableReg, cc: IntCmpKind, size: OperandSize) {
719 let dst: WritableXmm = dst.map(Into::into);
720 let ty = match size {
721 OperandSize::S32 => types::F32,
722 OperandSize::S64 => types::F64,
723 // Move the entire 128 bits via movdqa.
724 OperandSize::S128 => types::I32X4,
725 OperandSize::S8 | OperandSize::S16 => unreachable!(),
726 };
727
728 self.emit(Inst::XmmCmove {
729 ty,
730 cc: cc.into(),
731 consequent: Xmm::unwrap_new(src.into()),
732 alternative: dst.to_reg(),
733 dst,
734 })
735 }
736
737 /// Subtract register and register
sub_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)738 pub fn sub_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
739 let dst = pair_gpr(dst);
740 let inst = match size {
741 OperandSize::S8 => asm::inst::subb_rm::new(dst, src).into(),
742 OperandSize::S16 => asm::inst::subw_rm::new(dst, src).into(),
743 OperandSize::S32 => asm::inst::subl_rm::new(dst, src).into(),
744 OperandSize::S64 => asm::inst::subq_rm::new(dst, src).into(),
745 OperandSize::S128 => unimplemented!(),
746 };
747 self.emit(Inst::External { inst });
748 }
749
750 /// Subtract immediate register.
sub_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize)751 pub fn sub_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {
752 let dst = pair_gpr(dst);
753 let inst = match size {
754 OperandSize::S8 => asm::inst::subb_mi::new(dst, u8::try_from(imm).unwrap()).into(),
755 OperandSize::S16 => asm::inst::subw_mi::new(dst, u16::try_from(imm).unwrap()).into(),
756 OperandSize::S32 => asm::inst::subl_mi::new(dst, imm as u32).into(),
757 OperandSize::S64 => asm::inst::subq_mi_sxl::new(dst, imm).into(),
758 OperandSize::S128 => unimplemented!(),
759 };
760 self.emit(Inst::External { inst });
761 }
762
763 /// "and" two registers.
and_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)764 pub fn and_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
765 let dst = pair_gpr(dst);
766 let inst = match size {
767 OperandSize::S8 => asm::inst::andb_rm::new(dst, src).into(),
768 OperandSize::S16 => asm::inst::andw_rm::new(dst, src).into(),
769 OperandSize::S32 => asm::inst::andl_rm::new(dst, src).into(),
770 OperandSize::S64 => asm::inst::andq_rm::new(dst, src).into(),
771 OperandSize::S128 => unimplemented!(),
772 };
773 self.emit(Inst::External { inst });
774 }
775
and_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize)776 pub fn and_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {
777 let dst = pair_gpr(dst);
778 let inst = match size {
779 OperandSize::S8 => asm::inst::andb_mi::new(dst, u8::try_from(imm).unwrap()).into(),
780 OperandSize::S16 => asm::inst::andw_mi::new(dst, u16::try_from(imm).unwrap()).into(),
781 OperandSize::S32 => asm::inst::andl_mi::new(dst, imm as u32).into(),
782 OperandSize::S64 => asm::inst::andq_mi_sxl::new(dst, imm).into(),
783 OperandSize::S128 => unimplemented!(),
784 };
785 self.emit(Inst::External { inst });
786 }
787
788 /// "and" two float registers.
xmm_and_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)789 pub fn xmm_and_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
790 let dst = pair_xmm(dst);
791 let inst = match size {
792 OperandSize::S32 => asm::inst::andps_a::new(dst, src).into(),
793 OperandSize::S64 => asm::inst::andpd_a::new(dst, src).into(),
794 OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
795 };
796 self.emit(Inst::External { inst });
797 }
798
799 /// "and not" two float registers.
xmm_andn_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)800 pub fn xmm_andn_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
801 let dst = pair_xmm(dst);
802 let inst = match size {
803 OperandSize::S32 => asm::inst::andnps_a::new(dst, src).into(),
804 OperandSize::S64 => asm::inst::andnpd_a::new(dst, src).into(),
805 OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
806 };
807 self.emit(Inst::External { inst });
808 }
809
gpr_to_xmm(&mut self, src: Reg, dst: WritableReg, size: OperandSize)810 pub fn gpr_to_xmm(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
811 let dst: WritableXmm = dst.map(|r| r.into());
812 let inst = match size {
813 OperandSize::S32 => asm::inst::movd_a::new(dst, src).into(),
814 OperandSize::S64 => asm::inst::movq_a::new(dst, src).into(),
815 OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
816 };
817
818 self.emit(Inst::External { inst });
819 }
820
xmm_to_gpr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)821 pub fn xmm_to_gpr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
822 let dst: WritableGpr = dst.map(Into::into);
823 let src: Xmm = src.into();
824 let inst = match size {
825 OperandSize::S32 => asm::inst::movd_b::new(dst, src).into(),
826 OperandSize::S64 => asm::inst::movq_b::new(dst, src).into(),
827 OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
828 };
829
830 self.emit(Inst::External { inst })
831 }
832
833 /// Convert float to signed int.
cvt_float_to_sint_seq( &mut self, src: Reg, dst: WritableReg, tmp_gpr: Reg, tmp_xmm: Reg, src_size: OperandSize, dst_size: OperandSize, saturating: bool, )834 pub fn cvt_float_to_sint_seq(
835 &mut self,
836 src: Reg,
837 dst: WritableReg,
838 tmp_gpr: Reg,
839 tmp_xmm: Reg,
840 src_size: OperandSize,
841 dst_size: OperandSize,
842 saturating: bool,
843 ) {
844 self.emit(Inst::CvtFloatToSintSeq {
845 dst_size: dst_size.into(),
846 src_size: src_size.into(),
847 is_saturating: saturating,
848 src: src.into(),
849 dst: dst.map(Into::into),
850 tmp_gpr: tmp_gpr.into(),
851 tmp_xmm: tmp_xmm.into(),
852 });
853 }
854
855 /// Convert float to unsigned int.
cvt_float_to_uint_seq( &mut self, src: Reg, dst: WritableReg, tmp_gpr: Reg, tmp_xmm: Reg, tmp_xmm2: Reg, src_size: OperandSize, dst_size: OperandSize, saturating: bool, )856 pub fn cvt_float_to_uint_seq(
857 &mut self,
858 src: Reg,
859 dst: WritableReg,
860 tmp_gpr: Reg,
861 tmp_xmm: Reg,
862 tmp_xmm2: Reg,
863 src_size: OperandSize,
864 dst_size: OperandSize,
865 saturating: bool,
866 ) {
867 self.emit(Inst::CvtFloatToUintSeq {
868 dst_size: dst_size.into(),
869 src_size: src_size.into(),
870 is_saturating: saturating,
871 src: src.into(),
872 dst: dst.map(Into::into),
873 tmp_gpr: tmp_gpr.into(),
874 tmp_xmm: tmp_xmm.into(),
875 tmp_xmm2: tmp_xmm2.into(),
876 });
877 }
878
879 /// Convert signed int to float.
cvt_sint_to_float( &mut self, src: Reg, dst: WritableReg, src_size: OperandSize, dst_size: OperandSize, )880 pub fn cvt_sint_to_float(
881 &mut self,
882 src: Reg,
883 dst: WritableReg,
884 src_size: OperandSize,
885 dst_size: OperandSize,
886 ) {
887 use OperandSize::*;
888 let dst = pair_xmm(dst);
889 let inst = match (src_size, dst_size) {
890 (S32, S32) => asm::inst::cvtsi2ssl_a::new(dst, src).into(),
891 (S32, S64) => asm::inst::cvtsi2sdl_a::new(dst, src).into(),
892 (S64, S32) => asm::inst::cvtsi2ssq_a::new(dst, src).into(),
893 (S64, S64) => asm::inst::cvtsi2sdq_a::new(dst, src).into(),
894 _ => unreachable!(),
895 };
896 self.emit(Inst::External { inst });
897 }
898
899 /// Convert unsigned 64-bit int to float.
cvt_uint64_to_float_seq( &mut self, src: Reg, dst: WritableReg, tmp_gpr1: Reg, tmp_gpr2: Reg, dst_size: OperandSize, )900 pub fn cvt_uint64_to_float_seq(
901 &mut self,
902 src: Reg,
903 dst: WritableReg,
904 tmp_gpr1: Reg,
905 tmp_gpr2: Reg,
906 dst_size: OperandSize,
907 ) {
908 self.emit(Inst::CvtUint64ToFloatSeq {
909 dst_size: dst_size.into(),
910 src: src.into(),
911 dst: dst.map(Into::into),
912 tmp_gpr1: tmp_gpr1.into(),
913 tmp_gpr2: tmp_gpr2.into(),
914 });
915 }
916
917 /// Change precision of float.
cvt_float_to_float( &mut self, src: Reg, dst: WritableReg, src_size: OperandSize, dst_size: OperandSize, )918 pub fn cvt_float_to_float(
919 &mut self,
920 src: Reg,
921 dst: WritableReg,
922 src_size: OperandSize,
923 dst_size: OperandSize,
924 ) {
925 use OperandSize::*;
926 let dst = pair_xmm(dst);
927 let inst = match (src_size, dst_size) {
928 (S32, S64) => asm::inst::cvtss2sd_a::new(dst, src).into(),
929 (S64, S32) => asm::inst::cvtsd2ss_a::new(dst, src).into(),
930 _ => unimplemented!(),
931 };
932 self.emit(Inst::External { inst });
933 }
934
or_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)935 pub fn or_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
936 let dst = pair_gpr(dst);
937 let inst = match size {
938 OperandSize::S8 => asm::inst::orb_rm::new(dst, src).into(),
939 OperandSize::S16 => asm::inst::orw_rm::new(dst, src).into(),
940 OperandSize::S32 => asm::inst::orl_rm::new(dst, src).into(),
941 OperandSize::S64 => asm::inst::orq_rm::new(dst, src).into(),
942 OperandSize::S128 => unimplemented!(),
943 };
944 self.emit(Inst::External { inst });
945 }
946
or_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize)947 pub fn or_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {
948 let dst = pair_gpr(dst);
949 let inst = match size {
950 OperandSize::S8 => asm::inst::orb_mi::new(dst, u8::try_from(imm).unwrap()).into(),
951 OperandSize::S16 => asm::inst::orw_mi::new(dst, u16::try_from(imm).unwrap()).into(),
952 OperandSize::S32 => asm::inst::orl_mi::new(dst, imm as u32).into(),
953 OperandSize::S64 => asm::inst::orq_mi_sxl::new(dst, imm).into(),
954 OperandSize::S128 => unimplemented!(),
955 };
956 self.emit(Inst::External { inst });
957 }
958
xmm_or_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)959 pub fn xmm_or_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
960 let dst = pair_xmm(dst);
961 let inst = match size {
962 OperandSize::S32 => asm::inst::orps_a::new(dst, src).into(),
963 OperandSize::S64 => asm::inst::orpd_a::new(dst, src).into(),
964 OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
965 };
966 self.emit(Inst::External { inst });
967 }
968
969 /// Logical exclusive or with registers.
xor_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)970 pub fn xor_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
971 let dst = pair_gpr(dst);
972 let inst = match size {
973 OperandSize::S8 => asm::inst::xorb_rm::new(dst, src).into(),
974 OperandSize::S16 => asm::inst::xorw_rm::new(dst, src).into(),
975 OperandSize::S32 => asm::inst::xorl_rm::new(dst, src).into(),
976 OperandSize::S64 => asm::inst::xorq_rm::new(dst, src).into(),
977 OperandSize::S128 => unimplemented!(),
978 };
979 self.emit(Inst::External { inst });
980 }
981
xor_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize)982 pub fn xor_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {
983 let dst = pair_gpr(dst);
984 let inst = match size {
985 OperandSize::S8 => asm::inst::xorb_mi::new(dst, u8::try_from(imm).unwrap()).into(),
986 OperandSize::S16 => asm::inst::xorw_mi::new(dst, u16::try_from(imm).unwrap()).into(),
987 OperandSize::S32 => asm::inst::xorl_mi::new(dst, imm as u32).into(),
988 OperandSize::S64 => asm::inst::xorq_mi_sxl::new(dst, imm).into(),
989 OperandSize::S128 => unimplemented!(),
990 };
991 self.emit(Inst::External { inst });
992 }
993
994 /// Logical exclusive or with float registers.
xmm_xor_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)995 pub fn xmm_xor_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
996 let dst = pair_xmm(dst);
997 let inst = match size {
998 OperandSize::S32 => asm::inst::xorps_a::new(dst, src).into(),
999 OperandSize::S64 => asm::inst::xorpd_a::new(dst, src).into(),
1000 OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
1001 };
1002 self.emit(Inst::External { inst });
1003 }
1004
1005 /// Shift with register and register.
shift_rr(&mut self, src: Reg, dst: WritableReg, kind: ShiftKind, size: OperandSize)1006 pub fn shift_rr(&mut self, src: Reg, dst: WritableReg, kind: ShiftKind, size: OperandSize) {
1007 let dst = pair_gpr(dst);
1008 let src: Gpr = src.into();
1009 let inst = match (kind, size) {
1010 (ShiftKind::Shl, OperandSize::S32) => asm::inst::shll_mc::new(dst, src).into(),
1011 (ShiftKind::Shl, OperandSize::S64) => asm::inst::shlq_mc::new(dst, src).into(),
1012 (ShiftKind::Shl, _) => todo!(),
1013 (ShiftKind::ShrS, OperandSize::S32) => asm::inst::sarl_mc::new(dst, src).into(),
1014 (ShiftKind::ShrS, OperandSize::S64) => asm::inst::sarq_mc::new(dst, src).into(),
1015 (ShiftKind::ShrS, _) => todo!(),
1016 (ShiftKind::ShrU, OperandSize::S32) => asm::inst::shrl_mc::new(dst, src).into(),
1017 (ShiftKind::ShrU, OperandSize::S64) => asm::inst::shrq_mc::new(dst, src).into(),
1018 (ShiftKind::ShrU, _) => todo!(),
1019 (ShiftKind::Rotl, OperandSize::S32) => asm::inst::roll_mc::new(dst, src).into(),
1020 (ShiftKind::Rotl, OperandSize::S64) => asm::inst::rolq_mc::new(dst, src).into(),
1021 (ShiftKind::Rotl, _) => todo!(),
1022 (ShiftKind::Rotr, OperandSize::S32) => asm::inst::rorl_mc::new(dst, src).into(),
1023 (ShiftKind::Rotr, OperandSize::S64) => asm::inst::rorq_mc::new(dst, src).into(),
1024 (ShiftKind::Rotr, _) => todo!(),
1025 };
1026 self.emit(Inst::External { inst });
1027 }
1028
1029 /// Shift with immediate and register.
shift_ir(&mut self, imm: u8, dst: WritableReg, kind: ShiftKind, size: OperandSize)1030 pub fn shift_ir(&mut self, imm: u8, dst: WritableReg, kind: ShiftKind, size: OperandSize) {
1031 let dst = pair_gpr(dst);
1032 let inst = match (kind, size) {
1033 (ShiftKind::Shl, OperandSize::S32) => asm::inst::shll_mi::new(dst, imm).into(),
1034 (ShiftKind::Shl, OperandSize::S64) => asm::inst::shlq_mi::new(dst, imm).into(),
1035 (ShiftKind::Shl, _) => todo!(),
1036 (ShiftKind::ShrS, OperandSize::S32) => asm::inst::sarl_mi::new(dst, imm).into(),
1037 (ShiftKind::ShrS, OperandSize::S64) => asm::inst::sarq_mi::new(dst, imm).into(),
1038 (ShiftKind::ShrS, _) => todo!(),
1039 (ShiftKind::ShrU, OperandSize::S32) => asm::inst::shrl_mi::new(dst, imm).into(),
1040 (ShiftKind::ShrU, OperandSize::S64) => asm::inst::shrq_mi::new(dst, imm).into(),
1041 (ShiftKind::ShrU, _) => todo!(),
1042 (ShiftKind::Rotl, OperandSize::S32) => asm::inst::roll_mi::new(dst, imm).into(),
1043 (ShiftKind::Rotl, OperandSize::S64) => asm::inst::rolq_mi::new(dst, imm).into(),
1044 (ShiftKind::Rotl, _) => todo!(),
1045 (ShiftKind::Rotr, OperandSize::S32) => asm::inst::rorl_mi::new(dst, imm).into(),
1046 (ShiftKind::Rotr, OperandSize::S64) => asm::inst::rorq_mi::new(dst, imm).into(),
1047 (ShiftKind::Rotr, _) => todo!(),
1048 };
1049 self.emit(Inst::External { inst });
1050 }
1051
1052 /// Signed/unsigned division.
1053 ///
1054 /// Emits a sequence of instructions to ensure the correctness of
1055 /// the division invariants. This function assumes that the
1056 /// caller has correctly allocated the dividend as `(rdx:rax)` and
1057 /// accounted for the quotient to be stored in `rax`.
div(&mut self, divisor: Reg, dst: (Reg, Reg), kind: DivKind, size: OperandSize)1058 pub fn div(&mut self, divisor: Reg, dst: (Reg, Reg), kind: DivKind, size: OperandSize) {
1059 let trap = match kind {
1060 // Signed division has two trapping conditions, integer overflow and
1061 // divide-by-zero. Check for divide-by-zero explicitly and let the
1062 // hardware detect overflow.
1063 DivKind::Signed => {
1064 self.cmp_ir(divisor, 0, size);
1065 self.emit(Inst::TrapIf {
1066 cc: CC::Z,
1067 trap_code: TrapCode::INTEGER_DIVISION_BY_ZERO,
1068 });
1069
1070 // Sign-extend the dividend with tailor-made instructoins for
1071 // just this operation.
1072 let ext_dst: WritableGpr = dst.1.into();
1073 let ext_src: Gpr = dst.0.into();
1074 let inst = match size {
1075 OperandSize::S32 => asm::inst::cltd_zo::new(ext_dst, ext_src).into(),
1076 OperandSize::S64 => asm::inst::cqto_zo::new(ext_dst, ext_src).into(),
1077 _ => unimplemented!(),
1078 };
1079 self.emit(Inst::External { inst });
1080 TrapCode::INTEGER_OVERFLOW
1081 }
1082
1083 // Unsigned division only traps in one case, on divide-by-zero, so
1084 // defer that to the trap opcode.
1085 //
1086 // The divisor_hi reg is initialized with zero through an
1087 // xor-against-itself op.
1088 DivKind::Unsigned => {
1089 self.xor_rr(dst.1, writable!(dst.1), size);
1090 TrapCode::INTEGER_DIVISION_BY_ZERO
1091 }
1092 };
1093 let dst0 = pair_gpr(writable!(dst.0));
1094 let dst1 = pair_gpr(writable!(dst.1));
1095 let inst = match (kind, size) {
1096 (DivKind::Signed, OperandSize::S32) => {
1097 asm::inst::idivl_m::new(dst0, dst1, divisor, trap).into()
1098 }
1099 (DivKind::Unsigned, OperandSize::S32) => {
1100 asm::inst::divl_m::new(dst0, dst1, divisor, trap).into()
1101 }
1102 (DivKind::Signed, OperandSize::S64) => {
1103 asm::inst::idivq_m::new(dst0, dst1, divisor, trap).into()
1104 }
1105 (DivKind::Unsigned, OperandSize::S64) => {
1106 asm::inst::divq_m::new(dst0, dst1, divisor, trap).into()
1107 }
1108 _ => todo!(),
1109 };
1110 self.emit(Inst::External { inst });
1111 }
1112
1113 /// Signed/unsigned remainder.
1114 ///
1115 /// Emits a sequence of instructions to ensure the correctness of the
1116 /// division invariants and ultimately calculate the remainder.
1117 /// This function assumes that the
1118 /// caller has correctly allocated the dividend as `(rdx:rax)` and
1119 /// accounted for the remainder to be stored in `rdx`.
rem(&mut self, divisor: Reg, dst: (Reg, Reg), kind: RemKind, size: OperandSize)1120 pub fn rem(&mut self, divisor: Reg, dst: (Reg, Reg), kind: RemKind, size: OperandSize) {
1121 match kind {
1122 // Signed remainder goes through a pseudo-instruction which has
1123 // some internal branching. The `dividend_hi`, or `rdx`, is
1124 // initialized here with a `SignExtendData` instruction.
1125 RemKind::Signed => {
1126 let ext_dst: WritableGpr = dst.1.into();
1127
1128 // Initialize `dividend_hi`, or `rdx`, with a tailor-made
1129 // instruction for this operation.
1130 let ext_src: Gpr = dst.0.into();
1131 let inst = match size {
1132 OperandSize::S32 => asm::inst::cltd_zo::new(ext_dst, ext_src).into(),
1133 OperandSize::S64 => asm::inst::cqto_zo::new(ext_dst, ext_src).into(),
1134 _ => unimplemented!(),
1135 };
1136 self.emit(Inst::External { inst });
1137 self.emit(Inst::CheckedSRemSeq {
1138 size: size.into(),
1139 divisor: divisor.into(),
1140 dividend_lo: dst.0.into(),
1141 dividend_hi: dst.1.into(),
1142 dst_quotient: dst.0.into(),
1143 dst_remainder: dst.1.into(),
1144 });
1145 }
1146
1147 // Unsigned remainder initializes `dividend_hi` with zero and
1148 // then executes a normal `div` instruction.
1149 RemKind::Unsigned => {
1150 self.xor_rr(dst.1, writable!(dst.1), size);
1151 let dst0 = pair_gpr(writable!(dst.0));
1152 let dst1 = pair_gpr(writable!(dst.1));
1153 let trap = TrapCode::INTEGER_DIVISION_BY_ZERO;
1154 let inst = match size {
1155 OperandSize::S32 => asm::inst::divl_m::new(dst0, dst1, divisor, trap).into(),
1156 OperandSize::S64 => asm::inst::divq_m::new(dst0, dst1, divisor, trap).into(),
1157 _ => todo!(),
1158 };
1159 self.emit(Inst::External { inst });
1160 }
1161 }
1162 }
1163
1164 /// Multiply immediate and register.
mul_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize)1165 pub fn mul_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {
1166 use OperandSize::*;
1167 let src = dst.to_reg();
1168 let dst: WritableGpr = dst.to_reg().into();
1169 let inst = match size {
1170 S16 => asm::inst::imulw_rmi::new(dst, src, u16::try_from(imm).unwrap()).into(),
1171 S32 => asm::inst::imull_rmi::new(dst, src, imm as u32).into(),
1172 S64 => asm::inst::imulq_rmi_sxl::new(dst, src, imm).into(),
1173 S8 | S128 => unimplemented!(),
1174 };
1175 self.emit(Inst::External { inst });
1176 }
1177
1178 /// Multiply register and register.
mul_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)1179 pub fn mul_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1180 use OperandSize::*;
1181 let dst = pair_gpr(dst);
1182 let inst = match size {
1183 S16 => asm::inst::imulw_rm::new(dst, src).into(),
1184 S32 => asm::inst::imull_rm::new(dst, src).into(),
1185 S64 => asm::inst::imulq_rm::new(dst, src).into(),
1186 S8 | S128 => unimplemented!(),
1187 };
1188 self.emit(Inst::External { inst });
1189 }
1190
1191 /// Add immediate and register.
add_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize)1192 pub fn add_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {
1193 let dst = pair_gpr(dst);
1194 let inst = match size {
1195 OperandSize::S8 => asm::inst::addb_mi::new(dst, u8::try_from(imm).unwrap()).into(),
1196 OperandSize::S16 => asm::inst::addw_mi::new(dst, u16::try_from(imm).unwrap()).into(),
1197 OperandSize::S32 => asm::inst::addl_mi::new(dst, imm as u32).into(),
1198 OperandSize::S64 => asm::inst::addq_mi_sxl::new(dst, imm).into(),
1199 OperandSize::S128 => unimplemented!(),
1200 };
1201 self.emit(Inst::External { inst });
1202 }
1203
1204 /// Add register and register.
add_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)1205 pub fn add_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1206 let dst = pair_gpr(dst);
1207 let inst = match size {
1208 OperandSize::S8 => asm::inst::addb_rm::new(dst, src).into(),
1209 OperandSize::S16 => asm::inst::addw_rm::new(dst, src).into(),
1210 OperandSize::S32 => asm::inst::addl_rm::new(dst, src).into(),
1211 OperandSize::S64 => asm::inst::addq_rm::new(dst, src).into(),
1212 OperandSize::S128 => unimplemented!(),
1213 };
1214 self.emit(Inst::External { inst });
1215 }
1216
lock_xadd( &mut self, addr: Address, dst: WritableReg, size: OperandSize, flags: MemFlags, )1217 pub fn lock_xadd(
1218 &mut self,
1219 addr: Address,
1220 dst: WritableReg,
1221 size: OperandSize,
1222 flags: MemFlags,
1223 ) {
1224 assert!(addr.is_offset());
1225 let mem = Self::to_synthetic_amode(&addr, flags);
1226 let dst = pair_gpr(dst);
1227 let inst = match size {
1228 OperandSize::S8 => asm::inst::lock_xaddb_mr::new(mem, dst).into(),
1229 OperandSize::S16 => asm::inst::lock_xaddw_mr::new(mem, dst).into(),
1230 OperandSize::S32 => asm::inst::lock_xaddl_mr::new(mem, dst).into(),
1231 OperandSize::S64 => asm::inst::lock_xaddq_mr::new(mem, dst).into(),
1232 OperandSize::S128 => unimplemented!(),
1233 };
1234
1235 self.emit(Inst::External { inst });
1236 }
1237
atomic_rmw_seq( &mut self, addr: Address, operand: Reg, dst: WritableReg, temp: WritableReg, size: OperandSize, flags: MemFlags, op: AtomicRmwSeqOp, )1238 pub fn atomic_rmw_seq(
1239 &mut self,
1240 addr: Address,
1241 operand: Reg,
1242 dst: WritableReg,
1243 temp: WritableReg,
1244 size: OperandSize,
1245 flags: MemFlags,
1246 op: AtomicRmwSeqOp,
1247 ) {
1248 assert!(addr.is_offset());
1249 let mem = Self::to_synthetic_amode(&addr, flags);
1250 self.emit(Inst::AtomicRmwSeq {
1251 ty: Type::int_with_byte_size(size.bytes() as _).unwrap(),
1252 mem,
1253 operand: operand.into(),
1254 temp: temp.map(Into::into),
1255 dst_old: dst.map(Into::into),
1256 op,
1257 });
1258 }
1259
xchg(&mut self, addr: Address, dst: WritableReg, size: OperandSize, flags: MemFlags)1260 pub fn xchg(&mut self, addr: Address, dst: WritableReg, size: OperandSize, flags: MemFlags) {
1261 assert!(addr.is_offset());
1262 let mem = Self::to_synthetic_amode(&addr, flags);
1263 let dst = pair_gpr(dst);
1264 let inst = match size {
1265 OperandSize::S8 => asm::inst::xchgb_rm::new(dst, mem).into(),
1266 OperandSize::S16 => asm::inst::xchgw_rm::new(dst, mem).into(),
1267 OperandSize::S32 => asm::inst::xchgl_rm::new(dst, mem).into(),
1268 OperandSize::S64 => asm::inst::xchgq_rm::new(dst, mem).into(),
1269 OperandSize::S128 => unimplemented!(),
1270 };
1271
1272 self.emit(Inst::External { inst });
1273 }
cmpxchg( &mut self, addr: Address, replacement: Reg, dst: WritableReg, size: OperandSize, flags: MemFlags, )1274 pub fn cmpxchg(
1275 &mut self,
1276 addr: Address,
1277 replacement: Reg,
1278 dst: WritableReg,
1279 size: OperandSize,
1280 flags: MemFlags,
1281 ) {
1282 assert!(addr.is_offset());
1283 let mem = Self::to_synthetic_amode(&addr, flags);
1284 let dst = pair_gpr(dst);
1285 let inst = match size {
1286 OperandSize::S8 => asm::inst::lock_cmpxchgb_mr::new(mem, replacement, dst).into(),
1287 OperandSize::S16 => asm::inst::lock_cmpxchgw_mr::new(mem, replacement, dst).into(),
1288 OperandSize::S32 => asm::inst::lock_cmpxchgl_mr::new(mem, replacement, dst).into(),
1289 OperandSize::S64 => asm::inst::lock_cmpxchgq_mr::new(mem, replacement, dst).into(),
1290 OperandSize::S128 => unimplemented!(),
1291 };
1292
1293 self.emit(Inst::External { inst });
1294 }
1295
cmp_ir(&mut self, src1: Reg, imm: i32, size: OperandSize)1296 pub fn cmp_ir(&mut self, src1: Reg, imm: i32, size: OperandSize) {
1297 let inst = match size {
1298 OperandSize::S8 => {
1299 let imm = i8::try_from(imm).unwrap();
1300 asm::inst::cmpb_mi::new(src1, imm.cast_unsigned()).into()
1301 }
1302 OperandSize::S16 => match i8::try_from(imm) {
1303 Ok(imm8) => asm::inst::cmpw_mi_sxb::new(src1, imm8).into(),
1304 Err(_) => {
1305 asm::inst::cmpw_mi::new(src1, i16::try_from(imm).unwrap().cast_unsigned())
1306 .into()
1307 }
1308 },
1309 OperandSize::S32 => match i8::try_from(imm) {
1310 Ok(imm8) => asm::inst::cmpl_mi_sxb::new(src1, imm8).into(),
1311 Err(_) => asm::inst::cmpl_mi::new(src1, imm.cast_unsigned()).into(),
1312 },
1313 OperandSize::S64 => match i8::try_from(imm) {
1314 Ok(imm8) => asm::inst::cmpq_mi_sxb::new(src1, imm8).into(),
1315 Err(_) => asm::inst::cmpq_mi::new(src1, imm).into(),
1316 },
1317 OperandSize::S128 => unimplemented!(),
1318 };
1319
1320 self.emit(Inst::External { inst });
1321 }
1322
cmp_rr(&mut self, src1: Reg, src2: Reg, size: OperandSize)1323 pub fn cmp_rr(&mut self, src1: Reg, src2: Reg, size: OperandSize) {
1324 let inst = match size {
1325 OperandSize::S8 => asm::inst::cmpb_rm::new(src1, src2).into(),
1326 OperandSize::S16 => asm::inst::cmpw_rm::new(src1, src2).into(),
1327 OperandSize::S32 => asm::inst::cmpl_rm::new(src1, src2).into(),
1328 OperandSize::S64 => asm::inst::cmpq_rm::new(src1, src2).into(),
1329 OperandSize::S128 => unimplemented!(),
1330 };
1331
1332 self.emit(Inst::External { inst });
1333 }
1334
1335 /// Compares values in src1 and src2 and sets ZF, PF, and CF flags in EFLAGS
1336 /// register.
ucomis(&mut self, src1: Reg, src2: Reg, size: OperandSize)1337 pub fn ucomis(&mut self, src1: Reg, src2: Reg, size: OperandSize) {
1338 let inst = match size {
1339 OperandSize::S32 => asm::inst::ucomiss_a::new(src1, src2).into(),
1340 OperandSize::S64 => asm::inst::ucomisd_a::new(src1, src2).into(),
1341 OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
1342 };
1343 self.emit(Inst::External { inst });
1344 }
1345
popcnt(&mut self, src: Reg, dst: WritableReg, size: OperandSize)1346 pub fn popcnt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1347 assert!(
1348 self.isa_flags.has_popcnt() && self.isa_flags.has_sse42(),
1349 "Requires has_popcnt and has_sse42 flags"
1350 );
1351 let dst = WritableGpr::from_reg(dst.to_reg().into());
1352 let inst = match size {
1353 OperandSize::S16 => asm::inst::popcntw_rm::new(dst, src).into(),
1354 OperandSize::S32 => asm::inst::popcntl_rm::new(dst, src).into(),
1355 OperandSize::S64 => asm::inst::popcntq_rm::new(dst, src).into(),
1356 OperandSize::S8 | OperandSize::S128 => unreachable!(),
1357 };
1358 self.emit(Inst::External { inst });
1359 }
1360
1361 /// Emit a test instruction with two register operands.
test_rr(&mut self, src1: Reg, src2: Reg, size: OperandSize)1362 pub fn test_rr(&mut self, src1: Reg, src2: Reg, size: OperandSize) {
1363 let inst = match size {
1364 OperandSize::S8 => asm::inst::testb_mr::new(src1, src2).into(),
1365 OperandSize::S16 => asm::inst::testw_mr::new(src1, src2).into(),
1366 OperandSize::S32 => asm::inst::testl_mr::new(src1, src2).into(),
1367 OperandSize::S64 => asm::inst::testq_mr::new(src1, src2).into(),
1368 OperandSize::S128 => unimplemented!(),
1369 };
1370
1371 self.emit(Inst::External { inst });
1372 }
1373
1374 /// Set value in dst to `0` or `1` based on flags in status register and
1375 /// [`CmpKind`].
setcc(&mut self, kind: IntCmpKind, dst: WritableReg)1376 pub fn setcc(&mut self, kind: IntCmpKind, dst: WritableReg) {
1377 self.setcc_impl(kind.into(), dst);
1378 }
1379
1380 /// Set value in dst to `1` if parity flag in status register is set, `0`
1381 /// otherwise.
setp(&mut self, dst: WritableReg)1382 pub fn setp(&mut self, dst: WritableReg) {
1383 self.setcc_impl(CC::P, dst);
1384 }
1385
1386 /// Set value in dst to `1` if parity flag in status register is not set,
1387 /// `0` otherwise.
setnp(&mut self, dst: WritableReg)1388 pub fn setnp(&mut self, dst: WritableReg) {
1389 self.setcc_impl(CC::NP, dst);
1390 }
1391
setcc_impl(&mut self, cc: CC, dst: WritableReg)1392 fn setcc_impl(&mut self, cc: CC, dst: WritableReg) {
1393 // Clear the dst register or bits 1 to 31 may be incorrectly set.
1394 // Don't use xor since it updates the status register.
1395 let dst: WritableGpr = dst.map(Into::into);
1396 let inst = asm::inst::movl_oi::new(dst, 0).into();
1397 self.emit(Inst::External { inst });
1398
1399 // Copy correct bit from status register into dst register.
1400 //
1401 // Note that some of these mnemonics don't match exactly and that's
1402 // intentional as there are multiple mnemonics for the same encoding in
1403 // some cases and the assembler picked ones that match Capstone rather
1404 // than Cranelift.
1405 let inst = match cc {
1406 CC::O => asm::inst::seto_m::new(dst).into(),
1407 CC::NO => asm::inst::setno_m::new(dst).into(),
1408 CC::B => asm::inst::setb_m::new(dst).into(),
1409 CC::NB => asm::inst::setae_m::new(dst).into(), // nb == ae
1410 CC::Z => asm::inst::sete_m::new(dst).into(), // z == e
1411 CC::NZ => asm::inst::setne_m::new(dst).into(), // nz == ne
1412 CC::BE => asm::inst::setbe_m::new(dst).into(),
1413 CC::NBE => asm::inst::seta_m::new(dst).into(), // nbe == a
1414 CC::S => asm::inst::sets_m::new(dst).into(),
1415 CC::NS => asm::inst::setns_m::new(dst).into(),
1416 CC::L => asm::inst::setl_m::new(dst).into(),
1417 CC::NL => asm::inst::setge_m::new(dst).into(), // nl == ge
1418 CC::LE => asm::inst::setle_m::new(dst).into(),
1419 CC::NLE => asm::inst::setg_m::new(dst).into(), // nle == g
1420 CC::P => asm::inst::setp_m::new(dst).into(),
1421 CC::NP => asm::inst::setnp_m::new(dst).into(),
1422 };
1423 self.emit(Inst::External { inst });
1424 }
1425
1426 /// Store the count of leading zeroes in src in dst.
1427 /// Requires `has_lzcnt` flag.
lzcnt(&mut self, src: Reg, dst: WritableReg, size: OperandSize)1428 pub fn lzcnt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1429 assert!(self.isa_flags.has_lzcnt(), "Requires has_lzcnt flag");
1430 let dst = WritableGpr::from_reg(dst.to_reg().into());
1431 let inst = match size {
1432 OperandSize::S16 => asm::inst::lzcntw_rm::new(dst, src).into(),
1433 OperandSize::S32 => asm::inst::lzcntl_rm::new(dst, src).into(),
1434 OperandSize::S64 => asm::inst::lzcntq_rm::new(dst, src).into(),
1435 OperandSize::S8 | OperandSize::S128 => unreachable!(),
1436 };
1437 self.emit(Inst::External { inst });
1438 }
1439
1440 /// Store the count of trailing zeroes in src in dst.
1441 /// Requires `has_bmi1` flag.
tzcnt(&mut self, src: Reg, dst: WritableReg, size: OperandSize)1442 pub fn tzcnt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1443 assert!(self.isa_flags.has_bmi1(), "Requires has_bmi1 flag");
1444 let dst = WritableGpr::from_reg(dst.to_reg().into());
1445 let inst = match size {
1446 OperandSize::S16 => asm::inst::tzcntw_a::new(dst, src).into(),
1447 OperandSize::S32 => asm::inst::tzcntl_a::new(dst, src).into(),
1448 OperandSize::S64 => asm::inst::tzcntq_a::new(dst, src).into(),
1449 OperandSize::S8 | OperandSize::S128 => unreachable!(),
1450 };
1451 self.emit(Inst::External { inst });
1452 }
1453
1454 /// Stores position of the most significant bit set in src in dst.
1455 /// Zero flag is set if src is equal to 0.
bsr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)1456 pub fn bsr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1457 let dst: WritableGpr = WritableGpr::from_reg(dst.to_reg().into());
1458 let inst = match size {
1459 OperandSize::S16 => asm::inst::bsrw_rm::new(dst, src).into(),
1460 OperandSize::S32 => asm::inst::bsrl_rm::new(dst, src).into(),
1461 OperandSize::S64 => asm::inst::bsrq_rm::new(dst, src).into(),
1462 OperandSize::S8 | OperandSize::S128 => unreachable!(),
1463 };
1464 self.emit(Inst::External { inst });
1465 }
1466
1467 /// Performs integer negation on `src` and places result in `dst`.
neg(&mut self, read: Reg, write: WritableReg, size: OperandSize)1468 pub fn neg(&mut self, read: Reg, write: WritableReg, size: OperandSize) {
1469 let gpr = PairedGpr {
1470 read: read.into(),
1471 write: WritableGpr::from_reg(write.to_reg().into()),
1472 };
1473 let inst = match size {
1474 OperandSize::S8 => asm::inst::negb_m::new(gpr).into(),
1475 OperandSize::S16 => asm::inst::negw_m::new(gpr).into(),
1476 OperandSize::S32 => asm::inst::negl_m::new(gpr).into(),
1477 OperandSize::S64 => asm::inst::negq_m::new(gpr).into(),
1478 OperandSize::S128 => unreachable!(),
1479 };
1480 self.emit(Inst::External { inst });
1481 }
1482
1483 /// Stores position of the least significant bit set in src in dst.
1484 /// Zero flag is set if src is equal to 0.
bsf(&mut self, src: Reg, dst: WritableReg, size: OperandSize)1485 pub fn bsf(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1486 let dst: WritableGpr = WritableGpr::from_reg(dst.to_reg().into());
1487 let inst = match size {
1488 OperandSize::S16 => asm::inst::bsfw_rm::new(dst, src).into(),
1489 OperandSize::S32 => asm::inst::bsfl_rm::new(dst, src).into(),
1490 OperandSize::S64 => asm::inst::bsfq_rm::new(dst, src).into(),
1491 OperandSize::S8 | OperandSize::S128 => unreachable!(),
1492 };
1493 self.emit(Inst::External { inst });
1494 }
1495
1496 /// Performs float addition on src and dst and places result in dst.
xmm_add_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)1497 pub fn xmm_add_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1498 let dst = pair_xmm(dst);
1499 let inst = match size {
1500 OperandSize::S32 => asm::inst::addss_a::new(dst, src).into(),
1501 OperandSize::S64 => asm::inst::addsd_a::new(dst, src).into(),
1502 OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
1503 };
1504 self.emit(Inst::External { inst });
1505 }
1506
1507 /// Performs float subtraction on src and dst and places result in dst.
xmm_sub_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)1508 pub fn xmm_sub_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1509 let dst = pair_xmm(dst);
1510 let inst = match size {
1511 OperandSize::S32 => asm::inst::subss_a::new(dst, src).into(),
1512 OperandSize::S64 => asm::inst::subsd_a::new(dst, src).into(),
1513 OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
1514 };
1515 self.emit(Inst::External { inst });
1516 }
1517
1518 /// Performs float multiplication on src and dst and places result in dst.
xmm_mul_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)1519 pub fn xmm_mul_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1520 use OperandSize::*;
1521 let dst = pair_xmm(dst);
1522 let inst = match size {
1523 S32 => asm::inst::mulss_a::new(dst, src).into(),
1524 S64 => asm::inst::mulsd_a::new(dst, src).into(),
1525 S8 | S16 | S128 => unreachable!(),
1526 };
1527 self.emit(Inst::External { inst });
1528 }
1529
1530 /// Performs float division on src and dst and places result in dst.
xmm_div_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)1531 pub fn xmm_div_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1532 let dst = pair_xmm(dst);
1533 let inst = match size {
1534 OperandSize::S32 => asm::inst::divss_a::new(dst, src).into(),
1535 OperandSize::S64 => asm::inst::divsd_a::new(dst, src).into(),
1536 OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
1537 };
1538 self.emit(Inst::External { inst });
1539 }
1540
1541 /// Minimum for src and dst XMM registers with results put in dst.
xmm_min_seq(&mut self, src: Reg, dst: WritableReg, size: OperandSize)1542 pub fn xmm_min_seq(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1543 self.emit(Inst::XmmMinMaxSeq {
1544 size: size.into(),
1545 is_min: true,
1546 lhs: src.into(),
1547 rhs: dst.to_reg().into(),
1548 dst: dst.map(Into::into),
1549 });
1550 }
1551
1552 /// Maximum for src and dst XMM registers with results put in dst.
xmm_max_seq(&mut self, src: Reg, dst: WritableReg, size: OperandSize)1553 pub fn xmm_max_seq(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1554 self.emit(Inst::XmmMinMaxSeq {
1555 size: size.into(),
1556 is_min: false,
1557 lhs: src.into(),
1558 rhs: dst.to_reg().into(),
1559 dst: dst.map(Into::into),
1560 });
1561 }
1562
1563 /// Perform rounding operation on float register src and place results in
1564 /// float register dst.
xmm_rounds_rr( &mut self, src: Reg, dst: WritableReg, mode: RoundingMode, size: OperandSize, )1565 pub fn xmm_rounds_rr(
1566 &mut self,
1567 src: Reg,
1568 dst: WritableReg,
1569 mode: RoundingMode,
1570 size: OperandSize,
1571 ) {
1572 let dst = dst.map(|r| r.into());
1573
1574 let imm: u8 = match mode {
1575 RoundingMode::Nearest => 0x00,
1576 RoundingMode::Down => 0x01,
1577 RoundingMode::Up => 0x02,
1578 RoundingMode::Zero => 0x03,
1579 };
1580
1581 let inst = match size {
1582 OperandSize::S32 => asm::inst::roundss_rmi::new(dst, src, imm).into(),
1583 OperandSize::S64 => asm::inst::roundsd_rmi::new(dst, src, imm).into(),
1584 OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
1585 };
1586
1587 self.emit(Inst::External { inst });
1588 }
1589
sqrt(&mut self, src: Reg, dst: WritableReg, size: OperandSize)1590 pub fn sqrt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1591 use OperandSize::*;
1592 let dst = pair_xmm(dst);
1593 let inst = match size {
1594 S32 => asm::inst::sqrtss_a::new(dst, src).into(),
1595 S64 => asm::inst::sqrtsd_a::new(dst, src).into(),
1596 S8 | S16 | S128 => unimplemented!(),
1597 };
1598 self.emit(Inst::External { inst });
1599 }
1600
1601 /// Emit a call to an unknown location through a register.
call_with_reg(&mut self, cc: CallingConvention, callee: Reg)1602 pub fn call_with_reg(&mut self, cc: CallingConvention, callee: Reg) {
1603 self.emit(Inst::CallUnknown {
1604 info: Box::new(CallInfo::empty(RegMem::reg(callee.into()), cc.into())),
1605 });
1606 }
1607
1608 /// Emit a call to a locally defined function through an index.
call_with_name(&mut self, cc: CallingConvention, name: UserExternalNameRef)1609 pub fn call_with_name(&mut self, cc: CallingConvention, name: UserExternalNameRef) {
1610 self.emit(Inst::CallKnown {
1611 info: Box::new(CallInfo::empty(ExternalName::user(name), cc.into())),
1612 });
1613 }
1614
1615 /// Emits a conditional jump to the given label.
jmp_if(&mut self, cc: impl Into<CC>, taken: MachLabel)1616 pub fn jmp_if(&mut self, cc: impl Into<CC>, taken: MachLabel) {
1617 self.emit(Inst::WinchJmpIf {
1618 cc: cc.into(),
1619 taken,
1620 });
1621 }
1622
1623 /// Performs an unconditional jump to the given label.
jmp(&mut self, target: MachLabel)1624 pub fn jmp(&mut self, target: MachLabel) {
1625 self.emit(Inst::JmpKnown { dst: target });
1626 }
1627
1628 /// Emits a jump table sequence.
jmp_table( &mut self, targets: SmallVec<[MachLabel; 4]>, default: MachLabel, index: Reg, tmp1: Reg, tmp2: Reg, )1629 pub fn jmp_table(
1630 &mut self,
1631 targets: SmallVec<[MachLabel; 4]>,
1632 default: MachLabel,
1633 index: Reg,
1634 tmp1: Reg,
1635 tmp2: Reg,
1636 ) {
1637 self.emit(Inst::JmpTableSeq {
1638 idx: index.into(),
1639 tmp1: Writable::from_reg(tmp1.into()),
1640 tmp2: Writable::from_reg(tmp2.into()),
1641 default_target: default,
1642 targets: Box::new(targets.to_vec()),
1643 })
1644 }
1645
1646 /// Emit a trap instruction.
trap(&mut self, code: TrapCode)1647 pub fn trap(&mut self, code: TrapCode) {
1648 let inst = asm::inst::ud2_zo::new(code).into();
1649 self.emit(Inst::External { inst });
1650 }
1651
1652 /// Conditional trap.
trapif(&mut self, cc: impl Into<CC>, trap_code: TrapCode)1653 pub fn trapif(&mut self, cc: impl Into<CC>, trap_code: TrapCode) {
1654 self.emit(Inst::TrapIf {
1655 cc: cc.into(),
1656 trap_code,
1657 });
1658 }
1659
1660 /// Load effective address.
lea(&mut self, addr: &Address, dst: WritableReg, size: OperandSize)1661 pub fn lea(&mut self, addr: &Address, dst: WritableReg, size: OperandSize) {
1662 let addr = Self::to_synthetic_amode(addr, MemFlags::trusted());
1663 let dst: WritableGpr = dst.map(Into::into);
1664 let inst = match size {
1665 OperandSize::S16 => asm::inst::leaw_rm::new(dst, addr).into(),
1666 OperandSize::S32 => asm::inst::leal_rm::new(dst, addr).into(),
1667 OperandSize::S64 => asm::inst::leaq_rm::new(dst, addr).into(),
1668 OperandSize::S8 | OperandSize::S128 => unimplemented!(),
1669 };
1670 self.emit(Inst::External { inst });
1671 }
1672
adc_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)1673 pub fn adc_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1674 let dst = pair_gpr(dst);
1675 let inst = match size {
1676 OperandSize::S8 => asm::inst::adcb_rm::new(dst, src).into(),
1677 OperandSize::S16 => asm::inst::adcw_rm::new(dst, src).into(),
1678 OperandSize::S32 => asm::inst::adcl_rm::new(dst, src).into(),
1679 OperandSize::S64 => asm::inst::adcq_rm::new(dst, src).into(),
1680 OperandSize::S128 => unimplemented!(),
1681 };
1682 self.emit(Inst::External { inst });
1683 }
1684
sbb_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)1685 pub fn sbb_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1686 let dst = pair_gpr(dst);
1687 let inst = match size {
1688 OperandSize::S8 => asm::inst::sbbb_rm::new(dst, src).into(),
1689 OperandSize::S16 => asm::inst::sbbw_rm::new(dst, src).into(),
1690 OperandSize::S32 => asm::inst::sbbl_rm::new(dst, src).into(),
1691 OperandSize::S64 => asm::inst::sbbq_rm::new(dst, src).into(),
1692 OperandSize::S128 => unimplemented!(),
1693 };
1694 self.emit(Inst::External { inst });
1695 }
1696
mul_wide( &mut self, dst_lo: WritableReg, dst_hi: WritableReg, lhs: Reg, rhs: Reg, kind: MulWideKind, size: OperandSize, )1697 pub fn mul_wide(
1698 &mut self,
1699 dst_lo: WritableReg,
1700 dst_hi: WritableReg,
1701 lhs: Reg,
1702 rhs: Reg,
1703 kind: MulWideKind,
1704 size: OperandSize,
1705 ) {
1706 use MulWideKind::*;
1707 use OperandSize::*;
1708 let rax = asm::Fixed(PairedGpr {
1709 read: lhs.into(),
1710 write: WritableGpr::from_reg(dst_lo.to_reg().into()),
1711 });
1712 let rdx = asm::Fixed(dst_hi.to_reg().into());
1713 if size == S8 {
1714 // For `mulb` and `imulb`, both the high and low bits are written to
1715 // RAX.
1716 assert_eq!(dst_lo, dst_hi);
1717 }
1718 let inst = match (size, kind) {
1719 (S8, Unsigned) => asm::inst::mulb_m::new(rax, rhs).into(),
1720 (S8, Signed) => asm::inst::imulb_m::new(rax, rhs).into(),
1721 (S16, Unsigned) => asm::inst::mulw_m::new(rax, rdx, rhs).into(),
1722 (S16, Signed) => asm::inst::imulw_m::new(rax, rdx, rhs).into(),
1723 (S32, Unsigned) => asm::inst::mull_m::new(rax, rdx, rhs).into(),
1724 (S32, Signed) => asm::inst::imull_m::new(rax, rdx, rhs).into(),
1725 (S64, Unsigned) => asm::inst::mulq_m::new(rax, rdx, rhs).into(),
1726 (S64, Signed) => asm::inst::imulq_m::new(rax, rdx, rhs).into(),
1727 (S128, _) => unimplemented!(),
1728 };
1729 self.emit(Inst::External { inst });
1730 }
1731
1732 /// Shuffles bytes in `src` according to contents of `mask` and puts
1733 /// result in `dst`.
xmm_vpshufb_rrm(&mut self, dst: WritableReg, src: Reg, mask: &Address)1734 pub fn xmm_vpshufb_rrm(&mut self, dst: WritableReg, src: Reg, mask: &Address) {
1735 let dst: WritableXmm = dst.map(|r| r.into());
1736 let mask = Self::to_synthetic_amode(mask, MemFlags::trusted());
1737 let inst = asm::inst::vpshufb_b::new(dst, src, mask).into();
1738 self.emit(Inst::External { inst });
1739 }
1740
1741 /// Shuffles bytes in `src` according to contents of `mask` and puts
1742 /// result in `dst`.
xmm_vpshufb_rrr(&mut self, dst: WritableReg, src: Reg, mask: Reg)1743 pub fn xmm_vpshufb_rrr(&mut self, dst: WritableReg, src: Reg, mask: Reg) {
1744 let dst: WritableXmm = dst.map(|r| r.into());
1745 let inst = asm::inst::vpshufb_b::new(dst, src, mask).into();
1746 self.emit(Inst::External { inst });
1747 }
1748
1749 /// Add unsigned integers with unsigned saturation.
1750 ///
1751 /// Adds the src operands but when an individual byte result is larger than
1752 /// an unsigned byte integer, 0xFF is written instead.
xmm_vpaddus_rrm( &mut self, dst: WritableReg, src1: Reg, src2: &Address, size: OperandSize, )1753 pub fn xmm_vpaddus_rrm(
1754 &mut self,
1755 dst: WritableReg,
1756 src1: Reg,
1757 src2: &Address,
1758 size: OperandSize,
1759 ) {
1760 let dst: WritableXmm = dst.map(|r| r.into());
1761 let src2 = Self::to_synthetic_amode(src2, MemFlags::trusted());
1762 let inst = match size {
1763 OperandSize::S8 => asm::inst::vpaddusb_b::new(dst, src1, src2).into(),
1764 OperandSize::S32 => asm::inst::vpaddusw_b::new(dst, src1, src2).into(),
1765 _ => unimplemented!(),
1766 };
1767 self.emit(Inst::External { inst });
1768 }
1769
1770 /// Add unsigned integers with unsigned saturation.
1771 ///
1772 /// Adds the src operands but when an individual byte result is larger than
1773 /// an unsigned byte integer, 0xFF is written instead.
xmm_vpaddus_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, size: OperandSize)1774 pub fn xmm_vpaddus_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, size: OperandSize) {
1775 let dst: WritableXmm = dst.map(|r| r.into());
1776 let inst = match size {
1777 OperandSize::S8 => asm::inst::vpaddusb_b::new(dst, src1, src2).into(),
1778 OperandSize::S16 => asm::inst::vpaddusw_b::new(dst, src1, src2).into(),
1779 _ => unimplemented!(),
1780 };
1781 self.emit(Inst::External { inst });
1782 }
1783
1784 /// Add signed integers.
xmm_vpadds_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, size: OperandSize)1785 pub fn xmm_vpadds_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, size: OperandSize) {
1786 let dst: WritableXmm = dst.map(|r| r.into());
1787 let inst = match size {
1788 OperandSize::S8 => asm::inst::vpaddsb_b::new(dst, src1, src2).into(),
1789 OperandSize::S16 => asm::inst::vpaddsw_b::new(dst, src1, src2).into(),
1790 _ => unimplemented!(),
1791 };
1792 self.emit(Inst::External { inst });
1793 }
1794
xmm_vpadd_rmr( &mut self, src1: Reg, src2: &Address, dst: WritableReg, size: OperandSize, )1795 pub fn xmm_vpadd_rmr(
1796 &mut self,
1797 src1: Reg,
1798 src2: &Address,
1799 dst: WritableReg,
1800 size: OperandSize,
1801 ) {
1802 let dst: WritableXmm = dst.map(|r| r.into());
1803 let address = Self::to_synthetic_amode(src2, MemFlags::trusted());
1804 let inst = match size {
1805 OperandSize::S8 => asm::inst::vpaddb_b::new(dst, src1, address).into(),
1806 OperandSize::S16 => asm::inst::vpaddw_b::new(dst, src1, address).into(),
1807 OperandSize::S32 => asm::inst::vpaddd_b::new(dst, src1, address).into(),
1808 _ => unimplemented!(),
1809 };
1810 self.emit(Inst::External { inst });
1811 }
1812
1813 /// Adds vectors of integers in `src1` and `src2` and puts the results in
1814 /// `dst`.
xmm_vpadd_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)1815 pub fn xmm_vpadd_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
1816 let dst: WritableXmm = dst.map(|r| r.into());
1817 let inst = match size {
1818 OperandSize::S8 => asm::inst::vpaddb_b::new(dst, src1, src2).into(),
1819 OperandSize::S16 => asm::inst::vpaddw_b::new(dst, src1, src2).into(),
1820 OperandSize::S32 => asm::inst::vpaddd_b::new(dst, src1, src2).into(),
1821 OperandSize::S64 => asm::inst::vpaddq_b::new(dst, src1, src2).into(),
1822 _ => unimplemented!(),
1823 };
1824 self.emit(Inst::External { inst });
1825 }
1826
mfence(&mut self)1827 pub fn mfence(&mut self) {
1828 self.emit(Inst::External {
1829 inst: asm::inst::mfence_zo::new().into(),
1830 });
1831 }
1832
1833 /// Extract a value from `src` into `addr` determined by `lane`.
xmm_vpextr_rm( &mut self, addr: &Address, src: Reg, lane: u8, size: OperandSize, flags: MemFlags, )1834 pub(crate) fn xmm_vpextr_rm(
1835 &mut self,
1836 addr: &Address,
1837 src: Reg,
1838 lane: u8,
1839 size: OperandSize,
1840 flags: MemFlags,
1841 ) {
1842 assert!(addr.is_offset());
1843 let dst = Self::to_synthetic_amode(addr, flags);
1844 let inst = match size {
1845 OperandSize::S8 => asm::inst::vpextrb_a::new(dst, src, lane).into(),
1846 OperandSize::S16 => asm::inst::vpextrw_b::new(dst, src, lane).into(),
1847 OperandSize::S32 => asm::inst::vpextrd_a::new(dst, src, lane).into(),
1848 OperandSize::S64 => asm::inst::vpextrq_a::new(dst, src, lane).into(),
1849 _ => unimplemented!(),
1850 };
1851 self.emit(Inst::External { inst });
1852 }
1853
1854 /// Extract a value from `src` into `dst` (zero extended) determined by `lane`.
xmm_vpextr_rr(&mut self, dst: WritableReg, src: Reg, lane: u8, size: OperandSize)1855 pub fn xmm_vpextr_rr(&mut self, dst: WritableReg, src: Reg, lane: u8, size: OperandSize) {
1856 let dst: WritableGpr = dst.map(|r| r.into());
1857 let inst = match size {
1858 OperandSize::S8 => asm::inst::vpextrb_a::new(dst, src, lane).into(),
1859 OperandSize::S16 => asm::inst::vpextrw_a::new(dst, src, lane).into(),
1860 OperandSize::S32 => asm::inst::vpextrd_a::new(dst, src, lane).into(),
1861 OperandSize::S64 => asm::inst::vpextrq_a::new(dst, src, lane).into(),
1862 _ => unimplemented!(),
1863 };
1864 self.emit(Inst::External { inst });
1865 }
1866
1867 /// Copy value from `src2`, merge into `src1`, and put result in `dst` at
1868 /// the location specified in `count`.
xmm_vpinsr_rrm( &mut self, dst: WritableReg, src1: Reg, src2: &Address, count: u8, size: OperandSize, )1869 pub fn xmm_vpinsr_rrm(
1870 &mut self,
1871 dst: WritableReg,
1872 src1: Reg,
1873 src2: &Address,
1874 count: u8,
1875 size: OperandSize,
1876 ) {
1877 let src2 = Self::to_synthetic_amode(src2, MemFlags::trusted());
1878 let dst: WritableXmm = dst.map(|r| r.into());
1879
1880 let inst = match size {
1881 OperandSize::S8 => asm::inst::vpinsrb_b::new(dst, src1, src2, count).into(),
1882 OperandSize::S16 => asm::inst::vpinsrw_b::new(dst, src1, src2, count).into(),
1883 OperandSize::S32 => asm::inst::vpinsrd_b::new(dst, src1, src2, count).into(),
1884 OperandSize::S64 => asm::inst::vpinsrq_b::new(dst, src1, src2, count).into(),
1885 OperandSize::S128 => unreachable!(),
1886 };
1887 self.emit(Inst::External { inst });
1888 }
1889
1890 /// Copy value from `src2`, merge into `src1`, and put result in `dst` at
1891 /// the location specified in `count`.
xmm_vpinsr_rrr( &mut self, dst: WritableReg, src1: Reg, src2: Reg, count: u8, size: OperandSize, )1892 pub fn xmm_vpinsr_rrr(
1893 &mut self,
1894 dst: WritableReg,
1895 src1: Reg,
1896 src2: Reg,
1897 count: u8,
1898 size: OperandSize,
1899 ) {
1900 let dst: WritableXmm = dst.map(|r| r.into());
1901 let inst = match size {
1902 OperandSize::S8 => asm::inst::vpinsrb_b::new(dst, src1, src2, count).into(),
1903 OperandSize::S16 => asm::inst::vpinsrw_b::new(dst, src1, src2, count).into(),
1904 OperandSize::S32 => asm::inst::vpinsrd_b::new(dst, src1, src2, count).into(),
1905 OperandSize::S64 => asm::inst::vpinsrq_b::new(dst, src1, src2, count).into(),
1906 OperandSize::S128 => unreachable!(),
1907 };
1908 self.emit(Inst::External { inst });
1909 }
1910
1911 /// Copy a 32-bit float in `src2`, merge into `src1`, and put result in `dst`.
xmm_vinsertps_rrm(&mut self, dst: WritableReg, src1: Reg, address: &Address, imm: u8)1912 pub fn xmm_vinsertps_rrm(&mut self, dst: WritableReg, src1: Reg, address: &Address, imm: u8) {
1913 let dst: WritableXmm = dst.map(|r| r.into());
1914 let address = Self::to_synthetic_amode(address, MemFlags::trusted());
1915 let inst = asm::inst::vinsertps_b::new(dst, src1, address, imm).into();
1916 self.emit(Inst::External { inst });
1917 }
1918
1919 /// Copy a 32-bit float in `src2`, merge into `src1`, and put result in `dst`.
xmm_vinsertps_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, imm: u8)1920 pub fn xmm_vinsertps_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, imm: u8) {
1921 let dst: WritableXmm = dst.map(|r| r.into());
1922 let inst = asm::inst::vinsertps_b::new(dst, src1, src2, imm).into();
1923 self.emit(Inst::External { inst });
1924 }
1925
1926 /// Moves lower 64-bit float from `src2` into lower 64-bits of `dst` and the
1927 /// upper 64-bits in `src1` into the upper 64-bits of `dst`.
xmm_vmovsd_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg)1928 pub fn xmm_vmovsd_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg) {
1929 let dst: WritableXmm = dst.map(|r| r.into());
1930 let inst = asm::inst::vmovsd_b::new(dst, src1, src2).into();
1931 self.emit(Inst::External { inst });
1932 }
1933
1934 /// Moves 64-bit float from `src` into lower 64-bits of `dst`.
1935 /// Zeroes out the upper 64 bits of `dst`.
xmm_vmovsd_rm(&mut self, dst: WritableReg, src: &Address)1936 pub fn xmm_vmovsd_rm(&mut self, dst: WritableReg, src: &Address) {
1937 let src = Self::to_synthetic_amode(src, MemFlags::trusted());
1938 let dst: WritableXmm = dst.map(|r| r.into());
1939 let inst = asm::inst::vmovsd_d::new(dst, src).into();
1940 self.emit(Inst::External { inst });
1941 }
1942
1943 /// Moves two 32-bit floats from `src2` to the upper 64-bits of `dst`.
1944 /// Copies two 32-bit floats from the lower 64-bits of `src1` to lower
1945 /// 64-bits of `dst`.
xmm_vmovlhps_rrm(&mut self, dst: WritableReg, src1: Reg, src2: &Address)1946 pub fn xmm_vmovlhps_rrm(&mut self, dst: WritableReg, src1: Reg, src2: &Address) {
1947 let src2 = Self::to_synthetic_amode(src2, MemFlags::trusted());
1948 let dst: WritableXmm = dst.map(|r| r.into());
1949 let inst = asm::inst::vmovhps_b::new(dst, src1, src2).into();
1950 self.emit(Inst::External { inst });
1951 }
1952
1953 /// Moves two 32-bit floats from the lower 64-bits of `src2` to the upper
1954 /// 64-bits of `dst`. Copies two 32-bit floats from the lower 64-bits of
1955 /// `src1` to lower 64-bits of `dst`.
xmm_vmovlhps_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg)1956 pub fn xmm_vmovlhps_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg) {
1957 let dst: WritableXmm = dst.map(|r| r.into());
1958 let inst = asm::inst::vmovlhps_rvm::new(dst, src1, src2).into();
1959 self.emit(Inst::External { inst });
1960 }
1961
1962 /// Move unaligned packed integer values from address `src` to `dst`.
xmm_vmovdqu_mr(&mut self, src: &Address, dst: WritableReg, flags: MemFlags)1963 pub fn xmm_vmovdqu_mr(&mut self, src: &Address, dst: WritableReg, flags: MemFlags) {
1964 let src = Self::to_synthetic_amode(src, flags);
1965 let dst: WritableXmm = dst.map(|r| r.into());
1966 let inst = asm::inst::vmovdqu_a::new(dst, src).into();
1967 self.emit(Inst::External { inst });
1968 }
1969
1970 /// Move integer from `src` to xmm register `dst` using an AVX instruction.
avx_gpr_to_xmm(&mut self, src: Reg, dst: WritableReg, size: OperandSize)1971 pub fn avx_gpr_to_xmm(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1972 let dst: WritableXmm = dst.map(|r| r.into());
1973 let inst = match size {
1974 OperandSize::S32 => asm::inst::vmovd_a::new(dst, src).into(),
1975 OperandSize::S64 => asm::inst::vmovq_a::new(dst, src).into(),
1976 _ => unreachable!(),
1977 };
1978
1979 self.emit(Inst::External { inst });
1980 }
1981
xmm_vptest(&mut self, src1: Reg, src2: Reg)1982 pub fn xmm_vptest(&mut self, src1: Reg, src2: Reg) {
1983 let inst = asm::inst::vptest_rm::new(src1, src2).into();
1984 self.emit(Inst::External { inst });
1985 }
1986
1987 /// Converts vector of integers into vector of floating values.
xmm_vcvt_rr(&mut self, src: Reg, dst: WritableReg, kind: VcvtKind)1988 pub fn xmm_vcvt_rr(&mut self, src: Reg, dst: WritableReg, kind: VcvtKind) {
1989 let dst: WritableXmm = dst.map(|x| x.into());
1990 let inst = match kind {
1991 VcvtKind::I32ToF32 => asm::inst::vcvtdq2ps_a::new(dst, src).into(),
1992 VcvtKind::I32ToF64 => asm::inst::vcvtdq2pd_a::new(dst, src).into(),
1993 VcvtKind::F64ToF32 => asm::inst::vcvtpd2ps_a::new(dst, src).into(),
1994 VcvtKind::F64ToI32 => asm::inst::vcvttpd2dq_a::new(dst, src).into(),
1995 VcvtKind::F32ToF64 => asm::inst::vcvtps2pd_a::new(dst, src).into(),
1996 VcvtKind::F32ToI32 => asm::inst::vcvttps2dq_a::new(dst, src).into(),
1997 };
1998 self.emit(Inst::External { inst });
1999 }
2000
2001 /// Subtract floats in vector `src1` to floats in vector `src2`.
xmm_vsubp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2002 pub fn xmm_vsubp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2003 let dst: WritableXmm = dst.map(|r| r.into());
2004 let inst = match size {
2005 OperandSize::S32 => asm::inst::vsubps_b::new(dst, src1, src2).into(),
2006 OperandSize::S64 => asm::inst::vsubpd_b::new(dst, src1, src2).into(),
2007 _ => unimplemented!(),
2008 };
2009 self.emit(Inst::External { inst });
2010 }
2011
2012 /// Subtract integers in vector `src1` from integers in vector `src2`.
xmm_vpsub_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2013 pub fn xmm_vpsub_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2014 let dst: WritableXmm = dst.map(|r| r.into());
2015 let inst = match size {
2016 OperandSize::S8 => asm::inst::vpsubb_b::new(dst, src1, src2).into(),
2017 OperandSize::S16 => asm::inst::vpsubw_b::new(dst, src1, src2).into(),
2018 OperandSize::S32 => asm::inst::vpsubd_b::new(dst, src1, src2).into(),
2019 OperandSize::S64 => asm::inst::vpsubq_b::new(dst, src1, src2).into(),
2020 _ => unimplemented!(),
2021 };
2022 self.emit(Inst::External { inst });
2023 }
2024
2025 /// Subtract unsigned integers with unsigned saturation.
xmm_vpsubus_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, size: OperandSize)2026 pub fn xmm_vpsubus_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, size: OperandSize) {
2027 let dst: WritableXmm = dst.map(|r| r.into());
2028 let inst = match size {
2029 OperandSize::S8 => asm::inst::vpsubusb_b::new(dst, src1, src2).into(),
2030 OperandSize::S16 => asm::inst::vpsubusw_b::new(dst, src1, src2).into(),
2031 _ => unimplemented!(),
2032 };
2033 self.emit(Inst::External { inst });
2034 }
2035
2036 /// Subtract signed integers with signed saturation.
xmm_vpsubs_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, size: OperandSize)2037 pub fn xmm_vpsubs_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, size: OperandSize) {
2038 let dst: WritableXmm = dst.map(|r| r.into());
2039 let inst = match size {
2040 OperandSize::S8 => asm::inst::vpsubsb_b::new(dst, src1, src2).into(),
2041 OperandSize::S16 => asm::inst::vpsubsw_b::new(dst, src1, src2).into(),
2042 _ => unimplemented!(),
2043 };
2044 self.emit(Inst::External { inst });
2045 }
2046
2047 /// Add floats in vector `src1` to floats in vector `src2`.
xmm_vaddp_rrm( &mut self, src1: Reg, src2: &Address, dst: WritableReg, size: OperandSize, )2048 pub fn xmm_vaddp_rrm(
2049 &mut self,
2050 src1: Reg,
2051 src2: &Address,
2052 dst: WritableReg,
2053 size: OperandSize,
2054 ) {
2055 let dst: WritableXmm = dst.map(|r| r.into());
2056 let address = Self::to_synthetic_amode(src2, MemFlags::trusted());
2057 let inst = match size {
2058 OperandSize::S32 => asm::inst::vaddps_b::new(dst, src1, address).into(),
2059 OperandSize::S64 => asm::inst::vaddpd_b::new(dst, src1, address).into(),
2060 _ => unimplemented!(),
2061 };
2062 self.emit(Inst::External { inst });
2063 }
2064
2065 /// Add floats in vector `src1` to floats in vector `src2`.
xmm_vaddp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2066 pub fn xmm_vaddp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2067 let dst: WritableXmm = dst.map(|r| r.into());
2068 let inst = match size {
2069 OperandSize::S32 => asm::inst::vaddps_b::new(dst, src1, src2).into(),
2070 OperandSize::S64 => asm::inst::vaddpd_b::new(dst, src1, src2).into(),
2071 _ => unimplemented!(),
2072 };
2073 self.emit(Inst::External { inst });
2074 }
2075
2076 /// Compare vector register `lhs` with a vector of integers in `rhs` for
2077 /// equality between packed integers and write the resulting vector into
2078 /// `dst`.
xmm_vpcmpeq_rrm( &mut self, dst: WritableReg, lhs: Reg, address: &Address, size: OperandSize, )2079 pub fn xmm_vpcmpeq_rrm(
2080 &mut self,
2081 dst: WritableReg,
2082 lhs: Reg,
2083 address: &Address,
2084 size: OperandSize,
2085 ) {
2086 let dst: WritableXmm = dst.map(|r| r.into());
2087 let address = Self::to_synthetic_amode(address, MemFlags::trusted());
2088 let inst = match size {
2089 OperandSize::S8 => asm::inst::vpcmpeqb_b::new(dst, lhs, address).into(),
2090 OperandSize::S16 => asm::inst::vpcmpeqw_b::new(dst, lhs, address).into(),
2091 OperandSize::S32 => asm::inst::vpcmpeqd_b::new(dst, lhs, address).into(),
2092 OperandSize::S64 => asm::inst::vpcmpeqq_b::new(dst, lhs, address).into(),
2093 _ => unimplemented!(),
2094 };
2095 self.emit(Inst::External { inst });
2096 }
2097
2098 /// Compare vector registers `lhs` and `rhs` for equality between packed
2099 /// integers and write the resulting vector into `dst`.
xmm_vpcmpeq_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize)2100 pub fn xmm_vpcmpeq_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
2101 let dst: WritableXmm = dst.map(|r| r.into());
2102 let inst = match size {
2103 OperandSize::S8 => asm::inst::vpcmpeqb_b::new(dst, lhs, rhs).into(),
2104 OperandSize::S16 => asm::inst::vpcmpeqw_b::new(dst, lhs, rhs).into(),
2105 OperandSize::S32 => asm::inst::vpcmpeqd_b::new(dst, lhs, rhs).into(),
2106 OperandSize::S64 => asm::inst::vpcmpeqq_b::new(dst, lhs, rhs).into(),
2107 _ => unimplemented!(),
2108 };
2109 self.emit(Inst::External { inst });
2110 }
2111
2112 /// Performs a greater than comparison with vectors of signed integers in
2113 /// `lhs` and `rhs` and puts the results in `dst`.
xmm_vpcmpgt_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize)2114 pub fn xmm_vpcmpgt_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
2115 let dst: WritableXmm = dst.map(|r| r.into());
2116 let inst = match size {
2117 OperandSize::S8 => asm::inst::vpcmpgtb_b::new(dst, lhs, rhs).into(),
2118 OperandSize::S16 => asm::inst::vpcmpgtw_b::new(dst, lhs, rhs).into(),
2119 OperandSize::S32 => asm::inst::vpcmpgtd_b::new(dst, lhs, rhs).into(),
2120 OperandSize::S64 => asm::inst::vpcmpgtq_b::new(dst, lhs, rhs).into(),
2121 _ => unimplemented!(),
2122 };
2123 self.emit(Inst::External { inst });
2124 }
2125
2126 /// Performs a max operation with vectors of signed integers in `lhs` and
2127 /// `rhs` and puts the results in `dst`.
xmm_vpmaxs_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize)2128 pub fn xmm_vpmaxs_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
2129 let dst: WritableXmm = dst.map(|r| r.into());
2130 let inst = match size {
2131 OperandSize::S8 => asm::inst::vpmaxsb_b::new(dst, lhs, rhs).into(),
2132 OperandSize::S16 => asm::inst::vpmaxsw_b::new(dst, lhs, rhs).into(),
2133 OperandSize::S32 => asm::inst::vpmaxsd_b::new(dst, lhs, rhs).into(),
2134 _ => unimplemented!(),
2135 };
2136 self.emit(Inst::External { inst });
2137 }
2138
2139 /// Performs a max operation with vectors of unsigned integers in `lhs` and
2140 /// `rhs` and puts the results in `dst`.
xmm_vpmaxu_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize)2141 pub fn xmm_vpmaxu_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
2142 let dst: WritableXmm = dst.map(|r| r.into());
2143 let inst = match size {
2144 OperandSize::S8 => asm::inst::vpmaxub_b::new(dst, lhs, rhs).into(),
2145 OperandSize::S16 => asm::inst::vpmaxuw_b::new(dst, lhs, rhs).into(),
2146 OperandSize::S32 => asm::inst::vpmaxud_b::new(dst, lhs, rhs).into(),
2147 _ => unimplemented!(),
2148 };
2149 self.emit(Inst::External { inst });
2150 }
2151
2152 /// Performs a min operation with vectors of signed integers in `lhs` and
2153 /// `rhs` and puts the results in `dst`.
xmm_vpmins_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize)2154 pub fn xmm_vpmins_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
2155 let dst: WritableXmm = dst.map(|r| r.into());
2156 let inst = match size {
2157 OperandSize::S8 => asm::inst::vpminsb_b::new(dst, lhs, rhs).into(),
2158 OperandSize::S16 => asm::inst::vpminsw_b::new(dst, lhs, rhs).into(),
2159 OperandSize::S32 => asm::inst::vpminsd_b::new(dst, lhs, rhs).into(),
2160 _ => unimplemented!(),
2161 };
2162 self.emit(Inst::External { inst });
2163 }
2164
2165 /// Performs a min operation with vectors of unsigned integers in `lhs` and
2166 /// `rhs` and puts the results in `dst`.
xmm_vpminu_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize)2167 pub fn xmm_vpminu_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
2168 let dst: WritableXmm = dst.map(|r| r.into());
2169 let inst = match size {
2170 OperandSize::S8 => asm::inst::vpminub_b::new(dst, lhs, rhs).into(),
2171 OperandSize::S16 => asm::inst::vpminuw_b::new(dst, lhs, rhs).into(),
2172 OperandSize::S32 => asm::inst::vpminud_b::new(dst, lhs, rhs).into(),
2173 _ => unimplemented!(),
2174 };
2175 self.emit(Inst::External { inst });
2176 }
2177
2178 /// Performs a comparison operation between vectors of floats in `lhs` and
2179 /// `rhs` and puts the results in `dst`.
xmm_vcmpp_rrr( &mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize, kind: VcmpKind, )2180 pub fn xmm_vcmpp_rrr(
2181 &mut self,
2182 dst: WritableReg,
2183 lhs: Reg,
2184 rhs: Reg,
2185 size: OperandSize,
2186 kind: VcmpKind,
2187 ) {
2188 let dst: WritableXmm = dst.map(|r| r.into());
2189 let imm = match kind {
2190 VcmpKind::Eq => 0,
2191 VcmpKind::Lt => 1,
2192 VcmpKind::Le => 2,
2193 VcmpKind::Unord => 3,
2194 VcmpKind::Ne => 4,
2195 };
2196 let inst = match size {
2197 OperandSize::S32 => asm::inst::vcmpps_b::new(dst, lhs, rhs, imm).into(),
2198 OperandSize::S64 => asm::inst::vcmppd_b::new(dst, lhs, rhs, imm).into(),
2199 _ => unimplemented!(),
2200 };
2201 self.emit(Inst::External { inst });
2202 }
2203
2204 /// Performs a subtraction on two vectors of floats and puts the results in
2205 /// `dst`.
xmm_vsub_rrm(&mut self, src1: Reg, src2: &Address, dst: WritableReg, size: OperandSize)2206 pub fn xmm_vsub_rrm(&mut self, src1: Reg, src2: &Address, dst: WritableReg, size: OperandSize) {
2207 let dst: WritableXmm = dst.map(|r| r.into());
2208 let address = Self::to_synthetic_amode(src2, MemFlags::trusted());
2209 let inst = match size {
2210 OperandSize::S64 => asm::inst::vsubpd_b::new(dst, src1, address).into(),
2211 _ => unimplemented!(),
2212 };
2213 self.emit(Inst::External { inst });
2214 }
2215
2216 /// Performs a subtraction on two vectors of floats and puts the results in
2217 /// `dst`.
xmm_vsub_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2218 pub fn xmm_vsub_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2219 let dst: WritableXmm = dst.map(|r| r.into());
2220 let inst = match size {
2221 OperandSize::S32 => asm::inst::vsubps_b::new(dst, src1, src2).into(),
2222 OperandSize::S64 => asm::inst::vsubpd_b::new(dst, src1, src2).into(),
2223 _ => unimplemented!(),
2224 };
2225 self.emit(Inst::External { inst });
2226 }
2227
2228 /// Converts a vector of signed integers into a vector of narrower integers
2229 /// using saturation to handle overflow.
xmm_vpackss_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2230 pub fn xmm_vpackss_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2231 let dst: WritableXmm = dst.map(|r| r.into());
2232 let inst = match size {
2233 OperandSize::S8 => asm::inst::vpacksswb_b::new(dst, src1, src2).into(),
2234 OperandSize::S16 => asm::inst::vpackssdw_b::new(dst, src1, src2).into(),
2235 _ => unimplemented!(),
2236 };
2237 self.emit(Inst::External { inst });
2238 }
2239
2240 /// Converts a vector of unsigned integers into a vector of narrower
2241 /// integers using saturation to handle overflow.
xmm_vpackus_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2242 pub fn xmm_vpackus_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2243 let dst: WritableXmm = dst.map(|r| r.into());
2244 let inst = match size {
2245 OperandSize::S8 => asm::inst::vpackuswb_b::new(dst, src1, src2).into(),
2246 OperandSize::S16 => asm::inst::vpackusdw_b::new(dst, src1, src2).into(),
2247 _ => unimplemented!(),
2248 };
2249 self.emit(Inst::External { inst });
2250 }
2251
2252 /// Concatenates `src1` and `src2` and shifts right by `imm` and puts
2253 /// result in `dst`.
xmm_vpalignr_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, imm: u8)2254 pub fn xmm_vpalignr_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, imm: u8) {
2255 let dst: WritableXmm = dst.map(|r| r.into());
2256 let inst = asm::inst::vpalignr_b::new(dst, src1, src2, imm).into();
2257 self.emit(Inst::External { inst });
2258 }
2259
2260 /// Takes the lower lanes of vectors of floats in `src1` and `src2` and
2261 /// interleaves them in `dst`.
xmm_vunpcklp_rrm( &mut self, src1: Reg, src2: &Address, dst: WritableReg, size: OperandSize, )2262 pub fn xmm_vunpcklp_rrm(
2263 &mut self,
2264 src1: Reg,
2265 src2: &Address,
2266 dst: WritableReg,
2267 size: OperandSize,
2268 ) {
2269 let dst: WritableXmm = dst.map(|r| r.into());
2270 let address = Self::to_synthetic_amode(src2, MemFlags::trusted());
2271 let inst = match size {
2272 OperandSize::S32 => asm::inst::vunpcklps_b::new(dst, src1, address).into(),
2273 _ => unimplemented!(),
2274 };
2275 self.emit(Inst::External { inst });
2276 }
2277
2278 /// Unpacks and interleaves high order data of floats in `src1` and `src2`
2279 /// and puts the results in `dst`.
xmm_vunpckhp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2280 pub fn xmm_vunpckhp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2281 let dst: WritableXmm = dst.map(|r| r.into());
2282 let inst = match size {
2283 OperandSize::S32 => asm::inst::vunpckhps_b::new(dst, src1, src2).into(),
2284 _ => unimplemented!(),
2285 };
2286 self.emit(Inst::External { inst });
2287 }
2288
2289 /// Unpacks and interleaves the lower lanes of vectors of integers in `src1`
2290 /// and `src2` and puts the results in `dst`.
xmm_vpunpckl_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2291 pub fn xmm_vpunpckl_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2292 let dst: WritableXmm = dst.map(|r| r.into());
2293 let inst = match size {
2294 OperandSize::S8 => asm::inst::vpunpcklbw_b::new(dst, src1, src2).into(),
2295 OperandSize::S16 => asm::inst::vpunpcklwd_b::new(dst, src1, src2).into(),
2296 _ => unimplemented!(),
2297 };
2298 self.emit(Inst::External { inst });
2299 }
2300
2301 /// Unpacks and interleaves the higher lanes of vectors of integers in
2302 /// `src1` and `src2` and puts the results in `dst`.
xmm_vpunpckh_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2303 pub fn xmm_vpunpckh_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2304 let dst: WritableXmm = dst.map(|r| r.into());
2305 let inst = match size {
2306 OperandSize::S8 => asm::inst::vpunpckhbw_b::new(dst, src1, src2).into(),
2307 OperandSize::S16 => asm::inst::vpunpckhwd_b::new(dst, src1, src2).into(),
2308 _ => unimplemented!(),
2309 };
2310 self.emit(Inst::External { inst });
2311 }
2312
vpmullq(&mut self, src1: Reg, src2: Reg, dst: WritableReg)2313 pub(crate) fn vpmullq(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
2314 let dst: WritableXmm = dst.map(|r| r.into());
2315 let inst = asm::inst::vpmullq_c::new(dst, src1, src2).into();
2316 self.emit(Inst::External { inst });
2317 }
2318
2319 /// Creates a mask made up of the most significant bit of each byte of
2320 /// `src` and stores the result in `dst`.
xmm_vpmovmsk_rr( &mut self, src: Reg, dst: WritableReg, src_size: OperandSize, dst_size: OperandSize, )2321 pub fn xmm_vpmovmsk_rr(
2322 &mut self,
2323 src: Reg,
2324 dst: WritableReg,
2325 src_size: OperandSize,
2326 dst_size: OperandSize,
2327 ) {
2328 assert_eq!(dst_size, OperandSize::S32);
2329 let dst: WritableGpr = dst.map(|r| r.into());
2330 let inst = match src_size {
2331 OperandSize::S8 => asm::inst::vpmovmskb_rm::new(dst, src).into(),
2332 _ => unimplemented!(),
2333 };
2334
2335 self.emit(Inst::External { inst });
2336 }
2337
2338 /// Creates a mask made up of the most significant bit of each byte of
2339 /// in `src` and stores the result in `dst`.
xmm_vmovskp_rr( &mut self, src: Reg, dst: WritableReg, src_size: OperandSize, dst_size: OperandSize, )2340 pub fn xmm_vmovskp_rr(
2341 &mut self,
2342 src: Reg,
2343 dst: WritableReg,
2344 src_size: OperandSize,
2345 dst_size: OperandSize,
2346 ) {
2347 assert_eq!(dst_size, OperandSize::S32);
2348 let dst: WritableGpr = dst.map(|r| r.into());
2349 let inst = match src_size {
2350 OperandSize::S32 => asm::inst::vmovmskps_rm::new(dst, src).into(),
2351 OperandSize::S64 => asm::inst::vmovmskpd_rm::new(dst, src).into(),
2352 _ => unimplemented!(),
2353 };
2354
2355 self.emit(Inst::External { inst });
2356 }
2357
2358 /// Compute the absolute value of elements in vector `src` and put the
2359 /// results in `dst`.
xmm_vpabs_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)2360 pub fn xmm_vpabs_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
2361 let dst: WritableXmm = dst.map(|r| r.into());
2362 let inst = match size {
2363 OperandSize::S8 => asm::inst::vpabsb_a::new(dst, src).into(),
2364 OperandSize::S16 => asm::inst::vpabsw_a::new(dst, src).into(),
2365 OperandSize::S32 => asm::inst::vpabsd_a::new(dst, src).into(),
2366 _ => unimplemented!(),
2367 };
2368 self.emit(Inst::External { inst });
2369 }
2370
2371 /// Arithmetically (sign preserving) right shift on vector in `src` by
2372 /// `amount` with result written to `dst`.
xmm_vpsra_rrr(&mut self, src: Reg, amount: Reg, dst: WritableReg, size: OperandSize)2373 pub fn xmm_vpsra_rrr(&mut self, src: Reg, amount: Reg, dst: WritableReg, size: OperandSize) {
2374 let dst: WritableXmm = dst.map(|r| r.into());
2375 let inst = match size {
2376 OperandSize::S16 => asm::inst::vpsraw_c::new(dst, src, amount).into(),
2377 OperandSize::S32 => asm::inst::vpsrad_c::new(dst, src, amount).into(),
2378 _ => unimplemented!(),
2379 };
2380 self.emit(Inst::External { inst });
2381 }
2382
2383 /// Arithmetically (sign preserving) right shift on vector in `src` by
2384 /// `imm` with result written to `dst`.
xmm_vpsra_rri(&mut self, src: Reg, dst: WritableReg, imm: u32, size: OperandSize)2385 pub fn xmm_vpsra_rri(&mut self, src: Reg, dst: WritableReg, imm: u32, size: OperandSize) {
2386 let dst: WritableXmm = dst.map(|r| r.into());
2387 let imm = u8::try_from(imm).expect("immediate must fit in 8 bits");
2388 let inst = match size {
2389 OperandSize::S32 => asm::inst::vpsrad_d::new(dst, src, imm).into(),
2390 _ => unimplemented!(),
2391 };
2392 self.emit(Inst::External { inst });
2393 }
2394
2395 /// Shift vector data left by `imm`.
xmm_vpsll_rri(&mut self, src: Reg, dst: WritableReg, imm: u32, size: OperandSize)2396 pub fn xmm_vpsll_rri(&mut self, src: Reg, dst: WritableReg, imm: u32, size: OperandSize) {
2397 let dst: WritableXmm = dst.map(|r| r.into());
2398 let imm = u8::try_from(imm).expect("immediate must fit in 8 bits");
2399 let inst = match size {
2400 OperandSize::S32 => asm::inst::vpslld_d::new(dst, src, imm).into(),
2401 OperandSize::S64 => asm::inst::vpsllq_d::new(dst, src, imm).into(),
2402 _ => unimplemented!(),
2403 };
2404 self.emit(Inst::External { inst });
2405 }
2406
2407 /// Shift vector data left by `amount`.
xmm_vpsll_rrr(&mut self, src: Reg, amount: Reg, dst: WritableReg, size: OperandSize)2408 pub fn xmm_vpsll_rrr(&mut self, src: Reg, amount: Reg, dst: WritableReg, size: OperandSize) {
2409 let dst: WritableXmm = dst.map(|r| r.into());
2410 let inst = match size {
2411 OperandSize::S16 => asm::inst::vpsllw_c::new(dst, src, amount).into(),
2412 OperandSize::S32 => asm::inst::vpslld_c::new(dst, src, amount).into(),
2413 OperandSize::S64 => asm::inst::vpsllq_c::new(dst, src, amount).into(),
2414 _ => unimplemented!(),
2415 };
2416 self.emit(Inst::External { inst });
2417 }
2418
2419 /// Shift vector data right by `imm`.
xmm_vpsrl_rri(&mut self, src: Reg, dst: WritableReg, imm: u32, size: OperandSize)2420 pub fn xmm_vpsrl_rri(&mut self, src: Reg, dst: WritableReg, imm: u32, size: OperandSize) {
2421 let dst: WritableXmm = dst.map(|r| r.into());
2422 let imm = u8::try_from(imm).expect("immediate must fit in 8 bits");
2423 let inst = match size {
2424 OperandSize::S16 => asm::inst::vpsrlw_d::new(dst, src, imm).into(),
2425 OperandSize::S32 => asm::inst::vpsrld_d::new(dst, src, imm).into(),
2426 OperandSize::S64 => asm::inst::vpsrlq_d::new(dst, src, imm).into(),
2427 _ => unimplemented!(),
2428 };
2429 self.emit(Inst::External { inst });
2430 }
2431
2432 /// Shift vector data right by `amount`.
xmm_vpsrl_rrr(&mut self, src: Reg, amount: Reg, dst: WritableReg, size: OperandSize)2433 pub fn xmm_vpsrl_rrr(&mut self, src: Reg, amount: Reg, dst: WritableReg, size: OperandSize) {
2434 let dst: WritableXmm = dst.map(|r| r.into());
2435 let inst = match size {
2436 OperandSize::S16 => asm::inst::vpsrlw_c::new(dst, src, amount).into(),
2437 OperandSize::S32 => asm::inst::vpsrld_c::new(dst, src, amount).into(),
2438 OperandSize::S64 => asm::inst::vpsrlq_c::new(dst, src, amount).into(),
2439 _ => unimplemented!(),
2440 };
2441 self.emit(Inst::External { inst });
2442 }
2443
2444 /// Perform an `and` operation on vectors of floats in `src1` and `src2`
2445 /// and put the results in `dst`.
xmm_vandp_rrm( &mut self, src1: Reg, src2: &Address, dst: WritableReg, size: OperandSize, )2446 pub fn xmm_vandp_rrm(
2447 &mut self,
2448 src1: Reg,
2449 src2: &Address,
2450 dst: WritableReg,
2451 size: OperandSize,
2452 ) {
2453 let dst: WritableXmm = dst.map(|r| r.into());
2454 let address = Self::to_synthetic_amode(src2, MemFlags::trusted());
2455 let inst = match size {
2456 OperandSize::S32 => asm::inst::vandps_b::new(dst, src1, address).into(),
2457 OperandSize::S64 => asm::inst::vandpd_b::new(dst, src1, address).into(),
2458 _ => unimplemented!(),
2459 };
2460 self.emit(Inst::External { inst });
2461 }
2462
2463 /// Perform an `and` operation on vectors of floats in `src1` and `src2`
2464 /// and put the results in `dst`.
xmm_vandp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2465 pub fn xmm_vandp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2466 let dst: WritableXmm = dst.map(|r| r.into());
2467 let inst = match size {
2468 OperandSize::S32 => asm::inst::vandps_b::new(dst, src1, src2).into(),
2469 OperandSize::S64 => asm::inst::vandpd_b::new(dst, src1, src2).into(),
2470 _ => unimplemented!(),
2471 };
2472 self.emit(Inst::External { inst });
2473 }
2474
2475 /// Performs a bitwise `and` operation on the vectors in `src1` and `src2`
2476 /// and stores the results in `dst`.
xmm_vpand_rrm(&mut self, src1: Reg, src2: &Address, dst: WritableReg)2477 pub fn xmm_vpand_rrm(&mut self, src1: Reg, src2: &Address, dst: WritableReg) {
2478 let dst: WritableXmm = dst.map(|r| r.into());
2479 let address = Self::to_synthetic_amode(&src2, MemFlags::trusted());
2480 let inst = asm::inst::vpand_b::new(dst, src1, address).into();
2481 self.emit(Inst::External { inst });
2482 }
2483
2484 /// Performs a bitwise `and` operation on the vectors in `src1` and `src2`
2485 /// and stores the results in `dst`.
xmm_vpand_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg)2486 pub fn xmm_vpand_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
2487 let dst: WritableXmm = dst.map(|r| r.into());
2488 let inst = asm::inst::vpand_b::new(dst, src1, src2).into();
2489 self.emit(Inst::External { inst });
2490 }
2491
2492 /// Perform an `and not` operation on vectors of floats in `src1` and
2493 /// `src2` and put the results in `dst`.
xmm_vandnp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2494 pub fn xmm_vandnp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2495 let dst: WritableXmm = dst.map(|r| r.into());
2496 let inst = match size {
2497 OperandSize::S32 => asm::inst::vandnps_b::new(dst, src1, src2).into(),
2498 OperandSize::S64 => asm::inst::vandnpd_b::new(dst, src1, src2).into(),
2499 _ => unimplemented!(),
2500 };
2501 self.emit(Inst::External { inst });
2502 }
2503
2504 /// Perform an `and not` operation on vectors in `src1` and `src2` and put
2505 /// the results in `dst`.
xmm_vpandn_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg)2506 pub fn xmm_vpandn_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
2507 let dst: WritableXmm = dst.map(|r| r.into());
2508 let inst = asm::inst::vpandn_b::new(dst, src1, src2).into();
2509 self.emit(Inst::External { inst });
2510 }
2511
2512 /// Perform an or operation for the vectors of floats in `src1` and `src2`
2513 /// and put the results in `dst`.
xmm_vorp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2514 pub fn xmm_vorp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2515 let dst: WritableXmm = dst.map(|r| r.into());
2516 let inst = match size {
2517 OperandSize::S32 => asm::inst::vorps_b::new(dst, src1, src2).into(),
2518 OperandSize::S64 => asm::inst::vorpd_b::new(dst, src1, src2).into(),
2519 _ => unimplemented!(),
2520 };
2521 self.emit(Inst::External { inst });
2522 }
2523
2524 /// Bitwise OR of `src1` and `src2`.
xmm_vpor_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg)2525 pub fn xmm_vpor_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg) {
2526 let dst: WritableXmm = dst.map(|r| r.into());
2527 let inst = asm::inst::vpor_b::new(dst, src1, src2).into();
2528 self.emit(Inst::External { inst });
2529 }
2530
2531 /// Bitwise logical xor of vectors of floats in `src1` and `src2` and puts
2532 /// the results in `dst`.
xmm_vxorp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2533 pub fn xmm_vxorp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2534 let dst: WritableXmm = dst.map(|r| r.into());
2535 let inst = match size {
2536 OperandSize::S32 => asm::inst::vxorps_b::new(dst, src1, src2).into(),
2537 OperandSize::S64 => asm::inst::vxorpd_b::new(dst, src1, src2).into(),
2538 _ => unimplemented!(),
2539 };
2540 self.emit(Inst::External { inst });
2541 }
2542
2543 /// Perform a logical on vector in `src` and in `address` and put the
2544 /// results in `dst`.
xmm_vpxor_rmr(&mut self, src: Reg, address: &Address, dst: WritableReg)2545 pub fn xmm_vpxor_rmr(&mut self, src: Reg, address: &Address, dst: WritableReg) {
2546 let dst: WritableXmm = dst.map(|r| r.into());
2547 let address = Self::to_synthetic_amode(address, MemFlags::trusted());
2548 let inst = asm::inst::vpxor_b::new(dst, src, address).into();
2549 self.emit(Inst::External { inst });
2550 }
2551
2552 /// Perform a logical on vectors in `src1` and `src2` and put the results in
2553 /// `dst`.
xmm_vpxor_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg)2554 pub fn xmm_vpxor_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
2555 let dst: WritableXmm = dst.map(|r| r.into());
2556 let inst = asm::inst::vpxor_b::new(dst, src1, src2).into();
2557 self.emit(Inst::External { inst });
2558 }
2559
2560 /// Perform a max operation across two vectors of floats and put the
2561 /// results in `dst`.
xmm_vmaxp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2562 pub fn xmm_vmaxp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2563 let dst: WritableXmm = dst.map(|r| r.into());
2564 let inst = match size {
2565 OperandSize::S32 => asm::inst::vmaxps_b::new(dst, src1, src2).into(),
2566 OperandSize::S64 => asm::inst::vmaxpd_b::new(dst, src1, src2).into(),
2567 _ => unimplemented!(),
2568 };
2569 self.emit(Inst::External { inst });
2570 }
2571
2572 // Perform a min operation across two vectors of floats and put the
2573 // results in `dst`.
xmm_vminp_rrm( &mut self, src1: Reg, src2: &Address, dst: WritableReg, size: OperandSize, )2574 pub fn xmm_vminp_rrm(
2575 &mut self,
2576 src1: Reg,
2577 src2: &Address,
2578 dst: WritableReg,
2579 size: OperandSize,
2580 ) {
2581 let dst: WritableXmm = dst.map(|r| r.into());
2582 let address = Self::to_synthetic_amode(src2, MemFlags::trusted());
2583 let inst = match size {
2584 OperandSize::S32 => asm::inst::vminps_b::new(dst, src1, address).into(),
2585 OperandSize::S64 => asm::inst::vminpd_b::new(dst, src1, address).into(),
2586 _ => unimplemented!(),
2587 };
2588 self.emit(Inst::External { inst });
2589 }
2590
2591 // Perform a min operation across two vectors of floats and put the
2592 // results in `dst`.
xmm_vminp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2593 pub fn xmm_vminp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2594 let dst: WritableXmm = dst.map(|r| r.into());
2595 let inst = match size {
2596 OperandSize::S32 => asm::inst::vminps_b::new(dst, src1, src2).into(),
2597 OperandSize::S64 => asm::inst::vminpd_b::new(dst, src1, src2).into(),
2598 _ => unimplemented!(),
2599 };
2600 self.emit(Inst::External { inst });
2601 }
2602
2603 // Round a vector of floats.
xmm_vroundp_rri( &mut self, src: Reg, dst: WritableReg, mode: VroundMode, size: OperandSize, )2604 pub fn xmm_vroundp_rri(
2605 &mut self,
2606 src: Reg,
2607 dst: WritableReg,
2608 mode: VroundMode,
2609 size: OperandSize,
2610 ) {
2611 let dst: WritableXmm = dst.map(|r| r.into());
2612 let imm = match mode {
2613 VroundMode::TowardNearest => 0,
2614 VroundMode::TowardNegativeInfinity => 1,
2615 VroundMode::TowardPositiveInfinity => 2,
2616 VroundMode::TowardZero => 3,
2617 };
2618
2619 let inst = match size {
2620 OperandSize::S32 => asm::inst::vroundps_rmi::new(dst, src, imm).into(),
2621 OperandSize::S64 => asm::inst::vroundpd_rmi::new(dst, src, imm).into(),
2622 _ => unimplemented!(),
2623 };
2624
2625 self.emit(Inst::External { inst });
2626 }
2627
2628 /// Shuffle of vectors of floats.
xmm_vshufp_rrri( &mut self, src1: Reg, src2: Reg, dst: WritableReg, imm: u8, size: OperandSize, )2629 pub fn xmm_vshufp_rrri(
2630 &mut self,
2631 src1: Reg,
2632 src2: Reg,
2633 dst: WritableReg,
2634 imm: u8,
2635 size: OperandSize,
2636 ) {
2637 let dst: WritableXmm = dst.map(|r| r.into());
2638 let inst = match size {
2639 OperandSize::S32 => asm::inst::vshufps_b::new(dst, src1, src2, imm).into(),
2640 _ => unimplemented!(),
2641 };
2642 self.emit(Inst::External { inst });
2643 }
2644
2645 /// Each lane in `src1` is multiplied by the corresponding lane in `src2`
2646 /// producing intermediate 32-bit operands. Each intermediate 32-bit
2647 /// operand is truncated to 18 most significant bits. Rounding is performed
2648 /// by adding 1 to the least significant bit of the 18-bit intermediate
2649 /// result. The 16 bits immediately to the right of the most significant
2650 /// bit of each 18-bit intermediate result is placed in each lane of `dst`.
xmm_vpmulhrs_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2651 pub fn xmm_vpmulhrs_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2652 let dst: WritableXmm = dst.map(|r| r.into());
2653 let inst = match size {
2654 OperandSize::S16 => asm::inst::vpmulhrsw_b::new(dst, src1, src2).into(),
2655 _ => unimplemented!(),
2656 };
2657 self.emit(Inst::External { inst });
2658 }
2659
xmm_vpmuldq_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg)2660 pub fn xmm_vpmuldq_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
2661 let dst: WritableXmm = dst.map(|r| r.into());
2662 let inst = asm::inst::vpmuldq_b::new(dst, src1, src2).into();
2663 self.emit(Inst::External { inst });
2664 }
2665
xmm_vpmuludq_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg)2666 pub fn xmm_vpmuludq_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
2667 let dst: WritableXmm = dst.map(|r| r.into());
2668 let inst = asm::inst::vpmuludq_b::new(dst, src1, src2).into();
2669 self.emit(Inst::External { inst });
2670 }
2671
xmm_vpmull_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2672 pub fn xmm_vpmull_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2673 let dst: WritableXmm = dst.map(|r| r.into());
2674 let inst = match size {
2675 OperandSize::S16 => asm::inst::vpmullw_b::new(dst, src1, src2).into(),
2676 OperandSize::S32 => asm::inst::vpmulld_b::new(dst, src1, src2).into(),
2677 _ => unimplemented!(),
2678 };
2679 self.emit(Inst::External { inst });
2680 }
2681
xmm_vmulp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2682 pub fn xmm_vmulp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2683 let dst: WritableXmm = dst.map(|r| r.into());
2684 let inst = match size {
2685 OperandSize::S32 => asm::inst::vmulps_b::new(dst, src1, src2).into(),
2686 OperandSize::S64 => asm::inst::vmulpd_b::new(dst, src1, src2).into(),
2687 _ => unimplemented!(),
2688 };
2689 self.emit(Inst::External { inst });
2690 }
2691
2692 /// Perform an average operation for the vector of unsigned integers in
2693 /// `src1` and `src2` and put the results in `dst`.
xmm_vpavg_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2694 pub fn xmm_vpavg_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2695 let dst: WritableXmm = dst.map(|r| r.into());
2696 let inst = match size {
2697 OperandSize::S8 => asm::inst::vpavgb_b::new(dst, src1, src2).into(),
2698 OperandSize::S16 => asm::inst::vpavgw_b::new(dst, src1, src2).into(),
2699 _ => unimplemented!(),
2700 };
2701 self.emit(Inst::External { inst });
2702 }
2703
2704 /// Divide the vector of floats in `src1` by the vector of floats in `src2`
2705 /// and put the results in `dst`.
xmm_vdivp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize)2706 pub fn xmm_vdivp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2707 let dst: WritableXmm = dst.map(|r| r.into());
2708 let inst = match size {
2709 OperandSize::S32 => asm::inst::vdivps_b::new(dst, src1, src2).into(),
2710 OperandSize::S64 => asm::inst::vdivpd_b::new(dst, src1, src2).into(),
2711 _ => unimplemented!(),
2712 };
2713 self.emit(Inst::External { inst });
2714 }
2715
2716 /// Compute square roots of vector of floats in `src` and put the results
2717 /// in `dst`.
xmm_vsqrtp_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize)2718 pub fn xmm_vsqrtp_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
2719 let dst: WritableXmm = dst.map(|r| r.into());
2720 let inst = match size {
2721 OperandSize::S32 => asm::inst::vsqrtps_b::new(dst, src).into(),
2722 OperandSize::S64 => asm::inst::vsqrtpd_b::new(dst, src).into(),
2723 _ => unimplemented!(),
2724 };
2725 self.emit(Inst::External { inst });
2726 }
2727
2728 /// Multiply and add packed signed and unsigned bytes.
xmm_vpmaddubsw_rmr(&mut self, src: Reg, address: &Address, dst: WritableReg)2729 pub fn xmm_vpmaddubsw_rmr(&mut self, src: Reg, address: &Address, dst: WritableReg) {
2730 let dst: WritableXmm = dst.map(|r| r.into());
2731 let address = Self::to_synthetic_amode(address, MemFlags::trusted());
2732 let inst = asm::inst::vpmaddubsw_b::new(dst, src, address).into();
2733 self.emit(Inst::External { inst });
2734 }
2735
2736 /// Multiply and add packed signed and unsigned bytes.
xmm_vpmaddubsw_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg)2737 pub fn xmm_vpmaddubsw_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
2738 let dst: WritableXmm = dst.map(|r| r.into());
2739 let inst = asm::inst::vpmaddubsw_b::new(dst, src1, src2).into();
2740 self.emit(Inst::External { inst });
2741 }
2742
2743 /// Multiple and add packed integers.
xmm_vpmaddwd_rmr(&mut self, src: Reg, address: &Address, dst: WritableReg)2744 pub fn xmm_vpmaddwd_rmr(&mut self, src: Reg, address: &Address, dst: WritableReg) {
2745 let dst: WritableXmm = dst.map(|r| r.into());
2746 let address = Self::to_synthetic_amode(address, MemFlags::trusted());
2747 let inst = asm::inst::vpmaddwd_b::new(dst, src, address).into();
2748 self.emit(Inst::External { inst });
2749 }
2750
2751 /// Multiple and add packed integers.
xmm_vpmaddwd_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg)2752 pub fn xmm_vpmaddwd_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
2753 let dst: WritableXmm = dst.map(|r| r.into());
2754 let inst = asm::inst::vpmaddwd_b::new(dst, src1, src2).into();
2755 self.emit(Inst::External { inst });
2756 }
2757 }
2758
2759 /// Captures the region in a MachBuffer where an add-with-immediate instruction would be emitted,
2760 /// but the immediate is not yet known. Currently, this implementation expects a 32-bit immediate,
2761 /// so 8 and 16 bit operand sizes are not supported.
2762 pub(crate) struct PatchableAddToReg {
2763 /// The region to be patched in the [`MachBuffer`]. It must contain a valid add instruction
2764 /// sequence, accepting a 32-bit immediate.
2765 region: PatchRegion,
2766
2767 /// The offset into the patchable region where the patchable constant begins.
2768 constant_offset: usize,
2769 }
2770
2771 impl PatchableAddToReg {
2772 /// Create a new [`PatchableAddToReg`] by capturing a region in the output buffer where the
2773 /// add-with-immediate occurs. The [`MachBuffer`] will have and add-with-immediate instruction
2774 /// present in that region, though it will add `0` until the `::finalize` method is called.
2775 ///
2776 /// Currently this implementation expects to be able to patch a 32-bit immediate, which means
2777 /// that 8 and 16-bit addition cannot be supported.
new(reg: Reg, size: OperandSize, asm: &mut Assembler) -> Self2778 pub(crate) fn new(reg: Reg, size: OperandSize, asm: &mut Assembler) -> Self {
2779 let open = asm.buffer_mut().start_patchable();
2780 let start = asm.buffer().cur_offset();
2781
2782 // Emit the opcode and register use for the add instruction.
2783 let reg = pair_gpr(Writable::from_reg(reg));
2784 let inst = match size {
2785 OperandSize::S32 => asm::inst::addl_mi::new(reg, 0_u32).into(),
2786 OperandSize::S64 => asm::inst::addq_mi_sxl::new(reg, 0_i32).into(),
2787 _ => {
2788 panic!(
2789 "{}-bit addition is not supported, please see the comment on PatchableAddToReg::new",
2790 size.num_bits(),
2791 )
2792 }
2793 };
2794 asm.emit(Inst::External { inst });
2795
2796 // The offset to the constant is the width of what was just emitted
2797 // minus 4, the width of the 32-bit immediate.
2798 let constant_offset = usize::try_from(asm.buffer().cur_offset() - start - 4).unwrap();
2799
2800 let region = asm.buffer_mut().end_patchable(open);
2801
2802 Self {
2803 region,
2804 constant_offset,
2805 }
2806 }
2807
2808 /// Patch the [`MachBuffer`] with the known constant to be added to the register. The final
2809 /// value is passed in as an i32, but the instruction encoding is fixed when
2810 /// [`PatchableAddToReg::new`] is called.
finalize(self, val: i32, buffer: &mut MachBuffer<Inst>)2811 pub(crate) fn finalize(self, val: i32, buffer: &mut MachBuffer<Inst>) {
2812 let slice = self.region.patch(buffer);
2813 debug_assert_eq!(slice.len(), self.constant_offset + 4);
2814 slice[self.constant_offset..].copy_from_slice(val.to_le_bytes().as_slice());
2815 }
2816 }
2817