1 use crate::Result; 2 use crate::abi::{self, LocalSlot, align_to}; 3 use crate::codegen::{CodeGenContext, Emission, FuncEnv}; 4 use crate::isa::{ 5 CallingConvention, 6 reg::{Reg, RegClass, WritableReg, writable}, 7 }; 8 use cranelift_codegen::{ 9 Final, MachBufferFinalized, MachLabel, 10 binemit::CodeOffset, 11 ir::{Endianness, MemFlags, RelSourceLoc, SourceLoc, UserExternalNameRef}, 12 }; 13 use std::{fmt::Debug, ops::Range}; 14 use wasmtime_environ::{PtrSize, WasmHeapType, WasmRefType, WasmValType}; 15 16 pub(crate) use cranelift_codegen::ir::TrapCode; 17 18 #[derive(Eq, PartialEq)] 19 pub(crate) enum DivKind { 20 /// Signed division. 21 Signed, 22 /// Unsigned division. 23 Unsigned, 24 } 25 26 /// Represents the `memory.atomic.wait*` kind. 27 #[derive(Debug, Clone, Copy)] 28 pub(crate) enum AtomicWaitKind { 29 Wait32, 30 Wait64, 31 } 32 33 /// Remainder kind. 34 #[derive(Copy, Clone)] 35 pub(crate) enum RemKind { 36 /// Signed remainder. 37 Signed, 38 /// Unsigned remainder. 39 Unsigned, 40 } 41 42 impl RemKind { is_signed(&self) -> bool43 pub fn is_signed(&self) -> bool { 44 matches!(self, Self::Signed) 45 } 46 } 47 48 /// Kinds of vector min operation supported by WebAssembly. 49 pub(crate) enum V128MinKind { 50 /// 4 lanes of 32-bit floats. 51 F32x4, 52 /// 2 lanes of 64-bit floats. 53 F64x2, 54 /// 16 lanes of signed 8-bit integers. 55 I8x16S, 56 /// 16 lanes of unsigned 8-bit integers. 57 I8x16U, 58 /// 8 lanes of signed 16-bit integers. 59 I16x8S, 60 /// 8 lanes of unsigned 16-bit integers. 61 I16x8U, 62 /// 4 lanes of signed 32-bit integers. 63 I32x4S, 64 /// 4 lanes of unsigned 32-bit integers. 65 I32x4U, 66 } 67 68 impl V128MinKind { 69 /// The size of each lane. 
lane_size(&self) -> OperandSize70 pub(crate) fn lane_size(&self) -> OperandSize { 71 match self { 72 Self::F32x4 | Self::I32x4S | Self::I32x4U => OperandSize::S32, 73 Self::F64x2 => OperandSize::S64, 74 Self::I8x16S | Self::I8x16U => OperandSize::S8, 75 Self::I16x8S | Self::I16x8U => OperandSize::S16, 76 } 77 } 78 } 79 80 /// Kinds of vector max operation supported by WebAssembly. 81 pub(crate) enum V128MaxKind { 82 /// 4 lanes of 32-bit floats. 83 F32x4, 84 /// 2 lanes of 64-bit floats. 85 F64x2, 86 /// 16 lanes of signed 8-bit integers. 87 I8x16S, 88 /// 16 lanes of unsigned 8-bit integers. 89 I8x16U, 90 /// 8 lanes of signed 16-bit integers. 91 I16x8S, 92 /// 8 lanes of unsigned 16-bit integers. 93 I16x8U, 94 /// 4 lanes of signed 32-bit integers. 95 I32x4S, 96 /// 4 lanes of unsigned 32-bit integers. 97 I32x4U, 98 } 99 100 impl V128MaxKind { 101 /// The size of each lane. lane_size(&self) -> OperandSize102 pub(crate) fn lane_size(&self) -> OperandSize { 103 match self { 104 Self::F32x4 | Self::I32x4S | Self::I32x4U => OperandSize::S32, 105 Self::F64x2 => OperandSize::S64, 106 Self::I8x16S | Self::I8x16U => OperandSize::S8, 107 Self::I16x8S | Self::I16x8U => OperandSize::S16, 108 } 109 } 110 } 111 112 #[derive(Eq, PartialEq)] 113 pub(crate) enum MulWideKind { 114 Signed, 115 Unsigned, 116 } 117 118 /// Type of operation for a read-modify-write instruction. 119 pub(crate) enum RmwOp { 120 Add, 121 Sub, 122 Xchg, 123 And, 124 Or, 125 Xor, 126 } 127 128 /// The direction to perform the memory move. 129 #[derive(Debug, Clone, Eq, PartialEq)] 130 pub(crate) enum MemMoveDirection { 131 /// From high memory addresses to low memory addresses. 132 /// Invariant: the source location is closer to the FP than the destination 133 /// location, which will be closer to the SP. 134 HighToLow, 135 /// From low memory addresses to high memory addresses. 
136 /// Invariant: the source location is closer to the SP than the destination 137 /// location, which will be closer to the FP. 138 LowToHigh, 139 } 140 141 /// Classifies how to treat float-to-int conversions. 142 #[derive(Debug, Copy, Clone, Eq, PartialEq)] 143 pub(crate) enum TruncKind { 144 /// Saturating conversion. If the source value is greater than the maximum 145 /// value of the destination type, the result is clamped to the 146 /// destination maximum value. 147 Checked, 148 /// An exception is raised if the source value is greater than the maximum 149 /// value of the destination type. 150 Unchecked, 151 } 152 153 impl TruncKind { 154 /// Returns true if the truncation kind is checked. is_checked(&self) -> bool155 pub(crate) fn is_checked(&self) -> bool { 156 *self == TruncKind::Checked 157 } 158 159 /// Returns `true` if the trunc kind is [`Unchecked`]. 160 /// 161 /// [`Unchecked`]: TruncKind::Unchecked 162 #[must_use] is_unchecked(&self) -> bool163 pub(crate) fn is_unchecked(&self) -> bool { 164 matches!(self, Self::Unchecked) 165 } 166 } 167 168 /// Representation of the stack pointer offset. 169 #[derive(Copy, Clone, Eq, PartialEq, Debug, PartialOrd, Ord, Default)] 170 pub struct SPOffset(u32); 171 172 impl SPOffset { from_u32(offs: u32) -> Self173 pub fn from_u32(offs: u32) -> Self { 174 Self(offs) 175 } 176 as_u32(&self) -> u32177 pub fn as_u32(&self) -> u32 { 178 self.0 179 } 180 } 181 182 /// A stack slot. 183 #[derive(Debug, Clone, Copy, Eq, PartialEq)] 184 pub struct StackSlot { 185 /// The location of the slot, relative to the stack pointer. 186 pub offset: SPOffset, 187 /// The size of the slot, in bytes. 188 pub size: u32, 189 } 190 191 impl StackSlot { new(offs: SPOffset, size: u32) -> Self192 pub fn new(offs: SPOffset, size: u32) -> Self { 193 Self { offset: offs, size } 194 } 195 } 196 197 pub trait ScratchType { 198 /// Derive the register class from the scratch register type. 
reg_class() -> RegClass199 fn reg_class() -> RegClass; 200 } 201 202 /// A scratch register type of integer class. 203 pub struct IntScratch; 204 /// A scratch register type of floating point class. 205 pub struct FloatScratch; 206 207 impl ScratchType for IntScratch { reg_class() -> RegClass208 fn reg_class() -> RegClass { 209 RegClass::Int 210 } 211 } 212 213 impl ScratchType for FloatScratch { reg_class() -> RegClass214 fn reg_class() -> RegClass { 215 RegClass::Float 216 } 217 } 218 219 /// A scratch register scope. 220 #[derive(Debug, Clone, Copy)] 221 pub struct Scratch(Reg); 222 223 impl Scratch { new(r: Reg) -> Self224 pub fn new(r: Reg) -> Self { 225 Self(r) 226 } 227 228 #[inline] inner(&self) -> Reg229 pub fn inner(&self) -> Reg { 230 self.0 231 } 232 233 #[inline] writable(&self) -> WritableReg234 pub fn writable(&self) -> WritableReg { 235 writable!(self.0) 236 } 237 } 238 239 /// Kinds of integer binary comparison in WebAssembly. The [`MacroAssembler`] 240 /// implementation for each ISA is responsible for emitting the correct 241 /// sequence of instructions when lowering to machine code. 242 #[derive(Debug, Clone, Copy, Eq, PartialEq)] 243 pub(crate) enum IntCmpKind { 244 /// Equal. 245 Eq, 246 /// Not equal. 247 Ne, 248 /// Signed less than. 249 LtS, 250 /// Unsigned less than. 251 LtU, 252 /// Signed greater than. 253 GtS, 254 /// Unsigned greater than. 255 GtU, 256 /// Signed less than or equal. 257 LeS, 258 /// Unsigned less than or equal. 259 LeU, 260 /// Signed greater than or equal. 261 GeS, 262 /// Unsigned greater than or equal. 263 GeU, 264 } 265 266 /// Kinds of float binary comparison in WebAssembly. The [`MacroAssembler`] 267 /// implementation for each ISA is responsible for emitting the correct 268 /// sequence of instructions when lowering code. 269 #[derive(Debug)] 270 pub(crate) enum FloatCmpKind { 271 /// Equal. 272 Eq, 273 /// Not equal. 274 Ne, 275 /// Less than. 276 Lt, 277 /// Greater than. 278 Gt, 279 /// Less than or equal. 
280 Le, 281 /// Greater than or equal. 282 Ge, 283 } 284 285 /// Kinds of shifts in WebAssembly.The [`masm`] implementation for each ISA is 286 /// responsible for emitting the correct sequence of instructions when 287 /// lowering to machine code. 288 #[derive(Debug, Clone, Copy, Eq, PartialEq)] 289 pub(crate) enum ShiftKind { 290 /// Left shift. 291 Shl, 292 /// Signed right shift. 293 ShrS, 294 /// Unsigned right shift. 295 ShrU, 296 /// Left rotate. 297 Rotl, 298 /// Right rotate. 299 Rotr, 300 } 301 302 /// Kinds of extends in WebAssembly. Each MacroAssembler implementation 303 /// is responsible for emitting the correct sequence of instructions when 304 /// lowering to machine code. 305 #[derive(Copy, Clone)] 306 pub(crate) enum ExtendKind { 307 Signed(Extend<Signed>), 308 Unsigned(Extend<Zero>), 309 } 310 311 #[derive(Copy, Clone)] 312 pub(crate) enum Signed {} 313 #[derive(Copy, Clone)] 314 pub(crate) enum Zero {} 315 316 pub(crate) trait ExtendType {} 317 318 impl ExtendType for Signed {} 319 impl ExtendType for Zero {} 320 321 #[derive(Copy, Clone)] 322 pub(crate) enum Extend<T: ExtendType> { 323 /// 8 to 32 bit extend. 324 I32Extend8, 325 /// 16 to 32 bit extend. 326 I32Extend16, 327 /// 8 to 64 bit extend. 328 I64Extend8, 329 /// 16 to 64 bit extend. 330 I64Extend16, 331 /// 32 to 64 bit extend. 332 I64Extend32, 333 334 /// Variant to hold the kind of extend marker. 335 /// 336 /// This is `Signed` or `Zero`, that are empty enums, which means that this variant cannot be 337 /// constructed. 
338 __Kind(T), 339 } 340 341 impl From<Extend<Zero>> for ExtendKind { from(value: Extend<Zero>) -> Self342 fn from(value: Extend<Zero>) -> Self { 343 ExtendKind::Unsigned(value) 344 } 345 } 346 347 impl<T: ExtendType> Extend<T> { from_size(&self) -> OperandSize348 pub fn from_size(&self) -> OperandSize { 349 match self { 350 Extend::I32Extend8 | Extend::I64Extend8 => OperandSize::S8, 351 Extend::I32Extend16 | Extend::I64Extend16 => OperandSize::S16, 352 Extend::I64Extend32 => OperandSize::S32, 353 Extend::__Kind(_) => unreachable!(), 354 } 355 } 356 to_size(&self) -> OperandSize357 pub fn to_size(&self) -> OperandSize { 358 match self { 359 Extend::I32Extend8 | Extend::I32Extend16 => OperandSize::S32, 360 Extend::I64Extend8 | Extend::I64Extend16 | Extend::I64Extend32 => OperandSize::S64, 361 Extend::__Kind(_) => unreachable!(), 362 } 363 } 364 from_bits(&self) -> u8365 pub fn from_bits(&self) -> u8 { 366 self.from_size().num_bits() 367 } 368 to_bits(&self) -> u8369 pub fn to_bits(&self) -> u8 { 370 self.to_size().num_bits() 371 } 372 } 373 374 impl From<Extend<Signed>> for ExtendKind { from(value: Extend<Signed>) -> Self375 fn from(value: Extend<Signed>) -> Self { 376 ExtendKind::Signed(value) 377 } 378 } 379 380 impl ExtendKind { signed(&self) -> bool381 pub fn signed(&self) -> bool { 382 match self { 383 Self::Signed(_) => true, 384 _ => false, 385 } 386 } 387 from_bits(&self) -> u8388 pub fn from_bits(&self) -> u8 { 389 match self { 390 Self::Signed(s) => s.from_bits(), 391 Self::Unsigned(u) => u.from_bits(), 392 } 393 } 394 to_bits(&self) -> u8395 pub fn to_bits(&self) -> u8 { 396 match self { 397 Self::Signed(s) => s.to_bits(), 398 Self::Unsigned(u) => u.to_bits(), 399 } 400 } 401 } 402 403 /// Kinds of vector load and extends in WebAssembly. Each MacroAssembler 404 /// implementation is responsible for emitting the correct sequence of 405 /// instructions when lowering to machine code. 
406 #[derive(Copy, Clone)] 407 pub(crate) enum V128LoadExtendKind { 408 /// Sign extends eight 8 bit integers to eight 16 bit lanes. 409 E8x8S, 410 /// Zero extends eight 8 bit integers to eight 16 bit lanes. 411 E8x8U, 412 /// Sign extends four 16 bit integers to four 32 bit lanes. 413 E16x4S, 414 /// Zero extends four 16 bit integers to four 32 bit lanes. 415 E16x4U, 416 /// Sign extends two 32 bit integers to two 64 bit lanes. 417 E32x2S, 418 /// Zero extends two 32 bit integers to two 64 bit lanes. 419 E32x2U, 420 } 421 422 /// Kinds of splat loads supported by WebAssembly. 423 pub(crate) enum SplatLoadKind { 424 /// 8 bits. 425 S8, 426 /// 16 bits. 427 S16, 428 /// 32 bits. 429 S32, 430 /// 64 bits. 431 S64, 432 } 433 434 /// Kinds of splat supported by WebAssembly. 435 #[derive(Copy, Debug, Clone, Eq, PartialEq)] 436 pub(crate) enum SplatKind { 437 /// 8 bit integer. 438 I8x16, 439 /// 16 bit integer. 440 I16x8, 441 /// 32 bit integer. 442 I32x4, 443 /// 64 bit integer. 444 I64x2, 445 /// 32 bit float. 446 F32x4, 447 /// 64 bit float. 448 F64x2, 449 } 450 451 impl SplatKind { 452 /// The lane size to use for different kinds of splats. lane_size(&self) -> OperandSize453 pub(crate) fn lane_size(&self) -> OperandSize { 454 match self { 455 SplatKind::I8x16 => OperandSize::S8, 456 SplatKind::I16x8 => OperandSize::S16, 457 SplatKind::I32x4 | SplatKind::F32x4 => OperandSize::S32, 458 SplatKind::I64x2 | SplatKind::F64x2 => OperandSize::S64, 459 } 460 } 461 } 462 463 /// Kinds of extract lane supported by WebAssembly. 464 #[derive(Copy, Debug, Clone, Eq, PartialEq)] 465 pub(crate) enum ExtractLaneKind { 466 /// 16 lanes of 8-bit integers sign extended to 32-bits. 467 I8x16S, 468 /// 16 lanes of 8-bit integers zero extended to 32-bits. 469 I8x16U, 470 /// 8 lanes of 16-bit integers sign extended to 32-bits. 471 I16x8S, 472 /// 8 lanes of 16-bit integers zero extended to 32-bits. 473 I16x8U, 474 /// 4 lanes of 32-bit integers. 
475 I32x4, 476 /// 2 lanes of 64-bit integers. 477 I64x2, 478 /// 4 lanes of 32-bit floats. 479 F32x4, 480 /// 2 lanes of 64-bit floats. 481 F64x2, 482 } 483 484 impl ExtractLaneKind { 485 /// The lane size to use for different kinds of extract lane kinds. lane_size(&self) -> OperandSize486 pub(crate) fn lane_size(&self) -> OperandSize { 487 match self { 488 ExtractLaneKind::I8x16S | ExtractLaneKind::I8x16U => OperandSize::S8, 489 ExtractLaneKind::I16x8S | ExtractLaneKind::I16x8U => OperandSize::S16, 490 ExtractLaneKind::I32x4 | ExtractLaneKind::F32x4 => OperandSize::S32, 491 ExtractLaneKind::I64x2 | ExtractLaneKind::F64x2 => OperandSize::S64, 492 } 493 } 494 } 495 496 impl From<ExtractLaneKind> for Extend<Signed> { from(value: ExtractLaneKind) -> Self497 fn from(value: ExtractLaneKind) -> Self { 498 match value { 499 ExtractLaneKind::I8x16S => Extend::I32Extend8, 500 ExtractLaneKind::I16x8S => Extend::I32Extend16, 501 _ => unimplemented!(), 502 } 503 } 504 } 505 506 /// Kinds of replace lane supported by WebAssembly. 507 pub(crate) enum ReplaceLaneKind { 508 /// 16 lanes of 8 bit integers. 509 I8x16, 510 /// 8 lanes of 16 bit integers. 511 I16x8, 512 /// 4 lanes of 32 bit integers. 513 I32x4, 514 /// 2 lanes of 64 bit integers. 515 I64x2, 516 /// 4 lanes of 32 bit floats. 517 F32x4, 518 /// 2 lanes of 64 bit floats. 519 F64x2, 520 } 521 522 impl ReplaceLaneKind { 523 /// The lane size to use for different kinds of replace lane kinds. lane_size(&self) -> OperandSize524 pub(crate) fn lane_size(&self) -> OperandSize { 525 match self { 526 ReplaceLaneKind::I8x16 => OperandSize::S8, 527 ReplaceLaneKind::I16x8 => OperandSize::S16, 528 ReplaceLaneKind::I32x4 => OperandSize::S32, 529 ReplaceLaneKind::I64x2 => OperandSize::S64, 530 ReplaceLaneKind::F32x4 => OperandSize::S32, 531 ReplaceLaneKind::F64x2 => OperandSize::S64, 532 } 533 } 534 } 535 536 /// Kinds of behavior supported by Wasm loads. 
537 pub(crate) enum LoadKind { 538 /// Load the entire bytes of the operand size without any modifications. 539 Operand(OperandSize), 540 /// Atomic load, with optional scalar extend. 541 Atomic(OperandSize, Option<ExtendKind>), 542 /// Duplicate value into vector lanes. 543 Splat(SplatLoadKind), 544 /// Scalar (non-vector) extend. 545 ScalarExtend(ExtendKind), 546 /// Vector extend. 547 VectorExtend(V128LoadExtendKind), 548 /// Load content into select lane. 549 VectorLane(LaneSelector), 550 /// Load a single element into the lowest bits of a vector and initialize 551 /// all other bits to zero. 552 VectorZero(OperandSize), 553 } 554 555 impl LoadKind { 556 /// Returns the [`OperandSize`] used in the load operation. derive_operand_size(&self) -> OperandSize557 pub(crate) fn derive_operand_size(&self) -> OperandSize { 558 match self { 559 Self::ScalarExtend(extend) | Self::Atomic(_, Some(extend)) => { 560 Self::operand_size_for_scalar(extend) 561 } 562 Self::VectorExtend(_) => OperandSize::S64, 563 Self::Splat(kind) => Self::operand_size_for_splat(kind), 564 Self::Operand(size) 565 | Self::Atomic(size, None) 566 | Self::VectorLane(LaneSelector { size, .. 
}) 567 | Self::VectorZero(size) => *size, 568 } 569 } 570 vector_lane(lane: u8, size: OperandSize) -> Self571 pub fn vector_lane(lane: u8, size: OperandSize) -> Self { 572 Self::VectorLane(LaneSelector { lane, size }) 573 } 574 operand_size_for_scalar(extend_kind: &ExtendKind) -> OperandSize575 fn operand_size_for_scalar(extend_kind: &ExtendKind) -> OperandSize { 576 match extend_kind { 577 ExtendKind::Signed(s) => s.from_size(), 578 ExtendKind::Unsigned(u) => u.from_size(), 579 } 580 } 581 operand_size_for_splat(kind: &SplatLoadKind) -> OperandSize582 fn operand_size_for_splat(kind: &SplatLoadKind) -> OperandSize { 583 match kind { 584 SplatLoadKind::S8 => OperandSize::S8, 585 SplatLoadKind::S16 => OperandSize::S16, 586 SplatLoadKind::S32 => OperandSize::S32, 587 SplatLoadKind::S64 => OperandSize::S64, 588 } 589 } 590 is_atomic(&self) -> bool591 pub(crate) fn is_atomic(&self) -> bool { 592 matches!(self, Self::Atomic(_, _)) 593 } 594 } 595 596 /// Kinds of behavior supported by Wasm loads. 597 #[derive(Copy, Clone)] 598 pub enum StoreKind { 599 /// Store the entire bytes of the operand size without any modifications. 600 Operand(OperandSize), 601 /// Store the entire bytes of the operand size without any modifications, atomically. 602 Atomic(OperandSize), 603 /// Store the content of selected lane. 604 VectorLane(LaneSelector), 605 } 606 607 impl StoreKind { vector_lane(lane: u8, size: OperandSize) -> Self608 pub fn vector_lane(lane: u8, size: OperandSize) -> Self { 609 Self::VectorLane(LaneSelector { lane, size }) 610 } 611 } 612 613 #[derive(Copy, Clone)] 614 pub struct LaneSelector { 615 pub lane: u8, 616 pub size: OperandSize, 617 } 618 619 /// Types of vector integer to float conversions supported by WebAssembly. 620 pub(crate) enum V128ConvertKind { 621 /// 4 lanes of signed 32-bit integers to 4 lanes of 32-bit floats. 622 I32x4S, 623 /// 4 lanes of unsigned 32-bit integers to 4 lanes of 32-bit floats. 
624 I32x4U, 625 /// 4 lanes of signed 32-bit integers to low bits of 2 lanes of 64-bit 626 /// floats. 627 I32x4LowS, 628 /// 4 lanes of unsigned 32-bit integers to low bits of 2 lanes of 64-bit 629 /// floats. 630 I32x4LowU, 631 } 632 633 impl V128ConvertKind { src_lane_size(&self) -> OperandSize634 pub(crate) fn src_lane_size(&self) -> OperandSize { 635 match self { 636 V128ConvertKind::I32x4S 637 | V128ConvertKind::I32x4U 638 | V128ConvertKind::I32x4LowS 639 | V128ConvertKind::I32x4LowU => OperandSize::S32, 640 } 641 } 642 dst_lane_size(&self) -> OperandSize643 pub(crate) fn dst_lane_size(&self) -> OperandSize { 644 match self { 645 V128ConvertKind::I32x4S | V128ConvertKind::I32x4U => OperandSize::S32, 646 V128ConvertKind::I32x4LowS | V128ConvertKind::I32x4LowU => OperandSize::S64, 647 } 648 } 649 } 650 651 /// Kinds of vector narrowing operations supported by WebAssembly. 652 pub(crate) enum V128NarrowKind { 653 /// Narrow 8 lanes of 16-bit integers to 16 lanes of 8-bit integers using 654 /// signed saturation. 655 I16x8S, 656 /// Narrow 8 lanes of 16-bit integers to 16 lanes of 8-bit integers using 657 /// unsigned saturation. 658 I16x8U, 659 /// Narrow 4 lanes of 32-bit integers to 8 lanes of 16-bit integers using 660 /// signed saturation. 661 I32x4S, 662 /// Narrow 4 lanes of 32-bit integers to 8 lanes of 16-bit integers using 663 /// unsigned saturation. 664 I32x4U, 665 } 666 667 impl V128NarrowKind { 668 /// Return the size of the destination lanes. dst_lane_size(&self) -> OperandSize669 pub(crate) fn dst_lane_size(&self) -> OperandSize { 670 match self { 671 Self::I16x8S | Self::I16x8U => OperandSize::S8, 672 Self::I32x4S | Self::I32x4U => OperandSize::S16, 673 } 674 } 675 } 676 677 /// Kinds of vector extending operations supported by WebAssembly. 678 #[derive(Debug, Copy, Clone)] 679 pub(crate) enum V128ExtendKind { 680 /// Low half of i8x16 sign extended. 681 LowI8x16S, 682 /// High half of i8x16 sign extended. 
683 HighI8x16S, 684 /// Low half of i8x16 zero extended. 685 LowI8x16U, 686 /// High half of i8x16 zero extended. 687 HighI8x16U, 688 /// Low half of i16x8 sign extended. 689 LowI16x8S, 690 /// High half of i16x8 sign extended. 691 HighI16x8S, 692 /// Low half of i16x8 zero extended. 693 LowI16x8U, 694 /// High half of i16x8 zero extended. 695 HighI16x8U, 696 /// Low half of i32x4 sign extended. 697 LowI32x4S, 698 /// High half of i32x4 sign extended. 699 HighI32x4S, 700 /// Low half of i32x4 zero extended. 701 LowI32x4U, 702 /// High half of i32x4 zero extended. 703 HighI32x4U, 704 } 705 706 impl V128ExtendKind { 707 /// The size of the source's lanes. src_lane_size(&self) -> OperandSize708 pub(crate) fn src_lane_size(&self) -> OperandSize { 709 match self { 710 Self::LowI8x16S | Self::LowI8x16U | Self::HighI8x16S | Self::HighI8x16U => { 711 OperandSize::S8 712 } 713 Self::LowI16x8S | Self::LowI16x8U | Self::HighI16x8S | Self::HighI16x8U => { 714 OperandSize::S16 715 } 716 Self::LowI32x4S | Self::LowI32x4U | Self::HighI32x4S | Self::HighI32x4U => { 717 OperandSize::S32 718 } 719 } 720 } 721 } 722 723 /// Kinds of vector equalities and non-equalities supported by WebAssembly. 724 pub(crate) enum VectorEqualityKind { 725 /// 16 lanes of 8 bit integers. 726 I8x16, 727 /// 8 lanes of 16 bit integers. 728 I16x8, 729 /// 4 lanes of 32 bit integers. 730 I32x4, 731 /// 2 lanes of 64 bit integers. 732 I64x2, 733 /// 4 lanes of 32 bit floats. 734 F32x4, 735 /// 2 lanes of 64 bit floats. 736 F64x2, 737 } 738 739 impl VectorEqualityKind { 740 /// Get the lane size to use. lane_size(&self) -> OperandSize741 pub(crate) fn lane_size(&self) -> OperandSize { 742 match self { 743 Self::I8x16 => OperandSize::S8, 744 Self::I16x8 => OperandSize::S16, 745 Self::I32x4 | Self::F32x4 => OperandSize::S32, 746 Self::I64x2 | Self::F64x2 => OperandSize::S64, 747 } 748 } 749 } 750 751 /// Kinds of vector comparisons supported by WebAssembly. 
752 pub(crate) enum VectorCompareKind { 753 /// 16 lanes of signed 8 bit integers. 754 I8x16S, 755 /// 16 lanes of unsigned 8 bit integers. 756 I8x16U, 757 /// 8 lanes of signed 16 bit integers. 758 I16x8S, 759 /// 8 lanes of unsigned 16 bit integers. 760 I16x8U, 761 /// 4 lanes of signed 32 bit integers. 762 I32x4S, 763 /// 4 lanes of unsigned 32 bit integers. 764 I32x4U, 765 /// 2 lanes of signed 64 bit integers. 766 I64x2S, 767 /// 4 lanes of 32 bit floats. 768 F32x4, 769 /// 2 lanes of 64 bit floats. 770 F64x2, 771 } 772 773 impl VectorCompareKind { 774 /// Get the lane size to use. lane_size(&self) -> OperandSize775 pub(crate) fn lane_size(&self) -> OperandSize { 776 match self { 777 Self::I8x16S | Self::I8x16U => OperandSize::S8, 778 Self::I16x8S | Self::I16x8U => OperandSize::S16, 779 Self::I32x4S | Self::I32x4U | Self::F32x4 => OperandSize::S32, 780 Self::I64x2S | Self::F64x2 => OperandSize::S64, 781 } 782 } 783 } 784 785 /// Kinds of vector absolute operations supported by WebAssembly. 786 #[derive(Copy, Debug, Clone, Eq, PartialEq)] 787 pub(crate) enum V128AbsKind { 788 /// 8 bit integers. 789 I8x16, 790 /// 16 bit integers. 791 I16x8, 792 /// 32 bit integers. 793 I32x4, 794 /// 64 bit integers. 795 I64x2, 796 /// 32 bit floats. 797 F32x4, 798 /// 64 bit floats. 799 F64x2, 800 } 801 802 impl V128AbsKind { 803 /// The lane size to use. lane_size(&self) -> OperandSize804 pub(crate) fn lane_size(&self) -> OperandSize { 805 match self { 806 Self::I8x16 => OperandSize::S8, 807 Self::I16x8 => OperandSize::S16, 808 Self::I32x4 | Self::F32x4 => OperandSize::S32, 809 Self::I64x2 | Self::F64x2 => OperandSize::S64, 810 } 811 } 812 } 813 814 /// Kinds of truncation for vectors supported by WebAssembly. 815 pub(crate) enum V128TruncKind { 816 /// Truncates 4 lanes of 32-bit floats to nearest integral value. 817 F32x4, 818 /// Truncates 2 lanes of 64-bit floats to nearest integral value. 819 F64x2, 820 /// Integers from signed F32x4. 
821 I32x4FromF32x4S, 822 /// Integers from unsigned F32x4. 823 I32x4FromF32x4U, 824 /// Integers from signed F64x2. 825 I32x4FromF64x2SZero, 826 /// Integers from unsigned F64x2. 827 I32x4FromF64x2UZero, 828 } 829 830 impl V128TruncKind { 831 /// The size of the source lanes. src_lane_size(&self) -> OperandSize832 pub(crate) fn src_lane_size(&self) -> OperandSize { 833 match self { 834 V128TruncKind::F32x4 835 | V128TruncKind::I32x4FromF32x4S 836 | V128TruncKind::I32x4FromF32x4U => OperandSize::S32, 837 V128TruncKind::F64x2 838 | V128TruncKind::I32x4FromF64x2SZero 839 | V128TruncKind::I32x4FromF64x2UZero => OperandSize::S64, 840 } 841 } 842 843 /// The size of the destination lanes. dst_lane_size(&self) -> OperandSize844 pub(crate) fn dst_lane_size(&self) -> OperandSize { 845 if let V128TruncKind::F64x2 = self { 846 OperandSize::S64 847 } else { 848 OperandSize::S32 849 } 850 } 851 } 852 853 /// Kinds of vector addition supported by WebAssembly. 854 pub(crate) enum V128AddKind { 855 /// 4 lanes of 32-bit floats wrapping. 856 F32x4, 857 /// 2 lanes of 64-bit floats wrapping. 858 F64x2, 859 /// 16 lanes of 8-bit integers wrapping. 860 I8x16, 861 /// 16 lanes of 8-bit integers signed saturating. 862 I8x16SatS, 863 /// 16 lanes of 8-bit integers unsigned saturating. 864 I8x16SatU, 865 /// 8 lanes of 16-bit integers wrapping. 866 I16x8, 867 /// 8 lanes of 16-bit integers signed saturating. 868 I16x8SatS, 869 /// 8 lanes of 16-bit integers unsigned saturating. 870 I16x8SatU, 871 /// 4 lanes of 32-bit integers wrapping. 872 I32x4, 873 /// 2 lanes of 64-bit integers wrapping. 874 I64x2, 875 } 876 877 /// Kinds of vector subtraction supported by WebAssembly. 878 pub(crate) enum V128SubKind { 879 /// 4 lanes of 32-bit floats wrapping. 880 F32x4, 881 /// 2 lanes of 64-bit floats wrapping. 882 F64x2, 883 /// 16 lanes of 8-bit integers wrapping. 884 I8x16, 885 /// 16 lanes of 8-bit integers signed saturating. 
886 I8x16SatS, 887 /// 16 lanes of 8-bit integers unsigned saturating. 888 I8x16SatU, 889 /// 8 lanes of 16-bit integers wrapping. 890 I16x8, 891 /// 8 lanes of 16-bit integers signed saturating. 892 I16x8SatS, 893 /// 8 lanes of 16-bit integers unsigned saturating. 894 I16x8SatU, 895 /// 4 lanes of 32-bit integers wrapping. 896 I32x4, 897 /// 2 lanes of 64-bit integers wrapping. 898 I64x2, 899 } 900 901 impl From<V128NegKind> for V128SubKind { from(value: V128NegKind) -> Self902 fn from(value: V128NegKind) -> Self { 903 match value { 904 V128NegKind::I8x16 => Self::I8x16, 905 V128NegKind::I16x8 => Self::I16x8, 906 V128NegKind::I32x4 => Self::I32x4, 907 V128NegKind::I64x2 => Self::I64x2, 908 V128NegKind::F32x4 | V128NegKind::F64x2 => unimplemented!(), 909 } 910 } 911 } 912 913 /// Kinds of vector multiplication supported by WebAssembly. 914 pub(crate) enum V128MulKind { 915 /// 4 lanes of 32-bit floats. 916 F32x4, 917 /// 2 lanes of 64-bit floats. 918 F64x2, 919 /// 8 lanes of 16-bit integers. 920 I16x8, 921 /// 4 lanes of 32-bit integers. 922 I32x4, 923 /// 2 lanes of 64-bit integers. 924 I64x2, 925 } 926 927 /// Kinds of vector negation supported by WebAssembly. 928 #[derive(Copy, Clone)] 929 pub(crate) enum V128NegKind { 930 /// 4 lanes of 32-bit floats. 931 F32x4, 932 /// 2 lanes of 64-bit floats. 933 F64x2, 934 /// 16 lanes of 8-bit integers. 935 I8x16, 936 /// 8 lanes of 16-bit integers. 937 I16x8, 938 /// 4 lanes of 32-bit integers. 939 I32x4, 940 /// 2 lanes of 64-bit integers. 941 I64x2, 942 } 943 944 impl V128NegKind { 945 /// The size of the lanes. lane_size(&self) -> OperandSize946 pub(crate) fn lane_size(&self) -> OperandSize { 947 match self { 948 Self::F32x4 | Self::I32x4 => OperandSize::S32, 949 Self::F64x2 | Self::I64x2 => OperandSize::S64, 950 Self::I8x16 => OperandSize::S8, 951 Self::I16x8 => OperandSize::S16, 952 } 953 } 954 } 955 956 /// Kinds of extended pairwise addition supported by WebAssembly. 
957 pub(crate) enum V128ExtAddKind { 958 /// 16 lanes of signed 8-bit integers. 959 I8x16S, 960 /// 16 lanes of unsigned 8-bit integers. 961 I8x16U, 962 /// 8 lanes of signed 16-bit integers. 963 I16x8S, 964 /// 8 lanes of unsigned 16-bit integers. 965 I16x8U, 966 } 967 968 /// Kinds of vector extended multiplication supported by WebAssembly. 969 #[derive(Debug, Clone, Copy)] 970 pub(crate) enum V128ExtMulKind { 971 LowI8x16S, 972 HighI8x16S, 973 LowI8x16U, 974 HighI8x16U, 975 LowI16x8S, 976 HighI16x8S, 977 LowI16x8U, 978 HighI16x8U, 979 LowI32x4S, 980 HighI32x4S, 981 LowI32x4U, 982 HighI32x4U, 983 } 984 985 impl From<V128ExtMulKind> for V128ExtendKind { from(value: V128ExtMulKind) -> Self986 fn from(value: V128ExtMulKind) -> Self { 987 match value { 988 V128ExtMulKind::LowI8x16S => Self::LowI8x16S, 989 V128ExtMulKind::HighI8x16S => Self::HighI8x16S, 990 V128ExtMulKind::LowI8x16U => Self::LowI8x16U, 991 V128ExtMulKind::HighI8x16U => Self::HighI8x16U, 992 V128ExtMulKind::LowI16x8S => Self::LowI16x8S, 993 V128ExtMulKind::HighI16x8S => Self::HighI16x8S, 994 V128ExtMulKind::LowI16x8U => Self::LowI16x8U, 995 V128ExtMulKind::HighI16x8U => Self::HighI16x8U, 996 V128ExtMulKind::LowI32x4S => Self::LowI32x4S, 997 V128ExtMulKind::HighI32x4S => Self::HighI32x4S, 998 V128ExtMulKind::LowI32x4U => Self::LowI32x4U, 999 V128ExtMulKind::HighI32x4U => Self::HighI32x4U, 1000 } 1001 } 1002 } 1003 1004 impl From<V128ExtMulKind> for V128MulKind { from(value: V128ExtMulKind) -> Self1005 fn from(value: V128ExtMulKind) -> Self { 1006 match value { 1007 V128ExtMulKind::LowI8x16S 1008 | V128ExtMulKind::HighI8x16S 1009 | V128ExtMulKind::LowI8x16U 1010 | V128ExtMulKind::HighI8x16U => Self::I16x8, 1011 V128ExtMulKind::LowI16x8S 1012 | V128ExtMulKind::HighI16x8S 1013 | V128ExtMulKind::LowI16x8U 1014 | V128ExtMulKind::HighI16x8U => Self::I32x4, 1015 V128ExtMulKind::LowI32x4S 1016 | V128ExtMulKind::HighI32x4S 1017 | V128ExtMulKind::LowI32x4U 1018 | V128ExtMulKind::HighI32x4U => Self::I64x2, 1019 } 
1020 } 1021 } 1022 1023 /// Operand size, in bits. 1024 #[derive(Copy, Debug, Clone, Eq, PartialEq)] 1025 pub(crate) enum OperandSize { 1026 /// 8 bits. 1027 S8, 1028 /// 16 bits. 1029 S16, 1030 /// 32 bits. 1031 S32, 1032 /// 64 bits. 1033 S64, 1034 /// 128 bits. 1035 S128, 1036 } 1037 1038 impl OperandSize { 1039 /// The number of bits in the operand. num_bits(&self) -> u81040 pub fn num_bits(&self) -> u8 { 1041 match self { 1042 OperandSize::S8 => 8, 1043 OperandSize::S16 => 16, 1044 OperandSize::S32 => 32, 1045 OperandSize::S64 => 64, 1046 OperandSize::S128 => 128, 1047 } 1048 } 1049 1050 /// The number of bytes in the operand. bytes(&self) -> u321051 pub fn bytes(&self) -> u32 { 1052 match self { 1053 Self::S8 => 1, 1054 Self::S16 => 2, 1055 Self::S32 => 4, 1056 Self::S64 => 8, 1057 Self::S128 => 16, 1058 } 1059 } 1060 1061 /// The binary logarithm of the number of bits in the operand. log2(&self) -> u81062 pub fn log2(&self) -> u8 { 1063 match self { 1064 OperandSize::S8 => 3, 1065 OperandSize::S16 => 4, 1066 OperandSize::S32 => 5, 1067 OperandSize::S64 => 6, 1068 OperandSize::S128 => 7, 1069 } 1070 } 1071 1072 /// Create an [`OperandSize`] from the given number of bytes. 
from_bytes(bytes: u8) -> Self1073 pub fn from_bytes(bytes: u8) -> Self { 1074 use OperandSize::*; 1075 match bytes { 1076 4 => S32, 1077 8 => S64, 1078 16 => S128, 1079 _ => panic!("Invalid bytes {bytes} for OperandSize"), 1080 } 1081 } 1082 extend_to<T: ExtendType>(&self, to: Self) -> Option<Extend<T>>1083 pub fn extend_to<T: ExtendType>(&self, to: Self) -> Option<Extend<T>> { 1084 match to { 1085 OperandSize::S32 => match self { 1086 OperandSize::S8 => Some(Extend::I32Extend8), 1087 OperandSize::S16 => Some(Extend::I32Extend16), 1088 _ => None, 1089 }, 1090 OperandSize::S64 => match self { 1091 OperandSize::S8 => Some(Extend::I64Extend8), 1092 OperandSize::S16 => Some(Extend::I64Extend16), 1093 OperandSize::S32 => Some(Extend::I64Extend32), 1094 _ => None, 1095 }, 1096 _ => None, 1097 } 1098 } 1099 1100 /// The number of bits in the mantissa. 1101 /// 1102 /// Only implemented for floats. mantissa_bits(&self) -> u81103 pub fn mantissa_bits(&self) -> u8 { 1104 match self { 1105 Self::S32 => 8, 1106 Self::S64 => 11, 1107 _ => unimplemented!(), 1108 } 1109 } 1110 } 1111 1112 /// An abstraction over a register or immediate. 1113 #[derive(Copy, Clone, Debug, PartialEq, Eq)] 1114 pub(crate) enum RegImm { 1115 /// A register. 1116 Reg(Reg), 1117 /// A tagged immediate argument. 1118 Imm(Imm), 1119 } 1120 1121 /// An tagged representation of an immediate. 1122 #[derive(Copy, Clone, Debug, PartialEq, Eq)] 1123 pub(crate) enum Imm { 1124 /// I32 immediate. 1125 I32(u32), 1126 /// I64 immediate. 1127 I64(u64), 1128 /// F32 immediate. 1129 F32(u32), 1130 /// F64 immediate. 1131 F64(u64), 1132 /// V128 immediate. 1133 V128(i128), 1134 } 1135 1136 impl Imm { 1137 /// Create a new I64 immediate. i64(val: i64) -> Self1138 pub fn i64(val: i64) -> Self { 1139 Self::I64(val as u64) 1140 } 1141 1142 /// Create a new I32 immediate. i32(val: i32) -> Self1143 pub fn i32(val: i32) -> Self { 1144 Self::I32(val as u32) 1145 } 1146 1147 /// Create a new F32 immediate. 
f32(bits: u32) -> Self1148 pub fn f32(bits: u32) -> Self { 1149 Self::F32(bits) 1150 } 1151 1152 /// Create a new F64 immediate. f64(bits: u64) -> Self1153 pub fn f64(bits: u64) -> Self { 1154 Self::F64(bits) 1155 } 1156 1157 /// Create a new V128 immediate. v128(bits: i128) -> Self1158 pub fn v128(bits: i128) -> Self { 1159 Self::V128(bits) 1160 } 1161 1162 /// Convert the immediate to i32, if possible. to_i32(&self) -> Option<i32>1163 pub fn to_i32(&self) -> Option<i32> { 1164 match self { 1165 Self::I32(v) => Some(*v as i32), 1166 Self::I64(v) => i32::try_from(*v as i64).ok(), 1167 _ => None, 1168 } 1169 } 1170 1171 /// Unwraps the underlying integer value as u64. 1172 /// # Panics 1173 /// This function panics if the underlying value can't be represented 1174 /// as u64. unwrap_as_u64(&self) -> u641175 pub fn unwrap_as_u64(&self) -> u64 { 1176 match self { 1177 Self::I32(v) => *v as u64, 1178 Self::I64(v) => *v, 1179 Self::F32(v) => *v as u64, 1180 Self::F64(v) => *v, 1181 _ => unreachable!(), 1182 } 1183 } 1184 1185 /// Get the operand size of the immediate. size(&self) -> OperandSize1186 pub fn size(&self) -> OperandSize { 1187 match self { 1188 Self::I32(_) | Self::F32(_) => OperandSize::S32, 1189 Self::I64(_) | Self::F64(_) => OperandSize::S64, 1190 Self::V128(_) => OperandSize::S128, 1191 } 1192 } 1193 1194 /// Get a little endian representation of the immediate. 1195 /// 1196 /// This method heap allocates and is intended to be used when adding 1197 /// values to the constant pool. to_bytes(&self) -> Vec<u8>1198 pub fn to_bytes(&self) -> Vec<u8> { 1199 match self { 1200 Imm::I32(n) => n.to_le_bytes().to_vec(), 1201 Imm::I64(n) => n.to_le_bytes().to_vec(), 1202 Imm::F32(n) => n.to_le_bytes().to_vec(), 1203 Imm::F64(n) => n.to_le_bytes().to_vec(), 1204 Imm::V128(n) => n.to_le_bytes().to_vec(), 1205 } 1206 } 1207 } 1208 1209 /// The location of the [VMcontext] used for function calls. 
1210 #[derive(Copy, Clone, Debug, Eq, PartialEq)] 1211 pub(crate) enum VMContextLoc { 1212 /// Dynamic, stored in the given register. 1213 Reg(Reg), 1214 /// The pinned [VMContext] register. 1215 Pinned, 1216 /// A different VMContext is loaded at the provided offset from the current 1217 /// VMContext. 1218 OffsetFromPinned(u32), 1219 } 1220 1221 /// The maximum number of context arguments currently used across the compiler. 1222 pub(crate) const MAX_CONTEXT_ARGS: usize = 2; 1223 1224 /// Out-of-band special purpose arguments used for function call emission. 1225 /// 1226 /// We cannot rely on the value stack for these values given that inserting 1227 /// register or memory values at arbitrary locations of the value stack has the 1228 /// potential to break the stack ordering principle, which states that older 1229 /// values must always precede newer values, effectively simulating the order of 1230 /// values in the machine stack. 1231 /// The [ContextArgs] are meant to be resolved at every callsite; in some cases 1232 /// it might be possible to construct it early on, but given that it might 1233 /// contain allocatable registers, it's preferred to construct it in 1234 /// [FnCall::emit]. 1235 #[derive(Clone, Debug)] 1236 pub(crate) enum ContextArgs { 1237 /// A single context argument is required; the current pinned [VMcontext] 1238 /// register must be passed as the first argument of the function call. 1239 VMContext([VMContextLoc; 1]), 1240 /// The callee and caller context arguments are required. In this case, the 1241 /// callee context argument is usually stored into an allocatable register 1242 /// and the caller is always the current pinned [VMContext] pointer. 1243 CalleeAndCallerVMContext([VMContextLoc; MAX_CONTEXT_ARGS]), 1244 } 1245 1246 impl ContextArgs { 1247 /// Construct a [ContextArgs] declaring the usage of the pinned [VMContext] 1248 /// register as both the caller and callee context arguments. 
pinned_callee_and_caller_vmctx() -> Self1249 pub fn pinned_callee_and_caller_vmctx() -> Self { 1250 Self::CalleeAndCallerVMContext([VMContextLoc::Pinned, VMContextLoc::Pinned]) 1251 } 1252 1253 /// Construct a [ContextArgs] that declares the usage of the pinned 1254 /// [VMContext] register as the only context argument. pinned_vmctx() -> Self1255 pub fn pinned_vmctx() -> Self { 1256 Self::VMContext([VMContextLoc::Pinned]) 1257 } 1258 1259 /// Construct a [ContextArgs] that declares the usage of a [VMContext] loaded 1260 /// indirectly from the pinned [VMContext] register as the only context 1261 /// argument. offset_from_pinned_vmctx(offset: u32) -> Self1262 pub fn offset_from_pinned_vmctx(offset: u32) -> Self { 1263 Self::VMContext([VMContextLoc::OffsetFromPinned(offset)]) 1264 } 1265 1266 /// Construct a [ContextArgs] that declares a dynamic callee context and the 1267 /// pinned [VMContext] register as the context arguments. with_callee_and_pinned_caller(callee_vmctx: Reg) -> Self1268 pub fn with_callee_and_pinned_caller(callee_vmctx: Reg) -> Self { 1269 Self::CalleeAndCallerVMContext([VMContextLoc::Reg(callee_vmctx), VMContextLoc::Pinned]) 1270 } 1271 1272 /// Get the length of the [ContextArgs]. len(&self) -> usize1273 pub fn len(&self) -> usize { 1274 self.as_slice().len() 1275 } 1276 1277 /// Get a slice of the context arguments. as_slice(&self) -> &[VMContextLoc]1278 pub fn as_slice(&self) -> &[VMContextLoc] { 1279 match self { 1280 Self::VMContext(a) => a.as_slice(), 1281 Self::CalleeAndCallerVMContext(a) => a.as_slice(), 1282 } 1283 } 1284 } 1285 1286 #[derive(Copy, Clone, Debug)] 1287 pub(crate) enum CalleeKind { 1288 /// A function call to a raw address. 1289 Indirect(Reg), 1290 /// A function call to a local function. 1291 Direct(UserExternalNameRef), 1292 } 1293 1294 impl CalleeKind { 1295 /// Creates a callee kind from a register. 
indirect(reg: Reg) -> Self1296 pub fn indirect(reg: Reg) -> Self { 1297 Self::Indirect(reg) 1298 } 1299 1300 /// Creates a direct callee kind from a function name. direct(name: UserExternalNameRef) -> Self1301 pub fn direct(name: UserExternalNameRef) -> Self { 1302 Self::Direct(name) 1303 } 1304 } 1305 1306 impl RegImm { 1307 /// Register constructor. reg(r: Reg) -> Self1308 pub fn reg(r: Reg) -> Self { 1309 RegImm::Reg(r) 1310 } 1311 1312 /// I64 immediate constructor. i64(val: i64) -> Self1313 pub fn i64(val: i64) -> Self { 1314 RegImm::Imm(Imm::i64(val)) 1315 } 1316 1317 /// I32 immediate constructor. i32(val: i32) -> Self1318 pub fn i32(val: i32) -> Self { 1319 RegImm::Imm(Imm::i32(val)) 1320 } 1321 1322 /// F32 immediate, stored using its bits representation. f32(bits: u32) -> Self1323 pub fn f32(bits: u32) -> Self { 1324 RegImm::Imm(Imm::f32(bits)) 1325 } 1326 1327 /// F64 immediate, stored using its bits representation. f64(bits: u64) -> Self1328 pub fn f64(bits: u64) -> Self { 1329 RegImm::Imm(Imm::f64(bits)) 1330 } 1331 1332 /// V128 immediate. v128(bits: i128) -> Self1333 pub fn v128(bits: i128) -> Self { 1334 RegImm::Imm(Imm::v128(bits)) 1335 } 1336 } 1337 1338 impl From<Reg> for RegImm { from(r: Reg) -> Self1339 fn from(r: Reg) -> Self { 1340 Self::Reg(r) 1341 } 1342 } 1343 1344 #[derive(Debug)] 1345 pub enum RoundingMode { 1346 Nearest, 1347 Up, 1348 Down, 1349 Zero, 1350 } 1351 1352 /// Memory flags for trusted loads/stores. 1353 pub const TRUSTED_FLAGS: MemFlags = MemFlags::trusted(); 1354 1355 /// Flags used for WebAssembly loads / stores. 1356 /// Untrusted by default so we don't set `no_trap`. 1357 /// We also ensure that the endianness is the right one for WebAssembly. 1358 pub const UNTRUSTED_FLAGS: MemFlags = MemFlags::new().with_endianness(Endianness::Little); 1359 1360 /// Generic MacroAssembler interface used by the code generation. 
1361 /// 1362 /// The MacroAssembler trait aims to expose an interface, high-level enough, 1363 /// so that each ISA can provide its own lowering to machine code. For example, 1364 /// for WebAssembly operators that don't have a direct mapping to a machine 1365 /// a instruction, the interface defines a signature matching the WebAssembly 1366 /// operator, allowing each implementation to lower such operator entirely. 1367 /// This approach attributes more responsibility to the MacroAssembler, but frees 1368 /// the caller from concerning about assembling the right sequence of 1369 /// instructions at the operator callsite. 1370 /// 1371 /// The interface defaults to a three-argument form for binary operations; 1372 /// this allows a natural mapping to instructions for RISC architectures, 1373 /// that use three-argument form. 1374 /// This approach allows for a more general interface that can be restricted 1375 /// where needed, in the case of architectures that use a two-argument form. 1376 1377 pub(crate) trait MacroAssembler { 1378 /// The addressing mode. 1379 type Address: Copy + Debug; 1380 1381 /// The pointer representation of the target ISA, 1382 /// used to access information from [`VMOffsets`]. 1383 type Ptr: PtrSize; 1384 1385 /// The ABI details of the target. 1386 type ABI: abi::ABI; 1387 1388 /// Emit the function prologue. prologue(&mut self, vmctx: Reg) -> Result<()>1389 fn prologue(&mut self, vmctx: Reg) -> Result<()> { 1390 self.frame_setup()?; 1391 self.check_stack(vmctx) 1392 } 1393 1394 /// Generate the frame setup sequence. frame_setup(&mut self) -> Result<()>1395 fn frame_setup(&mut self) -> Result<()>; 1396 1397 /// Generate the frame restore sequence. frame_restore(&mut self) -> Result<()>1398 fn frame_restore(&mut self) -> Result<()>; 1399 1400 /// Emit a stack check. check_stack(&mut self, vmctx: Reg) -> Result<()>1401 fn check_stack(&mut self, vmctx: Reg) -> Result<()>; 1402 1403 /// Emit the function epilogue. 
epilogue(&mut self) -> Result<()>1404 fn epilogue(&mut self) -> Result<()> { 1405 self.frame_restore() 1406 } 1407 1408 /// Reserve stack space. reserve_stack(&mut self, bytes: u32) -> Result<()>1409 fn reserve_stack(&mut self, bytes: u32) -> Result<()>; 1410 1411 /// Free stack space. free_stack(&mut self, bytes: u32) -> Result<()>1412 fn free_stack(&mut self, bytes: u32) -> Result<()>; 1413 1414 /// Reset the stack pointer to the given offset; 1415 /// 1416 /// Used to reset the stack pointer to a given offset 1417 /// when dealing with unreachable code. reset_stack_pointer(&mut self, offset: SPOffset) -> Result<()>1418 fn reset_stack_pointer(&mut self, offset: SPOffset) -> Result<()>; 1419 1420 /// Get the address of a local slot. local_address(&mut self, local: &LocalSlot) -> Result<Self::Address>1421 fn local_address(&mut self, local: &LocalSlot) -> Result<Self::Address>; 1422 1423 /// Constructs an address with an offset that is relative to the 1424 /// current position of the stack pointer (e.g. [sp + (sp_offset - 1425 /// offset)]. address_from_sp(&self, offset: SPOffset) -> Result<Self::Address>1426 fn address_from_sp(&self, offset: SPOffset) -> Result<Self::Address>; 1427 1428 /// Constructs an address with an offset that is absolute to the 1429 /// current position of the stack pointer (e.g. [sp + offset]. address_at_sp(&self, offset: SPOffset) -> Result<Self::Address>1430 fn address_at_sp(&self, offset: SPOffset) -> Result<Self::Address>; 1431 1432 /// Alias for [`Self::address_at_reg`] using the VMContext register as 1433 /// a base. The VMContext register is derived from the ABI type that is 1434 /// associated to the MacroAssembler. address_at_vmctx(&self, offset: u32) -> Result<Self::Address>1435 fn address_at_vmctx(&self, offset: u32) -> Result<Self::Address>; 1436 1437 /// Construct an address that is absolute to the current position 1438 /// of the given register. 
address_at_reg(&self, reg: Reg, offset: u32) -> Result<Self::Address>1439 fn address_at_reg(&self, reg: Reg, offset: u32) -> Result<Self::Address>; 1440 1441 /// Emit a function call to either a local or external function. call( &mut self, stack_args_size: u32, f: impl FnMut(&mut Self) -> Result<(CalleeKind, CallingConvention)>, ) -> Result<u32>1442 fn call( 1443 &mut self, 1444 stack_args_size: u32, 1445 f: impl FnMut(&mut Self) -> Result<(CalleeKind, CallingConvention)>, 1446 ) -> Result<u32>; 1447 1448 /// Acquire a scratch register and execute the given callback. with_scratch<T: ScratchType, R>(&mut self, f: impl FnOnce(&mut Self, Scratch) -> R) -> R1449 fn with_scratch<T: ScratchType, R>(&mut self, f: impl FnOnce(&mut Self, Scratch) -> R) -> R; 1450 1451 /// Convenience wrapper over [`Self::with_scratch`], derives the register class 1452 /// for a particular Wasm value type. with_scratch_for<R>( &mut self, ty: WasmValType, f: impl FnOnce(&mut Self, Scratch) -> R, ) -> R1453 fn with_scratch_for<R>( 1454 &mut self, 1455 ty: WasmValType, 1456 f: impl FnOnce(&mut Self, Scratch) -> R, 1457 ) -> R { 1458 match ty { 1459 WasmValType::I32 1460 | WasmValType::I64 1461 | WasmValType::Ref(WasmRefType { 1462 heap_type: WasmHeapType::Func, 1463 .. 1464 }) => self.with_scratch::<IntScratch, _>(f), 1465 WasmValType::F32 | WasmValType::F64 | WasmValType::V128 => { 1466 self.with_scratch::<FloatScratch, _>(f) 1467 } 1468 _ => unimplemented!(), 1469 } 1470 } 1471 1472 /// Get stack pointer offset. sp_offset(&self) -> Result<SPOffset>1473 fn sp_offset(&self) -> Result<SPOffset>; 1474 1475 /// Perform a stack store. store(&mut self, src: RegImm, dst: Self::Address, size: OperandSize) -> Result<()>1476 fn store(&mut self, src: RegImm, dst: Self::Address, size: OperandSize) -> Result<()>; 1477 1478 /// Alias for `MacroAssembler::store` with the operand size corresponding 1479 /// to the pointer size of the target. 
/// Perform a WebAssembly store.
/// A WebAssembly store introduces several additional invariants compared to
/// [Self::store], more precisely, it can implicitly trap, in certain
/// circumstances, even if explicit bounds checks are elided, in that sense,
/// we consider this type of store as untrusted. It can also differ with
/// regards to the endianness depending on the target ISA. For this reason,
/// [Self::wasm_store], should be explicitly used when emitting WebAssembly
/// stores.
fn wasm_store(&mut self, src: Reg, dst: Self::Address, store_kind: StoreKind) -> Result<()>;
load_ptr(&mut self, src: Self::Address, dst: WritableReg) -> Result<()>1507 fn load_ptr(&mut self, src: Self::Address, dst: WritableReg) -> Result<()>; 1508 1509 /// Computes the effective address and stores the result in the destination 1510 /// register. compute_addr( &mut self, _src: Self::Address, _dst: WritableReg, _size: OperandSize, ) -> Result<()>1511 fn compute_addr( 1512 &mut self, 1513 _src: Self::Address, 1514 _dst: WritableReg, 1515 _size: OperandSize, 1516 ) -> Result<()>; 1517 1518 /// Pop a value from the machine stack into the given register. pop(&mut self, dst: WritableReg, size: OperandSize) -> Result<()>1519 fn pop(&mut self, dst: WritableReg, size: OperandSize) -> Result<()>; 1520 1521 /// Perform a move. mov(&mut self, dst: WritableReg, src: RegImm, size: OperandSize) -> Result<()>1522 fn mov(&mut self, dst: WritableReg, src: RegImm, size: OperandSize) -> Result<()>; 1523 1524 /// Perform a conditional move. cmov(&mut self, dst: WritableReg, src: Reg, cc: IntCmpKind, size: OperandSize) -> Result<()>1525 fn cmov(&mut self, dst: WritableReg, src: Reg, cc: IntCmpKind, size: OperandSize) 1526 -> Result<()>; 1527 1528 /// Performs a memory move of bytes from src to dest. 1529 /// Bytes are moved in blocks of 8 bytes, where possible. memmove( &mut self, src: SPOffset, dst: SPOffset, bytes: u32, direction: MemMoveDirection, ) -> Result<()>1530 fn memmove( 1531 &mut self, 1532 src: SPOffset, 1533 dst: SPOffset, 1534 bytes: u32, 1535 direction: MemMoveDirection, 1536 ) -> Result<()> { 1537 match direction { 1538 MemMoveDirection::LowToHigh => debug_assert!(dst.as_u32() < src.as_u32()), 1539 MemMoveDirection::HighToLow => debug_assert!(dst.as_u32() > src.as_u32()), 1540 } 1541 // At least 4 byte aligned. 
1542 debug_assert!(bytes % 4 == 0); 1543 let mut remaining = bytes; 1544 let word_bytes = <Self::ABI as abi::ABI>::word_bytes(); 1545 1546 let word_bytes = word_bytes as u32; 1547 1548 let mut dst_offs; 1549 let mut src_offs; 1550 match direction { 1551 MemMoveDirection::LowToHigh => { 1552 dst_offs = dst.as_u32() - bytes; 1553 src_offs = src.as_u32() - bytes; 1554 self.with_scratch::<IntScratch, _>(|masm, scratch| { 1555 while remaining >= word_bytes { 1556 remaining -= word_bytes; 1557 dst_offs += word_bytes; 1558 src_offs += word_bytes; 1559 1560 masm.load_ptr( 1561 masm.address_from_sp(SPOffset::from_u32(src_offs))?, 1562 scratch.writable(), 1563 )?; 1564 masm.store_ptr( 1565 scratch.inner(), 1566 masm.address_from_sp(SPOffset::from_u32(dst_offs))?, 1567 )?; 1568 } 1569 wasmtime_environ::error::Ok(()) 1570 })?; 1571 } 1572 MemMoveDirection::HighToLow => { 1573 // Go from the end to the beginning to handle overlapping addresses. 1574 src_offs = src.as_u32(); 1575 dst_offs = dst.as_u32(); 1576 self.with_scratch::<IntScratch, _>(|masm, scratch| { 1577 while remaining >= word_bytes { 1578 masm.load_ptr( 1579 masm.address_from_sp(SPOffset::from_u32(src_offs))?, 1580 scratch.writable(), 1581 )?; 1582 masm.store_ptr( 1583 scratch.inner(), 1584 masm.address_from_sp(SPOffset::from_u32(dst_offs))?, 1585 )?; 1586 1587 remaining -= word_bytes; 1588 src_offs -= word_bytes; 1589 dst_offs -= word_bytes; 1590 } 1591 wasmtime_environ::error::Ok(()) 1592 })?; 1593 } 1594 } 1595 1596 if remaining > 0 { 1597 let half_word = word_bytes / 2; 1598 let ptr_size = OperandSize::from_bytes(half_word as u8); 1599 debug_assert!(remaining == half_word); 1600 // Need to move the offsets ahead in the `LowToHigh` case to 1601 // compensate for the initial subtraction of `bytes`. 
1602 if direction == MemMoveDirection::LowToHigh { 1603 dst_offs += half_word; 1604 src_offs += half_word; 1605 } 1606 1607 self.with_scratch::<IntScratch, _>(|masm, scratch| { 1608 masm.load( 1609 masm.address_from_sp(SPOffset::from_u32(src_offs))?, 1610 scratch.writable(), 1611 ptr_size, 1612 )?; 1613 masm.store( 1614 scratch.inner().into(), 1615 masm.address_from_sp(SPOffset::from_u32(dst_offs))?, 1616 ptr_size, 1617 )?; 1618 wasmtime_environ::error::Ok(()) 1619 })?; 1620 } 1621 Ok(()) 1622 } 1623 1624 /// Perform add operation. add(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>1625 fn add(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>; 1626 1627 /// Perform add with unsigned extension. add_uextend( &mut self, dst: WritableReg, lhs: Reg, rhs: Reg, from_size: OperandSize, size: OperandSize, ) -> Result<()>1628 fn add_uextend( 1629 &mut self, 1630 dst: WritableReg, 1631 lhs: Reg, 1632 rhs: Reg, 1633 from_size: OperandSize, 1634 size: OperandSize, 1635 ) -> Result<()>; 1636 1637 /// Perform a checked unsigned integer addition, emitting the provided trap 1638 /// if the addition overflows. 1639 /// 1640 /// Note: This only accepts immediate operands. For register operands with 1641 /// proper extension, use add_uextend with manual overflow checking. checked_uadd( &mut self, dst: WritableReg, lhs: Reg, rhs: Imm, size: OperandSize, trap: TrapCode, ) -> Result<()>1642 fn checked_uadd( 1643 &mut self, 1644 dst: WritableReg, 1645 lhs: Reg, 1646 rhs: Imm, 1647 size: OperandSize, 1648 trap: TrapCode, 1649 ) -> Result<()>; 1650 1651 /// Perform subtraction operation. sub(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>1652 fn sub(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>; 1653 1654 /// Perform multiplication operation. 
mul(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>1655 fn mul(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>; 1656 1657 /// Perform a floating point add operation. float_add(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>1658 fn float_add(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>; 1659 1660 /// Perform a floating point subtraction operation. float_sub(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>1661 fn float_sub(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>; 1662 1663 /// Perform a floating point multiply operation. float_mul(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>1664 fn float_mul(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>; 1665 1666 /// Perform a floating point divide operation. float_div(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>1667 fn float_div(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>; 1668 1669 /// Perform a floating point minimum operation. In x86, this will emit 1670 /// multiple instructions. float_min(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>1671 fn float_min(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>; 1672 1673 /// Perform a floating point maximum operation. In x86, this will emit 1674 /// multiple instructions. float_max(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>1675 fn float_max(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>; 1676 1677 /// Perform a floating point copysign operation. In x86, this will emit 1678 /// multiple instructions. 
/// Perform a floating point rounding operation, using the rounding
/// behavior selected by `mode` (see [`RoundingMode`]).
///
/// NOTE(review): `fallback` receives the function environment, the code
/// generation context and the assembler; presumably implementations invoke
/// it when the rounding cannot be emitted natively — confirm against the
/// ISA-specific implementations.
fn float_round<
    F: FnMut(&mut FuncEnv<Self::Ptr>, &mut CodeGenContext<Emission>, &mut Self) -> Result<()>,
>(
    &mut self,
    mode: RoundingMode,
    env: &mut FuncEnv<Self::Ptr>,
    context: &mut CodeGenContext<Emission>,
    size: OperandSize,
    fallback: F,
) -> Result<()>;
or(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>1712 fn or(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>; 1713 1714 /// Perform logical exclusive or operation. xor(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>1715 fn xor(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>; 1716 1717 /// Perform a shift operation between a register and an immediate. shift_ir( &mut self, dst: WritableReg, imm: Imm, lhs: Reg, kind: ShiftKind, size: OperandSize, ) -> Result<()>1718 fn shift_ir( 1719 &mut self, 1720 dst: WritableReg, 1721 imm: Imm, 1722 lhs: Reg, 1723 kind: ShiftKind, 1724 size: OperandSize, 1725 ) -> Result<()>; 1726 1727 /// Perform a shift operation between two registers. 1728 /// This case is special in that some architectures have specific expectations 1729 /// regarding the location of the instruction arguments. To free the 1730 /// caller from having to deal with the architecture specific constraints 1731 /// we give this function access to the code generation context, allowing 1732 /// each implementation to decide the lowering path. shift( &mut self, context: &mut CodeGenContext<Emission>, kind: ShiftKind, size: OperandSize, ) -> Result<()>1733 fn shift( 1734 &mut self, 1735 context: &mut CodeGenContext<Emission>, 1736 kind: ShiftKind, 1737 size: OperandSize, 1738 ) -> Result<()>; 1739 1740 /// Perform division operation. 1741 /// Division is special in that some architectures have specific 1742 /// expectations regarding the location of the instruction 1743 /// arguments and regarding the location of the quotient / 1744 /// remainder. To free the caller from having to deal with the 1745 /// architecture specific constraints we give this function access 1746 /// to the code generation context, allowing each implementation 1747 /// to decide the lowering path. 
For cases in which division is a 1748 /// unconstrained binary operation, the caller can decide to use 1749 /// the `CodeGenContext::i32_binop` or `CodeGenContext::i64_binop` 1750 /// functions. div( &mut self, context: &mut CodeGenContext<Emission>, kind: DivKind, size: OperandSize, ) -> Result<()>1751 fn div( 1752 &mut self, 1753 context: &mut CodeGenContext<Emission>, 1754 kind: DivKind, 1755 size: OperandSize, 1756 ) -> Result<()>; 1757 1758 /// Calculate remainder. rem( &mut self, context: &mut CodeGenContext<Emission>, kind: RemKind, size: OperandSize, ) -> Result<()>1759 fn rem( 1760 &mut self, 1761 context: &mut CodeGenContext<Emission>, 1762 kind: RemKind, 1763 size: OperandSize, 1764 ) -> Result<()>; 1765 1766 /// Compares `src1` against `src2` for the side effect of setting processor 1767 /// flags. 1768 /// 1769 /// Note that `src1` is the left-hand-side of the comparison and `src2` is 1770 /// the right-hand-side, so if testing `a < b` then `src1 == a` and 1771 /// `src2 == b` cmp(&mut self, src1: Reg, src2: RegImm, size: OperandSize) -> Result<()>1772 fn cmp(&mut self, src1: Reg, src2: RegImm, size: OperandSize) -> Result<()>; 1773 1774 /// Compare src and dst and put the result in dst. 1775 /// This function will potentially emit a series of instructions. 1776 /// 1777 /// The initial value in `dst` is the left-hand-side of the comparison and 1778 /// the initial value in `src` is the right-hand-side of the comparison. 1779 /// That means for `a < b` then `dst == a` and `src == b`. cmp_with_set( &mut self, dst: WritableReg, src: RegImm, kind: IntCmpKind, size: OperandSize, ) -> Result<()>1780 fn cmp_with_set( 1781 &mut self, 1782 dst: WritableReg, 1783 src: RegImm, 1784 kind: IntCmpKind, 1785 size: OperandSize, 1786 ) -> Result<()>; 1787 1788 /// Compare floats in src1 and src2 and put the result in dst. 1789 /// In x86, this will emit multiple instructions. 
/// Count the number of trailing zeroes in src and put the result in dst.
/// In x64, this will emit multiple instructions if the `has_tzcnt` flag is
/// false.
fn ctz(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>;
popcnt(&mut self, context: &mut CodeGenContext<Emission>, size: OperandSize) -> Result<()>1825 fn popcnt(&mut self, context: &mut CodeGenContext<Emission>, size: OperandSize) -> Result<()>; 1826 1827 /// Converts an i64 to an i32 by discarding the high 32 bits. wrap(&mut self, dst: WritableReg, src: Reg) -> Result<()>1828 fn wrap(&mut self, dst: WritableReg, src: Reg) -> Result<()>; 1829 1830 /// Extends an integer of a given size to a larger size. extend(&mut self, dst: WritableReg, src: Reg, kind: ExtendKind) -> Result<()>1831 fn extend(&mut self, dst: WritableReg, src: Reg, kind: ExtendKind) -> Result<()>; 1832 1833 /// Emits one or more instructions to perform a signed truncation of a 1834 /// float into an integer. signed_truncate( &mut self, dst: WritableReg, src: Reg, src_size: OperandSize, dst_size: OperandSize, kind: TruncKind, ) -> Result<()>1835 fn signed_truncate( 1836 &mut self, 1837 dst: WritableReg, 1838 src: Reg, 1839 src_size: OperandSize, 1840 dst_size: OperandSize, 1841 kind: TruncKind, 1842 ) -> Result<()>; 1843 1844 /// Emits one or more instructions to perform an unsigned truncation of a 1845 /// float into an integer. unsigned_truncate( &mut self, context: &mut CodeGenContext<Emission>, src_size: OperandSize, dst_size: OperandSize, kind: TruncKind, ) -> Result<()>1846 fn unsigned_truncate( 1847 &mut self, 1848 context: &mut CodeGenContext<Emission>, 1849 src_size: OperandSize, 1850 dst_size: OperandSize, 1851 kind: TruncKind, 1852 ) -> Result<()>; 1853 1854 /// Emits one or more instructions to perform a signed convert of an 1855 /// integer into a float. signed_convert( &mut self, dst: WritableReg, src: Reg, src_size: OperandSize, dst_size: OperandSize, ) -> Result<()>1856 fn signed_convert( 1857 &mut self, 1858 dst: WritableReg, 1859 src: Reg, 1860 src_size: OperandSize, 1861 dst_size: OperandSize, 1862 ) -> Result<()>; 1863 1864 /// Emits one or more instructions to perform an unsigned convert of an 1865 /// integer into a float. 
unsigned_convert( &mut self, dst: WritableReg, src: Reg, tmp_gpr: Reg, src_size: OperandSize, dst_size: OperandSize, ) -> Result<()>1866 fn unsigned_convert( 1867 &mut self, 1868 dst: WritableReg, 1869 src: Reg, 1870 tmp_gpr: Reg, 1871 src_size: OperandSize, 1872 dst_size: OperandSize, 1873 ) -> Result<()>; 1874 1875 /// Reinterpret a float as an integer. reinterpret_float_as_int( &mut self, dst: WritableReg, src: Reg, size: OperandSize, ) -> Result<()>1876 fn reinterpret_float_as_int( 1877 &mut self, 1878 dst: WritableReg, 1879 src: Reg, 1880 size: OperandSize, 1881 ) -> Result<()>; 1882 1883 /// Reinterpret an integer as a float. reinterpret_int_as_float( &mut self, dst: WritableReg, src: Reg, size: OperandSize, ) -> Result<()>1884 fn reinterpret_int_as_float( 1885 &mut self, 1886 dst: WritableReg, 1887 src: Reg, 1888 size: OperandSize, 1889 ) -> Result<()>; 1890 1891 /// Demote an f64 to an f32. demote(&mut self, dst: WritableReg, src: Reg) -> Result<()>1892 fn demote(&mut self, dst: WritableReg, src: Reg) -> Result<()>; 1893 1894 /// Promote an f32 to an f64. promote(&mut self, dst: WritableReg, src: Reg) -> Result<()>1895 fn promote(&mut self, dst: WritableReg, src: Reg) -> Result<()>; 1896 1897 /// Zero a given memory range. 1898 /// 1899 /// The default implementation divides the given memory range 1900 /// into word-sized slots. Then it unrolls a series of store 1901 /// instructions, effectively assigning zero to each slot. zero_mem_range(&mut self, mem: &Range<u32>) -> Result<()>1902 fn zero_mem_range(&mut self, mem: &Range<u32>) -> Result<()> { 1903 let word_size = <Self::ABI as abi::ABI>::word_bytes() as u32; 1904 if mem.is_empty() { 1905 return Ok(()); 1906 } 1907 1908 let start = if mem.start % word_size == 0 { 1909 mem.start 1910 } else { 1911 // Ensure that the start of the range is at least 4-byte aligned. 
1912 assert!(mem.start % 4 == 0); 1913 let start = align_to(mem.start, word_size); 1914 let addr: Self::Address = self.local_address(&LocalSlot::i32(start))?; 1915 self.store(RegImm::i32(0), addr, OperandSize::S32)?; 1916 // Ensure that the new start of the range, is word-size aligned. 1917 assert!(start % word_size == 0); 1918 start 1919 }; 1920 1921 let end = align_to(mem.end, word_size); 1922 let slots = (end - start) / word_size; 1923 1924 if slots == 1 { 1925 let slot = LocalSlot::i64(start + word_size); 1926 let addr: Self::Address = self.local_address(&slot)?; 1927 self.store(RegImm::i64(0), addr, OperandSize::S64)?; 1928 } else { 1929 // TODO 1930 // Add an upper bound to this generation; 1931 // given a considerably large amount of slots 1932 // this will be inefficient. 1933 self.with_scratch::<IntScratch, _>(|masm, scratch| { 1934 masm.zero(scratch.writable())?; 1935 let zero = RegImm::reg(scratch.inner()); 1936 1937 for step in (start..end).step_by(word_size as usize) { 1938 let slot = LocalSlot::i64(step + word_size); 1939 let addr: Self::Address = masm.local_address(&slot)?; 1940 masm.store(zero, addr, OperandSize::S64)?; 1941 } 1942 wasmtime_environ::error::Ok(()) 1943 })?; 1944 } 1945 1946 Ok(()) 1947 } 1948 1949 /// Generate a label. get_label(&mut self) -> Result<MachLabel>1950 fn get_label(&mut self) -> Result<MachLabel>; 1951 1952 /// Bind the given label at the current code offset. bind(&mut self, label: MachLabel) -> Result<()>1953 fn bind(&mut self, label: MachLabel) -> Result<()>; 1954 1955 /// Conditional branch. 1956 /// 1957 /// Performs a comparison between the two operands, 1958 /// and immediately after emits a jump to the given 1959 /// label destination if the condition is met. 
branch( &mut self, kind: IntCmpKind, lhs: Reg, rhs: RegImm, taken: MachLabel, size: OperandSize, ) -> Result<()>1960 fn branch( 1961 &mut self, 1962 kind: IntCmpKind, 1963 lhs: Reg, 1964 rhs: RegImm, 1965 taken: MachLabel, 1966 size: OperandSize, 1967 ) -> Result<()>; 1968 1969 /// Emits and unconditional jump to the given label. jmp(&mut self, target: MachLabel) -> Result<()>1970 fn jmp(&mut self, target: MachLabel) -> Result<()>; 1971 1972 /// Emits a jump table sequence. The default label is specified as 1973 /// the last element of the targets slice. jmp_table(&mut self, targets: &[MachLabel], index: Reg, tmp: Reg) -> Result<()>1974 fn jmp_table(&mut self, targets: &[MachLabel], index: Reg, tmp: Reg) -> Result<()>; 1975 1976 /// Emit an unreachable code trap. unreachable(&mut self) -> Result<()>1977 fn unreachable(&mut self) -> Result<()>; 1978 1979 /// Emit an unconditional trap. trap(&mut self, code: TrapCode) -> Result<()>1980 fn trap(&mut self, code: TrapCode) -> Result<()>; 1981 1982 /// Traps if the condition code is met. trapif(&mut self, cc: IntCmpKind, code: TrapCode) -> Result<()>1983 fn trapif(&mut self, cc: IntCmpKind, code: TrapCode) -> Result<()>; 1984 1985 /// Trap if the source register is zero. trapz(&mut self, src: Reg, code: TrapCode) -> Result<()>1986 fn trapz(&mut self, src: Reg, code: TrapCode) -> Result<()>; 1987 1988 /// Ensures that the stack pointer is correctly positioned before an unconditional 1989 /// jump according to the requirements of the destination target. ensure_sp_for_jump(&mut self, target: SPOffset) -> Result<()>1990 fn ensure_sp_for_jump(&mut self, target: SPOffset) -> Result<()> { 1991 let bytes = self 1992 .sp_offset()? 1993 .as_u32() 1994 .checked_sub(target.as_u32()) 1995 .unwrap_or(0); 1996 1997 if bytes > 0 { 1998 self.free_stack(bytes)?; 1999 } 2000 2001 Ok(()) 2002 } 2003 2004 /// Mark the start of a source location returning the machine code offset 2005 /// and the relative source code location. 
start_source_loc(&mut self, loc: RelSourceLoc) -> Result<(CodeOffset, RelSourceLoc)>2006 fn start_source_loc(&mut self, loc: RelSourceLoc) -> Result<(CodeOffset, RelSourceLoc)>; 2007 2008 /// Mark the end of a source location. end_source_loc(&mut self) -> Result<()>2009 fn end_source_loc(&mut self) -> Result<()>; 2010 2011 /// The current offset, in bytes from the beginning of the function. current_code_offset(&self) -> Result<CodeOffset>2012 fn current_code_offset(&self) -> Result<CodeOffset>; 2013 2014 /// Performs a 128-bit addition add128( &mut self, dst_lo: WritableReg, dst_hi: WritableReg, lhs_lo: Reg, lhs_hi: Reg, rhs_lo: Reg, rhs_hi: Reg, ) -> Result<()>2015 fn add128( 2016 &mut self, 2017 dst_lo: WritableReg, 2018 dst_hi: WritableReg, 2019 lhs_lo: Reg, 2020 lhs_hi: Reg, 2021 rhs_lo: Reg, 2022 rhs_hi: Reg, 2023 ) -> Result<()>; 2024 2025 /// Performs a 128-bit subtraction sub128( &mut self, dst_lo: WritableReg, dst_hi: WritableReg, lhs_lo: Reg, lhs_hi: Reg, rhs_lo: Reg, rhs_hi: Reg, ) -> Result<()>2026 fn sub128( 2027 &mut self, 2028 dst_lo: WritableReg, 2029 dst_hi: WritableReg, 2030 lhs_lo: Reg, 2031 lhs_hi: Reg, 2032 rhs_lo: Reg, 2033 rhs_hi: Reg, 2034 ) -> Result<()>; 2035 2036 /// Performs a widening multiplication from two 64-bit operands into a 2037 /// 128-bit result. 2038 /// 2039 /// Note that some platforms require special handling of registers in this 2040 /// instruction (e.g. x64) so full access to `CodeGenContext` is provided. mul_wide(&mut self, context: &mut CodeGenContext<Emission>, kind: MulWideKind) -> Result<()>2041 fn mul_wide(&mut self, context: &mut CodeGenContext<Emission>, kind: MulWideKind) 2042 -> Result<()>; 2043 2044 /// Takes the value in a src operand and replicates it across lanes of 2045 /// `size` in a destination result. 
splat(&mut self, context: &mut CodeGenContext<Emission>, size: SplatKind) -> Result<()>2046 fn splat(&mut self, context: &mut CodeGenContext<Emission>, size: SplatKind) -> Result<()>; 2047 2048 /// Performs a shuffle between two 128-bit vectors into a 128-bit result 2049 /// using lanes as a mask to select which indexes to copy. shuffle(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, lanes: [u8; 16]) -> Result<()>2050 fn shuffle(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, lanes: [u8; 16]) -> Result<()>; 2051 2052 /// Performs a swizzle between two 128-bit vectors into a 128-bit result. swizzle(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg) -> Result<()>2053 fn swizzle(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg) -> Result<()>; 2054 2055 /// Performs the RMW `op` operation on the passed `addr`. 2056 /// 2057 /// The value *before* the operation was performed is written back to the `operand` register. atomic_rmw( &mut self, context: &mut CodeGenContext<Emission>, addr: Self::Address, size: OperandSize, op: RmwOp, flags: MemFlags, extend: Option<Extend<Zero>>, ) -> Result<()>2058 fn atomic_rmw( 2059 &mut self, 2060 context: &mut CodeGenContext<Emission>, 2061 addr: Self::Address, 2062 size: OperandSize, 2063 op: RmwOp, 2064 flags: MemFlags, 2065 extend: Option<Extend<Zero>>, 2066 ) -> Result<()>; 2067 2068 /// Extracts the scalar value from `src` in `lane` to `dst`. extract_lane( &mut self, src: Reg, dst: WritableReg, lane: u8, kind: ExtractLaneKind, ) -> Result<()>2069 fn extract_lane( 2070 &mut self, 2071 src: Reg, 2072 dst: WritableReg, 2073 lane: u8, 2074 kind: ExtractLaneKind, 2075 ) -> Result<()>; 2076 2077 /// Replaces the value in `lane` in `dst` with the value in `src`. 
replace_lane( &mut self, src: RegImm, dst: WritableReg, lane: u8, kind: ReplaceLaneKind, ) -> Result<()>2078 fn replace_lane( 2079 &mut self, 2080 src: RegImm, 2081 dst: WritableReg, 2082 lane: u8, 2083 kind: ReplaceLaneKind, 2084 ) -> Result<()>; 2085 2086 /// Perform an atomic CAS (compare-and-swap) operation with the value at `addr`, and `expected` 2087 /// and `replacement` (at the top of the context's stack). 2088 /// 2089 /// This method takes the `CodeGenContext` as an arguments to accommodate architectures that 2090 /// expect parameters in specific registers. The context stack contains the `replacement`, 2091 /// and `expected` values in that order. The implementer is expected to push the value at 2092 /// `addr` before the update to the context's stack before returning. atomic_cas( &mut self, context: &mut CodeGenContext<Emission>, addr: Self::Address, size: OperandSize, flags: MemFlags, extend: Option<Extend<Zero>>, ) -> Result<()>2093 fn atomic_cas( 2094 &mut self, 2095 context: &mut CodeGenContext<Emission>, 2096 addr: Self::Address, 2097 size: OperandSize, 2098 flags: MemFlags, 2099 extend: Option<Extend<Zero>>, 2100 ) -> Result<()>; 2101 2102 /// Compares vector registers `lhs` and `rhs` for equality and puts the 2103 /// vector of results in `dst`. v128_eq( &mut self, dst: WritableReg, lhs: Reg, rhs: Reg, kind: VectorEqualityKind, ) -> Result<()>2104 fn v128_eq( 2105 &mut self, 2106 dst: WritableReg, 2107 lhs: Reg, 2108 rhs: Reg, 2109 kind: VectorEqualityKind, 2110 ) -> Result<()>; 2111 2112 /// Compares vector registers `lhs` and `rhs` for inequality and puts the 2113 /// vector of results in `dst`. 
v128_ne( &mut self, dst: WritableReg, lhs: Reg, rhs: Reg, kind: VectorEqualityKind, ) -> Result<()>2114 fn v128_ne( 2115 &mut self, 2116 dst: WritableReg, 2117 lhs: Reg, 2118 rhs: Reg, 2119 kind: VectorEqualityKind, 2120 ) -> Result<()>; 2121 2122 /// Performs a less than comparison with vector registers `lhs` and `rhs` 2123 /// and puts the vector of results in `dst`. v128_lt( &mut self, dst: WritableReg, lhs: Reg, rhs: Reg, kind: VectorCompareKind, ) -> Result<()>2124 fn v128_lt( 2125 &mut self, 2126 dst: WritableReg, 2127 lhs: Reg, 2128 rhs: Reg, 2129 kind: VectorCompareKind, 2130 ) -> Result<()>; 2131 2132 /// Performs a less than or equal comparison with vector registers `lhs` 2133 /// and `rhs` and puts the vector of results in `dst`. v128_le( &mut self, dst: WritableReg, lhs: Reg, rhs: Reg, kind: VectorCompareKind, ) -> Result<()>2134 fn v128_le( 2135 &mut self, 2136 dst: WritableReg, 2137 lhs: Reg, 2138 rhs: Reg, 2139 kind: VectorCompareKind, 2140 ) -> Result<()>; 2141 2142 /// Performs a greater than comparison with vector registers `lhs` and 2143 /// `rhs` and puts the vector of results in `dst`. v128_gt( &mut self, dst: WritableReg, lhs: Reg, rhs: Reg, kind: VectorCompareKind, ) -> Result<()>2144 fn v128_gt( 2145 &mut self, 2146 dst: WritableReg, 2147 lhs: Reg, 2148 rhs: Reg, 2149 kind: VectorCompareKind, 2150 ) -> Result<()>; 2151 2152 /// Performs a greater than or equal comparison with vector registers `lhs` 2153 /// and `rhs` and puts the vector of results in `dst`. v128_ge( &mut self, dst: WritableReg, lhs: Reg, rhs: Reg, kind: VectorCompareKind, ) -> Result<()>2154 fn v128_ge( 2155 &mut self, 2156 dst: WritableReg, 2157 lhs: Reg, 2158 rhs: Reg, 2159 kind: VectorCompareKind, 2160 ) -> Result<()>; 2161 2162 /// Emit a memory fence. fence(&mut self) -> Result<()>2163 fn fence(&mut self) -> Result<()>; 2164 2165 /// Perform a logical `not` operation on the 128bits vector value in `dst`. 
v128_not(&mut self, dst: WritableReg) -> Result<()>2166 fn v128_not(&mut self, dst: WritableReg) -> Result<()>; 2167 2168 /// Perform a logical `and` operation on `src1` and `src1`, both 128bits vector values, writing 2169 /// the result to `dst`. v128_and(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>2170 fn v128_and(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>; 2171 2172 /// Perform a logical `and_not` operation on `src1` and `src1`, both 128bits vector values, writing 2173 /// the result to `dst`. 2174 /// 2175 /// `and_not` is not commutative: dst = !src1 & src2. v128_and_not(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>2176 fn v128_and_not(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>; 2177 2178 /// Perform a logical `or` operation on `src1` and `src1`, both 128bits vector values, writing 2179 /// the result to `dst`. v128_or(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>2180 fn v128_or(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>; 2181 2182 /// Perform a logical `xor` operation on `src1` and `src1`, both 128bits vector values, writing 2183 /// the result to `dst`. v128_xor(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>2184 fn v128_xor(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>; 2185 2186 /// Given two 128bits vectors `src1` and `src2`, and a 128bits bitmask `mask`, selects bits 2187 /// from `src1` when mask is 1, and from `src2` when mask is 0. 2188 /// 2189 /// This is equivalent to: `v128.or(v128.and(src1, mask), v128.and(src2, v128.not(mask)))`. v128_bitselect(&mut self, src1: Reg, src2: Reg, mask: Reg, dst: WritableReg) -> Result<()>2190 fn v128_bitselect(&mut self, src1: Reg, src2: Reg, mask: Reg, dst: WritableReg) -> Result<()>; 2191 2192 /// If any bit in `src` is 1, set `dst` to 1, or 0 otherwise. 
v128_any_true(&mut self, src: Reg, dst: WritableReg) -> Result<()>2193 fn v128_any_true(&mut self, src: Reg, dst: WritableReg) -> Result<()>; 2194 2195 /// Convert vector of integers to vector of floating points. v128_convert(&mut self, src: Reg, dst: WritableReg, kind: V128ConvertKind) -> Result<()>2196 fn v128_convert(&mut self, src: Reg, dst: WritableReg, kind: V128ConvertKind) -> Result<()>; 2197 2198 /// Convert two input vectors into a smaller lane vector by narrowing each 2199 /// lane. v128_narrow( &mut self, src1: Reg, src2: Reg, dst: WritableReg, kind: V128NarrowKind, ) -> Result<()>2200 fn v128_narrow( 2201 &mut self, 2202 src1: Reg, 2203 src2: Reg, 2204 dst: WritableReg, 2205 kind: V128NarrowKind, 2206 ) -> Result<()>; 2207 2208 /// Converts a vector containing two 64-bit floating point lanes to two 2209 /// 32-bit floating point lanes and setting the two higher lanes to 0. v128_demote(&mut self, src: Reg, dst: WritableReg) -> Result<()>2210 fn v128_demote(&mut self, src: Reg, dst: WritableReg) -> Result<()>; 2211 2212 /// Converts a vector containing four 32-bit floating point lanes to two 2213 /// 64-bit floating point lanes. Only the two lower lanes are converted. v128_promote(&mut self, src: Reg, dst: WritableReg) -> Result<()>2214 fn v128_promote(&mut self, src: Reg, dst: WritableReg) -> Result<()>; 2215 2216 /// Converts low or high half of the smaller lane vector to a larger lane 2217 /// vector. v128_extend(&mut self, src: Reg, dst: WritableReg, kind: V128ExtendKind) -> Result<()>2218 fn v128_extend(&mut self, src: Reg, dst: WritableReg, kind: V128ExtendKind) -> Result<()>; 2219 2220 /// Perform a vector add between `lsh` and `rhs`, placing the result in 2221 /// `dst`. v128_add(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, kind: V128AddKind) -> Result<()>2222 fn v128_add(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, kind: V128AddKind) -> Result<()>; 2223 2224 /// Perform a vector sub between `lhs` and `rhs`, placing the result in `dst`. 
v128_sub(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, kind: V128SubKind) -> Result<()>2225 fn v128_sub(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, kind: V128SubKind) -> Result<()>; 2226 2227 /// Perform a vector lane-wise mul between `lhs` and `rhs`, placing the result in `dst`. v128_mul(&mut self, context: &mut CodeGenContext<Emission>, kind: V128MulKind) -> Result<()>2228 fn v128_mul(&mut self, context: &mut CodeGenContext<Emission>, kind: V128MulKind) 2229 -> Result<()>; 2230 2231 /// Perform an absolute operation on a vector. v128_abs(&mut self, src: Reg, dst: WritableReg, kind: V128AbsKind) -> Result<()>2232 fn v128_abs(&mut self, src: Reg, dst: WritableReg, kind: V128AbsKind) -> Result<()>; 2233 2234 /// Vectorized negate of the content of `op`. v128_neg(&mut self, op: WritableReg, kind: V128NegKind) -> Result<()>2235 fn v128_neg(&mut self, op: WritableReg, kind: V128NegKind) -> Result<()>; 2236 2237 /// Perform the shift operation specified by `kind`, by the shift amount specified by the 32-bit 2238 /// integer at the top of the stack, on the 128-bit vector specified by the second value 2239 /// from the top of the stack, interpreted as packed integers of size `lane_width`. 2240 /// 2241 /// The shift amount is taken modulo `lane_width`. v128_shift( &mut self, context: &mut CodeGenContext<Emission>, lane_width: OperandSize, kind: ShiftKind, ) -> Result<()>2242 fn v128_shift( 2243 &mut self, 2244 context: &mut CodeGenContext<Emission>, 2245 lane_width: OperandSize, 2246 kind: ShiftKind, 2247 ) -> Result<()>; 2248 2249 /// Perform a saturating integer q-format rounding multiplication. v128_q15mulr_sat_s( &mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize, ) -> Result<()>2250 fn v128_q15mulr_sat_s( 2251 &mut self, 2252 lhs: Reg, 2253 rhs: Reg, 2254 dst: WritableReg, 2255 size: OperandSize, 2256 ) -> Result<()>; 2257 2258 /// Sets `dst` to 1 if all lanes in `src` are non-zero, sets `dst` to 0 2259 /// otherwise. 
v128_all_true(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>2260 fn v128_all_true(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>; 2261 2262 /// Extracts the high bit of each lane in `src` and produces a scalar mask 2263 /// with all bits concatenated in `dst`. v128_bitmask(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>2264 fn v128_bitmask(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>; 2265 2266 /// Lanewise truncation operation. 2267 /// 2268 /// If using an integer kind of truncation, then this performs a lane-wise 2269 /// saturating conversion from float to integer using the IEEE 2270 /// `convertToIntegerTowardZero` function. If any input lane is NaN, the 2271 /// resulting lane is 0. If the rounded integer value of a lane is outside 2272 /// the range of the destination type, the result is saturated to the 2273 /// nearest representable integer value. v128_trunc( &mut self, context: &mut CodeGenContext<Emission>, kind: V128TruncKind, ) -> Result<()>2274 fn v128_trunc( 2275 &mut self, 2276 context: &mut CodeGenContext<Emission>, 2277 kind: V128TruncKind, 2278 ) -> Result<()>; 2279 2280 /// Perform a lane-wise `min` operation between `src1` and `src2`. v128_min(&mut self, src1: Reg, src2: Reg, dst: WritableReg, kind: V128MinKind) -> Result<()>2281 fn v128_min(&mut self, src1: Reg, src2: Reg, dst: WritableReg, kind: V128MinKind) 2282 -> Result<()>; 2283 2284 /// Perform a lane-wise `max` operation between `src1` and `src2`. v128_max(&mut self, src1: Reg, src2: Reg, dst: WritableReg, kind: V128MaxKind) -> Result<()>2285 fn v128_max(&mut self, src1: Reg, src2: Reg, dst: WritableReg, kind: V128MaxKind) 2286 -> Result<()>; 2287 2288 /// Perform the lane-wise integer extended multiplication producing twice wider result than the 2289 /// inputs. This is equivalent to an extend followed by a multiply. 
2290 /// 2291 /// The extension to be performed is inferred from the `lane_width` and the `kind` of extmul, 2292 /// e.g, if `lane_width` is `S16`, and `kind` is `LowSigned`, then we sign-extend the lower 2293 /// 8bits of the 16bits lanes. v128_extmul( &mut self, context: &mut CodeGenContext<Emission>, kind: V128ExtMulKind, ) -> Result<()>2294 fn v128_extmul( 2295 &mut self, 2296 context: &mut CodeGenContext<Emission>, 2297 kind: V128ExtMulKind, 2298 ) -> Result<()>; 2299 2300 /// Perform the lane-wise integer extended pairwise addition producing extended results (twice 2301 /// wider results than the inputs). v128_extadd_pairwise( &mut self, src: Reg, dst: WritableReg, kind: V128ExtAddKind, ) -> Result<()>2302 fn v128_extadd_pairwise( 2303 &mut self, 2304 src: Reg, 2305 dst: WritableReg, 2306 kind: V128ExtAddKind, 2307 ) -> Result<()>; 2308 2309 /// Lane-wise multiply signed 16-bit integers in `lhs` and `rhs` and add 2310 /// adjacent pairs of the 32-bit results. v128_dot(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg) -> Result<()>2311 fn v128_dot(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg) -> Result<()>; 2312 2313 /// Count the number of bits set in each lane. v128_popcnt(&mut self, context: &mut CodeGenContext<Emission>) -> Result<()>2314 fn v128_popcnt(&mut self, context: &mut CodeGenContext<Emission>) -> Result<()>; 2315 2316 /// Lane-wise rounding average of vectors of integers in `lhs` and `rhs` 2317 /// and put the results in `dst`. v128_avgr(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>2318 fn v128_avgr(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>; 2319 2320 /// Lane-wise IEEE division on vectors of floats. v128_div(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>2321 fn v128_div(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>; 2322 2323 /// Lane-wise IEEE square root of vector of floats. 
v128_sqrt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>2324 fn v128_sqrt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>; 2325 2326 /// Lane-wise ceiling of vector of floats. v128_ceil(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>2327 fn v128_ceil(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>; 2328 2329 /// Lane-wise flooring of vector of floats. v128_floor(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>2330 fn v128_floor(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>; 2331 2332 /// Lane-wise rounding to nearest integer for vector of floats. v128_nearest(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>2333 fn v128_nearest(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>; 2334 2335 /// Lane-wise minimum value defined as `rhs < lhs ? rhs : lhs`. v128_pmin(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>2336 fn v128_pmin(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>; 2337 2338 /// Lane-wise maximum value defined as `lhs < rhs ? rhs : lhs`. v128_pmax(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>2339 fn v128_pmax(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>; 2340 } 2341