1 //! AArch64 ISA: binary code emission.
2 
3 use cranelift_control::ControlPlane;
4 
5 use crate::ir::{self, types::*};
6 use crate::isa::aarch64;
7 use crate::isa::aarch64::inst::*;
8 use crate::trace;
9 
10 /// Memory addressing mode finalization: convert "special" modes (e.g.,
11 /// generic arbitrary stack offset) into real addressing modes, possibly by
12 /// emitting some helper instructions that come immediately before the use
13 /// of this amode.
mem_finalize( sink: Option<&mut MachBuffer<Inst>>, mem: &AMode, access_ty: Type, state: &EmitState, ) -> (SmallVec<[Inst; 4]>, AMode)14 pub fn mem_finalize(
15     sink: Option<&mut MachBuffer<Inst>>,
16     mem: &AMode,
17     access_ty: Type,
18     state: &EmitState,
19 ) -> (SmallVec<[Inst; 4]>, AMode) {
20     match mem {
21         &AMode::RegOffset { off, .. }
22         | &AMode::SPOffset { off }
23         | &AMode::FPOffset { off }
24         | &AMode::IncomingArg { off }
25         | &AMode::SlotOffset { off } => {
26             let basereg = match mem {
27                 &AMode::RegOffset { rn, .. } => rn,
28                 &AMode::SPOffset { .. }
29                 | &AMode::SlotOffset { .. }
30                 | &AMode::IncomingArg { .. } => stack_reg(),
31                 &AMode::FPOffset { .. } => fp_reg(),
32                 _ => unreachable!(),
33             };
34             let off = match mem {
35                 &AMode::IncomingArg { .. } => {
36                     let frame_layout = state.frame_layout();
37                     i64::from(
38                         frame_layout.setup_area_size
39                             + frame_layout.tail_args_size
40                             + frame_layout.clobber_size
41                             + frame_layout.fixed_frame_storage_size
42                             + frame_layout.outgoing_args_size,
43                     ) - off
44                 }
45                 &AMode::SlotOffset { .. } => {
46                     let adj = i64::from(state.frame_layout().outgoing_args_size);
47                     trace!(
48                         "mem_finalize: slot offset {} + adj {} -> {}",
49                         off,
50                         adj,
51                         off + adj
52                     );
53                     off + adj
54                 }
55                 _ => off,
56             };
57 
58             if let Some(simm9) = SImm9::maybe_from_i64(off) {
59                 let mem = AMode::Unscaled { rn: basereg, simm9 };
60                 (smallvec![], mem)
61             } else if let Some(uimm12) = UImm12Scaled::maybe_from_i64(off, access_ty) {
62                 let mem = AMode::UnsignedOffset {
63                     rn: basereg,
64                     uimm12,
65                 };
66                 (smallvec![], mem)
67             } else {
68                 let tmp = writable_spilltmp_reg();
69                 (
70                     Inst::load_constant(tmp, off as u64),
71                     AMode::RegExtended {
72                         rn: basereg,
73                         rm: tmp.to_reg(),
74                         extendop: ExtendOp::SXTX,
75                     },
76                 )
77             }
78         }
79 
80         AMode::Const { addr } => {
81             let sink = match sink {
82                 Some(sink) => sink,
83                 None => return (smallvec![], mem.clone()),
84             };
85             let label = sink.get_label_for_constant(*addr);
86             let label = MemLabel::Mach(label);
87             (smallvec![], AMode::Label { label })
88         }
89 
90         _ => (smallvec![], mem.clone()),
91     }
92 }
93 
94 //=============================================================================
95 // Instructions and subcomponents: emission
96 
machreg_to_gpr(m: Reg) -> u3297 pub(crate) fn machreg_to_gpr(m: Reg) -> u32 {
98     assert_eq!(m.class(), RegClass::Int);
99     u32::from(m.to_real_reg().unwrap().hw_enc() & 31)
100 }
101 
machreg_to_vec(m: Reg) -> u32102 pub(crate) fn machreg_to_vec(m: Reg) -> u32 {
103     assert_eq!(m.class(), RegClass::Float);
104     u32::from(m.to_real_reg().unwrap().hw_enc())
105 }
106 
machreg_to_gpr_or_vec(m: Reg) -> u32107 fn machreg_to_gpr_or_vec(m: Reg) -> u32 {
108     u32::from(m.to_real_reg().unwrap().hw_enc() & 31)
109 }
110 
111 /// Encode a 3-register aeithmeric instruction.
enc_arith_rrr(bits_31_21: u32, bits_15_10: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32112 pub fn enc_arith_rrr(bits_31_21: u32, bits_15_10: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
113     (bits_31_21 << 21)
114         | (bits_15_10 << 10)
115         | machreg_to_gpr(rd.to_reg())
116         | (machreg_to_gpr(rn) << 5)
117         | (machreg_to_gpr(rm) << 16)
118 }
119 
enc_arith_rr_imm12( bits_31_24: u32, immshift: u32, imm12: u32, rn: Reg, rd: Writable<Reg>, ) -> u32120 fn enc_arith_rr_imm12(
121     bits_31_24: u32,
122     immshift: u32,
123     imm12: u32,
124     rn: Reg,
125     rd: Writable<Reg>,
126 ) -> u32 {
127     (bits_31_24 << 24)
128         | (immshift << 22)
129         | (imm12 << 10)
130         | (machreg_to_gpr(rn) << 5)
131         | machreg_to_gpr(rd.to_reg())
132 }
133 
enc_arith_rr_imml(bits_31_23: u32, imm_bits: u32, rn: Reg, rd: Writable<Reg>) -> u32134 fn enc_arith_rr_imml(bits_31_23: u32, imm_bits: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
135     (bits_31_23 << 23) | (imm_bits << 10) | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg())
136 }
137 
enc_arith_rrrr(top11: u32, rm: Reg, bit15: u32, ra: Reg, rn: Reg, rd: Writable<Reg>) -> u32138 fn enc_arith_rrrr(top11: u32, rm: Reg, bit15: u32, ra: Reg, rn: Reg, rd: Writable<Reg>) -> u32 {
139     (top11 << 21)
140         | (machreg_to_gpr(rm) << 16)
141         | (bit15 << 15)
142         | (machreg_to_gpr(ra) << 10)
143         | (machreg_to_gpr(rn) << 5)
144         | machreg_to_gpr(rd.to_reg())
145 }
146 
/// Encode a branch with a 26-bit offset field.
///
/// `op_31_26` is placed at bit 26 and `off_26_0` occupies the low bits.
/// Panics if the offset does not fit in 26 bits.
fn enc_jump26(op_31_26: u32, off_26_0: u32) -> u32 {
    assert!(off_26_0 < (1 << 26));
    let opcode = op_31_26 << 26;
    opcode | off_26_0
}
151 
enc_cmpbr(op_31_24: u32, off_18_0: u32, reg: Reg) -> u32152 fn enc_cmpbr(op_31_24: u32, off_18_0: u32, reg: Reg) -> u32 {
153     assert!(off_18_0 < (1 << 19));
154     (op_31_24 << 24) | (off_18_0 << 5) | machreg_to_gpr(reg)
155 }
156 
/// Encode a conditional branch.
///
/// Field placement: `op_31_24` at bit 24, the 19-bit offset at bit 5, `op_4`
/// at bit 4 and `cond` in the low four bits. Panics if the offset does not
/// fit in 19 bits or the condition does not fit in 4 bits.
fn enc_cbr(op_31_24: u32, off_18_0: u32, op_4: u32, cond: u32) -> u32 {
    assert!(off_18_0 < (1 << 19));
    assert!(cond < (1 << 4));
    let opcode = op_31_24 << 24;
    let imm19 = off_18_0 << 5;
    let o0 = op_4 << 4;
    opcode | imm19 | o0 | cond
}
162 
163 /// Set the size bit of an instruction.
enc_op_size(op: u32, size: OperandSize) -> u32164 fn enc_op_size(op: u32, size: OperandSize) -> u32 {
165     (op & !(1 << 31)) | (size.sf_bit() << 31)
166 }
167 
enc_conditional_br(taken: BranchTarget, kind: CondBrKind) -> u32168 fn enc_conditional_br(taken: BranchTarget, kind: CondBrKind) -> u32 {
169     match kind {
170         CondBrKind::Zero(reg, size) => enc_op_size(
171             enc_cmpbr(0b0_011010_0, taken.as_offset19_or_zero(), reg),
172             size,
173         ),
174         CondBrKind::NotZero(reg, size) => enc_op_size(
175             enc_cmpbr(0b0_011010_1, taken.as_offset19_or_zero(), reg),
176             size,
177         ),
178         CondBrKind::Cond(c) => enc_cbr(0b01010100, taken.as_offset19_or_zero(), 0b0, c.bits()),
179     }
180 }
181 
enc_test_bit_and_branch( kind: TestBitAndBranchKind, taken: BranchTarget, reg: Reg, bit: u8, ) -> u32182 fn enc_test_bit_and_branch(
183     kind: TestBitAndBranchKind,
184     taken: BranchTarget,
185     reg: Reg,
186     bit: u8,
187 ) -> u32 {
188     assert!(bit < 64);
189     let op_31 = u32::from(bit >> 5);
190     let op_23_19 = u32::from(bit & 0b11111);
191     let op_30_24 = 0b0110110
192         | match kind {
193             TestBitAndBranchKind::Z => 0,
194             TestBitAndBranchKind::NZ => 1,
195         };
196     (op_31 << 31)
197         | (op_30_24 << 24)
198         | (op_23_19 << 19)
199         | (taken.as_offset14_or_zero() << 5)
200         | machreg_to_gpr(reg)
201 }
202 
203 /// Encode a move-wide instruction.
enc_move_wide( op: MoveWideOp, rd: Writable<Reg>, imm: MoveWideConst, size: OperandSize, ) -> u32204 pub fn enc_move_wide(
205     op: MoveWideOp,
206     rd: Writable<Reg>,
207     imm: MoveWideConst,
208     size: OperandSize,
209 ) -> u32 {
210     assert!(imm.shift <= 0b11);
211     let op = match op {
212         MoveWideOp::MovN => 0b00,
213         MoveWideOp::MovZ => 0b10,
214     };
215     0x12800000
216         | size.sf_bit() << 31
217         | op << 29
218         | u32::from(imm.shift) << 21
219         | u32::from(imm.bits) << 5
220         | machreg_to_gpr(rd.to_reg())
221 }
222 
223 /// Encode a move-keep immediate instruction.
enc_movk(rd: Writable<Reg>, imm: MoveWideConst, size: OperandSize) -> u32224 pub fn enc_movk(rd: Writable<Reg>, imm: MoveWideConst, size: OperandSize) -> u32 {
225     assert!(imm.shift <= 0b11);
226     0x72800000
227         | size.sf_bit() << 31
228         | u32::from(imm.shift) << 21
229         | u32::from(imm.bits) << 5
230         | machreg_to_gpr(rd.to_reg())
231 }
232 
enc_ldst_pair(op_31_22: u32, simm7: SImm7Scaled, rn: Reg, rt: Reg, rt2: Reg) -> u32233 fn enc_ldst_pair(op_31_22: u32, simm7: SImm7Scaled, rn: Reg, rt: Reg, rt2: Reg) -> u32 {
234     (op_31_22 << 22)
235         | (simm7.bits() << 15)
236         | (machreg_to_gpr(rt2) << 10)
237         | (machreg_to_gpr(rn) << 5)
238         | machreg_to_gpr(rt)
239 }
240 
enc_ldst_simm9(op_31_22: u32, simm9: SImm9, op_11_10: u32, rn: Reg, rd: Reg) -> u32241 fn enc_ldst_simm9(op_31_22: u32, simm9: SImm9, op_11_10: u32, rn: Reg, rd: Reg) -> u32 {
242     (op_31_22 << 22)
243         | (simm9.bits() << 12)
244         | (op_11_10 << 10)
245         | (machreg_to_gpr(rn) << 5)
246         | machreg_to_gpr_or_vec(rd)
247 }
248 
enc_ldst_uimm12(op_31_22: u32, uimm12: UImm12Scaled, rn: Reg, rd: Reg) -> u32249 fn enc_ldst_uimm12(op_31_22: u32, uimm12: UImm12Scaled, rn: Reg, rd: Reg) -> u32 {
250     (op_31_22 << 22)
251         | (0b1 << 24)
252         | (uimm12.bits() << 10)
253         | (machreg_to_gpr(rn) << 5)
254         | machreg_to_gpr_or_vec(rd)
255 }
256 
enc_ldst_reg( op_31_22: u32, rn: Reg, rm: Reg, s_bit: bool, extendop: Option<ExtendOp>, rd: Reg, ) -> u32257 fn enc_ldst_reg(
258     op_31_22: u32,
259     rn: Reg,
260     rm: Reg,
261     s_bit: bool,
262     extendop: Option<ExtendOp>,
263     rd: Reg,
264 ) -> u32 {
265     let s_bit = if s_bit { 1 } else { 0 };
266     let extend_bits = match extendop {
267         Some(ExtendOp::UXTW) => 0b010,
268         Some(ExtendOp::SXTW) => 0b110,
269         Some(ExtendOp::SXTX) => 0b111,
270         None => 0b011, // LSL
271         _ => panic!("bad extend mode for ld/st AMode"),
272     };
273     (op_31_22 << 22)
274         | (1 << 21)
275         | (machreg_to_gpr(rm) << 16)
276         | (extend_bits << 13)
277         | (s_bit << 12)
278         | (0b10 << 10)
279         | (machreg_to_gpr(rn) << 5)
280         | machreg_to_gpr_or_vec(rd)
281 }
282 
enc_ldst_imm19(op_31_24: u32, imm19: u32, rd: Reg) -> u32283 pub(crate) fn enc_ldst_imm19(op_31_24: u32, imm19: u32, rd: Reg) -> u32 {
284     (op_31_24 << 24) | (imm19 << 5) | machreg_to_gpr_or_vec(rd)
285 }
286 
enc_ldst_vec(q: u32, size: u32, rn: Reg, rt: Writable<Reg>) -> u32287 fn enc_ldst_vec(q: u32, size: u32, rn: Reg, rt: Writable<Reg>) -> u32 {
288     debug_assert_eq!(q & 0b1, q);
289     debug_assert_eq!(size & 0b11, size);
290     0b0_0_0011010_10_00000_110_0_00_00000_00000
291         | q << 30
292         | size << 10
293         | machreg_to_gpr(rn) << 5
294         | machreg_to_vec(rt.to_reg())
295 }
296 
enc_ldst_vec_pair( opc: u32, amode: u32, is_load: bool, simm7: SImm7Scaled, rn: Reg, rt: Reg, rt2: Reg, ) -> u32297 fn enc_ldst_vec_pair(
298     opc: u32,
299     amode: u32,
300     is_load: bool,
301     simm7: SImm7Scaled,
302     rn: Reg,
303     rt: Reg,
304     rt2: Reg,
305 ) -> u32 {
306     debug_assert_eq!(opc & 0b11, opc);
307     debug_assert_eq!(amode & 0b11, amode);
308 
309     0b00_10110_00_0_0000000_00000_00000_00000
310         | opc << 30
311         | amode << 23
312         | (is_load as u32) << 22
313         | simm7.bits() << 15
314         | machreg_to_vec(rt2) << 10
315         | machreg_to_gpr(rn) << 5
316         | machreg_to_vec(rt)
317 }
318 
enc_vec_rrr(top11: u32, rm: Reg, bit15_10: u32, rn: Reg, rd: Writable<Reg>) -> u32319 fn enc_vec_rrr(top11: u32, rm: Reg, bit15_10: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
320     (top11 << 21)
321         | (machreg_to_vec(rm) << 16)
322         | (bit15_10 << 10)
323         | (machreg_to_vec(rn) << 5)
324         | machreg_to_vec(rd.to_reg())
325 }
326 
enc_vec_rrr_long( q: u32, u: u32, size: u32, bit14: u32, rm: Reg, rn: Reg, rd: Writable<Reg>, ) -> u32327 fn enc_vec_rrr_long(
328     q: u32,
329     u: u32,
330     size: u32,
331     bit14: u32,
332     rm: Reg,
333     rn: Reg,
334     rd: Writable<Reg>,
335 ) -> u32 {
336     debug_assert_eq!(q & 0b1, q);
337     debug_assert_eq!(u & 0b1, u);
338     debug_assert_eq!(size & 0b11, size);
339     debug_assert_eq!(bit14 & 0b1, bit14);
340 
341     0b0_0_0_01110_00_1_00000_100000_00000_00000
342         | q << 30
343         | u << 29
344         | size << 22
345         | bit14 << 14
346         | (machreg_to_vec(rm) << 16)
347         | (machreg_to_vec(rn) << 5)
348         | machreg_to_vec(rd.to_reg())
349 }
350 
enc_bit_rr(size: u32, opcode2: u32, opcode1: u32, rn: Reg, rd: Writable<Reg>) -> u32351 fn enc_bit_rr(size: u32, opcode2: u32, opcode1: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
352     (0b01011010110 << 21)
353         | size << 31
354         | opcode2 << 16
355         | opcode1 << 10
356         | machreg_to_gpr(rn) << 5
357         | machreg_to_gpr(rd.to_reg())
358 }
359 
enc_br(rn: Reg) -> u32360 pub(crate) fn enc_br(rn: Reg) -> u32 {
361     0b1101011_0000_11111_000000_00000_00000 | (machreg_to_gpr(rn) << 5)
362 }
363 
enc_adr_inst(opcode: u32, off: i32, rd: Writable<Reg>) -> u32364 pub(crate) fn enc_adr_inst(opcode: u32, off: i32, rd: Writable<Reg>) -> u32 {
365     let off = u32::try_from(off).unwrap();
366     let immlo = off & 3;
367     let immhi = (off >> 2) & ((1 << 19) - 1);
368     opcode | (immlo << 29) | (immhi << 5) | machreg_to_gpr(rd.to_reg())
369 }
370 
enc_adr(off: i32, rd: Writable<Reg>) -> u32371 pub(crate) fn enc_adr(off: i32, rd: Writable<Reg>) -> u32 {
372     let opcode = 0b00010000 << 24;
373     enc_adr_inst(opcode, off, rd)
374 }
375 
enc_adrp(off: i32, rd: Writable<Reg>) -> u32376 pub(crate) fn enc_adrp(off: i32, rd: Writable<Reg>) -> u32 {
377     let opcode = 0b10010000 << 24;
378     enc_adr_inst(opcode, off, rd)
379 }
380 
enc_csel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, op: u32, o2: u32) -> u32381 fn enc_csel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, op: u32, o2: u32) -> u32 {
382     debug_assert_eq!(op & 0b1, op);
383     debug_assert_eq!(o2 & 0b1, o2);
384     0b100_11010100_00000_0000_00_00000_00000
385         | (op << 30)
386         | (machreg_to_gpr(rm) << 16)
387         | (cond.bits() << 12)
388         | (o2 << 10)
389         | (machreg_to_gpr(rn) << 5)
390         | machreg_to_gpr(rd.to_reg())
391 }
392 
enc_fcsel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, size: ScalarSize) -> u32393 fn enc_fcsel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, size: ScalarSize) -> u32 {
394     0b000_11110_00_1_00000_0000_11_00000_00000
395         | (size.ftype() << 22)
396         | (machreg_to_vec(rm) << 16)
397         | (machreg_to_vec(rn) << 5)
398         | machreg_to_vec(rd.to_reg())
399         | (cond.bits() << 12)
400 }
401 
enc_ccmp(size: OperandSize, rn: Reg, rm: Reg, nzcv: NZCV, cond: Cond) -> u32402 fn enc_ccmp(size: OperandSize, rn: Reg, rm: Reg, nzcv: NZCV, cond: Cond) -> u32 {
403     0b0_1_1_11010010_00000_0000_00_00000_0_0000
404         | size.sf_bit() << 31
405         | machreg_to_gpr(rm) << 16
406         | cond.bits() << 12
407         | machreg_to_gpr(rn) << 5
408         | nzcv.bits()
409 }
410 
enc_ccmp_imm(size: OperandSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond) -> u32411 fn enc_ccmp_imm(size: OperandSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond) -> u32 {
412     0b0_1_1_11010010_00000_0000_10_00000_0_0000
413         | size.sf_bit() << 31
414         | imm.bits() << 16
415         | cond.bits() << 12
416         | machreg_to_gpr(rn) << 5
417         | nzcv.bits()
418 }
419 
enc_bfm(opc: u8, size: OperandSize, rd: Writable<Reg>, rn: Reg, immr: u8, imms: u8) -> u32420 fn enc_bfm(opc: u8, size: OperandSize, rd: Writable<Reg>, rn: Reg, immr: u8, imms: u8) -> u32 {
421     match size {
422         OperandSize::Size64 => {
423             debug_assert!(immr <= 63);
424             debug_assert!(imms <= 63);
425         }
426         OperandSize::Size32 => {
427             debug_assert!(immr <= 31);
428             debug_assert!(imms <= 31);
429         }
430     }
431     debug_assert_eq!(opc & 0b11, opc);
432     let n_bit = size.sf_bit();
433     0b0_00_100110_0_000000_000000_00000_00000
434         | size.sf_bit() << 31
435         | u32::from(opc) << 29
436         | n_bit << 22
437         | u32::from(immr) << 16
438         | u32::from(imms) << 10
439         | machreg_to_gpr(rn) << 5
440         | machreg_to_gpr(rd.to_reg())
441 }
442 
enc_vecmov(is_16b: bool, rd: Writable<Reg>, rn: Reg) -> u32443 fn enc_vecmov(is_16b: bool, rd: Writable<Reg>, rn: Reg) -> u32 {
444     0b00001110_101_00000_00011_1_00000_00000
445         | ((is_16b as u32) << 30)
446         | machreg_to_vec(rd.to_reg())
447         | (machreg_to_vec(rn) << 16)
448         | (machreg_to_vec(rn) << 5)
449 }
450 
enc_fpurr(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32451 fn enc_fpurr(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
452     (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
453 }
454 
enc_fpurrr(top22: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32455 fn enc_fpurrr(top22: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
456     (top22 << 10)
457         | (machreg_to_vec(rm) << 16)
458         | (machreg_to_vec(rn) << 5)
459         | machreg_to_vec(rd.to_reg())
460 }
461 
enc_fpurrrr(top17: u32, rd: Writable<Reg>, rn: Reg, rm: Reg, ra: Reg) -> u32462 fn enc_fpurrrr(top17: u32, rd: Writable<Reg>, rn: Reg, rm: Reg, ra: Reg) -> u32 {
463     (top17 << 15)
464         | (machreg_to_vec(rm) << 16)
465         | (machreg_to_vec(ra) << 10)
466         | (machreg_to_vec(rn) << 5)
467         | machreg_to_vec(rd.to_reg())
468 }
469 
enc_fcmp(size: ScalarSize, rn: Reg, rm: Reg) -> u32470 fn enc_fcmp(size: ScalarSize, rn: Reg, rm: Reg) -> u32 {
471     0b000_11110_00_1_00000_00_1000_00000_00000
472         | (size.ftype() << 22)
473         | (machreg_to_vec(rm) << 16)
474         | (machreg_to_vec(rn) << 5)
475 }
476 
enc_fputoint(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32477 fn enc_fputoint(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
478     (top16 << 16) | (machreg_to_vec(rn) << 5) | machreg_to_gpr(rd.to_reg())
479 }
480 
enc_inttofpu(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32481 fn enc_inttofpu(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
482     (top16 << 16) | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg())
483 }
484 
enc_fround(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32485 fn enc_fround(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
486     (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
487 }
488 
enc_vec_rr_misc(qu: u32, size: u32, bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32489 fn enc_vec_rr_misc(qu: u32, size: u32, bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
490     debug_assert_eq!(qu & 0b11, qu);
491     debug_assert_eq!(size & 0b11, size);
492     debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);
493     let bits = 0b0_00_01110_00_10000_00000_10_00000_00000;
494     bits | qu << 29
495         | size << 22
496         | bits_12_16 << 12
497         | machreg_to_vec(rn) << 5
498         | machreg_to_vec(rd.to_reg())
499 }
500 
enc_vec_rr_pair(bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32501 fn enc_vec_rr_pair(bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
502     debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);
503 
504     0b010_11110_11_11000_11011_10_00000_00000
505         | bits_12_16 << 12
506         | machreg_to_vec(rn) << 5
507         | machreg_to_vec(rd.to_reg())
508 }
509 
enc_vec_rr_pair_long(u: u32, enc_size: u32, rd: Writable<Reg>, rn: Reg) -> u32510 fn enc_vec_rr_pair_long(u: u32, enc_size: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
511     debug_assert_eq!(u & 0b1, u);
512     debug_assert_eq!(enc_size & 0b1, enc_size);
513 
514     0b0_1_0_01110_00_10000_00_0_10_10_00000_00000
515         | u << 29
516         | enc_size << 22
517         | machreg_to_vec(rn) << 5
518         | machreg_to_vec(rd.to_reg())
519 }
520 
enc_vec_lanes(q: u32, u: u32, size: u32, opcode: u32, rd: Writable<Reg>, rn: Reg) -> u32521 fn enc_vec_lanes(q: u32, u: u32, size: u32, opcode: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
522     debug_assert_eq!(q & 0b1, q);
523     debug_assert_eq!(u & 0b1, u);
524     debug_assert_eq!(size & 0b11, size);
525     debug_assert_eq!(opcode & 0b11111, opcode);
526     0b0_0_0_01110_00_11000_0_0000_10_00000_00000
527         | q << 30
528         | u << 29
529         | size << 22
530         | opcode << 12
531         | machreg_to_vec(rn) << 5
532         | machreg_to_vec(rd.to_reg())
533 }
534 
enc_tbl(is_extension: bool, len: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32535 fn enc_tbl(is_extension: bool, len: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
536     debug_assert_eq!(len & 0b11, len);
537     0b0_1_001110_000_00000_0_00_0_00_00000_00000
538         | (machreg_to_vec(rm) << 16)
539         | len << 13
540         | (is_extension as u32) << 12
541         | (machreg_to_vec(rn) << 5)
542         | machreg_to_vec(rd.to_reg())
543 }
544 
/// Return the fixed instruction word for `dmb ish`.
fn enc_dmb_ish() -> u32 {
    const DMB_ISH: u32 = 0xD503_3BBF;
    DMB_ISH
}
548 
enc_acq_rel(ty: Type, op: AtomicRMWOp, rs: Reg, rt: Writable<Reg>, rn: Reg) -> u32549 fn enc_acq_rel(ty: Type, op: AtomicRMWOp, rs: Reg, rt: Writable<Reg>, rn: Reg) -> u32 {
550     assert!(machreg_to_gpr(rt.to_reg()) != 31);
551     let sz = match ty {
552         I64 => 0b11,
553         I32 => 0b10,
554         I16 => 0b01,
555         I8 => 0b00,
556         _ => unreachable!(),
557     };
558     let bit15 = match op {
559         AtomicRMWOp::Swp => 0b1,
560         _ => 0b0,
561     };
562     let op = match op {
563         AtomicRMWOp::Add => 0b000,
564         AtomicRMWOp::Clr => 0b001,
565         AtomicRMWOp::Eor => 0b010,
566         AtomicRMWOp::Set => 0b011,
567         AtomicRMWOp::Smax => 0b100,
568         AtomicRMWOp::Smin => 0b101,
569         AtomicRMWOp::Umax => 0b110,
570         AtomicRMWOp::Umin => 0b111,
571         AtomicRMWOp::Swp => 0b000,
572     };
573     0b00_111_000_111_00000_0_000_00_00000_00000
574         | (sz << 30)
575         | (machreg_to_gpr(rs) << 16)
576         | bit15 << 15
577         | (op << 12)
578         | (machreg_to_gpr(rn) << 5)
579         | machreg_to_gpr(rt.to_reg())
580 }
581 
enc_ldar(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32582 fn enc_ldar(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {
583     let sz = match ty {
584         I64 => 0b11,
585         I32 => 0b10,
586         I16 => 0b01,
587         I8 => 0b00,
588         _ => unreachable!(),
589     };
590     0b00_001000_1_1_0_11111_1_11111_00000_00000
591         | (sz << 30)
592         | (machreg_to_gpr(rn) << 5)
593         | machreg_to_gpr(rt.to_reg())
594 }
595 
enc_stlr(ty: Type, rt: Reg, rn: Reg) -> u32596 fn enc_stlr(ty: Type, rt: Reg, rn: Reg) -> u32 {
597     let sz = match ty {
598         I64 => 0b11,
599         I32 => 0b10,
600         I16 => 0b01,
601         I8 => 0b00,
602         _ => unreachable!(),
603     };
604     0b00_001000_100_11111_1_11111_00000_00000
605         | (sz << 30)
606         | (machreg_to_gpr(rn) << 5)
607         | machreg_to_gpr(rt)
608 }
609 
enc_ldaxr(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32610 fn enc_ldaxr(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {
611     let sz = match ty {
612         I64 => 0b11,
613         I32 => 0b10,
614         I16 => 0b01,
615         I8 => 0b00,
616         _ => unreachable!(),
617     };
618     0b00_001000_0_1_0_11111_1_11111_00000_00000
619         | (sz << 30)
620         | (machreg_to_gpr(rn) << 5)
621         | machreg_to_gpr(rt.to_reg())
622 }
623 
enc_stlxr(ty: Type, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32624 fn enc_stlxr(ty: Type, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
625     let sz = match ty {
626         I64 => 0b11,
627         I32 => 0b10,
628         I16 => 0b01,
629         I8 => 0b00,
630         _ => unreachable!(),
631     };
632     0b00_001000_000_00000_1_11111_00000_00000
633         | (sz << 30)
634         | (machreg_to_gpr(rs.to_reg()) << 16)
635         | (machreg_to_gpr(rn) << 5)
636         | machreg_to_gpr(rt)
637 }
638 
enc_cas(size: u32, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32639 fn enc_cas(size: u32, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
640     debug_assert_eq!(size & 0b11, size);
641 
642     0b00_0010001_1_1_00000_1_11111_00000_00000
643         | size << 30
644         | machreg_to_gpr(rs.to_reg()) << 16
645         | machreg_to_gpr(rn) << 5
646         | machreg_to_gpr(rt)
647 }
648 
enc_asimd_mod_imm(rd: Writable<Reg>, q_op: u32, cmode: u32, imm: u8) -> u32649 fn enc_asimd_mod_imm(rd: Writable<Reg>, q_op: u32, cmode: u32, imm: u8) -> u32 {
650     let abc = (imm >> 5) as u32;
651     let defgh = (imm & 0b11111) as u32;
652 
653     debug_assert_eq!(cmode & 0b1111, cmode);
654     debug_assert_eq!(q_op & 0b11, q_op);
655 
656     0b0_0_0_0111100000_000_0000_01_00000_00000
657         | (q_op << 29)
658         | (abc << 16)
659         | (cmode << 12)
660         | (defgh << 5)
661         | machreg_to_vec(rd.to_reg())
662 }
663 
/// State carried between emissions of a sequence of instructions.
#[derive(Default, Clone, Debug)]
pub struct EmitState {
    /// The user stack map for the upcoming instruction, as provided to
    /// `pre_safepoint()`.
    user_stack_map: Option<ir::UserStackMap>,

    /// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and
    /// optimized away at compile time. See [cranelift_control].
    ctrl_plane: ControlPlane,

    /// The frame layout, cloned from the ABI callee when the state is
    /// created. `mem_finalize` reads it to resolve `IncomingArg` and
    /// `SlotOffset` addressing modes.
    frame_layout: FrameLayout,
}
677 
678 impl MachInstEmitState<Inst> for EmitState {
new(abi: &Callee<AArch64MachineDeps>, ctrl_plane: ControlPlane) -> Self679     fn new(abi: &Callee<AArch64MachineDeps>, ctrl_plane: ControlPlane) -> Self {
680         EmitState {
681             user_stack_map: None,
682             ctrl_plane,
683             frame_layout: abi.frame_layout().clone(),
684         }
685     }
686 
pre_safepoint(&mut self, user_stack_map: Option<ir::UserStackMap>)687     fn pre_safepoint(&mut self, user_stack_map: Option<ir::UserStackMap>) {
688         self.user_stack_map = user_stack_map;
689     }
690 
ctrl_plane_mut(&mut self) -> &mut ControlPlane691     fn ctrl_plane_mut(&mut self) -> &mut ControlPlane {
692         &mut self.ctrl_plane
693     }
694 
take_ctrl_plane(self) -> ControlPlane695     fn take_ctrl_plane(self) -> ControlPlane {
696         self.ctrl_plane
697     }
698 
frame_layout(&self) -> &FrameLayout699     fn frame_layout(&self) -> &FrameLayout {
700         &self.frame_layout
701     }
702 }
703 
704 impl EmitState {
take_stack_map(&mut self) -> Option<ir::UserStackMap>705     fn take_stack_map(&mut self) -> Option<ir::UserStackMap> {
706         self.user_stack_map.take()
707     }
708 
clear_post_insn(&mut self)709     fn clear_post_insn(&mut self) {
710         self.user_stack_map = None;
711     }
712 }
713 
/// Constant state used during function compilation.
pub struct EmitInfo {
    /// Shared (target-independent) settings flags, as passed to `new`.
    flags: settings::Flags,
    /// AArch64-specific settings flags, as passed to `new`.
    isa_flags: aarch64::settings::Flags,
}
719 
720 impl EmitInfo {
721     /// Create a constant state for emission of instructions.
new(flags: settings::Flags, isa_flags: aarch64::settings::Flags) -> Self722     pub fn new(flags: settings::Flags, isa_flags: aarch64::settings::Flags) -> Self {
723         Self { flags, isa_flags }
724     }
725 }
726 
727 impl MachInstEmit for Inst {
728     type State = EmitState;
729     type Info = EmitInfo;
730 
emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState)731     fn emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState) {
732         // N.B.: we *must* not exceed the "worst-case size" used to compute
733         // where to insert islands, except when islands are explicitly triggered
734         // (with an `EmitIsland`). We check this in debug builds. This is `mut`
735         // to allow disabling the check for `JTSequence`, which is always
736         // emitted following an `EmitIsland`.
737         let mut start_off = sink.cur_offset();
738 
739         match self {
740             &Inst::AluRRR {
741                 alu_op,
742                 size,
743                 rd,
744                 rn,
745                 rm,
746             } => {
747                 debug_assert!(match alu_op {
748                     ALUOp::SMulH | ALUOp::UMulH => size == OperandSize::Size64,
749                     _ => true,
750                 });
751                 let top11 = match alu_op {
752                     ALUOp::Add => 0b00001011_000,
753                     ALUOp::Adc => 0b00011010_000,
754                     ALUOp::AdcS => 0b00111010_000,
755                     ALUOp::Sub => 0b01001011_000,
756                     ALUOp::Sbc => 0b01011010_000,
757                     ALUOp::SbcS => 0b01111010_000,
758                     ALUOp::Orr => 0b00101010_000,
759                     ALUOp::And => 0b00001010_000,
760                     ALUOp::AndS => 0b01101010_000,
761                     ALUOp::Eor => 0b01001010_000,
762                     ALUOp::OrrNot => 0b00101010_001,
763                     ALUOp::AndNot => 0b00001010_001,
764                     ALUOp::EorNot => 0b01001010_001,
765                     ALUOp::AddS => 0b00101011_000,
766                     ALUOp::SubS => 0b01101011_000,
767                     ALUOp::SDiv | ALUOp::UDiv => 0b00011010_110,
768                     ALUOp::Extr | ALUOp::Lsr | ALUOp::Asr | ALUOp::Lsl => 0b00011010_110,
769                     ALUOp::SMulH => 0b10011011_010,
770                     ALUOp::UMulH => 0b10011011_110,
771                 };
772 
773                 let top11 = top11 | size.sf_bit() << 10;
774                 let bit15_10 = match alu_op {
775                     ALUOp::SDiv => 0b000011,
776                     ALUOp::UDiv => 0b000010,
777                     ALUOp::Extr => 0b001011,
778                     ALUOp::Lsr => 0b001001,
779                     ALUOp::Asr => 0b001010,
780                     ALUOp::Lsl => 0b001000,
781                     ALUOp::SMulH | ALUOp::UMulH => 0b011111,
782                     _ => 0b000000,
783                 };
784                 debug_assert_ne!(writable_stack_reg(), rd);
785                 // The stack pointer is the zero register in this context, so this might be an
786                 // indication that something is wrong.
787                 debug_assert_ne!(stack_reg(), rn);
788                 debug_assert_ne!(stack_reg(), rm);
789                 sink.put4(enc_arith_rrr(top11, bit15_10, rd, rn, rm));
790             }
791             &Inst::AluRRRR {
792                 alu_op,
793                 size,
794                 rd,
795                 rm,
796                 rn,
797                 ra,
798             } => {
799                 let (top11, bit15) = match alu_op {
800                     ALUOp3::MAdd => (0b0_00_11011_000, 0),
801                     ALUOp3::MSub => (0b0_00_11011_000, 1),
802                     ALUOp3::UMAddL => {
803                         debug_assert!(size == OperandSize::Size32);
804                         (0b1_00_11011_1_01, 0)
805                     }
806                     ALUOp3::SMAddL => {
807                         debug_assert!(size == OperandSize::Size32);
808                         (0b1_00_11011_0_01, 0)
809                     }
810                 };
811                 let top11 = top11 | size.sf_bit() << 10;
812                 sink.put4(enc_arith_rrrr(top11, rm, bit15, ra, rn, rd));
813             }
814             &Inst::AluRRImm12 {
815                 alu_op,
816                 size,
817                 rd,
818                 rn,
819                 ref imm12,
820             } => {
821                 let top8 = match alu_op {
822                     ALUOp::Add => 0b000_10001,
823                     ALUOp::Sub => 0b010_10001,
824                     ALUOp::AddS => 0b001_10001,
825                     ALUOp::SubS => 0b011_10001,
826                     _ => unimplemented!("{:?}", alu_op),
827                 };
828                 let top8 = top8 | size.sf_bit() << 7;
829                 sink.put4(enc_arith_rr_imm12(
830                     top8,
831                     imm12.shift_bits(),
832                     imm12.imm_bits(),
833                     rn,
834                     rd,
835                 ));
836             }
837             &Inst::AluRRImmLogic {
838                 alu_op,
839                 size,
840                 rd,
841                 rn,
842                 ref imml,
843             } => {
844                 let (top9, inv) = match alu_op {
845                     ALUOp::Orr => (0b001_100100, false),
846                     ALUOp::And => (0b000_100100, false),
847                     ALUOp::AndS => (0b011_100100, false),
848                     ALUOp::Eor => (0b010_100100, false),
849                     ALUOp::OrrNot => (0b001_100100, true),
850                     ALUOp::AndNot => (0b000_100100, true),
851                     ALUOp::EorNot => (0b010_100100, true),
852                     _ => unimplemented!("{:?}", alu_op),
853                 };
854                 let top9 = top9 | size.sf_bit() << 8;
855                 let imml = if inv { imml.invert() } else { *imml };
856                 sink.put4(enc_arith_rr_imml(top9, imml.enc_bits(), rn, rd));
857             }
858 
859             &Inst::AluRRImmShift {
860                 alu_op,
861                 size,
862                 rd,
863                 rn,
864                 ref immshift,
865             } => {
866                 let amt = immshift.value();
867                 let (top10, immr, imms) = match alu_op {
868                     ALUOp::Extr => (0b0001001110, machreg_to_gpr(rn), u32::from(amt)),
869                     ALUOp::Lsr => (0b0101001100, u32::from(amt), 0b011111),
870                     ALUOp::Asr => (0b0001001100, u32::from(amt), 0b011111),
871                     ALUOp::Lsl => {
872                         let bits = if size.is64() { 64 } else { 32 };
873                         (
874                             0b0101001100,
875                             u32::from((bits - amt) % bits),
876                             u32::from(bits - 1 - amt),
877                         )
878                     }
879                     _ => unimplemented!("{:?}", alu_op),
880                 };
881                 let top10 = top10 | size.sf_bit() << 9 | size.sf_bit();
882                 let imms = match alu_op {
883                     ALUOp::Lsr | ALUOp::Asr => imms | size.sf_bit() << 5,
884                     _ => imms,
885                 };
886                 sink.put4(
887                     (top10 << 22)
888                         | (immr << 16)
889                         | (imms << 10)
890                         | (machreg_to_gpr(rn) << 5)
891                         | machreg_to_gpr(rd.to_reg()),
892                 );
893             }
894 
895             &Inst::AluRRRShift {
896                 alu_op,
897                 size,
898                 rd,
899                 rn,
900                 rm,
901                 ref shiftop,
902             } => {
903                 let top11: u32 = match alu_op {
904                     ALUOp::Add => 0b000_01011000,
905                     ALUOp::AddS => 0b001_01011000,
906                     ALUOp::Sub => 0b010_01011000,
907                     ALUOp::SubS => 0b011_01011000,
908                     ALUOp::Orr => 0b001_01010000,
909                     ALUOp::And => 0b000_01010000,
910                     ALUOp::AndS => 0b011_01010000,
911                     ALUOp::Eor => 0b010_01010000,
912                     ALUOp::OrrNot => 0b001_01010001,
913                     ALUOp::EorNot => 0b010_01010001,
914                     ALUOp::AndNot => 0b000_01010001,
915                     ALUOp::Extr => 0b000_10011100,
916                     _ => unimplemented!("{:?}", alu_op),
917                 };
918                 let top11 = top11 | size.sf_bit() << 10;
919                 let top11 = top11 | (u32::from(shiftop.op().bits()) << 1);
920                 let bits_15_10 = u32::from(shiftop.amt().value());
921                 sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm));
922             }
923 
924             &Inst::AluRRRExtend {
925                 alu_op,
926                 size,
927                 rd,
928                 rn,
929                 rm,
930                 extendop,
931             } => {
932                 let top11: u32 = match alu_op {
933                     ALUOp::Add => 0b00001011001,
934                     ALUOp::Sub => 0b01001011001,
935                     ALUOp::AddS => 0b00101011001,
936                     ALUOp::SubS => 0b01101011001,
937                     _ => unimplemented!("{:?}", alu_op),
938                 };
939                 let top11 = top11 | size.sf_bit() << 10;
940                 let bits_15_10 = u32::from(extendop.bits()) << 3;
941                 sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm));
942             }
943 
944             &Inst::BitRR {
945                 op, size, rd, rn, ..
946             } => {
947                 let (op1, op2) = match op {
948                     BitOp::RBit => (0b00000, 0b000000),
949                     BitOp::Clz => (0b00000, 0b000100),
950                     BitOp::Cls => (0b00000, 0b000101),
951                     BitOp::Rev16 => (0b00000, 0b000001),
952                     BitOp::Rev32 => (0b00000, 0b000010),
953                     BitOp::Rev64 => (0b00000, 0b000011),
954                 };
955                 sink.put4(enc_bit_rr(size.sf_bit(), op1, op2, rn, rd))
956             }
957 
            // Single-register loads. Every variant listed here shares one
            // emission path; the variants differ only in the opcode chosen
            // below (and, for the label form, in a second opcode table).
            &Inst::ULoad8 { rd, ref mem, flags }
            | &Inst::SLoad8 { rd, ref mem, flags }
            | &Inst::ULoad16 { rd, ref mem, flags }
            | &Inst::SLoad16 { rd, ref mem, flags }
            | &Inst::ULoad32 { rd, ref mem, flags }
            | &Inst::SLoad32 { rd, ref mem, flags }
            | &Inst::ULoad64 {
                rd, ref mem, flags, ..
            }
            | &Inst::FpuLoad16 { rd, ref mem, flags }
            | &Inst::FpuLoad32 { rd, ref mem, flags }
            | &Inst::FpuLoad64 { rd, ref mem, flags }
            | &Inst::FpuLoad128 { rd, ref mem, flags } => {
                let mem = mem.clone();
                // `mem_type()` is expected to be `Some` for every variant
                // matched by this arm; the `unwrap` panics otherwise.
                let access_ty = self.mem_type().unwrap();
                // Convert "special" addressing modes into real ones. Any
                // helper instructions returned here must be emitted before
                // the load itself.
                let (mem_insts, mem) = mem_finalize(Some(sink), &mem, access_ty, state);

                for inst in mem_insts.into_iter() {
                    inst.emit(sink, emit_info, state);
                }

                // ldst encoding helpers take Reg, not Writable<Reg>.
                let rd = rd.to_reg();

                // This is the base opcode (top 10 bits) for the "unscaled
                // immediate" form (Unscaled). Other addressing modes will OR in
                // other values for bits 24/25 (bits 1/2 of this constant).
                let op = match self {
                    Inst::ULoad8 { .. } => 0b0011100001,
                    Inst::SLoad8 { .. } => 0b0011100010,
                    Inst::ULoad16 { .. } => 0b0111100001,
                    Inst::SLoad16 { .. } => 0b0111100010,
                    Inst::ULoad32 { .. } => 0b1011100001,
                    Inst::SLoad32 { .. } => 0b1011100010,
                    Inst::ULoad64 { .. } => 0b1111100001,
                    Inst::FpuLoad16 { .. } => 0b0111110001,
                    Inst::FpuLoad32 { .. } => 0b1011110001,
                    Inst::FpuLoad64 { .. } => 0b1111110001,
                    Inst::FpuLoad128 { .. } => 0b0011110011,
                    // The outer arm only matches the variants listed above.
                    _ => unreachable!(),
                };

                // This must happen after the helper instructions above and
                // before the load word below, so the trap is attached to the
                // load instruction itself.
                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(trap_code);
                }

                // Exactly one 4-byte word is emitted per addressing mode.
                match &mem {
                    // The third argument of `enc_ldst_simm9` is 0b00 here,
                    // 0b11 for SP pre-indexed and 0b01 for SP post-indexed.
                    &AMode::Unscaled { rn, simm9 } => {
                        let reg = rn;
                        sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));
                    }
                    &AMode::UnsignedOffset { rn, uimm12 } => {
                        let reg = rn;
                        sink.put4(enc_ldst_uimm12(op, uimm12, reg, rd));
                    }
                    // Register + register, neither scaled nor extended.
                    &AMode::RegReg { rn, rm } => {
                        let r1 = rn;
                        let r2 = rm;
                        sink.put4(enc_ldst_reg(
                            op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd,
                        ));
                    }
                    // Scaled index register, with an optional extend of it.
                    &AMode::RegScaled { rn, rm } | &AMode::RegScaledExtended { rn, rm, .. } => {
                        let r1 = rn;
                        let r2 = rm;
                        let extendop = match &mem {
                            &AMode::RegScaled { .. } => None,
                            &AMode::RegScaledExtended { extendop, .. } => Some(extendop),
                            _ => unreachable!(),
                        };
                        sink.put4(enc_ldst_reg(
                            op, r1, r2, /* scaled = */ true, extendop, rd,
                        ));
                    }
                    // Extended but unscaled index register.
                    &AMode::RegExtended { rn, rm, extendop } => {
                        let r1 = rn;
                        let r2 = rm;
                        sink.put4(enc_ldst_reg(
                            op,
                            r1,
                            r2,
                            /* scaled = */ false,
                            Some(extendop),
                            rd,
                        ));
                    }
                    &AMode::Label { ref label } => {
                        // The byte offset is divided by 4 before being handed
                        // to `enc_ldst_imm19`, and must then fit in 19 bits.
                        let offset = match label {
                            // cast i32 to u32 (two's-complement)
                            // NOTE(review): a negative offset becomes a very
                            // large `u32` after this cast and would fail the
                            // `assert!` below -- confirm that negative
                            // `PCRel` offsets are never produced.
                            MemLabel::PCRel(off) => *off as u32,
                            // Emit a relocation into the `MachBuffer`
                            // for the label that's being loaded from and
                            // encode an address of 0 in its place which will
                            // get filled in by relocation resolution later on.
                            MemLabel::Mach(label) => {
                                sink.use_label_at_offset(
                                    sink.cur_offset(),
                                    *label,
                                    LabelUse::Ldr19,
                                );
                                0
                            }
                        } / 4;
                        assert!(offset < (1 << 19));
                        // The label form uses its own 8-bit opcodes rather
                        // than `op`; only the variants listed below have one
                        // and any other load variant panics.
                        match self {
                            &Inst::ULoad32 { .. } => {
                                sink.put4(enc_ldst_imm19(0b00011000, offset, rd));
                            }
                            &Inst::SLoad32 { .. } => {
                                sink.put4(enc_ldst_imm19(0b10011000, offset, rd));
                            }
                            &Inst::FpuLoad32 { .. } => {
                                sink.put4(enc_ldst_imm19(0b00011100, offset, rd));
                            }
                            &Inst::ULoad64 { .. } => {
                                sink.put4(enc_ldst_imm19(0b01011000, offset, rd));
                            }
                            &Inst::FpuLoad64 { .. } => {
                                sink.put4(enc_ldst_imm19(0b01011100, offset, rd));
                            }
                            &Inst::FpuLoad128 { .. } => {
                                sink.put4(enc_ldst_imm19(0b10011100, offset, rd));
                            }
                            _ => panic!("Unsupported size for LDR from constant pool!"),
                        }
                    }
                    // The SP-indexed modes always use the stack pointer as base.
                    &AMode::SPPreIndexed { simm9 } => {
                        let reg = stack_reg();
                        sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg, rd));
                    }
                    &AMode::SPPostIndexed { simm9 } => {
                        let reg = stack_reg();
                        sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg, rd));
                    }
                    // Eliminated by `mem_finalize()` above.
                    &AMode::SPOffset { .. }
                    | &AMode::FPOffset { .. }
                    | &AMode::IncomingArg { .. }
                    | &AMode::SlotOffset { .. }
                    | &AMode::Const { .. }
                    | &AMode::RegOffset { .. } => {
                        panic!("Should not see {mem:?} here!")
                    }
                }
            }
1104 
            // Single-register stores. Every variant listed here shares one
            // emission path; the variants differ only in the opcode chosen
            // below.
            &Inst::Store8 { rd, ref mem, flags }
            | &Inst::Store16 { rd, ref mem, flags }
            | &Inst::Store32 { rd, ref mem, flags }
            | &Inst::Store64 { rd, ref mem, flags }
            | &Inst::FpuStore16 { rd, ref mem, flags }
            | &Inst::FpuStore32 { rd, ref mem, flags }
            | &Inst::FpuStore64 { rd, ref mem, flags }
            | &Inst::FpuStore128 { rd, ref mem, flags } => {
                let mem = mem.clone();
                // `mem_type()` is expected to be `Some` for every variant
                // matched by this arm; the `unwrap` panics otherwise.
                let access_ty = self.mem_type().unwrap();
                // Convert "special" addressing modes into real ones. Any
                // helper instructions returned here must be emitted before
                // the store itself.
                let (mem_insts, mem) = mem_finalize(Some(sink), &mem, access_ty, state);

                for inst in mem_insts.into_iter() {
                    inst.emit(sink, emit_info, state);
                }

                // Base opcode (top 10 bits) handed to the ldst encoding
                // helpers for each store variant.
                let op = match self {
                    Inst::Store8 { .. } => 0b0011100000,
                    Inst::Store16 { .. } => 0b0111100000,
                    Inst::Store32 { .. } => 0b1011100000,
                    Inst::Store64 { .. } => 0b1111100000,
                    Inst::FpuStore16 { .. } => 0b0111110000,
                    Inst::FpuStore32 { .. } => 0b1011110000,
                    Inst::FpuStore64 { .. } => 0b1111110000,
                    Inst::FpuStore128 { .. } => 0b0011110010,
                    // The outer arm only matches the variants listed above.
                    _ => unreachable!(),
                };

                // This must happen after the helper instructions above and
                // before the store word below, so the trap is attached to the
                // store instruction itself.
                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual store instruction starts.
                    sink.add_trap(trap_code);
                }

                // Exactly one 4-byte word is emitted per addressing mode.
                match &mem {
                    // The third argument of `enc_ldst_simm9` is 0b00 here,
                    // 0b11 for SP pre-indexed and 0b01 for SP post-indexed.
                    &AMode::Unscaled { rn, simm9 } => {
                        let reg = rn;
                        sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));
                    }
                    &AMode::UnsignedOffset { rn, uimm12 } => {
                        let reg = rn;
                        sink.put4(enc_ldst_uimm12(op, uimm12, reg, rd));
                    }
                    // Register + register, neither scaled nor extended.
                    &AMode::RegReg { rn, rm } => {
                        let r1 = rn;
                        let r2 = rm;
                        sink.put4(enc_ldst_reg(
                            op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd,
                        ));
                    }
                    // Scaled index register, with an optional extend of it.
                    &AMode::RegScaled { rn, rm } | &AMode::RegScaledExtended { rn, rm, .. } => {
                        let r1 = rn;
                        let r2 = rm;
                        let extendop = match &mem {
                            &AMode::RegScaled { .. } => None,
                            &AMode::RegScaledExtended { extendop, .. } => Some(extendop),
                            _ => unreachable!(),
                        };
                        sink.put4(enc_ldst_reg(
                            op, r1, r2, /* scaled = */ true, extendop, rd,
                        ));
                    }
                    // Extended but unscaled index register.
                    &AMode::RegExtended { rn, rm, extendop } => {
                        let r1 = rn;
                        let r2 = rm;
                        sink.put4(enc_ldst_reg(
                            op,
                            r1,
                            r2,
                            /* scaled = */ false,
                            Some(extendop),
                            rd,
                        ));
                    }
                    // Unlike loads, stores have no label form here.
                    &AMode::Label { .. } => {
                        panic!("Store to a MemLabel not implemented!");
                    }
                    // The SP-indexed modes always use the stack pointer as base.
                    &AMode::SPPreIndexed { simm9 } => {
                        let reg = stack_reg();
                        sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg, rd));
                    }
                    &AMode::SPPostIndexed { simm9 } => {
                        let reg = stack_reg();
                        sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg, rd));
                    }
                    // Eliminated by `mem_finalize()` above.
                    &AMode::SPOffset { .. }
                    | &AMode::FPOffset { .. }
                    | &AMode::IncomingArg { .. }
                    | &AMode::SlotOffset { .. }
                    | &AMode::Const { .. }
                    | &AMode::RegOffset { .. } => {
                        panic!("Should not see {mem:?} here!")
                    }
                }
            }
1200 
1201             &Inst::StoreP64 {
1202                 rt,
1203                 rt2,
1204                 ref mem,
1205                 flags,
1206             } => {
1207                 let mem = mem.clone();
1208                 if let Some(trap_code) = flags.trap_code() {
1209                     // Register the offset at which the actual store instruction starts.
1210                     sink.add_trap(trap_code);
1211                 }
1212                 match &mem {
1213                     &PairAMode::SignedOffset { reg, simm7 } => {
1214                         assert_eq!(simm7.scale_ty, I64);
1215                         sink.put4(enc_ldst_pair(0b1010100100, simm7, reg, rt, rt2));
1216                     }
1217                     &PairAMode::SPPreIndexed { simm7 } => {
1218                         assert_eq!(simm7.scale_ty, I64);
1219                         let reg = stack_reg();
1220                         sink.put4(enc_ldst_pair(0b1010100110, simm7, reg, rt, rt2));
1221                     }
1222                     &PairAMode::SPPostIndexed { simm7 } => {
1223                         assert_eq!(simm7.scale_ty, I64);
1224                         let reg = stack_reg();
1225                         sink.put4(enc_ldst_pair(0b1010100010, simm7, reg, rt, rt2));
1226                     }
1227                 }
1228             }
1229             &Inst::LoadP64 {
1230                 rt,
1231                 rt2,
1232                 ref mem,
1233                 flags,
1234             } => {
1235                 let rt = rt.to_reg();
1236                 let rt2 = rt2.to_reg();
1237                 let mem = mem.clone();
1238                 if let Some(trap_code) = flags.trap_code() {
1239                     // Register the offset at which the actual load instruction starts.
1240                     sink.add_trap(trap_code);
1241                 }
1242 
1243                 match &mem {
1244                     &PairAMode::SignedOffset { reg, simm7 } => {
1245                         assert_eq!(simm7.scale_ty, I64);
1246                         sink.put4(enc_ldst_pair(0b1010100101, simm7, reg, rt, rt2));
1247                     }
1248                     &PairAMode::SPPreIndexed { simm7 } => {
1249                         assert_eq!(simm7.scale_ty, I64);
1250                         let reg = stack_reg();
1251                         sink.put4(enc_ldst_pair(0b1010100111, simm7, reg, rt, rt2));
1252                     }
1253                     &PairAMode::SPPostIndexed { simm7 } => {
1254                         assert_eq!(simm7.scale_ty, I64);
1255                         let reg = stack_reg();
1256                         sink.put4(enc_ldst_pair(0b1010100011, simm7, reg, rt, rt2));
1257                     }
1258                 }
1259             }
1260             &Inst::FpuLoadP64 {
1261                 rt,
1262                 rt2,
1263                 ref mem,
1264                 flags,
1265             }
1266             | &Inst::FpuLoadP128 {
1267                 rt,
1268                 rt2,
1269                 ref mem,
1270                 flags,
1271             } => {
1272                 let rt = rt.to_reg();
1273                 let rt2 = rt2.to_reg();
1274                 let mem = mem.clone();
1275 
1276                 if let Some(trap_code) = flags.trap_code() {
1277                     // Register the offset at which the actual load instruction starts.
1278                     sink.add_trap(trap_code);
1279                 }
1280 
1281                 let opc = match self {
1282                     &Inst::FpuLoadP64 { .. } => 0b01,
1283                     &Inst::FpuLoadP128 { .. } => 0b10,
1284                     _ => unreachable!(),
1285                 };
1286 
1287                 match &mem {
1288                     &PairAMode::SignedOffset { reg, simm7 } => {
1289                         assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1290                         sink.put4(enc_ldst_vec_pair(opc, 0b10, true, simm7, reg, rt, rt2));
1291                     }
1292                     &PairAMode::SPPreIndexed { simm7 } => {
1293                         assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1294                         let reg = stack_reg();
1295                         sink.put4(enc_ldst_vec_pair(opc, 0b11, true, simm7, reg, rt, rt2));
1296                     }
1297                     &PairAMode::SPPostIndexed { simm7 } => {
1298                         assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1299                         let reg = stack_reg();
1300                         sink.put4(enc_ldst_vec_pair(opc, 0b01, true, simm7, reg, rt, rt2));
1301                     }
1302                 }
1303             }
1304             &Inst::FpuStoreP64 {
1305                 rt,
1306                 rt2,
1307                 ref mem,
1308                 flags,
1309             }
1310             | &Inst::FpuStoreP128 {
1311                 rt,
1312                 rt2,
1313                 ref mem,
1314                 flags,
1315             } => {
1316                 let mem = mem.clone();
1317 
1318                 if let Some(trap_code) = flags.trap_code() {
1319                     // Register the offset at which the actual store instruction starts.
1320                     sink.add_trap(trap_code);
1321                 }
1322 
1323                 let opc = match self {
1324                     &Inst::FpuStoreP64 { .. } => 0b01,
1325                     &Inst::FpuStoreP128 { .. } => 0b10,
1326                     _ => unreachable!(),
1327                 };
1328 
1329                 match &mem {
1330                     &PairAMode::SignedOffset { reg, simm7 } => {
1331                         assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1332                         sink.put4(enc_ldst_vec_pair(opc, 0b10, false, simm7, reg, rt, rt2));
1333                     }
1334                     &PairAMode::SPPreIndexed { simm7 } => {
1335                         assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1336                         let reg = stack_reg();
1337                         sink.put4(enc_ldst_vec_pair(opc, 0b11, false, simm7, reg, rt, rt2));
1338                     }
1339                     &PairAMode::SPPostIndexed { simm7 } => {
1340                         assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1341                         let reg = stack_reg();
1342                         sink.put4(enc_ldst_vec_pair(opc, 0b01, false, simm7, reg, rt, rt2));
1343                     }
1344                 }
1345             }
1346             &Inst::Mov { size, rd, rm } => {
1347                 assert!(rd.to_reg().class() == rm.class());
1348                 assert!(rm.class() == RegClass::Int);
1349 
1350                 match size {
1351                     OperandSize::Size64 => {
1352                         // MOV to SP is interpreted as MOV to XZR instead. And our codegen
1353                         // should never MOV to XZR.
1354                         assert!(rd.to_reg() != stack_reg());
1355 
1356                         if rm == stack_reg() {
1357                             // We can't use ORR here, so use an `add rd, sp, #0` instead.
1358                             let imm12 = Imm12::maybe_from_u64(0).unwrap();
1359                             sink.put4(enc_arith_rr_imm12(
1360                                 0b100_10001,
1361                                 imm12.shift_bits(),
1362                                 imm12.imm_bits(),
1363                                 rm,
1364                                 rd,
1365                             ));
1366                         } else {
1367                             // Encoded as ORR rd, rm, zero.
1368                             sink.put4(enc_arith_rrr(0b10101010_000, 0b000_000, rd, zero_reg(), rm));
1369                         }
1370                     }
1371                     OperandSize::Size32 => {
1372                         // MOV to SP is interpreted as MOV to XZR instead. And our codegen
1373                         // should never MOV to XZR.
1374                         assert!(machreg_to_gpr(rd.to_reg()) != 31);
1375                         // Encoded as ORR rd, rm, zero.
1376                         sink.put4(enc_arith_rrr(0b00101010_000, 0b000_000, rd, zero_reg(), rm));
1377                     }
1378                 }
1379             }
1380             &Inst::MovFromPReg { rd, rm } => {
1381                 let rm: Reg = rm.into();
1382                 debug_assert!(
1383                     [
1384                         regs::fp_reg(),
1385                         regs::stack_reg(),
1386                         regs::link_reg(),
1387                         regs::pinned_reg()
1388                     ]
1389                     .contains(&rm)
1390                 );
1391                 assert!(rm.class() == RegClass::Int);
1392                 assert!(rd.to_reg().class() == rm.class());
1393                 let size = OperandSize::Size64;
1394                 Inst::Mov { size, rd, rm }.emit(sink, emit_info, state);
1395             }
1396             &Inst::MovToPReg { rd, rm } => {
1397                 let rd: Writable<Reg> = Writable::from_reg(rd.into());
1398                 debug_assert!(
1399                     [
1400                         regs::fp_reg(),
1401                         regs::stack_reg(),
1402                         regs::link_reg(),
1403                         regs::pinned_reg()
1404                     ]
1405                     .contains(&rd.to_reg())
1406                 );
1407                 assert!(rd.to_reg().class() == RegClass::Int);
1408                 assert!(rm.class() == rd.to_reg().class());
1409                 let size = OperandSize::Size64;
1410                 Inst::Mov { size, rd, rm }.emit(sink, emit_info, state);
1411             }
1412             &Inst::MovWide { op, rd, imm, size } => {
1413                 sink.put4(enc_move_wide(op, rd, imm, size));
1414             }
1415             &Inst::MovK { rd, rn, imm, size } => {
1416                 debug_assert_eq!(rn, rd.to_reg());
1417                 sink.put4(enc_movk(rd, imm, size));
1418             }
1419             &Inst::CSel { rd, rn, rm, cond } => {
1420                 sink.put4(enc_csel(rd, rn, rm, cond, 0, 0));
1421             }
1422             &Inst::CSNeg { rd, rn, rm, cond } => {
1423                 sink.put4(enc_csel(rd, rn, rm, cond, 1, 1));
1424             }
1425             &Inst::CSet { rd, cond } => {
1426                 sink.put4(enc_csel(rd, zero_reg(), zero_reg(), cond.invert(), 0, 1));
1427             }
1428             &Inst::CSetm { rd, cond } => {
1429                 sink.put4(enc_csel(rd, zero_reg(), zero_reg(), cond.invert(), 1, 0));
1430             }
1431             &Inst::CCmp {
1432                 size,
1433                 rn,
1434                 rm,
1435                 nzcv,
1436                 cond,
1437             } => {
1438                 sink.put4(enc_ccmp(size, rn, rm, nzcv, cond));
1439             }
1440             &Inst::CCmpImm {
1441                 size,
1442                 rn,
1443                 imm,
1444                 nzcv,
1445                 cond,
1446             } => {
1447                 sink.put4(enc_ccmp_imm(size, rn, imm, nzcv, cond));
1448             }
1449             &Inst::AtomicRMW {
1450                 ty,
1451                 op,
1452                 rs,
1453                 rt,
1454                 rn,
1455                 flags,
1456             } => {
1457                 if let Some(trap_code) = flags.trap_code() {
1458                     sink.add_trap(trap_code);
1459                 }
1460 
1461                 sink.put4(enc_acq_rel(ty, op, rs, rt, rn));
1462             }
1463             &Inst::AtomicRMWLoop { ty, op, flags, .. } => {
1464                 /* Emit this:
1465                      again:
1466                       ldaxr{,b,h}  x/w27, [x25]
1467                       // maybe sign extend
1468                       op          x28, x27, x26 // op is add,sub,and,orr,eor
1469                       stlxr{,b,h}  w24, x/w28, [x25]
1470                       cbnz        x24, again
1471 
1472                    Operand conventions:
1473                       IN:  x25 (addr), x26 (2nd arg for op)
1474                       OUT: x27 (old value), x24 (trashed), x28 (trashed)
1475 
1476                    It is unfortunate that, per the ARM documentation, x28 cannot be used for
1477                    both the store-data and success-flag operands of stlxr.  This causes the
1478                    instruction's behaviour to be "CONSTRAINED UNPREDICTABLE", so we use x24
1479                    instead for the success-flag.
1480                 */
1481                 // TODO: We should not hardcode registers here, a better idea would be to
1482                 // pass some scratch registers in the AtomicRMWLoop pseudo-instruction, and use those
1483                 let xzr = zero_reg();
1484                 let x24 = xreg(24);
1485                 let x25 = xreg(25);
1486                 let x26 = xreg(26);
1487                 let x27 = xreg(27);
1488                 let x28 = xreg(28);
1489                 let x24wr = writable_xreg(24);
1490                 let x27wr = writable_xreg(27);
1491                 let x28wr = writable_xreg(28);
1492                 let again_label = sink.get_label();
1493 
1494                 // again:
1495                 sink.bind_label(again_label, &mut state.ctrl_plane);
1496 
1497                 if let Some(trap_code) = flags.trap_code() {
1498                     sink.add_trap(trap_code);
1499                 }
1500 
1501                 sink.put4(enc_ldaxr(ty, x27wr, x25)); // ldaxr x27, [x25]
1502                 let size = OperandSize::from_ty(ty);
1503                 let sign_ext = match op {
1504                     AtomicRMWLoopOp::Smin | AtomicRMWLoopOp::Smax => match ty {
1505                         I16 => Some((ExtendOp::SXTH, 16)),
1506                         I8 => Some((ExtendOp::SXTB, 8)),
1507                         _ => None,
1508                     },
1509                     _ => None,
1510                 };
1511 
1512                 // sxt{b|h} the loaded result if necessary.
1513                 if sign_ext.is_some() {
1514                     let (_, from_bits) = sign_ext.unwrap();
1515                     Inst::Extend {
1516                         rd: x27wr,
1517                         rn: x27,
1518                         signed: true,
1519                         from_bits,
1520                         to_bits: size.bits(),
1521                     }
1522                     .emit(sink, emit_info, state);
1523                 }
1524 
1525                 match op {
1526                     AtomicRMWLoopOp::Xchg => {} // do nothing
1527                     AtomicRMWLoopOp::Nand => {
1528                         // and x28, x27, x26
1529                         // mvn x28, x28
1530 
1531                         Inst::AluRRR {
1532                             alu_op: ALUOp::And,
1533                             size,
1534                             rd: x28wr,
1535                             rn: x27,
1536                             rm: x26,
1537                         }
1538                         .emit(sink, emit_info, state);
1539 
1540                         Inst::AluRRR {
1541                             alu_op: ALUOp::OrrNot,
1542                             size,
1543                             rd: x28wr,
1544                             rn: xzr,
1545                             rm: x28,
1546                         }
1547                         .emit(sink, emit_info, state);
1548                     }
1549                     AtomicRMWLoopOp::Umin
1550                     | AtomicRMWLoopOp::Umax
1551                     | AtomicRMWLoopOp::Smin
1552                     | AtomicRMWLoopOp::Smax => {
1553                         // cmp x27, x26 {?sxt}
1554                         // csel.op x28, x27, x26
1555 
1556                         let cond = match op {
1557                             AtomicRMWLoopOp::Umin => Cond::Lo,
1558                             AtomicRMWLoopOp::Umax => Cond::Hi,
1559                             AtomicRMWLoopOp::Smin => Cond::Lt,
1560                             AtomicRMWLoopOp::Smax => Cond::Gt,
1561                             _ => unreachable!(),
1562                         };
1563 
1564                         if sign_ext.is_some() {
1565                             let (extendop, _) = sign_ext.unwrap();
1566                             Inst::AluRRRExtend {
1567                                 alu_op: ALUOp::SubS,
1568                                 size,
1569                                 rd: writable_zero_reg(),
1570                                 rn: x27,
1571                                 rm: x26,
1572                                 extendop,
1573                             }
1574                             .emit(sink, emit_info, state);
1575                         } else {
1576                             Inst::AluRRR {
1577                                 alu_op: ALUOp::SubS,
1578                                 size,
1579                                 rd: writable_zero_reg(),
1580                                 rn: x27,
1581                                 rm: x26,
1582                             }
1583                             .emit(sink, emit_info, state);
1584                         }
1585 
1586                         Inst::CSel {
1587                             cond,
1588                             rd: x28wr,
1589                             rn: x27,
1590                             rm: x26,
1591                         }
1592                         .emit(sink, emit_info, state);
1593                     }
1594                     _ => {
1595                         // add/sub/and/orr/eor x28, x27, x26
1596                         let alu_op = match op {
1597                             AtomicRMWLoopOp::Add => ALUOp::Add,
1598                             AtomicRMWLoopOp::Sub => ALUOp::Sub,
1599                             AtomicRMWLoopOp::And => ALUOp::And,
1600                             AtomicRMWLoopOp::Orr => ALUOp::Orr,
1601                             AtomicRMWLoopOp::Eor => ALUOp::Eor,
1602                             AtomicRMWLoopOp::Nand
1603                             | AtomicRMWLoopOp::Umin
1604                             | AtomicRMWLoopOp::Umax
1605                             | AtomicRMWLoopOp::Smin
1606                             | AtomicRMWLoopOp::Smax
1607                             | AtomicRMWLoopOp::Xchg => unreachable!(),
1608                         };
1609 
1610                         Inst::AluRRR {
1611                             alu_op,
1612                             size,
1613                             rd: x28wr,
1614                             rn: x27,
1615                             rm: x26,
1616                         }
1617                         .emit(sink, emit_info, state);
1618                     }
1619                 }
1620 
1621                 if let Some(trap_code) = flags.trap_code() {
1622                     sink.add_trap(trap_code);
1623                 }
1624                 if op == AtomicRMWLoopOp::Xchg {
1625                     sink.put4(enc_stlxr(ty, x24wr, x26, x25)); // stlxr w24, x26, [x25]
1626                 } else {
1627                     sink.put4(enc_stlxr(ty, x24wr, x28, x25)); // stlxr w24, x28, [x25]
1628                 }
1629 
1630                 // cbnz w24, again
1631                 // Note, we're actually testing x24, and relying on the default zero-high-half
1632                 // rule in the assignment that `stlxr` does.
1633                 let br_offset = sink.cur_offset();
1634                 sink.put4(enc_conditional_br(
1635                     BranchTarget::Label(again_label),
1636                     CondBrKind::NotZero(x24, OperandSize::Size64),
1637                 ));
1638                 sink.use_label_at_offset(br_offset, again_label, LabelUse::Branch19);
1639             }
1640             &Inst::AtomicCAS {
1641                 rd,
1642                 rs,
1643                 rt,
1644                 rn,
1645                 ty,
1646                 flags,
1647             } => {
1648                 debug_assert_eq!(rd.to_reg(), rs);
1649                 let size = match ty {
1650                     I8 => 0b00,
1651                     I16 => 0b01,
1652                     I32 => 0b10,
1653                     I64 => 0b11,
1654                     _ => panic!("Unsupported type: {ty}"),
1655                 };
1656 
1657                 if let Some(trap_code) = flags.trap_code() {
1658                     sink.add_trap(trap_code);
1659                 }
1660 
1661                 sink.put4(enc_cas(size, rd, rt, rn));
1662             }
1663             &Inst::AtomicCASLoop { ty, flags, .. } => {
1664                 /* Emit this:
1665                     again:
1666                      ldaxr{,b,h} x/w27, [x25]
1667                      cmp         x27, x/w26 uxt{b,h}
1668                      b.ne        out
1669                      stlxr{,b,h} w24, x/w28, [x25]
1670                      cbnz        x24, again
1671                     out:
1672 
1673                   Operand conventions:
1674                      IN:  x25 (addr), x26 (expected value), x28 (replacement value)
1675                      OUT: x27 (old value), x24 (trashed)
1676                 */
1677                 let x24 = xreg(24);
1678                 let x25 = xreg(25);
1679                 let x26 = xreg(26);
1680                 let x27 = xreg(27);
1681                 let x28 = xreg(28);
1682                 let xzrwr = writable_zero_reg();
1683                 let x24wr = writable_xreg(24);
1684                 let x27wr = writable_xreg(27);
1685                 let again_label = sink.get_label();
1686                 let out_label = sink.get_label();
1687 
1688                 // again:
1689                 sink.bind_label(again_label, &mut state.ctrl_plane);
1690 
1691                 if let Some(trap_code) = flags.trap_code() {
1692                     sink.add_trap(trap_code);
1693                 }
1694 
1695                 // ldaxr x27, [x25]
1696                 sink.put4(enc_ldaxr(ty, x27wr, x25));
1697 
1698                 // The top 32-bits are zero-extended by the ldaxr so we don't
1699                 // have to use UXTW, just the x-form of the register.
1700                 let (bit21, extend_op) = match ty {
1701                     I8 => (0b1, 0b000000),
1702                     I16 => (0b1, 0b001000),
1703                     _ => (0b0, 0b000000),
1704                 };
1705                 let bits_31_21 = 0b111_01011_000 | bit21;
1706                 // cmp x27, x26 (== subs xzr, x27, x26)
1707                 sink.put4(enc_arith_rrr(bits_31_21, extend_op, xzrwr, x27, x26));
1708 
1709                 // b.ne out
1710                 let br_out_offset = sink.cur_offset();
1711                 sink.put4(enc_conditional_br(
1712                     BranchTarget::Label(out_label),
1713                     CondBrKind::Cond(Cond::Ne),
1714                 ));
1715                 sink.use_label_at_offset(br_out_offset, out_label, LabelUse::Branch19);
1716 
1717                 if let Some(trap_code) = flags.trap_code() {
1718                     sink.add_trap(trap_code);
1719                 }
1720 
1721                 sink.put4(enc_stlxr(ty, x24wr, x28, x25)); // stlxr w24, x28, [x25]
1722 
1723                 // cbnz w24, again.
1724                 // Note, we're actually testing x24, and relying on the default zero-high-half
1725                 // rule in the assignment that `stlxr` does.
1726                 let br_again_offset = sink.cur_offset();
1727                 sink.put4(enc_conditional_br(
1728                     BranchTarget::Label(again_label),
1729                     CondBrKind::NotZero(x24, OperandSize::Size64),
1730                 ));
1731                 sink.use_label_at_offset(br_again_offset, again_label, LabelUse::Branch19);
1732 
1733                 // out:
1734                 sink.bind_label(out_label, &mut state.ctrl_plane);
1735             }
1736             &Inst::LoadAcquire {
1737                 access_ty,
1738                 rt,
1739                 rn,
1740                 flags,
1741             } => {
1742                 if let Some(trap_code) = flags.trap_code() {
1743                     sink.add_trap(trap_code);
1744                 }
1745 
1746                 sink.put4(enc_ldar(access_ty, rt, rn));
1747             }
1748             &Inst::StoreRelease {
1749                 access_ty,
1750                 rt,
1751                 rn,
1752                 flags,
1753             } => {
1754                 if let Some(trap_code) = flags.trap_code() {
1755                     sink.add_trap(trap_code);
1756                 }
1757 
1758                 sink.put4(enc_stlr(access_ty, rt, rn));
1759             }
1760             &Inst::Fence {} => {
1761                 sink.put4(enc_dmb_ish()); // dmb ish
1762             }
1763             &Inst::Csdb {} => {
1764                 sink.put4(0xd503229f);
1765             }
1766             &Inst::FpuMove32 { rd, rn } => {
1767                 sink.put4(enc_fpurr(0b000_11110_00_1_000000_10000, rd, rn));
1768             }
1769             &Inst::FpuMove64 { rd, rn } => {
1770                 sink.put4(enc_fpurr(0b000_11110_01_1_000000_10000, rd, rn));
1771             }
1772             &Inst::FpuMove128 { rd, rn } => {
1773                 sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));
1774             }
1775             &Inst::FpuMoveFromVec { rd, rn, idx, size } => {
1776                 let (imm5, shift, mask) = match size.lane_size() {
1777                     ScalarSize::Size32 => (0b00100, 3, 0b011),
1778                     ScalarSize::Size64 => (0b01000, 4, 0b001),
1779                     _ => unimplemented!(),
1780                 };
1781                 debug_assert_eq!(idx & mask, idx);
1782                 let imm5 = imm5 | ((idx as u32) << shift);
1783                 sink.put4(
1784                     0b010_11110000_00000_000001_00000_00000
1785                         | (imm5 << 16)
1786                         | (machreg_to_vec(rn) << 5)
1787                         | machreg_to_vec(rd.to_reg()),
1788                 );
1789             }
1790             &Inst::FpuExtend { rd, rn, size } => {
1791                 sink.put4(enc_fpurr(
1792                     0b000_11110_00_1_000000_10000 | (size.ftype() << 12),
1793                     rd,
1794                     rn,
1795                 ));
1796             }
1797             &Inst::FpuRR {
1798                 fpu_op,
1799                 size,
1800                 rd,
1801                 rn,
1802             } => {
1803                 let top22 = match fpu_op {
1804                     FPUOp1::Abs => 0b000_11110_00_1_000001_10000,
1805                     FPUOp1::Neg => 0b000_11110_00_1_000010_10000,
1806                     FPUOp1::Sqrt => 0b000_11110_00_1_000011_10000,
1807                     FPUOp1::Cvt32To64 => {
1808                         debug_assert_eq!(size, ScalarSize::Size32);
1809                         0b000_11110_00_1_000101_10000
1810                     }
1811                     FPUOp1::Cvt64To32 => {
1812                         debug_assert_eq!(size, ScalarSize::Size64);
1813                         0b000_11110_01_1_000100_10000
1814                     }
1815                 };
1816                 let top22 = top22 | size.ftype() << 12;
1817                 sink.put4(enc_fpurr(top22, rd, rn));
1818             }
1819             &Inst::FpuRRR {
1820                 fpu_op,
1821                 size,
1822                 rd,
1823                 rn,
1824                 rm,
1825             } => {
1826                 let top22 = match fpu_op {
1827                     FPUOp2::Add => 0b000_11110_00_1_00000_001010,
1828                     FPUOp2::Sub => 0b000_11110_00_1_00000_001110,
1829                     FPUOp2::Mul => 0b000_11110_00_1_00000_000010,
1830                     FPUOp2::Div => 0b000_11110_00_1_00000_000110,
1831                     FPUOp2::Max => 0b000_11110_00_1_00000_010010,
1832                     FPUOp2::Min => 0b000_11110_00_1_00000_010110,
1833                 };
1834                 let top22 = top22 | size.ftype() << 12;
1835                 sink.put4(enc_fpurrr(top22, rd, rn, rm));
1836             }
1837             &Inst::FpuRRI { fpu_op, rd, rn } => match fpu_op {
1838                 FPUOpRI::UShr32(imm) => {
1839                     debug_assert_eq!(32, imm.lane_size_in_bits);
1840                     sink.put4(
1841                         0b0_0_1_011110_0000000_00_0_0_0_1_00000_00000
1842                             | imm.enc() << 16
1843                             | machreg_to_vec(rn) << 5
1844                             | machreg_to_vec(rd.to_reg()),
1845                     )
1846                 }
1847                 FPUOpRI::UShr64(imm) => {
1848                     debug_assert_eq!(64, imm.lane_size_in_bits);
1849                     sink.put4(
1850                         0b01_1_111110_0000000_00_0_0_0_1_00000_00000
1851                             | imm.enc() << 16
1852                             | machreg_to_vec(rn) << 5
1853                             | machreg_to_vec(rd.to_reg()),
1854                     )
1855                 }
1856             },
1857             &Inst::FpuRRIMod { fpu_op, rd, ri, rn } => {
1858                 debug_assert_eq!(rd.to_reg(), ri);
1859                 match fpu_op {
1860                     FPUOpRIMod::Sli64(imm) => {
1861                         debug_assert_eq!(64, imm.lane_size_in_bits);
1862                         sink.put4(
1863                             0b01_1_111110_0000000_010101_00000_00000
1864                                 | imm.enc() << 16
1865                                 | machreg_to_vec(rn) << 5
1866                                 | machreg_to_vec(rd.to_reg()),
1867                         )
1868                     }
1869                     FPUOpRIMod::Sli32(imm) => {
1870                         debug_assert_eq!(32, imm.lane_size_in_bits);
1871                         sink.put4(
1872                             0b0_0_1_011110_0000000_010101_00000_00000
1873                                 | imm.enc() << 16
1874                                 | machreg_to_vec(rn) << 5
1875                                 | machreg_to_vec(rd.to_reg()),
1876                         )
1877                     }
1878                 }
1879             }
1880             &Inst::FpuRRRR {
1881                 fpu_op,
1882                 size,
1883                 rd,
1884                 rn,
1885                 rm,
1886                 ra,
1887             } => {
1888                 let top17 = match fpu_op {
1889                     FPUOp3::MAdd => 0b000_11111_00_0_00000_0,
1890                     FPUOp3::MSub => 0b000_11111_00_0_00000_1,
1891                     FPUOp3::NMAdd => 0b000_11111_00_1_00000_0,
1892                     FPUOp3::NMSub => 0b000_11111_00_1_00000_1,
1893                 };
1894                 let top17 = top17 | size.ftype() << 7;
1895                 sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra));
1896             }
            // Two-register miscellaneous vector operation. Each `op` selects a
            // `u` bit, a five-bit opcode (named for instruction bits 12..16)
            // and a size field; `q` comes from the vector size. Most arms also
            // debug-assert the vector shapes the operation is emitted for.
            &Inst::VecMisc { op, rd, rn, size } => {
                let (q, enc_size) = size.enc_size();
                // Inside the arms `size` still names the `VectorSize` operand;
                // the binding introduced here shadows it with the encoded size
                // field only after the match has been evaluated.
                let (u, bits_12_16, size) = match op {
                    // `Not` always uses a size field of 0b00, whatever the shape.
                    VecMisc2::Not => (0b1, 0b00101, 0b00),
                    VecMisc2::Neg => (0b1, 0b01011, enc_size),
                    VecMisc2::Abs => (0b0, 0b01011, enc_size),
                    VecMisc2::Fabs => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b0, 0b01111, enc_size)
                    }
                    VecMisc2::Fneg => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b1, 0b01111, enc_size)
                    }
                    VecMisc2::Fsqrt => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b1, 0b11111, enc_size)
                    }
                    VecMisc2::Rev16 => {
                        debug_assert_eq!(size, VectorSize::Size8x16);
                        (0b0, 0b00001, enc_size)
                    }
                    VecMisc2::Rev32 => {
                        debug_assert!(size == VectorSize::Size8x16 || size == VectorSize::Size16x8);
                        (0b1, 0b00000, enc_size)
                    }
                    VecMisc2::Rev64 => {
                        debug_assert!(
                            size == VectorSize::Size8x16
                                || size == VectorSize::Size16x8
                                || size == VectorSize::Size32x4
                        );
                        (0b0, 0b00000, enc_size)
                    }
                    VecMisc2::Fcvtzs => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b0, 0b11011, enc_size)
                    }
                    VecMisc2::Fcvtzu => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b1, 0b11011, enc_size)
                    }
                    // The next four arms keep only the low bit of the size
                    // encoding.
                    // NOTE(review): presumably the high size bit is part of
                    // the opcode for these instructions -- confirm against the
                    // Arm ARM.
                    VecMisc2::Scvtf => {
                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
                        (0b0, 0b11101, enc_size & 0b1)
                    }
                    VecMisc2::Ucvtf => {
                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
                        (0b1, 0b11101, enc_size & 0b1)
                    }
                    VecMisc2::Frintn => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b0, 0b11000, enc_size & 0b01)
                    }
                    VecMisc2::Frintz => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b0, 0b11001, enc_size)
                    }
                    // Same `u`/opcode as `Frintz`; only the masked size field
                    // tells the two apart.
                    VecMisc2::Frintm => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b0, 0b11001, enc_size & 0b01)
                    }
                    // Same `u`/opcode as `Frintn`; only the unmasked size field
                    // tells the two apart.
                    VecMisc2::Frintp => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b0, 0b11000, enc_size)
                    }
                    VecMisc2::Cnt => {
                        debug_assert!(size == VectorSize::Size8x8 || size == VectorSize::Size8x16);
                        (0b0, 0b00101, enc_size)
                    }
                    // Integer compares against zero: no shape restriction is
                    // asserted here.
                    VecMisc2::Cmeq0 => (0b0, 0b01001, enc_size),
                    VecMisc2::Cmge0 => (0b1, 0b01000, enc_size),
                    VecMisc2::Cmgt0 => (0b0, 0b01000, enc_size),
                    VecMisc2::Cmle0 => (0b1, 0b01001, enc_size),
                    VecMisc2::Cmlt0 => (0b0, 0b01010, enc_size),
                    VecMisc2::Fcmeq0 => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b0, 0b01101, enc_size)
                    }
                    VecMisc2::Fcmge0 => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b1, 0b01100, enc_size)
                    }
                    VecMisc2::Fcmgt0 => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b0, 0b01100, enc_size)
                    }
                    VecMisc2::Fcmle0 => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b1, 0b01101, enc_size)
                    }
                    VecMisc2::Fcmlt0 => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b0, 0b01110, enc_size)
                    }
                };
                // `q` and `u` are packed into one two-bit argument for the encoder.
                sink.put4(enc_vec_rr_misc((q << 1) | u, size, bits_12_16, rd, rn));
            }
2051             &Inst::VecLanes { op, rd, rn, size } => {
2052                 let (q, size) = match size {
2053                     VectorSize::Size8x8 => (0b0, 0b00),
2054                     VectorSize::Size8x16 => (0b1, 0b00),
2055                     VectorSize::Size16x4 => (0b0, 0b01),
2056                     VectorSize::Size16x8 => (0b1, 0b01),
2057                     VectorSize::Size32x4 => (0b1, 0b10),
2058                     _ => unreachable!(),
2059                 };
2060                 let (u, opcode) = match op {
2061                     VecLanesOp::Uminv => (0b1, 0b11010),
2062                     VecLanesOp::Addv => (0b0, 0b11011),
2063                 };
2064                 sink.put4(enc_vec_lanes(q, u, size, opcode, rd, rn));
2065             }
2066             &Inst::VecShiftImm {
2067                 op,
2068                 rd,
2069                 rn,
2070                 size,
2071                 imm,
2072             } => {
2073                 let (is_shr, mut template) = match op {
2074                     VecShiftImmOp::Ushr => (true, 0b_001_011110_0000_000_000001_00000_00000_u32),
2075                     VecShiftImmOp::Sshr => (true, 0b_000_011110_0000_000_000001_00000_00000_u32),
2076                     VecShiftImmOp::Shl => (false, 0b_000_011110_0000_000_010101_00000_00000_u32),
2077                 };
2078                 if size.is_128bits() {
2079                     template |= 0b1 << 30;
2080                 }
2081                 let imm = imm as u32;
2082                 // Deal with the somewhat strange encoding scheme for, and limits on,
2083                 // the shift amount.
2084                 let immh_immb = match (size.lane_size(), is_shr) {
2085                     (ScalarSize::Size64, true) if imm >= 1 && imm <= 64 => {
2086                         0b_1000_000_u32 | (64 - imm)
2087                     }
2088                     (ScalarSize::Size32, true) if imm >= 1 && imm <= 32 => {
2089                         0b_0100_000_u32 | (32 - imm)
2090                     }
2091                     (ScalarSize::Size16, true) if imm >= 1 && imm <= 16 => {
2092                         0b_0010_000_u32 | (16 - imm)
2093                     }
2094                     (ScalarSize::Size8, true) if imm >= 1 && imm <= 8 => {
2095                         0b_0001_000_u32 | (8 - imm)
2096                     }
2097                     (ScalarSize::Size64, false) if imm <= 63 => 0b_1000_000_u32 | imm,
2098                     (ScalarSize::Size32, false) if imm <= 31 => 0b_0100_000_u32 | imm,
2099                     (ScalarSize::Size16, false) if imm <= 15 => 0b_0010_000_u32 | imm,
2100                     (ScalarSize::Size8, false) if imm <= 7 => 0b_0001_000_u32 | imm,
2101                     _ => panic!(
2102                         "aarch64: Inst::VecShiftImm: emit: invalid op/size/imm {op:?}, {size:?}, {imm:?}"
2103                     ),
2104                 };
2105                 let rn_enc = machreg_to_vec(rn);
2106                 let rd_enc = machreg_to_vec(rd.to_reg());
2107                 sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
2108             }
2109             &Inst::VecShiftImmMod {
2110                 op,
2111                 rd,
2112                 ri,
2113                 rn,
2114                 size,
2115                 imm,
2116             } => {
                     // Vector shift-by-immediate whose destination is also an input: the
                     // tied input `ri` must already be the same register as `rd`
                     // (checked in debug builds only).
2117                 debug_assert_eq!(rd.to_reg(), ri);
                     // `Sli` is the only op and is treated as a left shift
                     // (`is_shr == false`), so the `true` arms below are currently unused.
2118                 let (is_shr, mut template) = match op {
2119                     VecShiftImmModOp::Sli => (false, 0b_001_011110_0000_000_010101_00000_00000_u32),
2120                 };
                     // Bit 30 is set for 128-bit vector arrangements.
2121                 if size.is_128bits() {
2122                     template |= 0b1 << 30;
2123                 }
2124                 let imm = imm as u32;
2125                 // Deal with the somewhat strange encoding scheme for, and limits on,
2126                 // the shift amount.
                     //
                     // A single marker bit selects the lane width. Below it, right shifts
                     // store `lane_bits - imm` (accepted for 1..=lane_bits) and left shifts
                     // store `imm` itself (accepted for 0..lane_bits). Anything else panics.
2127                 let immh_immb = match (size.lane_size(), is_shr) {
2128                     (ScalarSize::Size64, true) if imm >= 1 && imm <= 64 => {
2129                         0b_1000_000_u32 | (64 - imm)
2130                     }
2131                     (ScalarSize::Size32, true) if imm >= 1 && imm <= 32 => {
2132                         0b_0100_000_u32 | (32 - imm)
2133                     }
2134                     (ScalarSize::Size16, true) if imm >= 1 && imm <= 16 => {
2135                         0b_0010_000_u32 | (16 - imm)
2136                     }
2137                     (ScalarSize::Size8, true) if imm >= 1 && imm <= 8 => {
2138                         0b_0001_000_u32 | (8 - imm)
2139                     }
2140                     (ScalarSize::Size64, false) if imm <= 63 => 0b_1000_000_u32 | imm,
2141                     (ScalarSize::Size32, false) if imm <= 31 => 0b_0100_000_u32 | imm,
2142                     (ScalarSize::Size16, false) if imm <= 15 => 0b_0010_000_u32 | imm,
2143                     (ScalarSize::Size8, false) if imm <= 7 => 0b_0001_000_u32 | imm,
2144                     _ => panic!(
2145                         "aarch64: Inst::VecShiftImmMod: emit: invalid op/size/imm {op:?}, {size:?}, {imm:?}"
2146                     ),
2147                 };
2148                 let rn_enc = machreg_to_vec(rn);
2149                 let rd_enc = machreg_to_vec(rd.to_reg());
                     // Field placement: shift field at bit 16, `rn` at bit 5, `rd` at bit 0.
2150                 sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
2151             }
2152             &Inst::VecExtract { rd, rn, rm, imm4 } => {
                     // The index must fit in four bits; an index of 16 or more panics at
                     // emission time instead of producing a corrupt instruction word.
2153                 if imm4 < 16 {
2154                     let template = 0b_01_101110_000_00000_0_0000_0_00000_00000_u32;
2155                     let rm_enc = machreg_to_vec(rm);
2156                     let rn_enc = machreg_to_vec(rn);
2157                     let rd_enc = machreg_to_vec(rd.to_reg());
                         // Field placement: `rm` at bit 16, `imm4` at bit 11, `rn` at bit 5,
                         // `rd` at bit 0.
2158                     sink.put4(
2159                         template | (rm_enc << 16) | ((imm4 as u32) << 11) | (rn_enc << 5) | rd_enc,
2160                     );
2161                 } else {
2162                     panic!("aarch64: Inst::VecExtract: emit: invalid extract index {imm4}");
2163                 }
2164             }
2165             &Inst::VecTbl { rd, rn, rm } => {
                     // Table lookup, non-extension form, with a single table register `rn`.
                     // NOTE(review): the `0b00` argument is presumably the table-length
                     // field for a one-register table -- confirm against `enc_tbl`.
2166                 sink.put4(enc_tbl(/* is_extension = */ false, 0b00, rd, rn, rm));
2167             }
2168             &Inst::VecTblExt { rd, ri, rn, rm } => {
                     // Extension form of the single-register table lookup. `rd` is also an
                     // input here, so the tied input `ri` must be the same register
                     // (checked in debug builds only).
2169                 debug_assert_eq!(rd.to_reg(), ri);
2170                 sink.put4(enc_tbl(/* is_extension = */ true, 0b00, rd, rn, rm));
2171             }
2172             &Inst::VecTbl2 { rd, rn, rn2, rm } => {
                     // Two-register table lookup. Only `rn` is passed to the encoder, so
                     // `rn2` must be the register numbered immediately after `rn`, wrapping
                     // from 31 back to 0. This is a hard assert, enforced in release builds
                     // as well.
2173                 assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32);
2174                 sink.put4(enc_tbl(/* is_extension = */ false, 0b01, rd, rn, rm));
2175             }
2176             &Inst::VecTbl2Ext {
2177                 rd,
2178                 ri,
2179                 rn,
2180                 rn2,
2181                 rm,
2182             } => {
                     // Extension form of the two-register table lookup: `rd` is tied to the
                     // input `ri` (debug-only check).
2183                 debug_assert_eq!(rd.to_reg(), ri);
                     // Only `rn` is passed to the encoder, so `rn2` must be the next
                     // register after `rn`, modulo 32 (checked in all build modes).
2184                 assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32);
2185                 sink.put4(enc_tbl(/* is_extension = */ true, 0b01, rd, rn, rm));
2186             }
2187             &Inst::FpuCmp { size, rn, rm } => {
                     // Floating-point compare of `rn` with `rm`; the whole instruction word
                     // is built by `enc_fcmp`. There is no destination register operand.
2188                 sink.put4(enc_fcmp(size, rn, rm));
2189             }
2190             &Inst::FpuToInt { op, rd, rn } => {
                     // Pick the upper 16 bits of the instruction word for each conversion.
                     // Across the table below, the leading bit is 1 for a 64-bit integer
                     // result, the two-bit group after `11110` is `01` for an F64 source
                     // (`00` for F32), and the final bit is 1 for the unsigned variants.
2191                 let top16 = match op {
2192                     // FCVTZS (32/32-bit)
2193                     FpuToIntOp::F32ToI32 => 0b000_11110_00_1_11_000,
2194                     // FCVTZU (32/32-bit)
2195                     FpuToIntOp::F32ToU32 => 0b000_11110_00_1_11_001,
2196                     // FCVTZS (32/64-bit)
2197                     FpuToIntOp::F32ToI64 => 0b100_11110_00_1_11_000,
2198                     // FCVTZU (32/64-bit)
2199                     FpuToIntOp::F32ToU64 => 0b100_11110_00_1_11_001,
2200                     // FCVTZS (64/32-bit)
2201                     FpuToIntOp::F64ToI32 => 0b000_11110_01_1_11_000,
2202                     // FCVTZU (64/32-bit)
2203                     FpuToIntOp::F64ToU32 => 0b000_11110_01_1_11_001,
2204                     // FCVTZS (64/64-bit)
2205                     FpuToIntOp::F64ToI64 => 0b100_11110_01_1_11_000,
2206                     // FCVTZU (64/64-bit)
2207                     FpuToIntOp::F64ToU64 => 0b100_11110_01_1_11_001,
2208                 };
2209                 sink.put4(enc_fputoint(top16, rd, rn));
2210             }
2211             &Inst::IntToFpu { op, rd, rn } => {
                     // Pick the upper 16 bits of the instruction word for each conversion.
                     // Across the table below, the leading bit is 1 for a 64-bit integer
                     // source, the two-bit group after `11110` is `01` for an F64 result
                     // (`00` for F32), and the final bit is 1 for the unsigned variants.
2212                 let top16 = match op {
2213                     // SCVTF (32/32-bit)
2214                     IntToFpuOp::I32ToF32 => 0b000_11110_00_1_00_010,
2215                     // UCVTF (32/32-bit)
2216                     IntToFpuOp::U32ToF32 => 0b000_11110_00_1_00_011,
2217                     // SCVTF (64/32-bit)
2218                     IntToFpuOp::I64ToF32 => 0b100_11110_00_1_00_010,
2219                     // UCVTF (64/32-bit)
2220                     IntToFpuOp::U64ToF32 => 0b100_11110_00_1_00_011,
2221                     // SCVTF (32/64-bit)
2222                     IntToFpuOp::I32ToF64 => 0b000_11110_01_1_00_010,
2223                     // UCVTF (32/64-bit)
2224                     IntToFpuOp::U32ToF64 => 0b000_11110_01_1_00_011,
2225                     // SCVTF (64/64-bit)
2226                     IntToFpuOp::I64ToF64 => 0b100_11110_01_1_00_010,
2227                     // UCVTF (64/64-bit)
2228                     IntToFpuOp::U64ToF64 => 0b100_11110_01_1_00_011,
2229                 };
2230                 sink.put4(enc_inttofpu(top16, rd, rn));
2231             }
2232             &Inst::FpuCSel16 { rd, rn, rm, cond } => {
                     // Floating-point conditional select between `rn` and `rm` on 16-bit
                     // scalars; the encoding is delegated to `enc_fcsel`.
2233                 sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size16));
2234             }
2235             &Inst::FpuCSel32 { rd, rn, rm, cond } => {
                     // Floating-point conditional select between `rn` and `rm` on 32-bit
                     // scalars; the encoding is delegated to `enc_fcsel`.
2236                 sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size32));
2237             }
2238             &Inst::FpuCSel64 { rd, rn, rm, cond } => {
                     // Floating-point conditional select between `rn` and `rm` on 64-bit
                     // scalars; the encoding is delegated to `enc_fcsel`.
2239                 sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size64));
2240             }
2241             &Inst::FpuRound { op, rd, rn } => {
                     // Pick the upper 22 bits of the instruction word for each rounding
                     // mode. The 32- and 64-bit variants of a mode differ only in the
                     // two-bit group after `11110` (`00` vs `01`); the three-bit group
                     // before the trailing `10000` selects the rounding direction.
2242                 let top22 = match op {
2243                     FpuRoundMode::Minus32 => 0b000_11110_00_1_001_010_10000,
2244                     FpuRoundMode::Minus64 => 0b000_11110_01_1_001_010_10000,
2245                     FpuRoundMode::Plus32 => 0b000_11110_00_1_001_001_10000,
2246                     FpuRoundMode::Plus64 => 0b000_11110_01_1_001_001_10000,
2247                     FpuRoundMode::Zero32 => 0b000_11110_00_1_001_011_10000,
2248                     FpuRoundMode::Zero64 => 0b000_11110_01_1_001_011_10000,
2249                     FpuRoundMode::Nearest32 => 0b000_11110_00_1_001_000_10000,
2250                     FpuRoundMode::Nearest64 => 0b000_11110_01_1_001_000_10000,
2251                 };
2252                 sink.put4(enc_fround(top22, rd, rn));
2253             }
2254             &Inst::MovToFpu { rd, rn, size } => {
                     // Move from general-purpose register `rn` into FP/vector register `rd`.
                     // Only 16-, 32- and 64-bit sizes have a template; any other size is
                     // treated as impossible. The 64-bit template additionally sets the
                     // top bit of the word.
2255                 let template = match size {
2256                     ScalarSize::Size16 => 0b000_11110_11_1_00_111_000000_00000_00000,
2257                     ScalarSize::Size32 => 0b000_11110_00_1_00_111_000000_00000_00000,
2258                     ScalarSize::Size64 => 0b100_11110_01_1_00_111_000000_00000_00000,
2259                     _ => unreachable!(),
2260                 };
                     // Source is encoded as a GPR number at bit 5, destination as a vector
                     // register number at bit 0.
2261                 sink.put4(template | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg()));
2262             }
2263             &Inst::FpuMoveFPImm { rd, imm, size } => {
                     // Materialize an encoded floating-point immediate in `rd`. The value
                     // of `size.ftype()` lands at bit 22 and the immediate's encoded bits at
                     // bit 13. `<<` binds tighter than `|`, so the unparenthesized
                     // `size.ftype() << 22` term is shifted before being OR-ed in.
2264                 sink.put4(
2265                     0b000_11110_00_1_00_000_000100_00000_00000
2266                         | size.ftype() << 22
2267                         | ((imm.enc_bits() as u32) << 13)
2268                         | machreg_to_vec(rd.to_reg()),
2269                 );
2270             }
2271             &Inst::MovToVec {
2272                 rd,
2273                 ri,
2274                 rn,
2275                 idx,
2276                 size,
2277             } => {
                     // Move general-purpose register `rn` into lane `idx` of vector `rd`.
                     // `rd` is tied to the input `ri` (debug-only check).
2278                 debug_assert_eq!(rd.to_reg(), ri);
                     // `imm5` carries a one-hot lane-size marker in its low bits; the lane
                     // index is placed immediately above the marker (at bit `shift`).
2279                 let (imm5, shift) = match size.lane_size() {
2280                     ScalarSize::Size8 => (0b00001, 1),
2281                     ScalarSize::Size16 => (0b00010, 2),
2282                     ScalarSize::Size32 => (0b00100, 3),
2283                     ScalarSize::Size64 => (0b01000, 4),
2284                     _ => unreachable!(),
2285                 };
                     // The index must fit in the bits of the 5-bit field left above the
                     // marker (debug-only check).
2286                 debug_assert_eq!(idx & (0b11111 >> shift), idx);
2287                 let imm5 = imm5 | ((idx as u32) << shift);
                     // Field placement: `imm5` at bit 16, GPR `rn` at bit 5, vector `rd` at
                     // bit 0.
2288                 sink.put4(
2289                     0b010_01110000_00000_0_0011_1_00000_00000
2290                         | (imm5 << 16)
2291                         | (machreg_to_gpr(rn) << 5)
2292                         | machreg_to_vec(rd.to_reg()),
2293                 );
2294             }
2295             &Inst::MovFromVec { rd, rn, idx, size } => {
                     // Move lane `idx` of vector `rn` into general-purpose register `rd`.
                     // Per lane size: `q` (bit 30, set only for 64-bit lanes), the one-hot
                     // lane-size marker for `imm5`, the bit position of the index within
                     // `imm5`, and the mask of valid index values.
2296                 let (q, imm5, shift, mask) = match size {
2297                     ScalarSize::Size8 => (0b0, 0b00001, 1, 0b1111),
2298                     ScalarSize::Size16 => (0b0, 0b00010, 2, 0b0111),
2299                     ScalarSize::Size32 => (0b0, 0b00100, 3, 0b0011),
2300                     ScalarSize::Size64 => (0b1, 0b01000, 4, 0b0001),
2301                     _ => panic!("Unexpected scalar FP operand size: {size:?}"),
2302                 };
                     // Out-of-range lane indices are only caught in debug builds.
2303                 debug_assert_eq!(idx & mask, idx);
2304                 let imm5 = imm5 | ((idx as u32) << shift);
                     // Field placement: `q` at bit 30, `imm5` at bit 16, vector `rn` at
                     // bit 5, GPR `rd` at bit 0.
2305                 sink.put4(
2306                     0b000_01110000_00000_0_0111_1_00000_00000
2307                         | (q << 30)
2308                         | (imm5 << 16)
2309                         | (machreg_to_vec(rn) << 5)
2310                         | machreg_to_gpr(rd.to_reg()),
2311                 );
2312             }
2313             &Inst::MovFromVecSigned {
2314                 rd,
2315                 rn,
2316                 idx,
2317                 size,
2318                 scalar_size,
2319             } => {
                     // Signed move of lane `idx` of vector `rn` into general-purpose
                     // register `rd`; `scalar_size` is the width of the destination.
                     // Per arrangement: one-hot lane-size marker, index bit position, and
                     // whether this is one of the arrangements with half as many lanes.
2320                 let (imm5, shift, half) = match size {
2321                     VectorSize::Size8x8 => (0b00001, 1, true),
2322                     VectorSize::Size8x16 => (0b00001, 1, false),
2323                     VectorSize::Size16x4 => (0b00010, 2, true),
2324                     VectorSize::Size16x8 => (0b00010, 2, false),
                         // A 32-bit lane is rejected for a 32-bit destination (debug-only).
                         // NOTE(review): presumably because no widening would take place --
                         // confirm against the ISA reference.
2325                     VectorSize::Size32x2 => {
2326                         debug_assert_ne!(scalar_size, OperandSize::Size32);
2327                         (0b00100, 3, true)
2328                     }
2329                     VectorSize::Size32x4 => {
2330                         debug_assert_ne!(scalar_size, OperandSize::Size32);
2331                         (0b00100, 3, false)
2332                     }
2333                     _ => panic!("Unexpected vector operand size"),
2334                 };
                     // `half` narrows the valid index range by one further bit
                     // (debug-only check).
2335                 debug_assert_eq!(idx & (0b11111 >> (half as u32 + shift)), idx);
2336                 let imm5 = imm5 | ((idx as u32) << shift);
                     // Bit 30 is set when the destination is 64 bits wide. `<<` binds
                     // tighter than `|`, so the unparenthesized shift is applied first.
2337                 sink.put4(
2338                     0b000_01110000_00000_0_0101_1_00000_00000
2339                         | (scalar_size.is64() as u32) << 30
2340                         | (imm5 << 16)
2341                         | (machreg_to_vec(rn) << 5)
2342                         | machreg_to_gpr(rd.to_reg()),
2343                 );
2344             }
2345             &Inst::VecDup { rd, rn, size } => {
                     // Duplicate general-purpose register `rn` into vector `rd`.
                     // `q` (bit 30) is set for 128-bit arrangements; `imm5` is a one-hot
                     // marker for the lane size and carries no lane index here.
2346                 let q = size.is_128bits() as u32;
2347                 let imm5 = match size.lane_size() {
2348                     ScalarSize::Size8 => 0b00001,
2349                     ScalarSize::Size16 => 0b00010,
2350                     ScalarSize::Size32 => 0b00100,
2351                     ScalarSize::Size64 => 0b01000,
2352                     _ => unreachable!(),
2353                 };
                     // Field placement: `q` at bit 30, `imm5` at bit 16, GPR `rn` at bit 5,
                     // vector `rd` at bit 0.
2354                 sink.put4(
2355                     0b0_0_0_01110000_00000_000011_00000_00000
2356                         | (q << 30)
2357                         | (imm5 << 16)
2358                         | (machreg_to_gpr(rn) << 5)
2359                         | machreg_to_vec(rd.to_reg()),
2360                 );
2361             }
2362             &Inst::VecDupFromFpu { rd, rn, size, lane } => {
                     // Duplicate lane `lane` of vector register `rn` into vector `rd`.
                     // `q` (bit 30) is set for 128-bit arrangements.
2363                 let q = size.is_128bits() as u32;
                     // `imm5` is a one-hot lane-size marker with the source lane index
                     // placed just above it. The lane bounds are hard asserts, so they are
                     // enforced in release builds too.
2364                 let imm5 = match size.lane_size() {
2365                     ScalarSize::Size8 => {
2366                         assert!(lane < 16);
2367                         0b00001 | (u32::from(lane) << 1)
2368                     }
2369                     ScalarSize::Size16 => {
2370                         assert!(lane < 8);
2371                         0b00010 | (u32::from(lane) << 2)
2372                     }
2373                     ScalarSize::Size32 => {
2374                         assert!(lane < 4);
2375                         0b00100 | (u32::from(lane) << 3)
2376                     }
2377                     ScalarSize::Size64 => {
2378                         assert!(lane < 2);
2379                         0b01000 | (u32::from(lane) << 4)
2380                     }
2381                     _ => unimplemented!(),
2382                 };
                     // Both registers are encoded as vector register numbers: `rn` at
                     // bit 5, `rd` at bit 0.
2383                 sink.put4(
2384                     0b000_01110000_00000_000001_00000_00000
2385                         | (q << 30)
2386                         | (imm5 << 16)
2387                         | (machreg_to_vec(rn) << 5)
2388                         | machreg_to_vec(rd.to_reg()),
2389                 );
2390             }
2391             &Inst::VecDupFPImm { rd, imm, size } => {
                     // Duplicate an encoded floating-point immediate into vector `rd`.
2392                 let imm = imm.enc_bits();
                     // `op` is 0 for 32-bit lanes and 1 for 64-bit lanes; other lane sizes
                     // are not implemented.
2393                 let op = match size.lane_size() {
2394                     ScalarSize::Size32 => 0,
2395                     ScalarSize::Size64 => 1,
2396                     _ => unimplemented!(),
2397                 };
                     // Pack the 128-bit flag into bit 1 and `op` into bit 0.
2398                 let q_op = op | ((size.is_128bits() as u32) << 1);
2399 
                     // The third argument is fixed at `0b1111` for this instruction.
2400                 sink.put4(enc_asimd_mod_imm(rd, q_op, 0b1111, imm));
2401             }
2402             &Inst::VecDupImm {
2403                 rd,
2404                 imm,
2405                 invert,
2406                 size,
2407             } => {
                     // Duplicate an integer immediate into vector `rd`. The immediate
                     // decomposes into a value, a shift amount and a `shift_ones` flag;
                     // each lane size accepts only some combinations and maps them to an
                     // `(op, cmode)` pair. Invalid combinations fail hard asserts.
2408                 let (imm, shift, shift_ones) = imm.value();
2409                 let (op, cmode) = match size.lane_size() {
                         // 8-bit lanes: no inversion and no shift.
2410                     ScalarSize::Size8 => {
2411                         assert!(!invert);
2412                         assert_eq!(shift, 0);
2413 
2414                         (0, 0b1110)
2415                     }
                         // 16-bit lanes: shift must be 0 or 8 and `shift_ones` is not
                         // allowed; `cmode` is 0b1000 or 0b1010 respectively.
2416                     ScalarSize::Size16 => {
2417                         let s = shift & 8;
2418 
2419                         assert!(!shift_ones);
2420                         assert_eq!(s, shift);
2421 
2422                         (invert as u32, 0b1000 | (s >> 2))
2423                     }
2424                     ScalarSize::Size32 => {
2425                         if shift_ones {
                                 // Shift 8 gives `cmode` 0b1100, shift 16 gives 0b1101.
2426                             assert!(shift == 8 || shift == 16);
2427 
2428                             (invert as u32, 0b1100 | (shift >> 4))
2429                         } else {
                                 // Shift must be 0, 8, 16 or 24; `cmode` is `shift / 4`.
2430                             let s = shift & 24;
2431 
2432                             assert_eq!(s, shift);
2433 
2434                             (invert as u32, 0b0000 | (s >> 2))
2435                         }
2436                     }
                         // 64-bit lanes: same `cmode` as 8-bit lanes but with `op` set; no
                         // inversion and no shift.
2437                     ScalarSize::Size64 => {
2438                         assert!(!invert);
2439                         assert_eq!(shift, 0);
2440 
2441                         (1, 0b1110)
2442                     }
2443                     _ => unreachable!(),
2444                 };
                     // Pack the 128-bit flag into bit 1 and `op` into bit 0.
2445                 let q_op = op | ((size.is_128bits() as u32) << 1);
2446 
2447                 sink.put4(enc_asimd_mod_imm(rd, q_op, cmode, imm));
2448             }
2449             &Inst::VecExtend {
2450                 t,
2451                 rd,
2452                 rn,
2453                 high_half,
2454                 lane_size,
2455             } => {
                     // Lane-widening extend. `lane_size` is the lane size being extended
                     // *to* (see the panic message), so 8-bit is not a valid value.
2456                 let immh = match lane_size {
2457                     ScalarSize::Size16 => 0b001,
2458                     ScalarSize::Size32 => 0b010,
2459                     ScalarSize::Size64 => 0b100,
2460                     _ => panic!("Unexpected VecExtend to lane size of {lane_size:?}"),
2461                 };
                     // Bit 29 selects the unsigned form.
2462                 let u = match t {
2463                     VecExtendOp::Sxtl => 0b0,
2464                     VecExtendOp::Uxtl => 0b1,
2465                 };
                     // Field placement: `high_half` at bit 30, `u` at bit 29, `immh` at
                     // bit 19, `rn` at bit 5, `rd` at bit 0.
2466                 sink.put4(
2467                     0b000_011110_0000_000_101001_00000_00000
2468                         | ((high_half as u32) << 30)
2469                         | (u << 29)
2470                         | (immh << 19)
2471                         | (machreg_to_vec(rn) << 5)
2472                         | machreg_to_vec(rd.to_reg()),
2473                 );
2474             }
2475             &Inst::VecRRLong {
2476                 op,
2477                 rd,
2478                 rn,
2479                 high_half,
2480             } => {
                     // Two-register "long" vector ops. Each op maps to a `u` bit, a size
                     // field and a five-bit opcode (named for instruction bits 12..=16).
                     // The two `Fcvtl*` ops share one opcode, as do the three `Shll*` ops.
2481                 let (u, size, bits_12_16) = match op {
2482                     VecRRLongOp::Fcvtl16 => (0b0, 0b00, 0b10111),
2483                     VecRRLongOp::Fcvtl32 => (0b0, 0b01, 0b10111),
2484                     VecRRLongOp::Shll8 => (0b1, 0b00, 0b10011),
2485                     VecRRLongOp::Shll16 => (0b1, 0b01, 0b10011),
2486                     VecRRLongOp::Shll32 => (0b1, 0b10, 0b10011),
2487                 };
2488 
                     // The first encoder argument packs `high_half` into bit 1 and `u` into
                     // bit 0.
2489                 sink.put4(enc_vec_rr_misc(
2490                     ((high_half as u32) << 1) | u,
2491                     size,
2492                     bits_12_16,
2493                     rd,
2494                     rn,
2495                 ));
2496             }
2497             &Inst::VecRRNarrowLow {
2498                 op,
2499                 rd,
2500                 rn,
2501                 lane_size,
2502             }
2503             | &Inst::VecRRNarrowHigh {
2504                 op,
2505                 rd,
2506                 rn,
2507                 lane_size,
2508                 ..
2509             } => {
                     // Shared arm for both narrowing variants. The pattern bindings are the
                     // same for both, so `self` is re-matched to learn which one this is.
2510                 let high_half = match self {
2511                     &Inst::VecRRNarrowLow { .. } => false,
2512                     &Inst::VecRRNarrowHigh { .. } => true,
2513                     _ => unreachable!(),
2514                 };
2515 
                     // Size field for 8-, 16- and 32-bit lanes; other sizes are rejected.
2516                 let size = match lane_size {
2517                     ScalarSize::Size8 => 0b00,
2518                     ScalarSize::Size16 => 0b01,
2519                     ScalarSize::Size32 => 0b10,
2520                     _ => panic!("unsupported size: {lane_size:?}"),
2521                 };
2522 
2523                 // Floats use a single bit, to encode either half or single.
2524                 let size = match op {
2525                     VecRRNarrowOp::Fcvtn => size >> 1,
2526                     _ => size,
2527                 };
2528 
                     // `u` bit and five-bit opcode (named for instruction bits 12..=16).
2529                 let (u, bits_12_16) = match op {
2530                     VecRRNarrowOp::Xtn => (0b0, 0b10010),
2531                     VecRRNarrowOp::Sqxtn => (0b0, 0b10100),
2532                     VecRRNarrowOp::Sqxtun => (0b1, 0b10010),
2533                     VecRRNarrowOp::Uqxtn => (0b1, 0b10100),
2534                     VecRRNarrowOp::Fcvtn => (0b0, 0b10110),
2535                 };
2536 
                     // The first encoder argument packs `high_half` into bit 1 and `u` into
                     // bit 0.
2537                 sink.put4(enc_vec_rr_misc(
2538                     ((high_half as u32) << 1) | u,
2539                     size,
2540                     bits_12_16,
2541                     rd,
2542                     rn,
2543                 ));
2544             }
2545             &Inst::VecMovElement {
2546                 rd,
2547                 ri,
2548                 rn,
2549                 dest_idx,
2550                 src_idx,
2551                 size,
2552             } => {
                     // Copy lane `src_idx` of `rn` into lane `dest_idx` of `rd`. `rd` is tied
                     // to the input `ri` (debug-only check).
2553                 debug_assert_eq!(rd.to_reg(), ri);
                     // One-hot lane-size marker and the bit position of the destination
                     // index within `imm5`.
2554                 let (imm5, shift) = match size.lane_size() {
2555                     ScalarSize::Size8 => (0b00001, 1),
2556                     ScalarSize::Size16 => (0b00010, 2),
2557                     ScalarSize::Size32 => (0b00100, 3),
2558                     ScalarSize::Size64 => (0b01000, 4),
2559                     _ => unreachable!(),
2560                 };
                     // Both indices must fit in the bits left above the marker
                     // (debug-only checks).
2561                 let mask = 0b11111 >> shift;
2562                 debug_assert_eq!(dest_idx & mask, dest_idx);
2563                 debug_assert_eq!(src_idx & mask, src_idx);
                     // The source index lives in a separate field with no marker bit, so it
                     // is shifted one position less than the destination index.
2564                 let imm4 = (src_idx as u32) << (shift - 1);
2565                 let imm5 = imm5 | ((dest_idx as u32) << shift);
                     // Field placement: `imm5` at bit 16, `imm4` at bit 11, `rn` at bit 5,
                     // `rd` at bit 0.
2566                 sink.put4(
2567                     0b011_01110000_00000_0_0000_1_00000_00000
2568                         | (imm5 << 16)
2569                         | (imm4 << 11)
2570                         | (machreg_to_vec(rn) << 5)
2571                         | machreg_to_vec(rd.to_reg()),
2572                 );
2573             }
2574             &Inst::VecRRPair { op, rd, rn } => {
                     // Two-register pairwise op. `Addp` is the only variant, so the match
                     // is exhaustive without a fallback arm.
2575                 let bits_12_16 = match op {
2576                     VecPairOp::Addp => 0b11011,
2577                 };
2578 
2579                 sink.put4(enc_vec_rr_pair(bits_12_16, rd, rn));
2580             }
2581             &Inst::VecRRRLong {
2582                 rd,
2583                 rn,
2584                 rm,
2585                 alu_op,
2586                 high_half,
2587             } => {
                     // Three-register widening multiplies. `u` is 0 for the signed ops and
                     // 1 for the unsigned ones; `size` follows the 8/16/32 suffix; `bit14`
                     // is 1 for every op in this enum.
2588                 let (u, size, bit14) = match alu_op {
2589                     VecRRRLongOp::Smull8 => (0b0, 0b00, 0b1),
2590                     VecRRRLongOp::Smull16 => (0b0, 0b01, 0b1),
2591                     VecRRRLongOp::Smull32 => (0b0, 0b10, 0b1),
2592                     VecRRRLongOp::Umull8 => (0b1, 0b00, 0b1),
2593                     VecRRRLongOp::Umull16 => (0b1, 0b01, 0b1),
2594                     VecRRRLongOp::Umull32 => (0b1, 0b10, 0b1),
2595                 };
                     // Note the register argument order expected by the encoder: `rm`,
                     // then `rn`, then `rd`.
2596                 sink.put4(enc_vec_rrr_long(
2597                     high_half as u32,
2598                     u,
2599                     size,
2600                     bit14,
2601                     rm,
2602                     rn,
2603                     rd,
2604                 ));
2605             }
2606             &Inst::VecRRRLongMod {
2607                 rd,
2608                 ri,
2609                 rn,
2610                 rm,
2611                 alu_op,
2612                 high_half,
2613             } => {
                     // Three-register widening ops that also read the destination: `rd` is
                     // tied to the input `ri` (debug-only check).
2614                 debug_assert_eq!(rd.to_reg(), ri);
                     // Every op here is unsigned (`u == 1`) with `bit14` clear; `size`
                     // follows the 8/16/32 suffix.
2615                 let (u, size, bit14) = match alu_op {
2616                     VecRRRLongModOp::Umlal8 => (0b1, 0b00, 0b0),
2617                     VecRRRLongModOp::Umlal16 => (0b1, 0b01, 0b0),
2618                     VecRRRLongModOp::Umlal32 => (0b1, 0b10, 0b0),
2619                 };
                     // Note the register argument order expected by the encoder: `rm`,
                     // then `rn`, then `rd`.
2620                 sink.put4(enc_vec_rrr_long(
2621                     high_half as u32,
2622                     u,
2623                     size,
2624                     bit14,
2625                     rm,
2626                     rn,
2627                     rd,
2628                 ));
2629             }
2630             &Inst::VecRRPairLong { op, rd, rn } => {
                     // Two-register pairwise widening adds. `u` is 1 for the unsigned ops;
                     // `size` is 0 for the 8-bit forms and 1 for the 16-bit forms.
2631                 let (u, size) = match op {
2632                     VecRRPairLongOp::Saddlp8 => (0b0, 0b0),
2633                     VecRRPairLongOp::Uaddlp8 => (0b1, 0b0),
2634                     VecRRPairLongOp::Saddlp16 => (0b0, 0b1),
2635                     VecRRPairLongOp::Uaddlp16 => (0b1, 0b1),
2636                 };
2637 
2638                 sink.put4(enc_vec_rr_pair_long(u, size, rd, rn));
2639             }
2640             &Inst::VecRRR {
2641                 rd,
2642                 rn,
2643                 rm,
2644                 alu_op,
2645                 size,
2646             } => {
                     // Three-register vector ALU ops. Each op selects the top 11 bits of
                     // the instruction word and a six-bit field named for bits 15..=10.
2647                 let (q, enc_size) = size.enc_size();
                     // Floating-point ops do not use `enc_size`; their size bit comes from
                     // `size.enc_float_size()` and is merged in after the table below.
2648                 let is_float = match alu_op {
2649                     VecALUOp::Fcmeq
2650                     | VecALUOp::Fcmgt
2651                     | VecALUOp::Fcmge
2652                     | VecALUOp::Fadd
2653                     | VecALUOp::Fsub
2654                     | VecALUOp::Fdiv
2655                     | VecALUOp::Fmax
2656                     | VecALUOp::Fmin
2657                     | VecALUOp::Fmul => true,
2658                     _ => false,
2659                 };
2660 
                     // In the `template | enc_size << 1` expressions below, `<<` binds
                     // tighter than `|`, so the size is shifted first and then merged in.
                     // The `debug_assert_ne!` lines reject 64x2 for ops that do not accept
                     // it here (debug builds only).
2661                 let (top11, bit15_10) = match alu_op {
2662                     VecALUOp::Sqadd => (0b000_01110_00_1 | enc_size << 1, 0b000011),
2663                     VecALUOp::Sqsub => (0b000_01110_00_1 | enc_size << 1, 0b001011),
2664                     VecALUOp::Uqadd => (0b001_01110_00_1 | enc_size << 1, 0b000011),
2665                     VecALUOp::Uqsub => (0b001_01110_00_1 | enc_size << 1, 0b001011),
2666                     VecALUOp::Cmeq => (0b001_01110_00_1 | enc_size << 1, 0b100011),
2667                     VecALUOp::Cmge => (0b000_01110_00_1 | enc_size << 1, 0b001111),
2668                     VecALUOp::Cmgt => (0b000_01110_00_1 | enc_size << 1, 0b001101),
2669                     VecALUOp::Cmhi => (0b001_01110_00_1 | enc_size << 1, 0b001101),
2670                     VecALUOp::Cmhs => (0b001_01110_00_1 | enc_size << 1, 0b001111),
2671                     VecALUOp::Fcmeq => (0b000_01110_00_1, 0b111001),
2672                     VecALUOp::Fcmgt => (0b001_01110_10_1, 0b111001),
2673                     VecALUOp::Fcmge => (0b001_01110_00_1, 0b111001),
2674                     // The following logical instructions operate on bytes, so are not encoded differently
2675                     // for the different vector types.
2676                     VecALUOp::And => (0b000_01110_00_1, 0b000111),
2677                     VecALUOp::Bic => (0b000_01110_01_1, 0b000111),
2678                     VecALUOp::Orr => (0b000_01110_10_1, 0b000111),
2679                     VecALUOp::Orn => (0b000_01110_11_1, 0b000111),
2680                     VecALUOp::Eor => (0b001_01110_00_1, 0b000111),
2681                     VecALUOp::Umaxp => {
2682                         debug_assert_ne!(size, VectorSize::Size64x2);
2683 
2684                         (0b001_01110_00_1 | enc_size << 1, 0b101001)
2685                     }
2686                     VecALUOp::Add => (0b000_01110_00_1 | enc_size << 1, 0b100001),
2687                     VecALUOp::Sub => (0b001_01110_00_1 | enc_size << 1, 0b100001),
2688                     VecALUOp::Mul => {
2689                         debug_assert_ne!(size, VectorSize::Size64x2);
2690                         (0b000_01110_00_1 | enc_size << 1, 0b100111)
2691                     }
2692                     VecALUOp::Sshl => (0b000_01110_00_1 | enc_size << 1, 0b010001),
2693                     VecALUOp::Ushl => (0b001_01110_00_1 | enc_size << 1, 0b010001),
2694                     VecALUOp::Umin => {
2695                         debug_assert_ne!(size, VectorSize::Size64x2);
2696 
2697                         (0b001_01110_00_1 | enc_size << 1, 0b011011)
2698                     }
2699                     VecALUOp::Smin => {
2700                         debug_assert_ne!(size, VectorSize::Size64x2);
2701 
2702                         (0b000_01110_00_1 | enc_size << 1, 0b011011)
2703                     }
2704                     VecALUOp::Umax => {
2705                         debug_assert_ne!(size, VectorSize::Size64x2);
2706 
2707                         (0b001_01110_00_1 | enc_size << 1, 0b011001)
2708                     }
2709                     VecALUOp::Smax => {
2710                         debug_assert_ne!(size, VectorSize::Size64x2);
2711 
2712                         (0b000_01110_00_1 | enc_size << 1, 0b011001)
2713                     }
2714                     VecALUOp::Urhadd => {
2715                         debug_assert_ne!(size, VectorSize::Size64x2);
2716 
2717                         (0b001_01110_00_1 | enc_size << 1, 0b000101)
2718                     }
2719                     VecALUOp::Fadd => (0b000_01110_00_1, 0b110101),
2720                     VecALUOp::Fsub => (0b000_01110_10_1, 0b110101),
2721                     VecALUOp::Fdiv => (0b001_01110_00_1, 0b111111),
2722                     VecALUOp::Fmax => (0b000_01110_00_1, 0b111101),
2723                     VecALUOp::Fmin => (0b000_01110_10_1, 0b111101),
2724                     VecALUOp::Fmul => (0b001_01110_00_1, 0b110111),
2725                     VecALUOp::Addp => (0b000_01110_00_1 | enc_size << 1, 0b101111),
                         // NOTE(review): the Zip/Uzp/Trn templates (`0b01001110_00_0`)
                         // already have bit 9 set, which is where `q` is OR-ed in below, so
                         // these ops always encode with that bit set regardless of `size`.
                         // Confirm callers only use 128-bit arrangements for them.
2726                     VecALUOp::Zip1 => (0b01001110_00_0 | enc_size << 1, 0b001110),
2727                     VecALUOp::Zip2 => (0b01001110_00_0 | enc_size << 1, 0b011110),
2728                     VecALUOp::Sqrdmulh => {
2729                         debug_assert!(
2730                             size.lane_size() == ScalarSize::Size16
2731                                 || size.lane_size() == ScalarSize::Size32
2732                         );
2733 
2734                         (0b001_01110_00_1 | enc_size << 1, 0b101101)
2735                     }
2736                     VecALUOp::Uzp1 => (0b01001110_00_0 | enc_size << 1, 0b000110),
2737                     VecALUOp::Uzp2 => (0b01001110_00_0 | enc_size << 1, 0b010110),
2738                     VecALUOp::Trn1 => (0b01001110_00_0 | enc_size << 1, 0b001010),
2739                     VecALUOp::Trn2 => (0b01001110_00_0 | enc_size << 1, 0b011010),
2740                 };
                     // Merge the float size bit into bit 1 of `top11` for floating-point
                     // ops only.
2741                 let top11 = if is_float {
2742                     top11 | size.enc_float_size() << 1
2743                 } else {
2744                     top11
2745                 };
                     // `q` goes into bit 9 of the 11-bit prefix. Note the encoder's
                     // register argument order: `rm`, then `rn`, then `rd`.
2746                 sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd));
2747             }
2748             &Inst::VecRRRMod {
2749                 rd,
2750                 ri,
2751                 rn,
2752                 rm,
2753                 alu_op,
2754                 size,
2755             } => {
                     // Three-register vector ops that also read the destination: `rd` is
                     // tied to the input `ri` (debug-only check).
2756                 debug_assert_eq!(rd.to_reg(), ri);
                     // Only `q` is used; the integer size encoding is deliberately ignored
                     // because `Bsl` has a fixed template and the float ops use
                     // `enc_float_size()` instead.
2757                 let (q, _enc_size) = size.enc_size();
2758 
2759                 let (top11, bit15_10) = match alu_op {
2760                     VecALUModOp::Bsl => (0b001_01110_01_1, 0b000111),
2761                     VecALUModOp::Fmla => {
2762                         (0b000_01110_00_1 | (size.enc_float_size() << 1), 0b110011)
2763                     }
2764                     VecALUModOp::Fmls => {
2765                         (0b000_01110_10_1 | (size.enc_float_size() << 1), 0b110011)
2766                     }
2767                 };
                     // `q` goes into bit 9 of the 11-bit prefix.
2768                 sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd));
2769             }
2770             &Inst::VecFmlaElem {
2771                 rd,
2772                 ri,
2773                 rn,
2774                 rm,
2775                 alu_op,
2776                 size,
2777                 idx,
2778             } => {
                     // Fused multiply-add/subtract where the second multiplicand is lane
                     // `idx` of `rm`. `rd` is tied to the input `ri` (debug-only check).
2779                 debug_assert_eq!(rd.to_reg(), ri);
2780                 let idx = u32::from(idx);
2781 
2782                 let (q, _size) = size.enc_size();
                     // Only `Fmla` and `Fmls` are valid here; any other `VecALUModOp` is
                     // treated as impossible.
2783                 let o2 = match alu_op {
2784                     VecALUModOp::Fmla => 0b0,
2785                     VecALUModOp::Fmls => 0b1,
2786                     _ => unreachable!(),
2787                 };
2788 
                     // Split the lane index into the `h` and `l` bits of the encoding.
                     // 32-bit lanes use both bits (`h` is the high bit); 64-bit lanes use
                     // only `h`. The index bounds are hard asserts.
2789                 let (h, l) = match size {
2790                     VectorSize::Size32x4 => {
2791                         assert!(idx < 4);
2792                         (idx >> 1, idx & 1)
2793                     }
2794                     VectorSize::Size64x2 => {
2795                         assert!(idx < 2);
2796                         (idx, 0)
2797                     }
2798                     _ => unreachable!(),
2799                 };
2800 
                     // Prefix layout: `q` at bit 9, float size at bit 1, `l` at bit 0.
                     // Low field layout: `o2` at bit 4, `h` at bit 1.
2801                 let top11 = 0b000_011111_00 | (q << 9) | (size.enc_float_size() << 1) | l;
2802                 let bit15_10 = 0b000100 | (o2 << 4) | (h << 1);
2803                 sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd));
2804             }
2805             &Inst::VecLoadReplicate {
2806                 rd,
2807                 rn,
2808                 size,
2809                 flags,
2810             } => {
                     // Vector load through base register `rn` into `rd`. From here on,
                     // `size` is shadowed by the encoded size field rather than the
                     // original vector size.
2811                 let (q, size) = size.enc_size();
2812 
                     // The trap record must be added before the instruction bytes so that
                     // it refers to the load itself.
2813                 if let Some(trap_code) = flags.trap_code() {
2814                     // Register the offset at which the actual load instruction starts.
2815                     sink.add_trap(trap_code);
2816                 }
2817 
2818                 sink.put4(enc_ldst_vec(q, size, rn, rd));
2819             }
2820             &Inst::VecCSel { rd, rn, rm, cond } => {
                     // Vector conditional select, emitted as a four-instruction
                     // branch-over sequence rather than a single instruction: `rd`
                     // receives `rn` when `cond` holds and `rm` otherwise.
2821                 /* Emit this:
2822                       b.cond  else
2823                       mov     rd, rm
2824                       b       out
2825                      else:
2826                       mov     rd, rn
2827                      out:
2828 
2829                    Note, we could do better in the cases where rd == rn or rd == rm.
2830                 */
2831                 let else_label = sink.get_label();
2832                 let out_label = sink.get_label();
2833 
2834                 // b.cond else
                     // The offset is captured before emitting so that the label use can be
                     // attached to the branch instruction afterwards.
2835                 let br_else_offset = sink.cur_offset();
2836                 sink.put4(enc_conditional_br(
2837                     BranchTarget::Label(else_label),
2838                     CondBrKind::Cond(cond),
2839                 ));
2840                 sink.use_label_at_offset(br_else_offset, else_label, LabelUse::Branch19);
2841 
2842                 // mov rd, rm
2843                 sink.put4(enc_vecmov(/* 16b = */ true, rd, rm));
2844 
2845                 // b out
                     // Here the label use and the unconditional-branch record (covering the
                     // four bytes about to be written) are registered before the
                     // placeholder branch word is emitted.
2846                 let b_out_offset = sink.cur_offset();
2847                 sink.use_label_at_offset(b_out_offset, out_label, LabelUse::Branch26);
2848                 sink.add_uncond_branch(b_out_offset, b_out_offset + 4, out_label);
2849                 sink.put4(enc_jump26(0b000101, 0 /* will be fixed up later */));
2850 
2851                 // else:
2852                 sink.bind_label(else_label, &mut state.ctrl_plane);
2853 
2854                 // mov rd, rn
2855                 sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));
2856 
2857                 // out:
2858                 sink.bind_label(out_label, &mut state.ctrl_plane);
2859             }
            // Move general-purpose register `rn` into the NZCV flags. The
            // register number is OR-ed into the low bits of the fixed word.
            &Inst::MovToNZCV { rn } => {
                sink.put4(0xd51b4200 | machreg_to_gpr(rn));
            }
            // Move the NZCV flags into general-purpose register `rd`; same
            // layout as above with a different fixed word.
            &Inst::MovFromNZCV { rd } => {
                sink.put4(0xd53b4200 | machreg_to_gpr(rd.to_reg()));
            }
2866             &Inst::Extend {
2867                 rd,
2868                 rn,
2869                 signed: false,
2870                 from_bits: 1,
2871                 to_bits,
2872             } => {
2873                 assert!(to_bits <= 64);
2874                 // Reduce zero-extend-from-1-bit to:
2875                 // - and rd, rn, #1
2876                 // Note: This is special cased as UBFX may take more cycles
2877                 // than AND on smaller cores.
2878                 let imml = ImmLogic::maybe_from_u64(1, I32).unwrap();
2879                 Inst::AluRRImmLogic {
2880                     alu_op: ALUOp::And,
2881                     size: OperandSize::Size32,
2882                     rd,
2883                     rn,
2884                     imml,
2885                 }
2886                 .emit(sink, emit_info, state);
2887             }
2888             &Inst::Extend {
2889                 rd,
2890                 rn,
2891                 signed: false,
2892                 from_bits: 32,
2893                 to_bits: 64,
2894             } => {
2895                 let mov = Inst::Mov {
2896                     size: OperandSize::Size32,
2897                     rd,
2898                     rm: rn,
2899                 };
2900                 mov.emit(sink, emit_info, state);
2901             }
2902             &Inst::Extend {
2903                 rd,
2904                 rn,
2905                 signed,
2906                 from_bits,
2907                 to_bits,
2908             } => {
2909                 let (opc, size) = if signed {
2910                     (0b00, OperandSize::from_bits(to_bits))
2911                 } else {
2912                     (0b10, OperandSize::Size32)
2913                 };
2914                 sink.put4(enc_bfm(opc, size, rd, rn, 0, from_bits - 1));
2915             }
            // Unconditional branch to a label or to an already-resolved offset.
            &Inst::Jump { ref dest } => {
                let off = sink.cur_offset();
                // Indicate that the jump uses a label, if so, so that a fixup can occur later.
                if let Some(l) = dest.as_label() {
                    sink.use_label_at_offset(off, l, LabelUse::Branch26);
                    // Also report the branch's byte range [off, off + 4) and target to the sink.
                    sink.add_uncond_branch(off, off + 4, l);
                }
                // Emit the jump itself.
                // For a label target the offset field is emitted as zero and
                // patched later through the label use registered above.
                sink.put4(enc_jump26(0b000101, dest.as_offset26_or_zero()));
            }
            // Register-constraint pseudoinstructions: no bytes are emitted.
            &Inst::Args { .. } | &Inst::Rets { .. } => {
                // Nothing: this is a pseudoinstruction that serves
                // only to constrain registers at a certain point.
            }
            // Plain return, emitted as a single fixed instruction word.
            &Inst::Ret {} => {
                sink.put4(0xd65f03c0);
            }
2933             &Inst::AuthenticatedRet { key, is_hint } => {
2934                 let (op2, is_hint) = match key {
2935                     APIKey::AZ => (0b100, true),
2936                     APIKey::ASP => (0b101, is_hint),
2937                     APIKey::BZ => (0b110, true),
2938                     APIKey::BSP => (0b111, is_hint),
2939                 };
2940 
2941                 if is_hint {
2942                     sink.put4(key.enc_auti_hint());
2943                     Inst::Ret {}.emit(sink, emit_info, state);
2944                 } else {
2945                     sink.put4(0xd65f0bff | (op2 << 9)); // reta{key}
2946                 }
2947             }
            // Direct call: a relocated branch-with-link followed by the
            // post-call bookkeeping (stack map, call-site record, SP fixup,
            // return-value loads, and the try-call continuation jump).
            &Inst::Call { ref info } => {
                // Remember where the sequence begins; used for the patchable
                // call-site length below.
                let start = sink.cur_offset();
                // Take the stack map before emitting so that it can be attached
                // to the offset immediately after the call instruction.
                let user_stack_map = state.take_stack_map();
                // The branch target is left as zero and resolved via relocation.
                sink.add_reloc(Reloc::Arm64Call, &info.dest, 0);
                sink.put4(enc_jump26(0b100101, 0));
                if let Some(s) = user_stack_map {
                    let offset = sink.cur_offset();
                    sink.push_user_stack_map(state, offset, s);
                }

                // Record the call site; try-calls additionally carry the
                // SP-to-FP distance and their exception handlers.
                if let Some(try_call) = info.try_call_info.as_ref() {
                    sink.add_try_call_site(
                        Some(state.frame_layout.sp_to_fp()),
                        try_call.exception_handlers(&state.frame_layout),
                    );
                } else {
                    sink.add_call_site();
                }

                // If the callee popped stack space, adjust SP by the negated
                // amount right after the call.
                if info.callee_pop_size > 0 {
                    let callee_pop_size =
                        i32::try_from(info.callee_pop_size).expect("callee popped more than 2GB");
                    for inst in AArch64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {
                        inst.emit(sink, emit_info, state);
                    }
                }

                if info.patchable {
                    // Patchable calls record the byte length of everything
                    // emitted since `start` and skip the return-value loads.
                    sink.add_patchable_call_site(sink.cur_offset() - start);
                } else {
                    // Load any stack-carried return values.
                    info.emit_retval_loads::<AArch64MachineDeps, _, _>(
                        state.frame_layout().stackslots_size,
                        |inst| inst.emit(sink, emit_info, state),
                        |needed_space| Some(Inst::EmitIsland { needed_space }),
                    );
                }

                // If this is a try-call, jump to the continuation
                // (normal-return) block.
                if let Some(try_call) = info.try_call_info.as_ref() {
                    let jmp = Inst::Jump {
                        dest: BranchTarget::Label(try_call.continuation),
                    };
                    jmp.emit(sink, emit_info, state);
                }

                // We produce an island above if needed, so disable
                // the worst-case-size check in this case.
                start_off = sink.cur_offset();
            }
            // Indirect call through the register `info.dest`, followed by the
            // same post-call bookkeeping as a direct call (minus the
            // relocation and the patchable-call handling).
            &Inst::CallInd { ref info } => {
                // Take the stack map before emitting so that it can be attached
                // to the offset immediately after the call instruction.
                let user_stack_map = state.take_stack_map();
                // The target register number is placed at bit 5 of the fixed word.
                sink.put4(
                    0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(info.dest) << 5),
                );
                if let Some(s) = user_stack_map {
                    let offset = sink.cur_offset();
                    sink.push_user_stack_map(state, offset, s);
                }

                // Record the call site; try-calls additionally carry the
                // SP-to-FP distance and their exception handlers.
                if let Some(try_call) = info.try_call_info.as_ref() {
                    sink.add_try_call_site(
                        Some(state.frame_layout.sp_to_fp()),
                        try_call.exception_handlers(&state.frame_layout),
                    );
                } else {
                    sink.add_call_site();
                }

                // If the callee popped stack space, adjust SP by the negated
                // amount right after the call.
                if info.callee_pop_size > 0 {
                    let callee_pop_size =
                        i32::try_from(info.callee_pop_size).expect("callee popped more than 2GB");
                    for inst in AArch64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {
                        inst.emit(sink, emit_info, state);
                    }
                }

                // Load any stack-carried return values.
                info.emit_retval_loads::<AArch64MachineDeps, _, _>(
                    state.frame_layout().stackslots_size,
                    |inst| inst.emit(sink, emit_info, state),
                    |needed_space| Some(Inst::EmitIsland { needed_space }),
                );

                // If this is a try-call, jump to the continuation
                // (normal-return) block.
                if let Some(try_call) = info.try_call_info.as_ref() {
                    let jmp = Inst::Jump {
                        dest: BranchTarget::Label(try_call.continuation),
                    };
                    jmp.emit(sink, emit_info, state);
                }

                // We produce an island above if needed, so disable
                // the worst-case-size check in this case.
                start_off = sink.cur_offset();
            }
            // Direct tail call: the shared return-call sequence, then a plain
            // branch (no link) whose target is supplied by a relocation.
            &Inst::ReturnCall { ref info } => {
                emit_return_call_common_sequence(sink, emit_info, state, info);

                // Note: this is not `Inst::Jump { .. }.emit(..)` because we
                // have different metadata in this case: we don't have a label
                // for the target, but rather a function relocation.
                sink.add_reloc(Reloc::Arm64Call, &info.dest, 0);
                sink.put4(enc_jump26(0b000101, 0));
                // The call site is recorded after the branch word has been emitted.
                sink.add_call_site();

                // `emit_return_call_common_sequence` emits an island if
                // necessary, so we can safely disable the worst-case-size check
                // in this case.
                start_off = sink.cur_offset();
            }
            // Indirect tail call: the shared return-call sequence, then an
            // indirect branch through `info.dest`.
            &Inst::ReturnCallInd { ref info } => {
                emit_return_call_common_sequence(sink, emit_info, state, info);

                // The `IndirectBr` arm ignores `targets` when emitting, so an
                // empty list is sufficient here.
                Inst::IndirectBr {
                    rn: info.dest,
                    targets: vec![],
                }
                .emit(sink, emit_info, state);
                // The call site is recorded after the branch word has been emitted.
                sink.add_call_site();

                // `emit_return_call_common_sequence` emits an island if
                // necessary, so we can safely disable the worst-case-size check
                // in this case.
                start_off = sink.cur_offset();
            }
            // Two-way conditional branch: a conditional branch to `taken`
            // immediately followed by an unconditional branch to `not_taken`.
            &Inst::CondBr {
                taken,
                not_taken,
                kind,
            } => {
                // Conditional part first.
                let cond_off = sink.cur_offset();
                if let Some(l) = taken.as_label() {
                    sink.use_label_at_offset(cond_off, l, LabelUse::Branch19);
                    // Hand the sink the encoding of this branch with its
                    // condition inverted, alongside the branch's byte range.
                    let inverted = enc_conditional_br(taken, kind.invert()).to_le_bytes();
                    sink.add_cond_branch(cond_off, cond_off + 4, l, &inverted[..]);
                }
                sink.put4(enc_conditional_br(taken, kind));

                // Unconditional part next.
                let uncond_off = sink.cur_offset();
                if let Some(l) = not_taken.as_label() {
                    sink.use_label_at_offset(uncond_off, l, LabelUse::Branch26);
                    sink.add_uncond_branch(uncond_off, uncond_off + 4, l);
                }
                sink.put4(enc_jump26(0b000101, not_taken.as_offset26_or_zero()));
            }
            // Two-way branch on a single bit of `rn`: a test-bit-and-branch to
            // `taken` followed by an unconditional branch to `not_taken`.
            &Inst::TestBitAndBranch {
                taken,
                not_taken,
                kind,
                rn,
                bit,
            } => {
                // Emit the conditional branch first
                let cond_off = sink.cur_offset();
                if let Some(l) = taken.as_label() {
                    // This branch form uses the shorter `Branch14` label-use kind.
                    sink.use_label_at_offset(cond_off, l, LabelUse::Branch14);
                    // Hand the sink the encoding of this branch with the
                    // complementary test, alongside the branch's byte range.
                    let inverted =
                        enc_test_bit_and_branch(kind.complement(), taken, rn, bit).to_le_bytes();
                    sink.add_cond_branch(cond_off, cond_off + 4, l, &inverted[..]);
                }
                sink.put4(enc_test_bit_and_branch(kind, taken, rn, bit));

                // Unconditional part next.
                let uncond_off = sink.cur_offset();
                if let Some(l) = not_taken.as_label() {
                    sink.use_label_at_offset(uncond_off, l, LabelUse::Branch26);
                    sink.add_uncond_branch(uncond_off, uncond_off + 4, l);
                }
                sink.put4(enc_jump26(0b000101, not_taken.as_offset26_or_zero()));
            }
            // Conditional trap: a conditional branch to a label that the sink
            // hands back for the deferred trap.
            &Inst::TrapIf { kind, trap_code } => {
                // `defer_trap` returns the label to branch to for this trap code.
                let label = sink.defer_trap(trap_code);
                // condbr KIND, LABEL
                let off = sink.cur_offset();
                sink.put4(enc_conditional_br(BranchTarget::Label(label), kind));
                // `off` was captured before the word was emitted, so the label
                // use points at the branch itself.
                sink.use_label_at_offset(off, label, LabelUse::Branch19);
            }
            // Indirect branch through `rn`; the `targets` list is not used for emission.
            &Inst::IndirectBr { rn, .. } => {
                sink.put4(enc_br(rn));
            }
            // Zero-length no-op: emits nothing.
            &Inst::Nop0 => {}
            // Four-byte no-op: a single fixed instruction word.
            &Inst::Nop4 => {
                sink.put4(0xd503201f);
            }
            // Breakpoint: a single fixed instruction word.
            &Inst::Brk => {
                sink.put4(0xd43e0000);
            }
            // Unconditional trap: record the trap code at the current offset,
            // then emit the shared trap opcode bytes.
            &Inst::Udf { trap_code } => {
                sink.add_trap(trap_code);
                sink.put_data(Inst::TRAP_OPCODE);
            }
            // PC-relative address computation. `off` must lie strictly between
            // -2^20 and 2^20; anything else is a caller bug and panics.
            &Inst::Adr { rd, off } => {
                assert!(off > -(1 << 20));
                assert!(off < (1 << 20));
                sink.put4(enc_adr(off, rd));
            }
            // Page-relative variant with the same strict bound on `off`.
            // NOTE(review): `off` is presumably in units of 4 KiB pages, as
            // `enc_adrp` expects; confirm against its definition.
            &Inst::Adrp { rd, off } => {
                assert!(off > -(1 << 20));
                assert!(off < (1 << 20));
                sink.put4(enc_adrp(off, rd));
            }
            // Raw 4-byte data word placed directly in the instruction stream.
            &Inst::Word4 { data } => {
                sink.put4(data);
            }
            // Raw 8-byte data word placed directly in the instruction stream.
            &Inst::Word8 { data } => {
                sink.put8(data);
            }
            // Jump-table dispatch: a bounds-check branch to `default`, index
            // sanitisation, a PC-relative table lookup, an indirect branch, and
            // finally the table of 32-bit entries itself, all emitted inline.
            &Inst::JTSequence {
                ridx,
                rtmp1,
                rtmp2,
                default,
                ref targets,
                ..
            } => {
                // This sequence is *one* instruction in the vcode, and is expanded only here at
                // emission time, because we cannot allow the regalloc to insert spills/reloads in
                // the middle; we depend on hardcoded PC-rel addressing below.

                // Branch to default when condition code from prior comparison indicates.
                let br =
                    enc_conditional_br(BranchTarget::Label(default), CondBrKind::Cond(Cond::Hs));

                // No need to inform the sink's branch folding logic about this branch, because it
                // will not be merged with any other branch, flipped, or elided (it is not preceded
                // or succeeded by any other branch). Just emit it with the label use.
                let default_br_offset = sink.cur_offset();
                sink.use_label_at_offset(default_br_offset, default, LabelUse::Branch19);
                sink.put4(br);

                // Overwrite the index with a zero when the above
                // branch misspeculates (Spectre mitigation). Save the
                // resulting index in rtmp2.
                let inst = Inst::CSel {
                    rd: rtmp2,
                    cond: Cond::Hs,
                    rn: zero_reg(),
                    rm: ridx,
                };
                inst.emit(sink, emit_info, state);
                // Prevent any data value speculation if spectre mitigations are
                // enabled.
                if emit_info.flags.enable_table_access_spectre_mitigation()
                    && emit_info.isa_flags.use_csdb()
                {
                    Inst::Csdb.emit(sink, emit_info, state);
                }

                // Load address of jump table
                // The offset of 16 skips the four instructions from this `adr`
                // through the indirect branch (assuming each emits exactly one
                // 4-byte word), so `rtmp1` points at the first table entry.
                let inst = Inst::Adr { rd: rtmp1, off: 16 };
                inst.emit(sink, emit_info, state);
                // Load value out of jump table
                // `rtmp2` holds the sanitised index on entry and is overwritten
                // with the sign-extended 32-bit table entry.
                let inst = Inst::SLoad32 {
                    rd: rtmp2,
                    mem: AMode::reg_plus_reg_scaled_extended(
                        rtmp1.to_reg(),
                        rtmp2.to_reg(),
                        ExtendOp::UXTW,
                    ),
                    flags: MemFlags::trusted(),
                };
                inst.emit(sink, emit_info, state);
                // Add base of jump table to jump-table-sourced block offset
                let inst = Inst::AluRRR {
                    alu_op: ALUOp::Add,
                    size: OperandSize::Size64,
                    rd: rtmp1,
                    rn: rtmp1.to_reg(),
                    rm: rtmp2.to_reg(),
                };
                inst.emit(sink, emit_info, state);
                // Branch to computed address. (`targets` here is only used for successor queries
                // and is not needed for emission.)
                let inst = Inst::IndirectBr {
                    rn: rtmp1.to_reg(),
                    targets: vec![],
                };
                inst.emit(sink, emit_info, state);
                // Emit jump table (table of 32-bit offsets).
                let jt_off = sink.cur_offset();
                for &target in targets.iter() {
                    let word_off = sink.cur_offset();
                    // off_into_table is an addend here embedded in the label to be later patched
                    // at the end of codegen. The offset is initially relative to this jump table
                    // entry; with the extra addend, it'll be relative to the jump table's start,
                    // after patching.
                    let off_into_table = word_off - jt_off;
                    sink.use_label_at_offset(word_off, target, LabelUse::PCRel32);
                    sink.put4(off_into_table);
                }

                // Lowering produces an EmitIsland before using a JTSequence, so we can safely
                // disable the worst-case-size check in this case.
                start_off = sink.cur_offset();
            }
3248             &Inst::LoadExtNameGot { rd, ref name } => {
3249                 // See this CE Example for the variations of this with and without BTI & PAUTH
3250                 // https://godbolt.org/z/ncqjbbvvn
3251                 //
3252                 // Emit the following code:
3253                 //   adrp    rd, :got:X
3254                 //   ldr     rd, [rd, :got_lo12:X]
3255 
3256                 // adrp rd, symbol
3257                 sink.add_reloc(Reloc::Aarch64AdrGotPage21, &**name, 0);
3258                 let inst = Inst::Adrp { rd, off: 0 };
3259                 inst.emit(sink, emit_info, state);
3260 
3261                 // ldr rd, [rd, :got_lo12:X]
3262                 sink.add_reloc(Reloc::Aarch64Ld64GotLo12Nc, &**name, 0);
3263                 let inst = Inst::ULoad64 {
3264                     rd,
3265                     mem: AMode::reg(rd.to_reg()),
3266                     flags: MemFlags::trusted(),
3267                 };
3268                 inst.emit(sink, emit_info, state);
3269             }
3270             &Inst::LoadExtNameNear {
3271                 rd,
3272                 ref name,
3273                 offset,
3274             } => {
3275                 // Emit the following code:
3276                 //   adrp    rd, X
3277                 //   add     rd, rd, :lo12:X
3278                 //
3279                 // See https://godbolt.org/z/855KEvM5r for an example.
3280 
3281                 // adrp rd, symbol
3282                 sink.add_reloc(Reloc::Aarch64AdrPrelPgHi21, &**name, offset);
3283                 let inst = Inst::Adrp { rd, off: 0 };
3284                 inst.emit(sink, emit_info, state);
3285 
3286                 // add rd, rd, :lo12:X
3287                 sink.add_reloc(Reloc::Aarch64AddAbsLo12Nc, &**name, offset);
3288                 let inst = Inst::AluRRImm12 {
3289                     alu_op: ALUOp::Add,
3290                     size: OperandSize::Size64,
3291                     rd,
3292                     rn: rd.to_reg(),
3293                     imm12: Imm12::ZERO,
3294                 };
3295                 inst.emit(sink, emit_info, state);
3296             }
            // Load an absolute 64-bit symbol address from a literal embedded in
            // the instruction stream.
            &Inst::LoadExtNameFar {
                rd,
                ref name,
                offset,
            } => {
                // With absolute offsets we set up a load from a preallocated space, and then jump
                // over it.
                //
                // Emit the following code:
                //   ldr     rd, #8
                //   b       #0x10
                //   <8 byte space>
                //
                // Layout relative to the start of the sequence: the load is at
                // +0, the branch at +4, the literal at +8..+16. The load's
                // PC-relative offset of 8 therefore reaches the literal, and the
                // branch's relative offset of 12 lands at +16 (0x10), just past it.

                let inst = Inst::ULoad64 {
                    rd,
                    mem: AMode::Label {
                        label: MemLabel::PCRel(8),
                    },
                    flags: MemFlags::trusted(),
                };
                inst.emit(sink, emit_info, state);
                let inst = Inst::Jump {
                    dest: BranchTarget::ResolvedOffset(12),
                };
                inst.emit(sink, emit_info, state);
                // The literal is emitted as zero; the absolute 8-byte relocation
                // added just before it supplies the real value.
                sink.add_reloc(Reloc::Abs8, &**name, offset);
                sink.put8(0);
            }
3325             &Inst::LoadAddr { rd, ref mem } => {
3326                 let mem = mem.clone();
3327                 let (mem_insts, mem) = mem_finalize(Some(sink), &mem, I8, state);
3328                 for inst in mem_insts.into_iter() {
3329                     inst.emit(sink, emit_info, state);
3330                 }
3331 
3332                 let (reg, index_reg, offset) = match mem {
3333                     AMode::RegExtended { rn, rm, extendop } => {
3334                         let r = rn;
3335                         (r, Some((rm, extendop)), 0)
3336                     }
3337                     AMode::Unscaled { rn, simm9 } => {
3338                         let r = rn;
3339                         (r, None, simm9.value())
3340                     }
3341                     AMode::UnsignedOffset { rn, uimm12 } => {
3342                         let r = rn;
3343                         (r, None, uimm12.value() as i32)
3344                     }
3345                     _ => panic!("Unsupported case for LoadAddr: {mem:?}"),
3346                 };
3347                 let abs_offset = if offset < 0 {
3348                     -offset as u64
3349                 } else {
3350                     offset as u64
3351                 };
3352                 let alu_op = if offset < 0 { ALUOp::Sub } else { ALUOp::Add };
3353 
3354                 if let Some((idx, extendop)) = index_reg {
3355                     let add = Inst::AluRRRExtend {
3356                         alu_op: ALUOp::Add,
3357                         size: OperandSize::Size64,
3358                         rd,
3359                         rn: reg,
3360                         rm: idx,
3361                         extendop,
3362                     };
3363 
3364                     add.emit(sink, emit_info, state);
3365                 } else if offset == 0 {
3366                     if reg != rd.to_reg() {
3367                         let mov = Inst::Mov {
3368                             size: OperandSize::Size64,
3369                             rd,
3370                             rm: reg,
3371                         };
3372 
3373                         mov.emit(sink, emit_info, state);
3374                     }
3375                 } else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {
3376                     let add = Inst::AluRRImm12 {
3377                         alu_op,
3378                         size: OperandSize::Size64,
3379                         rd,
3380                         rn: reg,
3381                         imm12,
3382                     };
3383                     add.emit(sink, emit_info, state);
3384                 } else {
3385                     // Use `tmp2` here: `reg` may be `spilltmp` if the `AMode` on this instruction
3386                     // was initially an `SPOffset`. Assert that `tmp2` is truly free to use. Note
3387                     // that no other instructions will be inserted here (we're emitting directly),
3388                     // and a live range of `tmp2` should not span this instruction, so this use
3389                     // should otherwise be correct.
3390                     debug_assert!(rd.to_reg() != tmp2_reg());
3391                     debug_assert!(reg != tmp2_reg());
3392                     let tmp = writable_tmp2_reg();
3393                     for insn in Inst::load_constant(tmp, abs_offset).into_iter() {
3394                         insn.emit(sink, emit_info, state);
3395                     }
3396                     let add = Inst::AluRRR {
3397                         alu_op,
3398                         size: OperandSize::Size64,
3399                         rd,
3400                         rn: reg,
3401                         rm: tmp.to_reg(),
3402                     };
3403                     add.emit(sink, emit_info, state);
3404                 }
3405             }
3406             &Inst::Paci { key } => {
3407                 let (crm, op2) = match key {
3408                     APIKey::AZ => (0b0011, 0b000),
3409                     APIKey::ASP => (0b0011, 0b001),
3410                     APIKey::BZ => (0b0011, 0b010),
3411                     APIKey::BSP => (0b0011, 0b011),
3412                 };
3413 
3414                 sink.put4(0xd503211f | (crm << 8) | (op2 << 5));
3415             }
3416             &Inst::Xpaclri => sink.put4(0xd50320ff),
3417             &Inst::Bti { targets } => {
3418                 let targets = match targets {
3419                     BranchTargetType::None => 0b00,
3420                     BranchTargetType::C => 0b01,
3421                     BranchTargetType::J => 0b10,
3422                     BranchTargetType::JC => 0b11,
3423                 };
3424 
3425                 sink.put4(0xd503241f | targets << 6);
3426             }
3427             &Inst::EmitIsland { needed_space } => {
3428                 if sink.island_needed(needed_space + 4) {
3429                     let jump_around_label = sink.get_label();
3430                     let jmp = Inst::Jump {
3431                         dest: BranchTarget::Label(jump_around_label),
3432                     };
3433                     jmp.emit(sink, emit_info, state);
3434                     sink.emit_island(needed_space + 4, &mut state.ctrl_plane);
3435                     sink.bind_label(jump_around_label, &mut state.ctrl_plane);
3436                 }
3437             }
3438 
            // ELF thread-local address computation using the TLSDESC sequence.
            // The result is produced in x0, which `rd` is required to be.
            &Inst::ElfTlsGetAddr {
                ref symbol,
                rd,
                tmp,
            } => {
                assert_eq!(xreg(0), rd.to_reg());

                // See the original proposal for TLSDESC.
                // http://www.fsfla.org/~lxoliva/writeups/TLS/paper-lk2006.pdf
                //
                // Implement the TLSDESC instruction sequence:
                //   adrp x0, :tlsdesc:tlsvar
                //   ldr  tmp, [x0, :tlsdesc_lo12:tlsvar]
                //   add  x0, x0, :tlsdesc_lo12:tlsvar
                //   blr  tmp
                //   mrs  tmp, tpidr_el0
                //   add  x0, x0, tmp
                //
                // This is the instruction sequence that GCC emits for ELF GD TLS Relocations in aarch64
                // See: https://gcc.godbolt.org/z/e4j7MdErh

                // Each of the first four instructions is preceded by its own
                // relocation against `symbol`; the immediates are emitted as zero.

                // adrp x0, :tlsdesc:tlsvar
                sink.add_reloc(Reloc::Aarch64TlsDescAdrPage21, &**symbol, 0);
                Inst::Adrp { rd, off: 0 }.emit(sink, emit_info, state);

                // ldr  tmp, [x0, :tlsdesc_lo12:tlsvar]
                sink.add_reloc(Reloc::Aarch64TlsDescLd64Lo12, &**symbol, 0);
                Inst::ULoad64 {
                    rd: tmp,
                    mem: AMode::reg(rd.to_reg()),
                    flags: MemFlags::trusted(),
                }
                .emit(sink, emit_info, state);

                // add x0, x0, :tlsdesc_lo12:tlsvar
                sink.add_reloc(Reloc::Aarch64TlsDescAddLo12, &**symbol, 0);
                Inst::AluRRImm12 {
                    alu_op: ALUOp::Add,
                    size: OperandSize::Size64,
                    rd,
                    rn: rd.to_reg(),
                    imm12: Imm12::maybe_from_u64(0).unwrap(),
                }
                .emit(sink, emit_info, state);

                // blr tmp
                // Emitted through `CallInd` with an empty `CallInfo`, so the
                // usual call-site bookkeeping of that arm also runs here.
                sink.add_reloc(Reloc::Aarch64TlsDescCall, &**symbol, 0);
                Inst::CallInd {
                    info: crate::isa::Box::new(CallInfo::empty(tmp.to_reg(), CallConv::SystemV)),
                }
                .emit(sink, emit_info, state);

                // mrs tmp, tpidr_el0
                // `tmp` is reused here; its previous value (the call target) is dead.
                sink.put4(0xd53bd040 | machreg_to_gpr(tmp.to_reg()));

                // add x0, x0, tmp
                Inst::AluRRR {
                    alu_op: ALUOp::Add,
                    size: OperandSize::Size64,
                    rd,
                    rn: rd.to_reg(),
                    rm: tmp.to_reg(),
                }
                .emit(sink, emit_info, state);
            }
3504 
            // Mach-O thread-local address computation via the variable's TLV
            // descriptor. The result is produced in x0, which `rd` is required
            // to be; x1 is used as a fixed temporary.
            &Inst::MachOTlsGetAddr { ref symbol, rd } => {
                // Each thread local variable gets a descriptor, where the first xword of the descriptor is a pointer
                // to a function that takes the descriptor address in x0, and after the function returns x0
                // contains the address for the thread local variable
                //
                // what we want to emit is basically:
                //
                // adrp x0, <label>@TLVPPAGE  ; Load the address of the page of the thread local variable pointer (TLVP)
                // ldr x0, [x0, <label>@TLVPPAGEOFF] ; Load the descriptor's address into x0
                // ldr x1, [x0] ; Load the function pointer (the first part of the descriptor)
                // blr x1 ; Call the function pointer with the descriptor address in x0
                // ; x0 now contains the TLV address

                assert_eq!(xreg(0), rd.to_reg());
                let rtmp = writable_xreg(1);

                // The first two instructions are emitted as raw words with x0
                // hard-coded and zero immediates; the relocations added just
                // before each one supply the page and page-offset parts.

                // adrp x0, <label>@TLVPPAGE
                sink.add_reloc(Reloc::MachOAarch64TlsAdrPage21, symbol, 0);
                sink.put4(0x90000000);

                // ldr x0, [x0, <label>@TLVPPAGEOFF]
                sink.add_reloc(Reloc::MachOAarch64TlsAdrPageOff12, symbol, 0);
                sink.put4(0xf9400000);

                // load [x0] into temp register
                Inst::ULoad64 {
                    rd: rtmp,
                    mem: AMode::reg(rd.to_reg()),
                    flags: MemFlags::trusted(),
                }
                .emit(sink, emit_info, state);

                // call function pointer in temp register
                // Emitted through `CallInd` with an empty `CallInfo`, so the
                // usual call-site bookkeeping of that arm also runs here.
                Inst::CallInd {
                    info: crate::isa::Box::new(CallInfo::empty(
                        rtmp.to_reg(),
                        CallConv::AppleAarch64,
                    )),
                }
                .emit(sink, emit_info, state);
            }
3546 
3547             &Inst::Unwind { ref inst } => {
3548                 sink.add_unwind(inst.clone());
3549             }
3550 
3551             &Inst::DummyUse { .. } => {}
3552 
3553             &Inst::LabelAddress { dst, label } => {
3554                 // We emit an ADR only, which is +/- 2MiB range. This
3555                 // should be sufficient for the typical use-case of
                // this instruction, which is in small trampolines to
3557                 // get exception-handler addresses.
3558                 let inst = Inst::Adr { rd: dst, off: 0 };
3559                 let offset = sink.cur_offset();
3560                 inst.emit(sink, emit_info, state);
3561                 sink.use_label_at_offset(offset, label, LabelUse::Adr21);
3562             }
3563 
3564             &Inst::SequencePoint { .. } => {
3565                 // Nothing.
3566             }
3567 
3568             &Inst::StackProbeLoop { start, end, step } => {
3569                 assert!(emit_info.flags.enable_probestack());
3570 
3571                 // The loop generated here uses `start` as a counter register to
3572                 // count backwards until negating it exceeds `end`. In other
3573                 // words `start` is an offset from `sp` we're testing where
3574                 // `end` is the max size we need to test. The loop looks like:
3575                 //
3576                 //      loop_start:
3577                 //          sub start, start, #step
                //          str wzr, [sp, start]
                //          cmn start, end
                //          b.gt loop_start
3581                 //      loop_end:
3582                 //
3583                 // Note that this loop cannot use the spilltmp and tmp2
3584                 // registers as those are currently used as the input to this
3585                 // loop when generating the instruction. This means that some
3586                 // more flavorful address modes and lowerings need to be
3587                 // avoided.
3588                 //
3589                 // Perhaps someone more clever than I can figure out how to use
3590                 // `subs` or the like and skip the `cmn`, but I can't figure it
3591                 // out at this time.
3592 
3593                 let loop_start = sink.get_label();
3594                 sink.bind_label(loop_start, &mut state.ctrl_plane);
3595 
3596                 Inst::AluRRImm12 {
3597                     alu_op: ALUOp::Sub,
3598                     size: OperandSize::Size64,
3599                     rd: start,
3600                     rn: start.to_reg(),
3601                     imm12: step,
3602                 }
3603                 .emit(sink, emit_info, state);
3604                 Inst::Store32 {
3605                     rd: regs::zero_reg(),
3606                     mem: AMode::RegReg {
3607                         rn: regs::stack_reg(),
3608                         rm: start.to_reg(),
3609                     },
3610                     flags: MemFlags::trusted(),
3611                 }
3612                 .emit(sink, emit_info, state);
3613                 Inst::AluRRR {
3614                     alu_op: ALUOp::AddS,
3615                     size: OperandSize::Size64,
3616                     rd: regs::writable_zero_reg(),
3617                     rn: start.to_reg(),
3618                     rm: end,
3619                 }
3620                 .emit(sink, emit_info, state);
3621 
3622                 let loop_end = sink.get_label();
3623                 Inst::CondBr {
3624                     taken: BranchTarget::Label(loop_start),
3625                     not_taken: BranchTarget::Label(loop_end),
3626                     kind: CondBrKind::Cond(Cond::Gt),
3627                 }
3628                 .emit(sink, emit_info, state);
3629                 sink.bind_label(loop_end, &mut state.ctrl_plane);
3630             }
3631         }
3632 
3633         let end_off = sink.cur_offset();
3634         debug_assert!(
3635             (end_off - start_off) <= Inst::worst_case_size()
3636                 || matches!(self, Inst::EmitIsland { .. }),
3637             "Worst case size exceed for {:?}: {}",
3638             self,
3639             end_off - start_off
3640         );
3641 
3642         state.clear_post_insn();
3643     }
3644 
pretty_print_inst(&self, state: &mut Self::State) -> String3645     fn pretty_print_inst(&self, state: &mut Self::State) -> String {
3646         self.print_with_state(state)
3647     }
3648 }
3649 
emit_return_call_common_sequence<T>( sink: &mut MachBuffer<Inst>, emit_info: &EmitInfo, state: &mut EmitState, info: &ReturnCallInfo<T>, )3650 fn emit_return_call_common_sequence<T>(
3651     sink: &mut MachBuffer<Inst>,
3652     emit_info: &EmitInfo,
3653     state: &mut EmitState,
3654     info: &ReturnCallInfo<T>,
3655 ) {
3656     for inst in AArch64MachineDeps::gen_clobber_restore(
3657         CallConv::Tail,
3658         &emit_info.flags,
3659         state.frame_layout(),
3660     ) {
3661         inst.emit(sink, emit_info, state);
3662     }
3663 
3664     let setup_area_size = state.frame_layout().setup_area_size;
3665     if setup_area_size > 0 {
3666         // N.B.: sp is already adjusted to the appropriate place by the
3667         // clobber-restore code (which also frees the fixed frame). Hence, there
3668         // is no need for the usual `mov sp, fp` here.
3669 
3670         // `ldp fp, lr, [sp], #16`
3671         Inst::LoadP64 {
3672             rt: writable_fp_reg(),
3673             rt2: writable_link_reg(),
3674             mem: PairAMode::SPPostIndexed {
3675                 // TODO: we could fold the increment for incoming_args_diff here, as long as that
3676                 // value is less than 502*8, by adding it to `setup_area_size`.
3677                 // https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDP--Load-Pair-of-Registers-
3678                 simm7: SImm7Scaled::maybe_from_i64(i64::from(setup_area_size), types::I64).unwrap(),
3679             },
3680             flags: MemFlags::trusted(),
3681         }
3682         .emit(sink, emit_info, state);
3683     }
3684 
3685     // Adjust SP to account for the possible over-allocation in the prologue.
3686     let incoming_args_diff = state.frame_layout().tail_args_size - info.new_stack_arg_size;
3687     if incoming_args_diff > 0 {
3688         for inst in
3689             AArch64MachineDeps::gen_sp_reg_adjust(i32::try_from(incoming_args_diff).unwrap())
3690         {
3691             inst.emit(sink, emit_info, state);
3692         }
3693     }
3694 
3695     if let Some(key) = info.key {
3696         sink.put4(key.enc_auti_hint());
3697     }
3698 }
3699