1 //! This implements the VCode container: a CFG of Insts that have been lowered. 2 //! 3 //! VCode is virtual-register code. An instruction in VCode is almost a machine 4 //! instruction; however, its register slots can refer to virtual registers in 5 //! addition to real machine registers. 6 //! 7 //! VCode is structured with traditional basic blocks, and 8 //! each block must be terminated by an unconditional branch (one target), a 9 //! conditional branch (two targets), or a return (no targets). Note that this 10 //! slightly differs from the machine code of most ISAs: in most ISAs, a 11 //! conditional branch has one target (and the not-taken case falls through). 12 //! However, we expect that machine backends will elide branches to the following 13 //! block (i.e., zero-offset jumps), and will be able to codegen a branch-cond / 14 //! branch-uncond pair if *both* targets are not fallthrough. This allows us to 15 //! play with layout prior to final binary emission, as well, if we want. 16 //! 17 //! See the main module comment in `mod.rs` for more details on the VCode-based 18 //! backend pipeline. 19 20 use crate::CodegenError; 21 use crate::FxHashMap; 22 use crate::ir::{self, Constant, ConstantData, ValueLabel, types}; 23 use crate::ranges::Ranges; 24 use crate::timing; 25 use crate::trace; 26 use crate::{LabelValueLoc, ValueLocRange}; 27 use crate::{machinst::*, trace_log_enabled}; 28 use regalloc2::{ 29 Edit, Function as RegallocFunction, InstOrEdit, InstPosition, InstRange, Operand, 30 OperandConstraint, OperandKind, PRegSet, ProgPoint, RegClass, 31 }; 32 33 use crate::HashMap; 34 use crate::hash_map::Entry; 35 use core::cmp::Ordering; 36 use core::fmt::{self, Write}; 37 use core::mem::take; 38 use core::ops::Range; 39 use cranelift_entity::{Keys, entity_impl}; 40 41 /// Index referring to an instruction in VCode. 42 pub type InsnIndex = regalloc2::Inst; 43 44 /// Extension trait for `InsnIndex` to allow conversion to a 45 /// `BackwardsInsnIndex`. 46 trait ToBackwardsInsnIndex { to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex47 fn to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex; 48 } 49 50 impl ToBackwardsInsnIndex for InsnIndex { to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex51 fn to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex { 52 BackwardsInsnIndex::new(num_insts - self.index() - 1) 53 } 54 } 55 56 /// An index referring to an instruction in the VCode when it is backwards, 57 /// during VCode construction. 58 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] 59 #[cfg_attr( 60 feature = "enable-serde", 61 derive(::serde::Serialize, ::serde::Deserialize) 62 )] 63 pub struct BackwardsInsnIndex(InsnIndex); 64 65 impl BackwardsInsnIndex { new(i: usize) -> Self66 pub fn new(i: usize) -> Self { 67 BackwardsInsnIndex(InsnIndex::new(i)) 68 } 69 } 70 71 /// Index referring to a basic block in VCode. 72 pub type BlockIndex = regalloc2::Block; 73 74 /// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be 75 /// a `MachInst` and it must be able to emit itself at least to a `SizeCodeSink`. 76 pub trait VCodeInst: MachInst + MachInstEmit {} 77 impl<I: MachInst + MachInstEmit> VCodeInst for I {} 78 79 /// A function in "VCode" (virtualized-register code) form, after 80 /// lowering. This is essentially a standard CFG of basic blocks, 81 /// where each basic block consists of lowered instructions produced 82 /// by the machine-specific backend. 83 /// 84 /// Note that the VCode is immutable once produced, and is not 85 /// modified by register allocation in particular. Rather, register 86 /// allocation on the `VCode` produces a separate `regalloc2::Output` 87 /// struct, and this can be passed to `emit`. `emit` in turn does not 88 /// modify the vcode, but produces an `EmitResult`, which contains the 89 /// machine code itself, and the associated disassembly and/or 90 /// metadata as requested. 91 pub struct VCode<I: VCodeInst> { 92 /// VReg IR-level types. 93 vreg_types: Vec<Type>, 94 95 /// Lowered machine instructions in order corresponding to the original IR. 96 insts: Vec<I>, 97 98 /// A map from backwards instruction index to the user stack map for that 99 /// instruction. 100 /// 101 /// This is a sparse side table that only has entries for instructions that 102 /// are safepoints, and only for a subset of those that have an associated 103 /// user stack map. 104 user_stack_maps: FxHashMap<BackwardsInsnIndex, ir::UserStackMap>, 105 106 /// A map from backwards instruction index to the debug tags for 107 /// that instruction. Each entry indexes a range in the 108 /// `debug_tag_pool`. 109 debug_tags: FxHashMap<BackwardsInsnIndex, Range<u32>>, 110 111 /// Pooled storage for sequences of debug tags; indexed by entries 112 /// in `debug_tags`. 113 debug_tag_pool: Vec<ir::DebugTag>, 114 115 /// Operands: pre-regalloc references to virtual registers with 116 /// constraints, in one flattened array. This allows the regalloc 117 /// to efficiently access all operands without requiring expensive 118 /// matches or method invocations on insts. 119 operands: Vec<Operand>, 120 121 /// Operand index ranges: for each instruction in `insts`, there 122 /// is a tuple here providing the range in `operands` for that 123 /// instruction's operands. 124 operand_ranges: Ranges, 125 126 /// Clobbers: a sparse map from instruction indices to clobber masks. 127 clobbers: FxHashMap<InsnIndex, PRegSet>, 128 129 /// Source locations for each instruction. (`SourceLoc` is a `u32`, so it is 130 /// reasonable to keep one of these per instruction.) 131 srclocs: Vec<RelSourceLoc>, 132 133 /// Entry block. 134 entry: BlockIndex, 135 136 /// Block instruction indices. 137 block_ranges: Ranges, 138 139 /// Block successors: index range in the `block_succs` list. 140 block_succ_range: Ranges, 141 142 /// Block successor lists, concatenated into one vec. The 143 /// `block_succ_range` list of tuples above gives (start, end) 144 /// ranges within this list that correspond to each basic block's 145 /// successors. 146 block_succs: Vec<regalloc2::Block>, 147 148 /// Block predecessors: index range in the `block_preds` list. 149 block_pred_range: Ranges, 150 151 /// Block predecessor lists, concatenated into one vec. The 152 /// `block_pred_range` list of tuples above gives (start, end) 153 /// ranges within this list that correspond to each basic block's 154 /// predecessors. 155 block_preds: Vec<regalloc2::Block>, 156 157 /// Block parameters: index range in `block_params` below. 158 block_params_range: Ranges, 159 160 /// Block parameter lists, concatenated into one vec. The 161 /// `block_params_range` list of tuples above gives (start, end) 162 /// ranges within this list that correspond to each basic block's 163 /// blockparam vregs. 164 block_params: Vec<regalloc2::VReg>, 165 166 /// Outgoing block arguments on branch instructions, concatenated 167 /// into one list. 168 /// 169 /// Note that this is conceptually a 3D array: we have a VReg list 170 /// per block, per successor. We flatten those three dimensions 171 /// into this 1D vec, then store index ranges in two levels of 172 /// indirection. 173 /// 174 /// Indexed by the indices in `branch_block_arg_succ_range`. 175 branch_block_args: Vec<regalloc2::VReg>, 176 177 /// Array of sequences of (start, end) tuples in 178 /// `branch_block_args`, one for each successor; these sequences 179 /// for each block are concatenated. 180 /// 181 /// Indexed by the indices in `branch_block_arg_succ_range`. 182 branch_block_arg_range: Ranges, 183 184 /// For a given block, indices in `branch_block_arg_range` 185 /// corresponding to all of its successors. 186 branch_block_arg_succ_range: Ranges, 187 188 /// Block-order information. 189 block_order: BlockLoweringOrder, 190 191 /// ABI object. 192 pub(crate) abi: Callee<I::ABIMachineSpec>, 193 194 /// Constant information used during code emission. This should be 195 /// immutable across function compilations within the same module. 196 emit_info: I::Info, 197 198 /// Constants. 199 pub(crate) constants: VCodeConstants, 200 201 /// Value labels for debuginfo attached to vregs. 202 debug_value_labels: Vec<(VReg, InsnIndex, InsnIndex, u32)>, 203 204 pub(crate) sigs: SigSet, 205 206 log2_min_function_alignment: u8, 207 } 208 209 /// The result of `VCode::emit`. Contains all information computed 210 /// during emission: actual machine code, optionally a disassembly, 211 /// and optionally metadata about the code layout. 212 pub struct EmitResult { 213 /// The MachBuffer containing the machine code. 214 pub buffer: MachBufferFinalized<Stencil>, 215 216 /// Offset of each basic block, recorded during emission. Computed 217 /// only if `machine_code_cfg_info` is enabled. 218 pub bb_offsets: Vec<CodeOffset>, 219 220 /// Final basic-block edges, in terms of code offsets of 221 /// bb-starts. Computed only if `machine_code_cfg_info` is enabled. 222 pub bb_edges: Vec<(CodeOffset, CodeOffset)>, 223 224 /// The pretty-printed disassembly, if any. This uses the same 225 /// pretty-printing for MachInsts as the pre-regalloc VCode Debug 226 /// implementation, but additionally includes the prologue and 227 /// epilogue(s), and makes use of the regalloc results. 228 pub disasm: Option<String>, 229 230 /// Value-labels information (debug metadata). 231 pub value_labels_ranges: ValueLabelsRanges, 232 } 233 234 /// A builder for a VCode function body. 235 /// 236 /// This builder has the ability to accept instructions in either 237 /// forward or reverse order, depending on the pass direction that 238 /// produces the VCode. The lowering from CLIF to VCode<MachInst> 239 /// ordinarily occurs in reverse order (in order to allow instructions 240 /// to be lowered only if used, and not merged) so a reversal will 241 /// occur at the end of lowering to ensure the VCode is in machine 242 /// order. 243 /// 244 /// If built in reverse, block and instruction indices used once the 245 /// VCode is built are relative to the final (reversed) order, not the 246 /// order of construction. Note that this means we do not know the 247 /// final block or instruction indices when building, so we do not 248 /// hand them out. (The user is assumed to know them when appending 249 /// terminator instructions with successor blocks.) 250 pub struct VCodeBuilder<I: VCodeInst> { 251 /// In-progress VCode. 252 pub(crate) vcode: VCode<I>, 253 254 /// In what direction is the build occurring? 255 direction: VCodeBuildDirection, 256 257 /// Debug-value label in-progress map, keyed by label. For each 258 /// label, we keep disjoint ranges mapping to vregs. We'll flatten 259 /// this into (vreg, range, label) tuples when done. 260 debug_info: FxHashMap<ValueLabel, Vec<(InsnIndex, InsnIndex, VReg)>>, 261 } 262 263 /// Direction in which a VCodeBuilder builds VCode. 264 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 265 pub enum VCodeBuildDirection { 266 // TODO: add `Forward` once we need it and can test it adequately. 267 /// Backward-build pass: we expect the producer to call `emit()` 268 /// with instructions in reverse program order within each block. 269 Backward, 270 } 271 272 impl<I: VCodeInst> VCodeBuilder<I> { 273 /// Create a new VCodeBuilder. new( sigs: SigSet, abi: Callee<I::ABIMachineSpec>, emit_info: I::Info, block_order: BlockLoweringOrder, constants: VCodeConstants, direction: VCodeBuildDirection, log2_min_function_alignment: u8, ) -> Self274 pub fn new( 275 sigs: SigSet, 276 abi: Callee<I::ABIMachineSpec>, 277 emit_info: I::Info, 278 block_order: BlockLoweringOrder, 279 constants: VCodeConstants, 280 direction: VCodeBuildDirection, 281 log2_min_function_alignment: u8, 282 ) -> Self { 283 let vcode = VCode::new( 284 sigs, 285 abi, 286 emit_info, 287 block_order, 288 constants, 289 log2_min_function_alignment, 290 ); 291 292 VCodeBuilder { 293 vcode, 294 direction, 295 debug_info: FxHashMap::default(), 296 } 297 } 298 init_retval_area(&mut self, vregs: &mut VRegAllocator<I>) -> CodegenResult<()>299 pub fn init_retval_area(&mut self, vregs: &mut VRegAllocator<I>) -> CodegenResult<()> { 300 self.vcode.abi.init_retval_area(&self.vcode.sigs, vregs) 301 } 302 303 /// Access the ABI object. abi(&self) -> &Callee<I::ABIMachineSpec>304 pub fn abi(&self) -> &Callee<I::ABIMachineSpec> { 305 &self.vcode.abi 306 } 307 308 /// Access the ABI object. abi_mut(&mut self) -> &mut Callee<I::ABIMachineSpec>309 pub fn abi_mut(&mut self) -> &mut Callee<I::ABIMachineSpec> { 310 &mut self.vcode.abi 311 } 312 sigs(&self) -> &SigSet313 pub fn sigs(&self) -> &SigSet { 314 &self.vcode.sigs 315 } 316 sigs_mut(&mut self) -> &mut SigSet317 pub fn sigs_mut(&mut self) -> &mut SigSet { 318 &mut self.vcode.sigs 319 } 320 321 /// Access to the BlockLoweringOrder object. block_order(&self) -> &BlockLoweringOrder322 pub fn block_order(&self) -> &BlockLoweringOrder { 323 &self.vcode.block_order 324 } 325 326 /// Set the current block as the entry block. set_entry(&mut self, block: BlockIndex)327 pub fn set_entry(&mut self, block: BlockIndex) { 328 self.vcode.entry = block; 329 } 330 331 /// End the current basic block. Must be called after emitting vcode insts 332 /// for IR insts and prior to ending the function (building the VCode). end_bb(&mut self)333 pub fn end_bb(&mut self) { 334 let end_idx = self.vcode.insts.len(); 335 // Add the instruction index range to the list of blocks. 336 self.vcode.block_ranges.push_end(end_idx); 337 // End the successors list. 338 let succ_end = self.vcode.block_succs.len(); 339 self.vcode.block_succ_range.push_end(succ_end); 340 // End the blockparams list. 341 let block_params_end = self.vcode.block_params.len(); 342 self.vcode.block_params_range.push_end(block_params_end); 343 // End the branch blockparam args list. 344 let branch_block_arg_succ_end = self.vcode.branch_block_arg_range.len(); 345 self.vcode 346 .branch_block_arg_succ_range 347 .push_end(branch_block_arg_succ_end); 348 } 349 add_block_param(&mut self, param: VirtualReg)350 pub fn add_block_param(&mut self, param: VirtualReg) { 351 self.vcode.block_params.push(param.into()); 352 } 353 add_branch_args_for_succ(&mut self, args: &[Reg])354 fn add_branch_args_for_succ(&mut self, args: &[Reg]) { 355 self.vcode 356 .branch_block_args 357 .extend(args.iter().map(|&arg| VReg::from(arg))); 358 let end = self.vcode.branch_block_args.len(); 359 self.vcode.branch_block_arg_range.push_end(end); 360 } 361 362 /// Push an instruction for the current BB and current IR inst 363 /// within the BB. push(&mut self, insn: I, loc: RelSourceLoc)364 pub fn push(&mut self, insn: I, loc: RelSourceLoc) { 365 assert!(!insn.is_low_level_branch()); // These are not meant to be in VCode. 366 self.vcode.insts.push(insn); 367 self.vcode.srclocs.push(loc); 368 } 369 370 /// Add a successor block with branch args. add_succ(&mut self, block: BlockIndex, args: &[Reg])371 pub fn add_succ(&mut self, block: BlockIndex, args: &[Reg]) { 372 self.vcode.block_succs.push(block); 373 self.add_branch_args_for_succ(args); 374 } 375 376 /// Add a debug value label to a register. add_value_label(&mut self, reg: Reg, label: ValueLabel)377 pub fn add_value_label(&mut self, reg: Reg, label: ValueLabel) { 378 // 1) In the reversed order, we consider the instructions 379 // that define ranges in the "debug_info" array to refer 380 // to the IP **after** them (when reversed): 381 // IP[2]__| Inst 3 | 382 // IP[1]__| Inst 2 | 383 // IP[0]__| Inst 1 | 384 // | Inst 0 | 385 // This is so that we can represent IP[<function start>], 386 // done at the cost of not being to represent IP[<function end>], 387 // which is OK since no values will be live at that point. 388 // 2) The live range for "reg" begins at the current IP 389 // and continues until the next, in execution order, 390 // VReg that defines "label". Since the ranges are open 391 // at the end, the subtraction of 1 cancels out: 392 // [last..current IP] <=> 393 // [last..last emitted inst index] <=> 394 // [last..next_inst_index - 1] <=> 395 // [last..next_inst_index) 396 // 397 let next_inst_index = self.vcode.insts.len(); 398 if next_inst_index == 0 { 399 // This would produce a defective [0..0) range. 400 return; 401 } 402 let next_inst = InsnIndex::new(next_inst_index); 403 let labels = self.debug_info.entry(label).or_insert_with(|| vec![]); 404 let last = labels 405 .last() 406 .map(|(_start, end, _vreg)| *end) 407 .unwrap_or(InsnIndex::new(0)); 408 labels.push((last, next_inst, reg.into())); 409 } 410 411 /// Access the constants. constants(&mut self) -> &mut VCodeConstants412 pub fn constants(&mut self) -> &mut VCodeConstants { 413 &mut self.vcode.constants 414 } 415 compute_preds_from_succs(&mut self)416 fn compute_preds_from_succs(&mut self) { 417 // Do a linear-time counting sort: first determine how many 418 // times each block appears as a successor. 419 let mut starts = vec![0u32; self.vcode.num_blocks()]; 420 for succ in &self.vcode.block_succs { 421 starts[succ.index()] += 1; 422 } 423 424 // Determine for each block the starting index where that 425 // block's predecessors should go. This is equivalent to the 426 // ranges we need to store in block_pred_range. 427 self.vcode.block_pred_range.reserve(starts.len()); 428 let mut end = 0; 429 for count in starts.iter_mut() { 430 let start = end; 431 end += *count; 432 *count = start; 433 self.vcode.block_pred_range.push_end(end as usize); 434 } 435 let end = end as usize; 436 debug_assert_eq!(end, self.vcode.block_succs.len()); 437 438 // Walk over the successors again, this time grouped by 439 // predecessor, and push the predecessor at the current 440 // starting position of each of its successors. We build 441 // each group of predecessors in whatever order Ranges::iter 442 // returns them; regalloc2 doesn't care. 443 self.vcode.block_preds.resize(end, BlockIndex::invalid()); 444 for (pred, range) in self.vcode.block_succ_range.iter() { 445 let pred = BlockIndex::new(pred); 446 for succ in &self.vcode.block_succs[range] { 447 let pos = &mut starts[succ.index()]; 448 self.vcode.block_preds[*pos as usize] = pred; 449 *pos += 1; 450 } 451 } 452 debug_assert!(self.vcode.block_preds.iter().all(|pred| pred.is_valid())); 453 } 454 455 /// Called once, when a build in Backward order is complete, to 456 /// perform the overall reversal (into final forward order) and 457 /// finalize metadata accordingly. reverse_and_finalize(&mut self, vregs: &VRegAllocator<I>)458 fn reverse_and_finalize(&mut self, vregs: &VRegAllocator<I>) { 459 let n_insts = self.vcode.insts.len(); 460 if n_insts == 0 { 461 return; 462 } 463 464 // Reverse the per-block and per-inst sequences. 465 self.vcode.block_ranges.reverse_index(); 466 self.vcode.block_ranges.reverse_target(n_insts); 467 // block_params_range is indexed by block (and blocks were 468 // traversed in reverse) so we reverse it; but block-param 469 // sequences in the concatenated vec can remain in reverse 470 // order (it is effectively an arena of arbitrarily-placed 471 // referenced sequences). 472 self.vcode.block_params_range.reverse_index(); 473 // Likewise, we reverse block_succ_range, but the block_succ 474 // concatenated array can remain as-is. 475 self.vcode.block_succ_range.reverse_index(); 476 self.vcode.insts.reverse(); 477 self.vcode.srclocs.reverse(); 478 // Likewise, branch_block_arg_succ_range is indexed by block 479 // so must be reversed. 480 self.vcode.branch_block_arg_succ_range.reverse_index(); 481 482 // To translate an instruction index *endpoint* in reversed 483 // order to forward order, compute `n_insts - i`. 484 // 485 // Why not `n_insts - 1 - i`? That would be correct to 486 // translate an individual instruction index (for ten insts 0 487 // to 9 inclusive, inst 0 becomes 9, and inst 9 becomes 488 // 0). But for the usual inclusive-start, exclusive-end range 489 // idiom, inclusive starts become exclusive ends and 490 // vice-versa, so e.g. an (inclusive) start of 0 becomes an 491 // (exclusive) end of 10. 492 let translate = |inst: InsnIndex| InsnIndex::new(n_insts - inst.index()); 493 494 // Generate debug-value labels based on per-label maps. 495 for (label, tuples) in &self.debug_info { 496 for &(start, end, vreg) in tuples { 497 let vreg = vregs.resolve_vreg_alias(vreg); 498 let fwd_start = translate(end); 499 let fwd_end = translate(start); 500 self.vcode 501 .debug_value_labels 502 .push((vreg, fwd_start, fwd_end, label.as_u32())); 503 } 504 } 505 506 // Now sort debug value labels by VReg, as required 507 // by regalloc2. 508 self.vcode 509 .debug_value_labels 510 .sort_unstable_by_key(|(vreg, _, _, _)| *vreg); 511 } 512 collect_operands(&mut self, vregs: &VRegAllocator<I>)513 fn collect_operands(&mut self, vregs: &VRegAllocator<I>) { 514 let allocatable = PRegSet::from(self.vcode.abi.machine_env()); 515 for (i, insn) in self.vcode.insts.iter_mut().enumerate() { 516 // Push operands from the instruction onto the operand list. 517 // 518 // We rename through the vreg alias table as we collect 519 // the operands. This is better than a separate post-pass 520 // over operands, because it has more cache locality: 521 // operands only need to pass through L1 once. This is 522 // also better than renaming instructions' 523 // operands/registers while lowering, because here we only 524 // need to do the `match` over the instruction to visit 525 // its register fields (which is slow, branchy code) once. 526 527 let mut op_collector = 528 OperandCollector::new(&mut self.vcode.operands, allocatable, |vreg| { 529 vregs.resolve_vreg_alias(vreg) 530 }); 531 insn.get_operands(&mut op_collector); 532 let (ops, clobbers) = op_collector.finish(); 533 self.vcode.operand_ranges.push_end(ops); 534 535 if clobbers != PRegSet::default() { 536 self.vcode.clobbers.insert(InsnIndex::new(i), clobbers); 537 } 538 539 if let Some((dst, src)) = insn.is_move() { 540 // We should never see non-virtual registers present in move 541 // instructions. 542 assert!( 543 src.is_virtual(), 544 "the real register {src:?} was used as the source of a move instruction" 545 ); 546 assert!( 547 dst.to_reg().is_virtual(), 548 "the real register {:?} was used as the destination of a move instruction", 549 dst.to_reg() 550 ); 551 } 552 } 553 554 // Translate blockparam args via the vreg aliases table as well. 555 for arg in &mut self.vcode.branch_block_args { 556 let new_arg = vregs.resolve_vreg_alias(*arg); 557 trace!("operandcollector: block arg {:?} -> {:?}", arg, new_arg); 558 *arg = new_arg; 559 } 560 } 561 562 /// Build the final VCode. build(mut self, mut vregs: VRegAllocator<I>) -> VCode<I>563 pub fn build(mut self, mut vregs: VRegAllocator<I>) -> VCode<I> { 564 self.vcode.vreg_types = take(&mut vregs.vreg_types); 565 566 if self.direction == VCodeBuildDirection::Backward { 567 self.reverse_and_finalize(&vregs); 568 } 569 self.collect_operands(&vregs); 570 571 self.compute_preds_from_succs(); 572 self.vcode.debug_value_labels.sort_unstable(); 573 574 // At this point, nothing in the vcode should mention any 575 // VReg which has been aliased. All the appropriate rewriting 576 // should have happened above. Just to be sure, let's 577 // double-check each field which has vregs. 578 // Note: can't easily check vcode.insts, resolved in collect_operands. 579 // Operands are resolved in collect_operands. 580 vregs.debug_assert_no_vreg_aliases(self.vcode.operands.iter().map(|op| op.vreg())); 581 // Currently block params are never aliased to another vreg. 582 vregs.debug_assert_no_vreg_aliases(self.vcode.block_params.iter().copied()); 583 // Branch block args are resolved in collect_operands. 584 vregs.debug_assert_no_vreg_aliases(self.vcode.branch_block_args.iter().copied()); 585 // Debug value labels are resolved in reverse_and_finalize. 586 vregs.debug_assert_no_vreg_aliases( 587 self.vcode.debug_value_labels.iter().map(|&(vreg, ..)| vreg), 588 ); 589 590 self.vcode 591 } 592 593 /// Add a user stack map for the associated instruction. add_user_stack_map( &mut self, inst: BackwardsInsnIndex, entries: &[ir::UserStackMapEntry], )594 pub fn add_user_stack_map( 595 &mut self, 596 inst: BackwardsInsnIndex, 597 entries: &[ir::UserStackMapEntry], 598 ) { 599 let stack_map = ir::UserStackMap::new(entries, self.vcode.abi.sized_stackslot_offsets()); 600 let old_entry = self.vcode.user_stack_maps.insert(inst, stack_map); 601 debug_assert!(old_entry.is_none()); 602 } 603 604 /// Add debug tags for the associated instruction. add_debug_tags(&mut self, inst: BackwardsInsnIndex, entries: &[ir::DebugTag])605 pub fn add_debug_tags(&mut self, inst: BackwardsInsnIndex, entries: &[ir::DebugTag]) { 606 let start = u32::try_from(self.vcode.debug_tag_pool.len()).unwrap(); 607 self.vcode.debug_tag_pool.extend(entries.iter().cloned()); 608 let end = u32::try_from(self.vcode.debug_tag_pool.len()).unwrap(); 609 self.vcode.debug_tags.insert(inst, start..end); 610 } 611 } 612 613 const NO_INST_OFFSET: CodeOffset = u32::MAX; 614 615 impl<I: VCodeInst> VCode<I> { 616 /// New empty VCode. new( sigs: SigSet, abi: Callee<I::ABIMachineSpec>, emit_info: I::Info, block_order: BlockLoweringOrder, constants: VCodeConstants, log2_min_function_alignment: u8, ) -> Self617 fn new( 618 sigs: SigSet, 619 abi: Callee<I::ABIMachineSpec>, 620 emit_info: I::Info, 621 block_order: BlockLoweringOrder, 622 constants: VCodeConstants, 623 log2_min_function_alignment: u8, 624 ) -> Self { 625 let n_blocks = block_order.lowered_order().len(); 626 VCode { 627 sigs, 628 vreg_types: vec![], 629 insts: Vec::with_capacity(10 * n_blocks), 630 user_stack_maps: FxHashMap::default(), 631 debug_tags: FxHashMap::default(), 632 debug_tag_pool: vec![], 633 operands: Vec::with_capacity(30 * n_blocks), 634 operand_ranges: Ranges::with_capacity(10 * n_blocks), 635 clobbers: FxHashMap::default(), 636 srclocs: Vec::with_capacity(10 * n_blocks), 637 entry: BlockIndex::new(0), 638 block_ranges: Ranges::with_capacity(n_blocks), 639 block_succ_range: Ranges::with_capacity(n_blocks), 640 block_succs: Vec::with_capacity(n_blocks), 641 block_pred_range: Ranges::default(), 642 block_preds: Vec::new(), 643 block_params_range: Ranges::with_capacity(n_blocks), 644 block_params: Vec::with_capacity(5 * n_blocks), 645 branch_block_args: Vec::with_capacity(10 * n_blocks), 646 branch_block_arg_range: Ranges::with_capacity(2 * n_blocks), 647 branch_block_arg_succ_range: Ranges::with_capacity(n_blocks), 648 block_order, 649 abi, 650 emit_info, 651 constants, 652 debug_value_labels: vec![], 653 log2_min_function_alignment, 654 } 655 } 656 657 /// Get the number of blocks. Block indices will be in the range `0 .. 658 /// (self.num_blocks() - 1)`. num_blocks(&self) -> usize659 pub fn num_blocks(&self) -> usize { 660 self.block_ranges.len() 661 } 662 663 /// The number of lowered instructions. num_insts(&self) -> usize664 pub fn num_insts(&self) -> usize { 665 self.insts.len() 666 } 667 compute_clobbers_and_function_calls( &self, regalloc: ®alloc2::Output, ) -> (Vec<Writable<RealReg>>, FunctionCalls)668 fn compute_clobbers_and_function_calls( 669 &self, 670 regalloc: ®alloc2::Output, 671 ) -> (Vec<Writable<RealReg>>, FunctionCalls) { 672 let mut clobbered = PRegSet::default(); 673 let mut function_calls = FunctionCalls::None; 674 675 // All moves are included in clobbers. 676 for (_, Edit::Move { to, .. }) in ®alloc.edits { 677 if let Some(preg) = to.as_reg() { 678 clobbered.add(preg); 679 } 680 } 681 682 for (i, range) in self.operand_ranges.iter() { 683 let operands = &self.operands[range.clone()]; 684 let allocs = ®alloc.allocs[range]; 685 for (operand, alloc) in operands.iter().zip(allocs.iter()) { 686 if operand.kind() == OperandKind::Def { 687 if let Some(preg) = alloc.as_reg() { 688 clobbered.add(preg); 689 } 690 } 691 } 692 693 function_calls.update(self.insts[i].call_type()); 694 695 // Also add explicitly-clobbered registers. 696 // 697 // Skip merging this instruction's clobber list if not 698 // "included in clobbers" as per the MachInst. (Some 699 // backends use this to implement ABI specifics; e.g., 700 // excluding calls of the same ABI as the current function 701 // from clobbers, because by definition everything 702 // clobbered by the call can be clobbered by this function 703 // without saving as well. 704 // 705 // This is important for a particular optimization: when 706 // some registers are "half-clobbered", e.g. vector/float 707 // registers on aarch64, we want them to be seen as 708 // clobbered by regalloc so it avoids carrying values 709 // across calls in these registers but not seen as 710 // clobbered by prologue generation here (because the 711 // actual half-clobber implied by the clobber list fits 712 // within the clobbers that we allow without 713 // clobber-saves). 714 if self.insts[i].is_included_in_clobbers() { 715 if let Some(&inst_clobbered) = self.clobbers.get(&InsnIndex::new(i)) { 716 clobbered.union_from(inst_clobbered); 717 } 718 } 719 } 720 721 let clobbered_regs = clobbered 722 .into_iter() 723 .map(|preg| Writable::from_reg(RealReg::from(preg))) 724 .collect(); 725 726 (clobbered_regs, function_calls) 727 } 728 729 /// Emit the instructions to a `MachBuffer`, containing fixed-up 730 /// code and external reloc/trap/etc. records ready for use. Takes 731 /// the regalloc results as well. 732 /// 733 /// Returns the machine code itself, and optionally metadata 734 /// and/or a disassembly, as an `EmitResult`. The `VCode` itself 735 /// is consumed by the emission process. emit( mut self, regalloc: ®alloc2::Output, want_disasm: bool, flags: &settings::Flags, ctrl_plane: &mut ControlPlane, ) -> EmitResult where I: VCodeInst,736 pub fn emit( 737 mut self, 738 regalloc: ®alloc2::Output, 739 want_disasm: bool, 740 flags: &settings::Flags, 741 ctrl_plane: &mut ControlPlane, 742 ) -> EmitResult 743 where 744 I: VCodeInst, 745 { 746 let _tt = timing::vcode_emit(); 747 let mut buffer = MachBuffer::new(); 748 buffer.set_log2_min_function_alignment(self.log2_min_function_alignment); 749 let mut bb_starts: Vec<Option<CodeOffset>> = vec![]; 750 751 // The first M MachLabels are reserved for block indices. 752 buffer.reserve_labels_for_blocks(self.num_blocks()); 753 754 // Register all allocated constants with the `MachBuffer` to ensure that 755 // any references to the constants during instructions can be handled 756 // correctly. 757 buffer.register_constants(&self.constants); 758 759 // Construct the final order we emit code in: cold blocks at the end. 760 let mut final_order: SmallVec<[BlockIndex; 16]> = smallvec![]; 761 let mut cold_blocks: SmallVec<[BlockIndex; 16]> = smallvec![]; 762 for block in 0..self.num_blocks() { 763 let block = BlockIndex::new(block); 764 if self.block_order.is_cold(block) { 765 cold_blocks.push(block); 766 } else { 767 final_order.push(block); 768 } 769 } 770 final_order.extend(cold_blocks.clone()); 771 772 // Compute/save info we need for the prologue: clobbers and 773 // number of spillslots. 774 // 775 // We clone `abi` here because we will mutate it as we 776 // generate the prologue and set other info, but we can't 777 // mutate `VCode`. The info it usually carries prior to 778 // setting clobbers is fairly minimal so this should be 779 // relatively cheap. 780 let (clobbers, function_calls) = self.compute_clobbers_and_function_calls(regalloc); 781 self.abi.compute_frame_layout( 782 &self.sigs, 783 regalloc.num_spillslots, 784 clobbers, 785 function_calls, 786 ); 787 788 // Emit blocks. 789 let mut cur_srcloc = None; 790 let mut last_offset = None; 791 let mut inst_offsets = vec![]; 792 let mut state = I::State::new(&self.abi, core::mem::take(ctrl_plane)); 793 794 let mut disasm = String::new(); 795 796 if !self.debug_value_labels.is_empty() { 797 inst_offsets.resize(self.insts.len(), NO_INST_OFFSET); 798 } 799 800 // Count edits per block ahead of time; this is needed for 801 // lookahead island emission. (We could derive it per-block 802 // with binary search in the edit list, but it's more 803 // efficient to do it in one pass here.) 804 let mut ra_edits_per_block: SmallVec<[u32; 64]> = smallvec![]; 805 let mut edit_idx = 0; 806 for block in 0..self.num_blocks() { 807 let end_inst = InsnIndex::new(self.block_ranges.get(block).end); 808 let start_edit_idx = edit_idx; 809 while edit_idx < regalloc.edits.len() && regalloc.edits[edit_idx].0.inst() < end_inst { 810 edit_idx += 1; 811 } 812 let end_edit_idx = edit_idx; 813 ra_edits_per_block.push((end_edit_idx - start_edit_idx) as u32); 814 } 815 816 let is_forward_edge_cfi_enabled = self.abi.is_forward_edge_cfi_enabled(); 817 let mut bb_padding = match flags.bb_padding_log2_minus_one() { 818 0 => Vec::new(), 819 n => vec![0; 1 << (n - 1)], 820 }; 821 let mut total_bb_padding = 0; 822 823 for (block_order_idx, &block) in final_order.iter().enumerate() { 824 trace!("emitting block {:?}", block); 825 826 // Call the new block hook for state 827 state.on_new_block(); 828 829 // Emit NOPs to align the block. 830 let new_offset = I::align_basic_block(buffer.cur_offset()); 831 while new_offset > buffer.cur_offset() { 832 // Pad with NOPs up to the aligned block offset. 833 let nop = I::gen_nop((new_offset - buffer.cur_offset()) as usize); 834 nop.emit(&mut buffer, &self.emit_info, &mut Default::default()); 835 } 836 assert_eq!(buffer.cur_offset(), new_offset); 837 838 let do_emit = |inst: &I, 839 disasm: &mut String, 840 buffer: &mut MachBuffer<I>, 841 state: &mut I::State| { 842 if want_disasm && !inst.is_args() { 843 let mut s = state.clone(); 844 writeln!(disasm, " {}", inst.pretty_print_inst(&mut s)).unwrap(); 845 } 846 inst.emit(buffer, &self.emit_info, state); 847 }; 848 849 // Is this the first block? Emit the prologue directly if so. 850 if block == self.entry { 851 trace!(" -> entry block"); 852 buffer.start_srcloc(Default::default()); 853 for inst in &self.abi.gen_prologue() { 854 do_emit(&inst, &mut disasm, &mut buffer, &mut state); 855 } 856 buffer.end_srcloc(); 857 } 858 859 // Now emit the regular block body. 860 861 buffer.bind_label(MachLabel::from_block(block), state.ctrl_plane_mut()); 862 863 if want_disasm { 864 writeln!(&mut disasm, "block{}:", block.index()).unwrap(); 865 } 866 867 if flags.machine_code_cfg_info() { 868 // Track BB starts. If we have backed up due to MachBuffer 869 // branch opts, note that the removed blocks were removed. 870 let cur_offset = buffer.cur_offset(); 871 if last_offset.is_some() && cur_offset <= last_offset.unwrap() { 872 for i in (0..bb_starts.len()).rev() { 873 if bb_starts[i].is_some() && cur_offset > bb_starts[i].unwrap() { 874 break; 875 } 876 bb_starts[i] = None; 877 } 878 } 879 bb_starts.push(Some(cur_offset)); 880 last_offset = Some(cur_offset); 881 } 882 883 if let Some(block_start) = I::gen_block_start( 884 self.block_order.is_indirect_branch_target(block), 885 is_forward_edge_cfi_enabled, 886 ) { 887 do_emit(&block_start, &mut disasm, &mut buffer, &mut state); 888 } 889 890 for inst_or_edit in regalloc.block_insts_and_edits(&self, block) { 891 match inst_or_edit { 892 InstOrEdit::Inst(iix) => { 893 if !self.debug_value_labels.is_empty() { 894 // If we need to produce debug info, 895 // record the offset of each instruction 896 // so that we can translate value-label 897 // ranges to machine-code offsets. 898 899 // Cold blocks violate monotonicity 900 // assumptions elsewhere (that 901 // instructions in inst-index order are in 902 // order in machine code), so we omit 903 // their offsets here. Value-label range 904 // generation below will skip empty ranges 905 // and ranges with to-offsets of zero. 906 if !self.block_order.is_cold(block) { 907 inst_offsets[iix.index()] = buffer.cur_offset(); 908 } 909 } 910 911 // Update the srcloc at this point in the buffer. 912 let srcloc = self.srclocs[iix.index()]; 913 if cur_srcloc != Some(srcloc) { 914 if cur_srcloc.is_some() { 915 buffer.end_srcloc(); 916 } 917 buffer.start_srcloc(srcloc); 918 cur_srcloc = Some(srcloc); 919 } 920 921 // If this is a safepoint, compute a stack map 922 // and pass it to the emit state. 923 let stack_map_disasm = if self.insts[iix.index()].is_safepoint() { 924 let (user_stack_map, user_stack_map_disasm) = { 925 // The `user_stack_maps` is keyed by reverse 926 // instruction index, so we must flip the 927 // index. We can't put this into a helper method 928 // due to borrowck issues because parts of 929 // `self` are borrowed mutably elsewhere in this 930 // function. 931 let index = iix.to_backwards_insn_index(self.num_insts()); 932 let user_stack_map = self.user_stack_maps.remove(&index); 933 let user_stack_map_disasm = if want_disasm { 934 user_stack_map.as_ref().map(|m| format!(" ; {m:?}")) 935 } else { 936 None 937 }; 938 (user_stack_map, user_stack_map_disasm) 939 }; 940 941 state.pre_safepoint(user_stack_map); 942 943 user_stack_map_disasm 944 } else { 945 None 946 }; 947 948 // Place debug tags in the emission buffer 949 // either at the offset prior to the 950 // instruction or after the instruction, 951 // depending on whether this is a call. See 952 // the documentation on [`MachDebugTagPos`] 953 // for details on why. 954 let mut debug_tag_disasm = None; 955 let mut place_debug_tags = 956 |this: &VCode<I>, pos: MachDebugTagPos, buffer: &mut MachBuffer<I>| { 957 // As above, translate the forward instruction 958 // index to a backward index for the lookup. 959 let debug_tag_range = { 960 let index = iix.to_backwards_insn_index(this.num_insts()); 961 this.debug_tags.get(&index) 962 }; 963 if let Some(range) = debug_tag_range { 964 let start = usize::try_from(range.start).unwrap(); 965 let end = usize::try_from(range.end).unwrap(); 966 let tags = &this.debug_tag_pool[start..end]; 967 968 if want_disasm { 969 debug_tag_disasm = 970 Some(format!(" ; ^-- debug @ {pos:?}: {tags:?}")); 971 } 972 buffer.push_debug_tags(pos, tags); 973 } 974 }; 975 let debug_tag_pos = 976 if self.insts[iix.index()].call_type() == CallType::Regular { 977 MachDebugTagPos::Post 978 } else { 979 MachDebugTagPos::Pre 980 }; 981 982 if debug_tag_pos == MachDebugTagPos::Pre { 983 place_debug_tags(&self, debug_tag_pos, &mut buffer); 984 } 985 986 // If the instruction we are about to emit is 987 // a return, place an epilogue at this point 988 // (and don't emit the return; the actual 989 // epilogue will contain it). 990 if self.insts[iix.index()].is_term() == MachTerminator::Ret { 991 log::trace!("emitting epilogue"); 992 for inst in self.abi.gen_epilogue() { 993 do_emit(&inst, &mut disasm, &mut buffer, &mut state); 994 } 995 } else { 996 // Update the operands for this inst using the 997 // allocations from the regalloc result. 998 let mut allocs = regalloc.inst_allocs(iix).iter(); 999 self.insts[iix.index()].get_operands( 1000 &mut |reg: &mut Reg, constraint, _kind, _pos| { 1001 let alloc = 1002 allocs.next().expect("enough allocations for all operands"); 1003 1004 if let Some(alloc) = alloc.as_reg() { 1005 let alloc: Reg = alloc.into(); 1006 if let OperandConstraint::FixedReg(rreg) = constraint { 1007 debug_assert_eq!(Reg::from(rreg), alloc); 1008 } 1009 *reg = alloc; 1010 } else if let Some(alloc) = alloc.as_stack() { 1011 let alloc: Reg = alloc.into(); 1012 *reg = alloc; 1013 } 1014 }, 1015 ); 1016 debug_assert!(allocs.next().is_none()); 1017 1018 log::trace!("emitting: {:?}", self.insts[iix.index()]); 1019 1020 // Emit the instruction! 1021 do_emit( 1022 &self.insts[iix.index()], 1023 &mut disasm, 1024 &mut buffer, 1025 &mut state, 1026 ); 1027 1028 if debug_tag_pos == MachDebugTagPos::Post { 1029 place_debug_tags(&self, debug_tag_pos, &mut buffer); 1030 } 1031 1032 if let Some(stack_map_disasm) = stack_map_disasm { 1033 disasm.push_str(&stack_map_disasm); 1034 disasm.push('\n'); 1035 } 1036 if let Some(debug_tag_disasm) = debug_tag_disasm { 1037 disasm.push_str(&debug_tag_disasm); 1038 disasm.push('\n'); 1039 } 1040 } 1041 } 1042 1043 InstOrEdit::Edit(Edit::Move { from, to }) => { 1044 // Create a move/spill/reload instruction and 1045 // immediately emit it. 1046 match (from.as_reg(), to.as_reg()) { 1047 (Some(from), Some(to)) => { 1048 // Reg-to-reg move. 1049 let from_rreg = Reg::from(from); 1050 let to_rreg = Writable::from_reg(Reg::from(to)); 1051 debug_assert_eq!(from.class(), to.class()); 1052 let ty = I::canonical_type_for_rc(from.class()); 1053 let mv = I::gen_move(to_rreg, from_rreg, ty); 1054 do_emit(&mv, &mut disasm, &mut buffer, &mut state); 1055 } 1056 (Some(from), None) => { 1057 // Spill from register to spillslot. 1058 let to = to.as_stack().unwrap(); 1059 let from_rreg = RealReg::from(from); 1060 let spill = self.abi.gen_spill(to, from_rreg); 1061 do_emit(&spill, &mut disasm, &mut buffer, &mut state); 1062 } 1063 (None, Some(to)) => { 1064 // Load from spillslot to register. 1065 let from = from.as_stack().unwrap(); 1066 let to_rreg = Writable::from_reg(RealReg::from(to)); 1067 let reload = self.abi.gen_reload(to_rreg, from); 1068 do_emit(&reload, &mut disasm, &mut buffer, &mut state); 1069 } 1070 (None, None) => { 1071 panic!("regalloc2 should have eliminated stack-to-stack moves!"); 1072 } 1073 } 1074 } 1075 } 1076 } 1077 1078 if cur_srcloc.is_some() { 1079 buffer.end_srcloc(); 1080 cur_srcloc = None; 1081 } 1082 1083 // Do we need an island? Get the worst-case size of the next BB, add 1084 // it to the optional padding behind the block, and pass this to the 1085 // `MachBuffer` to determine if an island is necessary. 1086 let worst_case_next_bb = if block_order_idx < final_order.len() - 1 { 1087 let next_block = final_order[block_order_idx + 1]; 1088 let next_block_range = self.block_ranges.get(next_block.index()); 1089 let next_block_size = next_block_range.len() as u32; 1090 let next_block_ra_insertions = ra_edits_per_block[next_block.index()]; 1091 I::worst_case_size() * (next_block_size + next_block_ra_insertions) 1092 } else { 1093 0 1094 }; 1095 let padding = if bb_padding.is_empty() { 1096 0 1097 } else { 1098 bb_padding.len() as u32 + I::LabelUse::ALIGN - 1 1099 }; 1100 if buffer.island_needed(padding + worst_case_next_bb) { 1101 buffer.emit_island(padding + worst_case_next_bb, ctrl_plane); 1102 } 1103 1104 // Insert padding, if configured, to stress the `MachBuffer`'s 1105 // relocation and island calculations. 1106 // 1107 // Padding can get quite large during fuzzing though so place a 1108 // total cap on it where when a per-function threshold is exceeded 1109 // the padding is turned back down to zero. This avoids a small-ish 1110 // test case generating a GB+ memory footprint in Cranelift for 1111 // example. 1112 if !bb_padding.is_empty() { 1113 buffer.put_data(&bb_padding); 1114 buffer.align_to(I::LabelUse::ALIGN); 1115 total_bb_padding += bb_padding.len(); 1116 if total_bb_padding > (150 << 20) { 1117 bb_padding = Vec::new(); 1118 } 1119 } 1120 } 1121 1122 debug_assert!( 1123 self.user_stack_maps.is_empty(), 1124 "any stack maps should have been consumed by instruction emission, still have: {:#?}", 1125 self.user_stack_maps, 1126 ); 1127 1128 // Do any optimizations on branches at tail of buffer, as if we had 1129 // bound one last label. 1130 buffer.optimize_branches(ctrl_plane); 1131 1132 // emission state is not needed anymore, move control plane back out 1133 *ctrl_plane = state.take_ctrl_plane(); 1134 1135 let func_body_len = buffer.cur_offset(); 1136 1137 // Create `bb_edges` and final (filtered) `bb_starts`. 1138 let mut bb_edges = vec![]; 1139 let mut bb_offsets = vec![]; 1140 if flags.machine_code_cfg_info() { 1141 for block in 0..self.num_blocks() { 1142 if bb_starts[block].is_none() { 1143 // Block was deleted by MachBuffer; skip. 1144 continue; 1145 } 1146 let from = bb_starts[block].unwrap(); 1147 1148 bb_offsets.push(from); 1149 // Resolve each `succ` label and add edges. 1150 let succs = self.block_succs(BlockIndex::new(block)); 1151 for &succ in succs.iter() { 1152 let to = buffer.resolve_label_offset(MachLabel::from_block(succ)); 1153 bb_edges.push((from, to)); 1154 } 1155 } 1156 } 1157 1158 self.monotonize_inst_offsets(&mut inst_offsets[..], func_body_len); 1159 let value_labels_ranges = 1160 self.compute_value_labels_ranges(regalloc, &inst_offsets[..], func_body_len); 1161 1162 // Store metadata about frame layout in the MachBuffer. 1163 buffer.set_frame_layout(self.abi.frame_slot_metadata()); 1164 1165 EmitResult { 1166 buffer: buffer.finish(&self.constants, ctrl_plane), 1167 bb_offsets, 1168 bb_edges, 1169 disasm: if want_disasm { Some(disasm) } else { None }, 1170 value_labels_ranges, 1171 } 1172 } 1173 monotonize_inst_offsets(&self, inst_offsets: &mut [CodeOffset], func_body_len: u32)1174 fn monotonize_inst_offsets(&self, inst_offsets: &mut [CodeOffset], func_body_len: u32) { 1175 if self.debug_value_labels.is_empty() { 1176 return; 1177 } 1178 1179 // During emission, branch removal can make offsets of instructions incorrect. 1180 // Consider the following sequence: [insi][jmp0][jmp1][jmp2][insj] 1181 // It will be recorded as (say): [30] [34] [38] [42] [<would be 46>] 1182 // When the jumps get removed we are left with (in "inst_offsets"): 1183 // [insi][jmp0][jmp1][jmp2][insj][...] 1184 // [30] [34] [38] [42] [34] 1185 // Which violates the monotonicity invariant. This method sets offsets of these 1186 // removed instructions such as to make them appear zero-sized: 1187 // [insi][jmp0][jmp1][jmp2][insj][...] 1188 // [30] [34] [34] [34] [34] 1189 // 1190 let mut next_offset = func_body_len; 1191 for inst_index in (0..(inst_offsets.len() - 1)).rev() { 1192 let inst_offset = inst_offsets[inst_index]; 1193 1194 // Not all instructions get their offsets recorded. 1195 if inst_offset == NO_INST_OFFSET { 1196 continue; 1197 } 1198 1199 if inst_offset > next_offset { 1200 trace!( 1201 "Fixing code offset of the removed Inst {}: {} -> {}", 1202 inst_index, inst_offset, next_offset 1203 ); 1204 inst_offsets[inst_index] = next_offset; 1205 continue; 1206 } 1207 1208 next_offset = inst_offset; 1209 } 1210 } 1211 compute_value_labels_ranges( &self, regalloc: ®alloc2::Output, inst_offsets: &[CodeOffset], func_body_len: u32, ) -> ValueLabelsRanges1212 fn compute_value_labels_ranges( 1213 &self, 1214 regalloc: ®alloc2::Output, 1215 inst_offsets: &[CodeOffset], 1216 func_body_len: u32, 1217 ) -> ValueLabelsRanges { 1218 if self.debug_value_labels.is_empty() { 1219 return ValueLabelsRanges::default(); 1220 } 1221 1222 if trace_log_enabled!() { 1223 self.log_value_labels_ranges(regalloc, inst_offsets); 1224 } 1225 1226 let mut value_labels_ranges: ValueLabelsRanges = HashMap::new(); 1227 for &(label, from, to, alloc) in ®alloc.debug_locations { 1228 let label = ValueLabel::from_u32(label); 1229 let ranges = value_labels_ranges.entry(label).or_insert_with(|| vec![]); 1230 let prog_point_to_inst = |prog_point: ProgPoint| { 1231 let mut inst = prog_point.inst(); 1232 if prog_point.pos() == InstPosition::After { 1233 inst = inst.next(); 1234 } 1235 inst.index() 1236 }; 1237 let inst_to_offset = |inst_index: usize| { 1238 // Skip over cold blocks. 1239 for offset in &inst_offsets[inst_index..] { 1240 if *offset != NO_INST_OFFSET { 1241 return *offset; 1242 } 1243 } 1244 func_body_len 1245 }; 1246 let from_inst_index = prog_point_to_inst(from); 1247 let to_inst_index = prog_point_to_inst(to); 1248 let from_offset = inst_to_offset(from_inst_index); 1249 let to_offset = inst_to_offset(to_inst_index); 1250 1251 // Empty ranges or unavailable offsets can happen 1252 // due to cold blocks and branch removal (see above). 1253 if from_offset == to_offset { 1254 continue; 1255 } 1256 1257 let loc = if let Some(preg) = alloc.as_reg() { 1258 LabelValueLoc::Reg(Reg::from(preg)) 1259 } else { 1260 #[cfg(not(feature = "unwind"))] 1261 continue; 1262 1263 #[cfg(feature = "unwind")] 1264 { 1265 let slot = alloc.as_stack().unwrap(); 1266 let slot_offset = self.abi.get_spillslot_offset(slot); 1267 let slot_base_to_caller_sp_offset = self.abi.slot_base_to_caller_sp_offset(); 1268 let caller_sp_to_cfa_offset = 1269 crate::isa::unwind::systemv::caller_sp_to_cfa_offset(); 1270 // NOTE: this is a negative offset because it's relative to the caller's SP 1271 let cfa_to_sp_offset = 1272 -((slot_base_to_caller_sp_offset + caller_sp_to_cfa_offset) as i64); 1273 LabelValueLoc::CFAOffset(cfa_to_sp_offset + slot_offset) 1274 } 1275 }; 1276 1277 // Coalesce adjacent ranges that for the same location 1278 // to minimize output size here and for the consumers. 1279 if let Some(last_loc_range) = ranges.last_mut() { 1280 if last_loc_range.loc == loc && last_loc_range.end == from_offset { 1281 trace!( 1282 "Extending debug range for {:?} in {:?} to Inst {} ({})", 1283 label, loc, to_inst_index, to_offset 1284 ); 1285 last_loc_range.end = to_offset; 1286 continue; 1287 } 1288 } 1289 1290 trace!( 1291 "Recording debug range for {:?} in {:?}: [Inst {}..Inst {}) [{}..{})", 1292 label, loc, from_inst_index, to_inst_index, from_offset, to_offset 1293 ); 1294 1295 ranges.push(ValueLocRange { 1296 loc, 1297 start: from_offset, 1298 end: to_offset, 1299 }); 1300 } 1301 1302 value_labels_ranges 1303 } 1304 log_value_labels_ranges(&self, regalloc: ®alloc2::Output, inst_offsets: &[CodeOffset])1305 fn log_value_labels_ranges(&self, regalloc: ®alloc2::Output, inst_offsets: &[CodeOffset]) { 1306 debug_assert!(trace_log_enabled!()); 1307 1308 // What debug labels do we have? Note we'll skip those that have not been 1309 // allocated any location at all. They will show up as numeric gaps in the table. 1310 let mut labels = vec![]; 1311 for &(label, _, _, _) in ®alloc.debug_locations { 1312 if Some(&label) == labels.last() { 1313 continue; 1314 } 1315 labels.push(label); 1316 } 1317 1318 // Reformat the data on what VRegs were the VLs assigned to by lowering, since 1319 // the array we have is sorted by VReg, and we want it sorted by VL for easy 1320 // access in the loop below. 1321 let mut vregs = vec![]; 1322 for &(vreg, start, end, label) in &self.debug_value_labels { 1323 if matches!(labels.binary_search(&label), Ok(_)) { 1324 vregs.push((label, start, end, vreg)); 1325 } 1326 } 1327 vregs.sort_unstable_by( 1328 |(l_label, l_start, _, _), (r_label, r_start, _, _)| match l_label.cmp(r_label) { 1329 Ordering::Equal => l_start.cmp(r_start), 1330 cmp => cmp, 1331 }, 1332 ); 1333 1334 #[derive(PartialEq)] 1335 enum Mode { 1336 Measure, 1337 Emit, 1338 } 1339 #[derive(PartialEq)] 1340 enum Row { 1341 Head, 1342 Line, 1343 Inst(usize, usize), 1344 } 1345 1346 let mut widths = vec![0; 3 + 2 * labels.len()]; 1347 let mut row = String::new(); 1348 let mut output_row = |row_kind: Row, mode: Mode| { 1349 let mut column_index = 0; 1350 row.clear(); 1351 1352 macro_rules! output_cell_impl { 1353 ($fill:literal, $span:literal, $($cell_fmt:tt)*) => { 1354 let column_start = row.len(); 1355 { 1356 row.push('|'); 1357 write!(row, $($cell_fmt)*).unwrap(); 1358 } 1359 1360 let next_column_index = column_index + $span; 1361 let expected_width: usize = widths[column_index..next_column_index].iter().sum(); 1362 if mode == Mode::Measure { 1363 let actual_width = row.len() - column_start; 1364 if actual_width > expected_width { 1365 widths[next_column_index - 1] += actual_width - expected_width; 1366 } 1367 } else { 1368 let column_end = column_start + expected_width; 1369 while row.len() != column_end { 1370 row.push($fill); 1371 } 1372 } 1373 column_index = next_column_index; 1374 }; 1375 } 1376 macro_rules! output_cell { 1377 ($($cell_fmt:tt)*) => { 1378 output_cell_impl!(' ', 1, $($cell_fmt)*); 1379 }; 1380 } 1381 1382 match row_kind { 1383 Row::Head => { 1384 output_cell!("BB"); 1385 output_cell!("Inst"); 1386 output_cell!("IP"); 1387 for label in &labels { 1388 output_cell_impl!(' ', 2, "{:?}", ValueLabel::from_u32(*label)); 1389 } 1390 } 1391 Row::Line => { 1392 debug_assert!(mode == Mode::Emit); 1393 for _ in 0..3 { 1394 output_cell_impl!('-', 1, ""); 1395 } 1396 for _ in &labels { 1397 output_cell_impl!('-', 2, ""); 1398 } 1399 } 1400 Row::Inst(block_index, inst_index) => { 1401 debug_assert!(inst_index < self.num_insts()); 1402 if self.block_ranges.get(block_index).start == inst_index { 1403 output_cell!("B{}", block_index); 1404 } else { 1405 output_cell!(""); 1406 } 1407 output_cell!("Inst {inst_index} "); 1408 output_cell!("{} ", inst_offsets[inst_index]); 1409 1410 for label in &labels { 1411 // First, the VReg. 1412 use regalloc2::Inst; 1413 let vreg_cmp = |inst: usize, 1414 vreg_label: &u32, 1415 range_start: &Inst, 1416 range_end: &Inst| { 1417 match vreg_label.cmp(&label) { 1418 Ordering::Equal => { 1419 if range_end.index() <= inst { 1420 Ordering::Less 1421 } else if range_start.index() > inst { 1422 Ordering::Greater 1423 } else { 1424 Ordering::Equal 1425 } 1426 } 1427 cmp => cmp, 1428 } 1429 }; 1430 let vreg_index = 1431 vregs.binary_search_by(|(l, s, e, _)| vreg_cmp(inst_index, l, s, e)); 1432 if let Ok(vreg_index) = vreg_index { 1433 let mut prev_vreg = None; 1434 if inst_index > 0 { 1435 let prev_vreg_index = vregs.binary_search_by(|(l, s, e, _)| { 1436 vreg_cmp(inst_index - 1, l, s, e) 1437 }); 1438 if let Ok(prev_vreg_index) = prev_vreg_index { 1439 prev_vreg = Some(vregs[prev_vreg_index].3); 1440 } 1441 } 1442 1443 let vreg = vregs[vreg_index].3; 1444 if Some(vreg) == prev_vreg { 1445 output_cell!("*"); 1446 } else { 1447 output_cell!("{}", vreg); 1448 } 1449 } else { 1450 output_cell!(""); 1451 } 1452 1453 // Second, the allocated location. 1454 let inst_prog_point = ProgPoint::before(Inst::new(inst_index)); 1455 let range_index = regalloc.debug_locations.binary_search_by( 1456 |(range_label, range_start, range_end, _)| match range_label.cmp(label) 1457 { 1458 Ordering::Equal => { 1459 if *range_end <= inst_prog_point { 1460 Ordering::Less 1461 } else if *range_start > inst_prog_point { 1462 Ordering::Greater 1463 } else { 1464 Ordering::Equal 1465 } 1466 } 1467 cmp => cmp, 1468 }, 1469 ); 1470 if let Ok(range_index) = range_index { 1471 // Live at this instruction, print the location. 1472 if let Some(reg) = regalloc.debug_locations[range_index].3.as_reg() { 1473 output_cell!("{:?}", Reg::from(reg)); 1474 } else { 1475 output_cell!("Stk"); 1476 } 1477 } else { 1478 // Not live at this instruction. 1479 output_cell!(""); 1480 } 1481 } 1482 } 1483 } 1484 row.push('|'); 1485 1486 if mode == Mode::Emit { 1487 trace!("{}", row.as_str()); 1488 } 1489 }; 1490 1491 for block_index in 0..self.num_blocks() { 1492 for inst_index in self.block_ranges.get(block_index) { 1493 output_row(Row::Inst(block_index, inst_index), Mode::Measure); 1494 } 1495 } 1496 output_row(Row::Head, Mode::Measure); 1497 1498 output_row(Row::Head, Mode::Emit); 1499 output_row(Row::Line, Mode::Emit); 1500 for block_index in 0..self.num_blocks() { 1501 for inst_index in self.block_ranges.get(block_index) { 1502 output_row(Row::Inst(block_index, inst_index), Mode::Emit); 1503 } 1504 } 1505 } 1506 1507 /// Get the IR block for a BlockIndex, if one exists. bindex_to_bb(&self, block: BlockIndex) -> Option<ir::Block>1508 pub fn bindex_to_bb(&self, block: BlockIndex) -> Option<ir::Block> { 1509 self.block_order.lowered_order()[block.index()].orig_block() 1510 } 1511 1512 /// Get the user stack map associated with the given forward instruction index. get_user_stack_map(&self, inst: InsnIndex) -> Option<&ir::UserStackMap>1513 pub fn get_user_stack_map(&self, inst: InsnIndex) -> Option<&ir::UserStackMap> { 1514 let index = inst.to_backwards_insn_index(self.num_insts()); 1515 self.user_stack_maps.get(&index) 1516 } 1517 } 1518 1519 impl<I: VCodeInst> core::ops::Index<InsnIndex> for VCode<I> { 1520 type Output = I; index(&self, idx: InsnIndex) -> &Self::Output1521 fn index(&self, idx: InsnIndex) -> &Self::Output { 1522 &self.insts[idx.index()] 1523 } 1524 } 1525 1526 impl<I: VCodeInst> RegallocFunction for VCode<I> { num_insts(&self) -> usize1527 fn num_insts(&self) -> usize { 1528 self.insts.len() 1529 } 1530 num_blocks(&self) -> usize1531 fn num_blocks(&self) -> usize { 1532 self.block_ranges.len() 1533 } 1534 entry_block(&self) -> BlockIndex1535 fn entry_block(&self) -> BlockIndex { 1536 self.entry 1537 } 1538 block_insns(&self, block: BlockIndex) -> InstRange1539 fn block_insns(&self, block: BlockIndex) -> InstRange { 1540 let range = self.block_ranges.get(block.index()); 1541 InstRange::new(InsnIndex::new(range.start), InsnIndex::new(range.end)) 1542 } 1543 block_succs(&self, block: BlockIndex) -> &[BlockIndex]1544 fn block_succs(&self, block: BlockIndex) -> &[BlockIndex] { 1545 let range = self.block_succ_range.get(block.index()); 1546 &self.block_succs[range] 1547 } 1548 block_preds(&self, block: BlockIndex) -> &[BlockIndex]1549 fn block_preds(&self, block: BlockIndex) -> &[BlockIndex] { 1550 let range = self.block_pred_range.get(block.index()); 1551 &self.block_preds[range] 1552 } 1553 block_params(&self, block: BlockIndex) -> &[VReg]1554 fn block_params(&self, block: BlockIndex) -> &[VReg] { 1555 // As a special case we don't return block params for the entry block, as all the arguments 1556 // will be defined by the `Inst::Args` instruction. 1557 if block == self.entry { 1558 return &[]; 1559 } 1560 1561 let range = self.block_params_range.get(block.index()); 1562 &self.block_params[range] 1563 } 1564 branch_blockparams(&self, block: BlockIndex, _insn: InsnIndex, succ_idx: usize) -> &[VReg]1565 fn branch_blockparams(&self, block: BlockIndex, _insn: InsnIndex, succ_idx: usize) -> &[VReg] { 1566 let succ_range = self.branch_block_arg_succ_range.get(block.index()); 1567 debug_assert!(succ_idx < succ_range.len()); 1568 let branch_block_args = self.branch_block_arg_range.get(succ_range.start + succ_idx); 1569 &self.branch_block_args[branch_block_args] 1570 } 1571 is_ret(&self, insn: InsnIndex) -> bool1572 fn is_ret(&self, insn: InsnIndex) -> bool { 1573 match self.insts[insn.index()].is_term() { 1574 // We treat blocks terminated by an unconditional trap like a return for regalloc. 1575 MachTerminator::None => self.insts[insn.index()].is_trap(), 1576 MachTerminator::Ret | MachTerminator::RetCall => true, 1577 MachTerminator::Branch => false, 1578 } 1579 } 1580 is_branch(&self, insn: InsnIndex) -> bool1581 fn is_branch(&self, insn: InsnIndex) -> bool { 1582 match self.insts[insn.index()].is_term() { 1583 MachTerminator::Branch => true, 1584 _ => false, 1585 } 1586 } 1587 inst_operands(&self, insn: InsnIndex) -> &[Operand]1588 fn inst_operands(&self, insn: InsnIndex) -> &[Operand] { 1589 let range = self.operand_ranges.get(insn.index()); 1590 &self.operands[range] 1591 } 1592 inst_clobbers(&self, insn: InsnIndex) -> PRegSet1593 fn inst_clobbers(&self, insn: InsnIndex) -> PRegSet { 1594 self.clobbers.get(&insn).cloned().unwrap_or_default() 1595 } 1596 num_vregs(&self) -> usize1597 fn num_vregs(&self) -> usize { 1598 self.vreg_types.len() 1599 } 1600 debug_value_labels(&self) -> &[(VReg, InsnIndex, InsnIndex, u32)]1601 fn debug_value_labels(&self) -> &[(VReg, InsnIndex, InsnIndex, u32)] { 1602 &self.debug_value_labels 1603 } 1604 spillslot_size(&self, regclass: RegClass) -> usize1605 fn spillslot_size(&self, regclass: RegClass) -> usize { 1606 self.abi.get_spillslot_size(regclass) as usize 1607 } 1608 allow_multiple_vreg_defs(&self) -> bool1609 fn allow_multiple_vreg_defs(&self) -> bool { 1610 // At least the s390x backend requires this, because the 1611 // `Loop` pseudo-instruction aggregates all Operands so pinned 1612 // vregs (RealRegs) may occur more than once. 1613 true 1614 } 1615 } 1616 1617 impl<I: VCodeInst> Debug for VRegAllocator<I> { fmt(&self, f: &mut fmt::Formatter) -> fmt::Result1618 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 1619 writeln!(f, "VRegAllocator {{")?; 1620 1621 let mut alias_keys = self.vreg_aliases.keys().cloned().collect::<Vec<_>>(); 1622 alias_keys.sort_unstable(); 1623 for key in alias_keys { 1624 let dest = self.vreg_aliases.get(&key).unwrap(); 1625 writeln!(f, " {:?} := {:?}", Reg::from(key), Reg::from(*dest))?; 1626 } 1627 1628 writeln!(f, "}}") 1629 } 1630 } 1631 1632 impl<I: VCodeInst> fmt::Debug for VCode<I> { fmt(&self, f: &mut fmt::Formatter) -> fmt::Result1633 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 1634 writeln!(f, "VCode {{")?; 1635 writeln!(f, " Entry block: {}", self.entry.index())?; 1636 1637 let mut state = Default::default(); 1638 1639 for block in 0..self.num_blocks() { 1640 let block = BlockIndex::new(block); 1641 writeln!( 1642 f, 1643 "Block {}({:?}):", 1644 block.index(), 1645 self.block_params(block) 1646 )?; 1647 if let Some(bb) = self.bindex_to_bb(block) { 1648 writeln!(f, " (original IR block: {bb})")?; 1649 } 1650 for (succ_idx, succ) in self.block_succs(block).iter().enumerate() { 1651 writeln!( 1652 f, 1653 " (successor: Block {}({:?}))", 1654 succ.index(), 1655 self.branch_blockparams(block, InsnIndex::new(0) /* dummy */, succ_idx) 1656 )?; 1657 } 1658 for inst in self.block_ranges.get(block.index()) { 1659 writeln!( 1660 f, 1661 " Inst {}: {}", 1662 inst, 1663 self.insts[inst].pretty_print_inst(&mut state) 1664 )?; 1665 if let Some(user_stack_map) = self.get_user_stack_map(InsnIndex::new(inst)) { 1666 writeln!(f, " {user_stack_map:?}")?; 1667 } 1668 } 1669 } 1670 1671 writeln!(f, "}}")?; 1672 Ok(()) 1673 } 1674 } 1675 1676 /// This structure manages VReg allocation during the lifetime of the VCodeBuilder. 1677 pub struct VRegAllocator<I> { 1678 /// VReg IR-level types. 1679 vreg_types: Vec<Type>, 1680 1681 /// VReg aliases. When the final VCode is built we rewrite all 1682 /// uses of the keys in this table to their replacement values. 1683 /// 1684 /// We use these aliases to rename an instruction's expected 1685 /// result vregs to the returned vregs from lowering, which are 1686 /// usually freshly-allocated temps. 1687 vreg_aliases: FxHashMap<regalloc2::VReg, regalloc2::VReg>, 1688 1689 /// A deferred error, to be bubbled up to the top level of the 1690 /// lowering algorithm. We take this approach because we cannot 1691 /// currently propagate a `Result` upward through ISLE code (the 1692 /// lowering rules) or some ABI code. 1693 deferred_error: Option<CodegenError>, 1694 1695 /// The type of instruction that this allocator makes registers for. 1696 _inst: core::marker::PhantomData<I>, 1697 } 1698 1699 impl<I: VCodeInst> VRegAllocator<I> { 1700 /// Make a new VRegAllocator. with_capacity(capacity: usize) -> Self1701 pub fn with_capacity(capacity: usize) -> Self { 1702 let capacity = first_user_vreg_index() + capacity; 1703 let mut vreg_types = Vec::with_capacity(capacity); 1704 vreg_types.resize(first_user_vreg_index(), types::INVALID); 1705 Self { 1706 vreg_types, 1707 vreg_aliases: FxHashMap::with_capacity_and_hasher(capacity, Default::default()), 1708 deferred_error: None, 1709 _inst: core::marker::PhantomData::default(), 1710 } 1711 } 1712 1713 /// Allocate a fresh ValueRegs. alloc(&mut self, ty: Type) -> CodegenResult<ValueRegs<Reg>>1714 pub fn alloc(&mut self, ty: Type) -> CodegenResult<ValueRegs<Reg>> { 1715 if self.deferred_error.is_some() { 1716 return Err(CodegenError::CodeTooLarge); 1717 } 1718 let v = self.vreg_types.len(); 1719 let (regclasses, tys) = I::rc_for_type(ty)?; 1720 1721 // Check that new indices are in-bounds for regalloc2's 1722 // VReg/Operand representation. 1723 if v + regclasses.len() > VReg::MAX { 1724 return Err(CodegenError::CodeTooLarge); 1725 } 1726 1727 // Check that new indices are in-bounds for our Reg 1728 // bit-packing on top of the RA2 types, which represents 1729 // spillslots as well. 1730 let check = |vreg: regalloc2::VReg| -> CodegenResult<Reg> { 1731 Reg::from_virtual_reg_checked(vreg).ok_or(CodegenError::CodeTooLarge) 1732 }; 1733 1734 let regs: ValueRegs<Reg> = match regclasses { 1735 &[rc0] => ValueRegs::one(check(VReg::new(v, rc0))?), 1736 &[rc0, rc1] => ValueRegs::two(check(VReg::new(v, rc0))?, check(VReg::new(v + 1, rc1))?), 1737 // We can extend this if/when we support 32-bit targets; e.g., 1738 // an i128 on a 32-bit machine will need up to four machine regs 1739 // for a `Value`. 1740 _ => panic!("Value must reside in 1 or 2 registers"), 1741 }; 1742 for (®_ty, ®) in tys.iter().zip(regs.regs().iter()) { 1743 let vreg = reg.to_virtual_reg().unwrap(); 1744 debug_assert_eq!(self.vreg_types.len(), vreg.index()); 1745 self.vreg_types.push(reg_ty); 1746 } 1747 1748 Ok(regs) 1749 } 1750 1751 /// Allocate a fresh ValueRegs, deferring any out-of-vregs 1752 /// errors. This is useful in places where we cannot bubble a 1753 /// `CodegenResult` upward easily, and which are known to be 1754 /// invoked from within the lowering loop that checks the deferred 1755 /// error status below. alloc_with_deferred_error(&mut self, ty: Type) -> ValueRegs<Reg>1756 pub fn alloc_with_deferred_error(&mut self, ty: Type) -> ValueRegs<Reg> { 1757 match self.alloc(ty) { 1758 Ok(x) => x, 1759 Err(e) => { 1760 self.deferred_error = Some(e); 1761 self.bogus_for_deferred_error(ty) 1762 } 1763 } 1764 } 1765 1766 /// Take any deferred error that was accumulated by `alloc_with_deferred_error`. take_deferred_error(&mut self) -> Option<CodegenError>1767 pub fn take_deferred_error(&mut self) -> Option<CodegenError> { 1768 self.deferred_error.take() 1769 } 1770 1771 /// Produce an bogus VReg placeholder with the proper number of 1772 /// registers for the given type. This is meant to be used with 1773 /// deferred allocation errors (see `Lower::alloc_tmp()`). bogus_for_deferred_error(&self, ty: Type) -> ValueRegs<Reg>1774 fn bogus_for_deferred_error(&self, ty: Type) -> ValueRegs<Reg> { 1775 let (regclasses, _tys) = I::rc_for_type(ty).expect("must have valid type"); 1776 match regclasses { 1777 &[rc0] => ValueRegs::one(VReg::new(0, rc0).into()), 1778 &[rc0, rc1] => ValueRegs::two(VReg::new(0, rc0).into(), VReg::new(1, rc1).into()), 1779 _ => panic!("Value must reside in 1 or 2 registers"), 1780 } 1781 } 1782 1783 /// Rewrite any mention of `from` into `to`. set_vreg_alias(&mut self, from: Reg, to: Reg)1784 pub fn set_vreg_alias(&mut self, from: Reg, to: Reg) { 1785 let from = from.into(); 1786 let resolved_to = self.resolve_vreg_alias(to.into()); 1787 // Disallow cycles (see below). 1788 assert_ne!(resolved_to, from); 1789 1790 let old_alias = self.vreg_aliases.insert(from, resolved_to); 1791 debug_assert_eq!(old_alias, None); 1792 } 1793 resolve_vreg_alias(&self, mut vreg: regalloc2::VReg) -> regalloc2::VReg1794 fn resolve_vreg_alias(&self, mut vreg: regalloc2::VReg) -> regalloc2::VReg { 1795 // We prevent cycles from existing by resolving targets of 1796 // aliases eagerly before setting them. If the target resolves 1797 // to the origin of the alias, then a cycle would be created 1798 // and the alias is disallowed. Because of the structure of 1799 // SSA code (one instruction can refer to another's defs but 1800 // not vice-versa, except indirectly through 1801 // phis/blockparams), cycles should not occur as we use 1802 // aliases to redirect vregs to the temps that actually define 1803 // them. 1804 while let Some(to) = self.vreg_aliases.get(&vreg) { 1805 vreg = *to; 1806 } 1807 vreg 1808 } 1809 1810 #[inline] debug_assert_no_vreg_aliases(&self, mut list: impl Iterator<Item = VReg>)1811 fn debug_assert_no_vreg_aliases(&self, mut list: impl Iterator<Item = VReg>) { 1812 debug_assert!(list.all(|vreg| !self.vreg_aliases.contains_key(&vreg))); 1813 } 1814 } 1815 1816 /// This structure tracks the large constants used in VCode that will be emitted separately by the 1817 /// [MachBuffer]. 1818 /// 1819 /// First, during the lowering phase, constants are inserted using 1820 /// [VCodeConstants.insert]; an intermediate handle, `VCodeConstant`, tracks what constants are 1821 /// used in this phase. Some deduplication is performed, when possible, as constant 1822 /// values are inserted. 1823 /// 1824 /// Secondly, during the emission phase, the [MachBuffer] assigns [MachLabel]s for each of the 1825 /// constants so that instructions can refer to the value's memory location. The [MachBuffer] 1826 /// then writes the constant values to the buffer. 1827 #[derive(Default)] 1828 pub struct VCodeConstants { 1829 constants: PrimaryMap<VCodeConstant, VCodeConstantData>, 1830 pool_uses: HashMap<Constant, VCodeConstant>, 1831 well_known_uses: HashMap<*const [u8], VCodeConstant>, 1832 u64s: HashMap<[u8; 8], VCodeConstant>, 1833 } 1834 impl VCodeConstants { 1835 /// Initialize the structure with the expected number of constants. with_capacity(expected_num_constants: usize) -> Self1836 pub fn with_capacity(expected_num_constants: usize) -> Self { 1837 Self { 1838 constants: PrimaryMap::with_capacity(expected_num_constants), 1839 pool_uses: HashMap::with_capacity(expected_num_constants), 1840 well_known_uses: HashMap::new(), 1841 u64s: HashMap::new(), 1842 } 1843 } 1844 1845 /// Insert a constant; using this method indicates that a constant value will be used and thus 1846 /// will be emitted to the `MachBuffer`. The current implementation can deduplicate constants 1847 /// that are [VCodeConstantData::Pool] or [VCodeConstantData::WellKnown] but not 1848 /// [VCodeConstantData::Generated]. insert(&mut self, data: VCodeConstantData) -> VCodeConstant1849 pub fn insert(&mut self, data: VCodeConstantData) -> VCodeConstant { 1850 match data { 1851 VCodeConstantData::Generated(_) => self.constants.push(data), 1852 VCodeConstantData::Pool(constant, _) => match self.pool_uses.get(&constant) { 1853 None => { 1854 let vcode_constant = self.constants.push(data); 1855 self.pool_uses.insert(constant, vcode_constant); 1856 vcode_constant 1857 } 1858 Some(&vcode_constant) => vcode_constant, 1859 }, 1860 VCodeConstantData::WellKnown(data_ref) => { 1861 match self.well_known_uses.entry(data_ref as *const [u8]) { 1862 Entry::Vacant(v) => { 1863 let vcode_constant = self.constants.push(data); 1864 v.insert(vcode_constant); 1865 vcode_constant 1866 } 1867 Entry::Occupied(o) => *o.get(), 1868 } 1869 } 1870 VCodeConstantData::U64(value) => match self.u64s.entry(value) { 1871 Entry::Vacant(v) => { 1872 let vcode_constant = self.constants.push(data); 1873 v.insert(vcode_constant); 1874 vcode_constant 1875 } 1876 Entry::Occupied(o) => *o.get(), 1877 }, 1878 } 1879 } 1880 1881 /// Return the number of constants inserted. len(&self) -> usize1882 pub fn len(&self) -> usize { 1883 self.constants.len() 1884 } 1885 1886 /// Iterate over the `VCodeConstant` keys inserted in this structure. keys(&self) -> Keys<VCodeConstant>1887 pub fn keys(&self) -> Keys<VCodeConstant> { 1888 self.constants.keys() 1889 } 1890 1891 /// Iterate over the `VCodeConstant` keys and the data (as a byte slice) inserted in this 1892 /// structure. iter(&self) -> impl Iterator<Item = (VCodeConstant, &VCodeConstantData)>1893 pub fn iter(&self) -> impl Iterator<Item = (VCodeConstant, &VCodeConstantData)> { 1894 self.constants.iter() 1895 } 1896 1897 /// Returns the data associated with the specified constant. get(&self, c: VCodeConstant) -> &VCodeConstantData1898 pub fn get(&self, c: VCodeConstant) -> &VCodeConstantData { 1899 &self.constants[c] 1900 } 1901 1902 /// Checks if the given [VCodeConstantData] is registered as 1903 /// used by the pool. pool_uses(&self, constant: &VCodeConstantData) -> bool1904 pub fn pool_uses(&self, constant: &VCodeConstantData) -> bool { 1905 match constant { 1906 VCodeConstantData::Pool(c, _) => self.pool_uses.contains_key(c), 1907 _ => false, 1908 } 1909 } 1910 } 1911 1912 /// A use of a constant by one or more VCode instructions; see [VCodeConstants]. 1913 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 1914 pub struct VCodeConstant(u32); 1915 entity_impl!(VCodeConstant); 1916 1917 /// Identify the different types of constant that can be inserted into [VCodeConstants]. Tracking 1918 /// these separately instead of as raw byte buffers allows us to avoid some duplication. 1919 pub enum VCodeConstantData { 1920 /// A constant already present in the Cranelift IR 1921 /// [ConstantPool](crate::ir::constant::ConstantPool). 1922 Pool(Constant, ConstantData), 1923 /// A reference to a well-known constant value that is statically encoded within the compiler. 1924 WellKnown(&'static [u8]), 1925 /// A constant value generated during lowering; the value may depend on the instruction context 1926 /// which makes it difficult to de-duplicate--if possible, use other variants. 1927 Generated(ConstantData), 1928 /// A constant of at most 64 bits. These are deduplicated as 1929 /// well. Stored as a fixed-size array of `u8` so that we do not 1930 /// encounter endianness problems when cross-compiling. 1931 U64([u8; 8]), 1932 } 1933 impl VCodeConstantData { 1934 /// Retrieve the constant data as a byte slice. as_slice(&self) -> &[u8]1935 pub fn as_slice(&self) -> &[u8] { 1936 match self { 1937 VCodeConstantData::Pool(_, d) | VCodeConstantData::Generated(d) => d.as_slice(), 1938 VCodeConstantData::WellKnown(d) => d, 1939 VCodeConstantData::U64(value) => &value[..], 1940 } 1941 } 1942 1943 /// Calculate the alignment of the constant data. alignment(&self) -> u321944 pub fn alignment(&self) -> u32 { 1945 if self.as_slice().len() <= 8 { 8 } else { 16 } 1946 } 1947 } 1948 1949 #[cfg(test)] 1950 mod test { 1951 use super::*; 1952 use core::mem::size_of; 1953 1954 #[test] size_of_constant_structs()1955 fn size_of_constant_structs() { 1956 assert_eq!(size_of::<Constant>(), 4); 1957 assert_eq!(size_of::<VCodeConstant>(), 4); 1958 assert_eq!(size_of::<ConstantData>(), 3 * size_of::<usize>()); 1959 assert_eq!(size_of::<VCodeConstantData>(), 4 * size_of::<usize>()); 1960 assert_eq!( 1961 size_of::<PrimaryMap<VCodeConstant, VCodeConstantData>>(), 1962 3 * size_of::<usize>() 1963 ); 1964 // TODO The VCodeConstants structure's memory size could be further optimized. 1965 // With certain versions of Rust, each `HashMap` in `VCodeConstants` occupied at 1966 // least 48 bytes, making an empty `VCodeConstants` cost 120 bytes. 1967 } 1968 } 1969