1 //! This implements the VCode container: a CFG of Insts that have been lowered. 2 //! 3 //! VCode is virtual-register code. An instruction in VCode is almost a machine 4 //! instruction; however, its register slots can refer to virtual registers in 5 //! addition to real machine registers. 6 //! 7 //! VCode is structured with traditional basic blocks, and 8 //! each block must be terminated by an unconditional branch (one target), a 9 //! conditional branch (two targets), or a return (no targets). Note that this 10 //! slightly differs from the machine code of most ISAs: in most ISAs, a 11 //! conditional branch has one target (and the not-taken case falls through). 12 //! However, we expect that machine backends will elide branches to the following 13 //! block (i.e., zero-offset jumps), and will be able to codegen a branch-cond / 14 //! branch-uncond pair if *both* targets are not fallthrough. This allows us to 15 //! play with layout prior to final binary emission, as well, if we want. 16 //! 17 //! See the main module comment in `mod.rs` for more details on the VCode-based 18 //! backend pipeline. 19 20 use crate::CodegenError; 21 use crate::FxHashMap; 22 use crate::ir::pcc::*; 23 use crate::ir::{self, Constant, ConstantData, ValueLabel, types}; 24 use crate::ranges::Ranges; 25 use crate::timing; 26 use crate::trace; 27 use crate::{LabelValueLoc, ValueLocRange}; 28 use crate::{machinst::*, trace_log_enabled}; 29 use regalloc2::{ 30 Edit, Function as RegallocFunction, InstOrEdit, InstPosition, InstRange, Operand, 31 OperandConstraint, OperandKind, PRegSet, ProgPoint, RegClass, 32 }; 33 34 use crate::HashMap; 35 use crate::hash_map::Entry; 36 use core::cmp::Ordering; 37 use core::fmt::{self, Write}; 38 use core::mem::take; 39 use core::ops::Range; 40 use cranelift_entity::{Keys, entity_impl}; 41 42 /// Index referring to an instruction in VCode. 43 pub type InsnIndex = regalloc2::Inst; 44 45 /// Extension trait for `InsnIndex` to allow conversion to a 46 /// `BackwardsInsnIndex`. 47 trait ToBackwardsInsnIndex { 48 fn to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex; 49 } 50 51 impl ToBackwardsInsnIndex for InsnIndex { 52 fn to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex { 53 BackwardsInsnIndex::new(num_insts - self.index() - 1) 54 } 55 } 56 57 /// An index referring to an instruction in the VCode when it is backwards, 58 /// during VCode construction. 59 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] 60 #[cfg_attr( 61 feature = "enable-serde", 62 derive(::serde::Serialize, ::serde::Deserialize) 63 )] 64 pub struct BackwardsInsnIndex(InsnIndex); 65 66 impl BackwardsInsnIndex { 67 pub fn new(i: usize) -> Self { 68 BackwardsInsnIndex(InsnIndex::new(i)) 69 } 70 } 71 72 /// Index referring to a basic block in VCode. 73 pub type BlockIndex = regalloc2::Block; 74 75 /// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be 76 /// a `MachInst` and it must be able to emit itself at least to a `SizeCodeSink`. 77 pub trait VCodeInst: MachInst + MachInstEmit {} 78 impl<I: MachInst + MachInstEmit> VCodeInst for I {} 79 80 /// A function in "VCode" (virtualized-register code) form, after 81 /// lowering. This is essentially a standard CFG of basic blocks, 82 /// where each basic block consists of lowered instructions produced 83 /// by the machine-specific backend. 84 /// 85 /// Note that the VCode is immutable once produced, and is not 86 /// modified by register allocation in particular. Rather, register 87 /// allocation on the `VCode` produces a separate `regalloc2::Output` 88 /// struct, and this can be passed to `emit`. `emit` in turn does not 89 /// modify the vcode, but produces an `EmitResult`, which contains the 90 /// machine code itself, and the associated disassembly and/or 91 /// metadata as requested. 92 pub struct VCode<I: VCodeInst> { 93 /// VReg IR-level types. 94 vreg_types: Vec<Type>, 95 96 /// Lowered machine instructions in order corresponding to the original IR. 97 insts: Vec<I>, 98 99 /// A map from backwards instruction index to the user stack map for that 100 /// instruction. 101 /// 102 /// This is a sparse side table that only has entries for instructions that 103 /// are safepoints, and only for a subset of those that have an associated 104 /// user stack map. 105 user_stack_maps: FxHashMap<BackwardsInsnIndex, ir::UserStackMap>, 106 107 /// A map from backwards instruction index to the debug tags for 108 /// that instruction. Each entry indexes a range in the 109 /// `debug_tag_pool`. 110 debug_tags: FxHashMap<BackwardsInsnIndex, Range<u32>>, 111 112 /// Pooled storage for sequences of debug tags; indexed by entries 113 /// in `debug_tags`. 114 debug_tag_pool: Vec<ir::DebugTag>, 115 116 /// Operands: pre-regalloc references to virtual registers with 117 /// constraints, in one flattened array. This allows the regalloc 118 /// to efficiently access all operands without requiring expensive 119 /// matches or method invocations on insts. 120 operands: Vec<Operand>, 121 122 /// Operand index ranges: for each instruction in `insts`, there 123 /// is a tuple here providing the range in `operands` for that 124 /// instruction's operands. 125 operand_ranges: Ranges, 126 127 /// Clobbers: a sparse map from instruction indices to clobber masks. 128 clobbers: FxHashMap<InsnIndex, PRegSet>, 129 130 /// Source locations for each instruction. (`SourceLoc` is a `u32`, so it is 131 /// reasonable to keep one of these per instruction.) 132 srclocs: Vec<RelSourceLoc>, 133 134 /// Entry block. 135 entry: BlockIndex, 136 137 /// Block instruction indices. 138 block_ranges: Ranges, 139 140 /// Block successors: index range in the `block_succs` list. 141 block_succ_range: Ranges, 142 143 /// Block successor lists, concatenated into one vec. The 144 /// `block_succ_range` list of tuples above gives (start, end) 145 /// ranges within this list that correspond to each basic block's 146 /// successors. 147 block_succs: Vec<regalloc2::Block>, 148 149 /// Block predecessors: index range in the `block_preds` list. 150 block_pred_range: Ranges, 151 152 /// Block predecessor lists, concatenated into one vec. The 153 /// `block_pred_range` list of tuples above gives (start, end) 154 /// ranges within this list that correspond to each basic block's 155 /// predecessors. 156 block_preds: Vec<regalloc2::Block>, 157 158 /// Block parameters: index range in `block_params` below. 159 block_params_range: Ranges, 160 161 /// Block parameter lists, concatenated into one vec. The 162 /// `block_params_range` list of tuples above gives (start, end) 163 /// ranges within this list that correspond to each basic block's 164 /// blockparam vregs. 165 block_params: Vec<regalloc2::VReg>, 166 167 /// Outgoing block arguments on branch instructions, concatenated 168 /// into one list. 169 /// 170 /// Note that this is conceptually a 3D array: we have a VReg list 171 /// per block, per successor. We flatten those three dimensions 172 /// into this 1D vec, then store index ranges in two levels of 173 /// indirection. 174 /// 175 /// Indexed by the indices in `branch_block_arg_succ_range`. 176 branch_block_args: Vec<regalloc2::VReg>, 177 178 /// Array of sequences of (start, end) tuples in 179 /// `branch_block_args`, one for each successor; these sequences 180 /// for each block are concatenated. 181 /// 182 /// Indexed by the indices in `branch_block_arg_succ_range`. 183 branch_block_arg_range: Ranges, 184 185 /// For a given block, indices in `branch_block_arg_range` 186 /// corresponding to all of its successors. 187 branch_block_arg_succ_range: Ranges, 188 189 /// Block-order information. 190 block_order: BlockLoweringOrder, 191 192 /// ABI object. 193 pub(crate) abi: Callee<I::ABIMachineSpec>, 194 195 /// Constant information used during code emission. This should be 196 /// immutable across function compilations within the same module. 197 emit_info: I::Info, 198 199 /// Constants. 200 pub(crate) constants: VCodeConstants, 201 202 /// Value labels for debuginfo attached to vregs. 203 debug_value_labels: Vec<(VReg, InsnIndex, InsnIndex, u32)>, 204 205 pub(crate) sigs: SigSet, 206 207 /// Facts on VRegs, for proof-carrying code verification. 208 facts: Vec<Option<Fact>>, 209 210 log2_min_function_alignment: u8, 211 } 212 213 /// The result of `VCode::emit`. Contains all information computed 214 /// during emission: actual machine code, optionally a disassembly, 215 /// and optionally metadata about the code layout. 216 pub struct EmitResult { 217 /// The MachBuffer containing the machine code. 218 pub buffer: MachBufferFinalized<Stencil>, 219 220 /// Offset of each basic block, recorded during emission. Computed 221 /// only if `machine_code_cfg_info` is enabled. 222 pub bb_offsets: Vec<CodeOffset>, 223 224 /// Final basic-block edges, in terms of code offsets of 225 /// bb-starts. Computed only if `machine_code_cfg_info` is enabled. 226 pub bb_edges: Vec<(CodeOffset, CodeOffset)>, 227 228 /// The pretty-printed disassembly, if any. This uses the same 229 /// pretty-printing for MachInsts as the pre-regalloc VCode Debug 230 /// implementation, but additionally includes the prologue and 231 /// epilogue(s), and makes use of the regalloc results. 232 pub disasm: Option<String>, 233 234 /// Value-labels information (debug metadata). 235 pub value_labels_ranges: ValueLabelsRanges, 236 } 237 238 /// A builder for a VCode function body. 239 /// 240 /// This builder has the ability to accept instructions in either 241 /// forward or reverse order, depending on the pass direction that 242 /// produces the VCode. The lowering from CLIF to VCode<MachInst> 243 /// ordinarily occurs in reverse order (in order to allow instructions 244 /// to be lowered only if used, and not merged) so a reversal will 245 /// occur at the end of lowering to ensure the VCode is in machine 246 /// order. 247 /// 248 /// If built in reverse, block and instruction indices used once the 249 /// VCode is built are relative to the final (reversed) order, not the 250 /// order of construction. Note that this means we do not know the 251 /// final block or instruction indices when building, so we do not 252 /// hand them out. (The user is assumed to know them when appending 253 /// terminator instructions with successor blocks.) 254 pub struct VCodeBuilder<I: VCodeInst> { 255 /// In-progress VCode. 256 pub(crate) vcode: VCode<I>, 257 258 /// In what direction is the build occurring? 259 direction: VCodeBuildDirection, 260 261 /// Debug-value label in-progress map, keyed by label. For each 262 /// label, we keep disjoint ranges mapping to vregs. We'll flatten 263 /// this into (vreg, range, label) tuples when done. 264 debug_info: FxHashMap<ValueLabel, Vec<(InsnIndex, InsnIndex, VReg)>>, 265 } 266 267 /// Direction in which a VCodeBuilder builds VCode. 268 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 269 pub enum VCodeBuildDirection { 270 // TODO: add `Forward` once we need it and can test it adequately. 271 /// Backward-build pass: we expect the producer to call `emit()` 272 /// with instructions in reverse program order within each block. 273 Backward, 274 } 275 276 impl<I: VCodeInst> VCodeBuilder<I> { 277 /// Create a new VCodeBuilder. 278 pub fn new( 279 sigs: SigSet, 280 abi: Callee<I::ABIMachineSpec>, 281 emit_info: I::Info, 282 block_order: BlockLoweringOrder, 283 constants: VCodeConstants, 284 direction: VCodeBuildDirection, 285 log2_min_function_alignment: u8, 286 ) -> Self { 287 let vcode = VCode::new( 288 sigs, 289 abi, 290 emit_info, 291 block_order, 292 constants, 293 log2_min_function_alignment, 294 ); 295 296 VCodeBuilder { 297 vcode, 298 direction, 299 debug_info: FxHashMap::default(), 300 } 301 } 302 303 pub fn init_retval_area(&mut self, vregs: &mut VRegAllocator<I>) -> CodegenResult<()> { 304 self.vcode.abi.init_retval_area(&self.vcode.sigs, vregs) 305 } 306 307 /// Access the ABI object. 308 pub fn abi(&self) -> &Callee<I::ABIMachineSpec> { 309 &self.vcode.abi 310 } 311 312 /// Access the ABI object. 313 pub fn abi_mut(&mut self) -> &mut Callee<I::ABIMachineSpec> { 314 &mut self.vcode.abi 315 } 316 317 pub fn sigs(&self) -> &SigSet { 318 &self.vcode.sigs 319 } 320 321 pub fn sigs_mut(&mut self) -> &mut SigSet { 322 &mut self.vcode.sigs 323 } 324 325 /// Access to the BlockLoweringOrder object. 326 pub fn block_order(&self) -> &BlockLoweringOrder { 327 &self.vcode.block_order 328 } 329 330 /// Set the current block as the entry block. 331 pub fn set_entry(&mut self, block: BlockIndex) { 332 self.vcode.entry = block; 333 } 334 335 /// End the current basic block. Must be called after emitting vcode insts 336 /// for IR insts and prior to ending the function (building the VCode). 337 pub fn end_bb(&mut self) { 338 let end_idx = self.vcode.insts.len(); 339 // Add the instruction index range to the list of blocks. 340 self.vcode.block_ranges.push_end(end_idx); 341 // End the successors list. 342 let succ_end = self.vcode.block_succs.len(); 343 self.vcode.block_succ_range.push_end(succ_end); 344 // End the blockparams list. 345 let block_params_end = self.vcode.block_params.len(); 346 self.vcode.block_params_range.push_end(block_params_end); 347 // End the branch blockparam args list. 348 let branch_block_arg_succ_end = self.vcode.branch_block_arg_range.len(); 349 self.vcode 350 .branch_block_arg_succ_range 351 .push_end(branch_block_arg_succ_end); 352 } 353 354 pub fn add_block_param(&mut self, param: VirtualReg) { 355 self.vcode.block_params.push(param.into()); 356 } 357 358 fn add_branch_args_for_succ(&mut self, args: &[Reg]) { 359 self.vcode 360 .branch_block_args 361 .extend(args.iter().map(|&arg| VReg::from(arg))); 362 let end = self.vcode.branch_block_args.len(); 363 self.vcode.branch_block_arg_range.push_end(end); 364 } 365 366 /// Push an instruction for the current BB and current IR inst 367 /// within the BB. 368 pub fn push(&mut self, insn: I, loc: RelSourceLoc) { 369 assert!(!insn.is_low_level_branch()); // These are not meant to be in VCode. 370 self.vcode.insts.push(insn); 371 self.vcode.srclocs.push(loc); 372 } 373 374 /// Add a successor block with branch args. 375 pub fn add_succ(&mut self, block: BlockIndex, args: &[Reg]) { 376 self.vcode.block_succs.push(block); 377 self.add_branch_args_for_succ(args); 378 } 379 380 /// Add a debug value label to a register. 381 pub fn add_value_label(&mut self, reg: Reg, label: ValueLabel) { 382 // 1) In the reversed order, we consider the instructions 383 // that define ranges in the "debug_info" array to refer 384 // to the IP **after** them (when reversed): 385 // IP[2]__| Inst 3 | 386 // IP[1]__| Inst 2 | 387 // IP[0]__| Inst 1 | 388 // | Inst 0 | 389 // This is so that we can represent IP[<function start>], 390 // done at the cost of not being to represent IP[<function end>], 391 // which is OK since no values will be live at that point. 392 // 2) The live range for "reg" begins at the current IP 393 // and continues until the next, in execution order, 394 // VReg that defines "label". Since the ranges are open 395 // at the end, the subtraction of 1 cancels out: 396 // [last..current IP] <=> 397 // [last..last emitted inst index] <=> 398 // [last..next_inst_index - 1] <=> 399 // [last..next_inst_index) 400 // 401 let next_inst_index = self.vcode.insts.len(); 402 if next_inst_index == 0 { 403 // This would produce a defective [0..0) range. 404 return; 405 } 406 let next_inst = InsnIndex::new(next_inst_index); 407 let labels = self.debug_info.entry(label).or_insert_with(|| vec![]); 408 let last = labels 409 .last() 410 .map(|(_start, end, _vreg)| *end) 411 .unwrap_or(InsnIndex::new(0)); 412 labels.push((last, next_inst, reg.into())); 413 } 414 415 /// Access the constants. 416 pub fn constants(&mut self) -> &mut VCodeConstants { 417 &mut self.vcode.constants 418 } 419 420 fn compute_preds_from_succs(&mut self) { 421 // Do a linear-time counting sort: first determine how many 422 // times each block appears as a successor. 423 let mut starts = vec![0u32; self.vcode.num_blocks()]; 424 for succ in &self.vcode.block_succs { 425 starts[succ.index()] += 1; 426 } 427 428 // Determine for each block the starting index where that 429 // block's predecessors should go. This is equivalent to the 430 // ranges we need to store in block_pred_range. 431 self.vcode.block_pred_range.reserve(starts.len()); 432 let mut end = 0; 433 for count in starts.iter_mut() { 434 let start = end; 435 end += *count; 436 *count = start; 437 self.vcode.block_pred_range.push_end(end as usize); 438 } 439 let end = end as usize; 440 debug_assert_eq!(end, self.vcode.block_succs.len()); 441 442 // Walk over the successors again, this time grouped by 443 // predecessor, and push the predecessor at the current 444 // starting position of each of its successors. We build 445 // each group of predecessors in whatever order Ranges::iter 446 // returns them; regalloc2 doesn't care. 447 self.vcode.block_preds.resize(end, BlockIndex::invalid()); 448 for (pred, range) in self.vcode.block_succ_range.iter() { 449 let pred = BlockIndex::new(pred); 450 for succ in &self.vcode.block_succs[range] { 451 let pos = &mut starts[succ.index()]; 452 self.vcode.block_preds[*pos as usize] = pred; 453 *pos += 1; 454 } 455 } 456 debug_assert!(self.vcode.block_preds.iter().all(|pred| pred.is_valid())); 457 } 458 459 /// Called once, when a build in Backward order is complete, to 460 /// perform the overall reversal (into final forward order) and 461 /// finalize metadata accordingly. 462 fn reverse_and_finalize(&mut self, vregs: &VRegAllocator<I>) { 463 let n_insts = self.vcode.insts.len(); 464 if n_insts == 0 { 465 return; 466 } 467 468 // Reverse the per-block and per-inst sequences. 469 self.vcode.block_ranges.reverse_index(); 470 self.vcode.block_ranges.reverse_target(n_insts); 471 // block_params_range is indexed by block (and blocks were 472 // traversed in reverse) so we reverse it; but block-param 473 // sequences in the concatenated vec can remain in reverse 474 // order (it is effectively an arena of arbitrarily-placed 475 // referenced sequences). 476 self.vcode.block_params_range.reverse_index(); 477 // Likewise, we reverse block_succ_range, but the block_succ 478 // concatenated array can remain as-is. 479 self.vcode.block_succ_range.reverse_index(); 480 self.vcode.insts.reverse(); 481 self.vcode.srclocs.reverse(); 482 // Likewise, branch_block_arg_succ_range is indexed by block 483 // so must be reversed. 484 self.vcode.branch_block_arg_succ_range.reverse_index(); 485 486 // To translate an instruction index *endpoint* in reversed 487 // order to forward order, compute `n_insts - i`. 488 // 489 // Why not `n_insts - 1 - i`? That would be correct to 490 // translate an individual instruction index (for ten insts 0 491 // to 9 inclusive, inst 0 becomes 9, and inst 9 becomes 492 // 0). But for the usual inclusive-start, exclusive-end range 493 // idiom, inclusive starts become exclusive ends and 494 // vice-versa, so e.g. an (inclusive) start of 0 becomes an 495 // (exclusive) end of 10. 496 let translate = |inst: InsnIndex| InsnIndex::new(n_insts - inst.index()); 497 498 // Generate debug-value labels based on per-label maps. 499 for (label, tuples) in &self.debug_info { 500 for &(start, end, vreg) in tuples { 501 let vreg = vregs.resolve_vreg_alias(vreg); 502 let fwd_start = translate(end); 503 let fwd_end = translate(start); 504 self.vcode 505 .debug_value_labels 506 .push((vreg, fwd_start, fwd_end, label.as_u32())); 507 } 508 } 509 510 // Now sort debug value labels by VReg, as required 511 // by regalloc2. 512 self.vcode 513 .debug_value_labels 514 .sort_unstable_by_key(|(vreg, _, _, _)| *vreg); 515 } 516 517 fn collect_operands(&mut self, vregs: &VRegAllocator<I>) { 518 let allocatable = PRegSet::from(self.vcode.abi.machine_env()); 519 for (i, insn) in self.vcode.insts.iter_mut().enumerate() { 520 // Push operands from the instruction onto the operand list. 521 // 522 // We rename through the vreg alias table as we collect 523 // the operands. This is better than a separate post-pass 524 // over operands, because it has more cache locality: 525 // operands only need to pass through L1 once. This is 526 // also better than renaming instructions' 527 // operands/registers while lowering, because here we only 528 // need to do the `match` over the instruction to visit 529 // its register fields (which is slow, branchy code) once. 530 531 let mut op_collector = 532 OperandCollector::new(&mut self.vcode.operands, allocatable, |vreg| { 533 vregs.resolve_vreg_alias(vreg) 534 }); 535 insn.get_operands(&mut op_collector); 536 let (ops, clobbers) = op_collector.finish(); 537 self.vcode.operand_ranges.push_end(ops); 538 539 if clobbers != PRegSet::default() { 540 self.vcode.clobbers.insert(InsnIndex::new(i), clobbers); 541 } 542 543 if let Some((dst, src)) = insn.is_move() { 544 // We should never see non-virtual registers present in move 545 // instructions. 546 assert!( 547 src.is_virtual(), 548 "the real register {src:?} was used as the source of a move instruction" 549 ); 550 assert!( 551 dst.to_reg().is_virtual(), 552 "the real register {:?} was used as the destination of a move instruction", 553 dst.to_reg() 554 ); 555 } 556 } 557 558 // Translate blockparam args via the vreg aliases table as well. 559 for arg in &mut self.vcode.branch_block_args { 560 let new_arg = vregs.resolve_vreg_alias(*arg); 561 trace!("operandcollector: block arg {:?} -> {:?}", arg, new_arg); 562 *arg = new_arg; 563 } 564 } 565 566 /// Build the final VCode. 567 pub fn build(mut self, mut vregs: VRegAllocator<I>) -> VCode<I> { 568 self.vcode.vreg_types = take(&mut vregs.vreg_types); 569 self.vcode.facts = take(&mut vregs.facts); 570 571 if self.direction == VCodeBuildDirection::Backward { 572 self.reverse_and_finalize(&vregs); 573 } 574 self.collect_operands(&vregs); 575 576 self.compute_preds_from_succs(); 577 self.vcode.debug_value_labels.sort_unstable(); 578 579 // At this point, nothing in the vcode should mention any 580 // VReg which has been aliased. All the appropriate rewriting 581 // should have happened above. Just to be sure, let's 582 // double-check each field which has vregs. 583 // Note: can't easily check vcode.insts, resolved in collect_operands. 584 // Operands are resolved in collect_operands. 585 vregs.debug_assert_no_vreg_aliases(self.vcode.operands.iter().map(|op| op.vreg())); 586 // Currently block params are never aliased to another vreg. 587 vregs.debug_assert_no_vreg_aliases(self.vcode.block_params.iter().copied()); 588 // Branch block args are resolved in collect_operands. 589 vregs.debug_assert_no_vreg_aliases(self.vcode.branch_block_args.iter().copied()); 590 // Debug value labels are resolved in reverse_and_finalize. 591 vregs.debug_assert_no_vreg_aliases( 592 self.vcode.debug_value_labels.iter().map(|&(vreg, ..)| vreg), 593 ); 594 // Facts are resolved eagerly during set_vreg_alias. 595 vregs.debug_assert_no_vreg_aliases( 596 self.vcode 597 .facts 598 .iter() 599 .zip(&vregs.vreg_types) 600 .enumerate() 601 .filter(|(_, (fact, _))| fact.is_some()) 602 .map(|(vreg, (_, &ty))| { 603 let (regclasses, _) = I::rc_for_type(ty).unwrap(); 604 VReg::new(vreg, regclasses[0]) 605 }), 606 ); 607 608 self.vcode 609 } 610 611 /// Add a user stack map for the associated instruction. 612 pub fn add_user_stack_map( 613 &mut self, 614 inst: BackwardsInsnIndex, 615 entries: &[ir::UserStackMapEntry], 616 ) { 617 let stack_map = ir::UserStackMap::new(entries, self.vcode.abi.sized_stackslot_offsets()); 618 let old_entry = self.vcode.user_stack_maps.insert(inst, stack_map); 619 debug_assert!(old_entry.is_none()); 620 } 621 622 /// Add debug tags for the associated instruction. 623 pub fn add_debug_tags(&mut self, inst: BackwardsInsnIndex, entries: &[ir::DebugTag]) { 624 let start = u32::try_from(self.vcode.debug_tag_pool.len()).unwrap(); 625 self.vcode.debug_tag_pool.extend(entries.iter().cloned()); 626 let end = u32::try_from(self.vcode.debug_tag_pool.len()).unwrap(); 627 self.vcode.debug_tags.insert(inst, start..end); 628 } 629 } 630 631 const NO_INST_OFFSET: CodeOffset = u32::MAX; 632 633 impl<I: VCodeInst> VCode<I> { 634 /// New empty VCode. 635 fn new( 636 sigs: SigSet, 637 abi: Callee<I::ABIMachineSpec>, 638 emit_info: I::Info, 639 block_order: BlockLoweringOrder, 640 constants: VCodeConstants, 641 log2_min_function_alignment: u8, 642 ) -> Self { 643 let n_blocks = block_order.lowered_order().len(); 644 VCode { 645 sigs, 646 vreg_types: vec![], 647 insts: Vec::with_capacity(10 * n_blocks), 648 user_stack_maps: FxHashMap::default(), 649 debug_tags: FxHashMap::default(), 650 debug_tag_pool: vec![], 651 operands: Vec::with_capacity(30 * n_blocks), 652 operand_ranges: Ranges::with_capacity(10 * n_blocks), 653 clobbers: FxHashMap::default(), 654 srclocs: Vec::with_capacity(10 * n_blocks), 655 entry: BlockIndex::new(0), 656 block_ranges: Ranges::with_capacity(n_blocks), 657 block_succ_range: Ranges::with_capacity(n_blocks), 658 block_succs: Vec::with_capacity(n_blocks), 659 block_pred_range: Ranges::default(), 660 block_preds: Vec::new(), 661 block_params_range: Ranges::with_capacity(n_blocks), 662 block_params: Vec::with_capacity(5 * n_blocks), 663 branch_block_args: Vec::with_capacity(10 * n_blocks), 664 branch_block_arg_range: Ranges::with_capacity(2 * n_blocks), 665 branch_block_arg_succ_range: Ranges::with_capacity(n_blocks), 666 block_order, 667 abi, 668 emit_info, 669 constants, 670 debug_value_labels: vec![], 671 facts: vec![], 672 log2_min_function_alignment, 673 } 674 } 675 676 /// Get the number of blocks. Block indices will be in the range `0 .. 677 /// (self.num_blocks() - 1)`. 678 pub fn num_blocks(&self) -> usize { 679 self.block_ranges.len() 680 } 681 682 /// The number of lowered instructions. 683 pub fn num_insts(&self) -> usize { 684 self.insts.len() 685 } 686 687 fn compute_clobbers_and_function_calls( 688 &self, 689 regalloc: ®alloc2::Output, 690 ) -> (Vec<Writable<RealReg>>, FunctionCalls) { 691 let mut clobbered = PRegSet::default(); 692 let mut function_calls = FunctionCalls::None; 693 694 // All moves are included in clobbers. 695 for (_, Edit::Move { to, .. }) in ®alloc.edits { 696 if let Some(preg) = to.as_reg() { 697 clobbered.add(preg); 698 } 699 } 700 701 for (i, range) in self.operand_ranges.iter() { 702 let operands = &self.operands[range.clone()]; 703 let allocs = ®alloc.allocs[range]; 704 for (operand, alloc) in operands.iter().zip(allocs.iter()) { 705 if operand.kind() == OperandKind::Def { 706 if let Some(preg) = alloc.as_reg() { 707 clobbered.add(preg); 708 } 709 } 710 } 711 712 function_calls.update(self.insts[i].call_type()); 713 714 // Also add explicitly-clobbered registers. 715 // 716 // Skip merging this instruction's clobber list if not 717 // "included in clobbers" as per the MachInst. (Some 718 // backends use this to implement ABI specifics; e.g., 719 // excluding calls of the same ABI as the current function 720 // from clobbers, because by definition everything 721 // clobbered by the call can be clobbered by this function 722 // without saving as well. 723 // 724 // This is important for a particular optimization: when 725 // some registers are "half-clobbered", e.g. vector/float 726 // registers on aarch64, we want them to be seen as 727 // clobbered by regalloc so it avoids carrying values 728 // across calls in these registers but not seen as 729 // clobbered by prologue generation here (because the 730 // actual half-clobber implied by the clobber list fits 731 // within the clobbers that we allow without 732 // clobber-saves). 733 if self.insts[i].is_included_in_clobbers() { 734 if let Some(&inst_clobbered) = self.clobbers.get(&InsnIndex::new(i)) { 735 clobbered.union_from(inst_clobbered); 736 } 737 } 738 } 739 740 let clobbered_regs = clobbered 741 .into_iter() 742 .map(|preg| Writable::from_reg(RealReg::from(preg))) 743 .collect(); 744 745 (clobbered_regs, function_calls) 746 } 747 748 /// Emit the instructions to a `MachBuffer`, containing fixed-up 749 /// code and external reloc/trap/etc. records ready for use. Takes 750 /// the regalloc results as well. 751 /// 752 /// Returns the machine code itself, and optionally metadata 753 /// and/or a disassembly, as an `EmitResult`. The `VCode` itself 754 /// is consumed by the emission process. 755 pub fn emit( 756 mut self, 757 regalloc: ®alloc2::Output, 758 want_disasm: bool, 759 flags: &settings::Flags, 760 ctrl_plane: &mut ControlPlane, 761 ) -> EmitResult 762 where 763 I: VCodeInst, 764 { 765 let _tt = timing::vcode_emit(); 766 let mut buffer = MachBuffer::new(); 767 buffer.set_log2_min_function_alignment(self.log2_min_function_alignment); 768 let mut bb_starts: Vec<Option<CodeOffset>> = vec![]; 769 770 // The first M MachLabels are reserved for block indices. 771 buffer.reserve_labels_for_blocks(self.num_blocks()); 772 773 // Register all allocated constants with the `MachBuffer` to ensure that 774 // any references to the constants during instructions can be handled 775 // correctly. 776 buffer.register_constants(&self.constants); 777 778 // Construct the final order we emit code in: cold blocks at the end. 779 let mut final_order: SmallVec<[BlockIndex; 16]> = smallvec![]; 780 let mut cold_blocks: SmallVec<[BlockIndex; 16]> = smallvec![]; 781 for block in 0..self.num_blocks() { 782 let block = BlockIndex::new(block); 783 if self.block_order.is_cold(block) { 784 cold_blocks.push(block); 785 } else { 786 final_order.push(block); 787 } 788 } 789 final_order.extend(cold_blocks.clone()); 790 791 // Compute/save info we need for the prologue: clobbers and 792 // number of spillslots. 793 // 794 // We clone `abi` here because we will mutate it as we 795 // generate the prologue and set other info, but we can't 796 // mutate `VCode`. The info it usually carries prior to 797 // setting clobbers is fairly minimal so this should be 798 // relatively cheap. 799 let (clobbers, function_calls) = self.compute_clobbers_and_function_calls(regalloc); 800 self.abi.compute_frame_layout( 801 &self.sigs, 802 regalloc.num_spillslots, 803 clobbers, 804 function_calls, 805 ); 806 807 // Emit blocks. 808 let mut cur_srcloc = None; 809 let mut last_offset = None; 810 let mut inst_offsets = vec![]; 811 let mut state = I::State::new(&self.abi, core::mem::take(ctrl_plane)); 812 813 let mut disasm = String::new(); 814 815 if !self.debug_value_labels.is_empty() { 816 inst_offsets.resize(self.insts.len(), NO_INST_OFFSET); 817 } 818 819 // Count edits per block ahead of time; this is needed for 820 // lookahead island emission. (We could derive it per-block 821 // with binary search in the edit list, but it's more 822 // efficient to do it in one pass here.) 823 let mut ra_edits_per_block: SmallVec<[u32; 64]> = smallvec![]; 824 let mut edit_idx = 0; 825 for block in 0..self.num_blocks() { 826 let end_inst = InsnIndex::new(self.block_ranges.get(block).end); 827 let start_edit_idx = edit_idx; 828 while edit_idx < regalloc.edits.len() && regalloc.edits[edit_idx].0.inst() < end_inst { 829 edit_idx += 1; 830 } 831 let end_edit_idx = edit_idx; 832 ra_edits_per_block.push((end_edit_idx - start_edit_idx) as u32); 833 } 834 835 let is_forward_edge_cfi_enabled = self.abi.is_forward_edge_cfi_enabled(); 836 let mut bb_padding = match flags.bb_padding_log2_minus_one() { 837 0 => Vec::new(), 838 n => vec![0; 1 << (n - 1)], 839 }; 840 let mut total_bb_padding = 0; 841 842 for (block_order_idx, &block) in final_order.iter().enumerate() { 843 trace!("emitting block {:?}", block); 844 845 // Call the new block hook for state 846 state.on_new_block(); 847 848 // Emit NOPs to align the block. 849 let new_offset = I::align_basic_block(buffer.cur_offset()); 850 while new_offset > buffer.cur_offset() { 851 // Pad with NOPs up to the aligned block offset. 852 let nop = I::gen_nop((new_offset - buffer.cur_offset()) as usize); 853 nop.emit(&mut buffer, &self.emit_info, &mut Default::default()); 854 } 855 assert_eq!(buffer.cur_offset(), new_offset); 856 857 let do_emit = |inst: &I, 858 disasm: &mut String, 859 buffer: &mut MachBuffer<I>, 860 state: &mut I::State| { 861 if want_disasm && !inst.is_args() { 862 let mut s = state.clone(); 863 writeln!(disasm, " {}", inst.pretty_print_inst(&mut s)).unwrap(); 864 } 865 inst.emit(buffer, &self.emit_info, state); 866 }; 867 868 // Is this the first block? Emit the prologue directly if so. 869 if block == self.entry { 870 trace!(" -> entry block"); 871 buffer.start_srcloc(Default::default()); 872 for inst in &self.abi.gen_prologue() { 873 do_emit(&inst, &mut disasm, &mut buffer, &mut state); 874 } 875 buffer.end_srcloc(); 876 } 877 878 // Now emit the regular block body. 879 880 buffer.bind_label(MachLabel::from_block(block), state.ctrl_plane_mut()); 881 882 if want_disasm { 883 writeln!(&mut disasm, "block{}:", block.index()).unwrap(); 884 } 885 886 if flags.machine_code_cfg_info() { 887 // Track BB starts. If we have backed up due to MachBuffer 888 // branch opts, note that the removed blocks were removed. 889 let cur_offset = buffer.cur_offset(); 890 if last_offset.is_some() && cur_offset <= last_offset.unwrap() { 891 for i in (0..bb_starts.len()).rev() { 892 if bb_starts[i].is_some() && cur_offset > bb_starts[i].unwrap() { 893 break; 894 } 895 bb_starts[i] = None; 896 } 897 } 898 bb_starts.push(Some(cur_offset)); 899 last_offset = Some(cur_offset); 900 } 901 902 if let Some(block_start) = I::gen_block_start( 903 self.block_order.is_indirect_branch_target(block), 904 is_forward_edge_cfi_enabled, 905 ) { 906 do_emit(&block_start, &mut disasm, &mut buffer, &mut state); 907 } 908 909 for inst_or_edit in regalloc.block_insts_and_edits(&self, block) { 910 match inst_or_edit { 911 InstOrEdit::Inst(iix) => { 912 if !self.debug_value_labels.is_empty() { 913 // If we need to produce debug info, 914 // record the offset of each instruction 915 // so that we can translate value-label 916 // ranges to machine-code offsets. 917 918 // Cold blocks violate monotonicity 919 // assumptions elsewhere (that 920 // instructions in inst-index order are in 921 // order in machine code), so we omit 922 // their offsets here. Value-label range 923 // generation below will skip empty ranges 924 // and ranges with to-offsets of zero. 925 if !self.block_order.is_cold(block) { 926 inst_offsets[iix.index()] = buffer.cur_offset(); 927 } 928 } 929 930 // Update the srcloc at this point in the buffer. 931 let srcloc = self.srclocs[iix.index()]; 932 if cur_srcloc != Some(srcloc) { 933 if cur_srcloc.is_some() { 934 buffer.end_srcloc(); 935 } 936 buffer.start_srcloc(srcloc); 937 cur_srcloc = Some(srcloc); 938 } 939 940 // If this is a safepoint, compute a stack map 941 // and pass it to the emit state. 942 let stack_map_disasm = if self.insts[iix.index()].is_safepoint() { 943 let (user_stack_map, user_stack_map_disasm) = { 944 // The `user_stack_maps` is keyed by reverse 945 // instruction index, so we must flip the 946 // index. We can't put this into a helper method 947 // due to borrowck issues because parts of 948 // `self` are borrowed mutably elsewhere in this 949 // function. 950 let index = iix.to_backwards_insn_index(self.num_insts()); 951 let user_stack_map = self.user_stack_maps.remove(&index); 952 let user_stack_map_disasm = if want_disasm { 953 user_stack_map.as_ref().map(|m| format!(" ; {m:?}")) 954 } else { 955 None 956 }; 957 (user_stack_map, user_stack_map_disasm) 958 }; 959 960 state.pre_safepoint(user_stack_map); 961 962 user_stack_map_disasm 963 } else { 964 None 965 }; 966 967 // Place debug tags in the emission buffer 968 // either at the offset prior to the 969 // instruction or after the instruction, 970 // depending on whether this is a call. See 971 // the documentation on [`MachDebugTagPos`] 972 // for details on why. 973 let mut debug_tag_disasm = None; 974 let mut place_debug_tags = 975 |this: &VCode<I>, pos: MachDebugTagPos, buffer: &mut MachBuffer<I>| { 976 // As above, translate the forward instruction 977 // index to a backward index for the lookup. 978 let debug_tag_range = { 979 let index = iix.to_backwards_insn_index(this.num_insts()); 980 this.debug_tags.get(&index) 981 }; 982 if let Some(range) = debug_tag_range { 983 let start = usize::try_from(range.start).unwrap(); 984 let end = usize::try_from(range.end).unwrap(); 985 let tags = &this.debug_tag_pool[start..end]; 986 987 if want_disasm { 988 debug_tag_disasm = 989 Some(format!(" ; ^-- debug @ {pos:?}: {tags:?}")); 990 } 991 buffer.push_debug_tags(pos, tags); 992 } 993 }; 994 let debug_tag_pos = 995 if self.insts[iix.index()].call_type() == CallType::Regular { 996 MachDebugTagPos::Post 997 } else { 998 MachDebugTagPos::Pre 999 }; 1000 1001 if debug_tag_pos == MachDebugTagPos::Pre { 1002 place_debug_tags(&self, debug_tag_pos, &mut buffer); 1003 } 1004 1005 // If the instruction we are about to emit is 1006 // a return, place an epilogue at this point 1007 // (and don't emit the return; the actual 1008 // epilogue will contain it). 1009 if self.insts[iix.index()].is_term() == MachTerminator::Ret { 1010 log::trace!("emitting epilogue"); 1011 for inst in self.abi.gen_epilogue() { 1012 do_emit(&inst, &mut disasm, &mut buffer, &mut state); 1013 } 1014 } else { 1015 // Update the operands for this inst using the 1016 // allocations from the regalloc result. 1017 let mut allocs = regalloc.inst_allocs(iix).iter(); 1018 self.insts[iix.index()].get_operands( 1019 &mut |reg: &mut Reg, constraint, _kind, _pos| { 1020 let alloc = 1021 allocs.next().expect("enough allocations for all operands"); 1022 1023 if let Some(alloc) = alloc.as_reg() { 1024 let alloc: Reg = alloc.into(); 1025 if let OperandConstraint::FixedReg(rreg) = constraint { 1026 debug_assert_eq!(Reg::from(rreg), alloc); 1027 } 1028 *reg = alloc; 1029 } else if let Some(alloc) = alloc.as_stack() { 1030 let alloc: Reg = alloc.into(); 1031 *reg = alloc; 1032 } 1033 }, 1034 ); 1035 debug_assert!(allocs.next().is_none()); 1036 1037 log::trace!("emitting: {:?}", self.insts[iix.index()]); 1038 1039 // Emit the instruction! 1040 do_emit( 1041 &self.insts[iix.index()], 1042 &mut disasm, 1043 &mut buffer, 1044 &mut state, 1045 ); 1046 1047 if debug_tag_pos == MachDebugTagPos::Post { 1048 place_debug_tags(&self, debug_tag_pos, &mut buffer); 1049 } 1050 1051 if let Some(stack_map_disasm) = stack_map_disasm { 1052 disasm.push_str(&stack_map_disasm); 1053 disasm.push('\n'); 1054 } 1055 if let Some(debug_tag_disasm) = debug_tag_disasm { 1056 disasm.push_str(&debug_tag_disasm); 1057 disasm.push('\n'); 1058 } 1059 } 1060 } 1061 1062 InstOrEdit::Edit(Edit::Move { from, to }) => { 1063 // Create a move/spill/reload instruction and 1064 // immediately emit it. 1065 match (from.as_reg(), to.as_reg()) { 1066 (Some(from), Some(to)) => { 1067 // Reg-to-reg move. 1068 let from_rreg = Reg::from(from); 1069 let to_rreg = Writable::from_reg(Reg::from(to)); 1070 debug_assert_eq!(from.class(), to.class()); 1071 let ty = I::canonical_type_for_rc(from.class()); 1072 let mv = I::gen_move(to_rreg, from_rreg, ty); 1073 do_emit(&mv, &mut disasm, &mut buffer, &mut state); 1074 } 1075 (Some(from), None) => { 1076 // Spill from register to spillslot. 1077 let to = to.as_stack().unwrap(); 1078 let from_rreg = RealReg::from(from); 1079 let spill = self.abi.gen_spill(to, from_rreg); 1080 do_emit(&spill, &mut disasm, &mut buffer, &mut state); 1081 } 1082 (None, Some(to)) => { 1083 // Load from spillslot to register. 1084 let from = from.as_stack().unwrap(); 1085 let to_rreg = Writable::from_reg(RealReg::from(to)); 1086 let reload = self.abi.gen_reload(to_rreg, from); 1087 do_emit(&reload, &mut disasm, &mut buffer, &mut state); 1088 } 1089 (None, None) => { 1090 panic!("regalloc2 should have eliminated stack-to-stack moves!"); 1091 } 1092 } 1093 } 1094 } 1095 } 1096 1097 if cur_srcloc.is_some() { 1098 buffer.end_srcloc(); 1099 cur_srcloc = None; 1100 } 1101 1102 // Do we need an island? Get the worst-case size of the next BB, add 1103 // it to the optional padding behind the block, and pass this to the 1104 // `MachBuffer` to determine if an island is necessary. 1105 let worst_case_next_bb = if block_order_idx < final_order.len() - 1 { 1106 let next_block = final_order[block_order_idx + 1]; 1107 let next_block_range = self.block_ranges.get(next_block.index()); 1108 let next_block_size = next_block_range.len() as u32; 1109 let next_block_ra_insertions = ra_edits_per_block[next_block.index()]; 1110 I::worst_case_size() * (next_block_size + next_block_ra_insertions) 1111 } else { 1112 0 1113 }; 1114 let padding = if bb_padding.is_empty() { 1115 0 1116 } else { 1117 bb_padding.len() as u32 + I::LabelUse::ALIGN - 1 1118 }; 1119 if buffer.island_needed(padding + worst_case_next_bb) { 1120 buffer.emit_island(padding + worst_case_next_bb, ctrl_plane); 1121 } 1122 1123 // Insert padding, if configured, to stress the `MachBuffer`'s 1124 // relocation and island calculations. 1125 // 1126 // Padding can get quite large during fuzzing though so place a 1127 // total cap on it where when a per-function threshold is exceeded 1128 // the padding is turned back down to zero. This avoids a small-ish 1129 // test case generating a GB+ memory footprint in Cranelift for 1130 // example. 1131 if !bb_padding.is_empty() { 1132 buffer.put_data(&bb_padding); 1133 buffer.align_to(I::LabelUse::ALIGN); 1134 total_bb_padding += bb_padding.len(); 1135 if total_bb_padding > (150 << 20) { 1136 bb_padding = Vec::new(); 1137 } 1138 } 1139 } 1140 1141 debug_assert!( 1142 self.user_stack_maps.is_empty(), 1143 "any stack maps should have been consumed by instruction emission, still have: {:#?}", 1144 self.user_stack_maps, 1145 ); 1146 1147 // Do any optimizations on branches at tail of buffer, as if we had 1148 // bound one last label. 1149 buffer.optimize_branches(ctrl_plane); 1150 1151 // emission state is not needed anymore, move control plane back out 1152 *ctrl_plane = state.take_ctrl_plane(); 1153 1154 let func_body_len = buffer.cur_offset(); 1155 1156 // Create `bb_edges` and final (filtered) `bb_starts`. 1157 let mut bb_edges = vec![]; 1158 let mut bb_offsets = vec![]; 1159 if flags.machine_code_cfg_info() { 1160 for block in 0..self.num_blocks() { 1161 if bb_starts[block].is_none() { 1162 // Block was deleted by MachBuffer; skip. 1163 continue; 1164 } 1165 let from = bb_starts[block].unwrap(); 1166 1167 bb_offsets.push(from); 1168 // Resolve each `succ` label and add edges. 1169 let succs = self.block_succs(BlockIndex::new(block)); 1170 for &succ in succs.iter() { 1171 let to = buffer.resolve_label_offset(MachLabel::from_block(succ)); 1172 bb_edges.push((from, to)); 1173 } 1174 } 1175 } 1176 1177 self.monotonize_inst_offsets(&mut inst_offsets[..], func_body_len); 1178 let value_labels_ranges = 1179 self.compute_value_labels_ranges(regalloc, &inst_offsets[..], func_body_len); 1180 1181 // Store metadata about frame layout in the MachBuffer. 1182 buffer.set_frame_layout(self.abi.frame_slot_metadata()); 1183 1184 EmitResult { 1185 buffer: buffer.finish(&self.constants, ctrl_plane), 1186 bb_offsets, 1187 bb_edges, 1188 disasm: if want_disasm { Some(disasm) } else { None }, 1189 value_labels_ranges, 1190 } 1191 } 1192 1193 fn monotonize_inst_offsets(&self, inst_offsets: &mut [CodeOffset], func_body_len: u32) { 1194 if self.debug_value_labels.is_empty() { 1195 return; 1196 } 1197 1198 // During emission, branch removal can make offsets of instructions incorrect. 1199 // Consider the following sequence: [insi][jmp0][jmp1][jmp2][insj] 1200 // It will be recorded as (say): [30] [34] [38] [42] [<would be 46>] 1201 // When the jumps get removed we are left with (in "inst_offsets"): 1202 // [insi][jmp0][jmp1][jmp2][insj][...] 1203 // [30] [34] [38] [42] [34] 1204 // Which violates the monotonicity invariant. This method sets offsets of these 1205 // removed instructions such as to make them appear zero-sized: 1206 // [insi][jmp0][jmp1][jmp2][insj][...] 1207 // [30] [34] [34] [34] [34] 1208 // 1209 let mut next_offset = func_body_len; 1210 for inst_index in (0..(inst_offsets.len() - 1)).rev() { 1211 let inst_offset = inst_offsets[inst_index]; 1212 1213 // Not all instructions get their offsets recorded. 1214 if inst_offset == NO_INST_OFFSET { 1215 continue; 1216 } 1217 1218 if inst_offset > next_offset { 1219 trace!( 1220 "Fixing code offset of the removed Inst {}: {} -> {}", 1221 inst_index, inst_offset, next_offset 1222 ); 1223 inst_offsets[inst_index] = next_offset; 1224 continue; 1225 } 1226 1227 next_offset = inst_offset; 1228 } 1229 } 1230 1231 fn compute_value_labels_ranges( 1232 &self, 1233 regalloc: ®alloc2::Output, 1234 inst_offsets: &[CodeOffset], 1235 func_body_len: u32, 1236 ) -> ValueLabelsRanges { 1237 if self.debug_value_labels.is_empty() { 1238 return ValueLabelsRanges::default(); 1239 } 1240 1241 if trace_log_enabled!() { 1242 self.log_value_labels_ranges(regalloc, inst_offsets); 1243 } 1244 1245 let mut value_labels_ranges: ValueLabelsRanges = HashMap::new(); 1246 for &(label, from, to, alloc) in ®alloc.debug_locations { 1247 let label = ValueLabel::from_u32(label); 1248 let ranges = value_labels_ranges.entry(label).or_insert_with(|| vec![]); 1249 let prog_point_to_inst = |prog_point: ProgPoint| { 1250 let mut inst = prog_point.inst(); 1251 if prog_point.pos() == InstPosition::After { 1252 inst = inst.next(); 1253 } 1254 inst.index() 1255 }; 1256 let inst_to_offset = |inst_index: usize| { 1257 // Skip over cold blocks. 1258 for offset in &inst_offsets[inst_index..] { 1259 if *offset != NO_INST_OFFSET { 1260 return *offset; 1261 } 1262 } 1263 func_body_len 1264 }; 1265 let from_inst_index = prog_point_to_inst(from); 1266 let to_inst_index = prog_point_to_inst(to); 1267 let from_offset = inst_to_offset(from_inst_index); 1268 let to_offset = inst_to_offset(to_inst_index); 1269 1270 // Empty ranges or unavailable offsets can happen 1271 // due to cold blocks and branch removal (see above). 1272 if from_offset == to_offset { 1273 continue; 1274 } 1275 1276 let loc = if let Some(preg) = alloc.as_reg() { 1277 LabelValueLoc::Reg(Reg::from(preg)) 1278 } else { 1279 #[cfg(not(feature = "unwind"))] 1280 continue; 1281 1282 #[cfg(feature = "unwind")] 1283 { 1284 let slot = alloc.as_stack().unwrap(); 1285 let slot_offset = self.abi.get_spillslot_offset(slot); 1286 let slot_base_to_caller_sp_offset = self.abi.slot_base_to_caller_sp_offset(); 1287 let caller_sp_to_cfa_offset = 1288 crate::isa::unwind::systemv::caller_sp_to_cfa_offset(); 1289 // NOTE: this is a negative offset because it's relative to the caller's SP 1290 let cfa_to_sp_offset = 1291 -((slot_base_to_caller_sp_offset + caller_sp_to_cfa_offset) as i64); 1292 LabelValueLoc::CFAOffset(cfa_to_sp_offset + slot_offset) 1293 } 1294 }; 1295 1296 // Coalesce adjacent ranges that for the same location 1297 // to minimize output size here and for the consumers. 1298 if let Some(last_loc_range) = ranges.last_mut() { 1299 if last_loc_range.loc == loc && last_loc_range.end == from_offset { 1300 trace!( 1301 "Extending debug range for {:?} in {:?} to Inst {} ({})", 1302 label, loc, to_inst_index, to_offset 1303 ); 1304 last_loc_range.end = to_offset; 1305 continue; 1306 } 1307 } 1308 1309 trace!( 1310 "Recording debug range for {:?} in {:?}: [Inst {}..Inst {}) [{}..{})", 1311 label, loc, from_inst_index, to_inst_index, from_offset, to_offset 1312 ); 1313 1314 ranges.push(ValueLocRange { 1315 loc, 1316 start: from_offset, 1317 end: to_offset, 1318 }); 1319 } 1320 1321 value_labels_ranges 1322 } 1323 1324 fn log_value_labels_ranges(&self, regalloc: ®alloc2::Output, inst_offsets: &[CodeOffset]) { 1325 debug_assert!(trace_log_enabled!()); 1326 1327 // What debug labels do we have? Note we'll skip those that have not been 1328 // allocated any location at all. They will show up as numeric gaps in the table. 1329 let mut labels = vec![]; 1330 for &(label, _, _, _) in ®alloc.debug_locations { 1331 if Some(&label) == labels.last() { 1332 continue; 1333 } 1334 labels.push(label); 1335 } 1336 1337 // Reformat the data on what VRegs were the VLs assigned to by lowering, since 1338 // the array we have is sorted by VReg, and we want it sorted by VL for easy 1339 // access in the loop below. 1340 let mut vregs = vec![]; 1341 for &(vreg, start, end, label) in &self.debug_value_labels { 1342 if matches!(labels.binary_search(&label), Ok(_)) { 1343 vregs.push((label, start, end, vreg)); 1344 } 1345 } 1346 vregs.sort_unstable_by( 1347 |(l_label, l_start, _, _), (r_label, r_start, _, _)| match l_label.cmp(r_label) { 1348 Ordering::Equal => l_start.cmp(r_start), 1349 cmp => cmp, 1350 }, 1351 ); 1352 1353 #[derive(PartialEq)] 1354 enum Mode { 1355 Measure, 1356 Emit, 1357 } 1358 #[derive(PartialEq)] 1359 enum Row { 1360 Head, 1361 Line, 1362 Inst(usize, usize), 1363 } 1364 1365 let mut widths = vec![0; 3 + 2 * labels.len()]; 1366 let mut row = String::new(); 1367 let mut output_row = |row_kind: Row, mode: Mode| { 1368 let mut column_index = 0; 1369 row.clear(); 1370 1371 macro_rules! output_cell_impl { 1372 ($fill:literal, $span:literal, $($cell_fmt:tt)*) => { 1373 let column_start = row.len(); 1374 { 1375 row.push('|'); 1376 write!(row, $($cell_fmt)*).unwrap(); 1377 } 1378 1379 let next_column_index = column_index + $span; 1380 let expected_width: usize = widths[column_index..next_column_index].iter().sum(); 1381 if mode == Mode::Measure { 1382 let actual_width = row.len() - column_start; 1383 if actual_width > expected_width { 1384 widths[next_column_index - 1] += actual_width - expected_width; 1385 } 1386 } else { 1387 let column_end = column_start + expected_width; 1388 while row.len() != column_end { 1389 row.push($fill); 1390 } 1391 } 1392 column_index = next_column_index; 1393 }; 1394 } 1395 macro_rules! output_cell { 1396 ($($cell_fmt:tt)*) => { 1397 output_cell_impl!(' ', 1, $($cell_fmt)*); 1398 }; 1399 } 1400 1401 match row_kind { 1402 Row::Head => { 1403 output_cell!("BB"); 1404 output_cell!("Inst"); 1405 output_cell!("IP"); 1406 for label in &labels { 1407 output_cell_impl!(' ', 2, "{:?}", ValueLabel::from_u32(*label)); 1408 } 1409 } 1410 Row::Line => { 1411 debug_assert!(mode == Mode::Emit); 1412 for _ in 0..3 { 1413 output_cell_impl!('-', 1, ""); 1414 } 1415 for _ in &labels { 1416 output_cell_impl!('-', 2, ""); 1417 } 1418 } 1419 Row::Inst(block_index, inst_index) => { 1420 debug_assert!(inst_index < self.num_insts()); 1421 if self.block_ranges.get(block_index).start == inst_index { 1422 output_cell!("B{}", block_index); 1423 } else { 1424 output_cell!(""); 1425 } 1426 output_cell!("Inst {inst_index} "); 1427 output_cell!("{} ", inst_offsets[inst_index]); 1428 1429 for label in &labels { 1430 // First, the VReg. 1431 use regalloc2::Inst; 1432 let vreg_cmp = |inst: usize, 1433 vreg_label: &u32, 1434 range_start: &Inst, 1435 range_end: &Inst| { 1436 match vreg_label.cmp(&label) { 1437 Ordering::Equal => { 1438 if range_end.index() <= inst { 1439 Ordering::Less 1440 } else if range_start.index() > inst { 1441 Ordering::Greater 1442 } else { 1443 Ordering::Equal 1444 } 1445 } 1446 cmp => cmp, 1447 } 1448 }; 1449 let vreg_index = 1450 vregs.binary_search_by(|(l, s, e, _)| vreg_cmp(inst_index, l, s, e)); 1451 if let Ok(vreg_index) = vreg_index { 1452 let mut prev_vreg = None; 1453 if inst_index > 0 { 1454 let prev_vreg_index = vregs.binary_search_by(|(l, s, e, _)| { 1455 vreg_cmp(inst_index - 1, l, s, e) 1456 }); 1457 if let Ok(prev_vreg_index) = prev_vreg_index { 1458 prev_vreg = Some(vregs[prev_vreg_index].3); 1459 } 1460 } 1461 1462 let vreg = vregs[vreg_index].3; 1463 if Some(vreg) == prev_vreg { 1464 output_cell!("*"); 1465 } else { 1466 output_cell!("{}", vreg); 1467 } 1468 } else { 1469 output_cell!(""); 1470 } 1471 1472 // Second, the allocated location. 1473 let inst_prog_point = ProgPoint::before(Inst::new(inst_index)); 1474 let range_index = regalloc.debug_locations.binary_search_by( 1475 |(range_label, range_start, range_end, _)| match range_label.cmp(label) 1476 { 1477 Ordering::Equal => { 1478 if *range_end <= inst_prog_point { 1479 Ordering::Less 1480 } else if *range_start > inst_prog_point { 1481 Ordering::Greater 1482 } else { 1483 Ordering::Equal 1484 } 1485 } 1486 cmp => cmp, 1487 }, 1488 ); 1489 if let Ok(range_index) = range_index { 1490 // Live at this instruction, print the location. 1491 if let Some(reg) = regalloc.debug_locations[range_index].3.as_reg() { 1492 output_cell!("{:?}", Reg::from(reg)); 1493 } else { 1494 output_cell!("Stk"); 1495 } 1496 } else { 1497 // Not live at this instruction. 1498 output_cell!(""); 1499 } 1500 } 1501 } 1502 } 1503 row.push('|'); 1504 1505 if mode == Mode::Emit { 1506 trace!("{}", row.as_str()); 1507 } 1508 }; 1509 1510 for block_index in 0..self.num_blocks() { 1511 for inst_index in self.block_ranges.get(block_index) { 1512 output_row(Row::Inst(block_index, inst_index), Mode::Measure); 1513 } 1514 } 1515 output_row(Row::Head, Mode::Measure); 1516 1517 output_row(Row::Head, Mode::Emit); 1518 output_row(Row::Line, Mode::Emit); 1519 for block_index in 0..self.num_blocks() { 1520 for inst_index in self.block_ranges.get(block_index) { 1521 output_row(Row::Inst(block_index, inst_index), Mode::Emit); 1522 } 1523 } 1524 } 1525 1526 /// Get the IR block for a BlockIndex, if one exists. 1527 pub fn bindex_to_bb(&self, block: BlockIndex) -> Option<ir::Block> { 1528 self.block_order.lowered_order()[block.index()].orig_block() 1529 } 1530 1531 /// Get the type of a VReg. 1532 pub fn vreg_type(&self, vreg: VReg) -> Type { 1533 self.vreg_types[vreg.vreg()] 1534 } 1535 1536 /// Get the fact, if any, for a given VReg. 1537 pub fn vreg_fact(&self, vreg: VReg) -> Option<&Fact> { 1538 self.facts[vreg.vreg()].as_ref() 1539 } 1540 1541 /// Set the fact for a given VReg. 1542 pub fn set_vreg_fact(&mut self, vreg: VReg, fact: Fact) { 1543 trace!("set fact on {}: {:?}", vreg, fact); 1544 self.facts[vreg.vreg()] = Some(fact); 1545 } 1546 1547 /// Does a given instruction define any facts? 1548 pub fn inst_defines_facts(&self, inst: InsnIndex) -> bool { 1549 self.inst_operands(inst) 1550 .iter() 1551 .filter(|o| o.kind() == OperandKind::Def) 1552 .map(|o| o.vreg()) 1553 .any(|vreg| self.facts[vreg.vreg()].is_some()) 1554 } 1555 1556 /// Get the user stack map associated with the given forward instruction index. 1557 pub fn get_user_stack_map(&self, inst: InsnIndex) -> Option<&ir::UserStackMap> { 1558 let index = inst.to_backwards_insn_index(self.num_insts()); 1559 self.user_stack_maps.get(&index) 1560 } 1561 } 1562 1563 impl<I: VCodeInst> core::ops::Index<InsnIndex> for VCode<I> { 1564 type Output = I; 1565 fn index(&self, idx: InsnIndex) -> &Self::Output { 1566 &self.insts[idx.index()] 1567 } 1568 } 1569 1570 impl<I: VCodeInst> RegallocFunction for VCode<I> { 1571 fn num_insts(&self) -> usize { 1572 self.insts.len() 1573 } 1574 1575 fn num_blocks(&self) -> usize { 1576 self.block_ranges.len() 1577 } 1578 1579 fn entry_block(&self) -> BlockIndex { 1580 self.entry 1581 } 1582 1583 fn block_insns(&self, block: BlockIndex) -> InstRange { 1584 let range = self.block_ranges.get(block.index()); 1585 InstRange::new(InsnIndex::new(range.start), InsnIndex::new(range.end)) 1586 } 1587 1588 fn block_succs(&self, block: BlockIndex) -> &[BlockIndex] { 1589 let range = self.block_succ_range.get(block.index()); 1590 &self.block_succs[range] 1591 } 1592 1593 fn block_preds(&self, block: BlockIndex) -> &[BlockIndex] { 1594 let range = self.block_pred_range.get(block.index()); 1595 &self.block_preds[range] 1596 } 1597 1598 fn block_params(&self, block: BlockIndex) -> &[VReg] { 1599 // As a special case we don't return block params for the entry block, as all the arguments 1600 // will be defined by the `Inst::Args` instruction. 1601 if block == self.entry { 1602 return &[]; 1603 } 1604 1605 let range = self.block_params_range.get(block.index()); 1606 &self.block_params[range] 1607 } 1608 1609 fn branch_blockparams(&self, block: BlockIndex, _insn: InsnIndex, succ_idx: usize) -> &[VReg] { 1610 let succ_range = self.branch_block_arg_succ_range.get(block.index()); 1611 debug_assert!(succ_idx < succ_range.len()); 1612 let branch_block_args = self.branch_block_arg_range.get(succ_range.start + succ_idx); 1613 &self.branch_block_args[branch_block_args] 1614 } 1615 1616 fn is_ret(&self, insn: InsnIndex) -> bool { 1617 match self.insts[insn.index()].is_term() { 1618 // We treat blocks terminated by an unconditional trap like a return for regalloc. 1619 MachTerminator::None => self.insts[insn.index()].is_trap(), 1620 MachTerminator::Ret | MachTerminator::RetCall => true, 1621 MachTerminator::Branch => false, 1622 } 1623 } 1624 1625 fn is_branch(&self, insn: InsnIndex) -> bool { 1626 match self.insts[insn.index()].is_term() { 1627 MachTerminator::Branch => true, 1628 _ => false, 1629 } 1630 } 1631 1632 fn inst_operands(&self, insn: InsnIndex) -> &[Operand] { 1633 let range = self.operand_ranges.get(insn.index()); 1634 &self.operands[range] 1635 } 1636 1637 fn inst_clobbers(&self, insn: InsnIndex) -> PRegSet { 1638 self.clobbers.get(&insn).cloned().unwrap_or_default() 1639 } 1640 1641 fn num_vregs(&self) -> usize { 1642 self.vreg_types.len() 1643 } 1644 1645 fn debug_value_labels(&self) -> &[(VReg, InsnIndex, InsnIndex, u32)] { 1646 &self.debug_value_labels 1647 } 1648 1649 fn spillslot_size(&self, regclass: RegClass) -> usize { 1650 self.abi.get_spillslot_size(regclass) as usize 1651 } 1652 1653 fn allow_multiple_vreg_defs(&self) -> bool { 1654 // At least the s390x backend requires this, because the 1655 // `Loop` pseudo-instruction aggregates all Operands so pinned 1656 // vregs (RealRegs) may occur more than once. 1657 true 1658 } 1659 } 1660 1661 impl<I: VCodeInst> Debug for VRegAllocator<I> { 1662 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 1663 writeln!(f, "VRegAllocator {{")?; 1664 1665 let mut alias_keys = self.vreg_aliases.keys().cloned().collect::<Vec<_>>(); 1666 alias_keys.sort_unstable(); 1667 for key in alias_keys { 1668 let dest = self.vreg_aliases.get(&key).unwrap(); 1669 writeln!(f, " {:?} := {:?}", Reg::from(key), Reg::from(*dest))?; 1670 } 1671 1672 for (vreg, fact) in self.facts.iter().enumerate() { 1673 if let Some(fact) = fact { 1674 writeln!(f, " v{vreg} ! {fact}")?; 1675 } 1676 } 1677 1678 writeln!(f, "}}") 1679 } 1680 } 1681 1682 impl<I: VCodeInst> fmt::Debug for VCode<I> { 1683 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 1684 writeln!(f, "VCode {{")?; 1685 writeln!(f, " Entry block: {}", self.entry.index())?; 1686 1687 let mut state = Default::default(); 1688 1689 for block in 0..self.num_blocks() { 1690 let block = BlockIndex::new(block); 1691 writeln!( 1692 f, 1693 "Block {}({:?}):", 1694 block.index(), 1695 self.block_params(block) 1696 )?; 1697 if let Some(bb) = self.bindex_to_bb(block) { 1698 writeln!(f, " (original IR block: {bb})")?; 1699 } 1700 for (succ_idx, succ) in self.block_succs(block).iter().enumerate() { 1701 writeln!( 1702 f, 1703 " (successor: Block {}({:?}))", 1704 succ.index(), 1705 self.branch_blockparams(block, InsnIndex::new(0) /* dummy */, succ_idx) 1706 )?; 1707 } 1708 for inst in self.block_ranges.get(block.index()) { 1709 writeln!( 1710 f, 1711 " Inst {}: {}", 1712 inst, 1713 self.insts[inst].pretty_print_inst(&mut state) 1714 )?; 1715 if !self.operands.is_empty() { 1716 for operand in self.inst_operands(InsnIndex::new(inst)) { 1717 if operand.kind() == OperandKind::Def { 1718 if let Some(fact) = &self.facts[operand.vreg().vreg()] { 1719 writeln!(f, " v{} ! {}", operand.vreg().vreg(), fact)?; 1720 } 1721 } 1722 } 1723 } 1724 if let Some(user_stack_map) = self.get_user_stack_map(InsnIndex::new(inst)) { 1725 writeln!(f, " {user_stack_map:?}")?; 1726 } 1727 } 1728 } 1729 1730 writeln!(f, "}}")?; 1731 Ok(()) 1732 } 1733 } 1734 1735 /// This structure manages VReg allocation during the lifetime of the VCodeBuilder. 1736 pub struct VRegAllocator<I> { 1737 /// VReg IR-level types. 1738 vreg_types: Vec<Type>, 1739 1740 /// VReg aliases. When the final VCode is built we rewrite all 1741 /// uses of the keys in this table to their replacement values. 1742 /// 1743 /// We use these aliases to rename an instruction's expected 1744 /// result vregs to the returned vregs from lowering, which are 1745 /// usually freshly-allocated temps. 1746 vreg_aliases: FxHashMap<regalloc2::VReg, regalloc2::VReg>, 1747 1748 /// A deferred error, to be bubbled up to the top level of the 1749 /// lowering algorithm. We take this approach because we cannot 1750 /// currently propagate a `Result` upward through ISLE code (the 1751 /// lowering rules) or some ABI code. 1752 deferred_error: Option<CodegenError>, 1753 1754 /// Facts on VRegs, for proof-carrying code. 1755 facts: Vec<Option<Fact>>, 1756 1757 /// The type of instruction that this allocator makes registers for. 1758 _inst: core::marker::PhantomData<I>, 1759 } 1760 1761 impl<I: VCodeInst> VRegAllocator<I> { 1762 /// Make a new VRegAllocator. 1763 pub fn with_capacity(capacity: usize) -> Self { 1764 let capacity = first_user_vreg_index() + capacity; 1765 let mut vreg_types = Vec::with_capacity(capacity); 1766 vreg_types.resize(first_user_vreg_index(), types::INVALID); 1767 Self { 1768 vreg_types, 1769 vreg_aliases: FxHashMap::with_capacity_and_hasher(capacity, Default::default()), 1770 deferred_error: None, 1771 facts: Vec::with_capacity(capacity), 1772 _inst: core::marker::PhantomData::default(), 1773 } 1774 } 1775 1776 /// Allocate a fresh ValueRegs. 1777 pub fn alloc(&mut self, ty: Type) -> CodegenResult<ValueRegs<Reg>> { 1778 if self.deferred_error.is_some() { 1779 return Err(CodegenError::CodeTooLarge); 1780 } 1781 let v = self.vreg_types.len(); 1782 let (regclasses, tys) = I::rc_for_type(ty)?; 1783 1784 // Check that new indices are in-bounds for regalloc2's 1785 // VReg/Operand representation. 1786 if v + regclasses.len() > VReg::MAX { 1787 return Err(CodegenError::CodeTooLarge); 1788 } 1789 1790 // Check that new indices are in-bounds for our Reg 1791 // bit-packing on top of the RA2 types, which represents 1792 // spillslots as well. 1793 let check = |vreg: regalloc2::VReg| -> CodegenResult<Reg> { 1794 Reg::from_virtual_reg_checked(vreg).ok_or(CodegenError::CodeTooLarge) 1795 }; 1796 1797 let regs: ValueRegs<Reg> = match regclasses { 1798 &[rc0] => ValueRegs::one(check(VReg::new(v, rc0))?), 1799 &[rc0, rc1] => ValueRegs::two(check(VReg::new(v, rc0))?, check(VReg::new(v + 1, rc1))?), 1800 // We can extend this if/when we support 32-bit targets; e.g., 1801 // an i128 on a 32-bit machine will need up to four machine regs 1802 // for a `Value`. 1803 _ => panic!("Value must reside in 1 or 2 registers"), 1804 }; 1805 for (®_ty, ®) in tys.iter().zip(regs.regs().iter()) { 1806 let vreg = reg.to_virtual_reg().unwrap(); 1807 debug_assert_eq!(self.vreg_types.len(), vreg.index()); 1808 self.vreg_types.push(reg_ty); 1809 } 1810 1811 // Create empty facts for each allocated vreg. 1812 self.facts.resize(self.vreg_types.len(), None); 1813 1814 Ok(regs) 1815 } 1816 1817 /// Allocate a fresh ValueRegs, deferring any out-of-vregs 1818 /// errors. This is useful in places where we cannot bubble a 1819 /// `CodegenResult` upward easily, and which are known to be 1820 /// invoked from within the lowering loop that checks the deferred 1821 /// error status below. 1822 pub fn alloc_with_deferred_error(&mut self, ty: Type) -> ValueRegs<Reg> { 1823 match self.alloc(ty) { 1824 Ok(x) => x, 1825 Err(e) => { 1826 self.deferred_error = Some(e); 1827 self.bogus_for_deferred_error(ty) 1828 } 1829 } 1830 } 1831 1832 /// Take any deferred error that was accumulated by `alloc_with_deferred_error`. 1833 pub fn take_deferred_error(&mut self) -> Option<CodegenError> { 1834 self.deferred_error.take() 1835 } 1836 1837 /// Produce an bogus VReg placeholder with the proper number of 1838 /// registers for the given type. This is meant to be used with 1839 /// deferred allocation errors (see `Lower::alloc_tmp()`). 1840 fn bogus_for_deferred_error(&self, ty: Type) -> ValueRegs<Reg> { 1841 let (regclasses, _tys) = I::rc_for_type(ty).expect("must have valid type"); 1842 match regclasses { 1843 &[rc0] => ValueRegs::one(VReg::new(0, rc0).into()), 1844 &[rc0, rc1] => ValueRegs::two(VReg::new(0, rc0).into(), VReg::new(1, rc1).into()), 1845 _ => panic!("Value must reside in 1 or 2 registers"), 1846 } 1847 } 1848 1849 /// Rewrite any mention of `from` into `to`. 1850 pub fn set_vreg_alias(&mut self, from: Reg, to: Reg) { 1851 let from = from.into(); 1852 let resolved_to = self.resolve_vreg_alias(to.into()); 1853 // Disallow cycles (see below). 1854 assert_ne!(resolved_to, from); 1855 1856 // Maintain the invariant that PCC facts only exist on vregs 1857 // which aren't aliases. We want to preserve whatever was 1858 // stated about the vreg before its producer was lowered. 1859 if let Some(fact) = self.facts[from.vreg()].take() { 1860 self.set_fact(resolved_to, fact); 1861 } 1862 1863 let old_alias = self.vreg_aliases.insert(from, resolved_to); 1864 debug_assert_eq!(old_alias, None); 1865 } 1866 1867 fn resolve_vreg_alias(&self, mut vreg: regalloc2::VReg) -> regalloc2::VReg { 1868 // We prevent cycles from existing by resolving targets of 1869 // aliases eagerly before setting them. If the target resolves 1870 // to the origin of the alias, then a cycle would be created 1871 // and the alias is disallowed. Because of the structure of 1872 // SSA code (one instruction can refer to another's defs but 1873 // not vice-versa, except indirectly through 1874 // phis/blockparams), cycles should not occur as we use 1875 // aliases to redirect vregs to the temps that actually define 1876 // them. 1877 while let Some(to) = self.vreg_aliases.get(&vreg) { 1878 vreg = *to; 1879 } 1880 vreg 1881 } 1882 1883 #[inline] 1884 fn debug_assert_no_vreg_aliases(&self, mut list: impl Iterator<Item = VReg>) { 1885 debug_assert!(list.all(|vreg| !self.vreg_aliases.contains_key(&vreg))); 1886 } 1887 1888 /// Set the proof-carrying code fact on a given virtual register. 1889 /// 1890 /// Returns the old fact, if any (only one fact can be stored). 1891 fn set_fact(&mut self, vreg: regalloc2::VReg, fact: Fact) -> Option<Fact> { 1892 trace!("vreg {:?} has fact: {:?}", vreg, fact); 1893 debug_assert!(!self.vreg_aliases.contains_key(&vreg)); 1894 self.facts[vreg.vreg()].replace(fact) 1895 } 1896 1897 /// Set a fact only if one doesn't already exist. 1898 pub fn set_fact_if_missing(&mut self, vreg: VirtualReg, fact: Fact) { 1899 let vreg = self.resolve_vreg_alias(vreg.into()); 1900 if self.facts[vreg.vreg()].is_none() { 1901 self.set_fact(vreg, fact); 1902 } 1903 } 1904 1905 /// Allocate a fresh ValueRegs, with a given fact to apply if 1906 /// the value fits in one VReg. 1907 pub fn alloc_with_maybe_fact( 1908 &mut self, 1909 ty: Type, 1910 fact: Option<Fact>, 1911 ) -> CodegenResult<ValueRegs<Reg>> { 1912 let result = self.alloc(ty)?; 1913 1914 // Ensure that we don't lose a fact on a value that splits 1915 // into multiple VRegs. 1916 assert!(result.len() == 1 || fact.is_none()); 1917 if let Some(fact) = fact { 1918 self.set_fact(result.regs()[0].into(), fact); 1919 } 1920 1921 Ok(result) 1922 } 1923 } 1924 1925 /// This structure tracks the large constants used in VCode that will be emitted separately by the 1926 /// [MachBuffer]. 1927 /// 1928 /// First, during the lowering phase, constants are inserted using 1929 /// [VCodeConstants.insert]; an intermediate handle, `VCodeConstant`, tracks what constants are 1930 /// used in this phase. Some deduplication is performed, when possible, as constant 1931 /// values are inserted. 1932 /// 1933 /// Secondly, during the emission phase, the [MachBuffer] assigns [MachLabel]s for each of the 1934 /// constants so that instructions can refer to the value's memory location. The [MachBuffer] 1935 /// then writes the constant values to the buffer. 1936 #[derive(Default)] 1937 pub struct VCodeConstants { 1938 constants: PrimaryMap<VCodeConstant, VCodeConstantData>, 1939 pool_uses: HashMap<Constant, VCodeConstant>, 1940 well_known_uses: HashMap<*const [u8], VCodeConstant>, 1941 u64s: HashMap<[u8; 8], VCodeConstant>, 1942 } 1943 impl VCodeConstants { 1944 /// Initialize the structure with the expected number of constants. 1945 pub fn with_capacity(expected_num_constants: usize) -> Self { 1946 Self { 1947 constants: PrimaryMap::with_capacity(expected_num_constants), 1948 pool_uses: HashMap::with_capacity(expected_num_constants), 1949 well_known_uses: HashMap::new(), 1950 u64s: HashMap::new(), 1951 } 1952 } 1953 1954 /// Insert a constant; using this method indicates that a constant value will be used and thus 1955 /// will be emitted to the `MachBuffer`. The current implementation can deduplicate constants 1956 /// that are [VCodeConstantData::Pool] or [VCodeConstantData::WellKnown] but not 1957 /// [VCodeConstantData::Generated]. 1958 pub fn insert(&mut self, data: VCodeConstantData) -> VCodeConstant { 1959 match data { 1960 VCodeConstantData::Generated(_) => self.constants.push(data), 1961 VCodeConstantData::Pool(constant, _) => match self.pool_uses.get(&constant) { 1962 None => { 1963 let vcode_constant = self.constants.push(data); 1964 self.pool_uses.insert(constant, vcode_constant); 1965 vcode_constant 1966 } 1967 Some(&vcode_constant) => vcode_constant, 1968 }, 1969 VCodeConstantData::WellKnown(data_ref) => { 1970 match self.well_known_uses.entry(data_ref as *const [u8]) { 1971 Entry::Vacant(v) => { 1972 let vcode_constant = self.constants.push(data); 1973 v.insert(vcode_constant); 1974 vcode_constant 1975 } 1976 Entry::Occupied(o) => *o.get(), 1977 } 1978 } 1979 VCodeConstantData::U64(value) => match self.u64s.entry(value) { 1980 Entry::Vacant(v) => { 1981 let vcode_constant = self.constants.push(data); 1982 v.insert(vcode_constant); 1983 vcode_constant 1984 } 1985 Entry::Occupied(o) => *o.get(), 1986 }, 1987 } 1988 } 1989 1990 /// Return the number of constants inserted. 1991 pub fn len(&self) -> usize { 1992 self.constants.len() 1993 } 1994 1995 /// Iterate over the `VCodeConstant` keys inserted in this structure. 1996 pub fn keys(&self) -> Keys<VCodeConstant> { 1997 self.constants.keys() 1998 } 1999 2000 /// Iterate over the `VCodeConstant` keys and the data (as a byte slice) inserted in this 2001 /// structure. 2002 pub fn iter(&self) -> impl Iterator<Item = (VCodeConstant, &VCodeConstantData)> { 2003 self.constants.iter() 2004 } 2005 2006 /// Returns the data associated with the specified constant. 2007 pub fn get(&self, c: VCodeConstant) -> &VCodeConstantData { 2008 &self.constants[c] 2009 } 2010 2011 /// Checks if the given [VCodeConstantData] is registered as 2012 /// used by the pool. 2013 pub fn pool_uses(&self, constant: &VCodeConstantData) -> bool { 2014 match constant { 2015 VCodeConstantData::Pool(c, _) => self.pool_uses.contains_key(c), 2016 _ => false, 2017 } 2018 } 2019 } 2020 2021 /// A use of a constant by one or more VCode instructions; see [VCodeConstants]. 2022 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 2023 pub struct VCodeConstant(u32); 2024 entity_impl!(VCodeConstant); 2025 2026 /// Identify the different types of constant that can be inserted into [VCodeConstants]. Tracking 2027 /// these separately instead of as raw byte buffers allows us to avoid some duplication. 2028 pub enum VCodeConstantData { 2029 /// A constant already present in the Cranelift IR 2030 /// [ConstantPool](crate::ir::constant::ConstantPool). 2031 Pool(Constant, ConstantData), 2032 /// A reference to a well-known constant value that is statically encoded within the compiler. 2033 WellKnown(&'static [u8]), 2034 /// A constant value generated during lowering; the value may depend on the instruction context 2035 /// which makes it difficult to de-duplicate--if possible, use other variants. 2036 Generated(ConstantData), 2037 /// A constant of at most 64 bits. These are deduplicated as 2038 /// well. Stored as a fixed-size array of `u8` so that we do not 2039 /// encounter endianness problems when cross-compiling. 2040 U64([u8; 8]), 2041 } 2042 impl VCodeConstantData { 2043 /// Retrieve the constant data as a byte slice. 2044 pub fn as_slice(&self) -> &[u8] { 2045 match self { 2046 VCodeConstantData::Pool(_, d) | VCodeConstantData::Generated(d) => d.as_slice(), 2047 VCodeConstantData::WellKnown(d) => d, 2048 VCodeConstantData::U64(value) => &value[..], 2049 } 2050 } 2051 2052 /// Calculate the alignment of the constant data. 2053 pub fn alignment(&self) -> u32 { 2054 if self.as_slice().len() <= 8 { 8 } else { 16 } 2055 } 2056 } 2057 2058 #[cfg(test)] 2059 mod test { 2060 use super::*; 2061 use core::mem::size_of; 2062 2063 #[test] 2064 fn size_of_constant_structs() { 2065 assert_eq!(size_of::<Constant>(), 4); 2066 assert_eq!(size_of::<VCodeConstant>(), 4); 2067 assert_eq!(size_of::<ConstantData>(), 3 * size_of::<usize>()); 2068 assert_eq!(size_of::<VCodeConstantData>(), 4 * size_of::<usize>()); 2069 assert_eq!( 2070 size_of::<PrimaryMap<VCodeConstant, VCodeConstantData>>(), 2071 3 * size_of::<usize>() 2072 ); 2073 // TODO The VCodeConstants structure's memory size could be further optimized. 2074 // With certain versions of Rust, each `HashMap` in `VCodeConstants` occupied at 2075 // least 48 bytes, making an empty `VCodeConstants` cost 120 bytes. 2076 } 2077 } 2078