1 //! This implements the VCode container: a CFG of Insts that have been lowered. 2 //! 3 //! VCode is virtual-register code. An instruction in VCode is almost a machine 4 //! instruction; however, its register slots can refer to virtual registers in 5 //! addition to real machine registers. 6 //! 7 //! VCode is structured with traditional basic blocks, and 8 //! each block must be terminated by an unconditional branch (one target), a 9 //! conditional branch (two targets), or a return (no targets). Note that this 10 //! slightly differs from the machine code of most ISAs: in most ISAs, a 11 //! conditional branch has one target (and the not-taken case falls through). 12 //! However, we expect that machine backends will elide branches to the following 13 //! block (i.e., zero-offset jumps), and will be able to codegen a branch-cond / 14 //! branch-uncond pair if *both* targets are not fallthrough. This allows us to 15 //! play with layout prior to final binary emission, as well, if we want. 16 //! 17 //! See the main module comment in `mod.rs` for more details on the VCode-based 18 //! backend pipeline. 19 20 use crate::ir::pcc::*; 21 use crate::ir::{self, types, Constant, ConstantData, ValueLabel}; 22 use crate::ranges::Ranges; 23 use crate::timing; 24 use crate::trace; 25 use crate::CodegenError; 26 use crate::{machinst::*, trace_log_enabled}; 27 use crate::{LabelValueLoc, ValueLocRange}; 28 use regalloc2::{ 29 Edit, Function as RegallocFunction, InstOrEdit, InstPosition, InstRange, MachineEnv, Operand, 30 OperandConstraint, OperandKind, PRegSet, ProgPoint, RegClass, 31 }; 32 use rustc_hash::FxHashMap; 33 34 use core::cmp::Ordering; 35 use core::fmt::{self, Write}; 36 use core::mem::take; 37 use cranelift_entity::{entity_impl, Keys}; 38 use std::collections::hash_map::Entry; 39 use std::collections::HashMap; 40 41 /// Index referring to an instruction in VCode. 42 pub type InsnIndex = regalloc2::Inst; 43 44 /// Extension trait for `InsnIndex` to allow conversion to a 45 /// `BackwardsInsnIndex`. 46 trait ToBackwardsInsnIndex { 47 fn to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex; 48 } 49 50 impl ToBackwardsInsnIndex for InsnIndex { 51 fn to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex { 52 BackwardsInsnIndex::new(num_insts - self.index() - 1) 53 } 54 } 55 56 /// An index referring to an instruction in the VCode when it is backwards, 57 /// during VCode construction. 58 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] 59 #[cfg_attr( 60 feature = "enable-serde", 61 derive(::serde::Serialize, ::serde::Deserialize) 62 )] 63 pub struct BackwardsInsnIndex(InsnIndex); 64 65 impl BackwardsInsnIndex { 66 pub fn new(i: usize) -> Self { 67 BackwardsInsnIndex(InsnIndex::new(i)) 68 } 69 } 70 71 /// Index referring to a basic block in VCode. 72 pub type BlockIndex = regalloc2::Block; 73 74 /// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be 75 /// a `MachInst` and it must be able to emit itself at least to a `SizeCodeSink`. 76 pub trait VCodeInst: MachInst + MachInstEmit {} 77 impl<I: MachInst + MachInstEmit> VCodeInst for I {} 78 79 /// A function in "VCode" (virtualized-register code) form, after 80 /// lowering. This is essentially a standard CFG of basic blocks, 81 /// where each basic block consists of lowered instructions produced 82 /// by the machine-specific backend. 83 /// 84 /// Note that the VCode is immutable once produced, and is not 85 /// modified by register allocation in particular. Rather, register 86 /// allocation on the `VCode` produces a separate `regalloc2::Output` 87 /// struct, and this can be passed to `emit`. `emit` in turn does not 88 /// modify the vcode, but produces an `EmitResult`, which contains the 89 /// machine code itself, and the associated disassembly and/or 90 /// metadata as requested. 91 pub struct VCode<I: VCodeInst> { 92 /// VReg IR-level types. 93 vreg_types: Vec<Type>, 94 95 /// Lowered machine instructions in order corresponding to the original IR. 96 insts: Vec<I>, 97 98 /// A map from backwards instruction index to the user stack map for that 99 /// instruction. 100 /// 101 /// This is a sparse side table that only has entries for instructions that 102 /// are safepoints, and only for a subset of those that have an associated 103 /// user stack map. 104 user_stack_maps: FxHashMap<BackwardsInsnIndex, ir::UserStackMap>, 105 106 /// Operands: pre-regalloc references to virtual registers with 107 /// constraints, in one flattened array. This allows the regalloc 108 /// to efficiently access all operands without requiring expensive 109 /// matches or method invocations on insts. 110 operands: Vec<Operand>, 111 112 /// Operand index ranges: for each instruction in `insts`, there 113 /// is a tuple here providing the range in `operands` for that 114 /// instruction's operands. 115 operand_ranges: Ranges, 116 117 /// Clobbers: a sparse map from instruction indices to clobber masks. 118 clobbers: FxHashMap<InsnIndex, PRegSet>, 119 120 /// Source locations for each instruction. (`SourceLoc` is a `u32`, so it is 121 /// reasonable to keep one of these per instruction.) 122 srclocs: Vec<RelSourceLoc>, 123 124 /// Entry block. 125 entry: BlockIndex, 126 127 /// Block instruction indices. 128 block_ranges: Ranges, 129 130 /// Block successors: index range in the `block_succs` list. 131 block_succ_range: Ranges, 132 133 /// Block successor lists, concatenated into one vec. The 134 /// `block_succ_range` list of tuples above gives (start, end) 135 /// ranges within this list that correspond to each basic block's 136 /// successors. 137 block_succs: Vec<regalloc2::Block>, 138 139 /// Block predecessors: index range in the `block_preds` list. 140 block_pred_range: Ranges, 141 142 /// Block predecessor lists, concatenated into one vec. The 143 /// `block_pred_range` list of tuples above gives (start, end) 144 /// ranges within this list that correspond to each basic block's 145 /// predecessors. 146 block_preds: Vec<regalloc2::Block>, 147 148 /// Block parameters: index range in `block_params` below. 149 block_params_range: Ranges, 150 151 /// Block parameter lists, concatenated into one vec. The 152 /// `block_params_range` list of tuples above gives (start, end) 153 /// ranges within this list that correspond to each basic block's 154 /// blockparam vregs. 155 block_params: Vec<regalloc2::VReg>, 156 157 /// Outgoing block arguments on branch instructions, concatenated 158 /// into one list. 159 /// 160 /// Note that this is conceptually a 3D array: we have a VReg list 161 /// per block, per successor. We flatten those three dimensions 162 /// into this 1D vec, then store index ranges in two levels of 163 /// indirection. 164 /// 165 /// Indexed by the indices in `branch_block_arg_succ_range`. 166 branch_block_args: Vec<regalloc2::VReg>, 167 168 /// Array of sequences of (start, end) tuples in 169 /// `branch_block_args`, one for each successor; these sequences 170 /// for each block are concatenated. 171 /// 172 /// Indexed by the indices in `branch_block_arg_succ_range`. 173 branch_block_arg_range: Ranges, 174 175 /// For a given block, indices in `branch_block_arg_range` 176 /// corresponding to all of its successors. 177 branch_block_arg_succ_range: Ranges, 178 179 /// Block-order information. 180 block_order: BlockLoweringOrder, 181 182 /// ABI object. 183 pub(crate) abi: Callee<I::ABIMachineSpec>, 184 185 /// Constant information used during code emission. This should be 186 /// immutable across function compilations within the same module. 187 emit_info: I::Info, 188 189 /// Constants. 190 pub(crate) constants: VCodeConstants, 191 192 /// Value labels for debuginfo attached to vregs. 193 debug_value_labels: Vec<(VReg, InsnIndex, InsnIndex, u32)>, 194 195 pub(crate) sigs: SigSet, 196 197 /// Facts on VRegs, for proof-carrying code verification. 198 facts: Vec<Option<Fact>>, 199 200 log2_min_function_alignment: u8, 201 } 202 203 /// The result of `VCode::emit`. Contains all information computed 204 /// during emission: actual machine code, optionally a disassembly, 205 /// and optionally metadata about the code layout. 206 pub struct EmitResult { 207 /// The MachBuffer containing the machine code. 208 pub buffer: MachBufferFinalized<Stencil>, 209 210 /// Offset of each basic block, recorded during emission. Computed 211 /// only if `debug_value_labels` is non-empty. 212 pub bb_offsets: Vec<CodeOffset>, 213 214 /// Final basic-block edges, in terms of code offsets of 215 /// bb-starts. Computed only if `debug_value_labels` is non-empty. 216 pub bb_edges: Vec<(CodeOffset, CodeOffset)>, 217 218 /// Final length of function body. 219 pub func_body_len: CodeOffset, 220 221 /// The pretty-printed disassembly, if any. This uses the same 222 /// pretty-printing for MachInsts as the pre-regalloc VCode Debug 223 /// implementation, but additionally includes the prologue and 224 /// epilogue(s), and makes use of the regalloc results. 225 pub disasm: Option<String>, 226 227 /// Offsets of sized stackslots. 228 pub sized_stackslot_offsets: PrimaryMap<StackSlot, u32>, 229 230 /// Offsets of dynamic stackslots. 231 pub dynamic_stackslot_offsets: PrimaryMap<DynamicStackSlot, u32>, 232 233 /// Value-labels information (debug metadata). 234 pub value_labels_ranges: ValueLabelsRanges, 235 236 /// Stack frame size. 237 pub frame_size: u32, 238 } 239 240 /// A builder for a VCode function body. 241 /// 242 /// This builder has the ability to accept instructions in either 243 /// forward or reverse order, depending on the pass direction that 244 /// produces the VCode. The lowering from CLIF to VCode<MachInst> 245 /// ordinarily occurs in reverse order (in order to allow instructions 246 /// to be lowered only if used, and not merged) so a reversal will 247 /// occur at the end of lowering to ensure the VCode is in machine 248 /// order. 249 /// 250 /// If built in reverse, block and instruction indices used once the 251 /// VCode is built are relative to the final (reversed) order, not the 252 /// order of construction. Note that this means we do not know the 253 /// final block or instruction indices when building, so we do not 254 /// hand them out. (The user is assumed to know them when appending 255 /// terminator instructions with successor blocks.) 256 pub struct VCodeBuilder<I: VCodeInst> { 257 /// In-progress VCode. 258 pub(crate) vcode: VCode<I>, 259 260 /// In what direction is the build occurring? 261 direction: VCodeBuildDirection, 262 263 /// Debug-value label in-progress map, keyed by label. For each 264 /// label, we keep disjoint ranges mapping to vregs. We'll flatten 265 /// this into (vreg, range, label) tuples when done. 266 debug_info: FxHashMap<ValueLabel, Vec<(InsnIndex, InsnIndex, VReg)>>, 267 } 268 269 /// Direction in which a VCodeBuilder builds VCode. 270 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 271 pub enum VCodeBuildDirection { 272 // TODO: add `Forward` once we need it and can test it adequately. 273 /// Backward-build pass: we expect the producer to call `emit()` 274 /// with instructions in reverse program order within each block. 275 Backward, 276 } 277 278 impl<I: VCodeInst> VCodeBuilder<I> { 279 /// Create a new VCodeBuilder. 280 pub fn new( 281 sigs: SigSet, 282 abi: Callee<I::ABIMachineSpec>, 283 emit_info: I::Info, 284 block_order: BlockLoweringOrder, 285 constants: VCodeConstants, 286 direction: VCodeBuildDirection, 287 log2_min_function_alignment: u8, 288 ) -> Self { 289 let vcode = VCode::new( 290 sigs, 291 abi, 292 emit_info, 293 block_order, 294 constants, 295 log2_min_function_alignment, 296 ); 297 298 VCodeBuilder { 299 vcode, 300 direction, 301 debug_info: FxHashMap::default(), 302 } 303 } 304 305 pub fn init_retval_area(&mut self, vregs: &mut VRegAllocator<I>) -> CodegenResult<()> { 306 self.vcode.abi.init_retval_area(&self.vcode.sigs, vregs) 307 } 308 309 /// Access the ABI object. 310 pub fn abi(&self) -> &Callee<I::ABIMachineSpec> { 311 &self.vcode.abi 312 } 313 314 /// Access the ABI object. 315 pub fn abi_mut(&mut self) -> &mut Callee<I::ABIMachineSpec> { 316 &mut self.vcode.abi 317 } 318 319 pub fn sigs(&self) -> &SigSet { 320 &self.vcode.sigs 321 } 322 323 pub fn sigs_mut(&mut self) -> &mut SigSet { 324 &mut self.vcode.sigs 325 } 326 327 /// Access to the BlockLoweringOrder object. 328 pub fn block_order(&self) -> &BlockLoweringOrder { 329 &self.vcode.block_order 330 } 331 332 /// Set the current block as the entry block. 333 pub fn set_entry(&mut self, block: BlockIndex) { 334 self.vcode.entry = block; 335 } 336 337 /// End the current basic block. Must be called after emitting vcode insts 338 /// for IR insts and prior to ending the function (building the VCode). 339 pub fn end_bb(&mut self) { 340 let end_idx = self.vcode.insts.len(); 341 // Add the instruction index range to the list of blocks. 342 self.vcode.block_ranges.push_end(end_idx); 343 // End the successors list. 344 let succ_end = self.vcode.block_succs.len(); 345 self.vcode.block_succ_range.push_end(succ_end); 346 // End the blockparams list. 347 let block_params_end = self.vcode.block_params.len(); 348 self.vcode.block_params_range.push_end(block_params_end); 349 // End the branch blockparam args list. 350 let branch_block_arg_succ_end = self.vcode.branch_block_arg_range.len(); 351 self.vcode 352 .branch_block_arg_succ_range 353 .push_end(branch_block_arg_succ_end); 354 } 355 356 pub fn add_block_param(&mut self, param: VirtualReg) { 357 self.vcode.block_params.push(param.into()); 358 } 359 360 fn add_branch_args_for_succ(&mut self, args: &[Reg]) { 361 self.vcode 362 .branch_block_args 363 .extend(args.iter().map(|&arg| VReg::from(arg))); 364 let end = self.vcode.branch_block_args.len(); 365 self.vcode.branch_block_arg_range.push_end(end); 366 } 367 368 /// Push an instruction for the current BB and current IR inst 369 /// within the BB. 370 pub fn push(&mut self, insn: I, loc: RelSourceLoc) { 371 assert!(!insn.is_low_level_branch()); // These are not meant to be in VCode. 372 self.vcode.insts.push(insn); 373 self.vcode.srclocs.push(loc); 374 } 375 376 /// Add a successor block with branch args. 377 pub fn add_succ(&mut self, block: BlockIndex, args: &[Reg]) { 378 self.vcode.block_succs.push(block); 379 self.add_branch_args_for_succ(args); 380 } 381 382 /// Add a debug value label to a register. 383 pub fn add_value_label(&mut self, reg: Reg, label: ValueLabel) { 384 // 1) In the reversed order, we consider the instructions 385 // that define ranges in the "debug_info" array to refer 386 // to the IP **after** them (when reversed): 387 // IP[2]__| Inst 3 | 388 // IP[1]__| Inst 2 | 389 // IP[0]__| Inst 1 | 390 // | Inst 0 | 391 // This is so that we can represent IP[<function start>], 392 // done at the cost of not being to represent IP[<function end>], 393 // which is OK since no values will be live at that point. 394 // 2) The live range for "reg" begins at the current IP 395 // and continues until the next, in execution order, 396 // VReg that defines "label". Since the ranges are open 397 // at the end, the subtraction of 1 cancels out: 398 // [last..current IP] <=> 399 // [last..last emitted inst index] <=> 400 // [last..next_inst_index - 1] <=> 401 // [last..next_inst_index) 402 // 403 let next_inst_index = self.vcode.insts.len(); 404 if next_inst_index == 0 { 405 // This would produce a defective [0..0) range. 406 return; 407 } 408 let next_inst = InsnIndex::new(next_inst_index); 409 let labels = self.debug_info.entry(label).or_insert_with(|| vec![]); 410 let last = labels 411 .last() 412 .map(|(_start, end, _vreg)| *end) 413 .unwrap_or(InsnIndex::new(0)); 414 labels.push((last, next_inst, reg.into())); 415 } 416 417 /// Access the constants. 418 pub fn constants(&mut self) -> &mut VCodeConstants { 419 &mut self.vcode.constants 420 } 421 422 fn compute_preds_from_succs(&mut self) { 423 // Do a linear-time counting sort: first determine how many 424 // times each block appears as a successor. 425 let mut starts = vec![0u32; self.vcode.num_blocks()]; 426 for succ in &self.vcode.block_succs { 427 starts[succ.index()] += 1; 428 } 429 430 // Determine for each block the starting index where that 431 // block's predecessors should go. This is equivalent to the 432 // ranges we need to store in block_pred_range. 433 self.vcode.block_pred_range.reserve(starts.len()); 434 let mut end = 0; 435 for count in starts.iter_mut() { 436 let start = end; 437 end += *count; 438 *count = start; 439 self.vcode.block_pred_range.push_end(end as usize); 440 } 441 let end = end as usize; 442 debug_assert_eq!(end, self.vcode.block_succs.len()); 443 444 // Walk over the successors again, this time grouped by 445 // predecessor, and push the predecessor at the current 446 // starting position of each of its successors. We build 447 // each group of predecessors in whatever order Ranges::iter 448 // returns them; regalloc2 doesn't care. 449 self.vcode.block_preds.resize(end, BlockIndex::invalid()); 450 for (pred, range) in self.vcode.block_succ_range.iter() { 451 let pred = BlockIndex::new(pred); 452 for succ in &self.vcode.block_succs[range] { 453 let pos = &mut starts[succ.index()]; 454 self.vcode.block_preds[*pos as usize] = pred; 455 *pos += 1; 456 } 457 } 458 debug_assert!(self.vcode.block_preds.iter().all(|pred| pred.is_valid())); 459 } 460 461 /// Called once, when a build in Backward order is complete, to 462 /// perform the overall reversal (into final forward order) and 463 /// finalize metadata accordingly. 464 fn reverse_and_finalize(&mut self, vregs: &VRegAllocator<I>) { 465 let n_insts = self.vcode.insts.len(); 466 if n_insts == 0 { 467 return; 468 } 469 470 // Reverse the per-block and per-inst sequences. 471 self.vcode.block_ranges.reverse_index(); 472 self.vcode.block_ranges.reverse_target(n_insts); 473 // block_params_range is indexed by block (and blocks were 474 // traversed in reverse) so we reverse it; but block-param 475 // sequences in the concatenated vec can remain in reverse 476 // order (it is effectively an arena of arbitrarily-placed 477 // referenced sequences). 478 self.vcode.block_params_range.reverse_index(); 479 // Likewise, we reverse block_succ_range, but the block_succ 480 // concatenated array can remain as-is. 481 self.vcode.block_succ_range.reverse_index(); 482 self.vcode.insts.reverse(); 483 self.vcode.srclocs.reverse(); 484 // Likewise, branch_block_arg_succ_range is indexed by block 485 // so must be reversed. 486 self.vcode.branch_block_arg_succ_range.reverse_index(); 487 488 // To translate an instruction index *endpoint* in reversed 489 // order to forward order, compute `n_insts - i`. 490 // 491 // Why not `n_insts - 1 - i`? That would be correct to 492 // translate an individual instruction index (for ten insts 0 493 // to 9 inclusive, inst 0 becomes 9, and inst 9 becomes 494 // 0). But for the usual inclusive-start, exclusive-end range 495 // idiom, inclusive starts become exclusive ends and 496 // vice-versa, so e.g. an (inclusive) start of 0 becomes an 497 // (exclusive) end of 10. 498 let translate = |inst: InsnIndex| InsnIndex::new(n_insts - inst.index()); 499 500 // Generate debug-value labels based on per-label maps. 501 for (label, tuples) in &self.debug_info { 502 for &(start, end, vreg) in tuples { 503 let vreg = vregs.resolve_vreg_alias(vreg); 504 let fwd_start = translate(end); 505 let fwd_end = translate(start); 506 self.vcode 507 .debug_value_labels 508 .push((vreg, fwd_start, fwd_end, label.as_u32())); 509 } 510 } 511 512 // Now sort debug value labels by VReg, as required 513 // by regalloc2. 514 self.vcode 515 .debug_value_labels 516 .sort_unstable_by_key(|(vreg, _, _, _)| *vreg); 517 } 518 519 fn collect_operands(&mut self, vregs: &VRegAllocator<I>) { 520 let allocatable = PRegSet::from(self.vcode.machine_env()); 521 for (i, insn) in self.vcode.insts.iter_mut().enumerate() { 522 // Push operands from the instruction onto the operand list. 523 // 524 // We rename through the vreg alias table as we collect 525 // the operands. This is better than a separate post-pass 526 // over operands, because it has more cache locality: 527 // operands only need to pass through L1 once. This is 528 // also better than renaming instructions' 529 // operands/registers while lowering, because here we only 530 // need to do the `match` over the instruction to visit 531 // its register fields (which is slow, branchy code) once. 532 533 let mut op_collector = 534 OperandCollector::new(&mut self.vcode.operands, allocatable, |vreg| { 535 vregs.resolve_vreg_alias(vreg) 536 }); 537 insn.get_operands(&mut op_collector); 538 let (ops, clobbers) = op_collector.finish(); 539 self.vcode.operand_ranges.push_end(ops); 540 541 if clobbers != PRegSet::default() { 542 self.vcode.clobbers.insert(InsnIndex::new(i), clobbers); 543 } 544 545 if let Some((dst, src)) = insn.is_move() { 546 // We should never see non-virtual registers present in move 547 // instructions. 548 assert!( 549 src.is_virtual(), 550 "the real register {src:?} was used as the source of a move instruction" 551 ); 552 assert!( 553 dst.to_reg().is_virtual(), 554 "the real register {:?} was used as the destination of a move instruction", 555 dst.to_reg() 556 ); 557 } 558 } 559 560 // Translate blockparam args via the vreg aliases table as well. 561 for arg in &mut self.vcode.branch_block_args { 562 let new_arg = vregs.resolve_vreg_alias(*arg); 563 trace!("operandcollector: block arg {:?} -> {:?}", arg, new_arg); 564 *arg = new_arg; 565 } 566 } 567 568 /// Build the final VCode. 569 pub fn build(mut self, mut vregs: VRegAllocator<I>) -> VCode<I> { 570 self.vcode.vreg_types = take(&mut vregs.vreg_types); 571 self.vcode.facts = take(&mut vregs.facts); 572 573 if self.direction == VCodeBuildDirection::Backward { 574 self.reverse_and_finalize(&vregs); 575 } 576 self.collect_operands(&vregs); 577 578 self.compute_preds_from_succs(); 579 self.vcode.debug_value_labels.sort_unstable(); 580 581 // At this point, nothing in the vcode should mention any 582 // VReg which has been aliased. All the appropriate rewriting 583 // should have happened above. Just to be sure, let's 584 // double-check each field which has vregs. 585 // Note: can't easily check vcode.insts, resolved in collect_operands. 586 // Operands are resolved in collect_operands. 587 vregs.debug_assert_no_vreg_aliases(self.vcode.operands.iter().map(|op| op.vreg())); 588 // Currently block params are never aliased to another vreg. 589 vregs.debug_assert_no_vreg_aliases(self.vcode.block_params.iter().copied()); 590 // Branch block args are resolved in collect_operands. 591 vregs.debug_assert_no_vreg_aliases(self.vcode.branch_block_args.iter().copied()); 592 // Debug value labels are resolved in reverse_and_finalize. 593 vregs.debug_assert_no_vreg_aliases( 594 self.vcode.debug_value_labels.iter().map(|&(vreg, ..)| vreg), 595 ); 596 // Facts are resolved eagerly during set_vreg_alias. 597 vregs.debug_assert_no_vreg_aliases( 598 self.vcode 599 .facts 600 .iter() 601 .zip(&vregs.vreg_types) 602 .enumerate() 603 .filter(|(_, (fact, _))| fact.is_some()) 604 .map(|(vreg, (_, &ty))| { 605 let (regclasses, _) = I::rc_for_type(ty).unwrap(); 606 VReg::new(vreg, regclasses[0]) 607 }), 608 ); 609 610 self.vcode 611 } 612 613 /// Add a user stack map for the associated instruction. 614 pub fn add_user_stack_map( 615 &mut self, 616 inst: BackwardsInsnIndex, 617 entries: &[ir::UserStackMapEntry], 618 ) { 619 let stack_map = ir::UserStackMap::new(entries, self.vcode.abi.sized_stackslot_offsets()); 620 let old_entry = self.vcode.user_stack_maps.insert(inst, stack_map); 621 debug_assert!(old_entry.is_none()); 622 } 623 } 624 625 const NO_INST_OFFSET: CodeOffset = u32::MAX; 626 627 impl<I: VCodeInst> VCode<I> { 628 /// New empty VCode. 629 fn new( 630 sigs: SigSet, 631 abi: Callee<I::ABIMachineSpec>, 632 emit_info: I::Info, 633 block_order: BlockLoweringOrder, 634 constants: VCodeConstants, 635 log2_min_function_alignment: u8, 636 ) -> Self { 637 let n_blocks = block_order.lowered_order().len(); 638 VCode { 639 sigs, 640 vreg_types: vec![], 641 insts: Vec::with_capacity(10 * n_blocks), 642 user_stack_maps: FxHashMap::default(), 643 operands: Vec::with_capacity(30 * n_blocks), 644 operand_ranges: Ranges::with_capacity(10 * n_blocks), 645 clobbers: FxHashMap::default(), 646 srclocs: Vec::with_capacity(10 * n_blocks), 647 entry: BlockIndex::new(0), 648 block_ranges: Ranges::with_capacity(n_blocks), 649 block_succ_range: Ranges::with_capacity(n_blocks), 650 block_succs: Vec::with_capacity(n_blocks), 651 block_pred_range: Ranges::default(), 652 block_preds: Vec::new(), 653 block_params_range: Ranges::with_capacity(n_blocks), 654 block_params: Vec::with_capacity(5 * n_blocks), 655 branch_block_args: Vec::with_capacity(10 * n_blocks), 656 branch_block_arg_range: Ranges::with_capacity(2 * n_blocks), 657 branch_block_arg_succ_range: Ranges::with_capacity(n_blocks), 658 block_order, 659 abi, 660 emit_info, 661 constants, 662 debug_value_labels: vec![], 663 facts: vec![], 664 log2_min_function_alignment, 665 } 666 } 667 668 /// Get the ABI-dependent MachineEnv for managing register allocation. 669 pub fn machine_env(&self) -> &MachineEnv { 670 self.abi.machine_env(&self.sigs) 671 } 672 673 /// Get the number of blocks. Block indices will be in the range `0 .. 674 /// (self.num_blocks() - 1)`. 675 pub fn num_blocks(&self) -> usize { 676 self.block_ranges.len() 677 } 678 679 /// The number of lowered instructions. 680 pub fn num_insts(&self) -> usize { 681 self.insts.len() 682 } 683 684 fn compute_clobbers(&self, regalloc: ®alloc2::Output) -> Vec<Writable<RealReg>> { 685 let mut clobbered = PRegSet::default(); 686 687 // All moves are included in clobbers. 688 for (_, Edit::Move { to, .. }) in ®alloc.edits { 689 if let Some(preg) = to.as_reg() { 690 clobbered.add(preg); 691 } 692 } 693 694 for (i, range) in self.operand_ranges.iter() { 695 let operands = &self.operands[range.clone()]; 696 let allocs = ®alloc.allocs[range]; 697 for (operand, alloc) in operands.iter().zip(allocs.iter()) { 698 if operand.kind() == OperandKind::Def { 699 if let Some(preg) = alloc.as_reg() { 700 clobbered.add(preg); 701 } 702 } 703 } 704 705 // Also add explicitly-clobbered registers. 706 // 707 // Skip merging this instruction's clobber list if not 708 // "included in clobbers" as per the MachInst. (Some 709 // backends use this to implement ABI specifics; e.g., 710 // excluding calls of the same ABI as the current function 711 // from clobbers, because by definition everything 712 // clobbered by the call can be clobbered by this function 713 // without saving as well. 714 // 715 // This is important for a particular optimization: when 716 // some registers are "half-clobbered", e.g. vector/float 717 // registers on aarch64, we want them to be seen as 718 // clobbered by regalloc so it avoids carrying values 719 // across calls in these registers but not seen as 720 // clobbered by prologue generation here (because the 721 // actual half-clobber implied by the clobber list fits 722 // within the clobbers that we allow without 723 // clobber-saves). 724 if self.insts[i].is_included_in_clobbers() { 725 if let Some(&inst_clobbered) = self.clobbers.get(&InsnIndex::new(i)) { 726 clobbered.union_from(inst_clobbered); 727 } 728 } 729 } 730 731 clobbered 732 .into_iter() 733 .map(|preg| Writable::from_reg(RealReg::from(preg))) 734 .collect() 735 } 736 737 /// Emit the instructions to a `MachBuffer`, containing fixed-up 738 /// code and external reloc/trap/etc. records ready for use. Takes 739 /// the regalloc results as well. 740 /// 741 /// Returns the machine code itself, and optionally metadata 742 /// and/or a disassembly, as an `EmitResult`. The `VCode` itself 743 /// is consumed by the emission process. 744 pub fn emit( 745 mut self, 746 regalloc: ®alloc2::Output, 747 want_disasm: bool, 748 flags: &settings::Flags, 749 ctrl_plane: &mut ControlPlane, 750 ) -> EmitResult 751 where 752 I: VCodeInst, 753 { 754 let _tt = timing::vcode_emit(); 755 let mut buffer = MachBuffer::new(); 756 buffer.set_log2_min_function_alignment(self.log2_min_function_alignment); 757 let mut bb_starts: Vec<Option<CodeOffset>> = vec![]; 758 759 // The first M MachLabels are reserved for block indices. 760 buffer.reserve_labels_for_blocks(self.num_blocks()); 761 762 // Register all allocated constants with the `MachBuffer` to ensure that 763 // any references to the constants during instructions can be handled 764 // correctly. 765 buffer.register_constants(&self.constants); 766 767 // Construct the final order we emit code in: cold blocks at the end. 768 let mut final_order: SmallVec<[BlockIndex; 16]> = smallvec![]; 769 let mut cold_blocks: SmallVec<[BlockIndex; 16]> = smallvec![]; 770 for block in 0..self.num_blocks() { 771 let block = BlockIndex::new(block); 772 if self.block_order.is_cold(block) { 773 cold_blocks.push(block); 774 } else { 775 final_order.push(block); 776 } 777 } 778 final_order.extend(cold_blocks.clone()); 779 780 // Compute/save info we need for the prologue: clobbers and 781 // number of spillslots. 782 // 783 // We clone `abi` here because we will mutate it as we 784 // generate the prologue and set other info, but we can't 785 // mutate `VCode`. The info it usually carries prior to 786 // setting clobbers is fairly minimal so this should be 787 // relatively cheap. 788 let clobbers = self.compute_clobbers(regalloc); 789 self.abi 790 .compute_frame_layout(&self.sigs, regalloc.num_spillslots, clobbers); 791 792 // Emit blocks. 793 let mut cur_srcloc = None; 794 let mut last_offset = None; 795 let mut inst_offsets = vec![]; 796 let mut state = I::State::new(&self.abi, std::mem::take(ctrl_plane)); 797 798 let mut disasm = String::new(); 799 800 if !self.debug_value_labels.is_empty() { 801 inst_offsets.resize(self.insts.len(), NO_INST_OFFSET); 802 } 803 804 // Count edits per block ahead of time; this is needed for 805 // lookahead island emission. (We could derive it per-block 806 // with binary search in the edit list, but it's more 807 // efficient to do it in one pass here.) 808 let mut ra_edits_per_block: SmallVec<[u32; 64]> = smallvec![]; 809 let mut edit_idx = 0; 810 for block in 0..self.num_blocks() { 811 let end_inst = InsnIndex::new(self.block_ranges.get(block).end); 812 let start_edit_idx = edit_idx; 813 while edit_idx < regalloc.edits.len() && regalloc.edits[edit_idx].0.inst() < end_inst { 814 edit_idx += 1; 815 } 816 let end_edit_idx = edit_idx; 817 ra_edits_per_block.push((end_edit_idx - start_edit_idx) as u32); 818 } 819 820 let is_forward_edge_cfi_enabled = self.abi.is_forward_edge_cfi_enabled(); 821 let mut bb_padding = match flags.bb_padding_log2_minus_one() { 822 0 => Vec::new(), 823 n => vec![0; 1 << (n - 1)], 824 }; 825 let mut total_bb_padding = 0; 826 827 for (block_order_idx, &block) in final_order.iter().enumerate() { 828 trace!("emitting block {:?}", block); 829 830 // Call the new block hook for state 831 state.on_new_block(); 832 833 // Emit NOPs to align the block. 834 let new_offset = I::align_basic_block(buffer.cur_offset()); 835 while new_offset > buffer.cur_offset() { 836 // Pad with NOPs up to the aligned block offset. 837 let nop = I::gen_nop((new_offset - buffer.cur_offset()) as usize); 838 nop.emit(&mut buffer, &self.emit_info, &mut Default::default()); 839 } 840 assert_eq!(buffer.cur_offset(), new_offset); 841 842 let do_emit = |inst: &I, 843 disasm: &mut String, 844 buffer: &mut MachBuffer<I>, 845 state: &mut I::State| { 846 if want_disasm && !inst.is_args() { 847 let mut s = state.clone(); 848 writeln!(disasm, " {}", inst.pretty_print_inst(&mut s)).unwrap(); 849 } 850 inst.emit(buffer, &self.emit_info, state); 851 }; 852 853 // Is this the first block? Emit the prologue directly if so. 854 if block == self.entry { 855 trace!(" -> entry block"); 856 buffer.start_srcloc(Default::default()); 857 for inst in &self.abi.gen_prologue() { 858 do_emit(&inst, &mut disasm, &mut buffer, &mut state); 859 } 860 buffer.end_srcloc(); 861 } 862 863 // Now emit the regular block body. 864 865 buffer.bind_label(MachLabel::from_block(block), state.ctrl_plane_mut()); 866 867 if want_disasm { 868 writeln!(&mut disasm, "block{}:", block.index()).unwrap(); 869 } 870 871 if flags.machine_code_cfg_info() { 872 // Track BB starts. If we have backed up due to MachBuffer 873 // branch opts, note that the removed blocks were removed. 874 let cur_offset = buffer.cur_offset(); 875 if last_offset.is_some() && cur_offset <= last_offset.unwrap() { 876 for i in (0..bb_starts.len()).rev() { 877 if bb_starts[i].is_some() && cur_offset > bb_starts[i].unwrap() { 878 break; 879 } 880 bb_starts[i] = None; 881 } 882 } 883 bb_starts.push(Some(cur_offset)); 884 last_offset = Some(cur_offset); 885 } 886 887 if let Some(block_start) = I::gen_block_start( 888 self.block_order.is_indirect_branch_target(block), 889 is_forward_edge_cfi_enabled, 890 ) { 891 do_emit(&block_start, &mut disasm, &mut buffer, &mut state); 892 } 893 894 for inst_or_edit in regalloc.block_insts_and_edits(&self, block) { 895 match inst_or_edit { 896 InstOrEdit::Inst(iix) => { 897 if !self.debug_value_labels.is_empty() { 898 // If we need to produce debug info, 899 // record the offset of each instruction 900 // so that we can translate value-label 901 // ranges to machine-code offsets. 902 903 // Cold blocks violate monotonicity 904 // assumptions elsewhere (that 905 // instructions in inst-index order are in 906 // order in machine code), so we omit 907 // their offsets here. Value-label range 908 // generation below will skip empty ranges 909 // and ranges with to-offsets of zero. 910 if !self.block_order.is_cold(block) { 911 inst_offsets[iix.index()] = buffer.cur_offset(); 912 } 913 } 914 915 // Update the srcloc at this point in the buffer. 916 let srcloc = self.srclocs[iix.index()]; 917 if cur_srcloc != Some(srcloc) { 918 if cur_srcloc.is_some() { 919 buffer.end_srcloc(); 920 } 921 buffer.start_srcloc(srcloc); 922 cur_srcloc = Some(srcloc); 923 } 924 925 // If this is a safepoint, compute a stack map 926 // and pass it to the emit state. 927 let stack_map_disasm = if self.insts[iix.index()].is_safepoint() { 928 let (user_stack_map, user_stack_map_disasm) = { 929 // The `user_stack_maps` is keyed by reverse 930 // instruction index, so we must flip the 931 // index. We can't put this into a helper method 932 // due to borrowck issues because parts of 933 // `self` are borrowed mutably elsewhere in this 934 // function. 935 let index = iix.to_backwards_insn_index(self.num_insts()); 936 let user_stack_map = self.user_stack_maps.remove(&index); 937 let user_stack_map_disasm = 938 user_stack_map.as_ref().map(|m| format!(" ; {m:?}")); 939 (user_stack_map, user_stack_map_disasm) 940 }; 941 942 state.pre_safepoint(user_stack_map); 943 944 user_stack_map_disasm 945 } else { 946 None 947 }; 948 949 // If the instruction we are about to emit is 950 // a return, place an epilogue at this point 951 // (and don't emit the return; the actual 952 // epilogue will contain it). 953 if self.insts[iix.index()].is_term() == MachTerminator::Ret { 954 for inst in self.abi.gen_epilogue() { 955 do_emit(&inst, &mut disasm, &mut buffer, &mut state); 956 } 957 } else { 958 // Update the operands for this inst using the 959 // allocations from the regalloc result. 960 let mut allocs = regalloc.inst_allocs(iix).iter(); 961 self.insts[iix.index()].get_operands( 962 &mut |reg: &mut Reg, constraint, _kind, _pos| { 963 let alloc = 964 allocs.next().expect("enough allocations for all operands"); 965 966 if let Some(alloc) = alloc.as_reg() { 967 let alloc: Reg = alloc.into(); 968 if let OperandConstraint::FixedReg(rreg) = constraint { 969 debug_assert_eq!(Reg::from(rreg), alloc); 970 } 971 *reg = alloc; 972 } else if let Some(alloc) = alloc.as_stack() { 973 let alloc: Reg = alloc.into(); 974 *reg = alloc; 975 } 976 }, 977 ); 978 debug_assert!(allocs.next().is_none()); 979 980 // Emit the instruction! 981 do_emit( 982 &self.insts[iix.index()], 983 &mut disasm, 984 &mut buffer, 985 &mut state, 986 ); 987 if let Some(stack_map_disasm) = stack_map_disasm { 988 disasm.push_str(&stack_map_disasm); 989 disasm.push('\n'); 990 } 991 } 992 } 993 994 InstOrEdit::Edit(Edit::Move { from, to }) => { 995 // Create a move/spill/reload instruction and 996 // immediately emit it. 997 match (from.as_reg(), to.as_reg()) { 998 (Some(from), Some(to)) => { 999 // Reg-to-reg move. 1000 let from_rreg = Reg::from(from); 1001 let to_rreg = Writable::from_reg(Reg::from(to)); 1002 debug_assert_eq!(from.class(), to.class()); 1003 let ty = I::canonical_type_for_rc(from.class()); 1004 let mv = I::gen_move(to_rreg, from_rreg, ty); 1005 do_emit(&mv, &mut disasm, &mut buffer, &mut state); 1006 } 1007 (Some(from), None) => { 1008 // Spill from register to spillslot. 1009 let to = to.as_stack().unwrap(); 1010 let from_rreg = RealReg::from(from); 1011 let spill = self.abi.gen_spill(to, from_rreg); 1012 do_emit(&spill, &mut disasm, &mut buffer, &mut state); 1013 } 1014 (None, Some(to)) => { 1015 // Load from spillslot to register. 1016 let from = from.as_stack().unwrap(); 1017 let to_rreg = Writable::from_reg(RealReg::from(to)); 1018 let reload = self.abi.gen_reload(to_rreg, from); 1019 do_emit(&reload, &mut disasm, &mut buffer, &mut state); 1020 } 1021 (None, None) => { 1022 panic!("regalloc2 should have eliminated stack-to-stack moves!"); 1023 } 1024 } 1025 } 1026 } 1027 } 1028 1029 if cur_srcloc.is_some() { 1030 buffer.end_srcloc(); 1031 cur_srcloc = None; 1032 } 1033 1034 // Do we need an island? Get the worst-case size of the next BB, add 1035 // it to the optional padding behind the block, and pass this to the 1036 // `MachBuffer` to determine if an island is necessary. 1037 let worst_case_next_bb = if block_order_idx < final_order.len() - 1 { 1038 let next_block = final_order[block_order_idx + 1]; 1039 let next_block_range = self.block_ranges.get(next_block.index()); 1040 let next_block_size = next_block_range.len() as u32; 1041 let next_block_ra_insertions = ra_edits_per_block[next_block.index()]; 1042 I::worst_case_size() * (next_block_size + next_block_ra_insertions) 1043 } else { 1044 0 1045 }; 1046 let padding = if bb_padding.is_empty() { 1047 0 1048 } else { 1049 bb_padding.len() as u32 + I::LabelUse::ALIGN - 1 1050 }; 1051 if buffer.island_needed(padding + worst_case_next_bb) { 1052 buffer.emit_island(padding + worst_case_next_bb, ctrl_plane); 1053 } 1054 1055 // Insert padding, if configured, to stress the `MachBuffer`'s 1056 // relocation and island calculations. 1057 // 1058 // Padding can get quite large during fuzzing though so place a 1059 // total cap on it where when a per-function threshold is exceeded 1060 // the padding is turned back down to zero. This avoids a small-ish 1061 // test case generating a GB+ memory footprint in Cranelift for 1062 // example. 1063 if !bb_padding.is_empty() { 1064 buffer.put_data(&bb_padding); 1065 buffer.align_to(I::LabelUse::ALIGN); 1066 total_bb_padding += bb_padding.len(); 1067 if total_bb_padding > (150 << 20) { 1068 bb_padding = Vec::new(); 1069 } 1070 } 1071 } 1072 1073 debug_assert!( 1074 self.user_stack_maps.is_empty(), 1075 "any stack maps should have been consumed by instruction emission, still have: {:#?}", 1076 self.user_stack_maps, 1077 ); 1078 1079 // Do any optimizations on branches at tail of buffer, as if we had 1080 // bound one last label. 1081 buffer.optimize_branches(ctrl_plane); 1082 1083 // emission state is not needed anymore, move control plane back out 1084 *ctrl_plane = state.take_ctrl_plane(); 1085 1086 let func_body_len = buffer.cur_offset(); 1087 1088 // Create `bb_edges` and final (filtered) `bb_starts`. 1089 let mut bb_edges = vec![]; 1090 let mut bb_offsets = vec![]; 1091 if flags.machine_code_cfg_info() { 1092 for block in 0..self.num_blocks() { 1093 if bb_starts[block].is_none() { 1094 // Block was deleted by MachBuffer; skip. 1095 continue; 1096 } 1097 let from = bb_starts[block].unwrap(); 1098 1099 bb_offsets.push(from); 1100 // Resolve each `succ` label and add edges. 1101 let succs = self.block_succs(BlockIndex::new(block)); 1102 for &succ in succs.iter() { 1103 let to = buffer.resolve_label_offset(MachLabel::from_block(succ)); 1104 bb_edges.push((from, to)); 1105 } 1106 } 1107 } 1108 1109 self.monotonize_inst_offsets(&mut inst_offsets[..], func_body_len); 1110 let value_labels_ranges = 1111 self.compute_value_labels_ranges(regalloc, &inst_offsets[..], func_body_len); 1112 let frame_size = self.abi.frame_size(); 1113 1114 EmitResult { 1115 buffer: buffer.finish(&self.constants, ctrl_plane), 1116 bb_offsets, 1117 bb_edges, 1118 func_body_len, 1119 disasm: if want_disasm { Some(disasm) } else { None }, 1120 sized_stackslot_offsets: self.abi.sized_stackslot_offsets().clone(), 1121 dynamic_stackslot_offsets: self.abi.dynamic_stackslot_offsets().clone(), 1122 value_labels_ranges, 1123 frame_size, 1124 } 1125 } 1126 1127 fn monotonize_inst_offsets(&self, inst_offsets: &mut [CodeOffset], func_body_len: u32) { 1128 if self.debug_value_labels.is_empty() { 1129 return; 1130 } 1131 1132 // During emission, branch removal can make offsets of instructions incorrect. 1133 // Consider the following sequence: [insi][jmp0][jmp1][jmp2][insj] 1134 // It will be recorded as (say): [30] [34] [38] [42] [<would be 46>] 1135 // When the jumps get removed we are left with (in "inst_offsets"): 1136 // [insi][jmp0][jmp1][jmp2][insj][...] 1137 // [30] [34] [38] [42] [34] 1138 // Which violates the monotonicity invariant. This method sets offsets of these 1139 // removed instructions such as to make them appear zero-sized: 1140 // [insi][jmp0][jmp1][jmp2][insj][...] 1141 // [30] [34] [34] [34] [34] 1142 // 1143 let mut next_offset = func_body_len; 1144 for inst_index in (0..(inst_offsets.len() - 1)).rev() { 1145 let inst_offset = inst_offsets[inst_index]; 1146 1147 // Not all instructions get their offsets recorded. 1148 if inst_offset == NO_INST_OFFSET { 1149 continue; 1150 } 1151 1152 if inst_offset > next_offset { 1153 trace!( 1154 "Fixing code offset of the removed Inst {}: {} -> {}", 1155 inst_index, 1156 inst_offset, 1157 next_offset 1158 ); 1159 inst_offsets[inst_index] = next_offset; 1160 continue; 1161 } 1162 1163 next_offset = inst_offset; 1164 } 1165 } 1166 1167 fn compute_value_labels_ranges( 1168 &self, 1169 regalloc: ®alloc2::Output, 1170 inst_offsets: &[CodeOffset], 1171 func_body_len: u32, 1172 ) -> ValueLabelsRanges { 1173 if self.debug_value_labels.is_empty() { 1174 return ValueLabelsRanges::default(); 1175 } 1176 1177 if trace_log_enabled!() { 1178 self.log_value_labels_ranges(regalloc, inst_offsets); 1179 } 1180 1181 let mut value_labels_ranges: ValueLabelsRanges = HashMap::new(); 1182 for &(label, from, to, alloc) in ®alloc.debug_locations { 1183 let label = ValueLabel::from_u32(label); 1184 let ranges = value_labels_ranges.entry(label).or_insert_with(|| vec![]); 1185 let prog_point_to_inst = |prog_point: ProgPoint| { 1186 let mut inst = prog_point.inst(); 1187 if prog_point.pos() == InstPosition::After { 1188 inst = inst.next(); 1189 } 1190 inst.index() 1191 }; 1192 let from_inst_index = prog_point_to_inst(from); 1193 let to_inst_index = prog_point_to_inst(to); 1194 let from_offset = inst_offsets[from_inst_index]; 1195 let to_offset = if to_inst_index == inst_offsets.len() { 1196 func_body_len 1197 } else { 1198 inst_offsets[to_inst_index] 1199 }; 1200 1201 // Empty ranges or unavailable offsets can happen 1202 // due to cold blocks and branch removal (see above). 1203 if from_offset == NO_INST_OFFSET 1204 || to_offset == NO_INST_OFFSET 1205 || from_offset == to_offset 1206 { 1207 continue; 1208 } 1209 1210 let loc = if let Some(preg) = alloc.as_reg() { 1211 LabelValueLoc::Reg(Reg::from(preg)) 1212 } else { 1213 let slot = alloc.as_stack().unwrap(); 1214 let slot_offset = self.abi.get_spillslot_offset(slot); 1215 let slot_base_to_caller_sp_offset = self.abi.slot_base_to_caller_sp_offset(); 1216 let caller_sp_to_cfa_offset = 1217 crate::isa::unwind::systemv::caller_sp_to_cfa_offset(); 1218 // NOTE: this is a negative offset because it's relative to the caller's SP 1219 let cfa_to_sp_offset = 1220 -((slot_base_to_caller_sp_offset + caller_sp_to_cfa_offset) as i64); 1221 LabelValueLoc::CFAOffset(cfa_to_sp_offset + slot_offset) 1222 }; 1223 1224 // Coalesce adjacent ranges that for the same location 1225 // to minimize output size here and for the consumers. 1226 if let Some(last_loc_range) = ranges.last_mut() { 1227 if last_loc_range.loc == loc && last_loc_range.end == from_offset { 1228 trace!( 1229 "Extending debug range for {:?} in {:?} to Inst {} ({})", 1230 label, 1231 loc, 1232 to_inst_index, 1233 to_offset 1234 ); 1235 last_loc_range.end = to_offset; 1236 continue; 1237 } 1238 } 1239 1240 trace!( 1241 "Recording debug range for {:?} in {:?}: [Inst {}..Inst {}) [{}..{})", 1242 label, 1243 loc, 1244 from_inst_index, 1245 to_inst_index, 1246 from_offset, 1247 to_offset 1248 ); 1249 1250 ranges.push(ValueLocRange { 1251 loc, 1252 start: from_offset, 1253 end: to_offset, 1254 }); 1255 } 1256 1257 value_labels_ranges 1258 } 1259 1260 fn log_value_labels_ranges(&self, regalloc: ®alloc2::Output, inst_offsets: &[CodeOffset]) { 1261 debug_assert!(trace_log_enabled!()); 1262 1263 // What debug labels do we have? Note we'll skip those that have not been 1264 // allocated any location at all. They will show up as numeric gaps in the table. 1265 let mut labels = vec![]; 1266 for &(label, _, _, _) in ®alloc.debug_locations { 1267 if Some(&label) == labels.last() { 1268 continue; 1269 } 1270 labels.push(label); 1271 } 1272 1273 // Reformat the data on what VRegs were the VLs assigned to by lowering, since 1274 // the array we have is sorted by VReg, and we want it sorted by VL for easy 1275 // access in the loop below. 1276 let mut vregs = vec![]; 1277 for &(vreg, start, end, label) in &self.debug_value_labels { 1278 if matches!(labels.binary_search(&label), Ok(_)) { 1279 vregs.push((label, start, end, vreg)); 1280 } 1281 } 1282 vregs.sort_unstable_by( 1283 |(l_label, l_start, _, _), (r_label, r_start, _, _)| match l_label.cmp(r_label) { 1284 Ordering::Equal => l_start.cmp(r_start), 1285 cmp => cmp, 1286 }, 1287 ); 1288 1289 #[derive(PartialEq)] 1290 enum Mode { 1291 Measure, 1292 Emit, 1293 } 1294 #[derive(PartialEq)] 1295 enum Row { 1296 Head, 1297 Line, 1298 Inst(usize, usize), 1299 } 1300 1301 let mut widths = vec![0; 3 + 2 * labels.len()]; 1302 let mut row = String::new(); 1303 let mut output_row = |row_kind: Row, mode: Mode| { 1304 let mut column_index = 0; 1305 row.clear(); 1306 1307 macro_rules! output_cell_impl { 1308 ($fill:literal, $span:literal, $($cell_fmt:tt)*) => { 1309 let column_start = row.len(); 1310 { 1311 row.push('|'); 1312 write!(row, $($cell_fmt)*).unwrap(); 1313 } 1314 1315 let next_column_index = column_index + $span; 1316 let expected_width: usize = widths[column_index..next_column_index].iter().sum(); 1317 if mode == Mode::Measure { 1318 let actual_width = row.len() - column_start; 1319 if actual_width > expected_width { 1320 widths[next_column_index - 1] += actual_width - expected_width; 1321 } 1322 } else { 1323 let column_end = column_start + expected_width; 1324 while row.len() != column_end { 1325 row.push($fill); 1326 } 1327 } 1328 column_index = next_column_index; 1329 }; 1330 } 1331 macro_rules! output_cell { 1332 ($($cell_fmt:tt)*) => { 1333 output_cell_impl!(' ', 1, $($cell_fmt)*); 1334 }; 1335 } 1336 1337 match row_kind { 1338 Row::Head => { 1339 output_cell!("BB"); 1340 output_cell!("Inst"); 1341 output_cell!("IP"); 1342 for label in &labels { 1343 output_cell_impl!(' ', 2, "{:?}", ValueLabel::from_u32(*label)); 1344 } 1345 } 1346 Row::Line => { 1347 debug_assert!(mode == Mode::Emit); 1348 for _ in 0..3 { 1349 output_cell_impl!('-', 1, ""); 1350 } 1351 for _ in &labels { 1352 output_cell_impl!('-', 2, ""); 1353 } 1354 } 1355 Row::Inst(block_index, inst_index) => { 1356 debug_assert!(inst_index < self.num_insts()); 1357 if self.block_ranges.get(block_index).start == inst_index { 1358 output_cell!("B{}", block_index); 1359 } else { 1360 output_cell!(""); 1361 } 1362 output_cell!("Inst {inst_index} "); 1363 output_cell!("{} ", inst_offsets[inst_index]); 1364 1365 for label in &labels { 1366 // First, the VReg. 1367 use regalloc2::Inst; 1368 let vreg_cmp = |inst: usize, 1369 vreg_label: &u32, 1370 range_start: &Inst, 1371 range_end: &Inst| { 1372 match vreg_label.cmp(&label) { 1373 Ordering::Equal => { 1374 if range_end.index() <= inst { 1375 Ordering::Less 1376 } else if range_start.index() > inst { 1377 Ordering::Greater 1378 } else { 1379 Ordering::Equal 1380 } 1381 } 1382 cmp => cmp, 1383 } 1384 }; 1385 let vreg_index = 1386 vregs.binary_search_by(|(l, s, e, _)| vreg_cmp(inst_index, l, s, e)); 1387 if let Ok(vreg_index) = vreg_index { 1388 let mut prev_vreg = None; 1389 if inst_index > 0 { 1390 let prev_vreg_index = vregs.binary_search_by(|(l, s, e, _)| { 1391 vreg_cmp(inst_index - 1, l, s, e) 1392 }); 1393 if let Ok(prev_vreg_index) = prev_vreg_index { 1394 prev_vreg = Some(vregs[prev_vreg_index].3); 1395 } 1396 } 1397 1398 let vreg = vregs[vreg_index].3; 1399 if Some(vreg) == prev_vreg { 1400 output_cell!("*"); 1401 } else { 1402 output_cell!("{}", vreg); 1403 } 1404 } else { 1405 output_cell!(""); 1406 } 1407 1408 // Second, the allocated location. 1409 let inst_prog_point = ProgPoint::before(Inst::new(inst_index)); 1410 let range_index = regalloc.debug_locations.binary_search_by( 1411 |(range_label, range_start, range_end, _)| match range_label.cmp(label) 1412 { 1413 Ordering::Equal => { 1414 if *range_end <= inst_prog_point { 1415 Ordering::Less 1416 } else if *range_start > inst_prog_point { 1417 Ordering::Greater 1418 } else { 1419 Ordering::Equal 1420 } 1421 } 1422 cmp => cmp, 1423 }, 1424 ); 1425 if let Ok(range_index) = range_index { 1426 // Live at this instruction, print the location. 1427 if let Some(reg) = regalloc.debug_locations[range_index].3.as_reg() { 1428 output_cell!("{:?}", Reg::from(reg)); 1429 } else { 1430 output_cell!("Stk"); 1431 } 1432 } else { 1433 // Not live at this instruction. 1434 output_cell!(""); 1435 } 1436 } 1437 } 1438 } 1439 row.push('|'); 1440 1441 if mode == Mode::Emit { 1442 trace!("{}", row.as_str()); 1443 } 1444 }; 1445 1446 for block_index in 0..self.num_blocks() { 1447 for inst_index in self.block_ranges.get(block_index) { 1448 output_row(Row::Inst(block_index, inst_index), Mode::Measure); 1449 } 1450 } 1451 output_row(Row::Head, Mode::Measure); 1452 1453 output_row(Row::Head, Mode::Emit); 1454 output_row(Row::Line, Mode::Emit); 1455 for block_index in 0..self.num_blocks() { 1456 for inst_index in self.block_ranges.get(block_index) { 1457 output_row(Row::Inst(block_index, inst_index), Mode::Emit); 1458 } 1459 } 1460 } 1461 1462 /// Get the IR block for a BlockIndex, if one exists. 1463 pub fn bindex_to_bb(&self, block: BlockIndex) -> Option<ir::Block> { 1464 self.block_order.lowered_order()[block.index()].orig_block() 1465 } 1466 1467 /// Get the type of a VReg. 1468 pub fn vreg_type(&self, vreg: VReg) -> Type { 1469 self.vreg_types[vreg.vreg()] 1470 } 1471 1472 /// Get the fact, if any, for a given VReg. 1473 pub fn vreg_fact(&self, vreg: VReg) -> Option<&Fact> { 1474 self.facts[vreg.vreg()].as_ref() 1475 } 1476 1477 /// Set the fact for a given VReg. 1478 pub fn set_vreg_fact(&mut self, vreg: VReg, fact: Fact) { 1479 trace!("set fact on {}: {:?}", vreg, fact); 1480 self.facts[vreg.vreg()] = Some(fact); 1481 } 1482 1483 /// Does a given instruction define any facts? 1484 pub fn inst_defines_facts(&self, inst: InsnIndex) -> bool { 1485 self.inst_operands(inst) 1486 .iter() 1487 .filter(|o| o.kind() == OperandKind::Def) 1488 .map(|o| o.vreg()) 1489 .any(|vreg| self.facts[vreg.vreg()].is_some()) 1490 } 1491 1492 /// Get the user stack map associated with the given forward instruction index. 1493 pub fn get_user_stack_map(&self, inst: InsnIndex) -> Option<&ir::UserStackMap> { 1494 let index = inst.to_backwards_insn_index(self.num_insts()); 1495 self.user_stack_maps.get(&index) 1496 } 1497 } 1498 1499 impl<I: VCodeInst> std::ops::Index<InsnIndex> for VCode<I> { 1500 type Output = I; 1501 fn index(&self, idx: InsnIndex) -> &Self::Output { 1502 &self.insts[idx.index()] 1503 } 1504 } 1505 1506 impl<I: VCodeInst> RegallocFunction for VCode<I> { 1507 fn num_insts(&self) -> usize { 1508 self.insts.len() 1509 } 1510 1511 fn num_blocks(&self) -> usize { 1512 self.block_ranges.len() 1513 } 1514 1515 fn entry_block(&self) -> BlockIndex { 1516 self.entry 1517 } 1518 1519 fn block_insns(&self, block: BlockIndex) -> InstRange { 1520 let range = self.block_ranges.get(block.index()); 1521 InstRange::new(InsnIndex::new(range.start), InsnIndex::new(range.end)) 1522 } 1523 1524 fn block_succs(&self, block: BlockIndex) -> &[BlockIndex] { 1525 let range = self.block_succ_range.get(block.index()); 1526 &self.block_succs[range] 1527 } 1528 1529 fn block_preds(&self, block: BlockIndex) -> &[BlockIndex] { 1530 let range = self.block_pred_range.get(block.index()); 1531 &self.block_preds[range] 1532 } 1533 1534 fn block_params(&self, block: BlockIndex) -> &[VReg] { 1535 // As a special case we don't return block params for the entry block, as all the arguments 1536 // will be defined by the `Inst::Args` instruction. 1537 if block == self.entry { 1538 return &[]; 1539 } 1540 1541 let range = self.block_params_range.get(block.index()); 1542 &self.block_params[range] 1543 } 1544 1545 fn branch_blockparams(&self, block: BlockIndex, _insn: InsnIndex, succ_idx: usize) -> &[VReg] { 1546 let succ_range = self.branch_block_arg_succ_range.get(block.index()); 1547 debug_assert!(succ_idx < succ_range.len()); 1548 let branch_block_args = self.branch_block_arg_range.get(succ_range.start + succ_idx); 1549 &self.branch_block_args[branch_block_args] 1550 } 1551 1552 fn is_ret(&self, insn: InsnIndex) -> bool { 1553 match self.insts[insn.index()].is_term() { 1554 // We treat blocks terminated by an unconditional trap like a return for regalloc. 1555 MachTerminator::None => self.insts[insn.index()].is_trap(), 1556 MachTerminator::Ret | MachTerminator::RetCall => true, 1557 MachTerminator::Branch => false, 1558 } 1559 } 1560 1561 fn is_branch(&self, insn: InsnIndex) -> bool { 1562 match self.insts[insn.index()].is_term() { 1563 MachTerminator::Branch => true, 1564 _ => false, 1565 } 1566 } 1567 1568 fn inst_operands(&self, insn: InsnIndex) -> &[Operand] { 1569 let range = self.operand_ranges.get(insn.index()); 1570 &self.operands[range] 1571 } 1572 1573 fn inst_clobbers(&self, insn: InsnIndex) -> PRegSet { 1574 self.clobbers.get(&insn).cloned().unwrap_or_default() 1575 } 1576 1577 fn num_vregs(&self) -> usize { 1578 self.vreg_types.len() 1579 } 1580 1581 fn debug_value_labels(&self) -> &[(VReg, InsnIndex, InsnIndex, u32)] { 1582 &self.debug_value_labels 1583 } 1584 1585 fn spillslot_size(&self, regclass: RegClass) -> usize { 1586 self.abi.get_spillslot_size(regclass) as usize 1587 } 1588 1589 fn allow_multiple_vreg_defs(&self) -> bool { 1590 // At least the s390x backend requires this, because the 1591 // `Loop` pseudo-instruction aggregates all Operands so pinned 1592 // vregs (RealRegs) may occur more than once. 1593 true 1594 } 1595 } 1596 1597 impl<I: VCodeInst> Debug for VRegAllocator<I> { 1598 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 1599 writeln!(f, "VRegAllocator {{")?; 1600 1601 let mut alias_keys = self.vreg_aliases.keys().cloned().collect::<Vec<_>>(); 1602 alias_keys.sort_unstable(); 1603 for key in alias_keys { 1604 let dest = self.vreg_aliases.get(&key).unwrap(); 1605 writeln!(f, " {:?} := {:?}", Reg::from(key), Reg::from(*dest))?; 1606 } 1607 1608 for (vreg, fact) in self.facts.iter().enumerate() { 1609 if let Some(fact) = fact { 1610 writeln!(f, " v{vreg} ! {fact}")?; 1611 } 1612 } 1613 1614 writeln!(f, "}}") 1615 } 1616 } 1617 1618 impl<I: VCodeInst> fmt::Debug for VCode<I> { 1619 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 1620 writeln!(f, "VCode {{")?; 1621 writeln!(f, " Entry block: {}", self.entry.index())?; 1622 1623 let mut state = Default::default(); 1624 1625 for block in 0..self.num_blocks() { 1626 let block = BlockIndex::new(block); 1627 writeln!( 1628 f, 1629 "Block {}({:?}):", 1630 block.index(), 1631 self.block_params(block) 1632 )?; 1633 if let Some(bb) = self.bindex_to_bb(block) { 1634 writeln!(f, " (original IR block: {bb})")?; 1635 } 1636 for (succ_idx, succ) in self.block_succs(block).iter().enumerate() { 1637 writeln!( 1638 f, 1639 " (successor: Block {}({:?}))", 1640 succ.index(), 1641 self.branch_blockparams(block, InsnIndex::new(0) /* dummy */, succ_idx) 1642 )?; 1643 } 1644 for inst in self.block_ranges.get(block.index()) { 1645 writeln!( 1646 f, 1647 " Inst {}: {}", 1648 inst, 1649 self.insts[inst].pretty_print_inst(&mut state) 1650 )?; 1651 if !self.operands.is_empty() { 1652 for operand in self.inst_operands(InsnIndex::new(inst)) { 1653 if operand.kind() == OperandKind::Def { 1654 if let Some(fact) = &self.facts[operand.vreg().vreg()] { 1655 writeln!(f, " v{} ! {}", operand.vreg().vreg(), fact)?; 1656 } 1657 } 1658 } 1659 } 1660 if let Some(user_stack_map) = self.get_user_stack_map(InsnIndex::new(inst)) { 1661 writeln!(f, " {user_stack_map:?}")?; 1662 } 1663 } 1664 } 1665 1666 writeln!(f, "}}")?; 1667 Ok(()) 1668 } 1669 } 1670 1671 /// This structure manages VReg allocation during the lifetime of the VCodeBuilder. 1672 pub struct VRegAllocator<I> { 1673 /// VReg IR-level types. 1674 vreg_types: Vec<Type>, 1675 1676 /// VReg aliases. When the final VCode is built we rewrite all 1677 /// uses of the keys in this table to their replacement values. 1678 /// 1679 /// We use these aliases to rename an instruction's expected 1680 /// result vregs to the returned vregs from lowering, which are 1681 /// usually freshly-allocated temps. 1682 vreg_aliases: FxHashMap<regalloc2::VReg, regalloc2::VReg>, 1683 1684 /// A deferred error, to be bubbled up to the top level of the 1685 /// lowering algorithm. We take this approach because we cannot 1686 /// currently propagate a `Result` upward through ISLE code (the 1687 /// lowering rules) or some ABI code. 1688 deferred_error: Option<CodegenError>, 1689 1690 /// Facts on VRegs, for proof-carrying code. 1691 facts: Vec<Option<Fact>>, 1692 1693 /// The type of instruction that this allocator makes registers for. 1694 _inst: core::marker::PhantomData<I>, 1695 } 1696 1697 impl<I: VCodeInst> VRegAllocator<I> { 1698 /// Make a new VRegAllocator. 1699 pub fn with_capacity(capacity: usize) -> Self { 1700 let capacity = first_user_vreg_index() + capacity; 1701 let mut vreg_types = Vec::with_capacity(capacity); 1702 vreg_types.resize(first_user_vreg_index(), types::INVALID); 1703 Self { 1704 vreg_types, 1705 vreg_aliases: FxHashMap::with_capacity_and_hasher(capacity, Default::default()), 1706 deferred_error: None, 1707 facts: Vec::with_capacity(capacity), 1708 _inst: core::marker::PhantomData::default(), 1709 } 1710 } 1711 1712 /// Allocate a fresh ValueRegs. 1713 pub fn alloc(&mut self, ty: Type) -> CodegenResult<ValueRegs<Reg>> { 1714 if self.deferred_error.is_some() { 1715 return Err(CodegenError::CodeTooLarge); 1716 } 1717 let v = self.vreg_types.len(); 1718 let (regclasses, tys) = I::rc_for_type(ty)?; 1719 if v + regclasses.len() >= VReg::MAX { 1720 return Err(CodegenError::CodeTooLarge); 1721 } 1722 1723 let regs: ValueRegs<Reg> = match regclasses { 1724 &[rc0] => ValueRegs::one(VReg::new(v, rc0).into()), 1725 &[rc0, rc1] => ValueRegs::two(VReg::new(v, rc0).into(), VReg::new(v + 1, rc1).into()), 1726 // We can extend this if/when we support 32-bit targets; e.g., 1727 // an i128 on a 32-bit machine will need up to four machine regs 1728 // for a `Value`. 1729 _ => panic!("Value must reside in 1 or 2 registers"), 1730 }; 1731 for (®_ty, ®) in tys.iter().zip(regs.regs().iter()) { 1732 let vreg = reg.to_virtual_reg().unwrap(); 1733 debug_assert_eq!(self.vreg_types.len(), vreg.index()); 1734 self.vreg_types.push(reg_ty); 1735 } 1736 1737 // Create empty facts for each allocated vreg. 1738 self.facts.resize(self.vreg_types.len(), None); 1739 1740 Ok(regs) 1741 } 1742 1743 /// Allocate a fresh ValueRegs, deferring any out-of-vregs 1744 /// errors. This is useful in places where we cannot bubble a 1745 /// `CodegenResult` upward easily, and which are known to be 1746 /// invoked from within the lowering loop that checks the deferred 1747 /// error status below. 1748 pub fn alloc_with_deferred_error(&mut self, ty: Type) -> ValueRegs<Reg> { 1749 match self.alloc(ty) { 1750 Ok(x) => x, 1751 Err(e) => { 1752 self.deferred_error = Some(e); 1753 self.bogus_for_deferred_error(ty) 1754 } 1755 } 1756 } 1757 1758 /// Take any deferred error that was accumulated by `alloc_with_deferred_error`. 1759 pub fn take_deferred_error(&mut self) -> Option<CodegenError> { 1760 self.deferred_error.take() 1761 } 1762 1763 /// Produce an bogus VReg placeholder with the proper number of 1764 /// registers for the given type. This is meant to be used with 1765 /// deferred allocation errors (see `Lower::alloc_tmp()`). 1766 fn bogus_for_deferred_error(&self, ty: Type) -> ValueRegs<Reg> { 1767 let (regclasses, _tys) = I::rc_for_type(ty).expect("must have valid type"); 1768 match regclasses { 1769 &[rc0] => ValueRegs::one(VReg::new(0, rc0).into()), 1770 &[rc0, rc1] => ValueRegs::two(VReg::new(0, rc0).into(), VReg::new(1, rc1).into()), 1771 _ => panic!("Value must reside in 1 or 2 registers"), 1772 } 1773 } 1774 1775 /// Rewrite any mention of `from` into `to`. 1776 pub fn set_vreg_alias(&mut self, from: Reg, to: Reg) { 1777 let from = from.into(); 1778 let resolved_to = self.resolve_vreg_alias(to.into()); 1779 // Disallow cycles (see below). 1780 assert_ne!(resolved_to, from); 1781 1782 // Maintain the invariant that PCC facts only exist on vregs 1783 // which aren't aliases. We want to preserve whatever was 1784 // stated about the vreg before its producer was lowered. 1785 if let Some(fact) = self.facts[from.vreg()].take() { 1786 self.set_fact(resolved_to, fact); 1787 } 1788 1789 let old_alias = self.vreg_aliases.insert(from, resolved_to); 1790 debug_assert_eq!(old_alias, None); 1791 } 1792 1793 fn resolve_vreg_alias(&self, mut vreg: regalloc2::VReg) -> regalloc2::VReg { 1794 // We prevent cycles from existing by resolving targets of 1795 // aliases eagerly before setting them. If the target resolves 1796 // to the origin of the alias, then a cycle would be created 1797 // and the alias is disallowed. Because of the structure of 1798 // SSA code (one instruction can refer to another's defs but 1799 // not vice-versa, except indirectly through 1800 // phis/blockparams), cycles should not occur as we use 1801 // aliases to redirect vregs to the temps that actually define 1802 // them. 1803 while let Some(to) = self.vreg_aliases.get(&vreg) { 1804 vreg = *to; 1805 } 1806 vreg 1807 } 1808 1809 #[inline] 1810 fn debug_assert_no_vreg_aliases(&self, mut list: impl Iterator<Item = VReg>) { 1811 debug_assert!(list.all(|vreg| !self.vreg_aliases.contains_key(&vreg))); 1812 } 1813 1814 /// Set the proof-carrying code fact on a given virtual register. 1815 /// 1816 /// Returns the old fact, if any (only one fact can be stored). 1817 fn set_fact(&mut self, vreg: regalloc2::VReg, fact: Fact) -> Option<Fact> { 1818 trace!("vreg {:?} has fact: {:?}", vreg, fact); 1819 debug_assert!(!self.vreg_aliases.contains_key(&vreg)); 1820 self.facts[vreg.vreg()].replace(fact) 1821 } 1822 1823 /// Set a fact only if one doesn't already exist. 1824 pub fn set_fact_if_missing(&mut self, vreg: VirtualReg, fact: Fact) { 1825 let vreg = self.resolve_vreg_alias(vreg.into()); 1826 if self.facts[vreg.vreg()].is_none() { 1827 self.set_fact(vreg, fact); 1828 } 1829 } 1830 1831 /// Allocate a fresh ValueRegs, with a given fact to apply if 1832 /// the value fits in one VReg. 1833 pub fn alloc_with_maybe_fact( 1834 &mut self, 1835 ty: Type, 1836 fact: Option<Fact>, 1837 ) -> CodegenResult<ValueRegs<Reg>> { 1838 let result = self.alloc(ty)?; 1839 1840 // Ensure that we don't lose a fact on a value that splits 1841 // into multiple VRegs. 1842 assert!(result.len() == 1 || fact.is_none()); 1843 if let Some(fact) = fact { 1844 self.set_fact(result.regs()[0].into(), fact); 1845 } 1846 1847 Ok(result) 1848 } 1849 } 1850 1851 /// This structure tracks the large constants used in VCode that will be emitted separately by the 1852 /// [MachBuffer]. 1853 /// 1854 /// First, during the lowering phase, constants are inserted using 1855 /// [VCodeConstants.insert]; an intermediate handle, `VCodeConstant`, tracks what constants are 1856 /// used in this phase. Some deduplication is performed, when possible, as constant 1857 /// values are inserted. 1858 /// 1859 /// Secondly, during the emission phase, the [MachBuffer] assigns [MachLabel]s for each of the 1860 /// constants so that instructions can refer to the value's memory location. The [MachBuffer] 1861 /// then writes the constant values to the buffer. 1862 #[derive(Default)] 1863 pub struct VCodeConstants { 1864 constants: PrimaryMap<VCodeConstant, VCodeConstantData>, 1865 pool_uses: HashMap<Constant, VCodeConstant>, 1866 well_known_uses: HashMap<*const [u8], VCodeConstant>, 1867 u64s: HashMap<[u8; 8], VCodeConstant>, 1868 } 1869 impl VCodeConstants { 1870 /// Initialize the structure with the expected number of constants. 1871 pub fn with_capacity(expected_num_constants: usize) -> Self { 1872 Self { 1873 constants: PrimaryMap::with_capacity(expected_num_constants), 1874 pool_uses: HashMap::with_capacity(expected_num_constants), 1875 well_known_uses: HashMap::new(), 1876 u64s: HashMap::new(), 1877 } 1878 } 1879 1880 /// Insert a constant; using this method indicates that a constant value will be used and thus 1881 /// will be emitted to the `MachBuffer`. The current implementation can deduplicate constants 1882 /// that are [VCodeConstantData::Pool] or [VCodeConstantData::WellKnown] but not 1883 /// [VCodeConstantData::Generated]. 1884 pub fn insert(&mut self, data: VCodeConstantData) -> VCodeConstant { 1885 match data { 1886 VCodeConstantData::Generated(_) => self.constants.push(data), 1887 VCodeConstantData::Pool(constant, _) => match self.pool_uses.get(&constant) { 1888 None => { 1889 let vcode_constant = self.constants.push(data); 1890 self.pool_uses.insert(constant, vcode_constant); 1891 vcode_constant 1892 } 1893 Some(&vcode_constant) => vcode_constant, 1894 }, 1895 VCodeConstantData::WellKnown(data_ref) => { 1896 match self.well_known_uses.entry(data_ref as *const [u8]) { 1897 Entry::Vacant(v) => { 1898 let vcode_constant = self.constants.push(data); 1899 v.insert(vcode_constant); 1900 vcode_constant 1901 } 1902 Entry::Occupied(o) => *o.get(), 1903 } 1904 } 1905 VCodeConstantData::U64(value) => match self.u64s.entry(value) { 1906 Entry::Vacant(v) => { 1907 let vcode_constant = self.constants.push(data); 1908 v.insert(vcode_constant); 1909 vcode_constant 1910 } 1911 Entry::Occupied(o) => *o.get(), 1912 }, 1913 } 1914 } 1915 1916 /// Return the number of constants inserted. 1917 pub fn len(&self) -> usize { 1918 self.constants.len() 1919 } 1920 1921 /// Iterate over the `VCodeConstant` keys inserted in this structure. 1922 pub fn keys(&self) -> Keys<VCodeConstant> { 1923 self.constants.keys() 1924 } 1925 1926 /// Iterate over the `VCodeConstant` keys and the data (as a byte slice) inserted in this 1927 /// structure. 1928 pub fn iter(&self) -> impl Iterator<Item = (VCodeConstant, &VCodeConstantData)> { 1929 self.constants.iter() 1930 } 1931 1932 /// Returns the data associated with the specified constant. 1933 pub fn get(&self, c: VCodeConstant) -> &VCodeConstantData { 1934 &self.constants[c] 1935 } 1936 1937 /// Checks if the given [VCodeConstantData] is registered as 1938 /// used by the pool. 1939 pub fn pool_uses(&self, constant: &VCodeConstantData) -> bool { 1940 match constant { 1941 VCodeConstantData::Pool(c, _) => self.pool_uses.contains_key(c), 1942 _ => false, 1943 } 1944 } 1945 } 1946 1947 /// A use of a constant by one or more VCode instructions; see [VCodeConstants]. 1948 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 1949 pub struct VCodeConstant(u32); 1950 entity_impl!(VCodeConstant); 1951 1952 /// Identify the different types of constant that can be inserted into [VCodeConstants]. Tracking 1953 /// these separately instead of as raw byte buffers allows us to avoid some duplication. 1954 pub enum VCodeConstantData { 1955 /// A constant already present in the Cranelift IR 1956 /// [ConstantPool](crate::ir::constant::ConstantPool). 1957 Pool(Constant, ConstantData), 1958 /// A reference to a well-known constant value that is statically encoded within the compiler. 1959 WellKnown(&'static [u8]), 1960 /// A constant value generated during lowering; the value may depend on the instruction context 1961 /// which makes it difficult to de-duplicate--if possible, use other variants. 1962 Generated(ConstantData), 1963 /// A constant of at most 64 bits. These are deduplicated as 1964 /// well. Stored as a fixed-size array of `u8` so that we do not 1965 /// encounter endianness problems when cross-compiling. 1966 U64([u8; 8]), 1967 } 1968 impl VCodeConstantData { 1969 /// Retrieve the constant data as a byte slice. 1970 pub fn as_slice(&self) -> &[u8] { 1971 match self { 1972 VCodeConstantData::Pool(_, d) | VCodeConstantData::Generated(d) => d.as_slice(), 1973 VCodeConstantData::WellKnown(d) => d, 1974 VCodeConstantData::U64(value) => &value[..], 1975 } 1976 } 1977 1978 /// Calculate the alignment of the constant data. 1979 pub fn alignment(&self) -> u32 { 1980 if self.as_slice().len() <= 8 { 1981 8 1982 } else { 1983 16 1984 } 1985 } 1986 } 1987 1988 #[cfg(test)] 1989 mod test { 1990 use super::*; 1991 use std::mem::size_of; 1992 1993 #[test] 1994 fn size_of_constant_structs() { 1995 assert_eq!(size_of::<Constant>(), 4); 1996 assert_eq!(size_of::<VCodeConstant>(), 4); 1997 assert_eq!(size_of::<ConstantData>(), 3 * size_of::<usize>()); 1998 assert_eq!(size_of::<VCodeConstantData>(), 4 * size_of::<usize>()); 1999 assert_eq!( 2000 size_of::<PrimaryMap<VCodeConstant, VCodeConstantData>>(), 2001 3 * size_of::<usize>() 2002 ); 2003 // TODO The VCodeConstants structure's memory size could be further optimized. 2004 // With certain versions of Rust, each `HashMap` in `VCodeConstants` occupied at 2005 // least 48 bytes, making an empty `VCodeConstants` cost 120 bytes. 2006 } 2007 } 2008