1 //! This implements the VCode container: a CFG of Insts that have been lowered. 2 //! 3 //! VCode is virtual-register code. An instruction in VCode is almost a machine 4 //! instruction; however, its register slots can refer to virtual registers in 5 //! addition to real machine registers. 6 //! 7 //! VCode is structured with traditional basic blocks, and 8 //! each block must be terminated by an unconditional branch (one target), a 9 //! conditional branch (two targets), or a return (no targets). Note that this 10 //! slightly differs from the machine code of most ISAs: in most ISAs, a 11 //! conditional branch has one target (and the not-taken case falls through). 12 //! However, we expect that machine backends will elide branches to the following 13 //! block (i.e., zero-offset jumps), and will be able to codegen a branch-cond / 14 //! branch-uncond pair if *both* targets are not fallthrough. This allows us to 15 //! play with layout prior to final binary emission, as well, if we want. 16 //! 17 //! See the main module comment in `mod.rs` for more details on the VCode-based 18 //! backend pipeline. 19 20 use crate::ir::pcc::*; 21 use crate::ir::{self, types, Constant, ConstantData, ValueLabel}; 22 use crate::machinst::*; 23 use crate::ranges::Ranges; 24 use crate::timing; 25 use crate::trace; 26 use crate::CodegenError; 27 use crate::{LabelValueLoc, ValueLocRange}; 28 use regalloc2::{ 29 Edit, Function as RegallocFunction, InstOrEdit, InstRange, MachineEnv, Operand, 30 OperandConstraint, OperandKind, PRegSet, RegClass, 31 }; 32 use rustc_hash::FxHashMap; 33 34 use core::mem::take; 35 use cranelift_entity::{entity_impl, Keys}; 36 use std::collections::hash_map::Entry; 37 use std::collections::HashMap; 38 use std::fmt; 39 40 /// Index referring to an instruction in VCode. 41 pub type InsnIndex = regalloc2::Inst; 42 43 /// Extension trait for `InsnIndex` to allow conversion to a 44 /// `BackwardsInsnIndex`. 45 trait ToBackwardsInsnIndex { 46 fn to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex; 47 } 48 49 impl ToBackwardsInsnIndex for InsnIndex { 50 fn to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex { 51 BackwardsInsnIndex::new(num_insts - self.index() - 1) 52 } 53 } 54 55 /// An index referring to an instruction in the VCode when it is backwards, 56 /// during VCode construction. 57 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] 58 #[cfg_attr( 59 feature = "enable-serde", 60 derive(::serde::Serialize, ::serde::Deserialize) 61 )] 62 pub struct BackwardsInsnIndex(InsnIndex); 63 64 impl BackwardsInsnIndex { 65 pub fn new(i: usize) -> Self { 66 BackwardsInsnIndex(InsnIndex::new(i)) 67 } 68 } 69 70 /// Index referring to a basic block in VCode. 71 pub type BlockIndex = regalloc2::Block; 72 73 /// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be 74 /// a `MachInst` and it must be able to emit itself at least to a `SizeCodeSink`. 75 pub trait VCodeInst: MachInst + MachInstEmit {} 76 impl<I: MachInst + MachInstEmit> VCodeInst for I {} 77 78 /// A function in "VCode" (virtualized-register code) form, after 79 /// lowering. This is essentially a standard CFG of basic blocks, 80 /// where each basic block consists of lowered instructions produced 81 /// by the machine-specific backend. 82 /// 83 /// Note that the VCode is immutable once produced, and is not 84 /// modified by register allocation in particular. Rather, register 85 /// allocation on the `VCode` produces a separate `regalloc2::Output` 86 /// struct, and this can be passed to `emit`. `emit` in turn does not 87 /// modify the vcode, but produces an `EmitResult`, which contains the 88 /// machine code itself, and the associated disassembly and/or 89 /// metadata as requested. 90 pub struct VCode<I: VCodeInst> { 91 /// VReg IR-level types. 92 vreg_types: Vec<Type>, 93 94 /// Lowered machine instructions in order corresponding to the original IR. 95 insts: Vec<I>, 96 97 /// A map from backwards instruction index to the user stack map for that 98 /// instruction. 99 /// 100 /// This is a sparse side table that only has entries for instructions that 101 /// are safepoints, and only for a subset of those that have an associated 102 /// user stack map. 103 user_stack_maps: FxHashMap<BackwardsInsnIndex, ir::UserStackMap>, 104 105 /// Operands: pre-regalloc references to virtual registers with 106 /// constraints, in one flattened array. This allows the regalloc 107 /// to efficiently access all operands without requiring expensive 108 /// matches or method invocations on insts. 109 operands: Vec<Operand>, 110 111 /// Operand index ranges: for each instruction in `insts`, there 112 /// is a tuple here providing the range in `operands` for that 113 /// instruction's operands. 114 operand_ranges: Ranges, 115 116 /// Clobbers: a sparse map from instruction indices to clobber masks. 117 clobbers: FxHashMap<InsnIndex, PRegSet>, 118 119 /// Source locations for each instruction. (`SourceLoc` is a `u32`, so it is 120 /// reasonable to keep one of these per instruction.) 121 srclocs: Vec<RelSourceLoc>, 122 123 /// Entry block. 124 entry: BlockIndex, 125 126 /// Block instruction indices. 127 block_ranges: Ranges, 128 129 /// Block successors: index range in the `block_succs` list. 130 block_succ_range: Ranges, 131 132 /// Block successor lists, concatenated into one vec. The 133 /// `block_succ_range` list of tuples above gives (start, end) 134 /// ranges within this list that correspond to each basic block's 135 /// successors. 136 block_succs: Vec<regalloc2::Block>, 137 138 /// Block predecessors: index range in the `block_preds` list. 139 block_pred_range: Ranges, 140 141 /// Block predecessor lists, concatenated into one vec. The 142 /// `block_pred_range` list of tuples above gives (start, end) 143 /// ranges within this list that correspond to each basic block's 144 /// predecessors. 145 block_preds: Vec<regalloc2::Block>, 146 147 /// Block parameters: index range in `block_params` below. 148 block_params_range: Ranges, 149 150 /// Block parameter lists, concatenated into one vec. The 151 /// `block_params_range` list of tuples above gives (start, end) 152 /// ranges within this list that correspond to each basic block's 153 /// blockparam vregs. 154 block_params: Vec<regalloc2::VReg>, 155 156 /// Outgoing block arguments on branch instructions, concatenated 157 /// into one list. 158 /// 159 /// Note that this is conceptually a 3D array: we have a VReg list 160 /// per block, per successor. We flatten those three dimensions 161 /// into this 1D vec, then store index ranges in two levels of 162 /// indirection. 163 /// 164 /// Indexed by the indices in `branch_block_arg_succ_range`. 165 branch_block_args: Vec<regalloc2::VReg>, 166 167 /// Array of sequences of (start, end) tuples in 168 /// `branch_block_args`, one for each successor; these sequences 169 /// for each block are concatenated. 170 /// 171 /// Indexed by the indices in `branch_block_arg_succ_range`. 172 branch_block_arg_range: Ranges, 173 174 /// For a given block, indices in `branch_block_arg_range` 175 /// corresponding to all of its successors. 176 branch_block_arg_succ_range: Ranges, 177 178 /// Block-order information. 179 block_order: BlockLoweringOrder, 180 181 /// ABI object. 182 pub(crate) abi: Callee<I::ABIMachineSpec>, 183 184 /// Constant information used during code emission. This should be 185 /// immutable across function compilations within the same module. 186 emit_info: I::Info, 187 188 /// Reference-typed `regalloc2::VReg`s. The regalloc requires 189 /// these in a dense slice (as opposed to querying the 190 /// reftype-status of each vreg) for efficient iteration. 191 reftyped_vregs: Vec<VReg>, 192 193 /// Constants. 194 pub(crate) constants: VCodeConstants, 195 196 /// Value labels for debuginfo attached to vregs. 197 debug_value_labels: Vec<(VReg, InsnIndex, InsnIndex, u32)>, 198 199 pub(crate) sigs: SigSet, 200 201 /// Facts on VRegs, for proof-carrying code verification. 202 facts: Vec<Option<Fact>>, 203 } 204 205 /// The result of `VCode::emit`. Contains all information computed 206 /// during emission: actual machine code, optionally a disassembly, 207 /// and optionally metadata about the code layout. 208 pub struct EmitResult { 209 /// The MachBuffer containing the machine code. 210 pub buffer: MachBufferFinalized<Stencil>, 211 212 /// Offset of each basic block, recorded during emission. Computed 213 /// only if `debug_value_labels` is non-empty. 214 pub bb_offsets: Vec<CodeOffset>, 215 216 /// Final basic-block edges, in terms of code offsets of 217 /// bb-starts. Computed only if `debug_value_labels` is non-empty. 218 pub bb_edges: Vec<(CodeOffset, CodeOffset)>, 219 220 /// Final length of function body. 221 pub func_body_len: CodeOffset, 222 223 /// The pretty-printed disassembly, if any. This uses the same 224 /// pretty-printing for MachInsts as the pre-regalloc VCode Debug 225 /// implementation, but additionally includes the prologue and 226 /// epilogue(s), and makes use of the regalloc results. 227 pub disasm: Option<String>, 228 229 /// Offsets of sized stackslots. 230 pub sized_stackslot_offsets: PrimaryMap<StackSlot, u32>, 231 232 /// Offsets of dynamic stackslots. 233 pub dynamic_stackslot_offsets: PrimaryMap<DynamicStackSlot, u32>, 234 235 /// Value-labels information (debug metadata). 236 pub value_labels_ranges: ValueLabelsRanges, 237 238 /// Stack frame size. 239 pub frame_size: u32, 240 } 241 242 /// A builder for a VCode function body. 243 /// 244 /// This builder has the ability to accept instructions in either 245 /// forward or reverse order, depending on the pass direction that 246 /// produces the VCode. The lowering from CLIF to VCode<MachInst> 247 /// ordinarily occurs in reverse order (in order to allow instructions 248 /// to be lowered only if used, and not merged) so a reversal will 249 /// occur at the end of lowering to ensure the VCode is in machine 250 /// order. 251 /// 252 /// If built in reverse, block and instruction indices used once the 253 /// VCode is built are relative to the final (reversed) order, not the 254 /// order of construction. Note that this means we do not know the 255 /// final block or instruction indices when building, so we do not 256 /// hand them out. (The user is assumed to know them when appending 257 /// terminator instructions with successor blocks.) 258 pub struct VCodeBuilder<I: VCodeInst> { 259 /// In-progress VCode. 260 pub(crate) vcode: VCode<I>, 261 262 /// In what direction is the build occurring? 263 direction: VCodeBuildDirection, 264 265 /// Debug-value label in-progress map, keyed by label. For each 266 /// label, we keep disjoint ranges mapping to vregs. We'll flatten 267 /// this into (vreg, range, label) tuples when done. 268 debug_info: FxHashMap<ValueLabel, Vec<(InsnIndex, InsnIndex, VReg)>>, 269 } 270 271 /// Direction in which a VCodeBuilder builds VCode. 272 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 273 pub enum VCodeBuildDirection { 274 // TODO: add `Forward` once we need it and can test it adequately. 275 /// Backward-build pass: we expect the producer to call `emit()` 276 /// with instructions in reverse program order within each block. 277 Backward, 278 } 279 280 impl<I: VCodeInst> VCodeBuilder<I> { 281 /// Create a new VCodeBuilder. 282 pub fn new( 283 sigs: SigSet, 284 abi: Callee<I::ABIMachineSpec>, 285 emit_info: I::Info, 286 block_order: BlockLoweringOrder, 287 constants: VCodeConstants, 288 direction: VCodeBuildDirection, 289 ) -> Self { 290 let vcode = VCode::new(sigs, abi, emit_info, block_order, constants); 291 292 VCodeBuilder { 293 vcode, 294 direction, 295 debug_info: FxHashMap::default(), 296 } 297 } 298 299 pub fn init_retval_area(&mut self, vregs: &mut VRegAllocator<I>) -> CodegenResult<()> { 300 self.vcode.abi.init_retval_area(&self.vcode.sigs, vregs) 301 } 302 303 /// Access the ABI object. 304 pub fn abi(&self) -> &Callee<I::ABIMachineSpec> { 305 &self.vcode.abi 306 } 307 308 /// Access the ABI object. 309 pub fn abi_mut(&mut self) -> &mut Callee<I::ABIMachineSpec> { 310 &mut self.vcode.abi 311 } 312 313 pub fn sigs(&self) -> &SigSet { 314 &self.vcode.sigs 315 } 316 317 pub fn sigs_mut(&mut self) -> &mut SigSet { 318 &mut self.vcode.sigs 319 } 320 321 /// Access to the BlockLoweringOrder object. 322 pub fn block_order(&self) -> &BlockLoweringOrder { 323 &self.vcode.block_order 324 } 325 326 /// Set the current block as the entry block. 327 pub fn set_entry(&mut self, block: BlockIndex) { 328 self.vcode.entry = block; 329 } 330 331 /// End the current basic block. Must be called after emitting vcode insts 332 /// for IR insts and prior to ending the function (building the VCode). 333 pub fn end_bb(&mut self) { 334 let end_idx = self.vcode.insts.len(); 335 // Add the instruction index range to the list of blocks. 336 self.vcode.block_ranges.push_end(end_idx); 337 // End the successors list. 338 let succ_end = self.vcode.block_succs.len(); 339 self.vcode.block_succ_range.push_end(succ_end); 340 // End the blockparams list. 341 let block_params_end = self.vcode.block_params.len(); 342 self.vcode.block_params_range.push_end(block_params_end); 343 // End the branch blockparam args list. 344 let branch_block_arg_succ_end = self.vcode.branch_block_arg_range.len(); 345 self.vcode 346 .branch_block_arg_succ_range 347 .push_end(branch_block_arg_succ_end); 348 } 349 350 pub fn add_block_param(&mut self, param: VirtualReg) { 351 self.vcode.block_params.push(param.into()); 352 } 353 354 fn add_branch_args_for_succ(&mut self, args: &[Reg]) { 355 self.vcode 356 .branch_block_args 357 .extend(args.iter().map(|&arg| VReg::from(arg))); 358 let end = self.vcode.branch_block_args.len(); 359 self.vcode.branch_block_arg_range.push_end(end); 360 } 361 362 /// Push an instruction for the current BB and current IR inst 363 /// within the BB. 364 pub fn push(&mut self, insn: I, loc: RelSourceLoc) { 365 self.vcode.insts.push(insn); 366 self.vcode.srclocs.push(loc); 367 } 368 369 /// Add a successor block with branch args. 370 pub fn add_succ(&mut self, block: BlockIndex, args: &[Reg]) { 371 self.vcode.block_succs.push(block); 372 self.add_branch_args_for_succ(args); 373 } 374 375 /// Add a debug value label to a register. 376 pub fn add_value_label(&mut self, reg: Reg, label: ValueLabel) { 377 // We'll fix up labels in reverse(). Because we're generating 378 // code bottom-to-top, the liverange of the label goes *from* 379 // the last index at which was defined (or 0, which is the end 380 // of the eventual function) *to* just this instruction, and 381 // no further. 382 let inst = InsnIndex::new(self.vcode.insts.len()); 383 let labels = self.debug_info.entry(label).or_insert_with(|| vec![]); 384 let last = labels 385 .last() 386 .map(|(_start, end, _vreg)| *end) 387 .unwrap_or(InsnIndex::new(0)); 388 labels.push((last, inst, reg.into())); 389 } 390 391 /// Access the constants. 392 pub fn constants(&mut self) -> &mut VCodeConstants { 393 &mut self.vcode.constants 394 } 395 396 fn compute_preds_from_succs(&mut self) { 397 // Do a linear-time counting sort: first determine how many 398 // times each block appears as a successor. 399 let mut starts = vec![0u32; self.vcode.num_blocks()]; 400 for succ in &self.vcode.block_succs { 401 starts[succ.index()] += 1; 402 } 403 404 // Determine for each block the starting index where that 405 // block's predecessors should go. This is equivalent to the 406 // ranges we need to store in block_pred_range. 407 self.vcode.block_pred_range.reserve(starts.len()); 408 let mut end = 0; 409 for count in starts.iter_mut() { 410 let start = end; 411 end += *count; 412 *count = start; 413 self.vcode.block_pred_range.push_end(end as usize); 414 } 415 let end = end as usize; 416 debug_assert_eq!(end, self.vcode.block_succs.len()); 417 418 // Walk over the successors again, this time grouped by 419 // predecessor, and push the predecessor at the current 420 // starting position of each of its successors. We build 421 // each group of predecessors in whatever order Ranges::iter 422 // returns them; regalloc2 doesn't care. 423 self.vcode.block_preds.resize(end, BlockIndex::invalid()); 424 for (pred, range) in self.vcode.block_succ_range.iter() { 425 let pred = BlockIndex::new(pred); 426 for succ in &self.vcode.block_succs[range] { 427 let pos = &mut starts[succ.index()]; 428 self.vcode.block_preds[*pos as usize] = pred; 429 *pos += 1; 430 } 431 } 432 debug_assert!(self.vcode.block_preds.iter().all(|pred| pred.is_valid())); 433 } 434 435 /// Called once, when a build in Backward order is complete, to 436 /// perform the overall reversal (into final forward order) and 437 /// finalize metadata accordingly. 438 fn reverse_and_finalize(&mut self, vregs: &VRegAllocator<I>) { 439 let n_insts = self.vcode.insts.len(); 440 if n_insts == 0 { 441 return; 442 } 443 444 // Reverse the per-block and per-inst sequences. 445 self.vcode.block_ranges.reverse_index(); 446 self.vcode.block_ranges.reverse_target(n_insts); 447 // block_params_range is indexed by block (and blocks were 448 // traversed in reverse) so we reverse it; but block-param 449 // sequences in the concatenated vec can remain in reverse 450 // order (it is effectively an arena of arbitrarily-placed 451 // referenced sequences). 452 self.vcode.block_params_range.reverse_index(); 453 // Likewise, we reverse block_succ_range, but the block_succ 454 // concatenated array can remain as-is. 455 self.vcode.block_succ_range.reverse_index(); 456 self.vcode.insts.reverse(); 457 self.vcode.srclocs.reverse(); 458 // Likewise, branch_block_arg_succ_range is indexed by block 459 // so must be reversed. 460 self.vcode.branch_block_arg_succ_range.reverse_index(); 461 462 // To translate an instruction index *endpoint* in reversed 463 // order to forward order, compute `n_insts - i`. 464 // 465 // Why not `n_insts - 1 - i`? That would be correct to 466 // translate an individual instruction index (for ten insts 0 467 // to 9 inclusive, inst 0 becomes 9, and inst 9 becomes 468 // 0). But for the usual inclusive-start, exclusive-end range 469 // idiom, inclusive starts become exclusive ends and 470 // vice-versa, so e.g. an (inclusive) start of 0 becomes an 471 // (exclusive) end of 10. 472 let translate = |inst: InsnIndex| InsnIndex::new(n_insts - inst.index()); 473 474 // Generate debug-value labels based on per-label maps. 475 for (label, tuples) in &self.debug_info { 476 for &(start, end, vreg) in tuples { 477 let vreg = vregs.resolve_vreg_alias(vreg); 478 let fwd_start = translate(end); 479 let fwd_end = translate(start); 480 self.vcode 481 .debug_value_labels 482 .push((vreg, fwd_start, fwd_end, label.as_u32())); 483 } 484 } 485 486 // Now sort debug value labels by VReg, as required 487 // by regalloc2. 488 self.vcode 489 .debug_value_labels 490 .sort_unstable_by_key(|(vreg, _, _, _)| *vreg); 491 } 492 493 fn collect_operands(&mut self, vregs: &VRegAllocator<I>) { 494 let allocatable = PRegSet::from(self.vcode.machine_env()); 495 for (i, insn) in self.vcode.insts.iter_mut().enumerate() { 496 // Push operands from the instruction onto the operand list. 497 // 498 // We rename through the vreg alias table as we collect 499 // the operands. This is better than a separate post-pass 500 // over operands, because it has more cache locality: 501 // operands only need to pass through L1 once. This is 502 // also better than renaming instructions' 503 // operands/registers while lowering, because here we only 504 // need to do the `match` over the instruction to visit 505 // its register fields (which is slow, branchy code) once. 506 507 let mut op_collector = 508 OperandCollector::new(&mut self.vcode.operands, allocatable, |vreg| { 509 vregs.resolve_vreg_alias(vreg) 510 }); 511 insn.get_operands(&mut op_collector); 512 let (ops, clobbers) = op_collector.finish(); 513 self.vcode.operand_ranges.push_end(ops); 514 515 if clobbers != PRegSet::default() { 516 self.vcode.clobbers.insert(InsnIndex::new(i), clobbers); 517 } 518 519 if let Some((dst, src)) = insn.is_move() { 520 // We should never see non-virtual registers present in move 521 // instructions. 522 assert!( 523 src.is_virtual(), 524 "the real register {:?} was used as the source of a move instruction", 525 src 526 ); 527 assert!( 528 dst.to_reg().is_virtual(), 529 "the real register {:?} was used as the destination of a move instruction", 530 dst.to_reg() 531 ); 532 } 533 } 534 535 // Translate blockparam args via the vreg aliases table as well. 536 for arg in &mut self.vcode.branch_block_args { 537 let new_arg = vregs.resolve_vreg_alias(*arg); 538 trace!("operandcollector: block arg {:?} -> {:?}", arg, new_arg); 539 *arg = new_arg; 540 } 541 } 542 543 /// Build the final VCode. 544 pub fn build(mut self, mut vregs: VRegAllocator<I>) -> VCode<I> { 545 self.vcode.vreg_types = take(&mut vregs.vreg_types); 546 self.vcode.facts = take(&mut vregs.facts); 547 self.vcode.reftyped_vregs = take(&mut vregs.reftyped_vregs); 548 549 if self.direction == VCodeBuildDirection::Backward { 550 self.reverse_and_finalize(&vregs); 551 } 552 self.collect_operands(&vregs); 553 554 // Apply register aliases to the `reftyped_vregs` list since this list 555 // will be returned directly to `regalloc2` eventually and all 556 // operands/results of instructions will use the alias-resolved vregs 557 // from `regalloc2`'s perspective. 558 // 559 // Also note that `reftyped_vregs` can't have duplicates, so after the 560 // aliases are applied duplicates are removed. 561 for reg in self.vcode.reftyped_vregs.iter_mut() { 562 *reg = vregs.resolve_vreg_alias(*reg); 563 } 564 self.vcode.reftyped_vregs.sort(); 565 self.vcode.reftyped_vregs.dedup(); 566 567 self.compute_preds_from_succs(); 568 self.vcode.debug_value_labels.sort_unstable(); 569 570 // At this point, nothing in the vcode should mention any 571 // VReg which has been aliased. All the appropriate rewriting 572 // should have happened above. Just to be sure, let's 573 // double-check each field which has vregs. 574 // Note: can't easily check vcode.insts, resolved in collect_operands. 575 // Operands are resolved in collect_operands. 576 vregs.debug_assert_no_vreg_aliases(self.vcode.operands.iter().map(|op| op.vreg())); 577 // Currently block params are never aliased to another vreg. 578 vregs.debug_assert_no_vreg_aliases(self.vcode.block_params.iter().copied()); 579 // Branch block args are resolved in collect_operands. 580 vregs.debug_assert_no_vreg_aliases(self.vcode.branch_block_args.iter().copied()); 581 // Reftyped vregs are resolved above in this function. 582 vregs.debug_assert_no_vreg_aliases(self.vcode.reftyped_vregs.iter().copied()); 583 // Debug value labels are resolved in reverse_and_finalize. 584 vregs.debug_assert_no_vreg_aliases( 585 self.vcode.debug_value_labels.iter().map(|&(vreg, ..)| vreg), 586 ); 587 // Facts are resolved eagerly during set_vreg_alias. 588 vregs.debug_assert_no_vreg_aliases( 589 self.vcode 590 .facts 591 .iter() 592 .zip(&vregs.vreg_types) 593 .enumerate() 594 .filter(|(_, (fact, _))| fact.is_some()) 595 .map(|(vreg, (_, &ty))| { 596 let (regclasses, _) = I::rc_for_type(ty).unwrap(); 597 VReg::new(vreg, regclasses[0]) 598 }), 599 ); 600 601 self.vcode 602 } 603 604 /// Add a user stack map for the associated instruction. 605 pub fn add_user_stack_map( 606 &mut self, 607 inst: BackwardsInsnIndex, 608 entries: &[ir::UserStackMapEntry], 609 ) { 610 let stack_map = ir::UserStackMap::new(entries, self.vcode.abi.sized_stackslot_offsets()); 611 let old_entry = self.vcode.user_stack_maps.insert(inst, stack_map); 612 debug_assert!(old_entry.is_none()); 613 } 614 } 615 616 /// Is this type a reference type? 617 fn is_reftype(ty: Type) -> bool { 618 ty == types::R64 || ty == types::R32 619 } 620 621 const NO_INST_OFFSET: CodeOffset = u32::MAX; 622 623 impl<I: VCodeInst> VCode<I> { 624 /// New empty VCode. 625 fn new( 626 sigs: SigSet, 627 abi: Callee<I::ABIMachineSpec>, 628 emit_info: I::Info, 629 block_order: BlockLoweringOrder, 630 constants: VCodeConstants, 631 ) -> Self { 632 let n_blocks = block_order.lowered_order().len(); 633 VCode { 634 sigs, 635 vreg_types: vec![], 636 insts: Vec::with_capacity(10 * n_blocks), 637 user_stack_maps: FxHashMap::default(), 638 operands: Vec::with_capacity(30 * n_blocks), 639 operand_ranges: Ranges::with_capacity(10 * n_blocks), 640 clobbers: FxHashMap::default(), 641 srclocs: Vec::with_capacity(10 * n_blocks), 642 entry: BlockIndex::new(0), 643 block_ranges: Ranges::with_capacity(n_blocks), 644 block_succ_range: Ranges::with_capacity(n_blocks), 645 block_succs: Vec::with_capacity(n_blocks), 646 block_pred_range: Ranges::default(), 647 block_preds: Vec::new(), 648 block_params_range: Ranges::with_capacity(n_blocks), 649 block_params: Vec::with_capacity(5 * n_blocks), 650 branch_block_args: Vec::with_capacity(10 * n_blocks), 651 branch_block_arg_range: Ranges::with_capacity(2 * n_blocks), 652 branch_block_arg_succ_range: Ranges::with_capacity(n_blocks), 653 block_order, 654 abi, 655 emit_info, 656 reftyped_vregs: vec![], 657 constants, 658 debug_value_labels: vec![], 659 facts: vec![], 660 } 661 } 662 663 /// Get the ABI-dependent MachineEnv for managing register allocation. 664 pub fn machine_env(&self) -> &MachineEnv { 665 self.abi.machine_env(&self.sigs) 666 } 667 668 /// Get the number of blocks. Block indices will be in the range `0 .. 669 /// (self.num_blocks() - 1)`. 670 pub fn num_blocks(&self) -> usize { 671 self.block_ranges.len() 672 } 673 674 /// The number of lowered instructions. 675 pub fn num_insts(&self) -> usize { 676 self.insts.len() 677 } 678 679 fn compute_clobbers(&self, regalloc: ®alloc2::Output) -> Vec<Writable<RealReg>> { 680 let mut clobbered = PRegSet::default(); 681 682 // All moves are included in clobbers. 683 for (_, Edit::Move { to, .. }) in ®alloc.edits { 684 if let Some(preg) = to.as_reg() { 685 clobbered.add(preg); 686 } 687 } 688 689 for (i, range) in self.operand_ranges.iter() { 690 // Skip this instruction if not "included in clobbers" as 691 // per the MachInst. (Some backends use this to implement 692 // ABI specifics; e.g., excluding calls of the same ABI as 693 // the current function from clobbers, because by 694 // definition everything clobbered by the call can be 695 // clobbered by this function without saving as well.) 696 if !self.insts[i].is_included_in_clobbers() { 697 continue; 698 } 699 700 let operands = &self.operands[range.clone()]; 701 let allocs = ®alloc.allocs[range]; 702 for (operand, alloc) in operands.iter().zip(allocs.iter()) { 703 if operand.kind() == OperandKind::Def { 704 if let Some(preg) = alloc.as_reg() { 705 clobbered.add(preg); 706 } 707 } 708 } 709 710 // Also add explicitly-clobbered registers. 711 if let Some(&inst_clobbered) = self.clobbers.get(&InsnIndex::new(i)) { 712 clobbered.union_from(inst_clobbered); 713 } 714 } 715 716 clobbered 717 .into_iter() 718 .map(|preg| Writable::from_reg(RealReg::from(preg))) 719 .collect() 720 } 721 722 /// Emit the instructions to a `MachBuffer`, containing fixed-up 723 /// code and external reloc/trap/etc. records ready for use. Takes 724 /// the regalloc results as well. 725 /// 726 /// Returns the machine code itself, and optionally metadata 727 /// and/or a disassembly, as an `EmitResult`. The `VCode` itself 728 /// is consumed by the emission process. 729 pub fn emit( 730 mut self, 731 regalloc: ®alloc2::Output, 732 want_disasm: bool, 733 flags: &settings::Flags, 734 ctrl_plane: &mut ControlPlane, 735 ) -> EmitResult 736 where 737 I: VCodeInst, 738 { 739 // To write into disasm string. 740 use core::fmt::Write; 741 742 let _tt = timing::vcode_emit(); 743 let mut buffer = MachBuffer::new(); 744 let mut bb_starts: Vec<Option<CodeOffset>> = vec![]; 745 746 // The first M MachLabels are reserved for block indices. 747 buffer.reserve_labels_for_blocks(self.num_blocks()); 748 749 // Register all allocated constants with the `MachBuffer` to ensure that 750 // any references to the constants during instructions can be handled 751 // correctly. 752 buffer.register_constants(&self.constants); 753 754 // Construct the final order we emit code in: cold blocks at the end. 755 let mut final_order: SmallVec<[BlockIndex; 16]> = smallvec![]; 756 let mut cold_blocks: SmallVec<[BlockIndex; 16]> = smallvec![]; 757 for block in 0..self.num_blocks() { 758 let block = BlockIndex::new(block); 759 if self.block_order.is_cold(block) { 760 cold_blocks.push(block); 761 } else { 762 final_order.push(block); 763 } 764 } 765 final_order.extend(cold_blocks.clone()); 766 767 // Compute/save info we need for the prologue: clobbers and 768 // number of spillslots. 769 // 770 // We clone `abi` here because we will mutate it as we 771 // generate the prologue and set other info, but we can't 772 // mutate `VCode`. The info it usually carries prior to 773 // setting clobbers is fairly minimal so this should be 774 // relatively cheap. 775 let clobbers = self.compute_clobbers(regalloc); 776 self.abi 777 .compute_frame_layout(&self.sigs, regalloc.num_spillslots, clobbers); 778 779 // Emit blocks. 780 let mut cur_srcloc = None; 781 let mut last_offset = None; 782 let mut inst_offsets = vec![]; 783 let mut state = I::State::new(&self.abi, std::mem::take(ctrl_plane)); 784 785 let mut disasm = String::new(); 786 787 if !self.debug_value_labels.is_empty() { 788 inst_offsets.resize(self.insts.len(), NO_INST_OFFSET); 789 } 790 791 // Count edits per block ahead of time; this is needed for 792 // lookahead island emission. (We could derive it per-block 793 // with binary search in the edit list, but it's more 794 // efficient to do it in one pass here.) 795 let mut ra_edits_per_block: SmallVec<[u32; 64]> = smallvec![]; 796 let mut edit_idx = 0; 797 for block in 0..self.num_blocks() { 798 let end_inst = InsnIndex::new(self.block_ranges.get(block).end); 799 let start_edit_idx = edit_idx; 800 while edit_idx < regalloc.edits.len() && regalloc.edits[edit_idx].0.inst() < end_inst { 801 edit_idx += 1; 802 } 803 let end_edit_idx = edit_idx; 804 ra_edits_per_block.push((end_edit_idx - start_edit_idx) as u32); 805 } 806 807 let is_forward_edge_cfi_enabled = self.abi.is_forward_edge_cfi_enabled(); 808 let mut bb_padding = match flags.bb_padding_log2_minus_one() { 809 0 => Vec::new(), 810 n => vec![0; 1 << (n - 1)], 811 }; 812 let mut total_bb_padding = 0; 813 814 for (block_order_idx, &block) in final_order.iter().enumerate() { 815 trace!("emitting block {:?}", block); 816 817 // Call the new block hook for state 818 state.on_new_block(); 819 820 // Emit NOPs to align the block. 821 let new_offset = I::align_basic_block(buffer.cur_offset()); 822 while new_offset > buffer.cur_offset() { 823 // Pad with NOPs up to the aligned block offset. 824 let nop = I::gen_nop((new_offset - buffer.cur_offset()) as usize); 825 nop.emit(&mut buffer, &self.emit_info, &mut Default::default()); 826 } 827 assert_eq!(buffer.cur_offset(), new_offset); 828 829 let do_emit = |inst: &I, 830 disasm: &mut String, 831 buffer: &mut MachBuffer<I>, 832 state: &mut I::State| { 833 if want_disasm && !inst.is_args() { 834 let mut s = state.clone(); 835 writeln!(disasm, " {}", inst.pretty_print_inst(&mut s)).unwrap(); 836 } 837 inst.emit(buffer, &self.emit_info, state); 838 }; 839 840 // Is this the first block? Emit the prologue directly if so. 841 if block == self.entry { 842 trace!(" -> entry block"); 843 buffer.start_srcloc(Default::default()); 844 for inst in &self.abi.gen_prologue() { 845 do_emit(&inst, &mut disasm, &mut buffer, &mut state); 846 } 847 buffer.end_srcloc(); 848 } 849 850 // Now emit the regular block body. 851 852 buffer.bind_label(MachLabel::from_block(block), state.ctrl_plane_mut()); 853 854 if want_disasm { 855 writeln!(&mut disasm, "block{}:", block.index()).unwrap(); 856 } 857 858 if flags.machine_code_cfg_info() { 859 // Track BB starts. If we have backed up due to MachBuffer 860 // branch opts, note that the removed blocks were removed. 861 let cur_offset = buffer.cur_offset(); 862 if last_offset.is_some() && cur_offset <= last_offset.unwrap() { 863 for i in (0..bb_starts.len()).rev() { 864 if bb_starts[i].is_some() && cur_offset > bb_starts[i].unwrap() { 865 break; 866 } 867 bb_starts[i] = None; 868 } 869 } 870 bb_starts.push(Some(cur_offset)); 871 last_offset = Some(cur_offset); 872 } 873 874 if let Some(block_start) = I::gen_block_start( 875 self.block_order.is_indirect_branch_target(block), 876 is_forward_edge_cfi_enabled, 877 ) { 878 do_emit(&block_start, &mut disasm, &mut buffer, &mut state); 879 } 880 881 for inst_or_edit in regalloc.block_insts_and_edits(&self, block) { 882 match inst_or_edit { 883 InstOrEdit::Inst(iix) => { 884 if !self.debug_value_labels.is_empty() { 885 // If we need to produce debug info, 886 // record the offset of each instruction 887 // so that we can translate value-label 888 // ranges to machine-code offsets. 889 890 // Cold blocks violate monotonicity 891 // assumptions elsewhere (that 892 // instructions in inst-index order are in 893 // order in machine code), so we omit 894 // their offsets here. Value-label range 895 // generation below will skip empty ranges 896 // and ranges with to-offsets of zero. 897 if !self.block_order.is_cold(block) { 898 inst_offsets[iix.index()] = buffer.cur_offset(); 899 } 900 } 901 902 // Update the srcloc at this point in the buffer. 903 let srcloc = self.srclocs[iix.index()]; 904 if cur_srcloc != Some(srcloc) { 905 if cur_srcloc.is_some() { 906 buffer.end_srcloc(); 907 } 908 buffer.start_srcloc(srcloc); 909 cur_srcloc = Some(srcloc); 910 } 911 912 // If this is a safepoint, compute a stack map 913 // and pass it to the emit state. 914 let stack_map_disasm = if self.insts[iix.index()].is_safepoint() { 915 let mut safepoint_slots: SmallVec<[SpillSlot; 8]> = smallvec![]; 916 // Find the contiguous range of 917 // (progpoint, allocation) safepoint slot 918 // records in `regalloc.safepoint_slots` 919 // for this instruction index. 920 let safepoint_slots_start = regalloc 921 .safepoint_slots 922 .binary_search_by(|(progpoint, _alloc)| { 923 if progpoint.inst() >= iix { 924 std::cmp::Ordering::Greater 925 } else { 926 std::cmp::Ordering::Less 927 } 928 }) 929 .unwrap_err(); 930 931 for (_, alloc) in regalloc.safepoint_slots[safepoint_slots_start..] 932 .iter() 933 .take_while(|(progpoint, _)| progpoint.inst() == iix) 934 { 935 let slot = alloc.as_stack().unwrap(); 936 safepoint_slots.push(slot); 937 } 938 939 let stack_map = if safepoint_slots.is_empty() { 940 None 941 } else { 942 Some( 943 self.abi 944 .spillslots_to_stack_map(&safepoint_slots[..], &state), 945 ) 946 }; 947 948 let (user_stack_map, user_stack_map_disasm) = { 949 // The `user_stack_maps` is keyed by reverse 950 // instruction index, so we must flip the 951 // index. We can't put this into a helper method 952 // due to borrowck issues because parts of 953 // `self` are borrowed mutably elsewhere in this 954 // function. 955 let index = iix.to_backwards_insn_index(self.num_insts()); 956 let user_stack_map = self.user_stack_maps.remove(&index); 957 let user_stack_map_disasm = 958 user_stack_map.as_ref().map(|m| format!(" ; {m:?}")); 959 (user_stack_map, user_stack_map_disasm) 960 }; 961 962 state.pre_safepoint(stack_map, user_stack_map); 963 964 user_stack_map_disasm 965 } else { 966 None 967 }; 968 969 // If the instruction we are about to emit is 970 // a return, place an epilogue at this point 971 // (and don't emit the return; the actual 972 // epilogue will contain it). 973 if self.insts[iix.index()].is_term() == MachTerminator::Ret { 974 for inst in self.abi.gen_epilogue() { 975 do_emit(&inst, &mut disasm, &mut buffer, &mut state); 976 } 977 } else { 978 // Update the operands for this inst using the 979 // allocations from the regalloc result. 980 let mut allocs = regalloc.inst_allocs(iix).iter(); 981 self.insts[iix.index()].get_operands( 982 &mut |reg: &mut Reg, constraint, _kind, _pos| { 983 let alloc = allocs 984 .next() 985 .expect("enough allocations for all operands") 986 .as_reg() 987 .expect("only register allocations, not stack allocations") 988 .into(); 989 990 if let OperandConstraint::FixedReg(rreg) = constraint { 991 debug_assert_eq!(Reg::from(rreg), alloc); 992 } 993 *reg = alloc; 994 }, 995 ); 996 debug_assert!(allocs.next().is_none()); 997 998 // Emit the instruction! 999 do_emit( 1000 &self.insts[iix.index()], 1001 &mut disasm, 1002 &mut buffer, 1003 &mut state, 1004 ); 1005 if let Some(stack_map_disasm) = stack_map_disasm { 1006 disasm.push_str(&stack_map_disasm); 1007 disasm.push('\n'); 1008 } 1009 } 1010 } 1011 1012 InstOrEdit::Edit(Edit::Move { from, to }) => { 1013 // Create a move/spill/reload instruction and 1014 // immediately emit it. 1015 match (from.as_reg(), to.as_reg()) { 1016 (Some(from), Some(to)) => { 1017 // Reg-to-reg move. 1018 let from_rreg = Reg::from(from); 1019 let to_rreg = Writable::from_reg(Reg::from(to)); 1020 debug_assert_eq!(from.class(), to.class()); 1021 let ty = I::canonical_type_for_rc(from.class()); 1022 let mv = I::gen_move(to_rreg, from_rreg, ty); 1023 do_emit(&mv, &mut disasm, &mut buffer, &mut state); 1024 } 1025 (Some(from), None) => { 1026 // Spill from register to spillslot. 1027 let to = to.as_stack().unwrap(); 1028 let from_rreg = RealReg::from(from); 1029 let spill = self.abi.gen_spill(to, from_rreg); 1030 do_emit(&spill, &mut disasm, &mut buffer, &mut state); 1031 } 1032 (None, Some(to)) => { 1033 // Load from spillslot to register. 1034 let from = from.as_stack().unwrap(); 1035 let to_rreg = Writable::from_reg(RealReg::from(to)); 1036 let reload = self.abi.gen_reload(to_rreg, from); 1037 do_emit(&reload, &mut disasm, &mut buffer, &mut state); 1038 } 1039 (None, None) => { 1040 panic!("regalloc2 should have eliminated stack-to-stack moves!"); 1041 } 1042 } 1043 } 1044 } 1045 } 1046 1047 if cur_srcloc.is_some() { 1048 buffer.end_srcloc(); 1049 cur_srcloc = None; 1050 } 1051 1052 // Do we need an island? Get the worst-case size of the next BB, add 1053 // it to the optional padding behind the block, and pass this to the 1054 // `MachBuffer` to determine if an island is necessary. 1055 let worst_case_next_bb = if block_order_idx < final_order.len() - 1 { 1056 let next_block = final_order[block_order_idx + 1]; 1057 let next_block_range = self.block_ranges.get(next_block.index()); 1058 let next_block_size = next_block_range.len() as u32; 1059 let next_block_ra_insertions = ra_edits_per_block[next_block.index()]; 1060 I::worst_case_size() * (next_block_size + next_block_ra_insertions) 1061 } else { 1062 0 1063 }; 1064 let padding = if bb_padding.is_empty() { 1065 0 1066 } else { 1067 bb_padding.len() as u32 + I::LabelUse::ALIGN - 1 1068 }; 1069 if buffer.island_needed(padding + worst_case_next_bb) { 1070 buffer.emit_island(padding + worst_case_next_bb, ctrl_plane); 1071 } 1072 1073 // Insert padding, if configured, to stress the `MachBuffer`'s 1074 // relocation and island calculations. 1075 // 1076 // Padding can get quite large during fuzzing though so place a 1077 // total cap on it where when a per-function threshold is exceeded 1078 // the padding is turned back down to zero. This avoids a small-ish 1079 // test case generating a GB+ memory footprint in Cranelift for 1080 // example. 1081 if !bb_padding.is_empty() { 1082 buffer.put_data(&bb_padding); 1083 buffer.align_to(I::LabelUse::ALIGN); 1084 total_bb_padding += bb_padding.len(); 1085 if total_bb_padding > (150 << 20) { 1086 bb_padding = Vec::new(); 1087 } 1088 } 1089 } 1090 1091 debug_assert!( 1092 self.user_stack_maps.is_empty(), 1093 "any stack maps should have been consumed by instruction emission, still have: {:#?}", 1094 self.user_stack_maps, 1095 ); 1096 1097 // Do any optimizations on branches at tail of buffer, as if we had 1098 // bound one last label. 1099 buffer.optimize_branches(ctrl_plane); 1100 1101 // emission state is not needed anymore, move control plane back out 1102 *ctrl_plane = state.take_ctrl_plane(); 1103 1104 let func_body_len = buffer.cur_offset(); 1105 1106 // Create `bb_edges` and final (filtered) `bb_starts`. 1107 let mut bb_edges = vec![]; 1108 let mut bb_offsets = vec![]; 1109 if flags.machine_code_cfg_info() { 1110 for block in 0..self.num_blocks() { 1111 if bb_starts[block].is_none() { 1112 // Block was deleted by MachBuffer; skip. 1113 continue; 1114 } 1115 let from = bb_starts[block].unwrap(); 1116 1117 bb_offsets.push(from); 1118 // Resolve each `succ` label and add edges. 1119 let succs = self.block_succs(BlockIndex::new(block)); 1120 for &succ in succs.iter() { 1121 let to = buffer.resolve_label_offset(MachLabel::from_block(succ)); 1122 bb_edges.push((from, to)); 1123 } 1124 } 1125 } 1126 1127 self.monotonize_inst_offsets(&mut inst_offsets[..], func_body_len); 1128 let value_labels_ranges = 1129 self.compute_value_labels_ranges(regalloc, &inst_offsets[..], func_body_len); 1130 let frame_size = self.abi.frame_size(); 1131 1132 EmitResult { 1133 buffer: buffer.finish(&self.constants, ctrl_plane), 1134 bb_offsets, 1135 bb_edges, 1136 func_body_len, 1137 disasm: if want_disasm { Some(disasm) } else { None }, 1138 sized_stackslot_offsets: self.abi.sized_stackslot_offsets().clone(), 1139 dynamic_stackslot_offsets: self.abi.dynamic_stackslot_offsets().clone(), 1140 value_labels_ranges, 1141 frame_size, 1142 } 1143 } 1144 1145 fn monotonize_inst_offsets(&self, inst_offsets: &mut [CodeOffset], func_body_len: u32) { 1146 if self.debug_value_labels.is_empty() { 1147 return; 1148 } 1149 1150 // During emission, branch removal can make offsets of instructions incorrect. 1151 // Consider the following sequence: [insi][jmp0][jmp1][jmp2][insj] 1152 // It will be recorded as (say): [30] [34] [38] [42] [<would be 46>] 1153 // When the jumps get removed we are left with (in "inst_offsets"): 1154 // [insi][jmp0][jmp1][jmp2][insj][...] 1155 // [30] [34] [38] [42] [34] 1156 // Which violates the monotonicity invariant. This method sets offsets of these 1157 // removed instructions such as to make them appear zero-sized: 1158 // [insi][jmp0][jmp1][jmp2][insj][...] 1159 // [30] [34] [34] [34] [34] 1160 // 1161 let mut next_offset = func_body_len; 1162 for inst_index in (0..(inst_offsets.len() - 1)).rev() { 1163 let inst_offset = inst_offsets[inst_index]; 1164 1165 // Not all instructions get their offsets recorded. 1166 if inst_offset == NO_INST_OFFSET { 1167 continue; 1168 } 1169 1170 if inst_offset > next_offset { 1171 trace!( 1172 "Fixing code offset of the removed Inst {}: {} -> {}", 1173 inst_index, 1174 inst_offset, 1175 next_offset 1176 ); 1177 inst_offsets[inst_index] = next_offset; 1178 continue; 1179 } 1180 1181 next_offset = inst_offset; 1182 } 1183 } 1184 1185 fn compute_value_labels_ranges( 1186 &self, 1187 regalloc: ®alloc2::Output, 1188 inst_offsets: &[CodeOffset], 1189 func_body_len: u32, 1190 ) -> ValueLabelsRanges { 1191 if self.debug_value_labels.is_empty() { 1192 return ValueLabelsRanges::default(); 1193 } 1194 1195 let mut value_labels_ranges: ValueLabelsRanges = HashMap::new(); 1196 for &(label, from, to, alloc) in ®alloc.debug_locations { 1197 let ranges = value_labels_ranges 1198 .entry(ValueLabel::from_u32(label)) 1199 .or_insert_with(|| vec![]); 1200 let from_offset = inst_offsets[from.inst().index()]; 1201 let to_offset = if to.inst().index() == inst_offsets.len() { 1202 func_body_len 1203 } else { 1204 inst_offsets[to.inst().index()] 1205 }; 1206 1207 // Empty ranges or unavailable offsets can happen 1208 // due to cold blocks and branch removal (see above). 1209 if from_offset == NO_INST_OFFSET 1210 || to_offset == NO_INST_OFFSET 1211 || from_offset == to_offset 1212 { 1213 continue; 1214 } 1215 1216 let loc = if let Some(preg) = alloc.as_reg() { 1217 LabelValueLoc::Reg(Reg::from(preg)) 1218 } else { 1219 let slot = alloc.as_stack().unwrap(); 1220 let slot_offset = self.abi.get_spillslot_offset(slot); 1221 let slot_base_to_caller_sp_offset = self.abi.slot_base_to_caller_sp_offset(); 1222 let caller_sp_to_cfa_offset = 1223 crate::isa::unwind::systemv::caller_sp_to_cfa_offset(); 1224 // NOTE: this is a negative offset because it's relative to the caller's SP 1225 let cfa_to_sp_offset = 1226 -((slot_base_to_caller_sp_offset + caller_sp_to_cfa_offset) as i64); 1227 LabelValueLoc::CFAOffset(cfa_to_sp_offset + slot_offset) 1228 }; 1229 1230 // ValueLocRanges are recorded by *instruction-end 1231 // offset*. `from_offset` is the *start* of the 1232 // instruction; that is the same as the end of another 1233 // instruction, so we only want to begin coverage once 1234 // we are past the previous instruction's end. 1235 let start = from_offset + 1; 1236 1237 // Likewise, `end` is exclusive, but we want to 1238 // *include* the end of the last 1239 // instruction. `to_offset` is the start of the 1240 // `to`-instruction, which is the exclusive end, i.e., 1241 // the first instruction not covered. That 1242 // instruction's start is the same as the end of the 1243 // last instruction that is included, so we go one 1244 // byte further to be sure to include it. 1245 let end = to_offset + 1; 1246 1247 // Coalesce adjacent ranges that for the same location 1248 // to minimize output size here and for the consumers. 1249 if let Some(last_loc_range) = ranges.last_mut() { 1250 if last_loc_range.loc == loc && last_loc_range.end == start { 1251 trace!( 1252 "Extending debug range for VL{} in {:?} to {}", 1253 label, 1254 loc, 1255 end 1256 ); 1257 last_loc_range.end = end; 1258 continue; 1259 } 1260 } 1261 1262 trace!( 1263 "Recording debug range for VL{} in {:?}: [Inst {}..Inst {}) [{}..{})", 1264 label, 1265 loc, 1266 from.inst().index(), 1267 to.inst().index(), 1268 start, 1269 end 1270 ); 1271 1272 ranges.push(ValueLocRange { loc, start, end }); 1273 } 1274 1275 value_labels_ranges 1276 } 1277 1278 /// Get the IR block for a BlockIndex, if one exists. 1279 pub fn bindex_to_bb(&self, block: BlockIndex) -> Option<ir::Block> { 1280 self.block_order.lowered_order()[block.index()].orig_block() 1281 } 1282 1283 /// Get the type of a VReg. 1284 pub fn vreg_type(&self, vreg: VReg) -> Type { 1285 self.vreg_types[vreg.vreg()] 1286 } 1287 1288 /// Get the fact, if any, for a given VReg. 1289 pub fn vreg_fact(&self, vreg: VReg) -> Option<&Fact> { 1290 self.facts[vreg.vreg()].as_ref() 1291 } 1292 1293 /// Set the fact for a given VReg. 1294 pub fn set_vreg_fact(&mut self, vreg: VReg, fact: Fact) { 1295 trace!("set fact on {}: {:?}", vreg, fact); 1296 self.facts[vreg.vreg()] = Some(fact); 1297 } 1298 1299 /// Does a given instruction define any facts? 1300 pub fn inst_defines_facts(&self, inst: InsnIndex) -> bool { 1301 self.inst_operands(inst) 1302 .iter() 1303 .filter(|o| o.kind() == OperandKind::Def) 1304 .map(|o| o.vreg()) 1305 .any(|vreg| self.facts[vreg.vreg()].is_some()) 1306 } 1307 1308 /// Get the user stack map associated with the given forward instruction index. 1309 pub fn get_user_stack_map(&self, inst: InsnIndex) -> Option<&ir::UserStackMap> { 1310 let index = inst.to_backwards_insn_index(self.num_insts()); 1311 self.user_stack_maps.get(&index) 1312 } 1313 } 1314 1315 impl<I: VCodeInst> std::ops::Index<InsnIndex> for VCode<I> { 1316 type Output = I; 1317 fn index(&self, idx: InsnIndex) -> &Self::Output { 1318 &self.insts[idx.index()] 1319 } 1320 } 1321 1322 impl<I: VCodeInst> RegallocFunction for VCode<I> { 1323 fn num_insts(&self) -> usize { 1324 self.insts.len() 1325 } 1326 1327 fn num_blocks(&self) -> usize { 1328 self.block_ranges.len() 1329 } 1330 1331 fn entry_block(&self) -> BlockIndex { 1332 self.entry 1333 } 1334 1335 fn block_insns(&self, block: BlockIndex) -> InstRange { 1336 let range = self.block_ranges.get(block.index()); 1337 InstRange::forward(InsnIndex::new(range.start), InsnIndex::new(range.end)) 1338 } 1339 1340 fn block_succs(&self, block: BlockIndex) -> &[BlockIndex] { 1341 let range = self.block_succ_range.get(block.index()); 1342 &self.block_succs[range] 1343 } 1344 1345 fn block_preds(&self, block: BlockIndex) -> &[BlockIndex] { 1346 let range = self.block_pred_range.get(block.index()); 1347 &self.block_preds[range] 1348 } 1349 1350 fn block_params(&self, block: BlockIndex) -> &[VReg] { 1351 // As a special case we don't return block params for the entry block, as all the arguments 1352 // will be defined by the `Inst::Args` instruction. 1353 if block == self.entry { 1354 return &[]; 1355 } 1356 1357 let range = self.block_params_range.get(block.index()); 1358 &self.block_params[range] 1359 } 1360 1361 fn branch_blockparams(&self, block: BlockIndex, _insn: InsnIndex, succ_idx: usize) -> &[VReg] { 1362 let succ_range = self.branch_block_arg_succ_range.get(block.index()); 1363 debug_assert!(succ_idx < succ_range.len()); 1364 let branch_block_args = self.branch_block_arg_range.get(succ_range.start + succ_idx); 1365 &self.branch_block_args[branch_block_args] 1366 } 1367 1368 fn is_ret(&self, insn: InsnIndex) -> bool { 1369 match self.insts[insn.index()].is_term() { 1370 // We treat blocks terminated by an unconditional trap like a return for regalloc. 1371 MachTerminator::None => self.insts[insn.index()].is_trap(), 1372 MachTerminator::Ret | MachTerminator::RetCall => true, 1373 MachTerminator::Uncond | MachTerminator::Cond | MachTerminator::Indirect => false, 1374 } 1375 } 1376 1377 fn is_branch(&self, insn: InsnIndex) -> bool { 1378 match self.insts[insn.index()].is_term() { 1379 MachTerminator::Cond | MachTerminator::Uncond | MachTerminator::Indirect => true, 1380 _ => false, 1381 } 1382 } 1383 1384 fn requires_refs_on_stack(&self, insn: InsnIndex) -> bool { 1385 self.insts[insn.index()].is_safepoint() 1386 } 1387 1388 fn inst_operands(&self, insn: InsnIndex) -> &[Operand] { 1389 let range = self.operand_ranges.get(insn.index()); 1390 &self.operands[range] 1391 } 1392 1393 fn inst_clobbers(&self, insn: InsnIndex) -> PRegSet { 1394 self.clobbers.get(&insn).cloned().unwrap_or_default() 1395 } 1396 1397 fn num_vregs(&self) -> usize { 1398 self.vreg_types.len() 1399 } 1400 1401 fn reftype_vregs(&self) -> &[VReg] { 1402 &self.reftyped_vregs 1403 } 1404 1405 fn debug_value_labels(&self) -> &[(VReg, InsnIndex, InsnIndex, u32)] { 1406 &self.debug_value_labels 1407 } 1408 1409 fn spillslot_size(&self, regclass: RegClass) -> usize { 1410 self.abi.get_spillslot_size(regclass) as usize 1411 } 1412 1413 fn allow_multiple_vreg_defs(&self) -> bool { 1414 // At least the s390x backend requires this, because the 1415 // `Loop` pseudo-instruction aggregates all Operands so pinned 1416 // vregs (RealRegs) may occur more than once. 1417 true 1418 } 1419 } 1420 1421 impl<I: VCodeInst> Debug for VRegAllocator<I> { 1422 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 1423 writeln!(f, "VRegAllocator {{")?; 1424 1425 let mut alias_keys = self.vreg_aliases.keys().cloned().collect::<Vec<_>>(); 1426 alias_keys.sort_unstable(); 1427 for key in alias_keys { 1428 let dest = self.vreg_aliases.get(&key).unwrap(); 1429 writeln!(f, " {:?} := {:?}", Reg::from(key), Reg::from(*dest))?; 1430 } 1431 1432 for (vreg, fact) in self.facts.iter().enumerate() { 1433 if let Some(fact) = fact { 1434 writeln!(f, " v{} ! {}", vreg, fact)?; 1435 } 1436 } 1437 1438 writeln!(f, "}}") 1439 } 1440 } 1441 1442 impl<I: VCodeInst> fmt::Debug for VCode<I> { 1443 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 1444 writeln!(f, "VCode {{")?; 1445 writeln!(f, " Entry block: {}", self.entry.index())?; 1446 1447 let mut state = Default::default(); 1448 1449 for block in 0..self.num_blocks() { 1450 let block = BlockIndex::new(block); 1451 writeln!( 1452 f, 1453 "Block {}({:?}):", 1454 block.index(), 1455 self.block_params(block) 1456 )?; 1457 if let Some(bb) = self.bindex_to_bb(block) { 1458 writeln!(f, " (original IR block: {})", bb)?; 1459 } 1460 for (succ_idx, succ) in self.block_succs(block).iter().enumerate() { 1461 writeln!( 1462 f, 1463 " (successor: Block {}({:?}))", 1464 succ.index(), 1465 self.branch_blockparams(block, InsnIndex::new(0) /* dummy */, succ_idx) 1466 )?; 1467 } 1468 for inst in self.block_ranges.get(block.index()) { 1469 writeln!( 1470 f, 1471 " Inst {}: {}", 1472 inst, 1473 self.insts[inst].pretty_print_inst(&mut state) 1474 )?; 1475 if !self.operands.is_empty() { 1476 for operand in self.inst_operands(InsnIndex::new(inst)) { 1477 if operand.kind() == OperandKind::Def { 1478 if let Some(fact) = &self.facts[operand.vreg().vreg()] { 1479 writeln!(f, " v{} ! {}", operand.vreg().vreg(), fact)?; 1480 } 1481 } 1482 } 1483 } 1484 if let Some(user_stack_map) = self.get_user_stack_map(InsnIndex::new(inst)) { 1485 writeln!(f, " {user_stack_map:?}")?; 1486 } 1487 } 1488 } 1489 1490 writeln!(f, "}}")?; 1491 Ok(()) 1492 } 1493 } 1494 1495 /// This structure manages VReg allocation during the lifetime of the VCodeBuilder. 1496 pub struct VRegAllocator<I> { 1497 /// VReg IR-level types. 1498 vreg_types: Vec<Type>, 1499 1500 /// Reference-typed `regalloc2::VReg`s. The regalloc requires 1501 /// these in a dense slice (as opposed to querying the 1502 /// reftype-status of each vreg) for efficient iteration. 1503 reftyped_vregs: Vec<VReg>, 1504 1505 /// VReg aliases. When the final VCode is built we rewrite all 1506 /// uses of the keys in this table to their replacement values. 1507 /// 1508 /// We use these aliases to rename an instruction's expected 1509 /// result vregs to the returned vregs from lowering, which are 1510 /// usually freshly-allocated temps. 1511 vreg_aliases: FxHashMap<regalloc2::VReg, regalloc2::VReg>, 1512 1513 /// A deferred error, to be bubbled up to the top level of the 1514 /// lowering algorithm. We take this approach because we cannot 1515 /// currently propagate a `Result` upward through ISLE code (the 1516 /// lowering rules) or some ABI code. 1517 deferred_error: Option<CodegenError>, 1518 1519 /// Facts on VRegs, for proof-carrying code. 1520 facts: Vec<Option<Fact>>, 1521 1522 /// The type of instruction that this allocator makes registers for. 1523 _inst: core::marker::PhantomData<I>, 1524 } 1525 1526 impl<I: VCodeInst> VRegAllocator<I> { 1527 /// Make a new VRegAllocator. 1528 pub fn with_capacity(capacity: usize) -> Self { 1529 let capacity = first_user_vreg_index() + capacity; 1530 let mut vreg_types = Vec::with_capacity(capacity); 1531 vreg_types.resize(first_user_vreg_index(), types::INVALID); 1532 Self { 1533 vreg_types, 1534 reftyped_vregs: vec![], 1535 vreg_aliases: FxHashMap::with_capacity_and_hasher(capacity, Default::default()), 1536 deferred_error: None, 1537 facts: Vec::with_capacity(capacity), 1538 _inst: core::marker::PhantomData::default(), 1539 } 1540 } 1541 1542 /// Allocate a fresh ValueRegs. 1543 pub fn alloc(&mut self, ty: Type) -> CodegenResult<ValueRegs<Reg>> { 1544 if self.deferred_error.is_some() { 1545 return Err(CodegenError::CodeTooLarge); 1546 } 1547 let v = self.vreg_types.len(); 1548 let (regclasses, tys) = I::rc_for_type(ty)?; 1549 if v + regclasses.len() >= VReg::MAX { 1550 return Err(CodegenError::CodeTooLarge); 1551 } 1552 1553 let regs: ValueRegs<Reg> = match regclasses { 1554 &[rc0] => ValueRegs::one(VReg::new(v, rc0).into()), 1555 &[rc0, rc1] => ValueRegs::two(VReg::new(v, rc0).into(), VReg::new(v + 1, rc1).into()), 1556 // We can extend this if/when we support 32-bit targets; e.g., 1557 // an i128 on a 32-bit machine will need up to four machine regs 1558 // for a `Value`. 1559 _ => panic!("Value must reside in 1 or 2 registers"), 1560 }; 1561 for (®_ty, ®) in tys.iter().zip(regs.regs().iter()) { 1562 let vreg = reg.to_virtual_reg().unwrap(); 1563 debug_assert_eq!(self.vreg_types.len(), vreg.index()); 1564 self.vreg_types.push(reg_ty); 1565 if is_reftype(reg_ty) { 1566 self.reftyped_vregs.push(vreg.into()); 1567 } 1568 } 1569 1570 // Create empty facts for each allocated vreg. 1571 self.facts.resize(self.vreg_types.len(), None); 1572 1573 Ok(regs) 1574 } 1575 1576 /// Allocate a fresh ValueRegs, deferring any out-of-vregs 1577 /// errors. This is useful in places where we cannot bubble a 1578 /// `CodegenResult` upward easily, and which are known to be 1579 /// invoked from within the lowering loop that checks the deferred 1580 /// error status below. 1581 pub fn alloc_with_deferred_error(&mut self, ty: Type) -> ValueRegs<Reg> { 1582 match self.alloc(ty) { 1583 Ok(x) => x, 1584 Err(e) => { 1585 self.deferred_error = Some(e); 1586 self.bogus_for_deferred_error(ty) 1587 } 1588 } 1589 } 1590 1591 /// Take any deferred error that was accumulated by `alloc_with_deferred_error`. 1592 pub fn take_deferred_error(&mut self) -> Option<CodegenError> { 1593 self.deferred_error.take() 1594 } 1595 1596 /// Produce an bogus VReg placeholder with the proper number of 1597 /// registers for the given type. This is meant to be used with 1598 /// deferred allocation errors (see `Lower::alloc_tmp()`). 1599 fn bogus_for_deferred_error(&self, ty: Type) -> ValueRegs<Reg> { 1600 let (regclasses, _tys) = I::rc_for_type(ty).expect("must have valid type"); 1601 match regclasses { 1602 &[rc0] => ValueRegs::one(VReg::new(0, rc0).into()), 1603 &[rc0, rc1] => ValueRegs::two(VReg::new(0, rc0).into(), VReg::new(1, rc1).into()), 1604 _ => panic!("Value must reside in 1 or 2 registers"), 1605 } 1606 } 1607 1608 /// Rewrite any mention of `from` into `to`. 1609 pub fn set_vreg_alias(&mut self, from: Reg, to: Reg) { 1610 let from = from.into(); 1611 let resolved_to = self.resolve_vreg_alias(to.into()); 1612 // Disallow cycles (see below). 1613 assert_ne!(resolved_to, from); 1614 1615 // Maintain the invariant that PCC facts only exist on vregs 1616 // which aren't aliases. We want to preserve whatever was 1617 // stated about the vreg before its producer was lowered. 1618 if let Some(fact) = self.facts[from.vreg()].take() { 1619 self.set_fact(resolved_to, fact); 1620 } 1621 1622 let old_alias = self.vreg_aliases.insert(from, resolved_to); 1623 debug_assert_eq!(old_alias, None); 1624 } 1625 1626 fn resolve_vreg_alias(&self, mut vreg: regalloc2::VReg) -> regalloc2::VReg { 1627 // We prevent cycles from existing by resolving targets of 1628 // aliases eagerly before setting them. If the target resolves 1629 // to the origin of the alias, then a cycle would be created 1630 // and the alias is disallowed. Because of the structure of 1631 // SSA code (one instruction can refer to another's defs but 1632 // not vice-versa, except indirectly through 1633 // phis/blockparams), cycles should not occur as we use 1634 // aliases to redirect vregs to the temps that actually define 1635 // them. 1636 while let Some(to) = self.vreg_aliases.get(&vreg) { 1637 vreg = *to; 1638 } 1639 vreg 1640 } 1641 1642 #[inline] 1643 fn debug_assert_no_vreg_aliases(&self, mut list: impl Iterator<Item = VReg>) { 1644 debug_assert!(list.all(|vreg| !self.vreg_aliases.contains_key(&vreg))); 1645 } 1646 1647 /// Set the proof-carrying code fact on a given virtual register. 1648 /// 1649 /// Returns the old fact, if any (only one fact can be stored). 1650 fn set_fact(&mut self, vreg: regalloc2::VReg, fact: Fact) -> Option<Fact> { 1651 trace!("vreg {:?} has fact: {:?}", vreg, fact); 1652 debug_assert!(!self.vreg_aliases.contains_key(&vreg)); 1653 self.facts[vreg.vreg()].replace(fact) 1654 } 1655 1656 /// Set a fact only if one doesn't already exist. 1657 pub fn set_fact_if_missing(&mut self, vreg: VirtualReg, fact: Fact) { 1658 let vreg = self.resolve_vreg_alias(vreg.into()); 1659 if self.facts[vreg.vreg()].is_none() { 1660 self.set_fact(vreg, fact); 1661 } 1662 } 1663 1664 /// Allocate a fresh ValueRegs, with a given fact to apply if 1665 /// the value fits in one VReg. 1666 pub fn alloc_with_maybe_fact( 1667 &mut self, 1668 ty: Type, 1669 fact: Option<Fact>, 1670 ) -> CodegenResult<ValueRegs<Reg>> { 1671 let result = self.alloc(ty)?; 1672 1673 // Ensure that we don't lose a fact on a value that splits 1674 // into multiple VRegs. 1675 assert!(result.len() == 1 || fact.is_none()); 1676 if let Some(fact) = fact { 1677 self.set_fact(result.regs()[0].into(), fact); 1678 } 1679 1680 Ok(result) 1681 } 1682 } 1683 1684 /// This structure tracks the large constants used in VCode that will be emitted separately by the 1685 /// [MachBuffer]. 1686 /// 1687 /// First, during the lowering phase, constants are inserted using 1688 /// [VCodeConstants.insert]; an intermediate handle, `VCodeConstant`, tracks what constants are 1689 /// used in this phase. Some deduplication is performed, when possible, as constant 1690 /// values are inserted. 1691 /// 1692 /// Secondly, during the emission phase, the [MachBuffer] assigns [MachLabel]s for each of the 1693 /// constants so that instructions can refer to the value's memory location. The [MachBuffer] 1694 /// then writes the constant values to the buffer. 1695 #[derive(Default)] 1696 pub struct VCodeConstants { 1697 constants: PrimaryMap<VCodeConstant, VCodeConstantData>, 1698 pool_uses: HashMap<Constant, VCodeConstant>, 1699 well_known_uses: HashMap<*const [u8], VCodeConstant>, 1700 u64s: HashMap<[u8; 8], VCodeConstant>, 1701 } 1702 impl VCodeConstants { 1703 /// Initialize the structure with the expected number of constants. 1704 pub fn with_capacity(expected_num_constants: usize) -> Self { 1705 Self { 1706 constants: PrimaryMap::with_capacity(expected_num_constants), 1707 pool_uses: HashMap::with_capacity(expected_num_constants), 1708 well_known_uses: HashMap::new(), 1709 u64s: HashMap::new(), 1710 } 1711 } 1712 1713 /// Insert a constant; using this method indicates that a constant value will be used and thus 1714 /// will be emitted to the `MachBuffer`. The current implementation can deduplicate constants 1715 /// that are [VCodeConstantData::Pool] or [VCodeConstantData::WellKnown] but not 1716 /// [VCodeConstantData::Generated]. 1717 pub fn insert(&mut self, data: VCodeConstantData) -> VCodeConstant { 1718 match data { 1719 VCodeConstantData::Generated(_) => self.constants.push(data), 1720 VCodeConstantData::Pool(constant, _) => match self.pool_uses.get(&constant) { 1721 None => { 1722 let vcode_constant = self.constants.push(data); 1723 self.pool_uses.insert(constant, vcode_constant); 1724 vcode_constant 1725 } 1726 Some(&vcode_constant) => vcode_constant, 1727 }, 1728 VCodeConstantData::WellKnown(data_ref) => { 1729 match self.well_known_uses.entry(data_ref as *const [u8]) { 1730 Entry::Vacant(v) => { 1731 let vcode_constant = self.constants.push(data); 1732 v.insert(vcode_constant); 1733 vcode_constant 1734 } 1735 Entry::Occupied(o) => *o.get(), 1736 } 1737 } 1738 VCodeConstantData::U64(value) => match self.u64s.entry(value) { 1739 Entry::Vacant(v) => { 1740 let vcode_constant = self.constants.push(data); 1741 v.insert(vcode_constant); 1742 vcode_constant 1743 } 1744 Entry::Occupied(o) => *o.get(), 1745 }, 1746 } 1747 } 1748 1749 /// Return the number of constants inserted. 1750 pub fn len(&self) -> usize { 1751 self.constants.len() 1752 } 1753 1754 /// Iterate over the `VCodeConstant` keys inserted in this structure. 1755 pub fn keys(&self) -> Keys<VCodeConstant> { 1756 self.constants.keys() 1757 } 1758 1759 /// Iterate over the `VCodeConstant` keys and the data (as a byte slice) inserted in this 1760 /// structure. 1761 pub fn iter(&self) -> impl Iterator<Item = (VCodeConstant, &VCodeConstantData)> { 1762 self.constants.iter() 1763 } 1764 1765 /// Returns the data associated with the specified constant. 1766 pub fn get(&self, c: VCodeConstant) -> &VCodeConstantData { 1767 &self.constants[c] 1768 } 1769 1770 /// Checks if the given [VCodeConstantData] is registered as 1771 /// used by the pool. 1772 pub fn pool_uses(&self, constant: &VCodeConstantData) -> bool { 1773 match constant { 1774 VCodeConstantData::Pool(c, _) => self.pool_uses.contains_key(c), 1775 _ => false, 1776 } 1777 } 1778 } 1779 1780 /// A use of a constant by one or more VCode instructions; see [VCodeConstants]. 1781 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 1782 pub struct VCodeConstant(u32); 1783 entity_impl!(VCodeConstant); 1784 1785 /// Identify the different types of constant that can be inserted into [VCodeConstants]. Tracking 1786 /// these separately instead of as raw byte buffers allows us to avoid some duplication. 1787 pub enum VCodeConstantData { 1788 /// A constant already present in the Cranelift IR 1789 /// [ConstantPool](crate::ir::constant::ConstantPool). 1790 Pool(Constant, ConstantData), 1791 /// A reference to a well-known constant value that is statically encoded within the compiler. 1792 WellKnown(&'static [u8]), 1793 /// A constant value generated during lowering; the value may depend on the instruction context 1794 /// which makes it difficult to de-duplicate--if possible, use other variants. 1795 Generated(ConstantData), 1796 /// A constant of at most 64 bits. These are deduplicated as 1797 /// well. Stored as a fixed-size array of `u8` so that we do not 1798 /// encounter endianness problems when cross-compiling. 1799 U64([u8; 8]), 1800 } 1801 impl VCodeConstantData { 1802 /// Retrieve the constant data as a byte slice. 1803 pub fn as_slice(&self) -> &[u8] { 1804 match self { 1805 VCodeConstantData::Pool(_, d) | VCodeConstantData::Generated(d) => d.as_slice(), 1806 VCodeConstantData::WellKnown(d) => d, 1807 VCodeConstantData::U64(value) => &value[..], 1808 } 1809 } 1810 1811 /// Calculate the alignment of the constant data. 1812 pub fn alignment(&self) -> u32 { 1813 if self.as_slice().len() <= 8 { 1814 8 1815 } else { 1816 16 1817 } 1818 } 1819 } 1820 1821 #[cfg(test)] 1822 mod test { 1823 use super::*; 1824 use std::mem::size_of; 1825 1826 #[test] 1827 fn size_of_constant_structs() { 1828 assert_eq!(size_of::<Constant>(), 4); 1829 assert_eq!(size_of::<VCodeConstant>(), 4); 1830 assert_eq!(size_of::<ConstantData>(), 24); 1831 assert_eq!(size_of::<VCodeConstantData>(), 32); 1832 assert_eq!( 1833 size_of::<PrimaryMap<VCodeConstant, VCodeConstantData>>(), 1834 24 1835 ); 1836 // TODO The VCodeConstants structure's memory size could be further optimized. 1837 // With certain versions of Rust, each `HashMap` in `VCodeConstants` occupied at 1838 // least 48 bytes, making an empty `VCodeConstants` cost 120 bytes. 1839 } 1840 } 1841