1 //! This implements the VCode container: a CFG of Insts that have been lowered. 2 //! 3 //! VCode is virtual-register code. An instruction in VCode is almost a machine 4 //! instruction; however, its register slots can refer to virtual registers in 5 //! addition to real machine registers. 6 //! 7 //! VCode is structured with traditional basic blocks, and 8 //! each block must be terminated by an unconditional branch (one target), a 9 //! conditional branch (two targets), or a return (no targets). Note that this 10 //! slightly differs from the machine code of most ISAs: in most ISAs, a 11 //! conditional branch has one target (and the not-taken case falls through). 12 //! However, we expect that machine backends will elide branches to the following 13 //! block (i.e., zero-offset jumps), and will be able to codegen a branch-cond / 14 //! branch-uncond pair if *both* targets are not fallthrough. This allows us to 15 //! play with layout prior to final binary emission, as well, if we want. 16 //! 17 //! See the main module comment in `mod.rs` for more details on the VCode-based 18 //! backend pipeline. 19 20 use crate::ir::pcc::*; 21 use crate::ir::{self, types, Constant, ConstantData, ValueLabel}; 22 use crate::machinst::*; 23 use crate::ranges::Ranges; 24 use crate::timing; 25 use crate::trace; 26 use crate::CodegenError; 27 use crate::{LabelValueLoc, ValueLocRange}; 28 use regalloc2::{ 29 Edit, Function as RegallocFunction, InstOrEdit, InstRange, MachineEnv, Operand, 30 OperandConstraint, OperandKind, PRegSet, RegClass, 31 }; 32 use rustc_hash::FxHashMap; 33 34 use core::mem::take; 35 use cranelift_entity::{entity_impl, Keys}; 36 use std::collections::hash_map::Entry; 37 use std::collections::HashMap; 38 use std::fmt; 39 40 /// Index referring to an instruction in VCode. 41 pub type InsnIndex = regalloc2::Inst; 42 43 /// Index referring to a basic block in VCode. 44 pub type BlockIndex = regalloc2::Block; 45 46 /// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be 47 /// a `MachInst` and it must be able to emit itself at least to a `SizeCodeSink`. 48 pub trait VCodeInst: MachInst + MachInstEmit {} 49 impl<I: MachInst + MachInstEmit> VCodeInst for I {} 50 51 /// A function in "VCode" (virtualized-register code) form, after 52 /// lowering. This is essentially a standard CFG of basic blocks, 53 /// where each basic block consists of lowered instructions produced 54 /// by the machine-specific backend. 55 /// 56 /// Note that the VCode is immutable once produced, and is not 57 /// modified by register allocation in particular. Rather, register 58 /// allocation on the `VCode` produces a separate `regalloc2::Output` 59 /// struct, and this can be passed to `emit`. `emit` in turn does not 60 /// modify the vcode, but produces an `EmitResult`, which contains the 61 /// machine code itself, and the associated disassembly and/or 62 /// metadata as requested. 63 pub struct VCode<I: VCodeInst> { 64 /// VReg IR-level types. 65 vreg_types: Vec<Type>, 66 67 /// Lowered machine instructions in order corresponding to the original IR. 68 insts: Vec<I>, 69 70 /// Operands: pre-regalloc references to virtual registers with 71 /// constraints, in one flattened array. This allows the regalloc 72 /// to efficiently access all operands without requiring expensive 73 /// matches or method invocations on insts. 74 operands: Vec<Operand>, 75 76 /// Operand index ranges: for each instruction in `insts`, there 77 /// is a tuple here providing the range in `operands` for that 78 /// instruction's operands. 79 operand_ranges: Ranges, 80 81 /// Clobbers: a sparse map from instruction indices to clobber masks. 82 clobbers: FxHashMap<InsnIndex, PRegSet>, 83 84 /// Source locations for each instruction. (`SourceLoc` is a `u32`, so it is 85 /// reasonable to keep one of these per instruction.) 86 srclocs: Vec<RelSourceLoc>, 87 88 /// Entry block. 89 entry: BlockIndex, 90 91 /// Block instruction indices. 92 block_ranges: Ranges, 93 94 /// Block successors: index range in the `block_succs` list. 95 block_succ_range: Ranges, 96 97 /// Block successor lists, concatenated into one vec. The 98 /// `block_succ_range` list of tuples above gives (start, end) 99 /// ranges within this list that correspond to each basic block's 100 /// successors. 101 block_succs: Vec<regalloc2::Block>, 102 103 /// Block predecessors: index range in the `block_preds` list. 104 block_pred_range: Ranges, 105 106 /// Block predecessor lists, concatenated into one vec. The 107 /// `block_pred_range` list of tuples above gives (start, end) 108 /// ranges within this list that correspond to each basic block's 109 /// predecessors. 110 block_preds: Vec<regalloc2::Block>, 111 112 /// Block parameters: index range in `block_params` below. 113 block_params_range: Ranges, 114 115 /// Block parameter lists, concatenated into one vec. The 116 /// `block_params_range` list of tuples above gives (start, end) 117 /// ranges within this list that correspond to each basic block's 118 /// blockparam vregs. 119 block_params: Vec<regalloc2::VReg>, 120 121 /// Outgoing block arguments on branch instructions, concatenated 122 /// into one list. 123 /// 124 /// Note that this is conceptually a 3D array: we have a VReg list 125 /// per block, per successor. We flatten those three dimensions 126 /// into this 1D vec, then store index ranges in two levels of 127 /// indirection. 128 /// 129 /// Indexed by the indices in `branch_block_arg_succ_range`. 130 branch_block_args: Vec<regalloc2::VReg>, 131 132 /// Array of sequences of (start, end) tuples in 133 /// `branch_block_args`, one for each successor; these sequences 134 /// for each block are concatenated. 135 /// 136 /// Indexed by the indices in `branch_block_arg_succ_range`. 137 branch_block_arg_range: Ranges, 138 139 /// For a given block, indices in `branch_block_arg_range` 140 /// corresponding to all of its successors. 141 branch_block_arg_succ_range: Ranges, 142 143 /// Block-order information. 144 block_order: BlockLoweringOrder, 145 146 /// ABI object. 147 pub(crate) abi: Callee<I::ABIMachineSpec>, 148 149 /// Constant information used during code emission. This should be 150 /// immutable across function compilations within the same module. 151 emit_info: I::Info, 152 153 /// Reference-typed `regalloc2::VReg`s. The regalloc requires 154 /// these in a dense slice (as opposed to querying the 155 /// reftype-status of each vreg) for efficient iteration. 156 reftyped_vregs: Vec<VReg>, 157 158 /// Constants. 159 pub(crate) constants: VCodeConstants, 160 161 /// Value labels for debuginfo attached to vregs. 162 debug_value_labels: Vec<(VReg, InsnIndex, InsnIndex, u32)>, 163 164 pub(crate) sigs: SigSet, 165 166 /// Facts on VRegs, for proof-carrying code verification. 167 facts: Vec<Option<Fact>>, 168 } 169 170 /// The result of `VCode::emit`. Contains all information computed 171 /// during emission: actual machine code, optionally a disassembly, 172 /// and optionally metadata about the code layout. 173 pub struct EmitResult { 174 /// The MachBuffer containing the machine code. 175 pub buffer: MachBufferFinalized<Stencil>, 176 177 /// Offset of each basic block, recorded during emission. Computed 178 /// only if `debug_value_labels` is non-empty. 179 pub bb_offsets: Vec<CodeOffset>, 180 181 /// Final basic-block edges, in terms of code offsets of 182 /// bb-starts. Computed only if `debug_value_labels` is non-empty. 183 pub bb_edges: Vec<(CodeOffset, CodeOffset)>, 184 185 /// Final length of function body. 186 pub func_body_len: CodeOffset, 187 188 /// The pretty-printed disassembly, if any. This uses the same 189 /// pretty-printing for MachInsts as the pre-regalloc VCode Debug 190 /// implementation, but additionally includes the prologue and 191 /// epilogue(s), and makes use of the regalloc results. 192 pub disasm: Option<String>, 193 194 /// Offsets of sized stackslots. 195 pub sized_stackslot_offsets: PrimaryMap<StackSlot, u32>, 196 197 /// Offsets of dynamic stackslots. 198 pub dynamic_stackslot_offsets: PrimaryMap<DynamicStackSlot, u32>, 199 200 /// Value-labels information (debug metadata). 201 pub value_labels_ranges: ValueLabelsRanges, 202 203 /// Stack frame size. 204 pub frame_size: u32, 205 } 206 207 /// A builder for a VCode function body. 208 /// 209 /// This builder has the ability to accept instructions in either 210 /// forward or reverse order, depending on the pass direction that 211 /// produces the VCode. The lowering from CLIF to VCode<MachInst> 212 /// ordinarily occurs in reverse order (in order to allow instructions 213 /// to be lowered only if used, and not merged) so a reversal will 214 /// occur at the end of lowering to ensure the VCode is in machine 215 /// order. 216 /// 217 /// If built in reverse, block and instruction indices used once the 218 /// VCode is built are relative to the final (reversed) order, not the 219 /// order of construction. Note that this means we do not know the 220 /// final block or instruction indices when building, so we do not 221 /// hand them out. (The user is assumed to know them when appending 222 /// terminator instructions with successor blocks.) 223 pub struct VCodeBuilder<I: VCodeInst> { 224 /// In-progress VCode. 225 pub(crate) vcode: VCode<I>, 226 227 /// In what direction is the build occurring? 228 direction: VCodeBuildDirection, 229 230 /// Debug-value label in-progress map, keyed by label. For each 231 /// label, we keep disjoint ranges mapping to vregs. We'll flatten 232 /// this into (vreg, range, label) tuples when done. 233 debug_info: FxHashMap<ValueLabel, Vec<(InsnIndex, InsnIndex, VReg)>>, 234 } 235 236 /// Direction in which a VCodeBuilder builds VCode. 237 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 238 pub enum VCodeBuildDirection { 239 // TODO: add `Forward` once we need it and can test it adequately. 240 /// Backward-build pass: we expect the producer to call `emit()` 241 /// with instructions in reverse program order within each block. 242 Backward, 243 } 244 245 impl<I: VCodeInst> VCodeBuilder<I> { 246 /// Create a new VCodeBuilder. 247 pub fn new( 248 sigs: SigSet, 249 abi: Callee<I::ABIMachineSpec>, 250 emit_info: I::Info, 251 block_order: BlockLoweringOrder, 252 constants: VCodeConstants, 253 direction: VCodeBuildDirection, 254 ) -> VCodeBuilder<I> { 255 let vcode = VCode::new(sigs, abi, emit_info, block_order, constants); 256 257 VCodeBuilder { 258 vcode, 259 direction, 260 debug_info: FxHashMap::default(), 261 } 262 } 263 264 pub fn init_retval_area(&mut self, vregs: &mut VRegAllocator<I>) -> CodegenResult<()> { 265 self.vcode.abi.init_retval_area(&self.vcode.sigs, vregs) 266 } 267 268 /// Access the ABI object. 269 pub fn abi(&self) -> &Callee<I::ABIMachineSpec> { 270 &self.vcode.abi 271 } 272 273 /// Access the ABI object. 274 pub fn abi_mut(&mut self) -> &mut Callee<I::ABIMachineSpec> { 275 &mut self.vcode.abi 276 } 277 278 pub fn sigs(&self) -> &SigSet { 279 &self.vcode.sigs 280 } 281 282 pub fn sigs_mut(&mut self) -> &mut SigSet { 283 &mut self.vcode.sigs 284 } 285 286 /// Access to the BlockLoweringOrder object. 287 pub fn block_order(&self) -> &BlockLoweringOrder { 288 &self.vcode.block_order 289 } 290 291 /// Set the current block as the entry block. 292 pub fn set_entry(&mut self, block: BlockIndex) { 293 self.vcode.entry = block; 294 } 295 296 /// End the current basic block. Must be called after emitting vcode insts 297 /// for IR insts and prior to ending the function (building the VCode). 298 pub fn end_bb(&mut self) { 299 let end_idx = self.vcode.insts.len(); 300 // Add the instruction index range to the list of blocks. 301 self.vcode.block_ranges.push_end(end_idx); 302 // End the successors list. 303 let succ_end = self.vcode.block_succs.len(); 304 self.vcode.block_succ_range.push_end(succ_end); 305 // End the blockparams list. 306 let block_params_end = self.vcode.block_params.len(); 307 self.vcode.block_params_range.push_end(block_params_end); 308 // End the branch blockparam args list. 309 let branch_block_arg_succ_end = self.vcode.branch_block_arg_range.len(); 310 self.vcode 311 .branch_block_arg_succ_range 312 .push_end(branch_block_arg_succ_end); 313 } 314 315 pub fn add_block_param(&mut self, param: VirtualReg) { 316 self.vcode.block_params.push(param.into()); 317 } 318 319 fn add_branch_args_for_succ(&mut self, args: &[Reg]) { 320 self.vcode 321 .branch_block_args 322 .extend(args.iter().map(|&arg| VReg::from(arg))); 323 let end = self.vcode.branch_block_args.len(); 324 self.vcode.branch_block_arg_range.push_end(end); 325 } 326 327 /// Push an instruction for the current BB and current IR inst 328 /// within the BB. 329 pub fn push(&mut self, insn: I, loc: RelSourceLoc) { 330 self.vcode.insts.push(insn); 331 self.vcode.srclocs.push(loc); 332 } 333 334 /// Add a successor block with branch args. 335 pub fn add_succ(&mut self, block: BlockIndex, args: &[Reg]) { 336 self.vcode.block_succs.push(block); 337 self.add_branch_args_for_succ(args); 338 } 339 340 /// Add a debug value label to a register. 341 pub fn add_value_label(&mut self, reg: Reg, label: ValueLabel) { 342 // We'll fix up labels in reverse(). Because we're generating 343 // code bottom-to-top, the liverange of the label goes *from* 344 // the last index at which was defined (or 0, which is the end 345 // of the eventual function) *to* just this instruction, and 346 // no further. 347 let inst = InsnIndex::new(self.vcode.insts.len()); 348 let labels = self.debug_info.entry(label).or_insert_with(|| vec![]); 349 let last = labels 350 .last() 351 .map(|(_start, end, _vreg)| *end) 352 .unwrap_or(InsnIndex::new(0)); 353 labels.push((last, inst, reg.into())); 354 } 355 356 /// Access the constants. 357 pub fn constants(&mut self) -> &mut VCodeConstants { 358 &mut self.vcode.constants 359 } 360 361 fn compute_preds_from_succs(&mut self) { 362 // Do a linear-time counting sort: first determine how many 363 // times each block appears as a successor. 364 let mut starts = vec![0u32; self.vcode.num_blocks()]; 365 for succ in &self.vcode.block_succs { 366 starts[succ.index()] += 1; 367 } 368 369 // Determine for each block the starting index where that 370 // block's predecessors should go. This is equivalent to the 371 // ranges we need to store in block_pred_range. 372 self.vcode.block_pred_range.reserve(starts.len()); 373 let mut end = 0; 374 for count in starts.iter_mut() { 375 let start = end; 376 end += *count; 377 *count = start; 378 self.vcode.block_pred_range.push_end(end as usize); 379 } 380 let end = end as usize; 381 debug_assert_eq!(end, self.vcode.block_succs.len()); 382 383 // Walk over the successors again, this time grouped by 384 // predecessor, and push the predecessor at the current 385 // starting position of each of its successors. We build 386 // each group of predecessors in whatever order Ranges::iter 387 // returns them; regalloc2 doesn't care. 388 self.vcode.block_preds.resize(end, BlockIndex::invalid()); 389 for (pred, range) in self.vcode.block_succ_range.iter() { 390 let pred = BlockIndex::new(pred); 391 for succ in &self.vcode.block_succs[range] { 392 let pos = &mut starts[succ.index()]; 393 self.vcode.block_preds[*pos as usize] = pred; 394 *pos += 1; 395 } 396 } 397 debug_assert!(self.vcode.block_preds.iter().all(|pred| pred.is_valid())); 398 } 399 400 /// Called once, when a build in Backward order is complete, to 401 /// perform the overall reversal (into final forward order) and 402 /// finalize metadata accordingly. 403 fn reverse_and_finalize(&mut self, vregs: &VRegAllocator<I>) { 404 let n_insts = self.vcode.insts.len(); 405 if n_insts == 0 { 406 return; 407 } 408 409 // Reverse the per-block and per-inst sequences. 410 self.vcode.block_ranges.reverse_index(); 411 self.vcode.block_ranges.reverse_target(n_insts); 412 // block_params_range is indexed by block (and blocks were 413 // traversed in reverse) so we reverse it; but block-param 414 // sequences in the concatenated vec can remain in reverse 415 // order (it is effectively an arena of arbitrarily-placed 416 // referenced sequences). 417 self.vcode.block_params_range.reverse_index(); 418 // Likewise, we reverse block_succ_range, but the block_succ 419 // concatenated array can remain as-is. 420 self.vcode.block_succ_range.reverse_index(); 421 self.vcode.insts.reverse(); 422 self.vcode.srclocs.reverse(); 423 // Likewise, branch_block_arg_succ_range is indexed by block 424 // so must be reversed. 425 self.vcode.branch_block_arg_succ_range.reverse_index(); 426 427 // To translate an instruction index *endpoint* in reversed 428 // order to forward order, compute `n_insts - i`. 429 // 430 // Why not `n_insts - 1 - i`? That would be correct to 431 // translate an individual instruction index (for ten insts 0 432 // to 9 inclusive, inst 0 becomes 9, and inst 9 becomes 433 // 0). But for the usual inclusive-start, exclusive-end range 434 // idiom, inclusive starts become exclusive ends and 435 // vice-versa, so e.g. an (inclusive) start of 0 becomes an 436 // (exclusive) end of 10. 437 let translate = |inst: InsnIndex| InsnIndex::new(n_insts - inst.index()); 438 439 // Generate debug-value labels based on per-label maps. 440 for (label, tuples) in &self.debug_info { 441 for &(start, end, vreg) in tuples { 442 let vreg = vregs.resolve_vreg_alias(vreg); 443 let fwd_start = translate(end); 444 let fwd_end = translate(start); 445 self.vcode 446 .debug_value_labels 447 .push((vreg, fwd_start, fwd_end, label.as_u32())); 448 } 449 } 450 451 // Now sort debug value labels by VReg, as required 452 // by regalloc2. 453 self.vcode 454 .debug_value_labels 455 .sort_unstable_by_key(|(vreg, _, _, _)| *vreg); 456 } 457 458 fn collect_operands(&mut self, vregs: &VRegAllocator<I>) { 459 let allocatable = PRegSet::from(self.vcode.machine_env()); 460 for (i, insn) in self.vcode.insts.iter_mut().enumerate() { 461 // Push operands from the instruction onto the operand list. 462 // 463 // We rename through the vreg alias table as we collect 464 // the operands. This is better than a separate post-pass 465 // over operands, because it has more cache locality: 466 // operands only need to pass through L1 once. This is 467 // also better than renaming instructions' 468 // operands/registers while lowering, because here we only 469 // need to do the `match` over the instruction to visit 470 // its register fields (which is slow, branchy code) once. 471 472 let mut op_collector = 473 OperandCollector::new(&mut self.vcode.operands, allocatable, |vreg| { 474 vregs.resolve_vreg_alias(vreg) 475 }); 476 insn.get_operands(&mut op_collector); 477 let (ops, clobbers) = op_collector.finish(); 478 self.vcode.operand_ranges.push_end(ops); 479 480 if clobbers != PRegSet::default() { 481 self.vcode.clobbers.insert(InsnIndex::new(i), clobbers); 482 } 483 484 if let Some((dst, src)) = insn.is_move() { 485 // We should never see non-virtual registers present in move 486 // instructions. 487 assert!( 488 src.is_virtual(), 489 "the real register {:?} was used as the source of a move instruction", 490 src 491 ); 492 assert!( 493 dst.to_reg().is_virtual(), 494 "the real register {:?} was used as the destination of a move instruction", 495 dst.to_reg() 496 ); 497 } 498 } 499 500 // Translate blockparam args via the vreg aliases table as well. 501 for arg in &mut self.vcode.branch_block_args { 502 let new_arg = vregs.resolve_vreg_alias(*arg); 503 trace!("operandcollector: block arg {:?} -> {:?}", arg, new_arg); 504 *arg = new_arg; 505 } 506 } 507 508 /// Build the final VCode. 509 pub fn build(mut self, mut vregs: VRegAllocator<I>) -> VCode<I> { 510 self.vcode.vreg_types = take(&mut vregs.vreg_types); 511 self.vcode.facts = take(&mut vregs.facts); 512 self.vcode.reftyped_vregs = take(&mut vregs.reftyped_vregs); 513 514 if self.direction == VCodeBuildDirection::Backward { 515 self.reverse_and_finalize(&vregs); 516 } 517 self.collect_operands(&vregs); 518 519 // Apply register aliases to the `reftyped_vregs` list since this list 520 // will be returned directly to `regalloc2` eventually and all 521 // operands/results of instructions will use the alias-resolved vregs 522 // from `regalloc2`'s perspective. 523 // 524 // Also note that `reftyped_vregs` can't have duplicates, so after the 525 // aliases are applied duplicates are removed. 526 for reg in self.vcode.reftyped_vregs.iter_mut() { 527 *reg = vregs.resolve_vreg_alias(*reg); 528 } 529 self.vcode.reftyped_vregs.sort(); 530 self.vcode.reftyped_vregs.dedup(); 531 532 self.compute_preds_from_succs(); 533 self.vcode.debug_value_labels.sort_unstable(); 534 535 // At this point, nothing in the vcode should mention any 536 // VReg which has been aliased. All the appropriate rewriting 537 // should have happened above. Just to be sure, let's 538 // double-check each field which has vregs. 539 // Note: can't easily check vcode.insts, resolved in collect_operands. 540 // Operands are resolved in collect_operands. 541 vregs.debug_assert_no_vreg_aliases(self.vcode.operands.iter().map(|op| op.vreg())); 542 // Currently block params are never aliased to another vreg. 543 vregs.debug_assert_no_vreg_aliases(self.vcode.block_params.iter().copied()); 544 // Branch block args are resolved in collect_operands. 545 vregs.debug_assert_no_vreg_aliases(self.vcode.branch_block_args.iter().copied()); 546 // Reftyped vregs are resolved above in this function. 547 vregs.debug_assert_no_vreg_aliases(self.vcode.reftyped_vregs.iter().copied()); 548 // Debug value labels are resolved in reverse_and_finalize. 549 vregs.debug_assert_no_vreg_aliases( 550 self.vcode.debug_value_labels.iter().map(|&(vreg, ..)| vreg), 551 ); 552 // Facts are resolved eagerly during set_vreg_alias. 553 vregs.debug_assert_no_vreg_aliases( 554 self.vcode 555 .facts 556 .iter() 557 .zip(&vregs.vreg_types) 558 .enumerate() 559 .filter(|(_, (fact, _))| fact.is_some()) 560 .map(|(vreg, (_, &ty))| { 561 let (regclasses, _) = I::rc_for_type(ty).unwrap(); 562 VReg::new(vreg, regclasses[0]) 563 }), 564 ); 565 566 self.vcode 567 } 568 } 569 570 /// Is this type a reference type? 571 fn is_reftype(ty: Type) -> bool { 572 ty == types::R64 || ty == types::R32 573 } 574 575 const NO_INST_OFFSET: CodeOffset = u32::MAX; 576 577 impl<I: VCodeInst> VCode<I> { 578 /// New empty VCode. 579 fn new( 580 sigs: SigSet, 581 abi: Callee<I::ABIMachineSpec>, 582 emit_info: I::Info, 583 block_order: BlockLoweringOrder, 584 constants: VCodeConstants, 585 ) -> VCode<I> { 586 let n_blocks = block_order.lowered_order().len(); 587 VCode { 588 sigs, 589 vreg_types: vec![], 590 insts: Vec::with_capacity(10 * n_blocks), 591 operands: Vec::with_capacity(30 * n_blocks), 592 operand_ranges: Ranges::with_capacity(10 * n_blocks), 593 clobbers: FxHashMap::default(), 594 srclocs: Vec::with_capacity(10 * n_blocks), 595 entry: BlockIndex::new(0), 596 block_ranges: Ranges::with_capacity(n_blocks), 597 block_succ_range: Ranges::with_capacity(n_blocks), 598 block_succs: Vec::with_capacity(n_blocks), 599 block_pred_range: Ranges::default(), 600 block_preds: Vec::new(), 601 block_params_range: Ranges::with_capacity(n_blocks), 602 block_params: Vec::with_capacity(5 * n_blocks), 603 branch_block_args: Vec::with_capacity(10 * n_blocks), 604 branch_block_arg_range: Ranges::with_capacity(2 * n_blocks), 605 branch_block_arg_succ_range: Ranges::with_capacity(n_blocks), 606 block_order, 607 abi, 608 emit_info, 609 reftyped_vregs: vec![], 610 constants, 611 debug_value_labels: vec![], 612 facts: vec![], 613 } 614 } 615 616 /// Get the ABI-dependent MachineEnv for managing register allocation. 617 pub fn machine_env(&self) -> &MachineEnv { 618 self.abi.machine_env(&self.sigs) 619 } 620 621 /// Get the number of blocks. Block indices will be in the range `0 .. 622 /// (self.num_blocks() - 1)`. 623 pub fn num_blocks(&self) -> usize { 624 self.block_ranges.len() 625 } 626 627 /// The number of lowered instructions. 628 pub fn num_insts(&self) -> usize { 629 self.insts.len() 630 } 631 632 fn compute_clobbers(&self, regalloc: ®alloc2::Output) -> Vec<Writable<RealReg>> { 633 let mut clobbered = PRegSet::default(); 634 635 // All moves are included in clobbers. 636 for (_, Edit::Move { to, .. }) in ®alloc.edits { 637 if let Some(preg) = to.as_reg() { 638 clobbered.add(preg); 639 } 640 } 641 642 for (i, range) in self.operand_ranges.iter() { 643 // Skip this instruction if not "included in clobbers" as 644 // per the MachInst. (Some backends use this to implement 645 // ABI specifics; e.g., excluding calls of the same ABI as 646 // the current function from clobbers, because by 647 // definition everything clobbered by the call can be 648 // clobbered by this function without saving as well.) 649 if !self.insts[i].is_included_in_clobbers() { 650 continue; 651 } 652 653 let operands = &self.operands[range.clone()]; 654 let allocs = ®alloc.allocs[range]; 655 for (operand, alloc) in operands.iter().zip(allocs.iter()) { 656 if operand.kind() == OperandKind::Def { 657 if let Some(preg) = alloc.as_reg() { 658 clobbered.add(preg); 659 } 660 } 661 } 662 663 // Also add explicitly-clobbered registers. 664 if let Some(&inst_clobbered) = self.clobbers.get(&InsnIndex::new(i)) { 665 clobbered.union_from(inst_clobbered); 666 } 667 } 668 669 clobbered 670 .into_iter() 671 .map(|preg| Writable::from_reg(RealReg::from(preg))) 672 .collect() 673 } 674 675 /// Emit the instructions to a `MachBuffer`, containing fixed-up 676 /// code and external reloc/trap/etc. records ready for use. Takes 677 /// the regalloc results as well. 678 /// 679 /// Returns the machine code itself, and optionally metadata 680 /// and/or a disassembly, as an `EmitResult`. The `VCode` itself 681 /// is consumed by the emission process. 682 pub fn emit( 683 mut self, 684 regalloc: ®alloc2::Output, 685 want_disasm: bool, 686 flags: &settings::Flags, 687 ctrl_plane: &mut ControlPlane, 688 ) -> EmitResult 689 where 690 I: VCodeInst, 691 { 692 // To write into disasm string. 693 use core::fmt::Write; 694 695 let _tt = timing::vcode_emit(); 696 let mut buffer = MachBuffer::new(); 697 let mut bb_starts: Vec<Option<CodeOffset>> = vec![]; 698 699 // The first M MachLabels are reserved for block indices. 700 buffer.reserve_labels_for_blocks(self.num_blocks()); 701 702 // Register all allocated constants with the `MachBuffer` to ensure that 703 // any references to the constants during instructions can be handled 704 // correctly. 705 buffer.register_constants(&self.constants); 706 707 // Construct the final order we emit code in: cold blocks at the end. 708 let mut final_order: SmallVec<[BlockIndex; 16]> = smallvec![]; 709 let mut cold_blocks: SmallVec<[BlockIndex; 16]> = smallvec![]; 710 for block in 0..self.num_blocks() { 711 let block = BlockIndex::new(block); 712 if self.block_order.is_cold(block) { 713 cold_blocks.push(block); 714 } else { 715 final_order.push(block); 716 } 717 } 718 final_order.extend(cold_blocks.clone()); 719 720 // Compute/save info we need for the prologue: clobbers and 721 // number of spillslots. 722 // 723 // We clone `abi` here because we will mutate it as we 724 // generate the prologue and set other info, but we can't 725 // mutate `VCode`. The info it usually carries prior to 726 // setting clobbers is fairly minimal so this should be 727 // relatively cheap. 728 let clobbers = self.compute_clobbers(regalloc); 729 self.abi 730 .compute_frame_layout(&self.sigs, regalloc.num_spillslots, clobbers); 731 732 // Emit blocks. 733 let mut cur_srcloc = None; 734 let mut last_offset = None; 735 let mut inst_offsets = vec![]; 736 let mut state = I::State::new(&self.abi, std::mem::take(ctrl_plane)); 737 738 let mut disasm = String::new(); 739 740 if !self.debug_value_labels.is_empty() { 741 inst_offsets.resize(self.insts.len(), NO_INST_OFFSET); 742 } 743 744 // Count edits per block ahead of time; this is needed for 745 // lookahead island emission. (We could derive it per-block 746 // with binary search in the edit list, but it's more 747 // efficient to do it in one pass here.) 748 let mut ra_edits_per_block: SmallVec<[u32; 64]> = smallvec![]; 749 let mut edit_idx = 0; 750 for block in 0..self.num_blocks() { 751 let end_inst = InsnIndex::new(self.block_ranges.get(block).end); 752 let start_edit_idx = edit_idx; 753 while edit_idx < regalloc.edits.len() && regalloc.edits[edit_idx].0.inst() < end_inst { 754 edit_idx += 1; 755 } 756 let end_edit_idx = edit_idx; 757 ra_edits_per_block.push((end_edit_idx - start_edit_idx) as u32); 758 } 759 760 let is_forward_edge_cfi_enabled = self.abi.is_forward_edge_cfi_enabled(); 761 let mut bb_padding = match flags.bb_padding_log2_minus_one() { 762 0 => Vec::new(), 763 n => vec![0; 1 << (n - 1)], 764 }; 765 let mut total_bb_padding = 0; 766 767 for (block_order_idx, &block) in final_order.iter().enumerate() { 768 trace!("emitting block {:?}", block); 769 770 // Call the new block hook for state 771 state.on_new_block(); 772 773 // Emit NOPs to align the block. 774 let new_offset = I::align_basic_block(buffer.cur_offset()); 775 while new_offset > buffer.cur_offset() { 776 // Pad with NOPs up to the aligned block offset. 777 let nop = I::gen_nop((new_offset - buffer.cur_offset()) as usize); 778 nop.emit(&mut buffer, &self.emit_info, &mut Default::default()); 779 } 780 assert_eq!(buffer.cur_offset(), new_offset); 781 782 let do_emit = |inst: &I, 783 disasm: &mut String, 784 buffer: &mut MachBuffer<I>, 785 state: &mut I::State| { 786 if want_disasm && !inst.is_args() { 787 let mut s = state.clone(); 788 writeln!(disasm, " {}", inst.pretty_print_inst(&mut s)).unwrap(); 789 } 790 inst.emit(buffer, &self.emit_info, state); 791 }; 792 793 // Is this the first block? Emit the prologue directly if so. 794 if block == self.entry { 795 trace!(" -> entry block"); 796 buffer.start_srcloc(Default::default()); 797 for inst in &self.abi.gen_prologue() { 798 do_emit(&inst, &mut disasm, &mut buffer, &mut state); 799 } 800 buffer.end_srcloc(); 801 } 802 803 // Now emit the regular block body. 804 805 buffer.bind_label(MachLabel::from_block(block), state.ctrl_plane_mut()); 806 807 if want_disasm { 808 writeln!(&mut disasm, "block{}:", block.index()).unwrap(); 809 } 810 811 if flags.machine_code_cfg_info() { 812 // Track BB starts. If we have backed up due to MachBuffer 813 // branch opts, note that the removed blocks were removed. 814 let cur_offset = buffer.cur_offset(); 815 if last_offset.is_some() && cur_offset <= last_offset.unwrap() { 816 for i in (0..bb_starts.len()).rev() { 817 if bb_starts[i].is_some() && cur_offset > bb_starts[i].unwrap() { 818 break; 819 } 820 bb_starts[i] = None; 821 } 822 } 823 bb_starts.push(Some(cur_offset)); 824 last_offset = Some(cur_offset); 825 } 826 827 if let Some(block_start) = I::gen_block_start( 828 self.block_order.is_indirect_branch_target(block), 829 is_forward_edge_cfi_enabled, 830 ) { 831 do_emit(&block_start, &mut disasm, &mut buffer, &mut state); 832 } 833 834 for inst_or_edit in regalloc.block_insts_and_edits(&self, block) { 835 match inst_or_edit { 836 InstOrEdit::Inst(iix) => { 837 if !self.debug_value_labels.is_empty() { 838 // If we need to produce debug info, 839 // record the offset of each instruction 840 // so that we can translate value-label 841 // ranges to machine-code offsets. 842 843 // Cold blocks violate monotonicity 844 // assumptions elsewhere (that 845 // instructions in inst-index order are in 846 // order in machine code), so we omit 847 // their offsets here. Value-label range 848 // generation below will skip empty ranges 849 // and ranges with to-offsets of zero. 850 if !self.block_order.is_cold(block) { 851 inst_offsets[iix.index()] = buffer.cur_offset(); 852 } 853 } 854 855 // Update the srcloc at this point in the buffer. 856 let srcloc = self.srclocs[iix.index()]; 857 if cur_srcloc != Some(srcloc) { 858 if cur_srcloc.is_some() { 859 buffer.end_srcloc(); 860 } 861 buffer.start_srcloc(srcloc); 862 cur_srcloc = Some(srcloc); 863 } 864 865 // If this is a safepoint, compute a stack map 866 // and pass it to the emit state. 867 if self.insts[iix.index()].is_safepoint() { 868 let mut safepoint_slots: SmallVec<[SpillSlot; 8]> = smallvec![]; 869 // Find the contiguous range of 870 // (progpoint, allocation) safepoint slot 871 // records in `regalloc.safepoint_slots` 872 // for this instruction index. 873 let safepoint_slots_start = regalloc 874 .safepoint_slots 875 .binary_search_by(|(progpoint, _alloc)| { 876 if progpoint.inst() >= iix { 877 std::cmp::Ordering::Greater 878 } else { 879 std::cmp::Ordering::Less 880 } 881 }) 882 .unwrap_err(); 883 884 for (_, alloc) in regalloc.safepoint_slots[safepoint_slots_start..] 885 .iter() 886 .take_while(|(progpoint, _)| progpoint.inst() == iix) 887 { 888 let slot = alloc.as_stack().unwrap(); 889 safepoint_slots.push(slot); 890 } 891 if !safepoint_slots.is_empty() { 892 let stack_map = self 893 .abi 894 .spillslots_to_stack_map(&safepoint_slots[..], &state); 895 state.pre_safepoint(stack_map); 896 } 897 } 898 899 // If the instruction we are about to emit is 900 // a return, place an epilogue at this point 901 // (and don't emit the return; the actual 902 // epilogue will contain it). 903 if self.insts[iix.index()].is_term() == MachTerminator::Ret { 904 for inst in self.abi.gen_epilogue() { 905 do_emit(&inst, &mut disasm, &mut buffer, &mut state); 906 } 907 } else { 908 // Update the operands for this inst using the 909 // allocations from the regalloc result. 910 let mut allocs = regalloc.inst_allocs(iix).iter(); 911 self.insts[iix.index()].get_operands( 912 &mut |reg: &mut Reg, constraint, _kind, _pos| { 913 let alloc = allocs 914 .next() 915 .expect("enough allocations for all operands") 916 .as_reg() 917 .expect("only register allocations, not stack allocations") 918 .into(); 919 920 if let OperandConstraint::FixedReg(rreg) = constraint { 921 debug_assert_eq!(Reg::from(rreg), alloc); 922 } 923 *reg = alloc; 924 }, 925 ); 926 debug_assert!(allocs.next().is_none()); 927 928 // Emit the instruction! 929 do_emit( 930 &self.insts[iix.index()], 931 &mut disasm, 932 &mut buffer, 933 &mut state, 934 ); 935 } 936 } 937 938 InstOrEdit::Edit(Edit::Move { from, to }) => { 939 // Create a move/spill/reload instruction and 940 // immediately emit it. 941 match (from.as_reg(), to.as_reg()) { 942 (Some(from), Some(to)) => { 943 // Reg-to-reg move. 944 let from_rreg = Reg::from(from); 945 let to_rreg = Writable::from_reg(Reg::from(to)); 946 debug_assert_eq!(from.class(), to.class()); 947 let ty = I::canonical_type_for_rc(from.class()); 948 let mv = I::gen_move(to_rreg, from_rreg, ty); 949 do_emit(&mv, &mut disasm, &mut buffer, &mut state); 950 } 951 (Some(from), None) => { 952 // Spill from register to spillslot. 953 let to = to.as_stack().unwrap(); 954 let from_rreg = RealReg::from(from); 955 let spill = self.abi.gen_spill(to, from_rreg); 956 do_emit(&spill, &mut disasm, &mut buffer, &mut state); 957 } 958 (None, Some(to)) => { 959 // Load from spillslot to register. 960 let from = from.as_stack().unwrap(); 961 let to_rreg = Writable::from_reg(RealReg::from(to)); 962 let reload = self.abi.gen_reload(to_rreg, from); 963 do_emit(&reload, &mut disasm, &mut buffer, &mut state); 964 } 965 (None, None) => { 966 panic!("regalloc2 should have eliminated stack-to-stack moves!"); 967 } 968 } 969 } 970 } 971 } 972 973 if cur_srcloc.is_some() { 974 buffer.end_srcloc(); 975 cur_srcloc = None; 976 } 977 978 // Do we need an island? Get the worst-case size of the next BB, add 979 // it to the optional padding behind the block, and pass this to the 980 // `MachBuffer` to determine if an island is necessary. 981 let worst_case_next_bb = if block_order_idx < final_order.len() - 1 { 982 let next_block = final_order[block_order_idx + 1]; 983 let next_block_range = self.block_ranges.get(next_block.index()); 984 let next_block_size = next_block_range.len() as u32; 985 let next_block_ra_insertions = ra_edits_per_block[next_block.index()]; 986 I::worst_case_size() * (next_block_size + next_block_ra_insertions) 987 } else { 988 0 989 }; 990 let padding = if bb_padding.is_empty() { 991 0 992 } else { 993 bb_padding.len() as u32 + I::LabelUse::ALIGN - 1 994 }; 995 if buffer.island_needed(padding + worst_case_next_bb) { 996 buffer.emit_island(padding + worst_case_next_bb, ctrl_plane); 997 } 998 999 // Insert padding, if configured, to stress the `MachBuffer`'s 1000 // relocation and island calculations. 1001 // 1002 // Padding can get quite large during fuzzing though so place a 1003 // total cap on it where when a per-function threshold is exceeded 1004 // the padding is turned back down to zero. This avoids a small-ish 1005 // test case generating a GB+ memory footprint in Cranelift for 1006 // example. 1007 if !bb_padding.is_empty() { 1008 buffer.put_data(&bb_padding); 1009 buffer.align_to(I::LabelUse::ALIGN); 1010 total_bb_padding += bb_padding.len(); 1011 if total_bb_padding > (150 << 20) { 1012 bb_padding = Vec::new(); 1013 } 1014 } 1015 } 1016 1017 // Do any optimizations on branches at tail of buffer, as if we had 1018 // bound one last label. 1019 buffer.optimize_branches(ctrl_plane); 1020 1021 // emission state is not needed anymore, move control plane back out 1022 *ctrl_plane = state.take_ctrl_plane(); 1023 1024 let func_body_len = buffer.cur_offset(); 1025 1026 // Create `bb_edges` and final (filtered) `bb_starts`. 1027 let mut bb_edges = vec![]; 1028 let mut bb_offsets = vec![]; 1029 if flags.machine_code_cfg_info() { 1030 for block in 0..self.num_blocks() { 1031 if bb_starts[block].is_none() { 1032 // Block was deleted by MachBuffer; skip. 1033 continue; 1034 } 1035 let from = bb_starts[block].unwrap(); 1036 1037 bb_offsets.push(from); 1038 // Resolve each `succ` label and add edges. 1039 let succs = self.block_succs(BlockIndex::new(block)); 1040 for &succ in succs.iter() { 1041 let to = buffer.resolve_label_offset(MachLabel::from_block(succ)); 1042 bb_edges.push((from, to)); 1043 } 1044 } 1045 } 1046 1047 self.monotonize_inst_offsets(&mut inst_offsets[..], func_body_len); 1048 let value_labels_ranges = 1049 self.compute_value_labels_ranges(regalloc, &inst_offsets[..], func_body_len); 1050 let frame_size = self.abi.frame_size(); 1051 1052 EmitResult { 1053 buffer: buffer.finish(&self.constants, ctrl_plane), 1054 bb_offsets, 1055 bb_edges, 1056 func_body_len, 1057 disasm: if want_disasm { Some(disasm) } else { None }, 1058 sized_stackslot_offsets: self.abi.sized_stackslot_offsets().clone(), 1059 dynamic_stackslot_offsets: self.abi.dynamic_stackslot_offsets().clone(), 1060 value_labels_ranges, 1061 frame_size, 1062 } 1063 } 1064 1065 fn monotonize_inst_offsets(&self, inst_offsets: &mut [CodeOffset], func_body_len: u32) { 1066 if self.debug_value_labels.is_empty() { 1067 return; 1068 } 1069 1070 // During emission, branch removal can make offsets of instructions incorrect. 1071 // Consider the following sequence: [insi][jmp0][jmp1][jmp2][insj] 1072 // It will be recorded as (say): [30] [34] [38] [42] [<would be 46>] 1073 // When the jumps get removed we are left with (in "inst_offsets"): 1074 // [insi][jmp0][jmp1][jmp2][insj][...] 1075 // [30] [34] [38] [42] [34] 1076 // Which violates the monotonicity invariant. This method sets offsets of these 1077 // removed instructions such as to make them appear zero-sized: 1078 // [insi][jmp0][jmp1][jmp2][insj][...] 1079 // [30] [34] [34] [34] [34] 1080 // 1081 let mut next_offset = func_body_len; 1082 for inst_index in (0..(inst_offsets.len() - 1)).rev() { 1083 let inst_offset = inst_offsets[inst_index]; 1084 1085 // Not all instructions get their offsets recorded. 1086 if inst_offset == NO_INST_OFFSET { 1087 continue; 1088 } 1089 1090 if inst_offset > next_offset { 1091 trace!( 1092 "Fixing code offset of the removed Inst {}: {} -> {}", 1093 inst_index, 1094 inst_offset, 1095 next_offset 1096 ); 1097 inst_offsets[inst_index] = next_offset; 1098 continue; 1099 } 1100 1101 next_offset = inst_offset; 1102 } 1103 } 1104 1105 fn compute_value_labels_ranges( 1106 &self, 1107 regalloc: ®alloc2::Output, 1108 inst_offsets: &[CodeOffset], 1109 func_body_len: u32, 1110 ) -> ValueLabelsRanges { 1111 if self.debug_value_labels.is_empty() { 1112 return ValueLabelsRanges::default(); 1113 } 1114 1115 let mut value_labels_ranges: ValueLabelsRanges = HashMap::new(); 1116 for &(label, from, to, alloc) in ®alloc.debug_locations { 1117 let ranges = value_labels_ranges 1118 .entry(ValueLabel::from_u32(label)) 1119 .or_insert_with(|| vec![]); 1120 let from_offset = inst_offsets[from.inst().index()]; 1121 let to_offset = if to.inst().index() == inst_offsets.len() { 1122 func_body_len 1123 } else { 1124 inst_offsets[to.inst().index()] 1125 }; 1126 1127 // Empty ranges or unavailable offsets can happen 1128 // due to cold blocks and branch removal (see above). 1129 if from_offset == NO_INST_OFFSET 1130 || to_offset == NO_INST_OFFSET 1131 || from_offset == to_offset 1132 { 1133 continue; 1134 } 1135 1136 let loc = if let Some(preg) = alloc.as_reg() { 1137 LabelValueLoc::Reg(Reg::from(preg)) 1138 } else { 1139 let slot = alloc.as_stack().unwrap(); 1140 let slot_offset = self.abi.get_spillslot_offset(slot); 1141 let slot_base_to_caller_sp_offset = self.abi.slot_base_to_caller_sp_offset(); 1142 let caller_sp_to_cfa_offset = 1143 crate::isa::unwind::systemv::caller_sp_to_cfa_offset(); 1144 // NOTE: this is a negative offset because it's relative to the caller's SP 1145 let cfa_to_sp_offset = 1146 -((slot_base_to_caller_sp_offset + caller_sp_to_cfa_offset) as i64); 1147 LabelValueLoc::CFAOffset(cfa_to_sp_offset + slot_offset) 1148 }; 1149 1150 // ValueLocRanges are recorded by *instruction-end 1151 // offset*. `from_offset` is the *start* of the 1152 // instruction; that is the same as the end of another 1153 // instruction, so we only want to begin coverage once 1154 // we are past the previous instruction's end. 1155 let start = from_offset + 1; 1156 1157 // Likewise, `end` is exclusive, but we want to 1158 // *include* the end of the last 1159 // instruction. `to_offset` is the start of the 1160 // `to`-instruction, which is the exclusive end, i.e., 1161 // the first instruction not covered. That 1162 // instruction's start is the same as the end of the 1163 // last instruction that is included, so we go one 1164 // byte further to be sure to include it. 1165 let end = to_offset + 1; 1166 1167 // Coalesce adjacent ranges that for the same location 1168 // to minimize output size here and for the consumers. 1169 if let Some(last_loc_range) = ranges.last_mut() { 1170 if last_loc_range.loc == loc && last_loc_range.end == start { 1171 trace!( 1172 "Extending debug range for VL{} in {:?} to {}", 1173 label, 1174 loc, 1175 end 1176 ); 1177 last_loc_range.end = end; 1178 continue; 1179 } 1180 } 1181 1182 trace!( 1183 "Recording debug range for VL{} in {:?}: [Inst {}..Inst {}) [{}..{})", 1184 label, 1185 loc, 1186 from.inst().index(), 1187 to.inst().index(), 1188 start, 1189 end 1190 ); 1191 1192 ranges.push(ValueLocRange { loc, start, end }); 1193 } 1194 1195 value_labels_ranges 1196 } 1197 1198 /// Get the IR block for a BlockIndex, if one exists. 1199 pub fn bindex_to_bb(&self, block: BlockIndex) -> Option<ir::Block> { 1200 self.block_order.lowered_order()[block.index()].orig_block() 1201 } 1202 1203 /// Get the type of a VReg. 1204 pub fn vreg_type(&self, vreg: VReg) -> Type { 1205 self.vreg_types[vreg.vreg()] 1206 } 1207 1208 /// Get the fact, if any, for a given VReg. 1209 pub fn vreg_fact(&self, vreg: VReg) -> Option<&Fact> { 1210 self.facts[vreg.vreg()].as_ref() 1211 } 1212 1213 /// Set the fact for a given VReg. 1214 pub fn set_vreg_fact(&mut self, vreg: VReg, fact: Fact) { 1215 trace!("set fact on {}: {:?}", vreg, fact); 1216 self.facts[vreg.vreg()] = Some(fact); 1217 } 1218 1219 /// Does a given instruction define any facts? 1220 pub fn inst_defines_facts(&self, inst: InsnIndex) -> bool { 1221 self.inst_operands(inst) 1222 .iter() 1223 .filter(|o| o.kind() == OperandKind::Def) 1224 .map(|o| o.vreg()) 1225 .any(|vreg| self.facts[vreg.vreg()].is_some()) 1226 } 1227 } 1228 1229 impl<I: VCodeInst> std::ops::Index<InsnIndex> for VCode<I> { 1230 type Output = I; 1231 fn index(&self, idx: InsnIndex) -> &Self::Output { 1232 &self.insts[idx.index()] 1233 } 1234 } 1235 1236 impl<I: VCodeInst> RegallocFunction for VCode<I> { 1237 fn num_insts(&self) -> usize { 1238 self.insts.len() 1239 } 1240 1241 fn num_blocks(&self) -> usize { 1242 self.block_ranges.len() 1243 } 1244 1245 fn entry_block(&self) -> BlockIndex { 1246 self.entry 1247 } 1248 1249 fn block_insns(&self, block: BlockIndex) -> InstRange { 1250 let range = self.block_ranges.get(block.index()); 1251 InstRange::forward(InsnIndex::new(range.start), InsnIndex::new(range.end)) 1252 } 1253 1254 fn block_succs(&self, block: BlockIndex) -> &[BlockIndex] { 1255 let range = self.block_succ_range.get(block.index()); 1256 &self.block_succs[range] 1257 } 1258 1259 fn block_preds(&self, block: BlockIndex) -> &[BlockIndex] { 1260 let range = self.block_pred_range.get(block.index()); 1261 &self.block_preds[range] 1262 } 1263 1264 fn block_params(&self, block: BlockIndex) -> &[VReg] { 1265 // As a special case we don't return block params for the entry block, as all the arguments 1266 // will be defined by the `Inst::Args` instruction. 1267 if block == self.entry { 1268 return &[]; 1269 } 1270 1271 let range = self.block_params_range.get(block.index()); 1272 &self.block_params[range] 1273 } 1274 1275 fn branch_blockparams(&self, block: BlockIndex, _insn: InsnIndex, succ_idx: usize) -> &[VReg] { 1276 let succ_range = self.branch_block_arg_succ_range.get(block.index()); 1277 debug_assert!(succ_idx < succ_range.len()); 1278 let branch_block_args = self.branch_block_arg_range.get(succ_range.start + succ_idx); 1279 &self.branch_block_args[branch_block_args] 1280 } 1281 1282 fn is_ret(&self, insn: InsnIndex) -> bool { 1283 match self.insts[insn.index()].is_term() { 1284 // We treat blocks terminated by an unconditional trap like a return for regalloc. 1285 MachTerminator::None => self.insts[insn.index()].is_trap(), 1286 MachTerminator::Ret | MachTerminator::RetCall => true, 1287 MachTerminator::Uncond | MachTerminator::Cond | MachTerminator::Indirect => false, 1288 } 1289 } 1290 1291 fn is_branch(&self, insn: InsnIndex) -> bool { 1292 match self.insts[insn.index()].is_term() { 1293 MachTerminator::Cond | MachTerminator::Uncond | MachTerminator::Indirect => true, 1294 _ => false, 1295 } 1296 } 1297 1298 fn requires_refs_on_stack(&self, insn: InsnIndex) -> bool { 1299 self.insts[insn.index()].is_safepoint() 1300 } 1301 1302 fn inst_operands(&self, insn: InsnIndex) -> &[Operand] { 1303 let range = self.operand_ranges.get(insn.index()); 1304 &self.operands[range] 1305 } 1306 1307 fn inst_clobbers(&self, insn: InsnIndex) -> PRegSet { 1308 self.clobbers.get(&insn).cloned().unwrap_or_default() 1309 } 1310 1311 fn num_vregs(&self) -> usize { 1312 self.vreg_types.len() 1313 } 1314 1315 fn reftype_vregs(&self) -> &[VReg] { 1316 &self.reftyped_vregs 1317 } 1318 1319 fn debug_value_labels(&self) -> &[(VReg, InsnIndex, InsnIndex, u32)] { 1320 &self.debug_value_labels 1321 } 1322 1323 fn spillslot_size(&self, regclass: RegClass) -> usize { 1324 self.abi.get_spillslot_size(regclass) as usize 1325 } 1326 1327 fn allow_multiple_vreg_defs(&self) -> bool { 1328 // At least the s390x backend requires this, because the 1329 // `Loop` pseudo-instruction aggregates all Operands so pinned 1330 // vregs (RealRegs) may occur more than once. 1331 true 1332 } 1333 } 1334 1335 impl<I: VCodeInst> Debug for VRegAllocator<I> { 1336 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 1337 writeln!(f, "VRegAllocator {{")?; 1338 1339 let mut alias_keys = self.vreg_aliases.keys().cloned().collect::<Vec<_>>(); 1340 alias_keys.sort_unstable(); 1341 for key in alias_keys { 1342 let dest = self.vreg_aliases.get(&key).unwrap(); 1343 writeln!(f, " {:?} := {:?}", Reg::from(key), Reg::from(*dest))?; 1344 } 1345 1346 for (vreg, fact) in self.facts.iter().enumerate() { 1347 if let Some(fact) = fact { 1348 writeln!(f, " v{} ! {}", vreg, fact)?; 1349 } 1350 } 1351 1352 writeln!(f, "}}") 1353 } 1354 } 1355 1356 impl<I: VCodeInst> fmt::Debug for VCode<I> { 1357 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 1358 writeln!(f, "VCode {{")?; 1359 writeln!(f, " Entry block: {}", self.entry.index())?; 1360 1361 let mut state = Default::default(); 1362 1363 for block in 0..self.num_blocks() { 1364 let block = BlockIndex::new(block); 1365 writeln!(f, "Block {}:", block.index())?; 1366 if let Some(bb) = self.bindex_to_bb(block) { 1367 writeln!(f, " (original IR block: {})", bb)?; 1368 } 1369 for succ in self.block_succs(block) { 1370 writeln!(f, " (successor: Block {})", succ.index())?; 1371 } 1372 for inst in self.block_ranges.get(block.index()) { 1373 writeln!( 1374 f, 1375 " Inst {}: {}", 1376 inst, 1377 self.insts[inst].pretty_print_inst(&mut state) 1378 )?; 1379 if !self.operands.is_empty() { 1380 for operand in self.inst_operands(InsnIndex::new(inst)) { 1381 if operand.kind() == OperandKind::Def { 1382 if let Some(fact) = &self.facts[operand.vreg().vreg()] { 1383 writeln!(f, " v{} ! {}", operand.vreg().vreg(), fact)?; 1384 } 1385 } 1386 } 1387 } 1388 } 1389 } 1390 1391 writeln!(f, "}}")?; 1392 Ok(()) 1393 } 1394 } 1395 1396 /// This structure manages VReg allocation during the lifetime of the VCodeBuilder. 1397 pub struct VRegAllocator<I> { 1398 /// VReg IR-level types. 1399 vreg_types: Vec<Type>, 1400 1401 /// Reference-typed `regalloc2::VReg`s. The regalloc requires 1402 /// these in a dense slice (as opposed to querying the 1403 /// reftype-status of each vreg) for efficient iteration. 1404 reftyped_vregs: Vec<VReg>, 1405 1406 /// VReg aliases. When the final VCode is built we rewrite all 1407 /// uses of the keys in this table to their replacement values. 1408 /// 1409 /// We use these aliases to rename an instruction's expected 1410 /// result vregs to the returned vregs from lowering, which are 1411 /// usually freshly-allocated temps. 1412 vreg_aliases: FxHashMap<regalloc2::VReg, regalloc2::VReg>, 1413 1414 /// A deferred error, to be bubbled up to the top level of the 1415 /// lowering algorithm. We take this approach because we cannot 1416 /// currently propagate a `Result` upward through ISLE code (the 1417 /// lowering rules) or some ABI code. 1418 deferred_error: Option<CodegenError>, 1419 1420 /// Facts on VRegs, for proof-carrying code. 1421 facts: Vec<Option<Fact>>, 1422 1423 /// The type of instruction that this allocator makes registers for. 1424 _inst: core::marker::PhantomData<I>, 1425 } 1426 1427 impl<I: VCodeInst> VRegAllocator<I> { 1428 /// Make a new VRegAllocator. 1429 pub fn with_capacity(capacity: usize) -> Self { 1430 let capacity = first_user_vreg_index() + capacity; 1431 let mut vreg_types = Vec::with_capacity(capacity); 1432 vreg_types.resize(first_user_vreg_index(), types::INVALID); 1433 Self { 1434 vreg_types, 1435 reftyped_vregs: vec![], 1436 vreg_aliases: FxHashMap::with_capacity_and_hasher(capacity, Default::default()), 1437 deferred_error: None, 1438 facts: Vec::with_capacity(capacity), 1439 _inst: core::marker::PhantomData::default(), 1440 } 1441 } 1442 1443 /// Allocate a fresh ValueRegs. 1444 pub fn alloc(&mut self, ty: Type) -> CodegenResult<ValueRegs<Reg>> { 1445 if self.deferred_error.is_some() { 1446 return Err(CodegenError::CodeTooLarge); 1447 } 1448 let v = self.vreg_types.len(); 1449 let (regclasses, tys) = I::rc_for_type(ty)?; 1450 if v + regclasses.len() >= VReg::MAX { 1451 return Err(CodegenError::CodeTooLarge); 1452 } 1453 1454 let regs: ValueRegs<Reg> = match regclasses { 1455 &[rc0] => ValueRegs::one(VReg::new(v, rc0).into()), 1456 &[rc0, rc1] => ValueRegs::two(VReg::new(v, rc0).into(), VReg::new(v + 1, rc1).into()), 1457 // We can extend this if/when we support 32-bit targets; e.g., 1458 // an i128 on a 32-bit machine will need up to four machine regs 1459 // for a `Value`. 1460 _ => panic!("Value must reside in 1 or 2 registers"), 1461 }; 1462 for (®_ty, ®) in tys.iter().zip(regs.regs().iter()) { 1463 let vreg = reg.to_virtual_reg().unwrap(); 1464 debug_assert_eq!(self.vreg_types.len(), vreg.index()); 1465 self.vreg_types.push(reg_ty); 1466 if is_reftype(reg_ty) { 1467 self.reftyped_vregs.push(vreg.into()); 1468 } 1469 } 1470 1471 // Create empty facts for each allocated vreg. 1472 self.facts.resize(self.vreg_types.len(), None); 1473 1474 Ok(regs) 1475 } 1476 1477 /// Allocate a fresh ValueRegs, deferring any out-of-vregs 1478 /// errors. This is useful in places where we cannot bubble a 1479 /// `CodegenResult` upward easily, and which are known to be 1480 /// invoked from within the lowering loop that checks the deferred 1481 /// error status below. 1482 pub fn alloc_with_deferred_error(&mut self, ty: Type) -> ValueRegs<Reg> { 1483 match self.alloc(ty) { 1484 Ok(x) => x, 1485 Err(e) => { 1486 self.deferred_error = Some(e); 1487 self.bogus_for_deferred_error(ty) 1488 } 1489 } 1490 } 1491 1492 /// Take any deferred error that was accumulated by `alloc_with_deferred_error`. 1493 pub fn take_deferred_error(&mut self) -> Option<CodegenError> { 1494 self.deferred_error.take() 1495 } 1496 1497 /// Produce an bogus VReg placeholder with the proper number of 1498 /// registers for the given type. This is meant to be used with 1499 /// deferred allocation errors (see `Lower::alloc_tmp()`). 1500 fn bogus_for_deferred_error(&self, ty: Type) -> ValueRegs<Reg> { 1501 let (regclasses, _tys) = I::rc_for_type(ty).expect("must have valid type"); 1502 match regclasses { 1503 &[rc0] => ValueRegs::one(VReg::new(0, rc0).into()), 1504 &[rc0, rc1] => ValueRegs::two(VReg::new(0, rc0).into(), VReg::new(1, rc1).into()), 1505 _ => panic!("Value must reside in 1 or 2 registers"), 1506 } 1507 } 1508 1509 /// Rewrite any mention of `from` into `to`. 1510 pub fn set_vreg_alias(&mut self, from: Reg, to: Reg) { 1511 let from = from.into(); 1512 let resolved_to = self.resolve_vreg_alias(to.into()); 1513 // Disallow cycles (see below). 1514 assert_ne!(resolved_to, from); 1515 1516 // Maintain the invariant that PCC facts only exist on vregs 1517 // which aren't aliases. We want to preserve whatever was 1518 // stated about the vreg before its producer was lowered. 1519 if let Some(fact) = self.facts[from.vreg()].take() { 1520 self.set_fact(resolved_to, fact); 1521 } 1522 1523 let old_alias = self.vreg_aliases.insert(from, resolved_to); 1524 debug_assert_eq!(old_alias, None); 1525 } 1526 1527 fn resolve_vreg_alias(&self, mut vreg: regalloc2::VReg) -> regalloc2::VReg { 1528 // We prevent cycles from existing by resolving targets of 1529 // aliases eagerly before setting them. If the target resolves 1530 // to the origin of the alias, then a cycle would be created 1531 // and the alias is disallowed. Because of the structure of 1532 // SSA code (one instruction can refer to another's defs but 1533 // not vice-versa, except indirectly through 1534 // phis/blockparams), cycles should not occur as we use 1535 // aliases to redirect vregs to the temps that actually define 1536 // them. 1537 while let Some(to) = self.vreg_aliases.get(&vreg) { 1538 vreg = *to; 1539 } 1540 vreg 1541 } 1542 1543 #[inline] 1544 fn debug_assert_no_vreg_aliases(&self, mut list: impl Iterator<Item = VReg>) { 1545 debug_assert!(list.all(|vreg| !self.vreg_aliases.contains_key(&vreg))); 1546 } 1547 1548 /// Set the proof-carrying code fact on a given virtual register. 1549 /// 1550 /// Returns the old fact, if any (only one fact can be stored). 1551 fn set_fact(&mut self, vreg: regalloc2::VReg, fact: Fact) -> Option<Fact> { 1552 trace!("vreg {:?} has fact: {:?}", vreg, fact); 1553 debug_assert!(!self.vreg_aliases.contains_key(&vreg)); 1554 self.facts[vreg.vreg()].replace(fact) 1555 } 1556 1557 /// Set a fact only if one doesn't already exist. 1558 pub fn set_fact_if_missing(&mut self, vreg: VirtualReg, fact: Fact) { 1559 let vreg = self.resolve_vreg_alias(vreg.into()); 1560 if self.facts[vreg.vreg()].is_none() { 1561 self.set_fact(vreg, fact); 1562 } 1563 } 1564 1565 /// Allocate a fresh ValueRegs, with a given fact to apply if 1566 /// the value fits in one VReg. 1567 pub fn alloc_with_maybe_fact( 1568 &mut self, 1569 ty: Type, 1570 fact: Option<Fact>, 1571 ) -> CodegenResult<ValueRegs<Reg>> { 1572 let result = self.alloc(ty)?; 1573 1574 // Ensure that we don't lose a fact on a value that splits 1575 // into multiple VRegs. 1576 assert!(result.len() == 1 || fact.is_none()); 1577 if let Some(fact) = fact { 1578 self.set_fact(result.regs()[0].into(), fact); 1579 } 1580 1581 Ok(result) 1582 } 1583 } 1584 1585 /// This structure tracks the large constants used in VCode that will be emitted separately by the 1586 /// [MachBuffer]. 1587 /// 1588 /// First, during the lowering phase, constants are inserted using 1589 /// [VCodeConstants.insert]; an intermediate handle, `VCodeConstant`, tracks what constants are 1590 /// used in this phase. Some deduplication is performed, when possible, as constant 1591 /// values are inserted. 1592 /// 1593 /// Secondly, during the emission phase, the [MachBuffer] assigns [MachLabel]s for each of the 1594 /// constants so that instructions can refer to the value's memory location. The [MachBuffer] 1595 /// then writes the constant values to the buffer. 1596 #[derive(Default)] 1597 pub struct VCodeConstants { 1598 constants: PrimaryMap<VCodeConstant, VCodeConstantData>, 1599 pool_uses: HashMap<Constant, VCodeConstant>, 1600 well_known_uses: HashMap<*const [u8], VCodeConstant>, 1601 u64s: HashMap<[u8; 8], VCodeConstant>, 1602 } 1603 impl VCodeConstants { 1604 /// Initialize the structure with the expected number of constants. 1605 pub fn with_capacity(expected_num_constants: usize) -> Self { 1606 Self { 1607 constants: PrimaryMap::with_capacity(expected_num_constants), 1608 pool_uses: HashMap::with_capacity(expected_num_constants), 1609 well_known_uses: HashMap::new(), 1610 u64s: HashMap::new(), 1611 } 1612 } 1613 1614 /// Insert a constant; using this method indicates that a constant value will be used and thus 1615 /// will be emitted to the `MachBuffer`. The current implementation can deduplicate constants 1616 /// that are [VCodeConstantData::Pool] or [VCodeConstantData::WellKnown] but not 1617 /// [VCodeConstantData::Generated]. 1618 pub fn insert(&mut self, data: VCodeConstantData) -> VCodeConstant { 1619 match data { 1620 VCodeConstantData::Generated(_) => self.constants.push(data), 1621 VCodeConstantData::Pool(constant, _) => match self.pool_uses.get(&constant) { 1622 None => { 1623 let vcode_constant = self.constants.push(data); 1624 self.pool_uses.insert(constant, vcode_constant); 1625 vcode_constant 1626 } 1627 Some(&vcode_constant) => vcode_constant, 1628 }, 1629 VCodeConstantData::WellKnown(data_ref) => { 1630 match self.well_known_uses.entry(data_ref as *const [u8]) { 1631 Entry::Vacant(v) => { 1632 let vcode_constant = self.constants.push(data); 1633 v.insert(vcode_constant); 1634 vcode_constant 1635 } 1636 Entry::Occupied(o) => *o.get(), 1637 } 1638 } 1639 VCodeConstantData::U64(value) => match self.u64s.entry(value) { 1640 Entry::Vacant(v) => { 1641 let vcode_constant = self.constants.push(data); 1642 v.insert(vcode_constant); 1643 vcode_constant 1644 } 1645 Entry::Occupied(o) => *o.get(), 1646 }, 1647 } 1648 } 1649 1650 /// Return the number of constants inserted. 1651 pub fn len(&self) -> usize { 1652 self.constants.len() 1653 } 1654 1655 /// Iterate over the `VCodeConstant` keys inserted in this structure. 1656 pub fn keys(&self) -> Keys<VCodeConstant> { 1657 self.constants.keys() 1658 } 1659 1660 /// Iterate over the `VCodeConstant` keys and the data (as a byte slice) inserted in this 1661 /// structure. 1662 pub fn iter(&self) -> impl Iterator<Item = (VCodeConstant, &VCodeConstantData)> { 1663 self.constants.iter() 1664 } 1665 1666 /// Returns the data associated with the specified constant. 1667 pub fn get(&self, c: VCodeConstant) -> &VCodeConstantData { 1668 &self.constants[c] 1669 } 1670 1671 /// Checks if the given [VCodeConstantData] is registered as 1672 /// used by the pool. 1673 pub fn pool_uses(&self, constant: &VCodeConstantData) -> bool { 1674 match constant { 1675 VCodeConstantData::Pool(c, _) => self.pool_uses.contains_key(c), 1676 _ => false, 1677 } 1678 } 1679 } 1680 1681 /// A use of a constant by one or more VCode instructions; see [VCodeConstants]. 1682 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 1683 pub struct VCodeConstant(u32); 1684 entity_impl!(VCodeConstant); 1685 1686 /// Identify the different types of constant that can be inserted into [VCodeConstants]. Tracking 1687 /// these separately instead of as raw byte buffers allows us to avoid some duplication. 1688 pub enum VCodeConstantData { 1689 /// A constant already present in the Cranelift IR 1690 /// [ConstantPool](crate::ir::constant::ConstantPool). 1691 Pool(Constant, ConstantData), 1692 /// A reference to a well-known constant value that is statically encoded within the compiler. 1693 WellKnown(&'static [u8]), 1694 /// A constant value generated during lowering; the value may depend on the instruction context 1695 /// which makes it difficult to de-duplicate--if possible, use other variants. 1696 Generated(ConstantData), 1697 /// A constant of at most 64 bits. These are deduplicated as 1698 /// well. Stored as a fixed-size array of `u8` so that we do not 1699 /// encounter endianness problems when cross-compiling. 1700 U64([u8; 8]), 1701 } 1702 impl VCodeConstantData { 1703 /// Retrieve the constant data as a byte slice. 1704 pub fn as_slice(&self) -> &[u8] { 1705 match self { 1706 VCodeConstantData::Pool(_, d) | VCodeConstantData::Generated(d) => d.as_slice(), 1707 VCodeConstantData::WellKnown(d) => d, 1708 VCodeConstantData::U64(value) => &value[..], 1709 } 1710 } 1711 1712 /// Calculate the alignment of the constant data. 1713 pub fn alignment(&self) -> u32 { 1714 if self.as_slice().len() <= 8 { 1715 8 1716 } else { 1717 16 1718 } 1719 } 1720 } 1721 1722 #[cfg(test)] 1723 mod test { 1724 use super::*; 1725 use std::mem::size_of; 1726 1727 #[test] 1728 fn size_of_constant_structs() { 1729 assert_eq!(size_of::<Constant>(), 4); 1730 assert_eq!(size_of::<VCodeConstant>(), 4); 1731 assert_eq!(size_of::<ConstantData>(), 24); 1732 assert_eq!(size_of::<VCodeConstantData>(), 32); 1733 assert_eq!( 1734 size_of::<PrimaryMap<VCodeConstant, VCodeConstantData>>(), 1735 24 1736 ); 1737 // TODO The VCodeConstants structure's memory size could be further optimized. 1738 // With certain versions of Rust, each `HashMap` in `VCodeConstants` occupied at 1739 // least 48 bytes, making an empty `VCodeConstants` cost 120 bytes. 1740 } 1741 } 1742