1 //! This implements the VCode container: a CFG of Insts that have been lowered. 2 //! 3 //! VCode is virtual-register code. An instruction in VCode is almost a machine 4 //! instruction; however, its register slots can refer to virtual registers in 5 //! addition to real machine registers. 6 //! 7 //! VCode is structured with traditional basic blocks, and 8 //! each block must be terminated by an unconditional branch (one target), a 9 //! conditional branch (two targets), or a return (no targets). Note that this 10 //! slightly differs from the machine code of most ISAs: in most ISAs, a 11 //! conditional branch has one target (and the not-taken case falls through). 12 //! However, we expect that machine backends will elide branches to the following 13 //! block (i.e., zero-offset jumps), and will be able to codegen a branch-cond / 14 //! branch-uncond pair if *both* targets are not fallthrough. This allows us to 15 //! play with layout prior to final binary emission, as well, if we want. 16 //! 17 //! See the main module comment in `mod.rs` for more details on the VCode-based 18 //! backend pipeline. 19 20 use crate::fx::FxHashMap; 21 use crate::fx::FxHashSet; 22 use crate::ir::pcc::*; 23 use crate::ir::RelSourceLoc; 24 use crate::ir::{self, types, Constant, ConstantData, DynamicStackSlot, ValueLabel}; 25 use crate::machinst::*; 26 use crate::timing; 27 use crate::trace; 28 use crate::CodegenError; 29 use crate::{LabelValueLoc, ValueLocRange}; 30 use cranelift_control::ControlPlane; 31 use regalloc2::{ 32 Edit, Function as RegallocFunction, InstOrEdit, InstRange, MachineEnv, Operand, OperandKind, 33 PRegSet, RegClass, VReg, 34 }; 35 36 use alloc::vec::Vec; 37 use cranelift_entity::{entity_impl, Keys, PrimaryMap}; 38 use std::collections::hash_map::Entry; 39 use std::collections::HashMap; 40 use std::fmt; 41 42 /// Index referring to an instruction in VCode. 43 pub type InsnIndex = regalloc2::Inst; 44 45 /// Index referring to a basic block in VCode. 46 pub type BlockIndex = regalloc2::Block; 47 48 /// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be 49 /// a `MachInst` and it must be able to emit itself at least to a `SizeCodeSink`. 50 pub trait VCodeInst: MachInst + MachInstEmit {} 51 impl<I: MachInst + MachInstEmit> VCodeInst for I {} 52 53 /// A function in "VCode" (virtualized-register code) form, after 54 /// lowering. This is essentially a standard CFG of basic blocks, 55 /// where each basic block consists of lowered instructions produced 56 /// by the machine-specific backend. 57 /// 58 /// Note that the VCode is immutable once produced, and is not 59 /// modified by register allocation in particular. Rather, register 60 /// allocation on the `VCode` produces a separate `regalloc2::Output` 61 /// struct, and this can be passed to `emit`. `emit` in turn does not 62 /// modify the vcode, but produces an `EmitResult`, which contains the 63 /// machine code itself, and the associated disassembly and/or 64 /// metadata as requested. 65 pub struct VCode<I: VCodeInst> { 66 /// VReg IR-level types. 67 vreg_types: Vec<Type>, 68 69 /// Lowered machine instructions in order corresponding to the original IR. 70 insts: Vec<I>, 71 72 /// Operands: pre-regalloc references to virtual registers with 73 /// constraints, in one flattened array. This allows the regalloc 74 /// to efficiently access all operands without requiring expensive 75 /// matches or method invocations on insts. 76 operands: Vec<Operand>, 77 78 /// Operand index ranges: for each instruction in `insts`, there 79 /// is a tuple here providing the range in `operands` for that 80 /// instruction's operands. 81 operand_ranges: Vec<(u32, u32)>, 82 83 /// Clobbers: a sparse map from instruction indices to clobber masks. 84 clobbers: FxHashMap<InsnIndex, PRegSet>, 85 86 /// Source locations for each instruction. (`SourceLoc` is a `u32`, so it is 87 /// reasonable to keep one of these per instruction.) 88 srclocs: Vec<RelSourceLoc>, 89 90 /// Entry block. 91 entry: BlockIndex, 92 93 /// Block instruction indices. 94 block_ranges: Vec<(InsnIndex, InsnIndex)>, 95 96 /// Block successors: index range in the `block_succs_preds` list. 97 block_succ_range: Vec<(u32, u32)>, 98 99 /// Block predecessors: index range in the `block_succs_preds` list. 100 block_pred_range: Vec<(u32, u32)>, 101 102 /// Block successor and predecessor lists, concatenated into one 103 /// Vec. The `block_succ_range` and `block_pred_range` lists of 104 /// tuples above give (start, end) ranges within this list that 105 /// correspond to each basic block's successors or predecessors, 106 /// respectively. 107 block_succs_preds: Vec<regalloc2::Block>, 108 109 /// Block parameters: index range in `block_params` below. 110 block_params_range: Vec<(u32, u32)>, 111 112 /// Block parameter lists, concatenated into one vec. The 113 /// `block_params_range` list of tuples above gives (start, end) 114 /// ranges within this list that correspond to each basic block's 115 /// blockparam vregs. 116 block_params: Vec<regalloc2::VReg>, 117 118 /// Outgoing block arguments on branch instructions, concatenated 119 /// into one list. 120 /// 121 /// Note that this is conceptually a 3D array: we have a VReg list 122 /// per block, per successor. We flatten those three dimensions 123 /// into this 1D vec, then store index ranges in two levels of 124 /// indirection. 125 /// 126 /// Indexed by the indices in `branch_block_arg_succ_range`. 127 branch_block_args: Vec<regalloc2::VReg>, 128 129 /// Array of sequences of (start, end) tuples in 130 /// `branch_block_args`, one for each successor; these sequences 131 /// for each block are concatenated. 132 /// 133 /// Indexed by the indices in `branch_block_arg_succ_range`. 134 branch_block_arg_range: Vec<(u32, u32)>, 135 136 /// For a given block, indices in `branch_block_arg_range` 137 /// corresponding to all of its successors. 138 branch_block_arg_succ_range: Vec<(u32, u32)>, 139 140 /// VReg aliases. Each key in this table is translated to its 141 /// value when gathering Operands from instructions. Aliases are 142 /// not chased transitively (we do not further look up the 143 /// translated reg to see if it is another alias). 144 /// 145 /// We use these aliases to rename an instruction's expected 146 /// result vregs to the returned vregs from lowering, which are 147 /// usually freshly-allocated temps. 148 /// 149 /// Operands and branch arguments will already have been 150 /// translated through this alias table; but it helps to make 151 /// sense of instructions when pretty-printed, for example. 152 vreg_aliases: FxHashMap<regalloc2::VReg, regalloc2::VReg>, 153 154 /// Block-order information. 155 block_order: BlockLoweringOrder, 156 157 /// ABI object. 158 pub(crate) abi: Callee<I::ABIMachineSpec>, 159 160 /// Constant information used during code emission. This should be 161 /// immutable across function compilations within the same module. 162 emit_info: I::Info, 163 164 /// Reference-typed `regalloc2::VReg`s. The regalloc requires 165 /// these in a dense slice (as opposed to querying the 166 /// reftype-status of each vreg) for efficient iteration. 167 reftyped_vregs: Vec<VReg>, 168 169 /// Constants. 170 constants: VCodeConstants, 171 172 /// Value labels for debuginfo attached to vregs. 173 debug_value_labels: Vec<(VReg, InsnIndex, InsnIndex, u32)>, 174 175 pub(crate) sigs: SigSet, 176 177 /// Facts on VRegs, for proof-carrying code verification. 178 facts: Vec<Option<Fact>>, 179 } 180 181 /// The result of `VCode::emit`. Contains all information computed 182 /// during emission: actual machine code, optionally a disassembly, 183 /// and optionally metadata about the code layout. 184 pub struct EmitResult { 185 /// The MachBuffer containing the machine code. 186 pub buffer: MachBufferFinalized<Stencil>, 187 188 /// Offset of each basic block, recorded during emission. Computed 189 /// only if `debug_value_labels` is non-empty. 190 pub bb_offsets: Vec<CodeOffset>, 191 192 /// Final basic-block edges, in terms of code offsets of 193 /// bb-starts. Computed only if `debug_value_labels` is non-empty. 194 pub bb_edges: Vec<(CodeOffset, CodeOffset)>, 195 196 /// Final length of function body. 197 pub func_body_len: CodeOffset, 198 199 /// The pretty-printed disassembly, if any. This uses the same 200 /// pretty-printing for MachInsts as the pre-regalloc VCode Debug 201 /// implementation, but additionally includes the prologue and 202 /// epilogue(s), and makes use of the regalloc results. 203 pub disasm: Option<String>, 204 205 /// Offsets of sized stackslots. 206 pub sized_stackslot_offsets: PrimaryMap<StackSlot, u32>, 207 208 /// Offsets of dynamic stackslots. 209 pub dynamic_stackslot_offsets: PrimaryMap<DynamicStackSlot, u32>, 210 211 /// Value-labels information (debug metadata). 212 pub value_labels_ranges: ValueLabelsRanges, 213 214 /// Stack frame size. 215 pub frame_size: u32, 216 } 217 218 /// A builder for a VCode function body. 219 /// 220 /// This builder has the ability to accept instructions in either 221 /// forward or reverse order, depending on the pass direction that 222 /// produces the VCode. The lowering from CLIF to VCode<MachInst> 223 /// ordinarily occurs in reverse order (in order to allow instructions 224 /// to be lowered only if used, and not merged) so a reversal will 225 /// occur at the end of lowering to ensure the VCode is in machine 226 /// order. 227 /// 228 /// If built in reverse, block and instruction indices used once the 229 /// VCode is built are relative to the final (reversed) order, not the 230 /// order of construction. Note that this means we do not know the 231 /// final block or instruction indices when building, so we do not 232 /// hand them out. (The user is assumed to know them when appending 233 /// terminator instructions with successor blocks.) 234 pub struct VCodeBuilder<I: VCodeInst> { 235 /// In-progress VCode. 236 pub(crate) vcode: VCode<I>, 237 238 /// In what direction is the build occuring? 239 direction: VCodeBuildDirection, 240 241 /// Index of the last block-start in the vcode. 242 block_start: usize, 243 244 /// Start of succs for the current block in the concatenated succs list. 245 succ_start: usize, 246 247 /// Start of blockparams for the current block in the concatenated 248 /// blockparams list. 249 block_params_start: usize, 250 251 /// Start of successor blockparam arg list entries in 252 /// the concatenated branch_block_arg_range list. 253 branch_block_arg_succ_start: usize, 254 255 /// Current source location. 256 cur_srcloc: RelSourceLoc, 257 258 /// Debug-value label in-progress map, keyed by label. For each 259 /// label, we keep disjoint ranges mapping to vregs. We'll flatten 260 /// this into (vreg, range, label) tuples when done. 261 debug_info: FxHashMap<ValueLabel, Vec<(InsnIndex, InsnIndex, VReg)>>, 262 } 263 264 /// Direction in which a VCodeBuilder builds VCode. 265 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 266 pub enum VCodeBuildDirection { 267 // TODO: add `Forward` once we need it and can test it adequately. 268 /// Backward-build pass: we expect the producer to call `emit()` 269 /// with instructions in reverse program order within each block. 270 Backward, 271 } 272 273 impl<I: VCodeInst> VCodeBuilder<I> { 274 /// Create a new VCodeBuilder. 275 pub fn new( 276 sigs: SigSet, 277 abi: Callee<I::ABIMachineSpec>, 278 emit_info: I::Info, 279 block_order: BlockLoweringOrder, 280 constants: VCodeConstants, 281 direction: VCodeBuildDirection, 282 ) -> VCodeBuilder<I> { 283 let vcode = VCode::new(sigs, abi, emit_info, block_order, constants); 284 285 VCodeBuilder { 286 vcode, 287 direction, 288 block_start: 0, 289 succ_start: 0, 290 block_params_start: 0, 291 branch_block_arg_succ_start: 0, 292 cur_srcloc: Default::default(), 293 debug_info: FxHashMap::default(), 294 } 295 } 296 297 pub fn init_abi(&mut self, temps: Vec<Writable<Reg>>) { 298 self.vcode.abi.init(&self.vcode.sigs, temps); 299 } 300 301 /// Access the ABI object. 302 pub fn abi(&self) -> &Callee<I::ABIMachineSpec> { 303 &self.vcode.abi 304 } 305 306 /// Access the ABI object. 307 pub fn abi_mut(&mut self) -> &mut Callee<I::ABIMachineSpec> { 308 &mut self.vcode.abi 309 } 310 311 pub fn sigs(&self) -> &SigSet { 312 &self.vcode.sigs 313 } 314 315 pub fn sigs_mut(&mut self) -> &mut SigSet { 316 &mut self.vcode.sigs 317 } 318 319 /// Access to the BlockLoweringOrder object. 320 pub fn block_order(&self) -> &BlockLoweringOrder { 321 &self.vcode.block_order 322 } 323 324 /// Set the current block as the entry block. 325 pub fn set_entry(&mut self, block: BlockIndex) { 326 self.vcode.entry = block; 327 } 328 329 /// End the current basic block. Must be called after emitting vcode insts 330 /// for IR insts and prior to ending the function (building the VCode). 331 pub fn end_bb(&mut self) { 332 let start_idx = self.block_start; 333 let end_idx = self.vcode.insts.len(); 334 self.block_start = end_idx; 335 // Add the instruction index range to the list of blocks. 336 self.vcode 337 .block_ranges 338 .push((InsnIndex::new(start_idx), InsnIndex::new(end_idx))); 339 // End the successors list. 340 let succ_end = self.vcode.block_succs_preds.len(); 341 self.vcode 342 .block_succ_range 343 .push((self.succ_start as u32, succ_end as u32)); 344 self.succ_start = succ_end; 345 // End the blockparams list. 346 let block_params_end = self.vcode.block_params.len(); 347 self.vcode 348 .block_params_range 349 .push((self.block_params_start as u32, block_params_end as u32)); 350 self.block_params_start = block_params_end; 351 // End the branch blockparam args list. 352 let branch_block_arg_succ_end = self.vcode.branch_block_arg_range.len(); 353 self.vcode.branch_block_arg_succ_range.push(( 354 self.branch_block_arg_succ_start as u32, 355 branch_block_arg_succ_end as u32, 356 )); 357 self.branch_block_arg_succ_start = branch_block_arg_succ_end; 358 } 359 360 pub fn add_block_param(&mut self, param: VirtualReg) { 361 self.vcode.block_params.push(param.into()); 362 } 363 364 fn add_branch_args_for_succ(&mut self, args: &[Reg]) { 365 let start = self.vcode.branch_block_args.len(); 366 self.vcode 367 .branch_block_args 368 .extend(args.iter().map(|&arg| VReg::from(arg))); 369 let end = self.vcode.branch_block_args.len(); 370 self.vcode 371 .branch_block_arg_range 372 .push((start as u32, end as u32)); 373 } 374 375 /// Push an instruction for the current BB and current IR inst 376 /// within the BB. 377 pub fn push(&mut self, insn: I) { 378 self.vcode.insts.push(insn); 379 self.vcode.srclocs.push(self.cur_srcloc); 380 } 381 382 /// Add a successor block with branch args. 383 pub fn add_succ(&mut self, block: BlockIndex, args: &[Reg]) { 384 self.vcode.block_succs_preds.push(block); 385 self.add_branch_args_for_succ(args); 386 } 387 388 /// Set the current source location. 389 pub fn set_srcloc(&mut self, srcloc: RelSourceLoc) { 390 self.cur_srcloc = srcloc; 391 } 392 393 /// Add a debug value label to a register. 394 pub fn add_value_label(&mut self, reg: Reg, label: ValueLabel) { 395 // We'll fix up labels in reverse(). Because we're generating 396 // code bottom-to-top, the liverange of the label goes *from* 397 // the last index at which was defined (or 0, which is the end 398 // of the eventual function) *to* just this instruction, and 399 // no further. 400 let inst = InsnIndex::new(self.vcode.insts.len()); 401 let labels = self.debug_info.entry(label).or_insert_with(|| vec![]); 402 let last = labels 403 .last() 404 .map(|(_start, end, _vreg)| *end) 405 .unwrap_or(InsnIndex::new(0)); 406 labels.push((last, inst, reg.into())); 407 } 408 409 pub fn set_vreg_alias(&mut self, from: Reg, to: Reg) { 410 let from = from.into(); 411 let resolved_to = self.vcode.resolve_vreg_alias(to.into()); 412 // Disallow cycles (see below). 413 assert_ne!(resolved_to, from); 414 self.vcode.vreg_aliases.insert(from, resolved_to); 415 } 416 417 /// Access the constants. 418 pub fn constants(&mut self) -> &mut VCodeConstants { 419 &mut self.vcode.constants 420 } 421 422 fn compute_preds_from_succs(&mut self) { 423 // Compute predecessors from successors. In order to gather 424 // all preds for a block into a contiguous sequence, we build 425 // a list of (succ, pred) tuples and then sort. 426 let mut succ_pred_edges: Vec<(BlockIndex, BlockIndex)> = 427 Vec::with_capacity(self.vcode.block_succs_preds.len()); 428 for (pred, &(start, end)) in self.vcode.block_succ_range.iter().enumerate() { 429 let pred = BlockIndex::new(pred); 430 for i in start..end { 431 let succ = BlockIndex::new(self.vcode.block_succs_preds[i as usize].index()); 432 succ_pred_edges.push((succ, pred)); 433 } 434 } 435 succ_pred_edges.sort_unstable(); 436 437 let mut i = 0; 438 for succ in 0..self.vcode.num_blocks() { 439 let succ = BlockIndex::new(succ); 440 let start = self.vcode.block_succs_preds.len(); 441 while i < succ_pred_edges.len() && succ_pred_edges[i].0 == succ { 442 let pred = succ_pred_edges[i].1; 443 self.vcode.block_succs_preds.push(pred); 444 i += 1; 445 } 446 let end = self.vcode.block_succs_preds.len(); 447 self.vcode.block_pred_range.push((start as u32, end as u32)); 448 } 449 } 450 451 /// Called once, when a build in Backward order is complete, to 452 /// perform the overall reversal (into final forward order) and 453 /// finalize metadata accordingly. 454 fn reverse_and_finalize(&mut self) { 455 let n_insts = self.vcode.insts.len(); 456 if n_insts == 0 { 457 return; 458 } 459 460 // Reverse the per-block and per-inst sequences. 461 self.vcode.block_ranges.reverse(); 462 // block_params_range is indexed by block (and blocks were 463 // traversed in reverse) so we reverse it; but block-param 464 // sequences in the concatenated vec can remain in reverse 465 // order (it is effectively an arena of arbitrarily-placed 466 // referenced sequences). 467 self.vcode.block_params_range.reverse(); 468 // Likewise, we reverse block_succ_range, but the block_succ 469 // concatenated array can remain as-is. 470 self.vcode.block_succ_range.reverse(); 471 self.vcode.insts.reverse(); 472 self.vcode.srclocs.reverse(); 473 // Likewise, branch_block_arg_succ_range is indexed by block 474 // so must be reversed. 475 self.vcode.branch_block_arg_succ_range.reverse(); 476 477 // To translate an instruction index *endpoint* in reversed 478 // order to forward order, compute `n_insts - i`. 479 // 480 // Why not `n_insts - 1 - i`? That would be correct to 481 // translate an individual instruction index (for ten insts 0 482 // to 9 inclusive, inst 0 becomes 9, and inst 9 becomes 483 // 0). But for the usual inclusive-start, exclusive-end range 484 // idiom, inclusive starts become exclusive ends and 485 // vice-versa, so e.g. an (inclusive) start of 0 becomes an 486 // (exclusive) end of 10. 487 let translate = |inst: InsnIndex| InsnIndex::new(n_insts - inst.index()); 488 489 // Edit the block-range instruction indices. 490 for tuple in &mut self.vcode.block_ranges { 491 let (start, end) = *tuple; 492 *tuple = (translate(end), translate(start)); // Note reversed order. 493 } 494 495 // Generate debug-value labels based on per-label maps. 496 for (label, tuples) in &self.debug_info { 497 for &(start, end, vreg) in tuples { 498 let vreg = self.vcode.resolve_vreg_alias(vreg); 499 let fwd_start = translate(end); 500 let fwd_end = translate(start); 501 self.vcode 502 .debug_value_labels 503 .push((vreg, fwd_start, fwd_end, label.as_u32())); 504 } 505 } 506 507 // Now sort debug value labels by VReg, as required 508 // by regalloc2. 509 self.vcode 510 .debug_value_labels 511 .sort_unstable_by_key(|(vreg, _, _, _)| *vreg); 512 } 513 514 fn collect_operands(&mut self) { 515 for (i, insn) in self.vcode.insts.iter().enumerate() { 516 // Push operands from the instruction onto the operand list. 517 // 518 // We rename through the vreg alias table as we collect 519 // the operands. This is better than a separate post-pass 520 // over operands, because it has more cache locality: 521 // operands only need to pass through L1 once. This is 522 // also better than renaming instructions' 523 // operands/registers while lowering, because here we only 524 // need to do the `match` over the instruction to visit 525 // its register fields (which is slow, branchy code) once. 526 527 let vreg_aliases = &self.vcode.vreg_aliases; 528 let allocatable = PRegSet::from(self.vcode.machine_env()); 529 let mut op_collector = 530 OperandCollector::new(&mut self.vcode.operands, allocatable, |vreg| { 531 VCode::<I>::resolve_vreg_alias_impl(vreg_aliases, vreg) 532 }); 533 insn.get_operands(&mut op_collector); 534 let (ops, clobbers) = op_collector.finish(); 535 self.vcode.operand_ranges.push(ops); 536 537 if clobbers != PRegSet::default() { 538 self.vcode.clobbers.insert(InsnIndex::new(i), clobbers); 539 } 540 541 if let Some((dst, src)) = insn.is_move() { 542 // We should never see non-virtual registers present in move 543 // instructions. 544 assert!( 545 src.is_virtual(), 546 "the real register {:?} was used as the source of a move instruction", 547 src 548 ); 549 assert!( 550 dst.to_reg().is_virtual(), 551 "the real register {:?} was used as the destination of a move instruction", 552 dst.to_reg() 553 ); 554 } 555 } 556 557 // Translate blockparam args via the vreg aliases table as well. 558 for arg in &mut self.vcode.branch_block_args { 559 let new_arg = VCode::<I>::resolve_vreg_alias_impl(&self.vcode.vreg_aliases, *arg); 560 trace!("operandcollector: block arg {:?} -> {:?}", arg, new_arg); 561 *arg = new_arg; 562 } 563 } 564 565 /// Build the final VCode. 566 pub fn build(mut self, vregs: VRegAllocator<I>) -> VCode<I> { 567 self.vcode.vreg_types = vregs.vreg_types; 568 self.vcode.facts = vregs.facts; 569 self.vcode.reftyped_vregs = vregs.reftyped_vregs; 570 571 if self.direction == VCodeBuildDirection::Backward { 572 self.reverse_and_finalize(); 573 } 574 self.collect_operands(); 575 576 // Apply register aliases to the `reftyped_vregs` list since this list 577 // will be returned directly to `regalloc2` eventually and all 578 // operands/results of instructions will use the alias-resolved vregs 579 // from `regalloc2`'s perspective. 580 // 581 // Also note that `reftyped_vregs` can't have duplicates, so after the 582 // aliases are applied duplicates are removed. 583 for reg in self.vcode.reftyped_vregs.iter_mut() { 584 *reg = VCode::<I>::resolve_vreg_alias_impl(&self.vcode.vreg_aliases, *reg); 585 } 586 self.vcode.reftyped_vregs.sort(); 587 self.vcode.reftyped_vregs.dedup(); 588 589 self.compute_preds_from_succs(); 590 self.vcode.debug_value_labels.sort_unstable(); 591 self.vcode 592 } 593 } 594 595 /// Is this type a reference type? 596 fn is_reftype(ty: Type) -> bool { 597 ty == types::R64 || ty == types::R32 598 } 599 600 const NO_INST_OFFSET: CodeOffset = u32::MAX; 601 602 impl<I: VCodeInst> VCode<I> { 603 /// New empty VCode. 604 fn new( 605 sigs: SigSet, 606 abi: Callee<I::ABIMachineSpec>, 607 emit_info: I::Info, 608 block_order: BlockLoweringOrder, 609 constants: VCodeConstants, 610 ) -> VCode<I> { 611 let n_blocks = block_order.lowered_order().len(); 612 VCode { 613 sigs, 614 vreg_types: vec![], 615 insts: Vec::with_capacity(10 * n_blocks), 616 operands: Vec::with_capacity(30 * n_blocks), 617 operand_ranges: Vec::with_capacity(10 * n_blocks), 618 clobbers: FxHashMap::default(), 619 srclocs: Vec::with_capacity(10 * n_blocks), 620 entry: BlockIndex::new(0), 621 block_ranges: Vec::with_capacity(n_blocks), 622 block_succ_range: Vec::with_capacity(n_blocks), 623 block_succs_preds: Vec::with_capacity(2 * n_blocks), 624 block_pred_range: Vec::with_capacity(n_blocks), 625 block_params_range: Vec::with_capacity(n_blocks), 626 block_params: Vec::with_capacity(5 * n_blocks), 627 branch_block_args: Vec::with_capacity(10 * n_blocks), 628 branch_block_arg_range: Vec::with_capacity(2 * n_blocks), 629 branch_block_arg_succ_range: Vec::with_capacity(n_blocks), 630 block_order, 631 abi, 632 emit_info, 633 reftyped_vregs: vec![], 634 constants, 635 debug_value_labels: vec![], 636 vreg_aliases: FxHashMap::with_capacity_and_hasher(10 * n_blocks, Default::default()), 637 facts: vec![], 638 } 639 } 640 641 /// Get the ABI-dependent MachineEnv for managing register allocation. 642 pub fn machine_env(&self) -> &MachineEnv { 643 self.abi.machine_env(&self.sigs) 644 } 645 646 /// Get the number of blocks. Block indices will be in the range `0 .. 647 /// (self.num_blocks() - 1)`. 648 pub fn num_blocks(&self) -> usize { 649 self.block_ranges.len() 650 } 651 652 /// The number of lowered instructions. 653 pub fn num_insts(&self) -> usize { 654 self.insts.len() 655 } 656 657 /// Get the successors for a block. 658 pub fn succs(&self, block: BlockIndex) -> &[BlockIndex] { 659 let (start, end) = self.block_succ_range[block.index()]; 660 &self.block_succs_preds[start as usize..end as usize] 661 } 662 663 fn compute_clobbers(&self, regalloc: ®alloc2::Output) -> Vec<Writable<RealReg>> { 664 // Compute clobbered registers. 665 let mut clobbered = vec![]; 666 let mut clobbered_set = FxHashSet::default(); 667 668 // All moves are included in clobbers. 669 for edit in ®alloc.edits { 670 let Edit::Move { to, .. } = edit.1; 671 if let Some(preg) = to.as_reg() { 672 let reg = RealReg::from(preg); 673 if clobbered_set.insert(reg) { 674 clobbered.push(Writable::from_reg(reg)); 675 } 676 } 677 } 678 679 for (i, (start, end)) in self.operand_ranges.iter().enumerate() { 680 // Skip this instruction if not "included in clobbers" as 681 // per the MachInst. (Some backends use this to implement 682 // ABI specifics; e.g., excluding calls of the same ABI as 683 // the current function from clobbers, because by 684 // definition everything clobbered by the call can be 685 // clobbered by this function without saving as well.) 686 if !self.insts[i].is_included_in_clobbers() { 687 continue; 688 } 689 690 let start = *start as usize; 691 let end = *end as usize; 692 let operands = &self.operands[start..end]; 693 let allocs = ®alloc.allocs[start..end]; 694 for (operand, alloc) in operands.iter().zip(allocs.iter()) { 695 // We're interested only in writes (Mods or Defs). 696 if operand.kind() == OperandKind::Use { 697 continue; 698 } 699 if let Some(preg) = alloc.as_reg() { 700 let reg = RealReg::from(preg); 701 if clobbered_set.insert(reg) { 702 clobbered.push(Writable::from_reg(reg)); 703 } 704 } 705 } 706 707 // Also add explicitly-clobbered registers. 708 for preg in self 709 .clobbers 710 .get(&InsnIndex::new(i)) 711 .cloned() 712 .unwrap_or_default() 713 { 714 let reg = RealReg::from(preg); 715 if clobbered_set.insert(reg) { 716 clobbered.push(Writable::from_reg(reg)); 717 } 718 } 719 } 720 721 clobbered 722 } 723 724 /// Emit the instructions to a `MachBuffer`, containing fixed-up 725 /// code and external reloc/trap/etc. records ready for use. Takes 726 /// the regalloc results as well. 727 /// 728 /// Returns the machine code itself, and optionally metadata 729 /// and/or a disassembly, as an `EmitResult`. The `VCode` itself 730 /// is consumed by the emission process. 731 pub fn emit( 732 mut self, 733 regalloc: ®alloc2::Output, 734 want_disasm: bool, 735 flags: &settings::Flags, 736 ctrl_plane: &mut ControlPlane, 737 ) -> EmitResult 738 where 739 I: VCodeInst, 740 { 741 // To write into disasm string. 742 use core::fmt::Write; 743 744 let _tt = timing::vcode_emit(); 745 let mut buffer = MachBuffer::new(); 746 let mut bb_starts: Vec<Option<CodeOffset>> = vec![]; 747 748 // The first M MachLabels are reserved for block indices. 749 buffer.reserve_labels_for_blocks(self.num_blocks()); 750 751 // Register all allocated constants with the `MachBuffer` to ensure that 752 // any references to the constants during instructions can be handled 753 // correctly. 754 buffer.register_constants(&self.constants); 755 756 // Construct the final order we emit code in: cold blocks at the end. 757 let mut final_order: SmallVec<[BlockIndex; 16]> = smallvec![]; 758 let mut cold_blocks: SmallVec<[BlockIndex; 16]> = smallvec![]; 759 for block in 0..self.num_blocks() { 760 let block = BlockIndex::new(block); 761 if self.block_order.is_cold(block) { 762 cold_blocks.push(block); 763 } else { 764 final_order.push(block); 765 } 766 } 767 final_order.extend(cold_blocks.clone()); 768 769 // Compute/save info we need for the prologue: clobbers and 770 // number of spillslots. 771 // 772 // We clone `abi` here because we will mutate it as we 773 // generate the prologue and set other info, but we can't 774 // mutate `VCode`. The info it usually carries prior to 775 // setting clobbers is fairly minimal so this should be 776 // relatively cheap. 777 let clobbers = self.compute_clobbers(regalloc); 778 self.abi.set_num_spillslots(regalloc.num_spillslots); 779 self.abi.set_clobbered(clobbers); 780 self.abi.compute_frame_layout(&self.sigs); 781 782 // Emit blocks. 783 let mut cur_srcloc = None; 784 let mut last_offset = None; 785 let mut inst_offsets = vec![]; 786 let mut state = I::State::new(&self.abi, std::mem::take(ctrl_plane)); 787 788 let mut disasm = String::new(); 789 790 if !self.debug_value_labels.is_empty() { 791 inst_offsets.resize(self.insts.len(), NO_INST_OFFSET); 792 } 793 794 // Count edits per block ahead of time; this is needed for 795 // lookahead island emission. (We could derive it per-block 796 // with binary search in the edit list, but it's more 797 // efficient to do it in one pass here.) 798 let mut ra_edits_per_block: SmallVec<[u32; 64]> = smallvec![]; 799 let mut edit_idx = 0; 800 for block in 0..self.num_blocks() { 801 let end_inst = self.block_ranges[block].1; 802 let start_edit_idx = edit_idx; 803 while edit_idx < regalloc.edits.len() && regalloc.edits[edit_idx].0.inst() < end_inst { 804 edit_idx += 1; 805 } 806 let end_edit_idx = edit_idx; 807 ra_edits_per_block.push((end_edit_idx - start_edit_idx) as u32); 808 } 809 810 let is_forward_edge_cfi_enabled = self.abi.is_forward_edge_cfi_enabled(); 811 let mut bb_padding = match flags.bb_padding_log2_minus_one() { 812 0 => Vec::new(), 813 n => vec![0; 1 << (n - 1)], 814 }; 815 let mut total_bb_padding = 0; 816 817 for (block_order_idx, &block) in final_order.iter().enumerate() { 818 trace!("emitting block {:?}", block); 819 820 // Call the new block hook for state 821 state.on_new_block(); 822 823 // Emit NOPs to align the block. 824 let new_offset = I::align_basic_block(buffer.cur_offset()); 825 while new_offset > buffer.cur_offset() { 826 // Pad with NOPs up to the aligned block offset. 827 let nop = I::gen_nop((new_offset - buffer.cur_offset()) as usize); 828 nop.emit(&[], &mut buffer, &self.emit_info, &mut Default::default()); 829 } 830 assert_eq!(buffer.cur_offset(), new_offset); 831 832 let do_emit = |inst: &I, 833 allocs: &[Allocation], 834 disasm: &mut String, 835 buffer: &mut MachBuffer<I>, 836 state: &mut I::State| { 837 if want_disasm && !inst.is_args() { 838 let mut s = state.clone(); 839 writeln!(disasm, " {}", inst.pretty_print_inst(allocs, &mut s)).unwrap(); 840 } 841 inst.emit(allocs, buffer, &self.emit_info, state); 842 }; 843 844 // Is this the first block? Emit the prologue directly if so. 845 if block == self.entry { 846 trace!(" -> entry block"); 847 buffer.start_srcloc(Default::default()); 848 state.pre_sourceloc(Default::default()); 849 for inst in &self.abi.gen_prologue() { 850 do_emit(&inst, &[], &mut disasm, &mut buffer, &mut state); 851 } 852 buffer.end_srcloc(); 853 } 854 855 // Now emit the regular block body. 856 857 buffer.bind_label(MachLabel::from_block(block), state.ctrl_plane_mut()); 858 859 if want_disasm { 860 writeln!(&mut disasm, "block{}:", block.index()).unwrap(); 861 } 862 863 if flags.machine_code_cfg_info() { 864 // Track BB starts. If we have backed up due to MachBuffer 865 // branch opts, note that the removed blocks were removed. 866 let cur_offset = buffer.cur_offset(); 867 if last_offset.is_some() && cur_offset <= last_offset.unwrap() { 868 for i in (0..bb_starts.len()).rev() { 869 if bb_starts[i].is_some() && cur_offset > bb_starts[i].unwrap() { 870 break; 871 } 872 bb_starts[i] = None; 873 } 874 } 875 bb_starts.push(Some(cur_offset)); 876 last_offset = Some(cur_offset); 877 } 878 879 if let Some(block_start) = I::gen_block_start( 880 self.block_order.is_indirect_branch_target(block), 881 is_forward_edge_cfi_enabled, 882 ) { 883 do_emit(&block_start, &[], &mut disasm, &mut buffer, &mut state); 884 } 885 886 for inst_or_edit in regalloc.block_insts_and_edits(&self, block) { 887 match inst_or_edit { 888 InstOrEdit::Inst(iix) => { 889 if !self.debug_value_labels.is_empty() { 890 // If we need to produce debug info, 891 // record the offset of each instruction 892 // so that we can translate value-label 893 // ranges to machine-code offsets. 894 895 // Cold blocks violate monotonicity 896 // assumptions elsewhere (that 897 // instructions in inst-index order are in 898 // order in machine code), so we omit 899 // their offsets here. Value-label range 900 // generation below will skip empty ranges 901 // and ranges with to-offsets of zero. 902 if !self.block_order.is_cold(block) { 903 inst_offsets[iix.index()] = buffer.cur_offset(); 904 } 905 } 906 907 // Update the srcloc at this point in the buffer. 908 let srcloc = self.srclocs[iix.index()]; 909 if cur_srcloc != Some(srcloc) { 910 if cur_srcloc.is_some() { 911 buffer.end_srcloc(); 912 } 913 buffer.start_srcloc(srcloc); 914 cur_srcloc = Some(srcloc); 915 } 916 state.pre_sourceloc(cur_srcloc.unwrap_or_default()); 917 918 // If this is a safepoint, compute a stack map 919 // and pass it to the emit state. 920 if self.insts[iix.index()].is_safepoint() { 921 let mut safepoint_slots: SmallVec<[SpillSlot; 8]> = smallvec![]; 922 // Find the contiguous range of 923 // (progpoint, allocation) safepoint slot 924 // records in `regalloc.safepoint_slots` 925 // for this instruction index. 926 let safepoint_slots_start = regalloc 927 .safepoint_slots 928 .binary_search_by(|(progpoint, _alloc)| { 929 if progpoint.inst() >= iix { 930 std::cmp::Ordering::Greater 931 } else { 932 std::cmp::Ordering::Less 933 } 934 }) 935 .unwrap_err(); 936 937 for (_, alloc) in regalloc.safepoint_slots[safepoint_slots_start..] 938 .iter() 939 .take_while(|(progpoint, _)| progpoint.inst() == iix) 940 { 941 let slot = alloc.as_stack().unwrap(); 942 safepoint_slots.push(slot); 943 } 944 if !safepoint_slots.is_empty() { 945 let stack_map = self 946 .abi 947 .spillslots_to_stack_map(&safepoint_slots[..], &state); 948 state.pre_safepoint(stack_map); 949 } 950 } 951 952 // Get the allocations for this inst from the regalloc result. 953 let allocs = regalloc.inst_allocs(iix); 954 955 // If the instruction we are about to emit is 956 // a return, place an epilogue at this point 957 // (and don't emit the return; the actual 958 // epilogue will contain it). 959 if self.insts[iix.index()].is_term() == MachTerminator::Ret { 960 for inst in self.abi.gen_epilogue() { 961 do_emit(&inst, &[], &mut disasm, &mut buffer, &mut state); 962 } 963 } else { 964 // Emit the instruction! 965 do_emit( 966 &self.insts[iix.index()], 967 allocs, 968 &mut disasm, 969 &mut buffer, 970 &mut state, 971 ); 972 } 973 } 974 975 InstOrEdit::Edit(Edit::Move { from, to }) => { 976 // Create a move/spill/reload instruction and 977 // immediately emit it. 978 match (from.as_reg(), to.as_reg()) { 979 (Some(from), Some(to)) => { 980 // Reg-to-reg move. 981 let from_rreg = Reg::from(from); 982 let to_rreg = Writable::from_reg(Reg::from(to)); 983 debug_assert_eq!(from.class(), to.class()); 984 let ty = I::canonical_type_for_rc(from.class()); 985 let mv = I::gen_move(to_rreg, from_rreg, ty); 986 do_emit(&mv, &[], &mut disasm, &mut buffer, &mut state); 987 } 988 (Some(from), None) => { 989 // Spill from register to spillslot. 990 let to = to.as_stack().unwrap(); 991 let from_rreg = RealReg::from(from); 992 let spill = self.abi.gen_spill(to, from_rreg); 993 do_emit(&spill, &[], &mut disasm, &mut buffer, &mut state); 994 } 995 (None, Some(to)) => { 996 // Load from spillslot to register. 997 let from = from.as_stack().unwrap(); 998 let to_rreg = Writable::from_reg(RealReg::from(to)); 999 let reload = self.abi.gen_reload(to_rreg, from); 1000 do_emit(&reload, &[], &mut disasm, &mut buffer, &mut state); 1001 } 1002 (None, None) => { 1003 panic!("regalloc2 should have eliminated stack-to-stack moves!"); 1004 } 1005 } 1006 } 1007 } 1008 } 1009 1010 if cur_srcloc.is_some() { 1011 buffer.end_srcloc(); 1012 cur_srcloc = None; 1013 } 1014 1015 // Do we need an island? Get the worst-case size of the next BB, add 1016 // it to the optional padding behind the block, and pass this to the 1017 // `MachBuffer` to determine if an island is necessary. 1018 let worst_case_next_bb = if block_order_idx < final_order.len() - 1 { 1019 let next_block = final_order[block_order_idx + 1]; 1020 let next_block_range = self.block_ranges[next_block.index()]; 1021 let next_block_size = 1022 (next_block_range.1.index() - next_block_range.0.index()) as u32; 1023 let next_block_ra_insertions = ra_edits_per_block[next_block.index()]; 1024 I::worst_case_size() * (next_block_size + next_block_ra_insertions) 1025 } else { 1026 0 1027 }; 1028 let padding = if bb_padding.is_empty() { 1029 0 1030 } else { 1031 bb_padding.len() as u32 + I::LabelUse::ALIGN - 1 1032 }; 1033 if buffer.island_needed(padding + worst_case_next_bb) { 1034 buffer.emit_island(padding + worst_case_next_bb, ctrl_plane); 1035 } 1036 1037 // Insert padding, if configured, to stress the `MachBuffer`'s 1038 // relocation and island calculations. 1039 // 1040 // Padding can get quite large during fuzzing though so place a 1041 // total cap on it where when a per-function threshold is exceeded 1042 // the padding is turned back down to zero. This avoids a small-ish 1043 // test case generating a GB+ memory footprint in Cranelift for 1044 // example. 1045 if !bb_padding.is_empty() { 1046 buffer.put_data(&bb_padding); 1047 buffer.align_to(I::LabelUse::ALIGN); 1048 total_bb_padding += bb_padding.len(); 1049 if total_bb_padding > (150 << 20) { 1050 bb_padding = Vec::new(); 1051 } 1052 } 1053 } 1054 1055 // Do any optimizations on branches at tail of buffer, as if we had 1056 // bound one last label. 1057 buffer.optimize_branches(ctrl_plane); 1058 1059 // emission state is not needed anymore, move control plane back out 1060 *ctrl_plane = state.take_ctrl_plane(); 1061 1062 let func_body_len = buffer.cur_offset(); 1063 1064 // Create `bb_edges` and final (filtered) `bb_starts`. 1065 let mut bb_edges = vec![]; 1066 let mut bb_offsets = vec![]; 1067 if flags.machine_code_cfg_info() { 1068 for block in 0..self.num_blocks() { 1069 if bb_starts[block].is_none() { 1070 // Block was deleted by MachBuffer; skip. 1071 continue; 1072 } 1073 let from = bb_starts[block].unwrap(); 1074 1075 bb_offsets.push(from); 1076 // Resolve each `succ` label and add edges. 1077 let succs = self.block_succs(BlockIndex::new(block)); 1078 for &succ in succs.iter() { 1079 let to = buffer.resolve_label_offset(MachLabel::from_block(succ)); 1080 bb_edges.push((from, to)); 1081 } 1082 } 1083 } 1084 1085 self.monotonize_inst_offsets(&mut inst_offsets[..], func_body_len); 1086 let value_labels_ranges = 1087 self.compute_value_labels_ranges(regalloc, &inst_offsets[..], func_body_len); 1088 let frame_size = self.abi.frame_size(); 1089 1090 EmitResult { 1091 buffer: buffer.finish(&self.constants, ctrl_plane), 1092 bb_offsets, 1093 bb_edges, 1094 func_body_len, 1095 disasm: if want_disasm { Some(disasm) } else { None }, 1096 sized_stackslot_offsets: self.abi.sized_stackslot_offsets().clone(), 1097 dynamic_stackslot_offsets: self.abi.dynamic_stackslot_offsets().clone(), 1098 value_labels_ranges, 1099 frame_size, 1100 } 1101 } 1102 1103 fn monotonize_inst_offsets(&self, inst_offsets: &mut [CodeOffset], func_body_len: u32) { 1104 if self.debug_value_labels.is_empty() { 1105 return; 1106 } 1107 1108 // During emission, branch removal can make offsets of instructions incorrect. 1109 // Consider the following sequence: [insi][jmp0][jmp1][jmp2][insj] 1110 // It will be recorded as (say): [30] [34] [38] [42] [<would be 46>] 1111 // When the jumps get removed we are left with (in "inst_offsets"): 1112 // [insi][jmp0][jmp1][jmp2][insj][...] 1113 // [30] [34] [38] [42] [34] 1114 // Which violates the monotonicity invariant. This method sets offsets of these 1115 // removed instructions such as to make them appear zero-sized: 1116 // [insi][jmp0][jmp1][jmp2][insj][...] 1117 // [30] [34] [34] [34] [34] 1118 // 1119 let mut next_offset = func_body_len; 1120 for inst_index in (0..(inst_offsets.len() - 1)).rev() { 1121 let inst_offset = inst_offsets[inst_index]; 1122 1123 // Not all instructions get their offsets recorded. 1124 if inst_offset == NO_INST_OFFSET { 1125 continue; 1126 } 1127 1128 if inst_offset > next_offset { 1129 trace!( 1130 "Fixing code offset of the removed Inst {}: {} -> {}", 1131 inst_index, 1132 inst_offset, 1133 next_offset 1134 ); 1135 inst_offsets[inst_index] = next_offset; 1136 continue; 1137 } 1138 1139 next_offset = inst_offset; 1140 } 1141 } 1142 1143 fn compute_value_labels_ranges( 1144 &self, 1145 regalloc: ®alloc2::Output, 1146 inst_offsets: &[CodeOffset], 1147 func_body_len: u32, 1148 ) -> ValueLabelsRanges { 1149 if self.debug_value_labels.is_empty() { 1150 return ValueLabelsRanges::default(); 1151 } 1152 1153 let mut value_labels_ranges: ValueLabelsRanges = HashMap::new(); 1154 for &(label, from, to, alloc) in ®alloc.debug_locations { 1155 let ranges = value_labels_ranges 1156 .entry(ValueLabel::from_u32(label)) 1157 .or_insert_with(|| vec![]); 1158 let from_offset = inst_offsets[from.inst().index()]; 1159 let to_offset = if to.inst().index() == inst_offsets.len() { 1160 func_body_len 1161 } else { 1162 inst_offsets[to.inst().index()] 1163 }; 1164 1165 // Empty ranges or unavailable offsets can happen 1166 // due to cold blocks and branch removal (see above). 1167 if from_offset == NO_INST_OFFSET 1168 || to_offset == NO_INST_OFFSET 1169 || from_offset == to_offset 1170 { 1171 continue; 1172 } 1173 1174 let loc = if let Some(preg) = alloc.as_reg() { 1175 LabelValueLoc::Reg(Reg::from(preg)) 1176 } else { 1177 let slot = alloc.as_stack().unwrap(); 1178 let sp_offset = self.abi.get_spillslot_offset(slot); 1179 let sp_to_caller_sp_offset = self.abi.nominal_sp_to_caller_sp_offset(); 1180 let caller_sp_to_cfa_offset = 1181 crate::isa::unwind::systemv::caller_sp_to_cfa_offset(); 1182 let cfa_to_sp_offset = -((sp_to_caller_sp_offset + caller_sp_to_cfa_offset) as i64); 1183 LabelValueLoc::CFAOffset(cfa_to_sp_offset + sp_offset) 1184 }; 1185 1186 // ValueLocRanges are recorded by *instruction-end 1187 // offset*. `from_offset` is the *start* of the 1188 // instruction; that is the same as the end of another 1189 // instruction, so we only want to begin coverage once 1190 // we are past the previous instruction's end. 1191 let start = from_offset + 1; 1192 1193 // Likewise, `end` is exclusive, but we want to 1194 // *include* the end of the last 1195 // instruction. `to_offset` is the start of the 1196 // `to`-instruction, which is the exclusive end, i.e., 1197 // the first instruction not covered. That 1198 // instruction's start is the same as the end of the 1199 // last instruction that is included, so we go one 1200 // byte further to be sure to include it. 1201 let end = to_offset + 1; 1202 1203 // Coalesce adjacent ranges that for the same location 1204 // to minimize output size here and for the consumers. 1205 if let Some(last_loc_range) = ranges.last_mut() { 1206 if last_loc_range.loc == loc && last_loc_range.end == start { 1207 trace!( 1208 "Extending debug range for VL{} in {:?} to {}", 1209 label, 1210 loc, 1211 end 1212 ); 1213 last_loc_range.end = end; 1214 continue; 1215 } 1216 } 1217 1218 trace!( 1219 "Recording debug range for VL{} in {:?}: [Inst {}..Inst {}) [{}..{})", 1220 label, 1221 loc, 1222 from.inst().index(), 1223 to.inst().index(), 1224 start, 1225 end 1226 ); 1227 1228 ranges.push(ValueLocRange { loc, start, end }); 1229 } 1230 1231 value_labels_ranges 1232 } 1233 1234 /// Get the IR block for a BlockIndex, if one exists. 1235 pub fn bindex_to_bb(&self, block: BlockIndex) -> Option<ir::Block> { 1236 self.block_order.lowered_order()[block.index()].orig_block() 1237 } 1238 1239 pub fn resolve_vreg_alias(&self, from: regalloc2::VReg) -> regalloc2::VReg { 1240 Self::resolve_vreg_alias_impl(&self.vreg_aliases, from) 1241 } 1242 1243 /// Implementation of alias resolution. Separate helper that does 1244 /// not borrow `self` in order to allow working around borrowing 1245 /// restrictions. 1246 fn resolve_vreg_alias_impl( 1247 aliases: &FxHashMap<regalloc2::VReg, regalloc2::VReg>, 1248 from: regalloc2::VReg, 1249 ) -> regalloc2::VReg { 1250 // We prevent cycles from existing by resolving targets of 1251 // aliases eagerly before setting them. If the target resolves 1252 // to the origin of the alias, then a cycle would be created 1253 // and the alias is disallowed. Because of the structure of 1254 // SSA code (one instruction can refer to another's defs but 1255 // not vice-versa, except indirectly through 1256 // phis/blockparams), cycles should not occur as we use 1257 // aliases to redirect vregs to the temps that actually define 1258 // them. 1259 1260 let mut vreg = from; 1261 while let Some(to) = aliases.get(&vreg) { 1262 vreg = *to; 1263 } 1264 vreg 1265 } 1266 1267 #[inline] 1268 fn assert_no_vreg_aliases<'a>(&self, list: &'a [VReg]) -> &'a [VReg] { 1269 for vreg in list { 1270 self.assert_not_vreg_alias(*vreg); 1271 } 1272 list 1273 } 1274 1275 #[inline] 1276 fn assert_not_vreg_alias(&self, vreg: VReg) -> VReg { 1277 debug_assert!(self.resolve_vreg_alias(vreg) == vreg); 1278 vreg 1279 } 1280 1281 #[inline] 1282 fn assert_operand_not_vreg_alias(&self, op: Operand) -> Operand { 1283 // It should be true by construction that `Operand`s do not contain any 1284 // aliased vregs since they're all collected and mapped when the VCode 1285 // is itself constructed. 1286 self.assert_not_vreg_alias(op.vreg()); 1287 op 1288 } 1289 1290 /// Get the fact, if any, for a given VReg. 1291 pub fn vreg_fact(&self, vreg: VReg) -> Option<&Fact> { 1292 let vreg = self.resolve_vreg_alias(vreg); 1293 self.facts[vreg.vreg()].as_ref() 1294 } 1295 1296 /// Does a given instruction define any facts? 1297 pub fn inst_defines_facts(&self, inst: InsnIndex) -> bool { 1298 self.inst_operands(inst) 1299 .iter() 1300 .filter(|o| o.kind() == OperandKind::Def) 1301 .map(|o| o.vreg()) 1302 .any(|vreg| self.vreg_fact(vreg).is_some()) 1303 } 1304 } 1305 1306 impl<I: VCodeInst> std::ops::Index<InsnIndex> for VCode<I> { 1307 type Output = I; 1308 fn index(&self, idx: InsnIndex) -> &Self::Output { 1309 &self.insts[idx.index()] 1310 } 1311 } 1312 1313 impl<I: VCodeInst> RegallocFunction for VCode<I> { 1314 fn num_insts(&self) -> usize { 1315 self.insts.len() 1316 } 1317 1318 fn num_blocks(&self) -> usize { 1319 self.block_ranges.len() 1320 } 1321 1322 fn entry_block(&self) -> BlockIndex { 1323 self.entry 1324 } 1325 1326 fn block_insns(&self, block: BlockIndex) -> InstRange { 1327 let (start, end) = self.block_ranges[block.index()]; 1328 InstRange::forward(start, end) 1329 } 1330 1331 fn block_succs(&self, block: BlockIndex) -> &[BlockIndex] { 1332 let (start, end) = self.block_succ_range[block.index()]; 1333 &self.block_succs_preds[start as usize..end as usize] 1334 } 1335 1336 fn block_preds(&self, block: BlockIndex) -> &[BlockIndex] { 1337 let (start, end) = self.block_pred_range[block.index()]; 1338 &self.block_succs_preds[start as usize..end as usize] 1339 } 1340 1341 fn block_params(&self, block: BlockIndex) -> &[VReg] { 1342 // As a special case we don't return block params for the entry block, as all the arguments 1343 // will be defined by the `Inst::Args` instruction. 1344 if block == self.entry { 1345 return &[]; 1346 } 1347 1348 let (start, end) = self.block_params_range[block.index()]; 1349 let ret = &self.block_params[start as usize..end as usize]; 1350 // Currently block params are never aliased to another vreg, but 1351 // double-check just to be sure. 1352 self.assert_no_vreg_aliases(ret) 1353 } 1354 1355 fn branch_blockparams(&self, block: BlockIndex, _insn: InsnIndex, succ_idx: usize) -> &[VReg] { 1356 let (succ_range_start, succ_range_end) = self.branch_block_arg_succ_range[block.index()]; 1357 let succ_ranges = 1358 &self.branch_block_arg_range[succ_range_start as usize..succ_range_end as usize]; 1359 let (branch_block_args_start, branch_block_args_end) = succ_ranges[succ_idx]; 1360 let ret = &self.branch_block_args 1361 [branch_block_args_start as usize..branch_block_args_end as usize]; 1362 self.assert_no_vreg_aliases(ret) 1363 } 1364 1365 fn is_ret(&self, insn: InsnIndex) -> bool { 1366 match self.insts[insn.index()].is_term() { 1367 // We treat blocks terminated by an unconditional trap like a return for regalloc. 1368 MachTerminator::None => self.insts[insn.index()].is_trap(), 1369 MachTerminator::Ret | MachTerminator::RetCall => true, 1370 MachTerminator::Uncond | MachTerminator::Cond | MachTerminator::Indirect => false, 1371 } 1372 } 1373 1374 fn is_branch(&self, insn: InsnIndex) -> bool { 1375 match self.insts[insn.index()].is_term() { 1376 MachTerminator::Cond | MachTerminator::Uncond | MachTerminator::Indirect => true, 1377 _ => false, 1378 } 1379 } 1380 1381 fn requires_refs_on_stack(&self, insn: InsnIndex) -> bool { 1382 self.insts[insn.index()].is_safepoint() 1383 } 1384 1385 fn inst_operands(&self, insn: InsnIndex) -> &[Operand] { 1386 let (start, end) = self.operand_ranges[insn.index()]; 1387 let ret = &self.operands[start as usize..end as usize]; 1388 for op in ret { 1389 self.assert_operand_not_vreg_alias(*op); 1390 } 1391 ret 1392 } 1393 1394 fn inst_clobbers(&self, insn: InsnIndex) -> PRegSet { 1395 self.clobbers.get(&insn).cloned().unwrap_or_default() 1396 } 1397 1398 fn num_vregs(&self) -> usize { 1399 std::cmp::max(self.vreg_types.len(), first_user_vreg_index()) 1400 } 1401 1402 fn reftype_vregs(&self) -> &[VReg] { 1403 self.assert_no_vreg_aliases(&self.reftyped_vregs[..]) 1404 } 1405 1406 fn debug_value_labels(&self) -> &[(VReg, InsnIndex, InsnIndex, u32)] { 1407 // VRegs here are inserted into `debug_value_labels` after code is 1408 // generated and aliases are fully defined, so no double-check that 1409 // aliases are not lingering. 1410 for (vreg, ..) in self.debug_value_labels.iter() { 1411 self.assert_not_vreg_alias(*vreg); 1412 } 1413 &self.debug_value_labels[..] 1414 } 1415 1416 fn spillslot_size(&self, regclass: RegClass) -> usize { 1417 self.abi.get_spillslot_size(regclass) as usize 1418 } 1419 1420 fn allow_multiple_vreg_defs(&self) -> bool { 1421 // At least the s390x backend requires this, because the 1422 // `Loop` pseudo-instruction aggregates all Operands so pinned 1423 // vregs (RealRegs) may occur more than once. 1424 true 1425 } 1426 } 1427 1428 impl<I: VCodeInst> fmt::Debug for VCode<I> { 1429 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 1430 writeln!(f, "VCode {{")?; 1431 writeln!(f, " Entry block: {}", self.entry.index())?; 1432 1433 let mut state = Default::default(); 1434 1435 let mut alias_keys = self.vreg_aliases.keys().cloned().collect::<Vec<_>>(); 1436 alias_keys.sort_unstable(); 1437 for key in alias_keys { 1438 let dest = self.vreg_aliases.get(&key).unwrap(); 1439 writeln!(f, " {:?} := {:?}", Reg::from(key), Reg::from(*dest))?; 1440 } 1441 1442 for block in 0..self.num_blocks() { 1443 let block = BlockIndex::new(block); 1444 writeln!(f, "Block {}:", block.index())?; 1445 if let Some(bb) = self.bindex_to_bb(block) { 1446 writeln!(f, " (original IR block: {})", bb)?; 1447 } 1448 for succ in self.succs(block) { 1449 writeln!(f, " (successor: Block {})", succ.index())?; 1450 } 1451 let (start, end) = self.block_ranges[block.index()]; 1452 writeln!( 1453 f, 1454 " (instruction range: {} .. {})", 1455 start.index(), 1456 end.index() 1457 )?; 1458 for inst in start.index()..end.index() { 1459 writeln!( 1460 f, 1461 " Inst {}: {}", 1462 inst, 1463 self.insts[inst].pretty_print_inst(&[], &mut state) 1464 )?; 1465 for operand in self.inst_operands(InsnIndex::new(inst)) { 1466 if operand.kind() == OperandKind::Def { 1467 if let Some(fact) = &self.facts[operand.vreg().vreg()] { 1468 writeln!(f, " v{} ! {}", operand.vreg().vreg(), fact)?; 1469 } 1470 } 1471 } 1472 } 1473 } 1474 1475 writeln!(f, "}}")?; 1476 Ok(()) 1477 } 1478 } 1479 1480 /// This structure manages VReg allocation during the lifetime of the VCodeBuilder. 1481 pub struct VRegAllocator<I> { 1482 /// Next virtual register number to allocate. 1483 next_vreg: usize, 1484 1485 /// VReg IR-level types. 1486 vreg_types: Vec<Type>, 1487 1488 /// A set with the same contents as `reftyped_vregs`, in order to 1489 /// avoid inserting more than once. 1490 reftyped_vregs_set: FxHashSet<VReg>, 1491 1492 /// Reference-typed `regalloc2::VReg`s. The regalloc requires 1493 /// these in a dense slice (as opposed to querying the 1494 /// reftype-status of each vreg) for efficient iteration. 1495 reftyped_vregs: Vec<VReg>, 1496 1497 /// A deferred error, to be bubbled up to the top level of the 1498 /// lowering algorithm. We take this approach because we cannot 1499 /// currently propagate a `Result` upward through ISLE code (the 1500 /// lowering rules) or some ABI code. 1501 deferred_error: Option<CodegenError>, 1502 1503 /// Facts on VRegs, for proof-carrying code. 1504 facts: Vec<Option<Fact>>, 1505 1506 /// The type of instruction that this allocator makes registers for. 1507 _inst: core::marker::PhantomData<I>, 1508 } 1509 1510 impl<I: VCodeInst> VRegAllocator<I> { 1511 /// Make a new VRegAllocator. 1512 pub fn new() -> Self { 1513 Self { 1514 next_vreg: first_user_vreg_index(), 1515 vreg_types: vec![], 1516 facts: vec![], 1517 reftyped_vregs_set: FxHashSet::default(), 1518 reftyped_vregs: vec![], 1519 deferred_error: None, 1520 _inst: core::marker::PhantomData::default(), 1521 } 1522 } 1523 1524 /// Allocate a fresh ValueRegs. 1525 pub fn alloc(&mut self, ty: Type) -> CodegenResult<ValueRegs<Reg>> { 1526 if self.deferred_error.is_some() { 1527 return Err(CodegenError::CodeTooLarge); 1528 } 1529 let v = self.next_vreg; 1530 let (regclasses, tys) = I::rc_for_type(ty)?; 1531 self.next_vreg += regclasses.len(); 1532 if self.next_vreg >= VReg::MAX { 1533 return Err(CodegenError::CodeTooLarge); 1534 } 1535 1536 let regs: ValueRegs<Reg> = match regclasses { 1537 &[rc0] => ValueRegs::one(VReg::new(v, rc0).into()), 1538 &[rc0, rc1] => ValueRegs::two(VReg::new(v, rc0).into(), VReg::new(v + 1, rc1).into()), 1539 // We can extend this if/when we support 32-bit targets; e.g., 1540 // an i128 on a 32-bit machine will need up to four machine regs 1541 // for a `Value`. 1542 _ => panic!("Value must reside in 1 or 2 registers"), 1543 }; 1544 for (®_ty, ®) in tys.iter().zip(regs.regs().iter()) { 1545 self.set_vreg_type(reg.to_virtual_reg().unwrap(), reg_ty); 1546 } 1547 1548 // Create empty facts for each allocated vreg. 1549 self.facts 1550 .resize(usize::try_from(self.next_vreg).unwrap(), None); 1551 1552 Ok(regs) 1553 } 1554 1555 /// Allocate a fresh ValueRegs, deferring any out-of-vregs 1556 /// errors. This is useful in places where we cannot bubble a 1557 /// `CodegenResult` upward easily, and which are known to be 1558 /// invoked from within the lowering loop that checks the deferred 1559 /// error status below. 1560 pub fn alloc_with_deferred_error(&mut self, ty: Type) -> ValueRegs<Reg> { 1561 match self.alloc(ty) { 1562 Ok(x) => x, 1563 Err(e) => { 1564 self.deferred_error = Some(e); 1565 self.bogus_for_deferred_error(ty) 1566 } 1567 } 1568 } 1569 1570 /// Take any deferred error that was accumulated by `alloc_with_deferred_error`. 1571 pub fn take_deferred_error(&mut self) -> Option<CodegenError> { 1572 self.deferred_error.take() 1573 } 1574 1575 /// Produce an bogus VReg placeholder with the proper number of 1576 /// registers for the given type. This is meant to be used with 1577 /// deferred allocation errors (see `Lower::alloc_tmp()`). 1578 fn bogus_for_deferred_error(&self, ty: Type) -> ValueRegs<Reg> { 1579 let (regclasses, _tys) = I::rc_for_type(ty).expect("must have valid type"); 1580 match regclasses { 1581 &[rc0] => ValueRegs::one(VReg::new(0, rc0).into()), 1582 &[rc0, rc1] => ValueRegs::two(VReg::new(0, rc0).into(), VReg::new(1, rc1).into()), 1583 _ => panic!("Value must reside in 1 or 2 registers"), 1584 } 1585 } 1586 1587 /// Set the type of this virtual register. 1588 pub fn set_vreg_type(&mut self, vreg: VirtualReg, ty: Type) { 1589 if self.vreg_types.len() <= vreg.index() { 1590 self.vreg_types.resize(vreg.index() + 1, ir::types::INVALID); 1591 } 1592 self.vreg_types[vreg.index()] = ty; 1593 if is_reftype(ty) { 1594 let vreg: VReg = vreg.into(); 1595 if self.reftyped_vregs_set.insert(vreg) { 1596 self.reftyped_vregs.push(vreg); 1597 } 1598 } 1599 } 1600 1601 /// Set the proof-carrying code fact on a given virtual register. 1602 /// 1603 /// Returns the old fact, if any (only one fact can be stored). 1604 pub fn set_fact(&mut self, vreg: VirtualReg, fact: Fact) -> Option<Fact> { 1605 trace!("vreg {:?} has fact: {:?}", vreg, fact); 1606 self.facts[vreg.index()].replace(fact) 1607 } 1608 1609 /// Take (and remove) a fact about a VReg. Used when setting up 1610 /// aliases: we want to move a fact from the alias vreg to the 1611 /// aliased vreg, to preserve facts about a value that were stated 1612 /// before we lowered its producer. 1613 pub fn take_fact(&mut self, vreg: VirtualReg) -> Option<Fact> { 1614 self.facts[vreg.index()].take() 1615 } 1616 1617 /// Allocate a fresh ValueRegs, with a given fact to apply if 1618 /// the value fits in one VReg. 1619 pub fn alloc_with_maybe_fact( 1620 &mut self, 1621 ty: Type, 1622 fact: Option<Fact>, 1623 ) -> CodegenResult<ValueRegs<Reg>> { 1624 let result = self.alloc(ty)?; 1625 1626 // Ensure that we don't lose a fact on a value that splits 1627 // into multiple VRegs. 1628 assert!(result.len() == 1 || fact.is_none()); 1629 if let Some(fact) = fact { 1630 self.set_fact(result.regs()[0].to_virtual_reg().unwrap(), fact); 1631 } 1632 1633 Ok(result) 1634 } 1635 } 1636 1637 /// This structure tracks the large constants used in VCode that will be emitted separately by the 1638 /// [MachBuffer]. 1639 /// 1640 /// First, during the lowering phase, constants are inserted using 1641 /// [VCodeConstants.insert]; an intermediate handle, [VCodeConstant], tracks what constants are 1642 /// used in this phase. Some deduplication is performed, when possible, as constant 1643 /// values are inserted. 1644 /// 1645 /// Secondly, during the emission phase, the [MachBuffer] assigns [MachLabel]s for each of the 1646 /// constants so that instructions can refer to the value's memory location. The [MachBuffer] 1647 /// then writes the constant values to the buffer. 1648 #[derive(Default)] 1649 pub struct VCodeConstants { 1650 constants: PrimaryMap<VCodeConstant, VCodeConstantData>, 1651 pool_uses: HashMap<Constant, VCodeConstant>, 1652 well_known_uses: HashMap<*const [u8], VCodeConstant>, 1653 u64s: HashMap<[u8; 8], VCodeConstant>, 1654 } 1655 impl VCodeConstants { 1656 /// Initialize the structure with the expected number of constants. 1657 pub fn with_capacity(expected_num_constants: usize) -> Self { 1658 Self { 1659 constants: PrimaryMap::with_capacity(expected_num_constants), 1660 pool_uses: HashMap::with_capacity(expected_num_constants), 1661 well_known_uses: HashMap::new(), 1662 u64s: HashMap::new(), 1663 } 1664 } 1665 1666 /// Insert a constant; using this method indicates that a constant value will be used and thus 1667 /// will be emitted to the `MachBuffer`. The current implementation can deduplicate constants 1668 /// that are [VCodeConstantData::Pool] or [VCodeConstantData::WellKnown] but not 1669 /// [VCodeConstantData::Generated]. 1670 pub fn insert(&mut self, data: VCodeConstantData) -> VCodeConstant { 1671 match data { 1672 VCodeConstantData::Generated(_) => self.constants.push(data), 1673 VCodeConstantData::Pool(constant, _) => match self.pool_uses.get(&constant) { 1674 None => { 1675 let vcode_constant = self.constants.push(data); 1676 self.pool_uses.insert(constant, vcode_constant); 1677 vcode_constant 1678 } 1679 Some(&vcode_constant) => vcode_constant, 1680 }, 1681 VCodeConstantData::WellKnown(data_ref) => { 1682 match self.well_known_uses.entry(data_ref as *const [u8]) { 1683 Entry::Vacant(v) => { 1684 let vcode_constant = self.constants.push(data); 1685 v.insert(vcode_constant); 1686 vcode_constant 1687 } 1688 Entry::Occupied(o) => *o.get(), 1689 } 1690 } 1691 VCodeConstantData::U64(value) => match self.u64s.entry(value) { 1692 Entry::Vacant(v) => { 1693 let vcode_constant = self.constants.push(data); 1694 v.insert(vcode_constant); 1695 vcode_constant 1696 } 1697 Entry::Occupied(o) => *o.get(), 1698 }, 1699 } 1700 } 1701 1702 /// Return the number of constants inserted. 1703 pub fn len(&self) -> usize { 1704 self.constants.len() 1705 } 1706 1707 /// Iterate over the [VCodeConstant] keys inserted in this structure. 1708 pub fn keys(&self) -> Keys<VCodeConstant> { 1709 self.constants.keys() 1710 } 1711 1712 /// Iterate over the [VCodeConstant] keys and the data (as a byte slice) inserted in this 1713 /// structure. 1714 pub fn iter(&self) -> impl Iterator<Item = (VCodeConstant, &VCodeConstantData)> { 1715 self.constants.iter() 1716 } 1717 1718 /// Returns the data associated with the specified constant. 1719 pub fn get(&self, c: VCodeConstant) -> &VCodeConstantData { 1720 &self.constants[c] 1721 } 1722 } 1723 1724 /// A use of a constant by one or more VCode instructions; see [VCodeConstants]. 1725 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 1726 pub struct VCodeConstant(u32); 1727 entity_impl!(VCodeConstant); 1728 1729 /// Identify the different types of constant that can be inserted into [VCodeConstants]. Tracking 1730 /// these separately instead of as raw byte buffers allows us to avoid some duplication. 1731 pub enum VCodeConstantData { 1732 /// A constant already present in the Cranelift IR 1733 /// [ConstantPool](crate::ir::constant::ConstantPool). 1734 Pool(Constant, ConstantData), 1735 /// A reference to a well-known constant value that is statically encoded within the compiler. 1736 WellKnown(&'static [u8]), 1737 /// A constant value generated during lowering; the value may depend on the instruction context 1738 /// which makes it difficult to de-duplicate--if possible, use other variants. 1739 Generated(ConstantData), 1740 /// A constant of at most 64 bits. These are deduplicated as 1741 /// well. Stored as a fixed-size array of `u8` so that we do not 1742 /// encounter endianness problems when cross-compiling. 1743 U64([u8; 8]), 1744 } 1745 impl VCodeConstantData { 1746 /// Retrieve the constant data as a byte slice. 1747 pub fn as_slice(&self) -> &[u8] { 1748 match self { 1749 VCodeConstantData::Pool(_, d) | VCodeConstantData::Generated(d) => d.as_slice(), 1750 VCodeConstantData::WellKnown(d) => d, 1751 VCodeConstantData::U64(value) => &value[..], 1752 } 1753 } 1754 1755 /// Calculate the alignment of the constant data. 1756 pub fn alignment(&self) -> u32 { 1757 if self.as_slice().len() <= 8 { 1758 8 1759 } else { 1760 16 1761 } 1762 } 1763 } 1764 1765 #[cfg(test)] 1766 mod test { 1767 use super::*; 1768 use std::mem::size_of; 1769 1770 #[test] 1771 fn size_of_constant_structs() { 1772 assert_eq!(size_of::<Constant>(), 4); 1773 assert_eq!(size_of::<VCodeConstant>(), 4); 1774 assert_eq!(size_of::<ConstantData>(), 24); 1775 assert_eq!(size_of::<VCodeConstantData>(), 32); 1776 assert_eq!( 1777 size_of::<PrimaryMap<VCodeConstant, VCodeConstantData>>(), 1778 24 1779 ); 1780 // TODO The VCodeConstants structure's memory size could be further optimized. 1781 // With certain versions of Rust, each `HashMap` in `VCodeConstants` occupied at 1782 // least 48 bytes, making an empty `VCodeConstants` cost 120 bytes. 1783 } 1784 } 1785