1 //! This implements the VCode container: a CFG of Insts that have been lowered. 2 //! 3 //! VCode is virtual-register code. An instruction in VCode is almost a machine 4 //! instruction; however, its register slots can refer to virtual registers in 5 //! addition to real machine registers. 6 //! 7 //! VCode is structured with traditional basic blocks, and 8 //! each block must be terminated by an unconditional branch (one target), a 9 //! conditional branch (two targets), or a return (no targets). Note that this 10 //! slightly differs from the machine code of most ISAs: in most ISAs, a 11 //! conditional branch has one target (and the not-taken case falls through). 12 //! However, we expect that machine backends will elide branches to the following 13 //! block (i.e., zero-offset jumps), and will be able to codegen a branch-cond / 14 //! branch-uncond pair if *both* targets are not fallthrough. This allows us to 15 //! play with layout prior to final binary emission, as well, if we want. 16 //! 17 //! See the main module comment in `mod.rs` for more details on the VCode-based 18 //! backend pipeline. 19 20 use crate::fx::FxHashMap; 21 use crate::fx::FxHashSet; 22 use crate::ir::pcc::*; 23 use crate::ir::{self, types, Constant, ConstantData, ValueLabel}; 24 use crate::machinst::*; 25 use crate::timing; 26 use crate::trace; 27 use crate::CodegenError; 28 use crate::{LabelValueLoc, ValueLocRange}; 29 use regalloc2::{ 30 Edit, Function as RegallocFunction, InstOrEdit, InstRange, MachineEnv, Operand, OperandKind, 31 PRegSet, RegClass, 32 }; 33 34 use cranelift_entity::{entity_impl, Keys}; 35 use std::collections::hash_map::Entry; 36 use std::collections::HashMap; 37 use std::fmt; 38 39 /// Index referring to an instruction in VCode. 40 pub type InsnIndex = regalloc2::Inst; 41 42 /// Index referring to a basic block in VCode. 43 pub type BlockIndex = regalloc2::Block; 44 45 /// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be 46 /// a `MachInst` and it must be able to emit itself at least to a `SizeCodeSink`. 47 pub trait VCodeInst: MachInst + MachInstEmit {} 48 impl<I: MachInst + MachInstEmit> VCodeInst for I {} 49 50 /// A function in "VCode" (virtualized-register code) form, after 51 /// lowering. This is essentially a standard CFG of basic blocks, 52 /// where each basic block consists of lowered instructions produced 53 /// by the machine-specific backend. 54 /// 55 /// Note that the VCode is immutable once produced, and is not 56 /// modified by register allocation in particular. Rather, register 57 /// allocation on the `VCode` produces a separate `regalloc2::Output` 58 /// struct, and this can be passed to `emit`. `emit` in turn does not 59 /// modify the vcode, but produces an `EmitResult`, which contains the 60 /// machine code itself, and the associated disassembly and/or 61 /// metadata as requested. 62 pub struct VCode<I: VCodeInst> { 63 /// VReg IR-level types. 64 vreg_types: Vec<Type>, 65 66 /// Lowered machine instructions in order corresponding to the original IR. 67 insts: Vec<I>, 68 69 /// Operands: pre-regalloc references to virtual registers with 70 /// constraints, in one flattened array. This allows the regalloc 71 /// to efficiently access all operands without requiring expensive 72 /// matches or method invocations on insts. 73 operands: Vec<Operand>, 74 75 /// Operand index ranges: for each instruction in `insts`, there 76 /// is a tuple here providing the range in `operands` for that 77 /// instruction's operands. 78 operand_ranges: Vec<(u32, u32)>, 79 80 /// Clobbers: a sparse map from instruction indices to clobber masks. 81 clobbers: FxHashMap<InsnIndex, PRegSet>, 82 83 /// Source locations for each instruction. (`SourceLoc` is a `u32`, so it is 84 /// reasonable to keep one of these per instruction.) 85 srclocs: Vec<RelSourceLoc>, 86 87 /// Entry block. 88 entry: BlockIndex, 89 90 /// Block instruction indices. 91 block_ranges: Vec<(InsnIndex, InsnIndex)>, 92 93 /// Block successors: index range in the `block_succs_preds` list. 94 block_succ_range: Vec<(u32, u32)>, 95 96 /// Block predecessors: index range in the `block_succs_preds` list. 97 block_pred_range: Vec<(u32, u32)>, 98 99 /// Block successor and predecessor lists, concatenated into one 100 /// Vec. The `block_succ_range` and `block_pred_range` lists of 101 /// tuples above give (start, end) ranges within this list that 102 /// correspond to each basic block's successors or predecessors, 103 /// respectively. 104 block_succs_preds: Vec<regalloc2::Block>, 105 106 /// Block parameters: index range in `block_params` below. 107 block_params_range: Vec<(u32, u32)>, 108 109 /// Block parameter lists, concatenated into one vec. The 110 /// `block_params_range` list of tuples above gives (start, end) 111 /// ranges within this list that correspond to each basic block's 112 /// blockparam vregs. 113 block_params: Vec<regalloc2::VReg>, 114 115 /// Outgoing block arguments on branch instructions, concatenated 116 /// into one list. 117 /// 118 /// Note that this is conceptually a 3D array: we have a VReg list 119 /// per block, per successor. We flatten those three dimensions 120 /// into this 1D vec, then store index ranges in two levels of 121 /// indirection. 122 /// 123 /// Indexed by the indices in `branch_block_arg_succ_range`. 124 branch_block_args: Vec<regalloc2::VReg>, 125 126 /// Array of sequences of (start, end) tuples in 127 /// `branch_block_args`, one for each successor; these sequences 128 /// for each block are concatenated. 129 /// 130 /// Indexed by the indices in `branch_block_arg_succ_range`. 131 branch_block_arg_range: Vec<(u32, u32)>, 132 133 /// For a given block, indices in `branch_block_arg_range` 134 /// corresponding to all of its successors. 135 branch_block_arg_succ_range: Vec<(u32, u32)>, 136 137 /// VReg aliases. Each key in this table is translated to its 138 /// value when gathering Operands from instructions. Aliases are 139 /// not chased transitively (we do not further look up the 140 /// translated reg to see if it is another alias). 141 /// 142 /// We use these aliases to rename an instruction's expected 143 /// result vregs to the returned vregs from lowering, which are 144 /// usually freshly-allocated temps. 145 /// 146 /// Operands and branch arguments will already have been 147 /// translated through this alias table; but it helps to make 148 /// sense of instructions when pretty-printed, for example. 149 vreg_aliases: FxHashMap<regalloc2::VReg, regalloc2::VReg>, 150 151 /// Block-order information. 152 block_order: BlockLoweringOrder, 153 154 /// ABI object. 155 pub(crate) abi: Callee<I::ABIMachineSpec>, 156 157 /// Constant information used during code emission. This should be 158 /// immutable across function compilations within the same module. 159 emit_info: I::Info, 160 161 /// Reference-typed `regalloc2::VReg`s. The regalloc requires 162 /// these in a dense slice (as opposed to querying the 163 /// reftype-status of each vreg) for efficient iteration. 164 reftyped_vregs: Vec<VReg>, 165 166 /// Constants. 167 pub(crate) constants: VCodeConstants, 168 169 /// Value labels for debuginfo attached to vregs. 170 debug_value_labels: Vec<(VReg, InsnIndex, InsnIndex, u32)>, 171 172 pub(crate) sigs: SigSet, 173 174 /// Facts on VRegs, for proof-carrying code verification. 175 facts: Vec<Option<Fact>>, 176 } 177 178 /// The result of `VCode::emit`. Contains all information computed 179 /// during emission: actual machine code, optionally a disassembly, 180 /// and optionally metadata about the code layout. 181 pub struct EmitResult { 182 /// The MachBuffer containing the machine code. 183 pub buffer: MachBufferFinalized<Stencil>, 184 185 /// Offset of each basic block, recorded during emission. Computed 186 /// only if `debug_value_labels` is non-empty. 187 pub bb_offsets: Vec<CodeOffset>, 188 189 /// Final basic-block edges, in terms of code offsets of 190 /// bb-starts. Computed only if `debug_value_labels` is non-empty. 191 pub bb_edges: Vec<(CodeOffset, CodeOffset)>, 192 193 /// Final length of function body. 194 pub func_body_len: CodeOffset, 195 196 /// The pretty-printed disassembly, if any. This uses the same 197 /// pretty-printing for MachInsts as the pre-regalloc VCode Debug 198 /// implementation, but additionally includes the prologue and 199 /// epilogue(s), and makes use of the regalloc results. 200 pub disasm: Option<String>, 201 202 /// Offsets of sized stackslots. 203 pub sized_stackslot_offsets: PrimaryMap<StackSlot, u32>, 204 205 /// Offsets of dynamic stackslots. 206 pub dynamic_stackslot_offsets: PrimaryMap<DynamicStackSlot, u32>, 207 208 /// Value-labels information (debug metadata). 209 pub value_labels_ranges: ValueLabelsRanges, 210 211 /// Stack frame size. 212 pub frame_size: u32, 213 } 214 215 /// A builder for a VCode function body. 216 /// 217 /// This builder has the ability to accept instructions in either 218 /// forward or reverse order, depending on the pass direction that 219 /// produces the VCode. The lowering from CLIF to VCode<MachInst> 220 /// ordinarily occurs in reverse order (in order to allow instructions 221 /// to be lowered only if used, and not merged) so a reversal will 222 /// occur at the end of lowering to ensure the VCode is in machine 223 /// order. 224 /// 225 /// If built in reverse, block and instruction indices used once the 226 /// VCode is built are relative to the final (reversed) order, not the 227 /// order of construction. Note that this means we do not know the 228 /// final block or instruction indices when building, so we do not 229 /// hand them out. (The user is assumed to know them when appending 230 /// terminator instructions with successor blocks.) 231 pub struct VCodeBuilder<I: VCodeInst> { 232 /// In-progress VCode. 233 pub(crate) vcode: VCode<I>, 234 235 /// In what direction is the build occuring? 236 direction: VCodeBuildDirection, 237 238 /// Index of the last block-start in the vcode. 239 block_start: usize, 240 241 /// Start of succs for the current block in the concatenated succs list. 242 succ_start: usize, 243 244 /// Start of blockparams for the current block in the concatenated 245 /// blockparams list. 246 block_params_start: usize, 247 248 /// Start of successor blockparam arg list entries in 249 /// the concatenated branch_block_arg_range list. 250 branch_block_arg_succ_start: usize, 251 252 /// Current source location. 253 cur_srcloc: RelSourceLoc, 254 255 /// Debug-value label in-progress map, keyed by label. For each 256 /// label, we keep disjoint ranges mapping to vregs. We'll flatten 257 /// this into (vreg, range, label) tuples when done. 258 debug_info: FxHashMap<ValueLabel, Vec<(InsnIndex, InsnIndex, VReg)>>, 259 } 260 261 /// Direction in which a VCodeBuilder builds VCode. 262 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 263 pub enum VCodeBuildDirection { 264 // TODO: add `Forward` once we need it and can test it adequately. 265 /// Backward-build pass: we expect the producer to call `emit()` 266 /// with instructions in reverse program order within each block. 267 Backward, 268 } 269 270 impl<I: VCodeInst> VCodeBuilder<I> { 271 /// Create a new VCodeBuilder. 272 pub fn new( 273 sigs: SigSet, 274 abi: Callee<I::ABIMachineSpec>, 275 emit_info: I::Info, 276 block_order: BlockLoweringOrder, 277 constants: VCodeConstants, 278 direction: VCodeBuildDirection, 279 ) -> VCodeBuilder<I> { 280 let vcode = VCode::new(sigs, abi, emit_info, block_order, constants); 281 282 VCodeBuilder { 283 vcode, 284 direction, 285 block_start: 0, 286 succ_start: 0, 287 block_params_start: 0, 288 branch_block_arg_succ_start: 0, 289 cur_srcloc: Default::default(), 290 debug_info: FxHashMap::default(), 291 } 292 } 293 294 pub fn init_abi(&mut self, temps: Vec<Writable<Reg>>) { 295 self.vcode.abi.init(&self.vcode.sigs, temps); 296 } 297 298 /// Access the ABI object. 299 pub fn abi(&self) -> &Callee<I::ABIMachineSpec> { 300 &self.vcode.abi 301 } 302 303 /// Access the ABI object. 304 pub fn abi_mut(&mut self) -> &mut Callee<I::ABIMachineSpec> { 305 &mut self.vcode.abi 306 } 307 308 pub fn sigs(&self) -> &SigSet { 309 &self.vcode.sigs 310 } 311 312 pub fn sigs_mut(&mut self) -> &mut SigSet { 313 &mut self.vcode.sigs 314 } 315 316 /// Access to the BlockLoweringOrder object. 317 pub fn block_order(&self) -> &BlockLoweringOrder { 318 &self.vcode.block_order 319 } 320 321 /// Set the current block as the entry block. 322 pub fn set_entry(&mut self, block: BlockIndex) { 323 self.vcode.entry = block; 324 } 325 326 /// End the current basic block. Must be called after emitting vcode insts 327 /// for IR insts and prior to ending the function (building the VCode). 328 pub fn end_bb(&mut self) { 329 let start_idx = self.block_start; 330 let end_idx = self.vcode.insts.len(); 331 self.block_start = end_idx; 332 // Add the instruction index range to the list of blocks. 333 self.vcode 334 .block_ranges 335 .push((InsnIndex::new(start_idx), InsnIndex::new(end_idx))); 336 // End the successors list. 337 let succ_end = self.vcode.block_succs_preds.len(); 338 self.vcode 339 .block_succ_range 340 .push((self.succ_start as u32, succ_end as u32)); 341 self.succ_start = succ_end; 342 // End the blockparams list. 343 let block_params_end = self.vcode.block_params.len(); 344 self.vcode 345 .block_params_range 346 .push((self.block_params_start as u32, block_params_end as u32)); 347 self.block_params_start = block_params_end; 348 // End the branch blockparam args list. 349 let branch_block_arg_succ_end = self.vcode.branch_block_arg_range.len(); 350 self.vcode.branch_block_arg_succ_range.push(( 351 self.branch_block_arg_succ_start as u32, 352 branch_block_arg_succ_end as u32, 353 )); 354 self.branch_block_arg_succ_start = branch_block_arg_succ_end; 355 } 356 357 pub fn add_block_param(&mut self, param: VirtualReg) { 358 self.vcode.block_params.push(param.into()); 359 } 360 361 fn add_branch_args_for_succ(&mut self, args: &[Reg]) { 362 let start = self.vcode.branch_block_args.len(); 363 self.vcode 364 .branch_block_args 365 .extend(args.iter().map(|&arg| VReg::from(arg))); 366 let end = self.vcode.branch_block_args.len(); 367 self.vcode 368 .branch_block_arg_range 369 .push((start as u32, end as u32)); 370 } 371 372 /// Push an instruction for the current BB and current IR inst 373 /// within the BB. 374 pub fn push(&mut self, insn: I) { 375 self.vcode.insts.push(insn); 376 self.vcode.srclocs.push(self.cur_srcloc); 377 } 378 379 /// Add a successor block with branch args. 380 pub fn add_succ(&mut self, block: BlockIndex, args: &[Reg]) { 381 self.vcode.block_succs_preds.push(block); 382 self.add_branch_args_for_succ(args); 383 } 384 385 /// Set the current source location. 386 pub fn set_srcloc(&mut self, srcloc: RelSourceLoc) { 387 self.cur_srcloc = srcloc; 388 } 389 390 /// Add a debug value label to a register. 391 pub fn add_value_label(&mut self, reg: Reg, label: ValueLabel) { 392 // We'll fix up labels in reverse(). Because we're generating 393 // code bottom-to-top, the liverange of the label goes *from* 394 // the last index at which was defined (or 0, which is the end 395 // of the eventual function) *to* just this instruction, and 396 // no further. 397 let inst = InsnIndex::new(self.vcode.insts.len()); 398 let labels = self.debug_info.entry(label).or_insert_with(|| vec![]); 399 let last = labels 400 .last() 401 .map(|(_start, end, _vreg)| *end) 402 .unwrap_or(InsnIndex::new(0)); 403 labels.push((last, inst, reg.into())); 404 } 405 406 pub fn set_vreg_alias(&mut self, from: Reg, to: Reg) { 407 let from = from.into(); 408 let resolved_to = self.vcode.resolve_vreg_alias(to.into()); 409 // Disallow cycles (see below). 410 assert_ne!(resolved_to, from); 411 self.vcode.vreg_aliases.insert(from, resolved_to); 412 } 413 414 /// Access the constants. 415 pub fn constants(&mut self) -> &mut VCodeConstants { 416 &mut self.vcode.constants 417 } 418 419 fn compute_preds_from_succs(&mut self) { 420 // Compute predecessors from successors. In order to gather 421 // all preds for a block into a contiguous sequence, we build 422 // a list of (succ, pred) tuples and then sort. 423 let mut succ_pred_edges: Vec<(BlockIndex, BlockIndex)> = 424 Vec::with_capacity(self.vcode.block_succs_preds.len()); 425 for (pred, &(start, end)) in self.vcode.block_succ_range.iter().enumerate() { 426 let pred = BlockIndex::new(pred); 427 for i in start..end { 428 let succ = BlockIndex::new(self.vcode.block_succs_preds[i as usize].index()); 429 succ_pred_edges.push((succ, pred)); 430 } 431 } 432 succ_pred_edges.sort_unstable(); 433 434 let mut i = 0; 435 for succ in 0..self.vcode.num_blocks() { 436 let succ = BlockIndex::new(succ); 437 let start = self.vcode.block_succs_preds.len(); 438 while i < succ_pred_edges.len() && succ_pred_edges[i].0 == succ { 439 let pred = succ_pred_edges[i].1; 440 self.vcode.block_succs_preds.push(pred); 441 i += 1; 442 } 443 let end = self.vcode.block_succs_preds.len(); 444 self.vcode.block_pred_range.push((start as u32, end as u32)); 445 } 446 } 447 448 /// Called once, when a build in Backward order is complete, to 449 /// perform the overall reversal (into final forward order) and 450 /// finalize metadata accordingly. 451 fn reverse_and_finalize(&mut self) { 452 let n_insts = self.vcode.insts.len(); 453 if n_insts == 0 { 454 return; 455 } 456 457 // Reverse the per-block and per-inst sequences. 458 self.vcode.block_ranges.reverse(); 459 // block_params_range is indexed by block (and blocks were 460 // traversed in reverse) so we reverse it; but block-param 461 // sequences in the concatenated vec can remain in reverse 462 // order (it is effectively an arena of arbitrarily-placed 463 // referenced sequences). 464 self.vcode.block_params_range.reverse(); 465 // Likewise, we reverse block_succ_range, but the block_succ 466 // concatenated array can remain as-is. 467 self.vcode.block_succ_range.reverse(); 468 self.vcode.insts.reverse(); 469 self.vcode.srclocs.reverse(); 470 // Likewise, branch_block_arg_succ_range is indexed by block 471 // so must be reversed. 472 self.vcode.branch_block_arg_succ_range.reverse(); 473 474 // To translate an instruction index *endpoint* in reversed 475 // order to forward order, compute `n_insts - i`. 476 // 477 // Why not `n_insts - 1 - i`? That would be correct to 478 // translate an individual instruction index (for ten insts 0 479 // to 9 inclusive, inst 0 becomes 9, and inst 9 becomes 480 // 0). But for the usual inclusive-start, exclusive-end range 481 // idiom, inclusive starts become exclusive ends and 482 // vice-versa, so e.g. an (inclusive) start of 0 becomes an 483 // (exclusive) end of 10. 484 let translate = |inst: InsnIndex| InsnIndex::new(n_insts - inst.index()); 485 486 // Edit the block-range instruction indices. 487 for tuple in &mut self.vcode.block_ranges { 488 let (start, end) = *tuple; 489 *tuple = (translate(end), translate(start)); // Note reversed order. 490 } 491 492 // Generate debug-value labels based on per-label maps. 493 for (label, tuples) in &self.debug_info { 494 for &(start, end, vreg) in tuples { 495 let vreg = self.vcode.resolve_vreg_alias(vreg); 496 let fwd_start = translate(end); 497 let fwd_end = translate(start); 498 self.vcode 499 .debug_value_labels 500 .push((vreg, fwd_start, fwd_end, label.as_u32())); 501 } 502 } 503 504 // Now sort debug value labels by VReg, as required 505 // by regalloc2. 506 self.vcode 507 .debug_value_labels 508 .sort_unstable_by_key(|(vreg, _, _, _)| *vreg); 509 } 510 511 fn collect_operands(&mut self) { 512 for (i, insn) in self.vcode.insts.iter().enumerate() { 513 // Push operands from the instruction onto the operand list. 514 // 515 // We rename through the vreg alias table as we collect 516 // the operands. This is better than a separate post-pass 517 // over operands, because it has more cache locality: 518 // operands only need to pass through L1 once. This is 519 // also better than renaming instructions' 520 // operands/registers while lowering, because here we only 521 // need to do the `match` over the instruction to visit 522 // its register fields (which is slow, branchy code) once. 523 524 let vreg_aliases = &self.vcode.vreg_aliases; 525 let allocatable = PRegSet::from(self.vcode.machine_env()); 526 let mut op_collector = 527 OperandCollector::new(&mut self.vcode.operands, allocatable, |vreg| { 528 VCode::<I>::resolve_vreg_alias_impl(vreg_aliases, vreg) 529 }); 530 insn.get_operands(&mut op_collector); 531 let (ops, clobbers) = op_collector.finish(); 532 self.vcode.operand_ranges.push(ops); 533 534 if clobbers != PRegSet::default() { 535 self.vcode.clobbers.insert(InsnIndex::new(i), clobbers); 536 } 537 538 if let Some((dst, src)) = insn.is_move() { 539 // We should never see non-virtual registers present in move 540 // instructions. 541 assert!( 542 src.is_virtual(), 543 "the real register {:?} was used as the source of a move instruction", 544 src 545 ); 546 assert!( 547 dst.to_reg().is_virtual(), 548 "the real register {:?} was used as the destination of a move instruction", 549 dst.to_reg() 550 ); 551 } 552 } 553 554 // Translate blockparam args via the vreg aliases table as well. 555 for arg in &mut self.vcode.branch_block_args { 556 let new_arg = VCode::<I>::resolve_vreg_alias_impl(&self.vcode.vreg_aliases, *arg); 557 trace!("operandcollector: block arg {:?} -> {:?}", arg, new_arg); 558 *arg = new_arg; 559 } 560 } 561 562 /// Build the final VCode. 563 pub fn build(mut self, vregs: VRegAllocator<I>) -> VCode<I> { 564 self.vcode.vreg_types = vregs.vreg_types; 565 self.vcode.facts = vregs.facts; 566 self.vcode.reftyped_vregs = vregs.reftyped_vregs; 567 568 if self.direction == VCodeBuildDirection::Backward { 569 self.reverse_and_finalize(); 570 } 571 self.collect_operands(); 572 573 // Apply register aliases to the `reftyped_vregs` list since this list 574 // will be returned directly to `regalloc2` eventually and all 575 // operands/results of instructions will use the alias-resolved vregs 576 // from `regalloc2`'s perspective. 577 // 578 // Also note that `reftyped_vregs` can't have duplicates, so after the 579 // aliases are applied duplicates are removed. 580 for reg in self.vcode.reftyped_vregs.iter_mut() { 581 *reg = VCode::<I>::resolve_vreg_alias_impl(&self.vcode.vreg_aliases, *reg); 582 } 583 self.vcode.reftyped_vregs.sort(); 584 self.vcode.reftyped_vregs.dedup(); 585 586 self.compute_preds_from_succs(); 587 self.vcode.debug_value_labels.sort_unstable(); 588 self.vcode 589 } 590 } 591 592 /// Is this type a reference type? 593 fn is_reftype(ty: Type) -> bool { 594 ty == types::R64 || ty == types::R32 595 } 596 597 const NO_INST_OFFSET: CodeOffset = u32::MAX; 598 599 impl<I: VCodeInst> VCode<I> { 600 /// New empty VCode. 601 fn new( 602 sigs: SigSet, 603 abi: Callee<I::ABIMachineSpec>, 604 emit_info: I::Info, 605 block_order: BlockLoweringOrder, 606 constants: VCodeConstants, 607 ) -> VCode<I> { 608 let n_blocks = block_order.lowered_order().len(); 609 VCode { 610 sigs, 611 vreg_types: vec![], 612 insts: Vec::with_capacity(10 * n_blocks), 613 operands: Vec::with_capacity(30 * n_blocks), 614 operand_ranges: Vec::with_capacity(10 * n_blocks), 615 clobbers: FxHashMap::default(), 616 srclocs: Vec::with_capacity(10 * n_blocks), 617 entry: BlockIndex::new(0), 618 block_ranges: Vec::with_capacity(n_blocks), 619 block_succ_range: Vec::with_capacity(n_blocks), 620 block_succs_preds: Vec::with_capacity(2 * n_blocks), 621 block_pred_range: Vec::with_capacity(n_blocks), 622 block_params_range: Vec::with_capacity(n_blocks), 623 block_params: Vec::with_capacity(5 * n_blocks), 624 branch_block_args: Vec::with_capacity(10 * n_blocks), 625 branch_block_arg_range: Vec::with_capacity(2 * n_blocks), 626 branch_block_arg_succ_range: Vec::with_capacity(n_blocks), 627 block_order, 628 abi, 629 emit_info, 630 reftyped_vregs: vec![], 631 constants, 632 debug_value_labels: vec![], 633 vreg_aliases: FxHashMap::with_capacity_and_hasher(10 * n_blocks, Default::default()), 634 facts: vec![], 635 } 636 } 637 638 /// Get the ABI-dependent MachineEnv for managing register allocation. 639 pub fn machine_env(&self) -> &MachineEnv { 640 self.abi.machine_env(&self.sigs) 641 } 642 643 /// Get the number of blocks. Block indices will be in the range `0 .. 644 /// (self.num_blocks() - 1)`. 645 pub fn num_blocks(&self) -> usize { 646 self.block_ranges.len() 647 } 648 649 /// The number of lowered instructions. 650 pub fn num_insts(&self) -> usize { 651 self.insts.len() 652 } 653 654 /// Get the successors for a block. 655 pub fn succs(&self, block: BlockIndex) -> &[BlockIndex] { 656 let (start, end) = self.block_succ_range[block.index()]; 657 &self.block_succs_preds[start as usize..end as usize] 658 } 659 660 fn compute_clobbers(&self, regalloc: ®alloc2::Output) -> Vec<Writable<RealReg>> { 661 // Compute clobbered registers. 662 let mut clobbered = vec![]; 663 let mut clobbered_set = FxHashSet::default(); 664 665 // All moves are included in clobbers. 666 for edit in ®alloc.edits { 667 let Edit::Move { to, .. } = edit.1; 668 if let Some(preg) = to.as_reg() { 669 let reg = RealReg::from(preg); 670 if clobbered_set.insert(reg) { 671 clobbered.push(Writable::from_reg(reg)); 672 } 673 } 674 } 675 676 for (i, (start, end)) in self.operand_ranges.iter().enumerate() { 677 // Skip this instruction if not "included in clobbers" as 678 // per the MachInst. (Some backends use this to implement 679 // ABI specifics; e.g., excluding calls of the same ABI as 680 // the current function from clobbers, because by 681 // definition everything clobbered by the call can be 682 // clobbered by this function without saving as well.) 683 if !self.insts[i].is_included_in_clobbers() { 684 continue; 685 } 686 687 let start = *start as usize; 688 let end = *end as usize; 689 let operands = &self.operands[start..end]; 690 let allocs = ®alloc.allocs[start..end]; 691 for (operand, alloc) in operands.iter().zip(allocs.iter()) { 692 // We're interested only in writes (Mods or Defs). 693 if operand.kind() == OperandKind::Use { 694 continue; 695 } 696 if let Some(preg) = alloc.as_reg() { 697 let reg = RealReg::from(preg); 698 if clobbered_set.insert(reg) { 699 clobbered.push(Writable::from_reg(reg)); 700 } 701 } 702 } 703 704 // Also add explicitly-clobbered registers. 705 for preg in self 706 .clobbers 707 .get(&InsnIndex::new(i)) 708 .cloned() 709 .unwrap_or_default() 710 { 711 let reg = RealReg::from(preg); 712 if clobbered_set.insert(reg) { 713 clobbered.push(Writable::from_reg(reg)); 714 } 715 } 716 } 717 718 clobbered 719 } 720 721 /// Emit the instructions to a `MachBuffer`, containing fixed-up 722 /// code and external reloc/trap/etc. records ready for use. Takes 723 /// the regalloc results as well. 724 /// 725 /// Returns the machine code itself, and optionally metadata 726 /// and/or a disassembly, as an `EmitResult`. The `VCode` itself 727 /// is consumed by the emission process. 728 pub fn emit( 729 mut self, 730 regalloc: ®alloc2::Output, 731 want_disasm: bool, 732 flags: &settings::Flags, 733 ctrl_plane: &mut ControlPlane, 734 ) -> EmitResult 735 where 736 I: VCodeInst, 737 { 738 // To write into disasm string. 739 use core::fmt::Write; 740 741 let _tt = timing::vcode_emit(); 742 let mut buffer = MachBuffer::new(); 743 let mut bb_starts: Vec<Option<CodeOffset>> = vec![]; 744 745 // The first M MachLabels are reserved for block indices. 746 buffer.reserve_labels_for_blocks(self.num_blocks()); 747 748 // Register all allocated constants with the `MachBuffer` to ensure that 749 // any references to the constants during instructions can be handled 750 // correctly. 751 buffer.register_constants(&self.constants); 752 753 // Construct the final order we emit code in: cold blocks at the end. 754 let mut final_order: SmallVec<[BlockIndex; 16]> = smallvec![]; 755 let mut cold_blocks: SmallVec<[BlockIndex; 16]> = smallvec![]; 756 for block in 0..self.num_blocks() { 757 let block = BlockIndex::new(block); 758 if self.block_order.is_cold(block) { 759 cold_blocks.push(block); 760 } else { 761 final_order.push(block); 762 } 763 } 764 final_order.extend(cold_blocks.clone()); 765 766 // Compute/save info we need for the prologue: clobbers and 767 // number of spillslots. 768 // 769 // We clone `abi` here because we will mutate it as we 770 // generate the prologue and set other info, but we can't 771 // mutate `VCode`. The info it usually carries prior to 772 // setting clobbers is fairly minimal so this should be 773 // relatively cheap. 774 let clobbers = self.compute_clobbers(regalloc); 775 self.abi.set_num_spillslots(regalloc.num_spillslots); 776 self.abi.set_clobbered(clobbers); 777 self.abi.compute_frame_layout(&self.sigs); 778 779 // Emit blocks. 780 let mut cur_srcloc = None; 781 let mut last_offset = None; 782 let mut inst_offsets = vec![]; 783 let mut state = I::State::new(&self.abi, std::mem::take(ctrl_plane)); 784 785 let mut disasm = String::new(); 786 787 if !self.debug_value_labels.is_empty() { 788 inst_offsets.resize(self.insts.len(), NO_INST_OFFSET); 789 } 790 791 // Count edits per block ahead of time; this is needed for 792 // lookahead island emission. (We could derive it per-block 793 // with binary search in the edit list, but it's more 794 // efficient to do it in one pass here.) 795 let mut ra_edits_per_block: SmallVec<[u32; 64]> = smallvec![]; 796 let mut edit_idx = 0; 797 for block in 0..self.num_blocks() { 798 let end_inst = self.block_ranges[block].1; 799 let start_edit_idx = edit_idx; 800 while edit_idx < regalloc.edits.len() && regalloc.edits[edit_idx].0.inst() < end_inst { 801 edit_idx += 1; 802 } 803 let end_edit_idx = edit_idx; 804 ra_edits_per_block.push((end_edit_idx - start_edit_idx) as u32); 805 } 806 807 let is_forward_edge_cfi_enabled = self.abi.is_forward_edge_cfi_enabled(); 808 let mut bb_padding = match flags.bb_padding_log2_minus_one() { 809 0 => Vec::new(), 810 n => vec![0; 1 << (n - 1)], 811 }; 812 let mut total_bb_padding = 0; 813 814 for (block_order_idx, &block) in final_order.iter().enumerate() { 815 trace!("emitting block {:?}", block); 816 817 // Call the new block hook for state 818 state.on_new_block(); 819 820 // Emit NOPs to align the block. 821 let new_offset = I::align_basic_block(buffer.cur_offset()); 822 while new_offset > buffer.cur_offset() { 823 // Pad with NOPs up to the aligned block offset. 824 let nop = I::gen_nop((new_offset - buffer.cur_offset()) as usize); 825 nop.emit(&[], &mut buffer, &self.emit_info, &mut Default::default()); 826 } 827 assert_eq!(buffer.cur_offset(), new_offset); 828 829 let do_emit = |inst: &I, 830 allocs: &[Allocation], 831 disasm: &mut String, 832 buffer: &mut MachBuffer<I>, 833 state: &mut I::State| { 834 if want_disasm && !inst.is_args() { 835 let mut s = state.clone(); 836 writeln!(disasm, " {}", inst.pretty_print_inst(allocs, &mut s)).unwrap(); 837 } 838 inst.emit(allocs, buffer, &self.emit_info, state); 839 }; 840 841 // Is this the first block? Emit the prologue directly if so. 842 if block == self.entry { 843 trace!(" -> entry block"); 844 buffer.start_srcloc(Default::default()); 845 state.pre_sourceloc(Default::default()); 846 for inst in &self.abi.gen_prologue() { 847 do_emit(&inst, &[], &mut disasm, &mut buffer, &mut state); 848 } 849 buffer.end_srcloc(); 850 } 851 852 // Now emit the regular block body. 853 854 buffer.bind_label(MachLabel::from_block(block), state.ctrl_plane_mut()); 855 856 if want_disasm { 857 writeln!(&mut disasm, "block{}:", block.index()).unwrap(); 858 } 859 860 if flags.machine_code_cfg_info() { 861 // Track BB starts. If we have backed up due to MachBuffer 862 // branch opts, note that the removed blocks were removed. 863 let cur_offset = buffer.cur_offset(); 864 if last_offset.is_some() && cur_offset <= last_offset.unwrap() { 865 for i in (0..bb_starts.len()).rev() { 866 if bb_starts[i].is_some() && cur_offset > bb_starts[i].unwrap() { 867 break; 868 } 869 bb_starts[i] = None; 870 } 871 } 872 bb_starts.push(Some(cur_offset)); 873 last_offset = Some(cur_offset); 874 } 875 876 if let Some(block_start) = I::gen_block_start( 877 self.block_order.is_indirect_branch_target(block), 878 is_forward_edge_cfi_enabled, 879 ) { 880 do_emit(&block_start, &[], &mut disasm, &mut buffer, &mut state); 881 } 882 883 for inst_or_edit in regalloc.block_insts_and_edits(&self, block) { 884 match inst_or_edit { 885 InstOrEdit::Inst(iix) => { 886 if !self.debug_value_labels.is_empty() { 887 // If we need to produce debug info, 888 // record the offset of each instruction 889 // so that we can translate value-label 890 // ranges to machine-code offsets. 891 892 // Cold blocks violate monotonicity 893 // assumptions elsewhere (that 894 // instructions in inst-index order are in 895 // order in machine code), so we omit 896 // their offsets here. Value-label range 897 // generation below will skip empty ranges 898 // and ranges with to-offsets of zero. 899 if !self.block_order.is_cold(block) { 900 inst_offsets[iix.index()] = buffer.cur_offset(); 901 } 902 } 903 904 // Update the srcloc at this point in the buffer. 905 let srcloc = self.srclocs[iix.index()]; 906 if cur_srcloc != Some(srcloc) { 907 if cur_srcloc.is_some() { 908 buffer.end_srcloc(); 909 } 910 buffer.start_srcloc(srcloc); 911 cur_srcloc = Some(srcloc); 912 } 913 state.pre_sourceloc(cur_srcloc.unwrap_or_default()); 914 915 // If this is a safepoint, compute a stack map 916 // and pass it to the emit state. 917 if self.insts[iix.index()].is_safepoint() { 918 let mut safepoint_slots: SmallVec<[SpillSlot; 8]> = smallvec![]; 919 // Find the contiguous range of 920 // (progpoint, allocation) safepoint slot 921 // records in `regalloc.safepoint_slots` 922 // for this instruction index. 923 let safepoint_slots_start = regalloc 924 .safepoint_slots 925 .binary_search_by(|(progpoint, _alloc)| { 926 if progpoint.inst() >= iix { 927 std::cmp::Ordering::Greater 928 } else { 929 std::cmp::Ordering::Less 930 } 931 }) 932 .unwrap_err(); 933 934 for (_, alloc) in regalloc.safepoint_slots[safepoint_slots_start..] 935 .iter() 936 .take_while(|(progpoint, _)| progpoint.inst() == iix) 937 { 938 let slot = alloc.as_stack().unwrap(); 939 safepoint_slots.push(slot); 940 } 941 if !safepoint_slots.is_empty() { 942 let stack_map = self 943 .abi 944 .spillslots_to_stack_map(&safepoint_slots[..], &state); 945 state.pre_safepoint(stack_map); 946 } 947 } 948 949 // Get the allocations for this inst from the regalloc result. 950 let allocs = regalloc.inst_allocs(iix); 951 952 // If the instruction we are about to emit is 953 // a return, place an epilogue at this point 954 // (and don't emit the return; the actual 955 // epilogue will contain it). 956 if self.insts[iix.index()].is_term() == MachTerminator::Ret { 957 for inst in self.abi.gen_epilogue() { 958 do_emit(&inst, &[], &mut disasm, &mut buffer, &mut state); 959 } 960 } else { 961 // Emit the instruction! 962 do_emit( 963 &self.insts[iix.index()], 964 allocs, 965 &mut disasm, 966 &mut buffer, 967 &mut state, 968 ); 969 } 970 } 971 972 InstOrEdit::Edit(Edit::Move { from, to }) => { 973 // Create a move/spill/reload instruction and 974 // immediately emit it. 975 match (from.as_reg(), to.as_reg()) { 976 (Some(from), Some(to)) => { 977 // Reg-to-reg move. 978 let from_rreg = Reg::from(from); 979 let to_rreg = Writable::from_reg(Reg::from(to)); 980 debug_assert_eq!(from.class(), to.class()); 981 let ty = I::canonical_type_for_rc(from.class()); 982 let mv = I::gen_move(to_rreg, from_rreg, ty); 983 do_emit(&mv, &[], &mut disasm, &mut buffer, &mut state); 984 } 985 (Some(from), None) => { 986 // Spill from register to spillslot. 987 let to = to.as_stack().unwrap(); 988 let from_rreg = RealReg::from(from); 989 let spill = self.abi.gen_spill(to, from_rreg); 990 do_emit(&spill, &[], &mut disasm, &mut buffer, &mut state); 991 } 992 (None, Some(to)) => { 993 // Load from spillslot to register. 994 let from = from.as_stack().unwrap(); 995 let to_rreg = Writable::from_reg(RealReg::from(to)); 996 let reload = self.abi.gen_reload(to_rreg, from); 997 do_emit(&reload, &[], &mut disasm, &mut buffer, &mut state); 998 } 999 (None, None) => { 1000 panic!("regalloc2 should have eliminated stack-to-stack moves!"); 1001 } 1002 } 1003 } 1004 } 1005 } 1006 1007 if cur_srcloc.is_some() { 1008 buffer.end_srcloc(); 1009 cur_srcloc = None; 1010 } 1011 1012 // Do we need an island? Get the worst-case size of the next BB, add 1013 // it to the optional padding behind the block, and pass this to the 1014 // `MachBuffer` to determine if an island is necessary. 1015 let worst_case_next_bb = if block_order_idx < final_order.len() - 1 { 1016 let next_block = final_order[block_order_idx + 1]; 1017 let next_block_range = self.block_ranges[next_block.index()]; 1018 let next_block_size = 1019 (next_block_range.1.index() - next_block_range.0.index()) as u32; 1020 let next_block_ra_insertions = ra_edits_per_block[next_block.index()]; 1021 I::worst_case_size() * (next_block_size + next_block_ra_insertions) 1022 } else { 1023 0 1024 }; 1025 let padding = if bb_padding.is_empty() { 1026 0 1027 } else { 1028 bb_padding.len() as u32 + I::LabelUse::ALIGN - 1 1029 }; 1030 if buffer.island_needed(padding + worst_case_next_bb) { 1031 buffer.emit_island(padding + worst_case_next_bb, ctrl_plane); 1032 } 1033 1034 // Insert padding, if configured, to stress the `MachBuffer`'s 1035 // relocation and island calculations. 1036 // 1037 // Padding can get quite large during fuzzing though so place a 1038 // total cap on it where when a per-function threshold is exceeded 1039 // the padding is turned back down to zero. This avoids a small-ish 1040 // test case generating a GB+ memory footprint in Cranelift for 1041 // example. 1042 if !bb_padding.is_empty() { 1043 buffer.put_data(&bb_padding); 1044 buffer.align_to(I::LabelUse::ALIGN); 1045 total_bb_padding += bb_padding.len(); 1046 if total_bb_padding > (150 << 20) { 1047 bb_padding = Vec::new(); 1048 } 1049 } 1050 } 1051 1052 // Do any optimizations on branches at tail of buffer, as if we had 1053 // bound one last label. 1054 buffer.optimize_branches(ctrl_plane); 1055 1056 // emission state is not needed anymore, move control plane back out 1057 *ctrl_plane = state.take_ctrl_plane(); 1058 1059 let func_body_len = buffer.cur_offset(); 1060 1061 // Create `bb_edges` and final (filtered) `bb_starts`. 1062 let mut bb_edges = vec![]; 1063 let mut bb_offsets = vec![]; 1064 if flags.machine_code_cfg_info() { 1065 for block in 0..self.num_blocks() { 1066 if bb_starts[block].is_none() { 1067 // Block was deleted by MachBuffer; skip. 1068 continue; 1069 } 1070 let from = bb_starts[block].unwrap(); 1071 1072 bb_offsets.push(from); 1073 // Resolve each `succ` label and add edges. 1074 let succs = self.block_succs(BlockIndex::new(block)); 1075 for &succ in succs.iter() { 1076 let to = buffer.resolve_label_offset(MachLabel::from_block(succ)); 1077 bb_edges.push((from, to)); 1078 } 1079 } 1080 } 1081 1082 self.monotonize_inst_offsets(&mut inst_offsets[..], func_body_len); 1083 let value_labels_ranges = 1084 self.compute_value_labels_ranges(regalloc, &inst_offsets[..], func_body_len); 1085 let frame_size = self.abi.frame_size(); 1086 1087 EmitResult { 1088 buffer: buffer.finish(&self.constants, ctrl_plane), 1089 bb_offsets, 1090 bb_edges, 1091 func_body_len, 1092 disasm: if want_disasm { Some(disasm) } else { None }, 1093 sized_stackslot_offsets: self.abi.sized_stackslot_offsets().clone(), 1094 dynamic_stackslot_offsets: self.abi.dynamic_stackslot_offsets().clone(), 1095 value_labels_ranges, 1096 frame_size, 1097 } 1098 } 1099 1100 fn monotonize_inst_offsets(&self, inst_offsets: &mut [CodeOffset], func_body_len: u32) { 1101 if self.debug_value_labels.is_empty() { 1102 return; 1103 } 1104 1105 // During emission, branch removal can make offsets of instructions incorrect. 1106 // Consider the following sequence: [insi][jmp0][jmp1][jmp2][insj] 1107 // It will be recorded as (say): [30] [34] [38] [42] [<would be 46>] 1108 // When the jumps get removed we are left with (in "inst_offsets"): 1109 // [insi][jmp0][jmp1][jmp2][insj][...] 1110 // [30] [34] [38] [42] [34] 1111 // Which violates the monotonicity invariant. This method sets offsets of these 1112 // removed instructions such as to make them appear zero-sized: 1113 // [insi][jmp0][jmp1][jmp2][insj][...] 1114 // [30] [34] [34] [34] [34] 1115 // 1116 let mut next_offset = func_body_len; 1117 for inst_index in (0..(inst_offsets.len() - 1)).rev() { 1118 let inst_offset = inst_offsets[inst_index]; 1119 1120 // Not all instructions get their offsets recorded. 1121 if inst_offset == NO_INST_OFFSET { 1122 continue; 1123 } 1124 1125 if inst_offset > next_offset { 1126 trace!( 1127 "Fixing code offset of the removed Inst {}: {} -> {}", 1128 inst_index, 1129 inst_offset, 1130 next_offset 1131 ); 1132 inst_offsets[inst_index] = next_offset; 1133 continue; 1134 } 1135 1136 next_offset = inst_offset; 1137 } 1138 } 1139 1140 fn compute_value_labels_ranges( 1141 &self, 1142 regalloc: ®alloc2::Output, 1143 inst_offsets: &[CodeOffset], 1144 func_body_len: u32, 1145 ) -> ValueLabelsRanges { 1146 if self.debug_value_labels.is_empty() { 1147 return ValueLabelsRanges::default(); 1148 } 1149 1150 let mut value_labels_ranges: ValueLabelsRanges = HashMap::new(); 1151 for &(label, from, to, alloc) in ®alloc.debug_locations { 1152 let ranges = value_labels_ranges 1153 .entry(ValueLabel::from_u32(label)) 1154 .or_insert_with(|| vec![]); 1155 let from_offset = inst_offsets[from.inst().index()]; 1156 let to_offset = if to.inst().index() == inst_offsets.len() { 1157 func_body_len 1158 } else { 1159 inst_offsets[to.inst().index()] 1160 }; 1161 1162 // Empty ranges or unavailable offsets can happen 1163 // due to cold blocks and branch removal (see above). 1164 if from_offset == NO_INST_OFFSET 1165 || to_offset == NO_INST_OFFSET 1166 || from_offset == to_offset 1167 { 1168 continue; 1169 } 1170 1171 let loc = if let Some(preg) = alloc.as_reg() { 1172 LabelValueLoc::Reg(Reg::from(preg)) 1173 } else { 1174 let slot = alloc.as_stack().unwrap(); 1175 let sp_offset = self.abi.get_spillslot_offset(slot); 1176 let sp_to_caller_sp_offset = self.abi.nominal_sp_to_caller_sp_offset(); 1177 let caller_sp_to_cfa_offset = 1178 crate::isa::unwind::systemv::caller_sp_to_cfa_offset(); 1179 let cfa_to_sp_offset = -((sp_to_caller_sp_offset + caller_sp_to_cfa_offset) as i64); 1180 LabelValueLoc::CFAOffset(cfa_to_sp_offset + sp_offset) 1181 }; 1182 1183 // ValueLocRanges are recorded by *instruction-end 1184 // offset*. `from_offset` is the *start* of the 1185 // instruction; that is the same as the end of another 1186 // instruction, so we only want to begin coverage once 1187 // we are past the previous instruction's end. 1188 let start = from_offset + 1; 1189 1190 // Likewise, `end` is exclusive, but we want to 1191 // *include* the end of the last 1192 // instruction. `to_offset` is the start of the 1193 // `to`-instruction, which is the exclusive end, i.e., 1194 // the first instruction not covered. That 1195 // instruction's start is the same as the end of the 1196 // last instruction that is included, so we go one 1197 // byte further to be sure to include it. 1198 let end = to_offset + 1; 1199 1200 // Coalesce adjacent ranges that for the same location 1201 // to minimize output size here and for the consumers. 1202 if let Some(last_loc_range) = ranges.last_mut() { 1203 if last_loc_range.loc == loc && last_loc_range.end == start { 1204 trace!( 1205 "Extending debug range for VL{} in {:?} to {}", 1206 label, 1207 loc, 1208 end 1209 ); 1210 last_loc_range.end = end; 1211 continue; 1212 } 1213 } 1214 1215 trace!( 1216 "Recording debug range for VL{} in {:?}: [Inst {}..Inst {}) [{}..{})", 1217 label, 1218 loc, 1219 from.inst().index(), 1220 to.inst().index(), 1221 start, 1222 end 1223 ); 1224 1225 ranges.push(ValueLocRange { loc, start, end }); 1226 } 1227 1228 value_labels_ranges 1229 } 1230 1231 /// Get the IR block for a BlockIndex, if one exists. 1232 pub fn bindex_to_bb(&self, block: BlockIndex) -> Option<ir::Block> { 1233 self.block_order.lowered_order()[block.index()].orig_block() 1234 } 1235 1236 pub fn resolve_vreg_alias(&self, from: regalloc2::VReg) -> regalloc2::VReg { 1237 Self::resolve_vreg_alias_impl(&self.vreg_aliases, from) 1238 } 1239 1240 /// Implementation of alias resolution. Separate helper that does 1241 /// not borrow `self` in order to allow working around borrowing 1242 /// restrictions. 1243 fn resolve_vreg_alias_impl( 1244 aliases: &FxHashMap<regalloc2::VReg, regalloc2::VReg>, 1245 from: regalloc2::VReg, 1246 ) -> regalloc2::VReg { 1247 // We prevent cycles from existing by resolving targets of 1248 // aliases eagerly before setting them. If the target resolves 1249 // to the origin of the alias, then a cycle would be created 1250 // and the alias is disallowed. Because of the structure of 1251 // SSA code (one instruction can refer to another's defs but 1252 // not vice-versa, except indirectly through 1253 // phis/blockparams), cycles should not occur as we use 1254 // aliases to redirect vregs to the temps that actually define 1255 // them. 1256 1257 let mut vreg = from; 1258 while let Some(to) = aliases.get(&vreg) { 1259 vreg = *to; 1260 } 1261 vreg 1262 } 1263 1264 #[inline] 1265 fn assert_no_vreg_aliases<'a>(&self, list: &'a [VReg]) -> &'a [VReg] { 1266 for vreg in list { 1267 self.assert_not_vreg_alias(*vreg); 1268 } 1269 list 1270 } 1271 1272 #[inline] 1273 fn assert_not_vreg_alias(&self, vreg: VReg) -> VReg { 1274 debug_assert!(self.resolve_vreg_alias(vreg) == vreg); 1275 vreg 1276 } 1277 1278 #[inline] 1279 fn assert_operand_not_vreg_alias(&self, op: Operand) -> Operand { 1280 // It should be true by construction that `Operand`s do not contain any 1281 // aliased vregs since they're all collected and mapped when the VCode 1282 // is itself constructed. 1283 self.assert_not_vreg_alias(op.vreg()); 1284 op 1285 } 1286 1287 /// Get the type of a VReg. 1288 pub fn vreg_type(&self, vreg: VReg) -> Type { 1289 self.vreg_types[vreg.vreg()] 1290 } 1291 1292 /// Get the fact, if any, for a given VReg. 1293 pub fn vreg_fact(&self, vreg: VReg) -> Option<&Fact> { 1294 let vreg = self.resolve_vreg_alias(vreg); 1295 self.facts[vreg.vreg()].as_ref() 1296 } 1297 1298 /// Set the fact for a given VReg. 1299 pub fn set_vreg_fact(&mut self, vreg: VReg, fact: Fact) { 1300 let vreg = self.resolve_vreg_alias(vreg); 1301 trace!("set fact on {}: {:?}", vreg, fact); 1302 self.facts[vreg.vreg()] = Some(fact); 1303 } 1304 1305 /// Does a given instruction define any facts? 1306 pub fn inst_defines_facts(&self, inst: InsnIndex) -> bool { 1307 self.inst_operands(inst) 1308 .iter() 1309 .filter(|o| o.kind() == OperandKind::Def) 1310 .map(|o| o.vreg()) 1311 .any(|vreg| self.facts[vreg.vreg()].is_some()) 1312 } 1313 } 1314 1315 impl<I: VCodeInst> std::ops::Index<InsnIndex> for VCode<I> { 1316 type Output = I; 1317 fn index(&self, idx: InsnIndex) -> &Self::Output { 1318 &self.insts[idx.index()] 1319 } 1320 } 1321 1322 impl<I: VCodeInst> RegallocFunction for VCode<I> { 1323 fn num_insts(&self) -> usize { 1324 self.insts.len() 1325 } 1326 1327 fn num_blocks(&self) -> usize { 1328 self.block_ranges.len() 1329 } 1330 1331 fn entry_block(&self) -> BlockIndex { 1332 self.entry 1333 } 1334 1335 fn block_insns(&self, block: BlockIndex) -> InstRange { 1336 let (start, end) = self.block_ranges[block.index()]; 1337 InstRange::forward(start, end) 1338 } 1339 1340 fn block_succs(&self, block: BlockIndex) -> &[BlockIndex] { 1341 let (start, end) = self.block_succ_range[block.index()]; 1342 &self.block_succs_preds[start as usize..end as usize] 1343 } 1344 1345 fn block_preds(&self, block: BlockIndex) -> &[BlockIndex] { 1346 let (start, end) = self.block_pred_range[block.index()]; 1347 &self.block_succs_preds[start as usize..end as usize] 1348 } 1349 1350 fn block_params(&self, block: BlockIndex) -> &[VReg] { 1351 // As a special case we don't return block params for the entry block, as all the arguments 1352 // will be defined by the `Inst::Args` instruction. 1353 if block == self.entry { 1354 return &[]; 1355 } 1356 1357 let (start, end) = self.block_params_range[block.index()]; 1358 let ret = &self.block_params[start as usize..end as usize]; 1359 // Currently block params are never aliased to another vreg, but 1360 // double-check just to be sure. 1361 self.assert_no_vreg_aliases(ret) 1362 } 1363 1364 fn branch_blockparams(&self, block: BlockIndex, _insn: InsnIndex, succ_idx: usize) -> &[VReg] { 1365 let (succ_range_start, succ_range_end) = self.branch_block_arg_succ_range[block.index()]; 1366 let succ_ranges = 1367 &self.branch_block_arg_range[succ_range_start as usize..succ_range_end as usize]; 1368 let (branch_block_args_start, branch_block_args_end) = succ_ranges[succ_idx]; 1369 let ret = &self.branch_block_args 1370 [branch_block_args_start as usize..branch_block_args_end as usize]; 1371 self.assert_no_vreg_aliases(ret) 1372 } 1373 1374 fn is_ret(&self, insn: InsnIndex) -> bool { 1375 match self.insts[insn.index()].is_term() { 1376 // We treat blocks terminated by an unconditional trap like a return for regalloc. 1377 MachTerminator::None => self.insts[insn.index()].is_trap(), 1378 MachTerminator::Ret | MachTerminator::RetCall => true, 1379 MachTerminator::Uncond | MachTerminator::Cond | MachTerminator::Indirect => false, 1380 } 1381 } 1382 1383 fn is_branch(&self, insn: InsnIndex) -> bool { 1384 match self.insts[insn.index()].is_term() { 1385 MachTerminator::Cond | MachTerminator::Uncond | MachTerminator::Indirect => true, 1386 _ => false, 1387 } 1388 } 1389 1390 fn requires_refs_on_stack(&self, insn: InsnIndex) -> bool { 1391 self.insts[insn.index()].is_safepoint() 1392 } 1393 1394 fn inst_operands(&self, insn: InsnIndex) -> &[Operand] { 1395 let (start, end) = self.operand_ranges[insn.index()]; 1396 let ret = &self.operands[start as usize..end as usize]; 1397 for op in ret { 1398 self.assert_operand_not_vreg_alias(*op); 1399 } 1400 ret 1401 } 1402 1403 fn inst_clobbers(&self, insn: InsnIndex) -> PRegSet { 1404 self.clobbers.get(&insn).cloned().unwrap_or_default() 1405 } 1406 1407 fn num_vregs(&self) -> usize { 1408 std::cmp::max(self.vreg_types.len(), first_user_vreg_index()) 1409 } 1410 1411 fn reftype_vregs(&self) -> &[VReg] { 1412 self.assert_no_vreg_aliases(&self.reftyped_vregs[..]) 1413 } 1414 1415 fn debug_value_labels(&self) -> &[(VReg, InsnIndex, InsnIndex, u32)] { 1416 // VRegs here are inserted into `debug_value_labels` after code is 1417 // generated and aliases are fully defined, so no double-check that 1418 // aliases are not lingering. 1419 for (vreg, ..) in self.debug_value_labels.iter() { 1420 self.assert_not_vreg_alias(*vreg); 1421 } 1422 &self.debug_value_labels[..] 1423 } 1424 1425 fn spillslot_size(&self, regclass: RegClass) -> usize { 1426 self.abi.get_spillslot_size(regclass) as usize 1427 } 1428 1429 fn allow_multiple_vreg_defs(&self) -> bool { 1430 // At least the s390x backend requires this, because the 1431 // `Loop` pseudo-instruction aggregates all Operands so pinned 1432 // vregs (RealRegs) may occur more than once. 1433 true 1434 } 1435 } 1436 1437 impl<I: VCodeInst> fmt::Debug for VCode<I> { 1438 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 1439 writeln!(f, "VCode {{")?; 1440 writeln!(f, " Entry block: {}", self.entry.index())?; 1441 1442 let mut state = Default::default(); 1443 1444 let mut alias_keys = self.vreg_aliases.keys().cloned().collect::<Vec<_>>(); 1445 alias_keys.sort_unstable(); 1446 for key in alias_keys { 1447 let dest = self.vreg_aliases.get(&key).unwrap(); 1448 writeln!(f, " {:?} := {:?}", Reg::from(key), Reg::from(*dest))?; 1449 } 1450 1451 for block in 0..self.num_blocks() { 1452 let block = BlockIndex::new(block); 1453 writeln!(f, "Block {}:", block.index())?; 1454 if let Some(bb) = self.bindex_to_bb(block) { 1455 writeln!(f, " (original IR block: {})", bb)?; 1456 } 1457 for succ in self.succs(block) { 1458 writeln!(f, " (successor: Block {})", succ.index())?; 1459 } 1460 let (start, end) = self.block_ranges[block.index()]; 1461 writeln!( 1462 f, 1463 " (instruction range: {} .. {})", 1464 start.index(), 1465 end.index() 1466 )?; 1467 for inst in start.index()..end.index() { 1468 writeln!( 1469 f, 1470 " Inst {}: {}", 1471 inst, 1472 self.insts[inst].pretty_print_inst(&[], &mut state) 1473 )?; 1474 for operand in self.inst_operands(InsnIndex::new(inst)) { 1475 if operand.kind() == OperandKind::Def { 1476 if let Some(fact) = &self.facts[operand.vreg().vreg()] { 1477 writeln!(f, " v{} ! {}", operand.vreg().vreg(), fact)?; 1478 } 1479 } 1480 } 1481 } 1482 } 1483 1484 writeln!(f, "}}")?; 1485 Ok(()) 1486 } 1487 } 1488 1489 /// This structure manages VReg allocation during the lifetime of the VCodeBuilder. 1490 pub struct VRegAllocator<I> { 1491 /// Next virtual register number to allocate. 1492 next_vreg: usize, 1493 1494 /// VReg IR-level types. 1495 vreg_types: Vec<Type>, 1496 1497 /// A set with the same contents as `reftyped_vregs`, in order to 1498 /// avoid inserting more than once. 1499 reftyped_vregs_set: FxHashSet<VReg>, 1500 1501 /// Reference-typed `regalloc2::VReg`s. The regalloc requires 1502 /// these in a dense slice (as opposed to querying the 1503 /// reftype-status of each vreg) for efficient iteration. 1504 reftyped_vregs: Vec<VReg>, 1505 1506 /// A deferred error, to be bubbled up to the top level of the 1507 /// lowering algorithm. We take this approach because we cannot 1508 /// currently propagate a `Result` upward through ISLE code (the 1509 /// lowering rules) or some ABI code. 1510 deferred_error: Option<CodegenError>, 1511 1512 /// Facts on VRegs, for proof-carrying code. 1513 facts: Vec<Option<Fact>>, 1514 1515 /// The type of instruction that this allocator makes registers for. 1516 _inst: core::marker::PhantomData<I>, 1517 } 1518 1519 impl<I: VCodeInst> VRegAllocator<I> { 1520 /// Make a new VRegAllocator. 1521 pub fn new() -> Self { 1522 Self { 1523 next_vreg: first_user_vreg_index(), 1524 vreg_types: vec![], 1525 facts: vec![], 1526 reftyped_vregs_set: FxHashSet::default(), 1527 reftyped_vregs: vec![], 1528 deferred_error: None, 1529 _inst: core::marker::PhantomData::default(), 1530 } 1531 } 1532 1533 /// Allocate a fresh ValueRegs. 1534 pub fn alloc(&mut self, ty: Type) -> CodegenResult<ValueRegs<Reg>> { 1535 if self.deferred_error.is_some() { 1536 return Err(CodegenError::CodeTooLarge); 1537 } 1538 let v = self.next_vreg; 1539 let (regclasses, tys) = I::rc_for_type(ty)?; 1540 self.next_vreg += regclasses.len(); 1541 if self.next_vreg >= VReg::MAX { 1542 return Err(CodegenError::CodeTooLarge); 1543 } 1544 1545 let regs: ValueRegs<Reg> = match regclasses { 1546 &[rc0] => ValueRegs::one(VReg::new(v, rc0).into()), 1547 &[rc0, rc1] => ValueRegs::two(VReg::new(v, rc0).into(), VReg::new(v + 1, rc1).into()), 1548 // We can extend this if/when we support 32-bit targets; e.g., 1549 // an i128 on a 32-bit machine will need up to four machine regs 1550 // for a `Value`. 1551 _ => panic!("Value must reside in 1 or 2 registers"), 1552 }; 1553 for (®_ty, ®) in tys.iter().zip(regs.regs().iter()) { 1554 self.set_vreg_type(reg.to_virtual_reg().unwrap(), reg_ty); 1555 } 1556 1557 // Create empty facts for each allocated vreg. 1558 self.facts 1559 .resize(usize::try_from(self.next_vreg).unwrap(), None); 1560 1561 Ok(regs) 1562 } 1563 1564 /// Allocate a fresh ValueRegs, deferring any out-of-vregs 1565 /// errors. This is useful in places where we cannot bubble a 1566 /// `CodegenResult` upward easily, and which are known to be 1567 /// invoked from within the lowering loop that checks the deferred 1568 /// error status below. 1569 pub fn alloc_with_deferred_error(&mut self, ty: Type) -> ValueRegs<Reg> { 1570 match self.alloc(ty) { 1571 Ok(x) => x, 1572 Err(e) => { 1573 self.deferred_error = Some(e); 1574 self.bogus_for_deferred_error(ty) 1575 } 1576 } 1577 } 1578 1579 /// Take any deferred error that was accumulated by `alloc_with_deferred_error`. 1580 pub fn take_deferred_error(&mut self) -> Option<CodegenError> { 1581 self.deferred_error.take() 1582 } 1583 1584 /// Produce an bogus VReg placeholder with the proper number of 1585 /// registers for the given type. This is meant to be used with 1586 /// deferred allocation errors (see `Lower::alloc_tmp()`). 1587 fn bogus_for_deferred_error(&self, ty: Type) -> ValueRegs<Reg> { 1588 let (regclasses, _tys) = I::rc_for_type(ty).expect("must have valid type"); 1589 match regclasses { 1590 &[rc0] => ValueRegs::one(VReg::new(0, rc0).into()), 1591 &[rc0, rc1] => ValueRegs::two(VReg::new(0, rc0).into(), VReg::new(1, rc1).into()), 1592 _ => panic!("Value must reside in 1 or 2 registers"), 1593 } 1594 } 1595 1596 /// Set the type of this virtual register. 1597 pub fn set_vreg_type(&mut self, vreg: VirtualReg, ty: Type) { 1598 if self.vreg_types.len() <= vreg.index() { 1599 self.vreg_types.resize(vreg.index() + 1, ir::types::INVALID); 1600 } 1601 self.vreg_types[vreg.index()] = ty; 1602 if is_reftype(ty) { 1603 let vreg: VReg = vreg.into(); 1604 if self.reftyped_vregs_set.insert(vreg) { 1605 self.reftyped_vregs.push(vreg); 1606 } 1607 } 1608 } 1609 1610 /// Set the proof-carrying code fact on a given virtual register. 1611 /// 1612 /// Returns the old fact, if any (only one fact can be stored). 1613 pub fn set_fact(&mut self, vreg: VirtualReg, fact: Fact) -> Option<Fact> { 1614 trace!("vreg {:?} has fact: {:?}", vreg, fact); 1615 self.facts[vreg.index()].replace(fact) 1616 } 1617 1618 /// Take (and remove) a fact about a VReg. Used when setting up 1619 /// aliases: we want to move a fact from the alias vreg to the 1620 /// aliased vreg, to preserve facts about a value that were stated 1621 /// before we lowered its producer. 1622 pub fn take_fact(&mut self, vreg: VirtualReg) -> Option<Fact> { 1623 self.facts[vreg.index()].take() 1624 } 1625 1626 /// Set a fact only if one doesn't already exist. 1627 pub fn set_fact_if_missing(&mut self, vreg: VirtualReg, fact: Fact) { 1628 if self.facts[vreg.index()].is_none() { 1629 self.set_fact(vreg, fact); 1630 } 1631 } 1632 1633 /// Allocate a fresh ValueRegs, with a given fact to apply if 1634 /// the value fits in one VReg. 1635 pub fn alloc_with_maybe_fact( 1636 &mut self, 1637 ty: Type, 1638 fact: Option<Fact>, 1639 ) -> CodegenResult<ValueRegs<Reg>> { 1640 let result = self.alloc(ty)?; 1641 1642 // Ensure that we don't lose a fact on a value that splits 1643 // into multiple VRegs. 1644 assert!(result.len() == 1 || fact.is_none()); 1645 if let Some(fact) = fact { 1646 self.set_fact(result.regs()[0].to_virtual_reg().unwrap(), fact); 1647 } 1648 1649 Ok(result) 1650 } 1651 } 1652 1653 /// This structure tracks the large constants used in VCode that will be emitted separately by the 1654 /// [MachBuffer]. 1655 /// 1656 /// First, during the lowering phase, constants are inserted using 1657 /// [VCodeConstants.insert]; an intermediate handle, [VCodeConstant], tracks what constants are 1658 /// used in this phase. Some deduplication is performed, when possible, as constant 1659 /// values are inserted. 1660 /// 1661 /// Secondly, during the emission phase, the [MachBuffer] assigns [MachLabel]s for each of the 1662 /// constants so that instructions can refer to the value's memory location. The [MachBuffer] 1663 /// then writes the constant values to the buffer. 1664 #[derive(Default)] 1665 pub struct VCodeConstants { 1666 constants: PrimaryMap<VCodeConstant, VCodeConstantData>, 1667 pool_uses: HashMap<Constant, VCodeConstant>, 1668 well_known_uses: HashMap<*const [u8], VCodeConstant>, 1669 u64s: HashMap<[u8; 8], VCodeConstant>, 1670 } 1671 impl VCodeConstants { 1672 /// Initialize the structure with the expected number of constants. 1673 pub fn with_capacity(expected_num_constants: usize) -> Self { 1674 Self { 1675 constants: PrimaryMap::with_capacity(expected_num_constants), 1676 pool_uses: HashMap::with_capacity(expected_num_constants), 1677 well_known_uses: HashMap::new(), 1678 u64s: HashMap::new(), 1679 } 1680 } 1681 1682 /// Insert a constant; using this method indicates that a constant value will be used and thus 1683 /// will be emitted to the `MachBuffer`. The current implementation can deduplicate constants 1684 /// that are [VCodeConstantData::Pool] or [VCodeConstantData::WellKnown] but not 1685 /// [VCodeConstantData::Generated]. 1686 pub fn insert(&mut self, data: VCodeConstantData) -> VCodeConstant { 1687 match data { 1688 VCodeConstantData::Generated(_) => self.constants.push(data), 1689 VCodeConstantData::Pool(constant, _) => match self.pool_uses.get(&constant) { 1690 None => { 1691 let vcode_constant = self.constants.push(data); 1692 self.pool_uses.insert(constant, vcode_constant); 1693 vcode_constant 1694 } 1695 Some(&vcode_constant) => vcode_constant, 1696 }, 1697 VCodeConstantData::WellKnown(data_ref) => { 1698 match self.well_known_uses.entry(data_ref as *const [u8]) { 1699 Entry::Vacant(v) => { 1700 let vcode_constant = self.constants.push(data); 1701 v.insert(vcode_constant); 1702 vcode_constant 1703 } 1704 Entry::Occupied(o) => *o.get(), 1705 } 1706 } 1707 VCodeConstantData::U64(value) => match self.u64s.entry(value) { 1708 Entry::Vacant(v) => { 1709 let vcode_constant = self.constants.push(data); 1710 v.insert(vcode_constant); 1711 vcode_constant 1712 } 1713 Entry::Occupied(o) => *o.get(), 1714 }, 1715 } 1716 } 1717 1718 /// Return the number of constants inserted. 1719 pub fn len(&self) -> usize { 1720 self.constants.len() 1721 } 1722 1723 /// Iterate over the [VCodeConstant] keys inserted in this structure. 1724 pub fn keys(&self) -> Keys<VCodeConstant> { 1725 self.constants.keys() 1726 } 1727 1728 /// Iterate over the [VCodeConstant] keys and the data (as a byte slice) inserted in this 1729 /// structure. 1730 pub fn iter(&self) -> impl Iterator<Item = (VCodeConstant, &VCodeConstantData)> { 1731 self.constants.iter() 1732 } 1733 1734 /// Returns the data associated with the specified constant. 1735 pub fn get(&self, c: VCodeConstant) -> &VCodeConstantData { 1736 &self.constants[c] 1737 } 1738 1739 /// Checks if the given [VCodeConstantData] is registered as 1740 /// used by the pool. 1741 pub fn pool_uses(&self, constant: &VCodeConstantData) -> bool { 1742 match constant { 1743 VCodeConstantData::Pool(c, _) => self.pool_uses.contains_key(c), 1744 _ => false, 1745 } 1746 } 1747 } 1748 1749 /// A use of a constant by one or more VCode instructions; see [VCodeConstants]. 1750 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 1751 pub struct VCodeConstant(u32); 1752 entity_impl!(VCodeConstant); 1753 1754 /// Identify the different types of constant that can be inserted into [VCodeConstants]. Tracking 1755 /// these separately instead of as raw byte buffers allows us to avoid some duplication. 1756 pub enum VCodeConstantData { 1757 /// A constant already present in the Cranelift IR 1758 /// [ConstantPool](crate::ir::constant::ConstantPool). 1759 Pool(Constant, ConstantData), 1760 /// A reference to a well-known constant value that is statically encoded within the compiler. 1761 WellKnown(&'static [u8]), 1762 /// A constant value generated during lowering; the value may depend on the instruction context 1763 /// which makes it difficult to de-duplicate--if possible, use other variants. 1764 Generated(ConstantData), 1765 /// A constant of at most 64 bits. These are deduplicated as 1766 /// well. Stored as a fixed-size array of `u8` so that we do not 1767 /// encounter endianness problems when cross-compiling. 1768 U64([u8; 8]), 1769 } 1770 impl VCodeConstantData { 1771 /// Retrieve the constant data as a byte slice. 1772 pub fn as_slice(&self) -> &[u8] { 1773 match self { 1774 VCodeConstantData::Pool(_, d) | VCodeConstantData::Generated(d) => d.as_slice(), 1775 VCodeConstantData::WellKnown(d) => d, 1776 VCodeConstantData::U64(value) => &value[..], 1777 } 1778 } 1779 1780 /// Calculate the alignment of the constant data. 1781 pub fn alignment(&self) -> u32 { 1782 if self.as_slice().len() <= 8 { 1783 8 1784 } else { 1785 16 1786 } 1787 } 1788 } 1789 1790 #[cfg(test)] 1791 mod test { 1792 use super::*; 1793 use std::mem::size_of; 1794 1795 #[test] 1796 fn size_of_constant_structs() { 1797 assert_eq!(size_of::<Constant>(), 4); 1798 assert_eq!(size_of::<VCodeConstant>(), 4); 1799 assert_eq!(size_of::<ConstantData>(), 24); 1800 assert_eq!(size_of::<VCodeConstantData>(), 32); 1801 assert_eq!( 1802 size_of::<PrimaryMap<VCodeConstant, VCodeConstantData>>(), 1803 24 1804 ); 1805 // TODO The VCodeConstants structure's memory size could be further optimized. 1806 // With certain versions of Rust, each `HashMap` in `VCodeConstants` occupied at 1807 // least 48 bytes, making an empty `VCodeConstants` cost 120 bytes. 1808 } 1809 } 1810