1 //! This implements the VCode container: a CFG of Insts that have been lowered. 2 //! 3 //! VCode is virtual-register code. An instruction in VCode is almost a machine 4 //! instruction; however, its register slots can refer to virtual registers in 5 //! addition to real machine registers. 6 //! 7 //! VCode is structured with traditional basic blocks, and 8 //! each block must be terminated by an unconditional branch (one target), a 9 //! conditional branch (two targets), or a return (no targets). Note that this 10 //! slightly differs from the machine code of most ISAs: in most ISAs, a 11 //! conditional branch has one target (and the not-taken case falls through). 12 //! However, we expect that machine backends will elide branches to the following 13 //! block (i.e., zero-offset jumps), and will be able to codegen a branch-cond / 14 //! branch-uncond pair if *both* targets are not fallthrough. This allows us to 15 //! play with layout prior to final binary emission, as well, if we want. 16 //! 17 //! See the main module comment in `mod.rs` for more details on the VCode-based 18 //! backend pipeline. 19 20 use crate::fx::FxHashMap; 21 use crate::fx::FxHashSet; 22 use crate::ir::RelSourceLoc; 23 use crate::ir::{self, types, Constant, ConstantData, DynamicStackSlot, LabelValueLoc, ValueLabel}; 24 use crate::machinst::*; 25 use crate::timing; 26 use crate::trace; 27 use crate::CodegenError; 28 use crate::ValueLocRange; 29 use cranelift_control::ControlPlane; 30 use regalloc2::{ 31 Edit, Function as RegallocFunction, InstOrEdit, InstRange, Operand, OperandKind, PRegSet, 32 RegClass, VReg, 33 }; 34 35 use alloc::vec::Vec; 36 use cranelift_entity::{entity_impl, Keys, PrimaryMap}; 37 use std::collections::hash_map::Entry; 38 use std::collections::HashMap; 39 use std::fmt; 40 41 /// Index referring to an instruction in VCode. 42 pub type InsnIndex = regalloc2::Inst; 43 44 /// Index referring to a basic block in VCode. 45 pub type BlockIndex = regalloc2::Block; 46 47 /// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be 48 /// a `MachInst` and it must be able to emit itself at least to a `SizeCodeSink`. 49 pub trait VCodeInst: MachInst + MachInstEmit {} 50 impl<I: MachInst + MachInstEmit> VCodeInst for I {} 51 52 /// A function in "VCode" (virtualized-register code) form, after 53 /// lowering. This is essentially a standard CFG of basic blocks, 54 /// where each basic block consists of lowered instructions produced 55 /// by the machine-specific backend. 56 /// 57 /// Note that the VCode is immutable once produced, and is not 58 /// modified by register allocation in particular. Rather, register 59 /// allocation on the `VCode` produces a separate `regalloc2::Output` 60 /// struct, and this can be passed to `emit`. `emit` in turn does not 61 /// modify the vcode, but produces an `EmitResult`, which contains the 62 /// machine code itself, and the associated disassembly and/or 63 /// metadata as requested. 64 pub struct VCode<I: VCodeInst> { 65 /// VReg IR-level types. 66 vreg_types: Vec<Type>, 67 68 /// Lowered machine instructions in order corresponding to the original IR. 69 insts: Vec<I>, 70 71 /// Operands: pre-regalloc references to virtual registers with 72 /// constraints, in one flattened array. This allows the regalloc 73 /// to efficiently access all operands without requiring expensive 74 /// matches or method invocations on insts. 75 operands: Vec<Operand>, 76 77 /// Operand index ranges: for each instruction in `insts`, there 78 /// is a tuple here providing the range in `operands` for that 79 /// instruction's operands. 80 operand_ranges: Vec<(u32, u32)>, 81 82 /// Clobbers: a sparse map from instruction indices to clobber masks. 83 clobbers: FxHashMap<InsnIndex, PRegSet>, 84 85 /// Move information: for a given InsnIndex, (src, dst) operand pair. 86 is_move: FxHashMap<InsnIndex, (Operand, Operand)>, 87 88 /// Source locations for each instruction. (`SourceLoc` is a `u32`, so it is 89 /// reasonable to keep one of these per instruction.) 90 srclocs: Vec<RelSourceLoc>, 91 92 /// Entry block. 93 entry: BlockIndex, 94 95 /// Block instruction indices. 96 block_ranges: Vec<(InsnIndex, InsnIndex)>, 97 98 /// Block successors: index range in the `block_succs_preds` list. 99 block_succ_range: Vec<(u32, u32)>, 100 101 /// Block predecessors: index range in the `block_succs_preds` list. 102 block_pred_range: Vec<(u32, u32)>, 103 104 /// Block successor and predecessor lists, concatenated into one 105 /// Vec. The `block_succ_range` and `block_pred_range` lists of 106 /// tuples above give (start, end) ranges within this list that 107 /// correspond to each basic block's successors or predecessors, 108 /// respectively. 109 block_succs_preds: Vec<regalloc2::Block>, 110 111 /// Block parameters: index range in `block_params` below. 112 block_params_range: Vec<(u32, u32)>, 113 114 /// Block parameter lists, concatenated into one vec. The 115 /// `block_params_range` list of tuples above gives (start, end) 116 /// ranges within this list that correspond to each basic block's 117 /// blockparam vregs. 118 block_params: Vec<regalloc2::VReg>, 119 120 /// Outgoing block arguments on branch instructions, concatenated 121 /// into one list. 122 /// 123 /// Note that this is conceptually a 3D array: we have a VReg list 124 /// per block, per successor. We flatten those three dimensions 125 /// into this 1D vec, then store index ranges in two levels of 126 /// indirection. 127 /// 128 /// Indexed by the indices in `branch_block_arg_succ_range`. 129 branch_block_args: Vec<regalloc2::VReg>, 130 131 /// Array of sequences of (start, end) tuples in 132 /// `branch_block_args`, one for each successor; these sequences 133 /// for each block are concatenated. 134 /// 135 /// Indexed by the indices in `branch_block_arg_succ_range`. 136 branch_block_arg_range: Vec<(u32, u32)>, 137 138 /// For a given block, indices in `branch_block_arg_range` 139 /// corresponding to all of its successors. 140 branch_block_arg_succ_range: Vec<(u32, u32)>, 141 142 /// VReg aliases. Each key in this table is translated to its 143 /// value when gathering Operands from instructions. Aliases are 144 /// not chased transitively (we do not further look up the 145 /// translated reg to see if it is another alias). 146 /// 147 /// We use these aliases to rename an instruction's expected 148 /// result vregs to the returned vregs from lowering, which are 149 /// usually freshly-allocated temps. 150 /// 151 /// Operands and branch arguments will already have been 152 /// translated through this alias table; but it helps to make 153 /// sense of instructions when pretty-printed, for example. 154 vreg_aliases: FxHashMap<regalloc2::VReg, regalloc2::VReg>, 155 156 /// Block-order information. 157 block_order: BlockLoweringOrder, 158 159 /// ABI object. 160 pub(crate) abi: Callee<I::ABIMachineSpec>, 161 162 /// Constant information used during code emission. This should be 163 /// immutable across function compilations within the same module. 164 emit_info: I::Info, 165 166 /// Reference-typed `regalloc2::VReg`s. The regalloc requires 167 /// these in a dense slice (as opposed to querying the 168 /// reftype-status of each vreg) for efficient iteration. 169 reftyped_vregs: Vec<VReg>, 170 171 /// Constants. 172 constants: VCodeConstants, 173 174 /// Value labels for debuginfo attached to vregs. 175 debug_value_labels: Vec<(VReg, InsnIndex, InsnIndex, u32)>, 176 177 pub(crate) sigs: SigSet, 178 } 179 180 /// The result of `VCode::emit`. Contains all information computed 181 /// during emission: actual machine code, optionally a disassembly, 182 /// and optionally metadata about the code layout. 183 pub struct EmitResult<I: VCodeInst> { 184 /// The MachBuffer containing the machine code. 185 pub buffer: MachBuffer<I>, 186 187 /// Offset of each basic block, recorded during emission. Computed 188 /// only if `debug_value_labels` is non-empty. 189 pub bb_offsets: Vec<CodeOffset>, 190 191 /// Final basic-block edges, in terms of code offsets of 192 /// bb-starts. Computed only if `debug_value_labels` is non-empty. 193 pub bb_edges: Vec<(CodeOffset, CodeOffset)>, 194 195 /// Final instruction offsets, recorded during emission. Computed 196 /// only if `debug_value_labels` is non-empty. 197 pub inst_offsets: Vec<CodeOffset>, 198 199 /// Final length of function body. 200 pub func_body_len: CodeOffset, 201 202 /// The pretty-printed disassembly, if any. This uses the same 203 /// pretty-printing for MachInsts as the pre-regalloc VCode Debug 204 /// implementation, but additionally includes the prologue and 205 /// epilogue(s), and makes use of the regalloc results. 206 pub disasm: Option<String>, 207 208 /// Offsets of sized stackslots. 209 pub sized_stackslot_offsets: PrimaryMap<StackSlot, u32>, 210 211 /// Offsets of dynamic stackslots. 212 pub dynamic_stackslot_offsets: PrimaryMap<DynamicStackSlot, u32>, 213 214 /// Value-labels information (debug metadata). 215 pub value_labels_ranges: ValueLabelsRanges, 216 217 /// Stack frame size. 218 pub frame_size: u32, 219 220 /// The alignment requirement for pc-relative loads. 221 pub alignment: u32, 222 } 223 224 /// A builder for a VCode function body. 225 /// 226 /// This builder has the ability to accept instructions in either 227 /// forward or reverse order, depending on the pass direction that 228 /// produces the VCode. The lowering from CLIF to VCode<MachInst> 229 /// ordinarily occurs in reverse order (in order to allow instructions 230 /// to be lowered only if used, and not merged) so a reversal will 231 /// occur at the end of lowering to ensure the VCode is in machine 232 /// order. 233 /// 234 /// If built in reverse, block and instruction indices used once the 235 /// VCode is built are relative to the final (reversed) order, not the 236 /// order of construction. Note that this means we do not know the 237 /// final block or instruction indices when building, so we do not 238 /// hand them out. (The user is assumed to know them when appending 239 /// terminator instructions with successor blocks.) 240 pub struct VCodeBuilder<I: VCodeInst> { 241 /// In-progress VCode. 242 pub(crate) vcode: VCode<I>, 243 244 /// In what direction is the build occuring? 245 direction: VCodeBuildDirection, 246 247 /// Index of the last block-start in the vcode. 248 block_start: usize, 249 250 /// Start of succs for the current block in the concatenated succs list. 251 succ_start: usize, 252 253 /// Start of blockparams for the current block in the concatenated 254 /// blockparams list. 255 block_params_start: usize, 256 257 /// Start of successor blockparam arg list entries in 258 /// the concatenated branch_block_arg_range list. 259 branch_block_arg_succ_start: usize, 260 261 /// Current source location. 262 cur_srcloc: RelSourceLoc, 263 264 /// Debug-value label in-progress map, keyed by label. For each 265 /// label, we keep disjoint ranges mapping to vregs. We'll flatten 266 /// this into (vreg, range, label) tuples when done. 267 debug_info: FxHashMap<ValueLabel, Vec<(InsnIndex, InsnIndex, VReg)>>, 268 } 269 270 /// Direction in which a VCodeBuilder builds VCode. 271 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 272 pub enum VCodeBuildDirection { 273 // TODO: add `Forward` once we need it and can test it adequately. 274 /// Backward-build pass: we expect the producer to call `emit()` 275 /// with instructions in reverse program order within each block. 276 Backward, 277 } 278 279 impl<I: VCodeInst> VCodeBuilder<I> { 280 /// Create a new VCodeBuilder. 281 pub fn new( 282 sigs: SigSet, 283 abi: Callee<I::ABIMachineSpec>, 284 emit_info: I::Info, 285 block_order: BlockLoweringOrder, 286 constants: VCodeConstants, 287 direction: VCodeBuildDirection, 288 ) -> VCodeBuilder<I> { 289 let vcode = VCode::new(sigs, abi, emit_info, block_order, constants); 290 291 VCodeBuilder { 292 vcode, 293 direction, 294 block_start: 0, 295 succ_start: 0, 296 block_params_start: 0, 297 branch_block_arg_succ_start: 0, 298 cur_srcloc: Default::default(), 299 debug_info: FxHashMap::default(), 300 } 301 } 302 303 pub fn init_abi(&mut self, temps: Vec<Writable<Reg>>) { 304 self.vcode.abi.init(&self.vcode.sigs, temps); 305 } 306 307 /// Access the ABI object. 308 pub fn abi(&self) -> &Callee<I::ABIMachineSpec> { 309 &self.vcode.abi 310 } 311 312 /// Access the ABI object. 313 pub fn abi_mut(&mut self) -> &mut Callee<I::ABIMachineSpec> { 314 &mut self.vcode.abi 315 } 316 317 pub fn sigs(&self) -> &SigSet { 318 &self.vcode.sigs 319 } 320 321 pub fn sigs_mut(&mut self) -> &mut SigSet { 322 &mut self.vcode.sigs 323 } 324 325 /// Access to the BlockLoweringOrder object. 326 pub fn block_order(&self) -> &BlockLoweringOrder { 327 &self.vcode.block_order 328 } 329 330 /// Set the current block as the entry block. 331 pub fn set_entry(&mut self, block: BlockIndex) { 332 self.vcode.entry = block; 333 } 334 335 /// End the current basic block. Must be called after emitting vcode insts 336 /// for IR insts and prior to ending the function (building the VCode). 337 pub fn end_bb(&mut self) { 338 let start_idx = self.block_start; 339 let end_idx = self.vcode.insts.len(); 340 self.block_start = end_idx; 341 // Add the instruction index range to the list of blocks. 342 self.vcode 343 .block_ranges 344 .push((InsnIndex::new(start_idx), InsnIndex::new(end_idx))); 345 // End the successors list. 346 let succ_end = self.vcode.block_succs_preds.len(); 347 self.vcode 348 .block_succ_range 349 .push((self.succ_start as u32, succ_end as u32)); 350 self.succ_start = succ_end; 351 // End the blockparams list. 352 let block_params_end = self.vcode.block_params.len(); 353 self.vcode 354 .block_params_range 355 .push((self.block_params_start as u32, block_params_end as u32)); 356 self.block_params_start = block_params_end; 357 // End the branch blockparam args list. 358 let branch_block_arg_succ_end = self.vcode.branch_block_arg_range.len(); 359 self.vcode.branch_block_arg_succ_range.push(( 360 self.branch_block_arg_succ_start as u32, 361 branch_block_arg_succ_end as u32, 362 )); 363 self.branch_block_arg_succ_start = branch_block_arg_succ_end; 364 } 365 366 pub fn add_block_param(&mut self, param: VirtualReg) { 367 self.vcode.block_params.push(param.into()); 368 } 369 370 fn add_branch_args_for_succ(&mut self, args: &[Reg]) { 371 let start = self.vcode.branch_block_args.len(); 372 self.vcode 373 .branch_block_args 374 .extend(args.iter().map(|&arg| VReg::from(arg))); 375 let end = self.vcode.branch_block_args.len(); 376 self.vcode 377 .branch_block_arg_range 378 .push((start as u32, end as u32)); 379 } 380 381 /// Push an instruction for the current BB and current IR inst 382 /// within the BB. 383 pub fn push(&mut self, insn: I) { 384 self.vcode.insts.push(insn); 385 self.vcode.srclocs.push(self.cur_srcloc); 386 } 387 388 /// Add a successor block with branch args. 389 pub fn add_succ(&mut self, block: BlockIndex, args: &[Reg]) { 390 self.vcode.block_succs_preds.push(block); 391 self.add_branch_args_for_succ(args); 392 } 393 394 /// Set the current source location. 395 pub fn set_srcloc(&mut self, srcloc: RelSourceLoc) { 396 self.cur_srcloc = srcloc; 397 } 398 399 /// Add a debug value label to a register. 400 pub fn add_value_label(&mut self, reg: Reg, label: ValueLabel) { 401 // We'll fix up labels in reverse(). Because we're generating 402 // code bottom-to-top, the liverange of the label goes *from* 403 // the last index at which was defined (or 0, which is the end 404 // of the eventual function) *to* just this instruction, and 405 // no further. 406 let inst = InsnIndex::new(self.vcode.insts.len()); 407 let labels = self.debug_info.entry(label).or_insert_with(|| vec![]); 408 let last = labels 409 .last() 410 .map(|(_start, end, _vreg)| *end) 411 .unwrap_or(InsnIndex::new(0)); 412 labels.push((last, inst, reg.into())); 413 } 414 415 pub fn set_vreg_alias(&mut self, from: Reg, to: Reg) { 416 let from = from.into(); 417 let resolved_to = self.resolve_vreg_alias(to.into()); 418 // Disallow cycles (see below). 419 assert_ne!(resolved_to, from); 420 self.vcode.vreg_aliases.insert(from, resolved_to); 421 } 422 423 pub fn resolve_vreg_alias(&self, from: regalloc2::VReg) -> regalloc2::VReg { 424 Self::resolve_vreg_alias_impl(&self.vcode.vreg_aliases, from) 425 } 426 427 fn resolve_vreg_alias_impl( 428 aliases: &FxHashMap<regalloc2::VReg, regalloc2::VReg>, 429 from: regalloc2::VReg, 430 ) -> regalloc2::VReg { 431 // We prevent cycles from existing by resolving targets of 432 // aliases eagerly before setting them. If the target resolves 433 // to the origin of the alias, then a cycle would be created 434 // and the alias is disallowed. Because of the structure of 435 // SSA code (one instruction can refer to another's defs but 436 // not vice-versa, except indirectly through 437 // phis/blockparams), cycles should not occur as we use 438 // aliases to redirect vregs to the temps that actually define 439 // them. 440 441 let mut vreg = from; 442 while let Some(to) = aliases.get(&vreg) { 443 vreg = *to; 444 } 445 vreg 446 } 447 448 /// Access the constants. 449 pub fn constants(&mut self) -> &mut VCodeConstants { 450 &mut self.vcode.constants 451 } 452 453 fn compute_preds_from_succs(&mut self) { 454 // Compute predecessors from successors. In order to gather 455 // all preds for a block into a contiguous sequence, we build 456 // a list of (succ, pred) tuples and then sort. 457 let mut succ_pred_edges: Vec<(BlockIndex, BlockIndex)> = 458 Vec::with_capacity(self.vcode.block_succs_preds.len()); 459 for (pred, &(start, end)) in self.vcode.block_succ_range.iter().enumerate() { 460 let pred = BlockIndex::new(pred); 461 for i in start..end { 462 let succ = BlockIndex::new(self.vcode.block_succs_preds[i as usize].index()); 463 succ_pred_edges.push((succ, pred)); 464 } 465 } 466 succ_pred_edges.sort_unstable(); 467 468 let mut i = 0; 469 for succ in 0..self.vcode.num_blocks() { 470 let succ = BlockIndex::new(succ); 471 let start = self.vcode.block_succs_preds.len(); 472 while i < succ_pred_edges.len() && succ_pred_edges[i].0 == succ { 473 let pred = succ_pred_edges[i].1; 474 self.vcode.block_succs_preds.push(pred); 475 i += 1; 476 } 477 let end = self.vcode.block_succs_preds.len(); 478 self.vcode.block_pred_range.push((start as u32, end as u32)); 479 } 480 } 481 482 /// Called once, when a build in Backward order is complete, to 483 /// perform the overall reversal (into final forward order) and 484 /// finalize metadata accordingly. 485 fn reverse_and_finalize(&mut self) { 486 let n_insts = self.vcode.insts.len(); 487 if n_insts == 0 { 488 return; 489 } 490 491 // Reverse the per-block and per-inst sequences. 492 self.vcode.block_ranges.reverse(); 493 // block_params_range is indexed by block (and blocks were 494 // traversed in reverse) so we reverse it; but block-param 495 // sequences in the concatenated vec can remain in reverse 496 // order (it is effectively an arena of arbitrarily-placed 497 // referenced sequences). 498 self.vcode.block_params_range.reverse(); 499 // Likewise, we reverse block_succ_range, but the block_succ 500 // concatenated array can remain as-is. 501 self.vcode.block_succ_range.reverse(); 502 self.vcode.insts.reverse(); 503 self.vcode.srclocs.reverse(); 504 // Likewise, branch_block_arg_succ_range is indexed by block 505 // so must be reversed. 506 self.vcode.branch_block_arg_succ_range.reverse(); 507 508 // To translate an instruction index *endpoint* in reversed 509 // order to forward order, compute `n_insts - i`. 510 // 511 // Why not `n_insts - 1 - i`? That would be correct to 512 // translate an individual instruction index (for ten insts 0 513 // to 9 inclusive, inst 0 becomes 9, and inst 9 becomes 514 // 0). But for the usual inclusive-start, exclusive-end range 515 // idiom, inclusive starts become exclusive ends and 516 // vice-versa, so e.g. an (inclusive) start of 0 becomes an 517 // (exclusive) end of 10. 518 let translate = |inst: InsnIndex| InsnIndex::new(n_insts - inst.index()); 519 520 // Edit the block-range instruction indices. 521 for tuple in &mut self.vcode.block_ranges { 522 let (start, end) = *tuple; 523 *tuple = (translate(end), translate(start)); // Note reversed order. 524 } 525 526 // Generate debug-value labels based on per-label maps. 527 for (label, tuples) in &self.debug_info { 528 for &(start, end, vreg) in tuples { 529 let vreg = self.resolve_vreg_alias(vreg); 530 let fwd_start = translate(end); 531 let fwd_end = translate(start); 532 self.vcode 533 .debug_value_labels 534 .push((vreg, fwd_start, fwd_end, label.as_u32())); 535 } 536 } 537 538 // Now sort debug value labels by VReg, as required 539 // by regalloc2. 540 self.vcode 541 .debug_value_labels 542 .sort_unstable_by_key(|(vreg, _, _, _)| *vreg); 543 } 544 545 fn collect_operands(&mut self, allocatable: PRegSet) { 546 for (i, insn) in self.vcode.insts.iter().enumerate() { 547 // Push operands from the instruction onto the operand list. 548 // 549 // We rename through the vreg alias table as we collect 550 // the operands. This is better than a separate post-pass 551 // over operands, because it has more cache locality: 552 // operands only need to pass through L1 once. This is 553 // also better than renaming instructions' 554 // operands/registers while lowering, because here we only 555 // need to do the `match` over the instruction to visit 556 // its register fields (which is slow, branchy code) once. 557 558 let vreg_aliases = &self.vcode.vreg_aliases; 559 let mut op_collector = 560 OperandCollector::new(&mut self.vcode.operands, allocatable, |vreg| { 561 Self::resolve_vreg_alias_impl(vreg_aliases, vreg) 562 }); 563 insn.get_operands(&mut op_collector); 564 let (ops, clobbers) = op_collector.finish(); 565 self.vcode.operand_ranges.push(ops); 566 567 if clobbers != PRegSet::default() { 568 self.vcode.clobbers.insert(InsnIndex::new(i), clobbers); 569 } 570 571 if let Some((dst, src)) = insn.is_move() { 572 // We should never see non-virtual registers present in move 573 // instructions. 574 assert!( 575 src.is_virtual(), 576 "the real register {:?} was used as the source of a move instruction", 577 src 578 ); 579 assert!( 580 dst.to_reg().is_virtual(), 581 "the real register {:?} was used as the destination of a move instruction", 582 dst.to_reg() 583 ); 584 585 let src = Operand::reg_use(Self::resolve_vreg_alias_impl(vreg_aliases, src.into())); 586 let dst = Operand::reg_def(Self::resolve_vreg_alias_impl( 587 vreg_aliases, 588 dst.to_reg().into(), 589 )); 590 591 // Note that regalloc2 requires these in (src, dst) order. 592 self.vcode.is_move.insert(InsnIndex::new(i), (src, dst)); 593 } 594 } 595 596 // Translate blockparam args via the vreg aliases table as well. 597 for arg in &mut self.vcode.branch_block_args { 598 let new_arg = Self::resolve_vreg_alias_impl(&self.vcode.vreg_aliases, *arg); 599 trace!("operandcollector: block arg {:?} -> {:?}", arg, new_arg); 600 *arg = new_arg; 601 } 602 } 603 604 /// Build the final VCode. 605 pub fn build(mut self, allocatable: PRegSet, vregs: VRegAllocator<I>) -> VCode<I> { 606 self.vcode.vreg_types = vregs.vreg_types; 607 self.vcode.reftyped_vregs = vregs.reftyped_vregs; 608 609 if self.direction == VCodeBuildDirection::Backward { 610 self.reverse_and_finalize(); 611 } 612 self.collect_operands(allocatable); 613 614 // Apply register aliases to the `reftyped_vregs` list since this list 615 // will be returned directly to `regalloc2` eventually and all 616 // operands/results of instructions will use the alias-resolved vregs 617 // from `regalloc2`'s perspective. 618 // 619 // Also note that `reftyped_vregs` can't have duplicates, so after the 620 // aliases are applied duplicates are removed. 621 for reg in self.vcode.reftyped_vregs.iter_mut() { 622 *reg = Self::resolve_vreg_alias_impl(&self.vcode.vreg_aliases, *reg); 623 } 624 self.vcode.reftyped_vregs.sort(); 625 self.vcode.reftyped_vregs.dedup(); 626 627 self.compute_preds_from_succs(); 628 self.vcode.debug_value_labels.sort_unstable(); 629 self.vcode 630 } 631 } 632 633 /// Is this type a reference type? 634 fn is_reftype(ty: Type) -> bool { 635 ty == types::R64 || ty == types::R32 636 } 637 638 impl<I: VCodeInst> VCode<I> { 639 /// New empty VCode. 640 fn new( 641 sigs: SigSet, 642 abi: Callee<I::ABIMachineSpec>, 643 emit_info: I::Info, 644 block_order: BlockLoweringOrder, 645 constants: VCodeConstants, 646 ) -> VCode<I> { 647 let n_blocks = block_order.lowered_order().len(); 648 VCode { 649 sigs, 650 vreg_types: vec![], 651 insts: Vec::with_capacity(10 * n_blocks), 652 operands: Vec::with_capacity(30 * n_blocks), 653 operand_ranges: Vec::with_capacity(10 * n_blocks), 654 clobbers: FxHashMap::default(), 655 is_move: FxHashMap::default(), 656 srclocs: Vec::with_capacity(10 * n_blocks), 657 entry: BlockIndex::new(0), 658 block_ranges: Vec::with_capacity(n_blocks), 659 block_succ_range: Vec::with_capacity(n_blocks), 660 block_succs_preds: Vec::with_capacity(2 * n_blocks), 661 block_pred_range: Vec::with_capacity(n_blocks), 662 block_params_range: Vec::with_capacity(n_blocks), 663 block_params: Vec::with_capacity(5 * n_blocks), 664 branch_block_args: Vec::with_capacity(10 * n_blocks), 665 branch_block_arg_range: Vec::with_capacity(2 * n_blocks), 666 branch_block_arg_succ_range: Vec::with_capacity(n_blocks), 667 block_order, 668 abi, 669 emit_info, 670 reftyped_vregs: vec![], 671 constants, 672 debug_value_labels: vec![], 673 vreg_aliases: FxHashMap::with_capacity_and_hasher(10 * n_blocks, Default::default()), 674 } 675 } 676 677 /// Get the number of blocks. Block indices will be in the range `0 .. 678 /// (self.num_blocks() - 1)`. 679 pub fn num_blocks(&self) -> usize { 680 self.block_ranges.len() 681 } 682 683 /// The number of lowered instructions. 684 pub fn num_insts(&self) -> usize { 685 self.insts.len() 686 } 687 688 /// Get the successors for a block. 689 pub fn succs(&self, block: BlockIndex) -> &[BlockIndex] { 690 let (start, end) = self.block_succ_range[block.index()]; 691 &self.block_succs_preds[start as usize..end as usize] 692 } 693 694 fn compute_clobbers(&self, regalloc: ®alloc2::Output) -> Vec<Writable<RealReg>> { 695 // Compute clobbered registers. 696 let mut clobbered = vec![]; 697 let mut clobbered_set = FxHashSet::default(); 698 699 // All moves are included in clobbers. 700 for edit in ®alloc.edits { 701 let Edit::Move { to, .. } = edit.1; 702 if let Some(preg) = to.as_reg() { 703 let reg = RealReg::from(preg); 704 if clobbered_set.insert(reg) { 705 clobbered.push(Writable::from_reg(reg)); 706 } 707 } 708 } 709 710 for (i, (start, end)) in self.operand_ranges.iter().enumerate() { 711 // Skip this instruction if not "included in clobbers" as 712 // per the MachInst. (Some backends use this to implement 713 // ABI specifics; e.g., excluding calls of the same ABI as 714 // the current function from clobbers, because by 715 // definition everything clobbered by the call can be 716 // clobbered by this function without saving as well.) 717 if !self.insts[i].is_included_in_clobbers() { 718 continue; 719 } 720 721 let start = *start as usize; 722 let end = *end as usize; 723 let operands = &self.operands[start..end]; 724 let allocs = ®alloc.allocs[start..end]; 725 for (operand, alloc) in operands.iter().zip(allocs.iter()) { 726 // We're interested only in writes (Mods or Defs). 727 if operand.kind() == OperandKind::Use { 728 continue; 729 } 730 if let Some(preg) = alloc.as_reg() { 731 let reg = RealReg::from(preg); 732 if clobbered_set.insert(reg) { 733 clobbered.push(Writable::from_reg(reg)); 734 } 735 } 736 } 737 738 // Also add explicitly-clobbered registers. 739 for preg in self 740 .clobbers 741 .get(&InsnIndex::new(i)) 742 .cloned() 743 .unwrap_or_default() 744 { 745 let reg = RealReg::from(preg); 746 if clobbered_set.insert(reg) { 747 clobbered.push(Writable::from_reg(reg)); 748 } 749 } 750 } 751 752 clobbered 753 } 754 755 /// Emit the instructions to a `MachBuffer`, containing fixed-up 756 /// code and external reloc/trap/etc. records ready for use. Takes 757 /// the regalloc results as well. 758 /// 759 /// Returns the machine code itself, and optionally metadata 760 /// and/or a disassembly, as an `EmitResult`. The `VCode` itself 761 /// is consumed by the emission process. 762 pub fn emit( 763 mut self, 764 regalloc: ®alloc2::Output, 765 want_disasm: bool, 766 want_metadata: bool, 767 ctrl_plane: &mut ControlPlane, 768 ) -> EmitResult<I> 769 where 770 I: VCodeInst, 771 { 772 // To write into disasm string. 773 use core::fmt::Write; 774 775 let _tt = timing::vcode_emit(); 776 let mut buffer = MachBuffer::new(); 777 let mut bb_starts: Vec<Option<CodeOffset>> = vec![]; 778 779 // The first M MachLabels are reserved for block indices, the next N MachLabels for 780 // constants. 781 buffer.reserve_labels_for_blocks(self.num_blocks()); 782 buffer.reserve_labels_for_constants(&self.constants); 783 784 // Construct the final order we emit code in: cold blocks at the end. 785 let mut final_order: SmallVec<[BlockIndex; 16]> = smallvec![]; 786 let mut cold_blocks: SmallVec<[BlockIndex; 16]> = smallvec![]; 787 for block in 0..self.num_blocks() { 788 let block = BlockIndex::new(block); 789 if self.block_order.is_cold(block) { 790 cold_blocks.push(block); 791 } else { 792 final_order.push(block); 793 } 794 } 795 final_order.extend(cold_blocks.clone()); 796 797 // Compute/save info we need for the prologue: clobbers and 798 // number of spillslots. 799 // 800 // We clone `abi` here because we will mutate it as we 801 // generate the prologue and set other info, but we can't 802 // mutate `VCode`. The info it usually carries prior to 803 // setting clobbers is fairly minimal so this should be 804 // relatively cheap. 805 let clobbers = self.compute_clobbers(regalloc); 806 self.abi.set_num_spillslots(regalloc.num_spillslots); 807 self.abi.set_clobbered(clobbers); 808 809 // We need to generate the prologue in order to get the ABI 810 // object into the right state first. We'll emit it when we 811 // hit the right block below. 812 let prologue_insts = self.abi.gen_prologue(&self.sigs); 813 814 // Emit blocks. 815 let mut cur_srcloc = None; 816 let mut last_offset = None; 817 let mut inst_offsets = vec![]; 818 let mut state = I::State::new(&self.abi, std::mem::take(ctrl_plane)); 819 820 let mut disasm = String::new(); 821 822 if !self.debug_value_labels.is_empty() { 823 inst_offsets.resize(self.insts.len(), 0); 824 } 825 826 // Count edits per block ahead of time; this is needed for 827 // lookahead island emission. (We could derive it per-block 828 // with binary search in the edit list, but it's more 829 // efficient to do it in one pass here.) 830 let mut ra_edits_per_block: SmallVec<[u32; 64]> = smallvec![]; 831 let mut edit_idx = 0; 832 for block in 0..self.num_blocks() { 833 let end_inst = self.block_ranges[block].1; 834 let start_edit_idx = edit_idx; 835 while edit_idx < regalloc.edits.len() && regalloc.edits[edit_idx].0.inst() < end_inst { 836 edit_idx += 1; 837 } 838 let end_edit_idx = edit_idx; 839 ra_edits_per_block.push((end_edit_idx - start_edit_idx) as u32); 840 } 841 842 let is_forward_edge_cfi_enabled = self.abi.is_forward_edge_cfi_enabled(); 843 844 for (block_order_idx, &block) in final_order.iter().enumerate() { 845 trace!("emitting block {:?}", block); 846 let new_offset = I::align_basic_block(buffer.cur_offset()); 847 while new_offset > buffer.cur_offset() { 848 // Pad with NOPs up to the aligned block offset. 849 let nop = I::gen_nop((new_offset - buffer.cur_offset()) as usize); 850 nop.emit(&[], &mut buffer, &self.emit_info, &mut Default::default()); 851 } 852 assert_eq!(buffer.cur_offset(), new_offset); 853 854 let do_emit = |inst: &I, 855 allocs: &[Allocation], 856 disasm: &mut String, 857 buffer: &mut MachBuffer<I>, 858 state: &mut I::State| { 859 if want_disasm && !inst.is_args() { 860 let mut s = state.clone(); 861 writeln!(disasm, " {}", inst.pretty_print_inst(allocs, &mut s)).unwrap(); 862 } 863 inst.emit(allocs, buffer, &self.emit_info, state); 864 }; 865 866 // Is this the first block? Emit the prologue directly if so. 867 if block == self.entry { 868 trace!(" -> entry block"); 869 buffer.start_srcloc(Default::default()); 870 state.pre_sourceloc(Default::default()); 871 for inst in &prologue_insts { 872 do_emit(&inst, &[], &mut disasm, &mut buffer, &mut state); 873 } 874 buffer.end_srcloc(); 875 } 876 877 // Now emit the regular block body. 878 879 buffer.bind_label(MachLabel::from_block(block), state.ctrl_plane_mut()); 880 881 if want_disasm { 882 writeln!(&mut disasm, "block{}:", block.index()).unwrap(); 883 } 884 885 if want_metadata { 886 // Track BB starts. If we have backed up due to MachBuffer 887 // branch opts, note that the removed blocks were removed. 888 let cur_offset = buffer.cur_offset(); 889 if last_offset.is_some() && cur_offset <= last_offset.unwrap() { 890 for i in (0..bb_starts.len()).rev() { 891 if bb_starts[i].is_some() && cur_offset > bb_starts[i].unwrap() { 892 break; 893 } 894 bb_starts[i] = None; 895 } 896 } 897 bb_starts.push(Some(cur_offset)); 898 last_offset = Some(cur_offset); 899 } 900 901 if let Some(block_start) = I::gen_block_start( 902 self.block_order.is_indirect_branch_target(block), 903 is_forward_edge_cfi_enabled, 904 ) { 905 do_emit(&block_start, &[], &mut disasm, &mut buffer, &mut state); 906 } 907 908 for inst_or_edit in regalloc.block_insts_and_edits(&self, block) { 909 match inst_or_edit { 910 InstOrEdit::Inst(iix) => { 911 if !self.debug_value_labels.is_empty() { 912 // If we need to produce debug info, 913 // record the offset of each instruction 914 // so that we can translate value-label 915 // ranges to machine-code offsets. 916 917 // Cold blocks violate monotonicity 918 // assumptions elsewhere (that 919 // instructions in inst-index order are in 920 // order in machine code), so we omit 921 // their offsets here. Value-label range 922 // generation below will skip empty ranges 923 // and ranges with to-offsets of zero. 924 if !self.block_order.is_cold(block) { 925 inst_offsets[iix.index()] = buffer.cur_offset(); 926 } 927 } 928 929 if self.insts[iix.index()].is_move().is_some() { 930 // Skip moves in the pre-regalloc program; 931 // all of these are incorporated by the 932 // regalloc into its unified move handling 933 // and they come out the other end, if 934 // still needed (not elided), as 935 // regalloc-inserted moves. 936 continue; 937 } 938 939 // Update the srcloc at this point in the buffer. 940 let srcloc = self.srclocs[iix.index()]; 941 if cur_srcloc != Some(srcloc) { 942 if cur_srcloc.is_some() { 943 buffer.end_srcloc(); 944 } 945 buffer.start_srcloc(srcloc); 946 cur_srcloc = Some(srcloc); 947 } 948 state.pre_sourceloc(cur_srcloc.unwrap_or_default()); 949 950 // If this is a safepoint, compute a stack map 951 // and pass it to the emit state. 952 if self.insts[iix.index()].is_safepoint() { 953 let mut safepoint_slots: SmallVec<[SpillSlot; 8]> = smallvec![]; 954 // Find the contiguous range of 955 // (progpoint, allocation) safepoint slot 956 // records in `regalloc.safepoint_slots` 957 // for this instruction index. 958 let safepoint_slots_start = regalloc 959 .safepoint_slots 960 .binary_search_by(|(progpoint, _alloc)| { 961 if progpoint.inst() >= iix { 962 std::cmp::Ordering::Greater 963 } else { 964 std::cmp::Ordering::Less 965 } 966 }) 967 .unwrap_err(); 968 969 for (_, alloc) in regalloc.safepoint_slots[safepoint_slots_start..] 970 .iter() 971 .take_while(|(progpoint, _)| progpoint.inst() == iix) 972 { 973 let slot = alloc.as_stack().unwrap(); 974 safepoint_slots.push(slot); 975 } 976 if !safepoint_slots.is_empty() { 977 let stack_map = self 978 .abi 979 .spillslots_to_stack_map(&safepoint_slots[..], &state); 980 state.pre_safepoint(stack_map); 981 } 982 } 983 984 // Get the allocations for this inst from the regalloc result. 985 let allocs = regalloc.inst_allocs(iix); 986 987 // If the instruction we are about to emit is 988 // a return, place an epilogue at this point 989 // (and don't emit the return; the actual 990 // epilogue will contain it). 991 if self.insts[iix.index()].is_term() == MachTerminator::Ret { 992 for inst in self.abi.gen_epilogue() { 993 do_emit(&inst, &[], &mut disasm, &mut buffer, &mut state); 994 } 995 } else { 996 // Emit the instruction! 997 do_emit( 998 &self.insts[iix.index()], 999 allocs, 1000 &mut disasm, 1001 &mut buffer, 1002 &mut state, 1003 ); 1004 } 1005 } 1006 1007 InstOrEdit::Edit(Edit::Move { from, to }) => { 1008 // Create a move/spill/reload instruction and 1009 // immediately emit it. 1010 match (from.as_reg(), to.as_reg()) { 1011 (Some(from), Some(to)) => { 1012 // Reg-to-reg move. 1013 let from_rreg = Reg::from(from); 1014 let to_rreg = Writable::from_reg(Reg::from(to)); 1015 debug_assert_eq!(from.class(), to.class()); 1016 let ty = I::canonical_type_for_rc(from.class()); 1017 let mv = I::gen_move(to_rreg, from_rreg, ty); 1018 do_emit(&mv, &[], &mut disasm, &mut buffer, &mut state); 1019 } 1020 (Some(from), None) => { 1021 // Spill from register to spillslot. 1022 let to = to.as_stack().unwrap(); 1023 let from_rreg = RealReg::from(from); 1024 let spill = self.abi.gen_spill(to, from_rreg); 1025 do_emit(&spill, &[], &mut disasm, &mut buffer, &mut state); 1026 } 1027 (None, Some(to)) => { 1028 // Load from spillslot to register. 1029 let from = from.as_stack().unwrap(); 1030 let to_rreg = Writable::from_reg(RealReg::from(to)); 1031 let reload = self.abi.gen_reload(to_rreg, from); 1032 do_emit(&reload, &[], &mut disasm, &mut buffer, &mut state); 1033 } 1034 (None, None) => { 1035 panic!("regalloc2 should have eliminated stack-to-stack moves!"); 1036 } 1037 } 1038 } 1039 } 1040 } 1041 1042 if cur_srcloc.is_some() { 1043 buffer.end_srcloc(); 1044 cur_srcloc = None; 1045 } 1046 1047 // Do we need an island? Get the worst-case size of the 1048 // next BB and see if, having emitted that many bytes, we 1049 // will be beyond the deadline. 1050 if block_order_idx < final_order.len() - 1 { 1051 let next_block = final_order[block_order_idx + 1]; 1052 let next_block_range = self.block_ranges[next_block.index()]; 1053 let next_block_size = 1054 (next_block_range.1.index() - next_block_range.0.index()) as u32; 1055 let next_block_ra_insertions = ra_edits_per_block[next_block.index()]; 1056 let worst_case_next_bb = 1057 I::worst_case_size() * (next_block_size + next_block_ra_insertions); 1058 if buffer.island_needed(worst_case_next_bb) { 1059 buffer.emit_island(worst_case_next_bb, ctrl_plane); 1060 } 1061 } 1062 } 1063 1064 // emission state is not needed anymore, move control plane back out 1065 *ctrl_plane = state.take_ctrl_plane(); 1066 1067 // Emit the constants used by the function. 1068 let mut alignment = 1; 1069 for (constant, data) in self.constants.iter() { 1070 alignment = data.alignment().max(alignment); 1071 1072 let label = buffer.get_label_for_constant(constant); 1073 buffer.defer_constant(label, data.alignment(), data.as_slice(), u32::max_value()); 1074 } 1075 1076 let func_body_len = buffer.cur_offset(); 1077 1078 // Create `bb_edges` and final (filtered) `bb_starts`. 1079 let mut bb_edges = vec![]; 1080 let mut bb_offsets = vec![]; 1081 if want_metadata { 1082 for block in 0..self.num_blocks() { 1083 if bb_starts[block].is_none() { 1084 // Block was deleted by MachBuffer; skip. 1085 continue; 1086 } 1087 let from = bb_starts[block].unwrap(); 1088 1089 bb_offsets.push(from); 1090 // Resolve each `succ` label and add edges. 1091 let succs = self.block_succs(BlockIndex::new(block)); 1092 for &succ in succs.iter() { 1093 let to = buffer.resolve_label_offset(MachLabel::from_block(succ)); 1094 bb_edges.push((from, to)); 1095 } 1096 } 1097 } 1098 1099 let value_labels_ranges = 1100 self.compute_value_labels_ranges(regalloc, &inst_offsets[..], func_body_len); 1101 let frame_size = self.abi.frame_size(); 1102 1103 EmitResult { 1104 buffer, 1105 bb_offsets, 1106 bb_edges, 1107 inst_offsets, 1108 func_body_len, 1109 disasm: if want_disasm { Some(disasm) } else { None }, 1110 sized_stackslot_offsets: self.abi.sized_stackslot_offsets().clone(), 1111 dynamic_stackslot_offsets: self.abi.dynamic_stackslot_offsets().clone(), 1112 value_labels_ranges, 1113 frame_size, 1114 alignment, 1115 } 1116 } 1117 1118 fn compute_value_labels_ranges( 1119 &self, 1120 regalloc: ®alloc2::Output, 1121 inst_offsets: &[CodeOffset], 1122 func_body_len: u32, 1123 ) -> ValueLabelsRanges { 1124 if self.debug_value_labels.is_empty() { 1125 return ValueLabelsRanges::default(); 1126 } 1127 1128 let mut value_labels_ranges: ValueLabelsRanges = HashMap::new(); 1129 for &(label, from, to, alloc) in ®alloc.debug_locations { 1130 let ranges = value_labels_ranges 1131 .entry(ValueLabel::from_u32(label)) 1132 .or_insert_with(|| vec![]); 1133 let from_offset = inst_offsets[from.inst().index()]; 1134 let to_offset = if to.inst().index() == inst_offsets.len() { 1135 func_body_len 1136 } else { 1137 inst_offsets[to.inst().index()] 1138 }; 1139 1140 // Empty range or to-offset of zero can happen because of 1141 // cold blocks (see above). 1142 if to_offset == 0 || from_offset == to_offset { 1143 continue; 1144 } 1145 1146 let loc = if let Some(preg) = alloc.as_reg() { 1147 LabelValueLoc::Reg(Reg::from(preg)) 1148 } else { 1149 // We can't translate spillslot locations at the 1150 // moment because ValueLabelLoc requires an 1151 // instantaneous SP offset, and this can *change* 1152 // within the range we have here because of callsites 1153 // adjusting SP temporarily. To avoid the complexity 1154 // of accurately plumbing through nominal-SP 1155 // adjustment sites, we just omit debug info for 1156 // values that are spilled. Not ideal, but debug info 1157 // is best-effort. 1158 continue; 1159 }; 1160 1161 ranges.push(ValueLocRange { 1162 loc, 1163 // ValueLocRanges are recorded by *instruction-end 1164 // offset*. `from_offset` is the *start* of the 1165 // instruction; that is the same as the end of another 1166 // instruction, so we only want to begin coverage once 1167 // we are past the previous instruction's end. 1168 start: from_offset + 1, 1169 // Likewise, `end` is exclusive, but we want to 1170 // *include* the end of the last 1171 // instruction. `to_offset` is the start of the 1172 // `to`-instruction, which is the exclusive end, i.e., 1173 // the first instruction not covered. That 1174 // instruction's start is the same as the end of the 1175 // last instruction that is included, so we go one 1176 // byte further to be sure to include it. 1177 end: to_offset + 1, 1178 }); 1179 } 1180 1181 value_labels_ranges 1182 } 1183 1184 /// Get the IR block for a BlockIndex, if one exists. 1185 pub fn bindex_to_bb(&self, block: BlockIndex) -> Option<ir::Block> { 1186 self.block_order.lowered_order()[block.index()].orig_block() 1187 } 1188 1189 #[inline] 1190 fn assert_no_vreg_aliases<'a>(&self, list: &'a [VReg]) -> &'a [VReg] { 1191 for vreg in list { 1192 self.assert_not_vreg_alias(*vreg); 1193 } 1194 list 1195 } 1196 1197 #[inline] 1198 fn assert_not_vreg_alias(&self, vreg: VReg) -> VReg { 1199 debug_assert!(VCodeBuilder::<I>::resolve_vreg_alias_impl(&self.vreg_aliases, vreg) == vreg); 1200 vreg 1201 } 1202 1203 #[inline] 1204 fn assert_operand_not_vreg_alias(&self, op: Operand) -> Operand { 1205 // It should be true by construction that `Operand`s do not contain any 1206 // aliased vregs since they're all collected and mapped when the VCode 1207 // is itself constructed. 1208 self.assert_not_vreg_alias(op.vreg()); 1209 op 1210 } 1211 } 1212 1213 impl<I: VCodeInst> RegallocFunction for VCode<I> { 1214 fn num_insts(&self) -> usize { 1215 self.insts.len() 1216 } 1217 1218 fn num_blocks(&self) -> usize { 1219 self.block_ranges.len() 1220 } 1221 1222 fn entry_block(&self) -> BlockIndex { 1223 self.entry 1224 } 1225 1226 fn block_insns(&self, block: BlockIndex) -> InstRange { 1227 let (start, end) = self.block_ranges[block.index()]; 1228 InstRange::forward(start, end) 1229 } 1230 1231 fn block_succs(&self, block: BlockIndex) -> &[BlockIndex] { 1232 let (start, end) = self.block_succ_range[block.index()]; 1233 &self.block_succs_preds[start as usize..end as usize] 1234 } 1235 1236 fn block_preds(&self, block: BlockIndex) -> &[BlockIndex] { 1237 let (start, end) = self.block_pred_range[block.index()]; 1238 &self.block_succs_preds[start as usize..end as usize] 1239 } 1240 1241 fn block_params(&self, block: BlockIndex) -> &[VReg] { 1242 // As a special case we don't return block params for the entry block, as all the arguments 1243 // will be defined by the `Inst::Args` instruction. 1244 if block == self.entry { 1245 return &[]; 1246 } 1247 1248 let (start, end) = self.block_params_range[block.index()]; 1249 let ret = &self.block_params[start as usize..end as usize]; 1250 // Currently block params are never aliased to another vreg, but 1251 // double-check just to be sure. 1252 self.assert_no_vreg_aliases(ret) 1253 } 1254 1255 fn branch_blockparams(&self, block: BlockIndex, _insn: InsnIndex, succ_idx: usize) -> &[VReg] { 1256 let (succ_range_start, succ_range_end) = self.branch_block_arg_succ_range[block.index()]; 1257 let succ_ranges = 1258 &self.branch_block_arg_range[succ_range_start as usize..succ_range_end as usize]; 1259 let (branch_block_args_start, branch_block_args_end) = succ_ranges[succ_idx]; 1260 let ret = &self.branch_block_args 1261 [branch_block_args_start as usize..branch_block_args_end as usize]; 1262 self.assert_no_vreg_aliases(ret) 1263 } 1264 1265 fn is_ret(&self, insn: InsnIndex) -> bool { 1266 match self.insts[insn.index()].is_term() { 1267 // We treat blocks terminated by an unconditional trap like a return for regalloc. 1268 MachTerminator::None => self.insts[insn.index()].is_trap(), 1269 MachTerminator::Ret => true, 1270 _ => false, 1271 } 1272 } 1273 1274 fn is_branch(&self, insn: InsnIndex) -> bool { 1275 match self.insts[insn.index()].is_term() { 1276 MachTerminator::Cond | MachTerminator::Uncond | MachTerminator::Indirect => true, 1277 _ => false, 1278 } 1279 } 1280 1281 fn requires_refs_on_stack(&self, insn: InsnIndex) -> bool { 1282 self.insts[insn.index()].is_safepoint() 1283 } 1284 1285 fn is_move(&self, insn: InsnIndex) -> Option<(Operand, Operand)> { 1286 let (a, b) = self.is_move.get(&insn)?; 1287 Some(( 1288 self.assert_operand_not_vreg_alias(*a), 1289 self.assert_operand_not_vreg_alias(*b), 1290 )) 1291 } 1292 1293 fn inst_operands(&self, insn: InsnIndex) -> &[Operand] { 1294 let (start, end) = self.operand_ranges[insn.index()]; 1295 let ret = &self.operands[start as usize..end as usize]; 1296 for op in ret { 1297 self.assert_operand_not_vreg_alias(*op); 1298 } 1299 ret 1300 } 1301 1302 fn inst_clobbers(&self, insn: InsnIndex) -> PRegSet { 1303 self.clobbers.get(&insn).cloned().unwrap_or_default() 1304 } 1305 1306 fn num_vregs(&self) -> usize { 1307 std::cmp::max(self.vreg_types.len(), first_user_vreg_index()) 1308 } 1309 1310 fn reftype_vregs(&self) -> &[VReg] { 1311 self.assert_no_vreg_aliases(&self.reftyped_vregs[..]) 1312 } 1313 1314 fn debug_value_labels(&self) -> &[(VReg, InsnIndex, InsnIndex, u32)] { 1315 // VRegs here are inserted into `debug_value_labels` after code is 1316 // generated and aliases are fully defined, so no double-check that 1317 // aliases are not lingering. 1318 for (vreg, ..) in self.debug_value_labels.iter() { 1319 self.assert_not_vreg_alias(*vreg); 1320 } 1321 &self.debug_value_labels[..] 1322 } 1323 1324 fn spillslot_size(&self, regclass: RegClass) -> usize { 1325 self.abi.get_spillslot_size(regclass) as usize 1326 } 1327 1328 fn allow_multiple_vreg_defs(&self) -> bool { 1329 // At least the s390x backend requires this, because the 1330 // `Loop` pseudo-instruction aggregates all Operands so pinned 1331 // vregs (RealRegs) may occur more than once. 1332 true 1333 } 1334 } 1335 1336 impl<I: VCodeInst> fmt::Debug for VCode<I> { 1337 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 1338 writeln!(f, "VCode {{")?; 1339 writeln!(f, " Entry block: {}", self.entry.index())?; 1340 1341 let mut state = Default::default(); 1342 1343 let mut alias_keys = self.vreg_aliases.keys().cloned().collect::<Vec<_>>(); 1344 alias_keys.sort_unstable(); 1345 for key in alias_keys { 1346 let dest = self.vreg_aliases.get(&key).unwrap(); 1347 writeln!(f, " {:?} := {:?}", Reg::from(key), Reg::from(*dest))?; 1348 } 1349 1350 for block in 0..self.num_blocks() { 1351 let block = BlockIndex::new(block); 1352 writeln!(f, "Block {}:", block.index())?; 1353 if let Some(bb) = self.bindex_to_bb(block) { 1354 writeln!(f, " (original IR block: {})", bb)?; 1355 } 1356 for succ in self.succs(block) { 1357 writeln!(f, " (successor: Block {})", succ.index())?; 1358 } 1359 let (start, end) = self.block_ranges[block.index()]; 1360 writeln!( 1361 f, 1362 " (instruction range: {} .. {})", 1363 start.index(), 1364 end.index() 1365 )?; 1366 for inst in start.index()..end.index() { 1367 writeln!( 1368 f, 1369 " Inst {}: {}", 1370 inst, 1371 self.insts[inst].pretty_print_inst(&[], &mut state) 1372 )?; 1373 } 1374 } 1375 1376 writeln!(f, "}}")?; 1377 Ok(()) 1378 } 1379 } 1380 1381 /// This structure manages VReg allocation during the lifetime of the VCodeBuilder. 1382 pub struct VRegAllocator<I> { 1383 /// Next virtual register number to allocate. 1384 next_vreg: usize, 1385 1386 /// VReg IR-level types. 1387 vreg_types: Vec<Type>, 1388 1389 /// A set with the same contents as `reftyped_vregs`, in order to 1390 /// avoid inserting more than once. 1391 reftyped_vregs_set: FxHashSet<VReg>, 1392 1393 /// Reference-typed `regalloc2::VReg`s. The regalloc requires 1394 /// these in a dense slice (as opposed to querying the 1395 /// reftype-status of each vreg) for efficient iteration. 1396 reftyped_vregs: Vec<VReg>, 1397 1398 /// The type of instruction that this allocator makes registers for. 1399 _inst: core::marker::PhantomData<I>, 1400 } 1401 1402 impl<I: VCodeInst> VRegAllocator<I> { 1403 /// Make a new VRegAllocator. 1404 pub fn new() -> Self { 1405 Self { 1406 next_vreg: first_user_vreg_index(), 1407 vreg_types: vec![], 1408 reftyped_vregs_set: FxHashSet::default(), 1409 reftyped_vregs: vec![], 1410 _inst: core::marker::PhantomData::default(), 1411 } 1412 } 1413 1414 /// Allocate a fresh ValueRegs. 1415 pub fn alloc(&mut self, ty: Type) -> CodegenResult<ValueRegs<Reg>> { 1416 let v = self.next_vreg; 1417 let (regclasses, tys) = I::rc_for_type(ty)?; 1418 self.next_vreg += regclasses.len(); 1419 if self.next_vreg >= VReg::MAX { 1420 return Err(CodegenError::CodeTooLarge); 1421 } 1422 1423 let regs: ValueRegs<Reg> = match regclasses { 1424 &[rc0] => ValueRegs::one(VReg::new(v, rc0).into()), 1425 &[rc0, rc1] => ValueRegs::two(VReg::new(v, rc0).into(), VReg::new(v + 1, rc1).into()), 1426 // We can extend this if/when we support 32-bit targets; e.g., 1427 // an i128 on a 32-bit machine will need up to four machine regs 1428 // for a `Value`. 1429 _ => panic!("Value must reside in 1 or 2 registers"), 1430 }; 1431 for (®_ty, ®) in tys.iter().zip(regs.regs().iter()) { 1432 self.set_vreg_type(reg.to_virtual_reg().unwrap(), reg_ty); 1433 } 1434 Ok(regs) 1435 } 1436 1437 /// Set the type of this virtual register. 1438 pub fn set_vreg_type(&mut self, vreg: VirtualReg, ty: Type) { 1439 if self.vreg_types.len() <= vreg.index() { 1440 self.vreg_types.resize(vreg.index() + 1, ir::types::INVALID); 1441 } 1442 self.vreg_types[vreg.index()] = ty; 1443 if is_reftype(ty) { 1444 let vreg: VReg = vreg.into(); 1445 if self.reftyped_vregs_set.insert(vreg) { 1446 self.reftyped_vregs.push(vreg); 1447 } 1448 } 1449 } 1450 } 1451 1452 /// This structure tracks the large constants used in VCode that will be emitted separately by the 1453 /// [MachBuffer]. 1454 /// 1455 /// First, during the lowering phase, constants are inserted using 1456 /// [VCodeConstants.insert]; an intermediate handle, [VCodeConstant], tracks what constants are 1457 /// used in this phase. Some deduplication is performed, when possible, as constant 1458 /// values are inserted. 1459 /// 1460 /// Secondly, during the emission phase, the [MachBuffer] assigns [MachLabel]s for each of the 1461 /// constants so that instructions can refer to the value's memory location. The [MachBuffer] 1462 /// then writes the constant values to the buffer. 1463 #[derive(Default)] 1464 pub struct VCodeConstants { 1465 constants: PrimaryMap<VCodeConstant, VCodeConstantData>, 1466 pool_uses: HashMap<Constant, VCodeConstant>, 1467 well_known_uses: HashMap<*const [u8], VCodeConstant>, 1468 u64s: HashMap<[u8; 8], VCodeConstant>, 1469 } 1470 impl VCodeConstants { 1471 /// Initialize the structure with the expected number of constants. 1472 pub fn with_capacity(expected_num_constants: usize) -> Self { 1473 Self { 1474 constants: PrimaryMap::with_capacity(expected_num_constants), 1475 pool_uses: HashMap::with_capacity(expected_num_constants), 1476 well_known_uses: HashMap::new(), 1477 u64s: HashMap::new(), 1478 } 1479 } 1480 1481 /// Insert a constant; using this method indicates that a constant value will be used and thus 1482 /// will be emitted to the `MachBuffer`. The current implementation can deduplicate constants 1483 /// that are [VCodeConstantData::Pool] or [VCodeConstantData::WellKnown] but not 1484 /// [VCodeConstantData::Generated]. 1485 pub fn insert(&mut self, data: VCodeConstantData) -> VCodeConstant { 1486 match data { 1487 VCodeConstantData::Generated(_) => self.constants.push(data), 1488 VCodeConstantData::Pool(constant, _) => match self.pool_uses.get(&constant) { 1489 None => { 1490 let vcode_constant = self.constants.push(data); 1491 self.pool_uses.insert(constant, vcode_constant); 1492 vcode_constant 1493 } 1494 Some(&vcode_constant) => vcode_constant, 1495 }, 1496 VCodeConstantData::WellKnown(data_ref) => { 1497 match self.well_known_uses.entry(data_ref as *const [u8]) { 1498 Entry::Vacant(v) => { 1499 let vcode_constant = self.constants.push(data); 1500 v.insert(vcode_constant); 1501 vcode_constant 1502 } 1503 Entry::Occupied(o) => *o.get(), 1504 } 1505 } 1506 VCodeConstantData::U64(value) => match self.u64s.entry(value) { 1507 Entry::Vacant(v) => { 1508 let vcode_constant = self.constants.push(data); 1509 v.insert(vcode_constant); 1510 vcode_constant 1511 } 1512 Entry::Occupied(o) => *o.get(), 1513 }, 1514 } 1515 } 1516 1517 /// Return the number of constants inserted. 1518 pub fn len(&self) -> usize { 1519 self.constants.len() 1520 } 1521 1522 /// Iterate over the [VCodeConstant] keys inserted in this structure. 1523 pub fn keys(&self) -> Keys<VCodeConstant> { 1524 self.constants.keys() 1525 } 1526 1527 /// Iterate over the [VCodeConstant] keys and the data (as a byte slice) inserted in this 1528 /// structure. 1529 pub fn iter(&self) -> impl Iterator<Item = (VCodeConstant, &VCodeConstantData)> { 1530 self.constants.iter() 1531 } 1532 } 1533 1534 /// A use of a constant by one or more VCode instructions; see [VCodeConstants]. 1535 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 1536 pub struct VCodeConstant(u32); 1537 entity_impl!(VCodeConstant); 1538 1539 /// Identify the different types of constant that can be inserted into [VCodeConstants]. Tracking 1540 /// these separately instead of as raw byte buffers allows us to avoid some duplication. 1541 pub enum VCodeConstantData { 1542 /// A constant already present in the Cranelift IR 1543 /// [ConstantPool](crate::ir::constant::ConstantPool). 1544 Pool(Constant, ConstantData), 1545 /// A reference to a well-known constant value that is statically encoded within the compiler. 1546 WellKnown(&'static [u8]), 1547 /// A constant value generated during lowering; the value may depend on the instruction context 1548 /// which makes it difficult to de-duplicate--if possible, use other variants. 1549 Generated(ConstantData), 1550 /// A constant of at most 64 bits. These are deduplicated as 1551 /// well. Stored as a fixed-size array of `u8` so that we do not 1552 /// encounter endianness problems when cross-compiling. 1553 U64([u8; 8]), 1554 } 1555 impl VCodeConstantData { 1556 /// Retrieve the constant data as a byte slice. 1557 pub fn as_slice(&self) -> &[u8] { 1558 match self { 1559 VCodeConstantData::Pool(_, d) | VCodeConstantData::Generated(d) => d.as_slice(), 1560 VCodeConstantData::WellKnown(d) => d, 1561 VCodeConstantData::U64(value) => &value[..], 1562 } 1563 } 1564 1565 /// Calculate the alignment of the constant data. 1566 pub fn alignment(&self) -> u32 { 1567 if self.as_slice().len() <= 8 { 1568 8 1569 } else { 1570 16 1571 } 1572 } 1573 } 1574 1575 #[cfg(test)] 1576 mod test { 1577 use super::*; 1578 use std::mem::size_of; 1579 1580 #[test] 1581 fn size_of_constant_structs() { 1582 assert_eq!(size_of::<Constant>(), 4); 1583 assert_eq!(size_of::<VCodeConstant>(), 4); 1584 assert_eq!(size_of::<ConstantData>(), 24); 1585 assert_eq!(size_of::<VCodeConstantData>(), 32); 1586 assert_eq!( 1587 size_of::<PrimaryMap<VCodeConstant, VCodeConstantData>>(), 1588 24 1589 ); 1590 // TODO The VCodeConstants structure's memory size could be further optimized. 1591 // With certain versions of Rust, each `HashMap` in `VCodeConstants` occupied at 1592 // least 48 bytes, making an empty `VCodeConstants` cost 120 bytes. 1593 } 1594 } 1595