1 //! This implements the VCode container: a CFG of Insts that have been lowered. 2 //! 3 //! VCode is virtual-register code. An instruction in VCode is almost a machine 4 //! instruction; however, its register slots can refer to virtual registers in 5 //! addition to real machine registers. 6 //! 7 //! VCode is structured with traditional basic blocks, and 8 //! each block must be terminated by an unconditional branch (one target), a 9 //! conditional branch (two targets), or a return (no targets). Note that this 10 //! slightly differs from the machine code of most ISAs: in most ISAs, a 11 //! conditional branch has one target (and the not-taken case falls through). 12 //! However, we expect that machine backends will elide branches to the following 13 //! block (i.e., zero-offset jumps), and will be able to codegen a branch-cond / 14 //! branch-uncond pair if *both* targets are not fallthrough. This allows us to 15 //! play with layout prior to final binary emission, as well, if we want. 16 //! 17 //! See the main module comment in `mod.rs` for more details on the VCode-based 18 //! backend pipeline. 19 20 use crate::fx::FxHashMap; 21 use crate::fx::FxHashSet; 22 use crate::ir::{self, types, Constant, ConstantData, LabelValueLoc, SourceLoc, ValueLabel}; 23 use crate::machinst::*; 24 use crate::timing; 25 use crate::ValueLocRange; 26 use regalloc2::{ 27 Edit, Function as RegallocFunction, InstOrEdit, InstRange, Operand, OperandKind, PReg, PRegSet, 28 RegClass, VReg, 29 }; 30 31 use alloc::boxed::Box; 32 use alloc::vec::Vec; 33 use cranelift_entity::{entity_impl, Keys, PrimaryMap}; 34 use std::collections::hash_map::Entry; 35 use std::collections::HashMap; 36 use std::fmt; 37 38 /// Index referring to an instruction in VCode. 39 pub type InsnIndex = regalloc2::Inst; 40 41 /// Index referring to a basic block in VCode. 42 pub type BlockIndex = regalloc2::Block; 43 44 /// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be 45 /// a `MachInst` and it must be able to emit itself at least to a `SizeCodeSink`. 46 pub trait VCodeInst: MachInst + MachInstEmit {} 47 impl<I: MachInst + MachInstEmit> VCodeInst for I {} 48 49 /// A function in "VCode" (virtualized-register code) form, after 50 /// lowering. This is essentially a standard CFG of basic blocks, 51 /// where each basic block consists of lowered instructions produced 52 /// by the machine-specific backend. 53 /// 54 /// Note that the VCode is immutable once produced, and is not 55 /// modified by register allocation in particular. Rather, register 56 /// allocation on the `VCode` produces a separate `regalloc2::Output` 57 /// struct, and this can be passed to `emit`. `emit` in turn does not 58 /// modify the vcode, but produces an `EmitResult`, which contains the 59 /// machine code itself, and the associated disassembly and/or 60 /// metadata as requested. 61 pub struct VCode<I: VCodeInst> { 62 /// VReg IR-level types. 63 vreg_types: Vec<Type>, 64 65 /// Do we have any ref values among our vregs? 66 have_ref_values: bool, 67 68 /// Lowered machine instructions in order corresponding to the original IR. 69 insts: Vec<I>, 70 71 /// Operands: pre-regalloc references to virtual registers with 72 /// constraints, in one flattened array. This allows the regalloc 73 /// to efficiently access all operands without requiring expensive 74 /// matches or method invocations on insts. 75 operands: Vec<Operand>, 76 77 /// Operand index ranges: for each instruction in `insts`, there 78 /// is a tuple here providing the range in `operands` for that 79 /// instruction's operands. 80 operand_ranges: Vec<(u32, u32)>, 81 82 /// Clobbers: a sparse map from instruction indices to clobber masks. 83 clobbers: FxHashMap<InsnIndex, PRegSet>, 84 85 /// Move information: for a given InsnIndex, (src, dst) operand pair. 86 is_move: FxHashMap<InsnIndex, (Operand, Operand)>, 87 88 /// Source locations for each instruction. (`SourceLoc` is a `u32`, so it is 89 /// reasonable to keep one of these per instruction.) 90 srclocs: Vec<SourceLoc>, 91 92 /// Entry block. 93 entry: BlockIndex, 94 95 /// Block instruction indices. 96 block_ranges: Vec<(InsnIndex, InsnIndex)>, 97 98 /// Block successors: index range in the `block_succs_preds` list. 99 block_succ_range: Vec<(u32, u32)>, 100 101 /// Block predecessors: index range in the `block_succs_preds` list. 102 block_pred_range: Vec<(u32, u32)>, 103 104 /// Block successor and predecessor lists, concatenated into one 105 /// Vec. The `block_succ_range` and `block_pred_range` lists of 106 /// tuples above give (start, end) ranges within this list that 107 /// correspond to each basic block's successors or predecessors, 108 /// respectively. 109 block_succs_preds: Vec<regalloc2::Block>, 110 111 /// Block parameters: index range in `block_params` below. 112 block_params_range: Vec<(u32, u32)>, 113 114 /// Block parameter lists, concatenated into one vec. The 115 /// `block_params_range` list of tuples above gives (start, end) 116 /// ranges within this list that correspond to each basic block's 117 /// blockparam vregs. 118 block_params: Vec<regalloc2::VReg>, 119 120 /// Outgoing block arguments on branch instructions, concatenated 121 /// into one list. 122 /// 123 /// Note that this is conceptually a 3D array: we have a VReg list 124 /// per block, per successor. We flatten those three dimensions 125 /// into this 1D vec, then store index ranges in two levels of 126 /// indirection. 127 /// 128 /// Indexed by the indices in `branch_block_arg_succ_range`. 129 branch_block_args: Vec<regalloc2::VReg>, 130 131 /// Array of sequences of (start, end) tuples in 132 /// `branch_block_args`, one for each successor; these sequences 133 /// for each block are concatenated. 134 /// 135 /// Indexed by the indices in `branch_block_arg_succ_range`. 136 branch_block_arg_range: Vec<(u32, u32)>, 137 138 /// For a given block, indices in `branch_block_arg_range` 139 /// corresponding to all of its successors. 140 branch_block_arg_succ_range: Vec<(u32, u32)>, 141 142 /// VReg aliases. Each key in this table is translated to its 143 /// value when gathering Operands from instructions. Aliases are 144 /// not chased transitively (we do not further look up the 145 /// translated reg to see if it is another alias). 146 /// 147 /// We use these aliases to rename an instruction's expected 148 /// result vregs to the returned vregs from lowering, which are 149 /// usually freshly-allocated temps. 150 /// 151 /// Operands and branch arguments will already have been 152 /// translated through this alias table; but it helps to make 153 /// sense of instructions when pretty-printed, for example. 154 vreg_aliases: FxHashMap<regalloc2::VReg, regalloc2::VReg>, 155 156 /// Block-order information. 157 block_order: BlockLoweringOrder, 158 159 /// ABI object. 160 abi: Box<dyn ABICallee<I = I>>, 161 162 /// Constant information used during code emission. This should be 163 /// immutable across function compilations within the same module. 164 emit_info: I::Info, 165 166 /// Reference-typed `regalloc2::VReg`s. The regalloc requires 167 /// these in a dense slice (as opposed to querying the 168 /// reftype-status of each vreg) for efficient iteration. 169 reftyped_vregs: Vec<VReg>, 170 171 /// A set with the same contents as `reftyped_vregs`, in order to 172 /// avoid inserting more than once. 173 reftyped_vregs_set: FxHashSet<VReg>, 174 175 /// Constants. 176 constants: VCodeConstants, 177 178 /// Value labels for debuginfo attached to vregs. 179 debug_value_labels: Vec<(VReg, InsnIndex, InsnIndex, u32)>, 180 } 181 182 /// The result of `VCode::emit`. Contains all information computed 183 /// during emission: actual machine code, optionally a disassembly, 184 /// and optionally metadata about the code layout. 185 pub struct EmitResult<I: VCodeInst> { 186 /// The MachBuffer containing the machine code. 187 pub buffer: MachBuffer<I>, 188 189 /// Offset of each basic block, recorded during emission. Computed 190 /// only if `debug_value_labels` is non-empty. 191 pub bb_offsets: Vec<CodeOffset>, 192 193 /// Final basic-block edges, in terms of code offsets of 194 /// bb-starts. Computed only if `debug_value_labels` is non-empty. 195 pub bb_edges: Vec<(CodeOffset, CodeOffset)>, 196 197 /// Final instruction offsets, recorded during emission. Computed 198 /// only if `debug_value_labels` is non-empty. 199 pub inst_offsets: Vec<CodeOffset>, 200 201 /// Final length of function body. 202 pub func_body_len: CodeOffset, 203 204 /// The pretty-printed disassembly, if any. This uses the same 205 /// pretty-printing for MachInsts as the pre-regalloc VCode Debug 206 /// implementation, but additionally includes the prologue and 207 /// epilogue(s), and makes use of the regalloc results. 208 pub disasm: Option<String>, 209 210 /// Offsets of stackslots. 211 pub stackslot_offsets: PrimaryMap<StackSlot, u32>, 212 213 /// Value-labels information (debug metadata). 214 pub value_labels_ranges: ValueLabelsRanges, 215 216 /// Stack frame size. 217 pub frame_size: u32, 218 } 219 220 /// A builder for a VCode function body. 221 /// 222 /// This builder has the ability to accept instructions in either 223 /// forward or reverse order, depending on the pass direction that 224 /// produces the VCode. The lowering from CLIF to VCode<MachInst> 225 /// ordinarily occurs in reverse order (in order to allow instructions 226 /// to be lowered only if used, and not merged) so a reversal will 227 /// occur at the end of lowering to ensure the VCode is in machine 228 /// order. 229 /// 230 /// If built in reverse, block and instruction indices used once the 231 /// VCode is built are relative to the final (reversed) order, not the 232 /// order of construction. Note that this means we do not know the 233 /// final block or instruction indices when building, so we do not 234 /// hand them out. (The user is assumed to know them when appending 235 /// terminator instructions with successor blocks.) 236 pub struct VCodeBuilder<I: VCodeInst> { 237 /// In-progress VCode. 238 vcode: VCode<I>, 239 240 /// In what direction is the build occuring? 241 direction: VCodeBuildDirection, 242 243 /// Index of the last block-start in the vcode. 244 block_start: usize, 245 246 /// Start of succs for the current block in the concatenated succs list. 247 succ_start: usize, 248 249 /// Start of blockparams for the current block in the concatenated 250 /// blockparams list. 251 block_params_start: usize, 252 253 /// Start of successor blockparam arg list entries in 254 /// the concatenated branch_block_arg_range list. 255 branch_block_arg_succ_start: usize, 256 257 /// Current source location. 258 cur_srcloc: SourceLoc, 259 260 /// Debug-value label in-progress map, keyed by label. For each 261 /// label, we keep disjoint ranges mapping to vregs. We'll flatten 262 /// this into (vreg, range, label) tuples when done. 263 debug_info: FxHashMap<ValueLabel, Vec<(InsnIndex, InsnIndex, VReg)>>, 264 } 265 266 /// Direction in which a VCodeBuilder builds VCode. 267 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 268 pub enum VCodeBuildDirection { 269 // TODO: add `Forward` once we need it and can test it adequately. 270 /// Backward-build pass: we expect the producer to call `emit()` 271 /// with instructions in reverse program order within each block. 272 Backward, 273 } 274 275 impl<I: VCodeInst> VCodeBuilder<I> { 276 /// Create a new VCodeBuilder. 277 pub fn new( 278 abi: Box<dyn ABICallee<I = I>>, 279 emit_info: I::Info, 280 block_order: BlockLoweringOrder, 281 constants: VCodeConstants, 282 direction: VCodeBuildDirection, 283 ) -> VCodeBuilder<I> { 284 let vcode = VCode::new(abi, emit_info, block_order, constants); 285 286 VCodeBuilder { 287 vcode, 288 direction, 289 block_start: 0, 290 succ_start: 0, 291 block_params_start: 0, 292 branch_block_arg_succ_start: 0, 293 cur_srcloc: SourceLoc::default(), 294 debug_info: FxHashMap::default(), 295 } 296 } 297 298 /// Access the ABI object. 299 pub fn abi(&mut self) -> &mut dyn ABICallee<I = I> { 300 &mut *self.vcode.abi 301 } 302 303 /// Access to the BlockLoweringOrder object. 304 pub fn block_order(&self) -> &BlockLoweringOrder { 305 &self.vcode.block_order 306 } 307 308 /// Set the type of a VReg. 309 pub fn set_vreg_type(&mut self, vreg: VirtualReg, ty: Type) { 310 if self.vcode.vreg_types.len() <= vreg.index() { 311 self.vcode 312 .vreg_types 313 .resize(vreg.index() + 1, ir::types::I8); 314 } 315 self.vcode.vreg_types[vreg.index()] = ty; 316 if is_reftype(ty) { 317 let vreg: VReg = vreg.into(); 318 if self.vcode.reftyped_vregs_set.insert(vreg) { 319 self.vcode.reftyped_vregs.push(vreg); 320 } 321 self.vcode.have_ref_values = true; 322 } 323 } 324 325 /// Get the type of a VReg. 326 pub fn get_vreg_type(&self, vreg: VirtualReg) -> Type { 327 self.vcode.vreg_types[vreg.index()] 328 } 329 330 /// Set the current block as the entry block. 331 pub fn set_entry(&mut self, block: BlockIndex) { 332 self.vcode.entry = block; 333 } 334 335 /// End the current basic block. Must be called after emitting vcode insts 336 /// for IR insts and prior to ending the function (building the VCode). 337 pub fn end_bb(&mut self) { 338 let start_idx = self.block_start; 339 let end_idx = self.vcode.insts.len(); 340 self.block_start = end_idx; 341 // Add the instruction index range to the list of blocks. 342 self.vcode 343 .block_ranges 344 .push((InsnIndex::new(start_idx), InsnIndex::new(end_idx))); 345 // End the successors list. 346 let succ_end = self.vcode.block_succs_preds.len(); 347 self.vcode 348 .block_succ_range 349 .push((self.succ_start as u32, succ_end as u32)); 350 self.succ_start = succ_end; 351 // End the blockparams list. 352 let block_params_end = self.vcode.block_params.len(); 353 self.vcode 354 .block_params_range 355 .push((self.block_params_start as u32, block_params_end as u32)); 356 self.block_params_start = block_params_end; 357 // End the branch blockparam args list. 358 let branch_block_arg_succ_end = self.vcode.branch_block_arg_range.len(); 359 self.vcode.branch_block_arg_succ_range.push(( 360 self.branch_block_arg_succ_start as u32, 361 branch_block_arg_succ_end as u32, 362 )); 363 self.branch_block_arg_succ_start = branch_block_arg_succ_end; 364 } 365 366 pub fn add_block_param(&mut self, param: VirtualReg, ty: Type) { 367 self.set_vreg_type(param, ty); 368 self.vcode.block_params.push(param.into()); 369 } 370 371 fn add_branch_args_for_succ(&mut self, args: &[Reg]) { 372 let start = self.vcode.branch_block_args.len(); 373 self.vcode 374 .branch_block_args 375 .extend(args.iter().map(|&arg| VReg::from(arg))); 376 let end = self.vcode.branch_block_args.len(); 377 self.vcode 378 .branch_block_arg_range 379 .push((start as u32, end as u32)); 380 } 381 382 /// Push an instruction for the current BB and current IR inst 383 /// within the BB. 384 pub fn push(&mut self, insn: I) { 385 self.vcode.insts.push(insn); 386 self.vcode.srclocs.push(self.cur_srcloc); 387 } 388 389 /// Add a successor block with branch args. 390 pub fn add_succ(&mut self, block: BlockIndex, args: &[Reg]) { 391 self.vcode.block_succs_preds.push(block); 392 self.add_branch_args_for_succ(args); 393 } 394 395 /// Set the current source location. 396 pub fn set_srcloc(&mut self, srcloc: SourceLoc) { 397 self.cur_srcloc = srcloc; 398 } 399 400 /// Add a debug value label to a register. 401 pub fn add_value_label(&mut self, reg: Reg, label: ValueLabel) { 402 // We'll fix up labels in reverse(). Because we're generating 403 // code bottom-to-top, the liverange of the label goes *from* 404 // the last index at which was defined (or 0, which is the end 405 // of the eventual function) *to* just this instruction, and 406 // no further. 407 let inst = InsnIndex::new(self.vcode.insts.len()); 408 let labels = self.debug_info.entry(label).or_insert_with(|| vec![]); 409 let last = labels 410 .last() 411 .map(|(_start, end, _vreg)| *end) 412 .unwrap_or(InsnIndex::new(0)); 413 labels.push((last, inst, reg.into())); 414 } 415 416 pub fn set_vreg_alias(&mut self, from: Reg, to: Reg) { 417 let from = from.into(); 418 let resolved_to = self.resolve_vreg_alias(to.into()); 419 // Disallow cycles (see below). 420 assert_ne!(resolved_to, from); 421 self.vcode.vreg_aliases.insert(from, resolved_to); 422 } 423 424 pub fn resolve_vreg_alias(&self, from: regalloc2::VReg) -> regalloc2::VReg { 425 Self::resolve_vreg_alias_impl(&self.vcode.vreg_aliases, from) 426 } 427 428 fn resolve_vreg_alias_impl( 429 aliases: &FxHashMap<regalloc2::VReg, regalloc2::VReg>, 430 from: regalloc2::VReg, 431 ) -> regalloc2::VReg { 432 // We prevent cycles from existing by resolving targets of 433 // aliases eagerly before setting them. If the target resolves 434 // to the origin of the alias, then a cycle would be created 435 // and the alias is disallowed. Because of the structure of 436 // SSA code (one instruction can refer to another's defs but 437 // not vice-versa, except indirectly through 438 // phis/blockparams), cycles should not occur as we use 439 // aliases to redirect vregs to the temps that actually define 440 // them. 441 442 let mut vreg = from; 443 while let Some(to) = aliases.get(&vreg) { 444 vreg = *to; 445 } 446 vreg 447 } 448 449 /// Access the constants. 450 pub fn constants(&mut self) -> &mut VCodeConstants { 451 &mut self.vcode.constants 452 } 453 454 fn compute_preds_from_succs(&mut self) { 455 // Compute predecessors from successors. In order to gather 456 // all preds for a block into a contiguous sequence, we build 457 // a list of (succ, pred) tuples and then sort. 458 let mut succ_pred_edges: Vec<(BlockIndex, BlockIndex)> = 459 Vec::with_capacity(self.vcode.block_succs_preds.len()); 460 for (pred, &(start, end)) in self.vcode.block_succ_range.iter().enumerate() { 461 let pred = BlockIndex::new(pred); 462 for i in start..end { 463 let succ = BlockIndex::new(self.vcode.block_succs_preds[i as usize].index()); 464 succ_pred_edges.push((succ, pred)); 465 } 466 } 467 succ_pred_edges.sort_unstable(); 468 469 let mut i = 0; 470 for succ in 0..self.vcode.num_blocks() { 471 let succ = BlockIndex::new(succ); 472 let start = self.vcode.block_succs_preds.len(); 473 while i < succ_pred_edges.len() && succ_pred_edges[i].0 == succ { 474 let pred = succ_pred_edges[i].1; 475 self.vcode.block_succs_preds.push(pred); 476 i += 1; 477 } 478 let end = self.vcode.block_succs_preds.len(); 479 self.vcode.block_pred_range.push((start as u32, end as u32)); 480 } 481 } 482 483 /// Called once, when a build in Backward order is complete, to 484 /// perform the overall reversal (into final forward order) and 485 /// finalize metadata accordingly. 486 fn reverse_and_finalize(&mut self) { 487 let n_insts = self.vcode.insts.len(); 488 if n_insts == 0 { 489 return; 490 } 491 492 // Reverse the per-block and per-inst sequences. 493 self.vcode.block_ranges.reverse(); 494 // block_params_range is indexed by block (and blocks were 495 // traversed in reverse) so we reverse it; but block-param 496 // sequences in the concatenated vec can remain in reverse 497 // order (it is effectively an arena of arbitrarily-placed 498 // referenced sequences). 499 self.vcode.block_params_range.reverse(); 500 // Likewise, we reverse block_succ_range, but the block_succ 501 // concatenated array can remain as-is. 502 self.vcode.block_succ_range.reverse(); 503 self.vcode.insts.reverse(); 504 self.vcode.srclocs.reverse(); 505 // Likewise, branch_block_arg_succ_range is indexed by block 506 // so must be reversed. 507 self.vcode.branch_block_arg_succ_range.reverse(); 508 509 // To translate an instruction index *endpoint* in reversed 510 // order to forward order, compute `n_insts - i`. 511 // 512 // Why not `n_insts - 1 - i`? That would be correct to 513 // translate an individual instruction index (for ten insts 0 514 // to 9 inclusive, inst 0 becomes 9, and inst 9 becomes 515 // 0). But for the usual inclusive-start, exclusive-end range 516 // idiom, inclusive starts become exclusive ends and 517 // vice-versa, so e.g. an (inclusive) start of 0 becomes an 518 // (exclusive) end of 10. 519 let translate = |inst: InsnIndex| InsnIndex::new(n_insts - inst.index()); 520 521 // Edit the block-range instruction indices. 522 for tuple in &mut self.vcode.block_ranges { 523 let (start, end) = *tuple; 524 *tuple = (translate(end), translate(start)); // Note reversed order. 525 } 526 527 // Generate debug-value labels based on per-label maps. 528 for (label, tuples) in &self.debug_info { 529 for &(start, end, vreg) in tuples { 530 let vreg = self.resolve_vreg_alias(vreg); 531 let fwd_start = translate(end); 532 let fwd_end = translate(start); 533 self.vcode 534 .debug_value_labels 535 .push((vreg, fwd_start, fwd_end, label.as_u32())); 536 } 537 } 538 539 // Now sort debug value labels by VReg, as required 540 // by regalloc2. 541 self.vcode 542 .debug_value_labels 543 .sort_unstable_by_key(|(vreg, _, _, _)| *vreg); 544 } 545 546 fn collect_operands(&mut self) { 547 for (i, insn) in self.vcode.insts.iter().enumerate() { 548 // Push operands from the instruction onto the operand list. 549 // 550 // We rename through the vreg alias table as we collect 551 // the operands. This is better than a separate post-pass 552 // over operands, because it has more cache locality: 553 // operands only need to pass through L1 once. This is 554 // also better than renaming instructions' 555 // operands/registers while lowering, because here we only 556 // need to do the `match` over the instruction to visit 557 // its register fields (which is slow, branchy code) once. 558 559 let vreg_aliases = &self.vcode.vreg_aliases; 560 let mut op_collector = OperandCollector::new(&mut self.vcode.operands, |vreg| { 561 Self::resolve_vreg_alias_impl(vreg_aliases, vreg) 562 }); 563 insn.get_operands(&mut op_collector); 564 let (ops, clobbers) = op_collector.finish(); 565 self.vcode.operand_ranges.push(ops); 566 567 if clobbers != PRegSet::default() { 568 self.vcode.clobbers.insert(InsnIndex::new(i), clobbers); 569 } 570 571 if let Some((dst, src)) = insn.is_move() { 572 let src = Operand::reg_use(Self::resolve_vreg_alias_impl(vreg_aliases, src.into())); 573 let dst = Operand::reg_def(Self::resolve_vreg_alias_impl( 574 vreg_aliases, 575 dst.to_reg().into(), 576 )); 577 // Note that regalloc2 requires these in (src, dst) order. 578 self.vcode.is_move.insert(InsnIndex::new(i), (src, dst)); 579 } 580 } 581 582 // Translate blockparam args via the vreg aliases table as well. 583 for arg in &mut self.vcode.branch_block_args { 584 let new_arg = Self::resolve_vreg_alias_impl(&self.vcode.vreg_aliases, *arg); 585 log::trace!("operandcollector: block arg {:?} -> {:?}", arg, new_arg); 586 *arg = new_arg; 587 } 588 } 589 590 /// Build the final VCode. 591 pub fn build(mut self) -> VCode<I> { 592 if self.direction == VCodeBuildDirection::Backward { 593 self.reverse_and_finalize(); 594 } 595 self.collect_operands(); 596 self.compute_preds_from_succs(); 597 self.vcode.debug_value_labels.sort_unstable(); 598 self.vcode 599 } 600 } 601 602 /// Is this type a reference type? 603 fn is_reftype(ty: Type) -> bool { 604 ty == types::R64 || ty == types::R32 605 } 606 607 impl<I: VCodeInst> VCode<I> { 608 /// New empty VCode. 609 fn new( 610 abi: Box<dyn ABICallee<I = I>>, 611 emit_info: I::Info, 612 block_order: BlockLoweringOrder, 613 constants: VCodeConstants, 614 ) -> VCode<I> { 615 let n_blocks = block_order.lowered_order().len(); 616 VCode { 617 vreg_types: vec![], 618 have_ref_values: false, 619 insts: Vec::with_capacity(10 * n_blocks), 620 operands: Vec::with_capacity(30 * n_blocks), 621 operand_ranges: Vec::with_capacity(10 * n_blocks), 622 clobbers: FxHashMap::default(), 623 is_move: FxHashMap::default(), 624 srclocs: Vec::with_capacity(10 * n_blocks), 625 entry: BlockIndex::new(0), 626 block_ranges: Vec::with_capacity(n_blocks), 627 block_succ_range: Vec::with_capacity(n_blocks), 628 block_succs_preds: Vec::with_capacity(2 * n_blocks), 629 block_pred_range: Vec::with_capacity(n_blocks), 630 block_params_range: Vec::with_capacity(n_blocks), 631 block_params: Vec::with_capacity(5 * n_blocks), 632 branch_block_args: Vec::with_capacity(10 * n_blocks), 633 branch_block_arg_range: Vec::with_capacity(2 * n_blocks), 634 branch_block_arg_succ_range: Vec::with_capacity(n_blocks), 635 block_order, 636 abi, 637 emit_info, 638 reftyped_vregs: vec![], 639 reftyped_vregs_set: FxHashSet::default(), 640 constants, 641 debug_value_labels: vec![], 642 vreg_aliases: FxHashMap::with_capacity_and_hasher(10 * n_blocks, Default::default()), 643 } 644 } 645 646 /// Get the number of blocks. Block indices will be in the range `0 .. 647 /// (self.num_blocks() - 1)`. 648 pub fn num_blocks(&self) -> usize { 649 self.block_ranges.len() 650 } 651 652 /// Get the successors for a block. 653 pub fn succs(&self, block: BlockIndex) -> &[BlockIndex] { 654 let (start, end) = self.block_succ_range[block.index()]; 655 &self.block_succs_preds[start as usize..end as usize] 656 } 657 658 fn compute_clobbers(&self, regalloc: ®alloc2::Output) -> Vec<Writable<RealReg>> { 659 // Compute clobbered registers. 660 let mut clobbered = vec![]; 661 let mut clobbered_set = FxHashSet::default(); 662 663 // All moves are included in clobbers. 664 for edit in ®alloc.edits { 665 let Edit::Move { to, .. } = edit.1; 666 if let Some(preg) = to.as_reg() { 667 let reg = RealReg::from(preg); 668 if clobbered_set.insert(reg) { 669 clobbered.push(Writable::from_reg(reg)); 670 } 671 } 672 } 673 674 for (i, (start, end)) in self.operand_ranges.iter().enumerate() { 675 // Skip this instruction if not "included in clobbers" as 676 // per the MachInst. (Some backends use this to implement 677 // ABI specifics; e.g., excluding calls of the same ABI as 678 // the current function from clobbers, because by 679 // definition everything clobbered by the call can be 680 // clobbered by this function without saving as well.) 681 if !self.insts[i].is_included_in_clobbers() { 682 continue; 683 } 684 685 let start = *start as usize; 686 let end = *end as usize; 687 let operands = &self.operands[start..end]; 688 let allocs = ®alloc.allocs[start..end]; 689 for (operand, alloc) in operands.iter().zip(allocs.iter()) { 690 // We're interested only in writes (Mods or Defs). 691 if operand.kind() == OperandKind::Use { 692 continue; 693 } 694 if let Some(preg) = alloc.as_reg() { 695 let reg = RealReg::from(preg); 696 if clobbered_set.insert(reg) { 697 clobbered.push(Writable::from_reg(reg)); 698 } 699 } 700 } 701 702 // Also add explicitly-clobbered registers. 703 for preg in self 704 .clobbers 705 .get(&InsnIndex::new(i)) 706 .cloned() 707 .unwrap_or_default() 708 { 709 let reg = RealReg::from(preg); 710 if clobbered_set.insert(reg) { 711 clobbered.push(Writable::from_reg(reg)); 712 } 713 } 714 } 715 716 clobbered 717 } 718 719 /// Emit the instructions to a `MachBuffer`, containing fixed-up 720 /// code and external reloc/trap/etc. records ready for use. Takes 721 /// the regalloc results as well. 722 /// 723 /// Returns the machine code itself, and optionally metadata 724 /// and/or a disassembly, as an `EmitResult`. The `VCode` itself 725 /// is consumed by the emission process. 726 pub fn emit( 727 mut self, 728 regalloc: ®alloc2::Output, 729 want_disasm: bool, 730 want_metadata: bool, 731 ) -> EmitResult<I> 732 where 733 I: MachInstEmit, 734 { 735 // To write into disasm string. 736 use core::fmt::Write; 737 738 let _tt = timing::vcode_emit(); 739 let mut buffer = MachBuffer::new(); 740 let mut bb_starts: Vec<Option<CodeOffset>> = vec![]; 741 742 // The first M MachLabels are reserved for block indices, the next N MachLabels for 743 // constants. 744 buffer.reserve_labels_for_blocks(self.num_blocks()); 745 buffer.reserve_labels_for_constants(&self.constants); 746 747 // Construct the final order we emit code in: cold blocks at the end. 748 let mut final_order: SmallVec<[BlockIndex; 16]> = smallvec![]; 749 let mut cold_blocks: SmallVec<[BlockIndex; 16]> = smallvec![]; 750 for block in 0..self.num_blocks() { 751 let block = BlockIndex::new(block); 752 if self.block_order.is_cold(block) { 753 cold_blocks.push(block); 754 } else { 755 final_order.push(block); 756 } 757 } 758 final_order.extend(cold_blocks.clone()); 759 760 // Compute/save info we need for the prologue: clobbers and 761 // number of spillslots. 762 // 763 // We clone `abi` here because we will mutate it as we 764 // generate the prologue and set other info, but we can't 765 // mutate `VCode`. The info it usually carries prior to 766 // setting clobbers is fairly minimal so this should be 767 // relatively cheap. 768 let clobbers = self.compute_clobbers(regalloc); 769 self.abi.set_num_spillslots(regalloc.num_spillslots); 770 self.abi.set_clobbered(clobbers); 771 772 // We need to generate the prologue in order to get the ABI 773 // object into the right state first. We'll emit it when we 774 // hit the right block below. 775 let prologue_insts = self.abi.gen_prologue(); 776 777 // Emit blocks. 778 let mut cur_srcloc = None; 779 let mut last_offset = None; 780 let mut inst_offsets = vec![]; 781 let mut state = I::State::new(&*self.abi); 782 783 let mut disasm = String::new(); 784 785 if !self.debug_value_labels.is_empty() { 786 inst_offsets.resize(self.insts.len(), 0); 787 } 788 789 for block in final_order { 790 log::trace!("emitting block {:?}", block); 791 let new_offset = I::align_basic_block(buffer.cur_offset()); 792 while new_offset > buffer.cur_offset() { 793 // Pad with NOPs up to the aligned block offset. 794 let nop = I::gen_nop((new_offset - buffer.cur_offset()) as usize); 795 nop.emit(&[], &mut buffer, &self.emit_info, &mut Default::default()); 796 } 797 assert_eq!(buffer.cur_offset(), new_offset); 798 799 let do_emit = |inst: &I, 800 allocs: &[Allocation], 801 disasm: &mut String, 802 buffer: &mut MachBuffer<I>, 803 state: &mut I::State| { 804 if want_disasm { 805 let mut s = state.clone(); 806 writeln!(disasm, " {}", inst.pretty_print_inst(allocs, &mut s)).unwrap(); 807 } 808 inst.emit(allocs, buffer, &self.emit_info, state); 809 }; 810 811 // Is this the first block? Emit the prologue directly if so. 812 if block == self.entry { 813 log::trace!(" -> entry block"); 814 buffer.start_srcloc(SourceLoc::default()); 815 state.pre_sourceloc(SourceLoc::default()); 816 for inst in &prologue_insts { 817 do_emit(&inst, &[], &mut disasm, &mut buffer, &mut state); 818 } 819 buffer.end_srcloc(); 820 } 821 822 // Now emit the regular block body. 823 824 buffer.bind_label(MachLabel::from_block(block)); 825 826 if want_disasm { 827 writeln!(&mut disasm, "block{}:", block.index()).unwrap(); 828 } 829 830 if want_metadata { 831 // Track BB starts. If we have backed up due to MachBuffer 832 // branch opts, note that the removed blocks were removed. 833 let cur_offset = buffer.cur_offset(); 834 if last_offset.is_some() && cur_offset <= last_offset.unwrap() { 835 for i in (0..bb_starts.len()).rev() { 836 if bb_starts[i].is_some() && cur_offset > bb_starts[i].unwrap() { 837 break; 838 } 839 bb_starts[i] = None; 840 } 841 } 842 bb_starts.push(Some(cur_offset)); 843 last_offset = Some(cur_offset); 844 } 845 846 for inst_or_edit in regalloc.block_insts_and_edits(&self, block) { 847 match inst_or_edit { 848 InstOrEdit::Inst(iix) => { 849 if !self.debug_value_labels.is_empty() { 850 // If we need to produce debug info, 851 // record the offset of each instruction 852 // so that we can translate value-label 853 // ranges to machine-code offsets. 854 855 // Cold blocks violate monotonicity 856 // assumptions elsewhere (that 857 // instructions in inst-index order are in 858 // order in machine code), so we omit 859 // their offsets here. Value-label range 860 // generation below will skip empty ranges 861 // and ranges with to-offsets of zero. 862 if !self.block_order.is_cold(block) { 863 inst_offsets[iix.index()] = buffer.cur_offset(); 864 } 865 } 866 867 if self.insts[iix.index()].is_move().is_some() { 868 // Skip moves in the pre-regalloc program; 869 // all of these are incorporated by the 870 // regalloc into its unified move handling 871 // and they come out the other end, if 872 // still needed (not elided), as 873 // regalloc-inserted moves. 874 continue; 875 } 876 877 // Update the srcloc at this point in the buffer. 878 let srcloc = self.srclocs[iix.index()]; 879 if cur_srcloc != Some(srcloc) { 880 if cur_srcloc.is_some() { 881 buffer.end_srcloc(); 882 } 883 buffer.start_srcloc(srcloc); 884 cur_srcloc = Some(srcloc); 885 } 886 state.pre_sourceloc(cur_srcloc.unwrap_or(SourceLoc::default())); 887 888 // If this is a safepoint, compute a stack map 889 // and pass it to the emit state. 890 if self.insts[iix.index()].is_safepoint() { 891 let mut safepoint_slots: SmallVec<[SpillSlot; 8]> = smallvec![]; 892 // Find the contiguous range of 893 // (progpoint, allocation) safepoint slot 894 // records in `regalloc.safepoint_slots` 895 // for this instruction index. 896 let safepoint_slots_start = regalloc 897 .safepoint_slots 898 .binary_search_by(|(progpoint, _alloc)| { 899 if progpoint.inst() >= iix { 900 std::cmp::Ordering::Greater 901 } else { 902 std::cmp::Ordering::Less 903 } 904 }) 905 .unwrap_err(); 906 907 for (_, alloc) in regalloc.safepoint_slots[safepoint_slots_start..] 908 .iter() 909 .take_while(|(progpoint, _)| progpoint.inst() == iix) 910 { 911 let slot = alloc.as_stack().unwrap(); 912 safepoint_slots.push(slot); 913 } 914 if !safepoint_slots.is_empty() { 915 let stack_map = self 916 .abi 917 .spillslots_to_stack_map(&safepoint_slots[..], &state); 918 state.pre_safepoint(stack_map); 919 } 920 } 921 922 // Get the allocations for this inst from the regalloc result. 923 let allocs = regalloc.inst_allocs(iix); 924 925 // If the instruction we are about to emit is 926 // a return, place an epilogue at this point 927 // (and don't emit the return; the actual 928 // epilogue will contain it). 929 if self.insts[iix.index()].is_term() == MachTerminator::Ret { 930 for inst in self.abi.gen_epilogue() { 931 do_emit(&inst, &[], &mut disasm, &mut buffer, &mut state); 932 } 933 } else { 934 // Emit the instruction! 935 do_emit( 936 &self.insts[iix.index()], 937 allocs, 938 &mut disasm, 939 &mut buffer, 940 &mut state, 941 ); 942 } 943 } 944 945 InstOrEdit::Edit(Edit::Move { from, to }) => { 946 // Create a move/spill/reload instruction and 947 // immediately emit it. 948 match (from.as_reg(), to.as_reg()) { 949 (Some(from), Some(to)) => { 950 // Reg-to-reg move. 951 let from_rreg = Reg::from(from); 952 let to_rreg = Writable::from_reg(Reg::from(to)); 953 debug_assert_eq!(from.class(), to.class()); 954 let ty = I::canonical_type_for_rc(from.class()); 955 let mv = I::gen_move(to_rreg, from_rreg, ty); 956 do_emit(&mv, &[], &mut disasm, &mut buffer, &mut state); 957 } 958 (Some(from), None) => { 959 // Spill from register to spillslot. 960 let to = to.as_stack().unwrap(); 961 let from_rreg = RealReg::from(from); 962 debug_assert_eq!(from.class(), to.class()); 963 let spill = self.abi.gen_spill(to, from_rreg); 964 do_emit(&spill, &[], &mut disasm, &mut buffer, &mut state); 965 } 966 (None, Some(to)) => { 967 // Load from spillslot to register. 968 let from = from.as_stack().unwrap(); 969 let to_rreg = Writable::from_reg(RealReg::from(to)); 970 debug_assert_eq!(from.class(), to.class()); 971 let reload = self.abi.gen_reload(to_rreg, from); 972 do_emit(&reload, &[], &mut disasm, &mut buffer, &mut state); 973 } 974 (None, None) => { 975 panic!("regalloc2 should have eliminated stack-to-stack moves!"); 976 } 977 } 978 } 979 } 980 } 981 982 if cur_srcloc.is_some() { 983 buffer.end_srcloc(); 984 cur_srcloc = None; 985 } 986 987 // Do we need an island? Get the worst-case size of the 988 // next BB and see if, having emitted that many bytes, we 989 // will be beyond the deadline. 990 if block.index() < (self.num_blocks() - 1) { 991 let next_block = block.index() + 1; 992 let next_block_range = self.block_ranges[next_block]; 993 let next_block_size = next_block_range.1.index() - next_block_range.0.index(); 994 let worst_case_next_bb = I::worst_case_size() * next_block_size as u32; 995 if buffer.island_needed(worst_case_next_bb) { 996 buffer.emit_island(worst_case_next_bb); 997 } 998 } 999 } 1000 1001 // Emit the constants used by the function. 1002 for (constant, data) in self.constants.iter() { 1003 let label = buffer.get_label_for_constant(constant); 1004 buffer.defer_constant(label, data.alignment(), data.as_slice(), u32::max_value()); 1005 } 1006 1007 let func_body_len = buffer.cur_offset(); 1008 1009 // Create `bb_edges` and final (filtered) `bb_starts`. 1010 let mut bb_edges = vec![]; 1011 let mut bb_offsets = vec![]; 1012 if want_metadata { 1013 for block in 0..self.num_blocks() { 1014 if bb_starts[block].is_none() { 1015 // Block was deleted by MachBuffer; skip. 1016 continue; 1017 } 1018 let from = bb_starts[block].unwrap(); 1019 1020 bb_offsets.push(from); 1021 // Resolve each `succ` label and add edges. 1022 let succs = self.block_succs(BlockIndex::new(block)); 1023 for &succ in succs.iter() { 1024 let to = buffer.resolve_label_offset(MachLabel::from_block(succ)); 1025 bb_edges.push((from, to)); 1026 } 1027 } 1028 } 1029 1030 let value_labels_ranges = 1031 self.compute_value_labels_ranges(regalloc, &inst_offsets[..], func_body_len); 1032 let frame_size = self.abi.frame_size(); 1033 1034 EmitResult { 1035 buffer, 1036 bb_offsets, 1037 bb_edges, 1038 inst_offsets, 1039 func_body_len, 1040 disasm: if want_disasm { Some(disasm) } else { None }, 1041 stackslot_offsets: self.abi.stackslot_offsets().clone(), 1042 value_labels_ranges, 1043 frame_size, 1044 } 1045 } 1046 1047 fn compute_value_labels_ranges( 1048 &self, 1049 regalloc: ®alloc2::Output, 1050 inst_offsets: &[CodeOffset], 1051 func_body_len: u32, 1052 ) -> ValueLabelsRanges { 1053 if self.debug_value_labels.is_empty() { 1054 return ValueLabelsRanges::default(); 1055 } 1056 1057 let mut value_labels_ranges: ValueLabelsRanges = HashMap::new(); 1058 for &(label, from, to, alloc) in ®alloc.debug_locations { 1059 let ranges = value_labels_ranges 1060 .entry(ValueLabel::from_u32(label)) 1061 .or_insert_with(|| vec![]); 1062 let from_offset = inst_offsets[from.inst().index()]; 1063 let to_offset = if to.inst().index() == inst_offsets.len() { 1064 func_body_len 1065 } else { 1066 inst_offsets[to.inst().index()] 1067 }; 1068 1069 // Empty range or to-offset of zero can happen because of 1070 // cold blocks (see above). 1071 if to_offset == 0 || from_offset == to_offset { 1072 continue; 1073 } 1074 1075 let loc = if let Some(preg) = alloc.as_reg() { 1076 LabelValueLoc::Reg(Reg::from(preg)) 1077 } else { 1078 // We can't translate spillslot locations at the 1079 // moment because ValueLabelLoc requires an 1080 // instantaneous SP offset, and this can *change* 1081 // within the range we have here because of callsites 1082 // adjusting SP temporarily. To avoid the complexity 1083 // of accurately plumbing through nominal-SP 1084 // adjustment sites, we just omit debug info for 1085 // values that are spilled. Not ideal, but debug info 1086 // is best-effort. 1087 continue; 1088 }; 1089 1090 ranges.push(ValueLocRange { 1091 loc, 1092 // ValueLocRanges are recorded by *instruction-end 1093 // offset*. `from_offset` is the *start* of the 1094 // instruction; that is the same as the end of another 1095 // instruction, so we only want to begin coverage once 1096 // we are past the previous instruction's end. 1097 start: from_offset + 1, 1098 // Likewise, `end` is exclusive, but we want to 1099 // *include* the end of the last 1100 // instruction. `to_offset` is the start of the 1101 // `to`-instruction, which is the exclusive end, i.e., 1102 // the first instruction not covered. That 1103 // instruction's start is the same as the end of the 1104 // last instruction that is included, so we go one 1105 // byte further to be sure to include it. 1106 end: to_offset + 1, 1107 }); 1108 } 1109 1110 value_labels_ranges 1111 } 1112 1113 /// Get the IR block for a BlockIndex, if one exists. 1114 pub fn bindex_to_bb(&self, block: BlockIndex) -> Option<ir::Block> { 1115 self.block_order.lowered_order()[block.index()].orig_block() 1116 } 1117 } 1118 1119 impl<I: VCodeInst> RegallocFunction for VCode<I> { 1120 fn num_insts(&self) -> usize { 1121 self.insts.len() 1122 } 1123 1124 fn num_blocks(&self) -> usize { 1125 self.block_ranges.len() 1126 } 1127 1128 fn entry_block(&self) -> BlockIndex { 1129 self.entry 1130 } 1131 1132 fn block_insns(&self, block: BlockIndex) -> InstRange { 1133 let (start, end) = self.block_ranges[block.index()]; 1134 InstRange::forward(start, end) 1135 } 1136 1137 fn block_succs(&self, block: BlockIndex) -> &[BlockIndex] { 1138 let (start, end) = self.block_succ_range[block.index()]; 1139 &self.block_succs_preds[start as usize..end as usize] 1140 } 1141 1142 fn block_preds(&self, block: BlockIndex) -> &[BlockIndex] { 1143 let (start, end) = self.block_pred_range[block.index()]; 1144 &self.block_succs_preds[start as usize..end as usize] 1145 } 1146 1147 fn block_params(&self, block: BlockIndex) -> &[VReg] { 1148 let (start, end) = self.block_params_range[block.index()]; 1149 &self.block_params[start as usize..end as usize] 1150 } 1151 1152 fn branch_blockparams(&self, block: BlockIndex, _insn: InsnIndex, succ_idx: usize) -> &[VReg] { 1153 let (succ_range_start, succ_range_end) = self.branch_block_arg_succ_range[block.index()]; 1154 let succ_ranges = 1155 &self.branch_block_arg_range[succ_range_start as usize..succ_range_end as usize]; 1156 let (branch_block_args_start, branch_block_args_end) = succ_ranges[succ_idx]; 1157 &self.branch_block_args[branch_block_args_start as usize..branch_block_args_end as usize] 1158 } 1159 1160 fn is_ret(&self, insn: InsnIndex) -> bool { 1161 match self.insts[insn.index()].is_term() { 1162 MachTerminator::Ret => true, 1163 _ => false, 1164 } 1165 } 1166 1167 fn is_branch(&self, insn: InsnIndex) -> bool { 1168 match self.insts[insn.index()].is_term() { 1169 MachTerminator::Cond | MachTerminator::Uncond | MachTerminator::Indirect => true, 1170 _ => false, 1171 } 1172 } 1173 1174 fn requires_refs_on_stack(&self, insn: InsnIndex) -> bool { 1175 self.insts[insn.index()].is_safepoint() 1176 } 1177 1178 fn is_move(&self, insn: InsnIndex) -> Option<(Operand, Operand)> { 1179 self.is_move.get(&insn).cloned() 1180 } 1181 1182 fn inst_operands(&self, insn: InsnIndex) -> &[Operand] { 1183 let (start, end) = self.operand_ranges[insn.index()]; 1184 &self.operands[start as usize..end as usize] 1185 } 1186 1187 fn inst_clobbers(&self, insn: InsnIndex) -> PRegSet { 1188 self.clobbers.get(&insn).cloned().unwrap_or_default() 1189 } 1190 1191 fn num_vregs(&self) -> usize { 1192 std::cmp::max(self.vreg_types.len(), first_user_vreg_index()) 1193 } 1194 1195 fn reftype_vregs(&self) -> &[VReg] { 1196 &self.reftyped_vregs[..] 1197 } 1198 1199 fn debug_value_labels(&self) -> &[(VReg, InsnIndex, InsnIndex, u32)] { 1200 &self.debug_value_labels[..] 1201 } 1202 1203 fn is_pinned_vreg(&self, vreg: VReg) -> Option<PReg> { 1204 pinned_vreg_to_preg(vreg) 1205 } 1206 1207 fn spillslot_size(&self, regclass: RegClass) -> usize { 1208 self.abi.get_spillslot_size(regclass) as usize 1209 } 1210 1211 fn allow_multiple_vreg_defs(&self) -> bool { 1212 // At least the s390x backend requires this, because the 1213 // `Loop` pseudo-instruction aggregates all Operands so pinned 1214 // vregs (RealRegs) may occur more than once. 1215 true 1216 } 1217 } 1218 1219 impl<I: VCodeInst> fmt::Debug for VCode<I> { 1220 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 1221 writeln!(f, "VCode {{")?; 1222 writeln!(f, " Entry block: {}", self.entry.index())?; 1223 1224 let mut state = Default::default(); 1225 1226 let mut alias_keys = self.vreg_aliases.keys().cloned().collect::<Vec<_>>(); 1227 alias_keys.sort_unstable(); 1228 for key in alias_keys { 1229 let dest = self.vreg_aliases.get(&key).unwrap(); 1230 writeln!(f, " {:?} := {:?}", Reg::from(key), Reg::from(*dest))?; 1231 } 1232 1233 for block in 0..self.num_blocks() { 1234 let block = BlockIndex::new(block); 1235 writeln!(f, "Block {}:", block.index())?; 1236 if let Some(bb) = self.bindex_to_bb(block) { 1237 writeln!(f, " (original IR block: {})", bb)?; 1238 } 1239 for succ in self.succs(block) { 1240 writeln!(f, " (successor: Block {})", succ.index())?; 1241 } 1242 let (start, end) = self.block_ranges[block.index()]; 1243 writeln!( 1244 f, 1245 " (instruction range: {} .. {})", 1246 start.index(), 1247 end.index() 1248 )?; 1249 for inst in start.index()..end.index() { 1250 writeln!( 1251 f, 1252 " Inst {}: {}", 1253 inst, 1254 self.insts[inst].pretty_print_inst(&[], &mut state) 1255 )?; 1256 } 1257 } 1258 1259 writeln!(f, "}}")?; 1260 Ok(()) 1261 } 1262 } 1263 1264 /// This structure tracks the large constants used in VCode that will be emitted separately by the 1265 /// [MachBuffer]. 1266 /// 1267 /// First, during the lowering phase, constants are inserted using 1268 /// [VCodeConstants.insert]; an intermediate handle, [VCodeConstant], tracks what constants are 1269 /// used in this phase. Some deduplication is performed, when possible, as constant 1270 /// values are inserted. 1271 /// 1272 /// Secondly, during the emission phase, the [MachBuffer] assigns [MachLabel]s for each of the 1273 /// constants so that instructions can refer to the value's memory location. The [MachBuffer] 1274 /// then writes the constant values to the buffer. 1275 #[derive(Default)] 1276 pub struct VCodeConstants { 1277 constants: PrimaryMap<VCodeConstant, VCodeConstantData>, 1278 pool_uses: HashMap<Constant, VCodeConstant>, 1279 well_known_uses: HashMap<*const [u8], VCodeConstant>, 1280 u64s: HashMap<[u8; 8], VCodeConstant>, 1281 } 1282 impl VCodeConstants { 1283 /// Initialize the structure with the expected number of constants. 1284 pub fn with_capacity(expected_num_constants: usize) -> Self { 1285 Self { 1286 constants: PrimaryMap::with_capacity(expected_num_constants), 1287 pool_uses: HashMap::with_capacity(expected_num_constants), 1288 well_known_uses: HashMap::new(), 1289 u64s: HashMap::new(), 1290 } 1291 } 1292 1293 /// Insert a constant; using this method indicates that a constant value will be used and thus 1294 /// will be emitted to the `MachBuffer`. The current implementation can deduplicate constants 1295 /// that are [VCodeConstantData::Pool] or [VCodeConstantData::WellKnown] but not 1296 /// [VCodeConstantData::Generated]. 1297 pub fn insert(&mut self, data: VCodeConstantData) -> VCodeConstant { 1298 match data { 1299 VCodeConstantData::Generated(_) => self.constants.push(data), 1300 VCodeConstantData::Pool(constant, _) => match self.pool_uses.get(&constant) { 1301 None => { 1302 let vcode_constant = self.constants.push(data); 1303 self.pool_uses.insert(constant, vcode_constant); 1304 vcode_constant 1305 } 1306 Some(&vcode_constant) => vcode_constant, 1307 }, 1308 VCodeConstantData::WellKnown(data_ref) => { 1309 match self.well_known_uses.entry(data_ref as *const [u8]) { 1310 Entry::Vacant(v) => { 1311 let vcode_constant = self.constants.push(data); 1312 v.insert(vcode_constant); 1313 vcode_constant 1314 } 1315 Entry::Occupied(o) => *o.get(), 1316 } 1317 } 1318 VCodeConstantData::U64(value) => match self.u64s.entry(value) { 1319 Entry::Vacant(v) => { 1320 let vcode_constant = self.constants.push(data); 1321 v.insert(vcode_constant); 1322 vcode_constant 1323 } 1324 Entry::Occupied(o) => *o.get(), 1325 }, 1326 } 1327 } 1328 1329 /// Return the number of constants inserted. 1330 pub fn len(&self) -> usize { 1331 self.constants.len() 1332 } 1333 1334 /// Iterate over the [VCodeConstant] keys inserted in this structure. 1335 pub fn keys(&self) -> Keys<VCodeConstant> { 1336 self.constants.keys() 1337 } 1338 1339 /// Iterate over the [VCodeConstant] keys and the data (as a byte slice) inserted in this 1340 /// structure. 1341 pub fn iter(&self) -> impl Iterator<Item = (VCodeConstant, &VCodeConstantData)> { 1342 self.constants.iter() 1343 } 1344 } 1345 1346 /// A use of a constant by one or more VCode instructions; see [VCodeConstants]. 1347 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 1348 pub struct VCodeConstant(u32); 1349 entity_impl!(VCodeConstant); 1350 1351 /// Identify the different types of constant that can be inserted into [VCodeConstants]. Tracking 1352 /// these separately instead of as raw byte buffers allows us to avoid some duplication. 1353 pub enum VCodeConstantData { 1354 /// A constant already present in the Cranelift IR 1355 /// [ConstantPool](crate::ir::constant::ConstantPool). 1356 Pool(Constant, ConstantData), 1357 /// A reference to a well-known constant value that is statically encoded within the compiler. 1358 WellKnown(&'static [u8]), 1359 /// A constant value generated during lowering; the value may depend on the instruction context 1360 /// which makes it difficult to de-duplicate--if possible, use other variants. 1361 Generated(ConstantData), 1362 /// A constant of at most 64 bits. These are deduplicated as 1363 /// well. Stored as a fixed-size array of `u8` so that we do not 1364 /// encounter endianness problems when cross-compiling. 1365 U64([u8; 8]), 1366 } 1367 impl VCodeConstantData { 1368 /// Retrieve the constant data as a byte slice. 1369 pub fn as_slice(&self) -> &[u8] { 1370 match self { 1371 VCodeConstantData::Pool(_, d) | VCodeConstantData::Generated(d) => d.as_slice(), 1372 VCodeConstantData::WellKnown(d) => d, 1373 VCodeConstantData::U64(value) => &value[..], 1374 } 1375 } 1376 1377 /// Calculate the alignment of the constant data. 1378 pub fn alignment(&self) -> u32 { 1379 if self.as_slice().len() <= 8 { 1380 8 1381 } else { 1382 16 1383 } 1384 } 1385 } 1386 1387 #[cfg(test)] 1388 mod test { 1389 use super::*; 1390 use std::mem::size_of; 1391 1392 #[test] 1393 fn size_of_constant_structs() { 1394 assert_eq!(size_of::<Constant>(), 4); 1395 assert_eq!(size_of::<VCodeConstant>(), 4); 1396 assert_eq!(size_of::<ConstantData>(), 24); 1397 assert_eq!(size_of::<VCodeConstantData>(), 32); 1398 assert_eq!( 1399 size_of::<PrimaryMap<VCodeConstant, VCodeConstantData>>(), 1400 24 1401 ); 1402 // TODO The VCodeConstants structure's memory size could be further optimized. 1403 // With certain versions of Rust, each `HashMap` in `VCodeConstants` occupied at 1404 // least 48 bytes, making an empty `VCodeConstants` cost 120 bytes. 1405 } 1406 } 1407