//! Function inlining infrastructure.
//!
//! This module provides "inlining as a library" to Cranelift users; it does
//! _not_ provide a complete, off-the-shelf inlining solution. Cranelift's
//! compilation context is per-function and does not encompass the full call
//! graph. It does not know which functions are hot and which are cold, which
//! have been marked the equivalent of `#[inline(never)]`, etc... Only the
//! Cranelift user can understand these aspects of the full compilation
//! pipeline, and these things can be very different between (say) Wasmtime and
//! `cg_clif`. Therefore, this module does not attempt to define heuristics for
//! when inlining a particular call is likely beneficial. This module only
//! provides hooks for the Cranelift user to define whether a given call should
//! be inlined or not, and the mechanics to inline a callee into a particular
//! call site when directed to do so by the Cranelift user.
//!
//! The top-level inlining entry point during Cranelift compilation is
//! [`Context::inline`][crate::Context::inline]. It takes an [`Inline`] trait
//! implementation, which is authored by the Cranelift user and directs
//! Cranelift whether to inline a particular call, and, when inlining, gives
//! Cranelift the body of the callee that is to be inlined.

use crate::cursor::{Cursor as _, FuncCursor};
use crate::ir::{self, DebugTag, ExceptionTableData, ExceptionTableItem, InstBuilder as _};
use crate::result::CodegenResult;
use crate::trace;
use crate::traversals::Dfs;
use alloc::borrow::Cow;
use alloc::vec::Vec;
use cranelift_entity::{SecondaryMap, packed_option::PackedOption};
use smallvec::SmallVec;

// Scratch vector of values with inline storage for eight elements.
type SmallValueVec = SmallVec<[ir::Value; 8]>;
// Scratch vector of block-call arguments with inline storage for eight
// elements.
type SmallBlockArgVec = SmallVec<[ir::BlockArg; 8]>;
// Scratch vector of block calls with inline storage for eight elements.
type SmallBlockCallVec = SmallVec<[ir::BlockCall; 8]>;

/// A command directing Cranelift whether or not to inline a particular call.
37 pub enum InlineCommand<'a> { 38 /// Keep the call as-is, out-of-line, and do not inline the callee. 39 KeepCall, 40 41 /// Inline the call, using this function as the body of the callee. 42 /// 43 /// It is the `Inline` implementor's responsibility to ensure that this 44 /// function is the correct callee. Providing the wrong function may result 45 /// in panics during compilation or incorrect runtime behavior. 46 Inline { 47 /// The callee function's body. 48 callee: Cow<'a, ir::Function>, 49 /// Whether to visit any function calls within the callee body after 50 /// inlining and consider them for further inlining. 51 visit_callee: bool, 52 }, 53 } 54 55 /// A trait for directing Cranelift whether to inline a particular call or not. 56 /// 57 /// Used in combination with the [`Context::inline`][crate::Context::inline] 58 /// method. 59 pub trait Inline { 60 /// A hook invoked for each direct call instruction in a function, whose 61 /// result determines whether Cranelift should inline a given call. 62 /// 63 /// The Cranelift user is responsible for defining their own hueristics and 64 /// deciding whether inlining the call is beneficial. 65 /// 66 /// When returning a function and directing Cranelift to inline its body 67 /// into the call site, the `Inline` implementer must ensure the following: 68 /// 69 /// * The returned function's signature exactly matches the `callee` 70 /// `FuncRef`'s signature. 71 /// 72 /// * The returned function must be legalized. 73 /// 74 /// * The returned function must be valid (i.e. it must pass the CLIF 75 /// verifier). 76 /// 77 /// * The returned function is a correct and valid implementation of the 78 /// `callee` according to your language's semantics. 79 /// 80 /// Failure to uphold these invariants may result in panics during 81 /// compilation or incorrect runtime behavior in the generated code. 
82 fn inline( 83 &mut self, 84 caller: &ir::Function, 85 call_inst: ir::Inst, 86 call_opcode: ir::Opcode, 87 callee: ir::FuncRef, 88 call_args: &[ir::Value], 89 ) -> InlineCommand<'_>; 90 } 91 92 impl<'a, T> Inline for &'a mut T 93 where 94 T: Inline, 95 { 96 fn inline( 97 &mut self, 98 caller: &ir::Function, 99 inst: ir::Inst, 100 opcode: ir::Opcode, 101 callee: ir::FuncRef, 102 args: &[ir::Value], 103 ) -> InlineCommand<'_> { 104 (*self).inline(caller, inst, opcode, callee, args) 105 } 106 } 107 108 /// Walk the given function, invoke the `Inline` implementation for each call 109 /// instruction, and inline the callee when directed to do so. 110 /// 111 /// Returns whether any call was inlined. 112 pub(crate) fn do_inlining( 113 func: &mut ir::Function, 114 mut inliner: impl Inline, 115 ) -> CodegenResult<bool> { 116 trace!("function {} before inlining: {}", func.name, func); 117 118 let mut inlined_any = false; 119 let mut allocs = InliningAllocs::default(); 120 121 let mut cursor = FuncCursor::new(func); 122 'block_loop: while let Some(block) = cursor.next_block() { 123 // Always keep track of our previous cursor position. Assuming that the 124 // current position is a function call that we will inline, then the 125 // previous position is just before the inlined callee function. After 126 // inlining a call, the Cranelift user can decide whether to consider 127 // any function calls in the inlined callee for further inlining or 128 // not. When they do, then we back up to this previous cursor position 129 // so that our traversal will then continue over the inlined body. 130 let mut prev_pos; 131 132 while let Some(inst) = { 133 prev_pos = cursor.position(); 134 cursor.next_inst() 135 } { 136 // Make sure that `block` is always `inst`'s block, even with all of 137 // our cursor-position-updating and block-splitting-during-inlining 138 // shenanigans below. 
139 debug_assert_eq!(Some(block), cursor.func.layout.inst_block(inst)); 140 141 match cursor.func.dfg.insts[inst] { 142 ir::InstructionData::Call { 143 opcode: opcode @ ir::Opcode::Call | opcode @ ir::Opcode::ReturnCall, 144 args: _, 145 func_ref, 146 } => { 147 trace!( 148 "considering call site for inlining: {inst}: {}", 149 cursor.func.dfg.display_inst(inst), 150 ); 151 let args = cursor.func.dfg.inst_args(inst); 152 match inliner.inline(&cursor.func, inst, opcode, func_ref, args) { 153 InlineCommand::KeepCall => { 154 trace!(" --> keeping call"); 155 } 156 InlineCommand::Inline { 157 callee, 158 visit_callee, 159 } => { 160 let last_inlined_block = inline_one( 161 &mut allocs, 162 cursor.func, 163 func_ref, 164 block, 165 inst, 166 opcode, 167 &callee, 168 None, 169 ); 170 inlined_any = true; 171 if visit_callee { 172 cursor.set_position(prev_pos); 173 } else { 174 // Arrange it so that the `next_block()` loop 175 // will continue to the next block that is not 176 // associated with the just-inlined callee. 
177 cursor.goto_bottom(last_inlined_block); 178 continue 'block_loop; 179 } 180 } 181 } 182 } 183 ir::InstructionData::TryCall { 184 opcode: opcode @ ir::Opcode::TryCall, 185 args: _, 186 func_ref, 187 exception, 188 } => { 189 trace!( 190 "considering call site for inlining: {inst}: {}", 191 cursor.func.dfg.display_inst(inst), 192 ); 193 let args = cursor.func.dfg.inst_args(inst); 194 match inliner.inline(&cursor.func, inst, opcode, func_ref, args) { 195 InlineCommand::KeepCall => { 196 trace!(" --> keeping call"); 197 } 198 InlineCommand::Inline { 199 callee, 200 visit_callee, 201 } => { 202 let last_inlined_block = inline_one( 203 &mut allocs, 204 cursor.func, 205 func_ref, 206 block, 207 inst, 208 opcode, 209 &callee, 210 Some(exception), 211 ); 212 inlined_any = true; 213 if visit_callee { 214 cursor.set_position(prev_pos); 215 } else { 216 // Arrange it so that the `next_block()` loop 217 // will continue to the next block that is not 218 // associated with the just-inlined callee. 219 cursor.goto_bottom(last_inlined_block); 220 continue 'block_loop; 221 } 222 } 223 } 224 } 225 ir::InstructionData::CallIndirect { .. } 226 | ir::InstructionData::TryCallIndirect { .. } => { 227 // Can't inline indirect calls; need to have some earlier 228 // pass rewrite them into direct calls first, when possible. 229 } 230 ir::InstructionData::Call { 231 opcode: ir::Opcode::PatchableCall, 232 .. 233 } => { 234 // Can't inline patchable calls; they need to 235 // remain patchable and inlining the whole body is 236 // decidedly *not* patchable! 
237 } 238 _ => { 239 debug_assert!( 240 !cursor.func.dfg.insts[inst].opcode().is_call(), 241 "should have matched all call instructions, but found: {inst}: {}", 242 cursor.func.dfg.display_inst(inst), 243 ); 244 } 245 } 246 } 247 } 248 249 if inlined_any { 250 trace!("function {} after inlining: {}", func.name, func); 251 } else { 252 trace!("function {} did not have any callees inlined", func.name); 253 } 254 255 Ok(inlined_any) 256 } 257 258 #[derive(Default)] 259 struct InliningAllocs { 260 /// Map from callee value to inlined caller value. 261 values: SecondaryMap<ir::Value, PackedOption<ir::Value>>, 262 263 /// Map from callee constant to inlined caller constant. 264 /// 265 /// Not in `EntityMap` because these are hash-consed inside the 266 /// `ir::Function`. 267 constants: SecondaryMap<ir::Constant, PackedOption<ir::Constant>>, 268 269 /// Map from callee to inlined caller external name refs. 270 /// 271 /// Not in `EntityMap` because these are hash-consed inside the 272 /// `ir::Function`. 273 user_external_name_refs: 274 SecondaryMap<ir::UserExternalNameRef, PackedOption<ir::UserExternalNameRef>>, 275 276 /// The set of _caller_ inlined call instructions that need exception table 277 /// fixups at the end of inlining. 278 /// 279 /// This includes all kinds of non-returning calls, not just the literal 280 /// `call` instruction: `call_indirect`, `try_call`, `try_call_indirect`, 281 /// etc... However, it does not include `return_call` and 282 /// `return_call_indirect` instructions because the caller cannot catch 283 /// exceptions that those calls throw because the caller is no longer on the 284 /// stack as soon as they are executed. 285 /// 286 /// Note: this is a simple `Vec`, and not an `EntitySet`, because it is very 287 /// sparse: most of the caller's instructions are not inlined call 288 /// instructions. 
Additionally, we require deterministic iteration order and 289 /// do not require set-membership testing, so a hash set is not a good 290 /// choice either. 291 calls_needing_exception_table_fixup: Vec<ir::Inst>, 292 } 293 294 impl InliningAllocs { 295 fn reset(&mut self, callee: &ir::Function) { 296 let InliningAllocs { 297 values, 298 constants, 299 user_external_name_refs, 300 calls_needing_exception_table_fixup, 301 } = self; 302 303 values.clear(); 304 values.resize(callee.dfg.len_values()); 305 306 constants.clear(); 307 constants.resize(callee.dfg.constants.len()); 308 309 user_external_name_refs.clear(); 310 user_external_name_refs.resize(callee.params.user_named_funcs().len()); 311 312 // Note: We do not reserve capacity for 313 // `calls_needing_exception_table_fixup` because it is a sparse set and 314 // we don't know how large it needs to be ahead of time. 315 calls_needing_exception_table_fixup.clear(); 316 } 317 318 fn set_inlined_value( 319 &mut self, 320 callee: &ir::Function, 321 callee_val: ir::Value, 322 inlined_val: ir::Value, 323 ) { 324 trace!(" --> callee {callee_val:?} = inlined {inlined_val:?}"); 325 debug_assert!(self.values[callee_val].is_none()); 326 let resolved_callee_val = callee.dfg.resolve_aliases(callee_val); 327 debug_assert!(self.values[resolved_callee_val].is_none()); 328 self.values[resolved_callee_val] = Some(inlined_val).into(); 329 } 330 331 fn get_inlined_value(&self, callee: &ir::Function, callee_val: ir::Value) -> Option<ir::Value> { 332 let resolved_callee_val = callee.dfg.resolve_aliases(callee_val); 333 self.values[resolved_callee_val].expand() 334 } 335 } 336 337 /// Inline one particular function call. 338 /// 339 /// Returns the last inlined block in the layout. 
340 fn inline_one( 341 allocs: &mut InliningAllocs, 342 func: &mut ir::Function, 343 callee_func_ref: ir::FuncRef, 344 call_block: ir::Block, 345 call_inst: ir::Inst, 346 call_opcode: ir::Opcode, 347 callee: &ir::Function, 348 call_exception_table: Option<ir::ExceptionTable>, 349 ) -> ir::Block { 350 trace!( 351 "Inlining call {call_inst:?}: {}\n\ 352 with callee = {callee:?}", 353 func.dfg.display_inst(call_inst) 354 ); 355 356 // Type check callee signature. 357 let expected_callee_sig = func.dfg.ext_funcs[callee_func_ref].signature; 358 let expected_callee_sig = &func.dfg.signatures[expected_callee_sig]; 359 assert_eq!(expected_callee_sig, &callee.signature); 360 361 allocs.reset(callee); 362 363 // First, append various callee entity arenas to the end of the caller's 364 // entity arenas. 365 let entity_map = create_entities(allocs, func, callee); 366 367 // Inlined prologue: split the call instruction's block at the point of the 368 // call and replace the call with a jump. 369 let return_block = split_off_return_block(func, call_inst, call_opcode, callee); 370 let call_stack_map = replace_call_with_jump(allocs, func, call_inst, callee, &entity_map); 371 372 // Prepare for translating the actual instructions by inserting the inlined 373 // blocks into the caller's layout in the same order that they appear in the 374 // callee. 375 let mut last_inlined_block = inline_block_layout(func, call_block, callee, &entity_map); 376 377 // Get a copy of debug tags on the call instruction; these are 378 // prepended to debug tags on inlined instructions. Remove them 379 // from the call itself as it will be rewritten to a jump (which 380 // cannot have tags). 381 let call_debug_tags = func.debug_tags.get(call_inst).to_vec(); 382 func.debug_tags.set(call_inst, []); 383 384 // Translate each instruction from the callee into the caller, 385 // appending them to their associated block in the caller. 
386 // 387 // Note that we iterate over the callee with a pre-order traversal so that 388 // we see value defs before uses. 389 for callee_block in Dfs::new().pre_order_iter(callee) { 390 let inlined_block = entity_map.inlined_block(callee_block); 391 trace!( 392 "Processing instructions in callee block {callee_block:?} (inlined block {inlined_block:?}" 393 ); 394 395 let mut next_callee_inst = callee.layout.first_inst(callee_block); 396 while let Some(callee_inst) = next_callee_inst { 397 trace!( 398 "Processing callee instruction {callee_inst:?}: {}", 399 callee.dfg.display_inst(callee_inst) 400 ); 401 402 assert_ne!( 403 callee.dfg.insts[callee_inst].opcode(), 404 ir::Opcode::GlobalValue, 405 "callee must already be legalized, we shouldn't see any `global_value` \ 406 instructions when inlining; found {callee_inst:?}: {}", 407 callee.dfg.display_inst(callee_inst) 408 ); 409 410 // Remap the callee instruction's entities and insert it into the 411 // caller's DFG. 412 let inlined_inst_data = callee.dfg.insts[callee_inst].map(InliningInstRemapper { 413 allocs: &allocs, 414 func, 415 callee, 416 entity_map: &entity_map, 417 }); 418 let inlined_inst = func.dfg.make_inst(inlined_inst_data); 419 func.layout.append_inst(inlined_inst, inlined_block); 420 421 // Copy over debug tags, translating referenced entities 422 // as appropriate. 423 let debug_tags = callee.debug_tags.get(callee_inst); 424 // If there are tags on the inlined instruction, we always 425 // add tags, and we prepend any tags from the call 426 // instruction; but we don't add tags if only the callsite 427 // had them (this would otherwise mean that every single 428 // instruction in an inlined function body would get 429 // tags). 
430 if !debug_tags.is_empty() { 431 let tags = call_debug_tags 432 .iter() 433 .cloned() 434 .chain(debug_tags.iter().map(|tag| match *tag { 435 DebugTag::User(value) => DebugTag::User(value), 436 DebugTag::StackSlot(slot) => { 437 DebugTag::StackSlot(entity_map.inlined_stack_slot(slot)) 438 } 439 })) 440 .collect::<SmallVec<[_; 4]>>(); 441 func.debug_tags.set(inlined_inst, tags); 442 } 443 444 let opcode = callee.dfg.insts[callee_inst].opcode(); 445 if opcode.is_return() { 446 // Instructions that return do not define any values, so we 447 // don't need to worry about that, but we do need to fix them up 448 // so that they return by jumping to our control-flow join 449 // block, rather than returning from the caller. 450 if let Some(return_block) = return_block { 451 fixup_inst_that_returns( 452 allocs, 453 func, 454 callee, 455 &entity_map, 456 call_opcode, 457 inlined_inst, 458 callee_inst, 459 return_block, 460 call_stack_map.as_ref().map(|es| &**es), 461 ); 462 } else { 463 // If we are inlining a callee that was invoked via 464 // `return_call`, we leave inlined return instructions 465 // as-is: there is no logical caller frame on the stack to 466 // continue to. 467 debug_assert_eq!(call_opcode, ir::Opcode::ReturnCall); 468 } 469 } else { 470 // Make the instruction's result values. 471 let ctrl_typevar = callee.dfg.ctrl_typevar(callee_inst); 472 func.dfg.make_inst_results(inlined_inst, ctrl_typevar); 473 474 // Update the value map for this instruction's defs. 
475 let callee_results = callee.dfg.inst_results(callee_inst); 476 let inlined_results = func.dfg.inst_results(inlined_inst); 477 debug_assert_eq!(callee_results.len(), inlined_results.len()); 478 for (callee_val, inlined_val) in callee_results.iter().zip(inlined_results) { 479 allocs.set_inlined_value(callee, *callee_val, *inlined_val); 480 } 481 482 if opcode.is_call() { 483 append_stack_map_entries( 484 func, 485 callee, 486 &entity_map, 487 call_stack_map.as_deref(), 488 inlined_inst, 489 callee_inst, 490 ); 491 492 // When we are inlining a `try_call` call site, we need to merge 493 // the call site's exception table into the inlined calls' 494 // exception tables. This can involve rewriting regular `call`s 495 // into `try_call`s, which requires mutating the CFG because 496 // `try_call` is a block terminator. However, we can't mutate 497 // the CFG in the middle of this traversal because we rely on 498 // the existence of a one-to-one mapping between the callee 499 // layout and the inlined layout. Instead, we record the set of 500 // inlined call instructions that will need fixing up, and 501 // perform that possibly-CFG-mutating exception table merging in 502 // a follow up pass, when we no longer rely on that one-to-one 503 // layout mapping. 504 debug_assert_eq!( 505 call_opcode == ir::Opcode::TryCall, 506 call_exception_table.is_some() 507 ); 508 // Note that we do not fix up patchable calls 509 // inlined at a try-call to a try-call, because 510 // the "patchable ABI" does not support catching 511 // exceptions. This does mean that we cannot have 512 // an exception-throw propagate out of a 513 // breakpoint when we use patchable calls to set 514 // up breakpoints, but we don't expect that to 515 // occur. 516 // 517 // FIXME: consider making patchability an aspect 518 // of any call; then we can remove this special 519 // case. 
520 if call_opcode == ir::Opcode::TryCall && opcode != ir::Opcode::PatchableCall { 521 allocs 522 .calls_needing_exception_table_fixup 523 .push(inlined_inst); 524 } 525 } 526 } 527 528 trace!( 529 " --> inserted inlined instruction {inlined_inst:?}: {}", 530 func.dfg.display_inst(inlined_inst) 531 ); 532 533 next_callee_inst = callee.layout.next_inst(callee_inst); 534 } 535 } 536 537 // We copied *all* callee blocks into the caller's layout, but only copied 538 // the callee instructions in *reachable* callee blocks into the caller's 539 // associated blocks. Therefore, any *unreachable* blocks are empty in the 540 // caller, which is invalid CLIF because all blocks must end in a 541 // terminator, so do a quick pass over the inlined blocks and remove any 542 // empty blocks from the caller's layout. 543 for block in entity_map.iter_inlined_blocks(func) { 544 if func.layout.is_block_inserted(block) && func.layout.first_inst(block).is_none() { 545 log::trace!("removing unreachable inlined block from layout: {block}"); 546 547 // If the block being removed is our last-inlined block, then back 548 // it up to the previous block in the layout, which will be the new 549 // last-inlined block after this one's removal. 550 if block == last_inlined_block { 551 last_inlined_block = func.layout.prev_block(last_inlined_block).expect( 552 "there will always at least be the block that contained the call we are \ 553 inlining", 554 ); 555 } 556 557 func.layout.remove_block(block); 558 } 559 } 560 561 // Final step: fixup the exception tables of any inlined calls when we are 562 // inlining a `try_call` site. 563 // 564 // Subtly, this requires rewriting non-catching `call[_indirect]` 565 // instructions into `try_call[_indirect]` instructions so that exceptions 566 // that unwound through the original callee frame and were caught by the 567 // caller's `try_call` do not unwind past this inlined frame. 
And turning a 568 // `call` into a `try_call` mutates the CFG, breaking our one-to-one mapping 569 // between callee blocks and inlined blocks, so we delay these fixups to 570 // this final step, when we no longer rely on that mapping. 571 debug_assert!( 572 allocs.calls_needing_exception_table_fixup.is_empty() || call_exception_table.is_some() 573 ); 574 debug_assert_eq!( 575 call_opcode == ir::Opcode::TryCall, 576 call_exception_table.is_some() 577 ); 578 if let Some(call_exception_table) = call_exception_table { 579 fixup_inlined_call_exception_tables(allocs, func, call_exception_table); 580 } 581 582 debug_assert!( 583 func.layout.is_block_inserted(last_inlined_block), 584 "last_inlined_block={last_inlined_block} should be inserted in the layout" 585 ); 586 last_inlined_block 587 } 588 589 /// Append stack map entries from the caller and callee to the given inlined 590 /// instruction. 591 fn append_stack_map_entries( 592 func: &mut ir::Function, 593 callee: &ir::Function, 594 entity_map: &EntityMap, 595 call_stack_map: Option<&[ir::UserStackMapEntry]>, 596 inlined_inst: ir::Inst, 597 callee_inst: ir::Inst, 598 ) { 599 // Add the caller's stack map to this call. These entries 600 // already refer to caller entities and do not need further 601 // translation. 602 func.dfg.append_user_stack_map_entries( 603 inlined_inst, 604 call_stack_map 605 .iter() 606 .flat_map(|entries| entries.iter().cloned()), 607 ); 608 609 // Append the callee's stack map to this call. These entries 610 // refer to callee entities and therefore do require 611 // translation into the caller's index space. 
612 func.dfg.append_user_stack_map_entries( 613 inlined_inst, 614 callee 615 .dfg 616 .user_stack_map_entries(callee_inst) 617 .iter() 618 .flat_map(|entries| entries.iter()) 619 .map(|entry| ir::UserStackMapEntry { 620 ty: entry.ty, 621 slot: entity_map.inlined_stack_slot(entry.slot), 622 offset: entry.offset, 623 }), 624 ); 625 } 626 627 /// Create or update the exception tables for any inlined call instructions: 628 /// when inlining at a `try_call` site, we must forward our exceptional edges 629 /// into each inlined call instruction. 630 fn fixup_inlined_call_exception_tables( 631 allocs: &mut InliningAllocs, 632 func: &mut ir::Function, 633 call_exception_table: ir::ExceptionTable, 634 ) { 635 // Split a block at a `call[_indirect]` instruction, detach the 636 // instruction's results, and alias them to the new block's parameters. 637 let split_block_for_new_try_call = |func: &mut ir::Function, inst: ir::Inst| -> ir::Block { 638 debug_assert!(func.dfg.insts[inst].opcode().is_call()); 639 debug_assert!(!func.dfg.insts[inst].opcode().is_terminator()); 640 641 // Split the block. 642 let next_inst = func 643 .layout 644 .next_inst(inst) 645 .expect("inst is not a terminator, should have a successor"); 646 let new_block = func.dfg.blocks.add(); 647 func.layout.split_block(new_block, next_inst); 648 649 // `try_call[_indirect]` instructions do not define values themselves; 650 // the normal-return block has parameters for the results. So remove 651 // this instruction's results, create an associated block parameter for 652 // each of them, and alias them to the new block parameter. 
653 let old_results = SmallValueVec::from_iter(func.dfg.inst_results(inst).iter().copied()); 654 func.dfg.detach_inst_results(inst); 655 for old_result in old_results { 656 let ty = func.dfg.value_type(old_result); 657 let new_block_param = func.dfg.append_block_param(new_block, ty); 658 func.dfg.change_to_alias(old_result, new_block_param); 659 } 660 661 new_block 662 }; 663 664 // Clone the caller's exception table, updating it for use in the current 665 // `call[_indirect]` instruction as it becomes a `try_call[_indirect]`. 666 let clone_exception_table_for_this_call = |func: &mut ir::Function, 667 signature: ir::SigRef, 668 new_block: ir::Block| 669 -> ir::ExceptionTable { 670 let mut exception = func.stencil.dfg.exception_tables[call_exception_table] 671 .deep_clone(&mut func.stencil.dfg.value_lists); 672 673 *exception.signature_mut() = signature; 674 675 let returns_len = func.dfg.signatures[signature].returns.len(); 676 let returns_len = u32::try_from(returns_len).unwrap(); 677 678 *exception.normal_return_mut() = ir::BlockCall::new( 679 new_block, 680 (0..returns_len).map(|i| ir::BlockArg::TryCallRet(i)), 681 &mut func.dfg.value_lists, 682 ); 683 684 func.dfg.exception_tables.push(exception) 685 }; 686 687 for inst in allocs.calls_needing_exception_table_fixup.drain(..) { 688 debug_assert!(func.dfg.insts[inst].opcode().is_call()); 689 debug_assert!(!func.dfg.insts[inst].opcode().is_return()); 690 match func.dfg.insts[inst] { 691 // current_block: 692 // preds... 693 // rets... = call f(args...) 694 // succs... 695 // 696 // becomes 697 // 698 // current_block: 699 // preds... 700 // try_call f(args...), new_block(rets...), [call_exception_table...] 701 // new_block(rets...): 702 // succs... 
703 ir::InstructionData::Call { 704 opcode: ir::Opcode::Call, 705 args, 706 func_ref, 707 } => { 708 let new_block = split_block_for_new_try_call(func, inst); 709 let signature = func.dfg.ext_funcs[func_ref].signature; 710 let exception = clone_exception_table_for_this_call(func, signature, new_block); 711 func.dfg.insts[inst] = ir::InstructionData::TryCall { 712 opcode: ir::Opcode::TryCall, 713 args, 714 func_ref, 715 exception, 716 }; 717 } 718 719 // current_block: 720 // preds... 721 // rets... = call_indirect sig, val(args...) 722 // succs... 723 // 724 // becomes 725 // 726 // current_block: 727 // preds... 728 // try_call_indirect sig, val(args...), new_block(rets...), [call_exception_table...] 729 // new_block(rets...): 730 // succs... 731 ir::InstructionData::CallIndirect { 732 opcode: ir::Opcode::CallIndirect, 733 args, 734 sig_ref, 735 } => { 736 let new_block = split_block_for_new_try_call(func, inst); 737 let exception = clone_exception_table_for_this_call(func, sig_ref, new_block); 738 func.dfg.insts[inst] = ir::InstructionData::TryCallIndirect { 739 opcode: ir::Opcode::TryCallIndirect, 740 args, 741 exception, 742 }; 743 } 744 745 // For `try_call[_indirect]` instructions, we just need to merge the 746 // exception tables. 747 ir::InstructionData::TryCall { 748 opcode: ir::Opcode::TryCall, 749 exception, 750 .. 751 } 752 | ir::InstructionData::TryCallIndirect { 753 opcode: ir::Opcode::TryCallIndirect, 754 exception, 755 .. 756 } => { 757 // Construct a new exception table that consists of 758 // the inlined instruction's exception table match 759 // sequence, with the inlining site's exception table 760 // appended. This will ensure that the first-match 761 // semantics emulates the original behavior of 762 // matching in the inner frame first. 
763 let sig = func.dfg.exception_tables[exception].signature(); 764 let normal_return = *func.dfg.exception_tables[exception].normal_return(); 765 let exception_data = ExceptionTableData::new( 766 sig, 767 normal_return, 768 func.dfg.exception_tables[exception] 769 .items() 770 .chain(func.dfg.exception_tables[call_exception_table].items()), 771 ) 772 .deep_clone(&mut func.dfg.value_lists); 773 774 func.dfg.exception_tables[exception] = exception_data; 775 } 776 777 otherwise => unreachable!("unknown non-return call instruction: {otherwise:?}"), 778 } 779 } 780 } 781 782 /// After having created an inlined version of a callee instruction that returns 783 /// in the caller, we need to fix it up so that it doesn't actually return 784 /// (since we are already in the caller's frame) and instead just jumps to the 785 /// control-flow join point. 786 fn fixup_inst_that_returns( 787 allocs: &mut InliningAllocs, 788 func: &mut ir::Function, 789 callee: &ir::Function, 790 entity_map: &EntityMap, 791 call_opcode: ir::Opcode, 792 inlined_inst: ir::Inst, 793 callee_inst: ir::Inst, 794 return_block: ir::Block, 795 call_stack_map: Option<&[ir::UserStackMapEntry]>, 796 ) { 797 debug_assert!(func.dfg.insts[inlined_inst].opcode().is_return()); 798 match func.dfg.insts[inlined_inst] { 799 // return rets... 800 // 801 // becomes 802 // 803 // jump return_block(rets...) 804 ir::InstructionData::MultiAry { 805 opcode: ir::Opcode::Return, 806 args, 807 } => { 808 let rets = SmallBlockArgVec::from_iter( 809 args.as_slice(&func.dfg.value_lists) 810 .iter() 811 .copied() 812 .map(|v| v.into()), 813 ); 814 func.dfg.replace(inlined_inst).jump(return_block, &rets); 815 } 816 817 // return_call f(args...) 818 // 819 // becomes 820 // 821 // rets... = call f(args...) 822 // jump return_block(rets...) 
823 ir::InstructionData::Call { 824 opcode: ir::Opcode::ReturnCall, 825 args, 826 func_ref, 827 } => { 828 func.dfg.insts[inlined_inst] = ir::InstructionData::Call { 829 opcode: ir::Opcode::Call, 830 args, 831 func_ref, 832 }; 833 func.dfg.make_inst_results(inlined_inst, ir::types::INVALID); 834 835 append_stack_map_entries( 836 func, 837 callee, 838 &entity_map, 839 call_stack_map, 840 inlined_inst, 841 callee_inst, 842 ); 843 844 let rets = SmallBlockArgVec::from_iter( 845 func.dfg 846 .inst_results(inlined_inst) 847 .iter() 848 .copied() 849 .map(|v| v.into()), 850 ); 851 let mut cursor = FuncCursor::new(func); 852 cursor.goto_after_inst(inlined_inst); 853 cursor.ins().jump(return_block, &rets); 854 855 if call_opcode == ir::Opcode::TryCall { 856 allocs 857 .calls_needing_exception_table_fixup 858 .push(inlined_inst); 859 } 860 } 861 862 // return_call_indirect val(args...) 863 // 864 // becomes 865 // 866 // rets... = call_indirect val(args...) 867 // jump return_block(rets...) 868 ir::InstructionData::CallIndirect { 869 opcode: ir::Opcode::ReturnCallIndirect, 870 args, 871 sig_ref, 872 } => { 873 func.dfg.insts[inlined_inst] = ir::InstructionData::CallIndirect { 874 opcode: ir::Opcode::CallIndirect, 875 args, 876 sig_ref, 877 }; 878 func.dfg.make_inst_results(inlined_inst, ir::types::INVALID); 879 880 append_stack_map_entries( 881 func, 882 callee, 883 &entity_map, 884 call_stack_map, 885 inlined_inst, 886 callee_inst, 887 ); 888 889 let rets = SmallBlockArgVec::from_iter( 890 func.dfg 891 .inst_results(inlined_inst) 892 .iter() 893 .copied() 894 .map(|v| v.into()), 895 ); 896 let mut cursor = FuncCursor::new(func); 897 cursor.goto_after_inst(inlined_inst); 898 cursor.ins().jump(return_block, &rets); 899 900 if call_opcode == ir::Opcode::TryCall { 901 allocs 902 .calls_needing_exception_table_fixup 903 .push(inlined_inst); 904 } 905 } 906 907 inst_data => unreachable!( 908 "should have handled all `is_return() == true` instructions above; \ 909 got 
{inst_data:?}"
        ),
    }
}

/// An `InstructionMapper` implementation that remaps a callee instruction's
/// entity references to their new indices in the caller function.
struct InliningInstRemapper<'a> {
    // Source of the callee-value -> inlined-value lookups and of the
    // callee-constant -> inlined-constant table used below.
    allocs: &'a InliningAllocs,
    // The caller function; new value lists, jump tables, and exception tables
    // are created inside its data-flow graph.
    func: &'a mut ir::Function,
    // The callee function whose entity references are being translated.
    callee: &'a ir::Function,
    // Translates callee entities (blocks, signatures, stack slots, ...) into
    // their inlined counterparts in the caller.
    entity_map: &'a EntityMap,
}

impl<'a> ir::instructions::InstructionMapper for InliningInstRemapper<'a> {
    /// Look up the caller value that was previously associated with the given
    /// callee value.
    ///
    /// Panics if no inlined value has been recorded for `value` yet.
    fn map_value(&mut self, value: ir::Value) -> ir::Value {
        self.allocs.get_inlined_value(self.callee, value).expect(
            "defs come before uses; we should have already inlined all values \
             used by an instruction",
        )
    }

    /// Build a fresh value list in the caller's value-list pool that holds the
    /// inlined counterpart of every value in the callee's `value_list`.
    fn map_value_list(&mut self, value_list: ir::ValueList) -> ir::ValueList {
        let mut inlined_list = ir::ValueList::new();
        for callee_val in value_list.as_slice(&self.callee.dfg.value_lists) {
            let inlined_val = self.map_value(*callee_val);
            inlined_list.push(inlined_val, &mut self.func.dfg.value_lists);
        }
        inlined_list
    }

    /// Translate a callee global value via the entity map.
    fn map_global_value(&mut self, global_value: ir::GlobalValue) -> ir::GlobalValue {
        self.entity_map.inlined_global_value(global_value)
    }

    /// Create a new jump table in the caller whose default target and entries
    /// are the remapped block calls of the callee's jump table.
    fn map_jump_table(&mut self, jump_table: ir::JumpTable) -> ir::JumpTable {
        let inlined_default =
            self.map_block_call(self.callee.dfg.jump_tables[jump_table].default_block());
        let inlined_table = self.callee.dfg.jump_tables[jump_table]
            .as_slice()
            .iter()
            .map(|callee_block_call| self.map_block_call(*callee_block_call))
            .collect::<SmallBlockCallVec>();
        self.func
            .dfg
            .jump_tables
            .push(ir::JumpTableData::new(inlined_default, &inlined_table))
    }

    /// Create a new exception table in the caller with the callee table's
    /// signature, normal-return block call, and every item remapped.
    fn map_exception_table(&mut self, exception_table: ir::ExceptionTable) -> ir::ExceptionTable {
        let exception_table = &self.callee.dfg.exception_tables[exception_table];
        let inlined_sig_ref = self.map_sig_ref(exception_table.signature());
        let inlined_normal_return = self.map_block_call(*exception_table.normal_return());
        let inlined_table = exception_table
            .items()
            .map(|item| match item {
                // Tags are carried over unchanged; only the target block call
                // is remapped.
                ExceptionTableItem::Tag(tag, block_call) => {
                    ExceptionTableItem::Tag(tag, self.map_block_call(block_call))
                }
                ExceptionTableItem::Default(block_call) => {
                    ExceptionTableItem::Default(self.map_block_call(block_call))
                }
                ExceptionTableItem::Context(value) => {
                    ExceptionTableItem::Context(self.map_value(value))
                }
            })
            .collect::<SmallVec<[_; 8]>>();
        self.func
            .dfg
            .exception_tables
            .push(ir::ExceptionTableData::new(
                inlined_sig_ref,
                inlined_normal_return,
                inlined_table,
            ))
    }

    /// Rebuild a callee block call in the caller: the target block is
    /// translated through the entity map and the arguments are stored in the
    /// caller's value-list pool.
    fn map_block_call(&mut self, block_call: ir::BlockCall) -> ir::BlockCall {
        let callee_block = block_call.block(&self.callee.dfg.value_lists);
        let inlined_block = self.entity_map.inlined_block(callee_block);
        let args = block_call
            .args(&self.callee.dfg.value_lists)
            .map(|arg| match arg {
                ir::BlockArg::Value(value) => self.map_value(value).into(),
                // These arguments do not name a callee value, so they are
                // passed through as-is.
                ir::BlockArg::TryCallRet(_) | ir::BlockArg::TryCallExn(_) => arg,
            })
            .collect::<SmallBlockArgVec>();
        ir::BlockCall::new(inlined_block, args, &mut self.func.dfg.value_lists)
    }

    /// Translate a callee block via the entity map.
    fn map_block(&mut self, block: ir::Block) -> ir::Block {
        self.entity_map.inlined_block(block)
    }

    /// Translate a callee function reference via the entity map.
    fn map_func_ref(&mut self, func_ref: ir::FuncRef) -> ir::FuncRef {
        self.entity_map.inlined_func_ref(func_ref)
    }

    /// Translate a callee signature reference via the entity map.
    fn map_sig_ref(&mut self, sig_ref: ir::SigRef) -> ir::SigRef {
        self.entity_map.inlined_sig_ref(sig_ref)
    }

    /// Translate a callee stack slot via the entity map.
    fn map_stack_slot(&mut self, stack_slot: ir::StackSlot) -> ir::StackSlot {
        self.entity_map.inlined_stack_slot(stack_slot)
    }

    /// Translate a callee dynamic stack slot via the entity map.
    fn map_dynamic_stack_slot(
        &mut self,
        dynamic_stack_slot: ir::DynamicStackSlot,
    ) -> ir::DynamicStackSlot {
        self.entity_map
            .inlined_dynamic_stack_slot(dynamic_stack_slot)
    }

    /// Look up the caller constant recorded for the given callee constant.
    ///
    /// Constants are translated through a per-constant table in `allocs`
    /// rather than through the entity map. Panics if no entry was recorded.
    fn map_constant(&mut self, constant: ir::Constant) -> ir::Constant {
        self.allocs
            .constants
            .get(constant)
            .and_then(|o| o.expand())
            .expect("should have inlined all callee constants")
    }

    /// Translate a callee immediate via the entity map.
    fn map_immediate(&mut self, immediate: ir::Immediate) -> ir::Immediate {
        self.entity_map.inlined_immediate(immediate)
    }
}

/// Inline the callee's layout into the caller's layout.
///
/// The inlined copy of each callee block is inserted, in the callee's layout
/// order, directly after `call_block` (which must already be in the caller's
/// layout).
///
/// Returns the last inlined block in the layout. If the callee's layout has no
/// entry block, nothing is inserted and `call_block` itself is returned.
fn inline_block_layout(
    func: &mut ir::Function,
    call_block: ir::Block,
    callee: &ir::Function,
    entity_map: &EntityMap,
) -> ir::Block {
    debug_assert!(func.layout.is_block_inserted(call_block));

    // Iterate over callee blocks in layout order, inserting their associated
    // inlined block into the caller's layout.
    let mut prev_inlined_block = call_block;
    let mut next_callee_block = callee.layout.entry_block();
    while let Some(callee_block) = next_callee_block {
        debug_assert!(func.layout.is_block_inserted(prev_inlined_block));

        let inlined_block = entity_map.inlined_block(callee_block);
        func.layout
            .insert_block_after(inlined_block, prev_inlined_block);

        // Chain each insertion after the previous one so that the callee's
        // relative block order is preserved.
        prev_inlined_block = inlined_block;
        next_callee_block = callee.layout.next_block(callee_block);
    }

    debug_assert!(func.layout.is_block_inserted(prev_inlined_block));
    prev_inlined_block
}

/// Split the call instruction's block just after the call instruction to create
/// the point where control-flow joins after the inlined callee "returns".
///
/// Note that tail calls do not return to the caller and therefore do not have a
/// control-flow join point.
1071 fn split_off_return_block( 1072 func: &mut ir::Function, 1073 call_inst: ir::Inst, 1074 opcode: ir::Opcode, 1075 callee: &ir::Function, 1076 ) -> Option<ir::Block> { 1077 // When the `call_inst` is not a block terminator, we need to split the 1078 // block. 1079 let return_block = func.layout.next_inst(call_inst).map(|next_inst| { 1080 let return_block = func.dfg.blocks.add(); 1081 func.layout.split_block(return_block, next_inst); 1082 1083 // Add block parameters for each return value and alias the call 1084 // instruction's results to them. 1085 let old_results = 1086 SmallValueVec::from_iter(func.dfg.inst_results(call_inst).iter().copied()); 1087 debug_assert_eq!(old_results.len(), callee.signature.returns.len()); 1088 func.dfg.detach_inst_results(call_inst); 1089 for (abi, old_val) in callee.signature.returns.iter().zip(old_results) { 1090 debug_assert_eq!(abi.value_type, func.dfg.value_type(old_val)); 1091 let ret_param = func.dfg.append_block_param(return_block, abi.value_type); 1092 func.dfg.change_to_alias(old_val, ret_param); 1093 } 1094 1095 return_block 1096 }); 1097 1098 // When the `call_inst` is a block terminator, then it is either a 1099 // `return_call` or a `try_call`: 1100 // 1101 // * For `return_call`s, we don't have a control-flow join point, because 1102 // the caller permanently transfers control to the callee. 1103 // 1104 // * For `try_call`s, we probably already have a block for the control-flow 1105 // join point, but it isn't guaranteed: the `try_call` might ignore the 1106 // call's returns and not forward them to the normal-return block or it 1107 // might also pass additional arguments. We can only reuse the existing 1108 // normal-return block when the `try_call` forwards exactly our callee's 1109 // returns to that block (and therefore that block's parameter types also 1110 // exactly match the callee's return types). 
Otherwise, we must create a new 1111 // return block that forwards to the existing normal-return 1112 // block. (Elsewhere, at the end of inlining, we will also update any inlined 1113 // calls to forward any raised exceptions to the caller's exception table, 1114 // as necessary.) 1115 // 1116 // Finally, note that reusing the normal-return's target block is just an 1117 // optimization to emit a simpler CFG when we can, and is not 1118 // fundamentally required for correctness. We could always insert a 1119 // temporary block as our control-flow join point that then forwards to 1120 // the normal-return's target block. However, at the time of writing, 1121 // Cranelift doesn't currently do any jump-threading or branch 1122 // simplification in the mid-end, and removing unnecessary blocks in this 1123 // way can help some subsequent mid-end optimizations. If, in the future, 1124 // we gain support for jump-threading optimizations in the mid-end, we can 1125 // come back and simplify the below code a bit to always generate the 1126 // temporary block, and then rely on the subsequent optimizations to clean 1127 // everything up. 1128 debug_assert_eq!( 1129 return_block.is_none(), 1130 opcode == ir::Opcode::ReturnCall || opcode == ir::Opcode::TryCall, 1131 ); 1132 return_block.or_else(|| match func.dfg.insts[call_inst] { 1133 ir::InstructionData::TryCall { 1134 opcode: ir::Opcode::TryCall, 1135 args: _, 1136 func_ref: _, 1137 exception, 1138 } => { 1139 let normal_return = func.dfg.exception_tables[exception].normal_return(); 1140 let normal_return_block = normal_return.block(&func.dfg.value_lists); 1141 1142 // Check to see if we can reuse the existing normal-return block. 
1143 { 1144 let normal_return_args = normal_return.args(&func.dfg.value_lists); 1145 if normal_return_args.len() == callee.signature.returns.len() 1146 && normal_return_args.enumerate().all(|(i, arg)| { 1147 let i = u32::try_from(i).unwrap(); 1148 arg == ir::BlockArg::TryCallRet(i) 1149 }) 1150 { 1151 return Some(normal_return_block); 1152 } 1153 } 1154 1155 // Okay, we cannot reuse the normal-return block. Create a new block 1156 // that has the expected block parameter types and have it jump to 1157 // the normal-return block. 1158 let return_block = func.dfg.blocks.add(); 1159 func.layout.insert_block(return_block, normal_return_block); 1160 1161 let return_block_params = callee 1162 .signature 1163 .returns 1164 .iter() 1165 .map(|abi| func.dfg.append_block_param(return_block, abi.value_type)) 1166 .collect::<SmallValueVec>(); 1167 1168 let normal_return_args = func.dfg.exception_tables[exception] 1169 .normal_return() 1170 .args(&func.dfg.value_lists) 1171 .collect::<SmallBlockArgVec>(); 1172 let jump_args = normal_return_args 1173 .into_iter() 1174 .map(|arg| match arg { 1175 ir::BlockArg::Value(value) => ir::BlockArg::Value(value), 1176 ir::BlockArg::TryCallRet(i) => { 1177 let i = usize::try_from(i).unwrap(); 1178 ir::BlockArg::Value(return_block_params[i]) 1179 } 1180 ir::BlockArg::TryCallExn(_) => { 1181 unreachable!("normal-return edges cannot use exceptional results") 1182 } 1183 }) 1184 .collect::<SmallBlockArgVec>(); 1185 1186 let mut cursor = FuncCursor::new(func); 1187 cursor.goto_first_insertion_point(return_block); 1188 cursor.ins().jump(normal_return_block, &jump_args); 1189 1190 Some(return_block) 1191 } 1192 _ => None, 1193 }) 1194 } 1195 1196 /// Replace the caller's call instruction with a jump to the caller's inlined 1197 /// copy of the callee's entry block. 1198 /// 1199 /// Also associates the callee's parameters with the caller's arguments in our 1200 /// value map. 1201 /// 1202 /// Returns the caller's stack map entries, if any. 
1203 fn replace_call_with_jump( 1204 allocs: &mut InliningAllocs, 1205 func: &mut ir::Function, 1206 call_inst: ir::Inst, 1207 callee: &ir::Function, 1208 entity_map: &EntityMap, 1209 ) -> Option<ir::UserStackMapEntryVec> { 1210 trace!("Replacing `call` with `jump`"); 1211 trace!( 1212 " --> call instruction: {call_inst:?}: {}", 1213 func.dfg.display_inst(call_inst) 1214 ); 1215 1216 let callee_entry_block = callee 1217 .layout 1218 .entry_block() 1219 .expect("callee function should have an entry block"); 1220 let callee_param_values = callee.dfg.block_params(callee_entry_block); 1221 let caller_arg_values = SmallValueVec::from_iter(func.dfg.inst_args(call_inst).iter().copied()); 1222 debug_assert_eq!(callee_param_values.len(), caller_arg_values.len()); 1223 debug_assert_eq!(callee_param_values.len(), callee.signature.params.len()); 1224 for (abi, (callee_param_value, caller_arg_value)) in callee 1225 .signature 1226 .params 1227 .iter() 1228 .zip(callee_param_values.into_iter().zip(caller_arg_values)) 1229 { 1230 debug_assert_eq!(abi.value_type, callee.dfg.value_type(*callee_param_value)); 1231 debug_assert_eq!(abi.value_type, func.dfg.value_type(caller_arg_value)); 1232 allocs.set_inlined_value(callee, *callee_param_value, caller_arg_value); 1233 } 1234 1235 // Replace the caller's call instruction with a jump to the caller's inlined 1236 // copy of the callee's entry block. 1237 // 1238 // Note that the call block dominates the inlined entry block (and also all 1239 // other inlined blocks) so we can reference the arguments directly, and do 1240 // not need to add block parameters to the inlined entry block. 
1241 let inlined_entry_block = entity_map.inlined_block(callee_entry_block); 1242 func.dfg.replace(call_inst).jump(inlined_entry_block, &[]); 1243 trace!( 1244 " --> replaced with jump instruction: {call_inst:?}: {}", 1245 func.dfg.display_inst(call_inst) 1246 ); 1247 1248 let stack_map_entries = func.dfg.take_user_stack_map_entries(call_inst); 1249 stack_map_entries 1250 } 1251 1252 /// Keeps track of mapping callee entities to their associated inlined caller 1253 /// entities. 1254 #[derive(Default)] 1255 struct EntityMap { 1256 // Rather than doing an implicit, demand-based, DCE'ing translation of 1257 // entities, which would require maps from each callee entity to its 1258 // associated caller entity, we copy all entities into the caller, remember 1259 // each entity's initial offset, and then mapping from the callee to the 1260 // inlined caller entity is just adding that initial offset to the callee's 1261 // index. This should be both faster and simpler than the alternative. Most 1262 // of these sets are relatively small, and they rarely have too much dead 1263 // code in practice, so this is a good trade off. 1264 // 1265 // Note that there are a few kinds of entities that are excluded from the 1266 // `EntityMap`, and for which we do actually take the demand-based approach: 1267 // values and value lists being the notable ones. 
1268 block_offset: Option<u32>, 1269 global_value_offset: Option<u32>, 1270 sig_ref_offset: Option<u32>, 1271 func_ref_offset: Option<u32>, 1272 stack_slot_offset: Option<u32>, 1273 dynamic_type_offset: Option<u32>, 1274 dynamic_stack_slot_offset: Option<u32>, 1275 immediate_offset: Option<u32>, 1276 } 1277 1278 impl EntityMap { 1279 fn inlined_block(&self, callee_block: ir::Block) -> ir::Block { 1280 let offset = self 1281 .block_offset 1282 .expect("must create inlined `ir::Block`s before calling `EntityMap::inlined_block`"); 1283 ir::Block::from_u32(offset + callee_block.as_u32()) 1284 } 1285 1286 fn iter_inlined_blocks(&self, func: &ir::Function) -> impl Iterator<Item = ir::Block> + use<> { 1287 let start = self.block_offset.expect( 1288 "must create inlined `ir::Block`s before calling `EntityMap::iter_inlined_blocks`", 1289 ); 1290 1291 let end = func.dfg.blocks.len(); 1292 let end = u32::try_from(end).unwrap(); 1293 1294 (start..end).map(|i| ir::Block::from_u32(i)) 1295 } 1296 1297 fn inlined_global_value(&self, callee_global_value: ir::GlobalValue) -> ir::GlobalValue { 1298 let offset = self 1299 .global_value_offset 1300 .expect("must create inlined `ir::GlobalValue`s before calling `EntityMap::inlined_global_value`"); 1301 ir::GlobalValue::from_u32(offset + callee_global_value.as_u32()) 1302 } 1303 1304 fn inlined_sig_ref(&self, callee_sig_ref: ir::SigRef) -> ir::SigRef { 1305 let offset = self.sig_ref_offset.expect( 1306 "must create inlined `ir::SigRef`s before calling `EntityMap::inlined_sig_ref`", 1307 ); 1308 ir::SigRef::from_u32(offset + callee_sig_ref.as_u32()) 1309 } 1310 1311 fn inlined_func_ref(&self, callee_func_ref: ir::FuncRef) -> ir::FuncRef { 1312 let offset = self.func_ref_offset.expect( 1313 "must create inlined `ir::FuncRef`s before calling `EntityMap::inlined_func_ref`", 1314 ); 1315 ir::FuncRef::from_u32(offset + callee_func_ref.as_u32()) 1316 } 1317 1318 fn inlined_stack_slot(&self, callee_stack_slot: ir::StackSlot) -> ir::StackSlot { 
1319 let offset = self.stack_slot_offset.expect( 1320 "must create inlined `ir::StackSlot`s before calling `EntityMap::inlined_stack_slot`", 1321 ); 1322 ir::StackSlot::from_u32(offset + callee_stack_slot.as_u32()) 1323 } 1324 1325 fn inlined_dynamic_type(&self, callee_dynamic_type: ir::DynamicType) -> ir::DynamicType { 1326 let offset = self.dynamic_type_offset.expect( 1327 "must create inlined `ir::DynamicType`s before calling `EntityMap::inlined_dynamic_type`", 1328 ); 1329 ir::DynamicType::from_u32(offset + callee_dynamic_type.as_u32()) 1330 } 1331 1332 fn inlined_dynamic_stack_slot( 1333 &self, 1334 callee_dynamic_stack_slot: ir::DynamicStackSlot, 1335 ) -> ir::DynamicStackSlot { 1336 let offset = self.dynamic_stack_slot_offset.expect( 1337 "must create inlined `ir::DynamicStackSlot`s before calling `EntityMap::inlined_dynamic_stack_slot`", 1338 ); 1339 ir::DynamicStackSlot::from_u32(offset + callee_dynamic_stack_slot.as_u32()) 1340 } 1341 1342 fn inlined_immediate(&self, callee_immediate: ir::Immediate) -> ir::Immediate { 1343 let offset = self.immediate_offset.expect( 1344 "must create inlined `ir::Immediate`s before calling `EntityMap::inlined_immediate`", 1345 ); 1346 ir::Immediate::from_u32(offset + callee_immediate.as_u32()) 1347 } 1348 } 1349 1350 /// Translate all of the callee's various entities into the caller, producing an 1351 /// `EntityMap` that can be used to translate callee entity references into 1352 /// inlined caller entity references. 
1353 fn create_entities( 1354 allocs: &mut InliningAllocs, 1355 func: &mut ir::Function, 1356 callee: &ir::Function, 1357 ) -> EntityMap { 1358 let mut entity_map = EntityMap::default(); 1359 1360 entity_map.block_offset = Some(create_blocks(allocs, func, callee)); 1361 entity_map.global_value_offset = Some(create_global_values(func, callee)); 1362 entity_map.sig_ref_offset = Some(create_sig_refs(func, callee)); 1363 create_user_external_name_refs(allocs, func, callee); 1364 entity_map.func_ref_offset = Some(create_func_refs(allocs, func, callee, &entity_map)); 1365 entity_map.stack_slot_offset = Some(create_stack_slots(func, callee)); 1366 entity_map.dynamic_type_offset = Some(create_dynamic_types(func, callee, &entity_map)); 1367 entity_map.dynamic_stack_slot_offset = 1368 Some(create_dynamic_stack_slots(func, callee, &entity_map)); 1369 entity_map.immediate_offset = Some(create_immediates(func, callee)); 1370 1371 // `ir::ConstantData` is deduplicated, so we cannot use our offset scheme 1372 // for `ir::Constant`s. Nonetheless, we still insert them into the caller 1373 // now, at the same time as the rest of our entities. 1374 create_constants(allocs, func, callee); 1375 1376 entity_map 1377 } 1378 1379 /// Create inlined blocks in the caller for every block in the callee. 
1380 fn create_blocks( 1381 allocs: &mut InliningAllocs, 1382 func: &mut ir::Function, 1383 callee: &ir::Function, 1384 ) -> u32 { 1385 let offset = func.dfg.blocks.len(); 1386 let offset = u32::try_from(offset).unwrap(); 1387 1388 func.dfg.blocks.reserve(callee.dfg.blocks.len()); 1389 for callee_block in callee.dfg.blocks.iter() { 1390 let caller_block = func.dfg.blocks.add(); 1391 trace!("Callee {callee_block:?} = inlined {caller_block:?}"); 1392 1393 if callee.layout.is_cold(callee_block) { 1394 func.layout.set_cold(caller_block); 1395 } 1396 1397 // Note: the entry block does not need parameters because the only 1398 // predecessor is the call block and we associate the callee's 1399 // parameters with the caller's arguments directly. 1400 if callee.layout.entry_block() != Some(callee_block) { 1401 for callee_param in callee.dfg.blocks[callee_block].params(&callee.dfg.value_lists) { 1402 let ty = callee.dfg.value_type(*callee_param); 1403 let caller_param = func.dfg.append_block_param(caller_block, ty); 1404 1405 allocs.set_inlined_value(callee, *callee_param, caller_param); 1406 } 1407 } 1408 } 1409 1410 offset 1411 } 1412 1413 /// Copy and translate global values from the callee into the caller. 1414 fn create_global_values(func: &mut ir::Function, callee: &ir::Function) -> u32 { 1415 let gv_offset = func.global_values.len(); 1416 let gv_offset = u32::try_from(gv_offset).unwrap(); 1417 1418 func.global_values.reserve(callee.global_values.len()); 1419 for gv in callee.global_values.values() { 1420 func.global_values.push(match gv { 1421 // These kinds of global values reference other global values, so we 1422 // need to fixup that reference. 
1423 ir::GlobalValueData::Load { 1424 base, 1425 offset, 1426 global_type, 1427 flags, 1428 } => ir::GlobalValueData::Load { 1429 base: ir::GlobalValue::from_u32(base.as_u32() + gv_offset), 1430 offset: *offset, 1431 global_type: *global_type, 1432 flags: *flags, 1433 }, 1434 ir::GlobalValueData::IAddImm { 1435 base, 1436 offset, 1437 global_type, 1438 } => ir::GlobalValueData::IAddImm { 1439 base: ir::GlobalValue::from_u32(base.as_u32() + gv_offset), 1440 offset: *offset, 1441 global_type: *global_type, 1442 }, 1443 1444 // These kinds of global values do not reference other global 1445 // values, so we can just clone them. 1446 ir::GlobalValueData::VMContext 1447 | ir::GlobalValueData::Symbol { .. } 1448 | ir::GlobalValueData::DynScaleTargetConst { .. } => gv.clone(), 1449 }); 1450 } 1451 1452 gv_offset 1453 } 1454 1455 /// Copy `ir::SigRef`s from the callee into the caller. 1456 fn create_sig_refs(func: &mut ir::Function, callee: &ir::Function) -> u32 { 1457 let offset = func.dfg.signatures.len(); 1458 let offset = u32::try_from(offset).unwrap(); 1459 1460 func.dfg.signatures.reserve(callee.dfg.signatures.len()); 1461 for sig in callee.dfg.signatures.values() { 1462 func.dfg.signatures.push(sig.clone()); 1463 } 1464 1465 offset 1466 } 1467 1468 fn create_user_external_name_refs( 1469 allocs: &mut InliningAllocs, 1470 func: &mut ir::Function, 1471 callee: &ir::Function, 1472 ) { 1473 for (callee_named_func_ref, name) in callee.params.user_named_funcs().iter() { 1474 let caller_named_func_ref = func.declare_imported_user_function(name.clone()); 1475 allocs.user_external_name_refs[callee_named_func_ref] = Some(caller_named_func_ref).into(); 1476 } 1477 } 1478 1479 /// Translate `ir::FuncRef`s from the callee into the caller. 
1480 fn create_func_refs( 1481 allocs: &InliningAllocs, 1482 func: &mut ir::Function, 1483 callee: &ir::Function, 1484 entity_map: &EntityMap, 1485 ) -> u32 { 1486 let offset = func.dfg.ext_funcs.len(); 1487 let offset = u32::try_from(offset).unwrap(); 1488 1489 func.dfg.ext_funcs.reserve(callee.dfg.ext_funcs.len()); 1490 for ir::ExtFuncData { 1491 name, 1492 signature, 1493 colocated, 1494 } in callee.dfg.ext_funcs.values() 1495 { 1496 func.dfg.ext_funcs.push(ir::ExtFuncData { 1497 name: match name { 1498 ir::ExternalName::User(name_ref) => { 1499 ir::ExternalName::User(allocs.user_external_name_refs[*name_ref].expect( 1500 "should have translated all `ir::UserExternalNameRef`s before translating \ 1501 `ir::FuncRef`s", 1502 )) 1503 } 1504 ir::ExternalName::TestCase(_) 1505 | ir::ExternalName::LibCall(_) 1506 | ir::ExternalName::KnownSymbol(_) => name.clone(), 1507 }, 1508 signature: entity_map.inlined_sig_ref(*signature), 1509 colocated: *colocated, 1510 }); 1511 } 1512 1513 offset 1514 } 1515 1516 /// Copy stack slots from the callee into the caller. 1517 fn create_stack_slots(func: &mut ir::Function, callee: &ir::Function) -> u32 { 1518 let offset = func.sized_stack_slots.len(); 1519 let offset = u32::try_from(offset).unwrap(); 1520 1521 func.sized_stack_slots 1522 .reserve(callee.sized_stack_slots.len()); 1523 for slot in callee.sized_stack_slots.values() { 1524 func.sized_stack_slots.push(slot.clone()); 1525 } 1526 1527 offset 1528 } 1529 1530 /// Copy dynamic types from the callee into the caller. 
1531 fn create_dynamic_types( 1532 func: &mut ir::Function, 1533 callee: &ir::Function, 1534 entity_map: &EntityMap, 1535 ) -> u32 { 1536 let offset = func.dynamic_stack_slots.len(); 1537 let offset = u32::try_from(offset).unwrap(); 1538 1539 func.dfg 1540 .dynamic_types 1541 .reserve(callee.dfg.dynamic_types.len()); 1542 for ir::DynamicTypeData { 1543 base_vector_ty, 1544 dynamic_scale, 1545 } in callee.dfg.dynamic_types.values() 1546 { 1547 func.dfg.dynamic_types.push(ir::DynamicTypeData { 1548 base_vector_ty: *base_vector_ty, 1549 dynamic_scale: entity_map.inlined_global_value(*dynamic_scale), 1550 }); 1551 } 1552 1553 offset 1554 } 1555 1556 /// Copy dynamic stack slots from the callee into the caller. 1557 fn create_dynamic_stack_slots( 1558 func: &mut ir::Function, 1559 callee: &ir::Function, 1560 entity_map: &EntityMap, 1561 ) -> u32 { 1562 let offset = func.dynamic_stack_slots.len(); 1563 let offset = u32::try_from(offset).unwrap(); 1564 1565 func.dynamic_stack_slots 1566 .reserve(callee.dynamic_stack_slots.len()); 1567 for ir::DynamicStackSlotData { kind, dyn_ty } in callee.dynamic_stack_slots.values() { 1568 func.dynamic_stack_slots.push(ir::DynamicStackSlotData { 1569 kind: *kind, 1570 dyn_ty: entity_map.inlined_dynamic_type(*dyn_ty), 1571 }); 1572 } 1573 1574 offset 1575 } 1576 1577 /// Copy immediates from the callee into the caller. 1578 fn create_immediates(func: &mut ir::Function, callee: &ir::Function) -> u32 { 1579 let offset = func.dfg.immediates.len(); 1580 let offset = u32::try_from(offset).unwrap(); 1581 1582 func.dfg.immediates.reserve(callee.dfg.immediates.len()); 1583 for imm in callee.dfg.immediates.values() { 1584 func.dfg.immediates.push(imm.clone()); 1585 } 1586 1587 offset 1588 } 1589 1590 /// Copy constants from the callee into the caller. 
1591 fn create_constants(allocs: &mut InliningAllocs, func: &mut ir::Function, callee: &ir::Function) { 1592 for (callee_constant, data) in callee.dfg.constants.iter() { 1593 let inlined_constant = func.dfg.constants.insert(data.clone()); 1594 allocs.constants[*callee_constant] = Some(inlined_constant).into(); 1595 } 1596 } 1597