1 //! Function inlining infrastructure. 2 //! 3 //! This module provides "inlining as a library" to Cranelift users; it does 4 //! _not_ provide a complete, off-the-shelf inlining solution. Cranelift's 5 //! compilation context is per-function and does not encompass the full call 6 //! graph. It does not know which functions are hot and which are cold, which 7 //! have been marked the equivalent of `#[inline(never)]`, etc... Only the 8 //! Cranelift user can understand these aspects of the full compilation 9 //! pipeline, and these things can be very different between (say) Wasmtime and 10 //! `cg_clif`. Therefore, this module does not attempt to define heuristics for 11 //! when inlining a particular call is likely beneficial. This module only 12 //! provides hooks for the Cranelift user to define whether a given call should 13 //! be inlined or not, and the mechanics to inline a callee into a particular 14 //! call site when directed to do so by the Cranelift user. 15 //! 16 //! The top-level inlining entry point during Cranelift compilation is 17 //! [`Context::inline`][crate::Context::inline]. It takes an [`Inline`] trait 18 //! implementation, which is authored by the Cranelift user and directs 19 //! Cranelift whether to inline a particular call, and, when inlining, gives 20 //! Cranelift the body of the callee that is to be inlined. 21 22 use crate::cursor::{Cursor as _, FuncCursor}; 23 use crate::ir::{self, ExceptionTableData, ExceptionTableItem, InstBuilder as _}; 24 use crate::result::CodegenResult; 25 use crate::trace; 26 use crate::traversals::Dfs; 27 use alloc::borrow::Cow; 28 use alloc::vec::Vec; 29 use cranelift_entity::{SecondaryMap, packed_option::PackedOption}; 30 use smallvec::SmallVec; 31 32 type SmallValueVec = SmallVec<[ir::Value; 8]>; 33 type SmallBlockArgVec = SmallVec<[ir::BlockArg; 8]>; 34 type SmallBlockCallVec = SmallVec<[ir::BlockCall; 8]>; 35 36 /// A command directing Cranelift whether or not to inline a particular call. 
37 pub enum InlineCommand<'a> { 38 /// Keep the call as-is, out-of-line, and do not inline the callee. 39 KeepCall, 40 41 /// Inline the call, using this function as the body of the callee. 42 /// 43 /// It is the `Inline` implementor's responsibility to ensure that this 44 /// function is the correct callee. Providing the wrong function may result 45 /// in panics during compilation or incorrect runtime behavior. 46 Inline { 47 /// The callee function's body. 48 callee: Cow<'a, ir::Function>, 49 /// Whether to visit any function calls within the callee body after 50 /// inlining and consider them for further inlining. 51 visit_callee: bool, 52 }, 53 } 54 55 /// A trait for directing Cranelift whether to inline a particular call or not. 56 /// 57 /// Used in combination with the [`Context::inline`][crate::Context::inline] 58 /// method. 59 pub trait Inline { 60 /// A hook invoked for each direct call instruction in a function, whose 61 /// result determines whether Cranelift should inline a given call. 62 /// 63 /// The Cranelift user is responsible for defining their own hueristics and 64 /// deciding whether inlining the call is beneficial. 65 /// 66 /// When returning a function and directing Cranelift to inline its body 67 /// into the call site, the `Inline` implementer must ensure the following: 68 /// 69 /// * The returned function's signature exactly matches the `callee` 70 /// `FuncRef`'s signature. 71 /// 72 /// * The returned function must be legalized. 73 /// 74 /// * The returned function must be valid (i.e. it must pass the CLIF 75 /// verifier). 76 /// 77 /// * The returned function is a correct and valid implementation of the 78 /// `callee` according to your language's semantics. 79 /// 80 /// Failure to uphold these invariants may result in panics during 81 /// compilation or incorrect runtime behavior in the generated code. 
82 fn inline( 83 &mut self, 84 caller: &ir::Function, 85 call_inst: ir::Inst, 86 call_opcode: ir::Opcode, 87 callee: ir::FuncRef, 88 call_args: &[ir::Value], 89 ) -> InlineCommand<'_>; 90 } 91 92 impl<'a, T> Inline for &'a mut T 93 where 94 T: Inline, 95 { 96 fn inline( 97 &mut self, 98 caller: &ir::Function, 99 inst: ir::Inst, 100 opcode: ir::Opcode, 101 callee: ir::FuncRef, 102 args: &[ir::Value], 103 ) -> InlineCommand<'_> { 104 (*self).inline(caller, inst, opcode, callee, args) 105 } 106 } 107 108 /// Walk the given function, invoke the `Inline` implementation for each call 109 /// instruction, and inline the callee when directed to do so. 110 /// 111 /// Returns whether any call was inlined. 112 pub(crate) fn do_inlining( 113 func: &mut ir::Function, 114 mut inliner: impl Inline, 115 ) -> CodegenResult<bool> { 116 trace!("function {} before inlining: {}", func.name, func); 117 118 let mut inlined_any = false; 119 let mut allocs = InliningAllocs::default(); 120 121 let mut cursor = FuncCursor::new(func); 122 'block_loop: while let Some(block) = cursor.next_block() { 123 // Always keep track of our previous cursor position. Assuming that the 124 // current position is a function call that we will inline, then the 125 // previous position is just before the inlined callee function. After 126 // inlining a call, the Cranelift user can decide whether to consider 127 // any function calls in the inlined callee for further inlining or 128 // not. When they do, then we back up to this previous cursor position 129 // so that our traversal will then continue over the inlined body. 130 let mut prev_pos; 131 132 while let Some(inst) = { 133 prev_pos = cursor.position(); 134 cursor.next_inst() 135 } { 136 // Make sure that `block` is always `inst`'s block, even with all of 137 // our cursor-position-updating and block-splitting-during-inlining 138 // shenanigans below. 
139 debug_assert_eq!(Some(block), cursor.func.layout.inst_block(inst)); 140 141 match cursor.func.dfg.insts[inst] { 142 ir::InstructionData::Call { 143 opcode: opcode @ ir::Opcode::Call | opcode @ ir::Opcode::ReturnCall, 144 args: _, 145 func_ref, 146 } => { 147 trace!( 148 "considering call site for inlining: {inst}: {}", 149 cursor.func.dfg.display_inst(inst), 150 ); 151 let args = cursor.func.dfg.inst_args(inst); 152 match inliner.inline(&cursor.func, inst, opcode, func_ref, args) { 153 InlineCommand::KeepCall => { 154 trace!(" --> keeping call"); 155 } 156 InlineCommand::Inline { 157 callee, 158 visit_callee, 159 } => { 160 let last_inlined_block = inline_one( 161 &mut allocs, 162 cursor.func, 163 func_ref, 164 block, 165 inst, 166 opcode, 167 &callee, 168 None, 169 ); 170 inlined_any = true; 171 if visit_callee { 172 cursor.set_position(prev_pos); 173 } else { 174 // Arrange it so that the `next_block()` loop 175 // will continue to the next block that is not 176 // associated with the just-inlined callee. 
177 cursor.goto_bottom(last_inlined_block); 178 continue 'block_loop; 179 } 180 } 181 } 182 } 183 ir::InstructionData::TryCall { 184 opcode: opcode @ ir::Opcode::TryCall, 185 args: _, 186 func_ref, 187 exception, 188 } => { 189 trace!( 190 "considering call site for inlining: {inst}: {}", 191 cursor.func.dfg.display_inst(inst), 192 ); 193 let args = cursor.func.dfg.inst_args(inst); 194 match inliner.inline(&cursor.func, inst, opcode, func_ref, args) { 195 InlineCommand::KeepCall => { 196 trace!(" --> keeping call"); 197 } 198 InlineCommand::Inline { 199 callee, 200 visit_callee, 201 } => { 202 let last_inlined_block = inline_one( 203 &mut allocs, 204 cursor.func, 205 func_ref, 206 block, 207 inst, 208 opcode, 209 &callee, 210 Some(exception), 211 ); 212 inlined_any = true; 213 if visit_callee { 214 cursor.set_position(prev_pos); 215 } else { 216 // Arrange it so that the `next_block()` loop 217 // will continue to the next block that is not 218 // associated with the just-inlined callee. 219 cursor.goto_bottom(last_inlined_block); 220 continue 'block_loop; 221 } 222 } 223 } 224 } 225 ir::InstructionData::CallIndirect { .. } 226 | ir::InstructionData::TryCallIndirect { .. } => { 227 // Can't inline indirect calls; need to have some earlier 228 // pass rewrite them into direct calls first, when possible. 229 } 230 _ => { 231 debug_assert!( 232 !cursor.func.dfg.insts[inst].opcode().is_call(), 233 "should have matched all call instructions, but found: {inst}: {}", 234 cursor.func.dfg.display_inst(inst), 235 ); 236 } 237 } 238 } 239 } 240 241 if inlined_any { 242 trace!("function {} after inlining: {}", func.name, func); 243 } else { 244 trace!("function {} did not have any callees inlined", func.name); 245 } 246 247 Ok(inlined_any) 248 } 249 250 #[derive(Default)] 251 struct InliningAllocs { 252 /// Map from callee value to inlined caller value. 
253 values: SecondaryMap<ir::Value, PackedOption<ir::Value>>, 254 255 /// Map from callee constant to inlined caller constant. 256 /// 257 /// Not in `EntityMap` because these are hash-consed inside the 258 /// `ir::Function`. 259 constants: SecondaryMap<ir::Constant, PackedOption<ir::Constant>>, 260 261 /// Map from callee to inlined caller external name refs. 262 /// 263 /// Not in `EntityMap` because these are hash-consed inside the 264 /// `ir::Function`. 265 user_external_name_refs: 266 SecondaryMap<ir::UserExternalNameRef, PackedOption<ir::UserExternalNameRef>>, 267 268 /// The set of _caller_ inlined call instructions that need exception table 269 /// fixups at the end of inlining. 270 /// 271 /// This includes all kinds of non-returning calls, not just the literal 272 /// `call` instruction: `call_indirect`, `try_call`, `try_call_indirect`, 273 /// etc... However, it does not include `return_call` and 274 /// `return_call_indirect` instructions because the caller cannot catch 275 /// exceptions that those calls throw because the caller is no longer on the 276 /// stack as soon as they are executed. 277 /// 278 /// Note: this is a simple `Vec`, and not an `EntitySet`, because it is very 279 /// sparse: most of the caller's instructions are not inlined call 280 /// instructions. Additionally, we require deterministic iteration order and 281 /// do not require set-membership testing, so a hash set is not a good 282 /// choice either. 
283 calls_needing_exception_table_fixup: Vec<ir::Inst>, 284 } 285 286 impl InliningAllocs { 287 fn reset(&mut self, callee: &ir::Function) { 288 let InliningAllocs { 289 values, 290 constants, 291 user_external_name_refs, 292 calls_needing_exception_table_fixup, 293 } = self; 294 295 values.clear(); 296 values.resize(callee.dfg.len_values()); 297 298 constants.clear(); 299 constants.resize(callee.dfg.constants.len()); 300 301 user_external_name_refs.clear(); 302 user_external_name_refs.resize(callee.params.user_named_funcs().len()); 303 304 // Note: We do not reserve capacity for 305 // `calls_needing_exception_table_fixup` because it is a sparse set and 306 // we don't know how large it needs to be ahead of time. 307 calls_needing_exception_table_fixup.clear(); 308 } 309 310 fn set_inlined_value( 311 &mut self, 312 callee: &ir::Function, 313 callee_val: ir::Value, 314 inlined_val: ir::Value, 315 ) { 316 trace!(" --> callee {callee_val:?} = inlined {inlined_val:?}"); 317 debug_assert!(self.values[callee_val].is_none()); 318 let resolved_callee_val = callee.dfg.resolve_aliases(callee_val); 319 debug_assert!(self.values[resolved_callee_val].is_none()); 320 self.values[resolved_callee_val] = Some(inlined_val).into(); 321 } 322 323 fn get_inlined_value(&self, callee: &ir::Function, callee_val: ir::Value) -> Option<ir::Value> { 324 let resolved_callee_val = callee.dfg.resolve_aliases(callee_val); 325 self.values[resolved_callee_val].expand() 326 } 327 } 328 329 /// Inline one particular function call. 330 /// 331 /// Returns the last inlined block in the layout. 
332 fn inline_one( 333 allocs: &mut InliningAllocs, 334 func: &mut ir::Function, 335 callee_func_ref: ir::FuncRef, 336 call_block: ir::Block, 337 call_inst: ir::Inst, 338 call_opcode: ir::Opcode, 339 callee: &ir::Function, 340 call_exception_table: Option<ir::ExceptionTable>, 341 ) -> ir::Block { 342 trace!( 343 "Inlining call {call_inst:?}: {}\n\ 344 with callee = {callee:?}", 345 func.dfg.display_inst(call_inst) 346 ); 347 348 // Type check callee signature. 349 let expected_callee_sig = func.dfg.ext_funcs[callee_func_ref].signature; 350 let expected_callee_sig = &func.dfg.signatures[expected_callee_sig]; 351 assert_eq!(expected_callee_sig, &callee.signature); 352 353 allocs.reset(callee); 354 355 // First, append various callee entity arenas to the end of the caller's 356 // entity arenas. 357 let entity_map = create_entities(allocs, func, callee); 358 359 // Inlined prologue: split the call instruction's block at the point of the 360 // call and replace the call with a jump. 361 let return_block = split_off_return_block(func, call_inst, call_opcode, callee); 362 let call_stack_map = replace_call_with_jump(allocs, func, call_inst, callee, &entity_map); 363 364 // Prepare for translating the actual instructions by inserting the inlined 365 // blocks into the caller's layout in the same order that they appear in the 366 // callee. 367 let last_inlined_block = inline_block_layout(func, call_block, callee, &entity_map); 368 369 // Translate each instruction from the callee into the caller, 370 // appending them to their associated block in the caller. 371 // 372 // Note that we iterate over the callee with a pre-order traversal so that 373 // we see value defs before uses. 
374 for callee_block in Dfs::new().pre_order_iter(callee) { 375 let inlined_block = entity_map.inlined_block(callee_block); 376 trace!( 377 "Processing instructions in callee block {callee_block:?} (inlined block {inlined_block:?}" 378 ); 379 380 let mut next_callee_inst = callee.layout.first_inst(callee_block); 381 while let Some(callee_inst) = next_callee_inst { 382 trace!( 383 "Processing callee instruction {callee_inst:?}: {}", 384 callee.dfg.display_inst(callee_inst) 385 ); 386 387 assert_ne!( 388 callee.dfg.insts[callee_inst].opcode(), 389 ir::Opcode::GlobalValue, 390 "callee must already be legalized, we shouldn't see any `global_value` \ 391 instructions when inlining; found {callee_inst:?}: {}", 392 callee.dfg.display_inst(callee_inst) 393 ); 394 395 // Remap the callee instruction's entities and insert it into the 396 // caller's DFG. 397 let inlined_inst_data = callee.dfg.insts[callee_inst].map(InliningInstRemapper { 398 allocs: &allocs, 399 func, 400 callee, 401 entity_map: &entity_map, 402 }); 403 let inlined_inst = func.dfg.make_inst(inlined_inst_data); 404 func.layout.append_inst(inlined_inst, inlined_block); 405 406 let opcode = callee.dfg.insts[callee_inst].opcode(); 407 if opcode.is_return() { 408 // Instructions that return do not define any values, so we 409 // don't need to worry about that, but we do need to fix them up 410 // so that they return by jumping to our control-flow join 411 // block, rather than returning from the caller. 412 if let Some(return_block) = return_block { 413 fixup_inst_that_returns( 414 allocs, 415 func, 416 callee, 417 &entity_map, 418 call_opcode, 419 inlined_inst, 420 callee_inst, 421 return_block, 422 call_stack_map.as_ref().map(|es| &**es), 423 ); 424 } else { 425 // If we are inlining a callee that was invoked via 426 // `return_call`, we leave inlined return instructions 427 // as-is: there is no logical caller frame on the stack to 428 // continue to. 
429 debug_assert_eq!(call_opcode, ir::Opcode::ReturnCall); 430 } 431 } else { 432 // Make the instruction's result values. 433 let ctrl_typevar = callee.dfg.ctrl_typevar(callee_inst); 434 func.dfg.make_inst_results(inlined_inst, ctrl_typevar); 435 436 // Update the value map for this instruction's defs. 437 let callee_results = callee.dfg.inst_results(callee_inst); 438 let inlined_results = func.dfg.inst_results(inlined_inst); 439 debug_assert_eq!(callee_results.len(), inlined_results.len()); 440 for (callee_val, inlined_val) in callee_results.iter().zip(inlined_results) { 441 allocs.set_inlined_value(callee, *callee_val, *inlined_val); 442 } 443 444 if opcode.is_call() { 445 append_stack_map_entries( 446 func, 447 callee, 448 &entity_map, 449 call_stack_map.as_deref(), 450 inlined_inst, 451 callee_inst, 452 ); 453 454 // When we are inlining a `try_call` call site, we need to merge 455 // the call site's exception table into the inlined calls' 456 // exception tables. This can involve rewriting regular `call`s 457 // into `try_call`s, which requires mutating the CFG because 458 // `try_call` is a block terminator. However, we can't mutate 459 // the CFG in the middle of this traversal because we rely on 460 // the existence of a one-to-one mapping between the callee 461 // layout and the inlined layout. Instead, we record the set of 462 // inlined call instructions that will need fixing up, and 463 // perform that possibly-CFG-mutating exception table merging in 464 // a follow up pass, when we no longer rely on that one-to-one 465 // layout mapping. 
466 debug_assert_eq!( 467 call_opcode == ir::Opcode::TryCall, 468 call_exception_table.is_some() 469 ); 470 if call_opcode == ir::Opcode::TryCall { 471 allocs 472 .calls_needing_exception_table_fixup 473 .push(inlined_inst); 474 } 475 } 476 } 477 478 trace!( 479 " --> inserted inlined instruction {inlined_inst:?}: {}", 480 func.dfg.display_inst(inlined_inst) 481 ); 482 483 next_callee_inst = callee.layout.next_inst(callee_inst); 484 } 485 } 486 487 // We copied *all* callee blocks into the caller's layout, but only copied 488 // the callee instructions in *reachable* callee blocks into the caller's 489 // associated blocks. Therefore, any *unreachable* blocks are empty in the 490 // caller, which is invalid CLIF because all blocks must end in a 491 // terminator, so do a quick pass over the inlined blocks and remove any 492 // empty blocks from the caller's layout. 493 for block in entity_map.iter_inlined_blocks(func) { 494 if func.layout.is_block_inserted(block) && func.layout.first_inst(block).is_none() { 495 func.layout.remove_block(block); 496 } 497 } 498 499 // Final step: fixup the exception tables of any inlined calls when we are 500 // inlining a `try_call` site. 501 // 502 // Subtly, this requires rewriting non-catching `call[_indirect]` 503 // instructions into `try_call[_indirect]` instructions so that exceptions 504 // that unwound through the original callee frame and were caught by the 505 // caller's `try_call` do not unwind past this inlined frame. And turning a 506 // `call` into a `try_call` mutates the CFG, breaking our one-to-one mapping 507 // between callee blocks and inlined blocks, so we delay these fixups to 508 // this final step, when we no longer rely on that mapping. 
509 debug_assert!( 510 allocs.calls_needing_exception_table_fixup.is_empty() || call_exception_table.is_some() 511 ); 512 debug_assert_eq!( 513 call_opcode == ir::Opcode::TryCall, 514 call_exception_table.is_some() 515 ); 516 if let Some(call_exception_table) = call_exception_table { 517 fixup_inlined_call_exception_tables(allocs, func, call_exception_table); 518 } 519 520 last_inlined_block 521 } 522 523 /// Append stack map entries from the caller and callee to the given inlined 524 /// instruction. 525 fn append_stack_map_entries( 526 func: &mut ir::Function, 527 callee: &ir::Function, 528 entity_map: &EntityMap, 529 call_stack_map: Option<&[ir::UserStackMapEntry]>, 530 inlined_inst: ir::Inst, 531 callee_inst: ir::Inst, 532 ) { 533 // Add the caller's stack map to this call. These entries 534 // already refer to caller entities and do not need further 535 // translation. 536 func.dfg.append_user_stack_map_entries( 537 inlined_inst, 538 call_stack_map 539 .iter() 540 .flat_map(|entries| entries.iter().cloned()), 541 ); 542 543 // Append the callee's stack map to this call. These entries 544 // refer to callee entities and therefore do require 545 // translation into the caller's index space. 546 func.dfg.append_user_stack_map_entries( 547 inlined_inst, 548 callee 549 .dfg 550 .user_stack_map_entries(callee_inst) 551 .iter() 552 .flat_map(|entries| entries.iter()) 553 .map(|entry| ir::UserStackMapEntry { 554 ty: entry.ty, 555 slot: entity_map.inlined_stack_slot(entry.slot), 556 offset: entry.offset, 557 }), 558 ); 559 } 560 561 /// Create or update the exception tables for any inlined call instructions: 562 /// when inlining at a `try_call` site, we must forward our exceptional edges 563 /// into each inlined call instruction. 
564 fn fixup_inlined_call_exception_tables( 565 allocs: &mut InliningAllocs, 566 func: &mut ir::Function, 567 call_exception_table: ir::ExceptionTable, 568 ) { 569 // Split a block at a `call[_indirect]` instruction, detach the 570 // instruction's results, and alias them to the new block's parameters. 571 let split_block_for_new_try_call = |func: &mut ir::Function, inst: ir::Inst| -> ir::Block { 572 debug_assert!(func.dfg.insts[inst].opcode().is_call()); 573 debug_assert!(!func.dfg.insts[inst].opcode().is_terminator()); 574 575 // Split the block. 576 let next_inst = func 577 .layout 578 .next_inst(inst) 579 .expect("inst is not a terminator, should have a successor"); 580 let new_block = func.dfg.blocks.add(); 581 func.layout.split_block(new_block, next_inst); 582 583 // `try_call[_indirect]` instructions do not define values themselves; 584 // the normal-return block has parameters for the results. So remove 585 // this instruction's results, create an associated block parameter for 586 // each of them, and alias them to the new block parameter. 587 let old_results = SmallValueVec::from_iter(func.dfg.inst_results(inst).iter().copied()); 588 func.dfg.detach_inst_results(inst); 589 for old_result in old_results { 590 let ty = func.dfg.value_type(old_result); 591 let new_block_param = func.dfg.append_block_param(new_block, ty); 592 func.dfg.change_to_alias(old_result, new_block_param); 593 } 594 595 new_block 596 }; 597 598 // Clone the caller's exception table, updating it for use in the current 599 // `call[_indirect]` instruction as it becomes a `try_call[_indirect]`. 
600 let clone_exception_table_for_this_call = |func: &mut ir::Function, 601 signature: ir::SigRef, 602 new_block: ir::Block| 603 -> ir::ExceptionTable { 604 let mut exception = func.stencil.dfg.exception_tables[call_exception_table] 605 .deep_clone(&mut func.stencil.dfg.value_lists); 606 607 *exception.signature_mut() = signature; 608 609 let returns_len = func.dfg.signatures[signature].returns.len(); 610 let returns_len = u32::try_from(returns_len).unwrap(); 611 612 *exception.normal_return_mut() = ir::BlockCall::new( 613 new_block, 614 (0..returns_len).map(|i| ir::BlockArg::TryCallRet(i)), 615 &mut func.dfg.value_lists, 616 ); 617 618 func.dfg.exception_tables.push(exception) 619 }; 620 621 for inst in allocs.calls_needing_exception_table_fixup.drain(..) { 622 debug_assert!(func.dfg.insts[inst].opcode().is_call()); 623 debug_assert!(!func.dfg.insts[inst].opcode().is_return()); 624 match func.dfg.insts[inst] { 625 // current_block: 626 // preds... 627 // rets... = call f(args...) 628 // succs... 629 // 630 // becomes 631 // 632 // current_block: 633 // preds... 634 // try_call f(args...), new_block(rets...), [call_exception_table...] 635 // new_block(rets...): 636 // succs... 637 ir::InstructionData::Call { 638 opcode: ir::Opcode::Call, 639 args, 640 func_ref, 641 } => { 642 let new_block = split_block_for_new_try_call(func, inst); 643 let signature = func.dfg.ext_funcs[func_ref].signature; 644 let exception = clone_exception_table_for_this_call(func, signature, new_block); 645 func.dfg.insts[inst] = ir::InstructionData::TryCall { 646 opcode: ir::Opcode::TryCall, 647 args, 648 func_ref, 649 exception, 650 }; 651 } 652 653 // current_block: 654 // preds... 655 // rets... = call_indirect sig, val(args...) 656 // succs... 657 // 658 // becomes 659 // 660 // current_block: 661 // preds... 662 // try_call_indirect sig, val(args...), new_block(rets...), [call_exception_table...] 663 // new_block(rets...): 664 // succs... 
665 ir::InstructionData::CallIndirect { 666 opcode: ir::Opcode::CallIndirect, 667 args, 668 sig_ref, 669 } => { 670 let new_block = split_block_for_new_try_call(func, inst); 671 let exception = clone_exception_table_for_this_call(func, sig_ref, new_block); 672 func.dfg.insts[inst] = ir::InstructionData::TryCallIndirect { 673 opcode: ir::Opcode::TryCallIndirect, 674 args, 675 exception, 676 }; 677 } 678 679 // For `try_call[_indirect]` instructions, we just need to merge the 680 // exception tables. 681 ir::InstructionData::TryCall { 682 opcode: ir::Opcode::TryCall, 683 exception, 684 .. 685 } 686 | ir::InstructionData::TryCallIndirect { 687 opcode: ir::Opcode::TryCallIndirect, 688 exception, 689 .. 690 } => { 691 // Construct a new exception table that consists of 692 // the inlined instruction's exception table match 693 // sequence, with the inlining site's exception table 694 // appended. This will ensure that the first-match 695 // semantics emulates the original behavior of 696 // matching in the inner frame first. 697 let sig = func.dfg.exception_tables[exception].signature(); 698 let normal_return = *func.dfg.exception_tables[exception].normal_return(); 699 let exception_data = ExceptionTableData::new( 700 sig, 701 normal_return, 702 func.dfg.exception_tables[exception] 703 .items() 704 .chain(func.dfg.exception_tables[call_exception_table].items()), 705 ) 706 .deep_clone(&mut func.dfg.value_lists); 707 708 func.dfg.exception_tables[exception] = exception_data; 709 } 710 711 otherwise => unreachable!("unknown non-return call instruction: {otherwise:?}"), 712 } 713 } 714 } 715 716 /// After having created an inlined version of a callee instruction that returns 717 /// in the caller, we need to fix it up so that it doesn't actually return 718 /// (since we are already in the caller's frame) and instead just jumps to the 719 /// control-flow join point. 
720 fn fixup_inst_that_returns( 721 allocs: &mut InliningAllocs, 722 func: &mut ir::Function, 723 callee: &ir::Function, 724 entity_map: &EntityMap, 725 call_opcode: ir::Opcode, 726 inlined_inst: ir::Inst, 727 callee_inst: ir::Inst, 728 return_block: ir::Block, 729 call_stack_map: Option<&[ir::UserStackMapEntry]>, 730 ) { 731 debug_assert!(func.dfg.insts[inlined_inst].opcode().is_return()); 732 match func.dfg.insts[inlined_inst] { 733 // return rets... 734 // 735 // becomes 736 // 737 // jump return_block(rets...) 738 ir::InstructionData::MultiAry { 739 opcode: ir::Opcode::Return, 740 args, 741 } => { 742 let rets = SmallBlockArgVec::from_iter( 743 args.as_slice(&func.dfg.value_lists) 744 .iter() 745 .copied() 746 .map(|v| v.into()), 747 ); 748 func.dfg.replace(inlined_inst).jump(return_block, &rets); 749 } 750 751 // return_call f(args...) 752 // 753 // becomes 754 // 755 // rets... = call f(args...) 756 // jump return_block(rets...) 757 ir::InstructionData::Call { 758 opcode: ir::Opcode::ReturnCall, 759 args, 760 func_ref, 761 } => { 762 func.dfg.insts[inlined_inst] = ir::InstructionData::Call { 763 opcode: ir::Opcode::Call, 764 args, 765 func_ref, 766 }; 767 func.dfg.make_inst_results(inlined_inst, ir::types::INVALID); 768 769 append_stack_map_entries( 770 func, 771 callee, 772 &entity_map, 773 call_stack_map, 774 inlined_inst, 775 callee_inst, 776 ); 777 778 let rets = SmallBlockArgVec::from_iter( 779 func.dfg 780 .inst_results(inlined_inst) 781 .iter() 782 .copied() 783 .map(|v| v.into()), 784 ); 785 let mut cursor = FuncCursor::new(func); 786 cursor.goto_after_inst(inlined_inst); 787 cursor.ins().jump(return_block, &rets); 788 789 if call_opcode == ir::Opcode::TryCall { 790 allocs 791 .calls_needing_exception_table_fixup 792 .push(inlined_inst); 793 } 794 } 795 796 // return_call_indirect val(args...) 797 // 798 // becomes 799 // 800 // rets... = call_indirect val(args...) 801 // jump return_block(rets...) 
802 ir::InstructionData::CallIndirect { 803 opcode: ir::Opcode::ReturnCallIndirect, 804 args, 805 sig_ref, 806 } => { 807 func.dfg.insts[inlined_inst] = ir::InstructionData::CallIndirect { 808 opcode: ir::Opcode::CallIndirect, 809 args, 810 sig_ref, 811 }; 812 func.dfg.make_inst_results(inlined_inst, ir::types::INVALID); 813 814 append_stack_map_entries( 815 func, 816 callee, 817 &entity_map, 818 call_stack_map, 819 inlined_inst, 820 callee_inst, 821 ); 822 823 let rets = SmallBlockArgVec::from_iter( 824 func.dfg 825 .inst_results(inlined_inst) 826 .iter() 827 .copied() 828 .map(|v| v.into()), 829 ); 830 let mut cursor = FuncCursor::new(func); 831 cursor.goto_after_inst(inlined_inst); 832 cursor.ins().jump(return_block, &rets); 833 834 if call_opcode == ir::Opcode::TryCall { 835 allocs 836 .calls_needing_exception_table_fixup 837 .push(inlined_inst); 838 } 839 } 840 841 inst_data => unreachable!( 842 "should have handled all `is_return() == true` instructions above; \ 843 got {inst_data:?}" 844 ), 845 } 846 } 847 848 /// An `InstructionMapper` implementation that remaps a callee instruction's 849 /// entity references to their new indices in the caller function. 
850 struct InliningInstRemapper<'a> { 851 allocs: &'a InliningAllocs, 852 func: &'a mut ir::Function, 853 callee: &'a ir::Function, 854 entity_map: &'a EntityMap, 855 } 856 857 impl<'a> ir::instructions::InstructionMapper for InliningInstRemapper<'a> { 858 fn map_value(&mut self, value: ir::Value) -> ir::Value { 859 self.allocs.get_inlined_value(self.callee, value).expect( 860 "defs come before uses; we should have already inlined all values \ 861 used by an instruction", 862 ) 863 } 864 865 fn map_value_list(&mut self, value_list: ir::ValueList) -> ir::ValueList { 866 let mut inlined_list = ir::ValueList::new(); 867 for callee_val in value_list.as_slice(&self.callee.dfg.value_lists) { 868 let inlined_val = self.map_value(*callee_val); 869 inlined_list.push(inlined_val, &mut self.func.dfg.value_lists); 870 } 871 inlined_list 872 } 873 874 fn map_global_value(&mut self, global_value: ir::GlobalValue) -> ir::GlobalValue { 875 self.entity_map.inlined_global_value(global_value) 876 } 877 878 fn map_jump_table(&mut self, jump_table: ir::JumpTable) -> ir::JumpTable { 879 let inlined_default = 880 self.map_block_call(self.callee.dfg.jump_tables[jump_table].default_block()); 881 let inlined_table = self.callee.dfg.jump_tables[jump_table] 882 .as_slice() 883 .iter() 884 .map(|callee_block_call| self.map_block_call(*callee_block_call)) 885 .collect::<SmallBlockCallVec>(); 886 self.func 887 .dfg 888 .jump_tables 889 .push(ir::JumpTableData::new(inlined_default, &inlined_table)) 890 } 891 892 fn map_exception_table(&mut self, exception_table: ir::ExceptionTable) -> ir::ExceptionTable { 893 let exception_table = &self.callee.dfg.exception_tables[exception_table]; 894 let inlined_sig_ref = self.map_sig_ref(exception_table.signature()); 895 let inlined_normal_return = self.map_block_call(*exception_table.normal_return()); 896 let inlined_table = exception_table 897 .items() 898 .map(|item| match item { 899 ExceptionTableItem::Tag(tag, block_call) => { 900 
ExceptionTableItem::Tag(tag, self.map_block_call(block_call)) 901 } 902 ExceptionTableItem::Default(block_call) => { 903 ExceptionTableItem::Default(self.map_block_call(block_call)) 904 } 905 ExceptionTableItem::Context(value) => { 906 ExceptionTableItem::Context(self.map_value(value)) 907 } 908 }) 909 .collect::<SmallVec<[_; 8]>>(); 910 self.func 911 .dfg 912 .exception_tables 913 .push(ir::ExceptionTableData::new( 914 inlined_sig_ref, 915 inlined_normal_return, 916 inlined_table, 917 )) 918 } 919 920 fn map_block_call(&mut self, block_call: ir::BlockCall) -> ir::BlockCall { 921 let callee_block = block_call.block(&self.callee.dfg.value_lists); 922 let inlined_block = self.entity_map.inlined_block(callee_block); 923 let args = block_call 924 .args(&self.callee.dfg.value_lists) 925 .map(|arg| match arg { 926 ir::BlockArg::Value(value) => self.map_value(value).into(), 927 ir::BlockArg::TryCallRet(_) | ir::BlockArg::TryCallExn(_) => arg, 928 }) 929 .collect::<SmallBlockArgVec>(); 930 ir::BlockCall::new(inlined_block, args, &mut self.func.dfg.value_lists) 931 } 932 933 fn map_func_ref(&mut self, func_ref: ir::FuncRef) -> ir::FuncRef { 934 self.entity_map.inlined_func_ref(func_ref) 935 } 936 937 fn map_sig_ref(&mut self, sig_ref: ir::SigRef) -> ir::SigRef { 938 self.entity_map.inlined_sig_ref(sig_ref) 939 } 940 941 fn map_stack_slot(&mut self, stack_slot: ir::StackSlot) -> ir::StackSlot { 942 self.entity_map.inlined_stack_slot(stack_slot) 943 } 944 945 fn map_dynamic_stack_slot( 946 &mut self, 947 dynamic_stack_slot: ir::DynamicStackSlot, 948 ) -> ir::DynamicStackSlot { 949 self.entity_map 950 .inlined_dynamic_stack_slot(dynamic_stack_slot) 951 } 952 953 fn map_constant(&mut self, constant: ir::Constant) -> ir::Constant { 954 self.allocs 955 .constants 956 .get(constant) 957 .and_then(|o| o.expand()) 958 .expect("should have inlined all callee constants") 959 } 960 961 fn map_immediate(&mut self, immediate: ir::Immediate) -> ir::Immediate { 962 
self.entity_map.inlined_immediate(immediate) 963 } 964 } 965 966 /// Inline the callee's layout into the caller's layout. 967 /// 968 /// Returns the last inlined block in the layout. 969 fn inline_block_layout( 970 func: &mut ir::Function, 971 call_block: ir::Block, 972 callee: &ir::Function, 973 entity_map: &EntityMap, 974 ) -> ir::Block { 975 // Iterate over callee blocks in layout order, inserting their associated 976 // inlined block into the caller's layout. 977 let mut prev_inlined_block = call_block; 978 let mut next_callee_block = callee.layout.entry_block(); 979 while let Some(callee_block) = next_callee_block { 980 let inlined_block = entity_map.inlined_block(callee_block); 981 func.layout 982 .insert_block_after(inlined_block, prev_inlined_block); 983 984 prev_inlined_block = inlined_block; 985 next_callee_block = callee.layout.next_block(callee_block); 986 } 987 prev_inlined_block 988 } 989 990 /// Split the call instruction's block just after the call instruction to create 991 /// the point where control-flow joins after the inlined callee "returns". 992 /// 993 /// Note that tail calls do not return to the caller and therefore do not have a 994 /// control-flow join point. 995 fn split_off_return_block( 996 func: &mut ir::Function, 997 call_inst: ir::Inst, 998 opcode: ir::Opcode, 999 callee: &ir::Function, 1000 ) -> Option<ir::Block> { 1001 // When the `call_inst` is not a block terminator, we need to split the 1002 // block. 1003 let return_block = func.layout.next_inst(call_inst).map(|next_inst| { 1004 let return_block = func.dfg.blocks.add(); 1005 func.layout.split_block(return_block, next_inst); 1006 1007 // Add block parameters for each return value and alias the call 1008 // instruction's results to them. 
1009 let old_results = 1010 SmallValueVec::from_iter(func.dfg.inst_results(call_inst).iter().copied()); 1011 debug_assert_eq!(old_results.len(), callee.signature.returns.len()); 1012 func.dfg.detach_inst_results(call_inst); 1013 for (abi, old_val) in callee.signature.returns.iter().zip(old_results) { 1014 debug_assert_eq!(abi.value_type, func.dfg.value_type(old_val)); 1015 let ret_param = func.dfg.append_block_param(return_block, abi.value_type); 1016 func.dfg.change_to_alias(old_val, ret_param); 1017 } 1018 1019 return_block 1020 }); 1021 1022 // When the `call_inst` is a block terminator, then it is either a 1023 // `return_call` or a `try_call`: 1024 // 1025 // * For `return_call`s, we don't have a control-flow join point, because 1026 // the caller permanently transfers control to the callee. 1027 // 1028 // * For `try_call`s, we probably already have a block for the control-flow 1029 // join point, but it isn't guaranteed: the `try_call` might ignore the 1030 // call's returns and not forward them to the normal-return block or it 1031 // might also pass additional arguments. We can only reuse the existing 1032 // normal-return block when the `try_call` forwards exactly our callee's 1033 // returns to that block (and therefore that block's parameter types also 1034 // exactly match the callee's return types). Otherwise, we must create a new 1035 // return block that forwards to the existing normal-return 1036 // block. (Elsewhere, at the end of inlining, we will also update any inlined 1037 // calls to forward any raised exceptions to the caller's exception table, 1038 // as necessary.) 1039 // 1040 // Finally, note that reusing the normal-return's target block is just an 1041 // optimization to emit a simpler CFG when we can, and is not 1042 // fundamentally required for correctness. We could always insert a 1043 // temporary block as our control-flow join point that then forwards to 1044 // the normal-return's target block. 
However, at the time of writing, 1045 // Cranelift doesn't currently do any jump-threading or branch 1046 // simplification in the mid-end, and removing unnecessary blocks in this 1047 // way can help some subsequent mid-end optimizations. If, in the future, 1048 // we gain support for jump-threading optimizations in the mid-end, we can 1049 // come back and simplify the below code a bit to always generate the 1050 // temporary block, and then rely on the subsequent optimizations to clean 1051 // everything up. 1052 debug_assert_eq!( 1053 return_block.is_none(), 1054 opcode == ir::Opcode::ReturnCall || opcode == ir::Opcode::TryCall, 1055 ); 1056 return_block.or_else(|| match func.dfg.insts[call_inst] { 1057 ir::InstructionData::TryCall { 1058 opcode: ir::Opcode::TryCall, 1059 args: _, 1060 func_ref: _, 1061 exception, 1062 } => { 1063 let normal_return = func.dfg.exception_tables[exception].normal_return(); 1064 let normal_return_block = normal_return.block(&func.dfg.value_lists); 1065 1066 // Check to see if we can reuse the existing normal-return block. 1067 { 1068 let normal_return_args = normal_return.args(&func.dfg.value_lists); 1069 if normal_return_args.len() == callee.signature.returns.len() 1070 && normal_return_args.enumerate().all(|(i, arg)| { 1071 let i = u32::try_from(i).unwrap(); 1072 arg == ir::BlockArg::TryCallRet(i) 1073 }) 1074 { 1075 return Some(normal_return_block); 1076 } 1077 } 1078 1079 // Okay, we cannot reuse the normal-return block. Create a new block 1080 // that has the expected block parameter types and have it jump to 1081 // the normal-return block. 
1082 let return_block = func.dfg.blocks.add(); 1083 func.layout.insert_block(return_block, normal_return_block); 1084 1085 let return_block_params = callee 1086 .signature 1087 .returns 1088 .iter() 1089 .map(|abi| func.dfg.append_block_param(return_block, abi.value_type)) 1090 .collect::<SmallValueVec>(); 1091 1092 let normal_return_args = func.dfg.exception_tables[exception] 1093 .normal_return() 1094 .args(&func.dfg.value_lists) 1095 .collect::<SmallBlockArgVec>(); 1096 let jump_args = normal_return_args 1097 .into_iter() 1098 .map(|arg| match arg { 1099 ir::BlockArg::Value(value) => ir::BlockArg::Value(value), 1100 ir::BlockArg::TryCallRet(i) => { 1101 let i = usize::try_from(i).unwrap(); 1102 ir::BlockArg::Value(return_block_params[i]) 1103 } 1104 ir::BlockArg::TryCallExn(_) => { 1105 unreachable!("normal-return edges cannot use exceptional results") 1106 } 1107 }) 1108 .collect::<SmallBlockArgVec>(); 1109 1110 let mut cursor = FuncCursor::new(func); 1111 cursor.goto_first_insertion_point(return_block); 1112 cursor.ins().jump(normal_return_block, &jump_args); 1113 1114 Some(return_block) 1115 } 1116 _ => None, 1117 }) 1118 } 1119 1120 /// Replace the caller's call instruction with a jump to the caller's inlined 1121 /// copy of the callee's entry block. 1122 /// 1123 /// Also associates the callee's parameters with the caller's arguments in our 1124 /// value map. 1125 /// 1126 /// Returns the caller's stack map entries, if any. 
1127 fn replace_call_with_jump( 1128 allocs: &mut InliningAllocs, 1129 func: &mut ir::Function, 1130 call_inst: ir::Inst, 1131 callee: &ir::Function, 1132 entity_map: &EntityMap, 1133 ) -> Option<ir::UserStackMapEntryVec> { 1134 trace!("Replacing `call` with `jump`"); 1135 trace!( 1136 " --> call instruction: {call_inst:?}: {}", 1137 func.dfg.display_inst(call_inst) 1138 ); 1139 1140 let callee_entry_block = callee 1141 .layout 1142 .entry_block() 1143 .expect("callee function should have an entry block"); 1144 let callee_param_values = callee.dfg.block_params(callee_entry_block); 1145 let caller_arg_values = SmallValueVec::from_iter(func.dfg.inst_args(call_inst).iter().copied()); 1146 debug_assert_eq!(callee_param_values.len(), caller_arg_values.len()); 1147 debug_assert_eq!(callee_param_values.len(), callee.signature.params.len()); 1148 for (abi, (callee_param_value, caller_arg_value)) in callee 1149 .signature 1150 .params 1151 .iter() 1152 .zip(callee_param_values.into_iter().zip(caller_arg_values)) 1153 { 1154 debug_assert_eq!(abi.value_type, callee.dfg.value_type(*callee_param_value)); 1155 debug_assert_eq!(abi.value_type, func.dfg.value_type(caller_arg_value)); 1156 allocs.set_inlined_value(callee, *callee_param_value, caller_arg_value); 1157 } 1158 1159 // Replace the caller's call instruction with a jump to the caller's inlined 1160 // copy of the callee's entry block. 1161 // 1162 // Note that the call block dominates the inlined entry block (and also all 1163 // other inlined blocks) so we can reference the arguments directly, and do 1164 // not need to add block parameters to the inlined entry block. 
1165 let inlined_entry_block = entity_map.inlined_block(callee_entry_block); 1166 func.dfg.replace(call_inst).jump(inlined_entry_block, &[]); 1167 trace!( 1168 " --> replaced with jump instruction: {call_inst:?}: {}", 1169 func.dfg.display_inst(call_inst) 1170 ); 1171 1172 let stack_map_entries = func.dfg.take_user_stack_map_entries(call_inst); 1173 stack_map_entries 1174 } 1175 1176 /// Keeps track of mapping callee entities to their associated inlined caller 1177 /// entities. 1178 #[derive(Default)] 1179 struct EntityMap { 1180 // Rather than doing an implicit, demand-based, DCE'ing translation of 1181 // entities, which would require maps from each callee entity to its 1182 // associated caller entity, we copy all entities into the caller, remember 1183 // each entity's initial offset, and then mapping from the callee to the 1184 // inlined caller entity is just adding that initial offset to the callee's 1185 // index. This should be both faster and simpler than the alternative. Most 1186 // of these sets are relatively small, and they rarely have too much dead 1187 // code in practice, so this is a good trade off. 1188 // 1189 // Note that there are a few kinds of entities that are excluded from the 1190 // `EntityMap`, and for which we do actually take the demand-based approach: 1191 // values and value lists being the notable ones. 
1192 block_offset: Option<u32>, 1193 global_value_offset: Option<u32>, 1194 sig_ref_offset: Option<u32>, 1195 func_ref_offset: Option<u32>, 1196 stack_slot_offset: Option<u32>, 1197 dynamic_type_offset: Option<u32>, 1198 dynamic_stack_slot_offset: Option<u32>, 1199 immediate_offset: Option<u32>, 1200 } 1201 1202 impl EntityMap { 1203 fn inlined_block(&self, callee_block: ir::Block) -> ir::Block { 1204 let offset = self 1205 .block_offset 1206 .expect("must create inlined `ir::Block`s before calling `EntityMap::inlined_block`"); 1207 ir::Block::from_u32(offset + callee_block.as_u32()) 1208 } 1209 1210 fn iter_inlined_blocks(&self, func: &ir::Function) -> impl Iterator<Item = ir::Block> + use<> { 1211 let start = self.block_offset.expect( 1212 "must create inlined `ir::Block`s before calling `EntityMap::iter_inlined_blocks`", 1213 ); 1214 1215 let end = func.dfg.blocks.len(); 1216 let end = u32::try_from(end).unwrap(); 1217 1218 (start..end).map(|i| ir::Block::from_u32(i)) 1219 } 1220 1221 fn inlined_global_value(&self, callee_global_value: ir::GlobalValue) -> ir::GlobalValue { 1222 let offset = self 1223 .global_value_offset 1224 .expect("must create inlined `ir::GlobalValue`s before calling `EntityMap::inlined_global_value`"); 1225 ir::GlobalValue::from_u32(offset + callee_global_value.as_u32()) 1226 } 1227 1228 fn inlined_sig_ref(&self, callee_sig_ref: ir::SigRef) -> ir::SigRef { 1229 let offset = self.sig_ref_offset.expect( 1230 "must create inlined `ir::SigRef`s before calling `EntityMap::inlined_sig_ref`", 1231 ); 1232 ir::SigRef::from_u32(offset + callee_sig_ref.as_u32()) 1233 } 1234 1235 fn inlined_func_ref(&self, callee_func_ref: ir::FuncRef) -> ir::FuncRef { 1236 let offset = self.func_ref_offset.expect( 1237 "must create inlined `ir::FuncRef`s before calling `EntityMap::inlined_func_ref`", 1238 ); 1239 ir::FuncRef::from_u32(offset + callee_func_ref.as_u32()) 1240 } 1241 1242 fn inlined_stack_slot(&self, callee_stack_slot: ir::StackSlot) -> ir::StackSlot { 
1243 let offset = self.stack_slot_offset.expect( 1244 "must create inlined `ir::StackSlot`s before calling `EntityMap::inlined_stack_slot`", 1245 ); 1246 ir::StackSlot::from_u32(offset + callee_stack_slot.as_u32()) 1247 } 1248 1249 fn inlined_dynamic_type(&self, callee_dynamic_type: ir::DynamicType) -> ir::DynamicType { 1250 let offset = self.dynamic_type_offset.expect( 1251 "must create inlined `ir::DynamicType`s before calling `EntityMap::inlined_dynamic_type`", 1252 ); 1253 ir::DynamicType::from_u32(offset + callee_dynamic_type.as_u32()) 1254 } 1255 1256 fn inlined_dynamic_stack_slot( 1257 &self, 1258 callee_dynamic_stack_slot: ir::DynamicStackSlot, 1259 ) -> ir::DynamicStackSlot { 1260 let offset = self.dynamic_stack_slot_offset.expect( 1261 "must create inlined `ir::DynamicStackSlot`s before calling `EntityMap::inlined_dynamic_stack_slot`", 1262 ); 1263 ir::DynamicStackSlot::from_u32(offset + callee_dynamic_stack_slot.as_u32()) 1264 } 1265 1266 fn inlined_immediate(&self, callee_immediate: ir::Immediate) -> ir::Immediate { 1267 let offset = self.immediate_offset.expect( 1268 "must create inlined `ir::Immediate`s before calling `EntityMap::inlined_immediate`", 1269 ); 1270 ir::Immediate::from_u32(offset + callee_immediate.as_u32()) 1271 } 1272 } 1273 1274 /// Translate all of the callee's various entities into the caller, producing an 1275 /// `EntityMap` that can be used to translate callee entity references into 1276 /// inlined caller entity references. 
1277 fn create_entities( 1278 allocs: &mut InliningAllocs, 1279 func: &mut ir::Function, 1280 callee: &ir::Function, 1281 ) -> EntityMap { 1282 let mut entity_map = EntityMap::default(); 1283 1284 entity_map.block_offset = Some(create_blocks(allocs, func, callee)); 1285 entity_map.global_value_offset = Some(create_global_values(func, callee)); 1286 entity_map.sig_ref_offset = Some(create_sig_refs(func, callee)); 1287 create_user_external_name_refs(allocs, func, callee); 1288 entity_map.func_ref_offset = Some(create_func_refs(allocs, func, callee, &entity_map)); 1289 entity_map.stack_slot_offset = Some(create_stack_slots(func, callee)); 1290 entity_map.dynamic_type_offset = Some(create_dynamic_types(func, callee, &entity_map)); 1291 entity_map.dynamic_stack_slot_offset = 1292 Some(create_dynamic_stack_slots(func, callee, &entity_map)); 1293 entity_map.immediate_offset = Some(create_immediates(func, callee)); 1294 1295 // `ir::ConstantData` is deduplicated, so we cannot use our offset scheme 1296 // for `ir::Constant`s. Nonetheless, we still insert them into the caller 1297 // now, at the same time as the rest of our entities. 1298 create_constants(allocs, func, callee); 1299 1300 entity_map 1301 } 1302 1303 /// Create inlined blocks in the caller for every block in the callee. 
1304 fn create_blocks( 1305 allocs: &mut InliningAllocs, 1306 func: &mut ir::Function, 1307 callee: &ir::Function, 1308 ) -> u32 { 1309 let offset = func.dfg.blocks.len(); 1310 let offset = u32::try_from(offset).unwrap(); 1311 1312 func.dfg.blocks.reserve(callee.dfg.blocks.len()); 1313 for callee_block in callee.dfg.blocks.iter() { 1314 let caller_block = func.dfg.blocks.add(); 1315 trace!("Callee {callee_block:?} = inlined {caller_block:?}"); 1316 1317 if callee.layout.is_cold(callee_block) { 1318 func.layout.set_cold(caller_block); 1319 } 1320 1321 // Note: the entry block does not need parameters because the only 1322 // predecessor is the call block and we associate the callee's 1323 // parameters with the caller's arguments directly. 1324 if callee.layout.entry_block() != Some(callee_block) { 1325 for callee_param in callee.dfg.blocks[callee_block].params(&callee.dfg.value_lists) { 1326 let ty = callee.dfg.value_type(*callee_param); 1327 let caller_param = func.dfg.append_block_param(caller_block, ty); 1328 1329 allocs.set_inlined_value(callee, *callee_param, caller_param); 1330 } 1331 } 1332 } 1333 1334 offset 1335 } 1336 1337 /// Copy and translate global values from the callee into the caller. 1338 fn create_global_values(func: &mut ir::Function, callee: &ir::Function) -> u32 { 1339 let gv_offset = func.global_values.len(); 1340 let gv_offset = u32::try_from(gv_offset).unwrap(); 1341 1342 func.global_values.reserve(callee.global_values.len()); 1343 for gv in callee.global_values.values() { 1344 func.global_values.push(match gv { 1345 // These kinds of global values reference other global values, so we 1346 // need to fixup that reference. 
1347 ir::GlobalValueData::Load { 1348 base, 1349 offset, 1350 global_type, 1351 flags, 1352 } => ir::GlobalValueData::Load { 1353 base: ir::GlobalValue::from_u32(base.as_u32() + gv_offset), 1354 offset: *offset, 1355 global_type: *global_type, 1356 flags: *flags, 1357 }, 1358 ir::GlobalValueData::IAddImm { 1359 base, 1360 offset, 1361 global_type, 1362 } => ir::GlobalValueData::IAddImm { 1363 base: ir::GlobalValue::from_u32(base.as_u32() + gv_offset), 1364 offset: *offset, 1365 global_type: *global_type, 1366 }, 1367 1368 // These kinds of global values do not reference other global 1369 // values, so we can just clone them. 1370 ir::GlobalValueData::VMContext 1371 | ir::GlobalValueData::Symbol { .. } 1372 | ir::GlobalValueData::DynScaleTargetConst { .. } => gv.clone(), 1373 }); 1374 } 1375 1376 gv_offset 1377 } 1378 1379 /// Copy `ir::SigRef`s from the callee into the caller. 1380 fn create_sig_refs(func: &mut ir::Function, callee: &ir::Function) -> u32 { 1381 let offset = func.dfg.signatures.len(); 1382 let offset = u32::try_from(offset).unwrap(); 1383 1384 func.dfg.signatures.reserve(callee.dfg.signatures.len()); 1385 for sig in callee.dfg.signatures.values() { 1386 func.dfg.signatures.push(sig.clone()); 1387 } 1388 1389 offset 1390 } 1391 1392 fn create_user_external_name_refs( 1393 allocs: &mut InliningAllocs, 1394 func: &mut ir::Function, 1395 callee: &ir::Function, 1396 ) { 1397 for (callee_named_func_ref, name) in callee.params.user_named_funcs().iter() { 1398 let caller_named_func_ref = func.declare_imported_user_function(name.clone()); 1399 allocs.user_external_name_refs[callee_named_func_ref] = Some(caller_named_func_ref).into(); 1400 } 1401 } 1402 1403 /// Translate `ir::FuncRef`s from the callee into the caller. 
1404 fn create_func_refs( 1405 allocs: &InliningAllocs, 1406 func: &mut ir::Function, 1407 callee: &ir::Function, 1408 entity_map: &EntityMap, 1409 ) -> u32 { 1410 let offset = func.dfg.ext_funcs.len(); 1411 let offset = u32::try_from(offset).unwrap(); 1412 1413 func.dfg.ext_funcs.reserve(callee.dfg.ext_funcs.len()); 1414 for ir::ExtFuncData { 1415 name, 1416 signature, 1417 colocated, 1418 } in callee.dfg.ext_funcs.values() 1419 { 1420 func.dfg.ext_funcs.push(ir::ExtFuncData { 1421 name: match name { 1422 ir::ExternalName::User(name_ref) => { 1423 ir::ExternalName::User(allocs.user_external_name_refs[*name_ref].expect( 1424 "should have translated all `ir::UserExternalNameRef`s before translating \ 1425 `ir::FuncRef`s", 1426 )) 1427 } 1428 ir::ExternalName::TestCase(_) 1429 | ir::ExternalName::LibCall(_) 1430 | ir::ExternalName::KnownSymbol(_) => name.clone(), 1431 }, 1432 signature: entity_map.inlined_sig_ref(*signature), 1433 colocated: *colocated, 1434 }); 1435 } 1436 1437 offset 1438 } 1439 1440 /// Copy stack slots from the callee into the caller. 1441 fn create_stack_slots(func: &mut ir::Function, callee: &ir::Function) -> u32 { 1442 let offset = func.sized_stack_slots.len(); 1443 let offset = u32::try_from(offset).unwrap(); 1444 1445 func.sized_stack_slots 1446 .reserve(callee.sized_stack_slots.len()); 1447 for slot in callee.sized_stack_slots.values() { 1448 func.sized_stack_slots.push(slot.clone()); 1449 } 1450 1451 offset 1452 } 1453 1454 /// Copy dynamic types from the callee into the caller. 
1455 fn create_dynamic_types( 1456 func: &mut ir::Function, 1457 callee: &ir::Function, 1458 entity_map: &EntityMap, 1459 ) -> u32 { 1460 let offset = func.dynamic_stack_slots.len(); 1461 let offset = u32::try_from(offset).unwrap(); 1462 1463 func.dfg 1464 .dynamic_types 1465 .reserve(callee.dfg.dynamic_types.len()); 1466 for ir::DynamicTypeData { 1467 base_vector_ty, 1468 dynamic_scale, 1469 } in callee.dfg.dynamic_types.values() 1470 { 1471 func.dfg.dynamic_types.push(ir::DynamicTypeData { 1472 base_vector_ty: *base_vector_ty, 1473 dynamic_scale: entity_map.inlined_global_value(*dynamic_scale), 1474 }); 1475 } 1476 1477 offset 1478 } 1479 1480 /// Copy dynamic stack slots from the callee into the caller. 1481 fn create_dynamic_stack_slots( 1482 func: &mut ir::Function, 1483 callee: &ir::Function, 1484 entity_map: &EntityMap, 1485 ) -> u32 { 1486 let offset = func.dynamic_stack_slots.len(); 1487 let offset = u32::try_from(offset).unwrap(); 1488 1489 func.dynamic_stack_slots 1490 .reserve(callee.dynamic_stack_slots.len()); 1491 for ir::DynamicStackSlotData { kind, dyn_ty } in callee.dynamic_stack_slots.values() { 1492 func.dynamic_stack_slots.push(ir::DynamicStackSlotData { 1493 kind: *kind, 1494 dyn_ty: entity_map.inlined_dynamic_type(*dyn_ty), 1495 }); 1496 } 1497 1498 offset 1499 } 1500 1501 /// Copy immediates from the callee into the caller. 1502 fn create_immediates(func: &mut ir::Function, callee: &ir::Function) -> u32 { 1503 let offset = func.dfg.immediates.len(); 1504 let offset = u32::try_from(offset).unwrap(); 1505 1506 func.dfg.immediates.reserve(callee.dfg.immediates.len()); 1507 for imm in callee.dfg.immediates.values() { 1508 func.dfg.immediates.push(imm.clone()); 1509 } 1510 1511 offset 1512 } 1513 1514 /// Copy constants from the callee into the caller. 
1515 fn create_constants(allocs: &mut InliningAllocs, func: &mut ir::Function, callee: &ir::Function) { 1516 for (callee_constant, data) in callee.dfg.constants.iter() { 1517 let inlined_constant = func.dfg.constants.insert(data.clone()); 1518 allocs.constants[*callee_constant] = Some(inlined_constant).into(); 1519 } 1520 } 1521