//! Function inlining infrastructure.
//!
//! This module provides "inlining as a library" to Cranelift users; it does
//! _not_ provide a complete, off-the-shelf inlining solution. Cranelift's
//! compilation context is per-function and does not encompass the full call
//! graph. It does not know which functions are hot and which are cold, which
//! have been marked the equivalent of `#[inline(never)]`, etc... Only the
//! Cranelift user can understand these aspects of the full compilation
//! pipeline, and these things can be very different between (say) Wasmtime and
//! `cg_clif`. Therefore, this module does not attempt to define heuristics for
//! when inlining a particular call is likely beneficial. This module only
//! provides hooks for the Cranelift user to define whether a given call should
//! be inlined or not, and the mechanics to inline a callee into a particular
//! call site when directed to do so by the Cranelift user.
//!
//! The top-level inlining entry point during Cranelift compilation is
//! [`Context::inline`][crate::Context::inline]. It takes an [`Inline`] trait
//! implementation, which is authored by the Cranelift user and directs
//! Cranelift whether to inline a particular call, and, when inlining, gives
//! Cranelift the body of the callee that is to be inlined.

use crate::cursor::{Cursor as _, FuncCursor};
use crate::ir::{self, ExceptionTableData, ExceptionTableItem, InstBuilder as _};
use crate::result::CodegenResult;
use crate::trace;
use crate::traversals::Dfs;
use alloc::borrow::Cow;
use alloc::vec::Vec;
use cranelift_entity::{SecondaryMap, packed_option::PackedOption};
use smallvec::SmallVec;

type SmallValueVec = SmallVec<[ir::Value; 8]>;
type SmallBlockArgVec = SmallVec<[ir::BlockArg; 8]>;
type SmallBlockCallVec = SmallVec<[ir::BlockCall; 8]>;

/// A command directing Cranelift whether or not to inline a particular call.
37 pub enum InlineCommand<'a> { 38 /// Keep the call as-is, out-of-line, and do not inline the callee. 39 KeepCall, 40 41 /// Inline the call, using this function as the body of the callee. 42 /// 43 /// It is the `Inline` implementor's responsibility to ensure that this 44 /// function is the correct callee. Providing the wrong function may result 45 /// in panics during compilation or incorrect runtime behavior. 46 Inline { 47 /// The callee function's body. 48 callee: Cow<'a, ir::Function>, 49 /// Whether to visit any function calls within the callee body after 50 /// inlining and consider them for further inlining. 51 visit_callee: bool, 52 }, 53 } 54 55 /// A trait for directing Cranelift whether to inline a particular call or not. 56 /// 57 /// Used in combination with the [`Context::inline`][crate::Context::inline] 58 /// method. 59 pub trait Inline { 60 /// A hook invoked for each direct call instruction in a function, whose 61 /// result determines whether Cranelift should inline a given call. 62 /// 63 /// The Cranelift user is responsible for defining their own hueristics and 64 /// deciding whether inlining the call is beneficial. 65 /// 66 /// When returning a function and directing Cranelift to inline its body 67 /// into the call site, the `Inline` implementer must ensure the following: 68 /// 69 /// * The returned function's signature exactly matches the `callee` 70 /// `FuncRef`'s signature. 71 /// 72 /// * The returned function must be legalized. 73 /// 74 /// * The returned function must be valid (i.e. it must pass the CLIF 75 /// verifier). 76 /// 77 /// * The returned function is a correct and valid implementation of the 78 /// `callee` according to your language's semantics. 79 /// 80 /// Failure to uphold these invariants may result in panics during 81 /// compilation or incorrect runtime behavior in the generated code. 
82 fn inline( 83 &mut self, 84 caller: &ir::Function, 85 call_inst: ir::Inst, 86 call_opcode: ir::Opcode, 87 callee: ir::FuncRef, 88 call_args: &[ir::Value], 89 ) -> InlineCommand<'_>; 90 } 91 92 impl<'a, T> Inline for &'a mut T 93 where 94 T: Inline, 95 { 96 fn inline( 97 &mut self, 98 caller: &ir::Function, 99 inst: ir::Inst, 100 opcode: ir::Opcode, 101 callee: ir::FuncRef, 102 args: &[ir::Value], 103 ) -> InlineCommand<'_> { 104 (*self).inline(caller, inst, opcode, callee, args) 105 } 106 } 107 108 /// Walk the given function, invoke the `Inline` implementation for each call 109 /// instruction, and inline the callee when directed to do so. 110 /// 111 /// Returns whether any call was inlined. 112 pub(crate) fn do_inlining( 113 func: &mut ir::Function, 114 mut inliner: impl Inline, 115 ) -> CodegenResult<bool> { 116 trace!("function {} before inlining: {}", func.name, func); 117 118 let mut inlined_any = false; 119 let mut allocs = InliningAllocs::default(); 120 121 let mut cursor = FuncCursor::new(func); 122 'block_loop: while let Some(block) = cursor.next_block() { 123 // Always keep track of our previous cursor position. Assuming that the 124 // current position is a function call that we will inline, then the 125 // previous position is just before the inlined callee function. After 126 // inlining a call, the Cranelift user can decide whether to consider 127 // any function calls in the inlined callee for further inlining or 128 // not. When they do, then we back up to this previous cursor position 129 // so that our traversal will then continue over the inlined body. 130 let mut prev_pos; 131 132 while let Some(inst) = { 133 prev_pos = cursor.position(); 134 cursor.next_inst() 135 } { 136 // Make sure that `block` is always `inst`'s block, even with all of 137 // our cursor-position-updating and block-splitting-during-inlining 138 // shenanigans below. 
139 debug_assert_eq!(Some(block), cursor.func.layout.inst_block(inst)); 140 141 match cursor.func.dfg.insts[inst] { 142 ir::InstructionData::Call { 143 opcode: opcode @ ir::Opcode::Call | opcode @ ir::Opcode::ReturnCall, 144 args: _, 145 func_ref, 146 } => { 147 trace!( 148 "considering call site for inlining: {inst}: {}", 149 cursor.func.dfg.display_inst(inst), 150 ); 151 let args = cursor.func.dfg.inst_args(inst); 152 match inliner.inline(&cursor.func, inst, opcode, func_ref, args) { 153 InlineCommand::KeepCall => { 154 trace!(" --> keeping call"); 155 } 156 InlineCommand::Inline { 157 callee, 158 visit_callee, 159 } => { 160 let last_inlined_block = inline_one( 161 &mut allocs, 162 cursor.func, 163 func_ref, 164 block, 165 inst, 166 opcode, 167 &callee, 168 None, 169 ); 170 inlined_any = true; 171 if visit_callee { 172 cursor.set_position(prev_pos); 173 } else { 174 // Arrange it so that the `next_block()` loop 175 // will continue to the next block that is not 176 // associated with the just-inlined callee. 
177 cursor.goto_bottom(last_inlined_block); 178 continue 'block_loop; 179 } 180 } 181 } 182 } 183 ir::InstructionData::TryCall { 184 opcode: opcode @ ir::Opcode::TryCall, 185 args: _, 186 func_ref, 187 exception, 188 } => { 189 trace!( 190 "considering call site for inlining: {inst}: {}", 191 cursor.func.dfg.display_inst(inst), 192 ); 193 let args = cursor.func.dfg.inst_args(inst); 194 match inliner.inline(&cursor.func, inst, opcode, func_ref, args) { 195 InlineCommand::KeepCall => { 196 trace!(" --> keeping call"); 197 } 198 InlineCommand::Inline { 199 callee, 200 visit_callee, 201 } => { 202 let last_inlined_block = inline_one( 203 &mut allocs, 204 cursor.func, 205 func_ref, 206 block, 207 inst, 208 opcode, 209 &callee, 210 Some(exception), 211 ); 212 inlined_any = true; 213 if visit_callee { 214 cursor.set_position(prev_pos); 215 } else { 216 // Arrange it so that the `next_block()` loop 217 // will continue to the next block that is not 218 // associated with the just-inlined callee. 219 cursor.goto_bottom(last_inlined_block); 220 continue 'block_loop; 221 } 222 } 223 } 224 } 225 ir::InstructionData::CallIndirect { .. } 226 | ir::InstructionData::TryCallIndirect { .. } => { 227 // Can't inline indirect calls; need to have some earlier 228 // pass rewrite them into direct calls first, when possible. 229 } 230 _ => { 231 debug_assert!( 232 !cursor.func.dfg.insts[inst].opcode().is_call(), 233 "should have matched all call instructions, but found: {inst}: {}", 234 cursor.func.dfg.display_inst(inst), 235 ); 236 } 237 } 238 } 239 } 240 241 if inlined_any { 242 trace!("function {} after inlining: {}", func.name, func); 243 } else { 244 trace!("function {} did not have any callees inlined", func.name); 245 } 246 247 Ok(inlined_any) 248 } 249 250 #[derive(Default)] 251 struct InliningAllocs { 252 /// Map from callee value to inlined caller value. 
253 values: SecondaryMap<ir::Value, PackedOption<ir::Value>>, 254 255 /// Map from callee constant to inlined caller constant. 256 /// 257 /// Not in `EntityMap` because these are hash-consed inside the 258 /// `ir::Function`. 259 constants: SecondaryMap<ir::Constant, PackedOption<ir::Constant>>, 260 261 /// Map from callee to inlined caller external name refs. 262 /// 263 /// Not in `EntityMap` because these are hash-consed inside the 264 /// `ir::Function`. 265 user_external_name_refs: 266 SecondaryMap<ir::UserExternalNameRef, PackedOption<ir::UserExternalNameRef>>, 267 268 /// The set of _caller_ inlined call instructions that need exception table 269 /// fixups at the end of inlining. 270 /// 271 /// This includes all kinds of non-returning calls, not just the literal 272 /// `call` instruction: `call_indirect`, `try_call`, `try_call_indirect`, 273 /// etc... However, it does not include `return_call` and 274 /// `return_call_indirect` instructions because the caller cannot catch 275 /// exceptions that those calls throw because the caller is no longer on the 276 /// stack as soon as they are executed. 277 /// 278 /// Note: this is a simple `Vec`, and not an `EntitySet`, because it is very 279 /// sparse: most of the caller's instructions are not inlined call 280 /// instructions. Additionally, we require deterministic iteration order and 281 /// do not require set-membership testing, so a hash set is not a good 282 /// choice either. 
283 calls_needing_exception_table_fixup: Vec<ir::Inst>, 284 } 285 286 impl InliningAllocs { 287 fn reset(&mut self, callee: &ir::Function) { 288 let InliningAllocs { 289 values, 290 constants, 291 user_external_name_refs, 292 calls_needing_exception_table_fixup, 293 } = self; 294 295 values.clear(); 296 values.resize(callee.dfg.len_values()); 297 298 constants.clear(); 299 constants.resize(callee.dfg.constants.len()); 300 301 user_external_name_refs.clear(); 302 user_external_name_refs.resize(callee.params.user_named_funcs().len()); 303 304 // Note: We do not reserve capacity for 305 // `calls_needing_exception_table_fixup` because it is a sparse set and 306 // we don't know how large it needs to be ahead of time. 307 calls_needing_exception_table_fixup.clear(); 308 } 309 310 fn set_inlined_value( 311 &mut self, 312 callee: &ir::Function, 313 callee_val: ir::Value, 314 inlined_val: ir::Value, 315 ) { 316 trace!(" --> callee {callee_val:?} = inlined {inlined_val:?}"); 317 debug_assert!(self.values[callee_val].is_none()); 318 let resolved_callee_val = callee.dfg.resolve_aliases(callee_val); 319 debug_assert!(self.values[resolved_callee_val].is_none()); 320 self.values[resolved_callee_val] = Some(inlined_val).into(); 321 } 322 323 fn get_inlined_value(&self, callee: &ir::Function, callee_val: ir::Value) -> Option<ir::Value> { 324 let resolved_callee_val = callee.dfg.resolve_aliases(callee_val); 325 self.values[resolved_callee_val].expand() 326 } 327 } 328 329 /// Inline one particular function call. 330 /// 331 /// Returns the last inlined block in the layout. 
332 fn inline_one( 333 allocs: &mut InliningAllocs, 334 func: &mut ir::Function, 335 callee_func_ref: ir::FuncRef, 336 call_block: ir::Block, 337 call_inst: ir::Inst, 338 call_opcode: ir::Opcode, 339 callee: &ir::Function, 340 call_exception_table: Option<ir::ExceptionTable>, 341 ) -> ir::Block { 342 trace!( 343 "Inlining call {call_inst:?}: {}\n\ 344 with callee = {callee:?}", 345 func.dfg.display_inst(call_inst) 346 ); 347 348 // Type check callee signature. 349 let expected_callee_sig = func.dfg.ext_funcs[callee_func_ref].signature; 350 let expected_callee_sig = &func.dfg.signatures[expected_callee_sig]; 351 assert_eq!(expected_callee_sig, &callee.signature); 352 353 allocs.reset(callee); 354 355 // First, append various callee entity arenas to the end of the caller's 356 // entity arenas. 357 let entity_map = create_entities(allocs, func, callee); 358 359 // Inlined prologue: split the call instruction's block at the point of the 360 // call and replace the call with a jump. 361 let return_block = split_off_return_block(func, call_inst, call_opcode, callee); 362 let call_stack_map = replace_call_with_jump(allocs, func, call_inst, callee, &entity_map); 363 364 // Prepare for translating the actual instructions by inserting the inlined 365 // blocks into the caller's layout in the same order that they appear in the 366 // callee. 367 let mut last_inlined_block = inline_block_layout(func, call_block, callee, &entity_map); 368 369 // Translate each instruction from the callee into the caller, 370 // appending them to their associated block in the caller. 371 // 372 // Note that we iterate over the callee with a pre-order traversal so that 373 // we see value defs before uses. 
374 for callee_block in Dfs::new().pre_order_iter(callee) { 375 let inlined_block = entity_map.inlined_block(callee_block); 376 trace!( 377 "Processing instructions in callee block {callee_block:?} (inlined block {inlined_block:?}" 378 ); 379 380 let mut next_callee_inst = callee.layout.first_inst(callee_block); 381 while let Some(callee_inst) = next_callee_inst { 382 trace!( 383 "Processing callee instruction {callee_inst:?}: {}", 384 callee.dfg.display_inst(callee_inst) 385 ); 386 387 assert_ne!( 388 callee.dfg.insts[callee_inst].opcode(), 389 ir::Opcode::GlobalValue, 390 "callee must already be legalized, we shouldn't see any `global_value` \ 391 instructions when inlining; found {callee_inst:?}: {}", 392 callee.dfg.display_inst(callee_inst) 393 ); 394 395 // Remap the callee instruction's entities and insert it into the 396 // caller's DFG. 397 let inlined_inst_data = callee.dfg.insts[callee_inst].map(InliningInstRemapper { 398 allocs: &allocs, 399 func, 400 callee, 401 entity_map: &entity_map, 402 }); 403 let inlined_inst = func.dfg.make_inst(inlined_inst_data); 404 func.layout.append_inst(inlined_inst, inlined_block); 405 406 let opcode = callee.dfg.insts[callee_inst].opcode(); 407 if opcode.is_return() { 408 // Instructions that return do not define any values, so we 409 // don't need to worry about that, but we do need to fix them up 410 // so that they return by jumping to our control-flow join 411 // block, rather than returning from the caller. 412 if let Some(return_block) = return_block { 413 fixup_inst_that_returns( 414 allocs, 415 func, 416 callee, 417 &entity_map, 418 call_opcode, 419 inlined_inst, 420 callee_inst, 421 return_block, 422 call_stack_map.as_ref().map(|es| &**es), 423 ); 424 } else { 425 // If we are inlining a callee that was invoked via 426 // `return_call`, we leave inlined return instructions 427 // as-is: there is no logical caller frame on the stack to 428 // continue to. 
429 debug_assert_eq!(call_opcode, ir::Opcode::ReturnCall); 430 } 431 } else { 432 // Make the instruction's result values. 433 let ctrl_typevar = callee.dfg.ctrl_typevar(callee_inst); 434 func.dfg.make_inst_results(inlined_inst, ctrl_typevar); 435 436 // Update the value map for this instruction's defs. 437 let callee_results = callee.dfg.inst_results(callee_inst); 438 let inlined_results = func.dfg.inst_results(inlined_inst); 439 debug_assert_eq!(callee_results.len(), inlined_results.len()); 440 for (callee_val, inlined_val) in callee_results.iter().zip(inlined_results) { 441 allocs.set_inlined_value(callee, *callee_val, *inlined_val); 442 } 443 444 if opcode.is_call() { 445 append_stack_map_entries( 446 func, 447 callee, 448 &entity_map, 449 call_stack_map.as_deref(), 450 inlined_inst, 451 callee_inst, 452 ); 453 454 // When we are inlining a `try_call` call site, we need to merge 455 // the call site's exception table into the inlined calls' 456 // exception tables. This can involve rewriting regular `call`s 457 // into `try_call`s, which requires mutating the CFG because 458 // `try_call` is a block terminator. However, we can't mutate 459 // the CFG in the middle of this traversal because we rely on 460 // the existence of a one-to-one mapping between the callee 461 // layout and the inlined layout. Instead, we record the set of 462 // inlined call instructions that will need fixing up, and 463 // perform that possibly-CFG-mutating exception table merging in 464 // a follow up pass, when we no longer rely on that one-to-one 465 // layout mapping. 
466 debug_assert_eq!( 467 call_opcode == ir::Opcode::TryCall, 468 call_exception_table.is_some() 469 ); 470 if call_opcode == ir::Opcode::TryCall { 471 allocs 472 .calls_needing_exception_table_fixup 473 .push(inlined_inst); 474 } 475 } 476 } 477 478 trace!( 479 " --> inserted inlined instruction {inlined_inst:?}: {}", 480 func.dfg.display_inst(inlined_inst) 481 ); 482 483 next_callee_inst = callee.layout.next_inst(callee_inst); 484 } 485 } 486 487 // We copied *all* callee blocks into the caller's layout, but only copied 488 // the callee instructions in *reachable* callee blocks into the caller's 489 // associated blocks. Therefore, any *unreachable* blocks are empty in the 490 // caller, which is invalid CLIF because all blocks must end in a 491 // terminator, so do a quick pass over the inlined blocks and remove any 492 // empty blocks from the caller's layout. 493 for block in entity_map.iter_inlined_blocks(func) { 494 if func.layout.is_block_inserted(block) && func.layout.first_inst(block).is_none() { 495 log::trace!("removing unreachable inlined block from layout: {block}"); 496 497 // If the block being removed is our last-inlined block, then back 498 // it up to the previous block in the layout, which will be the new 499 // last-inlined block after this one's removal. 500 if block == last_inlined_block { 501 last_inlined_block = func.layout.prev_block(last_inlined_block).expect( 502 "there will always at least be the block that contained the call we are \ 503 inlining", 504 ); 505 } 506 507 func.layout.remove_block(block); 508 } 509 } 510 511 // Final step: fixup the exception tables of any inlined calls when we are 512 // inlining a `try_call` site. 513 // 514 // Subtly, this requires rewriting non-catching `call[_indirect]` 515 // instructions into `try_call[_indirect]` instructions so that exceptions 516 // that unwound through the original callee frame and were caught by the 517 // caller's `try_call` do not unwind past this inlined frame. 
And turning a 518 // `call` into a `try_call` mutates the CFG, breaking our one-to-one mapping 519 // between callee blocks and inlined blocks, so we delay these fixups to 520 // this final step, when we no longer rely on that mapping. 521 debug_assert!( 522 allocs.calls_needing_exception_table_fixup.is_empty() || call_exception_table.is_some() 523 ); 524 debug_assert_eq!( 525 call_opcode == ir::Opcode::TryCall, 526 call_exception_table.is_some() 527 ); 528 if let Some(call_exception_table) = call_exception_table { 529 fixup_inlined_call_exception_tables(allocs, func, call_exception_table); 530 } 531 532 debug_assert!( 533 func.layout.is_block_inserted(last_inlined_block), 534 "last_inlined_block={last_inlined_block} should be inserted in the layout" 535 ); 536 last_inlined_block 537 } 538 539 /// Append stack map entries from the caller and callee to the given inlined 540 /// instruction. 541 fn append_stack_map_entries( 542 func: &mut ir::Function, 543 callee: &ir::Function, 544 entity_map: &EntityMap, 545 call_stack_map: Option<&[ir::UserStackMapEntry]>, 546 inlined_inst: ir::Inst, 547 callee_inst: ir::Inst, 548 ) { 549 // Add the caller's stack map to this call. These entries 550 // already refer to caller entities and do not need further 551 // translation. 552 func.dfg.append_user_stack_map_entries( 553 inlined_inst, 554 call_stack_map 555 .iter() 556 .flat_map(|entries| entries.iter().cloned()), 557 ); 558 559 // Append the callee's stack map to this call. These entries 560 // refer to callee entities and therefore do require 561 // translation into the caller's index space. 
562 func.dfg.append_user_stack_map_entries( 563 inlined_inst, 564 callee 565 .dfg 566 .user_stack_map_entries(callee_inst) 567 .iter() 568 .flat_map(|entries| entries.iter()) 569 .map(|entry| ir::UserStackMapEntry { 570 ty: entry.ty, 571 slot: entity_map.inlined_stack_slot(entry.slot), 572 offset: entry.offset, 573 }), 574 ); 575 } 576 577 /// Create or update the exception tables for any inlined call instructions: 578 /// when inlining at a `try_call` site, we must forward our exceptional edges 579 /// into each inlined call instruction. 580 fn fixup_inlined_call_exception_tables( 581 allocs: &mut InliningAllocs, 582 func: &mut ir::Function, 583 call_exception_table: ir::ExceptionTable, 584 ) { 585 // Split a block at a `call[_indirect]` instruction, detach the 586 // instruction's results, and alias them to the new block's parameters. 587 let split_block_for_new_try_call = |func: &mut ir::Function, inst: ir::Inst| -> ir::Block { 588 debug_assert!(func.dfg.insts[inst].opcode().is_call()); 589 debug_assert!(!func.dfg.insts[inst].opcode().is_terminator()); 590 591 // Split the block. 592 let next_inst = func 593 .layout 594 .next_inst(inst) 595 .expect("inst is not a terminator, should have a successor"); 596 let new_block = func.dfg.blocks.add(); 597 func.layout.split_block(new_block, next_inst); 598 599 // `try_call[_indirect]` instructions do not define values themselves; 600 // the normal-return block has parameters for the results. So remove 601 // this instruction's results, create an associated block parameter for 602 // each of them, and alias them to the new block parameter. 
603 let old_results = SmallValueVec::from_iter(func.dfg.inst_results(inst).iter().copied()); 604 func.dfg.detach_inst_results(inst); 605 for old_result in old_results { 606 let ty = func.dfg.value_type(old_result); 607 let new_block_param = func.dfg.append_block_param(new_block, ty); 608 func.dfg.change_to_alias(old_result, new_block_param); 609 } 610 611 new_block 612 }; 613 614 // Clone the caller's exception table, updating it for use in the current 615 // `call[_indirect]` instruction as it becomes a `try_call[_indirect]`. 616 let clone_exception_table_for_this_call = |func: &mut ir::Function, 617 signature: ir::SigRef, 618 new_block: ir::Block| 619 -> ir::ExceptionTable { 620 let mut exception = func.stencil.dfg.exception_tables[call_exception_table] 621 .deep_clone(&mut func.stencil.dfg.value_lists); 622 623 *exception.signature_mut() = signature; 624 625 let returns_len = func.dfg.signatures[signature].returns.len(); 626 let returns_len = u32::try_from(returns_len).unwrap(); 627 628 *exception.normal_return_mut() = ir::BlockCall::new( 629 new_block, 630 (0..returns_len).map(|i| ir::BlockArg::TryCallRet(i)), 631 &mut func.dfg.value_lists, 632 ); 633 634 func.dfg.exception_tables.push(exception) 635 }; 636 637 for inst in allocs.calls_needing_exception_table_fixup.drain(..) { 638 debug_assert!(func.dfg.insts[inst].opcode().is_call()); 639 debug_assert!(!func.dfg.insts[inst].opcode().is_return()); 640 match func.dfg.insts[inst] { 641 // current_block: 642 // preds... 643 // rets... = call f(args...) 644 // succs... 645 // 646 // becomes 647 // 648 // current_block: 649 // preds... 650 // try_call f(args...), new_block(rets...), [call_exception_table...] 651 // new_block(rets...): 652 // succs... 
653 ir::InstructionData::Call { 654 opcode: ir::Opcode::Call, 655 args, 656 func_ref, 657 } => { 658 let new_block = split_block_for_new_try_call(func, inst); 659 let signature = func.dfg.ext_funcs[func_ref].signature; 660 let exception = clone_exception_table_for_this_call(func, signature, new_block); 661 func.dfg.insts[inst] = ir::InstructionData::TryCall { 662 opcode: ir::Opcode::TryCall, 663 args, 664 func_ref, 665 exception, 666 }; 667 } 668 669 // current_block: 670 // preds... 671 // rets... = call_indirect sig, val(args...) 672 // succs... 673 // 674 // becomes 675 // 676 // current_block: 677 // preds... 678 // try_call_indirect sig, val(args...), new_block(rets...), [call_exception_table...] 679 // new_block(rets...): 680 // succs... 681 ir::InstructionData::CallIndirect { 682 opcode: ir::Opcode::CallIndirect, 683 args, 684 sig_ref, 685 } => { 686 let new_block = split_block_for_new_try_call(func, inst); 687 let exception = clone_exception_table_for_this_call(func, sig_ref, new_block); 688 func.dfg.insts[inst] = ir::InstructionData::TryCallIndirect { 689 opcode: ir::Opcode::TryCallIndirect, 690 args, 691 exception, 692 }; 693 } 694 695 // For `try_call[_indirect]` instructions, we just need to merge the 696 // exception tables. 697 ir::InstructionData::TryCall { 698 opcode: ir::Opcode::TryCall, 699 exception, 700 .. 701 } 702 | ir::InstructionData::TryCallIndirect { 703 opcode: ir::Opcode::TryCallIndirect, 704 exception, 705 .. 706 } => { 707 // Construct a new exception table that consists of 708 // the inlined instruction's exception table match 709 // sequence, with the inlining site's exception table 710 // appended. This will ensure that the first-match 711 // semantics emulates the original behavior of 712 // matching in the inner frame first. 
713 let sig = func.dfg.exception_tables[exception].signature(); 714 let normal_return = *func.dfg.exception_tables[exception].normal_return(); 715 let exception_data = ExceptionTableData::new( 716 sig, 717 normal_return, 718 func.dfg.exception_tables[exception] 719 .items() 720 .chain(func.dfg.exception_tables[call_exception_table].items()), 721 ) 722 .deep_clone(&mut func.dfg.value_lists); 723 724 func.dfg.exception_tables[exception] = exception_data; 725 } 726 727 otherwise => unreachable!("unknown non-return call instruction: {otherwise:?}"), 728 } 729 } 730 } 731 732 /// After having created an inlined version of a callee instruction that returns 733 /// in the caller, we need to fix it up so that it doesn't actually return 734 /// (since we are already in the caller's frame) and instead just jumps to the 735 /// control-flow join point. 736 fn fixup_inst_that_returns( 737 allocs: &mut InliningAllocs, 738 func: &mut ir::Function, 739 callee: &ir::Function, 740 entity_map: &EntityMap, 741 call_opcode: ir::Opcode, 742 inlined_inst: ir::Inst, 743 callee_inst: ir::Inst, 744 return_block: ir::Block, 745 call_stack_map: Option<&[ir::UserStackMapEntry]>, 746 ) { 747 debug_assert!(func.dfg.insts[inlined_inst].opcode().is_return()); 748 match func.dfg.insts[inlined_inst] { 749 // return rets... 750 // 751 // becomes 752 // 753 // jump return_block(rets...) 754 ir::InstructionData::MultiAry { 755 opcode: ir::Opcode::Return, 756 args, 757 } => { 758 let rets = SmallBlockArgVec::from_iter( 759 args.as_slice(&func.dfg.value_lists) 760 .iter() 761 .copied() 762 .map(|v| v.into()), 763 ); 764 func.dfg.replace(inlined_inst).jump(return_block, &rets); 765 } 766 767 // return_call f(args...) 768 // 769 // becomes 770 // 771 // rets... = call f(args...) 772 // jump return_block(rets...) 
773 ir::InstructionData::Call { 774 opcode: ir::Opcode::ReturnCall, 775 args, 776 func_ref, 777 } => { 778 func.dfg.insts[inlined_inst] = ir::InstructionData::Call { 779 opcode: ir::Opcode::Call, 780 args, 781 func_ref, 782 }; 783 func.dfg.make_inst_results(inlined_inst, ir::types::INVALID); 784 785 append_stack_map_entries( 786 func, 787 callee, 788 &entity_map, 789 call_stack_map, 790 inlined_inst, 791 callee_inst, 792 ); 793 794 let rets = SmallBlockArgVec::from_iter( 795 func.dfg 796 .inst_results(inlined_inst) 797 .iter() 798 .copied() 799 .map(|v| v.into()), 800 ); 801 let mut cursor = FuncCursor::new(func); 802 cursor.goto_after_inst(inlined_inst); 803 cursor.ins().jump(return_block, &rets); 804 805 if call_opcode == ir::Opcode::TryCall { 806 allocs 807 .calls_needing_exception_table_fixup 808 .push(inlined_inst); 809 } 810 } 811 812 // return_call_indirect val(args...) 813 // 814 // becomes 815 // 816 // rets... = call_indirect val(args...) 817 // jump return_block(rets...) 818 ir::InstructionData::CallIndirect { 819 opcode: ir::Opcode::ReturnCallIndirect, 820 args, 821 sig_ref, 822 } => { 823 func.dfg.insts[inlined_inst] = ir::InstructionData::CallIndirect { 824 opcode: ir::Opcode::CallIndirect, 825 args, 826 sig_ref, 827 }; 828 func.dfg.make_inst_results(inlined_inst, ir::types::INVALID); 829 830 append_stack_map_entries( 831 func, 832 callee, 833 &entity_map, 834 call_stack_map, 835 inlined_inst, 836 callee_inst, 837 ); 838 839 let rets = SmallBlockArgVec::from_iter( 840 func.dfg 841 .inst_results(inlined_inst) 842 .iter() 843 .copied() 844 .map(|v| v.into()), 845 ); 846 let mut cursor = FuncCursor::new(func); 847 cursor.goto_after_inst(inlined_inst); 848 cursor.ins().jump(return_block, &rets); 849 850 if call_opcode == ir::Opcode::TryCall { 851 allocs 852 .calls_needing_exception_table_fixup 853 .push(inlined_inst); 854 } 855 } 856 857 inst_data => unreachable!( 858 "should have handled all `is_return() == true` instructions above; \ 859 got 
{inst_data:?}"
        ),
    }
}

/// An `InstructionMapper` implementation that remaps a callee instruction's
/// entity references to their new indices in the caller function.
///
/// Values and constants are looked up through `allocs`, while blocks, global
/// values, signatures, function references, stack slots and immediates are
/// translated through `entity_map`. Entities that own out-of-line storage
/// (value lists, jump tables, exception tables, block calls) are rebuilt in
/// `func`.
struct InliningInstRemapper<'a> {
    /// Inlining state holding the callee-to-caller value and constant maps.
    allocs: &'a InliningAllocs,
    /// The caller function that receives the remapped entities.
    func: &'a mut ir::Function,
    /// The callee function whose entity references are being remapped.
    callee: &'a ir::Function,
    /// Translation from callee entities to their inlined caller entities.
    entity_map: &'a EntityMap,
}

impl<'a> ir::instructions::InstructionMapper for InliningInstRemapper<'a> {
    /// Look up the caller value that a callee value was inlined as.
    ///
    /// Panics if the callee value has not been given an inlined value yet.
    fn map_value(&mut self, value: ir::Value) -> ir::Value {
        self.allocs.get_inlined_value(self.callee, value).expect(
            "defs come before uses; we should have already inlined all values \
             used by an instruction",
        )
    }

    /// Build a new value list in the caller's value-list pool containing the
    /// inlined counterpart of each value in the callee's list.
    fn map_value_list(&mut self, value_list: ir::ValueList) -> ir::ValueList {
        let mut inlined_list = ir::ValueList::new();
        // The source list is read from the callee's pool; the new list is
        // pushed into the caller's pool.
        for callee_val in value_list.as_slice(&self.callee.dfg.value_lists) {
            let inlined_val = self.map_value(*callee_val);
            inlined_list.push(inlined_val, &mut self.func.dfg.value_lists);
        }
        inlined_list
    }

    /// Translate a callee global value via the entity map.
    fn map_global_value(&mut self, global_value: ir::GlobalValue) -> ir::GlobalValue {
        self.entity_map.inlined_global_value(global_value)
    }

    /// Create a new jump table in the caller whose default target and entries
    /// are the remapped block calls of the callee's jump table.
    fn map_jump_table(&mut self, jump_table: ir::JumpTable) -> ir::JumpTable {
        let inlined_default =
            self.map_block_call(self.callee.dfg.jump_tables[jump_table].default_block());
        let inlined_table = self.callee.dfg.jump_tables[jump_table]
            .as_slice()
            .iter()
            .map(|callee_block_call| self.map_block_call(*callee_block_call))
            .collect::<SmallBlockCallVec>();
        self.func
            .dfg
            .jump_tables
            .push(ir::JumpTableData::new(inlined_default, &inlined_table))
    }

    /// Create a new exception table in the caller with the callee table's
    /// signature, normal-return block call, and items all remapped.
    fn map_exception_table(&mut self, exception_table: ir::ExceptionTable) -> ir::ExceptionTable {
        // Shadow the handle with a reference to the callee's table data.
        let exception_table = &self.callee.dfg.exception_tables[exception_table];
        let inlined_sig_ref = self.map_sig_ref(exception_table.signature());
        let inlined_normal_return = self.map_block_call(*exception_table.normal_return());
        // Tags are carried over unchanged; only the block calls and context
        // values embedded in each item are remapped.
        let inlined_table = exception_table
            .items()
            .map(|item| match item {
                ExceptionTableItem::Tag(tag, block_call) => {
                    ExceptionTableItem::Tag(tag, self.map_block_call(block_call))
                }
                ExceptionTableItem::Default(block_call) => {
                    ExceptionTableItem::Default(self.map_block_call(block_call))
                }
                ExceptionTableItem::Context(value) => {
                    ExceptionTableItem::Context(self.map_value(value))
                }
            })
            .collect::<SmallVec<[_; 8]>>();
        self.func
            .dfg
            .exception_tables
            .push(ir::ExceptionTableData::new(
                inlined_sig_ref,
                inlined_normal_return,
                inlined_table,
            ))
    }

    /// Rebuild a callee block call in the caller: the target block is
    /// translated via the entity map and each value argument is remapped.
    fn map_block_call(&mut self, block_call: ir::BlockCall) -> ir::BlockCall {
        let callee_block = block_call.block(&self.callee.dfg.value_lists);
        let inlined_block = self.entity_map.inlined_block(callee_block);
        let args = block_call
            .args(&self.callee.dfg.value_lists)
            .map(|arg| match arg {
                ir::BlockArg::Value(value) => self.map_value(value).into(),
                // These arguments do not name an `ir::Value`, so there is
                // nothing to remap and they are passed through unchanged.
                ir::BlockArg::TryCallRet(_) | ir::BlockArg::TryCallExn(_) => arg,
            })
            .collect::<SmallBlockArgVec>();
        ir::BlockCall::new(inlined_block, args, &mut self.func.dfg.value_lists)
    }

    /// Translate a callee function reference via the entity map.
    fn map_func_ref(&mut self, func_ref: ir::FuncRef) -> ir::FuncRef {
        self.entity_map.inlined_func_ref(func_ref)
    }

    /// Translate a callee signature reference via the entity map.
    fn map_sig_ref(&mut self, sig_ref: ir::SigRef) -> ir::SigRef {
        self.entity_map.inlined_sig_ref(sig_ref)
    }

    /// Translate a callee stack slot via the entity map.
    fn map_stack_slot(&mut self, stack_slot: ir::StackSlot) -> ir::StackSlot {
        self.entity_map.inlined_stack_slot(stack_slot)
    }

    /// Translate a callee dynamic stack slot via the entity map.
    fn map_dynamic_stack_slot(
        &mut self,
        dynamic_stack_slot: ir::DynamicStackSlot,
    ) -> ir::DynamicStackSlot {
        self.entity_map
            .inlined_dynamic_stack_slot(dynamic_stack_slot)
    }

    /// Look up the caller constant that a callee constant was inlined as.
    ///
    /// Constants are not translated through the entity map; they are read
    /// from the `allocs.constants` table instead. Panics if the callee
    /// constant has no entry.
    fn map_constant(&mut self, constant: ir::Constant) -> ir::Constant {
        self.allocs
            .constants
            .get(constant)
            .and_then(|o| o.expand())
            .expect("should have inlined all callee constants")
    }

    /// Translate a callee immediate via the entity map.
    fn map_immediate(&mut self, immediate: ir::Immediate) -> ir::Immediate {
        self.entity_map.inlined_immediate(immediate)
    }
}

/// Inline the callee's layout into the caller's layout.
///
/// The inlined blocks are inserted immediately after `call_block`, in the
/// same relative order as the callee's own layout.
///
/// Returns the last inlined block in the layout (or `call_block` itself when
/// the callee's layout has no blocks).
fn inline_block_layout(
    func: &mut ir::Function,
    call_block: ir::Block,
    callee: &ir::Function,
    entity_map: &EntityMap,
) -> ir::Block {
    debug_assert!(func.layout.is_block_inserted(call_block));

    // Iterate over callee blocks in layout order, inserting their associated
    // inlined block into the caller's layout.
    let mut prev_inlined_block = call_block;
    let mut next_callee_block = callee.layout.entry_block();
    while let Some(callee_block) = next_callee_block {
        debug_assert!(func.layout.is_block_inserted(prev_inlined_block));

        let inlined_block = entity_map.inlined_block(callee_block);
        func.layout
            .insert_block_after(inlined_block, prev_inlined_block);

        // Chain the insertions so each block lands after the previous one.
        prev_inlined_block = inlined_block;
        next_callee_block = callee.layout.next_block(callee_block);
    }

    debug_assert!(func.layout.is_block_inserted(prev_inlined_block));
    prev_inlined_block
}

/// Split the call instruction's block just after the call instruction to create
/// the point where control-flow joins after the inlined callee "returns".
///
/// Note that tail calls do not return to the caller and therefore do not have a
/// control-flow join point.
///
/// Returns `Some(block)` with the join block when one exists: a freshly split
/// block for a non-terminator call, or either the existing normal-return
/// block or a new forwarding block for a `try_call`. Returns `None` when the
/// call is a block terminator that is not a `try_call`.
fn split_off_return_block(
    func: &mut ir::Function,
    call_inst: ir::Inst,
    opcode: ir::Opcode,
    callee: &ir::Function,
) -> Option<ir::Block> {
    // When the `call_inst` is not a block terminator, we need to split the
    // block.
    let return_block = func.layout.next_inst(call_inst).map(|next_inst| {
        let return_block = func.dfg.blocks.add();
        func.layout.split_block(return_block, next_inst);

        // Add block parameters for each return value and alias the call
        // instruction's results to them.
        let old_results =
            SmallValueVec::from_iter(func.dfg.inst_results(call_inst).iter().copied());
        debug_assert_eq!(old_results.len(), callee.signature.returns.len());
        func.dfg.detach_inst_results(call_inst);
        for (abi, old_val) in callee.signature.returns.iter().zip(old_results) {
            debug_assert_eq!(abi.value_type, func.dfg.value_type(old_val));
            let ret_param = func.dfg.append_block_param(return_block, abi.value_type);
            func.dfg.change_to_alias(old_val, ret_param);
        }

        return_block
    });

    // When the `call_inst` is a block terminator, then it is either a
    // `return_call` or a `try_call`:
    //
    // * For `return_call`s, we don't have a control-flow join point, because
    //   the caller permanently transfers control to the callee.
    //
    // * For `try_call`s, we probably already have a block for the control-flow
    //   join point, but it isn't guaranteed: the `try_call` might ignore the
    //   call's returns and not forward them to the normal-return block or it
    //   might also pass additional arguments. We can only reuse the existing
    //   normal-return block when the `try_call` forwards exactly our callee's
    //   returns to that block (and therefore that block's parameter types also
    //   exactly match the callee's return types). Otherwise, we must create a new
    //   return block that forwards to the existing normal-return
    //   block. (Elsewhere, at the end of inlining, we will also update any inlined
    //   calls to forward any raised exceptions to the caller's exception table,
    //   as necessary.)
    //
    //   Finally, note that reusing the normal-return's target block is just an
    //   optimization to emit a simpler CFG when we can, and is not
    //   fundamentally required for correctness. We could always insert a
    //   temporary block as our control-flow join point that then forwards to
    //   the normal-return's target block. However, at the time of writing,
    //   Cranelift doesn't currently do any jump-threading or branch
    //   simplification in the mid-end, and removing unnecessary blocks in this
    //   way can help some subsequent mid-end optimizations. If, in the future,
    //   we gain support for jump-threading optimizations in the mid-end, we can
    //   come back and simplify the below code a bit to always generate the
    //   temporary block, and then rely on the subsequent optimizations to clean
    //   everything up.
    debug_assert_eq!(
        return_block.is_none(),
        opcode == ir::Opcode::ReturnCall || opcode == ir::Opcode::TryCall,
    );
    return_block.or_else(|| match func.dfg.insts[call_inst] {
        ir::InstructionData::TryCall {
            opcode: ir::Opcode::TryCall,
            args: _,
            func_ref: _,
            exception,
        } => {
            let normal_return = func.dfg.exception_tables[exception].normal_return();
            let normal_return_block = normal_return.block(&func.dfg.value_lists);

            // Check to see if we can reuse the existing normal-return block.
            {
                // Reuse requires the arguments to be exactly
                // `TryCallRet(0), TryCallRet(1), ...`, one per callee return.
                let normal_return_args = normal_return.args(&func.dfg.value_lists);
                if normal_return_args.len() == callee.signature.returns.len()
                    && normal_return_args.enumerate().all(|(i, arg)| {
                        let i = u32::try_from(i).unwrap();
                        arg == ir::BlockArg::TryCallRet(i)
                    })
                {
                    return Some(normal_return_block);
                }
            }

            // Okay, we cannot reuse the normal-return block. Create a new block
            // that has the expected block parameter types and have it jump to
            // the normal-return block.
            let return_block = func.dfg.blocks.add();
            func.layout.insert_block(return_block, normal_return_block);

            let return_block_params = callee
                .signature
                .returns
                .iter()
                .map(|abi| func.dfg.append_block_param(return_block, abi.value_type))
                .collect::<SmallValueVec>();

            // Translate the `try_call`'s normal-return arguments into plain
            // values usable by a `jump`: the i-th call return becomes the
            // i-th parameter of the new return block.
            let normal_return_args = func.dfg.exception_tables[exception]
                .normal_return()
                .args(&func.dfg.value_lists)
                .collect::<SmallBlockArgVec>();
            let jump_args = normal_return_args
                .into_iter()
                .map(|arg| match arg {
                    ir::BlockArg::Value(value) => ir::BlockArg::Value(value),
                    ir::BlockArg::TryCallRet(i) => {
                        let i = usize::try_from(i).unwrap();
                        ir::BlockArg::Value(return_block_params[i])
                    }
                    ir::BlockArg::TryCallExn(_) => {
                        unreachable!("normal-return edges cannot use exceptional results")
                    }
                })
                .collect::<SmallBlockArgVec>();

            let mut cursor = FuncCursor::new(func);
            cursor.goto_first_insertion_point(return_block);
            cursor.ins().jump(normal_return_block, &jump_args);

            Some(return_block)
        }
        // Any other terminator has no join point.
        _ => None,
    })
}

/// Replace the caller's call instruction with a jump to the caller's inlined
/// copy of the callee's entry block.
///
/// Also associates the callee's parameters with the caller's arguments in our
/// value map.
///
/// Returns the caller's stack map entries, if any.
1149 fn replace_call_with_jump( 1150 allocs: &mut InliningAllocs, 1151 func: &mut ir::Function, 1152 call_inst: ir::Inst, 1153 callee: &ir::Function, 1154 entity_map: &EntityMap, 1155 ) -> Option<ir::UserStackMapEntryVec> { 1156 trace!("Replacing `call` with `jump`"); 1157 trace!( 1158 " --> call instruction: {call_inst:?}: {}", 1159 func.dfg.display_inst(call_inst) 1160 ); 1161 1162 let callee_entry_block = callee 1163 .layout 1164 .entry_block() 1165 .expect("callee function should have an entry block"); 1166 let callee_param_values = callee.dfg.block_params(callee_entry_block); 1167 let caller_arg_values = SmallValueVec::from_iter(func.dfg.inst_args(call_inst).iter().copied()); 1168 debug_assert_eq!(callee_param_values.len(), caller_arg_values.len()); 1169 debug_assert_eq!(callee_param_values.len(), callee.signature.params.len()); 1170 for (abi, (callee_param_value, caller_arg_value)) in callee 1171 .signature 1172 .params 1173 .iter() 1174 .zip(callee_param_values.into_iter().zip(caller_arg_values)) 1175 { 1176 debug_assert_eq!(abi.value_type, callee.dfg.value_type(*callee_param_value)); 1177 debug_assert_eq!(abi.value_type, func.dfg.value_type(caller_arg_value)); 1178 allocs.set_inlined_value(callee, *callee_param_value, caller_arg_value); 1179 } 1180 1181 // Replace the caller's call instruction with a jump to the caller's inlined 1182 // copy of the callee's entry block. 1183 // 1184 // Note that the call block dominates the inlined entry block (and also all 1185 // other inlined blocks) so we can reference the arguments directly, and do 1186 // not need to add block parameters to the inlined entry block. 
1187 let inlined_entry_block = entity_map.inlined_block(callee_entry_block); 1188 func.dfg.replace(call_inst).jump(inlined_entry_block, &[]); 1189 trace!( 1190 " --> replaced with jump instruction: {call_inst:?}: {}", 1191 func.dfg.display_inst(call_inst) 1192 ); 1193 1194 let stack_map_entries = func.dfg.take_user_stack_map_entries(call_inst); 1195 stack_map_entries 1196 } 1197 1198 /// Keeps track of mapping callee entities to their associated inlined caller 1199 /// entities. 1200 #[derive(Default)] 1201 struct EntityMap { 1202 // Rather than doing an implicit, demand-based, DCE'ing translation of 1203 // entities, which would require maps from each callee entity to its 1204 // associated caller entity, we copy all entities into the caller, remember 1205 // each entity's initial offset, and then mapping from the callee to the 1206 // inlined caller entity is just adding that initial offset to the callee's 1207 // index. This should be both faster and simpler than the alternative. Most 1208 // of these sets are relatively small, and they rarely have too much dead 1209 // code in practice, so this is a good trade off. 1210 // 1211 // Note that there are a few kinds of entities that are excluded from the 1212 // `EntityMap`, and for which we do actually take the demand-based approach: 1213 // values and value lists being the notable ones. 
1214 block_offset: Option<u32>, 1215 global_value_offset: Option<u32>, 1216 sig_ref_offset: Option<u32>, 1217 func_ref_offset: Option<u32>, 1218 stack_slot_offset: Option<u32>, 1219 dynamic_type_offset: Option<u32>, 1220 dynamic_stack_slot_offset: Option<u32>, 1221 immediate_offset: Option<u32>, 1222 } 1223 1224 impl EntityMap { 1225 fn inlined_block(&self, callee_block: ir::Block) -> ir::Block { 1226 let offset = self 1227 .block_offset 1228 .expect("must create inlined `ir::Block`s before calling `EntityMap::inlined_block`"); 1229 ir::Block::from_u32(offset + callee_block.as_u32()) 1230 } 1231 1232 fn iter_inlined_blocks(&self, func: &ir::Function) -> impl Iterator<Item = ir::Block> + use<> { 1233 let start = self.block_offset.expect( 1234 "must create inlined `ir::Block`s before calling `EntityMap::iter_inlined_blocks`", 1235 ); 1236 1237 let end = func.dfg.blocks.len(); 1238 let end = u32::try_from(end).unwrap(); 1239 1240 (start..end).map(|i| ir::Block::from_u32(i)) 1241 } 1242 1243 fn inlined_global_value(&self, callee_global_value: ir::GlobalValue) -> ir::GlobalValue { 1244 let offset = self 1245 .global_value_offset 1246 .expect("must create inlined `ir::GlobalValue`s before calling `EntityMap::inlined_global_value`"); 1247 ir::GlobalValue::from_u32(offset + callee_global_value.as_u32()) 1248 } 1249 1250 fn inlined_sig_ref(&self, callee_sig_ref: ir::SigRef) -> ir::SigRef { 1251 let offset = self.sig_ref_offset.expect( 1252 "must create inlined `ir::SigRef`s before calling `EntityMap::inlined_sig_ref`", 1253 ); 1254 ir::SigRef::from_u32(offset + callee_sig_ref.as_u32()) 1255 } 1256 1257 fn inlined_func_ref(&self, callee_func_ref: ir::FuncRef) -> ir::FuncRef { 1258 let offset = self.func_ref_offset.expect( 1259 "must create inlined `ir::FuncRef`s before calling `EntityMap::inlined_func_ref`", 1260 ); 1261 ir::FuncRef::from_u32(offset + callee_func_ref.as_u32()) 1262 } 1263 1264 fn inlined_stack_slot(&self, callee_stack_slot: ir::StackSlot) -> ir::StackSlot { 
1265 let offset = self.stack_slot_offset.expect( 1266 "must create inlined `ir::StackSlot`s before calling `EntityMap::inlined_stack_slot`", 1267 ); 1268 ir::StackSlot::from_u32(offset + callee_stack_slot.as_u32()) 1269 } 1270 1271 fn inlined_dynamic_type(&self, callee_dynamic_type: ir::DynamicType) -> ir::DynamicType { 1272 let offset = self.dynamic_type_offset.expect( 1273 "must create inlined `ir::DynamicType`s before calling `EntityMap::inlined_dynamic_type`", 1274 ); 1275 ir::DynamicType::from_u32(offset + callee_dynamic_type.as_u32()) 1276 } 1277 1278 fn inlined_dynamic_stack_slot( 1279 &self, 1280 callee_dynamic_stack_slot: ir::DynamicStackSlot, 1281 ) -> ir::DynamicStackSlot { 1282 let offset = self.dynamic_stack_slot_offset.expect( 1283 "must create inlined `ir::DynamicStackSlot`s before calling `EntityMap::inlined_dynamic_stack_slot`", 1284 ); 1285 ir::DynamicStackSlot::from_u32(offset + callee_dynamic_stack_slot.as_u32()) 1286 } 1287 1288 fn inlined_immediate(&self, callee_immediate: ir::Immediate) -> ir::Immediate { 1289 let offset = self.immediate_offset.expect( 1290 "must create inlined `ir::Immediate`s before calling `EntityMap::inlined_immediate`", 1291 ); 1292 ir::Immediate::from_u32(offset + callee_immediate.as_u32()) 1293 } 1294 } 1295 1296 /// Translate all of the callee's various entities into the caller, producing an 1297 /// `EntityMap` that can be used to translate callee entity references into 1298 /// inlined caller entity references. 
1299 fn create_entities( 1300 allocs: &mut InliningAllocs, 1301 func: &mut ir::Function, 1302 callee: &ir::Function, 1303 ) -> EntityMap { 1304 let mut entity_map = EntityMap::default(); 1305 1306 entity_map.block_offset = Some(create_blocks(allocs, func, callee)); 1307 entity_map.global_value_offset = Some(create_global_values(func, callee)); 1308 entity_map.sig_ref_offset = Some(create_sig_refs(func, callee)); 1309 create_user_external_name_refs(allocs, func, callee); 1310 entity_map.func_ref_offset = Some(create_func_refs(allocs, func, callee, &entity_map)); 1311 entity_map.stack_slot_offset = Some(create_stack_slots(func, callee)); 1312 entity_map.dynamic_type_offset = Some(create_dynamic_types(func, callee, &entity_map)); 1313 entity_map.dynamic_stack_slot_offset = 1314 Some(create_dynamic_stack_slots(func, callee, &entity_map)); 1315 entity_map.immediate_offset = Some(create_immediates(func, callee)); 1316 1317 // `ir::ConstantData` is deduplicated, so we cannot use our offset scheme 1318 // for `ir::Constant`s. Nonetheless, we still insert them into the caller 1319 // now, at the same time as the rest of our entities. 1320 create_constants(allocs, func, callee); 1321 1322 entity_map 1323 } 1324 1325 /// Create inlined blocks in the caller for every block in the callee. 
1326 fn create_blocks( 1327 allocs: &mut InliningAllocs, 1328 func: &mut ir::Function, 1329 callee: &ir::Function, 1330 ) -> u32 { 1331 let offset = func.dfg.blocks.len(); 1332 let offset = u32::try_from(offset).unwrap(); 1333 1334 func.dfg.blocks.reserve(callee.dfg.blocks.len()); 1335 for callee_block in callee.dfg.blocks.iter() { 1336 let caller_block = func.dfg.blocks.add(); 1337 trace!("Callee {callee_block:?} = inlined {caller_block:?}"); 1338 1339 if callee.layout.is_cold(callee_block) { 1340 func.layout.set_cold(caller_block); 1341 } 1342 1343 // Note: the entry block does not need parameters because the only 1344 // predecessor is the call block and we associate the callee's 1345 // parameters with the caller's arguments directly. 1346 if callee.layout.entry_block() != Some(callee_block) { 1347 for callee_param in callee.dfg.blocks[callee_block].params(&callee.dfg.value_lists) { 1348 let ty = callee.dfg.value_type(*callee_param); 1349 let caller_param = func.dfg.append_block_param(caller_block, ty); 1350 1351 allocs.set_inlined_value(callee, *callee_param, caller_param); 1352 } 1353 } 1354 } 1355 1356 offset 1357 } 1358 1359 /// Copy and translate global values from the callee into the caller. 1360 fn create_global_values(func: &mut ir::Function, callee: &ir::Function) -> u32 { 1361 let gv_offset = func.global_values.len(); 1362 let gv_offset = u32::try_from(gv_offset).unwrap(); 1363 1364 func.global_values.reserve(callee.global_values.len()); 1365 for gv in callee.global_values.values() { 1366 func.global_values.push(match gv { 1367 // These kinds of global values reference other global values, so we 1368 // need to fixup that reference. 
1369 ir::GlobalValueData::Load { 1370 base, 1371 offset, 1372 global_type, 1373 flags, 1374 } => ir::GlobalValueData::Load { 1375 base: ir::GlobalValue::from_u32(base.as_u32() + gv_offset), 1376 offset: *offset, 1377 global_type: *global_type, 1378 flags: *flags, 1379 }, 1380 ir::GlobalValueData::IAddImm { 1381 base, 1382 offset, 1383 global_type, 1384 } => ir::GlobalValueData::IAddImm { 1385 base: ir::GlobalValue::from_u32(base.as_u32() + gv_offset), 1386 offset: *offset, 1387 global_type: *global_type, 1388 }, 1389 1390 // These kinds of global values do not reference other global 1391 // values, so we can just clone them. 1392 ir::GlobalValueData::VMContext 1393 | ir::GlobalValueData::Symbol { .. } 1394 | ir::GlobalValueData::DynScaleTargetConst { .. } => gv.clone(), 1395 }); 1396 } 1397 1398 gv_offset 1399 } 1400 1401 /// Copy `ir::SigRef`s from the callee into the caller. 1402 fn create_sig_refs(func: &mut ir::Function, callee: &ir::Function) -> u32 { 1403 let offset = func.dfg.signatures.len(); 1404 let offset = u32::try_from(offset).unwrap(); 1405 1406 func.dfg.signatures.reserve(callee.dfg.signatures.len()); 1407 for sig in callee.dfg.signatures.values() { 1408 func.dfg.signatures.push(sig.clone()); 1409 } 1410 1411 offset 1412 } 1413 1414 fn create_user_external_name_refs( 1415 allocs: &mut InliningAllocs, 1416 func: &mut ir::Function, 1417 callee: &ir::Function, 1418 ) { 1419 for (callee_named_func_ref, name) in callee.params.user_named_funcs().iter() { 1420 let caller_named_func_ref = func.declare_imported_user_function(name.clone()); 1421 allocs.user_external_name_refs[callee_named_func_ref] = Some(caller_named_func_ref).into(); 1422 } 1423 } 1424 1425 /// Translate `ir::FuncRef`s from the callee into the caller. 
1426 fn create_func_refs( 1427 allocs: &InliningAllocs, 1428 func: &mut ir::Function, 1429 callee: &ir::Function, 1430 entity_map: &EntityMap, 1431 ) -> u32 { 1432 let offset = func.dfg.ext_funcs.len(); 1433 let offset = u32::try_from(offset).unwrap(); 1434 1435 func.dfg.ext_funcs.reserve(callee.dfg.ext_funcs.len()); 1436 for ir::ExtFuncData { 1437 name, 1438 signature, 1439 colocated, 1440 } in callee.dfg.ext_funcs.values() 1441 { 1442 func.dfg.ext_funcs.push(ir::ExtFuncData { 1443 name: match name { 1444 ir::ExternalName::User(name_ref) => { 1445 ir::ExternalName::User(allocs.user_external_name_refs[*name_ref].expect( 1446 "should have translated all `ir::UserExternalNameRef`s before translating \ 1447 `ir::FuncRef`s", 1448 )) 1449 } 1450 ir::ExternalName::TestCase(_) 1451 | ir::ExternalName::LibCall(_) 1452 | ir::ExternalName::KnownSymbol(_) => name.clone(), 1453 }, 1454 signature: entity_map.inlined_sig_ref(*signature), 1455 colocated: *colocated, 1456 }); 1457 } 1458 1459 offset 1460 } 1461 1462 /// Copy stack slots from the callee into the caller. 1463 fn create_stack_slots(func: &mut ir::Function, callee: &ir::Function) -> u32 { 1464 let offset = func.sized_stack_slots.len(); 1465 let offset = u32::try_from(offset).unwrap(); 1466 1467 func.sized_stack_slots 1468 .reserve(callee.sized_stack_slots.len()); 1469 for slot in callee.sized_stack_slots.values() { 1470 func.sized_stack_slots.push(slot.clone()); 1471 } 1472 1473 offset 1474 } 1475 1476 /// Copy dynamic types from the callee into the caller. 
1477 fn create_dynamic_types( 1478 func: &mut ir::Function, 1479 callee: &ir::Function, 1480 entity_map: &EntityMap, 1481 ) -> u32 { 1482 let offset = func.dynamic_stack_slots.len(); 1483 let offset = u32::try_from(offset).unwrap(); 1484 1485 func.dfg 1486 .dynamic_types 1487 .reserve(callee.dfg.dynamic_types.len()); 1488 for ir::DynamicTypeData { 1489 base_vector_ty, 1490 dynamic_scale, 1491 } in callee.dfg.dynamic_types.values() 1492 { 1493 func.dfg.dynamic_types.push(ir::DynamicTypeData { 1494 base_vector_ty: *base_vector_ty, 1495 dynamic_scale: entity_map.inlined_global_value(*dynamic_scale), 1496 }); 1497 } 1498 1499 offset 1500 } 1501 1502 /// Copy dynamic stack slots from the callee into the caller. 1503 fn create_dynamic_stack_slots( 1504 func: &mut ir::Function, 1505 callee: &ir::Function, 1506 entity_map: &EntityMap, 1507 ) -> u32 { 1508 let offset = func.dynamic_stack_slots.len(); 1509 let offset = u32::try_from(offset).unwrap(); 1510 1511 func.dynamic_stack_slots 1512 .reserve(callee.dynamic_stack_slots.len()); 1513 for ir::DynamicStackSlotData { kind, dyn_ty } in callee.dynamic_stack_slots.values() { 1514 func.dynamic_stack_slots.push(ir::DynamicStackSlotData { 1515 kind: *kind, 1516 dyn_ty: entity_map.inlined_dynamic_type(*dyn_ty), 1517 }); 1518 } 1519 1520 offset 1521 } 1522 1523 /// Copy immediates from the callee into the caller. 1524 fn create_immediates(func: &mut ir::Function, callee: &ir::Function) -> u32 { 1525 let offset = func.dfg.immediates.len(); 1526 let offset = u32::try_from(offset).unwrap(); 1527 1528 func.dfg.immediates.reserve(callee.dfg.immediates.len()); 1529 for imm in callee.dfg.immediates.values() { 1530 func.dfg.immediates.push(imm.clone()); 1531 } 1532 1533 offset 1534 } 1535 1536 /// Copy constants from the callee into the caller. 
1537 fn create_constants(allocs: &mut InliningAllocs, func: &mut ir::Function, callee: &ir::Function) { 1538 for (callee_constant, data) in callee.dfg.constants.iter() { 1539 let inlined_constant = func.dfg.constants.insert(data.clone()); 1540 allocs.constants[*callee_constant] = Some(inlined_constant).into(); 1541 } 1542 } 1543