//! Function inlining infrastructure.
//!
//! This module provides "inlining as a library" to Cranelift users; it does
//! _not_ provide a complete, off-the-shelf inlining solution. Cranelift's
//! compilation context is per-function and does not encompass the full call
//! graph. It does not know which functions are hot and which are cold, which
//! have been marked the equivalent of `#[inline(never)]`, etc... Only the
//! Cranelift user can understand these aspects of the full compilation
//! pipeline, and these things can be very different between (say) Wasmtime and
//! `cg_clif`. Therefore, this module does not attempt to define heuristics for
//! when inlining a particular call is likely beneficial. This module only
//! provides hooks for the Cranelift user to define whether a given call should
//! be inlined or not, and the mechanics to inline a callee into a particular
//! call site when directed to do so by the Cranelift user.
//!
//! The top-level inlining entry point during Cranelift compilation is
//! [`Context::inline`][crate::Context::inline]. It takes an [`Inline`] trait
//! implementation, which is authored by the Cranelift user and directs
//! Cranelift whether to inline a particular call, and, when inlining, gives
//! Cranelift the body of the callee that is to be inlined.

use crate::cursor::{Cursor as _, FuncCursor};
use crate::ir::{self, ExceptionTableData, ExceptionTableItem, InstBuilder as _};
use crate::result::CodegenResult;
use crate::trace;
use crate::traversals::Dfs;
use alloc::borrow::Cow;
use alloc::vec::Vec;
use cranelift_entity::{SecondaryMap, packed_option::PackedOption};
use smallvec::SmallVec;

type SmallValueVec = SmallVec<[ir::Value; 8]>;
type SmallBlockArgVec = SmallVec<[ir::BlockArg; 8]>;
type SmallBlockCallVec = SmallVec<[ir::BlockCall; 8]>;

/// A command directing Cranelift whether or not to inline a particular call.
37 pub enum InlineCommand<'a> { 38 /// Keep the call as-is, out-of-line, and do not inline the callee. 39 KeepCall, 40 41 /// Inline the call, using this function as the body of the callee. 42 /// 43 /// It is the `Inline` implementor's responsibility to ensure that this 44 /// function is the correct callee. Providing the wrong function may result 45 /// in panics during compilation or incorrect runtime behavior. 46 Inline { 47 /// The callee function's body. 48 callee: Cow<'a, ir::Function>, 49 /// Whether to visit any function calls within the callee body after 50 /// inlining and consider them for further inlining. 51 visit_callee: bool, 52 }, 53 } 54 55 /// A trait for directing Cranelift whether to inline a particular call or not. 56 /// 57 /// Used in combination with the [`Context::inline`][crate::Context::inline] 58 /// method. 59 pub trait Inline { 60 /// A hook invoked for each direct call instruction in a function, whose 61 /// result determines whether Cranelift should inline a given call. 62 /// 63 /// The Cranelift user is responsible for defining their own hueristics and 64 /// deciding whether inlining the call is beneficial. 65 /// 66 /// When returning a function and directing Cranelift to inline its body 67 /// into the call site, the `Inline` implementer must ensure the following: 68 /// 69 /// * The returned function's signature exactly matches the `callee` 70 /// `FuncRef`'s signature. 71 /// 72 /// * The returned function must be legalized. 73 /// 74 /// * The returned function must be valid (i.e. it must pass the CLIF 75 /// verifier). 76 /// 77 /// * The returned function is a correct and valid implementation of the 78 /// `callee` according to your language's semantics. 79 /// 80 /// Failure to uphold these invariants may result in panics during 81 /// compilation or incorrect runtime behavior in the generated code. 
82 fn inline( 83 &mut self, 84 caller: &ir::Function, 85 call_inst: ir::Inst, 86 call_opcode: ir::Opcode, 87 callee: ir::FuncRef, 88 call_args: &[ir::Value], 89 ) -> InlineCommand<'_>; 90 } 91 92 impl<'a, T> Inline for &'a mut T 93 where 94 T: Inline, 95 { 96 fn inline( 97 &mut self, 98 caller: &ir::Function, 99 inst: ir::Inst, 100 opcode: ir::Opcode, 101 callee: ir::FuncRef, 102 args: &[ir::Value], 103 ) -> InlineCommand<'_> { 104 (*self).inline(caller, inst, opcode, callee, args) 105 } 106 } 107 108 /// Walk the given function, invoke the `Inline` implementation for each call 109 /// instruction, and inline the callee when directed to do so. 110 /// 111 /// Returns whether any call was inlined. 112 pub(crate) fn do_inlining( 113 func: &mut ir::Function, 114 mut inliner: impl Inline, 115 ) -> CodegenResult<bool> { 116 trace!("function {} before inlining: {}", func.name, func); 117 118 let mut inlined_any = false; 119 let mut allocs = InliningAllocs::default(); 120 121 let mut cursor = FuncCursor::new(func); 122 'block_loop: while let Some(block) = cursor.next_block() { 123 // Always keep track of our previous cursor position. Assuming that the 124 // current position is a function call that we will inline, then the 125 // previous position is just before the inlined callee function. After 126 // inlining a call, the Cranelift user can decide whether to consider 127 // any function calls in the inlined callee for further inlining or 128 // not. When they do, then we back up to this previous cursor position 129 // so that our traversal will then continue over the inlined body. 130 let mut prev_pos; 131 132 while let Some(inst) = { 133 prev_pos = cursor.position(); 134 cursor.next_inst() 135 } { 136 // Make sure that `block` is always `inst`'s block, even with all of 137 // our cursor-position-updating and block-splitting-during-inlining 138 // shenanigans below. 
139 debug_assert_eq!(Some(block), cursor.func.layout.inst_block(inst)); 140 141 match cursor.func.dfg.insts[inst] { 142 ir::InstructionData::Call { 143 opcode: opcode @ ir::Opcode::Call | opcode @ ir::Opcode::ReturnCall, 144 args: _, 145 func_ref, 146 } => { 147 trace!( 148 "considering call site for inlining: {inst}: {}", 149 cursor.func.dfg.display_inst(inst), 150 ); 151 let args = cursor.func.dfg.inst_args(inst); 152 match inliner.inline(&cursor.func, inst, opcode, func_ref, args) { 153 InlineCommand::KeepCall => { 154 trace!(" --> keeping call"); 155 } 156 InlineCommand::Inline { 157 callee, 158 visit_callee, 159 } => { 160 let last_inlined_block = inline_one( 161 &mut allocs, 162 cursor.func, 163 func_ref, 164 block, 165 inst, 166 opcode, 167 &callee, 168 None, 169 ); 170 inlined_any = true; 171 if visit_callee { 172 cursor.set_position(prev_pos); 173 } else { 174 // Arrange it so that the `next_block()` loop 175 // will continue to the next block that is not 176 // associated with the just-inlined callee. 
177 cursor.goto_bottom(last_inlined_block); 178 continue 'block_loop; 179 } 180 } 181 } 182 } 183 ir::InstructionData::TryCall { 184 opcode: opcode @ ir::Opcode::TryCall, 185 args: _, 186 func_ref, 187 exception, 188 } => { 189 trace!( 190 "considering call site for inlining: {inst}: {}", 191 cursor.func.dfg.display_inst(inst), 192 ); 193 let args = cursor.func.dfg.inst_args(inst); 194 match inliner.inline(&cursor.func, inst, opcode, func_ref, args) { 195 InlineCommand::KeepCall => { 196 trace!(" --> keeping call"); 197 } 198 InlineCommand::Inline { 199 callee, 200 visit_callee, 201 } => { 202 let last_inlined_block = inline_one( 203 &mut allocs, 204 cursor.func, 205 func_ref, 206 block, 207 inst, 208 opcode, 209 &callee, 210 Some(exception), 211 ); 212 inlined_any = true; 213 if visit_callee { 214 cursor.set_position(prev_pos); 215 } else { 216 // Arrange it so that the `next_block()` loop 217 // will continue to the next block that is not 218 // associated with the just-inlined callee. 219 cursor.goto_bottom(last_inlined_block); 220 continue 'block_loop; 221 } 222 } 223 } 224 } 225 ir::InstructionData::CallIndirect { .. } 226 | ir::InstructionData::TryCallIndirect { .. } => { 227 // Can't inline indirect calls; need to have some earlier 228 // pass rewrite them into direct calls first, when possible. 229 } 230 _ => { 231 debug_assert!( 232 !cursor.func.dfg.insts[inst].opcode().is_call(), 233 "should have matched all call instructions, but found: {inst}: {}", 234 cursor.func.dfg.display_inst(inst), 235 ); 236 } 237 } 238 } 239 } 240 241 if inlined_any { 242 trace!("function {} after inlining: {}", func.name, func); 243 } else { 244 trace!("function {} did not have any callees inlined", func.name); 245 } 246 247 Ok(inlined_any) 248 } 249 250 #[derive(Default)] 251 struct InliningAllocs { 252 /// Map from callee value to inlined caller value. 
253 values: SecondaryMap<ir::Value, PackedOption<ir::Value>>, 254 255 /// Map from callee constant to inlined caller constant. 256 /// 257 /// Not in `EntityMap` because these are hash-consed inside the 258 /// `ir::Function`. 259 constants: SecondaryMap<ir::Constant, PackedOption<ir::Constant>>, 260 261 /// Map from callee to inlined caller external name refs. 262 /// 263 /// Not in `EntityMap` because these are hash-consed inside the 264 /// `ir::Function`. 265 user_external_name_refs: 266 SecondaryMap<ir::UserExternalNameRef, PackedOption<ir::UserExternalNameRef>>, 267 268 /// The set of _caller_ inlined call instructions that need exception table 269 /// fixups at the end of inlining. 270 /// 271 /// This includes all kinds of non-returning calls, not just the literal 272 /// `call` instruction: `call_indirect`, `try_call`, `try_call_indirect`, 273 /// etc... However, it does not include `return_call` and 274 /// `return_call_indirect` instructions because the caller cannot catch 275 /// exceptions that those calls throw because the caller is no longer on the 276 /// stack as soon as they are executed. 277 /// 278 /// Note: this is a simple `Vec`, and not an `EntitySet`, because it is very 279 /// sparse: most of the caller's instructions are not inlined call 280 /// instructions. Additionally, we require deterministic iteration order and 281 /// do not require set-membership testing, so a hash set is not a good 282 /// choice either. 
283 calls_needing_exception_table_fixup: Vec<ir::Inst>, 284 } 285 286 impl InliningAllocs { 287 fn reset(&mut self, callee: &ir::Function) { 288 let InliningAllocs { 289 values, 290 constants, 291 user_external_name_refs, 292 calls_needing_exception_table_fixup, 293 } = self; 294 295 values.clear(); 296 values.resize(callee.dfg.len_values()); 297 298 constants.clear(); 299 constants.resize(callee.dfg.constants.len()); 300 301 user_external_name_refs.clear(); 302 user_external_name_refs.resize(callee.params.user_named_funcs().len()); 303 304 // Note: We do not reserve capacity for 305 // `calls_needing_exception_table_fixup` because it is a sparse set and 306 // we don't know how large it needs to be ahead of time. 307 calls_needing_exception_table_fixup.clear(); 308 } 309 310 fn set_inlined_value( 311 &mut self, 312 callee: &ir::Function, 313 callee_val: ir::Value, 314 inlined_val: ir::Value, 315 ) { 316 trace!(" --> callee {callee_val:?} = inlined {inlined_val:?}"); 317 debug_assert!(self.values[callee_val].is_none()); 318 let resolved_callee_val = callee.dfg.resolve_aliases(callee_val); 319 debug_assert!(self.values[resolved_callee_val].is_none()); 320 self.values[resolved_callee_val] = Some(inlined_val).into(); 321 } 322 323 fn get_inlined_value(&self, callee: &ir::Function, callee_val: ir::Value) -> Option<ir::Value> { 324 let resolved_callee_val = callee.dfg.resolve_aliases(callee_val); 325 self.values[resolved_callee_val].expand() 326 } 327 } 328 329 /// Inline one particular function call. 330 /// 331 /// Returns the last inlined block in the layout. 
332 fn inline_one( 333 allocs: &mut InliningAllocs, 334 func: &mut ir::Function, 335 callee_func_ref: ir::FuncRef, 336 call_block: ir::Block, 337 call_inst: ir::Inst, 338 call_opcode: ir::Opcode, 339 callee: &ir::Function, 340 call_exception_table: Option<ir::ExceptionTable>, 341 ) -> ir::Block { 342 trace!( 343 "Inlining call {call_inst:?}: {}\n\ 344 with callee = {callee:?}", 345 func.dfg.display_inst(call_inst) 346 ); 347 348 // Type check callee signature. 349 let expected_callee_sig = func.dfg.ext_funcs[callee_func_ref].signature; 350 let expected_callee_sig = &func.dfg.signatures[expected_callee_sig]; 351 assert_eq!(expected_callee_sig, &callee.signature); 352 353 allocs.reset(callee); 354 355 // First, append various callee entity arenas to the end of the caller's 356 // entity arenas. 357 let entity_map = create_entities(allocs, func, callee); 358 359 // Inlined prologue: split the call instruction's block at the point of the 360 // call and replace the call with a jump. 361 let return_block = split_off_return_block(func, call_inst, call_opcode, callee); 362 let call_stack_map = replace_call_with_jump(allocs, func, call_inst, callee, &entity_map); 363 364 // Prepare for translating the actual instructions by inserting the inlined 365 // blocks into the caller's layout in the same order that they appear in the 366 // callee. 367 let mut last_inlined_block = inline_block_layout(func, call_block, callee, &entity_map); 368 369 // Translate each instruction from the callee into the caller, 370 // appending them to their associated block in the caller. 371 // 372 // Note that we iterate over the callee with a pre-order traversal so that 373 // we see value defs before uses. 
374 for callee_block in Dfs::new().pre_order_iter(callee) { 375 let inlined_block = entity_map.inlined_block(callee_block); 376 trace!( 377 "Processing instructions in callee block {callee_block:?} (inlined block {inlined_block:?}" 378 ); 379 380 let mut next_callee_inst = callee.layout.first_inst(callee_block); 381 while let Some(callee_inst) = next_callee_inst { 382 trace!( 383 "Processing callee instruction {callee_inst:?}: {}", 384 callee.dfg.display_inst(callee_inst) 385 ); 386 387 assert_ne!( 388 callee.dfg.insts[callee_inst].opcode(), 389 ir::Opcode::GlobalValue, 390 "callee must already be legalized, we shouldn't see any `global_value` \ 391 instructions when inlining; found {callee_inst:?}: {}", 392 callee.dfg.display_inst(callee_inst) 393 ); 394 395 // Remap the callee instruction's entities and insert it into the 396 // caller's DFG. 397 let inlined_inst_data = callee.dfg.insts[callee_inst].map(InliningInstRemapper { 398 allocs: &allocs, 399 func, 400 callee, 401 entity_map: &entity_map, 402 }); 403 let inlined_inst = func.dfg.make_inst(inlined_inst_data); 404 func.layout.append_inst(inlined_inst, inlined_block); 405 406 let opcode = callee.dfg.insts[callee_inst].opcode(); 407 if opcode.is_return() { 408 // Instructions that return do not define any values, so we 409 // don't need to worry about that, but we do need to fix them up 410 // so that they return by jumping to our control-flow join 411 // block, rather than returning from the caller. 412 if let Some(return_block) = return_block { 413 fixup_inst_that_returns( 414 allocs, 415 func, 416 callee, 417 &entity_map, 418 call_opcode, 419 inlined_inst, 420 callee_inst, 421 return_block, 422 call_stack_map.as_ref().map(|es| &**es), 423 ); 424 } else { 425 // If we are inlining a callee that was invoked via 426 // `return_call`, we leave inlined return instructions 427 // as-is: there is no logical caller frame on the stack to 428 // continue to. 
429 debug_assert_eq!(call_opcode, ir::Opcode::ReturnCall); 430 } 431 } else { 432 // Make the instruction's result values. 433 let ctrl_typevar = callee.dfg.ctrl_typevar(callee_inst); 434 func.dfg.make_inst_results(inlined_inst, ctrl_typevar); 435 436 // Update the value map for this instruction's defs. 437 let callee_results = callee.dfg.inst_results(callee_inst); 438 let inlined_results = func.dfg.inst_results(inlined_inst); 439 debug_assert_eq!(callee_results.len(), inlined_results.len()); 440 for (callee_val, inlined_val) in callee_results.iter().zip(inlined_results) { 441 allocs.set_inlined_value(callee, *callee_val, *inlined_val); 442 } 443 444 if opcode.is_call() { 445 append_stack_map_entries( 446 func, 447 callee, 448 &entity_map, 449 call_stack_map.as_deref(), 450 inlined_inst, 451 callee_inst, 452 ); 453 454 // When we are inlining a `try_call` call site, we need to merge 455 // the call site's exception table into the inlined calls' 456 // exception tables. This can involve rewriting regular `call`s 457 // into `try_call`s, which requires mutating the CFG because 458 // `try_call` is a block terminator. However, we can't mutate 459 // the CFG in the middle of this traversal because we rely on 460 // the existence of a one-to-one mapping between the callee 461 // layout and the inlined layout. Instead, we record the set of 462 // inlined call instructions that will need fixing up, and 463 // perform that possibly-CFG-mutating exception table merging in 464 // a follow up pass, when we no longer rely on that one-to-one 465 // layout mapping. 
466 debug_assert_eq!( 467 call_opcode == ir::Opcode::TryCall, 468 call_exception_table.is_some() 469 ); 470 if call_opcode == ir::Opcode::TryCall { 471 allocs 472 .calls_needing_exception_table_fixup 473 .push(inlined_inst); 474 } 475 } 476 } 477 478 trace!( 479 " --> inserted inlined instruction {inlined_inst:?}: {}", 480 func.dfg.display_inst(inlined_inst) 481 ); 482 483 next_callee_inst = callee.layout.next_inst(callee_inst); 484 } 485 } 486 487 // We copied *all* callee blocks into the caller's layout, but only copied 488 // the callee instructions in *reachable* callee blocks into the caller's 489 // associated blocks. Therefore, any *unreachable* blocks are empty in the 490 // caller, which is invalid CLIF because all blocks must end in a 491 // terminator, so do a quick pass over the inlined blocks and remove any 492 // empty blocks from the caller's layout. 493 for block in entity_map.iter_inlined_blocks(func) { 494 if func.layout.is_block_inserted(block) && func.layout.first_inst(block).is_none() { 495 log::trace!("removing unreachable inlined block from layout: {block}"); 496 497 // If the block being removed is our last-inlined block, then back 498 // it up to the previous block in the layout, which will be the new 499 // last-inlined block after this one's removal. 500 if block == last_inlined_block { 501 last_inlined_block = func.layout.prev_block(last_inlined_block).expect( 502 "there will always at least be the block that contained the call we are \ 503 inlining", 504 ); 505 } 506 507 func.layout.remove_block(block); 508 } 509 } 510 511 // Final step: fixup the exception tables of any inlined calls when we are 512 // inlining a `try_call` site. 513 // 514 // Subtly, this requires rewriting non-catching `call[_indirect]` 515 // instructions into `try_call[_indirect]` instructions so that exceptions 516 // that unwound through the original callee frame and were caught by the 517 // caller's `try_call` do not unwind past this inlined frame. 
And turning a 518 // `call` into a `try_call` mutates the CFG, breaking our one-to-one mapping 519 // between callee blocks and inlined blocks, so we delay these fixups to 520 // this final step, when we no longer rely on that mapping. 521 debug_assert!( 522 allocs.calls_needing_exception_table_fixup.is_empty() || call_exception_table.is_some() 523 ); 524 debug_assert_eq!( 525 call_opcode == ir::Opcode::TryCall, 526 call_exception_table.is_some() 527 ); 528 if let Some(call_exception_table) = call_exception_table { 529 fixup_inlined_call_exception_tables(allocs, func, call_exception_table); 530 } 531 532 debug_assert!( 533 func.layout.is_block_inserted(last_inlined_block), 534 "last_inlined_block={last_inlined_block} should be inserted in the layout" 535 ); 536 last_inlined_block 537 } 538 539 /// Append stack map entries from the caller and callee to the given inlined 540 /// instruction. 541 fn append_stack_map_entries( 542 func: &mut ir::Function, 543 callee: &ir::Function, 544 entity_map: &EntityMap, 545 call_stack_map: Option<&[ir::UserStackMapEntry]>, 546 inlined_inst: ir::Inst, 547 callee_inst: ir::Inst, 548 ) { 549 // Add the caller's stack map to this call. These entries 550 // already refer to caller entities and do not need further 551 // translation. 552 func.dfg.append_user_stack_map_entries( 553 inlined_inst, 554 call_stack_map 555 .iter() 556 .flat_map(|entries| entries.iter().cloned()), 557 ); 558 559 // Append the callee's stack map to this call. These entries 560 // refer to callee entities and therefore do require 561 // translation into the caller's index space. 
562 func.dfg.append_user_stack_map_entries( 563 inlined_inst, 564 callee 565 .dfg 566 .user_stack_map_entries(callee_inst) 567 .iter() 568 .flat_map(|entries| entries.iter()) 569 .map(|entry| ir::UserStackMapEntry { 570 ty: entry.ty, 571 slot: entity_map.inlined_stack_slot(entry.slot), 572 offset: entry.offset, 573 }), 574 ); 575 } 576 577 /// Create or update the exception tables for any inlined call instructions: 578 /// when inlining at a `try_call` site, we must forward our exceptional edges 579 /// into each inlined call instruction. 580 fn fixup_inlined_call_exception_tables( 581 allocs: &mut InliningAllocs, 582 func: &mut ir::Function, 583 call_exception_table: ir::ExceptionTable, 584 ) { 585 // Split a block at a `call[_indirect]` instruction, detach the 586 // instruction's results, and alias them to the new block's parameters. 587 let split_block_for_new_try_call = |func: &mut ir::Function, inst: ir::Inst| -> ir::Block { 588 debug_assert!(func.dfg.insts[inst].opcode().is_call()); 589 debug_assert!(!func.dfg.insts[inst].opcode().is_terminator()); 590 591 // Split the block. 592 let next_inst = func 593 .layout 594 .next_inst(inst) 595 .expect("inst is not a terminator, should have a successor"); 596 let new_block = func.dfg.blocks.add(); 597 func.layout.split_block(new_block, next_inst); 598 599 // `try_call[_indirect]` instructions do not define values themselves; 600 // the normal-return block has parameters for the results. So remove 601 // this instruction's results, create an associated block parameter for 602 // each of them, and alias them to the new block parameter. 
603 let old_results = SmallValueVec::from_iter(func.dfg.inst_results(inst).iter().copied()); 604 func.dfg.detach_inst_results(inst); 605 for old_result in old_results { 606 let ty = func.dfg.value_type(old_result); 607 let new_block_param = func.dfg.append_block_param(new_block, ty); 608 func.dfg.change_to_alias(old_result, new_block_param); 609 } 610 611 new_block 612 }; 613 614 // Clone the caller's exception table, updating it for use in the current 615 // `call[_indirect]` instruction as it becomes a `try_call[_indirect]`. 616 let clone_exception_table_for_this_call = |func: &mut ir::Function, 617 signature: ir::SigRef, 618 new_block: ir::Block| 619 -> ir::ExceptionTable { 620 let mut exception = func.stencil.dfg.exception_tables[call_exception_table] 621 .deep_clone(&mut func.stencil.dfg.value_lists); 622 623 *exception.signature_mut() = signature; 624 625 let returns_len = func.dfg.signatures[signature].returns.len(); 626 let returns_len = u32::try_from(returns_len).unwrap(); 627 628 *exception.normal_return_mut() = ir::BlockCall::new( 629 new_block, 630 (0..returns_len).map(|i| ir::BlockArg::TryCallRet(i)), 631 &mut func.dfg.value_lists, 632 ); 633 634 func.dfg.exception_tables.push(exception) 635 }; 636 637 for inst in allocs.calls_needing_exception_table_fixup.drain(..) { 638 debug_assert!(func.dfg.insts[inst].opcode().is_call()); 639 debug_assert!(!func.dfg.insts[inst].opcode().is_return()); 640 match func.dfg.insts[inst] { 641 // current_block: 642 // preds... 643 // rets... = call f(args...) 644 // succs... 645 // 646 // becomes 647 // 648 // current_block: 649 // preds... 650 // try_call f(args...), new_block(rets...), [call_exception_table...] 651 // new_block(rets...): 652 // succs... 
653 ir::InstructionData::Call { 654 opcode: ir::Opcode::Call, 655 args, 656 func_ref, 657 } => { 658 let new_block = split_block_for_new_try_call(func, inst); 659 let signature = func.dfg.ext_funcs[func_ref].signature; 660 let exception = clone_exception_table_for_this_call(func, signature, new_block); 661 func.dfg.insts[inst] = ir::InstructionData::TryCall { 662 opcode: ir::Opcode::TryCall, 663 args, 664 func_ref, 665 exception, 666 }; 667 } 668 669 // current_block: 670 // preds... 671 // rets... = call_indirect sig, val(args...) 672 // succs... 673 // 674 // becomes 675 // 676 // current_block: 677 // preds... 678 // try_call_indirect sig, val(args...), new_block(rets...), [call_exception_table...] 679 // new_block(rets...): 680 // succs... 681 ir::InstructionData::CallIndirect { 682 opcode: ir::Opcode::CallIndirect, 683 args, 684 sig_ref, 685 } => { 686 let new_block = split_block_for_new_try_call(func, inst); 687 let exception = clone_exception_table_for_this_call(func, sig_ref, new_block); 688 func.dfg.insts[inst] = ir::InstructionData::TryCallIndirect { 689 opcode: ir::Opcode::TryCallIndirect, 690 args, 691 exception, 692 }; 693 } 694 695 // For `try_call[_indirect]` instructions, we just need to merge the 696 // exception tables. 697 ir::InstructionData::TryCall { 698 opcode: ir::Opcode::TryCall, 699 exception, 700 .. 701 } 702 | ir::InstructionData::TryCallIndirect { 703 opcode: ir::Opcode::TryCallIndirect, 704 exception, 705 .. 706 } => { 707 // Construct a new exception table that consists of 708 // the inlined instruction's exception table match 709 // sequence, with the inlining site's exception table 710 // appended. This will ensure that the first-match 711 // semantics emulates the original behavior of 712 // matching in the inner frame first. 
713 let sig = func.dfg.exception_tables[exception].signature(); 714 let normal_return = *func.dfg.exception_tables[exception].normal_return(); 715 let exception_data = ExceptionTableData::new( 716 sig, 717 normal_return, 718 func.dfg.exception_tables[exception] 719 .items() 720 .chain(func.dfg.exception_tables[call_exception_table].items()), 721 ) 722 .deep_clone(&mut func.dfg.value_lists); 723 724 func.dfg.exception_tables[exception] = exception_data; 725 } 726 727 otherwise => unreachable!("unknown non-return call instruction: {otherwise:?}"), 728 } 729 } 730 } 731 732 /// After having created an inlined version of a callee instruction that returns 733 /// in the caller, we need to fix it up so that it doesn't actually return 734 /// (since we are already in the caller's frame) and instead just jumps to the 735 /// control-flow join point. 736 fn fixup_inst_that_returns( 737 allocs: &mut InliningAllocs, 738 func: &mut ir::Function, 739 callee: &ir::Function, 740 entity_map: &EntityMap, 741 call_opcode: ir::Opcode, 742 inlined_inst: ir::Inst, 743 callee_inst: ir::Inst, 744 return_block: ir::Block, 745 call_stack_map: Option<&[ir::UserStackMapEntry]>, 746 ) { 747 debug_assert!(func.dfg.insts[inlined_inst].opcode().is_return()); 748 match func.dfg.insts[inlined_inst] { 749 // return rets... 750 // 751 // becomes 752 // 753 // jump return_block(rets...) 754 ir::InstructionData::MultiAry { 755 opcode: ir::Opcode::Return, 756 args, 757 } => { 758 let rets = SmallBlockArgVec::from_iter( 759 args.as_slice(&func.dfg.value_lists) 760 .iter() 761 .copied() 762 .map(|v| v.into()), 763 ); 764 func.dfg.replace(inlined_inst).jump(return_block, &rets); 765 } 766 767 // return_call f(args...) 768 // 769 // becomes 770 // 771 // rets... = call f(args...) 772 // jump return_block(rets...) 
773 ir::InstructionData::Call { 774 opcode: ir::Opcode::ReturnCall, 775 args, 776 func_ref, 777 } => { 778 func.dfg.insts[inlined_inst] = ir::InstructionData::Call { 779 opcode: ir::Opcode::Call, 780 args, 781 func_ref, 782 }; 783 func.dfg.make_inst_results(inlined_inst, ir::types::INVALID); 784 785 append_stack_map_entries( 786 func, 787 callee, 788 &entity_map, 789 call_stack_map, 790 inlined_inst, 791 callee_inst, 792 ); 793 794 let rets = SmallBlockArgVec::from_iter( 795 func.dfg 796 .inst_results(inlined_inst) 797 .iter() 798 .copied() 799 .map(|v| v.into()), 800 ); 801 let mut cursor = FuncCursor::new(func); 802 cursor.goto_after_inst(inlined_inst); 803 cursor.ins().jump(return_block, &rets); 804 805 if call_opcode == ir::Opcode::TryCall { 806 allocs 807 .calls_needing_exception_table_fixup 808 .push(inlined_inst); 809 } 810 } 811 812 // return_call_indirect val(args...) 813 // 814 // becomes 815 // 816 // rets... = call_indirect val(args...) 817 // jump return_block(rets...) 818 ir::InstructionData::CallIndirect { 819 opcode: ir::Opcode::ReturnCallIndirect, 820 args, 821 sig_ref, 822 } => { 823 func.dfg.insts[inlined_inst] = ir::InstructionData::CallIndirect { 824 opcode: ir::Opcode::CallIndirect, 825 args, 826 sig_ref, 827 }; 828 func.dfg.make_inst_results(inlined_inst, ir::types::INVALID); 829 830 append_stack_map_entries( 831 func, 832 callee, 833 &entity_map, 834 call_stack_map, 835 inlined_inst, 836 callee_inst, 837 ); 838 839 let rets = SmallBlockArgVec::from_iter( 840 func.dfg 841 .inst_results(inlined_inst) 842 .iter() 843 .copied() 844 .map(|v| v.into()), 845 ); 846 let mut cursor = FuncCursor::new(func); 847 cursor.goto_after_inst(inlined_inst); 848 cursor.ins().jump(return_block, &rets); 849 850 if call_opcode == ir::Opcode::TryCall { 851 allocs 852 .calls_needing_exception_table_fixup 853 .push(inlined_inst); 854 } 855 } 856 857 inst_data => unreachable!( 858 "should have handled all `is_return() == true` instructions above; \ 859 got 
{inst_data:?}"
        ),
    }
}

/// An `InstructionMapper` implementation that remaps a callee instruction's
/// entity references to their new indices in the caller function.
struct InliningInstRemapper<'a> {
    /// Side tables holding the callee-value and callee-constant translations.
    allocs: &'a InliningAllocs,
    /// The caller function that is being inlined into.
    func: &'a mut ir::Function,
    /// The callee function whose instructions are being remapped.
    callee: &'a ir::Function,
    /// Offset-based translation of the callee's entities.
    entity_map: &'a EntityMap,
}

impl<'a> ir::instructions::InstructionMapper for InliningInstRemapper<'a> {
    /// Look up the caller value that was recorded for the given callee value.
    ///
    /// Panics if no inlined value has been recorded for `value`.
    fn map_value(&mut self, value: ir::Value) -> ir::Value {
        self.allocs.get_inlined_value(self.callee, value).expect(
            "defs come before uses; we should have already inlined all values \
             used by an instruction",
        )
    }

    /// Build a new value list in the caller's value-list pool that contains
    /// the mapped version of each value in the callee's list.
    fn map_value_list(&mut self, value_list: ir::ValueList) -> ir::ValueList {
        let mut inlined_list = ir::ValueList::new();
        for callee_val in value_list.as_slice(&self.callee.dfg.value_lists) {
            let inlined_val = self.map_value(*callee_val);
            inlined_list.push(inlined_val, &mut self.func.dfg.value_lists);
        }
        inlined_list
    }

    /// Translate a callee global value via the entity map.
    fn map_global_value(&mut self, global_value: ir::GlobalValue) -> ir::GlobalValue {
        self.entity_map.inlined_global_value(global_value)
    }

    /// Create a new jump table in the caller whose default target and entries
    /// are the mapped versions of the callee jump table's block calls.
    fn map_jump_table(&mut self, jump_table: ir::JumpTable) -> ir::JumpTable {
        let inlined_default =
            self.map_block_call(self.callee.dfg.jump_tables[jump_table].default_block());
        let inlined_table = self.callee.dfg.jump_tables[jump_table]
            .as_slice()
            .iter()
            .map(|callee_block_call| self.map_block_call(*callee_block_call))
            .collect::<SmallBlockCallVec>();
        self.func
            .dfg
            .jump_tables
            .push(ir::JumpTableData::new(inlined_default, &inlined_table))
    }

    /// Create a new exception table in the caller with the callee table's
    /// signature, normal-return block call, and items all remapped.
    fn map_exception_table(&mut self, exception_table: ir::ExceptionTable) -> ir::ExceptionTable {
        let exception_table = &self.callee.dfg.exception_tables[exception_table];
        let inlined_sig_ref = self.map_sig_ref(exception_table.signature());
        let inlined_normal_return = self.map_block_call(*exception_table.normal_return());
        let inlined_table = exception_table
            .items()
            .map(|item| match item {
                ExceptionTableItem::Tag(tag, block_call) => {
                    ExceptionTableItem::Tag(tag, self.map_block_call(block_call))
                }
                ExceptionTableItem::Default(block_call) => {
                    ExceptionTableItem::Default(self.map_block_call(block_call))
                }
                ExceptionTableItem::Context(value) => {
                    ExceptionTableItem::Context(self.map_value(value))
                }
            })
            .collect::<SmallVec<[_; 8]>>();
        self.func
            .dfg
            .exception_tables
            .push(ir::ExceptionTableData::new(
                inlined_sig_ref,
                inlined_normal_return,
                inlined_table,
            ))
    }

    /// Rebuild a callee block call in the caller: the target block is
    /// translated through the entity map and each plain value argument is
    /// mapped; `TryCallRet` / `TryCallExn` arguments are passed through as-is.
    fn map_block_call(&mut self, block_call: ir::BlockCall) -> ir::BlockCall {
        let callee_block = block_call.block(&self.callee.dfg.value_lists);
        let inlined_block = self.entity_map.inlined_block(callee_block);
        let args = block_call
            .args(&self.callee.dfg.value_lists)
            .map(|arg| match arg {
                ir::BlockArg::Value(value) => self.map_value(value).into(),
                ir::BlockArg::TryCallRet(_) | ir::BlockArg::TryCallExn(_) => arg,
            })
            .collect::<SmallBlockArgVec>();
        ir::BlockCall::new(inlined_block, args, &mut self.func.dfg.value_lists)
    }

    /// Translate a callee block via the entity map.
    fn map_block(&mut self, block: ir::Block) -> ir::Block {
        self.entity_map.inlined_block(block)
    }

    /// Translate a callee function reference via the entity map.
    fn map_func_ref(&mut self, func_ref: ir::FuncRef) -> ir::FuncRef {
        self.entity_map.inlined_func_ref(func_ref)
    }

    /// Translate a callee signature reference via the entity map.
    fn map_sig_ref(&mut self, sig_ref: ir::SigRef) -> ir::SigRef {
        self.entity_map.inlined_sig_ref(sig_ref)
    }

    /// Translate a callee stack slot via the entity map.
    fn map_stack_slot(&mut self, stack_slot: ir::StackSlot) -> ir::StackSlot {
        self.entity_map.inlined_stack_slot(stack_slot)
    }

    /// Translate a callee dynamic stack slot via the entity map.
    fn map_dynamic_stack_slot(
        &mut self,
        dynamic_stack_slot: ir::DynamicStackSlot,
    ) -> ir::DynamicStackSlot {
        self.entity_map
            .inlined_dynamic_stack_slot(dynamic_stack_slot)
    }

    /// Look up the caller constant recorded for the given callee constant.
    ///
    /// Constants are looked up in `allocs.constants` rather than translated
    /// through the entity map. Panics if no translation has been recorded.
    fn map_constant(&mut self, constant: ir::Constant) -> ir::Constant {
        self.allocs
            .constants
            .get(constant)
            .and_then(|o| o.expand())
            .expect("should have inlined all callee constants")
    }

    /// Translate a callee immediate via the entity map.
    fn map_immediate(&mut self, immediate: ir::Immediate) -> ir::Immediate {
        self.entity_map.inlined_immediate(immediate)
    }
}

/// Inline the callee's layout into the caller's layout.
///
/// The inlined blocks are inserted directly after `call_block`, in the same
/// relative order as the callee's blocks appear in the callee's layout.
///
/// Returns the last inlined block in the layout (or `call_block` itself if
/// the callee's layout has no blocks).
fn inline_block_layout(
    func: &mut ir::Function,
    call_block: ir::Block,
    callee: &ir::Function,
    entity_map: &EntityMap,
) -> ir::Block {
    debug_assert!(func.layout.is_block_inserted(call_block));

    // Iterate over callee blocks in layout order, inserting their associated
    // inlined block into the caller's layout.
    let mut prev_inlined_block = call_block;
    let mut next_callee_block = callee.layout.entry_block();
    while let Some(callee_block) = next_callee_block {
        debug_assert!(func.layout.is_block_inserted(prev_inlined_block));

        let inlined_block = entity_map.inlined_block(callee_block);
        func.layout
            .insert_block_after(inlined_block, prev_inlined_block);

        prev_inlined_block = inlined_block;
        next_callee_block = callee.layout.next_block(callee_block);
    }

    debug_assert!(func.layout.is_block_inserted(prev_inlined_block));
    prev_inlined_block
}

/// Split the call instruction's block just after the call instruction to create
/// the point where control-flow joins after the inlined callee "returns".
///
/// Note that tail calls do not return to the caller and therefore do not have a
/// control-flow join point.
///
/// Returns `Some(block)` with the join-point block when there is one:
///
/// * a freshly split-off block when `call_inst` is followed by another
///   instruction in its block,
///
/// * the `try_call`'s existing normal-return block when it can be reused, or
///
/// * a new forwarding block that jumps to the normal-return block otherwise.
///
/// Returns `None` when `call_inst` is the last instruction in its block and
/// is not a `try_call`.
fn split_off_return_block(
    func: &mut ir::Function,
    call_inst: ir::Inst,
    opcode: ir::Opcode,
    callee: &ir::Function,
) -> Option<ir::Block> {
    // When the `call_inst` is not a block terminator, we need to split the
    // block.
    let return_block = func.layout.next_inst(call_inst).map(|next_inst| {
        let return_block = func.dfg.blocks.add();
        func.layout.split_block(return_block, next_inst);

        // Add block parameters for each return value and alias the call
        // instruction's results to them.
        let old_results =
            SmallValueVec::from_iter(func.dfg.inst_results(call_inst).iter().copied());
        debug_assert_eq!(old_results.len(), callee.signature.returns.len());
        func.dfg.detach_inst_results(call_inst);
        for (abi, old_val) in callee.signature.returns.iter().zip(old_results) {
            debug_assert_eq!(abi.value_type, func.dfg.value_type(old_val));
            let ret_param = func.dfg.append_block_param(return_block, abi.value_type);
            func.dfg.change_to_alias(old_val, ret_param);
        }

        return_block
    });

    // When the `call_inst` is a block terminator, then it is either a
    // `return_call` or a `try_call`:
    //
    // * For `return_call`s, we don't have a control-flow join point, because
    //   the caller permanently transfers control to the callee.
    //
    // * For `try_call`s, we probably already have a block for the control-flow
    //   join point, but it isn't guaranteed: the `try_call` might ignore the
    //   call's returns and not forward them to the normal-return block or it
    //   might also pass additional arguments. We can only reuse the existing
    //   normal-return block when the `try_call` forwards exactly our callee's
    //   returns to that block (and therefore that block's parameter types also
    //   exactly match the callee's return types). Otherwise, we must create a new
    //   return block that forwards to the existing normal-return
    //   block. (Elsewhere, at the end of inlining, we will also update any inlined
    //   calls to forward any raised exceptions to the caller's exception table,
    //   as necessary.)
    //
    //   Finally, note that reusing the normal-return's target block is just an
    //   optimization to emit a simpler CFG when we can, and is not
    //   fundamentally required for correctness. We could always insert a
    //   temporary block as our control-flow join point that then forwards to
    //   the normal-return's target block. However, at the time of writing,
    //   Cranelift doesn't currently do any jump-threading or branch
    //   simplification in the mid-end, and removing unnecessary blocks in this
    //   way can help some subsequent mid-end optimizations. If, in the future,
    //   we gain support for jump-threading optimizations in the mid-end, we can
    //   come back and simplify the below code a bit to always generate the
    //   temporary block, and then rely on the subsequent optimizations to clean
    //   everything up.
    debug_assert_eq!(
        return_block.is_none(),
        opcode == ir::Opcode::ReturnCall || opcode == ir::Opcode::TryCall,
    );
    return_block.or_else(|| match func.dfg.insts[call_inst] {
        ir::InstructionData::TryCall {
            opcode: ir::Opcode::TryCall,
            args: _,
            func_ref: _,
            exception,
        } => {
            let normal_return = func.dfg.exception_tables[exception].normal_return();
            let normal_return_block = normal_return.block(&func.dfg.value_lists);

            // Check to see if we can reuse the existing normal-return block.
            // That is the case when the normal-return edge's arguments are
            // exactly `TryCallRet(0)`, `TryCallRet(1)`, ... with one argument
            // per callee return value.
            {
                let normal_return_args = normal_return.args(&func.dfg.value_lists);
                if normal_return_args.len() == callee.signature.returns.len()
                    && normal_return_args.enumerate().all(|(i, arg)| {
                        let i = u32::try_from(i).unwrap();
                        arg == ir::BlockArg::TryCallRet(i)
                    })
                {
                    return Some(normal_return_block);
                }
            }

            // Okay, we cannot reuse the normal-return block. Create a new block
            // that has the expected block parameter types and have it jump to
            // the normal-return block.
            let return_block = func.dfg.blocks.add();
            func.layout.insert_block(return_block, normal_return_block);

            let return_block_params = callee
                .signature
                .returns
                .iter()
                .map(|abi| func.dfg.append_block_param(return_block, abi.value_type))
                .collect::<SmallValueVec>();

            // Rewrite the normal-return edge's arguments for use on the new
            // block's jump: plain values are forwarded unchanged, and each
            // `TryCallRet(i)` becomes the new block's `i`th parameter.
            let normal_return_args = func.dfg.exception_tables[exception]
                .normal_return()
                .args(&func.dfg.value_lists)
                .collect::<SmallBlockArgVec>();
            let jump_args = normal_return_args
                .into_iter()
                .map(|arg| match arg {
                    ir::BlockArg::Value(value) => ir::BlockArg::Value(value),
                    ir::BlockArg::TryCallRet(i) => {
                        let i = usize::try_from(i).unwrap();
                        ir::BlockArg::Value(return_block_params[i])
                    }
                    ir::BlockArg::TryCallExn(_) => {
                        unreachable!("normal-return edges cannot use exceptional results")
                    }
                })
                .collect::<SmallBlockArgVec>();

            let mut cursor = FuncCursor::new(func);
            cursor.goto_first_insertion_point(return_block);
            cursor.ins().jump(normal_return_block, &jump_args);

            Some(return_block)
        }
        _ => None,
    })
}

/// Replace the caller's call instruction with a jump to the caller's inlined
/// copy of the callee's entry block.
///
/// Also associates the callee's parameters with the caller's arguments in our
/// value map.
///
/// Returns the caller's stack map entries, if any.
1153 fn replace_call_with_jump( 1154 allocs: &mut InliningAllocs, 1155 func: &mut ir::Function, 1156 call_inst: ir::Inst, 1157 callee: &ir::Function, 1158 entity_map: &EntityMap, 1159 ) -> Option<ir::UserStackMapEntryVec> { 1160 trace!("Replacing `call` with `jump`"); 1161 trace!( 1162 " --> call instruction: {call_inst:?}: {}", 1163 func.dfg.display_inst(call_inst) 1164 ); 1165 1166 let callee_entry_block = callee 1167 .layout 1168 .entry_block() 1169 .expect("callee function should have an entry block"); 1170 let callee_param_values = callee.dfg.block_params(callee_entry_block); 1171 let caller_arg_values = SmallValueVec::from_iter(func.dfg.inst_args(call_inst).iter().copied()); 1172 debug_assert_eq!(callee_param_values.len(), caller_arg_values.len()); 1173 debug_assert_eq!(callee_param_values.len(), callee.signature.params.len()); 1174 for (abi, (callee_param_value, caller_arg_value)) in callee 1175 .signature 1176 .params 1177 .iter() 1178 .zip(callee_param_values.into_iter().zip(caller_arg_values)) 1179 { 1180 debug_assert_eq!(abi.value_type, callee.dfg.value_type(*callee_param_value)); 1181 debug_assert_eq!(abi.value_type, func.dfg.value_type(caller_arg_value)); 1182 allocs.set_inlined_value(callee, *callee_param_value, caller_arg_value); 1183 } 1184 1185 // Replace the caller's call instruction with a jump to the caller's inlined 1186 // copy of the callee's entry block. 1187 // 1188 // Note that the call block dominates the inlined entry block (and also all 1189 // other inlined blocks) so we can reference the arguments directly, and do 1190 // not need to add block parameters to the inlined entry block. 
1191 let inlined_entry_block = entity_map.inlined_block(callee_entry_block); 1192 func.dfg.replace(call_inst).jump(inlined_entry_block, &[]); 1193 trace!( 1194 " --> replaced with jump instruction: {call_inst:?}: {}", 1195 func.dfg.display_inst(call_inst) 1196 ); 1197 1198 let stack_map_entries = func.dfg.take_user_stack_map_entries(call_inst); 1199 stack_map_entries 1200 } 1201 1202 /// Keeps track of mapping callee entities to their associated inlined caller 1203 /// entities. 1204 #[derive(Default)] 1205 struct EntityMap { 1206 // Rather than doing an implicit, demand-based, DCE'ing translation of 1207 // entities, which would require maps from each callee entity to its 1208 // associated caller entity, we copy all entities into the caller, remember 1209 // each entity's initial offset, and then mapping from the callee to the 1210 // inlined caller entity is just adding that initial offset to the callee's 1211 // index. This should be both faster and simpler than the alternative. Most 1212 // of these sets are relatively small, and they rarely have too much dead 1213 // code in practice, so this is a good trade off. 1214 // 1215 // Note that there are a few kinds of entities that are excluded from the 1216 // `EntityMap`, and for which we do actually take the demand-based approach: 1217 // values and value lists being the notable ones. 
1218 block_offset: Option<u32>, 1219 global_value_offset: Option<u32>, 1220 sig_ref_offset: Option<u32>, 1221 func_ref_offset: Option<u32>, 1222 stack_slot_offset: Option<u32>, 1223 dynamic_type_offset: Option<u32>, 1224 dynamic_stack_slot_offset: Option<u32>, 1225 immediate_offset: Option<u32>, 1226 } 1227 1228 impl EntityMap { 1229 fn inlined_block(&self, callee_block: ir::Block) -> ir::Block { 1230 let offset = self 1231 .block_offset 1232 .expect("must create inlined `ir::Block`s before calling `EntityMap::inlined_block`"); 1233 ir::Block::from_u32(offset + callee_block.as_u32()) 1234 } 1235 1236 fn iter_inlined_blocks(&self, func: &ir::Function) -> impl Iterator<Item = ir::Block> + use<> { 1237 let start = self.block_offset.expect( 1238 "must create inlined `ir::Block`s before calling `EntityMap::iter_inlined_blocks`", 1239 ); 1240 1241 let end = func.dfg.blocks.len(); 1242 let end = u32::try_from(end).unwrap(); 1243 1244 (start..end).map(|i| ir::Block::from_u32(i)) 1245 } 1246 1247 fn inlined_global_value(&self, callee_global_value: ir::GlobalValue) -> ir::GlobalValue { 1248 let offset = self 1249 .global_value_offset 1250 .expect("must create inlined `ir::GlobalValue`s before calling `EntityMap::inlined_global_value`"); 1251 ir::GlobalValue::from_u32(offset + callee_global_value.as_u32()) 1252 } 1253 1254 fn inlined_sig_ref(&self, callee_sig_ref: ir::SigRef) -> ir::SigRef { 1255 let offset = self.sig_ref_offset.expect( 1256 "must create inlined `ir::SigRef`s before calling `EntityMap::inlined_sig_ref`", 1257 ); 1258 ir::SigRef::from_u32(offset + callee_sig_ref.as_u32()) 1259 } 1260 1261 fn inlined_func_ref(&self, callee_func_ref: ir::FuncRef) -> ir::FuncRef { 1262 let offset = self.func_ref_offset.expect( 1263 "must create inlined `ir::FuncRef`s before calling `EntityMap::inlined_func_ref`", 1264 ); 1265 ir::FuncRef::from_u32(offset + callee_func_ref.as_u32()) 1266 } 1267 1268 fn inlined_stack_slot(&self, callee_stack_slot: ir::StackSlot) -> ir::StackSlot { 
1269 let offset = self.stack_slot_offset.expect( 1270 "must create inlined `ir::StackSlot`s before calling `EntityMap::inlined_stack_slot`", 1271 ); 1272 ir::StackSlot::from_u32(offset + callee_stack_slot.as_u32()) 1273 } 1274 1275 fn inlined_dynamic_type(&self, callee_dynamic_type: ir::DynamicType) -> ir::DynamicType { 1276 let offset = self.dynamic_type_offset.expect( 1277 "must create inlined `ir::DynamicType`s before calling `EntityMap::inlined_dynamic_type`", 1278 ); 1279 ir::DynamicType::from_u32(offset + callee_dynamic_type.as_u32()) 1280 } 1281 1282 fn inlined_dynamic_stack_slot( 1283 &self, 1284 callee_dynamic_stack_slot: ir::DynamicStackSlot, 1285 ) -> ir::DynamicStackSlot { 1286 let offset = self.dynamic_stack_slot_offset.expect( 1287 "must create inlined `ir::DynamicStackSlot`s before calling `EntityMap::inlined_dynamic_stack_slot`", 1288 ); 1289 ir::DynamicStackSlot::from_u32(offset + callee_dynamic_stack_slot.as_u32()) 1290 } 1291 1292 fn inlined_immediate(&self, callee_immediate: ir::Immediate) -> ir::Immediate { 1293 let offset = self.immediate_offset.expect( 1294 "must create inlined `ir::Immediate`s before calling `EntityMap::inlined_immediate`", 1295 ); 1296 ir::Immediate::from_u32(offset + callee_immediate.as_u32()) 1297 } 1298 } 1299 1300 /// Translate all of the callee's various entities into the caller, producing an 1301 /// `EntityMap` that can be used to translate callee entity references into 1302 /// inlined caller entity references. 
1303 fn create_entities( 1304 allocs: &mut InliningAllocs, 1305 func: &mut ir::Function, 1306 callee: &ir::Function, 1307 ) -> EntityMap { 1308 let mut entity_map = EntityMap::default(); 1309 1310 entity_map.block_offset = Some(create_blocks(allocs, func, callee)); 1311 entity_map.global_value_offset = Some(create_global_values(func, callee)); 1312 entity_map.sig_ref_offset = Some(create_sig_refs(func, callee)); 1313 create_user_external_name_refs(allocs, func, callee); 1314 entity_map.func_ref_offset = Some(create_func_refs(allocs, func, callee, &entity_map)); 1315 entity_map.stack_slot_offset = Some(create_stack_slots(func, callee)); 1316 entity_map.dynamic_type_offset = Some(create_dynamic_types(func, callee, &entity_map)); 1317 entity_map.dynamic_stack_slot_offset = 1318 Some(create_dynamic_stack_slots(func, callee, &entity_map)); 1319 entity_map.immediate_offset = Some(create_immediates(func, callee)); 1320 1321 // `ir::ConstantData` is deduplicated, so we cannot use our offset scheme 1322 // for `ir::Constant`s. Nonetheless, we still insert them into the caller 1323 // now, at the same time as the rest of our entities. 1324 create_constants(allocs, func, callee); 1325 1326 entity_map 1327 } 1328 1329 /// Create inlined blocks in the caller for every block in the callee. 
1330 fn create_blocks( 1331 allocs: &mut InliningAllocs, 1332 func: &mut ir::Function, 1333 callee: &ir::Function, 1334 ) -> u32 { 1335 let offset = func.dfg.blocks.len(); 1336 let offset = u32::try_from(offset).unwrap(); 1337 1338 func.dfg.blocks.reserve(callee.dfg.blocks.len()); 1339 for callee_block in callee.dfg.blocks.iter() { 1340 let caller_block = func.dfg.blocks.add(); 1341 trace!("Callee {callee_block:?} = inlined {caller_block:?}"); 1342 1343 if callee.layout.is_cold(callee_block) { 1344 func.layout.set_cold(caller_block); 1345 } 1346 1347 // Note: the entry block does not need parameters because the only 1348 // predecessor is the call block and we associate the callee's 1349 // parameters with the caller's arguments directly. 1350 if callee.layout.entry_block() != Some(callee_block) { 1351 for callee_param in callee.dfg.blocks[callee_block].params(&callee.dfg.value_lists) { 1352 let ty = callee.dfg.value_type(*callee_param); 1353 let caller_param = func.dfg.append_block_param(caller_block, ty); 1354 1355 allocs.set_inlined_value(callee, *callee_param, caller_param); 1356 } 1357 } 1358 } 1359 1360 offset 1361 } 1362 1363 /// Copy and translate global values from the callee into the caller. 1364 fn create_global_values(func: &mut ir::Function, callee: &ir::Function) -> u32 { 1365 let gv_offset = func.global_values.len(); 1366 let gv_offset = u32::try_from(gv_offset).unwrap(); 1367 1368 func.global_values.reserve(callee.global_values.len()); 1369 for gv in callee.global_values.values() { 1370 func.global_values.push(match gv { 1371 // These kinds of global values reference other global values, so we 1372 // need to fixup that reference. 
1373 ir::GlobalValueData::Load { 1374 base, 1375 offset, 1376 global_type, 1377 flags, 1378 } => ir::GlobalValueData::Load { 1379 base: ir::GlobalValue::from_u32(base.as_u32() + gv_offset), 1380 offset: *offset, 1381 global_type: *global_type, 1382 flags: *flags, 1383 }, 1384 ir::GlobalValueData::IAddImm { 1385 base, 1386 offset, 1387 global_type, 1388 } => ir::GlobalValueData::IAddImm { 1389 base: ir::GlobalValue::from_u32(base.as_u32() + gv_offset), 1390 offset: *offset, 1391 global_type: *global_type, 1392 }, 1393 1394 // These kinds of global values do not reference other global 1395 // values, so we can just clone them. 1396 ir::GlobalValueData::VMContext 1397 | ir::GlobalValueData::Symbol { .. } 1398 | ir::GlobalValueData::DynScaleTargetConst { .. } => gv.clone(), 1399 }); 1400 } 1401 1402 gv_offset 1403 } 1404 1405 /// Copy `ir::SigRef`s from the callee into the caller. 1406 fn create_sig_refs(func: &mut ir::Function, callee: &ir::Function) -> u32 { 1407 let offset = func.dfg.signatures.len(); 1408 let offset = u32::try_from(offset).unwrap(); 1409 1410 func.dfg.signatures.reserve(callee.dfg.signatures.len()); 1411 for sig in callee.dfg.signatures.values() { 1412 func.dfg.signatures.push(sig.clone()); 1413 } 1414 1415 offset 1416 } 1417 1418 fn create_user_external_name_refs( 1419 allocs: &mut InliningAllocs, 1420 func: &mut ir::Function, 1421 callee: &ir::Function, 1422 ) { 1423 for (callee_named_func_ref, name) in callee.params.user_named_funcs().iter() { 1424 let caller_named_func_ref = func.declare_imported_user_function(name.clone()); 1425 allocs.user_external_name_refs[callee_named_func_ref] = Some(caller_named_func_ref).into(); 1426 } 1427 } 1428 1429 /// Translate `ir::FuncRef`s from the callee into the caller. 
1430 fn create_func_refs( 1431 allocs: &InliningAllocs, 1432 func: &mut ir::Function, 1433 callee: &ir::Function, 1434 entity_map: &EntityMap, 1435 ) -> u32 { 1436 let offset = func.dfg.ext_funcs.len(); 1437 let offset = u32::try_from(offset).unwrap(); 1438 1439 func.dfg.ext_funcs.reserve(callee.dfg.ext_funcs.len()); 1440 for ir::ExtFuncData { 1441 name, 1442 signature, 1443 colocated, 1444 } in callee.dfg.ext_funcs.values() 1445 { 1446 func.dfg.ext_funcs.push(ir::ExtFuncData { 1447 name: match name { 1448 ir::ExternalName::User(name_ref) => { 1449 ir::ExternalName::User(allocs.user_external_name_refs[*name_ref].expect( 1450 "should have translated all `ir::UserExternalNameRef`s before translating \ 1451 `ir::FuncRef`s", 1452 )) 1453 } 1454 ir::ExternalName::TestCase(_) 1455 | ir::ExternalName::LibCall(_) 1456 | ir::ExternalName::KnownSymbol(_) => name.clone(), 1457 }, 1458 signature: entity_map.inlined_sig_ref(*signature), 1459 colocated: *colocated, 1460 }); 1461 } 1462 1463 offset 1464 } 1465 1466 /// Copy stack slots from the callee into the caller. 1467 fn create_stack_slots(func: &mut ir::Function, callee: &ir::Function) -> u32 { 1468 let offset = func.sized_stack_slots.len(); 1469 let offset = u32::try_from(offset).unwrap(); 1470 1471 func.sized_stack_slots 1472 .reserve(callee.sized_stack_slots.len()); 1473 for slot in callee.sized_stack_slots.values() { 1474 func.sized_stack_slots.push(slot.clone()); 1475 } 1476 1477 offset 1478 } 1479 1480 /// Copy dynamic types from the callee into the caller. 
1481 fn create_dynamic_types( 1482 func: &mut ir::Function, 1483 callee: &ir::Function, 1484 entity_map: &EntityMap, 1485 ) -> u32 { 1486 let offset = func.dynamic_stack_slots.len(); 1487 let offset = u32::try_from(offset).unwrap(); 1488 1489 func.dfg 1490 .dynamic_types 1491 .reserve(callee.dfg.dynamic_types.len()); 1492 for ir::DynamicTypeData { 1493 base_vector_ty, 1494 dynamic_scale, 1495 } in callee.dfg.dynamic_types.values() 1496 { 1497 func.dfg.dynamic_types.push(ir::DynamicTypeData { 1498 base_vector_ty: *base_vector_ty, 1499 dynamic_scale: entity_map.inlined_global_value(*dynamic_scale), 1500 }); 1501 } 1502 1503 offset 1504 } 1505 1506 /// Copy dynamic stack slots from the callee into the caller. 1507 fn create_dynamic_stack_slots( 1508 func: &mut ir::Function, 1509 callee: &ir::Function, 1510 entity_map: &EntityMap, 1511 ) -> u32 { 1512 let offset = func.dynamic_stack_slots.len(); 1513 let offset = u32::try_from(offset).unwrap(); 1514 1515 func.dynamic_stack_slots 1516 .reserve(callee.dynamic_stack_slots.len()); 1517 for ir::DynamicStackSlotData { kind, dyn_ty } in callee.dynamic_stack_slots.values() { 1518 func.dynamic_stack_slots.push(ir::DynamicStackSlotData { 1519 kind: *kind, 1520 dyn_ty: entity_map.inlined_dynamic_type(*dyn_ty), 1521 }); 1522 } 1523 1524 offset 1525 } 1526 1527 /// Copy immediates from the callee into the caller. 1528 fn create_immediates(func: &mut ir::Function, callee: &ir::Function) -> u32 { 1529 let offset = func.dfg.immediates.len(); 1530 let offset = u32::try_from(offset).unwrap(); 1531 1532 func.dfg.immediates.reserve(callee.dfg.immediates.len()); 1533 for imm in callee.dfg.immediates.values() { 1534 func.dfg.immediates.push(imm.clone()); 1535 } 1536 1537 offset 1538 } 1539 1540 /// Copy constants from the callee into the caller. 
1541 fn create_constants(allocs: &mut InliningAllocs, func: &mut ir::Function, callee: &ir::Function) { 1542 for (callee_constant, data) in callee.dfg.constants.iter() { 1543 let inlined_constant = func.dfg.constants.insert(data.clone()); 1544 allocs.constants[*callee_constant] = Some(inlined_constant).into(); 1545 } 1546 } 1547