//! Function inlining infrastructure.
//!
//! This module provides "inlining as a library" to Cranelift users; it does
//! _not_ provide a complete, off-the-shelf inlining solution. Cranelift's
//! compilation context is per-function and does not encompass the full call
//! graph. It does not know which functions are hot and which are cold, which
//! have been marked the equivalent of `#[inline(never)]`, etc... Only the
//! Cranelift user can understand these aspects of the full compilation
//! pipeline, and these things can be very different between (say) Wasmtime and
//! `cg_clif`. Therefore, this module does not attempt to define heuristics for
//! when inlining a particular call is likely beneficial. This module only
//! provides hooks for the Cranelift user to define whether a given call should
//! be inlined or not, and the mechanics to inline a callee into a particular
//! call site when directed to do so by the Cranelift user.
//!
//! The top-level inlining entry point during Cranelift compilation is
//! [`Context::inline`][crate::Context::inline]. It takes an [`Inline`] trait
//! implementation, which is authored by the Cranelift user and directs
//! Cranelift whether to inline a particular call, and, when inlining, gives
//! Cranelift the body of the callee that is to be inlined.

use crate::cursor::{Cursor as _, FuncCursor};
use crate::ir::{self, DebugTag, ExceptionTableData, ExceptionTableItem, InstBuilder as _};
use crate::result::CodegenResult;
use crate::trace;
use crate::traversals::Dfs;
use alloc::borrow::Cow;
use alloc::vec::Vec;
use cranelift_entity::{SecondaryMap, packed_option::PackedOption};
use smallvec::SmallVec;

type SmallValueVec = SmallVec<[ir::Value; 8]>;
type SmallBlockArgVec = SmallVec<[ir::BlockArg; 8]>;
type SmallBlockCallVec = SmallVec<[ir::BlockCall; 8]>;

/// A command directing Cranelift whether or not to inline a particular call.
37 pub enum InlineCommand<'a> { 38 /// Keep the call as-is, out-of-line, and do not inline the callee. 39 KeepCall, 40 41 /// Inline the call, using this function as the body of the callee. 42 /// 43 /// It is the `Inline` implementor's responsibility to ensure that this 44 /// function is the correct callee. Providing the wrong function may result 45 /// in panics during compilation or incorrect runtime behavior. 46 Inline { 47 /// The callee function's body. 48 callee: Cow<'a, ir::Function>, 49 /// Whether to visit any function calls within the callee body after 50 /// inlining and consider them for further inlining. 51 visit_callee: bool, 52 }, 53 } 54 55 /// A trait for directing Cranelift whether to inline a particular call or not. 56 /// 57 /// Used in combination with the [`Context::inline`][crate::Context::inline] 58 /// method. 59 pub trait Inline { 60 /// A hook invoked for each direct call instruction in a function, whose 61 /// result determines whether Cranelift should inline a given call. 62 /// 63 /// The Cranelift user is responsible for defining their own hueristics and 64 /// deciding whether inlining the call is beneficial. 65 /// 66 /// When returning a function and directing Cranelift to inline its body 67 /// into the call site, the `Inline` implementer must ensure the following: 68 /// 69 /// * The returned function's signature exactly matches the `callee` 70 /// `FuncRef`'s signature. 71 /// 72 /// * The returned function must be legalized. 73 /// 74 /// * The returned function must be valid (i.e. it must pass the CLIF 75 /// verifier). 76 /// 77 /// * The returned function is a correct and valid implementation of the 78 /// `callee` according to your language's semantics. 79 /// 80 /// Failure to uphold these invariants may result in panics during 81 /// compilation or incorrect runtime behavior in the generated code. 
82 fn inline( 83 &mut self, 84 caller: &ir::Function, 85 call_inst: ir::Inst, 86 call_opcode: ir::Opcode, 87 callee: ir::FuncRef, 88 call_args: &[ir::Value], 89 ) -> InlineCommand<'_>; 90 } 91 92 impl<'a, T> Inline for &'a mut T 93 where 94 T: Inline, 95 { 96 fn inline( 97 &mut self, 98 caller: &ir::Function, 99 inst: ir::Inst, 100 opcode: ir::Opcode, 101 callee: ir::FuncRef, 102 args: &[ir::Value], 103 ) -> InlineCommand<'_> { 104 (*self).inline(caller, inst, opcode, callee, args) 105 } 106 } 107 108 /// Walk the given function, invoke the `Inline` implementation for each call 109 /// instruction, and inline the callee when directed to do so. 110 /// 111 /// Returns whether any call was inlined. 112 pub(crate) fn do_inlining( 113 func: &mut ir::Function, 114 mut inliner: impl Inline, 115 ) -> CodegenResult<bool> { 116 trace!("function {} before inlining: {}", func.name, func); 117 118 let mut inlined_any = false; 119 let mut allocs = InliningAllocs::default(); 120 121 let mut cursor = FuncCursor::new(func); 122 'block_loop: while let Some(block) = cursor.next_block() { 123 // Always keep track of our previous cursor position. Assuming that the 124 // current position is a function call that we will inline, then the 125 // previous position is just before the inlined callee function. After 126 // inlining a call, the Cranelift user can decide whether to consider 127 // any function calls in the inlined callee for further inlining or 128 // not. When they do, then we back up to this previous cursor position 129 // so that our traversal will then continue over the inlined body. 130 let mut prev_pos; 131 132 while let Some(inst) = { 133 prev_pos = cursor.position(); 134 cursor.next_inst() 135 } { 136 // Make sure that `block` is always `inst`'s block, even with all of 137 // our cursor-position-updating and block-splitting-during-inlining 138 // shenanigans below. 
139 debug_assert_eq!(Some(block), cursor.func.layout.inst_block(inst)); 140 141 match cursor.func.dfg.insts[inst] { 142 ir::InstructionData::Call { 143 opcode: opcode @ ir::Opcode::Call | opcode @ ir::Opcode::ReturnCall, 144 args: _, 145 func_ref, 146 } => { 147 trace!( 148 "considering call site for inlining: {inst}: {}", 149 cursor.func.dfg.display_inst(inst), 150 ); 151 let args = cursor.func.dfg.inst_args(inst); 152 match inliner.inline(&cursor.func, inst, opcode, func_ref, args) { 153 InlineCommand::KeepCall => { 154 trace!(" --> keeping call"); 155 } 156 InlineCommand::Inline { 157 callee, 158 visit_callee, 159 } => { 160 let last_inlined_block = inline_one( 161 &mut allocs, 162 cursor.func, 163 func_ref, 164 block, 165 inst, 166 opcode, 167 &callee, 168 None, 169 ); 170 inlined_any = true; 171 if visit_callee { 172 cursor.set_position(prev_pos); 173 } else { 174 // Arrange it so that the `next_block()` loop 175 // will continue to the next block that is not 176 // associated with the just-inlined callee. 
177 cursor.goto_bottom(last_inlined_block); 178 continue 'block_loop; 179 } 180 } 181 } 182 } 183 ir::InstructionData::TryCall { 184 opcode: opcode @ ir::Opcode::TryCall, 185 args: _, 186 func_ref, 187 exception, 188 } => { 189 trace!( 190 "considering call site for inlining: {inst}: {}", 191 cursor.func.dfg.display_inst(inst), 192 ); 193 let args = cursor.func.dfg.inst_args(inst); 194 match inliner.inline(&cursor.func, inst, opcode, func_ref, args) { 195 InlineCommand::KeepCall => { 196 trace!(" --> keeping call"); 197 } 198 InlineCommand::Inline { 199 callee, 200 visit_callee, 201 } => { 202 let last_inlined_block = inline_one( 203 &mut allocs, 204 cursor.func, 205 func_ref, 206 block, 207 inst, 208 opcode, 209 &callee, 210 Some(exception), 211 ); 212 inlined_any = true; 213 if visit_callee { 214 cursor.set_position(prev_pos); 215 } else { 216 // Arrange it so that the `next_block()` loop 217 // will continue to the next block that is not 218 // associated with the just-inlined callee. 219 cursor.goto_bottom(last_inlined_block); 220 continue 'block_loop; 221 } 222 } 223 } 224 } 225 ir::InstructionData::CallIndirect { .. } 226 | ir::InstructionData::TryCallIndirect { .. } => { 227 // Can't inline indirect calls; need to have some earlier 228 // pass rewrite them into direct calls first, when possible. 229 } 230 _ => { 231 debug_assert!( 232 !cursor.func.dfg.insts[inst].opcode().is_call(), 233 "should have matched all call instructions, but found: {inst}: {}", 234 cursor.func.dfg.display_inst(inst), 235 ); 236 } 237 } 238 } 239 } 240 241 if inlined_any { 242 trace!("function {} after inlining: {}", func.name, func); 243 } else { 244 trace!("function {} did not have any callees inlined", func.name); 245 } 246 247 Ok(inlined_any) 248 } 249 250 #[derive(Default)] 251 struct InliningAllocs { 252 /// Map from callee value to inlined caller value. 
253 values: SecondaryMap<ir::Value, PackedOption<ir::Value>>, 254 255 /// Map from callee constant to inlined caller constant. 256 /// 257 /// Not in `EntityMap` because these are hash-consed inside the 258 /// `ir::Function`. 259 constants: SecondaryMap<ir::Constant, PackedOption<ir::Constant>>, 260 261 /// Map from callee to inlined caller external name refs. 262 /// 263 /// Not in `EntityMap` because these are hash-consed inside the 264 /// `ir::Function`. 265 user_external_name_refs: 266 SecondaryMap<ir::UserExternalNameRef, PackedOption<ir::UserExternalNameRef>>, 267 268 /// The set of _caller_ inlined call instructions that need exception table 269 /// fixups at the end of inlining. 270 /// 271 /// This includes all kinds of non-returning calls, not just the literal 272 /// `call` instruction: `call_indirect`, `try_call`, `try_call_indirect`, 273 /// etc... However, it does not include `return_call` and 274 /// `return_call_indirect` instructions because the caller cannot catch 275 /// exceptions that those calls throw because the caller is no longer on the 276 /// stack as soon as they are executed. 277 /// 278 /// Note: this is a simple `Vec`, and not an `EntitySet`, because it is very 279 /// sparse: most of the caller's instructions are not inlined call 280 /// instructions. Additionally, we require deterministic iteration order and 281 /// do not require set-membership testing, so a hash set is not a good 282 /// choice either. 
283 calls_needing_exception_table_fixup: Vec<ir::Inst>, 284 } 285 286 impl InliningAllocs { 287 fn reset(&mut self, callee: &ir::Function) { 288 let InliningAllocs { 289 values, 290 constants, 291 user_external_name_refs, 292 calls_needing_exception_table_fixup, 293 } = self; 294 295 values.clear(); 296 values.resize(callee.dfg.len_values()); 297 298 constants.clear(); 299 constants.resize(callee.dfg.constants.len()); 300 301 user_external_name_refs.clear(); 302 user_external_name_refs.resize(callee.params.user_named_funcs().len()); 303 304 // Note: We do not reserve capacity for 305 // `calls_needing_exception_table_fixup` because it is a sparse set and 306 // we don't know how large it needs to be ahead of time. 307 calls_needing_exception_table_fixup.clear(); 308 } 309 310 fn set_inlined_value( 311 &mut self, 312 callee: &ir::Function, 313 callee_val: ir::Value, 314 inlined_val: ir::Value, 315 ) { 316 trace!(" --> callee {callee_val:?} = inlined {inlined_val:?}"); 317 debug_assert!(self.values[callee_val].is_none()); 318 let resolved_callee_val = callee.dfg.resolve_aliases(callee_val); 319 debug_assert!(self.values[resolved_callee_val].is_none()); 320 self.values[resolved_callee_val] = Some(inlined_val).into(); 321 } 322 323 fn get_inlined_value(&self, callee: &ir::Function, callee_val: ir::Value) -> Option<ir::Value> { 324 let resolved_callee_val = callee.dfg.resolve_aliases(callee_val); 325 self.values[resolved_callee_val].expand() 326 } 327 } 328 329 /// Inline one particular function call. 330 /// 331 /// Returns the last inlined block in the layout. 
332 fn inline_one( 333 allocs: &mut InliningAllocs, 334 func: &mut ir::Function, 335 callee_func_ref: ir::FuncRef, 336 call_block: ir::Block, 337 call_inst: ir::Inst, 338 call_opcode: ir::Opcode, 339 callee: &ir::Function, 340 call_exception_table: Option<ir::ExceptionTable>, 341 ) -> ir::Block { 342 trace!( 343 "Inlining call {call_inst:?}: {}\n\ 344 with callee = {callee:?}", 345 func.dfg.display_inst(call_inst) 346 ); 347 348 // Type check callee signature. 349 let expected_callee_sig = func.dfg.ext_funcs[callee_func_ref].signature; 350 let expected_callee_sig = &func.dfg.signatures[expected_callee_sig]; 351 assert_eq!(expected_callee_sig, &callee.signature); 352 353 allocs.reset(callee); 354 355 // First, append various callee entity arenas to the end of the caller's 356 // entity arenas. 357 let entity_map = create_entities(allocs, func, callee); 358 359 // Inlined prologue: split the call instruction's block at the point of the 360 // call and replace the call with a jump. 361 let return_block = split_off_return_block(func, call_inst, call_opcode, callee); 362 let call_stack_map = replace_call_with_jump(allocs, func, call_inst, callee, &entity_map); 363 364 // Prepare for translating the actual instructions by inserting the inlined 365 // blocks into the caller's layout in the same order that they appear in the 366 // callee. 367 let mut last_inlined_block = inline_block_layout(func, call_block, callee, &entity_map); 368 369 // Get a copy of debug tags on the call instruction; these are 370 // prepended to debug tags on inlined instructions. Remove them 371 // from the call itself as it will be rewritten to a jump (which 372 // cannot have tags). 373 let call_debug_tags = func.debug_tags.get(call_inst).to_vec(); 374 func.debug_tags.set(call_inst, []); 375 376 // Translate each instruction from the callee into the caller, 377 // appending them to their associated block in the caller. 
378 // 379 // Note that we iterate over the callee with a pre-order traversal so that 380 // we see value defs before uses. 381 for callee_block in Dfs::new().pre_order_iter(callee) { 382 let inlined_block = entity_map.inlined_block(callee_block); 383 trace!( 384 "Processing instructions in callee block {callee_block:?} (inlined block {inlined_block:?}" 385 ); 386 387 let mut next_callee_inst = callee.layout.first_inst(callee_block); 388 while let Some(callee_inst) = next_callee_inst { 389 trace!( 390 "Processing callee instruction {callee_inst:?}: {}", 391 callee.dfg.display_inst(callee_inst) 392 ); 393 394 assert_ne!( 395 callee.dfg.insts[callee_inst].opcode(), 396 ir::Opcode::GlobalValue, 397 "callee must already be legalized, we shouldn't see any `global_value` \ 398 instructions when inlining; found {callee_inst:?}: {}", 399 callee.dfg.display_inst(callee_inst) 400 ); 401 402 // Remap the callee instruction's entities and insert it into the 403 // caller's DFG. 404 let inlined_inst_data = callee.dfg.insts[callee_inst].map(InliningInstRemapper { 405 allocs: &allocs, 406 func, 407 callee, 408 entity_map: &entity_map, 409 }); 410 let inlined_inst = func.dfg.make_inst(inlined_inst_data); 411 func.layout.append_inst(inlined_inst, inlined_block); 412 413 // Copy over debug tags, translating referenced entities 414 // as appropriate. 415 let debug_tags = callee.debug_tags.get(callee_inst); 416 // If there are tags on the inlined instruction, we always 417 // add tags, and we prepend any tags from the call 418 // instruction; but we don't add tags if only the callsite 419 // had them (this would otherwise mean that every single 420 // instruction in an inlined function body would get 421 // tags). 
422 if !debug_tags.is_empty() { 423 let tags = call_debug_tags 424 .iter() 425 .cloned() 426 .chain(debug_tags.iter().map(|tag| match *tag { 427 DebugTag::User(value) => DebugTag::User(value), 428 DebugTag::StackSlot(slot) => { 429 DebugTag::StackSlot(entity_map.inlined_stack_slot(slot)) 430 } 431 })) 432 .collect::<SmallVec<[_; 4]>>(); 433 func.debug_tags.set(inlined_inst, tags); 434 } 435 436 let opcode = callee.dfg.insts[callee_inst].opcode(); 437 if opcode.is_return() { 438 // Instructions that return do not define any values, so we 439 // don't need to worry about that, but we do need to fix them up 440 // so that they return by jumping to our control-flow join 441 // block, rather than returning from the caller. 442 if let Some(return_block) = return_block { 443 fixup_inst_that_returns( 444 allocs, 445 func, 446 callee, 447 &entity_map, 448 call_opcode, 449 inlined_inst, 450 callee_inst, 451 return_block, 452 call_stack_map.as_ref().map(|es| &**es), 453 ); 454 } else { 455 // If we are inlining a callee that was invoked via 456 // `return_call`, we leave inlined return instructions 457 // as-is: there is no logical caller frame on the stack to 458 // continue to. 459 debug_assert_eq!(call_opcode, ir::Opcode::ReturnCall); 460 } 461 } else { 462 // Make the instruction's result values. 463 let ctrl_typevar = callee.dfg.ctrl_typevar(callee_inst); 464 func.dfg.make_inst_results(inlined_inst, ctrl_typevar); 465 466 // Update the value map for this instruction's defs. 
467 let callee_results = callee.dfg.inst_results(callee_inst); 468 let inlined_results = func.dfg.inst_results(inlined_inst); 469 debug_assert_eq!(callee_results.len(), inlined_results.len()); 470 for (callee_val, inlined_val) in callee_results.iter().zip(inlined_results) { 471 allocs.set_inlined_value(callee, *callee_val, *inlined_val); 472 } 473 474 if opcode.is_call() { 475 append_stack_map_entries( 476 func, 477 callee, 478 &entity_map, 479 call_stack_map.as_deref(), 480 inlined_inst, 481 callee_inst, 482 ); 483 484 // When we are inlining a `try_call` call site, we need to merge 485 // the call site's exception table into the inlined calls' 486 // exception tables. This can involve rewriting regular `call`s 487 // into `try_call`s, which requires mutating the CFG because 488 // `try_call` is a block terminator. However, we can't mutate 489 // the CFG in the middle of this traversal because we rely on 490 // the existence of a one-to-one mapping between the callee 491 // layout and the inlined layout. Instead, we record the set of 492 // inlined call instructions that will need fixing up, and 493 // perform that possibly-CFG-mutating exception table merging in 494 // a follow up pass, when we no longer rely on that one-to-one 495 // layout mapping. 496 debug_assert_eq!( 497 call_opcode == ir::Opcode::TryCall, 498 call_exception_table.is_some() 499 ); 500 if call_opcode == ir::Opcode::TryCall { 501 allocs 502 .calls_needing_exception_table_fixup 503 .push(inlined_inst); 504 } 505 } 506 } 507 508 trace!( 509 " --> inserted inlined instruction {inlined_inst:?}: {}", 510 func.dfg.display_inst(inlined_inst) 511 ); 512 513 next_callee_inst = callee.layout.next_inst(callee_inst); 514 } 515 } 516 517 // We copied *all* callee blocks into the caller's layout, but only copied 518 // the callee instructions in *reachable* callee blocks into the caller's 519 // associated blocks. 
Therefore, any *unreachable* blocks are empty in the 520 // caller, which is invalid CLIF because all blocks must end in a 521 // terminator, so do a quick pass over the inlined blocks and remove any 522 // empty blocks from the caller's layout. 523 for block in entity_map.iter_inlined_blocks(func) { 524 if func.layout.is_block_inserted(block) && func.layout.first_inst(block).is_none() { 525 log::trace!("removing unreachable inlined block from layout: {block}"); 526 527 // If the block being removed is our last-inlined block, then back 528 // it up to the previous block in the layout, which will be the new 529 // last-inlined block after this one's removal. 530 if block == last_inlined_block { 531 last_inlined_block = func.layout.prev_block(last_inlined_block).expect( 532 "there will always at least be the block that contained the call we are \ 533 inlining", 534 ); 535 } 536 537 func.layout.remove_block(block); 538 } 539 } 540 541 // Final step: fixup the exception tables of any inlined calls when we are 542 // inlining a `try_call` site. 543 // 544 // Subtly, this requires rewriting non-catching `call[_indirect]` 545 // instructions into `try_call[_indirect]` instructions so that exceptions 546 // that unwound through the original callee frame and were caught by the 547 // caller's `try_call` do not unwind past this inlined frame. And turning a 548 // `call` into a `try_call` mutates the CFG, breaking our one-to-one mapping 549 // between callee blocks and inlined blocks, so we delay these fixups to 550 // this final step, when we no longer rely on that mapping. 
551 debug_assert!( 552 allocs.calls_needing_exception_table_fixup.is_empty() || call_exception_table.is_some() 553 ); 554 debug_assert_eq!( 555 call_opcode == ir::Opcode::TryCall, 556 call_exception_table.is_some() 557 ); 558 if let Some(call_exception_table) = call_exception_table { 559 fixup_inlined_call_exception_tables(allocs, func, call_exception_table); 560 } 561 562 debug_assert!( 563 func.layout.is_block_inserted(last_inlined_block), 564 "last_inlined_block={last_inlined_block} should be inserted in the layout" 565 ); 566 last_inlined_block 567 } 568 569 /// Append stack map entries from the caller and callee to the given inlined 570 /// instruction. 571 fn append_stack_map_entries( 572 func: &mut ir::Function, 573 callee: &ir::Function, 574 entity_map: &EntityMap, 575 call_stack_map: Option<&[ir::UserStackMapEntry]>, 576 inlined_inst: ir::Inst, 577 callee_inst: ir::Inst, 578 ) { 579 // Add the caller's stack map to this call. These entries 580 // already refer to caller entities and do not need further 581 // translation. 582 func.dfg.append_user_stack_map_entries( 583 inlined_inst, 584 call_stack_map 585 .iter() 586 .flat_map(|entries| entries.iter().cloned()), 587 ); 588 589 // Append the callee's stack map to this call. These entries 590 // refer to callee entities and therefore do require 591 // translation into the caller's index space. 592 func.dfg.append_user_stack_map_entries( 593 inlined_inst, 594 callee 595 .dfg 596 .user_stack_map_entries(callee_inst) 597 .iter() 598 .flat_map(|entries| entries.iter()) 599 .map(|entry| ir::UserStackMapEntry { 600 ty: entry.ty, 601 slot: entity_map.inlined_stack_slot(entry.slot), 602 offset: entry.offset, 603 }), 604 ); 605 } 606 607 /// Create or update the exception tables for any inlined call instructions: 608 /// when inlining at a `try_call` site, we must forward our exceptional edges 609 /// into each inlined call instruction. 
fn fixup_inlined_call_exception_tables(
    allocs: &mut InliningAllocs,
    func: &mut ir::Function,
    call_exception_table: ir::ExceptionTable,
) {
    // `call_exception_table` is the exception table of the `try_call` site
    // being inlined; `allocs.calls_needing_exception_table_fixup` is drained
    // by this function.

    // Split a block at a `call[_indirect]` instruction, detach the
    // instruction's results, and alias them to the new block's parameters.
    let split_block_for_new_try_call = |func: &mut ir::Function, inst: ir::Inst| -> ir::Block {
        debug_assert!(func.dfg.insts[inst].opcode().is_call());
        debug_assert!(!func.dfg.insts[inst].opcode().is_terminator());

        // Split the block.
        let next_inst = func
            .layout
            .next_inst(inst)
            .expect("inst is not a terminator, should have a successor");
        let new_block = func.dfg.blocks.add();
        func.layout.split_block(new_block, next_inst);

        // `try_call[_indirect]` instructions do not define values themselves;
        // the normal-return block has parameters for the results. So remove
        // this instruction's results, create an associated block parameter for
        // each of them, and alias them to the new block parameter.
        //
        // The results are copied out first because detaching them below would
        // otherwise leave nothing to iterate over.
        let old_results = SmallValueVec::from_iter(func.dfg.inst_results(inst).iter().copied());
        func.dfg.detach_inst_results(inst);
        for old_result in old_results {
            let ty = func.dfg.value_type(old_result);
            let new_block_param = func.dfg.append_block_param(new_block, ty);
            func.dfg.change_to_alias(old_result, new_block_param);
        }

        new_block
    };

    // Clone the caller's exception table, updating it for use in the current
    // `call[_indirect]` instruction as it becomes a `try_call[_indirect]`.
    let clone_exception_table_for_this_call = |func: &mut ir::Function,
                                               signature: ir::SigRef,
                                               new_block: ir::Block|
     -> ir::ExceptionTable {
        // NOTE(review): this goes through `func.stencil.dfg` rather than
        // `func.dfg`, presumably so that the exception table and the value
        // list pool can be borrowed as disjoint fields -- confirm.
        let mut exception = func.stencil.dfg.exception_tables[call_exception_table]
            .deep_clone(&mut func.stencil.dfg.value_lists);

        *exception.signature_mut() = signature;

        let returns_len = func.dfg.signatures[signature].returns.len();
        let returns_len = u32::try_from(returns_len).unwrap();

        // The normal-return edge goes to the freshly split-off block and
        // passes along every return value of the call.
        *exception.normal_return_mut() = ir::BlockCall::new(
            new_block,
            (0..returns_len).map(|i| ir::BlockArg::TryCallRet(i)),
            &mut func.dfg.value_lists,
        );

        func.dfg.exception_tables.push(exception)
    };

    for inst in allocs.calls_needing_exception_table_fixup.drain(..) {
        debug_assert!(func.dfg.insts[inst].opcode().is_call());
        debug_assert!(!func.dfg.insts[inst].opcode().is_return());
        match func.dfg.insts[inst] {
            //     current_block:
            //         preds...
            //         rets... = call f(args...)
            //         succs...
            //
            // becomes
            //
            //     current_block:
            //         preds...
            //         try_call f(args...), new_block(rets...), [call_exception_table...]
            //     new_block(rets...):
            //         succs...
            ir::InstructionData::Call {
                opcode: ir::Opcode::Call,
                args,
                func_ref,
            } => {
                let new_block = split_block_for_new_try_call(func, inst);
                let signature = func.dfg.ext_funcs[func_ref].signature;
                let exception = clone_exception_table_for_this_call(func, signature, new_block);
                func.dfg.insts[inst] = ir::InstructionData::TryCall {
                    opcode: ir::Opcode::TryCall,
                    args,
                    func_ref,
                    exception,
                };
            }

            //     current_block:
            //         preds...
            //         rets... = call_indirect sig, val(args...)
            //         succs...
            //
            // becomes
            //
            //     current_block:
            //         preds...
            //         try_call_indirect sig, val(args...), new_block(rets...), [call_exception_table...]
            //     new_block(rets...):
            //         succs...
            ir::InstructionData::CallIndirect {
                opcode: ir::Opcode::CallIndirect,
                args,
                sig_ref,
            } => {
                let new_block = split_block_for_new_try_call(func, inst);
                let exception = clone_exception_table_for_this_call(func, sig_ref, new_block);
                func.dfg.insts[inst] = ir::InstructionData::TryCallIndirect {
                    opcode: ir::Opcode::TryCallIndirect,
                    args,
                    exception,
                };
            }

            // For `try_call[_indirect]` instructions, we just need to merge the
            // exception tables.
            ir::InstructionData::TryCall {
                opcode: ir::Opcode::TryCall,
                exception,
                ..
            }
            | ir::InstructionData::TryCallIndirect {
                opcode: ir::Opcode::TryCallIndirect,
                exception,
                ..
            } => {
                // Construct a new exception table that consists of
                // the inlined instruction's exception table match
                // sequence, with the inlining site's exception table
                // appended. This will ensure that the first-match
                // semantics emulates the original behavior of
                // matching in the inner frame first.
                let sig = func.dfg.exception_tables[exception].signature();
                let normal_return = *func.dfg.exception_tables[exception].normal_return();
                let exception_data = ExceptionTableData::new(
                    sig,
                    normal_return,
                    func.dfg.exception_tables[exception]
                        .items()
                        .chain(func.dfg.exception_tables[call_exception_table].items()),
                )
                .deep_clone(&mut func.dfg.value_lists);

                // The merged table replaces the inlined call's table in
                // place, so the instruction itself does not change.
                func.dfg.exception_tables[exception] = exception_data;
            }

            otherwise => unreachable!("unknown non-return call instruction: {otherwise:?}"),
        }
    }
}

/// After having created an inlined version of a callee instruction that returns
/// in the caller, we need to fix it up so that it doesn't actually return
/// (since we are already in the caller's frame) and instead just jumps to the
/// control-flow join point.
766 fn fixup_inst_that_returns( 767 allocs: &mut InliningAllocs, 768 func: &mut ir::Function, 769 callee: &ir::Function, 770 entity_map: &EntityMap, 771 call_opcode: ir::Opcode, 772 inlined_inst: ir::Inst, 773 callee_inst: ir::Inst, 774 return_block: ir::Block, 775 call_stack_map: Option<&[ir::UserStackMapEntry]>, 776 ) { 777 debug_assert!(func.dfg.insts[inlined_inst].opcode().is_return()); 778 match func.dfg.insts[inlined_inst] { 779 // return rets... 780 // 781 // becomes 782 // 783 // jump return_block(rets...) 784 ir::InstructionData::MultiAry { 785 opcode: ir::Opcode::Return, 786 args, 787 } => { 788 let rets = SmallBlockArgVec::from_iter( 789 args.as_slice(&func.dfg.value_lists) 790 .iter() 791 .copied() 792 .map(|v| v.into()), 793 ); 794 func.dfg.replace(inlined_inst).jump(return_block, &rets); 795 } 796 797 // return_call f(args...) 798 // 799 // becomes 800 // 801 // rets... = call f(args...) 802 // jump return_block(rets...) 803 ir::InstructionData::Call { 804 opcode: ir::Opcode::ReturnCall, 805 args, 806 func_ref, 807 } => { 808 func.dfg.insts[inlined_inst] = ir::InstructionData::Call { 809 opcode: ir::Opcode::Call, 810 args, 811 func_ref, 812 }; 813 func.dfg.make_inst_results(inlined_inst, ir::types::INVALID); 814 815 append_stack_map_entries( 816 func, 817 callee, 818 &entity_map, 819 call_stack_map, 820 inlined_inst, 821 callee_inst, 822 ); 823 824 let rets = SmallBlockArgVec::from_iter( 825 func.dfg 826 .inst_results(inlined_inst) 827 .iter() 828 .copied() 829 .map(|v| v.into()), 830 ); 831 let mut cursor = FuncCursor::new(func); 832 cursor.goto_after_inst(inlined_inst); 833 cursor.ins().jump(return_block, &rets); 834 835 if call_opcode == ir::Opcode::TryCall { 836 allocs 837 .calls_needing_exception_table_fixup 838 .push(inlined_inst); 839 } 840 } 841 842 // return_call_indirect val(args...) 843 // 844 // becomes 845 // 846 // rets... = call_indirect val(args...) 847 // jump return_block(rets...) 
848 ir::InstructionData::CallIndirect { 849 opcode: ir::Opcode::ReturnCallIndirect, 850 args, 851 sig_ref, 852 } => { 853 func.dfg.insts[inlined_inst] = ir::InstructionData::CallIndirect { 854 opcode: ir::Opcode::CallIndirect, 855 args, 856 sig_ref, 857 }; 858 func.dfg.make_inst_results(inlined_inst, ir::types::INVALID); 859 860 append_stack_map_entries( 861 func, 862 callee, 863 &entity_map, 864 call_stack_map, 865 inlined_inst, 866 callee_inst, 867 ); 868 869 let rets = SmallBlockArgVec::from_iter( 870 func.dfg 871 .inst_results(inlined_inst) 872 .iter() 873 .copied() 874 .map(|v| v.into()), 875 ); 876 let mut cursor = FuncCursor::new(func); 877 cursor.goto_after_inst(inlined_inst); 878 cursor.ins().jump(return_block, &rets); 879 880 if call_opcode == ir::Opcode::TryCall { 881 allocs 882 .calls_needing_exception_table_fixup 883 .push(inlined_inst); 884 } 885 } 886 887 inst_data => unreachable!( 888 "should have handled all `is_return() == true` instructions above; \ 889 got {inst_data:?}" 890 ), 891 } 892 } 893 894 /// An `InstructionMapper` implementation that remaps a callee instruction's 895 /// entity references to their new indices in the caller function. 
struct InliningInstRemapper<'a> {
    /// Inlining scratch state; consulted for the callee-value -> inlined-value
    /// mapping and for the callee-constant -> inlined-constant mapping.
    allocs: &'a InliningAllocs,
    /// The caller function. New value lists, jump tables, and exception tables
    /// are allocated inside its data-flow graph while remapping.
    func: &'a mut ir::Function,
    /// The callee function whose instructions are being remapped. Only read.
    callee: &'a ir::Function,
    /// Offset-based translation for blocks, global values, signatures,
    /// function references, stack slots, and immediates.
    entity_map: &'a EntityMap,
}

impl<'a> ir::instructions::InstructionMapper for InliningInstRemapper<'a> {
    /// Look up the inlined value that corresponds to a callee value.
    ///
    /// Panics if the callee value has not been given an inlined counterpart
    /// yet.
    fn map_value(&mut self, value: ir::Value) -> ir::Value {
        self.allocs.get_inlined_value(self.callee, value).expect(
            "defs come before uses; we should have already inlined all values \
             used by an instruction",
        )
    }

    /// Build a fresh value list in the caller's value-list pool that contains
    /// the inlined counterpart of every value in the callee's list.
    fn map_value_list(&mut self, value_list: ir::ValueList) -> ir::ValueList {
        let mut inlined_list = ir::ValueList::new();
        // The source list lives in the callee's pool; the destination list is
        // pushed into the caller's pool.
        for callee_val in value_list.as_slice(&self.callee.dfg.value_lists) {
            let inlined_val = self.map_value(*callee_val);
            inlined_list.push(inlined_val, &mut self.func.dfg.value_lists);
        }
        inlined_list
    }

    /// Translate a callee global value via the entity map.
    fn map_global_value(&mut self, global_value: ir::GlobalValue) -> ir::GlobalValue {
        self.entity_map.inlined_global_value(global_value)
    }

    /// Create a new jump table in the caller whose default target and entries
    /// are the remapped block calls of the callee's jump table.
    fn map_jump_table(&mut self, jump_table: ir::JumpTable) -> ir::JumpTable {
        let inlined_default =
            self.map_block_call(self.callee.dfg.jump_tables[jump_table].default_block());
        let inlined_table = self.callee.dfg.jump_tables[jump_table]
            .as_slice()
            .iter()
            .map(|callee_block_call| self.map_block_call(*callee_block_call))
            .collect::<SmallBlockCallVec>();
        self.func
            .dfg
            .jump_tables
            .push(ir::JumpTableData::new(inlined_default, &inlined_table))
    }

    /// Create a new exception table in the caller with the callee table's
    /// signature, normal-return block call, and items all remapped.
    fn map_exception_table(&mut self, exception_table: ir::ExceptionTable) -> ir::ExceptionTable {
        let exception_table = &self.callee.dfg.exception_tables[exception_table];
        let inlined_sig_ref = self.map_sig_ref(exception_table.signature());
        let inlined_normal_return = self.map_block_call(*exception_table.normal_return());
        let inlined_table = exception_table
            .items()
            .map(|item| match item {
                // Tags are copied as-is; only the handler's block call is
                // remapped.
                ExceptionTableItem::Tag(tag, block_call) => {
                    ExceptionTableItem::Tag(tag, self.map_block_call(block_call))
                }
                ExceptionTableItem::Default(block_call) => {
                    ExceptionTableItem::Default(self.map_block_call(block_call))
                }
                ExceptionTableItem::Context(value) => {
                    ExceptionTableItem::Context(self.map_value(value))
                }
            })
            .collect::<SmallVec<[_; 8]>>();
        self.func
            .dfg
            .exception_tables
            .push(ir::ExceptionTableData::new(
                inlined_sig_ref,
                inlined_normal_return,
                inlined_table,
            ))
    }

    /// Rebuild a callee block call in the caller: the target block is
    /// translated through the entity map and the arguments are stored in the
    /// caller's value-list pool.
    fn map_block_call(&mut self, block_call: ir::BlockCall) -> ir::BlockCall {
        let callee_block = block_call.block(&self.callee.dfg.value_lists);
        let inlined_block = self.entity_map.inlined_block(callee_block);
        let args = block_call
            .args(&self.callee.dfg.value_lists)
            .map(|arg| match arg {
                ir::BlockArg::Value(value) => self.map_value(value).into(),
                // These arguments carry an index rather than a value, so they
                // are forwarded unchanged.
                ir::BlockArg::TryCallRet(_) | ir::BlockArg::TryCallExn(_) => arg,
            })
            .collect::<SmallBlockArgVec>();
        ir::BlockCall::new(inlined_block, args, &mut self.func.dfg.value_lists)
    }

    /// Translate a callee block via the entity map.
    fn map_block(&mut self, block: ir::Block) -> ir::Block {
        self.entity_map.inlined_block(block)
    }

    /// Translate a callee function reference via the entity map.
    fn map_func_ref(&mut self, func_ref: ir::FuncRef) -> ir::FuncRef {
        self.entity_map.inlined_func_ref(func_ref)
    }

    /// Translate a callee signature reference via the entity map.
    fn map_sig_ref(&mut self, sig_ref: ir::SigRef) -> ir::SigRef {
        self.entity_map.inlined_sig_ref(sig_ref)
    }

    /// Translate a callee stack slot via the entity map.
    fn map_stack_slot(&mut self, stack_slot: ir::StackSlot) -> ir::StackSlot {
        self.entity_map.inlined_stack_slot(stack_slot)
    }

    /// Translate a callee dynamic stack slot via the entity map.
    fn map_dynamic_stack_slot(
        &mut self,
        dynamic_stack_slot: ir::DynamicStackSlot,
    ) -> ir::DynamicStackSlot {
        self.entity_map
            .inlined_dynamic_stack_slot(dynamic_stack_slot)
    }

    /// Look up the inlined constant for a callee constant.
    ///
    /// Constants are not translated by offset; they are looked up in
    /// `allocs.constants`. Panics if the callee constant has no entry there.
    fn map_constant(&mut self, constant: ir::Constant) -> ir::Constant {
        self.allocs
            .constants
            .get(constant)
            .and_then(|o| o.expand())
            .expect("should have inlined all callee constants")
    }

    /// Translate a callee immediate via the entity map.
    fn map_immediate(&mut self, immediate: ir::Immediate) -> ir::Immediate {
        self.entity_map.inlined_immediate(immediate)
    }
}

/// Inline the callee's layout into the caller's layout.
///
/// The inlined blocks are inserted directly after `call_block`, in the same
/// relative order as the callee's layout.
///
/// Returns the last inlined block in the layout (or `call_block` itself when
/// the callee's layout has no blocks).
fn inline_block_layout(
    func: &mut ir::Function,
    call_block: ir::Block,
    callee: &ir::Function,
    entity_map: &EntityMap,
) -> ir::Block {
    debug_assert!(func.layout.is_block_inserted(call_block));

    // Iterate over callee blocks in layout order, inserting their associated
    // inlined block into the caller's layout.
    let mut prev_inlined_block = call_block;
    let mut next_callee_block = callee.layout.entry_block();
    while let Some(callee_block) = next_callee_block {
        debug_assert!(func.layout.is_block_inserted(prev_inlined_block));

        let inlined_block = entity_map.inlined_block(callee_block);
        func.layout
            .insert_block_after(inlined_block, prev_inlined_block);

        prev_inlined_block = inlined_block;
        next_callee_block = callee.layout.next_block(callee_block);
    }

    debug_assert!(func.layout.is_block_inserted(prev_inlined_block));
    prev_inlined_block
}

/// Split the call instruction's block just after the call instruction to create
/// the point where control-flow joins after the inlined callee "returns".
///
/// Note that tail calls do not return to the caller and therefore do not have a
/// control-flow join point.
///
/// Returns `Some(block)` with the join block whose parameters receive the
/// callee's return values, or `None` when the call instruction is a block
/// terminator other than a `try_call`.
fn split_off_return_block(
    func: &mut ir::Function,
    call_inst: ir::Inst,
    opcode: ir::Opcode,
    callee: &ir::Function,
) -> Option<ir::Block> {
    // When the `call_inst` is not a block terminator, we need to split the
    // block.
    let return_block = func.layout.next_inst(call_inst).map(|next_inst| {
        let return_block = func.dfg.blocks.add();
        func.layout.split_block(return_block, next_inst);

        // Add block parameters for each return value and alias the call
        // instruction's results to them.
        let old_results =
            SmallValueVec::from_iter(func.dfg.inst_results(call_inst).iter().copied());
        debug_assert_eq!(old_results.len(), callee.signature.returns.len());
        // The results are detached first so that the old result values can be
        // turned into aliases of the new block parameters below.
        func.dfg.detach_inst_results(call_inst);
        for (abi, old_val) in callee.signature.returns.iter().zip(old_results) {
            debug_assert_eq!(abi.value_type, func.dfg.value_type(old_val));
            let ret_param = func.dfg.append_block_param(return_block, abi.value_type);
            func.dfg.change_to_alias(old_val, ret_param);
        }

        return_block
    });

    // When the `call_inst` is a block terminator, then it is either a
    // `return_call` or a `try_call`:
    //
    // * For `return_call`s, we don't have a control-flow join point, because
    //   the caller permanently transfers control to the callee.
    //
    // * For `try_call`s, we probably already have a block for the control-flow
    //   join point, but it isn't guaranteed: the `try_call` might ignore the
    //   call's returns and not forward them to the normal-return block or it
    //   might also pass additional arguments. We can only reuse the existing
    //   normal-return block when the `try_call` forwards exactly our callee's
    //   returns to that block (and therefore that block's parameter types also
    //   exactly match the callee's return types). Otherwise, we must create a new
    //   return block that forwards to the existing normal-return
    //   block. (Elsewhere, at the end of inlining, we will also update any inlined
    //   calls to forward any raised exceptions to the caller's exception table,
    //   as necessary.)
    //
    //   Finally, note that reusing the normal-return's target block is just an
    //   optimization to emit a simpler CFG when we can, and is not
    //   fundamentally required for correctness. We could always insert a
    //   temporary block as our control-flow join point that then forwards to
    //   the normal-return's target block. However, at the time of writing,
    //   Cranelift doesn't currently do any jump-threading or branch
    //   simplification in the mid-end, and removing unnecessary blocks in this
    //   way can help some subsequent mid-end optimizations. If, in the future,
    //   we gain support for jump-threading optimizations in the mid-end, we can
    //   come back and simplify the below code a bit to always generate the
    //   temporary block, and then rely on the subsequent optimizations to clean
    //   everything up.
    debug_assert_eq!(
        return_block.is_none(),
        opcode == ir::Opcode::ReturnCall || opcode == ir::Opcode::TryCall,
    );
    return_block.or_else(|| match func.dfg.insts[call_inst] {
        ir::InstructionData::TryCall {
            opcode: ir::Opcode::TryCall,
            args: _,
            func_ref: _,
            exception,
        } => {
            let normal_return = func.dfg.exception_tables[exception].normal_return();
            let normal_return_block = normal_return.block(&func.dfg.value_lists);

            // Check to see if we can reuse the existing normal-return block.
            // That requires the normal-return edge to pass exactly
            // `TryCallRet(0), TryCallRet(1), ...`, one per callee return, in
            // order.
            {
                let normal_return_args = normal_return.args(&func.dfg.value_lists);
                if normal_return_args.len() == callee.signature.returns.len()
                    && normal_return_args.enumerate().all(|(i, arg)| {
                        let i = u32::try_from(i).unwrap();
                        arg == ir::BlockArg::TryCallRet(i)
                    })
                {
                    return Some(normal_return_block);
                }
            }

            // Okay, we cannot reuse the normal-return block. Create a new block
            // that has the expected block parameter types and have it jump to
            // the normal-return block.
            let return_block = func.dfg.blocks.add();
            func.layout.insert_block(return_block, normal_return_block);

            let return_block_params = callee
                .signature
                .returns
                .iter()
                .map(|abi| func.dfg.append_block_param(return_block, abi.value_type))
                .collect::<SmallValueVec>();

            // Rewrite the original normal-return arguments into plain values
            // for the forwarding jump: `TryCallRet(i)` becomes the new block's
            // `i`th parameter, and ordinary values pass through unchanged.
            let normal_return_args = func.dfg.exception_tables[exception]
                .normal_return()
                .args(&func.dfg.value_lists)
                .collect::<SmallBlockArgVec>();
            let jump_args = normal_return_args
                .into_iter()
                .map(|arg| match arg {
                    ir::BlockArg::Value(value) => ir::BlockArg::Value(value),
                    ir::BlockArg::TryCallRet(i) => {
                        let i = usize::try_from(i).unwrap();
                        ir::BlockArg::Value(return_block_params[i])
                    }
                    ir::BlockArg::TryCallExn(_) => {
                        unreachable!("normal-return edges cannot use exceptional results")
                    }
                })
                .collect::<SmallBlockArgVec>();

            let mut cursor = FuncCursor::new(func);
            cursor.goto_first_insertion_point(return_block);
            cursor.ins().jump(normal_return_block, &jump_args);

            Some(return_block)
        }
        _ => None,
    })
}

/// Replace the caller's call instruction with a jump to the caller's inlined
/// copy of the callee's entry block.
///
/// Also associates the callee's parameters with the caller's arguments in our
/// value map.
///
/// Returns the caller's stack map entries, if any.
1183 fn replace_call_with_jump( 1184 allocs: &mut InliningAllocs, 1185 func: &mut ir::Function, 1186 call_inst: ir::Inst, 1187 callee: &ir::Function, 1188 entity_map: &EntityMap, 1189 ) -> Option<ir::UserStackMapEntryVec> { 1190 trace!("Replacing `call` with `jump`"); 1191 trace!( 1192 " --> call instruction: {call_inst:?}: {}", 1193 func.dfg.display_inst(call_inst) 1194 ); 1195 1196 let callee_entry_block = callee 1197 .layout 1198 .entry_block() 1199 .expect("callee function should have an entry block"); 1200 let callee_param_values = callee.dfg.block_params(callee_entry_block); 1201 let caller_arg_values = SmallValueVec::from_iter(func.dfg.inst_args(call_inst).iter().copied()); 1202 debug_assert_eq!(callee_param_values.len(), caller_arg_values.len()); 1203 debug_assert_eq!(callee_param_values.len(), callee.signature.params.len()); 1204 for (abi, (callee_param_value, caller_arg_value)) in callee 1205 .signature 1206 .params 1207 .iter() 1208 .zip(callee_param_values.into_iter().zip(caller_arg_values)) 1209 { 1210 debug_assert_eq!(abi.value_type, callee.dfg.value_type(*callee_param_value)); 1211 debug_assert_eq!(abi.value_type, func.dfg.value_type(caller_arg_value)); 1212 allocs.set_inlined_value(callee, *callee_param_value, caller_arg_value); 1213 } 1214 1215 // Replace the caller's call instruction with a jump to the caller's inlined 1216 // copy of the callee's entry block. 1217 // 1218 // Note that the call block dominates the inlined entry block (and also all 1219 // other inlined blocks) so we can reference the arguments directly, and do 1220 // not need to add block parameters to the inlined entry block. 
1221 let inlined_entry_block = entity_map.inlined_block(callee_entry_block); 1222 func.dfg.replace(call_inst).jump(inlined_entry_block, &[]); 1223 trace!( 1224 " --> replaced with jump instruction: {call_inst:?}: {}", 1225 func.dfg.display_inst(call_inst) 1226 ); 1227 1228 let stack_map_entries = func.dfg.take_user_stack_map_entries(call_inst); 1229 stack_map_entries 1230 } 1231 1232 /// Keeps track of mapping callee entities to their associated inlined caller 1233 /// entities. 1234 #[derive(Default)] 1235 struct EntityMap { 1236 // Rather than doing an implicit, demand-based, DCE'ing translation of 1237 // entities, which would require maps from each callee entity to its 1238 // associated caller entity, we copy all entities into the caller, remember 1239 // each entity's initial offset, and then mapping from the callee to the 1240 // inlined caller entity is just adding that initial offset to the callee's 1241 // index. This should be both faster and simpler than the alternative. Most 1242 // of these sets are relatively small, and they rarely have too much dead 1243 // code in practice, so this is a good trade off. 1244 // 1245 // Note that there are a few kinds of entities that are excluded from the 1246 // `EntityMap`, and for which we do actually take the demand-based approach: 1247 // values and value lists being the notable ones. 
1248 block_offset: Option<u32>, 1249 global_value_offset: Option<u32>, 1250 sig_ref_offset: Option<u32>, 1251 func_ref_offset: Option<u32>, 1252 stack_slot_offset: Option<u32>, 1253 dynamic_type_offset: Option<u32>, 1254 dynamic_stack_slot_offset: Option<u32>, 1255 immediate_offset: Option<u32>, 1256 } 1257 1258 impl EntityMap { 1259 fn inlined_block(&self, callee_block: ir::Block) -> ir::Block { 1260 let offset = self 1261 .block_offset 1262 .expect("must create inlined `ir::Block`s before calling `EntityMap::inlined_block`"); 1263 ir::Block::from_u32(offset + callee_block.as_u32()) 1264 } 1265 1266 fn iter_inlined_blocks(&self, func: &ir::Function) -> impl Iterator<Item = ir::Block> + use<> { 1267 let start = self.block_offset.expect( 1268 "must create inlined `ir::Block`s before calling `EntityMap::iter_inlined_blocks`", 1269 ); 1270 1271 let end = func.dfg.blocks.len(); 1272 let end = u32::try_from(end).unwrap(); 1273 1274 (start..end).map(|i| ir::Block::from_u32(i)) 1275 } 1276 1277 fn inlined_global_value(&self, callee_global_value: ir::GlobalValue) -> ir::GlobalValue { 1278 let offset = self 1279 .global_value_offset 1280 .expect("must create inlined `ir::GlobalValue`s before calling `EntityMap::inlined_global_value`"); 1281 ir::GlobalValue::from_u32(offset + callee_global_value.as_u32()) 1282 } 1283 1284 fn inlined_sig_ref(&self, callee_sig_ref: ir::SigRef) -> ir::SigRef { 1285 let offset = self.sig_ref_offset.expect( 1286 "must create inlined `ir::SigRef`s before calling `EntityMap::inlined_sig_ref`", 1287 ); 1288 ir::SigRef::from_u32(offset + callee_sig_ref.as_u32()) 1289 } 1290 1291 fn inlined_func_ref(&self, callee_func_ref: ir::FuncRef) -> ir::FuncRef { 1292 let offset = self.func_ref_offset.expect( 1293 "must create inlined `ir::FuncRef`s before calling `EntityMap::inlined_func_ref`", 1294 ); 1295 ir::FuncRef::from_u32(offset + callee_func_ref.as_u32()) 1296 } 1297 1298 fn inlined_stack_slot(&self, callee_stack_slot: ir::StackSlot) -> ir::StackSlot { 
1299 let offset = self.stack_slot_offset.expect( 1300 "must create inlined `ir::StackSlot`s before calling `EntityMap::inlined_stack_slot`", 1301 ); 1302 ir::StackSlot::from_u32(offset + callee_stack_slot.as_u32()) 1303 } 1304 1305 fn inlined_dynamic_type(&self, callee_dynamic_type: ir::DynamicType) -> ir::DynamicType { 1306 let offset = self.dynamic_type_offset.expect( 1307 "must create inlined `ir::DynamicType`s before calling `EntityMap::inlined_dynamic_type`", 1308 ); 1309 ir::DynamicType::from_u32(offset + callee_dynamic_type.as_u32()) 1310 } 1311 1312 fn inlined_dynamic_stack_slot( 1313 &self, 1314 callee_dynamic_stack_slot: ir::DynamicStackSlot, 1315 ) -> ir::DynamicStackSlot { 1316 let offset = self.dynamic_stack_slot_offset.expect( 1317 "must create inlined `ir::DynamicStackSlot`s before calling `EntityMap::inlined_dynamic_stack_slot`", 1318 ); 1319 ir::DynamicStackSlot::from_u32(offset + callee_dynamic_stack_slot.as_u32()) 1320 } 1321 1322 fn inlined_immediate(&self, callee_immediate: ir::Immediate) -> ir::Immediate { 1323 let offset = self.immediate_offset.expect( 1324 "must create inlined `ir::Immediate`s before calling `EntityMap::inlined_immediate`", 1325 ); 1326 ir::Immediate::from_u32(offset + callee_immediate.as_u32()) 1327 } 1328 } 1329 1330 /// Translate all of the callee's various entities into the caller, producing an 1331 /// `EntityMap` that can be used to translate callee entity references into 1332 /// inlined caller entity references. 
1333 fn create_entities( 1334 allocs: &mut InliningAllocs, 1335 func: &mut ir::Function, 1336 callee: &ir::Function, 1337 ) -> EntityMap { 1338 let mut entity_map = EntityMap::default(); 1339 1340 entity_map.block_offset = Some(create_blocks(allocs, func, callee)); 1341 entity_map.global_value_offset = Some(create_global_values(func, callee)); 1342 entity_map.sig_ref_offset = Some(create_sig_refs(func, callee)); 1343 create_user_external_name_refs(allocs, func, callee); 1344 entity_map.func_ref_offset = Some(create_func_refs(allocs, func, callee, &entity_map)); 1345 entity_map.stack_slot_offset = Some(create_stack_slots(func, callee)); 1346 entity_map.dynamic_type_offset = Some(create_dynamic_types(func, callee, &entity_map)); 1347 entity_map.dynamic_stack_slot_offset = 1348 Some(create_dynamic_stack_slots(func, callee, &entity_map)); 1349 entity_map.immediate_offset = Some(create_immediates(func, callee)); 1350 1351 // `ir::ConstantData` is deduplicated, so we cannot use our offset scheme 1352 // for `ir::Constant`s. Nonetheless, we still insert them into the caller 1353 // now, at the same time as the rest of our entities. 1354 create_constants(allocs, func, callee); 1355 1356 entity_map 1357 } 1358 1359 /// Create inlined blocks in the caller for every block in the callee. 
1360 fn create_blocks( 1361 allocs: &mut InliningAllocs, 1362 func: &mut ir::Function, 1363 callee: &ir::Function, 1364 ) -> u32 { 1365 let offset = func.dfg.blocks.len(); 1366 let offset = u32::try_from(offset).unwrap(); 1367 1368 func.dfg.blocks.reserve(callee.dfg.blocks.len()); 1369 for callee_block in callee.dfg.blocks.iter() { 1370 let caller_block = func.dfg.blocks.add(); 1371 trace!("Callee {callee_block:?} = inlined {caller_block:?}"); 1372 1373 if callee.layout.is_cold(callee_block) { 1374 func.layout.set_cold(caller_block); 1375 } 1376 1377 // Note: the entry block does not need parameters because the only 1378 // predecessor is the call block and we associate the callee's 1379 // parameters with the caller's arguments directly. 1380 if callee.layout.entry_block() != Some(callee_block) { 1381 for callee_param in callee.dfg.blocks[callee_block].params(&callee.dfg.value_lists) { 1382 let ty = callee.dfg.value_type(*callee_param); 1383 let caller_param = func.dfg.append_block_param(caller_block, ty); 1384 1385 allocs.set_inlined_value(callee, *callee_param, caller_param); 1386 } 1387 } 1388 } 1389 1390 offset 1391 } 1392 1393 /// Copy and translate global values from the callee into the caller. 1394 fn create_global_values(func: &mut ir::Function, callee: &ir::Function) -> u32 { 1395 let gv_offset = func.global_values.len(); 1396 let gv_offset = u32::try_from(gv_offset).unwrap(); 1397 1398 func.global_values.reserve(callee.global_values.len()); 1399 for gv in callee.global_values.values() { 1400 func.global_values.push(match gv { 1401 // These kinds of global values reference other global values, so we 1402 // need to fixup that reference. 
1403 ir::GlobalValueData::Load { 1404 base, 1405 offset, 1406 global_type, 1407 flags, 1408 } => ir::GlobalValueData::Load { 1409 base: ir::GlobalValue::from_u32(base.as_u32() + gv_offset), 1410 offset: *offset, 1411 global_type: *global_type, 1412 flags: *flags, 1413 }, 1414 ir::GlobalValueData::IAddImm { 1415 base, 1416 offset, 1417 global_type, 1418 } => ir::GlobalValueData::IAddImm { 1419 base: ir::GlobalValue::from_u32(base.as_u32() + gv_offset), 1420 offset: *offset, 1421 global_type: *global_type, 1422 }, 1423 1424 // These kinds of global values do not reference other global 1425 // values, so we can just clone them. 1426 ir::GlobalValueData::VMContext 1427 | ir::GlobalValueData::Symbol { .. } 1428 | ir::GlobalValueData::DynScaleTargetConst { .. } => gv.clone(), 1429 }); 1430 } 1431 1432 gv_offset 1433 } 1434 1435 /// Copy `ir::SigRef`s from the callee into the caller. 1436 fn create_sig_refs(func: &mut ir::Function, callee: &ir::Function) -> u32 { 1437 let offset = func.dfg.signatures.len(); 1438 let offset = u32::try_from(offset).unwrap(); 1439 1440 func.dfg.signatures.reserve(callee.dfg.signatures.len()); 1441 for sig in callee.dfg.signatures.values() { 1442 func.dfg.signatures.push(sig.clone()); 1443 } 1444 1445 offset 1446 } 1447 1448 fn create_user_external_name_refs( 1449 allocs: &mut InliningAllocs, 1450 func: &mut ir::Function, 1451 callee: &ir::Function, 1452 ) { 1453 for (callee_named_func_ref, name) in callee.params.user_named_funcs().iter() { 1454 let caller_named_func_ref = func.declare_imported_user_function(name.clone()); 1455 allocs.user_external_name_refs[callee_named_func_ref] = Some(caller_named_func_ref).into(); 1456 } 1457 } 1458 1459 /// Translate `ir::FuncRef`s from the callee into the caller. 
1460 fn create_func_refs( 1461 allocs: &InliningAllocs, 1462 func: &mut ir::Function, 1463 callee: &ir::Function, 1464 entity_map: &EntityMap, 1465 ) -> u32 { 1466 let offset = func.dfg.ext_funcs.len(); 1467 let offset = u32::try_from(offset).unwrap(); 1468 1469 func.dfg.ext_funcs.reserve(callee.dfg.ext_funcs.len()); 1470 for ir::ExtFuncData { 1471 name, 1472 signature, 1473 colocated, 1474 } in callee.dfg.ext_funcs.values() 1475 { 1476 func.dfg.ext_funcs.push(ir::ExtFuncData { 1477 name: match name { 1478 ir::ExternalName::User(name_ref) => { 1479 ir::ExternalName::User(allocs.user_external_name_refs[*name_ref].expect( 1480 "should have translated all `ir::UserExternalNameRef`s before translating \ 1481 `ir::FuncRef`s", 1482 )) 1483 } 1484 ir::ExternalName::TestCase(_) 1485 | ir::ExternalName::LibCall(_) 1486 | ir::ExternalName::KnownSymbol(_) => name.clone(), 1487 }, 1488 signature: entity_map.inlined_sig_ref(*signature), 1489 colocated: *colocated, 1490 }); 1491 } 1492 1493 offset 1494 } 1495 1496 /// Copy stack slots from the callee into the caller. 1497 fn create_stack_slots(func: &mut ir::Function, callee: &ir::Function) -> u32 { 1498 let offset = func.sized_stack_slots.len(); 1499 let offset = u32::try_from(offset).unwrap(); 1500 1501 func.sized_stack_slots 1502 .reserve(callee.sized_stack_slots.len()); 1503 for slot in callee.sized_stack_slots.values() { 1504 func.sized_stack_slots.push(slot.clone()); 1505 } 1506 1507 offset 1508 } 1509 1510 /// Copy dynamic types from the callee into the caller. 
1511 fn create_dynamic_types( 1512 func: &mut ir::Function, 1513 callee: &ir::Function, 1514 entity_map: &EntityMap, 1515 ) -> u32 { 1516 let offset = func.dynamic_stack_slots.len(); 1517 let offset = u32::try_from(offset).unwrap(); 1518 1519 func.dfg 1520 .dynamic_types 1521 .reserve(callee.dfg.dynamic_types.len()); 1522 for ir::DynamicTypeData { 1523 base_vector_ty, 1524 dynamic_scale, 1525 } in callee.dfg.dynamic_types.values() 1526 { 1527 func.dfg.dynamic_types.push(ir::DynamicTypeData { 1528 base_vector_ty: *base_vector_ty, 1529 dynamic_scale: entity_map.inlined_global_value(*dynamic_scale), 1530 }); 1531 } 1532 1533 offset 1534 } 1535 1536 /// Copy dynamic stack slots from the callee into the caller. 1537 fn create_dynamic_stack_slots( 1538 func: &mut ir::Function, 1539 callee: &ir::Function, 1540 entity_map: &EntityMap, 1541 ) -> u32 { 1542 let offset = func.dynamic_stack_slots.len(); 1543 let offset = u32::try_from(offset).unwrap(); 1544 1545 func.dynamic_stack_slots 1546 .reserve(callee.dynamic_stack_slots.len()); 1547 for ir::DynamicStackSlotData { kind, dyn_ty } in callee.dynamic_stack_slots.values() { 1548 func.dynamic_stack_slots.push(ir::DynamicStackSlotData { 1549 kind: *kind, 1550 dyn_ty: entity_map.inlined_dynamic_type(*dyn_ty), 1551 }); 1552 } 1553 1554 offset 1555 } 1556 1557 /// Copy immediates from the callee into the caller. 1558 fn create_immediates(func: &mut ir::Function, callee: &ir::Function) -> u32 { 1559 let offset = func.dfg.immediates.len(); 1560 let offset = u32::try_from(offset).unwrap(); 1561 1562 func.dfg.immediates.reserve(callee.dfg.immediates.len()); 1563 for imm in callee.dfg.immediates.values() { 1564 func.dfg.immediates.push(imm.clone()); 1565 } 1566 1567 offset 1568 } 1569 1570 /// Copy constants from the callee into the caller. 
1571 fn create_constants(allocs: &mut InliningAllocs, func: &mut ir::Function, callee: &ir::Function) { 1572 for (callee_constant, data) in callee.dfg.constants.iter() { 1573 let inlined_constant = func.dfg.constants.insert(data.clone()); 1574 allocs.constants[*callee_constant] = Some(inlined_constant).into(); 1575 } 1576 } 1577