1 //! Function inlining infrastructure.
2 //!
3 //! This module provides "inlining as a library" to Cranelift users; it does
4 //! _not_ provide a complete, off-the-shelf inlining solution. Cranelift's
5 //! compilation context is per-function and does not encompass the full call
6 //! graph. It does not know which functions are hot and which are cold, which
7 //! have been marked the equivalent of `#[inline(never)]`, etc... Only the
8 //! Cranelift user can understand these aspects of the full compilation
9 //! pipeline, and these things can be very different between (say) Wasmtime and
//! `cg_clif`. Therefore, this module does not attempt to define heuristics for
11 //! when inlining a particular call is likely beneficial. This module only
12 //! provides hooks for the Cranelift user to define whether a given call should
13 //! be inlined or not, and the mechanics to inline a callee into a particular
14 //! call site when directed to do so by the Cranelift user.
15 //!
16 //! The top-level inlining entry point during Cranelift compilation is
17 //! [`Context::inline`][crate::Context::inline]. It takes an [`Inline`] trait
18 //! implementation, which is authored by the Cranelift user and directs
19 //! Cranelift whether to inline a particular call, and, when inlining, gives
20 //! Cranelift the body of the callee that is to be inlined.
21 
22 use crate::cursor::{Cursor as _, FuncCursor};
23 use crate::ir::{self, DebugTag, ExceptionTableData, ExceptionTableItem, InstBuilder as _};
24 use crate::result::CodegenResult;
25 use crate::trace;
26 use crate::traversals::Dfs;
27 use alloc::borrow::Cow;
28 use alloc::vec::Vec;
29 use cranelift_entity::{SecondaryMap, packed_option::PackedOption};
30 use smallvec::SmallVec;
31 
/// A vector of `ir::Value`s that stores up to 8 elements inline before
/// spilling to the heap.
type SmallValueVec = SmallVec<[ir::Value; 8]>;
/// A vector of `ir::BlockArg`s that stores up to 8 elements inline before
/// spilling to the heap.
type SmallBlockArgVec = SmallVec<[ir::BlockArg; 8]>;
/// A vector of `ir::BlockCall`s that stores up to 8 elements inline before
/// spilling to the heap.
type SmallBlockCallVec = SmallVec<[ir::BlockCall; 8]>;
35 
/// A command directing Cranelift whether or not to inline a particular call.
///
/// This is the value returned from the [`Inline::inline`] hook for each call
/// site that is considered for inlining. The `'a` lifetime allows the callee
/// body to be handed over either borrowed or owned, via [`Cow`].
pub enum InlineCommand<'a> {
    /// Keep the call as-is, out-of-line, and do not inline the callee.
    KeepCall,

    /// Inline the call, using this function as the body of the callee.
    ///
    /// It is the `Inline` implementor's responsibility to ensure that this
    /// function is the correct callee. Providing the wrong function may result
    /// in panics during compilation or incorrect runtime behavior.
    Inline {
        /// The callee function's body, either borrowed from the `Inline`
        /// implementation or owned by this command.
        callee: Cow<'a, ir::Function>,
        /// Whether to visit any function calls within the callee body after
        /// inlining and consider them for further inlining.
        ///
        /// When `false`, the inlining traversal resumes after the last block
        /// that was inlined for this call, so calls inside the inlined body
        /// are not offered to the `Inline` implementation.
        visit_callee: bool,
    },
}
54 
/// A trait for directing Cranelift whether to inline a particular call or not.
///
/// Used in combination with the [`Context::inline`][crate::Context::inline]
/// method.
pub trait Inline {
    /// A hook invoked for each direct call instruction in a function, whose
    /// result determines whether Cranelift should inline a given call.
    ///
    /// The Cranelift user is responsible for defining their own heuristics and
    /// deciding whether inlining the call is beneficial.
    ///
    /// # Parameters
    ///
    /// * `caller`: the function that contains the call site.
    ///
    /// * `call_inst`: the call instruction, inside `caller`, that is being
    ///   considered for inlining.
    ///
    /// * `call_opcode`: the opcode of `call_inst`.
    ///
    /// * `callee`: the `FuncRef`, within `caller`, of the function being
    ///   called.
    ///
    /// * `call_args`: the argument values of `call_inst`.
    ///
    /// # Returns
    ///
    /// [`InlineCommand::KeepCall`] to leave the call site untouched, or
    /// [`InlineCommand::Inline`] carrying the callee's body to inline it.
    ///
    /// # Requirements
    ///
    /// When returning a function and directing Cranelift to inline its body
    /// into the call site, the `Inline` implementer must ensure the following:
    ///
    /// * The returned function's signature exactly matches the `callee`
    ///   `FuncRef`'s signature.
    ///
    /// * The returned function must be legalized.
    ///
    /// * The returned function must be valid (i.e. it must pass the CLIF
    ///   verifier).
    ///
    /// * The returned function is a correct and valid implementation of the
    ///   `callee` according to your language's semantics.
    ///
    /// Failure to uphold these invariants may result in panics during
    /// compilation or incorrect runtime behavior in the generated code.
    fn inline(
        &mut self,
        caller: &ir::Function,
        call_inst: ir::Inst,
        call_opcode: ir::Opcode,
        callee: ir::FuncRef,
        call_args: &[ir::Value],
    ) -> InlineCommand<'_>;
}
91 
92 impl<'a, T> Inline for &'a mut T
93 where
94     T: Inline,
95 {
96     fn inline(
97         &mut self,
98         caller: &ir::Function,
99         inst: ir::Inst,
100         opcode: ir::Opcode,
101         callee: ir::FuncRef,
102         args: &[ir::Value],
103     ) -> InlineCommand<'_> {
104         (*self).inline(caller, inst, opcode, callee, args)
105     }
106 }
107 
108 /// Walk the given function, invoke the `Inline` implementation for each call
109 /// instruction, and inline the callee when directed to do so.
110 ///
111 /// Returns whether any call was inlined.
112 pub(crate) fn do_inlining(
113     func: &mut ir::Function,
114     mut inliner: impl Inline,
115 ) -> CodegenResult<bool> {
116     trace!("function {} before inlining: {}", func.name, func);
117 
118     let mut inlined_any = false;
119     let mut allocs = InliningAllocs::default();
120 
121     let mut cursor = FuncCursor::new(func);
122     'block_loop: while let Some(block) = cursor.next_block() {
123         // Always keep track of our previous cursor position. Assuming that the
124         // current position is a function call that we will inline, then the
125         // previous position is just before the inlined callee function. After
126         // inlining a call, the Cranelift user can decide whether to consider
127         // any function calls in the inlined callee for further inlining or
128         // not. When they do, then we back up to this previous cursor position
129         // so that our traversal will then continue over the inlined body.
130         let mut prev_pos;
131 
132         while let Some(inst) = {
133             prev_pos = cursor.position();
134             cursor.next_inst()
135         } {
136             // Make sure that `block` is always `inst`'s block, even with all of
137             // our cursor-position-updating and block-splitting-during-inlining
138             // shenanigans below.
139             debug_assert_eq!(Some(block), cursor.func.layout.inst_block(inst));
140 
141             match cursor.func.dfg.insts[inst] {
142                 ir::InstructionData::Call { func_ref, .. }
143                     if cursor.func.dfg.ext_funcs[func_ref].patchable =>
144                 {
145                     // Can't inline patchable calls; they need to
146                     // remain patchable and inlining the whole body is
147                     // decidedly *not* patchable!
148                 }
149 
150                 ir::InstructionData::Call {
151                     opcode: opcode @ ir::Opcode::Call | opcode @ ir::Opcode::ReturnCall,
152                     args: _,
153                     func_ref,
154                 } => {
155                     trace!(
156                         "considering call site for inlining: {inst}: {}",
157                         cursor.func.dfg.display_inst(inst),
158                     );
159                     let args = cursor.func.dfg.inst_args(inst);
160                     match inliner.inline(&cursor.func, inst, opcode, func_ref, args) {
161                         InlineCommand::KeepCall => {
162                             trace!("  --> keeping call");
163                         }
164                         InlineCommand::Inline {
165                             callee,
166                             visit_callee,
167                         } => {
168                             let last_inlined_block = inline_one(
169                                 &mut allocs,
170                                 cursor.func,
171                                 func_ref,
172                                 block,
173                                 inst,
174                                 opcode,
175                                 &callee,
176                                 None,
177                             );
178                             inlined_any = true;
179                             if visit_callee {
180                                 cursor.set_position(prev_pos);
181                             } else {
182                                 // Arrange it so that the `next_block()` loop
183                                 // will continue to the next block that is not
184                                 // associated with the just-inlined callee.
185                                 cursor.goto_bottom(last_inlined_block);
186                                 continue 'block_loop;
187                             }
188                         }
189                     }
190                 }
191                 ir::InstructionData::TryCall {
192                     opcode: opcode @ ir::Opcode::TryCall,
193                     args: _,
194                     func_ref,
195                     exception,
196                 } => {
197                     trace!(
198                         "considering call site for inlining: {inst}: {}",
199                         cursor.func.dfg.display_inst(inst),
200                     );
201                     let args = cursor.func.dfg.inst_args(inst);
202                     match inliner.inline(&cursor.func, inst, opcode, func_ref, args) {
203                         InlineCommand::KeepCall => {
204                             trace!("  --> keeping call");
205                         }
206                         InlineCommand::Inline {
207                             callee,
208                             visit_callee,
209                         } => {
210                             let last_inlined_block = inline_one(
211                                 &mut allocs,
212                                 cursor.func,
213                                 func_ref,
214                                 block,
215                                 inst,
216                                 opcode,
217                                 &callee,
218                                 Some(exception),
219                             );
220                             inlined_any = true;
221                             if visit_callee {
222                                 cursor.set_position(prev_pos);
223                             } else {
224                                 // Arrange it so that the `next_block()` loop
225                                 // will continue to the next block that is not
226                                 // associated with the just-inlined callee.
227                                 cursor.goto_bottom(last_inlined_block);
228                                 continue 'block_loop;
229                             }
230                         }
231                     }
232                 }
233                 ir::InstructionData::CallIndirect { .. }
234                 | ir::InstructionData::TryCallIndirect { .. } => {
235                     // Can't inline indirect calls; need to have some earlier
236                     // pass rewrite them into direct calls first, when possible.
237                 }
238                 _ => {
239                     debug_assert!(
240                         !cursor.func.dfg.insts[inst].opcode().is_call(),
241                         "should have matched all call instructions, but found: {inst}: {}",
242                         cursor.func.dfg.display_inst(inst),
243                     );
244                 }
245             }
246         }
247     }
248 
249     if inlined_any {
250         trace!("function {} after inlining: {}", func.name, func);
251     } else {
252         trace!("function {} did not have any callees inlined", func.name);
253     }
254 
255     Ok(inlined_any)
256 }
257 
/// Scratch state used while inlining a single call.
///
/// One instance is created per `do_inlining` run and is `reset` at the start
/// of every `inline_one` invocation, so its storage is reused from one
/// inlined call to the next.
#[derive(Default)]
struct InliningAllocs {
    /// Map from callee value to inlined caller value.
    ///
    /// Entries are keyed by the alias-resolved callee value; see
    /// `set_inlined_value` and `get_inlined_value`.
    values: SecondaryMap<ir::Value, PackedOption<ir::Value>>,

    /// Map from callee constant to inlined caller constant.
    ///
    /// Not in `EntityMap` because these are hash-consed inside the
    /// `ir::Function`.
    constants: SecondaryMap<ir::Constant, PackedOption<ir::Constant>>,

    /// Map from callee to inlined caller external name refs.
    ///
    /// Not in `EntityMap` because these are hash-consed inside the
    /// `ir::Function`.
    user_external_name_refs:
        SecondaryMap<ir::UserExternalNameRef, PackedOption<ir::UserExternalNameRef>>,

    /// The set of _caller_ inlined call instructions that need exception table
    /// fixups at the end of inlining.
    ///
    /// This includes all kinds of non-returning calls, not just the literal
    /// `call` instruction: `call_indirect`, `try_call`, `try_call_indirect`,
    /// etc... However, it does not include `return_call` and
    /// `return_call_indirect` instructions because the caller cannot catch
    /// exceptions that those calls throw because the caller is no longer on the
    /// stack as soon as they are executed.
    ///
    /// Note: this is a simple `Vec`, and not an `EntitySet`, because it is very
    /// sparse: most of the caller's instructions are not inlined call
    /// instructions. Additionally, we require deterministic iteration order and
    /// do not require set-membership testing, so a hash set is not a good
    /// choice either.
    calls_needing_exception_table_fixup: Vec<ir::Inst>,
}
293 
294 impl InliningAllocs {
295     fn reset(&mut self, callee: &ir::Function) {
296         let InliningAllocs {
297             values,
298             constants,
299             user_external_name_refs,
300             calls_needing_exception_table_fixup,
301         } = self;
302 
303         values.clear();
304         values.resize(callee.dfg.len_values());
305 
306         constants.clear();
307         constants.resize(callee.dfg.constants.len());
308 
309         user_external_name_refs.clear();
310         user_external_name_refs.resize(callee.params.user_named_funcs().len());
311 
312         // Note: We do not reserve capacity for
313         // `calls_needing_exception_table_fixup` because it is a sparse set and
314         // we don't know how large it needs to be ahead of time.
315         calls_needing_exception_table_fixup.clear();
316     }
317 
318     fn set_inlined_value(
319         &mut self,
320         callee: &ir::Function,
321         callee_val: ir::Value,
322         inlined_val: ir::Value,
323     ) {
324         trace!("  --> callee {callee_val:?} = inlined {inlined_val:?}");
325         debug_assert!(self.values[callee_val].is_none());
326         let resolved_callee_val = callee.dfg.resolve_aliases(callee_val);
327         debug_assert!(self.values[resolved_callee_val].is_none());
328         self.values[resolved_callee_val] = Some(inlined_val).into();
329     }
330 
331     fn get_inlined_value(&self, callee: &ir::Function, callee_val: ir::Value) -> Option<ir::Value> {
332         let resolved_callee_val = callee.dfg.resolve_aliases(callee_val);
333         self.values[resolved_callee_val].expand()
334     }
335 }
336 
337 /// Inline one particular function call.
338 ///
339 /// Returns the last inlined block in the layout.
340 fn inline_one(
341     allocs: &mut InliningAllocs,
342     func: &mut ir::Function,
343     callee_func_ref: ir::FuncRef,
344     call_block: ir::Block,
345     call_inst: ir::Inst,
346     call_opcode: ir::Opcode,
347     callee: &ir::Function,
348     call_exception_table: Option<ir::ExceptionTable>,
349 ) -> ir::Block {
350     trace!(
351         "Inlining call {call_inst:?}: {}\n\
352          with callee = {callee:?}",
353         func.dfg.display_inst(call_inst)
354     );
355 
356     // Type check callee signature.
357     let expected_callee_sig = func.dfg.ext_funcs[callee_func_ref].signature;
358     let expected_callee_sig = &func.dfg.signatures[expected_callee_sig];
359     assert_eq!(expected_callee_sig, &callee.signature);
360 
361     allocs.reset(callee);
362 
363     // First, append various callee entity arenas to the end of the caller's
364     // entity arenas.
365     let entity_map = create_entities(allocs, func, callee);
366 
367     // Inlined prologue: split the call instruction's block at the point of the
368     // call and replace the call with a jump.
369     let return_block = split_off_return_block(func, call_inst, call_opcode, callee);
370     let call_stack_map = replace_call_with_jump(allocs, func, call_inst, callee, &entity_map);
371 
372     // Prepare for translating the actual instructions by inserting the inlined
373     // blocks into the caller's layout in the same order that they appear in the
374     // callee.
375     let mut last_inlined_block = inline_block_layout(func, call_block, callee, &entity_map);
376 
377     // Get a copy of debug tags on the call instruction; these are
378     // prepended to debug tags on inlined instructions. Remove them
379     // from the call itself as it will be rewritten to a jump (which
380     // cannot have tags).
381     let call_debug_tags = func.debug_tags.get(call_inst).to_vec();
382     func.debug_tags.set(call_inst, []);
383 
384     // Translate each instruction from the callee into the caller,
385     // appending them to their associated block in the caller.
386     //
387     // Note that we iterate over the callee with a pre-order traversal so that
388     // we see value defs before uses.
389     for callee_block in Dfs::new().pre_order_iter(callee) {
390         let inlined_block = entity_map.inlined_block(callee_block);
391         trace!(
392             "Processing instructions in callee block {callee_block:?} (inlined block {inlined_block:?}"
393         );
394 
395         let mut next_callee_inst = callee.layout.first_inst(callee_block);
396         while let Some(callee_inst) = next_callee_inst {
397             trace!(
398                 "Processing callee instruction {callee_inst:?}: {}",
399                 callee.dfg.display_inst(callee_inst)
400             );
401 
402             assert_ne!(
403                 callee.dfg.insts[callee_inst].opcode(),
404                 ir::Opcode::GlobalValue,
405                 "callee must already be legalized, we shouldn't see any `global_value` \
406                  instructions when inlining; found {callee_inst:?}: {}",
407                 callee.dfg.display_inst(callee_inst)
408             );
409 
410             // Remap the callee instruction's entities and insert it into the
411             // caller's DFG.
412             let inlined_inst_data = callee.dfg.insts[callee_inst].map(InliningInstRemapper {
413                 allocs: &allocs,
414                 func,
415                 callee,
416                 entity_map: &entity_map,
417             });
418             let inlined_inst = func.dfg.make_inst(inlined_inst_data);
419             func.layout.append_inst(inlined_inst, inlined_block);
420 
421             // Copy over debug tags, translating referenced entities
422             // as appropriate.
423             let debug_tags = callee.debug_tags.get(callee_inst);
424             // If there are tags on the inlined instruction, we always
425             // add tags, and we prepend any tags from the call
426             // instruction; but we don't add tags if only the callsite
427             // had them (this would otherwise mean that every single
428             // instruction in an inlined function body would get
429             // tags).
430             if !debug_tags.is_empty() {
431                 let tags = call_debug_tags
432                     .iter()
433                     .cloned()
434                     .chain(debug_tags.iter().map(|tag| match *tag {
435                         DebugTag::User(value) => DebugTag::User(value),
436                         DebugTag::StackSlot(slot) => {
437                             DebugTag::StackSlot(entity_map.inlined_stack_slot(slot))
438                         }
439                     }))
440                     .collect::<SmallVec<[_; 4]>>();
441                 func.debug_tags.set(inlined_inst, tags);
442             }
443 
444             let opcode = callee.dfg.insts[callee_inst].opcode();
445             if opcode.is_return() {
446                 // Instructions that return do not define any values, so we
447                 // don't need to worry about that, but we do need to fix them up
448                 // so that they return by jumping to our control-flow join
449                 // block, rather than returning from the caller.
450                 if let Some(return_block) = return_block {
451                     fixup_inst_that_returns(
452                         allocs,
453                         func,
454                         callee,
455                         &entity_map,
456                         call_opcode,
457                         inlined_inst,
458                         callee_inst,
459                         return_block,
460                         call_stack_map.as_ref().map(|es| &**es),
461                     );
462                 } else {
463                     // If we are inlining a callee that was invoked via
464                     // `return_call`, we leave inlined return instructions
465                     // as-is: there is no logical caller frame on the stack to
466                     // continue to.
467                     debug_assert_eq!(call_opcode, ir::Opcode::ReturnCall);
468                 }
469             } else {
470                 // Make the instruction's result values.
471                 let ctrl_typevar = callee.dfg.ctrl_typevar(callee_inst);
472                 func.dfg.make_inst_results(inlined_inst, ctrl_typevar);
473 
474                 // Update the value map for this instruction's defs.
475                 let callee_results = callee.dfg.inst_results(callee_inst);
476                 let inlined_results = func.dfg.inst_results(inlined_inst);
477                 debug_assert_eq!(callee_results.len(), inlined_results.len());
478                 for (callee_val, inlined_val) in callee_results.iter().zip(inlined_results) {
479                     allocs.set_inlined_value(callee, *callee_val, *inlined_val);
480                 }
481 
482                 if opcode.is_call() {
483                     append_stack_map_entries(
484                         func,
485                         callee,
486                         &entity_map,
487                         call_stack_map.as_deref(),
488                         inlined_inst,
489                         callee_inst,
490                     );
491 
492                     // When we are inlining a `try_call` call site, we need to merge
493                     // the call site's exception table into the inlined calls'
494                     // exception tables. This can involve rewriting regular `call`s
495                     // into `try_call`s, which requires mutating the CFG because
496                     // `try_call` is a block terminator. However, we can't mutate
497                     // the CFG in the middle of this traversal because we rely on
498                     // the existence of a one-to-one mapping between the callee
499                     // layout and the inlined layout. Instead, we record the set of
500                     // inlined call instructions that will need fixing up, and
501                     // perform that possibly-CFG-mutating exception table merging in
502                     // a follow up pass, when we no longer rely on that one-to-one
503                     // layout mapping.
504                     debug_assert_eq!(
505                         call_opcode == ir::Opcode::TryCall,
506                         call_exception_table.is_some()
507                     );
508                     if call_opcode == ir::Opcode::TryCall {
509                         allocs
510                             .calls_needing_exception_table_fixup
511                             .push(inlined_inst);
512                     }
513                 }
514             }
515 
516             trace!(
517                 "  --> inserted inlined instruction {inlined_inst:?}: {}",
518                 func.dfg.display_inst(inlined_inst)
519             );
520 
521             next_callee_inst = callee.layout.next_inst(callee_inst);
522         }
523     }
524 
525     // We copied *all* callee blocks into the caller's layout, but only copied
526     // the callee instructions in *reachable* callee blocks into the caller's
527     // associated blocks. Therefore, any *unreachable* blocks are empty in the
528     // caller, which is invalid CLIF because all blocks must end in a
529     // terminator, so do a quick pass over the inlined blocks and remove any
530     // empty blocks from the caller's layout.
531     for block in entity_map.iter_inlined_blocks(func) {
532         if func.layout.is_block_inserted(block) && func.layout.first_inst(block).is_none() {
533             log::trace!("removing unreachable inlined block from layout: {block}");
534 
535             // If the block being removed is our last-inlined block, then back
536             // it up to the previous block in the layout, which will be the new
537             // last-inlined block after this one's removal.
538             if block == last_inlined_block {
539                 last_inlined_block = func.layout.prev_block(last_inlined_block).expect(
540                     "there will always at least be the block that contained the call we are \
541                      inlining",
542                 );
543             }
544 
545             func.layout.remove_block(block);
546         }
547     }
548 
549     // Final step: fixup the exception tables of any inlined calls when we are
550     // inlining a `try_call` site.
551     //
552     // Subtly, this requires rewriting non-catching `call[_indirect]`
553     // instructions into `try_call[_indirect]` instructions so that exceptions
554     // that unwound through the original callee frame and were caught by the
555     // caller's `try_call` do not unwind past this inlined frame. And turning a
556     // `call` into a `try_call` mutates the CFG, breaking our one-to-one mapping
557     // between callee blocks and inlined blocks, so we delay these fixups to
558     // this final step, when we no longer rely on that mapping.
559     debug_assert!(
560         allocs.calls_needing_exception_table_fixup.is_empty() || call_exception_table.is_some()
561     );
562     debug_assert_eq!(
563         call_opcode == ir::Opcode::TryCall,
564         call_exception_table.is_some()
565     );
566     if let Some(call_exception_table) = call_exception_table {
567         fixup_inlined_call_exception_tables(allocs, func, call_exception_table);
568     }
569 
570     debug_assert!(
571         func.layout.is_block_inserted(last_inlined_block),
572         "last_inlined_block={last_inlined_block} should be inserted in the layout"
573     );
574     last_inlined_block
575 }
576 
577 /// Append stack map entries from the caller and callee to the given inlined
578 /// instruction.
579 fn append_stack_map_entries(
580     func: &mut ir::Function,
581     callee: &ir::Function,
582     entity_map: &EntityMap,
583     call_stack_map: Option<&[ir::UserStackMapEntry]>,
584     inlined_inst: ir::Inst,
585     callee_inst: ir::Inst,
586 ) {
587     // Add the caller's stack map to this call. These entries
588     // already refer to caller entities and do not need further
589     // translation.
590     func.dfg.append_user_stack_map_entries(
591         inlined_inst,
592         call_stack_map
593             .iter()
594             .flat_map(|entries| entries.iter().cloned()),
595     );
596 
597     // Append the callee's stack map to this call. These entries
598     // refer to callee entities and therefore do require
599     // translation into the caller's index space.
600     func.dfg.append_user_stack_map_entries(
601         inlined_inst,
602         callee
603             .dfg
604             .user_stack_map_entries(callee_inst)
605             .iter()
606             .flat_map(|entries| entries.iter())
607             .map(|entry| ir::UserStackMapEntry {
608                 ty: entry.ty,
609                 slot: entity_map.inlined_stack_slot(entry.slot),
610                 offset: entry.offset,
611             }),
612     );
613 }
614 
615 /// Create or update the exception tables for any inlined call instructions:
616 /// when inlining at a `try_call` site, we must forward our exceptional edges
617 /// into each inlined call instruction.
618 fn fixup_inlined_call_exception_tables(
619     allocs: &mut InliningAllocs,
620     func: &mut ir::Function,
621     call_exception_table: ir::ExceptionTable,
622 ) {
623     // Split a block at a `call[_indirect]` instruction, detach the
624     // instruction's results, and alias them to the new block's parameters.
625     let split_block_for_new_try_call = |func: &mut ir::Function, inst: ir::Inst| -> ir::Block {
626         debug_assert!(func.dfg.insts[inst].opcode().is_call());
627         debug_assert!(!func.dfg.insts[inst].opcode().is_terminator());
628 
629         // Split the block.
630         let next_inst = func
631             .layout
632             .next_inst(inst)
633             .expect("inst is not a terminator, should have a successor");
634         let new_block = func.dfg.blocks.add();
635         func.layout.split_block(new_block, next_inst);
636 
637         // `try_call[_indirect]` instructions do not define values themselves;
638         // the normal-return block has parameters for the results. So remove
639         // this instruction's results, create an associated block parameter for
640         // each of them, and alias them to the new block parameter.
641         let old_results = SmallValueVec::from_iter(func.dfg.inst_results(inst).iter().copied());
642         func.dfg.detach_inst_results(inst);
643         for old_result in old_results {
644             let ty = func.dfg.value_type(old_result);
645             let new_block_param = func.dfg.append_block_param(new_block, ty);
646             func.dfg.change_to_alias(old_result, new_block_param);
647         }
648 
649         new_block
650     };
651 
652     // Clone the caller's exception table, updating it for use in the current
653     // `call[_indirect]` instruction as it becomes a `try_call[_indirect]`.
654     let clone_exception_table_for_this_call = |func: &mut ir::Function,
655                                                signature: ir::SigRef,
656                                                new_block: ir::Block|
657      -> ir::ExceptionTable {
658         let mut exception = func.stencil.dfg.exception_tables[call_exception_table]
659             .deep_clone(&mut func.stencil.dfg.value_lists);
660 
661         *exception.signature_mut() = signature;
662 
663         let returns_len = func.dfg.signatures[signature].returns.len();
664         let returns_len = u32::try_from(returns_len).unwrap();
665 
666         *exception.normal_return_mut() = ir::BlockCall::new(
667             new_block,
668             (0..returns_len).map(|i| ir::BlockArg::TryCallRet(i)),
669             &mut func.dfg.value_lists,
670         );
671 
672         func.dfg.exception_tables.push(exception)
673     };
674 
675     for inst in allocs.calls_needing_exception_table_fixup.drain(..) {
676         debug_assert!(func.dfg.insts[inst].opcode().is_call());
677         debug_assert!(!func.dfg.insts[inst].opcode().is_return());
678         match func.dfg.insts[inst] {
679             //     current_block:
680             //         preds...
681             //         rets... = call f(args...)
682             //         succs...
683             //
684             // becomes
685             //
686             //     current_block:
687             //         preds...
688             //         try_call f(args...), new_block(rets...), [call_exception_table...]
689             //     new_block(rets...):
690             //         succs...
691             ir::InstructionData::Call {
692                 opcode: ir::Opcode::Call,
693                 args,
694                 func_ref,
695             } => {
696                 let new_block = split_block_for_new_try_call(func, inst);
697                 let signature = func.dfg.ext_funcs[func_ref].signature;
698                 let exception = clone_exception_table_for_this_call(func, signature, new_block);
699                 func.dfg.insts[inst] = ir::InstructionData::TryCall {
700                     opcode: ir::Opcode::TryCall,
701                     args,
702                     func_ref,
703                     exception,
704                 };
705             }
706 
707             //     current_block:
708             //         preds...
709             //         rets... = call_indirect sig, val(args...)
710             //         succs...
711             //
712             // becomes
713             //
714             //     current_block:
715             //         preds...
716             //         try_call_indirect sig, val(args...), new_block(rets...), [call_exception_table...]
717             //     new_block(rets...):
718             //         succs...
719             ir::InstructionData::CallIndirect {
720                 opcode: ir::Opcode::CallIndirect,
721                 args,
722                 sig_ref,
723             } => {
724                 let new_block = split_block_for_new_try_call(func, inst);
725                 let exception = clone_exception_table_for_this_call(func, sig_ref, new_block);
726                 func.dfg.insts[inst] = ir::InstructionData::TryCallIndirect {
727                     opcode: ir::Opcode::TryCallIndirect,
728                     args,
729                     exception,
730                 };
731             }
732 
733             // For `try_call[_indirect]` instructions, we just need to merge the
734             // exception tables.
735             ir::InstructionData::TryCall {
736                 opcode: ir::Opcode::TryCall,
737                 exception,
738                 ..
739             }
740             | ir::InstructionData::TryCallIndirect {
741                 opcode: ir::Opcode::TryCallIndirect,
742                 exception,
743                 ..
744             } => {
745                 // Construct a new exception table that consists of
746                 // the inlined instruction's exception table match
747                 // sequence, with the inlining site's exception table
748                 // appended. This will ensure that the first-match
749                 // semantics emulates the original behavior of
750                 // matching in the inner frame first.
751                 let sig = func.dfg.exception_tables[exception].signature();
752                 let normal_return = *func.dfg.exception_tables[exception].normal_return();
753                 let exception_data = ExceptionTableData::new(
754                     sig,
755                     normal_return,
756                     func.dfg.exception_tables[exception]
757                         .items()
758                         .chain(func.dfg.exception_tables[call_exception_table].items()),
759                 )
760                 .deep_clone(&mut func.dfg.value_lists);
761 
762                 func.dfg.exception_tables[exception] = exception_data;
763             }
764 
765             otherwise => unreachable!("unknown non-return call instruction: {otherwise:?}"),
766         }
767     }
768 }
769 
770 /// After having created an inlined version of a callee instruction that returns
771 /// in the caller, we need to fix it up so that it doesn't actually return
772 /// (since we are already in the caller's frame) and instead just jumps to the
773 /// control-flow join point.
774 fn fixup_inst_that_returns(
775     allocs: &mut InliningAllocs,
776     func: &mut ir::Function,
777     callee: &ir::Function,
778     entity_map: &EntityMap,
779     call_opcode: ir::Opcode,
780     inlined_inst: ir::Inst,
781     callee_inst: ir::Inst,
782     return_block: ir::Block,
783     call_stack_map: Option<&[ir::UserStackMapEntry]>,
784 ) {
785     debug_assert!(func.dfg.insts[inlined_inst].opcode().is_return());
786     match func.dfg.insts[inlined_inst] {
787         //     return rets...
788         //
789         // becomes
790         //
791         //     jump return_block(rets...)
792         ir::InstructionData::MultiAry {
793             opcode: ir::Opcode::Return,
794             args,
795         } => {
796             let rets = SmallBlockArgVec::from_iter(
797                 args.as_slice(&func.dfg.value_lists)
798                     .iter()
799                     .copied()
800                     .map(|v| v.into()),
801             );
802             func.dfg.replace(inlined_inst).jump(return_block, &rets);
803         }
804 
805         //     return_call f(args...)
806         //
807         // becomes
808         //
809         //     rets... = call f(args...)
810         //     jump return_block(rets...)
811         ir::InstructionData::Call {
812             opcode: ir::Opcode::ReturnCall,
813             args,
814             func_ref,
815         } => {
816             func.dfg.insts[inlined_inst] = ir::InstructionData::Call {
817                 opcode: ir::Opcode::Call,
818                 args,
819                 func_ref,
820             };
821             func.dfg.make_inst_results(inlined_inst, ir::types::INVALID);
822 
823             append_stack_map_entries(
824                 func,
825                 callee,
826                 &entity_map,
827                 call_stack_map,
828                 inlined_inst,
829                 callee_inst,
830             );
831 
832             let rets = SmallBlockArgVec::from_iter(
833                 func.dfg
834                     .inst_results(inlined_inst)
835                     .iter()
836                     .copied()
837                     .map(|v| v.into()),
838             );
839             let mut cursor = FuncCursor::new(func);
840             cursor.goto_after_inst(inlined_inst);
841             cursor.ins().jump(return_block, &rets);
842 
843             if call_opcode == ir::Opcode::TryCall {
844                 allocs
845                     .calls_needing_exception_table_fixup
846                     .push(inlined_inst);
847             }
848         }
849 
850         //     return_call_indirect val(args...)
851         //
852         // becomes
853         //
854         //     rets... = call_indirect val(args...)
855         //     jump return_block(rets...)
856         ir::InstructionData::CallIndirect {
857             opcode: ir::Opcode::ReturnCallIndirect,
858             args,
859             sig_ref,
860         } => {
861             func.dfg.insts[inlined_inst] = ir::InstructionData::CallIndirect {
862                 opcode: ir::Opcode::CallIndirect,
863                 args,
864                 sig_ref,
865             };
866             func.dfg.make_inst_results(inlined_inst, ir::types::INVALID);
867 
868             append_stack_map_entries(
869                 func,
870                 callee,
871                 &entity_map,
872                 call_stack_map,
873                 inlined_inst,
874                 callee_inst,
875             );
876 
877             let rets = SmallBlockArgVec::from_iter(
878                 func.dfg
879                     .inst_results(inlined_inst)
880                     .iter()
881                     .copied()
882                     .map(|v| v.into()),
883             );
884             let mut cursor = FuncCursor::new(func);
885             cursor.goto_after_inst(inlined_inst);
886             cursor.ins().jump(return_block, &rets);
887 
888             if call_opcode == ir::Opcode::TryCall {
889                 allocs
890                     .calls_needing_exception_table_fixup
891                     .push(inlined_inst);
892             }
893         }
894 
895         inst_data => unreachable!(
896             "should have handled all `is_return() == true` instructions above; \
897              got {inst_data:?}"
898         ),
899     }
900 }
901 
/// An `InstructionMapper` implementation that remaps a callee instruction's
/// entity references to their new indices in the caller function.
struct InliningInstRemapper<'a> {
    /// Inlining state, consulted for the callee-to-caller translation of
    /// values and constants.
    allocs: &'a InliningAllocs,
    /// The caller function. Remapped value lists, jump tables, and exception
    /// tables are allocated in its data-flow graph.
    func: &'a mut ir::Function,
    /// The callee function whose entity references are being remapped.
    callee: &'a ir::Function,
    /// Translation of the callee's blocks, global values, signatures,
    /// function references, stack slots, and immediates into the caller.
    entity_map: &'a EntityMap,
}
910 
911 impl<'a> ir::instructions::InstructionMapper for InliningInstRemapper<'a> {
912     fn map_value(&mut self, value: ir::Value) -> ir::Value {
913         self.allocs.get_inlined_value(self.callee, value).expect(
914             "defs come before uses; we should have already inlined all values \
915              used by an instruction",
916         )
917     }
918 
919     fn map_value_list(&mut self, value_list: ir::ValueList) -> ir::ValueList {
920         let mut inlined_list = ir::ValueList::new();
921         for callee_val in value_list.as_slice(&self.callee.dfg.value_lists) {
922             let inlined_val = self.map_value(*callee_val);
923             inlined_list.push(inlined_val, &mut self.func.dfg.value_lists);
924         }
925         inlined_list
926     }
927 
928     fn map_global_value(&mut self, global_value: ir::GlobalValue) -> ir::GlobalValue {
929         self.entity_map.inlined_global_value(global_value)
930     }
931 
932     fn map_jump_table(&mut self, jump_table: ir::JumpTable) -> ir::JumpTable {
933         let inlined_default =
934             self.map_block_call(self.callee.dfg.jump_tables[jump_table].default_block());
935         let inlined_table = self.callee.dfg.jump_tables[jump_table]
936             .as_slice()
937             .iter()
938             .map(|callee_block_call| self.map_block_call(*callee_block_call))
939             .collect::<SmallBlockCallVec>();
940         self.func
941             .dfg
942             .jump_tables
943             .push(ir::JumpTableData::new(inlined_default, &inlined_table))
944     }
945 
946     fn map_exception_table(&mut self, exception_table: ir::ExceptionTable) -> ir::ExceptionTable {
947         let exception_table = &self.callee.dfg.exception_tables[exception_table];
948         let inlined_sig_ref = self.map_sig_ref(exception_table.signature());
949         let inlined_normal_return = self.map_block_call(*exception_table.normal_return());
950         let inlined_table = exception_table
951             .items()
952             .map(|item| match item {
953                 ExceptionTableItem::Tag(tag, block_call) => {
954                     ExceptionTableItem::Tag(tag, self.map_block_call(block_call))
955                 }
956                 ExceptionTableItem::Default(block_call) => {
957                     ExceptionTableItem::Default(self.map_block_call(block_call))
958                 }
959                 ExceptionTableItem::Context(value) => {
960                     ExceptionTableItem::Context(self.map_value(value))
961                 }
962             })
963             .collect::<SmallVec<[_; 8]>>();
964         self.func
965             .dfg
966             .exception_tables
967             .push(ir::ExceptionTableData::new(
968                 inlined_sig_ref,
969                 inlined_normal_return,
970                 inlined_table,
971             ))
972     }
973 
974     fn map_block_call(&mut self, block_call: ir::BlockCall) -> ir::BlockCall {
975         let callee_block = block_call.block(&self.callee.dfg.value_lists);
976         let inlined_block = self.entity_map.inlined_block(callee_block);
977         let args = block_call
978             .args(&self.callee.dfg.value_lists)
979             .map(|arg| match arg {
980                 ir::BlockArg::Value(value) => self.map_value(value).into(),
981                 ir::BlockArg::TryCallRet(_) | ir::BlockArg::TryCallExn(_) => arg,
982             })
983             .collect::<SmallBlockArgVec>();
984         ir::BlockCall::new(inlined_block, args, &mut self.func.dfg.value_lists)
985     }
986 
987     fn map_block(&mut self, block: ir::Block) -> ir::Block {
988         self.entity_map.inlined_block(block)
989     }
990 
991     fn map_func_ref(&mut self, func_ref: ir::FuncRef) -> ir::FuncRef {
992         self.entity_map.inlined_func_ref(func_ref)
993     }
994 
995     fn map_sig_ref(&mut self, sig_ref: ir::SigRef) -> ir::SigRef {
996         self.entity_map.inlined_sig_ref(sig_ref)
997     }
998 
999     fn map_stack_slot(&mut self, stack_slot: ir::StackSlot) -> ir::StackSlot {
1000         self.entity_map.inlined_stack_slot(stack_slot)
1001     }
1002 
1003     fn map_dynamic_stack_slot(
1004         &mut self,
1005         dynamic_stack_slot: ir::DynamicStackSlot,
1006     ) -> ir::DynamicStackSlot {
1007         self.entity_map
1008             .inlined_dynamic_stack_slot(dynamic_stack_slot)
1009     }
1010 
1011     fn map_constant(&mut self, constant: ir::Constant) -> ir::Constant {
1012         self.allocs
1013             .constants
1014             .get(constant)
1015             .and_then(|o| o.expand())
1016             .expect("should have inlined all callee constants")
1017     }
1018 
1019     fn map_immediate(&mut self, immediate: ir::Immediate) -> ir::Immediate {
1020         self.entity_map.inlined_immediate(immediate)
1021     }
1022 }
1023 
1024 /// Inline the callee's layout into the caller's layout.
1025 ///
1026 /// Returns the last inlined block in the layout.
1027 fn inline_block_layout(
1028     func: &mut ir::Function,
1029     call_block: ir::Block,
1030     callee: &ir::Function,
1031     entity_map: &EntityMap,
1032 ) -> ir::Block {
1033     debug_assert!(func.layout.is_block_inserted(call_block));
1034 
1035     // Iterate over callee blocks in layout order, inserting their associated
1036     // inlined block into the caller's layout.
1037     let mut prev_inlined_block = call_block;
1038     let mut next_callee_block = callee.layout.entry_block();
1039     while let Some(callee_block) = next_callee_block {
1040         debug_assert!(func.layout.is_block_inserted(prev_inlined_block));
1041 
1042         let inlined_block = entity_map.inlined_block(callee_block);
1043         func.layout
1044             .insert_block_after(inlined_block, prev_inlined_block);
1045 
1046         prev_inlined_block = inlined_block;
1047         next_callee_block = callee.layout.next_block(callee_block);
1048     }
1049 
1050     debug_assert!(func.layout.is_block_inserted(prev_inlined_block));
1051     prev_inlined_block
1052 }
1053 
1054 /// Split the call instruction's block just after the call instruction to create
1055 /// the point where control-flow joins after the inlined callee "returns".
1056 ///
1057 /// Note that tail calls do not return to the caller and therefore do not have a
1058 /// control-flow join point.
1059 fn split_off_return_block(
1060     func: &mut ir::Function,
1061     call_inst: ir::Inst,
1062     opcode: ir::Opcode,
1063     callee: &ir::Function,
1064 ) -> Option<ir::Block> {
1065     // When the `call_inst` is not a block terminator, we need to split the
1066     // block.
1067     let return_block = func.layout.next_inst(call_inst).map(|next_inst| {
1068         let return_block = func.dfg.blocks.add();
1069         func.layout.split_block(return_block, next_inst);
1070 
1071         // Add block parameters for each return value and alias the call
1072         // instruction's results to them.
1073         let old_results =
1074             SmallValueVec::from_iter(func.dfg.inst_results(call_inst).iter().copied());
1075         debug_assert_eq!(old_results.len(), callee.signature.returns.len());
1076         func.dfg.detach_inst_results(call_inst);
1077         for (abi, old_val) in callee.signature.returns.iter().zip(old_results) {
1078             debug_assert_eq!(abi.value_type, func.dfg.value_type(old_val));
1079             let ret_param = func.dfg.append_block_param(return_block, abi.value_type);
1080             func.dfg.change_to_alias(old_val, ret_param);
1081         }
1082 
1083         return_block
1084     });
1085 
1086     // When the `call_inst` is a block terminator, then it is either a
1087     // `return_call` or a `try_call`:
1088     //
1089     // * For `return_call`s, we don't have a control-flow join point, because
1090     //   the caller permanently transfers control to the callee.
1091     //
1092     // * For `try_call`s, we probably already have a block for the control-flow
1093     //   join point, but it isn't guaranteed: the `try_call` might ignore the
1094     //   call's returns and not forward them to the normal-return block or it
1095     //   might also pass additional arguments. We can only reuse the existing
1096     //   normal-return block when the `try_call` forwards exactly our callee's
1097     //   returns to that block (and therefore that block's parameter types also
1098     //   exactly match the callee's return types). Otherwise, we must create a new
1099     //   return block that forwards to the existing normal-return
1100     //   block. (Elsewhere, at the end of inlining, we will also update any inlined
1101     //   calls to forward any raised exceptions to the caller's exception table,
1102     //   as necessary.)
1103     //
1104     //   Finally, note that reusing the normal-return's target block is just an
1105     //   optimization to emit a simpler CFG when we can, and is not
1106     //   fundamentally required for correctness. We could always insert a
1107     //   temporary block as our control-flow join point that then forwards to
1108     //   the normal-return's target block. However, at the time of writing,
1109     //   Cranelift doesn't currently do any jump-threading or branch
1110     //   simplification in the mid-end, and removing unnecessary blocks in this
1111     //   way can help some subsequent mid-end optimizations. If, in the future,
1112     //   we gain support for jump-threading optimizations in the mid-end, we can
1113     //   come back and simplify the below code a bit to always generate the
1114     //   temporary block, and then rely on the subsequent optimizations to clean
1115     //   everything up.
1116     debug_assert_eq!(
1117         return_block.is_none(),
1118         opcode == ir::Opcode::ReturnCall || opcode == ir::Opcode::TryCall,
1119     );
1120     return_block.or_else(|| match func.dfg.insts[call_inst] {
1121         ir::InstructionData::TryCall {
1122             opcode: ir::Opcode::TryCall,
1123             args: _,
1124             func_ref: _,
1125             exception,
1126         } => {
1127             let normal_return = func.dfg.exception_tables[exception].normal_return();
1128             let normal_return_block = normal_return.block(&func.dfg.value_lists);
1129 
1130             // Check to see if we can reuse the existing normal-return block.
1131             {
1132                 let normal_return_args = normal_return.args(&func.dfg.value_lists);
1133                 if normal_return_args.len() == callee.signature.returns.len()
1134                     && normal_return_args.enumerate().all(|(i, arg)| {
1135                         let i = u32::try_from(i).unwrap();
1136                         arg == ir::BlockArg::TryCallRet(i)
1137                     })
1138                 {
1139                     return Some(normal_return_block);
1140                 }
1141             }
1142 
1143             // Okay, we cannot reuse the normal-return block. Create a new block
1144             // that has the expected block parameter types and have it jump to
1145             // the normal-return block.
1146             let return_block = func.dfg.blocks.add();
1147             func.layout.insert_block(return_block, normal_return_block);
1148 
1149             let return_block_params = callee
1150                 .signature
1151                 .returns
1152                 .iter()
1153                 .map(|abi| func.dfg.append_block_param(return_block, abi.value_type))
1154                 .collect::<SmallValueVec>();
1155 
1156             let normal_return_args = func.dfg.exception_tables[exception]
1157                 .normal_return()
1158                 .args(&func.dfg.value_lists)
1159                 .collect::<SmallBlockArgVec>();
1160             let jump_args = normal_return_args
1161                 .into_iter()
1162                 .map(|arg| match arg {
1163                     ir::BlockArg::Value(value) => ir::BlockArg::Value(value),
1164                     ir::BlockArg::TryCallRet(i) => {
1165                         let i = usize::try_from(i).unwrap();
1166                         ir::BlockArg::Value(return_block_params[i])
1167                     }
1168                     ir::BlockArg::TryCallExn(_) => {
1169                         unreachable!("normal-return edges cannot use exceptional results")
1170                     }
1171                 })
1172                 .collect::<SmallBlockArgVec>();
1173 
1174             let mut cursor = FuncCursor::new(func);
1175             cursor.goto_first_insertion_point(return_block);
1176             cursor.ins().jump(normal_return_block, &jump_args);
1177 
1178             Some(return_block)
1179         }
1180         _ => None,
1181     })
1182 }
1183 
1184 /// Replace the caller's call instruction with a jump to the caller's inlined
1185 /// copy of the callee's entry block.
1186 ///
1187 /// Also associates the callee's parameters with the caller's arguments in our
1188 /// value map.
1189 ///
1190 /// Returns the caller's stack map entries, if any.
1191 fn replace_call_with_jump(
1192     allocs: &mut InliningAllocs,
1193     func: &mut ir::Function,
1194     call_inst: ir::Inst,
1195     callee: &ir::Function,
1196     entity_map: &EntityMap,
1197 ) -> Option<ir::UserStackMapEntryVec> {
1198     trace!("Replacing `call` with `jump`");
1199     trace!(
1200         "  --> call instruction: {call_inst:?}: {}",
1201         func.dfg.display_inst(call_inst)
1202     );
1203 
1204     let callee_entry_block = callee
1205         .layout
1206         .entry_block()
1207         .expect("callee function should have an entry block");
1208     let callee_param_values = callee.dfg.block_params(callee_entry_block);
1209     let caller_arg_values = SmallValueVec::from_iter(func.dfg.inst_args(call_inst).iter().copied());
1210     debug_assert_eq!(callee_param_values.len(), caller_arg_values.len());
1211     debug_assert_eq!(callee_param_values.len(), callee.signature.params.len());
1212     for (abi, (callee_param_value, caller_arg_value)) in callee
1213         .signature
1214         .params
1215         .iter()
1216         .zip(callee_param_values.into_iter().zip(caller_arg_values))
1217     {
1218         debug_assert_eq!(abi.value_type, callee.dfg.value_type(*callee_param_value));
1219         debug_assert_eq!(abi.value_type, func.dfg.value_type(caller_arg_value));
1220         allocs.set_inlined_value(callee, *callee_param_value, caller_arg_value);
1221     }
1222 
1223     // Replace the caller's call instruction with a jump to the caller's inlined
1224     // copy of the callee's entry block.
1225     //
1226     // Note that the call block dominates the inlined entry block (and also all
1227     // other inlined blocks) so we can reference the arguments directly, and do
1228     // not need to add block parameters to the inlined entry block.
1229     let inlined_entry_block = entity_map.inlined_block(callee_entry_block);
1230     func.dfg.replace(call_inst).jump(inlined_entry_block, &[]);
1231     trace!(
1232         "  --> replaced with jump instruction: {call_inst:?}: {}",
1233         func.dfg.display_inst(call_inst)
1234     );
1235 
1236     let stack_map_entries = func.dfg.take_user_stack_map_entries(call_inst);
1237     stack_map_entries
1238 }
1239 
/// Keeps track of mapping callee entities to their associated inlined caller
/// entities.
///
/// Every field starts out as `None` (via `Default`) and holds the index of
/// the first inlined entity of its kind once that kind has been copied into
/// the caller. The `inlined_*` accessors panic when their offset is still
/// `None`.
#[derive(Default)]
struct EntityMap {
    // Rather than doing an implicit, demand-based, DCE'ing translation of
    // entities, which would require maps from each callee entity to its
    // associated caller entity, we copy all entities into the caller and
    // remember each entity kind's initial offset. Mapping from a callee
    // entity to its inlined caller entity is then just adding that initial
    // offset to the callee's index. This should be both faster and simpler
    // than the alternative. Most of these sets are relatively small, and they
    // rarely have too much dead code in practice, so this is a good trade
    // off.
    //
    // Note that there are a few kinds of entities that are excluded from the
    // `EntityMap`, and for which we do actually take the demand-based approach:
    // values and value lists being the notable ones.
    /// Offset added to a callee `ir::Block` index.
    block_offset: Option<u32>,
    /// Offset added to a callee `ir::GlobalValue` index.
    global_value_offset: Option<u32>,
    /// Offset added to a callee `ir::SigRef` index.
    sig_ref_offset: Option<u32>,
    /// Offset added to a callee `ir::FuncRef` index.
    func_ref_offset: Option<u32>,
    /// Offset added to a callee `ir::StackSlot` index.
    stack_slot_offset: Option<u32>,
    /// Offset added to a callee `ir::DynamicType` index.
    dynamic_type_offset: Option<u32>,
    /// Offset added to a callee `ir::DynamicStackSlot` index.
    dynamic_stack_slot_offset: Option<u32>,
    /// Offset added to a callee `ir::Immediate` index.
    immediate_offset: Option<u32>,
}
1265 
1266 impl EntityMap {
1267     fn inlined_block(&self, callee_block: ir::Block) -> ir::Block {
1268         let offset = self
1269             .block_offset
1270             .expect("must create inlined `ir::Block`s before calling `EntityMap::inlined_block`");
1271         ir::Block::from_u32(offset + callee_block.as_u32())
1272     }
1273 
1274     fn iter_inlined_blocks(&self, func: &ir::Function) -> impl Iterator<Item = ir::Block> + use<> {
1275         let start = self.block_offset.expect(
1276             "must create inlined `ir::Block`s before calling `EntityMap::iter_inlined_blocks`",
1277         );
1278 
1279         let end = func.dfg.blocks.len();
1280         let end = u32::try_from(end).unwrap();
1281 
1282         (start..end).map(|i| ir::Block::from_u32(i))
1283     }
1284 
1285     fn inlined_global_value(&self, callee_global_value: ir::GlobalValue) -> ir::GlobalValue {
1286         let offset = self
1287             .global_value_offset
1288             .expect("must create inlined `ir::GlobalValue`s before calling `EntityMap::inlined_global_value`");
1289         ir::GlobalValue::from_u32(offset + callee_global_value.as_u32())
1290     }
1291 
1292     fn inlined_sig_ref(&self, callee_sig_ref: ir::SigRef) -> ir::SigRef {
1293         let offset = self.sig_ref_offset.expect(
1294             "must create inlined `ir::SigRef`s before calling `EntityMap::inlined_sig_ref`",
1295         );
1296         ir::SigRef::from_u32(offset + callee_sig_ref.as_u32())
1297     }
1298 
1299     fn inlined_func_ref(&self, callee_func_ref: ir::FuncRef) -> ir::FuncRef {
1300         let offset = self.func_ref_offset.expect(
1301             "must create inlined `ir::FuncRef`s before calling `EntityMap::inlined_func_ref`",
1302         );
1303         ir::FuncRef::from_u32(offset + callee_func_ref.as_u32())
1304     }
1305 
1306     fn inlined_stack_slot(&self, callee_stack_slot: ir::StackSlot) -> ir::StackSlot {
1307         let offset = self.stack_slot_offset.expect(
1308             "must create inlined `ir::StackSlot`s before calling `EntityMap::inlined_stack_slot`",
1309         );
1310         ir::StackSlot::from_u32(offset + callee_stack_slot.as_u32())
1311     }
1312 
1313     fn inlined_dynamic_type(&self, callee_dynamic_type: ir::DynamicType) -> ir::DynamicType {
1314         let offset = self.dynamic_type_offset.expect(
1315             "must create inlined `ir::DynamicType`s before calling `EntityMap::inlined_dynamic_type`",
1316         );
1317         ir::DynamicType::from_u32(offset + callee_dynamic_type.as_u32())
1318     }
1319 
1320     fn inlined_dynamic_stack_slot(
1321         &self,
1322         callee_dynamic_stack_slot: ir::DynamicStackSlot,
1323     ) -> ir::DynamicStackSlot {
1324         let offset = self.dynamic_stack_slot_offset.expect(
1325             "must create inlined `ir::DynamicStackSlot`s before calling `EntityMap::inlined_dynamic_stack_slot`",
1326         );
1327         ir::DynamicStackSlot::from_u32(offset + callee_dynamic_stack_slot.as_u32())
1328     }
1329 
1330     fn inlined_immediate(&self, callee_immediate: ir::Immediate) -> ir::Immediate {
1331         let offset = self.immediate_offset.expect(
1332             "must create inlined `ir::Immediate`s before calling `EntityMap::inlined_immediate`",
1333         );
1334         ir::Immediate::from_u32(offset + callee_immediate.as_u32())
1335     }
1336 }
1337 
1338 /// Translate all of the callee's various entities into the caller, producing an
1339 /// `EntityMap` that can be used to translate callee entity references into
1340 /// inlined caller entity references.
1341 fn create_entities(
1342     allocs: &mut InliningAllocs,
1343     func: &mut ir::Function,
1344     callee: &ir::Function,
1345 ) -> EntityMap {
1346     let mut entity_map = EntityMap::default();
1347 
1348     entity_map.block_offset = Some(create_blocks(allocs, func, callee));
1349     entity_map.global_value_offset = Some(create_global_values(func, callee));
1350     entity_map.sig_ref_offset = Some(create_sig_refs(func, callee));
1351     create_user_external_name_refs(allocs, func, callee);
1352     entity_map.func_ref_offset = Some(create_func_refs(allocs, func, callee, &entity_map));
1353     entity_map.stack_slot_offset = Some(create_stack_slots(func, callee));
1354     entity_map.dynamic_type_offset = Some(create_dynamic_types(func, callee, &entity_map));
1355     entity_map.dynamic_stack_slot_offset =
1356         Some(create_dynamic_stack_slots(func, callee, &entity_map));
1357     entity_map.immediate_offset = Some(create_immediates(func, callee));
1358 
1359     // `ir::ConstantData` is deduplicated, so we cannot use our offset scheme
1360     // for `ir::Constant`s. Nonetheless, we still insert them into the caller
1361     // now, at the same time as the rest of our entities.
1362     create_constants(allocs, func, callee);
1363 
1364     entity_map
1365 }
1366 
1367 /// Create inlined blocks in the caller for every block in the callee.
1368 fn create_blocks(
1369     allocs: &mut InliningAllocs,
1370     func: &mut ir::Function,
1371     callee: &ir::Function,
1372 ) -> u32 {
1373     let offset = func.dfg.blocks.len();
1374     let offset = u32::try_from(offset).unwrap();
1375 
1376     func.dfg.blocks.reserve(callee.dfg.blocks.len());
1377     for callee_block in callee.dfg.blocks.iter() {
1378         let caller_block = func.dfg.blocks.add();
1379         trace!("Callee {callee_block:?} = inlined {caller_block:?}");
1380 
1381         if callee.layout.is_cold(callee_block) {
1382             func.layout.set_cold(caller_block);
1383         }
1384 
1385         // Note: the entry block does not need parameters because the only
1386         // predecessor is the call block and we associate the callee's
1387         // parameters with the caller's arguments directly.
1388         if callee.layout.entry_block() != Some(callee_block) {
1389             for callee_param in callee.dfg.blocks[callee_block].params(&callee.dfg.value_lists) {
1390                 let ty = callee.dfg.value_type(*callee_param);
1391                 let caller_param = func.dfg.append_block_param(caller_block, ty);
1392 
1393                 allocs.set_inlined_value(callee, *callee_param, caller_param);
1394             }
1395         }
1396     }
1397 
1398     offset
1399 }
1400 
1401 /// Copy and translate global values from the callee into the caller.
1402 fn create_global_values(func: &mut ir::Function, callee: &ir::Function) -> u32 {
1403     let gv_offset = func.global_values.len();
1404     let gv_offset = u32::try_from(gv_offset).unwrap();
1405 
1406     func.global_values.reserve(callee.global_values.len());
1407     for gv in callee.global_values.values() {
1408         func.global_values.push(match gv {
1409             // These kinds of global values reference other global values, so we
1410             // need to fixup that reference.
1411             ir::GlobalValueData::Load {
1412                 base,
1413                 offset,
1414                 global_type,
1415                 flags,
1416             } => ir::GlobalValueData::Load {
1417                 base: ir::GlobalValue::from_u32(base.as_u32() + gv_offset),
1418                 offset: *offset,
1419                 global_type: *global_type,
1420                 flags: *flags,
1421             },
1422             ir::GlobalValueData::IAddImm {
1423                 base,
1424                 offset,
1425                 global_type,
1426             } => ir::GlobalValueData::IAddImm {
1427                 base: ir::GlobalValue::from_u32(base.as_u32() + gv_offset),
1428                 offset: *offset,
1429                 global_type: *global_type,
1430             },
1431 
1432             // These kinds of global values do not reference other global
1433             // values, so we can just clone them.
1434             ir::GlobalValueData::VMContext
1435             | ir::GlobalValueData::Symbol { .. }
1436             | ir::GlobalValueData::DynScaleTargetConst { .. } => gv.clone(),
1437         });
1438     }
1439 
1440     gv_offset
1441 }
1442 
1443 /// Copy `ir::SigRef`s from the callee into the caller.
1444 fn create_sig_refs(func: &mut ir::Function, callee: &ir::Function) -> u32 {
1445     let offset = func.dfg.signatures.len();
1446     let offset = u32::try_from(offset).unwrap();
1447 
1448     func.dfg.signatures.reserve(callee.dfg.signatures.len());
1449     for sig in callee.dfg.signatures.values() {
1450         func.dfg.signatures.push(sig.clone());
1451     }
1452 
1453     offset
1454 }
1455 
1456 fn create_user_external_name_refs(
1457     allocs: &mut InliningAllocs,
1458     func: &mut ir::Function,
1459     callee: &ir::Function,
1460 ) {
1461     for (callee_named_func_ref, name) in callee.params.user_named_funcs().iter() {
1462         let caller_named_func_ref = func.declare_imported_user_function(name.clone());
1463         allocs.user_external_name_refs[callee_named_func_ref] = Some(caller_named_func_ref).into();
1464     }
1465 }
1466 
1467 /// Translate `ir::FuncRef`s from the callee into the caller.
1468 fn create_func_refs(
1469     allocs: &InliningAllocs,
1470     func: &mut ir::Function,
1471     callee: &ir::Function,
1472     entity_map: &EntityMap,
1473 ) -> u32 {
1474     let offset = func.dfg.ext_funcs.len();
1475     let offset = u32::try_from(offset).unwrap();
1476 
1477     func.dfg.ext_funcs.reserve(callee.dfg.ext_funcs.len());
1478     for ir::ExtFuncData {
1479         name,
1480         signature,
1481         colocated,
1482         patchable,
1483     } in callee.dfg.ext_funcs.values()
1484     {
1485         func.dfg.ext_funcs.push(ir::ExtFuncData {
1486             name: match name {
1487                 ir::ExternalName::User(name_ref) => {
1488                     ir::ExternalName::User(allocs.user_external_name_refs[*name_ref].expect(
1489                         "should have translated all `ir::UserExternalNameRef`s before translating \
1490                          `ir::FuncRef`s",
1491                     ))
1492                 }
1493                 ir::ExternalName::TestCase(_)
1494                 | ir::ExternalName::LibCall(_)
1495                 | ir::ExternalName::KnownSymbol(_) => name.clone(),
1496             },
1497             signature: entity_map.inlined_sig_ref(*signature),
1498             colocated: *colocated,
1499             patchable: *patchable,
1500         });
1501     }
1502 
1503     offset
1504 }
1505 
1506 /// Copy stack slots from the callee into the caller.
1507 fn create_stack_slots(func: &mut ir::Function, callee: &ir::Function) -> u32 {
1508     let offset = func.sized_stack_slots.len();
1509     let offset = u32::try_from(offset).unwrap();
1510 
1511     func.sized_stack_slots
1512         .reserve(callee.sized_stack_slots.len());
1513     for slot in callee.sized_stack_slots.values() {
1514         func.sized_stack_slots.push(slot.clone());
1515     }
1516 
1517     offset
1518 }
1519 
1520 /// Copy dynamic types from the callee into the caller.
1521 fn create_dynamic_types(
1522     func: &mut ir::Function,
1523     callee: &ir::Function,
1524     entity_map: &EntityMap,
1525 ) -> u32 {
1526     let offset = func.dynamic_stack_slots.len();
1527     let offset = u32::try_from(offset).unwrap();
1528 
1529     func.dfg
1530         .dynamic_types
1531         .reserve(callee.dfg.dynamic_types.len());
1532     for ir::DynamicTypeData {
1533         base_vector_ty,
1534         dynamic_scale,
1535     } in callee.dfg.dynamic_types.values()
1536     {
1537         func.dfg.dynamic_types.push(ir::DynamicTypeData {
1538             base_vector_ty: *base_vector_ty,
1539             dynamic_scale: entity_map.inlined_global_value(*dynamic_scale),
1540         });
1541     }
1542 
1543     offset
1544 }
1545 
1546 /// Copy dynamic stack slots from the callee into the caller.
1547 fn create_dynamic_stack_slots(
1548     func: &mut ir::Function,
1549     callee: &ir::Function,
1550     entity_map: &EntityMap,
1551 ) -> u32 {
1552     let offset = func.dynamic_stack_slots.len();
1553     let offset = u32::try_from(offset).unwrap();
1554 
1555     func.dynamic_stack_slots
1556         .reserve(callee.dynamic_stack_slots.len());
1557     for ir::DynamicStackSlotData { kind, dyn_ty } in callee.dynamic_stack_slots.values() {
1558         func.dynamic_stack_slots.push(ir::DynamicStackSlotData {
1559             kind: *kind,
1560             dyn_ty: entity_map.inlined_dynamic_type(*dyn_ty),
1561         });
1562     }
1563 
1564     offset
1565 }
1566 
1567 /// Copy immediates from the callee into the caller.
1568 fn create_immediates(func: &mut ir::Function, callee: &ir::Function) -> u32 {
1569     let offset = func.dfg.immediates.len();
1570     let offset = u32::try_from(offset).unwrap();
1571 
1572     func.dfg.immediates.reserve(callee.dfg.immediates.len());
1573     for imm in callee.dfg.immediates.values() {
1574         func.dfg.immediates.push(imm.clone());
1575     }
1576 
1577     offset
1578 }
1579 
1580 /// Copy constants from the callee into the caller.
1581 fn create_constants(allocs: &mut InliningAllocs, func: &mut ir::Function, callee: &ir::Function) {
1582     for (callee_constant, data) in callee.dfg.constants.iter() {
1583         let inlined_constant = func.dfg.constants.insert(data.clone());
1584         allocs.constants[*callee_constant] = Some(inlined_constant).into();
1585     }
1586 }
1587