1 //! Function inlining infrastructure.
2 //!
3 //! This module provides "inlining as a library" to Cranelift users; it does
4 //! _not_ provide a complete, off-the-shelf inlining solution. Cranelift's
5 //! compilation context is per-function and does not encompass the full call
6 //! graph. It does not know which functions are hot and which are cold, which
7 //! have been marked the equivalent of `#[inline(never)]`, etc... Only the
8 //! Cranelift user can understand these aspects of the full compilation
9 //! pipeline, and these things can be very different between (say) Wasmtime and
//! `cg_clif`. Therefore, this module does not attempt to define heuristics for
11 //! when inlining a particular call is likely beneficial. This module only
12 //! provides hooks for the Cranelift user to define whether a given call should
13 //! be inlined or not, and the mechanics to inline a callee into a particular
14 //! call site when directed to do so by the Cranelift user.
15 //!
16 //! The top-level inlining entry point during Cranelift compilation is
17 //! [`Context::inline`][crate::Context::inline]. It takes an [`Inline`] trait
18 //! implementation, which is authored by the Cranelift user and directs
19 //! Cranelift whether to inline a particular call, and, when inlining, gives
20 //! Cranelift the body of the callee that is to be inlined.
21 
22 use crate::cursor::{Cursor as _, FuncCursor};
23 use crate::ir::{self, DebugTag, ExceptionTableData, ExceptionTableItem, InstBuilder as _};
24 use crate::result::CodegenResult;
25 use crate::trace;
26 use crate::traversals::Dfs;
27 use alloc::borrow::Cow;
28 use alloc::vec::Vec;
29 use cranelift_entity::{SecondaryMap, packed_option::PackedOption};
30 use smallvec::SmallVec;
31 
/// A vector of values with inline storage for eight elements.
type SmallValueVec = SmallVec<[ir::Value; 8]>;
/// A vector of block arguments with inline storage for eight elements.
type SmallBlockArgVec = SmallVec<[ir::BlockArg; 8]>;
/// A vector of block calls with inline storage for eight elements.
type SmallBlockCallVec = SmallVec<[ir::BlockCall; 8]>;
35 
/// A command directing Cranelift whether or not to inline a particular call.
///
/// This is the return type of [`Inline::inline`], which is consulted for each
/// direct call site that is a candidate for inlining.
pub enum InlineCommand<'a> {
    /// Keep the call as-is, out-of-line, and do not inline the callee.
    KeepCall,

    /// Inline the call, using this function as the body of the callee.
    ///
    /// It is the `Inline` implementor's responsibility to ensure that this
    /// function is the correct callee. Providing the wrong function may result
    /// in panics during compilation or incorrect runtime behavior.
    Inline {
        /// The callee function's body.
        ///
        /// This is a `Cow`, so the implementor may either lend out a function
        /// it already holds or hand over an owned one.
        callee: Cow<'a, ir::Function>,
        /// Whether to visit any function calls within the callee body after
        /// inlining and consider them for further inlining.
        visit_callee: bool,
    },
}
54 
/// A trait for directing Cranelift whether to inline a particular call or not.
///
/// Used in combination with the [`Context::inline`][crate::Context::inline]
/// method.
pub trait Inline {
    /// A hook invoked for each direct call instruction in a function, whose
    /// result determines whether Cranelift should inline a given call.
    ///
    /// The Cranelift user is responsible for defining their own heuristics and
    /// deciding whether inlining the call is beneficial.
    ///
    /// # Parameters
    ///
    /// * `caller`: the function containing the call site under consideration.
    ///
    /// * `call_inst`: the call instruction within `caller`.
    ///
    /// * `call_opcode`: the opcode of `call_inst`. The inlining pass in this
    ///   module only invokes this hook for `call`, `return_call`, and
    ///   `try_call` instructions.
    ///
    /// * `callee`: the `FuncRef`, declared in `caller`, that the call targets.
    ///
    /// * `call_args`: the arguments of `call_inst`, as reported by the
    ///   caller's data-flow graph.
    ///
    /// # Requirements on the returned function
    ///
    /// When returning a function and directing Cranelift to inline its body
    /// into the call site, the `Inline` implementer must ensure the following:
    ///
    /// * The returned function's signature exactly matches the `callee`
    ///   `FuncRef`'s signature. (The inlining pass asserts this and panics on
    ///   a mismatch.)
    ///
    /// * The returned function must be legalized.
    ///
    /// * The returned function must be valid (i.e. it must pass the CLIF
    ///   verifier).
    ///
    /// * The returned function is a correct and valid implementation of the
    ///   `callee` according to your language's semantics.
    ///
    /// Failure to uphold these invariants may result in panics during
    /// compilation or incorrect runtime behavior in the generated code.
    fn inline(
        &mut self,
        caller: &ir::Function,
        call_inst: ir::Inst,
        call_opcode: ir::Opcode,
        callee: ir::FuncRef,
        call_args: &[ir::Value],
    ) -> InlineCommand<'_>;
}
91 
92 impl<'a, T> Inline for &'a mut T
93 where
94     T: Inline,
95 {
96     fn inline(
97         &mut self,
98         caller: &ir::Function,
99         inst: ir::Inst,
100         opcode: ir::Opcode,
101         callee: ir::FuncRef,
102         args: &[ir::Value],
103     ) -> InlineCommand<'_> {
104         (*self).inline(caller, inst, opcode, callee, args)
105     }
106 }
107 
108 /// Walk the given function, invoke the `Inline` implementation for each call
109 /// instruction, and inline the callee when directed to do so.
110 ///
111 /// Returns whether any call was inlined.
112 pub(crate) fn do_inlining(
113     func: &mut ir::Function,
114     mut inliner: impl Inline,
115 ) -> CodegenResult<bool> {
116     trace!("function {} before inlining: {}", func.name, func);
117 
118     let mut inlined_any = false;
119     let mut allocs = InliningAllocs::default();
120 
121     let mut cursor = FuncCursor::new(func);
122     'block_loop: while let Some(block) = cursor.next_block() {
123         // Always keep track of our previous cursor position. Assuming that the
124         // current position is a function call that we will inline, then the
125         // previous position is just before the inlined callee function. After
126         // inlining a call, the Cranelift user can decide whether to consider
127         // any function calls in the inlined callee for further inlining or
128         // not. When they do, then we back up to this previous cursor position
129         // so that our traversal will then continue over the inlined body.
130         let mut prev_pos;
131 
132         while let Some(inst) = {
133             prev_pos = cursor.position();
134             cursor.next_inst()
135         } {
136             // Make sure that `block` is always `inst`'s block, even with all of
137             // our cursor-position-updating and block-splitting-during-inlining
138             // shenanigans below.
139             debug_assert_eq!(Some(block), cursor.func.layout.inst_block(inst));
140 
141             match cursor.func.dfg.insts[inst] {
142                 ir::InstructionData::Call {
143                     opcode: opcode @ ir::Opcode::Call | opcode @ ir::Opcode::ReturnCall,
144                     args: _,
145                     func_ref,
146                 } => {
147                     trace!(
148                         "considering call site for inlining: {inst}: {}",
149                         cursor.func.dfg.display_inst(inst),
150                     );
151                     let args = cursor.func.dfg.inst_args(inst);
152                     match inliner.inline(&cursor.func, inst, opcode, func_ref, args) {
153                         InlineCommand::KeepCall => {
154                             trace!("  --> keeping call");
155                         }
156                         InlineCommand::Inline {
157                             callee,
158                             visit_callee,
159                         } => {
160                             let last_inlined_block = inline_one(
161                                 &mut allocs,
162                                 cursor.func,
163                                 func_ref,
164                                 block,
165                                 inst,
166                                 opcode,
167                                 &callee,
168                                 None,
169                             );
170                             inlined_any = true;
171                             if visit_callee {
172                                 cursor.set_position(prev_pos);
173                             } else {
174                                 // Arrange it so that the `next_block()` loop
175                                 // will continue to the next block that is not
176                                 // associated with the just-inlined callee.
177                                 cursor.goto_bottom(last_inlined_block);
178                                 continue 'block_loop;
179                             }
180                         }
181                     }
182                 }
183                 ir::InstructionData::TryCall {
184                     opcode: opcode @ ir::Opcode::TryCall,
185                     args: _,
186                     func_ref,
187                     exception,
188                 } => {
189                     trace!(
190                         "considering call site for inlining: {inst}: {}",
191                         cursor.func.dfg.display_inst(inst),
192                     );
193                     let args = cursor.func.dfg.inst_args(inst);
194                     match inliner.inline(&cursor.func, inst, opcode, func_ref, args) {
195                         InlineCommand::KeepCall => {
196                             trace!("  --> keeping call");
197                         }
198                         InlineCommand::Inline {
199                             callee,
200                             visit_callee,
201                         } => {
202                             let last_inlined_block = inline_one(
203                                 &mut allocs,
204                                 cursor.func,
205                                 func_ref,
206                                 block,
207                                 inst,
208                                 opcode,
209                                 &callee,
210                                 Some(exception),
211                             );
212                             inlined_any = true;
213                             if visit_callee {
214                                 cursor.set_position(prev_pos);
215                             } else {
216                                 // Arrange it so that the `next_block()` loop
217                                 // will continue to the next block that is not
218                                 // associated with the just-inlined callee.
219                                 cursor.goto_bottom(last_inlined_block);
220                                 continue 'block_loop;
221                             }
222                         }
223                     }
224                 }
225                 ir::InstructionData::CallIndirect { .. }
226                 | ir::InstructionData::TryCallIndirect { .. } => {
227                     // Can't inline indirect calls; need to have some earlier
228                     // pass rewrite them into direct calls first, when possible.
229                 }
230                 ir::InstructionData::Call {
231                     opcode: ir::Opcode::PatchableCall,
232                     ..
233                 } => {
234                     // Can't inline patchable calls; they need to
235                     // remain patchable and inlining the whole body is
236                     // decidedly *not* patchable!
237                 }
238                 _ => {
239                     debug_assert!(
240                         !cursor.func.dfg.insts[inst].opcode().is_call(),
241                         "should have matched all call instructions, but found: {inst}: {}",
242                         cursor.func.dfg.display_inst(inst),
243                     );
244                 }
245             }
246         }
247     }
248 
249     if inlined_any {
250         trace!("function {} after inlining: {}", func.name, func);
251     } else {
252         trace!("function {} did not have any callees inlined", func.name);
253     }
254 
255     Ok(inlined_any)
256 }
257 
/// Scratch state used while inlining a single call.
///
/// One instance is created per inlining pass and `reset` before each
/// individual call is inlined.
#[derive(Default)]
struct InliningAllocs {
    /// Map from callee value to inlined caller value.
    ///
    /// Entries are keyed by the alias-resolved callee value: both
    /// `set_inlined_value` and `get_inlined_value` resolve aliases in the
    /// callee before indexing this map.
    values: SecondaryMap<ir::Value, PackedOption<ir::Value>>,

    /// Map from callee constant to inlined caller constant.
    ///
    /// Not in `EntityMap` because these are hash-consed inside the
    /// `ir::Function`.
    constants: SecondaryMap<ir::Constant, PackedOption<ir::Constant>>,

    /// Map from callee to inlined caller external name refs.
    ///
    /// Not in `EntityMap` because these are hash-consed inside the
    /// `ir::Function`.
    user_external_name_refs:
        SecondaryMap<ir::UserExternalNameRef, PackedOption<ir::UserExternalNameRef>>,

    /// The set of _caller_ inlined call instructions that need exception table
    /// fixups at the end of inlining.
    ///
    /// This includes all kinds of non-returning calls, not just the literal
    /// `call` instruction: `call_indirect`, `try_call`, `try_call_indirect`,
    /// etc... However, it does not include `return_call` and
    /// `return_call_indirect` instructions because the caller cannot catch
    /// exceptions that those calls throw because the caller is no longer on the
    /// stack as soon as they are executed. Patchable calls are also never
    /// recorded here; see the comment where this set is populated in
    /// `inline_one`.
    ///
    /// Note: this is a simple `Vec`, and not an `EntitySet`, because it is very
    /// sparse: most of the caller's instructions are not inlined call
    /// instructions. Additionally, we require deterministic iteration order and
    /// do not require set-membership testing, so a hash set is not a good
    /// choice either.
    calls_needing_exception_table_fixup: Vec<ir::Inst>,
}
293 
294 impl InliningAllocs {
295     fn reset(&mut self, callee: &ir::Function) {
296         let InliningAllocs {
297             values,
298             constants,
299             user_external_name_refs,
300             calls_needing_exception_table_fixup,
301         } = self;
302 
303         values.clear();
304         values.resize(callee.dfg.len_values());
305 
306         constants.clear();
307         constants.resize(callee.dfg.constants.len());
308 
309         user_external_name_refs.clear();
310         user_external_name_refs.resize(callee.params.user_named_funcs().len());
311 
312         // Note: We do not reserve capacity for
313         // `calls_needing_exception_table_fixup` because it is a sparse set and
314         // we don't know how large it needs to be ahead of time.
315         calls_needing_exception_table_fixup.clear();
316     }
317 
318     fn set_inlined_value(
319         &mut self,
320         callee: &ir::Function,
321         callee_val: ir::Value,
322         inlined_val: ir::Value,
323     ) {
324         trace!("  --> callee {callee_val:?} = inlined {inlined_val:?}");
325         debug_assert!(self.values[callee_val].is_none());
326         let resolved_callee_val = callee.dfg.resolve_aliases(callee_val);
327         debug_assert!(self.values[resolved_callee_val].is_none());
328         self.values[resolved_callee_val] = Some(inlined_val).into();
329     }
330 
331     fn get_inlined_value(&self, callee: &ir::Function, callee_val: ir::Value) -> Option<ir::Value> {
332         let resolved_callee_val = callee.dfg.resolve_aliases(callee_val);
333         self.values[resolved_callee_val].expand()
334     }
335 }
336 
337 /// Inline one particular function call.
338 ///
339 /// Returns the last inlined block in the layout.
340 fn inline_one(
341     allocs: &mut InliningAllocs,
342     func: &mut ir::Function,
343     callee_func_ref: ir::FuncRef,
344     call_block: ir::Block,
345     call_inst: ir::Inst,
346     call_opcode: ir::Opcode,
347     callee: &ir::Function,
348     call_exception_table: Option<ir::ExceptionTable>,
349 ) -> ir::Block {
350     trace!(
351         "Inlining call {call_inst:?}: {}\n\
352          with callee = {callee:?}",
353         func.dfg.display_inst(call_inst)
354     );
355 
356     // Type check callee signature.
357     let expected_callee_sig = func.dfg.ext_funcs[callee_func_ref].signature;
358     let expected_callee_sig = &func.dfg.signatures[expected_callee_sig];
359     assert_eq!(expected_callee_sig, &callee.signature);
360 
361     allocs.reset(callee);
362 
363     // First, append various callee entity arenas to the end of the caller's
364     // entity arenas.
365     let entity_map = create_entities(allocs, func, callee);
366 
367     // Inlined prologue: split the call instruction's block at the point of the
368     // call and replace the call with a jump.
369     let return_block = split_off_return_block(func, call_inst, call_opcode, callee);
370     let call_stack_map = replace_call_with_jump(allocs, func, call_inst, callee, &entity_map);
371 
372     // Prepare for translating the actual instructions by inserting the inlined
373     // blocks into the caller's layout in the same order that they appear in the
374     // callee.
375     let mut last_inlined_block = inline_block_layout(func, call_block, callee, &entity_map);
376 
377     // Get a copy of debug tags on the call instruction; these are
378     // prepended to debug tags on inlined instructions. Remove them
379     // from the call itself as it will be rewritten to a jump (which
380     // cannot have tags).
381     let call_debug_tags = func.debug_tags.get(call_inst).to_vec();
382     func.debug_tags.set(call_inst, []);
383 
384     // Translate each instruction from the callee into the caller,
385     // appending them to their associated block in the caller.
386     //
387     // Note that we iterate over the callee with a pre-order traversal so that
388     // we see value defs before uses.
389     for callee_block in Dfs::new().pre_order_iter(callee) {
390         let inlined_block = entity_map.inlined_block(callee_block);
391         trace!(
392             "Processing instructions in callee block {callee_block:?} (inlined block {inlined_block:?}"
393         );
394 
395         let mut next_callee_inst = callee.layout.first_inst(callee_block);
396         while let Some(callee_inst) = next_callee_inst {
397             trace!(
398                 "Processing callee instruction {callee_inst:?}: {}",
399                 callee.dfg.display_inst(callee_inst)
400             );
401 
402             assert_ne!(
403                 callee.dfg.insts[callee_inst].opcode(),
404                 ir::Opcode::GlobalValue,
405                 "callee must already be legalized, we shouldn't see any `global_value` \
406                  instructions when inlining; found {callee_inst:?}: {}",
407                 callee.dfg.display_inst(callee_inst)
408             );
409 
410             // Remap the callee instruction's entities and insert it into the
411             // caller's DFG.
412             let inlined_inst_data = callee.dfg.insts[callee_inst].map(InliningInstRemapper {
413                 allocs: &allocs,
414                 func,
415                 callee,
416                 entity_map: &entity_map,
417             });
418             let inlined_inst = func.dfg.make_inst(inlined_inst_data);
419             func.layout.append_inst(inlined_inst, inlined_block);
420 
421             // Copy over debug tags, translating referenced entities
422             // as appropriate.
423             let debug_tags = callee.debug_tags.get(callee_inst);
424             // If there are tags on the inlined instruction, we always
425             // add tags, and we prepend any tags from the call
426             // instruction; but we don't add tags if only the callsite
427             // had them (this would otherwise mean that every single
428             // instruction in an inlined function body would get
429             // tags).
430             if !debug_tags.is_empty() {
431                 let tags = call_debug_tags
432                     .iter()
433                     .cloned()
434                     .chain(debug_tags.iter().map(|tag| match *tag {
435                         DebugTag::User(value) => DebugTag::User(value),
436                         DebugTag::StackSlot(slot) => {
437                             DebugTag::StackSlot(entity_map.inlined_stack_slot(slot))
438                         }
439                     }))
440                     .collect::<SmallVec<[_; 4]>>();
441                 func.debug_tags.set(inlined_inst, tags);
442             }
443 
444             let opcode = callee.dfg.insts[callee_inst].opcode();
445             if opcode.is_return() {
446                 // Instructions that return do not define any values, so we
447                 // don't need to worry about that, but we do need to fix them up
448                 // so that they return by jumping to our control-flow join
449                 // block, rather than returning from the caller.
450                 if let Some(return_block) = return_block {
451                     fixup_inst_that_returns(
452                         allocs,
453                         func,
454                         callee,
455                         &entity_map,
456                         call_opcode,
457                         inlined_inst,
458                         callee_inst,
459                         return_block,
460                         call_stack_map.as_ref().map(|es| &**es),
461                     );
462                 } else {
463                     // If we are inlining a callee that was invoked via
464                     // `return_call`, we leave inlined return instructions
465                     // as-is: there is no logical caller frame on the stack to
466                     // continue to.
467                     debug_assert_eq!(call_opcode, ir::Opcode::ReturnCall);
468                 }
469             } else {
470                 // Make the instruction's result values.
471                 let ctrl_typevar = callee.dfg.ctrl_typevar(callee_inst);
472                 func.dfg.make_inst_results(inlined_inst, ctrl_typevar);
473 
474                 // Update the value map for this instruction's defs.
475                 let callee_results = callee.dfg.inst_results(callee_inst);
476                 let inlined_results = func.dfg.inst_results(inlined_inst);
477                 debug_assert_eq!(callee_results.len(), inlined_results.len());
478                 for (callee_val, inlined_val) in callee_results.iter().zip(inlined_results) {
479                     allocs.set_inlined_value(callee, *callee_val, *inlined_val);
480                 }
481 
482                 if opcode.is_call() {
483                     append_stack_map_entries(
484                         func,
485                         callee,
486                         &entity_map,
487                         call_stack_map.as_deref(),
488                         inlined_inst,
489                         callee_inst,
490                     );
491 
492                     // When we are inlining a `try_call` call site, we need to merge
493                     // the call site's exception table into the inlined calls'
494                     // exception tables. This can involve rewriting regular `call`s
495                     // into `try_call`s, which requires mutating the CFG because
496                     // `try_call` is a block terminator. However, we can't mutate
497                     // the CFG in the middle of this traversal because we rely on
498                     // the existence of a one-to-one mapping between the callee
499                     // layout and the inlined layout. Instead, we record the set of
500                     // inlined call instructions that will need fixing up, and
501                     // perform that possibly-CFG-mutating exception table merging in
502                     // a follow up pass, when we no longer rely on that one-to-one
503                     // layout mapping.
504                     debug_assert_eq!(
505                         call_opcode == ir::Opcode::TryCall,
506                         call_exception_table.is_some()
507                     );
508                     // Note that we do not fix up patchable calls
509                     // inlined at a try-call to a try-call, because
510                     // the "patchable ABI" does not support catching
511                     // exceptions. This does mean that we cannot have
512                     // an exception-throw propagate out of a
513                     // breakpoint when we use patchable calls to set
514                     // up breakpoints, but we don't expect that to
515                     // occur.
516                     //
517                     // FIXME: consider making patchability an aspect
518                     // of any call; then we can remove this special
519                     // case.
520                     if call_opcode == ir::Opcode::TryCall && opcode != ir::Opcode::PatchableCall {
521                         allocs
522                             .calls_needing_exception_table_fixup
523                             .push(inlined_inst);
524                     }
525                 }
526             }
527 
528             trace!(
529                 "  --> inserted inlined instruction {inlined_inst:?}: {}",
530                 func.dfg.display_inst(inlined_inst)
531             );
532 
533             next_callee_inst = callee.layout.next_inst(callee_inst);
534         }
535     }
536 
537     // We copied *all* callee blocks into the caller's layout, but only copied
538     // the callee instructions in *reachable* callee blocks into the caller's
539     // associated blocks. Therefore, any *unreachable* blocks are empty in the
540     // caller, which is invalid CLIF because all blocks must end in a
541     // terminator, so do a quick pass over the inlined blocks and remove any
542     // empty blocks from the caller's layout.
543     for block in entity_map.iter_inlined_blocks(func) {
544         if func.layout.is_block_inserted(block) && func.layout.first_inst(block).is_none() {
545             log::trace!("removing unreachable inlined block from layout: {block}");
546 
547             // If the block being removed is our last-inlined block, then back
548             // it up to the previous block in the layout, which will be the new
549             // last-inlined block after this one's removal.
550             if block == last_inlined_block {
551                 last_inlined_block = func.layout.prev_block(last_inlined_block).expect(
552                     "there will always at least be the block that contained the call we are \
553                      inlining",
554                 );
555             }
556 
557             func.layout.remove_block(block);
558         }
559     }
560 
561     // Final step: fixup the exception tables of any inlined calls when we are
562     // inlining a `try_call` site.
563     //
564     // Subtly, this requires rewriting non-catching `call[_indirect]`
565     // instructions into `try_call[_indirect]` instructions so that exceptions
566     // that unwound through the original callee frame and were caught by the
567     // caller's `try_call` do not unwind past this inlined frame. And turning a
568     // `call` into a `try_call` mutates the CFG, breaking our one-to-one mapping
569     // between callee blocks and inlined blocks, so we delay these fixups to
570     // this final step, when we no longer rely on that mapping.
571     debug_assert!(
572         allocs.calls_needing_exception_table_fixup.is_empty() || call_exception_table.is_some()
573     );
574     debug_assert_eq!(
575         call_opcode == ir::Opcode::TryCall,
576         call_exception_table.is_some()
577     );
578     if let Some(call_exception_table) = call_exception_table {
579         fixup_inlined_call_exception_tables(allocs, func, call_exception_table);
580     }
581 
582     debug_assert!(
583         func.layout.is_block_inserted(last_inlined_block),
584         "last_inlined_block={last_inlined_block} should be inserted in the layout"
585     );
586     last_inlined_block
587 }
588 
589 /// Append stack map entries from the caller and callee to the given inlined
590 /// instruction.
591 fn append_stack_map_entries(
592     func: &mut ir::Function,
593     callee: &ir::Function,
594     entity_map: &EntityMap,
595     call_stack_map: Option<&[ir::UserStackMapEntry]>,
596     inlined_inst: ir::Inst,
597     callee_inst: ir::Inst,
598 ) {
599     // Add the caller's stack map to this call. These entries
600     // already refer to caller entities and do not need further
601     // translation.
602     func.dfg.append_user_stack_map_entries(
603         inlined_inst,
604         call_stack_map
605             .iter()
606             .flat_map(|entries| entries.iter().cloned()),
607     );
608 
609     // Append the callee's stack map to this call. These entries
610     // refer to callee entities and therefore do require
611     // translation into the caller's index space.
612     func.dfg.append_user_stack_map_entries(
613         inlined_inst,
614         callee
615             .dfg
616             .user_stack_map_entries(callee_inst)
617             .iter()
618             .flat_map(|entries| entries.iter())
619             .map(|entry| ir::UserStackMapEntry {
620                 ty: entry.ty,
621                 slot: entity_map.inlined_stack_slot(entry.slot),
622                 offset: entry.offset,
623             }),
624     );
625 }
626 
627 /// Create or update the exception tables for any inlined call instructions:
628 /// when inlining at a `try_call` site, we must forward our exceptional edges
629 /// into each inlined call instruction.
630 fn fixup_inlined_call_exception_tables(
631     allocs: &mut InliningAllocs,
632     func: &mut ir::Function,
633     call_exception_table: ir::ExceptionTable,
634 ) {
635     // Split a block at a `call[_indirect]` instruction, detach the
636     // instruction's results, and alias them to the new block's parameters.
637     let split_block_for_new_try_call = |func: &mut ir::Function, inst: ir::Inst| -> ir::Block {
638         debug_assert!(func.dfg.insts[inst].opcode().is_call());
639         debug_assert!(!func.dfg.insts[inst].opcode().is_terminator());
640 
641         // Split the block.
642         let next_inst = func
643             .layout
644             .next_inst(inst)
645             .expect("inst is not a terminator, should have a successor");
646         let new_block = func.dfg.blocks.add();
647         func.layout.split_block(new_block, next_inst);
648 
649         // `try_call[_indirect]` instructions do not define values themselves;
650         // the normal-return block has parameters for the results. So remove
651         // this instruction's results, create an associated block parameter for
652         // each of them, and alias them to the new block parameter.
653         let old_results = SmallValueVec::from_iter(func.dfg.inst_results(inst).iter().copied());
654         func.dfg.detach_inst_results(inst);
655         for old_result in old_results {
656             let ty = func.dfg.value_type(old_result);
657             let new_block_param = func.dfg.append_block_param(new_block, ty);
658             func.dfg.change_to_alias(old_result, new_block_param);
659         }
660 
661         new_block
662     };
663 
664     // Clone the caller's exception table, updating it for use in the current
665     // `call[_indirect]` instruction as it becomes a `try_call[_indirect]`.
666     let clone_exception_table_for_this_call = |func: &mut ir::Function,
667                                                signature: ir::SigRef,
668                                                new_block: ir::Block|
669      -> ir::ExceptionTable {
670         let mut exception = func.stencil.dfg.exception_tables[call_exception_table]
671             .deep_clone(&mut func.stencil.dfg.value_lists);
672 
673         *exception.signature_mut() = signature;
674 
675         let returns_len = func.dfg.signatures[signature].returns.len();
676         let returns_len = u32::try_from(returns_len).unwrap();
677 
678         *exception.normal_return_mut() = ir::BlockCall::new(
679             new_block,
680             (0..returns_len).map(|i| ir::BlockArg::TryCallRet(i)),
681             &mut func.dfg.value_lists,
682         );
683 
684         func.dfg.exception_tables.push(exception)
685     };
686 
687     for inst in allocs.calls_needing_exception_table_fixup.drain(..) {
688         debug_assert!(func.dfg.insts[inst].opcode().is_call());
689         debug_assert!(!func.dfg.insts[inst].opcode().is_return());
690         match func.dfg.insts[inst] {
691             //     current_block:
692             //         preds...
693             //         rets... = call f(args...)
694             //         succs...
695             //
696             // becomes
697             //
698             //     current_block:
699             //         preds...
700             //         try_call f(args...), new_block(rets...), [call_exception_table...]
701             //     new_block(rets...):
702             //         succs...
703             ir::InstructionData::Call {
704                 opcode: ir::Opcode::Call,
705                 args,
706                 func_ref,
707             } => {
708                 let new_block = split_block_for_new_try_call(func, inst);
709                 let signature = func.dfg.ext_funcs[func_ref].signature;
710                 let exception = clone_exception_table_for_this_call(func, signature, new_block);
711                 func.dfg.insts[inst] = ir::InstructionData::TryCall {
712                     opcode: ir::Opcode::TryCall,
713                     args,
714                     func_ref,
715                     exception,
716                 };
717             }
718 
719             //     current_block:
720             //         preds...
721             //         rets... = call_indirect sig, val(args...)
722             //         succs...
723             //
724             // becomes
725             //
726             //     current_block:
727             //         preds...
728             //         try_call_indirect sig, val(args...), new_block(rets...), [call_exception_table...]
729             //     new_block(rets...):
730             //         succs...
731             ir::InstructionData::CallIndirect {
732                 opcode: ir::Opcode::CallIndirect,
733                 args,
734                 sig_ref,
735             } => {
736                 let new_block = split_block_for_new_try_call(func, inst);
737                 let exception = clone_exception_table_for_this_call(func, sig_ref, new_block);
738                 func.dfg.insts[inst] = ir::InstructionData::TryCallIndirect {
739                     opcode: ir::Opcode::TryCallIndirect,
740                     args,
741                     exception,
742                 };
743             }
744 
745             // For `try_call[_indirect]` instructions, we just need to merge the
746             // exception tables.
747             ir::InstructionData::TryCall {
748                 opcode: ir::Opcode::TryCall,
749                 exception,
750                 ..
751             }
752             | ir::InstructionData::TryCallIndirect {
753                 opcode: ir::Opcode::TryCallIndirect,
754                 exception,
755                 ..
756             } => {
757                 // Construct a new exception table that consists of
758                 // the inlined instruction's exception table match
759                 // sequence, with the inlining site's exception table
760                 // appended. This will ensure that the first-match
761                 // semantics emulates the original behavior of
762                 // matching in the inner frame first.
763                 let sig = func.dfg.exception_tables[exception].signature();
764                 let normal_return = *func.dfg.exception_tables[exception].normal_return();
765                 let exception_data = ExceptionTableData::new(
766                     sig,
767                     normal_return,
768                     func.dfg.exception_tables[exception]
769                         .items()
770                         .chain(func.dfg.exception_tables[call_exception_table].items()),
771                 )
772                 .deep_clone(&mut func.dfg.value_lists);
773 
774                 func.dfg.exception_tables[exception] = exception_data;
775             }
776 
777             otherwise => unreachable!("unknown non-return call instruction: {otherwise:?}"),
778         }
779     }
780 }
781 
782 /// After having created an inlined version of a callee instruction that returns
783 /// in the caller, we need to fix it up so that it doesn't actually return
784 /// (since we are already in the caller's frame) and instead just jumps to the
785 /// control-flow join point.
786 fn fixup_inst_that_returns(
787     allocs: &mut InliningAllocs,
788     func: &mut ir::Function,
789     callee: &ir::Function,
790     entity_map: &EntityMap,
791     call_opcode: ir::Opcode,
792     inlined_inst: ir::Inst,
793     callee_inst: ir::Inst,
794     return_block: ir::Block,
795     call_stack_map: Option<&[ir::UserStackMapEntry]>,
796 ) {
797     debug_assert!(func.dfg.insts[inlined_inst].opcode().is_return());
798     match func.dfg.insts[inlined_inst] {
799         //     return rets...
800         //
801         // becomes
802         //
803         //     jump return_block(rets...)
804         ir::InstructionData::MultiAry {
805             opcode: ir::Opcode::Return,
806             args,
807         } => {
808             let rets = SmallBlockArgVec::from_iter(
809                 args.as_slice(&func.dfg.value_lists)
810                     .iter()
811                     .copied()
812                     .map(|v| v.into()),
813             );
814             func.dfg.replace(inlined_inst).jump(return_block, &rets);
815         }
816 
817         //     return_call f(args...)
818         //
819         // becomes
820         //
821         //     rets... = call f(args...)
822         //     jump return_block(rets...)
823         ir::InstructionData::Call {
824             opcode: ir::Opcode::ReturnCall,
825             args,
826             func_ref,
827         } => {
828             func.dfg.insts[inlined_inst] = ir::InstructionData::Call {
829                 opcode: ir::Opcode::Call,
830                 args,
831                 func_ref,
832             };
833             func.dfg.make_inst_results(inlined_inst, ir::types::INVALID);
834 
835             append_stack_map_entries(
836                 func,
837                 callee,
838                 &entity_map,
839                 call_stack_map,
840                 inlined_inst,
841                 callee_inst,
842             );
843 
844             let rets = SmallBlockArgVec::from_iter(
845                 func.dfg
846                     .inst_results(inlined_inst)
847                     .iter()
848                     .copied()
849                     .map(|v| v.into()),
850             );
851             let mut cursor = FuncCursor::new(func);
852             cursor.goto_after_inst(inlined_inst);
853             cursor.ins().jump(return_block, &rets);
854 
855             if call_opcode == ir::Opcode::TryCall {
856                 allocs
857                     .calls_needing_exception_table_fixup
858                     .push(inlined_inst);
859             }
860         }
861 
862         //     return_call_indirect val(args...)
863         //
864         // becomes
865         //
866         //     rets... = call_indirect val(args...)
867         //     jump return_block(rets...)
868         ir::InstructionData::CallIndirect {
869             opcode: ir::Opcode::ReturnCallIndirect,
870             args,
871             sig_ref,
872         } => {
873             func.dfg.insts[inlined_inst] = ir::InstructionData::CallIndirect {
874                 opcode: ir::Opcode::CallIndirect,
875                 args,
876                 sig_ref,
877             };
878             func.dfg.make_inst_results(inlined_inst, ir::types::INVALID);
879 
880             append_stack_map_entries(
881                 func,
882                 callee,
883                 &entity_map,
884                 call_stack_map,
885                 inlined_inst,
886                 callee_inst,
887             );
888 
889             let rets = SmallBlockArgVec::from_iter(
890                 func.dfg
891                     .inst_results(inlined_inst)
892                     .iter()
893                     .copied()
894                     .map(|v| v.into()),
895             );
896             let mut cursor = FuncCursor::new(func);
897             cursor.goto_after_inst(inlined_inst);
898             cursor.ins().jump(return_block, &rets);
899 
900             if call_opcode == ir::Opcode::TryCall {
901                 allocs
902                     .calls_needing_exception_table_fixup
903                     .push(inlined_inst);
904             }
905         }
906 
907         inst_data => unreachable!(
908             "should have handled all `is_return() == true` instructions above; \
909              got {inst_data:?}"
910         ),
911     }
912 }
913 
/// An `InstructionMapper` implementation that remaps a callee instruction's
/// entity references to their new indices in the caller function.
struct InliningInstRemapper<'a> {
    /// Inlining state consulted for the callee-to-caller translation of values
    /// and constants.
    allocs: &'a InliningAllocs,
    /// The caller function. Remapped value lists, block calls, jump tables, and
    /// exception tables are allocated in its data-flow graph.
    func: &'a mut ir::Function,
    /// The callee function whose instruction operands are being translated.
    callee: &'a ir::Function,
    /// Translation from callee entities (blocks, signatures, function
    /// references, stack slots, ...) to their inlined caller counterparts.
    entity_map: &'a EntityMap,
}
922 
923 impl<'a> ir::instructions::InstructionMapper for InliningInstRemapper<'a> {
924     fn map_value(&mut self, value: ir::Value) -> ir::Value {
925         self.allocs.get_inlined_value(self.callee, value).expect(
926             "defs come before uses; we should have already inlined all values \
927              used by an instruction",
928         )
929     }
930 
931     fn map_value_list(&mut self, value_list: ir::ValueList) -> ir::ValueList {
932         let mut inlined_list = ir::ValueList::new();
933         for callee_val in value_list.as_slice(&self.callee.dfg.value_lists) {
934             let inlined_val = self.map_value(*callee_val);
935             inlined_list.push(inlined_val, &mut self.func.dfg.value_lists);
936         }
937         inlined_list
938     }
939 
940     fn map_global_value(&mut self, global_value: ir::GlobalValue) -> ir::GlobalValue {
941         self.entity_map.inlined_global_value(global_value)
942     }
943 
944     fn map_jump_table(&mut self, jump_table: ir::JumpTable) -> ir::JumpTable {
945         let inlined_default =
946             self.map_block_call(self.callee.dfg.jump_tables[jump_table].default_block());
947         let inlined_table = self.callee.dfg.jump_tables[jump_table]
948             .as_slice()
949             .iter()
950             .map(|callee_block_call| self.map_block_call(*callee_block_call))
951             .collect::<SmallBlockCallVec>();
952         self.func
953             .dfg
954             .jump_tables
955             .push(ir::JumpTableData::new(inlined_default, &inlined_table))
956     }
957 
958     fn map_exception_table(&mut self, exception_table: ir::ExceptionTable) -> ir::ExceptionTable {
959         let exception_table = &self.callee.dfg.exception_tables[exception_table];
960         let inlined_sig_ref = self.map_sig_ref(exception_table.signature());
961         let inlined_normal_return = self.map_block_call(*exception_table.normal_return());
962         let inlined_table = exception_table
963             .items()
964             .map(|item| match item {
965                 ExceptionTableItem::Tag(tag, block_call) => {
966                     ExceptionTableItem::Tag(tag, self.map_block_call(block_call))
967                 }
968                 ExceptionTableItem::Default(block_call) => {
969                     ExceptionTableItem::Default(self.map_block_call(block_call))
970                 }
971                 ExceptionTableItem::Context(value) => {
972                     ExceptionTableItem::Context(self.map_value(value))
973                 }
974             })
975             .collect::<SmallVec<[_; 8]>>();
976         self.func
977             .dfg
978             .exception_tables
979             .push(ir::ExceptionTableData::new(
980                 inlined_sig_ref,
981                 inlined_normal_return,
982                 inlined_table,
983             ))
984     }
985 
986     fn map_block_call(&mut self, block_call: ir::BlockCall) -> ir::BlockCall {
987         let callee_block = block_call.block(&self.callee.dfg.value_lists);
988         let inlined_block = self.entity_map.inlined_block(callee_block);
989         let args = block_call
990             .args(&self.callee.dfg.value_lists)
991             .map(|arg| match arg {
992                 ir::BlockArg::Value(value) => self.map_value(value).into(),
993                 ir::BlockArg::TryCallRet(_) | ir::BlockArg::TryCallExn(_) => arg,
994             })
995             .collect::<SmallBlockArgVec>();
996         ir::BlockCall::new(inlined_block, args, &mut self.func.dfg.value_lists)
997     }
998 
999     fn map_block(&mut self, block: ir::Block) -> ir::Block {
1000         self.entity_map.inlined_block(block)
1001     }
1002 
1003     fn map_func_ref(&mut self, func_ref: ir::FuncRef) -> ir::FuncRef {
1004         self.entity_map.inlined_func_ref(func_ref)
1005     }
1006 
1007     fn map_sig_ref(&mut self, sig_ref: ir::SigRef) -> ir::SigRef {
1008         self.entity_map.inlined_sig_ref(sig_ref)
1009     }
1010 
1011     fn map_stack_slot(&mut self, stack_slot: ir::StackSlot) -> ir::StackSlot {
1012         self.entity_map.inlined_stack_slot(stack_slot)
1013     }
1014 
1015     fn map_dynamic_stack_slot(
1016         &mut self,
1017         dynamic_stack_slot: ir::DynamicStackSlot,
1018     ) -> ir::DynamicStackSlot {
1019         self.entity_map
1020             .inlined_dynamic_stack_slot(dynamic_stack_slot)
1021     }
1022 
1023     fn map_constant(&mut self, constant: ir::Constant) -> ir::Constant {
1024         self.allocs
1025             .constants
1026             .get(constant)
1027             .and_then(|o| o.expand())
1028             .expect("should have inlined all callee constants")
1029     }
1030 
1031     fn map_immediate(&mut self, immediate: ir::Immediate) -> ir::Immediate {
1032         self.entity_map.inlined_immediate(immediate)
1033     }
1034 }
1035 
1036 /// Inline the callee's layout into the caller's layout.
1037 ///
1038 /// Returns the last inlined block in the layout.
1039 fn inline_block_layout(
1040     func: &mut ir::Function,
1041     call_block: ir::Block,
1042     callee: &ir::Function,
1043     entity_map: &EntityMap,
1044 ) -> ir::Block {
1045     debug_assert!(func.layout.is_block_inserted(call_block));
1046 
1047     // Iterate over callee blocks in layout order, inserting their associated
1048     // inlined block into the caller's layout.
1049     let mut prev_inlined_block = call_block;
1050     let mut next_callee_block = callee.layout.entry_block();
1051     while let Some(callee_block) = next_callee_block {
1052         debug_assert!(func.layout.is_block_inserted(prev_inlined_block));
1053 
1054         let inlined_block = entity_map.inlined_block(callee_block);
1055         func.layout
1056             .insert_block_after(inlined_block, prev_inlined_block);
1057 
1058         prev_inlined_block = inlined_block;
1059         next_callee_block = callee.layout.next_block(callee_block);
1060     }
1061 
1062     debug_assert!(func.layout.is_block_inserted(prev_inlined_block));
1063     prev_inlined_block
1064 }
1065 
1066 /// Split the call instruction's block just after the call instruction to create
1067 /// the point where control-flow joins after the inlined callee "returns".
1068 ///
1069 /// Note that tail calls do not return to the caller and therefore do not have a
1070 /// control-flow join point.
1071 fn split_off_return_block(
1072     func: &mut ir::Function,
1073     call_inst: ir::Inst,
1074     opcode: ir::Opcode,
1075     callee: &ir::Function,
1076 ) -> Option<ir::Block> {
1077     // When the `call_inst` is not a block terminator, we need to split the
1078     // block.
1079     let return_block = func.layout.next_inst(call_inst).map(|next_inst| {
1080         let return_block = func.dfg.blocks.add();
1081         func.layout.split_block(return_block, next_inst);
1082 
1083         // Add block parameters for each return value and alias the call
1084         // instruction's results to them.
1085         let old_results =
1086             SmallValueVec::from_iter(func.dfg.inst_results(call_inst).iter().copied());
1087         debug_assert_eq!(old_results.len(), callee.signature.returns.len());
1088         func.dfg.detach_inst_results(call_inst);
1089         for (abi, old_val) in callee.signature.returns.iter().zip(old_results) {
1090             debug_assert_eq!(abi.value_type, func.dfg.value_type(old_val));
1091             let ret_param = func.dfg.append_block_param(return_block, abi.value_type);
1092             func.dfg.change_to_alias(old_val, ret_param);
1093         }
1094 
1095         return_block
1096     });
1097 
1098     // When the `call_inst` is a block terminator, then it is either a
1099     // `return_call` or a `try_call`:
1100     //
1101     // * For `return_call`s, we don't have a control-flow join point, because
1102     //   the caller permanently transfers control to the callee.
1103     //
1104     // * For `try_call`s, we probably already have a block for the control-flow
1105     //   join point, but it isn't guaranteed: the `try_call` might ignore the
1106     //   call's returns and not forward them to the normal-return block or it
1107     //   might also pass additional arguments. We can only reuse the existing
1108     //   normal-return block when the `try_call` forwards exactly our callee's
1109     //   returns to that block (and therefore that block's parameter types also
1110     //   exactly match the callee's return types). Otherwise, we must create a new
1111     //   return block that forwards to the existing normal-return
1112     //   block. (Elsewhere, at the end of inlining, we will also update any inlined
1113     //   calls to forward any raised exceptions to the caller's exception table,
1114     //   as necessary.)
1115     //
1116     //   Finally, note that reusing the normal-return's target block is just an
1117     //   optimization to emit a simpler CFG when we can, and is not
1118     //   fundamentally required for correctness. We could always insert a
1119     //   temporary block as our control-flow join point that then forwards to
1120     //   the normal-return's target block. However, at the time of writing,
1121     //   Cranelift doesn't currently do any jump-threading or branch
1122     //   simplification in the mid-end, and removing unnecessary blocks in this
1123     //   way can help some subsequent mid-end optimizations. If, in the future,
1124     //   we gain support for jump-threading optimizations in the mid-end, we can
1125     //   come back and simplify the below code a bit to always generate the
1126     //   temporary block, and then rely on the subsequent optimizations to clean
1127     //   everything up.
1128     debug_assert_eq!(
1129         return_block.is_none(),
1130         opcode == ir::Opcode::ReturnCall || opcode == ir::Opcode::TryCall,
1131     );
1132     return_block.or_else(|| match func.dfg.insts[call_inst] {
1133         ir::InstructionData::TryCall {
1134             opcode: ir::Opcode::TryCall,
1135             args: _,
1136             func_ref: _,
1137             exception,
1138         } => {
1139             let normal_return = func.dfg.exception_tables[exception].normal_return();
1140             let normal_return_block = normal_return.block(&func.dfg.value_lists);
1141 
1142             // Check to see if we can reuse the existing normal-return block.
1143             {
1144                 let normal_return_args = normal_return.args(&func.dfg.value_lists);
1145                 if normal_return_args.len() == callee.signature.returns.len()
1146                     && normal_return_args.enumerate().all(|(i, arg)| {
1147                         let i = u32::try_from(i).unwrap();
1148                         arg == ir::BlockArg::TryCallRet(i)
1149                     })
1150                 {
1151                     return Some(normal_return_block);
1152                 }
1153             }
1154 
1155             // Okay, we cannot reuse the normal-return block. Create a new block
1156             // that has the expected block parameter types and have it jump to
1157             // the normal-return block.
1158             let return_block = func.dfg.blocks.add();
1159             func.layout.insert_block(return_block, normal_return_block);
1160 
1161             let return_block_params = callee
1162                 .signature
1163                 .returns
1164                 .iter()
1165                 .map(|abi| func.dfg.append_block_param(return_block, abi.value_type))
1166                 .collect::<SmallValueVec>();
1167 
1168             let normal_return_args = func.dfg.exception_tables[exception]
1169                 .normal_return()
1170                 .args(&func.dfg.value_lists)
1171                 .collect::<SmallBlockArgVec>();
1172             let jump_args = normal_return_args
1173                 .into_iter()
1174                 .map(|arg| match arg {
1175                     ir::BlockArg::Value(value) => ir::BlockArg::Value(value),
1176                     ir::BlockArg::TryCallRet(i) => {
1177                         let i = usize::try_from(i).unwrap();
1178                         ir::BlockArg::Value(return_block_params[i])
1179                     }
1180                     ir::BlockArg::TryCallExn(_) => {
1181                         unreachable!("normal-return edges cannot use exceptional results")
1182                     }
1183                 })
1184                 .collect::<SmallBlockArgVec>();
1185 
1186             let mut cursor = FuncCursor::new(func);
1187             cursor.goto_first_insertion_point(return_block);
1188             cursor.ins().jump(normal_return_block, &jump_args);
1189 
1190             Some(return_block)
1191         }
1192         _ => None,
1193     })
1194 }
1195 
1196 /// Replace the caller's call instruction with a jump to the caller's inlined
1197 /// copy of the callee's entry block.
1198 ///
1199 /// Also associates the callee's parameters with the caller's arguments in our
1200 /// value map.
1201 ///
1202 /// Returns the caller's stack map entries, if any.
1203 fn replace_call_with_jump(
1204     allocs: &mut InliningAllocs,
1205     func: &mut ir::Function,
1206     call_inst: ir::Inst,
1207     callee: &ir::Function,
1208     entity_map: &EntityMap,
1209 ) -> Option<ir::UserStackMapEntryVec> {
1210     trace!("Replacing `call` with `jump`");
1211     trace!(
1212         "  --> call instruction: {call_inst:?}: {}",
1213         func.dfg.display_inst(call_inst)
1214     );
1215 
1216     let callee_entry_block = callee
1217         .layout
1218         .entry_block()
1219         .expect("callee function should have an entry block");
1220     let callee_param_values = callee.dfg.block_params(callee_entry_block);
1221     let caller_arg_values = SmallValueVec::from_iter(func.dfg.inst_args(call_inst).iter().copied());
1222     debug_assert_eq!(callee_param_values.len(), caller_arg_values.len());
1223     debug_assert_eq!(callee_param_values.len(), callee.signature.params.len());
1224     for (abi, (callee_param_value, caller_arg_value)) in callee
1225         .signature
1226         .params
1227         .iter()
1228         .zip(callee_param_values.into_iter().zip(caller_arg_values))
1229     {
1230         debug_assert_eq!(abi.value_type, callee.dfg.value_type(*callee_param_value));
1231         debug_assert_eq!(abi.value_type, func.dfg.value_type(caller_arg_value));
1232         allocs.set_inlined_value(callee, *callee_param_value, caller_arg_value);
1233     }
1234 
1235     // Replace the caller's call instruction with a jump to the caller's inlined
1236     // copy of the callee's entry block.
1237     //
1238     // Note that the call block dominates the inlined entry block (and also all
1239     // other inlined blocks) so we can reference the arguments directly, and do
1240     // not need to add block parameters to the inlined entry block.
1241     let inlined_entry_block = entity_map.inlined_block(callee_entry_block);
1242     func.dfg.replace(call_inst).jump(inlined_entry_block, &[]);
1243     trace!(
1244         "  --> replaced with jump instruction: {call_inst:?}: {}",
1245         func.dfg.display_inst(call_inst)
1246     );
1247 
1248     let stack_map_entries = func.dfg.take_user_stack_map_entries(call_inst);
1249     stack_map_entries
1250 }
1251 
/// Keeps track of mapping callee entities to their associated inlined caller
/// entities.
///
/// Every field is the index offset for one kind of entity. A field stays
/// `None` until the corresponding entities have been created in the caller;
/// the `inlined_*` accessors panic when asked to translate before that.
#[derive(Default)]
struct EntityMap {
    // Rather than doing an implicit, demand-based, DCE'ing translation of
    // entities, which would require maps from each callee entity to its
    // associated caller entity, we copy all entities into the caller, remember
    // each entity's initial offset, and then mapping from the callee to the
    // inlined caller entity is just adding that initial offset to the callee's
    // index. This should be both faster and simpler than the alternative. Most
    // of these sets are relatively small, and they rarely have too much dead
    // code in practice, so this is a good trade off.
    //
    // Note that there are a few kinds of entities that are excluded from the
    // `EntityMap`, and for which we do actually take the demand-based approach:
    // values and value lists being the notable ones.
    /// Offset added to a callee `ir::Block` index.
    block_offset: Option<u32>,
    /// Offset added to a callee `ir::GlobalValue` index.
    global_value_offset: Option<u32>,
    /// Offset added to a callee `ir::SigRef` index.
    sig_ref_offset: Option<u32>,
    /// Offset added to a callee `ir::FuncRef` index.
    func_ref_offset: Option<u32>,
    /// Offset added to a callee `ir::StackSlot` index.
    stack_slot_offset: Option<u32>,
    /// Offset added to a callee `ir::DynamicType` index.
    dynamic_type_offset: Option<u32>,
    /// Offset added to a callee `ir::DynamicStackSlot` index.
    dynamic_stack_slot_offset: Option<u32>,
    /// Offset added to a callee `ir::Immediate` index.
    immediate_offset: Option<u32>,
}
1277 
1278 impl EntityMap {
1279     fn inlined_block(&self, callee_block: ir::Block) -> ir::Block {
1280         let offset = self
1281             .block_offset
1282             .expect("must create inlined `ir::Block`s before calling `EntityMap::inlined_block`");
1283         ir::Block::from_u32(offset + callee_block.as_u32())
1284     }
1285 
1286     fn iter_inlined_blocks(&self, func: &ir::Function) -> impl Iterator<Item = ir::Block> + use<> {
1287         let start = self.block_offset.expect(
1288             "must create inlined `ir::Block`s before calling `EntityMap::iter_inlined_blocks`",
1289         );
1290 
1291         let end = func.dfg.blocks.len();
1292         let end = u32::try_from(end).unwrap();
1293 
1294         (start..end).map(|i| ir::Block::from_u32(i))
1295     }
1296 
1297     fn inlined_global_value(&self, callee_global_value: ir::GlobalValue) -> ir::GlobalValue {
1298         let offset = self
1299             .global_value_offset
1300             .expect("must create inlined `ir::GlobalValue`s before calling `EntityMap::inlined_global_value`");
1301         ir::GlobalValue::from_u32(offset + callee_global_value.as_u32())
1302     }
1303 
1304     fn inlined_sig_ref(&self, callee_sig_ref: ir::SigRef) -> ir::SigRef {
1305         let offset = self.sig_ref_offset.expect(
1306             "must create inlined `ir::SigRef`s before calling `EntityMap::inlined_sig_ref`",
1307         );
1308         ir::SigRef::from_u32(offset + callee_sig_ref.as_u32())
1309     }
1310 
1311     fn inlined_func_ref(&self, callee_func_ref: ir::FuncRef) -> ir::FuncRef {
1312         let offset = self.func_ref_offset.expect(
1313             "must create inlined `ir::FuncRef`s before calling `EntityMap::inlined_func_ref`",
1314         );
1315         ir::FuncRef::from_u32(offset + callee_func_ref.as_u32())
1316     }
1317 
1318     fn inlined_stack_slot(&self, callee_stack_slot: ir::StackSlot) -> ir::StackSlot {
1319         let offset = self.stack_slot_offset.expect(
1320             "must create inlined `ir::StackSlot`s before calling `EntityMap::inlined_stack_slot`",
1321         );
1322         ir::StackSlot::from_u32(offset + callee_stack_slot.as_u32())
1323     }
1324 
1325     fn inlined_dynamic_type(&self, callee_dynamic_type: ir::DynamicType) -> ir::DynamicType {
1326         let offset = self.dynamic_type_offset.expect(
1327             "must create inlined `ir::DynamicType`s before calling `EntityMap::inlined_dynamic_type`",
1328         );
1329         ir::DynamicType::from_u32(offset + callee_dynamic_type.as_u32())
1330     }
1331 
1332     fn inlined_dynamic_stack_slot(
1333         &self,
1334         callee_dynamic_stack_slot: ir::DynamicStackSlot,
1335     ) -> ir::DynamicStackSlot {
1336         let offset = self.dynamic_stack_slot_offset.expect(
1337             "must create inlined `ir::DynamicStackSlot`s before calling `EntityMap::inlined_dynamic_stack_slot`",
1338         );
1339         ir::DynamicStackSlot::from_u32(offset + callee_dynamic_stack_slot.as_u32())
1340     }
1341 
1342     fn inlined_immediate(&self, callee_immediate: ir::Immediate) -> ir::Immediate {
1343         let offset = self.immediate_offset.expect(
1344             "must create inlined `ir::Immediate`s before calling `EntityMap::inlined_immediate`",
1345         );
1346         ir::Immediate::from_u32(offset + callee_immediate.as_u32())
1347     }
1348 }
1349 
1350 /// Translate all of the callee's various entities into the caller, producing an
1351 /// `EntityMap` that can be used to translate callee entity references into
1352 /// inlined caller entity references.
1353 fn create_entities(
1354     allocs: &mut InliningAllocs,
1355     func: &mut ir::Function,
1356     callee: &ir::Function,
1357 ) -> EntityMap {
1358     let mut entity_map = EntityMap::default();
1359 
1360     entity_map.block_offset = Some(create_blocks(allocs, func, callee));
1361     entity_map.global_value_offset = Some(create_global_values(func, callee));
1362     entity_map.sig_ref_offset = Some(create_sig_refs(func, callee));
1363     create_user_external_name_refs(allocs, func, callee);
1364     entity_map.func_ref_offset = Some(create_func_refs(allocs, func, callee, &entity_map));
1365     entity_map.stack_slot_offset = Some(create_stack_slots(func, callee));
1366     entity_map.dynamic_type_offset = Some(create_dynamic_types(func, callee, &entity_map));
1367     entity_map.dynamic_stack_slot_offset =
1368         Some(create_dynamic_stack_slots(func, callee, &entity_map));
1369     entity_map.immediate_offset = Some(create_immediates(func, callee));
1370 
1371     // `ir::ConstantData` is deduplicated, so we cannot use our offset scheme
1372     // for `ir::Constant`s. Nonetheless, we still insert them into the caller
1373     // now, at the same time as the rest of our entities.
1374     create_constants(allocs, func, callee);
1375 
1376     entity_map
1377 }
1378 
1379 /// Create inlined blocks in the caller for every block in the callee.
1380 fn create_blocks(
1381     allocs: &mut InliningAllocs,
1382     func: &mut ir::Function,
1383     callee: &ir::Function,
1384 ) -> u32 {
1385     let offset = func.dfg.blocks.len();
1386     let offset = u32::try_from(offset).unwrap();
1387 
1388     func.dfg.blocks.reserve(callee.dfg.blocks.len());
1389     for callee_block in callee.dfg.blocks.iter() {
1390         let caller_block = func.dfg.blocks.add();
1391         trace!("Callee {callee_block:?} = inlined {caller_block:?}");
1392 
1393         if callee.layout.is_cold(callee_block) {
1394             func.layout.set_cold(caller_block);
1395         }
1396 
1397         // Note: the entry block does not need parameters because the only
1398         // predecessor is the call block and we associate the callee's
1399         // parameters with the caller's arguments directly.
1400         if callee.layout.entry_block() != Some(callee_block) {
1401             for callee_param in callee.dfg.blocks[callee_block].params(&callee.dfg.value_lists) {
1402                 let ty = callee.dfg.value_type(*callee_param);
1403                 let caller_param = func.dfg.append_block_param(caller_block, ty);
1404 
1405                 allocs.set_inlined_value(callee, *callee_param, caller_param);
1406             }
1407         }
1408     }
1409 
1410     offset
1411 }
1412 
1413 /// Copy and translate global values from the callee into the caller.
1414 fn create_global_values(func: &mut ir::Function, callee: &ir::Function) -> u32 {
1415     let gv_offset = func.global_values.len();
1416     let gv_offset = u32::try_from(gv_offset).unwrap();
1417 
1418     func.global_values.reserve(callee.global_values.len());
1419     for gv in callee.global_values.values() {
1420         func.global_values.push(match gv {
1421             // These kinds of global values reference other global values, so we
1422             // need to fixup that reference.
1423             ir::GlobalValueData::Load {
1424                 base,
1425                 offset,
1426                 global_type,
1427                 flags,
1428             } => ir::GlobalValueData::Load {
1429                 base: ir::GlobalValue::from_u32(base.as_u32() + gv_offset),
1430                 offset: *offset,
1431                 global_type: *global_type,
1432                 flags: *flags,
1433             },
1434             ir::GlobalValueData::IAddImm {
1435                 base,
1436                 offset,
1437                 global_type,
1438             } => ir::GlobalValueData::IAddImm {
1439                 base: ir::GlobalValue::from_u32(base.as_u32() + gv_offset),
1440                 offset: *offset,
1441                 global_type: *global_type,
1442             },
1443 
1444             // These kinds of global values do not reference other global
1445             // values, so we can just clone them.
1446             ir::GlobalValueData::VMContext
1447             | ir::GlobalValueData::Symbol { .. }
1448             | ir::GlobalValueData::DynScaleTargetConst { .. } => gv.clone(),
1449         });
1450     }
1451 
1452     gv_offset
1453 }
1454 
1455 /// Copy `ir::SigRef`s from the callee into the caller.
1456 fn create_sig_refs(func: &mut ir::Function, callee: &ir::Function) -> u32 {
1457     let offset = func.dfg.signatures.len();
1458     let offset = u32::try_from(offset).unwrap();
1459 
1460     func.dfg.signatures.reserve(callee.dfg.signatures.len());
1461     for sig in callee.dfg.signatures.values() {
1462         func.dfg.signatures.push(sig.clone());
1463     }
1464 
1465     offset
1466 }
1467 
1468 fn create_user_external_name_refs(
1469     allocs: &mut InliningAllocs,
1470     func: &mut ir::Function,
1471     callee: &ir::Function,
1472 ) {
1473     for (callee_named_func_ref, name) in callee.params.user_named_funcs().iter() {
1474         let caller_named_func_ref = func.declare_imported_user_function(name.clone());
1475         allocs.user_external_name_refs[callee_named_func_ref] = Some(caller_named_func_ref).into();
1476     }
1477 }
1478 
1479 /// Translate `ir::FuncRef`s from the callee into the caller.
1480 fn create_func_refs(
1481     allocs: &InliningAllocs,
1482     func: &mut ir::Function,
1483     callee: &ir::Function,
1484     entity_map: &EntityMap,
1485 ) -> u32 {
1486     let offset = func.dfg.ext_funcs.len();
1487     let offset = u32::try_from(offset).unwrap();
1488 
1489     func.dfg.ext_funcs.reserve(callee.dfg.ext_funcs.len());
1490     for ir::ExtFuncData {
1491         name,
1492         signature,
1493         colocated,
1494     } in callee.dfg.ext_funcs.values()
1495     {
1496         func.dfg.ext_funcs.push(ir::ExtFuncData {
1497             name: match name {
1498                 ir::ExternalName::User(name_ref) => {
1499                     ir::ExternalName::User(allocs.user_external_name_refs[*name_ref].expect(
1500                         "should have translated all `ir::UserExternalNameRef`s before translating \
1501                          `ir::FuncRef`s",
1502                     ))
1503                 }
1504                 ir::ExternalName::TestCase(_)
1505                 | ir::ExternalName::LibCall(_)
1506                 | ir::ExternalName::KnownSymbol(_) => name.clone(),
1507             },
1508             signature: entity_map.inlined_sig_ref(*signature),
1509             colocated: *colocated,
1510         });
1511     }
1512 
1513     offset
1514 }
1515 
1516 /// Copy stack slots from the callee into the caller.
1517 fn create_stack_slots(func: &mut ir::Function, callee: &ir::Function) -> u32 {
1518     let offset = func.sized_stack_slots.len();
1519     let offset = u32::try_from(offset).unwrap();
1520 
1521     func.sized_stack_slots
1522         .reserve(callee.sized_stack_slots.len());
1523     for slot in callee.sized_stack_slots.values() {
1524         func.sized_stack_slots.push(slot.clone());
1525     }
1526 
1527     offset
1528 }
1529 
1530 /// Copy dynamic types from the callee into the caller.
1531 fn create_dynamic_types(
1532     func: &mut ir::Function,
1533     callee: &ir::Function,
1534     entity_map: &EntityMap,
1535 ) -> u32 {
1536     let offset = func.dynamic_stack_slots.len();
1537     let offset = u32::try_from(offset).unwrap();
1538 
1539     func.dfg
1540         .dynamic_types
1541         .reserve(callee.dfg.dynamic_types.len());
1542     for ir::DynamicTypeData {
1543         base_vector_ty,
1544         dynamic_scale,
1545     } in callee.dfg.dynamic_types.values()
1546     {
1547         func.dfg.dynamic_types.push(ir::DynamicTypeData {
1548             base_vector_ty: *base_vector_ty,
1549             dynamic_scale: entity_map.inlined_global_value(*dynamic_scale),
1550         });
1551     }
1552 
1553     offset
1554 }
1555 
1556 /// Copy dynamic stack slots from the callee into the caller.
1557 fn create_dynamic_stack_slots(
1558     func: &mut ir::Function,
1559     callee: &ir::Function,
1560     entity_map: &EntityMap,
1561 ) -> u32 {
1562     let offset = func.dynamic_stack_slots.len();
1563     let offset = u32::try_from(offset).unwrap();
1564 
1565     func.dynamic_stack_slots
1566         .reserve(callee.dynamic_stack_slots.len());
1567     for ir::DynamicStackSlotData { kind, dyn_ty } in callee.dynamic_stack_slots.values() {
1568         func.dynamic_stack_slots.push(ir::DynamicStackSlotData {
1569             kind: *kind,
1570             dyn_ty: entity_map.inlined_dynamic_type(*dyn_ty),
1571         });
1572     }
1573 
1574     offset
1575 }
1576 
1577 /// Copy immediates from the callee into the caller.
1578 fn create_immediates(func: &mut ir::Function, callee: &ir::Function) -> u32 {
1579     let offset = func.dfg.immediates.len();
1580     let offset = u32::try_from(offset).unwrap();
1581 
1582     func.dfg.immediates.reserve(callee.dfg.immediates.len());
1583     for imm in callee.dfg.immediates.values() {
1584         func.dfg.immediates.push(imm.clone());
1585     }
1586 
1587     offset
1588 }
1589 
1590 /// Copy constants from the callee into the caller.
1591 fn create_constants(allocs: &mut InliningAllocs, func: &mut ir::Function, callee: &ir::Function) {
1592     for (callee_constant, data) in callee.dfg.constants.iter() {
1593         let inlined_constant = func.dfg.constants.insert(data.clone());
1594         allocs.constants[*callee_constant] = Some(inlined_constant).into();
1595     }
1596 }
1597