1 //! Function inlining infrastructure.
2 //!
3 //! This module provides "inlining as a library" to Cranelift users; it does
4 //! _not_ provide a complete, off-the-shelf inlining solution. Cranelift's
5 //! compilation context is per-function and does not encompass the full call
6 //! graph. It does not know which functions are hot and which are cold, which
7 //! have been marked the equivalent of `#[inline(never)]`, etc... Only the
8 //! Cranelift user can understand these aspects of the full compilation
9 //! pipeline, and these things can be very different between (say) Wasmtime and
//! `cg_clif`. Therefore, this module does not attempt to define heuristics for
11 //! when inlining a particular call is likely beneficial. This module only
12 //! provides hooks for the Cranelift user to define whether a given call should
13 //! be inlined or not, and the mechanics to inline a callee into a particular
14 //! call site when directed to do so by the Cranelift user.
15 //!
16 //! The top-level inlining entry point during Cranelift compilation is
17 //! [`Context::inline`][crate::Context::inline]. It takes an [`Inline`] trait
18 //! implementation, which is authored by the Cranelift user and directs
19 //! Cranelift whether to inline a particular call, and, when inlining, gives
20 //! Cranelift the body of the callee that is to be inlined.
21 
22 use crate::cursor::{Cursor as _, FuncCursor};
23 use crate::ir::{self, DebugTag, ExceptionTableData, ExceptionTableItem, InstBuilder as _};
24 use crate::result::CodegenResult;
25 use crate::trace;
26 use crate::traversals::Dfs;
27 use alloc::borrow::Cow;
28 use alloc::vec::Vec;
29 use cranelift_entity::{SecondaryMap, packed_option::PackedOption};
30 use smallvec::SmallVec;
31 
// Small-vector aliases used by the inliner. Each one is a `SmallVec` with an
// inline capacity of eight elements.

/// A small vector of IR values.
type SmallValueVec = SmallVec<[ir::Value; 8]>;
/// A small vector of block arguments.
type SmallBlockArgVec = SmallVec<[ir::BlockArg; 8]>;
/// A small vector of block calls.
type SmallBlockCallVec = SmallVec<[ir::BlockCall; 8]>;
35 
/// A command directing Cranelift whether or not to inline a particular call.
///
/// This is the value returned by [`Inline::inline`] for each direct call site
/// that Cranelift considers.
pub enum InlineCommand<'a> {
    /// Keep the call as-is, out-of-line, and do not inline the callee.
    KeepCall,

    /// Inline the call, using this function as the body of the callee.
    ///
    /// It is the `Inline` implementor's responsibility to ensure that this
    /// function is the correct callee. Providing the wrong function may result
    /// in panics during compilation or incorrect runtime behavior.
    Inline {
        /// The callee function's body.
        ///
        /// This is a `Cow`, so the implementor may either lend out a function
        /// it already holds (borrowed for `'a`) or hand over an owned one.
        callee: Cow<'a, ir::Function>,
        /// Whether to visit any function calls within the callee body after
        /// inlining and consider them for further inlining.
        visit_callee: bool,
    },
}
54 
/// A trait for directing Cranelift whether to inline a particular call or not.
///
/// Used in combination with the [`Context::inline`][crate::Context::inline]
/// method.
pub trait Inline {
    /// A hook invoked for each direct call instruction in a function, whose
    /// result determines whether Cranelift should inline a given call.
    ///
    /// The Cranelift user is responsible for defining their own heuristics and
    /// deciding whether inlining the call is beneficial.
    ///
    /// # Parameters
    ///
    /// * `caller`: the function containing the call site under consideration.
    ///
    /// * `call_inst`: the call instruction itself, inside `caller`.
    ///
    /// * `call_opcode`: the opcode of `call_inst`. The inlining pass invokes
    ///   this hook for `call`, `return_call`, and `try_call` instructions.
    ///
    /// * `callee`: the `FuncRef`, in `caller`, of the function being called.
    ///
    /// * `call_args`: the arguments of `call_inst`.
    ///
    /// # Returns
    ///
    /// An [`InlineCommand`] saying whether to keep the call or to inline it.
    /// The returned command may borrow from `self`.
    ///
    /// # Requirements
    ///
    /// When returning a function and directing Cranelift to inline its body
    /// into the call site, the `Inline` implementer must ensure the following:
    ///
    /// * The returned function's signature exactly matches the `callee`
    ///   `FuncRef`'s signature.
    ///
    /// * The returned function must be legalized.
    ///
    /// * The returned function must be valid (i.e. it must pass the CLIF
    ///   verifier).
    ///
    /// * The returned function is a correct and valid implementation of the
    ///   `callee` according to your language's semantics.
    ///
    /// Failure to uphold these invariants may result in panics during
    /// compilation or incorrect runtime behavior in the generated code.
    fn inline(
        &mut self,
        caller: &ir::Function,
        call_inst: ir::Inst,
        call_opcode: ir::Opcode,
        callee: ir::FuncRef,
        call_args: &[ir::Value],
    ) -> InlineCommand<'_>;
}
91 
92 impl<'a, T> Inline for &'a mut T
93 where
94     T: Inline,
95 {
96     fn inline(
97         &mut self,
98         caller: &ir::Function,
99         inst: ir::Inst,
100         opcode: ir::Opcode,
101         callee: ir::FuncRef,
102         args: &[ir::Value],
103     ) -> InlineCommand<'_> {
104         (*self).inline(caller, inst, opcode, callee, args)
105     }
106 }
107 
108 /// Walk the given function, invoke the `Inline` implementation for each call
109 /// instruction, and inline the callee when directed to do so.
110 ///
111 /// Returns whether any call was inlined.
112 pub(crate) fn do_inlining(
113     func: &mut ir::Function,
114     mut inliner: impl Inline,
115 ) -> CodegenResult<bool> {
116     trace!("function {} before inlining: {}", func.name, func);
117 
118     let mut inlined_any = false;
119     let mut allocs = InliningAllocs::default();
120 
121     let mut cursor = FuncCursor::new(func);
122     'block_loop: while let Some(block) = cursor.next_block() {
123         // Always keep track of our previous cursor position. Assuming that the
124         // current position is a function call that we will inline, then the
125         // previous position is just before the inlined callee function. After
126         // inlining a call, the Cranelift user can decide whether to consider
127         // any function calls in the inlined callee for further inlining or
128         // not. When they do, then we back up to this previous cursor position
129         // so that our traversal will then continue over the inlined body.
130         let mut prev_pos;
131 
132         while let Some(inst) = {
133             prev_pos = cursor.position();
134             cursor.next_inst()
135         } {
136             // Make sure that `block` is always `inst`'s block, even with all of
137             // our cursor-position-updating and block-splitting-during-inlining
138             // shenanigans below.
139             debug_assert_eq!(Some(block), cursor.func.layout.inst_block(inst));
140 
141             match cursor.func.dfg.insts[inst] {
142                 ir::InstructionData::Call {
143                     opcode: opcode @ ir::Opcode::Call | opcode @ ir::Opcode::ReturnCall,
144                     args: _,
145                     func_ref,
146                 } => {
147                     trace!(
148                         "considering call site for inlining: {inst}: {}",
149                         cursor.func.dfg.display_inst(inst),
150                     );
151                     let args = cursor.func.dfg.inst_args(inst);
152                     match inliner.inline(&cursor.func, inst, opcode, func_ref, args) {
153                         InlineCommand::KeepCall => {
154                             trace!("  --> keeping call");
155                         }
156                         InlineCommand::Inline {
157                             callee,
158                             visit_callee,
159                         } => {
160                             let last_inlined_block = inline_one(
161                                 &mut allocs,
162                                 cursor.func,
163                                 func_ref,
164                                 block,
165                                 inst,
166                                 opcode,
167                                 &callee,
168                                 None,
169                             );
170                             inlined_any = true;
171                             if visit_callee {
172                                 cursor.set_position(prev_pos);
173                             } else {
174                                 // Arrange it so that the `next_block()` loop
175                                 // will continue to the next block that is not
176                                 // associated with the just-inlined callee.
177                                 cursor.goto_bottom(last_inlined_block);
178                                 continue 'block_loop;
179                             }
180                         }
181                     }
182                 }
183                 ir::InstructionData::TryCall {
184                     opcode: opcode @ ir::Opcode::TryCall,
185                     args: _,
186                     func_ref,
187                     exception,
188                 } => {
189                     trace!(
190                         "considering call site for inlining: {inst}: {}",
191                         cursor.func.dfg.display_inst(inst),
192                     );
193                     let args = cursor.func.dfg.inst_args(inst);
194                     match inliner.inline(&cursor.func, inst, opcode, func_ref, args) {
195                         InlineCommand::KeepCall => {
196                             trace!("  --> keeping call");
197                         }
198                         InlineCommand::Inline {
199                             callee,
200                             visit_callee,
201                         } => {
202                             let last_inlined_block = inline_one(
203                                 &mut allocs,
204                                 cursor.func,
205                                 func_ref,
206                                 block,
207                                 inst,
208                                 opcode,
209                                 &callee,
210                                 Some(exception),
211                             );
212                             inlined_any = true;
213                             if visit_callee {
214                                 cursor.set_position(prev_pos);
215                             } else {
216                                 // Arrange it so that the `next_block()` loop
217                                 // will continue to the next block that is not
218                                 // associated with the just-inlined callee.
219                                 cursor.goto_bottom(last_inlined_block);
220                                 continue 'block_loop;
221                             }
222                         }
223                     }
224                 }
225                 ir::InstructionData::CallIndirect { .. }
226                 | ir::InstructionData::TryCallIndirect { .. } => {
227                     // Can't inline indirect calls; need to have some earlier
228                     // pass rewrite them into direct calls first, when possible.
229                 }
230                 _ => {
231                     debug_assert!(
232                         !cursor.func.dfg.insts[inst].opcode().is_call(),
233                         "should have matched all call instructions, but found: {inst}: {}",
234                         cursor.func.dfg.display_inst(inst),
235                     );
236                 }
237             }
238         }
239     }
240 
241     if inlined_any {
242         trace!("function {} after inlining: {}", func.name, func);
243     } else {
244         trace!("function {} did not have any callees inlined", func.name);
245     }
246 
247     Ok(inlined_any)
248 }
249 
/// Scratch state used while inlining a single call.
///
/// One instance is created per inlining pass and handed to each individual
/// inlining, which calls `reset` on it before use.
#[derive(Default)]
struct InliningAllocs {
    /// Map from callee value to inlined caller value.
    values: SecondaryMap<ir::Value, PackedOption<ir::Value>>,

    /// Map from callee constant to inlined caller constant.
    ///
    /// Not in `EntityMap` because these are hash-consed inside the
    /// `ir::Function`.
    constants: SecondaryMap<ir::Constant, PackedOption<ir::Constant>>,

    /// Map from callee to inlined caller external name refs.
    ///
    /// Not in `EntityMap` because these are hash-consed inside the
    /// `ir::Function`.
    user_external_name_refs:
        SecondaryMap<ir::UserExternalNameRef, PackedOption<ir::UserExternalNameRef>>,

    /// The set of _caller_ inlined call instructions that need exception table
    /// fixups at the end of inlining.
    ///
    /// This includes all kinds of non-returning calls, not just the literal
    /// `call` instruction: `call_indirect`, `try_call`, `try_call_indirect`,
    /// etc... However, it does not include `return_call` and
    /// `return_call_indirect` instructions because the caller cannot catch
    /// exceptions that those calls throw because the caller is no longer on the
    /// stack as soon as they are executed.
    ///
    /// Note: this is a simple `Vec`, and not an `EntitySet`, because it is very
    /// sparse: most of the caller's instructions are not inlined call
    /// instructions. Additionally, we require deterministic iteration order and
    /// do not require set-membership testing, so a hash set is not a good
    /// choice either.
    calls_needing_exception_table_fixup: Vec<ir::Inst>,
}
285 
286 impl InliningAllocs {
287     fn reset(&mut self, callee: &ir::Function) {
288         let InliningAllocs {
289             values,
290             constants,
291             user_external_name_refs,
292             calls_needing_exception_table_fixup,
293         } = self;
294 
295         values.clear();
296         values.resize(callee.dfg.len_values());
297 
298         constants.clear();
299         constants.resize(callee.dfg.constants.len());
300 
301         user_external_name_refs.clear();
302         user_external_name_refs.resize(callee.params.user_named_funcs().len());
303 
304         // Note: We do not reserve capacity for
305         // `calls_needing_exception_table_fixup` because it is a sparse set and
306         // we don't know how large it needs to be ahead of time.
307         calls_needing_exception_table_fixup.clear();
308     }
309 
310     fn set_inlined_value(
311         &mut self,
312         callee: &ir::Function,
313         callee_val: ir::Value,
314         inlined_val: ir::Value,
315     ) {
316         trace!("  --> callee {callee_val:?} = inlined {inlined_val:?}");
317         debug_assert!(self.values[callee_val].is_none());
318         let resolved_callee_val = callee.dfg.resolve_aliases(callee_val);
319         debug_assert!(self.values[resolved_callee_val].is_none());
320         self.values[resolved_callee_val] = Some(inlined_val).into();
321     }
322 
323     fn get_inlined_value(&self, callee: &ir::Function, callee_val: ir::Value) -> Option<ir::Value> {
324         let resolved_callee_val = callee.dfg.resolve_aliases(callee_val);
325         self.values[resolved_callee_val].expand()
326     }
327 }
328 
329 /// Inline one particular function call.
330 ///
331 /// Returns the last inlined block in the layout.
332 fn inline_one(
333     allocs: &mut InliningAllocs,
334     func: &mut ir::Function,
335     callee_func_ref: ir::FuncRef,
336     call_block: ir::Block,
337     call_inst: ir::Inst,
338     call_opcode: ir::Opcode,
339     callee: &ir::Function,
340     call_exception_table: Option<ir::ExceptionTable>,
341 ) -> ir::Block {
342     trace!(
343         "Inlining call {call_inst:?}: {}\n\
344          with callee = {callee:?}",
345         func.dfg.display_inst(call_inst)
346     );
347 
348     // Type check callee signature.
349     let expected_callee_sig = func.dfg.ext_funcs[callee_func_ref].signature;
350     let expected_callee_sig = &func.dfg.signatures[expected_callee_sig];
351     assert_eq!(expected_callee_sig, &callee.signature);
352 
353     allocs.reset(callee);
354 
355     // First, append various callee entity arenas to the end of the caller's
356     // entity arenas.
357     let entity_map = create_entities(allocs, func, callee);
358 
359     // Inlined prologue: split the call instruction's block at the point of the
360     // call and replace the call with a jump.
361     let return_block = split_off_return_block(func, call_inst, call_opcode, callee);
362     let call_stack_map = replace_call_with_jump(allocs, func, call_inst, callee, &entity_map);
363 
364     // Prepare for translating the actual instructions by inserting the inlined
365     // blocks into the caller's layout in the same order that they appear in the
366     // callee.
367     let mut last_inlined_block = inline_block_layout(func, call_block, callee, &entity_map);
368 
369     // Get a copy of debug tags on the call instruction; these are
370     // prepended to debug tags on inlined instructions. Remove them
371     // from the call itself as it will be rewritten to a jump (which
372     // cannot have tags).
373     let call_debug_tags = func.debug_tags.get(call_inst).to_vec();
374     func.debug_tags.set(call_inst, []);
375 
376     // Translate each instruction from the callee into the caller,
377     // appending them to their associated block in the caller.
378     //
379     // Note that we iterate over the callee with a pre-order traversal so that
380     // we see value defs before uses.
381     for callee_block in Dfs::new().pre_order_iter(callee) {
382         let inlined_block = entity_map.inlined_block(callee_block);
383         trace!(
384             "Processing instructions in callee block {callee_block:?} (inlined block {inlined_block:?}"
385         );
386 
387         let mut next_callee_inst = callee.layout.first_inst(callee_block);
388         while let Some(callee_inst) = next_callee_inst {
389             trace!(
390                 "Processing callee instruction {callee_inst:?}: {}",
391                 callee.dfg.display_inst(callee_inst)
392             );
393 
394             assert_ne!(
395                 callee.dfg.insts[callee_inst].opcode(),
396                 ir::Opcode::GlobalValue,
397                 "callee must already be legalized, we shouldn't see any `global_value` \
398                  instructions when inlining; found {callee_inst:?}: {}",
399                 callee.dfg.display_inst(callee_inst)
400             );
401 
402             // Remap the callee instruction's entities and insert it into the
403             // caller's DFG.
404             let inlined_inst_data = callee.dfg.insts[callee_inst].map(InliningInstRemapper {
405                 allocs: &allocs,
406                 func,
407                 callee,
408                 entity_map: &entity_map,
409             });
410             let inlined_inst = func.dfg.make_inst(inlined_inst_data);
411             func.layout.append_inst(inlined_inst, inlined_block);
412 
413             // Copy over debug tags, translating referenced entities
414             // as appropriate.
415             let debug_tags = callee.debug_tags.get(callee_inst);
416             // If there are tags on the inlined instruction, we always
417             // add tags, and we prepend any tags from the call
418             // instruction; but we don't add tags if only the callsite
419             // had them (this would otherwise mean that every single
420             // instruction in an inlined function body would get
421             // tags).
422             if !debug_tags.is_empty() {
423                 let tags = call_debug_tags
424                     .iter()
425                     .cloned()
426                     .chain(debug_tags.iter().map(|tag| match *tag {
427                         DebugTag::User(value) => DebugTag::User(value),
428                         DebugTag::StackSlot(slot) => {
429                             DebugTag::StackSlot(entity_map.inlined_stack_slot(slot))
430                         }
431                     }))
432                     .collect::<SmallVec<[_; 4]>>();
433                 func.debug_tags.set(inlined_inst, tags);
434             }
435 
436             let opcode = callee.dfg.insts[callee_inst].opcode();
437             if opcode.is_return() {
438                 // Instructions that return do not define any values, so we
439                 // don't need to worry about that, but we do need to fix them up
440                 // so that they return by jumping to our control-flow join
441                 // block, rather than returning from the caller.
442                 if let Some(return_block) = return_block {
443                     fixup_inst_that_returns(
444                         allocs,
445                         func,
446                         callee,
447                         &entity_map,
448                         call_opcode,
449                         inlined_inst,
450                         callee_inst,
451                         return_block,
452                         call_stack_map.as_ref().map(|es| &**es),
453                     );
454                 } else {
455                     // If we are inlining a callee that was invoked via
456                     // `return_call`, we leave inlined return instructions
457                     // as-is: there is no logical caller frame on the stack to
458                     // continue to.
459                     debug_assert_eq!(call_opcode, ir::Opcode::ReturnCall);
460                 }
461             } else {
462                 // Make the instruction's result values.
463                 let ctrl_typevar = callee.dfg.ctrl_typevar(callee_inst);
464                 func.dfg.make_inst_results(inlined_inst, ctrl_typevar);
465 
466                 // Update the value map for this instruction's defs.
467                 let callee_results = callee.dfg.inst_results(callee_inst);
468                 let inlined_results = func.dfg.inst_results(inlined_inst);
469                 debug_assert_eq!(callee_results.len(), inlined_results.len());
470                 for (callee_val, inlined_val) in callee_results.iter().zip(inlined_results) {
471                     allocs.set_inlined_value(callee, *callee_val, *inlined_val);
472                 }
473 
474                 if opcode.is_call() {
475                     append_stack_map_entries(
476                         func,
477                         callee,
478                         &entity_map,
479                         call_stack_map.as_deref(),
480                         inlined_inst,
481                         callee_inst,
482                     );
483 
484                     // When we are inlining a `try_call` call site, we need to merge
485                     // the call site's exception table into the inlined calls'
486                     // exception tables. This can involve rewriting regular `call`s
487                     // into `try_call`s, which requires mutating the CFG because
488                     // `try_call` is a block terminator. However, we can't mutate
489                     // the CFG in the middle of this traversal because we rely on
490                     // the existence of a one-to-one mapping between the callee
491                     // layout and the inlined layout. Instead, we record the set of
492                     // inlined call instructions that will need fixing up, and
493                     // perform that possibly-CFG-mutating exception table merging in
494                     // a follow up pass, when we no longer rely on that one-to-one
495                     // layout mapping.
496                     debug_assert_eq!(
497                         call_opcode == ir::Opcode::TryCall,
498                         call_exception_table.is_some()
499                     );
500                     if call_opcode == ir::Opcode::TryCall {
501                         allocs
502                             .calls_needing_exception_table_fixup
503                             .push(inlined_inst);
504                     }
505                 }
506             }
507 
508             trace!(
509                 "  --> inserted inlined instruction {inlined_inst:?}: {}",
510                 func.dfg.display_inst(inlined_inst)
511             );
512 
513             next_callee_inst = callee.layout.next_inst(callee_inst);
514         }
515     }
516 
517     // We copied *all* callee blocks into the caller's layout, but only copied
518     // the callee instructions in *reachable* callee blocks into the caller's
519     // associated blocks. Therefore, any *unreachable* blocks are empty in the
520     // caller, which is invalid CLIF because all blocks must end in a
521     // terminator, so do a quick pass over the inlined blocks and remove any
522     // empty blocks from the caller's layout.
523     for block in entity_map.iter_inlined_blocks(func) {
524         if func.layout.is_block_inserted(block) && func.layout.first_inst(block).is_none() {
525             log::trace!("removing unreachable inlined block from layout: {block}");
526 
527             // If the block being removed is our last-inlined block, then back
528             // it up to the previous block in the layout, which will be the new
529             // last-inlined block after this one's removal.
530             if block == last_inlined_block {
531                 last_inlined_block = func.layout.prev_block(last_inlined_block).expect(
532                     "there will always at least be the block that contained the call we are \
533                      inlining",
534                 );
535             }
536 
537             func.layout.remove_block(block);
538         }
539     }
540 
541     // Final step: fixup the exception tables of any inlined calls when we are
542     // inlining a `try_call` site.
543     //
544     // Subtly, this requires rewriting non-catching `call[_indirect]`
545     // instructions into `try_call[_indirect]` instructions so that exceptions
546     // that unwound through the original callee frame and were caught by the
547     // caller's `try_call` do not unwind past this inlined frame. And turning a
548     // `call` into a `try_call` mutates the CFG, breaking our one-to-one mapping
549     // between callee blocks and inlined blocks, so we delay these fixups to
550     // this final step, when we no longer rely on that mapping.
551     debug_assert!(
552         allocs.calls_needing_exception_table_fixup.is_empty() || call_exception_table.is_some()
553     );
554     debug_assert_eq!(
555         call_opcode == ir::Opcode::TryCall,
556         call_exception_table.is_some()
557     );
558     if let Some(call_exception_table) = call_exception_table {
559         fixup_inlined_call_exception_tables(allocs, func, call_exception_table);
560     }
561 
562     debug_assert!(
563         func.layout.is_block_inserted(last_inlined_block),
564         "last_inlined_block={last_inlined_block} should be inserted in the layout"
565     );
566     last_inlined_block
567 }
568 
569 /// Append stack map entries from the caller and callee to the given inlined
570 /// instruction.
571 fn append_stack_map_entries(
572     func: &mut ir::Function,
573     callee: &ir::Function,
574     entity_map: &EntityMap,
575     call_stack_map: Option<&[ir::UserStackMapEntry]>,
576     inlined_inst: ir::Inst,
577     callee_inst: ir::Inst,
578 ) {
579     // Add the caller's stack map to this call. These entries
580     // already refer to caller entities and do not need further
581     // translation.
582     func.dfg.append_user_stack_map_entries(
583         inlined_inst,
584         call_stack_map
585             .iter()
586             .flat_map(|entries| entries.iter().cloned()),
587     );
588 
589     // Append the callee's stack map to this call. These entries
590     // refer to callee entities and therefore do require
591     // translation into the caller's index space.
592     func.dfg.append_user_stack_map_entries(
593         inlined_inst,
594         callee
595             .dfg
596             .user_stack_map_entries(callee_inst)
597             .iter()
598             .flat_map(|entries| entries.iter())
599             .map(|entry| ir::UserStackMapEntry {
600                 ty: entry.ty,
601                 slot: entity_map.inlined_stack_slot(entry.slot),
602                 offset: entry.offset,
603             }),
604     );
605 }
606 
607 /// Create or update the exception tables for any inlined call instructions:
608 /// when inlining at a `try_call` site, we must forward our exceptional edges
609 /// into each inlined call instruction.
610 fn fixup_inlined_call_exception_tables(
611     allocs: &mut InliningAllocs,
612     func: &mut ir::Function,
613     call_exception_table: ir::ExceptionTable,
614 ) {
615     // Split a block at a `call[_indirect]` instruction, detach the
616     // instruction's results, and alias them to the new block's parameters.
617     let split_block_for_new_try_call = |func: &mut ir::Function, inst: ir::Inst| -> ir::Block {
618         debug_assert!(func.dfg.insts[inst].opcode().is_call());
619         debug_assert!(!func.dfg.insts[inst].opcode().is_terminator());
620 
621         // Split the block.
622         let next_inst = func
623             .layout
624             .next_inst(inst)
625             .expect("inst is not a terminator, should have a successor");
626         let new_block = func.dfg.blocks.add();
627         func.layout.split_block(new_block, next_inst);
628 
629         // `try_call[_indirect]` instructions do not define values themselves;
630         // the normal-return block has parameters for the results. So remove
631         // this instruction's results, create an associated block parameter for
632         // each of them, and alias them to the new block parameter.
633         let old_results = SmallValueVec::from_iter(func.dfg.inst_results(inst).iter().copied());
634         func.dfg.detach_inst_results(inst);
635         for old_result in old_results {
636             let ty = func.dfg.value_type(old_result);
637             let new_block_param = func.dfg.append_block_param(new_block, ty);
638             func.dfg.change_to_alias(old_result, new_block_param);
639         }
640 
641         new_block
642     };
643 
644     // Clone the caller's exception table, updating it for use in the current
645     // `call[_indirect]` instruction as it becomes a `try_call[_indirect]`.
646     let clone_exception_table_for_this_call = |func: &mut ir::Function,
647                                                signature: ir::SigRef,
648                                                new_block: ir::Block|
649      -> ir::ExceptionTable {
650         let mut exception = func.stencil.dfg.exception_tables[call_exception_table]
651             .deep_clone(&mut func.stencil.dfg.value_lists);
652 
653         *exception.signature_mut() = signature;
654 
655         let returns_len = func.dfg.signatures[signature].returns.len();
656         let returns_len = u32::try_from(returns_len).unwrap();
657 
658         *exception.normal_return_mut() = ir::BlockCall::new(
659             new_block,
660             (0..returns_len).map(|i| ir::BlockArg::TryCallRet(i)),
661             &mut func.dfg.value_lists,
662         );
663 
664         func.dfg.exception_tables.push(exception)
665     };
666 
667     for inst in allocs.calls_needing_exception_table_fixup.drain(..) {
668         debug_assert!(func.dfg.insts[inst].opcode().is_call());
669         debug_assert!(!func.dfg.insts[inst].opcode().is_return());
670         match func.dfg.insts[inst] {
671             //     current_block:
672             //         preds...
673             //         rets... = call f(args...)
674             //         succs...
675             //
676             // becomes
677             //
678             //     current_block:
679             //         preds...
680             //         try_call f(args...), new_block(rets...), [call_exception_table...]
681             //     new_block(rets...):
682             //         succs...
683             ir::InstructionData::Call {
684                 opcode: ir::Opcode::Call,
685                 args,
686                 func_ref,
687             } => {
688                 let new_block = split_block_for_new_try_call(func, inst);
689                 let signature = func.dfg.ext_funcs[func_ref].signature;
690                 let exception = clone_exception_table_for_this_call(func, signature, new_block);
691                 func.dfg.insts[inst] = ir::InstructionData::TryCall {
692                     opcode: ir::Opcode::TryCall,
693                     args,
694                     func_ref,
695                     exception,
696                 };
697             }
698 
699             //     current_block:
700             //         preds...
701             //         rets... = call_indirect sig, val(args...)
702             //         succs...
703             //
704             // becomes
705             //
706             //     current_block:
707             //         preds...
708             //         try_call_indirect sig, val(args...), new_block(rets...), [call_exception_table...]
709             //     new_block(rets...):
710             //         succs...
711             ir::InstructionData::CallIndirect {
712                 opcode: ir::Opcode::CallIndirect,
713                 args,
714                 sig_ref,
715             } => {
716                 let new_block = split_block_for_new_try_call(func, inst);
717                 let exception = clone_exception_table_for_this_call(func, sig_ref, new_block);
718                 func.dfg.insts[inst] = ir::InstructionData::TryCallIndirect {
719                     opcode: ir::Opcode::TryCallIndirect,
720                     args,
721                     exception,
722                 };
723             }
724 
725             // For `try_call[_indirect]` instructions, we just need to merge the
726             // exception tables.
727             ir::InstructionData::TryCall {
728                 opcode: ir::Opcode::TryCall,
729                 exception,
730                 ..
731             }
732             | ir::InstructionData::TryCallIndirect {
733                 opcode: ir::Opcode::TryCallIndirect,
734                 exception,
735                 ..
736             } => {
737                 // Construct a new exception table that consists of
738                 // the inlined instruction's exception table match
739                 // sequence, with the inlining site's exception table
740                 // appended. This will ensure that the first-match
741                 // semantics emulates the original behavior of
742                 // matching in the inner frame first.
743                 let sig = func.dfg.exception_tables[exception].signature();
744                 let normal_return = *func.dfg.exception_tables[exception].normal_return();
745                 let exception_data = ExceptionTableData::new(
746                     sig,
747                     normal_return,
748                     func.dfg.exception_tables[exception]
749                         .items()
750                         .chain(func.dfg.exception_tables[call_exception_table].items()),
751                 )
752                 .deep_clone(&mut func.dfg.value_lists);
753 
754                 func.dfg.exception_tables[exception] = exception_data;
755             }
756 
757             otherwise => unreachable!("unknown non-return call instruction: {otherwise:?}"),
758         }
759     }
760 }
761 
762 /// After having created an inlined version of a callee instruction that returns
763 /// in the caller, we need to fix it up so that it doesn't actually return
764 /// (since we are already in the caller's frame) and instead just jumps to the
765 /// control-flow join point.
766 fn fixup_inst_that_returns(
767     allocs: &mut InliningAllocs,
768     func: &mut ir::Function,
769     callee: &ir::Function,
770     entity_map: &EntityMap,
771     call_opcode: ir::Opcode,
772     inlined_inst: ir::Inst,
773     callee_inst: ir::Inst,
774     return_block: ir::Block,
775     call_stack_map: Option<&[ir::UserStackMapEntry]>,
776 ) {
777     debug_assert!(func.dfg.insts[inlined_inst].opcode().is_return());
778     match func.dfg.insts[inlined_inst] {
779         //     return rets...
780         //
781         // becomes
782         //
783         //     jump return_block(rets...)
784         ir::InstructionData::MultiAry {
785             opcode: ir::Opcode::Return,
786             args,
787         } => {
788             let rets = SmallBlockArgVec::from_iter(
789                 args.as_slice(&func.dfg.value_lists)
790                     .iter()
791                     .copied()
792                     .map(|v| v.into()),
793             );
794             func.dfg.replace(inlined_inst).jump(return_block, &rets);
795         }
796 
797         //     return_call f(args...)
798         //
799         // becomes
800         //
801         //     rets... = call f(args...)
802         //     jump return_block(rets...)
803         ir::InstructionData::Call {
804             opcode: ir::Opcode::ReturnCall,
805             args,
806             func_ref,
807         } => {
808             func.dfg.insts[inlined_inst] = ir::InstructionData::Call {
809                 opcode: ir::Opcode::Call,
810                 args,
811                 func_ref,
812             };
813             func.dfg.make_inst_results(inlined_inst, ir::types::INVALID);
814 
815             append_stack_map_entries(
816                 func,
817                 callee,
818                 &entity_map,
819                 call_stack_map,
820                 inlined_inst,
821                 callee_inst,
822             );
823 
824             let rets = SmallBlockArgVec::from_iter(
825                 func.dfg
826                     .inst_results(inlined_inst)
827                     .iter()
828                     .copied()
829                     .map(|v| v.into()),
830             );
831             let mut cursor = FuncCursor::new(func);
832             cursor.goto_after_inst(inlined_inst);
833             cursor.ins().jump(return_block, &rets);
834 
835             if call_opcode == ir::Opcode::TryCall {
836                 allocs
837                     .calls_needing_exception_table_fixup
838                     .push(inlined_inst);
839             }
840         }
841 
842         //     return_call_indirect val(args...)
843         //
844         // becomes
845         //
846         //     rets... = call_indirect val(args...)
847         //     jump return_block(rets...)
848         ir::InstructionData::CallIndirect {
849             opcode: ir::Opcode::ReturnCallIndirect,
850             args,
851             sig_ref,
852         } => {
853             func.dfg.insts[inlined_inst] = ir::InstructionData::CallIndirect {
854                 opcode: ir::Opcode::CallIndirect,
855                 args,
856                 sig_ref,
857             };
858             func.dfg.make_inst_results(inlined_inst, ir::types::INVALID);
859 
860             append_stack_map_entries(
861                 func,
862                 callee,
863                 &entity_map,
864                 call_stack_map,
865                 inlined_inst,
866                 callee_inst,
867             );
868 
869             let rets = SmallBlockArgVec::from_iter(
870                 func.dfg
871                     .inst_results(inlined_inst)
872                     .iter()
873                     .copied()
874                     .map(|v| v.into()),
875             );
876             let mut cursor = FuncCursor::new(func);
877             cursor.goto_after_inst(inlined_inst);
878             cursor.ins().jump(return_block, &rets);
879 
880             if call_opcode == ir::Opcode::TryCall {
881                 allocs
882                     .calls_needing_exception_table_fixup
883                     .push(inlined_inst);
884             }
885         }
886 
887         inst_data => unreachable!(
888             "should have handled all `is_return() == true` instructions above; \
889              got {inst_data:?}"
890         ),
891     }
892 }
893 
/// An `InstructionMapper` implementation that remaps a callee instruction's
/// entity references to their new indices in the caller function.
///
/// Values and constants are looked up through `allocs`; most other entity
/// kinds are translated through `entity_map`. Value lists, jump tables, and
/// exception tables are rebuilt inside `func` as they are mapped.
struct InliningInstRemapper<'a> {
    /// Inlining scratch state; read for the callee-value to caller-value
    /// association and for the callee-constant to caller-constant map.
    allocs: &'a InliningAllocs,
    /// The caller function: new value lists, jump tables, and exception tables
    /// are created in its data-flow graph.
    func: &'a mut ir::Function,
    /// The callee function whose instructions are being remapped; only read.
    callee: &'a ir::Function,
    /// Translation from callee entity references to their inlined caller
    /// counterparts.
    entity_map: &'a EntityMap,
}
902 
903 impl<'a> ir::instructions::InstructionMapper for InliningInstRemapper<'a> {
904     fn map_value(&mut self, value: ir::Value) -> ir::Value {
905         self.allocs.get_inlined_value(self.callee, value).expect(
906             "defs come before uses; we should have already inlined all values \
907              used by an instruction",
908         )
909     }
910 
911     fn map_value_list(&mut self, value_list: ir::ValueList) -> ir::ValueList {
912         let mut inlined_list = ir::ValueList::new();
913         for callee_val in value_list.as_slice(&self.callee.dfg.value_lists) {
914             let inlined_val = self.map_value(*callee_val);
915             inlined_list.push(inlined_val, &mut self.func.dfg.value_lists);
916         }
917         inlined_list
918     }
919 
920     fn map_global_value(&mut self, global_value: ir::GlobalValue) -> ir::GlobalValue {
921         self.entity_map.inlined_global_value(global_value)
922     }
923 
924     fn map_jump_table(&mut self, jump_table: ir::JumpTable) -> ir::JumpTable {
925         let inlined_default =
926             self.map_block_call(self.callee.dfg.jump_tables[jump_table].default_block());
927         let inlined_table = self.callee.dfg.jump_tables[jump_table]
928             .as_slice()
929             .iter()
930             .map(|callee_block_call| self.map_block_call(*callee_block_call))
931             .collect::<SmallBlockCallVec>();
932         self.func
933             .dfg
934             .jump_tables
935             .push(ir::JumpTableData::new(inlined_default, &inlined_table))
936     }
937 
938     fn map_exception_table(&mut self, exception_table: ir::ExceptionTable) -> ir::ExceptionTable {
939         let exception_table = &self.callee.dfg.exception_tables[exception_table];
940         let inlined_sig_ref = self.map_sig_ref(exception_table.signature());
941         let inlined_normal_return = self.map_block_call(*exception_table.normal_return());
942         let inlined_table = exception_table
943             .items()
944             .map(|item| match item {
945                 ExceptionTableItem::Tag(tag, block_call) => {
946                     ExceptionTableItem::Tag(tag, self.map_block_call(block_call))
947                 }
948                 ExceptionTableItem::Default(block_call) => {
949                     ExceptionTableItem::Default(self.map_block_call(block_call))
950                 }
951                 ExceptionTableItem::Context(value) => {
952                     ExceptionTableItem::Context(self.map_value(value))
953                 }
954             })
955             .collect::<SmallVec<[_; 8]>>();
956         self.func
957             .dfg
958             .exception_tables
959             .push(ir::ExceptionTableData::new(
960                 inlined_sig_ref,
961                 inlined_normal_return,
962                 inlined_table,
963             ))
964     }
965 
966     fn map_block_call(&mut self, block_call: ir::BlockCall) -> ir::BlockCall {
967         let callee_block = block_call.block(&self.callee.dfg.value_lists);
968         let inlined_block = self.entity_map.inlined_block(callee_block);
969         let args = block_call
970             .args(&self.callee.dfg.value_lists)
971             .map(|arg| match arg {
972                 ir::BlockArg::Value(value) => self.map_value(value).into(),
973                 ir::BlockArg::TryCallRet(_) | ir::BlockArg::TryCallExn(_) => arg,
974             })
975             .collect::<SmallBlockArgVec>();
976         ir::BlockCall::new(inlined_block, args, &mut self.func.dfg.value_lists)
977     }
978 
979     fn map_block(&mut self, block: ir::Block) -> ir::Block {
980         self.entity_map.inlined_block(block)
981     }
982 
983     fn map_func_ref(&mut self, func_ref: ir::FuncRef) -> ir::FuncRef {
984         self.entity_map.inlined_func_ref(func_ref)
985     }
986 
987     fn map_sig_ref(&mut self, sig_ref: ir::SigRef) -> ir::SigRef {
988         self.entity_map.inlined_sig_ref(sig_ref)
989     }
990 
991     fn map_stack_slot(&mut self, stack_slot: ir::StackSlot) -> ir::StackSlot {
992         self.entity_map.inlined_stack_slot(stack_slot)
993     }
994 
995     fn map_dynamic_stack_slot(
996         &mut self,
997         dynamic_stack_slot: ir::DynamicStackSlot,
998     ) -> ir::DynamicStackSlot {
999         self.entity_map
1000             .inlined_dynamic_stack_slot(dynamic_stack_slot)
1001     }
1002 
1003     fn map_constant(&mut self, constant: ir::Constant) -> ir::Constant {
1004         self.allocs
1005             .constants
1006             .get(constant)
1007             .and_then(|o| o.expand())
1008             .expect("should have inlined all callee constants")
1009     }
1010 
1011     fn map_immediate(&mut self, immediate: ir::Immediate) -> ir::Immediate {
1012         self.entity_map.inlined_immediate(immediate)
1013     }
1014 }
1015 
1016 /// Inline the callee's layout into the caller's layout.
1017 ///
1018 /// Returns the last inlined block in the layout.
1019 fn inline_block_layout(
1020     func: &mut ir::Function,
1021     call_block: ir::Block,
1022     callee: &ir::Function,
1023     entity_map: &EntityMap,
1024 ) -> ir::Block {
1025     debug_assert!(func.layout.is_block_inserted(call_block));
1026 
1027     // Iterate over callee blocks in layout order, inserting their associated
1028     // inlined block into the caller's layout.
1029     let mut prev_inlined_block = call_block;
1030     let mut next_callee_block = callee.layout.entry_block();
1031     while let Some(callee_block) = next_callee_block {
1032         debug_assert!(func.layout.is_block_inserted(prev_inlined_block));
1033 
1034         let inlined_block = entity_map.inlined_block(callee_block);
1035         func.layout
1036             .insert_block_after(inlined_block, prev_inlined_block);
1037 
1038         prev_inlined_block = inlined_block;
1039         next_callee_block = callee.layout.next_block(callee_block);
1040     }
1041 
1042     debug_assert!(func.layout.is_block_inserted(prev_inlined_block));
1043     prev_inlined_block
1044 }
1045 
1046 /// Split the call instruction's block just after the call instruction to create
1047 /// the point where control-flow joins after the inlined callee "returns".
1048 ///
1049 /// Note that tail calls do not return to the caller and therefore do not have a
1050 /// control-flow join point.
1051 fn split_off_return_block(
1052     func: &mut ir::Function,
1053     call_inst: ir::Inst,
1054     opcode: ir::Opcode,
1055     callee: &ir::Function,
1056 ) -> Option<ir::Block> {
1057     // When the `call_inst` is not a block terminator, we need to split the
1058     // block.
1059     let return_block = func.layout.next_inst(call_inst).map(|next_inst| {
1060         let return_block = func.dfg.blocks.add();
1061         func.layout.split_block(return_block, next_inst);
1062 
1063         // Add block parameters for each return value and alias the call
1064         // instruction's results to them.
1065         let old_results =
1066             SmallValueVec::from_iter(func.dfg.inst_results(call_inst).iter().copied());
1067         debug_assert_eq!(old_results.len(), callee.signature.returns.len());
1068         func.dfg.detach_inst_results(call_inst);
1069         for (abi, old_val) in callee.signature.returns.iter().zip(old_results) {
1070             debug_assert_eq!(abi.value_type, func.dfg.value_type(old_val));
1071             let ret_param = func.dfg.append_block_param(return_block, abi.value_type);
1072             func.dfg.change_to_alias(old_val, ret_param);
1073         }
1074 
1075         return_block
1076     });
1077 
1078     // When the `call_inst` is a block terminator, then it is either a
1079     // `return_call` or a `try_call`:
1080     //
1081     // * For `return_call`s, we don't have a control-flow join point, because
1082     //   the caller permanently transfers control to the callee.
1083     //
1084     // * For `try_call`s, we probably already have a block for the control-flow
1085     //   join point, but it isn't guaranteed: the `try_call` might ignore the
1086     //   call's returns and not forward them to the normal-return block or it
1087     //   might also pass additional arguments. We can only reuse the existing
1088     //   normal-return block when the `try_call` forwards exactly our callee's
1089     //   returns to that block (and therefore that block's parameter types also
1090     //   exactly match the callee's return types). Otherwise, we must create a new
1091     //   return block that forwards to the existing normal-return
1092     //   block. (Elsewhere, at the end of inlining, we will also update any inlined
1093     //   calls to forward any raised exceptions to the caller's exception table,
1094     //   as necessary.)
1095     //
1096     //   Finally, note that reusing the normal-return's target block is just an
1097     //   optimization to emit a simpler CFG when we can, and is not
1098     //   fundamentally required for correctness. We could always insert a
1099     //   temporary block as our control-flow join point that then forwards to
1100     //   the normal-return's target block. However, at the time of writing,
1101     //   Cranelift doesn't currently do any jump-threading or branch
1102     //   simplification in the mid-end, and removing unnecessary blocks in this
1103     //   way can help some subsequent mid-end optimizations. If, in the future,
1104     //   we gain support for jump-threading optimizations in the mid-end, we can
1105     //   come back and simplify the below code a bit to always generate the
1106     //   temporary block, and then rely on the subsequent optimizations to clean
1107     //   everything up.
1108     debug_assert_eq!(
1109         return_block.is_none(),
1110         opcode == ir::Opcode::ReturnCall || opcode == ir::Opcode::TryCall,
1111     );
1112     return_block.or_else(|| match func.dfg.insts[call_inst] {
1113         ir::InstructionData::TryCall {
1114             opcode: ir::Opcode::TryCall,
1115             args: _,
1116             func_ref: _,
1117             exception,
1118         } => {
1119             let normal_return = func.dfg.exception_tables[exception].normal_return();
1120             let normal_return_block = normal_return.block(&func.dfg.value_lists);
1121 
1122             // Check to see if we can reuse the existing normal-return block.
1123             {
1124                 let normal_return_args = normal_return.args(&func.dfg.value_lists);
1125                 if normal_return_args.len() == callee.signature.returns.len()
1126                     && normal_return_args.enumerate().all(|(i, arg)| {
1127                         let i = u32::try_from(i).unwrap();
1128                         arg == ir::BlockArg::TryCallRet(i)
1129                     })
1130                 {
1131                     return Some(normal_return_block);
1132                 }
1133             }
1134 
1135             // Okay, we cannot reuse the normal-return block. Create a new block
1136             // that has the expected block parameter types and have it jump to
1137             // the normal-return block.
1138             let return_block = func.dfg.blocks.add();
1139             func.layout.insert_block(return_block, normal_return_block);
1140 
1141             let return_block_params = callee
1142                 .signature
1143                 .returns
1144                 .iter()
1145                 .map(|abi| func.dfg.append_block_param(return_block, abi.value_type))
1146                 .collect::<SmallValueVec>();
1147 
1148             let normal_return_args = func.dfg.exception_tables[exception]
1149                 .normal_return()
1150                 .args(&func.dfg.value_lists)
1151                 .collect::<SmallBlockArgVec>();
1152             let jump_args = normal_return_args
1153                 .into_iter()
1154                 .map(|arg| match arg {
1155                     ir::BlockArg::Value(value) => ir::BlockArg::Value(value),
1156                     ir::BlockArg::TryCallRet(i) => {
1157                         let i = usize::try_from(i).unwrap();
1158                         ir::BlockArg::Value(return_block_params[i])
1159                     }
1160                     ir::BlockArg::TryCallExn(_) => {
1161                         unreachable!("normal-return edges cannot use exceptional results")
1162                     }
1163                 })
1164                 .collect::<SmallBlockArgVec>();
1165 
1166             let mut cursor = FuncCursor::new(func);
1167             cursor.goto_first_insertion_point(return_block);
1168             cursor.ins().jump(normal_return_block, &jump_args);
1169 
1170             Some(return_block)
1171         }
1172         _ => None,
1173     })
1174 }
1175 
1176 /// Replace the caller's call instruction with a jump to the caller's inlined
1177 /// copy of the callee's entry block.
1178 ///
1179 /// Also associates the callee's parameters with the caller's arguments in our
1180 /// value map.
1181 ///
1182 /// Returns the caller's stack map entries, if any.
1183 fn replace_call_with_jump(
1184     allocs: &mut InliningAllocs,
1185     func: &mut ir::Function,
1186     call_inst: ir::Inst,
1187     callee: &ir::Function,
1188     entity_map: &EntityMap,
1189 ) -> Option<ir::UserStackMapEntryVec> {
1190     trace!("Replacing `call` with `jump`");
1191     trace!(
1192         "  --> call instruction: {call_inst:?}: {}",
1193         func.dfg.display_inst(call_inst)
1194     );
1195 
1196     let callee_entry_block = callee
1197         .layout
1198         .entry_block()
1199         .expect("callee function should have an entry block");
1200     let callee_param_values = callee.dfg.block_params(callee_entry_block);
1201     let caller_arg_values = SmallValueVec::from_iter(func.dfg.inst_args(call_inst).iter().copied());
1202     debug_assert_eq!(callee_param_values.len(), caller_arg_values.len());
1203     debug_assert_eq!(callee_param_values.len(), callee.signature.params.len());
1204     for (abi, (callee_param_value, caller_arg_value)) in callee
1205         .signature
1206         .params
1207         .iter()
1208         .zip(callee_param_values.into_iter().zip(caller_arg_values))
1209     {
1210         debug_assert_eq!(abi.value_type, callee.dfg.value_type(*callee_param_value));
1211         debug_assert_eq!(abi.value_type, func.dfg.value_type(caller_arg_value));
1212         allocs.set_inlined_value(callee, *callee_param_value, caller_arg_value);
1213     }
1214 
1215     // Replace the caller's call instruction with a jump to the caller's inlined
1216     // copy of the callee's entry block.
1217     //
1218     // Note that the call block dominates the inlined entry block (and also all
1219     // other inlined blocks) so we can reference the arguments directly, and do
1220     // not need to add block parameters to the inlined entry block.
1221     let inlined_entry_block = entity_map.inlined_block(callee_entry_block);
1222     func.dfg.replace(call_inst).jump(inlined_entry_block, &[]);
1223     trace!(
1224         "  --> replaced with jump instruction: {call_inst:?}: {}",
1225         func.dfg.display_inst(call_inst)
1226     );
1227 
1228     let stack_map_entries = func.dfg.take_user_stack_map_entries(call_inst);
1229     stack_map_entries
1230 }
1231 
/// Keeps track of mapping callee entities to their associated inlined caller
/// entities.
///
/// Every field is an optional index offset. The `inlined_*` accessors add the
/// offset to a callee entity's index to obtain the caller entity, and panic
/// when the corresponding offset has not been set yet.
#[derive(Default)]
struct EntityMap {
    // Rather than doing an implicit, demand-based, DCE'ing translation of
    // entities, which would require maps from each callee entity to its
    // associated caller entity, we copy all entities into the caller, remember
    // each entity's initial offset, and then mapping from the callee to the
    // inlined caller entity is just adding that initial offset to the callee's
    // index. This should be both faster and simpler than the alternative. Most
    // of these sets are relatively small, and they rarely have too much dead
    // code in practice, so this is a good trade off.
    //
    // Note that there are a few kinds of entities that are excluded from the
    // `EntityMap`, and for which we do actually take the demand-based approach:
    // values and value lists being the notable ones.
    /// Offset added to a callee `ir::Block` index by `inlined_block`; also the
    /// first index yielded by `iter_inlined_blocks`.
    block_offset: Option<u32>,
    /// Offset added to a callee `ir::GlobalValue` index by
    /// `inlined_global_value`.
    global_value_offset: Option<u32>,
    /// Offset added to a callee `ir::SigRef` index by `inlined_sig_ref`.
    sig_ref_offset: Option<u32>,
    /// Offset added to a callee `ir::FuncRef` index by `inlined_func_ref`.
    func_ref_offset: Option<u32>,
    /// Offset added to a callee `ir::StackSlot` index by `inlined_stack_slot`.
    stack_slot_offset: Option<u32>,
    /// Offset added to a callee `ir::DynamicType` index by
    /// `inlined_dynamic_type`.
    dynamic_type_offset: Option<u32>,
    /// Offset added to a callee `ir::DynamicStackSlot` index by
    /// `inlined_dynamic_stack_slot`.
    dynamic_stack_slot_offset: Option<u32>,
    /// Offset added to a callee `ir::Immediate` index by `inlined_immediate`.
    immediate_offset: Option<u32>,
}
1257 
1258 impl EntityMap {
1259     fn inlined_block(&self, callee_block: ir::Block) -> ir::Block {
1260         let offset = self
1261             .block_offset
1262             .expect("must create inlined `ir::Block`s before calling `EntityMap::inlined_block`");
1263         ir::Block::from_u32(offset + callee_block.as_u32())
1264     }
1265 
1266     fn iter_inlined_blocks(&self, func: &ir::Function) -> impl Iterator<Item = ir::Block> + use<> {
1267         let start = self.block_offset.expect(
1268             "must create inlined `ir::Block`s before calling `EntityMap::iter_inlined_blocks`",
1269         );
1270 
1271         let end = func.dfg.blocks.len();
1272         let end = u32::try_from(end).unwrap();
1273 
1274         (start..end).map(|i| ir::Block::from_u32(i))
1275     }
1276 
1277     fn inlined_global_value(&self, callee_global_value: ir::GlobalValue) -> ir::GlobalValue {
1278         let offset = self
1279             .global_value_offset
1280             .expect("must create inlined `ir::GlobalValue`s before calling `EntityMap::inlined_global_value`");
1281         ir::GlobalValue::from_u32(offset + callee_global_value.as_u32())
1282     }
1283 
1284     fn inlined_sig_ref(&self, callee_sig_ref: ir::SigRef) -> ir::SigRef {
1285         let offset = self.sig_ref_offset.expect(
1286             "must create inlined `ir::SigRef`s before calling `EntityMap::inlined_sig_ref`",
1287         );
1288         ir::SigRef::from_u32(offset + callee_sig_ref.as_u32())
1289     }
1290 
1291     fn inlined_func_ref(&self, callee_func_ref: ir::FuncRef) -> ir::FuncRef {
1292         let offset = self.func_ref_offset.expect(
1293             "must create inlined `ir::FuncRef`s before calling `EntityMap::inlined_func_ref`",
1294         );
1295         ir::FuncRef::from_u32(offset + callee_func_ref.as_u32())
1296     }
1297 
1298     fn inlined_stack_slot(&self, callee_stack_slot: ir::StackSlot) -> ir::StackSlot {
1299         let offset = self.stack_slot_offset.expect(
1300             "must create inlined `ir::StackSlot`s before calling `EntityMap::inlined_stack_slot`",
1301         );
1302         ir::StackSlot::from_u32(offset + callee_stack_slot.as_u32())
1303     }
1304 
1305     fn inlined_dynamic_type(&self, callee_dynamic_type: ir::DynamicType) -> ir::DynamicType {
1306         let offset = self.dynamic_type_offset.expect(
1307             "must create inlined `ir::DynamicType`s before calling `EntityMap::inlined_dynamic_type`",
1308         );
1309         ir::DynamicType::from_u32(offset + callee_dynamic_type.as_u32())
1310     }
1311 
1312     fn inlined_dynamic_stack_slot(
1313         &self,
1314         callee_dynamic_stack_slot: ir::DynamicStackSlot,
1315     ) -> ir::DynamicStackSlot {
1316         let offset = self.dynamic_stack_slot_offset.expect(
1317             "must create inlined `ir::DynamicStackSlot`s before calling `EntityMap::inlined_dynamic_stack_slot`",
1318         );
1319         ir::DynamicStackSlot::from_u32(offset + callee_dynamic_stack_slot.as_u32())
1320     }
1321 
1322     fn inlined_immediate(&self, callee_immediate: ir::Immediate) -> ir::Immediate {
1323         let offset = self.immediate_offset.expect(
1324             "must create inlined `ir::Immediate`s before calling `EntityMap::inlined_immediate`",
1325         );
1326         ir::Immediate::from_u32(offset + callee_immediate.as_u32())
1327     }
1328 }
1329 
1330 /// Translate all of the callee's various entities into the caller, producing an
1331 /// `EntityMap` that can be used to translate callee entity references into
1332 /// inlined caller entity references.
1333 fn create_entities(
1334     allocs: &mut InliningAllocs,
1335     func: &mut ir::Function,
1336     callee: &ir::Function,
1337 ) -> EntityMap {
1338     let mut entity_map = EntityMap::default();
1339 
1340     entity_map.block_offset = Some(create_blocks(allocs, func, callee));
1341     entity_map.global_value_offset = Some(create_global_values(func, callee));
1342     entity_map.sig_ref_offset = Some(create_sig_refs(func, callee));
1343     create_user_external_name_refs(allocs, func, callee);
1344     entity_map.func_ref_offset = Some(create_func_refs(allocs, func, callee, &entity_map));
1345     entity_map.stack_slot_offset = Some(create_stack_slots(func, callee));
1346     entity_map.dynamic_type_offset = Some(create_dynamic_types(func, callee, &entity_map));
1347     entity_map.dynamic_stack_slot_offset =
1348         Some(create_dynamic_stack_slots(func, callee, &entity_map));
1349     entity_map.immediate_offset = Some(create_immediates(func, callee));
1350 
1351     // `ir::ConstantData` is deduplicated, so we cannot use our offset scheme
1352     // for `ir::Constant`s. Nonetheless, we still insert them into the caller
1353     // now, at the same time as the rest of our entities.
1354     create_constants(allocs, func, callee);
1355 
1356     entity_map
1357 }
1358 
1359 /// Create inlined blocks in the caller for every block in the callee.
1360 fn create_blocks(
1361     allocs: &mut InliningAllocs,
1362     func: &mut ir::Function,
1363     callee: &ir::Function,
1364 ) -> u32 {
1365     let offset = func.dfg.blocks.len();
1366     let offset = u32::try_from(offset).unwrap();
1367 
1368     func.dfg.blocks.reserve(callee.dfg.blocks.len());
1369     for callee_block in callee.dfg.blocks.iter() {
1370         let caller_block = func.dfg.blocks.add();
1371         trace!("Callee {callee_block:?} = inlined {caller_block:?}");
1372 
1373         if callee.layout.is_cold(callee_block) {
1374             func.layout.set_cold(caller_block);
1375         }
1376 
1377         // Note: the entry block does not need parameters because the only
1378         // predecessor is the call block and we associate the callee's
1379         // parameters with the caller's arguments directly.
1380         if callee.layout.entry_block() != Some(callee_block) {
1381             for callee_param in callee.dfg.blocks[callee_block].params(&callee.dfg.value_lists) {
1382                 let ty = callee.dfg.value_type(*callee_param);
1383                 let caller_param = func.dfg.append_block_param(caller_block, ty);
1384 
1385                 allocs.set_inlined_value(callee, *callee_param, caller_param);
1386             }
1387         }
1388     }
1389 
1390     offset
1391 }
1392 
1393 /// Copy and translate global values from the callee into the caller.
1394 fn create_global_values(func: &mut ir::Function, callee: &ir::Function) -> u32 {
1395     let gv_offset = func.global_values.len();
1396     let gv_offset = u32::try_from(gv_offset).unwrap();
1397 
1398     func.global_values.reserve(callee.global_values.len());
1399     for gv in callee.global_values.values() {
1400         func.global_values.push(match gv {
1401             // These kinds of global values reference other global values, so we
1402             // need to fixup that reference.
1403             ir::GlobalValueData::Load {
1404                 base,
1405                 offset,
1406                 global_type,
1407                 flags,
1408             } => ir::GlobalValueData::Load {
1409                 base: ir::GlobalValue::from_u32(base.as_u32() + gv_offset),
1410                 offset: *offset,
1411                 global_type: *global_type,
1412                 flags: *flags,
1413             },
1414             ir::GlobalValueData::IAddImm {
1415                 base,
1416                 offset,
1417                 global_type,
1418             } => ir::GlobalValueData::IAddImm {
1419                 base: ir::GlobalValue::from_u32(base.as_u32() + gv_offset),
1420                 offset: *offset,
1421                 global_type: *global_type,
1422             },
1423 
1424             // These kinds of global values do not reference other global
1425             // values, so we can just clone them.
1426             ir::GlobalValueData::VMContext
1427             | ir::GlobalValueData::Symbol { .. }
1428             | ir::GlobalValueData::DynScaleTargetConst { .. } => gv.clone(),
1429         });
1430     }
1431 
1432     gv_offset
1433 }
1434 
1435 /// Copy `ir::SigRef`s from the callee into the caller.
1436 fn create_sig_refs(func: &mut ir::Function, callee: &ir::Function) -> u32 {
1437     let offset = func.dfg.signatures.len();
1438     let offset = u32::try_from(offset).unwrap();
1439 
1440     func.dfg.signatures.reserve(callee.dfg.signatures.len());
1441     for sig in callee.dfg.signatures.values() {
1442         func.dfg.signatures.push(sig.clone());
1443     }
1444 
1445     offset
1446 }
1447 
1448 fn create_user_external_name_refs(
1449     allocs: &mut InliningAllocs,
1450     func: &mut ir::Function,
1451     callee: &ir::Function,
1452 ) {
1453     for (callee_named_func_ref, name) in callee.params.user_named_funcs().iter() {
1454         let caller_named_func_ref = func.declare_imported_user_function(name.clone());
1455         allocs.user_external_name_refs[callee_named_func_ref] = Some(caller_named_func_ref).into();
1456     }
1457 }
1458 
1459 /// Translate `ir::FuncRef`s from the callee into the caller.
1460 fn create_func_refs(
1461     allocs: &InliningAllocs,
1462     func: &mut ir::Function,
1463     callee: &ir::Function,
1464     entity_map: &EntityMap,
1465 ) -> u32 {
1466     let offset = func.dfg.ext_funcs.len();
1467     let offset = u32::try_from(offset).unwrap();
1468 
1469     func.dfg.ext_funcs.reserve(callee.dfg.ext_funcs.len());
1470     for ir::ExtFuncData {
1471         name,
1472         signature,
1473         colocated,
1474     } in callee.dfg.ext_funcs.values()
1475     {
1476         func.dfg.ext_funcs.push(ir::ExtFuncData {
1477             name: match name {
1478                 ir::ExternalName::User(name_ref) => {
1479                     ir::ExternalName::User(allocs.user_external_name_refs[*name_ref].expect(
1480                         "should have translated all `ir::UserExternalNameRef`s before translating \
1481                          `ir::FuncRef`s",
1482                     ))
1483                 }
1484                 ir::ExternalName::TestCase(_)
1485                 | ir::ExternalName::LibCall(_)
1486                 | ir::ExternalName::KnownSymbol(_) => name.clone(),
1487             },
1488             signature: entity_map.inlined_sig_ref(*signature),
1489             colocated: *colocated,
1490         });
1491     }
1492 
1493     offset
1494 }
1495 
1496 /// Copy stack slots from the callee into the caller.
1497 fn create_stack_slots(func: &mut ir::Function, callee: &ir::Function) -> u32 {
1498     let offset = func.sized_stack_slots.len();
1499     let offset = u32::try_from(offset).unwrap();
1500 
1501     func.sized_stack_slots
1502         .reserve(callee.sized_stack_slots.len());
1503     for slot in callee.sized_stack_slots.values() {
1504         func.sized_stack_slots.push(slot.clone());
1505     }
1506 
1507     offset
1508 }
1509 
1510 /// Copy dynamic types from the callee into the caller.
1511 fn create_dynamic_types(
1512     func: &mut ir::Function,
1513     callee: &ir::Function,
1514     entity_map: &EntityMap,
1515 ) -> u32 {
1516     let offset = func.dynamic_stack_slots.len();
1517     let offset = u32::try_from(offset).unwrap();
1518 
1519     func.dfg
1520         .dynamic_types
1521         .reserve(callee.dfg.dynamic_types.len());
1522     for ir::DynamicTypeData {
1523         base_vector_ty,
1524         dynamic_scale,
1525     } in callee.dfg.dynamic_types.values()
1526     {
1527         func.dfg.dynamic_types.push(ir::DynamicTypeData {
1528             base_vector_ty: *base_vector_ty,
1529             dynamic_scale: entity_map.inlined_global_value(*dynamic_scale),
1530         });
1531     }
1532 
1533     offset
1534 }
1535 
1536 /// Copy dynamic stack slots from the callee into the caller.
1537 fn create_dynamic_stack_slots(
1538     func: &mut ir::Function,
1539     callee: &ir::Function,
1540     entity_map: &EntityMap,
1541 ) -> u32 {
1542     let offset = func.dynamic_stack_slots.len();
1543     let offset = u32::try_from(offset).unwrap();
1544 
1545     func.dynamic_stack_slots
1546         .reserve(callee.dynamic_stack_slots.len());
1547     for ir::DynamicStackSlotData { kind, dyn_ty } in callee.dynamic_stack_slots.values() {
1548         func.dynamic_stack_slots.push(ir::DynamicStackSlotData {
1549             kind: *kind,
1550             dyn_ty: entity_map.inlined_dynamic_type(*dyn_ty),
1551         });
1552     }
1553 
1554     offset
1555 }
1556 
1557 /// Copy immediates from the callee into the caller.
1558 fn create_immediates(func: &mut ir::Function, callee: &ir::Function) -> u32 {
1559     let offset = func.dfg.immediates.len();
1560     let offset = u32::try_from(offset).unwrap();
1561 
1562     func.dfg.immediates.reserve(callee.dfg.immediates.len());
1563     for imm in callee.dfg.immediates.values() {
1564         func.dfg.immediates.push(imm.clone());
1565     }
1566 
1567     offset
1568 }
1569 
1570 /// Copy constants from the callee into the caller.
1571 fn create_constants(allocs: &mut InliningAllocs, func: &mut ir::Function, callee: &ir::Function) {
1572     for (callee_constant, data) in callee.dfg.constants.iter() {
1573         let inlined_constant = func.dfg.constants.insert(data.clone());
1574         allocs.constants[*callee_constant] = Some(inlined_constant).into();
1575     }
1576 }
1577