1 //! Function inlining infrastructure.
2 //!
3 //! This module provides "inlining as a library" to Cranelift users; it does
4 //! _not_ provide a complete, off-the-shelf inlining solution. Cranelift's
5 //! compilation context is per-function and does not encompass the full call
6 //! graph. It does not know which functions are hot and which are cold, which
7 //! have been marked the equivalent of `#[inline(never)]`, etc... Only the
8 //! Cranelift user can understand these aspects of the full compilation
9 //! pipeline, and these things can be very different between (say) Wasmtime and
10 //! `cg_clif`. Therefore, this module does not attempt to define heuristics for
11 //! when inlining a particular call is likely beneficial. This module only
12 //! provides hooks for the Cranelift user to define whether a given call should
13 //! be inlined or not, and the mechanics to inline a callee into a particular
14 //! call site when directed to do so by the Cranelift user.
15 //!
16 //! The top-level inlining entry point during Cranelift compilation is
17 //! [`Context::inline`][crate::Context::inline]. It takes an [`Inline`] trait
18 //! implementation, which is authored by the Cranelift user and directs
19 //! Cranelift whether to inline a particular call, and, when inlining, gives
20 //! Cranelift the body of the callee that is to be inlined.
21
22 use crate::cursor::{Cursor as _, FuncCursor};
23 use crate::ir::{self, DebugTag, ExceptionTableData, ExceptionTableItem, InstBuilder as _};
24 use crate::result::CodegenResult;
25 use crate::trace;
26 use crate::traversals::Dfs;
27 use alloc::borrow::Cow;
28 use alloc::vec::Vec;
29 use cranelift_entity::{SecondaryMap, packed_option::PackedOption};
30 use smallvec::SmallVec;
31
/// A small vector of values that stores up to eight elements inline.
type SmallValueVec = SmallVec<[ir::Value; 8]>;
/// A small vector of block arguments that stores up to eight elements inline.
type SmallBlockArgVec = SmallVec<[ir::BlockArg; 8]>;
/// A small vector of block calls that stores up to eight elements inline.
type SmallBlockCallVec = SmallVec<[ir::BlockCall; 8]>;
35
/// A command directing Cranelift whether or not to inline a particular call.
///
/// This is the value returned by [`Inline::inline`] for each direct call site
/// that is considered for inlining.
pub enum InlineCommand<'a> {
    /// Keep the call as-is, out-of-line, and do not inline the callee.
    KeepCall,

    /// Inline the call, using this function as the body of the callee.
    ///
    /// It is the `Inline` implementor's responsibility to ensure that this
    /// function is the correct callee. Providing the wrong function may result
    /// in panics during compilation or incorrect runtime behavior.
    Inline {
        /// The callee function's body.
        ///
        /// This is a `Cow`, so the implementor may hand out either a borrowed
        /// or an owned function.
        callee: Cow<'a, ir::Function>,
        /// Whether to visit any function calls within the callee body after
        /// inlining and consider them for further inlining.
        ///
        /// When `true`, the traversal resumes from just before the inlined
        /// body; when `false`, it resumes after the last inlined block.
        visit_callee: bool,
    },
}
54
/// A trait for directing Cranelift whether to inline a particular call or not.
///
/// Used in combination with the [`Context::inline`][crate::Context::inline]
/// method.
pub trait Inline {
    /// A hook invoked for each direct call instruction in a function, whose
    /// result determines whether Cranelift should inline a given call.
    ///
    /// The hook is not invoked for indirect calls, nor for calls whose callee
    /// is marked patchable; those call sites are always kept as-is.
    ///
    /// The Cranelift user is responsible for defining their own heuristics and
    /// deciding whether inlining the call is beneficial.
    ///
    /// # Parameters
    ///
    /// * `caller`: the function containing the call site.
    ///
    /// * `call_inst`: the call instruction under consideration.
    ///
    /// * `call_opcode`: the opcode of `call_inst`; one of `call`,
    ///   `return_call`, or `try_call`.
    ///
    /// * `callee`: the `FuncRef` (in `caller`) that the call targets.
    ///
    /// * `call_args`: the arguments of `call_inst`, as reported by the
    ///   caller's data-flow graph.
    ///
    /// # Requirements on the returned function
    ///
    /// When returning a function and directing Cranelift to inline its body
    /// into the call site, the `Inline` implementor must ensure the following:
    ///
    /// * The returned function's signature exactly matches the `callee`
    ///   `FuncRef`'s signature.
    ///
    /// * The returned function must be legalized.
    ///
    /// * The returned function must be valid (i.e. it must pass the CLIF
    ///   verifier).
    ///
    /// * The returned function is a correct and valid implementation of the
    ///   `callee` according to your language's semantics.
    ///
    /// Failure to uphold these invariants may result in panics during
    /// compilation or incorrect runtime behavior in the generated code.
    fn inline(
        &mut self,
        caller: &ir::Function,
        call_inst: ir::Inst,
        call_opcode: ir::Opcode,
        callee: ir::FuncRef,
        call_args: &[ir::Value],
    ) -> InlineCommand<'_>;
}
91
92 impl<'a, T> Inline for &'a mut T
93 where
94 T: Inline,
95 {
inline( &mut self, caller: &ir::Function, inst: ir::Inst, opcode: ir::Opcode, callee: ir::FuncRef, args: &[ir::Value], ) -> InlineCommand<'_>96 fn inline(
97 &mut self,
98 caller: &ir::Function,
99 inst: ir::Inst,
100 opcode: ir::Opcode,
101 callee: ir::FuncRef,
102 args: &[ir::Value],
103 ) -> InlineCommand<'_> {
104 (*self).inline(caller, inst, opcode, callee, args)
105 }
106 }
107
108 /// Walk the given function, invoke the `Inline` implementation for each call
109 /// instruction, and inline the callee when directed to do so.
110 ///
111 /// Returns whether any call was inlined.
do_inlining( func: &mut ir::Function, mut inliner: impl Inline, ) -> CodegenResult<bool>112 pub(crate) fn do_inlining(
113 func: &mut ir::Function,
114 mut inliner: impl Inline,
115 ) -> CodegenResult<bool> {
116 trace!("function {} before inlining: {}", func.name, func);
117
118 let mut inlined_any = false;
119 let mut allocs = InliningAllocs::default();
120
121 let mut cursor = FuncCursor::new(func);
122 'block_loop: while let Some(block) = cursor.next_block() {
123 // Always keep track of our previous cursor position. Assuming that the
124 // current position is a function call that we will inline, then the
125 // previous position is just before the inlined callee function. After
126 // inlining a call, the Cranelift user can decide whether to consider
127 // any function calls in the inlined callee for further inlining or
128 // not. When they do, then we back up to this previous cursor position
129 // so that our traversal will then continue over the inlined body.
130 let mut prev_pos;
131
132 while let Some(inst) = {
133 prev_pos = cursor.position();
134 cursor.next_inst()
135 } {
136 // Make sure that `block` is always `inst`'s block, even with all of
137 // our cursor-position-updating and block-splitting-during-inlining
138 // shenanigans below.
139 debug_assert_eq!(Some(block), cursor.func.layout.inst_block(inst));
140
141 match cursor.func.dfg.insts[inst] {
142 ir::InstructionData::Call { func_ref, .. }
143 if cursor.func.dfg.ext_funcs[func_ref].patchable =>
144 {
145 // Can't inline patchable calls; they need to
146 // remain patchable and inlining the whole body is
147 // decidedly *not* patchable!
148 }
149
150 ir::InstructionData::Call {
151 opcode: opcode @ ir::Opcode::Call | opcode @ ir::Opcode::ReturnCall,
152 args: _,
153 func_ref,
154 } => {
155 trace!(
156 "considering call site for inlining: {inst}: {}",
157 cursor.func.dfg.display_inst(inst),
158 );
159 let args = cursor.func.dfg.inst_args(inst);
160 match inliner.inline(&cursor.func, inst, opcode, func_ref, args) {
161 InlineCommand::KeepCall => {
162 trace!(" --> keeping call");
163 }
164 InlineCommand::Inline {
165 callee,
166 visit_callee,
167 } => {
168 let last_inlined_block = inline_one(
169 &mut allocs,
170 cursor.func,
171 func_ref,
172 block,
173 inst,
174 opcode,
175 &callee,
176 None,
177 );
178 inlined_any = true;
179 if visit_callee {
180 cursor.set_position(prev_pos);
181 } else {
182 // Arrange it so that the `next_block()` loop
183 // will continue to the next block that is not
184 // associated with the just-inlined callee.
185 cursor.goto_bottom(last_inlined_block);
186 continue 'block_loop;
187 }
188 }
189 }
190 }
191 ir::InstructionData::TryCall {
192 opcode: opcode @ ir::Opcode::TryCall,
193 args: _,
194 func_ref,
195 exception,
196 } => {
197 trace!(
198 "considering call site for inlining: {inst}: {}",
199 cursor.func.dfg.display_inst(inst),
200 );
201 let args = cursor.func.dfg.inst_args(inst);
202 match inliner.inline(&cursor.func, inst, opcode, func_ref, args) {
203 InlineCommand::KeepCall => {
204 trace!(" --> keeping call");
205 }
206 InlineCommand::Inline {
207 callee,
208 visit_callee,
209 } => {
210 let last_inlined_block = inline_one(
211 &mut allocs,
212 cursor.func,
213 func_ref,
214 block,
215 inst,
216 opcode,
217 &callee,
218 Some(exception),
219 );
220 inlined_any = true;
221 if visit_callee {
222 cursor.set_position(prev_pos);
223 } else {
224 // Arrange it so that the `next_block()` loop
225 // will continue to the next block that is not
226 // associated with the just-inlined callee.
227 cursor.goto_bottom(last_inlined_block);
228 continue 'block_loop;
229 }
230 }
231 }
232 }
233 ir::InstructionData::CallIndirect { .. }
234 | ir::InstructionData::TryCallIndirect { .. } => {
235 // Can't inline indirect calls; need to have some earlier
236 // pass rewrite them into direct calls first, when possible.
237 }
238 _ => {
239 debug_assert!(
240 !cursor.func.dfg.insts[inst].opcode().is_call(),
241 "should have matched all call instructions, but found: {inst}: {}",
242 cursor.func.dfg.display_inst(inst),
243 );
244 }
245 }
246 }
247 }
248
249 if inlined_any {
250 trace!("function {} after inlining: {}", func.name, func);
251 } else {
252 trace!("function {} did not have any callees inlined", func.name);
253 }
254
255 Ok(inlined_any)
256 }
257
/// Scratch state for inlining a single call.
///
/// One instance is created per `do_inlining` run and handed to every
/// `inline_one` invocation, which calls `reset` on it before use so that the
/// underlying storage can be reused from one inlined call to the next.
#[derive(Default)]
struct InliningAllocs {
    /// Map from callee value to inlined caller value.
    ///
    /// Entries are keyed by the alias-resolved callee value.
    values: SecondaryMap<ir::Value, PackedOption<ir::Value>>,

    /// Map from callee constant to inlined caller constant.
    ///
    /// Not in `EntityMap` because these are hash-consed inside the
    /// `ir::Function`.
    constants: SecondaryMap<ir::Constant, PackedOption<ir::Constant>>,

    /// Map from callee to inlined caller external name refs.
    ///
    /// Not in `EntityMap` because these are hash-consed inside the
    /// `ir::Function`.
    user_external_name_refs:
        SecondaryMap<ir::UserExternalNameRef, PackedOption<ir::UserExternalNameRef>>,

    /// The set of _caller_ inlined call instructions that need exception table
    /// fixups at the end of inlining.
    ///
    /// This includes all kinds of non-returning calls, not just the literal
    /// `call` instruction: `call_indirect`, `try_call`, `try_call_indirect`,
    /// etc... However, it does not include `return_call` and
    /// `return_call_indirect` instructions because the caller cannot catch
    /// exceptions that those calls throw because the caller is no longer on the
    /// stack as soon as they are executed.
    ///
    /// Note: this is a simple `Vec`, and not an `EntitySet`, because it is very
    /// sparse: most of the caller's instructions are not inlined call
    /// instructions. Additionally, we require deterministic iteration order and
    /// do not require set-membership testing, so a hash set is not a good
    /// choice either.
    calls_needing_exception_table_fixup: Vec<ir::Inst>,
}
293
294 impl InliningAllocs {
reset(&mut self, callee: &ir::Function)295 fn reset(&mut self, callee: &ir::Function) {
296 let InliningAllocs {
297 values,
298 constants,
299 user_external_name_refs,
300 calls_needing_exception_table_fixup,
301 } = self;
302
303 values.clear();
304 values.resize(callee.dfg.len_values());
305
306 constants.clear();
307 constants.resize(callee.dfg.constants.len());
308
309 user_external_name_refs.clear();
310 user_external_name_refs.resize(callee.params.user_named_funcs().len());
311
312 // Note: We do not reserve capacity for
313 // `calls_needing_exception_table_fixup` because it is a sparse set and
314 // we don't know how large it needs to be ahead of time.
315 calls_needing_exception_table_fixup.clear();
316 }
317
set_inlined_value( &mut self, callee: &ir::Function, callee_val: ir::Value, inlined_val: ir::Value, )318 fn set_inlined_value(
319 &mut self,
320 callee: &ir::Function,
321 callee_val: ir::Value,
322 inlined_val: ir::Value,
323 ) {
324 trace!(" --> callee {callee_val:?} = inlined {inlined_val:?}");
325 debug_assert!(self.values[callee_val].is_none());
326 let resolved_callee_val = callee.dfg.resolve_aliases(callee_val);
327 debug_assert!(self.values[resolved_callee_val].is_none());
328 self.values[resolved_callee_val] = Some(inlined_val).into();
329 }
330
get_inlined_value(&self, callee: &ir::Function, callee_val: ir::Value) -> Option<ir::Value>331 fn get_inlined_value(&self, callee: &ir::Function, callee_val: ir::Value) -> Option<ir::Value> {
332 let resolved_callee_val = callee.dfg.resolve_aliases(callee_val);
333 self.values[resolved_callee_val].expand()
334 }
335 }
336
337 /// Inline one particular function call.
338 ///
339 /// Returns the last inlined block in the layout.
inline_one( allocs: &mut InliningAllocs, func: &mut ir::Function, callee_func_ref: ir::FuncRef, call_block: ir::Block, call_inst: ir::Inst, call_opcode: ir::Opcode, callee: &ir::Function, call_exception_table: Option<ir::ExceptionTable>, ) -> ir::Block340 fn inline_one(
341 allocs: &mut InliningAllocs,
342 func: &mut ir::Function,
343 callee_func_ref: ir::FuncRef,
344 call_block: ir::Block,
345 call_inst: ir::Inst,
346 call_opcode: ir::Opcode,
347 callee: &ir::Function,
348 call_exception_table: Option<ir::ExceptionTable>,
349 ) -> ir::Block {
350 trace!(
351 "Inlining call {call_inst:?}: {}\n\
352 with callee = {callee:?}",
353 func.dfg.display_inst(call_inst)
354 );
355
356 // Type check callee signature.
357 let expected_callee_sig = func.dfg.ext_funcs[callee_func_ref].signature;
358 let expected_callee_sig = &func.dfg.signatures[expected_callee_sig];
359 assert_eq!(expected_callee_sig, &callee.signature);
360
361 allocs.reset(callee);
362
363 // First, append various callee entity arenas to the end of the caller's
364 // entity arenas.
365 let entity_map = create_entities(allocs, func, callee);
366
367 // Inlined prologue: split the call instruction's block at the point of the
368 // call and replace the call with a jump.
369 let return_block = split_off_return_block(func, call_inst, call_opcode, callee);
370 let call_stack_map = replace_call_with_jump(allocs, func, call_inst, callee, &entity_map);
371
372 // Prepare for translating the actual instructions by inserting the inlined
373 // blocks into the caller's layout in the same order that they appear in the
374 // callee.
375 let mut last_inlined_block = inline_block_layout(func, call_block, callee, &entity_map);
376
377 // Get a copy of debug tags on the call instruction; these are
378 // prepended to debug tags on inlined instructions. Remove them
379 // from the call itself as it will be rewritten to a jump (which
380 // cannot have tags).
381 let call_debug_tags = func.debug_tags.get(call_inst).to_vec();
382 func.debug_tags.set(call_inst, []);
383
384 // Translate each instruction from the callee into the caller,
385 // appending them to their associated block in the caller.
386 //
387 // Note that we iterate over the callee with a pre-order traversal so that
388 // we see value defs before uses.
389 for callee_block in Dfs::new().pre_order_iter(callee) {
390 let inlined_block = entity_map.inlined_block(callee_block);
391 trace!(
392 "Processing instructions in callee block {callee_block:?} (inlined block {inlined_block:?}"
393 );
394
395 let mut next_callee_inst = callee.layout.first_inst(callee_block);
396 while let Some(callee_inst) = next_callee_inst {
397 trace!(
398 "Processing callee instruction {callee_inst:?}: {}",
399 callee.dfg.display_inst(callee_inst)
400 );
401
402 assert_ne!(
403 callee.dfg.insts[callee_inst].opcode(),
404 ir::Opcode::GlobalValue,
405 "callee must already be legalized, we shouldn't see any `global_value` \
406 instructions when inlining; found {callee_inst:?}: {}",
407 callee.dfg.display_inst(callee_inst)
408 );
409
410 // Remap the callee instruction's entities and insert it into the
411 // caller's DFG.
412 let inlined_inst_data = callee.dfg.insts[callee_inst].map(InliningInstRemapper {
413 allocs: &allocs,
414 func,
415 callee,
416 entity_map: &entity_map,
417 });
418 let inlined_inst = func.dfg.make_inst(inlined_inst_data);
419 func.layout.append_inst(inlined_inst, inlined_block);
420
421 // Copy over debug tags, translating referenced entities
422 // as appropriate.
423 let debug_tags = callee.debug_tags.get(callee_inst);
424 // If there are tags on the inlined instruction, we always
425 // add tags, and we prepend any tags from the call
426 // instruction; but we don't add tags if only the callsite
427 // had them (this would otherwise mean that every single
428 // instruction in an inlined function body would get
429 // tags).
430 if !debug_tags.is_empty() {
431 let tags = call_debug_tags
432 .iter()
433 .cloned()
434 .chain(debug_tags.iter().map(|tag| match *tag {
435 DebugTag::User(value) => DebugTag::User(value),
436 DebugTag::StackSlot(slot) => {
437 DebugTag::StackSlot(entity_map.inlined_stack_slot(slot))
438 }
439 }))
440 .collect::<SmallVec<[_; 4]>>();
441 func.debug_tags.set(inlined_inst, tags);
442 }
443
444 let opcode = callee.dfg.insts[callee_inst].opcode();
445 if opcode.is_return() {
446 // Instructions that return do not define any values, so we
447 // don't need to worry about that, but we do need to fix them up
448 // so that they return by jumping to our control-flow join
449 // block, rather than returning from the caller.
450 if let Some(return_block) = return_block {
451 fixup_inst_that_returns(
452 allocs,
453 func,
454 callee,
455 &entity_map,
456 call_opcode,
457 inlined_inst,
458 callee_inst,
459 return_block,
460 call_stack_map.as_ref().map(|es| &**es),
461 );
462 } else {
463 // If we are inlining a callee that was invoked via
464 // `return_call`, we leave inlined return instructions
465 // as-is: there is no logical caller frame on the stack to
466 // continue to.
467 debug_assert_eq!(call_opcode, ir::Opcode::ReturnCall);
468 }
469 } else {
470 // Make the instruction's result values.
471 let ctrl_typevar = callee.dfg.ctrl_typevar(callee_inst);
472 func.dfg.make_inst_results(inlined_inst, ctrl_typevar);
473
474 // Update the value map for this instruction's defs.
475 let callee_results = callee.dfg.inst_results(callee_inst);
476 let inlined_results = func.dfg.inst_results(inlined_inst);
477 debug_assert_eq!(callee_results.len(), inlined_results.len());
478 for (callee_val, inlined_val) in callee_results.iter().zip(inlined_results) {
479 allocs.set_inlined_value(callee, *callee_val, *inlined_val);
480 }
481
482 if opcode.is_call() {
483 append_stack_map_entries(
484 func,
485 callee,
486 &entity_map,
487 call_stack_map.as_deref(),
488 inlined_inst,
489 callee_inst,
490 );
491
492 // When we are inlining a `try_call` call site, we need to merge
493 // the call site's exception table into the inlined calls'
494 // exception tables. This can involve rewriting regular `call`s
495 // into `try_call`s, which requires mutating the CFG because
496 // `try_call` is a block terminator. However, we can't mutate
497 // the CFG in the middle of this traversal because we rely on
498 // the existence of a one-to-one mapping between the callee
499 // layout and the inlined layout. Instead, we record the set of
500 // inlined call instructions that will need fixing up, and
501 // perform that possibly-CFG-mutating exception table merging in
502 // a follow up pass, when we no longer rely on that one-to-one
503 // layout mapping.
504 debug_assert_eq!(
505 call_opcode == ir::Opcode::TryCall,
506 call_exception_table.is_some()
507 );
508 if call_opcode == ir::Opcode::TryCall {
509 allocs
510 .calls_needing_exception_table_fixup
511 .push(inlined_inst);
512 }
513 }
514 }
515
516 trace!(
517 " --> inserted inlined instruction {inlined_inst:?}: {}",
518 func.dfg.display_inst(inlined_inst)
519 );
520
521 next_callee_inst = callee.layout.next_inst(callee_inst);
522 }
523 }
524
525 // We copied *all* callee blocks into the caller's layout, but only copied
526 // the callee instructions in *reachable* callee blocks into the caller's
527 // associated blocks. Therefore, any *unreachable* blocks are empty in the
528 // caller, which is invalid CLIF because all blocks must end in a
529 // terminator, so do a quick pass over the inlined blocks and remove any
530 // empty blocks from the caller's layout.
531 for block in entity_map.iter_inlined_blocks(func) {
532 if func.layout.is_block_inserted(block) && func.layout.first_inst(block).is_none() {
533 log::trace!("removing unreachable inlined block from layout: {block}");
534
535 // If the block being removed is our last-inlined block, then back
536 // it up to the previous block in the layout, which will be the new
537 // last-inlined block after this one's removal.
538 if block == last_inlined_block {
539 last_inlined_block = func.layout.prev_block(last_inlined_block).expect(
540 "there will always at least be the block that contained the call we are \
541 inlining",
542 );
543 }
544
545 func.layout.remove_block(block);
546 }
547 }
548
549 // Final step: fixup the exception tables of any inlined calls when we are
550 // inlining a `try_call` site.
551 //
552 // Subtly, this requires rewriting non-catching `call[_indirect]`
553 // instructions into `try_call[_indirect]` instructions so that exceptions
554 // that unwound through the original callee frame and were caught by the
555 // caller's `try_call` do not unwind past this inlined frame. And turning a
556 // `call` into a `try_call` mutates the CFG, breaking our one-to-one mapping
557 // between callee blocks and inlined blocks, so we delay these fixups to
558 // this final step, when we no longer rely on that mapping.
559 debug_assert!(
560 allocs.calls_needing_exception_table_fixup.is_empty() || call_exception_table.is_some()
561 );
562 debug_assert_eq!(
563 call_opcode == ir::Opcode::TryCall,
564 call_exception_table.is_some()
565 );
566 if let Some(call_exception_table) = call_exception_table {
567 fixup_inlined_call_exception_tables(allocs, func, call_exception_table);
568 }
569
570 debug_assert!(
571 func.layout.is_block_inserted(last_inlined_block),
572 "last_inlined_block={last_inlined_block} should be inserted in the layout"
573 );
574 last_inlined_block
575 }
576
577 /// Append stack map entries from the caller and callee to the given inlined
578 /// instruction.
append_stack_map_entries( func: &mut ir::Function, callee: &ir::Function, entity_map: &EntityMap, call_stack_map: Option<&[ir::UserStackMapEntry]>, inlined_inst: ir::Inst, callee_inst: ir::Inst, )579 fn append_stack_map_entries(
580 func: &mut ir::Function,
581 callee: &ir::Function,
582 entity_map: &EntityMap,
583 call_stack_map: Option<&[ir::UserStackMapEntry]>,
584 inlined_inst: ir::Inst,
585 callee_inst: ir::Inst,
586 ) {
587 // Add the caller's stack map to this call. These entries
588 // already refer to caller entities and do not need further
589 // translation.
590 func.dfg.append_user_stack_map_entries(
591 inlined_inst,
592 call_stack_map
593 .iter()
594 .flat_map(|entries| entries.iter().cloned()),
595 );
596
597 // Append the callee's stack map to this call. These entries
598 // refer to callee entities and therefore do require
599 // translation into the caller's index space.
600 func.dfg.append_user_stack_map_entries(
601 inlined_inst,
602 callee
603 .dfg
604 .user_stack_map_entries(callee_inst)
605 .iter()
606 .flat_map(|entries| entries.iter())
607 .map(|entry| ir::UserStackMapEntry {
608 ty: entry.ty,
609 slot: entity_map.inlined_stack_slot(entry.slot),
610 offset: entry.offset,
611 }),
612 );
613 }
614
615 /// Create or update the exception tables for any inlined call instructions:
616 /// when inlining at a `try_call` site, we must forward our exceptional edges
617 /// into each inlined call instruction.
fixup_inlined_call_exception_tables( allocs: &mut InliningAllocs, func: &mut ir::Function, call_exception_table: ir::ExceptionTable, )618 fn fixup_inlined_call_exception_tables(
619 allocs: &mut InliningAllocs,
620 func: &mut ir::Function,
621 call_exception_table: ir::ExceptionTable,
622 ) {
623 // Split a block at a `call[_indirect]` instruction, detach the
624 // instruction's results, and alias them to the new block's parameters.
625 let split_block_for_new_try_call = |func: &mut ir::Function, inst: ir::Inst| -> ir::Block {
626 debug_assert!(func.dfg.insts[inst].opcode().is_call());
627 debug_assert!(!func.dfg.insts[inst].opcode().is_terminator());
628
629 // Split the block.
630 let next_inst = func
631 .layout
632 .next_inst(inst)
633 .expect("inst is not a terminator, should have a successor");
634 let new_block = func.dfg.blocks.add();
635 func.layout.split_block(new_block, next_inst);
636
637 // `try_call[_indirect]` instructions do not define values themselves;
638 // the normal-return block has parameters for the results. So remove
639 // this instruction's results, create an associated block parameter for
640 // each of them, and alias them to the new block parameter.
641 let old_results = SmallValueVec::from_iter(func.dfg.inst_results(inst).iter().copied());
642 func.dfg.detach_inst_results(inst);
643 for old_result in old_results {
644 let ty = func.dfg.value_type(old_result);
645 let new_block_param = func.dfg.append_block_param(new_block, ty);
646 func.dfg.change_to_alias(old_result, new_block_param);
647 }
648
649 new_block
650 };
651
652 // Clone the caller's exception table, updating it for use in the current
653 // `call[_indirect]` instruction as it becomes a `try_call[_indirect]`.
654 let clone_exception_table_for_this_call = |func: &mut ir::Function,
655 signature: ir::SigRef,
656 new_block: ir::Block|
657 -> ir::ExceptionTable {
658 let mut exception = func.stencil.dfg.exception_tables[call_exception_table]
659 .deep_clone(&mut func.stencil.dfg.value_lists);
660
661 *exception.signature_mut() = signature;
662
663 let returns_len = func.dfg.signatures[signature].returns.len();
664 let returns_len = u32::try_from(returns_len).unwrap();
665
666 *exception.normal_return_mut() = ir::BlockCall::new(
667 new_block,
668 (0..returns_len).map(|i| ir::BlockArg::TryCallRet(i)),
669 &mut func.dfg.value_lists,
670 );
671
672 func.dfg.exception_tables.push(exception)
673 };
674
675 for inst in allocs.calls_needing_exception_table_fixup.drain(..) {
676 debug_assert!(func.dfg.insts[inst].opcode().is_call());
677 debug_assert!(!func.dfg.insts[inst].opcode().is_return());
678 match func.dfg.insts[inst] {
679 // current_block:
680 // preds...
681 // rets... = call f(args...)
682 // succs...
683 //
684 // becomes
685 //
686 // current_block:
687 // preds...
688 // try_call f(args...), new_block(rets...), [call_exception_table...]
689 // new_block(rets...):
690 // succs...
691 ir::InstructionData::Call {
692 opcode: ir::Opcode::Call,
693 args,
694 func_ref,
695 } => {
696 let new_block = split_block_for_new_try_call(func, inst);
697 let signature = func.dfg.ext_funcs[func_ref].signature;
698 let exception = clone_exception_table_for_this_call(func, signature, new_block);
699 func.dfg.insts[inst] = ir::InstructionData::TryCall {
700 opcode: ir::Opcode::TryCall,
701 args,
702 func_ref,
703 exception,
704 };
705 }
706
707 // current_block:
708 // preds...
709 // rets... = call_indirect sig, val(args...)
710 // succs...
711 //
712 // becomes
713 //
714 // current_block:
715 // preds...
716 // try_call_indirect sig, val(args...), new_block(rets...), [call_exception_table...]
717 // new_block(rets...):
718 // succs...
719 ir::InstructionData::CallIndirect {
720 opcode: ir::Opcode::CallIndirect,
721 args,
722 sig_ref,
723 } => {
724 let new_block = split_block_for_new_try_call(func, inst);
725 let exception = clone_exception_table_for_this_call(func, sig_ref, new_block);
726 func.dfg.insts[inst] = ir::InstructionData::TryCallIndirect {
727 opcode: ir::Opcode::TryCallIndirect,
728 args,
729 exception,
730 };
731 }
732
733 // For `try_call[_indirect]` instructions, we just need to merge the
734 // exception tables.
735 ir::InstructionData::TryCall {
736 opcode: ir::Opcode::TryCall,
737 exception,
738 ..
739 }
740 | ir::InstructionData::TryCallIndirect {
741 opcode: ir::Opcode::TryCallIndirect,
742 exception,
743 ..
744 } => {
745 // Construct a new exception table that consists of
746 // the inlined instruction's exception table match
747 // sequence, with the inlining site's exception table
748 // appended. This will ensure that the first-match
749 // semantics emulates the original behavior of
750 // matching in the inner frame first.
751 let sig = func.dfg.exception_tables[exception].signature();
752 let normal_return = *func.dfg.exception_tables[exception].normal_return();
753 let exception_data = ExceptionTableData::new(
754 sig,
755 normal_return,
756 func.dfg.exception_tables[exception]
757 .items()
758 .chain(func.dfg.exception_tables[call_exception_table].items()),
759 )
760 .deep_clone(&mut func.dfg.value_lists);
761
762 func.dfg.exception_tables[exception] = exception_data;
763 }
764
765 otherwise => unreachable!("unknown non-return call instruction: {otherwise:?}"),
766 }
767 }
768 }
769
770 /// After having created an inlined version of a callee instruction that returns
771 /// in the caller, we need to fix it up so that it doesn't actually return
772 /// (since we are already in the caller's frame) and instead just jumps to the
773 /// control-flow join point.
fixup_inst_that_returns( allocs: &mut InliningAllocs, func: &mut ir::Function, callee: &ir::Function, entity_map: &EntityMap, call_opcode: ir::Opcode, inlined_inst: ir::Inst, callee_inst: ir::Inst, return_block: ir::Block, call_stack_map: Option<&[ir::UserStackMapEntry]>, )774 fn fixup_inst_that_returns(
775 allocs: &mut InliningAllocs,
776 func: &mut ir::Function,
777 callee: &ir::Function,
778 entity_map: &EntityMap,
779 call_opcode: ir::Opcode,
780 inlined_inst: ir::Inst,
781 callee_inst: ir::Inst,
782 return_block: ir::Block,
783 call_stack_map: Option<&[ir::UserStackMapEntry]>,
784 ) {
785 debug_assert!(func.dfg.insts[inlined_inst].opcode().is_return());
786 match func.dfg.insts[inlined_inst] {
787 // return rets...
788 //
789 // becomes
790 //
791 // jump return_block(rets...)
792 ir::InstructionData::MultiAry {
793 opcode: ir::Opcode::Return,
794 args,
795 } => {
796 let rets = SmallBlockArgVec::from_iter(
797 args.as_slice(&func.dfg.value_lists)
798 .iter()
799 .copied()
800 .map(|v| v.into()),
801 );
802 func.dfg.replace(inlined_inst).jump(return_block, &rets);
803 }
804
805 // return_call f(args...)
806 //
807 // becomes
808 //
809 // rets... = call f(args...)
810 // jump return_block(rets...)
811 ir::InstructionData::Call {
812 opcode: ir::Opcode::ReturnCall,
813 args,
814 func_ref,
815 } => {
816 func.dfg.insts[inlined_inst] = ir::InstructionData::Call {
817 opcode: ir::Opcode::Call,
818 args,
819 func_ref,
820 };
821 func.dfg.make_inst_results(inlined_inst, ir::types::INVALID);
822
823 append_stack_map_entries(
824 func,
825 callee,
826 &entity_map,
827 call_stack_map,
828 inlined_inst,
829 callee_inst,
830 );
831
832 let rets = SmallBlockArgVec::from_iter(
833 func.dfg
834 .inst_results(inlined_inst)
835 .iter()
836 .copied()
837 .map(|v| v.into()),
838 );
839 let mut cursor = FuncCursor::new(func);
840 cursor.goto_after_inst(inlined_inst);
841 cursor.ins().jump(return_block, &rets);
842
843 if call_opcode == ir::Opcode::TryCall {
844 allocs
845 .calls_needing_exception_table_fixup
846 .push(inlined_inst);
847 }
848 }
849
850 // return_call_indirect val(args...)
851 //
852 // becomes
853 //
854 // rets... = call_indirect val(args...)
855 // jump return_block(rets...)
856 ir::InstructionData::CallIndirect {
857 opcode: ir::Opcode::ReturnCallIndirect,
858 args,
859 sig_ref,
860 } => {
861 func.dfg.insts[inlined_inst] = ir::InstructionData::CallIndirect {
862 opcode: ir::Opcode::CallIndirect,
863 args,
864 sig_ref,
865 };
866 func.dfg.make_inst_results(inlined_inst, ir::types::INVALID);
867
868 append_stack_map_entries(
869 func,
870 callee,
871 &entity_map,
872 call_stack_map,
873 inlined_inst,
874 callee_inst,
875 );
876
877 let rets = SmallBlockArgVec::from_iter(
878 func.dfg
879 .inst_results(inlined_inst)
880 .iter()
881 .copied()
882 .map(|v| v.into()),
883 );
884 let mut cursor = FuncCursor::new(func);
885 cursor.goto_after_inst(inlined_inst);
886 cursor.ins().jump(return_block, &rets);
887
888 if call_opcode == ir::Opcode::TryCall {
889 allocs
890 .calls_needing_exception_table_fixup
891 .push(inlined_inst);
892 }
893 }
894
895 inst_data => unreachable!(
896 "should have handled all `is_return() == true` instructions above; \
897 got {inst_data:?}"
898 ),
899 }
900 }
901
/// An `InstructionMapper` implementation that remaps a callee instruction's
/// entity references to their new indices in the caller function.
struct InliningInstRemapper<'a> {
    /// Lookup tables for the callee entities that are mapped on demand rather
    /// than by offset: inlined values and inlined constants.
    allocs: &'a InliningAllocs,
    /// The caller function. Remapping allocates new value lists, jump tables,
    /// and exception tables in its data-flow graph.
    func: &'a mut ir::Function,
    /// The callee function whose entity references are being remapped.
    callee: &'a ir::Function,
    /// Offset-based mapping from callee entities (blocks, global values,
    /// signatures, ...) to their inlined copies in the caller.
    entity_map: &'a EntityMap,
}
910
911 impl<'a> ir::instructions::InstructionMapper for InliningInstRemapper<'a> {
map_value(&mut self, value: ir::Value) -> ir::Value912 fn map_value(&mut self, value: ir::Value) -> ir::Value {
913 self.allocs.get_inlined_value(self.callee, value).expect(
914 "defs come before uses; we should have already inlined all values \
915 used by an instruction",
916 )
917 }
918
map_value_list(&mut self, value_list: ir::ValueList) -> ir::ValueList919 fn map_value_list(&mut self, value_list: ir::ValueList) -> ir::ValueList {
920 let mut inlined_list = ir::ValueList::new();
921 for callee_val in value_list.as_slice(&self.callee.dfg.value_lists) {
922 let inlined_val = self.map_value(*callee_val);
923 inlined_list.push(inlined_val, &mut self.func.dfg.value_lists);
924 }
925 inlined_list
926 }
927
map_global_value(&mut self, global_value: ir::GlobalValue) -> ir::GlobalValue928 fn map_global_value(&mut self, global_value: ir::GlobalValue) -> ir::GlobalValue {
929 self.entity_map.inlined_global_value(global_value)
930 }
931
map_jump_table(&mut self, jump_table: ir::JumpTable) -> ir::JumpTable932 fn map_jump_table(&mut self, jump_table: ir::JumpTable) -> ir::JumpTable {
933 let inlined_default =
934 self.map_block_call(self.callee.dfg.jump_tables[jump_table].default_block());
935 let inlined_table = self.callee.dfg.jump_tables[jump_table]
936 .as_slice()
937 .iter()
938 .map(|callee_block_call| self.map_block_call(*callee_block_call))
939 .collect::<SmallBlockCallVec>();
940 self.func
941 .dfg
942 .jump_tables
943 .push(ir::JumpTableData::new(inlined_default, &inlined_table))
944 }
945
map_exception_table(&mut self, exception_table: ir::ExceptionTable) -> ir::ExceptionTable946 fn map_exception_table(&mut self, exception_table: ir::ExceptionTable) -> ir::ExceptionTable {
947 let exception_table = &self.callee.dfg.exception_tables[exception_table];
948 let inlined_sig_ref = self.map_sig_ref(exception_table.signature());
949 let inlined_normal_return = self.map_block_call(*exception_table.normal_return());
950 let inlined_table = exception_table
951 .items()
952 .map(|item| match item {
953 ExceptionTableItem::Tag(tag, block_call) => {
954 ExceptionTableItem::Tag(tag, self.map_block_call(block_call))
955 }
956 ExceptionTableItem::Default(block_call) => {
957 ExceptionTableItem::Default(self.map_block_call(block_call))
958 }
959 ExceptionTableItem::Context(value) => {
960 ExceptionTableItem::Context(self.map_value(value))
961 }
962 })
963 .collect::<SmallVec<[_; 8]>>();
964 self.func
965 .dfg
966 .exception_tables
967 .push(ir::ExceptionTableData::new(
968 inlined_sig_ref,
969 inlined_normal_return,
970 inlined_table,
971 ))
972 }
973
map_block_call(&mut self, block_call: ir::BlockCall) -> ir::BlockCall974 fn map_block_call(&mut self, block_call: ir::BlockCall) -> ir::BlockCall {
975 let callee_block = block_call.block(&self.callee.dfg.value_lists);
976 let inlined_block = self.entity_map.inlined_block(callee_block);
977 let args = block_call
978 .args(&self.callee.dfg.value_lists)
979 .map(|arg| match arg {
980 ir::BlockArg::Value(value) => self.map_value(value).into(),
981 ir::BlockArg::TryCallRet(_) | ir::BlockArg::TryCallExn(_) => arg,
982 })
983 .collect::<SmallBlockArgVec>();
984 ir::BlockCall::new(inlined_block, args, &mut self.func.dfg.value_lists)
985 }
986
map_block(&mut self, block: ir::Block) -> ir::Block987 fn map_block(&mut self, block: ir::Block) -> ir::Block {
988 self.entity_map.inlined_block(block)
989 }
990
map_func_ref(&mut self, func_ref: ir::FuncRef) -> ir::FuncRef991 fn map_func_ref(&mut self, func_ref: ir::FuncRef) -> ir::FuncRef {
992 self.entity_map.inlined_func_ref(func_ref)
993 }
994
map_sig_ref(&mut self, sig_ref: ir::SigRef) -> ir::SigRef995 fn map_sig_ref(&mut self, sig_ref: ir::SigRef) -> ir::SigRef {
996 self.entity_map.inlined_sig_ref(sig_ref)
997 }
998
map_stack_slot(&mut self, stack_slot: ir::StackSlot) -> ir::StackSlot999 fn map_stack_slot(&mut self, stack_slot: ir::StackSlot) -> ir::StackSlot {
1000 self.entity_map.inlined_stack_slot(stack_slot)
1001 }
1002
map_dynamic_stack_slot( &mut self, dynamic_stack_slot: ir::DynamicStackSlot, ) -> ir::DynamicStackSlot1003 fn map_dynamic_stack_slot(
1004 &mut self,
1005 dynamic_stack_slot: ir::DynamicStackSlot,
1006 ) -> ir::DynamicStackSlot {
1007 self.entity_map
1008 .inlined_dynamic_stack_slot(dynamic_stack_slot)
1009 }
1010
map_constant(&mut self, constant: ir::Constant) -> ir::Constant1011 fn map_constant(&mut self, constant: ir::Constant) -> ir::Constant {
1012 self.allocs
1013 .constants
1014 .get(constant)
1015 .and_then(|o| o.expand())
1016 .expect("should have inlined all callee constants")
1017 }
1018
map_immediate(&mut self, immediate: ir::Immediate) -> ir::Immediate1019 fn map_immediate(&mut self, immediate: ir::Immediate) -> ir::Immediate {
1020 self.entity_map.inlined_immediate(immediate)
1021 }
1022 }
1023
1024 /// Inline the callee's layout into the caller's layout.
1025 ///
1026 /// Returns the last inlined block in the layout.
inline_block_layout( func: &mut ir::Function, call_block: ir::Block, callee: &ir::Function, entity_map: &EntityMap, ) -> ir::Block1027 fn inline_block_layout(
1028 func: &mut ir::Function,
1029 call_block: ir::Block,
1030 callee: &ir::Function,
1031 entity_map: &EntityMap,
1032 ) -> ir::Block {
1033 debug_assert!(func.layout.is_block_inserted(call_block));
1034
1035 // Iterate over callee blocks in layout order, inserting their associated
1036 // inlined block into the caller's layout.
1037 let mut prev_inlined_block = call_block;
1038 let mut next_callee_block = callee.layout.entry_block();
1039 while let Some(callee_block) = next_callee_block {
1040 debug_assert!(func.layout.is_block_inserted(prev_inlined_block));
1041
1042 let inlined_block = entity_map.inlined_block(callee_block);
1043 func.layout
1044 .insert_block_after(inlined_block, prev_inlined_block);
1045
1046 prev_inlined_block = inlined_block;
1047 next_callee_block = callee.layout.next_block(callee_block);
1048 }
1049
1050 debug_assert!(func.layout.is_block_inserted(prev_inlined_block));
1051 prev_inlined_block
1052 }
1053
1054 /// Split the call instruction's block just after the call instruction to create
1055 /// the point where control-flow joins after the inlined callee "returns".
1056 ///
1057 /// Note that tail calls do not return to the caller and therefore do not have a
1058 /// control-flow join point.
split_off_return_block( func: &mut ir::Function, call_inst: ir::Inst, opcode: ir::Opcode, callee: &ir::Function, ) -> Option<ir::Block>1059 fn split_off_return_block(
1060 func: &mut ir::Function,
1061 call_inst: ir::Inst,
1062 opcode: ir::Opcode,
1063 callee: &ir::Function,
1064 ) -> Option<ir::Block> {
1065 // When the `call_inst` is not a block terminator, we need to split the
1066 // block.
1067 let return_block = func.layout.next_inst(call_inst).map(|next_inst| {
1068 let return_block = func.dfg.blocks.add();
1069 func.layout.split_block(return_block, next_inst);
1070
1071 // Add block parameters for each return value and alias the call
1072 // instruction's results to them.
1073 let old_results =
1074 SmallValueVec::from_iter(func.dfg.inst_results(call_inst).iter().copied());
1075 debug_assert_eq!(old_results.len(), callee.signature.returns.len());
1076 func.dfg.detach_inst_results(call_inst);
1077 for (abi, old_val) in callee.signature.returns.iter().zip(old_results) {
1078 debug_assert_eq!(abi.value_type, func.dfg.value_type(old_val));
1079 let ret_param = func.dfg.append_block_param(return_block, abi.value_type);
1080 func.dfg.change_to_alias(old_val, ret_param);
1081 }
1082
1083 return_block
1084 });
1085
1086 // When the `call_inst` is a block terminator, then it is either a
1087 // `return_call` or a `try_call`:
1088 //
1089 // * For `return_call`s, we don't have a control-flow join point, because
1090 // the caller permanently transfers control to the callee.
1091 //
1092 // * For `try_call`s, we probably already have a block for the control-flow
1093 // join point, but it isn't guaranteed: the `try_call` might ignore the
1094 // call's returns and not forward them to the normal-return block or it
1095 // might also pass additional arguments. We can only reuse the existing
1096 // normal-return block when the `try_call` forwards exactly our callee's
1097 // returns to that block (and therefore that block's parameter types also
1098 // exactly match the callee's return types). Otherwise, we must create a new
1099 // return block that forwards to the existing normal-return
1100 // block. (Elsewhere, at the end of inlining, we will also update any inlined
1101 // calls to forward any raised exceptions to the caller's exception table,
1102 // as necessary.)
1103 //
1104 // Finally, note that reusing the normal-return's target block is just an
1105 // optimization to emit a simpler CFG when we can, and is not
1106 // fundamentally required for correctness. We could always insert a
1107 // temporary block as our control-flow join point that then forwards to
1108 // the normal-return's target block. However, at the time of writing,
1109 // Cranelift doesn't currently do any jump-threading or branch
1110 // simplification in the mid-end, and removing unnecessary blocks in this
1111 // way can help some subsequent mid-end optimizations. If, in the future,
1112 // we gain support for jump-threading optimizations in the mid-end, we can
1113 // come back and simplify the below code a bit to always generate the
1114 // temporary block, and then rely on the subsequent optimizations to clean
1115 // everything up.
1116 debug_assert_eq!(
1117 return_block.is_none(),
1118 opcode == ir::Opcode::ReturnCall || opcode == ir::Opcode::TryCall,
1119 );
1120 return_block.or_else(|| match func.dfg.insts[call_inst] {
1121 ir::InstructionData::TryCall {
1122 opcode: ir::Opcode::TryCall,
1123 args: _,
1124 func_ref: _,
1125 exception,
1126 } => {
1127 let normal_return = func.dfg.exception_tables[exception].normal_return();
1128 let normal_return_block = normal_return.block(&func.dfg.value_lists);
1129
1130 // Check to see if we can reuse the existing normal-return block.
1131 {
1132 let normal_return_args = normal_return.args(&func.dfg.value_lists);
1133 if normal_return_args.len() == callee.signature.returns.len()
1134 && normal_return_args.enumerate().all(|(i, arg)| {
1135 let i = u32::try_from(i).unwrap();
1136 arg == ir::BlockArg::TryCallRet(i)
1137 })
1138 {
1139 return Some(normal_return_block);
1140 }
1141 }
1142
1143 // Okay, we cannot reuse the normal-return block. Create a new block
1144 // that has the expected block parameter types and have it jump to
1145 // the normal-return block.
1146 let return_block = func.dfg.blocks.add();
1147 func.layout.insert_block(return_block, normal_return_block);
1148
1149 let return_block_params = callee
1150 .signature
1151 .returns
1152 .iter()
1153 .map(|abi| func.dfg.append_block_param(return_block, abi.value_type))
1154 .collect::<SmallValueVec>();
1155
1156 let normal_return_args = func.dfg.exception_tables[exception]
1157 .normal_return()
1158 .args(&func.dfg.value_lists)
1159 .collect::<SmallBlockArgVec>();
1160 let jump_args = normal_return_args
1161 .into_iter()
1162 .map(|arg| match arg {
1163 ir::BlockArg::Value(value) => ir::BlockArg::Value(value),
1164 ir::BlockArg::TryCallRet(i) => {
1165 let i = usize::try_from(i).unwrap();
1166 ir::BlockArg::Value(return_block_params[i])
1167 }
1168 ir::BlockArg::TryCallExn(_) => {
1169 unreachable!("normal-return edges cannot use exceptional results")
1170 }
1171 })
1172 .collect::<SmallBlockArgVec>();
1173
1174 let mut cursor = FuncCursor::new(func);
1175 cursor.goto_first_insertion_point(return_block);
1176 cursor.ins().jump(normal_return_block, &jump_args);
1177
1178 Some(return_block)
1179 }
1180 _ => None,
1181 })
1182 }
1183
1184 /// Replace the caller's call instruction with a jump to the caller's inlined
1185 /// copy of the callee's entry block.
1186 ///
1187 /// Also associates the callee's parameters with the caller's arguments in our
1188 /// value map.
1189 ///
1190 /// Returns the caller's stack map entries, if any.
replace_call_with_jump( allocs: &mut InliningAllocs, func: &mut ir::Function, call_inst: ir::Inst, callee: &ir::Function, entity_map: &EntityMap, ) -> Option<ir::UserStackMapEntryVec>1191 fn replace_call_with_jump(
1192 allocs: &mut InliningAllocs,
1193 func: &mut ir::Function,
1194 call_inst: ir::Inst,
1195 callee: &ir::Function,
1196 entity_map: &EntityMap,
1197 ) -> Option<ir::UserStackMapEntryVec> {
1198 trace!("Replacing `call` with `jump`");
1199 trace!(
1200 " --> call instruction: {call_inst:?}: {}",
1201 func.dfg.display_inst(call_inst)
1202 );
1203
1204 let callee_entry_block = callee
1205 .layout
1206 .entry_block()
1207 .expect("callee function should have an entry block");
1208 let callee_param_values = callee.dfg.block_params(callee_entry_block);
1209 let caller_arg_values = SmallValueVec::from_iter(func.dfg.inst_args(call_inst).iter().copied());
1210 debug_assert_eq!(callee_param_values.len(), caller_arg_values.len());
1211 debug_assert_eq!(callee_param_values.len(), callee.signature.params.len());
1212 for (abi, (callee_param_value, caller_arg_value)) in callee
1213 .signature
1214 .params
1215 .iter()
1216 .zip(callee_param_values.into_iter().zip(caller_arg_values))
1217 {
1218 debug_assert_eq!(abi.value_type, callee.dfg.value_type(*callee_param_value));
1219 debug_assert_eq!(abi.value_type, func.dfg.value_type(caller_arg_value));
1220 allocs.set_inlined_value(callee, *callee_param_value, caller_arg_value);
1221 }
1222
1223 // Replace the caller's call instruction with a jump to the caller's inlined
1224 // copy of the callee's entry block.
1225 //
1226 // Note that the call block dominates the inlined entry block (and also all
1227 // other inlined blocks) so we can reference the arguments directly, and do
1228 // not need to add block parameters to the inlined entry block.
1229 let inlined_entry_block = entity_map.inlined_block(callee_entry_block);
1230 func.dfg.replace(call_inst).jump(inlined_entry_block, &[]);
1231 trace!(
1232 " --> replaced with jump instruction: {call_inst:?}: {}",
1233 func.dfg.display_inst(call_inst)
1234 );
1235
1236 let stack_map_entries = func.dfg.take_user_stack_map_entries(call_inst);
1237 stack_map_entries
1238 }
1239
/// Keeps track of mapping callee entities to their associated inlined caller
/// entities.
///
/// Every field starts out as `None` (via `Default`) and is filled in by
/// `create_entities`; the `inlined_*` accessors panic if the corresponding
/// offset has not been set yet.
#[derive(Default)]
struct EntityMap {
    // Rather than doing an implicit, demand-based, DCE'ing translation of
    // entities, which would require maps from each callee entity to its
    // associated caller entity, we copy all entities into the caller, remember
    // each entity's initial offset, and then mapping from the callee to the
    // inlined caller entity is just adding that initial offset to the callee's
    // index. This should be both faster and simpler than the alternative. Most
    // of these sets are relatively small, and they rarely have too much dead
    // code in practice, so this is a good trade off.
    //
    // Note that there are a few kinds of entities that are excluded from the
    // `EntityMap`, and for which we do actually take the demand-based approach:
    // values and value lists being the notable ones.
    /// Offset added to a callee `ir::Block` index to get its inlined block.
    block_offset: Option<u32>,
    /// Offset added to a callee `ir::GlobalValue` index.
    global_value_offset: Option<u32>,
    /// Offset added to a callee `ir::SigRef` index.
    sig_ref_offset: Option<u32>,
    /// Offset added to a callee `ir::FuncRef` index.
    func_ref_offset: Option<u32>,
    /// Offset added to a callee `ir::StackSlot` index.
    stack_slot_offset: Option<u32>,
    /// Offset added to a callee `ir::DynamicType` index.
    dynamic_type_offset: Option<u32>,
    /// Offset added to a callee `ir::DynamicStackSlot` index.
    dynamic_stack_slot_offset: Option<u32>,
    /// Offset added to a callee `ir::Immediate` index.
    immediate_offset: Option<u32>,
}
1265
1266 impl EntityMap {
inlined_block(&self, callee_block: ir::Block) -> ir::Block1267 fn inlined_block(&self, callee_block: ir::Block) -> ir::Block {
1268 let offset = self
1269 .block_offset
1270 .expect("must create inlined `ir::Block`s before calling `EntityMap::inlined_block`");
1271 ir::Block::from_u32(offset + callee_block.as_u32())
1272 }
1273
iter_inlined_blocks(&self, func: &ir::Function) -> impl Iterator<Item = ir::Block> + use<>1274 fn iter_inlined_blocks(&self, func: &ir::Function) -> impl Iterator<Item = ir::Block> + use<> {
1275 let start = self.block_offset.expect(
1276 "must create inlined `ir::Block`s before calling `EntityMap::iter_inlined_blocks`",
1277 );
1278
1279 let end = func.dfg.blocks.len();
1280 let end = u32::try_from(end).unwrap();
1281
1282 (start..end).map(|i| ir::Block::from_u32(i))
1283 }
1284
inlined_global_value(&self, callee_global_value: ir::GlobalValue) -> ir::GlobalValue1285 fn inlined_global_value(&self, callee_global_value: ir::GlobalValue) -> ir::GlobalValue {
1286 let offset = self
1287 .global_value_offset
1288 .expect("must create inlined `ir::GlobalValue`s before calling `EntityMap::inlined_global_value`");
1289 ir::GlobalValue::from_u32(offset + callee_global_value.as_u32())
1290 }
1291
inlined_sig_ref(&self, callee_sig_ref: ir::SigRef) -> ir::SigRef1292 fn inlined_sig_ref(&self, callee_sig_ref: ir::SigRef) -> ir::SigRef {
1293 let offset = self.sig_ref_offset.expect(
1294 "must create inlined `ir::SigRef`s before calling `EntityMap::inlined_sig_ref`",
1295 );
1296 ir::SigRef::from_u32(offset + callee_sig_ref.as_u32())
1297 }
1298
inlined_func_ref(&self, callee_func_ref: ir::FuncRef) -> ir::FuncRef1299 fn inlined_func_ref(&self, callee_func_ref: ir::FuncRef) -> ir::FuncRef {
1300 let offset = self.func_ref_offset.expect(
1301 "must create inlined `ir::FuncRef`s before calling `EntityMap::inlined_func_ref`",
1302 );
1303 ir::FuncRef::from_u32(offset + callee_func_ref.as_u32())
1304 }
1305
inlined_stack_slot(&self, callee_stack_slot: ir::StackSlot) -> ir::StackSlot1306 fn inlined_stack_slot(&self, callee_stack_slot: ir::StackSlot) -> ir::StackSlot {
1307 let offset = self.stack_slot_offset.expect(
1308 "must create inlined `ir::StackSlot`s before calling `EntityMap::inlined_stack_slot`",
1309 );
1310 ir::StackSlot::from_u32(offset + callee_stack_slot.as_u32())
1311 }
1312
inlined_dynamic_type(&self, callee_dynamic_type: ir::DynamicType) -> ir::DynamicType1313 fn inlined_dynamic_type(&self, callee_dynamic_type: ir::DynamicType) -> ir::DynamicType {
1314 let offset = self.dynamic_type_offset.expect(
1315 "must create inlined `ir::DynamicType`s before calling `EntityMap::inlined_dynamic_type`",
1316 );
1317 ir::DynamicType::from_u32(offset + callee_dynamic_type.as_u32())
1318 }
1319
inlined_dynamic_stack_slot( &self, callee_dynamic_stack_slot: ir::DynamicStackSlot, ) -> ir::DynamicStackSlot1320 fn inlined_dynamic_stack_slot(
1321 &self,
1322 callee_dynamic_stack_slot: ir::DynamicStackSlot,
1323 ) -> ir::DynamicStackSlot {
1324 let offset = self.dynamic_stack_slot_offset.expect(
1325 "must create inlined `ir::DynamicStackSlot`s before calling `EntityMap::inlined_dynamic_stack_slot`",
1326 );
1327 ir::DynamicStackSlot::from_u32(offset + callee_dynamic_stack_slot.as_u32())
1328 }
1329
inlined_immediate(&self, callee_immediate: ir::Immediate) -> ir::Immediate1330 fn inlined_immediate(&self, callee_immediate: ir::Immediate) -> ir::Immediate {
1331 let offset = self.immediate_offset.expect(
1332 "must create inlined `ir::Immediate`s before calling `EntityMap::inlined_immediate`",
1333 );
1334 ir::Immediate::from_u32(offset + callee_immediate.as_u32())
1335 }
1336 }
1337
1338 /// Translate all of the callee's various entities into the caller, producing an
1339 /// `EntityMap` that can be used to translate callee entity references into
1340 /// inlined caller entity references.
create_entities( allocs: &mut InliningAllocs, func: &mut ir::Function, callee: &ir::Function, ) -> EntityMap1341 fn create_entities(
1342 allocs: &mut InliningAllocs,
1343 func: &mut ir::Function,
1344 callee: &ir::Function,
1345 ) -> EntityMap {
1346 let mut entity_map = EntityMap::default();
1347
1348 entity_map.block_offset = Some(create_blocks(allocs, func, callee));
1349 entity_map.global_value_offset = Some(create_global_values(func, callee));
1350 entity_map.sig_ref_offset = Some(create_sig_refs(func, callee));
1351 create_user_external_name_refs(allocs, func, callee);
1352 entity_map.func_ref_offset = Some(create_func_refs(allocs, func, callee, &entity_map));
1353 entity_map.stack_slot_offset = Some(create_stack_slots(func, callee));
1354 entity_map.dynamic_type_offset = Some(create_dynamic_types(func, callee, &entity_map));
1355 entity_map.dynamic_stack_slot_offset =
1356 Some(create_dynamic_stack_slots(func, callee, &entity_map));
1357 entity_map.immediate_offset = Some(create_immediates(func, callee));
1358
1359 // `ir::ConstantData` is deduplicated, so we cannot use our offset scheme
1360 // for `ir::Constant`s. Nonetheless, we still insert them into the caller
1361 // now, at the same time as the rest of our entities.
1362 create_constants(allocs, func, callee);
1363
1364 entity_map
1365 }
1366
1367 /// Create inlined blocks in the caller for every block in the callee.
create_blocks( allocs: &mut InliningAllocs, func: &mut ir::Function, callee: &ir::Function, ) -> u321368 fn create_blocks(
1369 allocs: &mut InliningAllocs,
1370 func: &mut ir::Function,
1371 callee: &ir::Function,
1372 ) -> u32 {
1373 let offset = func.dfg.blocks.len();
1374 let offset = u32::try_from(offset).unwrap();
1375
1376 func.dfg.blocks.reserve(callee.dfg.blocks.len());
1377 for callee_block in callee.dfg.blocks.iter() {
1378 let caller_block = func.dfg.blocks.add();
1379 trace!("Callee {callee_block:?} = inlined {caller_block:?}");
1380
1381 if callee.layout.is_cold(callee_block) {
1382 func.layout.set_cold(caller_block);
1383 }
1384
1385 // Note: the entry block does not need parameters because the only
1386 // predecessor is the call block and we associate the callee's
1387 // parameters with the caller's arguments directly.
1388 if callee.layout.entry_block() != Some(callee_block) {
1389 for callee_param in callee.dfg.blocks[callee_block].params(&callee.dfg.value_lists) {
1390 let ty = callee.dfg.value_type(*callee_param);
1391 let caller_param = func.dfg.append_block_param(caller_block, ty);
1392
1393 allocs.set_inlined_value(callee, *callee_param, caller_param);
1394 }
1395 }
1396 }
1397
1398 offset
1399 }
1400
1401 /// Copy and translate global values from the callee into the caller.
create_global_values(func: &mut ir::Function, callee: &ir::Function) -> u321402 fn create_global_values(func: &mut ir::Function, callee: &ir::Function) -> u32 {
1403 let gv_offset = func.global_values.len();
1404 let gv_offset = u32::try_from(gv_offset).unwrap();
1405
1406 func.global_values.reserve(callee.global_values.len());
1407 for gv in callee.global_values.values() {
1408 func.global_values.push(match gv {
1409 // These kinds of global values reference other global values, so we
1410 // need to fixup that reference.
1411 ir::GlobalValueData::Load {
1412 base,
1413 offset,
1414 global_type,
1415 flags,
1416 } => ir::GlobalValueData::Load {
1417 base: ir::GlobalValue::from_u32(base.as_u32() + gv_offset),
1418 offset: *offset,
1419 global_type: *global_type,
1420 flags: *flags,
1421 },
1422 ir::GlobalValueData::IAddImm {
1423 base,
1424 offset,
1425 global_type,
1426 } => ir::GlobalValueData::IAddImm {
1427 base: ir::GlobalValue::from_u32(base.as_u32() + gv_offset),
1428 offset: *offset,
1429 global_type: *global_type,
1430 },
1431
1432 // These kinds of global values do not reference other global
1433 // values, so we can just clone them.
1434 ir::GlobalValueData::VMContext
1435 | ir::GlobalValueData::Symbol { .. }
1436 | ir::GlobalValueData::DynScaleTargetConst { .. } => gv.clone(),
1437 });
1438 }
1439
1440 gv_offset
1441 }
1442
1443 /// Copy `ir::SigRef`s from the callee into the caller.
create_sig_refs(func: &mut ir::Function, callee: &ir::Function) -> u321444 fn create_sig_refs(func: &mut ir::Function, callee: &ir::Function) -> u32 {
1445 let offset = func.dfg.signatures.len();
1446 let offset = u32::try_from(offset).unwrap();
1447
1448 func.dfg.signatures.reserve(callee.dfg.signatures.len());
1449 for sig in callee.dfg.signatures.values() {
1450 func.dfg.signatures.push(sig.clone());
1451 }
1452
1453 offset
1454 }
1455
create_user_external_name_refs( allocs: &mut InliningAllocs, func: &mut ir::Function, callee: &ir::Function, )1456 fn create_user_external_name_refs(
1457 allocs: &mut InliningAllocs,
1458 func: &mut ir::Function,
1459 callee: &ir::Function,
1460 ) {
1461 for (callee_named_func_ref, name) in callee.params.user_named_funcs().iter() {
1462 let caller_named_func_ref = func.declare_imported_user_function(name.clone());
1463 allocs.user_external_name_refs[callee_named_func_ref] = Some(caller_named_func_ref).into();
1464 }
1465 }
1466
1467 /// Translate `ir::FuncRef`s from the callee into the caller.
create_func_refs( allocs: &InliningAllocs, func: &mut ir::Function, callee: &ir::Function, entity_map: &EntityMap, ) -> u321468 fn create_func_refs(
1469 allocs: &InliningAllocs,
1470 func: &mut ir::Function,
1471 callee: &ir::Function,
1472 entity_map: &EntityMap,
1473 ) -> u32 {
1474 let offset = func.dfg.ext_funcs.len();
1475 let offset = u32::try_from(offset).unwrap();
1476
1477 func.dfg.ext_funcs.reserve(callee.dfg.ext_funcs.len());
1478 for ir::ExtFuncData {
1479 name,
1480 signature,
1481 colocated,
1482 patchable,
1483 } in callee.dfg.ext_funcs.values()
1484 {
1485 func.dfg.ext_funcs.push(ir::ExtFuncData {
1486 name: match name {
1487 ir::ExternalName::User(name_ref) => {
1488 ir::ExternalName::User(allocs.user_external_name_refs[*name_ref].expect(
1489 "should have translated all `ir::UserExternalNameRef`s before translating \
1490 `ir::FuncRef`s",
1491 ))
1492 }
1493 ir::ExternalName::TestCase(_)
1494 | ir::ExternalName::LibCall(_)
1495 | ir::ExternalName::KnownSymbol(_) => name.clone(),
1496 },
1497 signature: entity_map.inlined_sig_ref(*signature),
1498 colocated: *colocated,
1499 patchable: *patchable,
1500 });
1501 }
1502
1503 offset
1504 }
1505
1506 /// Copy stack slots from the callee into the caller.
create_stack_slots(func: &mut ir::Function, callee: &ir::Function) -> u321507 fn create_stack_slots(func: &mut ir::Function, callee: &ir::Function) -> u32 {
1508 let offset = func.sized_stack_slots.len();
1509 let offset = u32::try_from(offset).unwrap();
1510
1511 func.sized_stack_slots
1512 .reserve(callee.sized_stack_slots.len());
1513 for slot in callee.sized_stack_slots.values() {
1514 func.sized_stack_slots.push(slot.clone());
1515 }
1516
1517 offset
1518 }
1519
1520 /// Copy dynamic types from the callee into the caller.
create_dynamic_types( func: &mut ir::Function, callee: &ir::Function, entity_map: &EntityMap, ) -> u321521 fn create_dynamic_types(
1522 func: &mut ir::Function,
1523 callee: &ir::Function,
1524 entity_map: &EntityMap,
1525 ) -> u32 {
1526 let offset = func.dynamic_stack_slots.len();
1527 let offset = u32::try_from(offset).unwrap();
1528
1529 func.dfg
1530 .dynamic_types
1531 .reserve(callee.dfg.dynamic_types.len());
1532 for ir::DynamicTypeData {
1533 base_vector_ty,
1534 dynamic_scale,
1535 } in callee.dfg.dynamic_types.values()
1536 {
1537 func.dfg.dynamic_types.push(ir::DynamicTypeData {
1538 base_vector_ty: *base_vector_ty,
1539 dynamic_scale: entity_map.inlined_global_value(*dynamic_scale),
1540 });
1541 }
1542
1543 offset
1544 }
1545
1546 /// Copy dynamic stack slots from the callee into the caller.
create_dynamic_stack_slots( func: &mut ir::Function, callee: &ir::Function, entity_map: &EntityMap, ) -> u321547 fn create_dynamic_stack_slots(
1548 func: &mut ir::Function,
1549 callee: &ir::Function,
1550 entity_map: &EntityMap,
1551 ) -> u32 {
1552 let offset = func.dynamic_stack_slots.len();
1553 let offset = u32::try_from(offset).unwrap();
1554
1555 func.dynamic_stack_slots
1556 .reserve(callee.dynamic_stack_slots.len());
1557 for ir::DynamicStackSlotData { kind, dyn_ty } in callee.dynamic_stack_slots.values() {
1558 func.dynamic_stack_slots.push(ir::DynamicStackSlotData {
1559 kind: *kind,
1560 dyn_ty: entity_map.inlined_dynamic_type(*dyn_ty),
1561 });
1562 }
1563
1564 offset
1565 }
1566
1567 /// Copy immediates from the callee into the caller.
create_immediates(func: &mut ir::Function, callee: &ir::Function) -> u321568 fn create_immediates(func: &mut ir::Function, callee: &ir::Function) -> u32 {
1569 let offset = func.dfg.immediates.len();
1570 let offset = u32::try_from(offset).unwrap();
1571
1572 func.dfg.immediates.reserve(callee.dfg.immediates.len());
1573 for imm in callee.dfg.immediates.values() {
1574 func.dfg.immediates.push(imm.clone());
1575 }
1576
1577 offset
1578 }
1579
1580 /// Copy constants from the callee into the caller.
create_constants(allocs: &mut InliningAllocs, func: &mut ir::Function, callee: &ir::Function)1581 fn create_constants(allocs: &mut InliningAllocs, func: &mut ir::Function, callee: &ir::Function) {
1582 for (callee_constant, data) in callee.dfg.constants.iter() {
1583 let inlined_constant = func.dfg.constants.insert(data.clone());
1584 allocs.constants[*callee_constant] = Some(inlined_constant).into();
1585 }
1586 }
1587