1 //! The stack layout is expected to look like so:
2 //!
3 //!
4 //! ```text
5 //! 0xB000 +-----------------------+   <- top of stack (TOS)
6 //!        | saved RIP             |
7 //! 0xAff8 +-----------------------+
8 //!        | saved RBP             |
9 //! 0xAff0 +-----------------------+
10 //!        | saved RSP             |
//! 0xAfe8 +-----------------------+   <- beginning of "control context"
12 //!        | args_capacity         |
13 //! 0xAfe0 +-----------------------+
14 //!        | args buffer, size:    |
15 //!        | (16 * args_capacity)  |
16 //! 0xAfc0 +-----------------------+   <- below: beginning of usable stack space
17 //!        |                       |      (16-byte aligned)
18 //!        |                       |
19 //!        ~        ...            ~   <- actual native stack space to use
20 //!        |                       |
21 //! 0x1000 +-----------------------+
22 //!        |  guard page           |   <- (not currently enabled)
23 //! 0x0000 +-----------------------+
24 //! ```
25 //!
26 //! The "control context" indicates how to resume a computation. The layout is
27 //! determined by Cranelift's stack_switch instruction, which reads and writes
28 //! these fields. The fields are used as follows, where we distinguish two
29 //! cases:
30 //!
//! 1. If the continuation is currently active (i.e., running directly, or an
//!    ancestor of the running continuation), the control context stores the
//!    PC, RSP, and RBP of the *parent* of the running continuation.
//!
//! 2. If the picture shows a suspended computation, the fields store the PC,
//!    RSP, and RBP at the time of the suspension.
39 //!
40 //! Note that this design ensures that external tools can construct backtraces
41 //! in the presence of stack switching by using frame pointers only: The
42 //! wasmtime_continuation_start trampoline uses the address of the RBP field in the
43 //! control context (0xAff0 above) as its frame pointer. This means that when
44 //! passing the wasmtime_continuation_start frame while doing frame pointer walking,
45 //! the parent of that frame is the last frame in the parent of this
46 //! continuation.
47 //!
48 //! Wasmtime's own mechanism for constructing backtraces also relies on frame
49 //! pointer chains. However, it understands continuations and does not rely on
50 //! the trickery outlined here to go from the frames in one continuation to the
51 //! parent.
52 //!
53 //! The args buffer is used as follows: It is used by the array calling
54 //! trampoline to read and store the arguments and return values of the function
55 //! running inside the continuation. If this function has m parameters and n
56 //! return values, then args_capacity is defined as max(m, n) and the size of
57 //! the args buffer is args_capacity * 16 bytes. The start address (0xAfc0 in
58 //! the example above, thus assuming args_capacity = 2) is saved as the `data`
59 //! field of the VMContRef's `args` object.
60 
61 use core::ptr::NonNull;
62 use std::io;
63 use std::ops::Range;
64 use std::ptr;
65 
66 use crate::runtime::vm::stack_switching::VMHostArray;
67 use crate::runtime::vm::{VMContext, VMFuncRef, ValRaw};
68 
/// Records how the memory backing a [`VMContinuationStack`] was obtained.
///
/// The `Drop` implementation of the stack consults this tag to decide whether
/// it has to release the memory itself.
#[derive(Debug, Eq, PartialEq)]
pub enum Allocator {
    /// The stack was mapped by `VMContinuationStack::new` and is unmapped
    /// again when the stack is dropped.
    Mmap,
    /// The memory is managed by someone else (or there is no memory at all,
    /// as for `VMContinuationStack::unallocated`); dropping the stack does
    /// not free anything.
    Custom,
}
74 
75 #[derive(Debug)]
76 #[repr(C)]
77 pub struct VMContinuationStack {
78     // The top of the stack; for stacks allocated by the fiber implementation itself,
79     // the base address of the allocation will be `top.sub(len.unwrap())`
80     top: *mut u8,
81     // The length of the stack
82     len: usize,
83     // allocation strategy
84     allocator: Allocator,
85 }
86 
87 impl VMContinuationStack {
new(size: usize) -> io::Result<Self>88     pub fn new(size: usize) -> io::Result<Self> {
89         // Round up our stack size request to the nearest multiple of the
90         // page size.
91         let page_size = rustix::param::page_size();
92         let size = if size == 0 {
93             page_size
94         } else {
95             size.next_multiple_of(page_size)
96         };
97 
98         unsafe {
99             // Add in one page for a guard page and then ask for some memory.
100             let mmap_len = size + page_size;
101             let mmap = rustix::mm::mmap_anonymous(
102                 ptr::null_mut(),
103                 mmap_len,
104                 rustix::mm::ProtFlags::empty(),
105                 rustix::mm::MapFlags::PRIVATE,
106             )?;
107 
108             rustix::mm::mprotect(
109                 mmap.cast::<u8>().add(page_size).cast(),
110                 size,
111                 rustix::mm::MprotectFlags::READ | rustix::mm::MprotectFlags::WRITE,
112             )?;
113 
114             Ok(Self {
115                 top: mmap.cast::<u8>().add(mmap_len),
116                 len: mmap_len,
117                 allocator: Allocator::Mmap,
118             })
119         }
120     }
121 
unallocated() -> Self122     pub fn unallocated() -> Self {
123         Self {
124             top: std::ptr::null_mut(),
125             len: 0,
126             allocator: Allocator::Custom,
127         }
128     }
129 
is_unallocated(&self) -> bool130     pub fn is_unallocated(&self) -> bool {
131         debug_assert_eq!(self.len == 0, self.top == std::ptr::null_mut());
132         self.len == 0
133     }
134 
from_raw_parts( base: *mut u8, _guard_size: usize, len: usize, ) -> io::Result<Self>135     pub unsafe fn from_raw_parts(
136         base: *mut u8,
137         _guard_size: usize,
138         len: usize,
139     ) -> io::Result<Self> {
140         Ok(Self {
141             top: unsafe { base.add(len) },
142             len,
143             allocator: Allocator::Custom,
144         })
145     }
146 
is_from_raw_parts(&self) -> bool147     pub fn is_from_raw_parts(&self) -> bool {
148         self.allocator == Allocator::Custom
149     }
150 
top(&self) -> Option<*mut u8>151     pub fn top(&self) -> Option<*mut u8> {
152         Some(self.top)
153     }
154 
range(&self) -> Option<Range<usize>>155     pub fn range(&self) -> Option<Range<usize>> {
156         let base = unsafe { self.top.sub(self.len).addr() };
157         Some(base..base + self.len)
158     }
159 
control_context_instruction_pointer(&self) -> usize160     pub fn control_context_instruction_pointer(&self) -> usize {
161         // See picture at top of this file:
162         // RIP is stored 8 bytes below top of stack.
163         unsafe {
164             let ptr = self.top.sub(8).cast::<usize>();
165             *ptr
166         }
167     }
168 
control_context_frame_pointer(&self) -> usize169     pub fn control_context_frame_pointer(&self) -> usize {
170         // See picture at top of this file:
171         // RBP is stored 16 bytes below top of stack.
172         unsafe {
173             let ptr = self.top.sub(16).cast::<usize>();
174             *ptr
175         }
176     }
177 
control_context_stack_pointer(&self) -> usize178     pub fn control_context_stack_pointer(&self) -> usize {
179         // See picture at top of this file:
180         // RSP is stored 24 bytes below top of stack.
181         unsafe {
182             let ptr = self.top.sub(24).cast::<usize>();
183             *ptr
184         }
185     }
186 
187     /// This function installs the launchpad for the computation to run on the
188     /// fiber, such that executing a `stack_switch` instruction on the stack
189     /// actually runs the desired computation.
190     ///
191     /// Concretely, switching to the stack prepared by this function
192     /// causes that we enter `wasmtime_continuation_start`, which then in turn
193     /// calls `fiber_start` with  the following arguments:
194     /// TOS, func_ref, caller_vmctx, args_ptr, args_capacity
195     ///
196     /// Note that at this point we also allocate the args buffer
197     /// (see picture at the top of this file).
198     /// We define `args_capacity` as the max of parameter and return value count.
199     /// Then the size s of the actual buffer size is calculated as follows:
200     /// s = size_of(ValRaw) * `args_capacity`,
201     ///
202     /// Note that this value is used below, and we may have s = 0.
203     ///
204     /// The layout of the VMContinuationStack near the top of stack (TOS)
205     /// *after* running this function is as follows:
206     ///
207     ///
208     ///  Offset from    |
209     ///       TOS       | Contents
210     ///  ---------------|-------------------------------------------------------
211     ///       -0x08     | address of wasmtime_continuation_start function (future PC)
212     ///       -0x10     | TOS - 0x10 (future RBP)
213     ///       -0x18     | TOS - 0x40 - s (future RSP)
214     ///       -0x20     | args_capacity
215     ///
216     ///
217     /// The data stored behind the args buffer is as follows:
218     ///
219     ///  Offset from    |
220     ///       TOS       | Contents
221     ///  ---------------|-------------------------------------------------------
222     ///       -0x28 - s | func_ref
223     ///       -0x30 - s | caller_vmctx
224     ///       -0x38 - s | args (of type *mut ArrayRef<ValRaw>)
225     ///       -0x40 - s | return_value_count
initialize( &self, func_ref: *const VMFuncRef, caller_vmctx: *mut VMContext, args: *mut VMHostArray<ValRaw>, parameter_count: u32, return_value_count: u32, )226     pub fn initialize(
227         &self,
228         func_ref: *const VMFuncRef,
229         caller_vmctx: *mut VMContext,
230         args: *mut VMHostArray<ValRaw>,
231         parameter_count: u32,
232         return_value_count: u32,
233     ) {
234         let tos = self.top;
235 
236         unsafe {
237             let store = |tos_neg_offset, value| {
238                 let target = tos.sub(tos_neg_offset).cast::<usize>();
239                 target.write(value)
240             };
241 
242             let args_ref = &mut *args;
243             let args_capacity = std::cmp::max(parameter_count, return_value_count);
244             // The args object must currently be empty.
245             debug_assert_eq!(args_ref.capacity, 0);
246             debug_assert_eq!(args_ref.length, 0);
247 
248             let args_data_size =
249                 usize::try_from(args_capacity).unwrap() * std::mem::size_of::<ValRaw>();
250             let args_data_ptr = if args_capacity == 0 {
251                 ptr::null_mut()
252             } else {
253                 tos.sub(0x20 + args_data_size)
254             };
255 
256             args_ref.capacity = args_capacity;
257             args_ref.data = args_data_ptr.cast::<ValRaw>();
258 
259             let to_store = [
260                 // Data near top of stack:
261                 (0x08, wasmtime_continuation_start_address().addr()),
262                 (0x10, tos.sub(0x10).addr()),
263                 (0x18, tos.sub(0x40 + args_data_size).addr()),
264                 (0x20, usize::try_from(args_capacity).unwrap()),
265                 // Data after the args buffer:
266                 (0x28 + args_data_size, func_ref.addr()),
267                 (0x30 + args_data_size, caller_vmctx.addr()),
268                 (0x38 + args_data_size, args.addr()),
269                 (
270                     0x40 + args_data_size,
271                     usize::try_from(return_value_count).unwrap(),
272                 ),
273             ];
274 
275             for (offset, data) in to_store {
276                 store(offset, data);
277             }
278         }
279     }
280 }
281 
282 impl Drop for VMContinuationStack {
drop(&mut self)283     fn drop(&mut self) {
284         unsafe {
285             match self.allocator {
286                 Allocator::Mmap => {
287                     let ret = rustix::mm::munmap(self.top.sub(self.len) as _, self.len);
288                     debug_assert!(ret.is_ok());
289                 }
290                 Allocator::Custom => {} // It's the creator's responsibility to reclaim the memory.
291             }
292         }
293     }
294 }
295 
296 /// This function is responsible for actually running a wasm function inside a
297 /// continuation. It is only ever called from `wasmtime_continuation_start`.
fiber_start( func_ref: *mut VMFuncRef, caller_vmctx: *mut VMContext, args: *mut VMHostArray<ValRaw>, return_value_count: u32, )298 unsafe extern "C" fn fiber_start(
299     func_ref: *mut VMFuncRef,
300     caller_vmctx: *mut VMContext,
301     args: *mut VMHostArray<ValRaw>,
302     return_value_count: u32,
303 ) {
304     unsafe {
305         let func_ref = NonNull::new(func_ref).unwrap();
306         let caller_vmxtx = NonNull::new_unchecked(caller_vmctx);
307         let args = &mut *args;
308         let params_and_returns: NonNull<[ValRaw]> = if args.capacity == 0 {
309             NonNull::from(&[])
310         } else {
311             std::slice::from_raw_parts_mut(args.data, usize::try_from(args.capacity).unwrap())
312                 .into()
313         };
314 
315         // NOTE(frank-emrich) The usage of the `caller_vmctx` is probably not
316         // 100% correct here. Currently, we determine the "caller" vmctx when
317         // initializing the fiber stack/continuation (i.e. as part of
318         // `cont.new`). However, we may subsequenly `resume` the continuation
319         // from a different Wasm instance. The way to fix this would be to make
320         // the currently active `VMContext` an additional parameter of
321         // `wasmtime_continuation_switch` and pipe it through to this point. However,
322         // since the caller vmctx is only really used to access stuff in the
323         // underlying `Store`, it's fine to be slightly sloppy about the exact
324         // value we set.
325         //
326         // TODO(dhil): we are ignoring the boolean return value
327         // here... we probably shouldn't.
328         VMFuncRef::array_call(func_ref, None, caller_vmxtx, params_and_returns);
329 
330         // The array call trampoline should have just written
331         // `return_value_count` values to the `args` buffer. Let's reflect that
332         // in its length field, to make various bounds checks happy.
333         args.length = return_value_count;
334 
335         // Note that after this function returns, wasmtime_continuation_start
336         // will switch back to the parent stack.
337     }
338 }
339 
340 cfg_if::cfg_if! {
341     if #[cfg(target_arch = "x86_64")] {
342         mod x86_64;
343         use x86_64::*;
344     } else {
345         // Note that this should be unreachable: In stack.rs, we currently select
346         // the module defined in the current file only if we are on unix AND
347         // x86_64.
348         compile_error!("the stack switching feature is not supported on this CPU architecture");
349     }
350 }
351