1 //! Implements a registry of modules for a store.
2 
3 use crate::code::{EngineCode, EngineCodePC, ModuleWithCode, StoreCode, StoreCodePC};
4 #[cfg(feature = "component-model")]
5 use crate::component::Component;
6 use crate::runtime::vm::VMWasmCallFunction;
7 use crate::sync::{OnceLock, RwLock};
8 use crate::vm::CompiledModuleId;
9 use crate::{Engine, FrameInfo, Module, code_memory::CodeMemory, prelude::*};
10 use alloc::sync::Arc;
11 #[cfg(not(feature = "debug"))]
12 use core::marker::PhantomData;
13 use core::ops::Range;
14 use core::ptr::NonNull;
15 use wasmtime_environ::{VMSharedTypeIndex, collections::btree_map::Entry};
16 
17 /// Used for registering modules with a store.
18 ///
19 /// There are two basic purposes that this registry serves:
20 ///
21 /// - It keeps all modules and their metadata alive as long as the
22 ///   store exists.
23 /// - It owns the [`StoreCode`], i.e. possibly-private-copy of machine
24 ///   code, for all modules that execute in this store.
25 ///
26 /// The registry allows for translation of EngineCode to StoreCode,
27 /// deduplicating by the start address of the EngineCode; and allows
28 /// for looking up modules by "registered module ID", and looking up
29 /// StoreCode and Modules by PC.
30 ///
31 /// Note that multiple modules may be backed by a single
32 /// `StoreCode`. This is specifically the case for components in
33 /// general. When a component is first instantiated, the component
34 /// itself is registered (which loads the StoreCode into the
35 /// registry), then each individual module within that component is
36 /// registered and added to the data structures.
37 ///
38 /// A brief overview of the kinds of compiled object and their
39 /// relationships:
40 ///
41 /// - `Module` is a Wasm module. It owns a `CompiledModule`.
42 /// - `CompiledModule` contains metadata about the module (e.g., a map
43 ///   from Wasm function indices to locations in the machine code),
44 ///   and also owns an `EngineCode`.
45 /// - `EngineCode` holds an `Arc` to a `CodeMemory` with the canonical
46 ///   copy of machine code, as well as some lower-level metadata
47 ///   (signatures and types). It is instantiated by this registry into
48 ///   `StoreCode`.
49 /// - `StoreCode` owns either another `Arc` to the same `CodeMemory`
50 ///   as `EngineCode`, or if guest debugging is enabled and causes us
51 ///   to clone private copies of code for patching per store, owns its
52 ///   own private `CodeMemory` at a different address.
53 /// - Instances hold a `RegisteredModuleId` to be able to look up their modules.
54 #[derive(Default)]
55 pub struct ModuleRegistry {
56     /// StoreCode and Modules associated with it.
57     ///
58     /// Keyed by the start address of the `StoreCode`. We maintain the
59     /// invariant of no overlaps on insertion. We use a range query to
60     /// find the StoreCode for a given PC: take the range `0..=pc`,
61     /// then take the last element of the range. That picks the
62     /// highest start address <= the query, and we can check whether
63     /// it contains the address.
64     loaded_code: TryBTreeMap<StoreCodePC, LoadedCode>,
65 
66     /// Map from EngineCodePC start to StoreCodePC start. We use this
67     /// to memoize the store-code creation process: each EngineCode is
68     /// instantiated to a StoreCode only once per store.
69     store_code: TryBTreeMap<EngineCodePC, StoreCodePC>,
70 
71     /// Modules instantiated in this registry.
72     ///
73     /// Every module is placed in this map, but not every module will
74     /// be in a LoadedCode entry, because the module may have no text.
75     modules: TryBTreeMap<RegisteredModuleId, Module>,
76 }
77 
78 struct LoadedCode {
79     /// The StoreCode in this range.
80     code: StoreCode,
81 
82     /// Map by starting text offset of Modules in this code region.
83     modules: TryBTreeMap<usize, RegisteredModuleId>,
84 }
85 
86 /// An identifier of a module that has previously been inserted into a
87 /// `ModuleRegistry`.
88 ///
89 /// This is just a newtype around `CompiledModuleId`, which is unique
90 /// within the Engine.
91 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
92 pub struct RegisteredModuleId(CompiledModuleId);
93 
assert_no_overlap( loaded_code: &TryBTreeMap<StoreCodePC, LoadedCode>, range: Range<StoreCodePC>, )94 fn assert_no_overlap(
95     loaded_code: &TryBTreeMap<StoreCodePC, LoadedCode>,
96     range: Range<StoreCodePC>,
97 ) {
98     if let Some((start, _)) = loaded_code.range(range.start..).next() {
99         assert!(start >= range.end);
100     }
101     if let Some((_, code)) = loaded_code.range(..range.end).next_back() {
102         assert!(code.code.text_range().end <= range.start);
103     }
104 }
105 
106 #[cfg(feature = "debug")]
107 pub struct RegisterBreakpointState<'a>(pub(crate) &'a crate::runtime::debug::BreakpointState);
108 #[cfg(not(feature = "debug"))]
109 pub struct RegisterBreakpointState<'a>(pub(crate) PhantomData<&'a ()>);
110 
111 impl<'a> RegisterBreakpointState<'a> {
112     #[cfg(feature = "debug")]
update(&self, code: &mut StoreCode, module: &Module) -> Result<()>113     fn update(&self, code: &mut StoreCode, module: &Module) -> Result<()> {
114         self.0.patch_new_module(code, module)
115     }
116     #[cfg(not(feature = "debug"))]
update(&self, _code: &mut StoreCode, _module: &Module) -> Result<()>117     fn update(&self, _code: &mut StoreCode, _module: &Module) -> Result<()> {
118         Ok(())
119     }
120 }
121 
122 impl ModuleRegistry {
123     /// Get a previously-registered module by id.
module_by_id(&self, id: RegisteredModuleId) -> Option<&Module>124     pub fn module_by_id(&self, id: RegisteredModuleId) -> Option<&Module> {
125         self.modules.get(id)
126     }
127 
128     /// Get a module by CompiledModuleId, if present.
module_by_compiled_id(&self, id: CompiledModuleId) -> Option<&Module>129     pub fn module_by_compiled_id(&self, id: CompiledModuleId) -> Option<&Module> {
130         self.modules.get(RegisteredModuleId(id))
131     }
132 
133     /// Fetches a registered StoreCode and module and an offset within
134     /// it given a program counter value.
module_and_code_by_pc<'a>(&'a self, pc: usize) -> Option<(ModuleWithCode<'a>, usize)>135     pub fn module_and_code_by_pc<'a>(&'a self, pc: usize) -> Option<(ModuleWithCode<'a>, usize)> {
136         let (_, code) = self
137             .loaded_code
138             .range(..=StoreCodePC::from_raw(pc))
139             .next_back()?;
140         let offset = StoreCodePC::offset_of(code.code.text_range(), pc)?;
141         let (_, module_id) = code.modules.range(..=offset).next_back()?;
142         let module = self.modules.get(*module_id)?;
143         Some((ModuleWithCode::from_raw(module, &code.code), offset))
144     }
145 
146     /// Fetches the `StoreCode` for a given `EngineCode`.
store_code(&self, engine_code: &EngineCode) -> Option<&StoreCode>147     pub fn store_code(&self, engine_code: &EngineCode) -> Option<&StoreCode> {
148         let store_code_pc = self.store_code_base(engine_code)?;
149         let (_, code) = self.loaded_code.range(store_code_pc..).next()?;
150         Some(&code.code)
151     }
152 
153     /// Fetches the base `StoreCodePC` for a given `EngineCode`.
store_code_base(&self, engine_code: &EngineCode) -> Option<StoreCodePC>154     pub fn store_code_base(&self, engine_code: &EngineCode) -> Option<StoreCodePC> {
155         self.store_code.get(engine_code.text_range().start).cloned()
156     }
157 
158     /// Fetches the base `StoreCodePC` for a given `EngineCode` with
159     /// `Module`, registering the module if not already registered.
store_code_base_or_register( &mut self, module: &Module, breakpoint_state: RegisterBreakpointState, ) -> Result<StoreCodePC>160     pub fn store_code_base_or_register(
161         &mut self,
162         module: &Module,
163         breakpoint_state: RegisterBreakpointState,
164     ) -> Result<StoreCodePC> {
165         let key = module.engine_code().text_range().start;
166         if !self.store_code.contains_key(key) {
167             let engine = module.engine().clone();
168             self.register_module(module, &engine, breakpoint_state)?;
169         }
170         Ok(*self.store_code.get(key).unwrap())
171     }
172 
173     /// Fetches a mutable `StoreCode` for a given base `StoreCodePC`.
store_code_mut(&mut self, store_code_base: StoreCodePC) -> Option<&mut StoreCode>174     pub fn store_code_mut(&mut self, store_code_base: StoreCodePC) -> Option<&mut StoreCode> {
175         let (_, code) = self.loaded_code.range_mut(store_code_base..).next()?;
176         assert_eq!(code.code.text_range().start, store_code_base);
177         Some(&mut code.code)
178     }
179 
180     /// Gets an iterator over all modules in the registry.
181     #[cfg(any(feature = "coredump", feature = "debug"))]
all_modules(&self) -> impl Iterator<Item = &'_ Module> + '_182     pub fn all_modules(&self) -> impl Iterator<Item = &'_ Module> + '_ {
183         self.modules.values()
184     }
185 
186     /// Registers a new module with the registry.
register_module( &mut self, module: &Module, engine: &Engine, breakpoint_state: RegisterBreakpointState, ) -> Result<RegisteredModuleId>187     pub fn register_module(
188         &mut self,
189         module: &Module,
190         engine: &Engine,
191         breakpoint_state: RegisterBreakpointState,
192     ) -> Result<RegisteredModuleId> {
193         self.register(
194             module.id(),
195             module.engine_code(),
196             Some(module),
197             engine,
198             breakpoint_state,
199         )
200         .map(|id| id.unwrap())
201     }
202 
203     #[cfg(feature = "component-model")]
register_component( &mut self, component: &Component, engine: &Engine, breakpoint_state: RegisterBreakpointState, ) -> Result<()>204     pub fn register_component(
205         &mut self,
206         component: &Component,
207         engine: &Engine,
208         breakpoint_state: RegisterBreakpointState,
209     ) -> Result<()> {
210         self.register(
211             component.id(),
212             component.engine_code(),
213             None,
214             engine,
215             breakpoint_state,
216         )?;
217         Ok(())
218     }
219 
220     /// Registers a new module with the registry.
register( &mut self, compiled_id: CompiledModuleId, code: &Arc<EngineCode>, module: Option<&Module>, engine: &Engine, breakpoint_state: RegisterBreakpointState, ) -> Result<Option<RegisteredModuleId>>221     fn register(
222         &mut self,
223         compiled_id: CompiledModuleId,
224         code: &Arc<EngineCode>,
225         module: Option<&Module>,
226         engine: &Engine,
227         breakpoint_state: RegisterBreakpointState,
228     ) -> Result<Option<RegisteredModuleId>> {
229         // Register the module, if any.
230         let id = if let Some(module) = module {
231             let id = RegisteredModuleId(compiled_id);
232             self.modules.entry(id).or_insert_with(|| module.clone())?;
233             Some(id)
234         } else {
235             None
236         };
237 
238         // Create a StoreCode if one does not already exist.
239         let store_code_pc = match self.store_code.entry(code.text_range().start) {
240             Entry::Vacant(v) => {
241                 let store_code = StoreCode::new(engine, code)?;
242                 let store_code_pc = store_code.text_range().start;
243                 assert_no_overlap(&self.loaded_code, store_code.text_range());
244                 self.loaded_code.insert(
245                     store_code_pc,
246                     LoadedCode {
247                         code: store_code,
248                         modules: TryBTreeMap::default(),
249                     },
250                 )?;
251                 *v.insert(store_code_pc)?
252             }
253             Entry::Occupied(o) => *o.get(),
254         };
255 
256         // Add this module to the LoadedCode if not present.
257         if let (Some(module), Some(id)) = (module, id) {
258             if let Some((_, range)) = module.compiled_module().finished_function_ranges().next() {
259                 let loaded_code = self
260                     .loaded_code
261                     .get_mut(store_code_pc)
262                     .expect("loaded_code must have entry for StoreCodePC");
263                 loaded_code.modules.insert(range.start, id)?;
264                 breakpoint_state.update(&mut loaded_code.code, module)?;
265             }
266         }
267 
268         Ok(id)
269     }
270 
271     /// Fetches frame information about a program counter in a backtrace.
272     ///
273     /// Returns an object if this `pc` is known to some previously registered
274     /// module, or returns `None` if no information can be found. The first
275     /// boolean returned indicates whether the original module has unparsed
276     /// debug information due to the compiler's configuration. The second
277     /// boolean indicates whether the engine used to compile this module is
278     /// using environment variables to control debuginfo parsing.
lookup_frame_info<'a>( &'a self, pc: usize, ) -> Option<(FrameInfo, ModuleWithCode<'a>)>279     pub(crate) fn lookup_frame_info<'a>(
280         &'a self,
281         pc: usize,
282     ) -> Option<(FrameInfo, ModuleWithCode<'a>)> {
283         let (_, code) = self
284             .loaded_code
285             .range(..=StoreCodePC::from_raw(pc))
286             .next_back()?;
287         let text_offset = StoreCodePC::offset_of(code.code.text_range(), pc)?;
288         let (_, module_id) = code.modules.range(..=text_offset).next_back()?;
289         let module = self
290             .modules
291             .get(*module_id)
292             .expect("referenced module ID not found");
293         let info = FrameInfo::new(module.clone(), text_offset)?;
294         let module_with_code = ModuleWithCode::from_raw(module, &code.code);
295         Some((info, module_with_code))
296     }
297 
wasm_to_array_trampoline( &self, sig: VMSharedTypeIndex, ) -> Option<NonNull<VMWasmCallFunction>>298     pub fn wasm_to_array_trampoline(
299         &self,
300         sig: VMSharedTypeIndex,
301     ) -> Option<NonNull<VMWasmCallFunction>> {
302         // TODO: We are doing a linear search over each module. This is fine for
303         // now because we typically have very few modules per store (almost
304         // always one, in fact). If this linear search ever becomes a
305         // bottleneck, we could avoid it by incrementally and lazily building a
306         // `VMSharedSignatureIndex` to `SignatureIndex` map.
307         //
308         // See also the comment in `ModuleInner::wasm_to_native_trampoline`.
309         for module in self.modules.values() {
310             if let Some(trampoline) = module.wasm_to_array_trampoline(sig) {
311                 return Some(trampoline);
312             }
313         }
314         None
315     }
316 }
317 
318 // This is the global code registry that stores information for all loaded code
319 // objects that are currently in use by any `Store` in the current process.
320 //
321 // The purpose of this map is to be called from signal handlers to determine
322 // whether a program counter is a wasm trap or not. Specifically macOS has
323 // no contextual information about the thread available, hence the necessity
324 // for global state rather than using thread local state.
325 //
326 // This is similar to `ModuleRegistry` except that it has less information and
327 // supports removal. Any time anything is registered with a `ModuleRegistry`
328 // it is also automatically registered with the singleton global module
329 // registry. When a `ModuleRegistry` is destroyed then all of its entries
330 // are removed from the global registry.
global_code() -> &'static RwLock<GlobalRegistry>331 fn global_code() -> &'static RwLock<GlobalRegistry> {
332     static GLOBAL_CODE: OnceLock<RwLock<GlobalRegistry>> = OnceLock::new();
333     GLOBAL_CODE.get_or_init(Default::default)
334 }
335 
336 type GlobalRegistry = TryBTreeMap<usize, (usize, Arc<CodeMemory>)>;
337 
338 /// Find which registered region of code contains the given program counter, and
339 /// what offset that PC is within that module's code.
lookup_code(pc: usize) -> Option<(Arc<CodeMemory>, usize)>340 pub fn lookup_code(pc: usize) -> Option<(Arc<CodeMemory>, usize)> {
341     let all_modules = global_code().read();
342     let (_end, (start, module)) = all_modules.range(pc..).next()?;
343     let text_offset = pc.checked_sub(*start)?;
344     Some((module.clone(), text_offset))
345 }
346 
347 /// Registers a new region of code.
348 ///
349 /// Must not have been previously registered and must be `unregister`'d to
350 /// prevent leaking memory.
351 ///
352 /// This is required to enable traps to work correctly since the signal handler
353 /// will lookup in the `GLOBAL_CODE` list to determine which a particular pc
354 /// is a trap or not.
register_code(image: &Arc<CodeMemory>, address: Range<usize>) -> Result<(), OutOfMemory>355 pub fn register_code(image: &Arc<CodeMemory>, address: Range<usize>) -> Result<(), OutOfMemory> {
356     if address.is_empty() {
357         return Ok(());
358     }
359     let start = address.start;
360     let end = address.end - 1;
361     let prev = global_code().write().insert(end, (start, image.clone()))?;
362     assert!(prev.is_none());
363     Ok(())
364 }
365 
366 /// Unregisters a code mmap from the global map.
367 ///
368 /// Must have been previously registered with `register`.
unregister_code(address: Range<usize>)369 pub fn unregister_code(address: Range<usize>) {
370     if address.is_empty() {
371         return;
372     }
373     let end = address.end - 1;
374     let code = global_code().write().remove(end);
375     assert!(code.is_some());
376 }
377 
#[test]
#[cfg_attr(miri, ignore)]
fn test_frame_info() -> Result<(), crate::Error> {
    use crate::*;

    let mut store = Store::<()>::default();
    let module = Module::new(
        store.engine(),
        r#"
            (module
                (func (export "add") (param $x i32) (param $y i32) (result i32) (i32.add (local.get $x) (local.get $y)))
                (func (export "sub") (param $x i32) (param $y i32) (result i32) (i32.sub (local.get $x) (local.get $y)))
                (func (export "mul") (param $x i32) (param $y i32) (result i32) (i32.mul (local.get $x) (local.get $y)))
                (func (export "div_s") (param $x i32) (param $y i32) (result i32) (i32.div_s (local.get $x) (local.get $y)))
                (func (export "div_u") (param $x i32) (param $y i32) (result i32) (i32.div_u (local.get $x) (local.get $y)))
                (func (export "rem_s") (param $x i32) (param $y i32) (result i32) (i32.rem_s (local.get $x) (local.get $y)))
                (func (export "rem_u") (param $x i32) (param $y i32) (result i32) (i32.rem_u (local.get $x) (local.get $y)))
            )
         "#,
    )?;
    // Instantiating the module is what registers its frame information
    // with the store.
    Instance::new(&mut store, &module, &[])?;

    // Every PC inside a function must resolve to that function. This
    // assumes that StoreCode does not actually clone in the default
    // configuration, so the engine code's base address applies.
    let base = module.engine_code().text_range().start.raw();
    for (i, range) in module.compiled_module().finished_function_ranges() {
        let expected = i.as_u32();
        for pc in (base + range.start)..(base + range.end) {
            let (frame, _) = store
                .as_context()
                .0
                .modules()
                .lookup_frame_info(pc)
                .unwrap();
            let actual = frame.func_index();
            assert!(
                actual == expected,
                "lookup of {:#x} returned {}, expected {}",
                pc,
                actual,
                expected
            );
        }
    }
    Ok(())
}
425