1 //! Implements a registry of modules for a store.
2
3 use crate::code::{EngineCode, EngineCodePC, ModuleWithCode, StoreCode, StoreCodePC};
4 #[cfg(feature = "component-model")]
5 use crate::component::Component;
6 use crate::runtime::vm::VMWasmCallFunction;
7 use crate::sync::{OnceLock, RwLock};
8 use crate::vm::CompiledModuleId;
9 use crate::{Engine, FrameInfo, Module, code_memory::CodeMemory, prelude::*};
10 use alloc::sync::Arc;
11 #[cfg(not(feature = "debug"))]
12 use core::marker::PhantomData;
13 use core::ops::Range;
14 use core::ptr::NonNull;
15 use wasmtime_environ::{VMSharedTypeIndex, collections::btree_map::Entry};
16
/// Used for registering modules with a store.
///
/// There are two basic purposes that this registry serves:
///
/// - It keeps all modules and their metadata alive as long as the
///   store exists.
/// - It owns the [`StoreCode`], i.e. the possibly-private copy of machine
///   code, for all modules that execute in this store.
///
/// The registry allows for translation of `EngineCode` to `StoreCode`,
/// deduplicating by the start address of the `EngineCode`; and allows
/// for looking up modules by "registered module ID", and looking up
/// `StoreCode` and `Module`s by PC.
///
/// Note that multiple modules may be backed by a single
/// `StoreCode`. This is specifically the case for components in
/// general. When a component is first instantiated, the component
/// itself is registered (which loads the `StoreCode` into the
/// registry), then each individual module within that component is
/// registered and added to the data structures.
///
/// A brief overview of the kinds of compiled object and their
/// relationships:
///
/// - `Module` is a Wasm module. It owns a `CompiledModule`.
/// - `CompiledModule` contains metadata about the module (e.g., a map
///   from Wasm function indices to locations in the machine code),
///   and also owns an `EngineCode`.
/// - `EngineCode` holds an `Arc` to a `CodeMemory` with the canonical
///   copy of machine code, as well as some lower-level metadata
///   (signatures and types). It is instantiated by this registry into
///   `StoreCode`.
/// - `StoreCode` owns either another `Arc` to the same `CodeMemory`
///   as `EngineCode`, or if guest debugging is enabled and causes us
///   to clone private copies of code for patching per store, owns its
///   own private `CodeMemory` at a different address.
/// - Instances hold a `RegisteredModuleId` to be able to look up their
///   modules.
#[derive(Default)]
pub struct ModuleRegistry {
    /// `StoreCode` and the modules associated with it.
    ///
    /// Keyed by the start address of the `StoreCode`. We maintain the
    /// invariant of no overlaps on insertion (see `assert_no_overlap`).
    /// We use a range query to find the `StoreCode` for a given PC: take
    /// the range `..=pc`, then take the last element of the range. That
    /// picks the highest start address <= the query, and we can then
    /// check whether that region actually contains the address.
    loaded_code: TryBTreeMap<StoreCodePC, LoadedCode>,

    /// Map from `EngineCodePC` start to `StoreCodePC` start. We use this
    /// to memoize the store-code creation process: each `EngineCode` is
    /// instantiated to a `StoreCode` only once per store.
    store_code: TryBTreeMap<EngineCodePC, StoreCodePC>,

    /// Modules instantiated in this registry.
    ///
    /// Every module is placed in this map, but not every module will
    /// be in a `LoadedCode` entry, because the module may have no text.
    modules: TryBTreeMap<RegisteredModuleId, Module>,
}
77
/// One entry of `ModuleRegistry::loaded_code`: a `StoreCode` together with
/// the ids of the registered modules whose text lives inside it.
struct LoadedCode {
    /// The `StoreCode` in this range.
    code: StoreCode,

    /// Modules in this code region, keyed by the text offset at which each
    /// module starts (registration uses the start of the module's first
    /// finished function range as that offset).
    modules: TryBTreeMap<usize, RegisteredModuleId>,
}
85
/// An identifier of a module that has previously been inserted into a
/// `ModuleRegistry`.
///
/// This is just a newtype around `CompiledModuleId`, which is unique
/// within the `Engine`. The wrapped id is private, so values of this type
/// can only be created inside this module.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct RegisteredModuleId(CompiledModuleId);
93
assert_no_overlap( loaded_code: &TryBTreeMap<StoreCodePC, LoadedCode>, range: Range<StoreCodePC>, )94 fn assert_no_overlap(
95 loaded_code: &TryBTreeMap<StoreCodePC, LoadedCode>,
96 range: Range<StoreCodePC>,
97 ) {
98 if let Some((start, _)) = loaded_code.range(range.start..).next() {
99 assert!(start >= range.end);
100 }
101 if let Some((_, code)) = loaded_code.range(..range.end).next_back() {
102 assert!(code.code.text_range().end <= range.start);
103 }
104 }
105
/// Breakpoint state handed to the registry when registering code.
///
/// With the `debug` feature enabled this wraps a borrowed
/// `BreakpointState`, which is used to patch newly registered modules.
#[cfg(feature = "debug")]
pub struct RegisterBreakpointState<'a>(pub(crate) &'a crate::runtime::debug::BreakpointState);
/// Breakpoint state handed to the registry when registering code.
///
/// Without the `debug` feature there is no breakpoint state; this is a
/// zero-sized placeholder that keeps the same lifetime parameter so that
/// signatures do not change between configurations.
#[cfg(not(feature = "debug"))]
pub struct RegisterBreakpointState<'a>(pub(crate) PhantomData<&'a ()>);
110
111 impl<'a> RegisterBreakpointState<'a> {
112 #[cfg(feature = "debug")]
update(&self, code: &mut StoreCode, module: &Module) -> Result<()>113 fn update(&self, code: &mut StoreCode, module: &Module) -> Result<()> {
114 self.0.patch_new_module(code, module)
115 }
116 #[cfg(not(feature = "debug"))]
update(&self, _code: &mut StoreCode, _module: &Module) -> Result<()>117 fn update(&self, _code: &mut StoreCode, _module: &Module) -> Result<()> {
118 Ok(())
119 }
120 }
121
122 impl ModuleRegistry {
123 /// Get a previously-registered module by id.
module_by_id(&self, id: RegisteredModuleId) -> Option<&Module>124 pub fn module_by_id(&self, id: RegisteredModuleId) -> Option<&Module> {
125 self.modules.get(id)
126 }
127
128 /// Get a module by CompiledModuleId, if present.
module_by_compiled_id(&self, id: CompiledModuleId) -> Option<&Module>129 pub fn module_by_compiled_id(&self, id: CompiledModuleId) -> Option<&Module> {
130 self.modules.get(RegisteredModuleId(id))
131 }
132
133 /// Fetches a registered StoreCode and module and an offset within
134 /// it given a program counter value.
module_and_code_by_pc<'a>(&'a self, pc: usize) -> Option<(ModuleWithCode<'a>, usize)>135 pub fn module_and_code_by_pc<'a>(&'a self, pc: usize) -> Option<(ModuleWithCode<'a>, usize)> {
136 let (_, code) = self
137 .loaded_code
138 .range(..=StoreCodePC::from_raw(pc))
139 .next_back()?;
140 let offset = StoreCodePC::offset_of(code.code.text_range(), pc)?;
141 let (_, module_id) = code.modules.range(..=offset).next_back()?;
142 let module = self.modules.get(*module_id)?;
143 Some((ModuleWithCode::from_raw(module, &code.code), offset))
144 }
145
146 /// Fetches the `StoreCode` for a given `EngineCode`.
store_code(&self, engine_code: &EngineCode) -> Option<&StoreCode>147 pub fn store_code(&self, engine_code: &EngineCode) -> Option<&StoreCode> {
148 let store_code_pc = self.store_code_base(engine_code)?;
149 let (_, code) = self.loaded_code.range(store_code_pc..).next()?;
150 Some(&code.code)
151 }
152
153 /// Fetches the base `StoreCodePC` for a given `EngineCode`.
store_code_base(&self, engine_code: &EngineCode) -> Option<StoreCodePC>154 pub fn store_code_base(&self, engine_code: &EngineCode) -> Option<StoreCodePC> {
155 self.store_code.get(engine_code.text_range().start).cloned()
156 }
157
158 /// Fetches the base `StoreCodePC` for a given `EngineCode` with
159 /// `Module`, registering the module if not already registered.
store_code_base_or_register( &mut self, module: &Module, breakpoint_state: RegisterBreakpointState, ) -> Result<StoreCodePC>160 pub fn store_code_base_or_register(
161 &mut self,
162 module: &Module,
163 breakpoint_state: RegisterBreakpointState,
164 ) -> Result<StoreCodePC> {
165 let key = module.engine_code().text_range().start;
166 if !self.store_code.contains_key(key) {
167 let engine = module.engine().clone();
168 self.register_module(module, &engine, breakpoint_state)?;
169 }
170 Ok(*self.store_code.get(key).unwrap())
171 }
172
173 /// Fetches a mutable `StoreCode` for a given base `StoreCodePC`.
store_code_mut(&mut self, store_code_base: StoreCodePC) -> Option<&mut StoreCode>174 pub fn store_code_mut(&mut self, store_code_base: StoreCodePC) -> Option<&mut StoreCode> {
175 let (_, code) = self.loaded_code.range_mut(store_code_base..).next()?;
176 assert_eq!(code.code.text_range().start, store_code_base);
177 Some(&mut code.code)
178 }
179
180 /// Gets an iterator over all modules in the registry.
181 #[cfg(any(feature = "coredump", feature = "debug"))]
all_modules(&self) -> impl Iterator<Item = &'_ Module> + '_182 pub fn all_modules(&self) -> impl Iterator<Item = &'_ Module> + '_ {
183 self.modules.values()
184 }
185
186 /// Registers a new module with the registry.
register_module( &mut self, module: &Module, engine: &Engine, breakpoint_state: RegisterBreakpointState, ) -> Result<RegisteredModuleId>187 pub fn register_module(
188 &mut self,
189 module: &Module,
190 engine: &Engine,
191 breakpoint_state: RegisterBreakpointState,
192 ) -> Result<RegisteredModuleId> {
193 self.register(
194 module.id(),
195 module.engine_code(),
196 Some(module),
197 engine,
198 breakpoint_state,
199 )
200 .map(|id| id.unwrap())
201 }
202
203 #[cfg(feature = "component-model")]
register_component( &mut self, component: &Component, engine: &Engine, breakpoint_state: RegisterBreakpointState, ) -> Result<()>204 pub fn register_component(
205 &mut self,
206 component: &Component,
207 engine: &Engine,
208 breakpoint_state: RegisterBreakpointState,
209 ) -> Result<()> {
210 self.register(
211 component.id(),
212 component.engine_code(),
213 None,
214 engine,
215 breakpoint_state,
216 )?;
217 Ok(())
218 }
219
220 /// Registers a new module with the registry.
register( &mut self, compiled_id: CompiledModuleId, code: &Arc<EngineCode>, module: Option<&Module>, engine: &Engine, breakpoint_state: RegisterBreakpointState, ) -> Result<Option<RegisteredModuleId>>221 fn register(
222 &mut self,
223 compiled_id: CompiledModuleId,
224 code: &Arc<EngineCode>,
225 module: Option<&Module>,
226 engine: &Engine,
227 breakpoint_state: RegisterBreakpointState,
228 ) -> Result<Option<RegisteredModuleId>> {
229 // Register the module, if any.
230 let id = if let Some(module) = module {
231 let id = RegisteredModuleId(compiled_id);
232 self.modules.entry(id).or_insert_with(|| module.clone())?;
233 Some(id)
234 } else {
235 None
236 };
237
238 // Create a StoreCode if one does not already exist.
239 let store_code_pc = match self.store_code.entry(code.text_range().start) {
240 Entry::Vacant(v) => {
241 let store_code = StoreCode::new(engine, code)?;
242 let store_code_pc = store_code.text_range().start;
243 assert_no_overlap(&self.loaded_code, store_code.text_range());
244 self.loaded_code.insert(
245 store_code_pc,
246 LoadedCode {
247 code: store_code,
248 modules: TryBTreeMap::default(),
249 },
250 )?;
251 *v.insert(store_code_pc)?
252 }
253 Entry::Occupied(o) => *o.get(),
254 };
255
256 // Add this module to the LoadedCode if not present.
257 if let (Some(module), Some(id)) = (module, id) {
258 if let Some((_, range)) = module.compiled_module().finished_function_ranges().next() {
259 let loaded_code = self
260 .loaded_code
261 .get_mut(store_code_pc)
262 .expect("loaded_code must have entry for StoreCodePC");
263 loaded_code.modules.insert(range.start, id)?;
264 breakpoint_state.update(&mut loaded_code.code, module)?;
265 }
266 }
267
268 Ok(id)
269 }
270
271 /// Fetches frame information about a program counter in a backtrace.
272 ///
273 /// Returns an object if this `pc` is known to some previously registered
274 /// module, or returns `None` if no information can be found. The first
275 /// boolean returned indicates whether the original module has unparsed
276 /// debug information due to the compiler's configuration. The second
277 /// boolean indicates whether the engine used to compile this module is
278 /// using environment variables to control debuginfo parsing.
lookup_frame_info<'a>( &'a self, pc: usize, ) -> Option<(FrameInfo, ModuleWithCode<'a>)>279 pub(crate) fn lookup_frame_info<'a>(
280 &'a self,
281 pc: usize,
282 ) -> Option<(FrameInfo, ModuleWithCode<'a>)> {
283 let (_, code) = self
284 .loaded_code
285 .range(..=StoreCodePC::from_raw(pc))
286 .next_back()?;
287 let text_offset = StoreCodePC::offset_of(code.code.text_range(), pc)?;
288 let (_, module_id) = code.modules.range(..=text_offset).next_back()?;
289 let module = self
290 .modules
291 .get(*module_id)
292 .expect("referenced module ID not found");
293 let info = FrameInfo::new(module.clone(), text_offset)?;
294 let module_with_code = ModuleWithCode::from_raw(module, &code.code);
295 Some((info, module_with_code))
296 }
297
wasm_to_array_trampoline( &self, sig: VMSharedTypeIndex, ) -> Option<NonNull<VMWasmCallFunction>>298 pub fn wasm_to_array_trampoline(
299 &self,
300 sig: VMSharedTypeIndex,
301 ) -> Option<NonNull<VMWasmCallFunction>> {
302 // TODO: We are doing a linear search over each module. This is fine for
303 // now because we typically have very few modules per store (almost
304 // always one, in fact). If this linear search ever becomes a
305 // bottleneck, we could avoid it by incrementally and lazily building a
306 // `VMSharedSignatureIndex` to `SignatureIndex` map.
307 //
308 // See also the comment in `ModuleInner::wasm_to_native_trampoline`.
309 for module in self.modules.values() {
310 if let Some(trampoline) = module.wasm_to_array_trampoline(sig) {
311 return Some(trampoline);
312 }
313 }
314 None
315 }
316 }
317
318 // This is the global code registry that stores information for all loaded code
319 // objects that are currently in use by any `Store` in the current process.
320 //
321 // The purpose of this map is to be called from signal handlers to determine
322 // whether a program counter is a wasm trap or not. Specifically macOS has
323 // no contextual information about the thread available, hence the necessity
324 // for global state rather than using thread local state.
325 //
326 // This is similar to `ModuleRegistry` except that it has less information and
327 // supports removal. Any time anything is registered with a `ModuleRegistry`
328 // it is also automatically registered with the singleton global module
329 // registry. When a `ModuleRegistry` is destroyed then all of its entries
330 // are removed from the global registry.
global_code() -> &'static RwLock<GlobalRegistry>331 fn global_code() -> &'static RwLock<GlobalRegistry> {
332 static GLOBAL_CODE: OnceLock<RwLock<GlobalRegistry>> = OnceLock::new();
333 GLOBAL_CODE.get_or_init(Default::default)
334 }
335
// Layout of the global code registry: the key is the address of the last
// byte of a registered code region (its exclusive end minus one), and the
// value is the region's start address paired with the `CodeMemory` that
// owns it. Keying by the end lets `lookup_code` find the candidate region
// for a PC with a single `range(pc..)` query.
type GlobalRegistry = TryBTreeMap<usize, (usize, Arc<CodeMemory>)>;
337
338 /// Find which registered region of code contains the given program counter, and
339 /// what offset that PC is within that module's code.
lookup_code(pc: usize) -> Option<(Arc<CodeMemory>, usize)>340 pub fn lookup_code(pc: usize) -> Option<(Arc<CodeMemory>, usize)> {
341 let all_modules = global_code().read();
342 let (_end, (start, module)) = all_modules.range(pc..).next()?;
343 let text_offset = pc.checked_sub(*start)?;
344 Some((module.clone(), text_offset))
345 }
346
347 /// Registers a new region of code.
348 ///
349 /// Must not have been previously registered and must be `unregister`'d to
350 /// prevent leaking memory.
351 ///
352 /// This is required to enable traps to work correctly since the signal handler
353 /// will lookup in the `GLOBAL_CODE` list to determine which a particular pc
354 /// is a trap or not.
register_code(image: &Arc<CodeMemory>, address: Range<usize>) -> Result<(), OutOfMemory>355 pub fn register_code(image: &Arc<CodeMemory>, address: Range<usize>) -> Result<(), OutOfMemory> {
356 if address.is_empty() {
357 return Ok(());
358 }
359 let start = address.start;
360 let end = address.end - 1;
361 let prev = global_code().write().insert(end, (start, image.clone()))?;
362 assert!(prev.is_none());
363 Ok(())
364 }
365
366 /// Unregisters a code mmap from the global map.
367 ///
368 /// Must have been previously registered with `register`.
unregister_code(address: Range<usize>)369 pub fn unregister_code(address: Range<usize>) {
370 if address.is_empty() {
371 return;
372 }
373 let end = address.end - 1;
374 let code = global_code().write().remove(end);
375 assert!(code.is_some());
376 }
377
/// Checks that every PC inside every compiled function of a small module
/// resolves, via `lookup_frame_info`, to the function that contains it.
#[test]
#[cfg_attr(miri, ignore)]
fn test_frame_info() -> Result<(), crate::Error> {
    use crate::*;

    let mut store = Store::<()>::default();
    let module = Module::new(
        store.engine(),
        r#"
            (module
                (func (export "add") (param $x i32) (param $y i32) (result i32) (i32.add (local.get $x) (local.get $y)))
                (func (export "sub") (param $x i32) (param $y i32) (result i32) (i32.sub (local.get $x) (local.get $y)))
                (func (export "mul") (param $x i32) (param $y i32) (result i32) (i32.mul (local.get $x) (local.get $y)))
                (func (export "div_s") (param $x i32) (param $y i32) (result i32) (i32.div_s (local.get $x) (local.get $y)))
                (func (export "div_u") (param $x i32) (param $y i32) (result i32) (i32.div_u (local.get $x) (local.get $y)))
                (func (export "rem_s") (param $x i32) (param $y i32) (result i32) (i32.rem_s (local.get $x) (local.get $y)))
                (func (export "rem_u") (param $x i32) (param $y i32) (result i32) (i32.rem_u (local.get $x) (local.get $y)))
            )
         "#,
    )?;
    // Instantiating the module is what registers its frame information
    // with the store.
    Instance::new(&mut store, &module, &[])?;

    // PCs are derived from the engine code's text base, which assumes that
    // StoreCode does not actually clone in the default configuration.
    let text_base = module.engine_code().text_range().start.raw();
    for (func_index, range) in module.compiled_module().finished_function_ranges() {
        let first_pc = text_base + range.start;
        let end_pc = text_base + range.end;
        for pc in first_pc..end_pc {
            let (frame, _) = store
                .as_context()
                .0
                .modules()
                .lookup_frame_info(pc)
                .unwrap();
            assert!(
                frame.func_index() == func_index.as_u32(),
                "lookup of {:#x} returned {}, expected {}",
                pc,
                frame.func_index(),
                func_index.as_u32()
            );
        }
    }
    Ok(())
}
425