xref: /wasmtime-44.0.1/crates/wizer/src/lib.rs (revision e6937050)
1 //! Wizer: the WebAssembly pre-initializer!
2 //!
3 //! See the [`Wizer`] struct for details.
4 
5 #![deny(missing_docs)]
6 #![cfg_attr(docsrs, feature(doc_cfg))]
7 
8 mod info;
9 mod instrument;
10 mod parse;
11 mod rewrite;
12 mod snapshot;
13 
14 #[cfg(feature = "wasmtime")]
15 mod wasmtime;
16 #[cfg(feature = "wasmtime")]
17 pub use wasmtime::*;
18 #[cfg(feature = "component-model")]
19 mod component;
20 #[cfg(feature = "component-model")]
21 pub use component::*;
22 #[cfg(not(feature = "rayon"))]
23 mod rayoff;
24 
25 pub use crate::info::ModuleContext;
26 pub use crate::snapshot::SnapshotVal;
27 use ::wasmtime::{Result, bail, error::Context as _};
28 use std::collections::{HashMap, HashSet};
29 pub use wasmparser::ValType;
30 
/// Value returned by `Wizer::get_keep_init_func` when `keep_init_func` was
/// never set, i.e. by default the initialization function is not kept.
const DEFAULT_KEEP_INIT_FUNC: bool = false;
32 
33 /// Wizer: the WebAssembly pre-initializer!
34 ///
35 /// Don't wait for your Wasm module to initialize itself, pre-initialize it!
36 /// Wizer instantiates your WebAssembly module, executes its initialization
37 /// function, and then serializes the instance's initialized state out into a
38 /// new WebAssembly module. Now you can use this new, pre-initialized
39 /// WebAssembly module to hit the ground running, without making your users wait
40 /// for that first-time set up code to complete.
41 ///
42 /// ## Caveats
43 ///
44 /// * The initialization function may not call any imported functions. Doing so
45 ///   will trigger a trap and `wizer` will exit.
46 ///
47 /// * The Wasm module may not import globals, tables, or memories.
48 ///
49 /// * Reference types are not supported yet. This is tricky because it would
50 ///   allow the Wasm module to mutate tables, and we would need to be able to
51 ///   snapshot the new table state, but funcrefs and externrefs don't have
52 ///   identity and aren't comparable in the Wasm spec, which makes snapshotting
53 ///   difficult.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "clap", derive(clap::Parser))]
pub struct Wizer {
    /// The Wasm export name of the function that should be executed to
    /// initialize the Wasm module.
    // The CLI default below is the same string that `Wizer::new` uses; keep
    // the two in sync.
    #[cfg_attr(
        feature = "clap",
        arg(short = 'f', long, default_value = "wizer-initialize")
    )]
    init_func: String,

    /// Any function renamings to perform.
    ///
    /// A renaming specification `dst=src` renames a function export `src` to
    /// `dst`, overwriting any previous `dst` export.
    ///
    /// Multiple renamings can be specified. It is an error to specify more than
    /// one source to rename to a destination name, or to specify more than one
    /// renaming destination for one source.
    ///
    /// This option can be used, for example, to replace a `_start` entry point
    /// in an initialized module with an alternate entry point.
    ///
    /// When module linking is enabled, these renames are only applied to the
    /// outermost module.
    // Each entry is stored in `(dst, src)` order, mirroring the `dst=src`
    // syntax: `parse_rename` and `Wizer::func_rename` both push the new name
    // first, and `FuncRenames::parse` destructures entries as `(dst, src)`.
    #[cfg_attr(
        feature = "clap",
        arg(
            short = 'r',
            long = "rename-func",
            alias = "func-rename",
            value_name = "dst=src",
            value_parser = parse_rename,
        ),
    )]
    func_renames: Vec<(String, String)>,

    /// After initialization, should the Wasm module still export the
    /// initialization function?
    ///
    /// This is `false` by default, meaning that the initialization function is
    /// no longer exported from the Wasm module.
    // Tri-state, as interpreted by `get_keep_init_func`:
    //   * `None`          -> never set; falls back to `DEFAULT_KEEP_INIT_FUNC`.
    //   * `Some(None)`    -> set without an explicit value; treated as `true`
    //                        (presumably the bare CLI flag -- confirm against
    //                        clap's handling of `Option<Option<T>>`).
    //   * `Some(Some(b))` -> explicit value `b`.
    #[cfg_attr(
        feature = "clap",
        arg(long, require_equals = true, value_name = "true|false")
    )]
    keep_init_func: Option<Option<bool>>,
}
102 
/// Parses a single rename specification of the form `dst=src`.
///
/// The input is split at the first `=` only, so any additional `=`
/// characters become part of the `src` half. The result is returned in
/// `(dst, src)` order.
///
/// # Errors
///
/// Returns an error if `s` does not contain an `=` character.
#[cfg(feature = "clap")]
fn parse_rename(s: &str) -> Result<(String, String)> {
    // `split_once` splits at the first `=` without building an intermediate
    // `Vec`, and makes the "no separator" case explicit.
    let Some((dst, src)) = s.split_once('=') else {
        bail!("must contain exactly one equals character ('=')");
    };
    Ok((dst.into(), src.into()))
}
111 
/// Lookup tables describing the requested function-export renames, built
/// from a list of `(dst, src)` pairs by `FuncRenames::parse`.
#[derive(Default)]
struct FuncRenames {
    /// For a given export name that we encounter in the original module, a map
    /// to a new name, if any, to emit in the output module.
    rename_src_to_dst: HashMap<String, String>,
    /// A set of export names that we ignore in the original module (because
    /// they are overwritten by renamings).
    rename_dsts: HashSet<String>,
}
121 
122 impl FuncRenames {
parse(renames: &[(String, String)]) -> Result<FuncRenames>123     fn parse(renames: &[(String, String)]) -> Result<FuncRenames> {
124         let mut ret = FuncRenames {
125             rename_src_to_dst: HashMap::new(),
126             rename_dsts: HashSet::new(),
127         };
128         if renames.is_empty() {
129             return Ok(ret);
130         }
131 
132         for (dst, src) in renames {
133             if ret.rename_dsts.contains(dst) {
134                 bail!("Duplicated function rename dst {dst}");
135             }
136             if ret.rename_src_to_dst.contains_key(src) {
137                 bail!("Duplicated function rename src {src}");
138             }
139             ret.rename_dsts.insert(dst.clone());
140             ret.rename_src_to_dst.insert(src.clone(), dst.clone());
141         }
142 
143         Ok(ret)
144     }
145 }
146 
147 impl Wizer {
148     /// Construct a new `Wizer` builder.
new() -> Self149     pub fn new() -> Self {
150         Wizer {
151             init_func: "wizer-initialize".to_string(),
152             func_renames: vec![],
153             keep_init_func: None,
154         }
155     }
156 
157     /// The export name of the initializer function.
158     ///
159     /// Defaults to `"wizer-initialize"`.
init_func(&mut self, init_func: impl Into<String>) -> &mut Self160     pub fn init_func(&mut self, init_func: impl Into<String>) -> &mut Self {
161         self.init_func = init_func.into();
162         self
163     }
164 
165     /// Returns the initialization function that will be run for wizer.
get_init_func(&self) -> &str166     pub fn get_init_func(&self) -> &str {
167         &self.init_func
168     }
169 
170     /// Add a function rename to perform.
func_rename(&mut self, new_name: &str, old_name: &str) -> &mut Self171     pub fn func_rename(&mut self, new_name: &str, old_name: &str) -> &mut Self {
172         self.func_renames
173             .push((new_name.to_string(), old_name.to_string()));
174         self
175     }
176 
177     /// After initialization, should the Wasm module still export the
178     /// initialization function?
179     ///
180     /// This is `false` by default, meaning that the initialization function is
181     /// no longer exported from the Wasm module.
keep_init_func(&mut self, keep: bool) -> &mut Self182     pub fn keep_init_func(&mut self, keep: bool) -> &mut Self {
183         self.keep_init_func = Some(Some(keep));
184         self
185     }
186 
187     /// First half of [`Self::run`] which instruments the provided `wasm` and
188     /// produces a new wasm module which should be run by a runtime.
189     ///
190     /// After the returned wasm is executed the context returned here and the
191     /// state of the instance should be passed to [`Self::snapshot`].
instrument<'a>(&self, wasm: &'a [u8]) -> Result<(ModuleContext<'a>, Vec<u8>)>192     pub fn instrument<'a>(&self, wasm: &'a [u8]) -> Result<(ModuleContext<'a>, Vec<u8>)> {
193         // Make sure we're given valid Wasm from the get go.
194         self.wasm_validate(&wasm)?;
195 
196         let mut cx = parse::parse(wasm)?;
197 
198         // When wizening core modules directly some imports aren't supported,
199         // so check for those here.
200         for import in cx.imports() {
201             match import.ty {
202                 wasmparser::TypeRef::Global(_) => {
203                     bail!("imported globals are not supported")
204                 }
205                 wasmparser::TypeRef::Table(_) => {
206                     bail!("imported tables are not supported")
207                 }
208                 wasmparser::TypeRef::Memory(_) => {
209                     bail!("imported memories are not supported")
210                 }
211                 wasmparser::TypeRef::Func(_) => {}
212                 wasmparser::TypeRef::FuncExact(_) => {}
213                 wasmparser::TypeRef::Tag(_) => {}
214             }
215         }
216 
217         let instrumented_wasm = instrument::instrument(&mut cx);
218         self.debug_assert_valid_wasm(&instrumented_wasm);
219 
220         Ok((cx, instrumented_wasm))
221     }
222 
223     /// Second half of [`Self::run`] which takes the [`ModuleContext`] returned
224     /// by [`Self::instrument`] and the state of the `instance` after it has
225     /// possibly executed its initialization function.
226     ///
227     /// This returns a new WebAssembly binary which has all state
228     /// pre-initialized.
snapshot( &self, mut cx: ModuleContext<'_>, instance: &mut impl InstanceState, ) -> Result<Vec<u8>>229     pub async fn snapshot(
230         &self,
231         mut cx: ModuleContext<'_>,
232         instance: &mut impl InstanceState,
233     ) -> Result<Vec<u8>> {
234         // Parse rename spec.
235         let renames = FuncRenames::parse(&self.func_renames)?;
236 
237         let snapshot = snapshot::snapshot(&cx, instance).await;
238         let rewritten_wasm = self.rewrite(&mut cx, &snapshot, &renames, true);
239 
240         self.debug_assert_valid_wasm(&rewritten_wasm);
241 
242         Ok(rewritten_wasm)
243     }
244 
debug_assert_valid_wasm(&self, wasm: &[u8])245     fn debug_assert_valid_wasm(&self, wasm: &[u8]) {
246         if !cfg!(debug_assertions) {
247             return;
248         }
249         if let Err(error) = self.wasm_validate(&wasm) {
250             #[cfg(feature = "wasmprinter")]
251             let wat = wasmprinter::print_bytes(&wasm)
252                 .unwrap_or_else(|e| format!("Disassembling to WAT failed: {}", e));
253             #[cfg(not(feature = "wasmprinter"))]
254             let wat = "`wasmprinter` cargo feature is not enabled".to_string();
255             panic!("instrumented Wasm is not valid: {error:?}\n\nWAT:\n{wat}");
256         }
257     }
258 
wasm_validate(&self, wasm: &[u8]) -> Result<()>259     fn wasm_validate(&self, wasm: &[u8]) -> Result<()> {
260         log::debug!("Validating input Wasm");
261 
262         wasmparser::Validator::new_with_features(wasmparser::WasmFeatures::all())
263             .validate_all(wasm)
264             .context("wasm validation failed")?;
265 
266         for payload in wasmparser::Parser::new(0).parse_all(wasm) {
267             match payload? {
268                 wasmparser::Payload::CodeSectionEntry(code) => {
269                     let mut ops = code.get_operators_reader()?;
270                     while !ops.eof() {
271                         match ops.read()? {
272                             // Table mutations aren't allowed as wizer has no
273                             // way to record a snapshot of a table at this time.
274                             // The only table mutations allowed are those from
275                             // active element segments which can be
276                             // deterministically replayed, so disallow all other
277                             // forms of mutating a table.
278                             //
279                             // Ideally Wizer could take a snapshot of a table
280                             // post-instantiation and then ensure that after
281                             // running initialization the table didn't get
282                             // mutated, allowing these instructions, but that's
283                             // also not possible at this time.
284                             wasmparser::Operator::TableCopy { .. } => {
285                                 bail!("unsupported `table.copy` instruction")
286                             }
287                             wasmparser::Operator::TableInit { .. } => {
288                                 bail!("unsupported `table.init` instruction")
289                             }
290                             wasmparser::Operator::TableSet { .. } => {
291                                 bail!("unsupported `table.set` instruction")
292                             }
293                             wasmparser::Operator::TableGrow { .. } => {
294                                 bail!("unsupported `table.grow` instruction")
295                             }
296                             wasmparser::Operator::TableFill { .. } => {
297                                 bail!("unsupported `table.fill` instruction")
298                             }
299 
300                             // Wizer has no way of dynamically determining which
301                             // element or data segments were dropped during
302                             // execution so instead disallow these instructions
303                             // entirely. Like above it'd be nice to allow them
304                             // but just forbid their execution during the
305                             // initialization function, but that can't be done
306                             // easily at this time.
307                             wasmparser::Operator::ElemDrop { .. } => {
308                                 bail!("unsupported `elem.drop` instruction")
309                             }
310                             wasmparser::Operator::DataDrop { .. } => {
311                                 bail!("unsupported `data.drop` instruction")
312                             }
313 
314                             // Wizer can't snapshot GC references, so disallow
315                             // any mutation of GC references. This prevents, for
316                             // example, reading something from a table and then
317                             // mutating it.
318                             wasmparser::Operator::StructSet { .. } => {
319                                 bail!("unsupported `struct.set` instruction")
320                             }
321                             wasmparser::Operator::ArraySet { .. } => {
322                                 bail!("unsupported `array.set` instruction")
323                             }
324                             wasmparser::Operator::ArrayFill { .. } => {
325                                 bail!("unsupported `array.fill` instruction")
326                             }
327                             wasmparser::Operator::ArrayCopy { .. } => {
328                                 bail!("unsupported `array.copy` instruction")
329                             }
330                             wasmparser::Operator::ArrayInitData { .. } => {
331                                 bail!("unsupported `array.init_data` instruction")
332                             }
333                             wasmparser::Operator::ArrayInitElem { .. } => {
334                                 bail!("unsupported `array.init_elem` instruction")
335                             }
336 
337                             _ => continue,
338                         }
339                     }
340                 }
341                 wasmparser::Payload::GlobalSection(globals) => {
342                     for g in globals {
343                         let g = g?.ty;
344                         if !g.mutable {
345                             continue;
346                         }
347                         match g.content_type {
348                             wasmparser::ValType::I32
349                             | wasmparser::ValType::I64
350                             | wasmparser::ValType::F32
351                             | wasmparser::ValType::F64
352                             | wasmparser::ValType::V128 => {}
353                             wasmparser::ValType::Ref(_) => {
354                                 bail!("unsupported mutable global containing a reference type")
355                             }
356                         }
357                     }
358                 }
359                 _ => {}
360             }
361         }
362 
363         Ok(())
364     }
365 
get_keep_init_func(&self) -> bool366     fn get_keep_init_func(&self) -> bool {
367         match self.keep_init_func {
368             Some(keep) => keep.unwrap_or(true),
369             None => DEFAULT_KEEP_INIT_FUNC,
370         }
371     }
372 }
373 
/// Abstract ability to load state from a WebAssembly instance after it's been
/// instantiated and some exports have run.
///
/// Both methods take `&mut self` and return a `Send` future.
pub trait InstanceState {
    /// Loads the global specified by `name`, returning a `SnapshotVal`.
    ///
    /// NOTE(review): `type_hint` presumably carries the global's declared
    /// value type so an implementation knows how to read it -- confirm
    /// against the implementations of this trait.
    ///
    /// # Panics
    ///
    /// This function panics if `name` isn't an exported global or if the type
    /// of the global doesn't fit in `SnapshotVal`.
    fn global_get(
        &mut self,
        name: &str,
        type_hint: ValType,
    ) -> impl Future<Output = SnapshotVal> + Send;

    /// Loads the contents of the memory specified by `name` and passes the
    /// entire contents, as a byte slice, to the `contents` callback.
    ///
    /// Nothing is returned to the caller directly: the future resolves to
    /// `()`, and the bytes are only available through the callback.
    ///
    /// # Panics
    ///
    /// This function panics if `name` isn't an exported memory.
    fn memory_contents(
        &mut self,
        name: &str,
        contents: impl FnOnce(&[u8]) + Send,
    ) -> impl Future<Output = ()> + Send;
}
401