1 //! Definitions of runtime structures and metadata which are serialized into ELF
2 //! with `postcard` as part of a module's compilation process.
3 
4 use crate::WasmChecksum;
5 use crate::error::{Result, bail};
6 use crate::prelude::*;
7 use crate::{
8     CompiledModuleInfo, DebugInfoData, FunctionName, MemoryInitialization, Metadata,
9     ModuleTranslation, Tunables, obj,
10 };
11 use object::SectionKind;
12 use object::write::{Object, SectionId, StandardSegment, WritableBuffer};
13 use std::ops::Range;
14 
/// Helper structure to create an ELF file as a compilation artifact.
///
/// This structure drives the process by which Wasmtime encodes a core wasm
/// module into an ELF file, notably managing the data sections that end up
/// in the final file.
pub struct ObjectBuilder<'a> {
    /// The `object`-crate-defined ELF file writer we're using.
    obj: Object<'a>,

    /// General compilation configuration.
    tunables: &'a Tunables,

    /// The section identifier for the read-only data section (named
    /// `obj::ELF_WASM_DATA`) which is where wasm data segments will go.
    ///
    /// This section is created eagerly in `ObjectBuilder::new`.
    data: SectionId,

    /// The section identifier for function name information, or otherwise where
    /// the `name` custom section of wasm is copied into.
    ///
    /// This is optional and lazily created on demand, the first time a module
    /// with at least one function name is appended.
    names: Option<SectionId>,

    /// The section identifier for dwarf information copied from the original
    /// wasm files.
    ///
    /// This is optional and lazily created on demand, the first time a
    /// non-empty DWARF section is pushed.
    dwarf: Option<SectionId>,
}
43 
44 impl<'a> ObjectBuilder<'a> {
45     /// Creates a new builder for the `obj` specified.
new(mut obj: Object<'a>, tunables: &'a Tunables) -> ObjectBuilder<'a>46     pub fn new(mut obj: Object<'a>, tunables: &'a Tunables) -> ObjectBuilder<'a> {
47         let data = obj.add_section(
48             obj.segment_name(StandardSegment::Data).to_vec(),
49             obj::ELF_WASM_DATA.as_bytes().to_vec(),
50             SectionKind::ReadOnlyData,
51         );
52         ObjectBuilder {
53             obj,
54             tunables,
55             data,
56             names: None,
57             dwarf: None,
58         }
59     }
60 
61     /// Insert the wasm raw wasm-based debuginfo into the output.
62     /// Note that this is distinct from the native debuginfo
63     /// possibly generated by the native compiler, hence these sections
64     /// getting wasm-specific names.
push_debuginfo( &mut self, dwarf: &mut Vec<(u8, Range<u64>)>, debuginfo: &DebugInfoData<'_>, )65     pub fn push_debuginfo(
66         &mut self,
67         dwarf: &mut Vec<(u8, Range<u64>)>,
68         debuginfo: &DebugInfoData<'_>,
69     ) {
70         self.push_debug(dwarf, &debuginfo.dwarf.debug_abbrev);
71         self.push_debug(dwarf, &debuginfo.dwarf.debug_addr);
72         self.push_debug(dwarf, &debuginfo.dwarf.debug_aranges);
73         self.push_debug(dwarf, &debuginfo.dwarf.debug_info);
74         self.push_debug(dwarf, &debuginfo.dwarf.debug_line);
75         self.push_debug(dwarf, &debuginfo.dwarf.debug_line_str);
76         self.push_debug(dwarf, &debuginfo.dwarf.debug_str);
77         self.push_debug(dwarf, &debuginfo.dwarf.debug_str_offsets);
78         self.push_debug(dwarf, &debuginfo.debug_ranges);
79         self.push_debug(dwarf, &debuginfo.debug_rnglists);
80         self.push_debug(dwarf, &debuginfo.debug_cu_index);
81 
82         // Sort this for binary-search-lookup later in `symbolize_context`.
83         dwarf.sort_by_key(|(id, _)| *id);
84     }
85 
86     /// Completes compilation of the `translation` specified, inserting
87     /// everything necessary into the `Object` being built.
88     ///
89     /// This function will consume the final results of compiling a wasm module
90     /// and finish the ELF image in-progress as part of `self.obj` by appending
91     /// any compiler-agnostic sections.
92     ///
93     /// The auxiliary `CompiledModuleInfo` structure returned here has also been
94     /// serialized into the object returned, but if the caller will quickly
95     /// turn-around and invoke `CompiledModule::from_artifacts` after this then
96     /// the information can be passed to that method to avoid extra
97     /// deserialization. This is done to avoid a serialize-then-deserialize for
98     /// API calls like `Module::new` where the compiled module is immediately
99     /// going to be used.
100     ///
101     /// The various arguments here are:
102     ///
103     /// * `translation` - the core wasm translation that's being completed.
104     ///
105     /// * `funcs` - compilation metadata about functions within the translation
106     ///   as well as where the functions are located in the text section and any
107     ///   associated trampolines.
108     ///
109     /// * `wasm_to_array_trampolines` - list of all trampolines necessary for
110     ///   Wasm callers calling array callees (e.g. `Func::wrap`). One for each
111     ///   function signature in the module. Must be sorted by `SignatureIndex`.
112     ///
113     /// Returns the `CompiledModuleInfo` corresponding to this core Wasm module
114     /// as a result of this append operation. This is then serialized into the
115     /// final artifact by the caller.
append(&mut self, translation: ModuleTranslation<'_>) -> Result<CompiledModuleInfo>116     pub fn append(&mut self, translation: ModuleTranslation<'_>) -> Result<CompiledModuleInfo> {
117         let ModuleTranslation {
118             mut module,
119             debuginfo,
120             has_unparsed_debuginfo,
121             data,
122             data_align,
123             passive_data,
124             wasm,
125             ..
126         } = translation;
127 
128         // Place all data from the wasm module into a section which will the
129         // source of the data later at runtime. This additionally keeps track of
130         // the offset of
131         let mut total_data_len = 0;
132         let data_offset = self
133             .obj
134             .append_section_data(self.data, &[], data_align.unwrap_or(1));
135         for (i, data) in data.iter().enumerate() {
136             // The first data segment has its alignment specified as the alignment
137             // for the entire section, but everything afterwards is adjacent so it
138             // has alignment of 1.
139             let align = if i == 0 { data_align.unwrap_or(1) } else { 1 };
140             self.obj.append_section_data(self.data, data, align);
141             total_data_len += data.len();
142         }
143         for data in passive_data.iter() {
144             self.obj.append_section_data(self.data, data, 1);
145         }
146 
147         // If any names are present in the module then the `ELF_NAME_DATA` section
148         // is create and appended.
149         let mut func_names = Vec::new();
150         if debuginfo.name_section.func_names.len() > 0 {
151             let name_id = *self.names.get_or_insert_with(|| {
152                 self.obj.add_section(
153                     self.obj.segment_name(StandardSegment::Data).to_vec(),
154                     obj::ELF_NAME_DATA.as_bytes().to_vec(),
155                     SectionKind::ReadOnlyData,
156                 )
157             });
158             let mut sorted_names = debuginfo.name_section.func_names.iter().collect::<Vec<_>>();
159             sorted_names.sort_by_key(|(idx, _name)| *idx);
160             for (idx, name) in sorted_names {
161                 let offset = self.obj.append_section_data(name_id, name.as_bytes(), 1);
162                 let offset = match u32::try_from(offset) {
163                     Ok(offset) => offset,
164                     Err(_) => bail!("name section too large (> 4gb)"),
165                 };
166                 let len = u32::try_from(name.len()).unwrap();
167                 func_names.push(FunctionName {
168                     idx: *idx,
169                     offset,
170                     len,
171                 });
172             }
173         }
174 
175         // Data offsets in `MemoryInitialization` are offsets within the
176         // `translation.data` list concatenated which is now present in the data
177         // segment that's appended to the object. Increase the offsets by
178         // `self.data_size` to account for any previously added module.
179         let data_offset = u32::try_from(data_offset).unwrap();
180         match &mut module.memory_initialization {
181             MemoryInitialization::Segmented(list) => {
182                 for segment in list {
183                     segment.data.start = segment.data.start.checked_add(data_offset).unwrap();
184                     segment.data.end = segment.data.end.checked_add(data_offset).unwrap();
185                 }
186             }
187             MemoryInitialization::Static { map } => {
188                 for (_, segment) in map {
189                     if let Some(segment) = segment {
190                         segment.data.start = segment.data.start.checked_add(data_offset).unwrap();
191                         segment.data.end = segment.data.end.checked_add(data_offset).unwrap();
192                     }
193                 }
194             }
195         }
196 
197         // Data offsets for passive data are relative to the start of
198         // `translation.passive_data` which was appended to the data segment
199         // of this object, after active data in `translation.data`. Update the
200         // offsets to account prior modules added in addition to active data.
201         let data_offset = data_offset + u32::try_from(total_data_len).unwrap();
202         for (_, range) in module.passive_data_map.iter_mut() {
203             range.start = range.start.checked_add(data_offset).unwrap();
204             range.end = range.end.checked_add(data_offset).unwrap();
205         }
206 
207         // Insert the wasm raw wasm-based debuginfo into the output, if
208         // requested. Note that this is distinct from the native debuginfo
209         // possibly generated by the native compiler, hence these sections
210         // getting wasm-specific names.
211         let mut dwarf = Vec::new();
212         if self.tunables.parse_wasm_debuginfo {
213             self.push_debuginfo(&mut dwarf, &debuginfo);
214         }
215 
216         Ok(CompiledModuleInfo {
217             module,
218             func_names,
219             meta: Metadata {
220                 has_unparsed_debuginfo,
221                 code_section_offset: debuginfo.wasm_file.code_section_offset,
222                 has_wasm_debuginfo: self.tunables.parse_wasm_debuginfo,
223                 dwarf,
224             },
225             checksum: WasmChecksum::from_binary(wasm, self.tunables.recording),
226         })
227     }
228 
push_debug<'b, T>(&mut self, dwarf: &mut Vec<(u8, Range<u64>)>, section: &T) where T: gimli::Section<gimli::EndianSlice<'b, gimli::LittleEndian>>,229     fn push_debug<'b, T>(&mut self, dwarf: &mut Vec<(u8, Range<u64>)>, section: &T)
230     where
231         T: gimli::Section<gimli::EndianSlice<'b, gimli::LittleEndian>>,
232     {
233         let data = section.reader().slice();
234         if data.is_empty() {
235             return;
236         }
237         let section_id = *self.dwarf.get_or_insert_with(|| {
238             self.obj.add_section(
239                 self.obj.segment_name(StandardSegment::Debug).to_vec(),
240                 obj::ELF_WASMTIME_DWARF.as_bytes().to_vec(),
241                 SectionKind::Debug,
242             )
243         });
244         let offset = self.obj.append_section_data(section_id, data, 1);
245         dwarf.push((T::id() as u8, offset..offset + data.len() as u64));
246     }
247 
248     /// Appends the original Wasm bytecode for one or more core modules as a
249     /// pair of new ELF sections.
250     ///
251     /// `modules` is an iterator of raw Wasm binary slices, one per core
252     /// module, in `StaticModuleIndex` order.
append_wasm_bytecode<'b>(&mut self, modules: impl IntoIterator<Item = &'b [u8]>)253     pub fn append_wasm_bytecode<'b>(&mut self, modules: impl IntoIterator<Item = &'b [u8]>) {
254         let bytecode_id = self.obj.add_section(
255             self.obj.segment_name(StandardSegment::Data).to_vec(),
256             obj::ELF_WASMTIME_WASM_BYTECODE.as_bytes().to_vec(),
257             SectionKind::ReadOnlyData,
258         );
259         let ends_id = self.obj.add_section(
260             self.obj.segment_name(StandardSegment::Data).to_vec(),
261             obj::ELF_WASMTIME_WASM_BYTECODE_ENDS.as_bytes().to_vec(),
262             SectionKind::ReadOnlyData,
263         );
264         let mut end: u32 = 0;
265         for wasm in modules {
266             self.obj.append_section_data(bytecode_id, wasm, 1);
267             end = end
268                 .checked_add(u32::try_from(wasm.len()).expect("module bytecode exceeds 4 GiB"))
269                 .expect("total bytecode exceeds 4 GiB");
270             self.obj.append_section_data(ends_id, &end.to_le_bytes(), 4);
271         }
272     }
273 
274     /// Creates the `ELF_WASMTIME_INFO` section from the given serializable data
275     /// structure.
serialize_info<T>(&mut self, info: &T) where T: serde::Serialize,276     pub fn serialize_info<T>(&mut self, info: &T)
277     where
278         T: serde::Serialize,
279     {
280         let section = self.obj.add_section(
281             self.obj.segment_name(StandardSegment::Data).to_vec(),
282             obj::ELF_WASMTIME_INFO.as_bytes().to_vec(),
283             SectionKind::ReadOnlyData,
284         );
285         let data = postcard::to_allocvec(info).unwrap();
286         self.obj.set_section_data(section, data, 1);
287     }
288 
289     /// Serializes `self` into a buffer. This can be used for execution as well
290     /// as serialization.
finish<T: WritableBuffer>(self, t: &mut T) -> Result<()>291     pub fn finish<T: WritableBuffer>(self, t: &mut T) -> Result<()> {
292         self.obj.emit(t).map_err(|e| e.into())
293     }
294 }
295 
/// A type which can be the result of serializing an object.
///
/// Implementors describe how a fully-populated `ObjectBuilder` is turned into
/// a concrete output value.
pub trait FinishedObject: Sized {
    /// State required for `finish_object`, if any.
    ///
    /// Implementations which need no extra state can use `()`, as the
    /// `Vec<u8>` implementation does.
    type State;

    /// Emit the object as `Self`.
    ///
    /// Takes ownership of the builder `obj` and borrows the
    /// implementation-specific `state`. Returns an error if the object could
    /// not be emitted.
    fn finish_object(obj: ObjectBuilder<'_>, state: &Self::State) -> Result<Self>;
}
304 
305 impl FinishedObject for Vec<u8> {
306     type State = ();
finish_object(obj: ObjectBuilder<'_>, _state: &Self::State) -> Result<Self>307     fn finish_object(obj: ObjectBuilder<'_>, _state: &Self::State) -> Result<Self> {
308         let mut result = ObjectVec::default();
309         obj.finish(&mut result)?;
310         return Ok(result.0);
311 
312         #[derive(Default)]
313         struct ObjectVec(Vec<u8>);
314 
315         impl WritableBuffer for ObjectVec {
316             fn len(&self) -> usize {
317                 self.0.len()
318             }
319 
320             fn reserve(&mut self, additional: usize) -> Result<(), ()> {
321                 assert_eq!(self.0.len(), 0, "cannot reserve twice");
322                 self.0 = Vec::with_capacity(additional);
323                 Ok(())
324             }
325 
326             fn resize(&mut self, new_len: usize) {
327                 if new_len <= self.0.len() {
328                     self.0.truncate(new_len)
329                 } else {
330                     self.0.extend(vec![0; new_len - self.0.len()])
331                 }
332             }
333 
334             fn write_bytes(&mut self, val: &[u8]) {
335                 self.0.extend(val);
336             }
337         }
338     }
339 }
340