//! Data structures and helpers for mapping offsets in compiled code back to
//! positions in the original wasm source binary.
2 
3 use core::fmt;
4 use object::{Bytes, LittleEndian, U32};
5 use serde_derive::{Deserialize, Serialize};
6 
7 /// Single source location to generated address mapping.
8 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
9 pub struct InstructionAddressMap {
10     /// Where in the source wasm binary this instruction comes from, specified
11     /// in an offset of bytes from the front of the file.
12     pub srcloc: FilePos,
13 
14     /// Offset from the start of the function's compiled code to where this
15     /// instruction is located, or the region where it starts.
16     pub code_offset: u32,
17 }
18 
19 /// A position within an original source file,
20 ///
21 /// This structure is used as a newtype wrapper around a 32-bit integer which
22 /// represents an offset within a file where a wasm instruction or function is
23 /// to be originally found.
24 #[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
25 pub struct FilePos(u32);
26 
27 impl FilePos {
28     /// Create a new file position with the given offset.
new(pos: u32) -> FilePos29     pub fn new(pos: u32) -> FilePos {
30         assert!(pos != u32::MAX);
31         FilePos(pos)
32     }
33 
34     /// Get the null file position.
none() -> FilePos35     pub fn none() -> FilePos {
36         FilePos(u32::MAX)
37     }
38 
39     /// Is this the null file position?
40     #[inline]
is_none(&self) -> bool41     pub fn is_none(&self) -> bool {
42         *self == FilePos::none()
43     }
44 
45     /// Returns the offset that this offset was created with.
46     ///
47     /// Note that positions created with `FilePos::none` and the `Default`
48     /// implementation will return `None` here, whereas positions created with
49     /// `FilePos::new` will return `Some`.
file_offset(self) -> Option<u32>50     pub fn file_offset(self) -> Option<u32> {
51         if self.0 == u32::MAX {
52             None
53         } else {
54             Some(self.0)
55         }
56     }
57 }
58 
59 impl Default for FilePos {
default() -> FilePos60     fn default() -> FilePos {
61         FilePos::none()
62     }
63 }
64 
/// A Wasm bytecode offset measured from the start of a component (or
/// top-level module) binary.
///
/// While compiling a component, the Wasm parser reports source positions
/// relative to the whole component binary, and this type encodes that
/// convention. Given the byte offset of a module within the component,
/// [`ComponentPC::to_module_pc`] converts a value of this type into a
/// [`ModulePC`].
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct ComponentPC(u32);
76 
77 impl ComponentPC {
78     /// Create a new component-relative PC from a raw offset.
new(offset: u32) -> Self79     pub fn new(offset: u32) -> Self {
80         Self(offset)
81     }
82 
83     /// Get the raw u32 offset.
raw(self) -> u3284     pub fn raw(self) -> u32 {
85         self.0
86     }
87 
88     /// Convert to a module-relative PC by subtracting the byte offset
89     /// of the module within the component binary.
to_module_pc(self, wasm_module_offset: u64) -> ModulePC90     pub fn to_module_pc(self, wasm_module_offset: u64) -> ModulePC {
91         let offset = u32::try_from(wasm_module_offset).unwrap();
92         ModulePC(self.0 - offset)
93     }
94 }
95 
96 impl fmt::Debug for ComponentPC {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result97     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
98         write!(f, "ComponentPC({:#x})", self.0)
99     }
100 }
101 
102 impl fmt::Display for ComponentPC {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result103     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
104         write!(f, "{:#x}", self.0)
105     }
106 }
107 
/// A Wasm bytecode offset measured from the start of a core Wasm module
/// binary.
///
/// PCs in the guest-debug system are always module-relative: the debugger
/// exposes a core-Wasm view of the world in which components are broken down
/// into their individual core Wasm modules.
///
/// For a standalone (non-component) module, a `ModulePC` and the
/// corresponding [`ComponentPC`] hold the same numeric value.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct ModulePC(u32);
119 
120 impl ModulePC {
121     /// Create a new module-relative PC from a raw offset.
new(offset: u32) -> Self122     pub fn new(offset: u32) -> Self {
123         Self(offset)
124     }
125 
126     /// Get the raw u32 offset.
raw(self) -> u32127     pub fn raw(self) -> u32 {
128         self.0
129     }
130 }
131 
132 impl fmt::Debug for ModulePC {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result133     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
134         write!(f, "ModulePC({:#x})", self.0)
135     }
136 }
137 
138 impl fmt::Display for ModulePC {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result139     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
140         write!(f, "{:#x}", self.0)
141     }
142 }
143 
144 /// Parse an `ELF_WASMTIME_ADDRMAP` section, returning the slice of code offsets
145 /// and the slice of associated file positions for each offset.
parse_address_map(section: &[u8]) -> Option<(&[U32<LittleEndian>], &[U32<LittleEndian>])>146 fn parse_address_map(section: &[u8]) -> Option<(&[U32<LittleEndian>], &[U32<LittleEndian>])> {
147     let mut section = Bytes(section);
148     // NB: this matches the encoding written by `append_to` in the
149     // `compile::address_map` module.
150     let count = section.read::<U32<LittleEndian>>().ok()?;
151     let count = usize::try_from(count.get(LittleEndian)).ok()?;
152     let (offsets, section) =
153         object::slice_from_bytes::<U32<LittleEndian>>(section.0, count).ok()?;
154     let (positions, section) =
155         object::slice_from_bytes::<U32<LittleEndian>>(section, count).ok()?;
156     debug_assert!(section.is_empty());
157     Some((offsets, positions))
158 }
159 
160 /// Lookup an `offset` within an encoded address map section, returning the
161 /// original `FilePos` that corresponds to the offset, if found.
162 ///
163 /// This function takes a `section` as its first argument which must have been
164 /// created with `AddressMapSection` above. This is intended to be the raw
165 /// `ELF_WASMTIME_ADDRMAP` section from the compilation artifact.
166 ///
167 /// The `offset` provided is a relative offset from the start of the text
168 /// section of the pc that is being looked up. If `offset` is out of range or
169 /// doesn't correspond to anything in this file then `None` is returned.
lookup_file_pos(section: &[u8], offset: usize) -> Option<FilePos>170 pub fn lookup_file_pos(section: &[u8], offset: usize) -> Option<FilePos> {
171     let (offsets, positions) = parse_address_map(section)?;
172 
173     // First perform a binary search on the `offsets` array. This is a sorted
174     // array of offsets within the text section, which is conveniently what our
175     // `offset` also is. Note that we are somewhat unlikely to find a precise
176     // match on the element in the array, so we're largely interested in which
177     // "bucket" the `offset` falls into.
178     let offset = u32::try_from(offset).ok()?;
179     let index = match offsets.binary_search_by_key(&offset, |v| v.get(LittleEndian)) {
180         // Exact hit!
181         Ok(i) => i,
182 
183         // This *would* be at the first slot in the array, so no
184         // instructions cover `pc`.
185         Err(0) => return None,
186 
187         // This would be at the `nth` slot, so we're at the `n-1`th slot.
188         Err(n) => n - 1,
189     };
190 
191     // Using the `index` we found of which bucket `offset` corresponds to we can
192     // lookup the actual `FilePos` value in the `positions` array.
193     let pos = positions.get(index)?;
194     Some(FilePos(pos.get(LittleEndian)))
195 }
196 
197 /// Iterate over the address map contained in the given address map section.
198 ///
199 /// This function takes a `section` as its first argument which must have been
200 /// created with `AddressMapSection` above. This is intended to be the raw
201 /// `ELF_WASMTIME_ADDRMAP` section from the compilation artifact.
202 ///
203 /// The yielded offsets are relative to the start of the text section for this
204 /// map's code object.
iterate_address_map<'a>( section: &'a [u8], ) -> Option<impl Iterator<Item = (u32, FilePos)> + 'a>205 pub fn iterate_address_map<'a>(
206     section: &'a [u8],
207 ) -> Option<impl Iterator<Item = (u32, FilePos)> + 'a> {
208     let (offsets, positions) = parse_address_map(section)?;
209 
210     Some(
211         offsets
212             .iter()
213             .map(|o| o.get(LittleEndian))
214             .zip(positions.iter().map(|pos| FilePos(pos.get(LittleEndian)))),
215     )
216 }
217