xref: /wasmtime-44.0.1/crates/cache/src/lib.rs (revision bbd12e92)
1 //! > **⚠️ Warning ⚠️**: this crate is an internal-only crate for the Wasmtime
2 //! > project and is not intended for general use. APIs are not strictly
3 //! > reviewed for safety and usage outside of Wasmtime may have bugs. If
4 //! > you're interested in using this feel free to file an issue on the
5 //! > Wasmtime repository to start a discussion about doing so, but otherwise
6 //! > be aware that your usage of this crate is not supported.
7 
8 use base64::Engine;
9 use log::{debug, trace, warn};
10 use serde::{Deserialize, Serialize};
11 use sha2::{Digest, Sha256};
12 use std::hash::Hash;
13 use std::hash::Hasher;
14 use std::io::Write;
15 use std::path::{Path, PathBuf};
16 use std::sync::Arc;
17 use std::sync::atomic::{AtomicUsize, Ordering::SeqCst};
18 use std::time::Duration;
19 use std::{fs, io};
20 use wasmtime_environ::error::Result;
21 
22 #[macro_use] // for tests
23 mod config;
24 mod worker;
25 
26 pub use config::{CacheConfig, create_new_config};
27 use worker::Worker;
28 
29 /// Global configuration for how the cache is managed
30 #[derive(Debug, Clone)]
31 pub struct Cache {
32     config: CacheConfig,
33     worker: Worker,
34     state: Arc<CacheState>,
35 }
36 
/// Generates a public getter that forwards to the accessor of the same name
/// on `self.config`.
///
/// Invoked as `generate_config_setting_getter!(setting_name: ReturnType);`.
/// The generated method is documented as "Returns `setting_name`.".
macro_rules! generate_config_setting_getter {
    ($name:ident: $ret:ty) => {
        #[doc = concat!("Returns `", stringify!($name), "`.")]
        pub fn $name(&self) -> $ret {
            self.config.$name()
        }
    };
}
45 
46 impl Cache {
47     /// Builds a [`Cache`] from the configuration and spawns the cache worker.
48     ///
49     /// If you want to load the cache configuration from a file, use [`CacheConfig::from_file`].
50     /// You can call [`CacheConfig::new`] for the default configuration.
51     ///
52     /// # Errors
53     /// Returns an error if the configuration is invalid.
new(mut config: CacheConfig) -> Result<Self>54     pub fn new(mut config: CacheConfig) -> Result<Self> {
55         config.validate()?;
56         Ok(Self {
57             worker: Worker::start_new(&config),
58             config,
59             state: Default::default(),
60         })
61     }
62 
63     /// Loads cache configuration specified at `path`.
64     ///
65     /// This method will read the file specified by `path` on the filesystem and
66     /// attempt to load cache configuration from it. This method can also fail
67     /// due to I/O errors, misconfiguration, syntax errors, etc. For expected
68     /// syntax in the configuration file see the [documentation online][docs].
69     ///
70     /// Passing in `None` loads cache configuration from the system default path.
71     /// This is located, for example, on Unix at `$HOME/.config/wasmtime/config.toml`
72     /// and is typically created with the `wasmtime config new` command.
73     ///
74     /// # Errors
75     ///
76     /// This method can fail due to any error that happens when loading the file
77     /// pointed to by `path` and attempting to load the cache configuration.
78     ///
79     /// [docs]: https://bytecodealliance.github.io/wasmtime/cli-cache.html
from_file(path: Option<&Path>) -> Result<Self>80     pub fn from_file(path: Option<&Path>) -> Result<Self> {
81         let config = CacheConfig::from_file(path)?;
82         Self::new(config)
83     }
84 
85     generate_config_setting_getter!(worker_event_queue_size: u64);
86     generate_config_setting_getter!(baseline_compression_level: i32);
87     generate_config_setting_getter!(optimized_compression_level: i32);
88     generate_config_setting_getter!(optimized_compression_usage_counter_threshold: u64);
89     generate_config_setting_getter!(cleanup_interval: Duration);
90     generate_config_setting_getter!(optimizing_compression_task_timeout: Duration);
91     generate_config_setting_getter!(allowed_clock_drift_for_files_from_future: Duration);
92     generate_config_setting_getter!(file_count_soft_limit: u64);
93     generate_config_setting_getter!(files_total_size_soft_limit: u64);
94     generate_config_setting_getter!(file_count_limit_percent_if_deleting: u8);
95     generate_config_setting_getter!(files_total_size_limit_percent_if_deleting: u8);
96 
97     /// Returns path to the cache directory.
directory(&self) -> &PathBuf98     pub fn directory(&self) -> &PathBuf {
99         &self
100             .config
101             .directory()
102             .expect("directory should be validated in Config::new")
103     }
104 
105     #[cfg(test)]
worker(&self) -> &Worker106     fn worker(&self) -> &Worker {
107         &self.worker
108     }
109 
110     /// Returns the number of cache hits seen so far
cache_hits(&self) -> usize111     pub fn cache_hits(&self) -> usize {
112         self.state.hits.load(SeqCst)
113     }
114 
115     /// Returns the number of cache misses seen so far
cache_misses(&self) -> usize116     pub fn cache_misses(&self) -> usize {
117         self.state.misses.load(SeqCst)
118     }
119 
on_cache_get_async(&self, path: impl AsRef<Path>)120     pub(crate) fn on_cache_get_async(&self, path: impl AsRef<Path>) {
121         self.state.hits.fetch_add(1, SeqCst);
122         self.worker.on_cache_get_async(path)
123     }
124 
on_cache_update_async(&self, path: impl AsRef<Path>)125     pub(crate) fn on_cache_update_async(&self, path: impl AsRef<Path>) {
126         self.state.misses.fetch_add(1, SeqCst);
127         self.worker.on_cache_update_async(path)
128     }
129 }
130 
/// Hit/miss counters of a [`Cache`]; both start at zero.
#[derive(Debug, Default)]
struct CacheState {
    /// Incremented each time a cache get is recorded.
    hits: AtomicUsize,
    /// Incremented each time a cache update is recorded.
    misses: AtomicUsize,
}
136 
137 /// Module level cache entry.
138 pub struct ModuleCacheEntry<'cache>(Option<ModuleCacheEntryInner<'cache>>);
139 
140 struct ModuleCacheEntryInner<'cache> {
141     root_path: PathBuf,
142     cache: &'cache Cache,
143 }
144 
145 struct Sha256Hasher(Sha256);
146 
147 impl<'cache> ModuleCacheEntry<'cache> {
148     /// Create the cache entry.
new(compiler_name: &str, cache: Option<&'cache Cache>) -> Self149     pub fn new(compiler_name: &str, cache: Option<&'cache Cache>) -> Self {
150         Self(cache.map(|cache| ModuleCacheEntryInner::new(compiler_name, cache)))
151     }
152 
153     #[cfg(test)]
from_inner(inner: ModuleCacheEntryInner<'cache>) -> Self154     fn from_inner(inner: ModuleCacheEntryInner<'cache>) -> Self {
155         Self(Some(inner))
156     }
157 
158     /// Gets cached data if state matches, otherwise calls `compute`.
159     ///
160     /// Data is automatically serialized/deserialized with `bincode`.
get_data<T, U, E>(&self, state: T, compute: fn(&T) -> Result<U, E>) -> Result<U, E> where T: Hash, U: Serialize + for<'a> Deserialize<'a>,161     pub fn get_data<T, U, E>(&self, state: T, compute: fn(&T) -> Result<U, E>) -> Result<U, E>
162     where
163         T: Hash,
164         U: Serialize + for<'a> Deserialize<'a>,
165     {
166         self.get_data_raw(
167             &state,
168             compute,
169             |_state, data| postcard::to_allocvec(data).ok(),
170             |_state, data| postcard::from_bytes(&data).ok(),
171         )
172     }
173 
174     /// Gets cached data if state matches, otherwise calls `compute`.
175     ///
176     /// If the cache is disabled or no cached data is found then `compute` is
177     /// called to calculate the data. If the data was found in cache it is
178     /// passed to `deserialize`, which if successful will be the returned value.
179     /// When computed the `serialize` function is used to generate the bytes
180     /// from the returned value.
get_data_raw<T, U, E>( &self, state: &T, compute: fn(&T) -> Result<U, E>, serialize: fn(&T, &U) -> Option<Vec<u8>>, deserialize: fn(&T, Vec<u8>) -> Option<U>, ) -> Result<U, E> where T: Hash,181     pub fn get_data_raw<T, U, E>(
182         &self,
183         state: &T,
184         // NOTE: These are function pointers instead of closures so that they
185         // don't accidentally close over something not accounted in the cache.
186         compute: fn(&T) -> Result<U, E>,
187         serialize: fn(&T, &U) -> Option<Vec<u8>>,
188         deserialize: fn(&T, Vec<u8>) -> Option<U>,
189     ) -> Result<U, E>
190     where
191         T: Hash,
192     {
193         let inner = match &self.0 {
194             Some(inner) => inner,
195             None => return compute(state),
196         };
197 
198         let mut hasher = Sha256Hasher(Sha256::new());
199         state.hash(&mut hasher);
200         let hash: [u8; 32] = hasher.0.finalize().into();
201         // standard encoding uses '/' which can't be used for filename
202         let hash = base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(&hash);
203 
204         if let Some(cached_val) = inner.get_data(&hash) {
205             if let Some(val) = deserialize(state, cached_val) {
206                 let mod_cache_path = inner.root_path.join(&hash);
207                 inner.cache.on_cache_get_async(&mod_cache_path); // call on success
208                 return Ok(val);
209             }
210         }
211         let val_to_cache = compute(state)?;
212         if let Some(bytes) = serialize(state, &val_to_cache) {
213             if inner.update_data(&hash, &bytes).is_some() {
214                 let mod_cache_path = inner.root_path.join(&hash);
215                 inner.cache.on_cache_update_async(&mod_cache_path); // call on success
216             }
217         }
218         Ok(val_to_cache)
219     }
220 }
221 
222 impl<'cache> ModuleCacheEntryInner<'cache> {
new(compiler_name: &str, cache: &'cache Cache) -> Self223     fn new(compiler_name: &str, cache: &'cache Cache) -> Self {
224         // If debug assertions are enabled then assume that we're some sort of
225         // local build. We don't want local builds to stomp over caches between
226         // builds, so just use a separate cache directory based on the mtime of
227         // our executable, which should roughly correlate with "you changed the
228         // source code so you get a different directory".
229         //
230         // Otherwise if this is a release build we use the `GIT_REV` env var
231         // which is either the git rev if installed from git or the crate
232         // version if installed from crates.io.
233         let compiler_dir = if cfg!(debug_assertions) {
234             fn self_mtime() -> Option<String> {
235                 let path = std::env::current_exe().ok()?;
236                 let metadata = path.metadata().ok()?;
237                 let mtime = metadata.modified().ok()?;
238                 Some(match mtime.duration_since(std::time::UNIX_EPOCH) {
239                     Ok(dur) => format!("{}", dur.as_millis()),
240                     Err(err) => format!("m{}", err.duration().as_millis()),
241                 })
242             }
243             let self_mtime = self_mtime().unwrap_or("no-mtime".to_string());
244             format!(
245                 "{comp_name}-{comp_ver}-{comp_mtime}",
246                 comp_name = compiler_name,
247                 comp_ver = env!("GIT_REV"),
248                 comp_mtime = self_mtime,
249             )
250         } else {
251             format!(
252                 "{comp_name}-{comp_ver}",
253                 comp_name = compiler_name,
254                 comp_ver = env!("GIT_REV"),
255             )
256         };
257         let root_path = cache.directory().join("modules").join(compiler_dir);
258 
259         Self { root_path, cache }
260     }
261 
get_data(&self, hash: &str) -> Option<Vec<u8>>262     fn get_data(&self, hash: &str) -> Option<Vec<u8>> {
263         let mod_cache_path = self.root_path.join(hash);
264         trace!("get_data() for path: {}", mod_cache_path.display());
265         let compressed_cache_bytes = fs::read(&mod_cache_path).ok()?;
266         let cache_bytes = zstd::decode_all(&compressed_cache_bytes[..])
267             .map_err(|err| warn!("Failed to decompress cached code: {err}"))
268             .ok()?;
269         Some(cache_bytes)
270     }
271 
update_data(&self, hash: &str, serialized_data: &[u8]) -> Option<()>272     fn update_data(&self, hash: &str, serialized_data: &[u8]) -> Option<()> {
273         let mod_cache_path = self.root_path.join(hash);
274         trace!("update_data() for path: {}", mod_cache_path.display());
275         let compressed_data = zstd::encode_all(
276             &serialized_data[..],
277             self.cache.baseline_compression_level(),
278         )
279         .map_err(|err| warn!("Failed to compress cached code: {err}"))
280         .ok()?;
281 
282         // Optimize syscalls: first, try writing to disk. It should succeed in most cases.
283         // Otherwise, try creating the cache directory and retry writing to the file.
284         if fs_write_atomic(&mod_cache_path, "mod", &compressed_data).is_ok() {
285             return Some(());
286         }
287 
288         debug!(
289             "Attempting to create the cache directory, because \
290              failed to write cached code to disk, path: {}",
291             mod_cache_path.display(),
292         );
293 
294         let cache_dir = mod_cache_path.parent().unwrap();
295         fs::create_dir_all(cache_dir)
296             .map_err(|err| {
297                 warn!(
298                     "Failed to create cache directory, path: {}, message: {}",
299                     cache_dir.display(),
300                     err
301                 )
302             })
303             .ok()?;
304 
305         match fs_write_atomic(&mod_cache_path, "mod", &compressed_data) {
306             Ok(_) => Some(()),
307             Err(err) => {
308                 warn!(
309                     "Failed to write file with rename, target path: {}, err: {}",
310                     mod_cache_path.display(),
311                     err
312                 );
313                 None
314             }
315         }
316     }
317 }
318 
319 impl Hasher for Sha256Hasher {
finish(&self) -> u64320     fn finish(&self) -> u64 {
321         panic!("Sha256Hasher doesn't support finish!");
322     }
323 
write(&mut self, bytes: &[u8])324     fn write(&mut self, bytes: &[u8]) {
325         self.0.update(bytes);
326     }
327 }
328 
// Writes `contents` to `path` by first creating a sibling staging file (the
// path with its extension set to `wip-atomic-write-<reason>`) and then
// renaming it over `path`.
//
// Assumption: path inside cache directory.
// Then, we don't have to use sound OS-specific exclusive file access.
// Note: there's no need to remove temporary file here - cleanup task will do it later.
//
// Fails if the staging file already exists or cannot be created, if writing
// the contents fails, or if the rename fails.
fn fs_write_atomic(path: &Path, reason: &str, contents: &[u8]) -> io::Result<()> {
    let staging_path = path.with_extension(format!("wip-atomic-write-{reason}"));
    {
        // atomic file creation (assumption: no one will open it without this flag)
        let mut staging_file = fs::OpenOptions::new()
            .write(true)
            .create_new(true)
            .open(&staging_path)?;
        staging_file.write_all(contents)?;
        // `staging_file` goes out of scope here, closing it before the rename
    }
    // atomic file rename
    fs::rename(&staging_path, path)
}
342 
343 #[cfg(test)]
344 mod tests;
345