xref: /wasmtime-44.0.1/crates/cache/src/lib.rs (revision b3fa9bfa)
1 use anyhow::Result;
2 use base64::Engine;
3 use log::{debug, trace, warn};
4 use serde::{Deserialize, Serialize};
5 use sha2::{Digest, Sha256};
6 use std::hash::Hash;
7 use std::hash::Hasher;
8 use std::io::Write;
9 use std::path::{Path, PathBuf};
10 use std::sync::Arc;
11 use std::sync::atomic::{AtomicUsize, Ordering::SeqCst};
12 use std::time::Duration;
13 use std::{fs, io};
14 
15 #[macro_use] // for tests
16 mod config;
17 mod worker;
18 
19 pub use config::{CacheConfig, create_new_config};
20 use worker::Worker;
21 
/// Global configuration for how the cache is managed.
///
/// Bundles the [`CacheConfig`] the cache was built from, the cache `Worker`
/// that is notified about cache reads and updates, and the hit/miss counters.
/// The counters live behind an `Arc`, so clones of a `Cache` share them.
#[derive(Debug, Clone)]
pub struct Cache {
    // Settings this cache was built from; the getters on `Cache` forward to it.
    config: CacheConfig,
    // Receives the `on_cache_get_async` / `on_cache_update_async` notifications.
    worker: Worker,
    // Hit and miss counters, shared between clones through the `Arc`.
    state: Arc<CacheState>,
}
29 
// Generates a public getter named `$setting` that forwards to the getter of
// the same name on `self.config`.
//
// The summary line of the generated documentation is assembled with
// `concat!`/`stringify!` because macro metavariables are not substituted
// inside `///` comments: a literal `$setting` would otherwise show up in the
// rendered docs of every generated getter.
macro_rules! generate_config_setting_getter {
    ($setting:ident: $setting_type:ty) => {
        #[doc = concat!("Returns `", stringify!($setting), "`.")]
        ///
        /// Panics if the cache is disabled.
        pub fn $setting(&self) -> $setting_type {
            self.config.$setting()
        }
    };
}
40 
41 impl Cache {
42     /// Builds a [`Cache`] from the configuration and spawns the cache worker.
43     ///
44     /// If you want to load the cache configuration from a file, use [`CacheConfig::from_file`].
45     /// You can call [`CacheConfig::new`] for the default configuration.
46     ///
47     /// # Errors
48     /// Returns an error if the configuration is invalid.
49     pub fn new(mut config: CacheConfig) -> Result<Self> {
50         config.validate()?;
51         Ok(Self {
52             worker: Worker::start_new(&config),
53             config,
54             state: Default::default(),
55         })
56     }
57 
58     /// Loads cache configuration specified at `path`.
59     ///
60     /// This method will read the file specified by `path` on the filesystem and
61     /// attempt to load cache configuration from it. This method can also fail
62     /// due to I/O errors, misconfiguration, syntax errors, etc. For expected
63     /// syntax in the configuration file see the [documentation online][docs].
64     ///
65     /// Passing in `None` loads cache configuration from the system default path.
66     /// This is located, for example, on Unix at `$HOME/.config/wasmtime/config.toml`
67     /// and is typically created with the `wasmtime config new` command.
68     ///
69     /// # Errors
70     ///
71     /// This method can fail due to any error that happens when loading the file
72     /// pointed to by `path` and attempting to load the cache configuration.
73     ///
74     /// [docs]: https://bytecodealliance.github.io/wasmtime/cli-cache.html
75     pub fn from_file(path: Option<&Path>) -> Result<Self> {
76         let config = CacheConfig::from_file(path)?;
77         Self::new(config)
78     }
79 
80     generate_config_setting_getter!(worker_event_queue_size: u64);
81     generate_config_setting_getter!(baseline_compression_level: i32);
82     generate_config_setting_getter!(optimized_compression_level: i32);
83     generate_config_setting_getter!(optimized_compression_usage_counter_threshold: u64);
84     generate_config_setting_getter!(cleanup_interval: Duration);
85     generate_config_setting_getter!(optimizing_compression_task_timeout: Duration);
86     generate_config_setting_getter!(allowed_clock_drift_for_files_from_future: Duration);
87     generate_config_setting_getter!(file_count_soft_limit: u64);
88     generate_config_setting_getter!(files_total_size_soft_limit: u64);
89     generate_config_setting_getter!(file_count_limit_percent_if_deleting: u8);
90     generate_config_setting_getter!(files_total_size_limit_percent_if_deleting: u8);
91 
92     /// Returns path to the cache directory.
93     ///
94     /// Panics if the cache directory is not set.
95     pub fn directory(&self) -> &PathBuf {
96         &self.config.directory()
97     }
98 
99     #[cfg(test)]
100     fn worker(&self) -> &Worker {
101         &self.worker
102     }
103 
104     /// Returns the number of cache hits seen so far
105     pub fn cache_hits(&self) -> usize {
106         self.state.hits.load(SeqCst)
107     }
108 
109     /// Returns the number of cache misses seen so far
110     pub fn cache_misses(&self) -> usize {
111         self.state.misses.load(SeqCst)
112     }
113 
114     pub(crate) fn on_cache_get_async(&self, path: impl AsRef<Path>) {
115         self.state.hits.fetch_add(1, SeqCst);
116         self.worker.on_cache_get_async(path)
117     }
118 
119     pub(crate) fn on_cache_update_async(&self, path: impl AsRef<Path>) {
120         self.state.misses.fetch_add(1, SeqCst);
121         self.worker.on_cache_update_async(path)
122     }
123 }
124 
/// Hit/miss counters of a [`Cache`].
///
/// Both counters start at zero (via `Default`) and are only ever incremented:
/// `hits` by `Cache::on_cache_get_async` and `misses` by
/// `Cache::on_cache_update_async`.
#[derive(Default, Debug)]
struct CacheState {
    // Number of entries that were successfully served from the cache.
    hits: AtomicUsize,
    // Number of entries that had to be computed and were then written to the cache.
    misses: AtomicUsize,
}
130 
/// Module level cache entry.
///
/// Wraps `None` when no [`Cache`] was supplied to [`ModuleCacheEntry::new`];
/// in that case every lookup goes straight to the `compute` function and
/// nothing is read from or written to disk.
pub struct ModuleCacheEntry<'cache>(Option<ModuleCacheEntryInner<'cache>>);
133 
/// State of a [`ModuleCacheEntry`] that is backed by an actual [`Cache`].
struct ModuleCacheEntryInner<'cache> {
    // Directory holding the entries of one compiler: the cache directory,
    // then `modules`, then a per-compiler directory name.
    root_path: PathBuf,
    // Cache supplying the compression level and receiving hit/miss notifications.
    cache: &'cache Cache,
}
138 
/// Adapter that lets a `Hash` value feed its bytes into a SHA-256 digest
/// through the `std::hash::Hasher` interface.
struct Sha256Hasher(Sha256);
140 
141 impl<'cache> ModuleCacheEntry<'cache> {
142     /// Create the cache entry.
143     pub fn new(compiler_name: &str, cache: Option<&'cache Cache>) -> Self {
144         Self(cache.map(|cache| ModuleCacheEntryInner::new(compiler_name, cache)))
145     }
146 
147     #[cfg(test)]
148     fn from_inner(inner: ModuleCacheEntryInner<'cache>) -> Self {
149         Self(Some(inner))
150     }
151 
152     /// Gets cached data if state matches, otherwise calls `compute`.
153     ///
154     /// Data is automatically serialized/deserialized with `bincode`.
155     pub fn get_data<T, U, E>(&self, state: T, compute: fn(&T) -> Result<U, E>) -> Result<U, E>
156     where
157         T: Hash,
158         U: Serialize + for<'a> Deserialize<'a>,
159     {
160         self.get_data_raw(
161             &state,
162             compute,
163             |_state, data| postcard::to_allocvec(data).ok(),
164             |_state, data| postcard::from_bytes(&data).ok(),
165         )
166     }
167 
168     /// Gets cached data if state matches, otherwise calls `compute`.
169     ///
170     /// If the cache is disabled or no cached data is found then `compute` is
171     /// called to calculate the data. If the data was found in cache it is
172     /// passed to `deserialize`, which if successful will be the returned value.
173     /// When computed the `serialize` function is used to generate the bytes
174     /// from the returned value.
175     pub fn get_data_raw<T, U, E>(
176         &self,
177         state: &T,
178         // NOTE: These are function pointers instead of closures so that they
179         // don't accidentally close over something not accounted in the cache.
180         compute: fn(&T) -> Result<U, E>,
181         serialize: fn(&T, &U) -> Option<Vec<u8>>,
182         deserialize: fn(&T, Vec<u8>) -> Option<U>,
183     ) -> Result<U, E>
184     where
185         T: Hash,
186     {
187         let inner = match &self.0 {
188             Some(inner) => inner,
189             None => return compute(state),
190         };
191 
192         let mut hasher = Sha256Hasher(Sha256::new());
193         state.hash(&mut hasher);
194         let hash: [u8; 32] = hasher.0.finalize().into();
195         // standard encoding uses '/' which can't be used for filename
196         let hash = base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(&hash);
197 
198         if let Some(cached_val) = inner.get_data(&hash) {
199             if let Some(val) = deserialize(state, cached_val) {
200                 let mod_cache_path = inner.root_path.join(&hash);
201                 inner.cache.on_cache_get_async(&mod_cache_path); // call on success
202                 return Ok(val);
203             }
204         }
205         let val_to_cache = compute(state)?;
206         if let Some(bytes) = serialize(state, &val_to_cache) {
207             if inner.update_data(&hash, &bytes).is_some() {
208                 let mod_cache_path = inner.root_path.join(&hash);
209                 inner.cache.on_cache_update_async(&mod_cache_path); // call on success
210             }
211         }
212         Ok(val_to_cache)
213     }
214 }
215 
216 impl<'cache> ModuleCacheEntryInner<'cache> {
217     fn new(compiler_name: &str, cache: &'cache Cache) -> Self {
218         // If debug assertions are enabled then assume that we're some sort of
219         // local build. We don't want local builds to stomp over caches between
220         // builds, so just use a separate cache directory based on the mtime of
221         // our executable, which should roughly correlate with "you changed the
222         // source code so you get a different directory".
223         //
224         // Otherwise if this is a release build we use the `GIT_REV` env var
225         // which is either the git rev if installed from git or the crate
226         // version if installed from crates.io.
227         let compiler_dir = if cfg!(debug_assertions) {
228             fn self_mtime() -> Option<String> {
229                 let path = std::env::current_exe().ok()?;
230                 let metadata = path.metadata().ok()?;
231                 let mtime = metadata.modified().ok()?;
232                 Some(match mtime.duration_since(std::time::UNIX_EPOCH) {
233                     Ok(dur) => format!("{}", dur.as_millis()),
234                     Err(err) => format!("m{}", err.duration().as_millis()),
235                 })
236             }
237             let self_mtime = self_mtime().unwrap_or("no-mtime".to_string());
238             format!(
239                 "{comp_name}-{comp_ver}-{comp_mtime}",
240                 comp_name = compiler_name,
241                 comp_ver = env!("GIT_REV"),
242                 comp_mtime = self_mtime,
243             )
244         } else {
245             format!(
246                 "{comp_name}-{comp_ver}",
247                 comp_name = compiler_name,
248                 comp_ver = env!("GIT_REV"),
249             )
250         };
251         let root_path = cache.directory().join("modules").join(compiler_dir);
252 
253         Self { root_path, cache }
254     }
255 
256     fn get_data(&self, hash: &str) -> Option<Vec<u8>> {
257         let mod_cache_path = self.root_path.join(hash);
258         trace!("get_data() for path: {}", mod_cache_path.display());
259         let compressed_cache_bytes = fs::read(&mod_cache_path).ok()?;
260         let cache_bytes = zstd::decode_all(&compressed_cache_bytes[..])
261             .map_err(|err| warn!("Failed to decompress cached code: {}", err))
262             .ok()?;
263         Some(cache_bytes)
264     }
265 
266     fn update_data(&self, hash: &str, serialized_data: &[u8]) -> Option<()> {
267         let mod_cache_path = self.root_path.join(hash);
268         trace!("update_data() for path: {}", mod_cache_path.display());
269         let compressed_data = zstd::encode_all(
270             &serialized_data[..],
271             self.cache.baseline_compression_level(),
272         )
273         .map_err(|err| warn!("Failed to compress cached code: {}", err))
274         .ok()?;
275 
276         // Optimize syscalls: first, try writing to disk. It should succeed in most cases.
277         // Otherwise, try creating the cache directory and retry writing to the file.
278         if fs_write_atomic(&mod_cache_path, "mod", &compressed_data).is_ok() {
279             return Some(());
280         }
281 
282         debug!(
283             "Attempting to create the cache directory, because \
284              failed to write cached code to disk, path: {}",
285             mod_cache_path.display(),
286         );
287 
288         let cache_dir = mod_cache_path.parent().unwrap();
289         fs::create_dir_all(cache_dir)
290             .map_err(|err| {
291                 warn!(
292                     "Failed to create cache directory, path: {}, message: {}",
293                     cache_dir.display(),
294                     err
295                 )
296             })
297             .ok()?;
298 
299         match fs_write_atomic(&mod_cache_path, "mod", &compressed_data) {
300             Ok(_) => Some(()),
301             Err(err) => {
302                 warn!(
303                     "Failed to write file with rename, target path: {}, err: {}",
304                     mod_cache_path.display(),
305                     err
306                 );
307                 None
308             }
309         }
310     }
311 }
312 
313 impl Hasher for Sha256Hasher {
314     fn finish(&self) -> u64 {
315         panic!("Sha256Hasher doesn't support finish!");
316     }
317 
318     fn write(&mut self, bytes: &[u8]) {
319         self.0.update(bytes);
320     }
321 }
322 
// Writes `contents` to `path` by first writing a sibling staging file (the
// extension of `path` is replaced with `wip-atomic-write-<reason>`) and then
// renaming it over `path`.
//
// Assumption: `path` is inside the cache directory, so sound OS-specific
// exclusive file access is not needed.
// Note: a staging file left behind by a failure is not removed here - the
// cleanup task will do it later.
fn fs_write_atomic(path: &Path, reason: &str, contents: &[u8]) -> io::Result<()> {
    let staging_path = path.with_extension(format!("wip-atomic-write-{reason}"));
    {
        // `create_new` makes the creation atomic: it fails if the staging file
        // already exists (assumption: no one will open it without this flag).
        let mut staging_file = fs::OpenOptions::new()
            .write(true)
            .create_new(true)
            .open(&staging_path)?;
        staging_file.write_all(contents)?;
        // The file is closed at the end of this scope, before the rename.
    }
    // atomic file rename
    fs::rename(&staging_path, path)
}
336 
337 #[cfg(test)]
338 mod tests;
339