xref: /wasmtime-44.0.1/crates/cache/src/lib.rs (revision 331b0dee)
1 use log::{debug, trace, warn};
2 use serde::{Deserialize, Serialize};
3 use sha2::{Digest, Sha256};
4 use std::fs;
5 use std::hash::Hash;
6 use std::hash::Hasher;
7 use std::io::Write;
8 use std::path::{Path, PathBuf};
9 
10 #[macro_use] // for tests
11 mod config;
12 mod worker;
13 
14 pub use config::{create_new_config, CacheConfig};
15 use worker::Worker;
16 
17 /// Module level cache entry.
18 pub struct ModuleCacheEntry<'config>(Option<ModuleCacheEntryInner<'config>>);
19 
20 struct ModuleCacheEntryInner<'config> {
21     root_path: PathBuf,
22     cache_config: &'config CacheConfig,
23 }
24 
25 struct Sha256Hasher(Sha256);
26 
27 impl<'config> ModuleCacheEntry<'config> {
28     /// Create the cache entry.
29     pub fn new<'data>(compiler_name: &str, cache_config: &'config CacheConfig) -> Self {
30         if cache_config.enabled() {
31             Self(Some(ModuleCacheEntryInner::new(
32                 compiler_name,
33                 cache_config,
34             )))
35         } else {
36             Self(None)
37         }
38     }
39 
40     #[cfg(test)]
41     fn from_inner(inner: ModuleCacheEntryInner<'config>) -> Self {
42         Self(Some(inner))
43     }
44 
45     /// Gets cached data if state matches, otherwise calls the `compute`.
46     // NOTE: This takes a function pointer instead of a closure so that it doesn't accidentally
47     // close over something not accounted in the cache.
48     pub fn get_data<T, U, E>(&self, state: T, compute: fn(T) -> Result<U, E>) -> Result<U, E>
49     where
50         T: Hash,
51         U: Serialize + for<'a> Deserialize<'a>,
52     {
53         let inner = match &self.0 {
54             Some(inner) => inner,
55             None => return compute(state),
56         };
57 
58         let mut hasher = Sha256Hasher(Sha256::new());
59         state.hash(&mut hasher);
60         let hash: [u8; 32] = hasher.0.finalize().into();
61         // standard encoding uses '/' which can't be used for filename
62         let hash = base64::encode_config(&hash, base64::URL_SAFE_NO_PAD);
63 
64         if let Some(cached_val) = inner.get_data(&hash) {
65             let mod_cache_path = inner.root_path.join(&hash);
66             inner.cache_config.on_cache_get_async(&mod_cache_path); // call on success
67             return Ok(cached_val);
68         }
69         let val_to_cache = compute(state)?;
70         if inner.update_data(&hash, &val_to_cache).is_some() {
71             let mod_cache_path = inner.root_path.join(&hash);
72             inner.cache_config.on_cache_update_async(&mod_cache_path); // call on success
73         }
74         Ok(val_to_cache)
75     }
76 }
77 
78 impl<'config> ModuleCacheEntryInner<'config> {
79     fn new<'data>(compiler_name: &str, cache_config: &'config CacheConfig) -> Self {
80         // If debug assertions are enabled then assume that we're some sort of
81         // local build. We don't want local builds to stomp over caches between
82         // builds, so just use a separate cache directory based on the mtime of
83         // our executable, which should roughly correlate with "you changed the
84         // source code so you get a different directory".
85         //
86         // Otherwise if this is a release build we use the `GIT_REV` env var
87         // which is either the git rev if installed from git or the crate
88         // version if installed from crates.io.
89         let compiler_dir = if cfg!(debug_assertions) {
90             fn self_mtime() -> Option<String> {
91                 let path = std::env::current_exe().ok()?;
92                 let metadata = path.metadata().ok()?;
93                 let mtime = metadata.modified().ok()?;
94                 Some(match mtime.duration_since(std::time::UNIX_EPOCH) {
95                     Ok(dur) => format!("{}", dur.as_millis()),
96                     Err(err) => format!("m{}", err.duration().as_millis()),
97                 })
98             }
99             let self_mtime = self_mtime().unwrap_or("no-mtime".to_string());
100             format!(
101                 "{comp_name}-{comp_ver}-{comp_mtime}",
102                 comp_name = compiler_name,
103                 comp_ver = env!("GIT_REV"),
104                 comp_mtime = self_mtime,
105             )
106         } else {
107             format!(
108                 "{comp_name}-{comp_ver}",
109                 comp_name = compiler_name,
110                 comp_ver = env!("GIT_REV"),
111             )
112         };
113         let root_path = cache_config.directory().join("modules").join(compiler_dir);
114 
115         Self {
116             root_path,
117             cache_config,
118         }
119     }
120 
121     fn get_data<T>(&self, hash: &str) -> Option<T>
122     where
123         T: for<'a> Deserialize<'a>,
124     {
125         let mod_cache_path = self.root_path.join(hash);
126         trace!("get_data() for path: {}", mod_cache_path.display());
127         let compressed_cache_bytes = fs::read(&mod_cache_path).ok()?;
128         let cache_bytes = zstd::decode_all(&compressed_cache_bytes[..])
129             .map_err(|err| warn!("Failed to decompress cached code: {}", err))
130             .ok()?;
131         bincode::deserialize(&cache_bytes[..])
132             .map_err(|err| warn!("Failed to deserialize cached code: {}", err))
133             .ok()
134     }
135 
136     fn update_data<T: Serialize>(&self, hash: &str, data: &T) -> Option<()> {
137         let mod_cache_path = self.root_path.join(hash);
138         trace!("update_data() for path: {}", mod_cache_path.display());
139         let serialized_data = bincode::serialize(&data)
140             .map_err(|err| warn!("Failed to serialize cached code: {}", err))
141             .ok()?;
142         let compressed_data = zstd::encode_all(
143             &serialized_data[..],
144             self.cache_config.baseline_compression_level(),
145         )
146         .map_err(|err| warn!("Failed to compress cached code: {}", err))
147         .ok()?;
148 
149         // Optimize syscalls: first, try writing to disk. It should succeed in most cases.
150         // Otherwise, try creating the cache directory and retry writing to the file.
151         if fs_write_atomic(&mod_cache_path, "mod", &compressed_data) {
152             return Some(());
153         }
154 
155         debug!(
156             "Attempting to create the cache directory, because \
157              failed to write cached code to disk, path: {}",
158             mod_cache_path.display(),
159         );
160 
161         let cache_dir = mod_cache_path.parent().unwrap();
162         fs::create_dir_all(cache_dir)
163             .map_err(|err| {
164                 warn!(
165                     "Failed to create cache directory, path: {}, message: {}",
166                     cache_dir.display(),
167                     err
168                 )
169             })
170             .ok()?;
171 
172         if fs_write_atomic(&mod_cache_path, "mod", &compressed_data) {
173             Some(())
174         } else {
175             None
176         }
177     }
178 }
179 
180 impl Hasher for Sha256Hasher {
181     fn finish(&self) -> u64 {
182         panic!("Sha256Hasher doesn't support finish!");
183     }
184 
185     fn write(&mut self, bytes: &[u8]) {
186         self.0.update(bytes);
187     }
188 }
189 
190 // Assumption: path inside cache directory.
191 // Then, we don't have to use sound OS-specific exclusive file access.
192 // Note: there's no need to remove temporary file here - cleanup task will do it later.
193 fn fs_write_atomic(path: &Path, reason: &str, contents: &[u8]) -> bool {
194     let lock_path = path.with_extension(format!("wip-atomic-write-{}", reason));
195     fs::OpenOptions::new()
196         .create_new(true) // atomic file creation (assumption: no one will open it without this flag)
197         .write(true)
198         .open(&lock_path)
199         .and_then(|mut file| file.write_all(contents))
200         // file should go out of scope and be closed at this point
201         .and_then(|()| fs::rename(&lock_path, &path)) // atomic file rename
202         .map_err(|err| {
203             warn!(
204                 "Failed to write file with rename, lock path: {}, target path: {}, err: {}",
205                 lock_path.display(),
206                 path.display(),
207                 err
208             )
209         })
210         .is_ok()
211 }
212 
213 #[cfg(test)]
214 mod tests;
215