1 //! > **⚠️ Warning ⚠️**: this crate is an internal-only crate for the Wasmtime 2 //! > project and is not intended for general use. APIs are not strictly 3 //! > reviewed for safety and usage outside of Wasmtime may have bugs. If 4 //! > you're interested in using this feel free to file an issue on the 5 //! > Wasmtime repository to start a discussion about doing so, but otherwise 6 //! > be aware that your usage of this crate is not supported. 7 8 use base64::Engine; 9 use log::{debug, trace, warn}; 10 use serde::{Deserialize, Serialize}; 11 use sha2::{Digest, Sha256}; 12 use std::hash::Hash; 13 use std::hash::Hasher; 14 use std::io::Write; 15 use std::path::{Path, PathBuf}; 16 use std::sync::Arc; 17 use std::sync::atomic::{AtomicUsize, Ordering::SeqCst}; 18 use std::time::Duration; 19 use std::{fs, io}; 20 use wasmtime_environ::error::Result; 21 22 #[macro_use] // for tests 23 mod config; 24 mod worker; 25 26 pub use config::{CacheConfig, create_new_config}; 27 use worker::Worker; 28 29 /// Global configuration for how the cache is managed 30 #[derive(Debug, Clone)] 31 pub struct Cache { 32 config: CacheConfig, 33 worker: Worker, 34 state: Arc<CacheState>, 35 } 36 37 macro_rules! generate_config_setting_getter { 38 ($setting:ident: $setting_type:ty) => { 39 #[doc = concat!("Returns ", "`", stringify!($setting), "`.")] 40 pub fn $setting(&self) -> $setting_type { 41 self.config.$setting() 42 } 43 }; 44 } 45 46 impl Cache { 47 /// Builds a [`Cache`] from the configuration and spawns the cache worker. 48 /// 49 /// If you want to load the cache configuration from a file, use [`CacheConfig::from_file`]. 50 /// You can call [`CacheConfig::new`] for the default configuration. 51 /// 52 /// # Errors 53 /// Returns an error if the configuration is invalid. 
54 pub fn new(mut config: CacheConfig) -> Result<Self> { 55 config.validate()?; 56 Ok(Self { 57 worker: Worker::start_new(&config), 58 config, 59 state: Default::default(), 60 }) 61 } 62 63 /// Loads cache configuration specified at `path`. 64 /// 65 /// This method will read the file specified by `path` on the filesystem and 66 /// attempt to load cache configuration from it. This method can also fail 67 /// due to I/O errors, misconfiguration, syntax errors, etc. For expected 68 /// syntax in the configuration file see the [documentation online][docs]. 69 /// 70 /// Passing in `None` loads cache configuration from the system default path. 71 /// This is located, for example, on Unix at `$HOME/.config/wasmtime/config.toml` 72 /// and is typically created with the `wasmtime config new` command. 73 /// 74 /// # Errors 75 /// 76 /// This method can fail due to any error that happens when loading the file 77 /// pointed to by `path` and attempting to load the cache configuration. 78 /// 79 /// [docs]: https://bytecodealliance.github.io/wasmtime/cli-cache.html 80 pub fn from_file(path: Option<&Path>) -> Result<Self> { 81 let config = CacheConfig::from_file(path)?; 82 Self::new(config) 83 } 84 85 generate_config_setting_getter!(worker_event_queue_size: u64); 86 generate_config_setting_getter!(baseline_compression_level: i32); 87 generate_config_setting_getter!(optimized_compression_level: i32); 88 generate_config_setting_getter!(optimized_compression_usage_counter_threshold: u64); 89 generate_config_setting_getter!(cleanup_interval: Duration); 90 generate_config_setting_getter!(optimizing_compression_task_timeout: Duration); 91 generate_config_setting_getter!(allowed_clock_drift_for_files_from_future: Duration); 92 generate_config_setting_getter!(file_count_soft_limit: u64); 93 generate_config_setting_getter!(files_total_size_soft_limit: u64); 94 generate_config_setting_getter!(file_count_limit_percent_if_deleting: u8); 95 
generate_config_setting_getter!(files_total_size_limit_percent_if_deleting: u8); 96 97 /// Returns path to the cache directory. 98 pub fn directory(&self) -> &PathBuf { 99 &self 100 .config 101 .directory() 102 .expect("directory should be validated in Config::new") 103 } 104 105 #[cfg(test)] 106 fn worker(&self) -> &Worker { 107 &self.worker 108 } 109 110 /// Returns the number of cache hits seen so far 111 pub fn cache_hits(&self) -> usize { 112 self.state.hits.load(SeqCst) 113 } 114 115 /// Returns the number of cache misses seen so far 116 pub fn cache_misses(&self) -> usize { 117 self.state.misses.load(SeqCst) 118 } 119 120 pub(crate) fn on_cache_get_async(&self, path: impl AsRef<Path>) { 121 self.state.hits.fetch_add(1, SeqCst); 122 self.worker.on_cache_get_async(path) 123 } 124 125 pub(crate) fn on_cache_update_async(&self, path: impl AsRef<Path>) { 126 self.state.misses.fetch_add(1, SeqCst); 127 self.worker.on_cache_update_async(path) 128 } 129 } 130 131 #[derive(Default, Debug)] 132 struct CacheState { 133 hits: AtomicUsize, 134 misses: AtomicUsize, 135 } 136 137 /// Module level cache entry. 138 pub struct ModuleCacheEntry<'cache>(Option<ModuleCacheEntryInner<'cache>>); 139 140 struct ModuleCacheEntryInner<'cache> { 141 root_path: PathBuf, 142 cache: &'cache Cache, 143 } 144 145 struct Sha256Hasher(Sha256); 146 147 impl<'cache> ModuleCacheEntry<'cache> { 148 /// Create the cache entry. 149 pub fn new(compiler_name: &str, cache: Option<&'cache Cache>) -> Self { 150 Self(cache.map(|cache| ModuleCacheEntryInner::new(compiler_name, cache))) 151 } 152 153 #[cfg(test)] 154 fn from_inner(inner: ModuleCacheEntryInner<'cache>) -> Self { 155 Self(Some(inner)) 156 } 157 158 /// Gets cached data if state matches, otherwise calls `compute`. 159 /// 160 /// Data is automatically serialized/deserialized with `bincode`. 
161 pub fn get_data<T, U, E>(&self, state: T, compute: fn(&T) -> Result<U, E>) -> Result<U, E> 162 where 163 T: Hash, 164 U: Serialize + for<'a> Deserialize<'a>, 165 { 166 self.get_data_raw( 167 &state, 168 compute, 169 |_state, data| postcard::to_allocvec(data).ok(), 170 |_state, data| postcard::from_bytes(&data).ok(), 171 ) 172 } 173 174 /// Gets cached data if state matches, otherwise calls `compute`. 175 /// 176 /// If the cache is disabled or no cached data is found then `compute` is 177 /// called to calculate the data. If the data was found in cache it is 178 /// passed to `deserialize`, which if successful will be the returned value. 179 /// When computed the `serialize` function is used to generate the bytes 180 /// from the returned value. 181 pub fn get_data_raw<T, U, E>( 182 &self, 183 state: &T, 184 // NOTE: These are function pointers instead of closures so that they 185 // don't accidentally close over something not accounted in the cache. 186 compute: fn(&T) -> Result<U, E>, 187 serialize: fn(&T, &U) -> Option<Vec<u8>>, 188 deserialize: fn(&T, Vec<u8>) -> Option<U>, 189 ) -> Result<U, E> 190 where 191 T: Hash, 192 { 193 let inner = match &self.0 { 194 Some(inner) => inner, 195 None => return compute(state), 196 }; 197 198 let mut hasher = Sha256Hasher(Sha256::new()); 199 state.hash(&mut hasher); 200 let hash: [u8; 32] = hasher.0.finalize().into(); 201 // standard encoding uses '/' which can't be used for filename 202 let hash = base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(&hash); 203 204 if let Some(cached_val) = inner.get_data(&hash) { 205 if let Some(val) = deserialize(state, cached_val) { 206 let mod_cache_path = inner.root_path.join(&hash); 207 inner.cache.on_cache_get_async(&mod_cache_path); // call on success 208 return Ok(val); 209 } 210 } 211 let val_to_cache = compute(state)?; 212 if let Some(bytes) = serialize(state, &val_to_cache) { 213 if inner.update_data(&hash, &bytes).is_some() { 214 let mod_cache_path = 
inner.root_path.join(&hash); 215 inner.cache.on_cache_update_async(&mod_cache_path); // call on success 216 } 217 } 218 Ok(val_to_cache) 219 } 220 } 221 222 impl<'cache> ModuleCacheEntryInner<'cache> { 223 fn new(compiler_name: &str, cache: &'cache Cache) -> Self { 224 // If debug assertions are enabled then assume that we're some sort of 225 // local build. We don't want local builds to stomp over caches between 226 // builds, so just use a separate cache directory based on the mtime of 227 // our executable, which should roughly correlate with "you changed the 228 // source code so you get a different directory". 229 // 230 // Otherwise if this is a release build we use the `GIT_REV` env var 231 // which is either the git rev if installed from git or the crate 232 // version if installed from crates.io. 233 let compiler_dir = if cfg!(debug_assertions) { 234 fn self_mtime() -> Option<String> { 235 let path = std::env::current_exe().ok()?; 236 let metadata = path.metadata().ok()?; 237 let mtime = metadata.modified().ok()?; 238 Some(match mtime.duration_since(std::time::UNIX_EPOCH) { 239 Ok(dur) => format!("{}", dur.as_millis()), 240 Err(err) => format!("m{}", err.duration().as_millis()), 241 }) 242 } 243 let self_mtime = self_mtime().unwrap_or("no-mtime".to_string()); 244 format!( 245 "{comp_name}-{comp_ver}-{comp_mtime}", 246 comp_name = compiler_name, 247 comp_ver = env!("GIT_REV"), 248 comp_mtime = self_mtime, 249 ) 250 } else { 251 format!( 252 "{comp_name}-{comp_ver}", 253 comp_name = compiler_name, 254 comp_ver = env!("GIT_REV"), 255 ) 256 }; 257 let root_path = cache.directory().join("modules").join(compiler_dir); 258 259 Self { root_path, cache } 260 } 261 262 fn get_data(&self, hash: &str) -> Option<Vec<u8>> { 263 let mod_cache_path = self.root_path.join(hash); 264 trace!("get_data() for path: {}", mod_cache_path.display()); 265 let compressed_cache_bytes = fs::read(&mod_cache_path).ok()?; 266 let cache_bytes = zstd::decode_all(&compressed_cache_bytes[..]) 
267 .map_err(|err| warn!("Failed to decompress cached code: {err}")) 268 .ok()?; 269 Some(cache_bytes) 270 } 271 272 fn update_data(&self, hash: &str, serialized_data: &[u8]) -> Option<()> { 273 let mod_cache_path = self.root_path.join(hash); 274 trace!("update_data() for path: {}", mod_cache_path.display()); 275 let compressed_data = zstd::encode_all( 276 &serialized_data[..], 277 self.cache.baseline_compression_level(), 278 ) 279 .map_err(|err| warn!("Failed to compress cached code: {err}")) 280 .ok()?; 281 282 // Optimize syscalls: first, try writing to disk. It should succeed in most cases. 283 // Otherwise, try creating the cache directory and retry writing to the file. 284 if fs_write_atomic(&mod_cache_path, "mod", &compressed_data).is_ok() { 285 return Some(()); 286 } 287 288 debug!( 289 "Attempting to create the cache directory, because \ 290 failed to write cached code to disk, path: {}", 291 mod_cache_path.display(), 292 ); 293 294 let cache_dir = mod_cache_path.parent().unwrap(); 295 fs::create_dir_all(cache_dir) 296 .map_err(|err| { 297 warn!( 298 "Failed to create cache directory, path: {}, message: {}", 299 cache_dir.display(), 300 err 301 ) 302 }) 303 .ok()?; 304 305 match fs_write_atomic(&mod_cache_path, "mod", &compressed_data) { 306 Ok(_) => Some(()), 307 Err(err) => { 308 warn!( 309 "Failed to write file with rename, target path: {}, err: {}", 310 mod_cache_path.display(), 311 err 312 ); 313 None 314 } 315 } 316 } 317 } 318 319 impl Hasher for Sha256Hasher { 320 fn finish(&self) -> u64 { 321 panic!("Sha256Hasher doesn't support finish!"); 322 } 323 324 fn write(&mut self, bytes: &[u8]) { 325 self.0.update(bytes); 326 } 327 } 328 329 // Assumption: path inside cache directory. 330 // Then, we don't have to use sound OS-specific exclusive file access. 331 // Note: there's no need to remove temporary file here - cleanup task will do it later. 
/// Writes `contents` to `path` by first writing a sibling temporary file and
/// then renaming it over `path`.
///
/// The temporary file is `path` with its extension set to
/// `wip-atomic-write-{reason}`. It is opened with `create_new`, so the call
/// fails if that file already exists.
///
/// # Errors
///
/// Returns the first I/O error from creating the temporary file, writing to
/// it, or renaming it.
fn fs_write_atomic(path: &Path, reason: &str, contents: &[u8]) -> io::Result<()> {
    let staging_path = path.with_extension(format!("wip-atomic-write-{reason}"));

    {
        // atomic file creation (assumption: no one will open it without this flag)
        let mut staging_file = fs::OpenOptions::new()
            .create_new(true)
            .write(true)
            .open(&staging_path)?;
        staging_file.write_all(contents)?;
        // `staging_file` goes out of scope here and is closed before the rename
    }

    // atomic file rename
    fs::rename(&staging_path, path)
}

#[cfg(test)]
mod tests;