1 use anyhow::Result; 2 use base64::Engine; 3 use log::{debug, trace, warn}; 4 use serde::{Deserialize, Serialize}; 5 use sha2::{Digest, Sha256}; 6 use std::hash::Hash; 7 use std::hash::Hasher; 8 use std::io::Write; 9 use std::path::{Path, PathBuf}; 10 use std::sync::Arc; 11 use std::sync::atomic::{AtomicUsize, Ordering::SeqCst}; 12 use std::time::Duration; 13 use std::{fs, io}; 14 15 #[macro_use] // for tests 16 mod config; 17 mod worker; 18 19 pub use config::{CacheConfig, create_new_config}; 20 use worker::Worker; 21 22 /// Global configuration for how the cache is managed 23 #[derive(Debug, Clone)] 24 pub struct Cache { 25 config: CacheConfig, 26 worker: Worker, 27 state: Arc<CacheState>, 28 } 29 30 macro_rules! generate_config_setting_getter { 31 ($setting:ident: $setting_type:ty) => { 32 /// Returns `$setting`. 33 /// 34 /// Panics if the cache is disabled. 35 pub fn $setting(&self) -> $setting_type { 36 self.config.$setting() 37 } 38 }; 39 } 40 41 impl Cache { 42 /// Builds a [`Cache`] from the configuration and spawns the cache worker. 43 /// 44 /// If you want to load the cache configuration from a file, use [`CacheConfig::from_file`]. 45 /// You can call [`CacheConfig::new`] for the default configuration. 46 /// 47 /// # Errors 48 /// Returns an error if the configuration is invalid. 49 pub fn new(mut config: CacheConfig) -> Result<Self> { 50 config.validate()?; 51 Ok(Self { 52 worker: Worker::start_new(&config), 53 config, 54 state: Default::default(), 55 }) 56 } 57 58 /// Loads cache configuration specified at `path`. 59 /// 60 /// This method will read the file specified by `path` on the filesystem and 61 /// attempt to load cache configuration from it. This method can also fail 62 /// due to I/O errors, misconfiguration, syntax errors, etc. For expected 63 /// syntax in the configuration file see the [documentation online][docs]. 64 /// 65 /// Passing in `None` loads cache configuration from the system default path. 
66 /// This is located, for example, on Unix at `$HOME/.config/wasmtime/config.toml` 67 /// and is typically created with the `wasmtime config new` command. 68 /// 69 /// # Errors 70 /// 71 /// This method can fail due to any error that happens when loading the file 72 /// pointed to by `path` and attempting to load the cache configuration. 73 /// 74 /// [docs]: https://bytecodealliance.github.io/wasmtime/cli-cache.html 75 pub fn from_file(path: Option<&Path>) -> Result<Self> { 76 let config = CacheConfig::from_file(path)?; 77 Self::new(config) 78 } 79 80 generate_config_setting_getter!(worker_event_queue_size: u64); 81 generate_config_setting_getter!(baseline_compression_level: i32); 82 generate_config_setting_getter!(optimized_compression_level: i32); 83 generate_config_setting_getter!(optimized_compression_usage_counter_threshold: u64); 84 generate_config_setting_getter!(cleanup_interval: Duration); 85 generate_config_setting_getter!(optimizing_compression_task_timeout: Duration); 86 generate_config_setting_getter!(allowed_clock_drift_for_files_from_future: Duration); 87 generate_config_setting_getter!(file_count_soft_limit: u64); 88 generate_config_setting_getter!(files_total_size_soft_limit: u64); 89 generate_config_setting_getter!(file_count_limit_percent_if_deleting: u8); 90 generate_config_setting_getter!(files_total_size_limit_percent_if_deleting: u8); 91 92 /// Returns path to the cache directory. 93 /// 94 /// Panics if the cache directory is not set. 
95 pub fn directory(&self) -> &PathBuf { 96 &self.config.directory() 97 } 98 99 #[cfg(test)] 100 fn worker(&self) -> &Worker { 101 &self.worker 102 } 103 104 /// Returns the number of cache hits seen so far 105 pub fn cache_hits(&self) -> usize { 106 self.state.hits.load(SeqCst) 107 } 108 109 /// Returns the number of cache misses seen so far 110 pub fn cache_misses(&self) -> usize { 111 self.state.misses.load(SeqCst) 112 } 113 114 pub(crate) fn on_cache_get_async(&self, path: impl AsRef<Path>) { 115 self.state.hits.fetch_add(1, SeqCst); 116 self.worker.on_cache_get_async(path) 117 } 118 119 pub(crate) fn on_cache_update_async(&self, path: impl AsRef<Path>) { 120 self.state.misses.fetch_add(1, SeqCst); 121 self.worker.on_cache_update_async(path) 122 } 123 } 124 125 #[derive(Default, Debug)] 126 struct CacheState { 127 hits: AtomicUsize, 128 misses: AtomicUsize, 129 } 130 131 /// Module level cache entry. 132 pub struct ModuleCacheEntry<'cache>(Option<ModuleCacheEntryInner<'cache>>); 133 134 struct ModuleCacheEntryInner<'cache> { 135 root_path: PathBuf, 136 cache: &'cache Cache, 137 } 138 139 struct Sha256Hasher(Sha256); 140 141 impl<'cache> ModuleCacheEntry<'cache> { 142 /// Create the cache entry. 143 pub fn new(compiler_name: &str, cache: Option<&'cache Cache>) -> Self { 144 Self(cache.map(|cache| ModuleCacheEntryInner::new(compiler_name, cache))) 145 } 146 147 #[cfg(test)] 148 fn from_inner(inner: ModuleCacheEntryInner<'cache>) -> Self { 149 Self(Some(inner)) 150 } 151 152 /// Gets cached data if state matches, otherwise calls `compute`. 153 /// 154 /// Data is automatically serialized/deserialized with `bincode`. 
155 pub fn get_data<T, U, E>(&self, state: T, compute: fn(&T) -> Result<U, E>) -> Result<U, E> 156 where 157 T: Hash, 158 U: Serialize + for<'a> Deserialize<'a>, 159 { 160 self.get_data_raw( 161 &state, 162 compute, 163 |_state, data| postcard::to_allocvec(data).ok(), 164 |_state, data| postcard::from_bytes(&data).ok(), 165 ) 166 } 167 168 /// Gets cached data if state matches, otherwise calls `compute`. 169 /// 170 /// If the cache is disabled or no cached data is found then `compute` is 171 /// called to calculate the data. If the data was found in cache it is 172 /// passed to `deserialize`, which if successful will be the returned value. 173 /// When computed the `serialize` function is used to generate the bytes 174 /// from the returned value. 175 pub fn get_data_raw<T, U, E>( 176 &self, 177 state: &T, 178 // NOTE: These are function pointers instead of closures so that they 179 // don't accidentally close over something not accounted in the cache. 180 compute: fn(&T) -> Result<U, E>, 181 serialize: fn(&T, &U) -> Option<Vec<u8>>, 182 deserialize: fn(&T, Vec<u8>) -> Option<U>, 183 ) -> Result<U, E> 184 where 185 T: Hash, 186 { 187 let inner = match &self.0 { 188 Some(inner) => inner, 189 None => return compute(state), 190 }; 191 192 let mut hasher = Sha256Hasher(Sha256::new()); 193 state.hash(&mut hasher); 194 let hash: [u8; 32] = hasher.0.finalize().into(); 195 // standard encoding uses '/' which can't be used for filename 196 let hash = base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(&hash); 197 198 if let Some(cached_val) = inner.get_data(&hash) { 199 if let Some(val) = deserialize(state, cached_val) { 200 let mod_cache_path = inner.root_path.join(&hash); 201 inner.cache.on_cache_get_async(&mod_cache_path); // call on success 202 return Ok(val); 203 } 204 } 205 let val_to_cache = compute(state)?; 206 if let Some(bytes) = serialize(state, &val_to_cache) { 207 if inner.update_data(&hash, &bytes).is_some() { 208 let mod_cache_path = 
inner.root_path.join(&hash); 209 inner.cache.on_cache_update_async(&mod_cache_path); // call on success 210 } 211 } 212 Ok(val_to_cache) 213 } 214 } 215 216 impl<'cache> ModuleCacheEntryInner<'cache> { 217 fn new(compiler_name: &str, cache: &'cache Cache) -> Self { 218 // If debug assertions are enabled then assume that we're some sort of 219 // local build. We don't want local builds to stomp over caches between 220 // builds, so just use a separate cache directory based on the mtime of 221 // our executable, which should roughly correlate with "you changed the 222 // source code so you get a different directory". 223 // 224 // Otherwise if this is a release build we use the `GIT_REV` env var 225 // which is either the git rev if installed from git or the crate 226 // version if installed from crates.io. 227 let compiler_dir = if cfg!(debug_assertions) { 228 fn self_mtime() -> Option<String> { 229 let path = std::env::current_exe().ok()?; 230 let metadata = path.metadata().ok()?; 231 let mtime = metadata.modified().ok()?; 232 Some(match mtime.duration_since(std::time::UNIX_EPOCH) { 233 Ok(dur) => format!("{}", dur.as_millis()), 234 Err(err) => format!("m{}", err.duration().as_millis()), 235 }) 236 } 237 let self_mtime = self_mtime().unwrap_or("no-mtime".to_string()); 238 format!( 239 "{comp_name}-{comp_ver}-{comp_mtime}", 240 comp_name = compiler_name, 241 comp_ver = env!("GIT_REV"), 242 comp_mtime = self_mtime, 243 ) 244 } else { 245 format!( 246 "{comp_name}-{comp_ver}", 247 comp_name = compiler_name, 248 comp_ver = env!("GIT_REV"), 249 ) 250 }; 251 let root_path = cache.directory().join("modules").join(compiler_dir); 252 253 Self { root_path, cache } 254 } 255 256 fn get_data(&self, hash: &str) -> Option<Vec<u8>> { 257 let mod_cache_path = self.root_path.join(hash); 258 trace!("get_data() for path: {}", mod_cache_path.display()); 259 let compressed_cache_bytes = fs::read(&mod_cache_path).ok()?; 260 let cache_bytes = zstd::decode_all(&compressed_cache_bytes[..]) 
261 .map_err(|err| warn!("Failed to decompress cached code: {}", err)) 262 .ok()?; 263 Some(cache_bytes) 264 } 265 266 fn update_data(&self, hash: &str, serialized_data: &[u8]) -> Option<()> { 267 let mod_cache_path = self.root_path.join(hash); 268 trace!("update_data() for path: {}", mod_cache_path.display()); 269 let compressed_data = zstd::encode_all( 270 &serialized_data[..], 271 self.cache.baseline_compression_level(), 272 ) 273 .map_err(|err| warn!("Failed to compress cached code: {}", err)) 274 .ok()?; 275 276 // Optimize syscalls: first, try writing to disk. It should succeed in most cases. 277 // Otherwise, try creating the cache directory and retry writing to the file. 278 if fs_write_atomic(&mod_cache_path, "mod", &compressed_data).is_ok() { 279 return Some(()); 280 } 281 282 debug!( 283 "Attempting to create the cache directory, because \ 284 failed to write cached code to disk, path: {}", 285 mod_cache_path.display(), 286 ); 287 288 let cache_dir = mod_cache_path.parent().unwrap(); 289 fs::create_dir_all(cache_dir) 290 .map_err(|err| { 291 warn!( 292 "Failed to create cache directory, path: {}, message: {}", 293 cache_dir.display(), 294 err 295 ) 296 }) 297 .ok()?; 298 299 match fs_write_atomic(&mod_cache_path, "mod", &compressed_data) { 300 Ok(_) => Some(()), 301 Err(err) => { 302 warn!( 303 "Failed to write file with rename, target path: {}, err: {}", 304 mod_cache_path.display(), 305 err 306 ); 307 None 308 } 309 } 310 } 311 } 312 313 impl Hasher for Sha256Hasher { 314 fn finish(&self) -> u64 { 315 panic!("Sha256Hasher doesn't support finish!"); 316 } 317 318 fn write(&mut self, bytes: &[u8]) { 319 self.0.update(bytes); 320 } 321 } 322 323 // Assumption: path inside cache directory. 324 // Then, we don't have to use sound OS-specific exclusive file access. 325 // Note: there's no need to remove temporary file here - cleanup task will do it later. 
/// Writes `contents` to `path` by first filling a sibling staging file and
/// then renaming it over the destination.
///
/// The staging file is `path` with its extension replaced by
/// `wip-atomic-write-{reason}`. It is opened with `create_new`, so the call
/// fails if that staging file already exists.
///
/// # Errors
///
/// Returns the first I/O error from creating the staging file, writing to it,
/// or renaming it to `path`.
fn fs_write_atomic(path: &Path, reason: &str, contents: &[u8]) -> io::Result<()> {
    let staging_path = path.with_extension(format!("wip-atomic-write-{reason}"));

    {
        // atomic file creation (assumption: no one will open it without this flag)
        let mut staging_file = fs::OpenOptions::new()
            .create_new(true)
            .write(true)
            .open(&staging_path)?;
        staging_file.write_all(contents)?;
        // `staging_file` goes out of scope here and is closed before the rename
    }

    // atomic file rename
    fs::rename(&staging_path, path)
}

#[cfg(test)]
mod tests;