1 //! Implements a memory pool using a single allocated memory slab.
2 //!
3 //! The pooling instance allocator maps one large slab of memory in advance and
4 //! allocates WebAssembly memories from this slab--a [`MemoryPool`]. Each
5 //! WebAssembly memory is allocated in its own slot (see uses of `index` and
6 //! [`SlotId`] in this module):
7 //!
8 //! ```text
9 //! ┌──────┬──────┬──────┬──────┬──────┐
10 //! │Slot 0│Slot 1│Slot 2│Slot 3│......│
11 //! └──────┴──────┴──────┴──────┴──────┘
12 //! ```
13 //!
14 //! Diving deeper, we note that a [`MemoryPool`] protects Wasmtime from
15 //! out-of-bounds memory accesses by inserting inaccessible guard regions
16 //! between memory slots. These guard regions are configured to raise a signal
17 //! if they are accessed--a WebAssembly out-of-bounds (OOB) memory access. The
18 //! [`MemoryPool`] documentation has a more detailed chart but one can think of
19 //! memory slots being laid out like the following:
20 //!
21 //! ```text
22 //! ┌─────┬─────┬─────┬─────┬─────┬─────┬─────┬─────┐
23 //! │Guard│Mem 0│Guard│Mem 1│Guard│Mem 2│.....│Guard│
24 //! └─────┴─────┴─────┴─────┴─────┴─────┴─────┴─────┘
25 //! ```
26 //!
27 //! But we can be more efficient about guard regions: with memory protection
28 //! keys (MPK) enabled, the interleaved guard regions can be smaller. If we
29 //! surround a memory with memories from other instances and each instance is
30 //! protected by different protection keys, the guard region can be smaller AND
31 //! the pool will still raise a signal on an OOB access. This complicates how we
32 //! lay out memory slots: we must store memories from the same instance in the
33 //! same "stripe". Each stripe is protected by a different protection key.
34 //!
35 //! This concept, dubbed [ColorGuard] in the original paper, relies on careful
36 //! calculation of the memory sizes to prevent any "overlapping access" (see
37 //! [`calculate`]): there are limited protection keys available (15) so the next
38 //! memory using the same key must be at least as far away as the guard region
39 //! we would insert otherwise. This ends up looking like the following, where a
40 //! store for instance 0 (`I0`) "stripes" two memories (`M0` and `M1`) with the
41 //! same protection key 1 and far enough apart to signal an OOB access:
42 //!
43 //! ```text
44 //! ┌─────┬─────┬─────┬─────┬────────────────┬─────┬─────┬─────┐
45 //! │.....│I0:M1│.....│.....│.<enough slots>.│I0:M2│.....│.....│
46 //! ├─────┼─────┼─────┼─────┼────────────────┼─────┼─────┼─────┤
47 //! │.....│key 1│key 2│key 3│..<more keys>...│key 1│key 2│.....│
48 //! └─────┴─────┴─────┴─────┴────────────────┴─────┴─────┴─────┘
49 //! ```
50 //!
51 //! [ColorGuard]: https://plas2022.github.io/files/pdf/SegueColorGuard.pdf
52
53 use super::{
54 MemoryAllocationIndex,
55 index_allocator::{MemoryInModule, ModuleAffinityIndexAllocator, SlotId},
56 };
57 use crate::prelude::*;
58 use crate::runtime::vm::{
59 CompiledModuleId, InstanceAllocationRequest, InstanceLimits, Memory, MemoryBase,
60 MemoryImageSlot, Mmap, MmapOffset, PoolingInstanceAllocatorConfig, mmap::AlignedLength,
61 };
62 use crate::{
63 Enabled,
64 runtime::vm::mpk::{self, ProtectionKey, ProtectionMask},
65 vm::HostAlignedByteCount,
66 };
67 use std::mem;
68 use std::sync::atomic::{AtomicUsize, Ordering};
69 use std::sync::{Arc, Mutex};
70 use wasmtime_environ::{DefinedMemoryIndex, Module, Tunables};
71
72 /// A set of allocator slots.
73 ///
74 /// The allocated slots can be split by striping them: e.g., with two stripe
75 /// colors 0 and 1, we would allocate all even slots using stripe 0 and all odd
76 /// slots using stripe 1.
77 ///
78 /// This is helpful for the use of protection keys: (a) if a request comes to
79 /// allocate multiple instances, we can allocate them all from the same stripe
80 /// and (b) if a store wants to allocate more from the same stripe it can.
81 #[derive(Debug)]
82 struct Stripe {
83 allocator: ModuleAffinityIndexAllocator,
84 pkey: Option<ProtectionKey>,
85 }
86
87 /// Represents a pool of WebAssembly linear memories.
88 ///
89 /// A linear memory is divided into accessible pages and guard pages. A memory
90 /// pool contains linear memories: each memory occupies a slot in an
91 /// allocated slab (i.e., `mapping`):
92 ///
93 /// ```text
94 /// layout.max_memory_bytes layout.slot_bytes
95 /// | |
96 /// ◄─────┴────► ◄───────────┴──────────►
97 /// ┌───────────┬────────────┬───────────┐ ┌───────────┬───────────┬───────────┐
98 /// | PROT_NONE | | PROT_NONE | ... | | PROT_NONE | PROT_NONE |
99 /// └───────────┴────────────┴───────────┘ └───────────┴───────────┴───────────┘
100 /// | |◄──────────────────┬─────────────────────────────────► ◄────┬────►
101 /// | | | |
102 /// mapping | `layout.num_slots` memories layout.post_slab_guard_size
103 /// |
104 /// layout.pre_slab_guard_size
105 /// ```
106 #[derive(Debug)]
107 pub struct MemoryPool {
108 mapping: Arc<Mmap<AlignedLength>>,
109 /// This memory pool is stripe-aware. If using memory protection keys, this
110 /// will contain one stripe per available key; otherwise, a single stripe
111 /// with an empty key.
112 stripes: Vec<Stripe>,
113
114 /// If using a copy-on-write allocation scheme, the slot management. We
115 /// dynamically transfer ownership of a slot to a Memory when in use.
116 image_slots: Vec<Mutex<ImageSlot>>,
117
118 /// A description of the various memory sizes used in allocating the
119 /// `mapping` slab.
120 layout: SlabLayout,
121
122 /// The maximum number of memories that a single core module instance may
123 /// use.
124 ///
125 /// NB: this is needed for validation but does not affect the pool's size.
126 memories_per_instance: usize,
127
128 /// How much linear memory, in bytes, to keep resident after resetting for
129 /// use with the next instance. This much memory will be `memset` to zero
130 /// when a linear memory is deallocated.
131 ///
132 /// Memory exceeding this amount in the wasm linear memory will be released
133 /// with `madvise` back to the kernel.
134 ///
135 /// Only applicable on Linux.
136 pub(super) keep_resident: HostAlignedByteCount,
137
138 /// Keep track of protection keys handed out to initialized stores; this
139 /// allows us to round-robin the assignment of stores to stripes.
140 next_available_pkey: AtomicUsize,
141 }
142
143 /// The state of memory for each slot in this pool.
144 #[derive(Debug)]
145 enum ImageSlot {
146 /// This slot is guaranteed to be entirely unmapped.
147 ///
148 /// This is the initial state of all slots.
149 Unmapped,
150
151 /// The state of this slot is unknown.
152 ///
153 /// This encompasses a number of situations such as:
154 ///
155 /// * The slot is currently in use.
156 /// * The slot was attempted to be in use, but allocation failed.
157 /// * The slot was used but not deallocated properly.
158 ///
159 /// All of these situations are lumped into this one variant indicating
160 /// that, at a base level, no knowledge is known about this slot. Using a
161 /// slot in this state first requires resetting all memory in this slot by
162 /// mapping anonymous memory on top of the entire slot.
163 Unknown,
164
165 /// This slot was previously used and `MemoryImageSlot` maintains the state
166 /// about what this slot was last configured as.
167 ///
168 /// Future use of this slot will use `MemoryImageSlot` to continue to
169 /// re-instantiate and reuse images and such. This state is entered after
170 /// and allocated slot is successfully deallocated.
171 PreviouslyUsed(MemoryImageSlot),
172 }
173
174 impl MemoryPool {
175 /// Create a new `MemoryPool`.
new(config: &PoolingInstanceAllocatorConfig, tunables: &Tunables) -> Result<Self>176 pub fn new(config: &PoolingInstanceAllocatorConfig, tunables: &Tunables) -> Result<Self> {
177 if u64::try_from(config.limits.max_memory_size).unwrap() > tunables.memory_reservation {
178 bail!(
179 "maximum memory size of {:#x} bytes exceeds the configured \
180 memory reservation of {:#x} bytes",
181 config.limits.max_memory_size,
182 tunables.memory_reservation
183 );
184 }
185 let pkeys = match config.memory_protection_keys {
186 Enabled::Auto => {
187 if mpk::is_supported() {
188 mpk::keys(config.max_memory_protection_keys)
189 } else {
190 &[]
191 }
192 }
193 Enabled::Yes => {
194 if mpk::is_supported() {
195 mpk::keys(config.max_memory_protection_keys)
196 } else {
197 bail!("mpk is disabled on this system")
198 }
199 }
200 Enabled::No => &[],
201 };
202
203 // This is a tricky bit of global state: when creating a memory pool
204 // that uses memory protection keys, we ensure here that any host code
205 // will have access to all keys (i.e., stripes). It's only when we enter
206 // the WebAssembly guest code (see `StoreInner::call_hook`) that we
207 // enforce which keys/stripes can be accessed. Be forewarned about the
208 // assumptions here:
209 // - we expect this "allow all" configuration to reset the default
210 // process state (only allow key 0) _before_ any memories are accessed
211 // - and we expect no other code (e.g., host-side code) to modify this
212 // global MPK configuration
213 if !pkeys.is_empty() {
214 mpk::allow(ProtectionMask::all());
215 }
216
217 // Create a slab layout and allocate it as a completely inaccessible
218 // region to start--`PROT_NONE`.
219 let constraints = SlabConstraints::new(&config.limits, tunables, pkeys.len())?;
220 let layout = calculate(&constraints)?;
221 log::debug!(
222 "creating memory pool: {constraints:?} -> {layout:?} (total: {})",
223 layout.total_slab_bytes()?
224 );
225 let mut mapping =
226 Mmap::accessible_reserved(HostAlignedByteCount::ZERO, layout.total_slab_bytes()?)
227 .context("failed to create memory pool mapping")?;
228
229 // Then, stripe the memory with the available protection keys. This is
230 // unnecessary if there is only one stripe color.
231 if layout.num_stripes >= 2 {
232 let mut cursor = layout.pre_slab_guard_bytes;
233 let pkeys = &pkeys[..layout.num_stripes];
234 for i in 0..constraints.num_slots {
235 let pkey = &pkeys[i % pkeys.len()];
236 let region = unsafe {
237 mapping.slice_mut(
238 cursor.byte_count()..cursor.byte_count() + layout.slot_bytes.byte_count(),
239 )
240 };
241 pkey.protect(region)?;
242 cursor = cursor
243 .checked_add(layout.slot_bytes)
244 .context("cursor + slot_bytes overflows")?;
245 }
246 debug_assert_eq!(
247 cursor
248 .checked_add(layout.post_slab_guard_bytes)
249 .context("cursor + post_slab_guard_bytes overflows")?,
250 layout.total_slab_bytes()?
251 );
252 }
253
254 let image_slots: Vec<_> = std::iter::repeat_with(|| Mutex::new(ImageSlot::Unmapped))
255 .take(constraints.num_slots)
256 .collect();
257
258 let create_stripe = |i| {
259 let num_slots = constraints.num_slots / layout.num_stripes
260 + usize::from(constraints.num_slots % layout.num_stripes > i);
261 let allocator = ModuleAffinityIndexAllocator::new(
262 num_slots.try_into().unwrap(),
263 config.max_unused_warm_slots,
264 );
265 Stripe {
266 allocator,
267 pkey: pkeys.get(i).cloned(),
268 }
269 };
270
271 debug_assert!(layout.num_stripes > 0);
272 let stripes: Vec<_> = (0..layout.num_stripes).map(create_stripe).collect();
273
274 let pool = Self {
275 stripes,
276 mapping: Arc::new(mapping),
277 image_slots,
278 layout,
279 memories_per_instance: usize::try_from(config.limits.max_memories_per_module).unwrap(),
280 keep_resident: HostAlignedByteCount::new_rounded_up(
281 config.linear_memory_keep_resident,
282 )?,
283 next_available_pkey: AtomicUsize::new(0),
284 };
285
286 Ok(pool)
287 }
288
289 /// Return a protection key that stores can use for requesting new
next_available_pkey(&self) -> Option<ProtectionKey>290 pub fn next_available_pkey(&self) -> Option<ProtectionKey> {
291 let index = self.next_available_pkey.fetch_add(1, Ordering::SeqCst) % self.stripes.len();
292 debug_assert!(
293 self.stripes.len() < 2 || self.stripes[index].pkey.is_some(),
294 "if we are using stripes, we cannot have an empty protection key"
295 );
296 self.stripes[index].pkey
297 }
298
299 /// Validate whether this memory pool supports the given module.
validate_memories(&self, module: &Module) -> Result<()>300 pub fn validate_memories(&self, module: &Module) -> Result<()> {
301 let memories = module.num_defined_memories();
302 if memories > self.memories_per_instance {
303 bail!(
304 "defined memories count of {} exceeds the per-instance limit of {}",
305 memories,
306 self.memories_per_instance,
307 );
308 }
309
310 for (i, memory) in module.memories.iter().skip(module.num_imported_memories) {
311 self.validate_memory(memory).with_context(|| {
312 format!(
313 "memory index {} is unsupported in this pooling allocator configuration",
314 i.as_u32()
315 )
316 })?;
317 }
318 Ok(())
319 }
320
321 /// Validate one memory for this pool.
validate_memory(&self, memory: &wasmtime_environ::Memory) -> Result<()>322 pub fn validate_memory(&self, memory: &wasmtime_environ::Memory) -> Result<()> {
323 let min = memory.minimum_byte_size().with_context(|| {
324 format!("memory has a minimum byte size that cannot be represented in a u64",)
325 })?;
326 if min > u64::try_from(self.layout.max_memory_bytes.byte_count()).unwrap() {
327 bail!(
328 "memory has a minimum byte size of {} which exceeds the limit of {} bytes",
329 min,
330 self.layout.max_memory_bytes,
331 );
332 }
333 if memory.shared {
334 // FIXME(#4244): since the pooling allocator owns the memory
335 // allocation (which is torn down with the instance), that
336 // can't be used with shared memory where threads or the host
337 // might persist the memory beyond the lifetime of the instance
338 // itself.
339 bail!("memory is shared which is not supported in the pooling allocator");
340 }
341 Ok(())
342 }
343
344 /// Are zero slots in use right now?
is_empty(&self) -> bool345 pub fn is_empty(&self) -> bool {
346 self.stripes.iter().all(|s| s.allocator.is_empty())
347 }
348
349 /// Allocate a single memory for the given instance allocation request.
allocate( &self, request: &mut InstanceAllocationRequest<'_, '_>, ty: &wasmtime_environ::Memory, memory_index: Option<DefinedMemoryIndex>, ) -> Result<(MemoryAllocationIndex, Memory)>350 pub async fn allocate(
351 &self,
352 request: &mut InstanceAllocationRequest<'_, '_>,
353 ty: &wasmtime_environ::Memory,
354 memory_index: Option<DefinedMemoryIndex>,
355 ) -> Result<(MemoryAllocationIndex, Memory)> {
356 let tunables = request.store.engine().tunables();
357 let stripe_index = if let Some(pkey) = request.store.get_pkey() {
358 pkey.as_stripe()
359 } else {
360 debug_assert!(self.stripes.len() < 2);
361 0
362 };
363
364 let striped_allocation_index = self.stripes[stripe_index]
365 .allocator
366 .alloc(memory_index.and_then(|mem_idx| {
367 request
368 .runtime_info
369 .unique_id()
370 .map(|id| MemoryInModule(id, mem_idx))
371 }))
372 .map(|slot| StripedAllocationIndex(u32::try_from(slot.index()).unwrap()))
373 .ok_or_else(|| {
374 super::PoolConcurrencyLimitError::new(
375 self.stripes[stripe_index].allocator.len(),
376 format!("memory stripe {stripe_index}"),
377 )
378 })?;
379 let mut guard = DeallocateIndexGuard {
380 pool: self,
381 stripe_index,
382 striped_allocation_index,
383 active: true,
384 };
385
386 let allocation_index =
387 striped_allocation_index.as_unstriped_slot_index(stripe_index, self.stripes.len());
388
389 // Double-check that the runtime requirements of the memory are
390 // satisfied by the configuration of this pooling allocator. This
391 // should be returned as an error through `validate_memory_plans`
392 // but double-check here to be sure.
393 assert!(
394 tunables.memory_reservation + tunables.memory_guard_size
395 <= u64::try_from(self.layout.bytes_to_next_stripe_slot().byte_count()).unwrap()
396 );
397
398 let base = self.get_base(allocation_index);
399 let base_capacity = self.layout.max_memory_bytes;
400
401 let mut slot = self.take_memory_image_slot(allocation_index)?;
402 let image = match memory_index {
403 Some(memory_index) => request.runtime_info.memory_image(memory_index)?,
404 None => None,
405 };
406 let initial_size = ty
407 .minimum_byte_size()
408 .expect("min size checked in validation");
409
410 // If instantiation fails, we can propagate the error
411 // upward and drop the slot. This will cause the Drop
412 // handler to attempt to map the range with PROT_NONE
413 // memory, to reserve the space while releasing any
414 // stale mappings. The next use of this slot will then
415 // create a new slot that will try to map over
416 // this, returning errors as well if the mapping
417 // errors persist. The unmap-on-drop is best effort;
418 // if it fails, then we can still soundly continue
419 // using the rest of the pool and allowing the rest of
420 // the process to continue, because we never perform a
421 // mmap that would leave an open space for someone
422 // else to come in and map something.
423 let initial_size = usize::try_from(initial_size).unwrap();
424 slot.instantiate(initial_size, image, ty, tunables)?;
425
426 let memory = Memory::new_static(
427 ty,
428 tunables,
429 MemoryBase::Mmap(base),
430 base_capacity.byte_count(),
431 slot,
432 request.limiter.as_deref_mut(),
433 )
434 .await?;
435 guard.active = false;
436 return Ok((allocation_index, memory));
437
438 struct DeallocateIndexGuard<'a> {
439 pool: &'a MemoryPool,
440 stripe_index: usize,
441 striped_allocation_index: StripedAllocationIndex,
442 active: bool,
443 }
444
445 impl Drop for DeallocateIndexGuard<'_> {
446 fn drop(&mut self) {
447 if !self.active {
448 return;
449 }
450 self.pool.stripes[self.stripe_index]
451 .allocator
452 .free(SlotId(self.striped_allocation_index.0), 0);
453 }
454 }
455 }
456
457 /// Deallocate a previously-allocated memory.
458 ///
459 /// If `image` is `None` then the state of this memory's slot is left
460 /// unknown. Otherwise `image` is used to retain information about the state
461 /// of this slot.
462 ///
463 /// # Safety
464 ///
465 /// The memory must have been previously allocated from this pool and
466 /// assigned the given index, must currently be in an allocated state, and
467 /// must never be used again.
468 ///
469 /// The caller must have already called `clear_and_remain_ready` on the
470 /// memory's image and flushed any enqueued decommits for this memory. Note
471 /// that if `image` is `None` then this is not required.
deallocate( &self, allocation_index: MemoryAllocationIndex, image: Option<MemoryImageSlot>, bytes_resident: usize, )472 pub unsafe fn deallocate(
473 &self,
474 allocation_index: MemoryAllocationIndex,
475 image: Option<MemoryImageSlot>,
476 bytes_resident: usize,
477 ) {
478 self.return_memory_image_slot(allocation_index, image);
479
480 let (stripe_index, striped_allocation_index) =
481 StripedAllocationIndex::from_unstriped_slot_index(allocation_index, self.stripes.len());
482 self.stripes[stripe_index]
483 .allocator
484 .free(SlotId(striped_allocation_index.0), bytes_resident);
485 }
486
487 /// Purging everything related to `module`.
purge_module(&self, module: CompiledModuleId)488 pub fn purge_module(&self, module: CompiledModuleId) {
489 // This primarily means clearing out all of its memory images present in
490 // the virtual address space. Go through the index allocator for slots
491 // affine to `module` and reset them, freeing up the index when we're
492 // done.
493 //
494 // Note that this is only called when the specified `module` won't be
495 // allocated further (the module is being dropped) so this shouldn't hit
496 // any sort of infinite loop since this should be the final operation
497 // working with `module`.
498 //
499 // TODO: We are given a module id, but key affinity by pair of module id
500 // and defined memory index. We are missing any defined memory index or
501 // count of how many memories the module defines here. Therefore, we
502 // probe up to the maximum number of memories per instance. This is fine
503 // because that maximum is generally relatively small. If this method
504 // somehow ever gets hot because of unnecessary probing, we should
505 // either pass in the actual number of defined memories for the given
506 // module to this method, or keep a side table of all slots that are
507 // associated with a module (not just module and memory). The latter
508 // would require care to make sure that its maintenance wouldn't be too
509 // expensive for normal allocation/free operations.
510 for (stripe_index, stripe) in self.stripes.iter().enumerate() {
511 for i in 0..self.memories_per_instance {
512 use wasmtime_environ::EntityRef;
513 let memory_index = DefinedMemoryIndex::new(i);
514 while let Some(id) = stripe
515 .allocator
516 .alloc_affine_and_clear_affinity(module, memory_index)
517 {
518 // Attempt to acquire the `MemoryImageSlot` state for this
519 // slot, and then if we have that try to remove the image,
520 // and then if all that succeeds put the slot back in.
521 //
522 // If anything fails then the slot will be in an "unknown"
523 // state which means that on next use it'll be remapped with
524 // anonymous memory.
525 let index = StripedAllocationIndex(id.0)
526 .as_unstriped_slot_index(stripe_index, self.stripes.len());
527 if let Ok(mut slot) = self.take_memory_image_slot(index) {
528 if slot.remove_image().is_ok() {
529 self.return_memory_image_slot(index, Some(slot));
530 }
531 }
532
533 stripe.allocator.free(id, 0);
534 }
535 }
536 }
537 }
538
get_base(&self, allocation_index: MemoryAllocationIndex) -> MmapOffset539 fn get_base(&self, allocation_index: MemoryAllocationIndex) -> MmapOffset {
540 assert!(allocation_index.index() < self.layout.num_slots);
541 let offset = self
542 .layout
543 .slot_bytes
544 .checked_mul(allocation_index.index())
545 .and_then(|c| c.checked_add(self.layout.pre_slab_guard_bytes))
546 .expect("slot_bytes * index + pre_slab_guard_bytes overflows");
547 self.mapping.offset(offset).expect("offset is in bounds")
548 }
549
550 /// Take ownership of the given image slot.
551 ///
552 /// This method is used when a `MemoryAllocationIndex` has been allocated
553 /// and the state of the slot needs to be acquired. This will lazily
554 /// allocate a `MemoryImageSlot` which describes the current (and possibly
555 /// prior) state of the slot.
556 ///
557 /// During deallocation this structure is passed back to
558 /// `return_memory_image_slot`.
559 ///
560 /// Note that this is a fallible method because using a slot might require
561 /// resetting the memory that was previously there. This reset operation
562 /// is a fallible operation that may not succeed. If it fails then this
563 /// slot cannot be used at this time.
take_memory_image_slot( &self, allocation_index: MemoryAllocationIndex, ) -> Result<MemoryImageSlot>564 fn take_memory_image_slot(
565 &self,
566 allocation_index: MemoryAllocationIndex,
567 ) -> Result<MemoryImageSlot> {
568 let (maybe_slot, needs_reset) = {
569 let mut slot = self.image_slots[allocation_index.index()].lock().unwrap();
570 match mem::replace(&mut *slot, ImageSlot::Unknown) {
571 ImageSlot::Unmapped => (None, false),
572 ImageSlot::Unknown => (None, true),
573 ImageSlot::PreviouslyUsed(state) => (Some(state), false),
574 }
575 };
576 let mut slot = maybe_slot.unwrap_or_else(|| {
577 MemoryImageSlot::create(
578 self.get_base(allocation_index),
579 HostAlignedByteCount::ZERO,
580 self.layout.max_memory_bytes.byte_count(),
581 )
582 });
583
584 // For `Unknown` slots it means that `slot` is brand new and isn't
585 // actually tracking the state of the previous slot, so reset it
586 // entirely with anonymous memory to wipe the slate clean and start
587 // from zero. This should only happen if allocation of the previous
588 // slot failed, for example.
589 if needs_reset {
590 slot.reset_with_anon_memory()?;
591 }
592 Ok(slot)
593 }
594
595 /// Return ownership of the given image slot.
596 ///
597 /// If `slot` is not provided then it's reset with `Unknown` meaning a
598 /// future allocation will need to pave over it to use it.
return_memory_image_slot( &self, allocation_index: MemoryAllocationIndex, slot: Option<MemoryImageSlot>, )599 fn return_memory_image_slot(
600 &self,
601 allocation_index: MemoryAllocationIndex,
602 slot: Option<MemoryImageSlot>,
603 ) {
604 let prev = mem::replace(
605 &mut *self.image_slots[allocation_index.index()].lock().unwrap(),
606 match slot {
607 Some(slot) => {
608 assert!(!slot.is_dirty());
609 ImageSlot::PreviouslyUsed(slot)
610 }
611 None => ImageSlot::Unknown,
612 },
613 );
614 assert!(matches!(prev, ImageSlot::Unknown));
615 }
616
unused_warm_slots(&self) -> u32617 pub fn unused_warm_slots(&self) -> u32 {
618 self.stripes
619 .iter()
620 .map(|i| i.allocator.unused_warm_slots())
621 .sum()
622 }
623
unused_bytes_resident(&self) -> usize624 pub fn unused_bytes_resident(&self) -> usize {
625 self.stripes
626 .iter()
627 .map(|i| i.allocator.unused_bytes_resident())
628 .sum()
629 }
630 }
631
632 /// The index of a memory allocation within an `InstanceAllocator`.
633 #[derive(Clone, Copy, Debug, Eq, PartialEq, PartialOrd, Ord)]
634 pub struct StripedAllocationIndex(u32);
635
636 impl StripedAllocationIndex {
from_unstriped_slot_index( index: MemoryAllocationIndex, num_stripes: usize, ) -> (usize, Self)637 fn from_unstriped_slot_index(
638 index: MemoryAllocationIndex,
639 num_stripes: usize,
640 ) -> (usize, Self) {
641 let stripe_index = index.index() % num_stripes;
642 let num_stripes: u32 = num_stripes.try_into().unwrap();
643 let index_within_stripe = Self(index.0 / num_stripes);
644 (stripe_index, index_within_stripe)
645 }
646
as_unstriped_slot_index(self, stripe: usize, num_stripes: usize) -> MemoryAllocationIndex647 fn as_unstriped_slot_index(self, stripe: usize, num_stripes: usize) -> MemoryAllocationIndex {
648 let num_stripes: u32 = num_stripes.try_into().unwrap();
649 let stripe: u32 = stripe.try_into().unwrap();
650 MemoryAllocationIndex(self.0 * num_stripes + stripe)
651 }
652 }
653
654 #[derive(Clone, Debug)]
655 struct SlabConstraints {
656 /// Essentially, the `static_memory_bound`: this is an assumption that the
657 /// runtime and JIT compiler make about how much space will be guarded
658 /// between slots.
659 expected_slot_bytes: HostAlignedByteCount,
660 /// The maximum size of any memory in the pool. Always a non-zero multiple
661 /// of the page size.
662 max_memory_bytes: HostAlignedByteCount,
663 num_slots: usize,
664 num_pkeys_available: usize,
665 guard_bytes: HostAlignedByteCount,
666 guard_before_slots: bool,
667 }
668
669 impl SlabConstraints {
new( limits: &InstanceLimits, tunables: &Tunables, num_pkeys_available: usize, ) -> Result<Self>670 fn new(
671 limits: &InstanceLimits,
672 tunables: &Tunables,
673 num_pkeys_available: usize,
674 ) -> Result<Self> {
675 // `memory_reservation` is the configured number of bytes for a
676 // static memory slot (see `Config::memory_reservation`); even
677 // if the memory never grows to this size (e.g., it has a lower memory
678 // maximum), codegen will assume that this unused memory is mapped
679 // `PROT_NONE`. Typically `memory_reservation` is 4GiB which helps
680 // elide most bounds checks. `MemoryPool` must respect this bound,
681 // though not explicitly: if we can achieve the same effect via
682 // MPK-protected stripes, the slot size can be lower than the
683 // `memory_reservation`.
684 let expected_slot_bytes =
685 HostAlignedByteCount::new_rounded_up_u64(tunables.memory_reservation)
686 .context("memory reservation is too large")?;
687
688 // Page-align the maximum size of memory since that's the granularity that
689 // permissions are going to be controlled at.
690 let max_memory_bytes = HostAlignedByteCount::new_rounded_up(limits.max_memory_size)
691 .context("maximum size of memory is too large")?;
692
693 let guard_bytes = HostAlignedByteCount::new_rounded_up_u64(tunables.memory_guard_size)
694 .context("guard region is too large")?;
695
696 let num_slots = usize::try_from(limits.total_memories).context("too many memories")?;
697
698 let constraints = SlabConstraints {
699 max_memory_bytes,
700 num_slots,
701 expected_slot_bytes,
702 num_pkeys_available,
703 guard_bytes,
704 guard_before_slots: tunables.guard_before_linear_memory,
705 };
706 Ok(constraints)
707 }
708 }
709
710 #[derive(Debug)]
711 struct SlabLayout {
712 /// The total number of slots available in the memory pool slab.
713 num_slots: usize,
714 /// The size of each slot in the memory pool; this contains the maximum
715 /// memory size (i.e., from WebAssembly or Wasmtime configuration) plus any
716 /// guard region after the memory to catch OOB access. On these guard
717 /// regions, note that:
718 /// - users can configure how aggressively (or not) to elide bounds checks
719 /// via `Config::memory_guard_size` (see also:
720 /// `memory_and_guard_size`)
721 /// - memory protection keys can compress the size of the guard region by
722 /// placing slots from a different key (i.e., a stripe) in the guard
723 /// region; this means the slot itself can be smaller and we can allocate
724 /// more of them.
725 slot_bytes: HostAlignedByteCount,
726 /// The maximum size that can become accessible, in bytes, for each linear
727 /// memory. Guaranteed to be a whole number of Wasm pages.
728 max_memory_bytes: HostAlignedByteCount,
729 /// If necessary, the number of bytes to reserve as a guard region at the
730 /// beginning of the slab.
731 pre_slab_guard_bytes: HostAlignedByteCount,
732 /// Like `pre_slab_guard_bytes`, but at the end of the slab.
733 post_slab_guard_bytes: HostAlignedByteCount,
734 /// The number of stripes needed in the slab layout.
735 num_stripes: usize,
736 }
737
738 impl SlabLayout {
739 /// Return the total size of the slab, using the final layout (where `n =
740 /// num_slots`):
741 ///
742 /// ```text
743 /// ┌────────────────────┬──────┬──────┬───┬──────┬─────────────────────┐
744 /// │pre_slab_guard_bytes│slot 1│slot 2│...│slot n│post_slab_guard_bytes│
745 /// └────────────────────┴──────┴──────┴───┴──────┴─────────────────────┘
746 /// ```
total_slab_bytes(&self) -> Result<HostAlignedByteCount>747 fn total_slab_bytes(&self) -> Result<HostAlignedByteCount> {
748 self.slot_bytes
749 .checked_mul(self.num_slots)
750 .and_then(|c| c.checked_add(self.pre_slab_guard_bytes))
751 .and_then(|c| c.checked_add(self.post_slab_guard_bytes))
752 .context("total size of memory reservation exceeds addressable memory")
753 }
754
755 /// Returns the number of Wasm bytes from the beginning of one slot to the
756 /// next slot in the same stripe--this is the striped equivalent of
757 /// `static_memory_bound`. Recall that between slots of the same stripe we
758 /// will see a slot from every other stripe.
759 ///
760 /// For example, in a 3-stripe pool, this function measures the distance
761 /// from the beginning of slot 1 to slot 4, which are of the same stripe:
762 ///
763 /// ```text
764 /// ◄────────────────────►
765 /// ┌────────┬──────┬──────┬────────┬───┐
766 /// │*slot 1*│slot 2│slot 3│*slot 4*│...|
767 /// └────────┴──────┴──────┴────────┴───┘
768 /// ```
bytes_to_next_stripe_slot(&self) -> HostAlignedByteCount769 fn bytes_to_next_stripe_slot(&self) -> HostAlignedByteCount {
770 self.slot_bytes
771 .checked_mul(self.num_stripes)
772 .expect("constructor checks that self.slot_bytes * self.num_stripes is in bounds")
773 }
774 }
775
calculate(constraints: &SlabConstraints) -> Result<SlabLayout>776 fn calculate(constraints: &SlabConstraints) -> Result<SlabLayout> {
777 let SlabConstraints {
778 max_memory_bytes,
779 num_slots,
780 expected_slot_bytes,
781 num_pkeys_available,
782 guard_bytes,
783 guard_before_slots,
784 } = *constraints;
785
786 // If the user specifies a guard region, we always need to allocate a
787 // `PROT_NONE` region for it before any memory slots. Recall that we can
788 // avoid bounds checks for loads and stores with immediates up to
789 // `guard_bytes`, but we rely on Wasmtime to emit bounds checks for any
790 // accesses greater than this.
791 let pre_slab_guard_bytes = if guard_before_slots {
792 guard_bytes
793 } else {
794 HostAlignedByteCount::ZERO
795 };
796
797 // To calculate the slot size, we start with the default configured size and
798 // attempt to chip away at this via MPK protection. Note here how we begin
799 // to define a slot as "all of the memory and guard region."
800 let faulting_region_bytes = expected_slot_bytes
801 .max(max_memory_bytes)
802 .checked_add(guard_bytes)
803 .context("faulting region is too large")?;
804
805 let (num_stripes, slot_bytes) = if guard_bytes == 0 || max_memory_bytes == 0 || num_slots == 0 {
806 // In the uncommon case where the memory/guard regions are empty or we don't need any slots , we
807 // will not need any stripes: we just lay out the slots back-to-back
808 // using a single stripe.
809 (1, faulting_region_bytes.byte_count())
810 } else if num_pkeys_available < 2 {
811 // If we do not have enough protection keys to stripe the memory, we do
812 // the same. We can't elide any of the guard bytes because we aren't
813 // overlapping guard regions with other stripes...
814 (1, faulting_region_bytes.byte_count())
815 } else {
816 // ...but if we can create at least two stripes, we can use another
817 // stripe (i.e., a different pkey) as this slot's guard region--this
818 // reduces the guard bytes each slot has to allocate. We must make
819 // sure, though, that if the size of that other stripe(s) does not
820 // fully cover `guard_bytes`, we keep those around to prevent OOB
821 // access.
822
823 // We first calculate the number of stripes we need: we want to
824 // minimize this so that there is less chance of a single store
825 // running out of slots with its stripe--we need at least two,
826 // though. But this is not just an optimization; we need to handle
827 // the case when there are fewer slots than stripes. E.g., if our
828 // pool is configured with only three slots (`num_memory_slots =
829 // 3`), we will run into failures if we attempt to set up more than
830 // three stripes.
831 let needed_num_stripes = faulting_region_bytes
832 .checked_div(max_memory_bytes)
833 .expect("if condition above implies max_memory_bytes is non-zero")
834 + usize::from(
835 faulting_region_bytes
836 .checked_rem(max_memory_bytes)
837 .expect("if condition above implies max_memory_bytes is non-zero")
838 != 0,
839 );
840 assert!(needed_num_stripes > 0);
841 let num_stripes = num_pkeys_available.min(needed_num_stripes).min(num_slots);
842
843 // Next, we try to reduce the slot size by "overlapping" the stripes: we
844 // can make slot `n` smaller since we know that slot `n+1` and following
845 // are in different stripes and will look just like `PROT_NONE` memory.
846 // Recall that codegen expects a guarantee that at least
847 // `faulting_region_bytes` will catch OOB accesses via segfaults.
848 let needed_slot_bytes = faulting_region_bytes
849 .byte_count()
850 .checked_div(num_stripes)
851 .unwrap_or(faulting_region_bytes.byte_count())
852 .max(max_memory_bytes.byte_count());
853 assert!(needed_slot_bytes >= max_memory_bytes.byte_count());
854
855 (num_stripes, needed_slot_bytes)
856 };
857
858 // The page-aligned slot size; equivalent to `memory_and_guard_size`.
859 let slot_bytes =
860 HostAlignedByteCount::new_rounded_up(slot_bytes).context("slot size is too large")?;
861
862 // We may need another guard region (like `pre_slab_guard_bytes`) at the end
863 // of our slab to maintain our `faulting_region_bytes` guarantee. We could
864 // be conservative and just create it as large as `faulting_region_bytes`,
865 // but because we know that the last slot's `slot_bytes` make up the first
866 // part of that region, we reduce the final guard region by that much.
867 let post_slab_guard_bytes = faulting_region_bytes.saturating_sub(slot_bytes);
868
869 // Check that we haven't exceeded the slab we can calculate given the limits
870 // of `usize`.
871 let layout = SlabLayout {
872 num_slots,
873 slot_bytes,
874 max_memory_bytes,
875 pre_slab_guard_bytes,
876 post_slab_guard_bytes,
877 num_stripes,
878 };
879 match layout.total_slab_bytes() {
880 Ok(_) => Ok(layout),
881 Err(e) => Err(e),
882 }
883 }
884
885 #[cfg(test)]
886 mod tests {
887 use super::*;
888 use proptest::prelude::*;
889
890 const WASM_PAGE_SIZE: u32 = wasmtime_environ::Memory::DEFAULT_PAGE_SIZE;
891
892 #[cfg(target_pointer_width = "64")]
893 #[test]
test_memory_pool() -> Result<()>894 fn test_memory_pool() -> Result<()> {
895 let pool = MemoryPool::new(
896 &PoolingInstanceAllocatorConfig {
897 limits: InstanceLimits {
898 total_memories: 5,
899 max_tables_per_module: 0,
900 max_memories_per_module: 3,
901 table_elements: 0,
902 max_memory_size: WASM_PAGE_SIZE as usize,
903 ..Default::default()
904 },
905 ..Default::default()
906 },
907 &Tunables {
908 memory_reservation: WASM_PAGE_SIZE as u64,
909 memory_guard_size: 0,
910 ..Tunables::default_host()
911 },
912 )?;
913
914 assert_eq!(pool.layout.slot_bytes, WASM_PAGE_SIZE as usize);
915 assert_eq!(pool.layout.num_slots, 5);
916 assert_eq!(pool.layout.max_memory_bytes, WASM_PAGE_SIZE as usize);
917
918 let base = pool.mapping.as_ptr() as usize;
919
920 for i in 0..5 {
921 let index = MemoryAllocationIndex(i);
922 let ptr = pool.get_base(index).as_mut_ptr();
923 assert_eq!(
924 ptr as usize - base,
925 i as usize * pool.layout.slot_bytes.byte_count()
926 );
927 }
928
929 Ok(())
930 }
931
932 #[test]
933 #[cfg_attr(miri, ignore)]
test_pooling_allocator_striping()934 fn test_pooling_allocator_striping() {
935 if !mpk::is_supported() {
936 println!("skipping `test_pooling_allocator_striping` test; mpk is not supported");
937 return;
938 }
939
940 // Force the use of MPK.
941 let config = PoolingInstanceAllocatorConfig {
942 memory_protection_keys: Enabled::Yes,
943 ..PoolingInstanceAllocatorConfig::default()
944 };
945 let pool = MemoryPool::new(&config, &Tunables::default_host()).unwrap();
946 assert!(pool.stripes.len() >= 2);
947
948 let max_memory_slots = config.limits.total_memories;
949 dbg!(pool.stripes[0].allocator.num_empty_slots());
950 dbg!(pool.stripes[1].allocator.num_empty_slots());
951 let available_memory_slots: usize = pool
952 .stripes
953 .iter()
954 .map(|s| s.allocator.num_empty_slots())
955 .sum();
956 assert_eq!(
957 max_memory_slots,
958 u32::try_from(available_memory_slots).unwrap()
959 );
960 }
961
962 #[test]
check_known_layout_calculations()963 fn check_known_layout_calculations() {
964 for num_pkeys_available in 0..16 {
965 for num_memory_slots in [0, 1, 10, 64] {
966 for expected_slot_bytes in [0, 1 << 30 /* 1GB */, 4 << 30 /* 4GB */] {
967 let expected_slot_bytes =
968 HostAlignedByteCount::new(expected_slot_bytes).unwrap();
969 for max_memory_bytes in
970 [0, 1 * WASM_PAGE_SIZE as usize, 10 * WASM_PAGE_SIZE as usize]
971 {
972 // Note new rather than new_rounded_up here -- for now,
973 // WASM_PAGE_SIZE is 64KiB, which is a multiple of the
974 // host page size on all platforms.
975 let max_memory_bytes = HostAlignedByteCount::new(max_memory_bytes).unwrap();
976 for guard_bytes in [0, 2 << 30 /* 2GB */] {
977 let guard_bytes = HostAlignedByteCount::new(guard_bytes).unwrap();
978 for guard_before_slots in [true, false] {
979 let constraints = SlabConstraints {
980 max_memory_bytes,
981 num_slots: num_memory_slots,
982 expected_slot_bytes,
983 num_pkeys_available,
984 guard_bytes,
985 guard_before_slots,
986 };
987 match calculate(&constraints) {
988 Ok(layout) => {
989 assert_slab_layout_invariants(constraints, layout)
990 }
991 Err(e) => {
992 // Only allow failure on 32-bit
993 // platforms where the calculation
994 // exceeded the size of the address
995 // space
996 assert!(
997 cfg!(target_pointer_width = "32")
998 && e.to_string()
999 .contains("exceeds addressable memory"),
1000 "bad error: {e:?}"
1001 );
1002 }
1003 }
1004 }
1005 }
1006 }
1007 }
1008 }
1009 }
1010 }
1011
1012 proptest! {
1013 #[test]
1014 #[cfg_attr(miri, ignore)]
1015 fn check_random_layout_calculations(c in constraints()) {
1016 if let Ok(l) = calculate(&c) {
1017 assert_slab_layout_invariants(c, l);
1018 }
1019 }
1020 }
1021
constraints() -> impl Strategy<Value = SlabConstraints>1022 fn constraints() -> impl Strategy<Value = SlabConstraints> {
1023 (
1024 any::<HostAlignedByteCount>(),
1025 any::<usize>(),
1026 any::<HostAlignedByteCount>(),
1027 any::<usize>(),
1028 any::<HostAlignedByteCount>(),
1029 any::<bool>(),
1030 )
1031 .prop_map(
1032 |(
1033 max_memory_bytes,
1034 num_memory_slots,
1035 expected_slot_bytes,
1036 num_pkeys_available,
1037 guard_bytes,
1038 guard_before_slots,
1039 )| {
1040 SlabConstraints {
1041 max_memory_bytes,
1042 num_slots: num_memory_slots,
1043 expected_slot_bytes,
1044 num_pkeys_available,
1045 guard_bytes,
1046 guard_before_slots,
1047 }
1048 },
1049 )
1050 }
1051
assert_slab_layout_invariants(c: SlabConstraints, s: SlabLayout)1052 fn assert_slab_layout_invariants(c: SlabConstraints, s: SlabLayout) {
1053 // Check that all the sizes add up.
1054 assert_eq!(
1055 s.total_slab_bytes().unwrap(),
1056 s.pre_slab_guard_bytes
1057 .checked_add(s.slot_bytes.checked_mul(c.num_slots).unwrap())
1058 .and_then(|c| c.checked_add(s.post_slab_guard_bytes))
1059 .unwrap(),
1060 "the slab size does not add up: {c:?} => {s:?}"
1061 );
1062 assert!(
1063 s.slot_bytes >= s.max_memory_bytes,
1064 "slot is not big enough: {c:?} => {s:?}"
1065 );
1066
1067 // The HostAlignedByteCount newtype wrapper ensures that the various
1068 // byte values are page-aligned.
1069
1070 // Check that we use no more or less stripes than needed.
1071 assert!(s.num_stripes >= 1, "not enough stripes: {c:?} => {s:?}");
1072 if c.num_pkeys_available == 0 || c.num_slots == 0 {
1073 assert_eq!(
1074 s.num_stripes, 1,
1075 "expected at least one stripe: {c:?} => {s:?}"
1076 );
1077 } else {
1078 assert!(
1079 s.num_stripes <= c.num_pkeys_available,
1080 "layout has more stripes than available pkeys: {c:?} => {s:?}"
1081 );
1082 assert!(
1083 s.num_stripes <= c.num_slots,
1084 "layout has more stripes than memory slots: {c:?} => {s:?}"
1085 );
1086 }
1087
1088 // Check that we use the minimum number of stripes/protection keys.
1089 // - if the next MPK-protected slot is bigger or the same as the
1090 // required guard region, we only need two stripes
1091 // - if the next slot is smaller than the guard region, we only need
1092 // enough stripes to add up to at least that guard region size.
1093 if c.num_pkeys_available > 1 && !c.max_memory_bytes.is_zero() {
1094 assert!(
1095 s.num_stripes <= (c.guard_bytes.checked_div(c.max_memory_bytes).unwrap() + 2),
1096 "calculated more stripes than needed: {c:?} => {s:?}"
1097 );
1098 }
1099
1100 // Check that the memory-striping will not allow OOB access.
1101 // - we may have reduced the slot size from `expected_slot_bytes` to
1102 // `slot_bytes` assuming MPK striping; we check that our guaranteed
1103 // "faulting region" is respected
1104 // - the last slot won't have MPK striping after it; we check that the
1105 // `post_slab_guard_bytes` accounts for this
1106 assert!(
1107 s.bytes_to_next_stripe_slot()
1108 >= c.expected_slot_bytes
1109 .max(c.max_memory_bytes)
1110 .checked_add(c.guard_bytes)
1111 .unwrap(),
1112 "faulting region not large enough: {c:?} => {s:?}"
1113 );
1114 assert!(
1115 s.slot_bytes.checked_add(s.post_slab_guard_bytes).unwrap() >= c.expected_slot_bytes,
1116 "last slot may allow OOB access: {c:?} => {s:?}"
1117 );
1118 }
1119 }
1120