1fc6e66f4SAlice Ryhl // SPDX-License-Identifier: GPL-2.0
2fc6e66f4SAlice Ryhl
3fc6e66f4SAlice Ryhl //! Kernel page allocation and management.
4fc6e66f4SAlice Ryhl
5fc6e66f4SAlice Ryhl use crate::{
6fc6e66f4SAlice Ryhl alloc::{AllocError, Flags},
7fc6e66f4SAlice Ryhl bindings,
8fc6e66f4SAlice Ryhl error::code::*,
9fc6e66f4SAlice Ryhl error::Result,
10fc6e66f4SAlice Ryhl uaccess::UserSliceReader,
11fc6e66f4SAlice Ryhl };
12fc6e66f4SAlice Ryhl use core::ptr::{self, NonNull};
13fc6e66f4SAlice Ryhl
14fc6e66f4SAlice Ryhl /// A bitwise shift for the page size.
15fc6e66f4SAlice Ryhl pub const PAGE_SHIFT: usize = bindings::PAGE_SHIFT as usize;
16fc6e66f4SAlice Ryhl
17fc6e66f4SAlice Ryhl /// The number of bytes in a page.
18fc6e66f4SAlice Ryhl pub const PAGE_SIZE: usize = bindings::PAGE_SIZE;
19fc6e66f4SAlice Ryhl
20fc6e66f4SAlice Ryhl /// A bitmask that gives the page containing a given address.
21fc6e66f4SAlice Ryhl pub const PAGE_MASK: usize = !(PAGE_SIZE - 1);
22fc6e66f4SAlice Ryhl
236e86292fSAlice Ryhl /// Round up the given number to the next multiple of [`PAGE_SIZE`].
246e86292fSAlice Ryhl ///
256e86292fSAlice Ryhl /// It is incorrect to pass an address where the next multiple of [`PAGE_SIZE`] doesn't fit in a
266e86292fSAlice Ryhl /// [`usize`].
page_align(addr: usize) -> usize276e86292fSAlice Ryhl pub const fn page_align(addr: usize) -> usize {
286e86292fSAlice Ryhl // Parentheses around `PAGE_SIZE - 1` to avoid triggering overflow sanitizers in the wrong
296e86292fSAlice Ryhl // cases.
306e86292fSAlice Ryhl (addr + (PAGE_SIZE - 1)) & PAGE_MASK
316e86292fSAlice Ryhl }
326e86292fSAlice Ryhl
33fc6e66f4SAlice Ryhl /// A pointer to a page that owns the page allocation.
34fc6e66f4SAlice Ryhl ///
35fc6e66f4SAlice Ryhl /// # Invariants
36fc6e66f4SAlice Ryhl ///
37fc6e66f4SAlice Ryhl /// The pointer is valid, and has ownership over the page.
38fc6e66f4SAlice Ryhl pub struct Page {
39fc6e66f4SAlice Ryhl page: NonNull<bindings::page>,
40fc6e66f4SAlice Ryhl }
41fc6e66f4SAlice Ryhl
42fc6e66f4SAlice Ryhl // SAFETY: Pages have no logic that relies on them staying on a given thread, so moving them across
43fc6e66f4SAlice Ryhl // threads is safe.
44fc6e66f4SAlice Ryhl unsafe impl Send for Page {}
45fc6e66f4SAlice Ryhl
46fc6e66f4SAlice Ryhl // SAFETY: Pages have no logic that relies on them not being accessed concurrently, so accessing
47fc6e66f4SAlice Ryhl // them concurrently is safe.
48fc6e66f4SAlice Ryhl unsafe impl Sync for Page {}
49fc6e66f4SAlice Ryhl
50fc6e66f4SAlice Ryhl impl Page {
51fc6e66f4SAlice Ryhl /// Allocates a new page.
52fc6e66f4SAlice Ryhl ///
53fc6e66f4SAlice Ryhl /// # Examples
54fc6e66f4SAlice Ryhl ///
55fc6e66f4SAlice Ryhl /// Allocate memory for a page.
56fc6e66f4SAlice Ryhl ///
57fc6e66f4SAlice Ryhl /// ```
58fc6e66f4SAlice Ryhl /// use kernel::page::Page;
59fc6e66f4SAlice Ryhl ///
60fc6e66f4SAlice Ryhl /// let page = Page::alloc_page(GFP_KERNEL)?;
61*57c1ccc7SDaniel Sedlak /// # Ok::<(), kernel::alloc::AllocError>(())
62fc6e66f4SAlice Ryhl /// ```
63fc6e66f4SAlice Ryhl ///
64fc6e66f4SAlice Ryhl /// Allocate memory for a page and zero its contents.
65fc6e66f4SAlice Ryhl ///
66fc6e66f4SAlice Ryhl /// ```
67fc6e66f4SAlice Ryhl /// use kernel::page::Page;
68fc6e66f4SAlice Ryhl ///
69fc6e66f4SAlice Ryhl /// let page = Page::alloc_page(GFP_KERNEL | __GFP_ZERO)?;
70*57c1ccc7SDaniel Sedlak /// # Ok::<(), kernel::alloc::AllocError>(())
71fc6e66f4SAlice Ryhl /// ```
alloc_page(flags: Flags) -> Result<Self, AllocError>72fc6e66f4SAlice Ryhl pub fn alloc_page(flags: Flags) -> Result<Self, AllocError> {
73fc6e66f4SAlice Ryhl // SAFETY: Depending on the value of `gfp_flags`, this call may sleep. Other than that, it
74fc6e66f4SAlice Ryhl // is always safe to call this method.
75fc6e66f4SAlice Ryhl let page = unsafe { bindings::alloc_pages(flags.as_raw(), 0) };
76fc6e66f4SAlice Ryhl let page = NonNull::new(page).ok_or(AllocError)?;
77fc6e66f4SAlice Ryhl // INVARIANT: We just successfully allocated a page, so we now have ownership of the newly
78fc6e66f4SAlice Ryhl // allocated page. We transfer that ownership to the new `Page` object.
79fc6e66f4SAlice Ryhl Ok(Self { page })
80fc6e66f4SAlice Ryhl }
81fc6e66f4SAlice Ryhl
82fc6e66f4SAlice Ryhl /// Returns a raw pointer to the page.
as_ptr(&self) -> *mut bindings::page83fc6e66f4SAlice Ryhl pub fn as_ptr(&self) -> *mut bindings::page {
84fc6e66f4SAlice Ryhl self.page.as_ptr()
85fc6e66f4SAlice Ryhl }
86fc6e66f4SAlice Ryhl
87fc6e66f4SAlice Ryhl /// Runs a piece of code with this page mapped to an address.
88fc6e66f4SAlice Ryhl ///
89fc6e66f4SAlice Ryhl /// The page is unmapped when this call returns.
90fc6e66f4SAlice Ryhl ///
91fc6e66f4SAlice Ryhl /// # Using the raw pointer
92fc6e66f4SAlice Ryhl ///
93fc6e66f4SAlice Ryhl /// It is up to the caller to use the provided raw pointer correctly. The pointer is valid for
94fc6e66f4SAlice Ryhl /// `PAGE_SIZE` bytes and for the duration in which the closure is called. The pointer might
95fc6e66f4SAlice Ryhl /// only be mapped on the current thread, and when that is the case, dereferencing it on other
96fc6e66f4SAlice Ryhl /// threads is UB. Other than that, the usual rules for dereferencing a raw pointer apply: don't
97fc6e66f4SAlice Ryhl /// cause data races, the memory may be uninitialized, and so on.
98fc6e66f4SAlice Ryhl ///
99fc6e66f4SAlice Ryhl /// If multiple threads map the same page at the same time, then they may reference with
100fc6e66f4SAlice Ryhl /// different addresses. However, even if the addresses are different, the underlying memory is
101fc6e66f4SAlice Ryhl /// still the same for these purposes (e.g., it's still a data race if they both write to the
102fc6e66f4SAlice Ryhl /// same underlying byte at the same time).
with_page_mapped<T>(&self, f: impl FnOnce(*mut u8) -> T) -> T103fc6e66f4SAlice Ryhl fn with_page_mapped<T>(&self, f: impl FnOnce(*mut u8) -> T) -> T {
104fc6e66f4SAlice Ryhl // SAFETY: `page` is valid due to the type invariants on `Page`.
105fc6e66f4SAlice Ryhl let mapped_addr = unsafe { bindings::kmap_local_page(self.as_ptr()) };
106fc6e66f4SAlice Ryhl
107fc6e66f4SAlice Ryhl let res = f(mapped_addr.cast());
108fc6e66f4SAlice Ryhl
109fc6e66f4SAlice Ryhl // This unmaps the page mapped above.
110fc6e66f4SAlice Ryhl //
111fc6e66f4SAlice Ryhl // SAFETY: Since this API takes the user code as a closure, it can only be used in a manner
112fc6e66f4SAlice Ryhl // where the pages are unmapped in reverse order. This is as required by `kunmap_local`.
113fc6e66f4SAlice Ryhl //
114fc6e66f4SAlice Ryhl // In other words, if this call to `kunmap_local` happens when a different page should be
115fc6e66f4SAlice Ryhl // unmapped first, then there must necessarily be a call to `kmap_local_page` other than the
116fc6e66f4SAlice Ryhl // call just above in `with_page_mapped` that made that possible. In this case, it is the
117fc6e66f4SAlice Ryhl // unsafe block that wraps that other call that is incorrect.
118fc6e66f4SAlice Ryhl unsafe { bindings::kunmap_local(mapped_addr) };
119fc6e66f4SAlice Ryhl
120fc6e66f4SAlice Ryhl res
121fc6e66f4SAlice Ryhl }
122fc6e66f4SAlice Ryhl
123fc6e66f4SAlice Ryhl /// Runs a piece of code with a raw pointer to a slice of this page, with bounds checking.
124fc6e66f4SAlice Ryhl ///
125fc6e66f4SAlice Ryhl /// If `f` is called, then it will be called with a pointer that points at `off` bytes into the
126fc6e66f4SAlice Ryhl /// page, and the pointer will be valid for at least `len` bytes. The pointer is only valid on
127fc6e66f4SAlice Ryhl /// this task, as this method uses a local mapping.
128fc6e66f4SAlice Ryhl ///
129fc6e66f4SAlice Ryhl /// If `off` and `len` refers to a region outside of this page, then this method returns
130fc6e66f4SAlice Ryhl /// [`EINVAL`] and does not call `f`.
131fc6e66f4SAlice Ryhl ///
132fc6e66f4SAlice Ryhl /// # Using the raw pointer
133fc6e66f4SAlice Ryhl ///
134fc6e66f4SAlice Ryhl /// It is up to the caller to use the provided raw pointer correctly. The pointer is valid for
135fc6e66f4SAlice Ryhl /// `len` bytes and for the duration in which the closure is called. The pointer might only be
136fc6e66f4SAlice Ryhl /// mapped on the current thread, and when that is the case, dereferencing it on other threads
137fc6e66f4SAlice Ryhl /// is UB. Other than that, the usual rules for dereferencing a raw pointer apply: don't cause
138fc6e66f4SAlice Ryhl /// data races, the memory may be uninitialized, and so on.
139fc6e66f4SAlice Ryhl ///
140fc6e66f4SAlice Ryhl /// If multiple threads map the same page at the same time, then they may reference with
141fc6e66f4SAlice Ryhl /// different addresses. However, even if the addresses are different, the underlying memory is
142fc6e66f4SAlice Ryhl /// still the same for these purposes (e.g., it's still a data race if they both write to the
143fc6e66f4SAlice Ryhl /// same underlying byte at the same time).
with_pointer_into_page<T>( &self, off: usize, len: usize, f: impl FnOnce(*mut u8) -> Result<T>, ) -> Result<T>144fc6e66f4SAlice Ryhl fn with_pointer_into_page<T>(
145fc6e66f4SAlice Ryhl &self,
146fc6e66f4SAlice Ryhl off: usize,
147fc6e66f4SAlice Ryhl len: usize,
148fc6e66f4SAlice Ryhl f: impl FnOnce(*mut u8) -> Result<T>,
149fc6e66f4SAlice Ryhl ) -> Result<T> {
150fc6e66f4SAlice Ryhl let bounds_ok = off <= PAGE_SIZE && len <= PAGE_SIZE && (off + len) <= PAGE_SIZE;
151fc6e66f4SAlice Ryhl
152fc6e66f4SAlice Ryhl if bounds_ok {
153fc6e66f4SAlice Ryhl self.with_page_mapped(move |page_addr| {
154fc6e66f4SAlice Ryhl // SAFETY: The `off` integer is at most `PAGE_SIZE`, so this pointer offset will
155fc6e66f4SAlice Ryhl // result in a pointer that is in bounds or one off the end of the page.
156fc6e66f4SAlice Ryhl f(unsafe { page_addr.add(off) })
157fc6e66f4SAlice Ryhl })
158fc6e66f4SAlice Ryhl } else {
159fc6e66f4SAlice Ryhl Err(EINVAL)
160fc6e66f4SAlice Ryhl }
161fc6e66f4SAlice Ryhl }
162fc6e66f4SAlice Ryhl
163fc6e66f4SAlice Ryhl /// Maps the page and reads from it into the given buffer.
164fc6e66f4SAlice Ryhl ///
165fc6e66f4SAlice Ryhl /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes
166fc6e66f4SAlice Ryhl /// outside of the page, then this call returns [`EINVAL`].
167fc6e66f4SAlice Ryhl ///
168fc6e66f4SAlice Ryhl /// # Safety
169fc6e66f4SAlice Ryhl ///
170fc6e66f4SAlice Ryhl /// * Callers must ensure that `dst` is valid for writing `len` bytes.
171fc6e66f4SAlice Ryhl /// * Callers must ensure that this call does not race with a write to the same page that
172fc6e66f4SAlice Ryhl /// overlaps with this read.
read_raw(&self, dst: *mut u8, offset: usize, len: usize) -> Result173fc6e66f4SAlice Ryhl pub unsafe fn read_raw(&self, dst: *mut u8, offset: usize, len: usize) -> Result {
174fc6e66f4SAlice Ryhl self.with_pointer_into_page(offset, len, move |src| {
175fc6e66f4SAlice Ryhl // SAFETY: If `with_pointer_into_page` calls into this closure, then
176fc6e66f4SAlice Ryhl // it has performed a bounds check and guarantees that `src` is
177fc6e66f4SAlice Ryhl // valid for `len` bytes.
178fc6e66f4SAlice Ryhl //
179fc6e66f4SAlice Ryhl // There caller guarantees that there is no data race.
180fc6e66f4SAlice Ryhl unsafe { ptr::copy_nonoverlapping(src, dst, len) };
181fc6e66f4SAlice Ryhl Ok(())
182fc6e66f4SAlice Ryhl })
183fc6e66f4SAlice Ryhl }
184fc6e66f4SAlice Ryhl
185fc6e66f4SAlice Ryhl /// Maps the page and writes into it from the given buffer.
186fc6e66f4SAlice Ryhl ///
187fc6e66f4SAlice Ryhl /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes
188fc6e66f4SAlice Ryhl /// outside of the page, then this call returns [`EINVAL`].
189fc6e66f4SAlice Ryhl ///
190fc6e66f4SAlice Ryhl /// # Safety
191fc6e66f4SAlice Ryhl ///
192fc6e66f4SAlice Ryhl /// * Callers must ensure that `src` is valid for reading `len` bytes.
193fc6e66f4SAlice Ryhl /// * Callers must ensure that this call does not race with a read or write to the same page
194fc6e66f4SAlice Ryhl /// that overlaps with this write.
write_raw(&self, src: *const u8, offset: usize, len: usize) -> Result195fc6e66f4SAlice Ryhl pub unsafe fn write_raw(&self, src: *const u8, offset: usize, len: usize) -> Result {
196fc6e66f4SAlice Ryhl self.with_pointer_into_page(offset, len, move |dst| {
197fc6e66f4SAlice Ryhl // SAFETY: If `with_pointer_into_page` calls into this closure, then it has performed a
198fc6e66f4SAlice Ryhl // bounds check and guarantees that `dst` is valid for `len` bytes.
199fc6e66f4SAlice Ryhl //
200fc6e66f4SAlice Ryhl // There caller guarantees that there is no data race.
201fc6e66f4SAlice Ryhl unsafe { ptr::copy_nonoverlapping(src, dst, len) };
202fc6e66f4SAlice Ryhl Ok(())
203fc6e66f4SAlice Ryhl })
204fc6e66f4SAlice Ryhl }
205fc6e66f4SAlice Ryhl
206fc6e66f4SAlice Ryhl /// Maps the page and zeroes the given slice.
207fc6e66f4SAlice Ryhl ///
208fc6e66f4SAlice Ryhl /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes
209fc6e66f4SAlice Ryhl /// outside of the page, then this call returns [`EINVAL`].
210fc6e66f4SAlice Ryhl ///
211fc6e66f4SAlice Ryhl /// # Safety
212fc6e66f4SAlice Ryhl ///
213fc6e66f4SAlice Ryhl /// Callers must ensure that this call does not race with a read or write to the same page that
214fc6e66f4SAlice Ryhl /// overlaps with this write.
fill_zero_raw(&self, offset: usize, len: usize) -> Result215fc6e66f4SAlice Ryhl pub unsafe fn fill_zero_raw(&self, offset: usize, len: usize) -> Result {
216fc6e66f4SAlice Ryhl self.with_pointer_into_page(offset, len, move |dst| {
217fc6e66f4SAlice Ryhl // SAFETY: If `with_pointer_into_page` calls into this closure, then it has performed a
218fc6e66f4SAlice Ryhl // bounds check and guarantees that `dst` is valid for `len` bytes.
219fc6e66f4SAlice Ryhl //
220fc6e66f4SAlice Ryhl // There caller guarantees that there is no data race.
221fc6e66f4SAlice Ryhl unsafe { ptr::write_bytes(dst, 0u8, len) };
222fc6e66f4SAlice Ryhl Ok(())
223fc6e66f4SAlice Ryhl })
224fc6e66f4SAlice Ryhl }
225fc6e66f4SAlice Ryhl
226fc6e66f4SAlice Ryhl /// Copies data from userspace into this page.
227fc6e66f4SAlice Ryhl ///
228fc6e66f4SAlice Ryhl /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes
229fc6e66f4SAlice Ryhl /// outside of the page, then this call returns [`EINVAL`].
230fc6e66f4SAlice Ryhl ///
231fc6e66f4SAlice Ryhl /// Like the other `UserSliceReader` methods, data races are allowed on the userspace address.
232fc6e66f4SAlice Ryhl /// However, they are not allowed on the page you are copying into.
233fc6e66f4SAlice Ryhl ///
234fc6e66f4SAlice Ryhl /// # Safety
235fc6e66f4SAlice Ryhl ///
236fc6e66f4SAlice Ryhl /// Callers must ensure that this call does not race with a read or write to the same page that
237fc6e66f4SAlice Ryhl /// overlaps with this write.
copy_from_user_slice_raw( &self, reader: &mut UserSliceReader, offset: usize, len: usize, ) -> Result238fc6e66f4SAlice Ryhl pub unsafe fn copy_from_user_slice_raw(
239fc6e66f4SAlice Ryhl &self,
240fc6e66f4SAlice Ryhl reader: &mut UserSliceReader,
241fc6e66f4SAlice Ryhl offset: usize,
242fc6e66f4SAlice Ryhl len: usize,
243fc6e66f4SAlice Ryhl ) -> Result {
244fc6e66f4SAlice Ryhl self.with_pointer_into_page(offset, len, move |dst| {
245fc6e66f4SAlice Ryhl // SAFETY: If `with_pointer_into_page` calls into this closure, then it has performed a
246fc6e66f4SAlice Ryhl // bounds check and guarantees that `dst` is valid for `len` bytes. Furthermore, we have
247fc6e66f4SAlice Ryhl // exclusive access to the slice since the caller guarantees that there are no races.
248fc6e66f4SAlice Ryhl reader.read_raw(unsafe { core::slice::from_raw_parts_mut(dst.cast(), len) })
249fc6e66f4SAlice Ryhl })
250fc6e66f4SAlice Ryhl }
251fc6e66f4SAlice Ryhl }
252fc6e66f4SAlice Ryhl
253fc6e66f4SAlice Ryhl impl Drop for Page {
drop(&mut self)254fc6e66f4SAlice Ryhl fn drop(&mut self) {
255fc6e66f4SAlice Ryhl // SAFETY: By the type invariants, we have ownership of the page and can free it.
256fc6e66f4SAlice Ryhl unsafe { bindings::__free_pages(self.page.as_ptr(), 0) };
257fc6e66f4SAlice Ryhl }
258fc6e66f4SAlice Ryhl }
259