1 /*
2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/types.h>
30 #include <sys/proc.h>
31 #include <sys/proc_internal.h>
32 #include <sys/systm.h>
33 #include <sys/user.h>
34 #include <sys/dtrace_ptss.h>
35
36 #include <mach/vm_map.h>
37 #include <mach/vm_param.h>
38 #include <mach/mach_vm.h>
39
40 #include <kern/task.h>
41
42 #include <vm/vm_map.h>
43 #include <vm/vm_kern_xnu.h>
44
45 /*
46 * This function requires the sprlock to be held
47 *
48 * In general, it will not block. If it needs to allocate a new
49 * page of memory, the underlying kernel kalloc may block.
50 */
51 struct dtrace_ptss_page_entry*
dtrace_ptss_claim_entry_locked(struct proc * p)52 dtrace_ptss_claim_entry_locked(struct proc* p)
53 {
54 LCK_MTX_ASSERT(&p->p_dtrace_sprlock, LCK_MTX_ASSERT_OWNED);
55
56 struct dtrace_ptss_page_entry* entry = NULL;
57
58 while (TRUE) {
59 struct dtrace_ptss_page_entry* temp = p->p_dtrace_ptss_free_list;
60
61 if (temp == NULL) {
62 // Nothing on the free list. Allocate a new page, its okay if multiple threads race here.
63 struct dtrace_ptss_page* page = dtrace_ptss_allocate_page(p);
64
65 // Make sure we actually got a page
66 if (page == NULL) {
67 return NULL;
68 }
69
70 // Add the page to the page list
71 page->next = p->p_dtrace_ptss_pages;
72 p->p_dtrace_ptss_pages = page;
73
74 // CAS the entries onto the free list.
75 do {
76 page->entries[DTRACE_PTSS_ENTRIES_PER_PAGE - 1].next = p->p_dtrace_ptss_free_list;
77 } while (!OSCompareAndSwapPtr((void *)page->entries[DTRACE_PTSS_ENTRIES_PER_PAGE - 1].next,
78 (void *)&page->entries[0],
79 (void * volatile *)&p->p_dtrace_ptss_free_list));
80
81 // Now that we've added to the free list, try again.
82 continue;
83 }
84
85 // Claim temp
86 if (!OSCompareAndSwapPtr((void *)temp, (void *)temp->next, (void * volatile *)&p->p_dtrace_ptss_free_list)) {
87 continue;
88 }
89
90 // At this point, we own temp.
91 entry = temp;
92
93 break;
94 }
95
96 return entry;
97 }
98
99 /*
100 * This function does not require any locks to be held on entry.
101 */
102 struct dtrace_ptss_page_entry*
dtrace_ptss_claim_entry(struct proc * p)103 dtrace_ptss_claim_entry(struct proc* p)
104 {
105 // Verify no locks held on entry
106 LCK_MTX_ASSERT(&p->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED);
107 LCK_MTX_ASSERT(&p->p_mlock, LCK_MTX_ASSERT_NOTOWNED);
108
109 struct dtrace_ptss_page_entry* entry = NULL;
110
111 while (TRUE) {
112 struct dtrace_ptss_page_entry* temp = p->p_dtrace_ptss_free_list;
113
114 if (temp == NULL) {
115 lck_mtx_lock(&p->p_dtrace_sprlock);
116 temp = dtrace_ptss_claim_entry_locked(p);
117 lck_mtx_unlock(&p->p_dtrace_sprlock);
118 return temp;
119 }
120
121 // Claim temp
122 if (!OSCompareAndSwapPtr((void *)temp, (void *)temp->next, (void * volatile *)&p->p_dtrace_ptss_free_list)) {
123 continue;
124 }
125
126 // At this point, we own temp.
127 entry = temp;
128
129 break;
130 }
131
132 return entry;
133 }
134
135 /*
136 * This function does not require any locks to be held on entry.
137 *
138 * (PR-11138709) A NULL p->p_dtrace_ptss_pages means the entry can
139 * no longer be referenced safely. When found in this state, the chore
140 * of releasing an entry to the free list is ignored.
141 */
142 void
dtrace_ptss_release_entry(struct proc * p,struct dtrace_ptss_page_entry * e)143 dtrace_ptss_release_entry(struct proc* p, struct dtrace_ptss_page_entry* e)
144 {
145 if (p && p->p_dtrace_ptss_pages && e) {
146 do {
147 e->next = p->p_dtrace_ptss_free_list;
148 } while (!OSCompareAndSwapPtr((void *)e->next, (void *)e, (void * volatile *)&p->p_dtrace_ptss_free_list));
149 }
150 }
151
152 /*
153 * This function allocates a new page in the target process's address space.
154 *
155 * It returns a dtrace_ptss_page that has its entries chained, with the last
156 * entries next field set to NULL. It does not add the page or the entries to
157 * the process's page/entry lists.
158 *
159 * This function does not require that any locks be held when it is invoked.
160 */
161 struct dtrace_ptss_page*
dtrace_ptss_allocate_page(struct proc * p)162 dtrace_ptss_allocate_page(struct proc* p)
163 {
164 // Allocate the kernel side data
165 struct dtrace_ptss_page* ptss_page = kalloc_type(struct dtrace_ptss_page, Z_ZERO | Z_WAITOK);
166 if (ptss_page == NULL) {
167 return NULL;
168 }
169
170 // Now allocate a page in user space and set its protections to allow execute.
171 task_t task = proc_task(p);
172 vm_map_t map = get_task_map_reference(task);
173 if (map == NULL) {
174 goto err;
175 }
176
177 mach_vm_size_t size = PAGE_MAX_SIZE;
178 mach_vm_offset_t addr = 0;
179 mach_vm_offset_t write_addr = 0;
180 /*
181 * The embedded OS has extra permissions for writable and executable pages.
182 * To ensure correct permissions, we must set the page protections separately.
183 */
184 vm_prot_t cur_protection = VM_PROT_READ | VM_PROT_EXECUTE;
185 vm_prot_t max_protection = VM_PROT_READ | VM_PROT_EXECUTE;
186 kern_return_t kr;
187
188 kr = mach_vm_map_kernel(map, &addr, size, 0,
189 VM_MAP_KERNEL_FLAGS_ANYWHERE(), IPC_PORT_NULL, 0, FALSE,
190 cur_protection, max_protection, VM_INHERIT_DEFAULT);
191 if (kr != KERN_SUCCESS) {
192 goto err;
193 }
194
195 /*
196 * To ensure the page is properly marked as user debug, temporarily change
197 * the permissions to rw and then back again to rx. The VM will keep track
198 * of this remapping and on fault will pass PMAP_OPTIONS_XNU_USER_DEBUG
199 * properly to the PMAP layer.
200 */
201 kr = mach_vm_protect(map, (mach_vm_offset_t)addr, (mach_vm_size_t)size, 0,
202 VM_PROT_READ | VM_PROT_WRITE | VM_PROT_COPY);
203 if (kr != KERN_SUCCESS) {
204 goto err;
205 }
206
207 kr = mach_vm_protect(map, (mach_vm_offset_t)addr, (mach_vm_size_t)size, 0,
208 VM_PROT_READ | VM_PROT_EXECUTE);
209 if (kr != KERN_SUCCESS) {
210 goto err;
211 }
212
213 /*
214 * If on embedded, remap the scratch space as writable at another
215 * virtual address
216 */
217 kr = mach_vm_remap(map, &write_addr, size, 0,
218 VM_FLAGS_ANYWHERE, map, addr, FALSE,
219 &cur_protection, &max_protection, VM_INHERIT_DEFAULT);
220 if (kr != KERN_SUCCESS || !(max_protection & VM_PROT_WRITE)) {
221 goto err;
222 }
223
224 kr = mach_vm_protect(map, (mach_vm_offset_t)write_addr, (mach_vm_size_t)size, 0, VM_PROT_READ | VM_PROT_WRITE);
225 if (kr != KERN_SUCCESS) {
226 goto err;
227 }
228
229 // Chain the page entries.
230 int i;
231 for (i = 0; i < DTRACE_PTSS_ENTRIES_PER_PAGE; i++) {
232 ptss_page->entries[i].addr = addr + (i * DTRACE_PTSS_SCRATCH_SPACE_PER_THREAD);
233 ptss_page->entries[i].write_addr = write_addr + (i * DTRACE_PTSS_SCRATCH_SPACE_PER_THREAD);
234 ptss_page->entries[i].next = &ptss_page->entries[i + 1];
235 }
236
237 // The last entry should point to NULL
238 ptss_page->entries[DTRACE_PTSS_ENTRIES_PER_PAGE - 1].next = NULL;
239
240 vm_map_deallocate(map);
241
242 return ptss_page;
243
244 err:
245 kfree_type(struct dtrace_ptss_page, ptss_page);
246
247 if (map) {
248 vm_map_deallocate(map);
249 }
250
251 return NULL;
252 }
253
254 /*
255 * This function frees an existing page in the target process's address space.
256 *
257 * It does not alter any of the process's page/entry lists.
258 *
259 * TODO: Inline in dtrace_ptrace_exec_exit?
260 */
261 void
dtrace_ptss_free_page(struct proc * p,struct dtrace_ptss_page * ptss_page)262 dtrace_ptss_free_page(struct proc* p, struct dtrace_ptss_page* ptss_page)
263 {
264 // Grab the task and get a reference to its vm_map
265 task_t task = proc_task(p);
266 vm_map_t map = get_task_map_reference(task);
267
268 mach_vm_address_t addr = ptss_page->entries[0].addr;
269 mach_vm_size_t size = PAGE_SIZE; // We need some way to assert that this matches vm_map_round_page() !!!
270
271 // Silent failures, no point in checking return code.
272 mach_vm_deallocate(map, addr, size);
273
274 mach_vm_address_t write_addr = ptss_page->entries[0].write_addr;
275 mach_vm_deallocate(map, write_addr, size);
276
277 vm_map_deallocate(map);
278 }
279
280 /*
281 * This function assumes that the target process has been
282 * suspended, and the proc_lock & sprlock is held
283 */
284 void
dtrace_ptss_enable(struct proc * p)285 dtrace_ptss_enable(struct proc* p)
286 {
287 LCK_MTX_ASSERT(&p->p_dtrace_sprlock, LCK_MTX_ASSERT_OWNED);
288 LCK_MTX_ASSERT(&p->p_mlock, LCK_MTX_ASSERT_OWNED);
289
290 struct uthread* uth;
291 /*
292 * XXX There has been a concern raised about holding the proc_lock
293 * while calling dtrace_ptss_claim_entry(), due to the fact
294 * that dtrace_ptss_claim_entry() can potentially malloc.
295 */
296 TAILQ_FOREACH(uth, &p->p_uthlist, uu_list) {
297 uth->t_dtrace_scratch = dtrace_ptss_claim_entry_locked(p);
298 }
299 }
300
301 /*
302 * This function is not thread safe.
303 *
304 * It assumes the sprlock is held, and the proc_lock is not.
305 */
306 void
dtrace_ptss_exec_exit(struct proc * p)307 dtrace_ptss_exec_exit(struct proc* p)
308 {
309 /*
310 * Should hold sprlock to touch the pages list. Must not
311 * hold the proc lock to avoid deadlock.
312 */
313 LCK_MTX_ASSERT(&p->p_dtrace_sprlock, LCK_MTX_ASSERT_OWNED);
314 LCK_MTX_ASSERT(&p->p_mlock, LCK_MTX_ASSERT_NOTOWNED);
315
316 p->p_dtrace_ptss_free_list = NULL;
317
318 struct dtrace_ptss_page* temp = p->p_dtrace_ptss_pages;
319 p->p_dtrace_ptss_pages = NULL;
320
321 while (temp != NULL) {
322 struct dtrace_ptss_page* next = temp->next;
323
324 // Do we need to specifically mach_vm_deallocate the user pages?
325 // This can be called when the process is exiting, I believe the proc's
326 // vm_map_t may already be toast.
327
328 // Must be certain to free the kernel memory!
329 kfree_type(struct dtrace_ptss_page, temp);
330 temp = next;
331 }
332 }
333
334 /*
335 * This function is not thread safe.
336 *
337 * The child proc ptss fields are initialized to NULL at fork time.
338 * Pages allocated in the parent are copied as part of the vm_map copy, though.
339 * We need to deallocate those pages.
340 *
341 * Parent and child sprlock should be held, and proc_lock must NOT be held.
342 */
343 void
dtrace_ptss_fork(struct proc * parent,struct proc * child)344 dtrace_ptss_fork(struct proc* parent, struct proc* child)
345 {
346 // The child should not have any pages/entries allocated at this point.
347 // ASSERT(child->p_dtrace_ptss_pages == NULL);
348 // ASSERT(child->p_dtrace_ptss_free_list == NULL);
349
350 /*
351 * The parent's sprlock should be held, to protect its pages list
352 * from changing while the child references it. The child's sprlock
353 * must also be held, because we are modifying its pages list.
354 * Finally, to prevent a deadlock with the fasttrap cleanup code,
355 * neither the parent or child proc_lock should be held.
356 */
357 LCK_MTX_ASSERT(&parent->p_dtrace_sprlock, LCK_MTX_ASSERT_OWNED);
358 LCK_MTX_ASSERT(&parent->p_mlock, LCK_MTX_ASSERT_NOTOWNED);
359 LCK_MTX_ASSERT(&child->p_dtrace_sprlock, LCK_MTX_ASSERT_OWNED);
360 LCK_MTX_ASSERT(&child->p_mlock, LCK_MTX_ASSERT_NOTOWNED);
361
362 // Get page list from *PARENT*
363 struct dtrace_ptss_page* temp = parent->p_dtrace_ptss_pages;
364
365 while (temp != NULL) {
366 // Freeing the page in the *CHILD*
367 dtrace_ptss_free_page(child, temp);
368
369 // Do not free the kernel memory, it belong to the parent.
370 temp = temp->next;
371 }
372 }
373