xref: /xnu-11215/osfmk/kern/affinity.c (revision 5c2921b0)
1 /*
2  * Copyright (c) 2007 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <kern/affinity.h>
30 #include <kern/task.h>
31 #include <kern/kalloc.h>
32 #include <machine/cpu_affinity.h>
33 
34 /*
35  * Affinity involves 2 objects:
36  * - affinity namespace:
37  *	shared by a task family, this controls affinity tag lookup and
38  *	allocation; it anchors all affinity sets in one namespace
39  * - affinity set:
40  *      anchors all threads with membership of this affinity set
41  *	and which share an affinity tag in the owning namespace.
42  *
43  * Locking:
44  * - The task lock protects the creation of an affinity namespace.
45  * - The affinity namespace mutex protects the inheritance of a namespace
46  *   and its thread membership. This includes its destruction when the task
47  *   reference count goes to zero.
48  * - The thread mutex protects a thread's affinity set membership, but in
49  *   addition, the thread_lock is taken to write thread->affinity_set since this
 *   field (representing the active affinity set) is read by the scheduler.
51  *
52  * The lock ordering is: task lock, thread mutex, namespace mutex, thread lock.
53  */
54 
/*
 * Debug tracing: DBG() forwards to kprintf() only when AFFINITY_DEBUG
 * is configured; otherwise it expands to nothing.
 */
#if AFFINITY_DEBUG
#define DBG(x...)       kprintf("DBG: " x)
#else
#define DBG(x...)
#endif
60 
/*
 * An affinity namespace: shared by a task family, it anchors all of the
 * family's affinity sets (see the block comment at the top of this file).
 */
struct affinity_space {
	lck_mtx_t               aspc_lock;        /* protects the two fields below */
	uint32_t                aspc_task_count;  /* tasks sharing this namespace */
	queue_head_t    aspc_affinities;          /* affinity sets, linked via aset_affinities */
};
typedef struct affinity_space *affinity_space_t;
67 
/* Forward declarations for the file-local helpers defined below. */
static affinity_space_t affinity_space_alloc(void);
static void affinity_space_free(affinity_space_t aspc);
static affinity_set_t affinity_set_alloc(void);
static void affinity_set_free(affinity_set_t aset);
static affinity_set_t affinity_set_find(affinity_space_t aspc, uint32_t tag);
static void affinity_set_place(affinity_space_t aspc, affinity_set_t aset);
static void affinity_set_add(affinity_set_t aset, thread_t thread);
static affinity_set_t affinity_set_remove(affinity_set_t aset, thread_t thread);
76 
/*
 * The following globals may be modified by the sysctls
 *   kern.affinity_sets_enabled	- disables hinting if cleared
 *   kern.affinity_sets_mapping	- controls cache distribution policy
 * See bsd/kern_sysctl.c
 *
 * Affinity sets are not used on embedded, which typically only
 * has a single pset, and last-processor affinity is
 * more important than pset affinity.
 */
#if !defined(XNU_TARGET_OS_OSX)
boolean_t       affinity_sets_enabled = FALSE;  /* affinity hinting disabled */
int             affinity_sets_mapping = 0;      /* 0: always start placement at affinity 0 */
#else /* !defined(XNU_TARGET_OS_OSX) */
boolean_t       affinity_sets_enabled = TRUE;
int             affinity_sets_mapping = 1;      /* non-0: start placement at a per-namespace hash */
#endif /* !defined(XNU_TARGET_OS_OSX) */
94 
95 boolean_t
thread_affinity_is_supported(void)96 thread_affinity_is_supported(void)
97 {
98 	return ml_get_max_affinity_sets() != 0;
99 }
100 
101 
102 /*
103  * thread_affinity_get()
104  * Return the affinity tag for a thread.
105  * Called with the thread mutex held.
106  */
107 uint32_t
thread_affinity_get(thread_t thread)108 thread_affinity_get(thread_t thread)
109 {
110 	uint32_t tag;
111 
112 	if (thread->affinity_set != NULL) {
113 		tag = thread->affinity_set->aset_tag;
114 	} else {
115 		tag = THREAD_AFFINITY_TAG_NULL;
116 	}
117 
118 	return tag;
119 }
120 
121 
/*
 * thread_affinity_set()
 * Place a thread in an affinity set identified by a tag; a tag of
 * THREAD_AFFINITY_TAG_NULL simply removes any current membership.
 * Returns KERN_SUCCESS, KERN_RESOURCE_SHORTAGE on allocation failure,
 * or KERN_TERMINATED if the thread died before we took its mutex.
 * Called with thread referenced but not locked.
 *
 * Lock ordering here follows the file header: task lock, thread mutex,
 * namespace (aspc) mutex.
 */
kern_return_t
thread_affinity_set(thread_t thread, uint32_t tag)
{
	task_t                  task = get_threadtask(thread);
	affinity_set_t          aset;
	affinity_set_t          empty_aset = NULL;
	affinity_space_t        aspc;
	affinity_space_t        new_aspc = NULL;

	DBG("thread_affinity_set(%p,%u)\n", thread, tag);

	/*
	 * Ensure the task has an affinity namespace.  The allocation is done
	 * with the task lock dropped, then re-checked under the lock in case
	 * another thread installed a namespace concurrently.
	 */
	task_lock(task);
	aspc = task->affinity_space;
	if (aspc == NULL) {
		task_unlock(task);
		new_aspc = affinity_space_alloc();
		if (new_aspc == NULL) {
			return KERN_RESOURCE_SHORTAGE;
		}
		task_lock(task);
		if (task->affinity_space == NULL) {
			task->affinity_space = new_aspc;
			new_aspc = NULL;
		}
		aspc = task->affinity_space;
	}
	task_unlock(task);
	if (new_aspc) {
		/* Lost the install race: discard our redundant allocation. */
		affinity_space_free(new_aspc);
	}

	thread_mtx_lock(thread);
	if (!thread->active) {
		/* Beaten to lock and the thread is dead */
		thread_mtx_unlock(thread);
		return KERN_TERMINATED;
	}

	lck_mtx_lock(&aspc->aspc_lock);
	aset = thread->affinity_set;
	if (aset != NULL) {
		/*
		 * Remove thread from current affinity set.
		 * If the set is emptied, affinity_set_remove() hands it back
		 * to us (in empty_aset) for possible reuse below.
		 */
		DBG("thread_affinity_set(%p,%u) removing from aset %p\n",
		    thread, tag, aset);
		empty_aset = affinity_set_remove(aset, thread);
	}

	if (tag != THREAD_AFFINITY_TAG_NULL) {
		aset = affinity_set_find(aspc, tag);
		if (aset != NULL) {
			/*
			 * Add thread to existing affinity set
			 */
			DBG("thread_affinity_set(%p,%u) found aset %p\n",
			    thread, tag, aset);
		} else {
			/*
			 * Use the new affinity set, add this thread
			 * and place it in a suitable processor set.
			 */
			if (empty_aset != NULL) {
				/* Recycle the set we just emptied above. */
				aset = empty_aset;
				empty_aset = NULL;
			} else {
				aset = affinity_set_alloc();
				if (aset == NULL) {
					lck_mtx_unlock(&aspc->aspc_lock);
					thread_mtx_unlock(thread);
					return KERN_RESOURCE_SHORTAGE;
				}
			}
			DBG("thread_affinity_set(%p,%u) (re-)using aset %p\n",
			    thread, tag, aset);
			aset->aset_tag = tag;
			affinity_set_place(aspc, aset);
		}
		affinity_set_add(aset, thread);
	}

	lck_mtx_unlock(&aspc->aspc_lock);
	thread_mtx_unlock(thread);

	/*
	 * If we wound up not using an empty aset we created,
	 * free it here.
	 */
	if (empty_aset != NULL) {
		affinity_set_free(empty_aset);
	}

	/* Block so the new affinity takes effect immediately for the caller. */
	if (thread == current_thread()) {
		thread_block(THREAD_CONTINUE_NULL);
	}

	return KERN_SUCCESS;
}
225 
226 /*
227  * task_affinity_create()
228  * Called from task create.
229  */
230 void
task_affinity_create(task_t parent_task,task_t child_task)231 task_affinity_create(task_t parent_task, task_t child_task)
232 {
233 	affinity_space_t        aspc = parent_task->affinity_space;
234 
235 	DBG("task_affinity_create(%p,%p)\n", parent_task, child_task);
236 
237 	assert(aspc);
238 
239 	/*
240 	 * Bump the task reference count on the shared namespace and
241 	 * give it to the child.
242 	 */
243 	lck_mtx_lock(&aspc->aspc_lock);
244 	aspc->aspc_task_count++;
245 	child_task->affinity_space = aspc;
246 	lck_mtx_unlock(&aspc->aspc_lock);
247 }
248 
249 /*
250  * task_affinity_deallocate()
251  * Called from task_deallocate() when there's a namespace to dereference.
252  */
253 void
task_affinity_deallocate(task_t task)254 task_affinity_deallocate(task_t task)
255 {
256 	affinity_space_t        aspc = task->affinity_space;
257 
258 	DBG("task_affinity_deallocate(%p) aspc %p task_count %d\n",
259 	    task, aspc, aspc->aspc_task_count);
260 
261 	lck_mtx_lock(&aspc->aspc_lock);
262 	if (--(aspc->aspc_task_count) == 0) {
263 		assert(queue_empty(&aspc->aspc_affinities));
264 		lck_mtx_unlock(&aspc->aspc_lock);
265 		affinity_space_free(aspc);
266 	} else {
267 		lck_mtx_unlock(&aspc->aspc_lock);
268 	}
269 }
270 
271 /*
272  * task_affinity_info()
273  * Return affinity tag info (number, min, max) for the task.
274  *
275  * Conditions: task is locked.
276  */
277 kern_return_t
task_affinity_info(task_t task,task_info_t task_info_out,mach_msg_type_number_t * task_info_count)278 task_affinity_info(
279 	task_t                  task,
280 	task_info_t             task_info_out,
281 	mach_msg_type_number_t  *task_info_count)
282 {
283 	affinity_set_t                  aset;
284 	affinity_space_t                aspc;
285 	task_affinity_tag_info_t        info;
286 
287 	*task_info_count = TASK_AFFINITY_TAG_INFO_COUNT;
288 	info = (task_affinity_tag_info_t) task_info_out;
289 	info->set_count = 0;
290 	info->task_count = 0;
291 	info->min = THREAD_AFFINITY_TAG_NULL;
292 	info->max = THREAD_AFFINITY_TAG_NULL;
293 
294 	aspc = task->affinity_space;
295 	if (aspc) {
296 		lck_mtx_lock(&aspc->aspc_lock);
297 		queue_iterate(&aspc->aspc_affinities,
298 		    aset, affinity_set_t, aset_affinities) {
299 			info->set_count++;
300 			if (info->min == THREAD_AFFINITY_TAG_NULL ||
301 			    aset->aset_tag < (uint32_t) info->min) {
302 				info->min = aset->aset_tag;
303 			}
304 			if (info->max == THREAD_AFFINITY_TAG_NULL ||
305 			    aset->aset_tag > (uint32_t) info->max) {
306 				info->max = aset->aset_tag;
307 			}
308 		}
309 		info->task_count = aspc->aspc_task_count;
310 		lck_mtx_unlock(&aspc->aspc_lock);
311 	}
312 	return KERN_SUCCESS;
313 }
314 
315 /*
316  * Called from thread_dup() during fork() with child's mutex held.
317  * Set the child into the parent's affinity set.
318  * Note the affinity space is shared.
319  */
320 void
thread_affinity_dup(thread_t parent,thread_t child)321 thread_affinity_dup(thread_t parent, thread_t child)
322 {
323 	affinity_set_t                  aset;
324 	affinity_space_t                aspc;
325 
326 	thread_mtx_lock(parent);
327 	aset = parent->affinity_set;
328 	DBG("thread_affinity_dup(%p,%p) aset %p\n", parent, child, aset);
329 	if (aset == NULL) {
330 		thread_mtx_unlock(parent);
331 		return;
332 	}
333 
334 	aspc = aset->aset_space;
335 	assert(aspc == get_threadtask(parent)->affinity_space);
336 	assert(aspc == get_threadtask(child)->affinity_space);
337 
338 	lck_mtx_lock(&aspc->aspc_lock);
339 	affinity_set_add(aset, child);
340 	lck_mtx_unlock(&aspc->aspc_lock);
341 
342 	thread_mtx_unlock(parent);
343 }
344 
345 /*
346  * thread_affinity_terminate()
347  * Remove thread from any affinity set.
348  * Called with the thread mutex locked.
349  */
350 void
thread_affinity_terminate(thread_t thread)351 thread_affinity_terminate(thread_t thread)
352 {
353 	affinity_set_t          aset = thread->affinity_set;
354 	affinity_space_t        aspc;
355 
356 	DBG("thread_affinity_terminate(%p)\n", thread);
357 
358 	aspc = aset->aset_space;
359 	lck_mtx_lock(&aspc->aspc_lock);
360 	if (affinity_set_remove(aset, thread)) {
361 		affinity_set_free(aset);
362 	}
363 	lck_mtx_unlock(&aspc->aspc_lock);
364 }
365 
366 /*
367  * thread_affinity_exec()
368  * Called from execve() to cancel any current affinity - a new image implies
369  * the calling thread terminates any expressed or inherited affinity.
370  */
371 void
thread_affinity_exec(thread_t thread)372 thread_affinity_exec(thread_t thread)
373 {
374 	if (thread->affinity_set != AFFINITY_SET_NULL) {
375 		thread_affinity_terminate(thread);
376 	}
377 }
378 
379 /*
380  * Create an empty affinity namespace data structure.
381  */
382 static affinity_space_t
affinity_space_alloc(void)383 affinity_space_alloc(void)
384 {
385 	affinity_space_t        aspc;
386 
387 	aspc = kalloc_type(struct affinity_space, Z_WAITOK | Z_NOFAIL);
388 
389 	lck_mtx_init(&aspc->aspc_lock, &task_lck_grp, &task_lck_attr);
390 	queue_init(&aspc->aspc_affinities);
391 	aspc->aspc_task_count = 1;
392 
393 	DBG("affinity_space_create() returns %p\n", aspc);
394 	return aspc;
395 }
396 
397 /*
398  * Destroy the given empty affinity namespace data structure.
399  */
400 static void
affinity_space_free(affinity_space_t aspc)401 affinity_space_free(affinity_space_t aspc)
402 {
403 	assert(queue_empty(&aspc->aspc_affinities));
404 
405 	lck_mtx_destroy(&aspc->aspc_lock, &task_lck_grp);
406 	DBG("affinity_space_free(%p)\n", aspc);
407 	kfree_type(struct affinity_space, aspc);
408 }
409 
410 
411 /*
412  * Create an empty affinity set data structure
413  * entering it into a list anchored by the owning task.
414  */
415 static affinity_set_t
affinity_set_alloc(void)416 affinity_set_alloc(void)
417 {
418 	affinity_set_t  aset;
419 
420 	aset = kalloc_type(struct affinity_set, Z_WAITOK | Z_NOFAIL);
421 
422 	aset->aset_thread_count = 0;
423 	queue_init(&aset->aset_affinities);
424 	queue_init(&aset->aset_threads);
425 	aset->aset_num = 0;
426 	aset->aset_pset = PROCESSOR_SET_NULL;
427 	aset->aset_space = NULL;
428 
429 	DBG("affinity_set_create() returns %p\n", aset);
430 	return aset;
431 }
432 
433 /*
434  * Destroy the given empty affinity set data structure
435  * after removing it from the parent task.
436  */
437 static void
affinity_set_free(affinity_set_t aset)438 affinity_set_free(affinity_set_t aset)
439 {
440 	assert(queue_empty(&aset->aset_threads));
441 
442 	DBG("affinity_set_free(%p)\n", aset);
443 	kfree_type(struct affinity_set, aset);
444 }
445 
446 /*
447  * Add a thread to an affinity set.
448  * The caller must have the thread mutex and space locked.
449  */
450 static void
affinity_set_add(affinity_set_t aset,thread_t thread)451 affinity_set_add(affinity_set_t aset, thread_t thread)
452 {
453 	spl_t   s;
454 
455 	DBG("affinity_set_add(%p,%p)\n", aset, thread);
456 	queue_enter(&aset->aset_threads,
457 	    thread, thread_t, affinity_threads);
458 	aset->aset_thread_count++;
459 	s = splsched();
460 	thread_lock(thread);
461 	thread->affinity_set = aset;
462 	thread_unlock(thread);
463 	splx(s);
464 }
465 
/*
 * affinity_set_remove()
 * Remove a thread from an affinity set, returning the set if now empty.
 * An emptied set is unlinked from its namespace and reset to a pristine
 * state; ownership transfers to the caller, who must free or reuse it.
 * The caller must have the thread mutex and space locked.
 */
static affinity_set_t
affinity_set_remove(affinity_set_t aset, thread_t thread)
{
	spl_t   s;

	/* Clear the scheduler-visible pointer under the thread lock. */
	s = splsched();
	thread_lock(thread);
	thread->affinity_set = NULL;
	thread_unlock(thread);
	splx(s);

	aset->aset_thread_count--;
	queue_remove(&aset->aset_threads,
	    thread, thread_t, affinity_threads);
	if (queue_empty(&aset->aset_threads)) {
		/* Last member gone: detach the set and blank it for reuse. */
		queue_remove(&aset->aset_space->aspc_affinities,
		    aset, affinity_set_t, aset_affinities);
		assert(aset->aset_thread_count == 0);
		aset->aset_tag = THREAD_AFFINITY_TAG_NULL;
		aset->aset_num = 0;
		aset->aset_pset = PROCESSOR_SET_NULL;
		aset->aset_space = NULL;
		DBG("affinity_set_remove(%p,%p) set now empty\n", aset, thread);
		return aset;
	} else {
		DBG("affinity_set_remove(%p,%p)\n", aset, thread);
		return NULL;
	}
}
499 
500 /*
501  * Find an affinity set in the parent task with the given affinity tag.
502  * The caller must have the space locked.
503  */
504 static affinity_set_t
affinity_set_find(affinity_space_t space,uint32_t tag)505 affinity_set_find(affinity_space_t space, uint32_t tag)
506 {
507 	affinity_set_t  aset;
508 
509 	queue_iterate(&space->aspc_affinities,
510 	    aset, affinity_set_t, aset_affinities) {
511 		if (aset->aset_tag == tag) {
512 			DBG("affinity_set_find(%p,%u) finds %p\n",
513 			    space, tag, aset);
514 			return aset;
515 		}
516 	}
517 	DBG("affinity_set_find(%p,%u) not found\n", space, tag);
518 	return NULL;
519 }
520 
521 /*
522  * affinity_set_place() assigns an affinity set to a suitable processor_set.
523  * The selection criteria is:
524  *  - the set currently occupied by the least number of affinities
525  *    belonging to the owning the task.
526  * The caller must have the space locked.
527  */
528 static void
affinity_set_place(affinity_space_t aspc,affinity_set_t new_aset)529 affinity_set_place(affinity_space_t aspc, affinity_set_t new_aset)
530 {
531 	unsigned short    set_occupancy[MAX_CPUS] = { 0 };
532 	unsigned    num_cpu_asets = ml_get_max_affinity_sets();
533 	unsigned    i_least_occupied;
534 	affinity_set_t  aset;
535 
536 	if (__improbable(num_cpu_asets > MAX_CPUS)) {
537 		// If this triggers then the array needs to be made bigger.
538 		panic("num_cpu_asets = %d > %d too big in %s", num_cpu_asets, MAX_CPUS, __FUNCTION__);
539 	}
540 
541 	/*
542 	 * Scan the affinity sets calculating the number of sets
543 	 * occupy the available physical affinities.
544 	 */
545 	queue_iterate(&aspc->aspc_affinities,
546 	    aset, affinity_set_t, aset_affinities) {
547 		if (aset->aset_num < num_cpu_asets) {
548 			set_occupancy[aset->aset_num]++;
549 		} else {
550 			panic("aset_num = %d in %s", aset->aset_num, __FUNCTION__);
551 		}
552 	}
553 
554 	/*
555 	 * Find the least occupied set (or the first empty set).
556 	 * To distribute placements somewhat, start searching from
557 	 * a cpu affinity chosen randomly per namespace:
558 	 *   [(unsigned int)aspc % 127] % num_cpu_asets
559 	 * unless this mapping policy is overridden.
560 	 */
561 	if (affinity_sets_mapping == 0) {
562 		i_least_occupied = 0;
563 	} else {
564 		i_least_occupied = (unsigned int)(((uintptr_t)aspc % 127) % num_cpu_asets);
565 	}
566 	for (unsigned i = 0; i < num_cpu_asets; i++) {
567 		unsigned int    j = (i_least_occupied + i) % num_cpu_asets;
568 		if (set_occupancy[j] == 0) {
569 			i_least_occupied = j;
570 			break;
571 		}
572 		if (set_occupancy[j] < set_occupancy[i_least_occupied]) {
573 			i_least_occupied = j;
574 		}
575 	}
576 	new_aset->aset_num = i_least_occupied;
577 	new_aset->aset_pset = ml_affinity_to_pset(i_least_occupied);
578 
579 	/* Add the new affinity set to the group */
580 	new_aset->aset_space = aspc;
581 	queue_enter(&aspc->aspc_affinities,
582 	    new_aset, affinity_set_t, aset_affinities);
583 
584 	DBG("affinity_set_place(%p,%p) selected affinity %u pset %p\n",
585 	    aspc, new_aset, new_aset->aset_num, new_aset->aset_pset);
586 }
587