xref: /linux-6.15/lib/test_objpool.c (revision 92f90d3b)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Test module for lockless object pool
5  *
6  * Copyright: [email protected]
7  */
8 
9 #include <linux/version.h>
10 #include <linux/errno.h>
11 #include <linux/module.h>
12 #include <linux/moduleparam.h>
13 #include <linux/sched.h>
14 #include <linux/cpumask.h>
15 #include <linux/completion.h>
16 #include <linux/kthread.h>
17 #include <linux/cpu.h>
18 #include <linux/cpuset.h>
19 #include <linux/slab.h>
20 #include <linux/vmalloc.h>
21 #include <linux/delay.h>
22 #include <linux/hrtimer.h>
23 #include <linux/interrupt.h>
24 #include <linux/objpool.h>
25 
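/* upper bound for one bulk pop/push round: sizes the on-stack nods[] arrays, so bulk_normal/bulk_irq must not exceed it */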
26 #define OT_NR_MAX_BULK (16)
27 
28 /* memory usage */
29 struct ot_mem_stat {
30 	atomic_long_t alloc;
31 	atomic_long_t free;
32 };
33 
34 /* object allocation results */
35 struct ot_obj_stat {
36 	unsigned long nhits;
37 	unsigned long nmiss;
38 };
39 
40 /* control & results per testcase */
41 struct ot_data {
42 	struct rw_semaphore start;
43 	struct completion wait;
44 	struct completion rcu;
45 	atomic_t nthreads ____cacheline_aligned_in_smp;
46 	atomic_t stop ____cacheline_aligned_in_smp;
47 	struct ot_mem_stat kmalloc;
48 	struct ot_mem_stat vmalloc;
49 	struct ot_obj_stat objects;
50 	u64    duration;
51 };
52 
53 /* testcase */
54 struct ot_test {
55 	int async; /* synchronous or asynchronous */
56 	int mode; /* only mode 0 supported */
57 	int objsz; /* object size */
58 	int duration; /* ms */
59 	int delay; /* ms */
60 	int bulk_normal;
61 	int bulk_irq;
62 	unsigned long hrtimer; /* ms */
63 	const char *name;
64 	struct ot_data data;
65 };
66 
67 /* per-cpu worker */
68 struct ot_item {
69 	struct objpool_head *pool; /* pool head */
70 	struct ot_test *test; /* test parameters */
71 
72 	void (*worker)(struct ot_item *item, int irq);
73 
74 	/* hrtimer control */
75 	ktime_t hrtcycle;
76 	struct hrtimer hrtimer;
77 
78 	int bulk[2]; /* for thread and irq */
79 	int delay;
80 	u32 niters;
81 
82 	/* summary per thread */
83 	struct ot_obj_stat stat[2]; /* thread and irq */
84 	u64 duration;
85 };
86 
87 /*
88  * memory leak checking
89  */
90 
91 static void *ot_kzalloc(struct ot_test *test, long size)
92 {
93 	void *ptr = kzalloc(size, GFP_KERNEL);
94 
95 	if (ptr)
96 		atomic_long_add(size, &test->data.kmalloc.alloc);
97 	return ptr;
98 }
99 
100 static void ot_kfree(struct ot_test *test, void *ptr, long size)
101 {
102 	if (!ptr)
103 		return;
104 	atomic_long_add(size, &test->data.kmalloc.free);
105 	kfree(ptr);
106 }
107 
108 static void ot_mem_report(struct ot_test *test)
109 {
110 	long alloc, free;
111 
112 	pr_info("memory allocation summary for %s\n", test->name);
113 
114 	alloc = atomic_long_read(&test->data.kmalloc.alloc);
115 	free = atomic_long_read(&test->data.kmalloc.free);
116 	pr_info("  kmalloc: %ld - %ld = %ld\n", alloc, free, alloc - free);
117 
118 	alloc = atomic_long_read(&test->data.vmalloc.alloc);
119 	free = atomic_long_read(&test->data.vmalloc.free);
120 	pr_info("  vmalloc: %ld - %ld = %ld\n", alloc, free, alloc - free);
121 }
122 
123 /* user object instance */
124 struct ot_node {
125 	void *owner;
126 	unsigned long data;
127 	unsigned long refs;
128 	unsigned long payload[32];
129 };
130 
131 /* user objpool manager */
132 struct ot_context {
133 	struct objpool_head pool; /* objpool head */
134 	struct ot_test *test; /* test parameters */
135 	void *ptr; /* user pool buffer */
136 	unsigned long size; /* buffer size */
137 	struct rcu_head rcu;
138 };
139 
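/* one worker item per possible cpu, reinitialized for every testcase */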
140 static DEFINE_PER_CPU(struct ot_item, ot_pcup_items);
141 
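/* reset the per-testcase control block; nthreads starts at 1 to account for the controlling (main) thread */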
142 static int ot_init_data(struct ot_data *data)
143 {
144 	memset(data, 0, sizeof(*data));
145 	init_rwsem(&data->start);
146 	init_completion(&data->wait);
147 	init_completion(&data->rcu);
148 	atomic_set(&data->nthreads, 1);
149 
150 	return 0;
151 }
152 
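/* objpool object initializer: tag each node with its owning pool */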
153 static int ot_init_node(void *nod, void *context)
154 {
155 	struct ot_context *sop = context;
156 	struct ot_node *on = nod;
157 
158 	on->owner = &sop->pool;
159 	return 0;
160 }
161 
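/* hrtimer callback: drives one bulk round from timer (interrupt) context and re-arms itself until the stop event is seen */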
162 static enum hrtimer_restart ot_hrtimer_handler(struct hrtimer *hrt)
163 {
164 	struct ot_item *item = container_of(hrt, struct ot_item, hrtimer);
165 	struct ot_test *test = item->test;
166 
167 	if (atomic_read_acquire(&test->data.stop))
168 		return HRTIMER_NORESTART;
169 
170 	/* do bulk tests of object pop/push */
171 	item->worker(item, 1);
172 
173 	hrtimer_forward(hrt, hrt->base->get_time(), item->hrtcycle);
174 	return HRTIMER_RESTART;
175 }
176 
177 static void ot_start_hrtimer(struct ot_item *item)
178 {
179 	if (!item->test->hrtimer)
180 		return;
181 	hrtimer_start(&item->hrtimer, item->hrtcycle, HRTIMER_MODE_REL);
182 }
183 
184 static void ot_stop_hrtimer(struct ot_item *item)
185 {
186 	if (!item->test->hrtimer)
187 		return;
188 	hrtimer_cancel(&item->hrtimer);
189 }
190 
191 static int ot_init_hrtimer(struct ot_item *item, unsigned long hrtimer)
192 {
193 	struct hrtimer *hrt = &item->hrtimer;
194 
195 	if (!hrtimer)
196 		return -ENOENT;
197 
198 	item->hrtcycle = ktime_set(0, hrtimer * 1000000UL);
199 	hrtimer_init(hrt, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
200 	hrt->function = ot_hrtimer_handler;
201 	return 0;
202 }
203 
204 static int ot_init_cpu_item(struct ot_item *item,
205 			struct ot_test *test,
206 			struct objpool_head *pool,
207 			void (*worker)(struct ot_item *, int))
208 {
209 	memset(item, 0, sizeof(*item));
210 	item->pool = pool;
211 	item->test = test;
212 	item->worker = worker;
213 
214 	item->bulk[0] = test->bulk_normal;
215 	item->bulk[1] = test->bulk_irq;
216 	item->delay = test->delay;
217 
218 	/* initialize hrtimer */
219 	ot_init_hrtimer(item, item->test->hrtimer);
220 	return 0;
221 }
222 
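/*
 * per-cpu worker thread: blocks at the start line (data->start is held for
 * write by the controlling thread), then runs bulk rounds until the stop
 * event; the last thread to drop data->nthreads to zero completes data->wait
 */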
223 static int ot_thread_worker(void *arg)
224 {
225 	struct ot_item *item = arg;
226 	struct ot_test *test = item->test;
227 	ktime_t start;
228 
229 	atomic_inc(&test->data.nthreads);
230 	down_read(&test->data.start);
231 	up_read(&test->data.start);
232 	start = ktime_get();
233 	ot_start_hrtimer(item);
234 	do {
235 		if (atomic_read_acquire(&test->data.stop))
236 			break;
237 		/* do bulk tests of object pop/push */
238 		item->worker(item, 0);
239 	} while (!kthread_should_stop());
240 	ot_stop_hrtimer(item);
241 	item->duration = (u64) ktime_us_delta(ktime_get(), start);
242 	if (atomic_dec_and_test(&test->data.nthreads))
243 		complete(&test->data.wait);
244 
245 	return 0;
246 }
247 
248 static void ot_perf_report(struct ot_test *test, u64 duration)
249 {
250 	struct ot_obj_stat total, normal = {0}, irq = {0};
251 	int cpu, nthreads = 0;
252 
253 	pr_info("\n");
254 	pr_info("Testing summary for %s\n", test->name);
255 
256 	for_each_possible_cpu(cpu) {
257 		struct ot_item *item = per_cpu_ptr(&ot_pcup_items, cpu);
258 		if (!item->duration)
259 			continue;
260 		normal.nhits += item->stat[0].nhits;
261 		normal.nmiss += item->stat[0].nmiss;
262 		irq.nhits += item->stat[1].nhits;
263 		irq.nmiss += item->stat[1].nmiss;
264 		pr_info("CPU: %d  duration: %lluus\n", cpu, item->duration);
265 		pr_info("\tthread:\t%16lu hits \t%16lu miss\n",
266 			item->stat[0].nhits, item->stat[0].nmiss);
267 		pr_info("\tirq:   \t%16lu hits \t%16lu miss\n",
268 			item->stat[1].nhits, item->stat[1].nmiss);
269 		pr_info("\ttotal: \t%16lu hits \t%16lu miss\n",
270 			item->stat[0].nhits + item->stat[1].nhits,
271 			item->stat[0].nmiss + item->stat[1].nmiss);
272 		nthreads++;
273 	}
274 
275 	total.nhits = normal.nhits + irq.nhits;
276 	total.nmiss = normal.nmiss + irq.nmiss;
277 
278 	pr_info("ALL: \tnthreads: %d  duration: %lluus\n", nthreads, duration);
279 	pr_info("SUM: \t%16lu hits \t%16lu miss\n",
280 		total.nhits, total.nmiss);
281 
282 	test->data.objects = total;
283 	test->data.duration = duration;
284 }
285 
286 /*
287  * synchronous test cases for objpool manipulation
288  */
289 
290 /* objpool manipulation for synchronous mode (percpu objpool) */
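/*
 * the pool is sized at 8 objects per possible cpu; objects smaller than
 * 512 bytes are requested with GFP_ATOMIC, larger ones with GFP_KERNEL,
 * which allows objpool to place its slots in vmalloc'ed memory (hence the
 * "from vmalloc" testcases)
 */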
291 static struct ot_context *ot_init_sync_m0(struct ot_test *test)
292 {
293 	struct ot_context *sop = NULL;
294 	int max = num_possible_cpus() << 3;
295 	gfp_t gfp = GFP_KERNEL;
296 
297 	sop = (struct ot_context *)ot_kzalloc(test, sizeof(*sop));
298 	if (!sop)
299 		return NULL;
300 	sop->test = test;
301 	if (test->objsz < 512)
302 		gfp = GFP_ATOMIC;
303 
304 	if (objpool_init(&sop->pool, max, test->objsz,
305 			 gfp, sop, ot_init_node, NULL)) {
306 		ot_kfree(test, sop, sizeof(*sop));
307 		return NULL;
308 	}
309 	WARN_ON(max != sop->pool.nr_objs);
310 
311 	return sop;
312 }
313 
314 static void ot_fini_sync(struct ot_context *sop)
315 {
316 	objpool_fini(&sop->pool);
317 	ot_kfree(sop->test, sop, sizeof(*sop));
318 }
319 
320 struct {
321 	struct ot_context * (*init)(struct ot_test *oc);
322 	void (*fini)(struct ot_context *sop);
323 } g_ot_sync_ops[] = {
324 	{.init = ot_init_sync_m0, .fini = ot_fini_sync},
325 };
326 
327 /*
328  * synchronous test cases: performance mode
329  */
330 
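/*
 * one bulk round in synchronous mode: pop up to bulk[irq] objects, optionally
 * sleep a while in thread context, then push everything back; a successful
 * pop counts as a hit, an empty pool as a miss
 */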
331 static void ot_bulk_sync(struct ot_item *item, int irq)
332 {
333 	struct ot_node *nods[OT_NR_MAX_BULK];
334 	int i;
335 
336 	for (i = 0; i < item->bulk[irq]; i++)
337 		nods[i] = objpool_pop(item->pool);
338 
339 	if (!irq && (item->delay || !(++(item->niters) & 0x7FFF)))
340 		msleep(item->delay);
341 
342 	while (i-- > 0) {
343 		struct ot_node *on = nods[i];
344 		if (on) {
345 			on->refs++;
346 			objpool_push(on, item->pool);
347 			item->stat[irq].nhits++;
348 		} else {
349 			item->stat[irq].nmiss++;
350 		}
351 	}
352 }
353 
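/*
 * driver for a synchronous testcase: spawn one bound worker per online cpu,
 * release the start line, let the workers run for test->duration ms, set the
 * stop event, wait for all workers, then finalize the pool and report
 */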
354 static int ot_start_sync(struct ot_test *test)
355 {
356 	struct ot_context *sop;
357 	ktime_t start;
358 	u64 duration;
359 	unsigned long timeout;
360 	int cpu;
361 
362 	/* initialize objpool for the synchronous testcase */
363 	sop = g_ot_sync_ops[test->mode].init(test);
364 	if (!sop)
365 		return -ENOMEM;
366 
367 	/* grab rwsem to block testing threads */
368 	down_write(&test->data.start);
369 
370 	for_each_possible_cpu(cpu) {
371 		struct ot_item *item = per_cpu_ptr(&ot_pcup_items, cpu);
372 		struct task_struct *work;
373 
374 		ot_init_cpu_item(item, test, &sop->pool, ot_bulk_sync);
375 
376 		/* skip offline cpus */
377 		if (!cpu_online(cpu))
378 			continue;
379 
380 		work = kthread_create_on_node(ot_thread_worker, item,
381 				cpu_to_node(cpu), "ot_worker_%d", cpu);
382 		if (IS_ERR(work)) {
383 			pr_err("failed to create thread for cpu %d\n", cpu);
384 		} else {
385 			kthread_bind(work, cpu);
386 			wake_up_process(work);
387 		}
388 	}
389 
390 	/* wait a while to make sure all threads are waiting at the start line */
391 	msleep(20);
392 
393 	/* in case no threads were created: insufficient memory? */
394 	if (atomic_dec_and_test(&test->data.nthreads))
395 		complete(&test->data.wait);
396 
397 	// sched_set_fifo_low(current);
398 
399 	/* start objpool testing threads */
400 	start = ktime_get();
401 	up_write(&test->data.start);
402 
403 	/* yield the cpu to worker threads for test->duration ms */
404 	timeout = msecs_to_jiffies(test->duration);
405 	schedule_timeout_interruptible(timeout);
406 
407 	/* tell worker threads to quit */
408 	atomic_set_release(&test->data.stop, 1);
409 
410 	/* wait for all worker threads to finish and quit */
411 	wait_for_completion(&test->data.wait);
412 	duration = (u64) ktime_us_delta(ktime_get(), start);
413 
414 	/* cleanup objpool */
415 	g_ot_sync_ops[test->mode].fini(sop);
416 
417 	/* report testing summary and performance results */
418 	ot_perf_report(test, duration);
419 
420 	/* report memory allocation summary */
421 	ot_mem_report(test);
422 
423 	return 0;
424 }
425 
426 /*
427  * asynchronous test cases: pool lifecycle controlled by refcount
428  */
429 
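/*
 * rcu callback for async finalization: a grace period has passed since the
 * stop event was set, so (as the WARN_ON below checks) every cpu should have
 * observed it; objpool_fini() drops the pool's base reference, and the pool
 * and its context are released via ot_objpool_release() once the workers
 * have dropped their remaining in-flight objects
 */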
430 static void ot_fini_async_rcu(struct rcu_head *rcu)
431 {
432 	struct ot_context *sop = container_of(rcu, struct ot_context, rcu);
433 	struct ot_test *test = sop->test;
434 
435 	/* here all cpus are aware of the stop event: test->data.stop = 1 */
436 	WARN_ON(!atomic_read_acquire(&test->data.stop));
437 
438 	objpool_fini(&sop->pool);
439 	complete(&test->data.rcu);
440 }
441 
442 static void ot_fini_async(struct ot_context *sop)
443 {
444 	/* make sure the stop event is acknowledged by all cores */
445 	call_rcu(&sop->rcu, ot_fini_async_rcu);
446 }
447 
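/* objpool release callback: called once the pool's refcount drops to zero; frees the test context allocated by ot_init_async_m0() */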
448 static int ot_objpool_release(struct objpool_head *head, void *context)
449 {
450 	struct ot_context *sop = context;
451 
452 	WARN_ON(!head || !sop || head != &sop->pool);
453 
454 	/* do context cleaning if needed */
455 	if (sop)
456 		ot_kfree(sop->test, sop, sizeof(*sop));
457 
458 	return 0;
459 }
460 
461 static struct ot_context *ot_init_async_m0(struct ot_test *test)
462 {
463 	struct ot_context *sop = NULL;
464 	int max = num_possible_cpus() << 3;
465 	gfp_t gfp = GFP_KERNEL;
466 
467 	sop = (struct ot_context *)ot_kzalloc(test, sizeof(*sop));
468 	if (!sop)
469 		return NULL;
470 	sop->test = test;
471 	if (test->objsz < 512)
472 		gfp = GFP_ATOMIC;
473 
474 	if (objpool_init(&sop->pool, max, test->objsz, gfp, sop,
475 			 ot_init_node, ot_objpool_release)) {
476 		ot_kfree(test, sop, sizeof(*sop));
477 		return NULL;
478 	}
479 	WARN_ON(max != sop->pool.nr_objs);
480 
481 	return sop;
482 }
483 
484 struct {
485 	struct ot_context * (*init)(struct ot_test *oc);
486 	void (*fini)(struct ot_context *sop);
487 } g_ot_async_ops[] = {
488 	{.init = ot_init_async_m0, .fini = ot_fini_async},
489 };
490 
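/*
 * recycle a popped object: push it back for reuse while the test is running,
 * or drop it (objpool_drop) once the stop event is set so the pool's
 * refcount can eventually reach zero
 */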
491 static void ot_nod_recycle(struct ot_node *on, struct objpool_head *pool,
492 			int release)
493 {
494 	struct ot_context *sop;
495 
496 	on->refs++;
497 
498 	if (!release) {
499 		/* push the object back to the objpool for reuse */
500 		objpool_push(on, pool);
501 		return;
502 	}
503 
504 	sop = container_of(pool, struct ot_context, pool);
505 	WARN_ON(sop != pool->context);
506 
507 	/* drop the node for good and unref the objpool */
508 	objpool_drop(on, pool);
509 }
510 
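/*
 * one bulk round in asynchronous mode: like ot_bulk_sync(), except that
 * objects popped after the stop event are dropped rather than pushed back;
 * in thread context, get_cpu()/put_cpu() disables preemption across the
 * stop check and the recycle loop
 */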
511 static void ot_bulk_async(struct ot_item *item, int irq)
512 {
513 	struct ot_test *test = item->test;
514 	struct ot_node *nods[OT_NR_MAX_BULK];
515 	int i, stop;
516 
517 	for (i = 0; i < item->bulk[irq]; i++)
518 		nods[i] = objpool_pop(item->pool);
519 
520 	if (!irq) {
521 		if (item->delay || !(++(item->niters) & 0x7FFF))
522 			msleep(item->delay);
523 		get_cpu();
524 	}
525 
526 	stop = atomic_read_acquire(&test->data.stop);
527 
528 	/* recycle or drop all popped objects; dropping also derefs the objpool */
529 	while (i-- > 0) {
530 		struct ot_node *on = nods[i];
531 
532 		if (on) {
533 			on->refs++;
534 			ot_nod_recycle(on, item->pool, stop);
535 			item->stat[irq].nhits++;
536 		} else {
537 			item->stat[irq].nmiss++;
538 		}
539 	}
540 
541 	if (!irq)
542 		put_cpu();
543 }
544 
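/*
 * driver for an asynchronous testcase: same flow as ot_start_sync(), but the
 * pool is finalized via call_rcu() after the stop event is set, and the main
 * thread additionally waits for data->rcu to make sure the rcu callback ran
 */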
545 static int ot_start_async(struct ot_test *test)
546 {
547 	struct ot_context *sop;
548 	ktime_t start;
549 	u64 duration;
550 	unsigned long timeout;
551 	int cpu;
552 
553 	/* initialize objpool for the asynchronous testcase */
554 	sop = g_ot_async_ops[test->mode].init(test);
555 	if (!sop)
556 		return -ENOMEM;
557 
558 	/* grab rwsem to block testing threads */
559 	down_write(&test->data.start);
560 
561 	for_each_possible_cpu(cpu) {
562 		struct ot_item *item = per_cpu_ptr(&ot_pcup_items, cpu);
563 		struct task_struct *work;
564 
565 		ot_init_cpu_item(item, test, &sop->pool, ot_bulk_async);
566 
567 		/* skip offline cpus */
568 		if (!cpu_online(cpu))
569 			continue;
570 
571 		work = kthread_create_on_node(ot_thread_worker, item,
572 				cpu_to_node(cpu), "ot_worker_%d", cpu);
573 		if (IS_ERR(work)) {
574 			pr_err("failed to create thread for cpu %d\n", cpu);
575 		} else {
576 			kthread_bind(work, cpu);
577 			wake_up_process(work);
578 		}
579 	}
580 
581 	/* wait a while to make sure all threads are waiting at the start line */
582 	msleep(20);
583 
584 	/* in case no threads were created: insufficient memory? */
585 	if (atomic_dec_and_test(&test->data.nthreads))
586 		complete(&test->data.wait);
587 
588 	/* start objpool testing threads */
589 	start = ktime_get();
590 	up_write(&test->data.start);
591 
592 	/* yield the cpu to worker threads for test->duration ms */
593 	timeout = msecs_to_jiffies(test->duration);
594 	schedule_timeout_interruptible(timeout);
595 
596 	/* tell worker threads to quit */
597 	atomic_set_release(&test->data.stop, 1);
598 
599 	/* do async-finalization */
600 	g_ot_async_ops[test->mode].fini(sop);
601 
602 	/* wait for all worker threads to finish and quit */
603 	wait_for_completion(&test->data.wait);
604 	duration = (u64) ktime_us_delta(ktime_get(), start);
605 
606 	/* make sure the rcu callback has been triggered */
607 	wait_for_completion(&test->data.rcu);
608 
609 	/*
610 	 * now we are sure that the objpool has been finalized, either
611 	 * by the rcu callback or by the worker threads
612 	 */
613 
614 	/* report testing summary and performance results */
615 	ot_perf_report(test, duration);
616 
617 	/* report memory allocation summary */
618 	ot_mem_report(test);
619 
620 	return 0;
621 }
622 
623 /*
624  * predefined testing cases:
625  *   synchronous case / overrun case / async case
626  *
627  * async: synchronous or asynchronous testing
628  * mode: only mode 0 supported
629  * objsz: object size
630  * duration: int, total test time in ms
631  * delay: int, delay (in ms) between each iteration
632  * bulk_normal: int, bulk size (objects per round) for the thread worker
633  * bulk_irq: int, bulk size (objects per round) for the hrtimer (irq) worker
634  * hrtimer: unsigned long, hrtimer interval in ms (0 disables the hrtimer)
635  * name: const char *, tag of the current testcase
636  */
637 
638 #define NODE_COMPACT sizeof(struct ot_node)
639 #define NODE_VMALLOC (512)
640 
641 struct ot_test g_testcases[] = {
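	/* {async, mode, objsz, duration, delay, bulk_normal, bulk_irq, hrtimer, name} */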
642 
643 	/* sync & normal */
644 	{0, 0, NODE_COMPACT, 1000, 0,  1,  0,  0, "sync: percpu objpool"},
645 	{0, 0, NODE_VMALLOC, 1000, 0,  1,  0,  0, "sync: percpu objpool from vmalloc"},
646 
647 	/* sync & hrtimer */
648 	{0, 0, NODE_COMPACT, 1000, 0,  1,  1,  4, "sync & hrtimer: percpu objpool"},
649 	{0, 0, NODE_VMALLOC, 1000, 0,  1,  1,  4, "sync & hrtimer: percpu objpool from vmalloc"},
650 
651 	/* sync & overrun */
652 	{0, 0, NODE_COMPACT, 1000, 0, 16,  0,  0, "sync overrun: percpu objpool"},
653 	{0, 0, NODE_VMALLOC, 1000, 0, 16,  0,  0, "sync overrun: percpu objpool from vmalloc"},
654 
655 	/* async mode */
656 	{1, 0, NODE_COMPACT, 1000, 100,  1,  0,  0, "async: percpu objpool"},
657 	{1, 0, NODE_VMALLOC, 1000, 100,  1,  0,  0, "async: percpu objpool from vmalloc"},
658 
659 	/* async + hrtimer mode */
660 	{1, 0, NODE_COMPACT, 1000, 0,  4,  4,  4, "async & hrtimer: percpu objpool"},
661 	{1, 0, NODE_VMALLOC, 1000, 0,  4,  4,  4, "async & hrtimer: percpu objpool from vmalloc"},
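
	/*
	 * a new case can be appended in the same positional order, e.g.
	 * (hypothetical values, not part of the predefined set):
	 *	{0, 0, NODE_COMPACT, 2000, 1, 8, 0, 0, "sync: custom 2s run"},
	 */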
662 };
663 
664 static int __init ot_mod_init(void)
665 {
666 	int i;
667 
668 	/* run all testcases */
669 	for (i = 0; i < ARRAY_SIZE(g_testcases); i++) {
670 		ot_init_data(&g_testcases[i].data);
671 		if (g_testcases[i].async)
672 			ot_start_async(&g_testcases[i]);
673 		else
674 			ot_start_sync(&g_testcases[i]);
675 	}
676 
677 	/* show tests summary */
678 	pr_info("\n");
679 	pr_info("Summary of testcases:\n");
680 	for (i = 0; i < ARRAY_SIZE(g_testcases); i++) {
681 		pr_info("    duration: %lluus \thits: %10lu \tmiss: %10lu \t%s\n",
682 			g_testcases[i].data.duration, g_testcases[i].data.objects.nhits,
683 			g_testcases[i].data.objects.nmiss, g_testcases[i].name);
684 	}
685 
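	/*
	 * all testcases have already run by this point; returning an error on
	 * purpose keeps this test module from staying loaded
	 */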
686 	return -EAGAIN;
687 }
688 
689 static void __exit ot_mod_exit(void)
690 {
691 }
692 
693 module_init(ot_mod_init);
694 module_exit(ot_mod_exit);
695 
696 MODULE_LICENSE("GPL");