1 // SPDX-License-Identifier: GPL-2.0 2 3 /* 4 * Test module for lockless object pool 5 * 6 * Copyright: [email protected] 7 */ 8 9 #include <linux/version.h> 10 #include <linux/errno.h> 11 #include <linux/module.h> 12 #include <linux/moduleparam.h> 13 #include <linux/sched.h> 14 #include <linux/cpumask.h> 15 #include <linux/completion.h> 16 #include <linux/kthread.h> 17 #include <linux/cpu.h> 18 #include <linux/cpuset.h> 19 #include <linux/slab.h> 20 #include <linux/vmalloc.h> 21 #include <linux/delay.h> 22 #include <linux/hrtimer.h> 23 #include <linux/interrupt.h> 24 #include <linux/objpool.h> 25 26 #define OT_NR_MAX_BULK (16) 27 28 /* memory usage */ 29 struct ot_mem_stat { 30 atomic_long_t alloc; 31 atomic_long_t free; 32 }; 33 34 /* object allocation results */ 35 struct ot_obj_stat { 36 unsigned long nhits; 37 unsigned long nmiss; 38 }; 39 40 /* control & results per testcase */ 41 struct ot_data { 42 struct rw_semaphore start; 43 struct completion wait; 44 struct completion rcu; 45 atomic_t nthreads ____cacheline_aligned_in_smp; 46 atomic_t stop ____cacheline_aligned_in_smp; 47 struct ot_mem_stat kmalloc; 48 struct ot_mem_stat vmalloc; 49 struct ot_obj_stat objects; 50 u64 duration; 51 }; 52 53 /* testcase */ 54 struct ot_test { 55 int async; /* synchronous or asynchronous */ 56 int mode; /* only mode 0 supported */ 57 int objsz; /* object size */ 58 int duration; /* ms */ 59 int delay; /* ms */ 60 int bulk_normal; 61 int bulk_irq; 62 unsigned long hrtimer; /* ms */ 63 const char *name; 64 struct ot_data data; 65 }; 66 67 /* per-cpu worker */ 68 struct ot_item { 69 struct objpool_head *pool; /* pool head */ 70 struct ot_test *test; /* test parameters */ 71 72 void (*worker)(struct ot_item *item, int irq); 73 74 /* hrtimer control */ 75 ktime_t hrtcycle; 76 struct hrtimer hrtimer; 77 78 int bulk[2]; /* for thread and irq */ 79 int delay; 80 u32 niters; 81 82 /* summary per thread */ 83 struct ot_obj_stat stat[2]; /* thread and irq */ 84 u64 duration; 85 }; 86 87 /* 88 * memory leakage checking 89 */ 90 91 static void *ot_kzalloc(struct ot_test *test, long size) 92 { 93 void *ptr = kzalloc(size, GFP_KERNEL); 94 95 if (ptr) 96 atomic_long_add(size, &test->data.kmalloc.alloc); 97 return ptr; 98 } 99 100 static void ot_kfree(struct ot_test *test, void *ptr, long size) 101 { 102 if (!ptr) 103 return; 104 atomic_long_add(size, &test->data.kmalloc.free); 105 kfree(ptr); 106 } 107 108 static void ot_mem_report(struct ot_test *test) 109 { 110 long alloc, free; 111 112 pr_info("memory allocation summary for %s\n", test->name); 113 114 alloc = atomic_long_read(&test->data.kmalloc.alloc); 115 free = atomic_long_read(&test->data.kmalloc.free); 116 pr_info(" kmalloc: %lu - %lu = %lu\n", alloc, free, alloc - free); 117 118 alloc = atomic_long_read(&test->data.vmalloc.alloc); 119 free = atomic_long_read(&test->data.vmalloc.free); 120 pr_info(" vmalloc: %lu - %lu = %lu\n", alloc, free, alloc - free); 121 } 122 123 /* user object instance */ 124 struct ot_node { 125 void *owner; 126 unsigned long data; 127 unsigned long refs; 128 unsigned long payload[32]; 129 }; 130 131 /* user objpool manager */ 132 struct ot_context { 133 struct objpool_head pool; /* objpool head */ 134 struct ot_test *test; /* test parameters */ 135 void *ptr; /* user pool buffer */ 136 unsigned long size; /* buffer size */ 137 struct rcu_head rcu; 138 }; 139 140 static DEFINE_PER_CPU(struct ot_item, ot_pcup_items); 141 142 static int ot_init_data(struct ot_data *data) 143 { 144 memset(data, 0, sizeof(*data)); 145 init_rwsem(&data->start); 146 init_completion(&data->wait); 147 init_completion(&data->rcu); 148 atomic_set(&data->nthreads, 1); 149 150 return 0; 151 } 152 153 static int ot_init_node(void *nod, void *context) 154 { 155 struct ot_context *sop = context; 156 struct ot_node *on = nod; 157 158 on->owner = &sop->pool; 159 return 0; 160 } 161 162 static enum hrtimer_restart ot_hrtimer_handler(struct hrtimer *hrt) 163 { 164 struct ot_item *item = container_of(hrt, struct ot_item, hrtimer); 165 struct ot_test *test = item->test; 166 167 if (atomic_read_acquire(&test->data.stop)) 168 return HRTIMER_NORESTART; 169 170 /* do bulk-testings for objects pop/push */ 171 item->worker(item, 1); 172 173 hrtimer_forward(hrt, hrt->base->get_time(), item->hrtcycle); 174 return HRTIMER_RESTART; 175 } 176 177 static void ot_start_hrtimer(struct ot_item *item) 178 { 179 if (!item->test->hrtimer) 180 return; 181 hrtimer_start(&item->hrtimer, item->hrtcycle, HRTIMER_MODE_REL); 182 } 183 184 static void ot_stop_hrtimer(struct ot_item *item) 185 { 186 if (!item->test->hrtimer) 187 return; 188 hrtimer_cancel(&item->hrtimer); 189 } 190 191 static int ot_init_hrtimer(struct ot_item *item, unsigned long hrtimer) 192 { 193 struct hrtimer *hrt = &item->hrtimer; 194 195 if (!hrtimer) 196 return -ENOENT; 197 198 item->hrtcycle = ktime_set(0, hrtimer * 1000000UL); 199 hrtimer_init(hrt, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 200 hrt->function = ot_hrtimer_handler; 201 return 0; 202 } 203 204 static int ot_init_cpu_item(struct ot_item *item, 205 struct ot_test *test, 206 struct objpool_head *pool, 207 void (*worker)(struct ot_item *, int)) 208 { 209 memset(item, 0, sizeof(*item)); 210 item->pool = pool; 211 item->test = test; 212 item->worker = worker; 213 214 item->bulk[0] = test->bulk_normal; 215 item->bulk[1] = test->bulk_irq; 216 item->delay = test->delay; 217 218 /* initialize hrtimer */ 219 ot_init_hrtimer(item, item->test->hrtimer); 220 return 0; 221 } 222 223 static int ot_thread_worker(void *arg) 224 { 225 struct ot_item *item = arg; 226 struct ot_test *test = item->test; 227 ktime_t start; 228 229 atomic_inc(&test->data.nthreads); 230 down_read(&test->data.start); 231 up_read(&test->data.start); 232 start = ktime_get(); 233 ot_start_hrtimer(item); 234 do { 235 if (atomic_read_acquire(&test->data.stop)) 236 break; 237 /* do bulk-testings for objects pop/push */ 238 item->worker(item, 0); 239 } while (!kthread_should_stop()); 240 ot_stop_hrtimer(item); 241 item->duration = (u64) ktime_us_delta(ktime_get(), start); 242 if (atomic_dec_and_test(&test->data.nthreads)) 243 complete(&test->data.wait); 244 245 return 0; 246 } 247 248 static void ot_perf_report(struct ot_test *test, u64 duration) 249 { 250 struct ot_obj_stat total, normal = {0}, irq = {0}; 251 int cpu, nthreads = 0; 252 253 pr_info("\n"); 254 pr_info("Testing summary for %s\n", test->name); 255 256 for_each_possible_cpu(cpu) { 257 struct ot_item *item = per_cpu_ptr(&ot_pcup_items, cpu); 258 if (!item->duration) 259 continue; 260 normal.nhits += item->stat[0].nhits; 261 normal.nmiss += item->stat[0].nmiss; 262 irq.nhits += item->stat[1].nhits; 263 irq.nmiss += item->stat[1].nmiss; 264 pr_info("CPU: %d duration: %lluus\n", cpu, item->duration); 265 pr_info("\tthread:\t%16lu hits \t%16lu miss\n", 266 item->stat[0].nhits, item->stat[0].nmiss); 267 pr_info("\tirq: \t%16lu hits \t%16lu miss\n", 268 item->stat[1].nhits, item->stat[1].nmiss); 269 pr_info("\ttotal: \t%16lu hits \t%16lu miss\n", 270 item->stat[0].nhits + item->stat[1].nhits, 271 item->stat[0].nmiss + item->stat[1].nmiss); 272 nthreads++; 273 } 274 275 total.nhits = normal.nhits + irq.nhits; 276 total.nmiss = normal.nmiss + irq.nmiss; 277 278 pr_info("ALL: \tnthreads: %d duration: %lluus\n", nthreads, duration); 279 pr_info("SUM: \t%16lu hits \t%16lu miss\n", 280 total.nhits, total.nmiss); 281 282 test->data.objects = total; 283 test->data.duration = duration; 284 } 285 286 /* 287 * synchronous test cases for objpool manipulation 288 */ 289 290 /* objpool manipulation for synchronous mode (percpu objpool) */ 291 static struct ot_context *ot_init_sync_m0(struct ot_test *test) 292 { 293 struct ot_context *sop = NULL; 294 int max = num_possible_cpus() << 3; 295 gfp_t gfp = GFP_KERNEL; 296 297 sop = (struct ot_context *)ot_kzalloc(test, sizeof(*sop)); 298 if (!sop) 299 return NULL; 300 sop->test = test; 301 if (test->objsz < 512) 302 gfp = GFP_ATOMIC; 303 304 if (objpool_init(&sop->pool, max, test->objsz, 305 gfp, sop, ot_init_node, NULL)) { 306 ot_kfree(test, sop, sizeof(*sop)); 307 return NULL; 308 } 309 WARN_ON(max != sop->pool.nr_objs); 310 311 return sop; 312 } 313 314 static void ot_fini_sync(struct ot_context *sop) 315 { 316 objpool_fini(&sop->pool); 317 ot_kfree(sop->test, sop, sizeof(*sop)); 318 } 319 320 struct { 321 struct ot_context * (*init)(struct ot_test *oc); 322 void (*fini)(struct ot_context *sop); 323 } g_ot_sync_ops[] = { 324 {.init = ot_init_sync_m0, .fini = ot_fini_sync}, 325 }; 326 327 /* 328 * synchronous test cases: performance mode 329 */ 330 331 static void ot_bulk_sync(struct ot_item *item, int irq) 332 { 333 struct ot_node *nods[OT_NR_MAX_BULK]; 334 int i; 335 336 for (i = 0; i < item->bulk[irq]; i++) 337 nods[i] = objpool_pop(item->pool); 338 339 if (!irq && (item->delay || !(++(item->niters) & 0x7FFF))) 340 msleep(item->delay); 341 342 while (i-- > 0) { 343 struct ot_node *on = nods[i]; 344 if (on) { 345 on->refs++; 346 objpool_push(on, item->pool); 347 item->stat[irq].nhits++; 348 } else { 349 item->stat[irq].nmiss++; 350 } 351 } 352 } 353 354 static int ot_start_sync(struct ot_test *test) 355 { 356 struct ot_context *sop; 357 ktime_t start; 358 u64 duration; 359 unsigned long timeout; 360 int cpu; 361 362 /* initialize objpool for syncrhonous testcase */ 363 sop = g_ot_sync_ops[test->mode].init(test); 364 if (!sop) 365 return -ENOMEM; 366 367 /* grab rwsem to block testing threads */ 368 down_write(&test->data.start); 369 370 for_each_possible_cpu(cpu) { 371 struct ot_item *item = per_cpu_ptr(&ot_pcup_items, cpu); 372 struct task_struct *work; 373 374 ot_init_cpu_item(item, test, &sop->pool, ot_bulk_sync); 375 376 /* skip offline cpus */ 377 if (!cpu_online(cpu)) 378 continue; 379 380 work = kthread_create_on_node(ot_thread_worker, item, 381 cpu_to_node(cpu), "ot_worker_%d", cpu); 382 if (IS_ERR(work)) { 383 pr_err("failed to create thread for cpu %d\n", cpu); 384 } else { 385 kthread_bind(work, cpu); 386 wake_up_process(work); 387 } 388 } 389 390 /* wait a while to make sure all threads waiting at start line */ 391 msleep(20); 392 393 /* in case no threads were created: memory insufficient ? */ 394 if (atomic_dec_and_test(&test->data.nthreads)) 395 complete(&test->data.wait); 396 397 // sched_set_fifo_low(current); 398 399 /* start objpool testing threads */ 400 start = ktime_get(); 401 up_write(&test->data.start); 402 403 /* yeild cpu to worker threads for duration ms */ 404 timeout = msecs_to_jiffies(test->duration); 405 schedule_timeout_interruptible(timeout); 406 407 /* tell workers threads to quit */ 408 atomic_set_release(&test->data.stop, 1); 409 410 /* wait all workers threads finish and quit */ 411 wait_for_completion(&test->data.wait); 412 duration = (u64) ktime_us_delta(ktime_get(), start); 413 414 /* cleanup objpool */ 415 g_ot_sync_ops[test->mode].fini(sop); 416 417 /* report testing summary and performance results */ 418 ot_perf_report(test, duration); 419 420 /* report memory allocation summary */ 421 ot_mem_report(test); 422 423 return 0; 424 } 425 426 /* 427 * asynchronous test cases: pool lifecycle controlled by refcount 428 */ 429 430 static void ot_fini_async_rcu(struct rcu_head *rcu) 431 { 432 struct ot_context *sop = container_of(rcu, struct ot_context, rcu); 433 struct ot_test *test = sop->test; 434 435 /* here all cpus are aware of the stop event: test->data.stop = 1 */ 436 WARN_ON(!atomic_read_acquire(&test->data.stop)); 437 438 objpool_fini(&sop->pool); 439 complete(&test->data.rcu); 440 } 441 442 static void ot_fini_async(struct ot_context *sop) 443 { 444 /* make sure the stop event is acknowledged by all cores */ 445 call_rcu(&sop->rcu, ot_fini_async_rcu); 446 } 447 448 static int ot_objpool_release(struct objpool_head *head, void *context) 449 { 450 struct ot_context *sop = context; 451 452 WARN_ON(!head || !sop || head != &sop->pool); 453 454 /* do context cleaning if needed */ 455 if (sop) 456 ot_kfree(sop->test, sop, sizeof(*sop)); 457 458 return 0; 459 } 460 461 static struct ot_context *ot_init_async_m0(struct ot_test *test) 462 { 463 struct ot_context *sop = NULL; 464 int max = num_possible_cpus() << 3; 465 gfp_t gfp = GFP_KERNEL; 466 467 sop = (struct ot_context *)ot_kzalloc(test, sizeof(*sop)); 468 if (!sop) 469 return NULL; 470 sop->test = test; 471 if (test->objsz < 512) 472 gfp = GFP_ATOMIC; 473 474 if (objpool_init(&sop->pool, max, test->objsz, gfp, sop, 475 ot_init_node, ot_objpool_release)) { 476 ot_kfree(test, sop, sizeof(*sop)); 477 return NULL; 478 } 479 WARN_ON(max != sop->pool.nr_objs); 480 481 return sop; 482 } 483 484 struct { 485 struct ot_context * (*init)(struct ot_test *oc); 486 void (*fini)(struct ot_context *sop); 487 } g_ot_async_ops[] = { 488 {.init = ot_init_async_m0, .fini = ot_fini_async}, 489 }; 490 491 static void ot_nod_recycle(struct ot_node *on, struct objpool_head *pool, 492 int release) 493 { 494 struct ot_context *sop; 495 496 on->refs++; 497 498 if (!release) { 499 /* push object back to opjpool for reuse */ 500 objpool_push(on, pool); 501 return; 502 } 503 504 sop = container_of(pool, struct ot_context, pool); 505 WARN_ON(sop != pool->context); 506 507 /* unref objpool with nod removed forever */ 508 objpool_drop(on, pool); 509 } 510 511 static void ot_bulk_async(struct ot_item *item, int irq) 512 { 513 struct ot_test *test = item->test; 514 struct ot_node *nods[OT_NR_MAX_BULK]; 515 int i, stop; 516 517 for (i = 0; i < item->bulk[irq]; i++) 518 nods[i] = objpool_pop(item->pool); 519 520 if (!irq) { 521 if (item->delay || !(++(item->niters) & 0x7FFF)) 522 msleep(item->delay); 523 get_cpu(); 524 } 525 526 stop = atomic_read_acquire(&test->data.stop); 527 528 /* drop all objects and deref objpool */ 529 while (i-- > 0) { 530 struct ot_node *on = nods[i]; 531 532 if (on) { 533 on->refs++; 534 ot_nod_recycle(on, item->pool, stop); 535 item->stat[irq].nhits++; 536 } else { 537 item->stat[irq].nmiss++; 538 } 539 } 540 541 if (!irq) 542 put_cpu(); 543 } 544 545 static int ot_start_async(struct ot_test *test) 546 { 547 struct ot_context *sop; 548 ktime_t start; 549 u64 duration; 550 unsigned long timeout; 551 int cpu; 552 553 /* initialize objpool for syncrhonous testcase */ 554 sop = g_ot_async_ops[test->mode].init(test); 555 if (!sop) 556 return -ENOMEM; 557 558 /* grab rwsem to block testing threads */ 559 down_write(&test->data.start); 560 561 for_each_possible_cpu(cpu) { 562 struct ot_item *item = per_cpu_ptr(&ot_pcup_items, cpu); 563 struct task_struct *work; 564 565 ot_init_cpu_item(item, test, &sop->pool, ot_bulk_async); 566 567 /* skip offline cpus */ 568 if (!cpu_online(cpu)) 569 continue; 570 571 work = kthread_create_on_node(ot_thread_worker, item, 572 cpu_to_node(cpu), "ot_worker_%d", cpu); 573 if (IS_ERR(work)) { 574 pr_err("failed to create thread for cpu %d\n", cpu); 575 } else { 576 kthread_bind(work, cpu); 577 wake_up_process(work); 578 } 579 } 580 581 /* wait a while to make sure all threads waiting at start line */ 582 msleep(20); 583 584 /* in case no threads were created: memory insufficient ? */ 585 if (atomic_dec_and_test(&test->data.nthreads)) 586 complete(&test->data.wait); 587 588 /* start objpool testing threads */ 589 start = ktime_get(); 590 up_write(&test->data.start); 591 592 /* yeild cpu to worker threads for duration ms */ 593 timeout = msecs_to_jiffies(test->duration); 594 schedule_timeout_interruptible(timeout); 595 596 /* tell workers threads to quit */ 597 atomic_set_release(&test->data.stop, 1); 598 599 /* do async-finalization */ 600 g_ot_async_ops[test->mode].fini(sop); 601 602 /* wait all workers threads finish and quit */ 603 wait_for_completion(&test->data.wait); 604 duration = (u64) ktime_us_delta(ktime_get(), start); 605 606 /* assure rcu callback is triggered */ 607 wait_for_completion(&test->data.rcu); 608 609 /* 610 * now we are sure that objpool is finalized either 611 * by rcu callback or by worker threads 612 */ 613 614 /* report testing summary and performance results */ 615 ot_perf_report(test, duration); 616 617 /* report memory allocation summary */ 618 ot_mem_report(test); 619 620 return 0; 621 } 622 623 /* 624 * predefined testing cases: 625 * synchronous case / overrun case / async case 626 * 627 * async: synchronous or asynchronous testing 628 * mode: only mode 0 supported 629 * objsz: object size 630 * duration: int, total test time in ms 631 * delay: int, delay (in ms) between each iteration 632 * bulk_normal: int, repeat times for thread worker 633 * bulk_irq: int, repeat times for irq consumer 634 * hrtimer: unsigned long, hrtimer intervnal in ms 635 * name: char *, tag for current test ot_item 636 */ 637 638 #define NODE_COMPACT sizeof(struct ot_node) 639 #define NODE_VMALLOC (512) 640 641 struct ot_test g_testcases[] = { 642 643 /* sync & normal */ 644 {0, 0, NODE_COMPACT, 1000, 0, 1, 0, 0, "sync: percpu objpool"}, 645 {0, 0, NODE_VMALLOC, 1000, 0, 1, 0, 0, "sync: percpu objpool from vmalloc"}, 646 647 /* sync & hrtimer */ 648 {0, 0, NODE_COMPACT, 1000, 0, 1, 1, 4, "sync & hrtimer: percpu objpool"}, 649 {0, 0, NODE_VMALLOC, 1000, 0, 1, 1, 4, "sync & hrtimer: percpu objpool from vmalloc"}, 650 651 /* sync & overrun */ 652 {0, 0, NODE_COMPACT, 1000, 0, 16, 0, 0, "sync overrun: percpu objpool"}, 653 {0, 0, NODE_VMALLOC, 1000, 0, 16, 0, 0, "sync overrun: percpu objpool from vmalloc"}, 654 655 /* async mode */ 656 {1, 0, NODE_COMPACT, 1000, 100, 1, 0, 0, "async: percpu objpool"}, 657 {1, 0, NODE_VMALLOC, 1000, 100, 1, 0, 0, "async: percpu objpool from vmalloc"}, 658 659 /* async + hrtimer mode */ 660 {1, 0, NODE_COMPACT, 1000, 0, 4, 4, 4, "async & hrtimer: percpu objpool"}, 661 {1, 0, NODE_VMALLOC, 1000, 0, 4, 4, 4, "async & hrtimer: percpu objpool from vmalloc"}, 662 }; 663 664 static int __init ot_mod_init(void) 665 { 666 int i; 667 668 /* perform testings */ 669 for (i = 0; i < ARRAY_SIZE(g_testcases); i++) { 670 ot_init_data(&g_testcases[i].data); 671 if (g_testcases[i].async) 672 ot_start_async(&g_testcases[i]); 673 else 674 ot_start_sync(&g_testcases[i]); 675 } 676 677 /* show tests summary */ 678 pr_info("\n"); 679 pr_info("Summary of testcases:\n"); 680 for (i = 0; i < ARRAY_SIZE(g_testcases); i++) { 681 pr_info(" duration: %lluus \thits: %10lu \tmiss: %10lu \t%s\n", 682 g_testcases[i].data.duration, g_testcases[i].data.objects.nhits, 683 g_testcases[i].data.objects.nmiss, g_testcases[i].name); 684 } 685 686 return -EAGAIN; 687 } 688 689 static void __exit ot_mod_exit(void) 690 { 691 } 692 693 module_init(ot_mod_init); 694 module_exit(ot_mod_exit); 695 696 MODULE_LICENSE("GPL");