1 // SPDX-License-Identifier: GPL-2.0 2 3 /* 4 * Test module for stress and analyze performance of vmalloc allocator. 5 * (C) 2018 Uladzislau Rezki (Sony) <[email protected]> 6 */ 7 #include <linux/init.h> 8 #include <linux/kernel.h> 9 #include <linux/module.h> 10 #include <linux/vmalloc.h> 11 #include <linux/random.h> 12 #include <linux/kthread.h> 13 #include <linux/moduleparam.h> 14 #include <linux/completion.h> 15 #include <linux/delay.h> 16 #include <linux/rwsem.h> 17 #include <linux/mm.h> 18 #include <linux/rcupdate.h> 19 #include <linux/slab.h> 20 21 #define __param(type, name, init, msg) \ 22 static type name = init; \ 23 module_param(name, type, 0444); \ 24 MODULE_PARM_DESC(name, msg) \ 25 26 __param(bool, single_cpu_test, false, 27 "Use single first online CPU to run tests"); 28 29 __param(bool, sequential_test_order, false, 30 "Use sequential stress tests order"); 31 32 __param(int, test_repeat_count, 1, 33 "Set test repeat counter"); 34 35 __param(int, test_loop_count, 1000000, 36 "Set test loop counter"); 37 38 __param(int, run_test_mask, INT_MAX, 39 "Set tests specified in the mask.\n\n" 40 "\t\tid: 1, name: fix_size_alloc_test\n" 41 "\t\tid: 2, name: full_fit_alloc_test\n" 42 "\t\tid: 4, name: long_busy_list_alloc_test\n" 43 "\t\tid: 8, name: random_size_alloc_test\n" 44 "\t\tid: 16, name: fix_align_alloc_test\n" 45 "\t\tid: 32, name: random_size_align_alloc_test\n" 46 "\t\tid: 64, name: align_shift_alloc_test\n" 47 "\t\tid: 128, name: pcpu_alloc_test\n" 48 "\t\tid: 256, name: kvfree_rcu_1_arg_vmalloc_test\n" 49 "\t\tid: 512, name: kvfree_rcu_2_arg_vmalloc_test\n" 50 /* Add a new test case description here. */ 51 ); 52 53 /* 54 * Depends on single_cpu_test parameter. If it is true, then 55 * use first online CPU to trigger a test on, otherwise go with 56 * all online CPUs. 57 */ 58 static cpumask_t cpus_run_test_mask = CPU_MASK_NONE; 59 60 /* 61 * Read write semaphore for synchronization of setup 62 * phase that is done in main thread and workers. 63 */ 64 static DECLARE_RWSEM(prepare_for_test_rwsem); 65 66 /* 67 * Completion tracking for worker threads. 68 */ 69 static DECLARE_COMPLETION(test_all_done_comp); 70 static atomic_t test_n_undone = ATOMIC_INIT(0); 71 72 static inline void 73 test_report_one_done(void) 74 { 75 if (atomic_dec_and_test(&test_n_undone)) 76 complete(&test_all_done_comp); 77 } 78 79 static int random_size_align_alloc_test(void) 80 { 81 unsigned long size, align, rnd; 82 void *ptr; 83 int i; 84 85 for (i = 0; i < test_loop_count; i++) { 86 get_random_bytes(&rnd, sizeof(rnd)); 87 88 /* 89 * Maximum 1024 pages, if PAGE_SIZE is 4096. 90 */ 91 align = 1 << (rnd % 23); 92 93 /* 94 * Maximum 10 pages. 95 */ 96 size = ((rnd % 10) + 1) * PAGE_SIZE; 97 98 ptr = __vmalloc_node(size, align, GFP_KERNEL | __GFP_ZERO, 0, 99 __builtin_return_address(0)); 100 if (!ptr) 101 return -1; 102 103 vfree(ptr); 104 } 105 106 return 0; 107 } 108 109 /* 110 * This test case is supposed to be failed. 111 */ 112 static int align_shift_alloc_test(void) 113 { 114 unsigned long align; 115 void *ptr; 116 int i; 117 118 for (i = 0; i < BITS_PER_LONG; i++) { 119 align = ((unsigned long) 1) << i; 120 121 ptr = __vmalloc_node(PAGE_SIZE, align, GFP_KERNEL|__GFP_ZERO, 0, 122 __builtin_return_address(0)); 123 if (!ptr) 124 return -1; 125 126 vfree(ptr); 127 } 128 129 return 0; 130 } 131 132 static int fix_align_alloc_test(void) 133 { 134 void *ptr; 135 int i; 136 137 for (i = 0; i < test_loop_count; i++) { 138 ptr = __vmalloc_node(5 * PAGE_SIZE, THREAD_ALIGN << 1, 139 GFP_KERNEL | __GFP_ZERO, 0, 140 __builtin_return_address(0)); 141 if (!ptr) 142 return -1; 143 144 vfree(ptr); 145 } 146 147 return 0; 148 } 149 150 static int random_size_alloc_test(void) 151 { 152 unsigned int n; 153 void *p; 154 int i; 155 156 for (i = 0; i < test_loop_count; i++) { 157 get_random_bytes(&n, sizeof(i)); 158 n = (n % 100) + 1; 159 160 p = vmalloc(n * PAGE_SIZE); 161 162 if (!p) 163 return -1; 164 165 *((__u8 *)p) = 1; 166 vfree(p); 167 } 168 169 return 0; 170 } 171 172 static int long_busy_list_alloc_test(void) 173 { 174 void *ptr_1, *ptr_2; 175 void **ptr; 176 int rv = -1; 177 int i; 178 179 ptr = vmalloc(sizeof(void *) * 15000); 180 if (!ptr) 181 return rv; 182 183 for (i = 0; i < 15000; i++) 184 ptr[i] = vmalloc(1 * PAGE_SIZE); 185 186 for (i = 0; i < test_loop_count; i++) { 187 ptr_1 = vmalloc(100 * PAGE_SIZE); 188 if (!ptr_1) 189 goto leave; 190 191 ptr_2 = vmalloc(1 * PAGE_SIZE); 192 if (!ptr_2) { 193 vfree(ptr_1); 194 goto leave; 195 } 196 197 *((__u8 *)ptr_1) = 0; 198 *((__u8 *)ptr_2) = 1; 199 200 vfree(ptr_1); 201 vfree(ptr_2); 202 } 203 204 /* Success */ 205 rv = 0; 206 207 leave: 208 for (i = 0; i < 15000; i++) 209 vfree(ptr[i]); 210 211 vfree(ptr); 212 return rv; 213 } 214 215 static int full_fit_alloc_test(void) 216 { 217 void **ptr, **junk_ptr, *tmp; 218 int junk_length; 219 int rv = -1; 220 int i; 221 222 junk_length = fls(num_online_cpus()); 223 junk_length *= (32 * 1024 * 1024 / PAGE_SIZE); 224 225 ptr = vmalloc(sizeof(void *) * junk_length); 226 if (!ptr) 227 return rv; 228 229 junk_ptr = vmalloc(sizeof(void *) * junk_length); 230 if (!junk_ptr) { 231 vfree(ptr); 232 return rv; 233 } 234 235 for (i = 0; i < junk_length; i++) { 236 ptr[i] = vmalloc(1 * PAGE_SIZE); 237 junk_ptr[i] = vmalloc(1 * PAGE_SIZE); 238 } 239 240 for (i = 0; i < junk_length; i++) 241 vfree(junk_ptr[i]); 242 243 for (i = 0; i < test_loop_count; i++) { 244 tmp = vmalloc(1 * PAGE_SIZE); 245 246 if (!tmp) 247 goto error; 248 249 *((__u8 *)tmp) = 1; 250 vfree(tmp); 251 } 252 253 /* Success */ 254 rv = 0; 255 256 error: 257 for (i = 0; i < junk_length; i++) 258 vfree(ptr[i]); 259 260 vfree(ptr); 261 vfree(junk_ptr); 262 263 return rv; 264 } 265 266 static int fix_size_alloc_test(void) 267 { 268 void *ptr; 269 int i; 270 271 for (i = 0; i < test_loop_count; i++) { 272 ptr = vmalloc(3 * PAGE_SIZE); 273 274 if (!ptr) 275 return -1; 276 277 *((__u8 *)ptr) = 0; 278 279 vfree(ptr); 280 } 281 282 return 0; 283 } 284 285 static int 286 pcpu_alloc_test(void) 287 { 288 int rv = 0; 289 #ifndef CONFIG_NEED_PER_CPU_KM 290 void __percpu **pcpu; 291 size_t size, align; 292 int i; 293 294 pcpu = vmalloc(sizeof(void __percpu *) * 35000); 295 if (!pcpu) 296 return -1; 297 298 for (i = 0; i < 35000; i++) { 299 unsigned int r; 300 301 get_random_bytes(&r, sizeof(i)); 302 size = (r % (PAGE_SIZE / 4)) + 1; 303 304 /* 305 * Maximum PAGE_SIZE 306 */ 307 get_random_bytes(&r, sizeof(i)); 308 align = 1 << ((i % 11) + 1); 309 310 pcpu[i] = __alloc_percpu(size, align); 311 if (!pcpu[i]) 312 rv = -1; 313 } 314 315 for (i = 0; i < 35000; i++) 316 free_percpu(pcpu[i]); 317 318 vfree(pcpu); 319 #endif 320 return rv; 321 } 322 323 struct test_kvfree_rcu { 324 struct rcu_head rcu; 325 unsigned char array[20]; 326 }; 327 328 static int 329 kvfree_rcu_1_arg_vmalloc_test(void) 330 { 331 struct test_kvfree_rcu *p; 332 int i; 333 334 for (i = 0; i < test_loop_count; i++) { 335 p = vmalloc(1 * PAGE_SIZE); 336 if (!p) 337 return -1; 338 339 p->array[0] = 'a'; 340 kvfree_rcu(p); 341 } 342 343 return 0; 344 } 345 346 static int 347 kvfree_rcu_2_arg_vmalloc_test(void) 348 { 349 struct test_kvfree_rcu *p; 350 int i; 351 352 for (i = 0; i < test_loop_count; i++) { 353 p = vmalloc(1 * PAGE_SIZE); 354 if (!p) 355 return -1; 356 357 p->array[0] = 'a'; 358 kvfree_rcu(p, rcu); 359 } 360 361 return 0; 362 } 363 364 struct test_case_desc { 365 const char *test_name; 366 int (*test_func)(void); 367 }; 368 369 static struct test_case_desc test_case_array[] = { 370 { "fix_size_alloc_test", fix_size_alloc_test }, 371 { "full_fit_alloc_test", full_fit_alloc_test }, 372 { "long_busy_list_alloc_test", long_busy_list_alloc_test }, 373 { "random_size_alloc_test", random_size_alloc_test }, 374 { "fix_align_alloc_test", fix_align_alloc_test }, 375 { "random_size_align_alloc_test", random_size_align_alloc_test }, 376 { "align_shift_alloc_test", align_shift_alloc_test }, 377 { "pcpu_alloc_test", pcpu_alloc_test }, 378 { "kvfree_rcu_1_arg_vmalloc_test", kvfree_rcu_1_arg_vmalloc_test }, 379 { "kvfree_rcu_2_arg_vmalloc_test", kvfree_rcu_2_arg_vmalloc_test }, 380 /* Add a new test case here. */ 381 }; 382 383 struct test_case_data { 384 int test_failed; 385 int test_passed; 386 u64 time; 387 }; 388 389 /* Split it to get rid of: WARNING: line over 80 characters */ 390 static struct test_case_data 391 per_cpu_test_data[NR_CPUS][ARRAY_SIZE(test_case_array)]; 392 393 static struct test_driver { 394 struct task_struct *task; 395 unsigned long start; 396 unsigned long stop; 397 int cpu; 398 } per_cpu_test_driver[NR_CPUS]; 399 400 static void shuffle_array(int *arr, int n) 401 { 402 unsigned int rnd; 403 int i, j, x; 404 405 for (i = n - 1; i > 0; i--) { 406 get_random_bytes(&rnd, sizeof(rnd)); 407 408 /* Cut the range. */ 409 j = rnd % i; 410 411 /* Swap indexes. */ 412 x = arr[i]; 413 arr[i] = arr[j]; 414 arr[j] = x; 415 } 416 } 417 418 static int test_func(void *private) 419 { 420 struct test_driver *t = private; 421 int random_array[ARRAY_SIZE(test_case_array)]; 422 int index, i, j; 423 ktime_t kt; 424 u64 delta; 425 426 if (set_cpus_allowed_ptr(current, cpumask_of(t->cpu)) < 0) 427 pr_err("Failed to set affinity to %d CPU\n", t->cpu); 428 429 for (i = 0; i < ARRAY_SIZE(test_case_array); i++) 430 random_array[i] = i; 431 432 if (!sequential_test_order) 433 shuffle_array(random_array, ARRAY_SIZE(test_case_array)); 434 435 /* 436 * Block until initialization is done. 437 */ 438 down_read(&prepare_for_test_rwsem); 439 440 t->start = get_cycles(); 441 for (i = 0; i < ARRAY_SIZE(test_case_array); i++) { 442 index = random_array[i]; 443 444 /* 445 * Skip tests if run_test_mask has been specified. 446 */ 447 if (!((run_test_mask & (1 << index)) >> index)) 448 continue; 449 450 kt = ktime_get(); 451 for (j = 0; j < test_repeat_count; j++) { 452 if (!test_case_array[index].test_func()) 453 per_cpu_test_data[t->cpu][index].test_passed++; 454 else 455 per_cpu_test_data[t->cpu][index].test_failed++; 456 } 457 458 /* 459 * Take an average time that test took. 460 */ 461 delta = (u64) ktime_us_delta(ktime_get(), kt); 462 do_div(delta, (u32) test_repeat_count); 463 464 per_cpu_test_data[t->cpu][index].time = delta; 465 } 466 t->stop = get_cycles(); 467 468 up_read(&prepare_for_test_rwsem); 469 test_report_one_done(); 470 471 /* 472 * Wait for the kthread_stop() call. 473 */ 474 while (!kthread_should_stop()) 475 msleep(10); 476 477 return 0; 478 } 479 480 static void 481 init_test_configurtion(void) 482 { 483 /* 484 * Reset all data of all CPUs. 485 */ 486 memset(per_cpu_test_data, 0, sizeof(per_cpu_test_data)); 487 488 if (single_cpu_test) 489 cpumask_set_cpu(cpumask_first(cpu_online_mask), 490 &cpus_run_test_mask); 491 else 492 cpumask_and(&cpus_run_test_mask, cpu_online_mask, 493 cpu_online_mask); 494 495 if (test_repeat_count <= 0) 496 test_repeat_count = 1; 497 498 if (test_loop_count <= 0) 499 test_loop_count = 1; 500 } 501 502 static void do_concurrent_test(void) 503 { 504 int cpu, ret; 505 506 /* 507 * Set some basic configurations plus sanity check. 508 */ 509 init_test_configurtion(); 510 511 /* 512 * Put on hold all workers. 513 */ 514 down_write(&prepare_for_test_rwsem); 515 516 for_each_cpu(cpu, &cpus_run_test_mask) { 517 struct test_driver *t = &per_cpu_test_driver[cpu]; 518 519 t->cpu = cpu; 520 t->task = kthread_run(test_func, t, "vmalloc_test/%d", cpu); 521 522 if (!IS_ERR(t->task)) 523 /* Success. */ 524 atomic_inc(&test_n_undone); 525 else 526 pr_err("Failed to start kthread for %d CPU\n", cpu); 527 } 528 529 /* 530 * Now let the workers do their job. 531 */ 532 up_write(&prepare_for_test_rwsem); 533 534 /* 535 * Sleep quiet until all workers are done with 1 second 536 * interval. Since the test can take a lot of time we 537 * can run into a stack trace of the hung task. That is 538 * why we go with completion_timeout and HZ value. 539 */ 540 do { 541 ret = wait_for_completion_timeout(&test_all_done_comp, HZ); 542 } while (!ret); 543 544 for_each_cpu(cpu, &cpus_run_test_mask) { 545 struct test_driver *t = &per_cpu_test_driver[cpu]; 546 int i; 547 548 if (!IS_ERR(t->task)) 549 kthread_stop(t->task); 550 551 for (i = 0; i < ARRAY_SIZE(test_case_array); i++) { 552 if (!((run_test_mask & (1 << i)) >> i)) 553 continue; 554 555 pr_info( 556 "Summary: %s passed: %d failed: %d repeat: %d loops: %d avg: %llu usec\n", 557 test_case_array[i].test_name, 558 per_cpu_test_data[cpu][i].test_passed, 559 per_cpu_test_data[cpu][i].test_failed, 560 test_repeat_count, test_loop_count, 561 per_cpu_test_data[cpu][i].time); 562 } 563 564 pr_info("All test took CPU%d=%lu cycles\n", 565 cpu, t->stop - t->start); 566 } 567 } 568 569 static int vmalloc_test_init(void) 570 { 571 do_concurrent_test(); 572 return -EAGAIN; /* Fail will directly unload the module */ 573 } 574 575 static void vmalloc_test_exit(void) 576 { 577 } 578 579 module_init(vmalloc_test_init) 580 module_exit(vmalloc_test_exit) 581 582 MODULE_LICENSE("GPL"); 583 MODULE_AUTHOR("Uladzislau Rezki"); 584 MODULE_DESCRIPTION("vmalloc test module"); 585