1 // SPDX-License-Identifier: GPL-2.0 2 3 /* 4 * Test module for stress and analyze performance of vmalloc allocator. 5 * (C) 2018 Uladzislau Rezki (Sony) <urezki@gmail.com> 6 */ 7 #include <linux/init.h> 8 #include <linux/kernel.h> 9 #include <linux/module.h> 10 #include <linux/vmalloc.h> 11 #include <linux/random.h> 12 #include <linux/kthread.h> 13 #include <linux/moduleparam.h> 14 #include <linux/completion.h> 15 #include <linux/delay.h> 16 #include <linux/rwsem.h> 17 #include <linux/mm.h> 18 #include <linux/rcupdate.h> 19 #include <linux/slab.h> 20 21 #define __param(type, name, init, msg) \ 22 static type name = init; \ 23 module_param(name, type, 0444); \ 24 MODULE_PARM_DESC(name, msg) \ 25 26 __param(int, nr_threads, 0, 27 "Number of workers to perform tests(min: 1 max: USHRT_MAX)"); 28 29 __param(bool, sequential_test_order, false, 30 "Use sequential stress tests order"); 31 32 __param(int, test_repeat_count, 1, 33 "Set test repeat counter"); 34 35 __param(int, test_loop_count, 1000000, 36 "Set test loop counter"); 37 38 __param(int, nr_pages, 0, 39 "Set number of pages for fix_size_alloc_test(default: 1)"); 40 41 __param(int, run_test_mask, INT_MAX, 42 "Set tests specified in the mask.\n\n" 43 "\t\tid: 1, name: fix_size_alloc_test\n" 44 "\t\tid: 2, name: full_fit_alloc_test\n" 45 "\t\tid: 4, name: long_busy_list_alloc_test\n" 46 "\t\tid: 8, name: random_size_alloc_test\n" 47 "\t\tid: 16, name: fix_align_alloc_test\n" 48 "\t\tid: 32, name: random_size_align_alloc_test\n" 49 "\t\tid: 64, name: align_shift_alloc_test\n" 50 "\t\tid: 128, name: pcpu_alloc_test\n" 51 "\t\tid: 256, name: kvfree_rcu_1_arg_vmalloc_test\n" 52 "\t\tid: 512, name: kvfree_rcu_2_arg_vmalloc_test\n" 53 /* Add a new test case description here. */ 54 ); 55 56 /* 57 * Read write semaphore for synchronization of setup 58 * phase that is done in main thread and workers. 59 */ 60 static DECLARE_RWSEM(prepare_for_test_rwsem); 61 62 /* 63 * Completion tracking for worker threads. 64 */ 65 static DECLARE_COMPLETION(test_all_done_comp); 66 static atomic_t test_n_undone = ATOMIC_INIT(0); 67 68 static inline void 69 test_report_one_done(void) 70 { 71 if (atomic_dec_and_test(&test_n_undone)) 72 complete(&test_all_done_comp); 73 } 74 75 static int random_size_align_alloc_test(void) 76 { 77 unsigned long size, align; 78 unsigned int rnd; 79 void *ptr; 80 int i; 81 82 for (i = 0; i < test_loop_count; i++) { 83 rnd = prandom_u32(); 84 85 /* 86 * Maximum 1024 pages, if PAGE_SIZE is 4096. 87 */ 88 align = 1 << (rnd % 23); 89 90 /* 91 * Maximum 10 pages. 92 */ 93 size = ((rnd % 10) + 1) * PAGE_SIZE; 94 95 ptr = __vmalloc_node(size, align, GFP_KERNEL | __GFP_ZERO, 0, 96 __builtin_return_address(0)); 97 if (!ptr) 98 return -1; 99 100 vfree(ptr); 101 } 102 103 return 0; 104 } 105 106 /* 107 * This test case is supposed to be failed. 108 */ 109 static int align_shift_alloc_test(void) 110 { 111 unsigned long align; 112 void *ptr; 113 int i; 114 115 for (i = 0; i < BITS_PER_LONG; i++) { 116 align = ((unsigned long) 1) << i; 117 118 ptr = __vmalloc_node(PAGE_SIZE, align, GFP_KERNEL|__GFP_ZERO, 0, 119 __builtin_return_address(0)); 120 if (!ptr) 121 return -1; 122 123 vfree(ptr); 124 } 125 126 return 0; 127 } 128 129 static int fix_align_alloc_test(void) 130 { 131 void *ptr; 132 int i; 133 134 for (i = 0; i < test_loop_count; i++) { 135 ptr = __vmalloc_node(5 * PAGE_SIZE, THREAD_ALIGN << 1, 136 GFP_KERNEL | __GFP_ZERO, 0, 137 __builtin_return_address(0)); 138 if (!ptr) 139 return -1; 140 141 vfree(ptr); 142 } 143 144 return 0; 145 } 146 147 static int random_size_alloc_test(void) 148 { 149 unsigned int n; 150 void *p; 151 int i; 152 153 for (i = 0; i < test_loop_count; i++) { 154 n = prandom_u32(); 155 n = (n % 100) + 1; 156 157 p = vmalloc(n * PAGE_SIZE); 158 159 if (!p) 160 return -1; 161 162 *((__u8 *)p) = 1; 163 vfree(p); 164 } 165 166 return 0; 167 } 168 169 static int long_busy_list_alloc_test(void) 170 { 171 void *ptr_1, *ptr_2; 172 void **ptr; 173 int rv = -1; 174 int i; 175 176 ptr = vmalloc(sizeof(void *) * 15000); 177 if (!ptr) 178 return rv; 179 180 for (i = 0; i < 15000; i++) 181 ptr[i] = vmalloc(1 * PAGE_SIZE); 182 183 for (i = 0; i < test_loop_count; i++) { 184 ptr_1 = vmalloc(100 * PAGE_SIZE); 185 if (!ptr_1) 186 goto leave; 187 188 ptr_2 = vmalloc(1 * PAGE_SIZE); 189 if (!ptr_2) { 190 vfree(ptr_1); 191 goto leave; 192 } 193 194 *((__u8 *)ptr_1) = 0; 195 *((__u8 *)ptr_2) = 1; 196 197 vfree(ptr_1); 198 vfree(ptr_2); 199 } 200 201 /* Success */ 202 rv = 0; 203 204 leave: 205 for (i = 0; i < 15000; i++) 206 vfree(ptr[i]); 207 208 vfree(ptr); 209 return rv; 210 } 211 212 static int full_fit_alloc_test(void) 213 { 214 void **ptr, **junk_ptr, *tmp; 215 int junk_length; 216 int rv = -1; 217 int i; 218 219 junk_length = fls(num_online_cpus()); 220 junk_length *= (32 * 1024 * 1024 / PAGE_SIZE); 221 222 ptr = vmalloc(sizeof(void *) * junk_length); 223 if (!ptr) 224 return rv; 225 226 junk_ptr = vmalloc(sizeof(void *) * junk_length); 227 if (!junk_ptr) { 228 vfree(ptr); 229 return rv; 230 } 231 232 for (i = 0; i < junk_length; i++) { 233 ptr[i] = vmalloc(1 * PAGE_SIZE); 234 junk_ptr[i] = vmalloc(1 * PAGE_SIZE); 235 } 236 237 for (i = 0; i < junk_length; i++) 238 vfree(junk_ptr[i]); 239 240 for (i = 0; i < test_loop_count; i++) { 241 tmp = vmalloc(1 * PAGE_SIZE); 242 243 if (!tmp) 244 goto error; 245 246 *((__u8 *)tmp) = 1; 247 vfree(tmp); 248 } 249 250 /* Success */ 251 rv = 0; 252 253 error: 254 for (i = 0; i < junk_length; i++) 255 vfree(ptr[i]); 256 257 vfree(ptr); 258 vfree(junk_ptr); 259 260 return rv; 261 } 262 263 static int fix_size_alloc_test(void) 264 { 265 void *ptr; 266 int i; 267 268 for (i = 0; i < test_loop_count; i++) { 269 ptr = vmalloc((nr_pages > 0 ? nr_pages:1) * PAGE_SIZE); 270 271 if (!ptr) 272 return -1; 273 274 *((__u8 *)ptr) = 0; 275 276 vfree(ptr); 277 } 278 279 return 0; 280 } 281 282 static int 283 pcpu_alloc_test(void) 284 { 285 int rv = 0; 286 #ifndef CONFIG_NEED_PER_CPU_KM 287 void __percpu **pcpu; 288 size_t size, align; 289 int i; 290 291 pcpu = vmalloc(sizeof(void __percpu *) * 35000); 292 if (!pcpu) 293 return -1; 294 295 for (i = 0; i < 35000; i++) { 296 unsigned int r; 297 298 r = prandom_u32(); 299 size = (r % (PAGE_SIZE / 4)) + 1; 300 301 /* 302 * Maximum PAGE_SIZE 303 */ 304 r = prandom_u32(); 305 align = 1 << ((r % 11) + 1); 306 307 pcpu[i] = __alloc_percpu(size, align); 308 if (!pcpu[i]) 309 rv = -1; 310 } 311 312 for (i = 0; i < 35000; i++) 313 free_percpu(pcpu[i]); 314 315 vfree(pcpu); 316 #endif 317 return rv; 318 } 319 320 struct test_kvfree_rcu { 321 struct rcu_head rcu; 322 unsigned char array[20]; 323 }; 324 325 static int 326 kvfree_rcu_1_arg_vmalloc_test(void) 327 { 328 struct test_kvfree_rcu *p; 329 int i; 330 331 for (i = 0; i < test_loop_count; i++) { 332 p = vmalloc(1 * PAGE_SIZE); 333 if (!p) 334 return -1; 335 336 p->array[0] = 'a'; 337 kvfree_rcu(p); 338 } 339 340 return 0; 341 } 342 343 static int 344 kvfree_rcu_2_arg_vmalloc_test(void) 345 { 346 struct test_kvfree_rcu *p; 347 int i; 348 349 for (i = 0; i < test_loop_count; i++) { 350 p = vmalloc(1 * PAGE_SIZE); 351 if (!p) 352 return -1; 353 354 p->array[0] = 'a'; 355 kvfree_rcu(p, rcu); 356 } 357 358 return 0; 359 } 360 361 struct test_case_desc { 362 const char *test_name; 363 int (*test_func)(void); 364 }; 365 366 static struct test_case_desc test_case_array[] = { 367 { "fix_size_alloc_test", fix_size_alloc_test }, 368 { "full_fit_alloc_test", full_fit_alloc_test }, 369 { "long_busy_list_alloc_test", long_busy_list_alloc_test }, 370 { "random_size_alloc_test", random_size_alloc_test }, 371 { "fix_align_alloc_test", fix_align_alloc_test }, 372 { "random_size_align_alloc_test", random_size_align_alloc_test }, 373 { "align_shift_alloc_test", align_shift_alloc_test }, 374 { "pcpu_alloc_test", pcpu_alloc_test }, 375 { "kvfree_rcu_1_arg_vmalloc_test", kvfree_rcu_1_arg_vmalloc_test }, 376 { "kvfree_rcu_2_arg_vmalloc_test", kvfree_rcu_2_arg_vmalloc_test }, 377 /* Add a new test case here. */ 378 }; 379 380 struct test_case_data { 381 int test_failed; 382 int test_passed; 383 u64 time; 384 }; 385 386 static struct test_driver { 387 struct task_struct *task; 388 struct test_case_data data[ARRAY_SIZE(test_case_array)]; 389 390 unsigned long start; 391 unsigned long stop; 392 } *tdriver; 393 394 static void shuffle_array(int *arr, int n) 395 { 396 unsigned int rnd; 397 int i, j; 398 399 for (i = n - 1; i > 0; i--) { 400 rnd = prandom_u32(); 401 402 /* Cut the range. */ 403 j = rnd % i; 404 405 /* Swap indexes. */ 406 swap(arr[i], arr[j]); 407 } 408 } 409 410 static int test_func(void *private) 411 { 412 struct test_driver *t = private; 413 int random_array[ARRAY_SIZE(test_case_array)]; 414 int index, i, j; 415 ktime_t kt; 416 u64 delta; 417 418 for (i = 0; i < ARRAY_SIZE(test_case_array); i++) 419 random_array[i] = i; 420 421 if (!sequential_test_order) 422 shuffle_array(random_array, ARRAY_SIZE(test_case_array)); 423 424 /* 425 * Block until initialization is done. 426 */ 427 down_read(&prepare_for_test_rwsem); 428 429 t->start = get_cycles(); 430 for (i = 0; i < ARRAY_SIZE(test_case_array); i++) { 431 index = random_array[i]; 432 433 /* 434 * Skip tests if run_test_mask has been specified. 435 */ 436 if (!((run_test_mask & (1 << index)) >> index)) 437 continue; 438 439 kt = ktime_get(); 440 for (j = 0; j < test_repeat_count; j++) { 441 if (!test_case_array[index].test_func()) 442 t->data[index].test_passed++; 443 else 444 t->data[index].test_failed++; 445 } 446 447 /* 448 * Take an average time that test took. 449 */ 450 delta = (u64) ktime_us_delta(ktime_get(), kt); 451 do_div(delta, (u32) test_repeat_count); 452 453 t->data[index].time = delta; 454 } 455 t->stop = get_cycles(); 456 457 up_read(&prepare_for_test_rwsem); 458 test_report_one_done(); 459 460 /* 461 * Wait for the kthread_stop() call. 462 */ 463 while (!kthread_should_stop()) 464 msleep(10); 465 466 return 0; 467 } 468 469 static int 470 init_test_configurtion(void) 471 { 472 /* 473 * A maximum number of workers is defined as hard-coded 474 * value and set to USHRT_MAX. We add such gap just in 475 * case and for potential heavy stressing. 476 */ 477 nr_threads = clamp(nr_threads, 1, (int) USHRT_MAX); 478 479 /* Allocate the space for test instances. */ 480 tdriver = kvcalloc(nr_threads, sizeof(*tdriver), GFP_KERNEL); 481 if (tdriver == NULL) 482 return -1; 483 484 if (test_repeat_count <= 0) 485 test_repeat_count = 1; 486 487 if (test_loop_count <= 0) 488 test_loop_count = 1; 489 490 return 0; 491 } 492 493 static void do_concurrent_test(void) 494 { 495 int i, ret; 496 497 /* 498 * Set some basic configurations plus sanity check. 499 */ 500 ret = init_test_configurtion(); 501 if (ret < 0) 502 return; 503 504 /* 505 * Put on hold all workers. 506 */ 507 down_write(&prepare_for_test_rwsem); 508 509 for (i = 0; i < nr_threads; i++) { 510 struct test_driver *t = &tdriver[i]; 511 512 t->task = kthread_run(test_func, t, "vmalloc_test/%d", i); 513 514 if (!IS_ERR(t->task)) 515 /* Success. */ 516 atomic_inc(&test_n_undone); 517 else 518 pr_err("Failed to start %d kthread\n", i); 519 } 520 521 /* 522 * Now let the workers do their job. 523 */ 524 up_write(&prepare_for_test_rwsem); 525 526 /* 527 * Sleep quiet until all workers are done with 1 second 528 * interval. Since the test can take a lot of time we 529 * can run into a stack trace of the hung task. That is 530 * why we go with completion_timeout and HZ value. 531 */ 532 do { 533 ret = wait_for_completion_timeout(&test_all_done_comp, HZ); 534 } while (!ret); 535 536 for (i = 0; i < nr_threads; i++) { 537 struct test_driver *t = &tdriver[i]; 538 int j; 539 540 if (!IS_ERR(t->task)) 541 kthread_stop(t->task); 542 543 for (j = 0; j < ARRAY_SIZE(test_case_array); j++) { 544 if (!((run_test_mask & (1 << j)) >> j)) 545 continue; 546 547 pr_info( 548 "Summary: %s passed: %d failed: %d repeat: %d loops: %d avg: %llu usec\n", 549 test_case_array[j].test_name, 550 t->data[j].test_passed, 551 t->data[j].test_failed, 552 test_repeat_count, test_loop_count, 553 t->data[j].time); 554 } 555 556 pr_info("All test took worker%d=%lu cycles\n", 557 i, t->stop - t->start); 558 } 559 560 kvfree(tdriver); 561 } 562 563 static int vmalloc_test_init(void) 564 { 565 do_concurrent_test(); 566 return -EAGAIN; /* Fail will directly unload the module */ 567 } 568 569 static void vmalloc_test_exit(void) 570 { 571 } 572 573 module_init(vmalloc_test_init) 574 module_exit(vmalloc_test_exit) 575 576 MODULE_LICENSE("GPL"); 577 MODULE_AUTHOR("Uladzislau Rezki"); 578 MODULE_DESCRIPTION("vmalloc test module"); 579