// SPDX-License-Identifier: GPL-2.0

/*
 * Test module for stressing and analyzing the performance of the
 * vmalloc allocator.
 * (C) 2018 Uladzislau Rezki (Sony) <urezki@gmail.com>
 */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
#include <linux/kthread.h>
#include <linux/moduleparam.h>
#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/rwsem.h>
#include <linux/mm.h>

#define __param(type, name, init, msg)		\
	static type name = init;		\
	module_param(name, type, 0444);		\
	MODULE_PARM_DESC(name, msg)

__param(bool, single_cpu_test, false,
	"Use single first online CPU to run tests");

__param(bool, sequential_test_order, false,
	"Use sequential stress tests order");

__param(int, test_repeat_count, 1,
	"Set test repeat counter");

__param(int, test_loop_count, 1000000,
	"Set test loop counter");

__param(int, run_test_mask, INT_MAX,
	"Set tests specified in the mask.\n\n"
	"\t\tid: 1, name: fix_size_alloc_test\n"
	"\t\tid: 2, name: full_fit_alloc_test\n"
	"\t\tid: 4, name: long_busy_list_alloc_test\n"
	"\t\tid: 8, name: random_size_alloc_test\n"
	"\t\tid: 16, name: fix_align_alloc_test\n"
	"\t\tid: 32, name: random_size_align_alloc_test\n"
	"\t\tid: 64, name: align_shift_alloc_test\n"
	"\t\tid: 128, name: pcpu_alloc_test\n"
	/* Add a new test case description here. */
);
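/*
 * Example usage (the module name is assumed here to be test_vmalloc,
 * which depends on how the kernel build names it):
 *
 *   modprobe test_vmalloc run_test_mask=3 test_repeat_count=10
 *
 * runs fix_size_alloc_test (id 1) and full_fit_alloc_test (id 2) ten
 * times on every online CPU.
 */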
/*
 * Depends on the single_cpu_test parameter. If it is true, then
 * use the first online CPU to trigger the tests on, otherwise go with
 * all online CPUs.
 */
static cpumask_t cpus_run_test_mask = CPU_MASK_NONE;

/*
 * Read-write semaphore for synchronization of the setup
 * phase that is done in the main thread and the workers.
 */
static DECLARE_RWSEM(prepare_for_test_rwsem);

/*
 * Completion tracking for worker threads.
 */
static DECLARE_COMPLETION(test_all_done_comp);
static atomic_t test_n_undone = ATOMIC_INIT(0);

static inline void
test_report_one_done(void)
{
	if (atomic_dec_and_test(&test_n_undone))
		complete(&test_all_done_comp);
}

static int random_size_align_alloc_test(void)
{
	unsigned long size, align, rnd;
	void *ptr;
	int i;

	for (i = 0; i < test_loop_count; i++) {
		get_random_bytes(&rnd, sizeof(rnd));

		/*
		 * Maximum 1024 pages, if PAGE_SIZE is 4096.
		 */
		align = 1 << (rnd % 23);

		/*
		 * Maximum 10 pages.
		 */
		size = ((rnd % 10) + 1) * PAGE_SIZE;

		ptr = __vmalloc_node_range(size, align,
			VMALLOC_START, VMALLOC_END,
			GFP_KERNEL | __GFP_ZERO,
			PAGE_KERNEL,
			0, 0, __builtin_return_address(0));

		if (!ptr)
			return -1;

		vfree(ptr);
	}

	return 0;
}

/*
 * This test case is supposed to fail: for large shift values the
 * requested alignment cannot be satisfied within the vmalloc range.
 */
static int align_shift_alloc_test(void)
{
	unsigned long align;
	void *ptr;
	int i;

	for (i = 0; i < BITS_PER_LONG; i++) {
		align = ((unsigned long) 1) << i;

		ptr = __vmalloc_node_range(PAGE_SIZE, align,
			VMALLOC_START, VMALLOC_END,
			GFP_KERNEL | __GFP_ZERO,
			PAGE_KERNEL,
			0, 0, __builtin_return_address(0));

		if (!ptr)
			return -1;

		vfree(ptr);
	}

	return 0;
}

static int fix_align_alloc_test(void)
{
	void *ptr;
	int i;

	for (i = 0; i < test_loop_count; i++) {
		ptr = __vmalloc_node_range(5 * PAGE_SIZE,
			THREAD_ALIGN << 1,
			VMALLOC_START, VMALLOC_END,
			GFP_KERNEL | __GFP_ZERO,
			PAGE_KERNEL,
			0, 0, __builtin_return_address(0));

		if (!ptr)
			return -1;

		vfree(ptr);
	}

	return 0;
}

static int random_size_alloc_test(void)
{
	unsigned int n;
	void *p;
	int i;

	for (i = 0; i < test_loop_count; i++) {
		get_random_bytes(&n, sizeof(n));
		n = (n % 100) + 1;

		p = vmalloc(n * PAGE_SIZE);

		if (!p)
			return -1;

		*((__u8 *)p) = 1;
		vfree(p);
	}

	return 0;
}

/*
 * Populate the vmalloc space with 15000 busy one-page areas up front,
 * so that every allocation in the test loop has to search across a
 * long list of busy blocks.
 */
static int long_busy_list_alloc_test(void)
{
	void *ptr_1, *ptr_2;
	void **ptr;
	int rv = -1;
	int i;

	ptr = vmalloc(sizeof(void *) * 15000);
	if (!ptr)
		return rv;

	for (i = 0; i < 15000; i++)
		ptr[i] = vmalloc(1 * PAGE_SIZE);

	for (i = 0; i < test_loop_count; i++) {
		ptr_1 = vmalloc(100 * PAGE_SIZE);
		if (!ptr_1)
			goto leave;

		ptr_2 = vmalloc(1 * PAGE_SIZE);
		if (!ptr_2) {
			vfree(ptr_1);
			goto leave;
		}

		*((__u8 *)ptr_1) = 0;
		*((__u8 *)ptr_2) = 1;

		vfree(ptr_1);
		vfree(ptr_2);
	}

	/* Success */
	rv = 0;

leave:
	for (i = 0; i < 15000; i++)
		vfree(ptr[i]);

	vfree(ptr);
	return rv;
}

/*
 * Interleave kept and "junk" one-page areas, free all the junk ones to
 * leave one-page holes, then repeatedly allocate and free single pages
 * that exactly fit those holes.
 */
static int full_fit_alloc_test(void)
{
	void **ptr, **junk_ptr, *tmp;
	int junk_length;
	int rv = -1;
	int i;

	junk_length = fls(num_online_cpus());
	junk_length *= (32 * 1024 * 1024 / PAGE_SIZE);

	ptr = vmalloc(sizeof(void *) * junk_length);
	if (!ptr)
		return rv;

	junk_ptr = vmalloc(sizeof(void *) * junk_length);
	if (!junk_ptr) {
		vfree(ptr);
		return rv;
	}

	for (i = 0; i < junk_length; i++) {
		ptr[i] = vmalloc(1 * PAGE_SIZE);
		junk_ptr[i] = vmalloc(1 * PAGE_SIZE);
	}

	for (i = 0; i < junk_length; i++)
		vfree(junk_ptr[i]);

	for (i = 0; i < test_loop_count; i++) {
		tmp = vmalloc(1 * PAGE_SIZE);

		if (!tmp)
			goto error;

		*((__u8 *)tmp) = 1;
		vfree(tmp);
	}

	/* Success */
	rv = 0;

error:
	for (i = 0; i < junk_length; i++)
		vfree(ptr[i]);

	vfree(ptr);
	vfree(junk_ptr);

	return rv;
}

static int fix_size_alloc_test(void)
{
	void *ptr;
	int i;

	for (i = 0; i < test_loop_count; i++) {
		ptr = vmalloc(3 * PAGE_SIZE);

		if (!ptr)
			return -1;

		*((__u8 *)ptr) = 0;

		vfree(ptr);
	}

	return 0;
}

static int
pcpu_alloc_test(void)
{
	int rv = 0;
#ifndef CONFIG_NEED_PER_CPU_KM
	void __percpu **pcpu;
	size_t size, align;
	int i;

	pcpu = vmalloc(sizeof(void __percpu *) * 35000);
	if (!pcpu)
		return -1;

	for (i = 0; i < 35000; i++) {
		unsigned int r;

		get_random_bytes(&r, sizeof(r));
		size = (r % (PAGE_SIZE / 4)) + 1;

		/*
		 * Maximum align is 2048, i.e. half a page when
		 * PAGE_SIZE is 4096.
		 */
		get_random_bytes(&r, sizeof(r));
		align = 1 << ((r % 11) + 1);

		pcpu[i] = __alloc_percpu(size, align);
		if (!pcpu[i])
			rv = -1;
	}

	for (i = 0; i < 35000; i++)
		free_percpu(pcpu[i]);

	vfree(pcpu);
#endif
	return rv;
}

struct test_case_desc {
	const char *test_name;
	int (*test_func)(void);
};

static struct test_case_desc test_case_array[] = {
	{ "fix_size_alloc_test", fix_size_alloc_test },
	{ "full_fit_alloc_test", full_fit_alloc_test },
	{ "long_busy_list_alloc_test", long_busy_list_alloc_test },
	{ "random_size_alloc_test", random_size_alloc_test },
	{ "fix_align_alloc_test", fix_align_alloc_test },
	{ "random_size_align_alloc_test", random_size_align_alloc_test },
	{ "align_shift_alloc_test", align_shift_alloc_test },
	{ "pcpu_alloc_test", pcpu_alloc_test },
	/* Add a new test case here. */
};

struct test_case_data {
	int test_failed;
	int test_passed;
	u64 time;
};

/* Split it to get rid of: WARNING: line over 80 characters */
static struct test_case_data
	per_cpu_test_data[NR_CPUS][ARRAY_SIZE(test_case_array)];

static struct test_driver {
	struct task_struct *task;
	unsigned long start;
	unsigned long stop;
	int cpu;
} per_cpu_test_driver[NR_CPUS];
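/*
 * In-place Fisher-Yates shuffle of the test index array, so that each
 * worker runs the test cases in its own random order.
 */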
static void shuffle_array(int *arr, int n)
{
	unsigned int rnd;
	int i, j, x;

	for (i = n - 1; i > 0; i--) {
		get_random_bytes(&rnd, sizeof(rnd));

		/* Cut the range: j is any index in [0, i]. */
		j = rnd % (i + 1);

		/* Swap indexes. */
		x = arr[i];
		arr[i] = arr[j];
		arr[j] = x;
	}
}

static int test_func(void *private)
{
	struct test_driver *t = private;
	cpumask_t newmask = CPU_MASK_NONE;
	int random_array[ARRAY_SIZE(test_case_array)];
	int index, i, j, ret;
	ktime_t kt;
	u64 delta;

	cpumask_set_cpu(t->cpu, &newmask);
	set_cpus_allowed_ptr(current, &newmask);

	for (i = 0; i < ARRAY_SIZE(test_case_array); i++)
		random_array[i] = i;

	if (!sequential_test_order)
		shuffle_array(random_array, ARRAY_SIZE(test_case_array));

	/*
	 * Block until initialization is done.
	 */
	down_read(&prepare_for_test_rwsem);

	t->start = get_cycles();
	for (i = 0; i < ARRAY_SIZE(test_case_array); i++) {
		index = random_array[i];

		/*
		 * Skip tests that are not set in run_test_mask.
		 */
		if (!(run_test_mask & (1 << index)))
			continue;

		kt = ktime_get();
		for (j = 0; j < test_repeat_count; j++) {
			ret = test_case_array[index].test_func();
			if (!ret)
				per_cpu_test_data[t->cpu][index].test_passed++;
			else
				per_cpu_test_data[t->cpu][index].test_failed++;
		}

		/*
		 * Take the average time the test took.
		 */
		delta = (u64) ktime_us_delta(ktime_get(), kt);
		do_div(delta, (u32) test_repeat_count);

		per_cpu_test_data[t->cpu][index].time = delta;
	}
	t->stop = get_cycles();

	up_read(&prepare_for_test_rwsem);
	test_report_one_done();

	/*
	 * Wait for the kthread_stop() call.
	 */
	while (!kthread_should_stop())
		msleep(10);

	return 0;
}

static void
init_test_configuration(void)
{
	/*
	 * Reset all data of all CPUs.
	 */
	memset(per_cpu_test_data, 0, sizeof(per_cpu_test_data));

	if (single_cpu_test)
		cpumask_set_cpu(cpumask_first(cpu_online_mask),
			&cpus_run_test_mask);
	else
		cpumask_copy(&cpus_run_test_mask, cpu_online_mask);

	if (test_repeat_count <= 0)
		test_repeat_count = 1;

	if (test_loop_count <= 0)
		test_loop_count = 1;
}
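/*
 * Spawn one worker kthread per CPU in cpus_run_test_mask. The workers
 * are held on prepare_for_test_rwsem until setup is complete; the main
 * thread then waits until all of them report completion and prints a
 * per-CPU summary.
 */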
static void do_concurrent_test(void)
{
	int cpu, ret;

	/*
	 * Set some basic configuration plus a sanity check.
	 */
	init_test_configuration();

	/*
	 * Put all workers on hold.
	 */
	down_write(&prepare_for_test_rwsem);

	for_each_cpu(cpu, &cpus_run_test_mask) {
		struct test_driver *t = &per_cpu_test_driver[cpu];

		t->cpu = cpu;
		t->task = kthread_run(test_func, t, "vmalloc_test/%d", cpu);

		if (!IS_ERR(t->task))
			/* Success. */
			atomic_inc(&test_n_undone);
		else
			pr_err("Failed to start kthread for CPU %d\n", cpu);
	}

	/*
	 * Now let the workers do their job.
	 */
	up_write(&prepare_for_test_rwsem);

	/*
	 * Sleep quietly until all workers are done, polling with a one
	 * second interval. Since the tests can take a lot of time, an
	 * unbounded wait could trigger a hung-task stack trace. That is
	 * why we go with wait_for_completion_timeout() and an HZ value.
	 */
	do {
		ret = wait_for_completion_timeout(&test_all_done_comp, HZ);
	} while (!ret);

	for_each_cpu(cpu, &cpus_run_test_mask) {
		struct test_driver *t = &per_cpu_test_driver[cpu];
		int i;

		if (!IS_ERR(t->task))
			kthread_stop(t->task);

		for (i = 0; i < ARRAY_SIZE(test_case_array); i++) {
			if (!(run_test_mask & (1 << i)))
				continue;

			pr_info(
				"Summary: %s passed: %d failed: %d repeat: %d loops: %d avg: %llu usec\n",
				test_case_array[i].test_name,
				per_cpu_test_data[cpu][i].test_passed,
				per_cpu_test_data[cpu][i].test_failed,
				test_repeat_count, test_loop_count,
				per_cpu_test_data[cpu][i].time);
		}

		pr_info("All tests took CPU%d=%lu cycles\n",
			cpu, t->stop - t->start);
	}
}

static int vmalloc_test_init(void)
{
	do_concurrent_test();
	return -EAGAIN; /* Returning an error unloads the module right away. */
}

static void vmalloc_test_exit(void)
{
}

module_init(vmalloc_test_init)
module_exit(vmalloc_test_exit)

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Uladzislau Rezki");
MODULE_DESCRIPTION("vmalloc test module");