// SPDX-License-Identifier: GPL-2.0

/*
 * Test module to stress and analyze the performance of the vmalloc allocator.
 * (C) 2018 Uladzislau Rezki (Sony) <urezki@gmail.com>
 */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
#include <linux/kthread.h>
#include <linux/moduleparam.h>
#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/rwsem.h>
#include <linux/mm.h>

#define __param(type, name, init, msg)		\
	static type name = init;		\
	module_param(name, type, 0444);		\
	MODULE_PARM_DESC(name, msg)

__param(bool, single_cpu_test, false,
	"Use single first online CPU to run tests");

__param(bool, sequential_test_order, false,
	"Use sequential stress tests order");

__param(int, test_repeat_count, 1,
	"Set test repeat counter");

__param(int, test_loop_count, 1000000,
	"Set test loop counter");

__param(int, run_test_mask, INT_MAX,
	"Set tests specified in the mask.\n\n"
		"\t\tid: 1, name: fix_size_alloc_test\n"
		"\t\tid: 2, name: full_fit_alloc_test\n"
		"\t\tid: 4, name: long_busy_list_alloc_test\n"
		"\t\tid: 8, name: random_size_alloc_test\n"
		"\t\tid: 16, name: fix_align_alloc_test\n"
		"\t\tid: 32, name: random_size_align_alloc_test\n"
		"\t\tid: 64, name: align_shift_alloc_test\n"
		"\t\tid: 128, name: pcpu_alloc_test\n"
		/* Add a new test case description here. */
);
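/*
 * Example invocations (a sketch; it assumes the object builds as
 * test_vmalloc.ko, and that test ids combine as a bitmask):
 *
 *	# run only fix_size_alloc_test (1) and long_busy_list_alloc_test (4)
 *	modprobe test_vmalloc run_test_mask=5
 *
 *	# repeat every test 20 times, on the first online CPU only
 *	modprobe test_vmalloc single_cpu_test=1 test_repeat_count=20
 */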
/*
 * If single_cpu_test is true, the tests run only on the first
 * online CPU; otherwise they run on all online CPUs.
 */
static cpumask_t cpus_run_test_mask = CPU_MASK_NONE;

/*
 * Read-write semaphore used to synchronize the setup phase, which is
 * done in the main thread, with the worker threads.
 */
static DECLARE_RWSEM(prepare_for_test_rwsem);

/*
 * Completion tracking for worker threads.
 */
static DECLARE_COMPLETION(test_all_done_comp);
static atomic_t test_n_undone = ATOMIC_INIT(0);

static inline void
test_report_one_done(void)
{
	if (atomic_dec_and_test(&test_n_undone))
		complete(&test_all_done_comp);
}

static int random_size_align_alloc_test(void)
{
	unsigned long size, align, rnd;
	void *ptr;
	int i;

	for (i = 0; i < test_loop_count; i++) {
		get_random_bytes(&rnd, sizeof(rnd));

		/*
		 * Maximum alignment is 1 << 22, i.e. 1024 pages if
		 * PAGE_SIZE is 4096.
		 */
		align = 1 << (rnd % 23);

		/*
		 * Maximum 10 pages.
		 */
		size = ((rnd % 10) + 1) * PAGE_SIZE;

		ptr = __vmalloc_node_range(size, align,
			VMALLOC_START, VMALLOC_END,
			GFP_KERNEL | __GFP_ZERO,
			PAGE_KERNEL,
			0, 0, __builtin_return_address(0));

		if (!ptr)
			return -1;

		vfree(ptr);
	}

	return 0;
}

/*
 * This test case is supposed to fail: for high shifts the requested
 * alignment cannot fit into the vmalloc area.
 */
static int align_shift_alloc_test(void)
{
	unsigned long align;
	void *ptr;
	int i;

	for (i = 0; i < BITS_PER_LONG; i++) {
		align = ((unsigned long) 1) << i;

		ptr = __vmalloc_node_range(PAGE_SIZE, align,
			VMALLOC_START, VMALLOC_END,
			GFP_KERNEL | __GFP_ZERO,
			PAGE_KERNEL,
			0, 0, __builtin_return_address(0));

		if (!ptr)
			return -1;

		vfree(ptr);
	}

	return 0;
}

static int fix_align_alloc_test(void)
{
	void *ptr;
	int i;

	for (i = 0; i < test_loop_count; i++) {
		ptr = __vmalloc_node_range(5 * PAGE_SIZE,
			THREAD_ALIGN << 1,
			VMALLOC_START, VMALLOC_END,
			GFP_KERNEL | __GFP_ZERO,
			PAGE_KERNEL,
			0, 0, __builtin_return_address(0));

		if (!ptr)
			return -1;

		vfree(ptr);
	}

	return 0;
}

static int random_size_alloc_test(void)
{
	unsigned int n;
	void *p;
	int i;

	for (i = 0; i < test_loop_count; i++) {
		get_random_bytes(&n, sizeof(n));
		n = (n % 100) + 1;

		p = vmalloc(n * PAGE_SIZE);

		if (!p)
			return -1;

		*((__u8 *)p) = 1;
		vfree(p);
	}

	return 0;
}

static int long_busy_list_alloc_test(void)
{
	void *ptr_1, *ptr_2;
	void **ptr;
	int rv = -1;
	int i;

	ptr = vmalloc(sizeof(void *) * 15000);
	if (!ptr)
		return rv;

	for (i = 0; i < 15000; i++)
		ptr[i] = vmalloc(1 * PAGE_SIZE);

	for (i = 0; i < test_loop_count; i++) {
		ptr_1 = vmalloc(100 * PAGE_SIZE);
		if (!ptr_1)
			goto leave;

		ptr_2 = vmalloc(1 * PAGE_SIZE);
		if (!ptr_2) {
			vfree(ptr_1);
			goto leave;
		}

		*((__u8 *)ptr_1) = 0;
		*((__u8 *)ptr_2) = 1;

		vfree(ptr_1);
		vfree(ptr_2);
	}

	/* Success */
	rv = 0;

leave:
	/* vfree(NULL) is a no-op, so a partially populated ptr[] is fine. */
	for (i = 0; i < 15000; i++)
		vfree(ptr[i]);

	vfree(ptr);
	return rv;
}

static int full_fit_alloc_test(void)
{
	void **ptr, **junk_ptr, *tmp;
	int junk_length;
	int rv = -1;
	int i;

	/*
	 * Scale the amount of "junk" with the CPU count:
	 * fls(num_online_cpus()) times 32 MB worth of single pages.
	 */
	junk_length = fls(num_online_cpus());
	junk_length *= (32 * 1024 * 1024 / PAGE_SIZE);

	ptr = vmalloc(sizeof(void *) * junk_length);
	if (!ptr)
		return rv;

	junk_ptr = vmalloc(sizeof(void *) * junk_length);
	if (!junk_ptr) {
		vfree(ptr);
		return rv;
	}

	for (i = 0; i < junk_length; i++) {
		ptr[i] = vmalloc(1 * PAGE_SIZE);
		junk_ptr[i] = vmalloc(1 * PAGE_SIZE);
	}

	for (i = 0; i < junk_length; i++)
		vfree(junk_ptr[i]);

	for (i = 0; i < test_loop_count; i++) {
		tmp = vmalloc(1 * PAGE_SIZE);

		if (!tmp)
			goto error;

		*((__u8 *)tmp) = 1;
		vfree(tmp);
	}

	/* Success */
	rv = 0;

error:
	for (i = 0; i < junk_length; i++)
		vfree(ptr[i]);

	vfree(ptr);
	vfree(junk_ptr);

	return rv;
}

static int fix_size_alloc_test(void)
{
	void *ptr;
	int i;

	for (i = 0; i < test_loop_count; i++) {
		ptr = vmalloc(3 * PAGE_SIZE);

		if (!ptr)
			return -1;

		*((__u8 *)ptr) = 0;

		vfree(ptr);
	}

	return 0;
}

static int
pcpu_alloc_test(void)
{
	int rv = 0;
#ifndef CONFIG_NEED_PER_CPU_KM
	void __percpu **pcpu;
	size_t size, align;
	int i;

	pcpu = vmalloc(sizeof(void __percpu *) * 35000);
	if (!pcpu)
		return -1;

	for (i = 0; i < 35000; i++) {
		unsigned int r;

		get_random_bytes(&r, sizeof(r));
		size = (r % (PAGE_SIZE / 4)) + 1;

		/*
		 * Random power-of-two alignment, from 2 up to 1 << 11.
		 */
		get_random_bytes(&r, sizeof(r));
		align = 1 << ((r % 11) + 1);

		pcpu[i] = __alloc_percpu(size, align);
		if (!pcpu[i])
			rv = -1;
	}

	for (i = 0; i < 35000; i++)
		free_percpu(pcpu[i]);

	vfree(pcpu);
#endif
	return rv;
}
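/*
 * Adding a new test case is a three-step change. A minimal sketch,
 * where "example_alloc_test" is a hypothetical name:
 *
 *	static int example_alloc_test(void)
 *	{
 *		void *ptr = vmalloc(PAGE_SIZE);
 *
 *		if (!ptr)
 *			return -1;
 *
 *		vfree(ptr);
 *		return 0;
 *	}
 *
 * Then append { "example_alloc_test", example_alloc_test } to
 * test_case_array[] below, and document its id (the next free power
 * of two, i.e. 256) in the run_test_mask parameter description above.
 */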
struct test_case_desc {
	const char *test_name;
	int (*test_func)(void);
};

static struct test_case_desc test_case_array[] = {
	{ "fix_size_alloc_test", fix_size_alloc_test },
	{ "full_fit_alloc_test", full_fit_alloc_test },
	{ "long_busy_list_alloc_test", long_busy_list_alloc_test },
	{ "random_size_alloc_test", random_size_alloc_test },
	{ "fix_align_alloc_test", fix_align_alloc_test },
	{ "random_size_align_alloc_test", random_size_align_alloc_test },
	{ "align_shift_alloc_test", align_shift_alloc_test },
	{ "pcpu_alloc_test", pcpu_alloc_test },
	/* Add a new test case here. */
};

struct test_case_data {
	int test_failed;
	int test_passed;
	u64 time;
};

/* Split it to get rid of: WARNING: line over 80 characters */
static struct test_case_data
	per_cpu_test_data[NR_CPUS][ARRAY_SIZE(test_case_array)];

static struct test_driver {
	struct task_struct *task;
	unsigned long start;
	unsigned long stop;
	int cpu;
} per_cpu_test_driver[NR_CPUS];

static void shuffle_array(int *arr, int n)
{
	unsigned int rnd;
	int i, j, x;

	for (i = n - 1; i > 0; i--) {
		get_random_bytes(&rnd, sizeof(rnd));

		/* Pick an index in [0, i - 1]. */
		j = rnd % i;

		/* Swap the two entries. */
		x = arr[i];
		arr[i] = arr[j];
		arr[j] = x;
	}
}

static int test_func(void *private)
{
	struct test_driver *t = private;
	int random_array[ARRAY_SIZE(test_case_array)];
	int index, i, j;
	ktime_t kt;
	u64 delta;

	if (set_cpus_allowed_ptr(current, cpumask_of(t->cpu)) < 0)
		pr_err("Failed to set affinity to CPU %d\n", t->cpu);

	for (i = 0; i < ARRAY_SIZE(test_case_array); i++)
		random_array[i] = i;

	if (!sequential_test_order)
		shuffle_array(random_array, ARRAY_SIZE(test_case_array));

	/*
	 * Block until initialization is done.
	 */
	down_read(&prepare_for_test_rwsem);

	t->start = get_cycles();
	for (i = 0; i < ARRAY_SIZE(test_case_array); i++) {
		index = random_array[i];

		/*
		 * Skip tests that are not set in run_test_mask.
		 */
		if (!(run_test_mask & (1 << index)))
			continue;

		kt = ktime_get();
		for (j = 0; j < test_repeat_count; j++) {
			if (!test_case_array[index].test_func())
				per_cpu_test_data[t->cpu][index].test_passed++;
			else
				per_cpu_test_data[t->cpu][index].test_failed++;
		}

		/*
		 * Compute the average time one test repetition took.
		 */
		delta = (u64) ktime_us_delta(ktime_get(), kt);
		do_div(delta, (u32) test_repeat_count);

		per_cpu_test_data[t->cpu][index].time = delta;
	}
	t->stop = get_cycles();

	up_read(&prepare_for_test_rwsem);
	test_report_one_done();

	/*
	 * Wait for the kthread_stop() call.
	 */
	while (!kthread_should_stop())
		msleep(10);

	return 0;
}
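/*
 * A worked example of the averaging in test_func() above, with
 * illustrative numbers: if test_repeat_count=25 and ktime_us_delta()
 * reports 2500000 usec for the whole repeat loop, do_div() leaves
 * delta = 100000, i.e. an average of 100000 usec per repetition.
 */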
static void
init_test_configuration(void)
{
	/*
	 * Reset the data of all CPUs.
	 */
	memset(per_cpu_test_data, 0, sizeof(per_cpu_test_data));

	if (single_cpu_test)
		cpumask_set_cpu(cpumask_first(cpu_online_mask),
			&cpus_run_test_mask);
	else
		cpumask_copy(&cpus_run_test_mask, cpu_online_mask);

	if (test_repeat_count <= 0)
		test_repeat_count = 1;

	if (test_loop_count <= 0)
		test_loop_count = 1;
}

static void do_concurrent_test(void)
{
	int cpu, ret;

	/*
	 * Apply the basic configuration and sanity-check the parameters.
	 */
	init_test_configuration();

	/*
	 * Hold all workers until setup is complete.
	 */
	down_write(&prepare_for_test_rwsem);

	for_each_cpu(cpu, &cpus_run_test_mask) {
		struct test_driver *t = &per_cpu_test_driver[cpu];

		t->cpu = cpu;
		t->task = kthread_run(test_func, t, "vmalloc_test/%d", cpu);

		if (!IS_ERR(t->task))
			/* Success. */
			atomic_inc(&test_n_undone);
		else
			pr_err("Failed to start kthread for CPU %d\n", cpu);
	}

	/*
	 * Now let the workers do their job.
	 */
	up_write(&prepare_for_test_rwsem);

	/*
	 * Sleep until all workers are done, polling at a one-second
	 * interval. The tests can take a long time, and waiting without
	 * a timeout could trigger a hung-task stack trace; that is why
	 * we use wait_for_completion_timeout() with an HZ timeout.
	 */
	do {
		ret = wait_for_completion_timeout(&test_all_done_comp, HZ);
	} while (!ret);

	for_each_cpu(cpu, &cpus_run_test_mask) {
		struct test_driver *t = &per_cpu_test_driver[cpu];
		int i;

		if (!IS_ERR(t->task))
			kthread_stop(t->task);

		for (i = 0; i < ARRAY_SIZE(test_case_array); i++) {
			if (!(run_test_mask & (1 << i)))
				continue;

			pr_info(
				"Summary: %s passed: %d failed: %d repeat: %d loops: %d avg: %llu usec\n",
				test_case_array[i].test_name,
				per_cpu_test_data[cpu][i].test_passed,
				per_cpu_test_data[cpu][i].test_failed,
				test_repeat_count, test_loop_count,
				per_cpu_test_data[cpu][i].time);
		}

		pr_info("All tests took CPU%d=%lu cycles\n",
			cpu, t->stop - t->start);
	}
}

static int vmalloc_test_init(void)
{
	do_concurrent_test();
	return -EAGAIN; /* Returning an error unloads the module right after the run. */
}

static void vmalloc_test_exit(void)
{
}

module_init(vmalloc_test_init)
module_exit(vmalloc_test_exit)

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Uladzislau Rezki");
MODULE_DESCRIPTION("vmalloc test module");
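/*
 * Example of the resulting dmesg output, reconstructed from the
 * pr_info() format strings above (all numbers are illustrative):
 *
 *	Summary: fix_size_alloc_test passed: 1 failed: 0 repeat: 1 loops: 1000000 avg: 876542 usec
 *	Summary: full_fit_alloc_test passed: 1 failed: 0 repeat: 1 loops: 1000000 avg: 1124012 usec
 *	All tests took CPU0=9127384559 cycles
 */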