// SPDX-License-Identifier: GPL-2.0

/*
 * Test module for stressing and analyzing the performance of the
 * vmalloc allocator.
 * (C) 2018 Uladzislau Rezki (Sony) <urezki@gmail.com>
 */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
#include <linux/kthread.h>
#include <linux/moduleparam.h>
#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/rwsem.h>
#include <linux/mm.h>

#define __param(type, name, init, msg)		\
	static type name = init;		\
	module_param(name, type, 0444);		\
	MODULE_PARM_DESC(name, msg)		\

__param(bool, single_cpu_test, false,
	"Use single first online CPU to run tests");

__param(bool, sequential_test_order, false,
	"Use sequential stress tests order");

__param(int, test_repeat_count, 1,
	"Set test repeat counter");

__param(int, test_loop_count, 1000000,
	"Set test loop counter");

__param(int, run_test_mask, INT_MAX,
	"Set tests specified in the mask.\n\n"
		"\t\tid: 1, name: fix_size_alloc_test\n"
		"\t\tid: 2, name: full_fit_alloc_test\n"
		"\t\tid: 4, name: long_busy_list_alloc_test\n"
		"\t\tid: 8, name: random_size_alloc_test\n"
		"\t\tid: 16, name: fix_align_alloc_test\n"
		"\t\tid: 32, name: random_size_align_alloc_test\n"
		"\t\tid: 64, name: align_shift_alloc_test\n"
		"\t\tid: 128, name: pcpu_alloc_test\n"
		/* Add a new test case description here. */
);
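
/*
 * Example invocation (a sketch only; it assumes the module is built as
 * test_vmalloc.ko, which is not dictated by this file):
 *
 *   modprobe test_vmalloc run_test_mask=5 test_repeat_count=10 single_cpu_test=1
 *
 * With run_test_mask=5 (1 | 4) only fix_size_alloc_test and
 * long_busy_list_alloc_test are selected; each is repeated ten times,
 * and only the first online CPU runs the worker thread.
 */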

/*
 * Depends on the single_cpu_test parameter. If it is true, the tests
 * run only on the first online CPU; otherwise they run on all online
 * CPUs.
 */
static cpumask_t cpus_run_test_mask = CPU_MASK_NONE;

/*
 * Read-write semaphore that synchronizes the setup phase, done in the
 * main thread, with the worker threads.
 */
static DECLARE_RWSEM(prepare_for_test_rwsem);

/*
 * Completion tracking for worker threads.
 */
static DECLARE_COMPLETION(test_all_done_comp);
static atomic_t test_n_undone = ATOMIC_INIT(0);

static inline void
test_report_one_done(void)
{
	if (atomic_dec_and_test(&test_n_undone))
		complete(&test_all_done_comp);
}

static int random_size_align_alloc_test(void)
{
	unsigned long size, align, rnd;
	void *ptr;
	int i;

	for (i = 0; i < test_loop_count; i++) {
		get_random_bytes(&rnd, sizeof(rnd));

		/*
		 * Maximum 1024 pages, if PAGE_SIZE is 4096.
		 */
		align = 1 << (rnd % 23);

		/*
		 * Maximum 10 pages.
		 */
		size = ((rnd % 10) + 1) * PAGE_SIZE;

		ptr = __vmalloc_node(size, align, GFP_KERNEL | __GFP_ZERO, 0,
				__builtin_return_address(0));
		if (!ptr)
			return -1;

		vfree(ptr);
	}

	return 0;
}

/*
 * This test case is supposed to fail.
 */
static int align_shift_alloc_test(void)
{
	unsigned long align;
	void *ptr;
	int i;

	for (i = 0; i < BITS_PER_LONG; i++) {
		align = ((unsigned long) 1) << i;

		ptr = __vmalloc_node(PAGE_SIZE, align, GFP_KERNEL | __GFP_ZERO, 0,
				__builtin_return_address(0));
		if (!ptr)
			return -1;

		vfree(ptr);
	}

	return 0;
}

static int fix_align_alloc_test(void)
{
	void *ptr;
	int i;

	for (i = 0; i < test_loop_count; i++) {
		ptr = __vmalloc_node(5 * PAGE_SIZE, THREAD_ALIGN << 1,
				GFP_KERNEL | __GFP_ZERO, 0,
				__builtin_return_address(0));
		if (!ptr)
			return -1;

		vfree(ptr);
	}

	return 0;
}

static int random_size_alloc_test(void)
{
	unsigned int n;
	void *p;
	int i;

	for (i = 0; i < test_loop_count; i++) {
		get_random_bytes(&n, sizeof(n));
		n = (n % 100) + 1;

		p = vmalloc(n * PAGE_SIZE);

		if (!p)
			return -1;

		*((__u8 *)p) = 1;
		vfree(p);
	}

	return 0;
}

static int long_busy_list_alloc_test(void)
{
	void *ptr_1, *ptr_2;
	void **ptr;
	int rv = -1;
	int i;

	ptr = vmalloc(sizeof(void *) * 15000);
	if (!ptr)
		return rv;

	for (i = 0; i < 15000; i++)
		ptr[i] = vmalloc(1 * PAGE_SIZE);

	for (i = 0; i < test_loop_count; i++) {
		ptr_1 = vmalloc(100 * PAGE_SIZE);
		if (!ptr_1)
			goto leave;

		ptr_2 = vmalloc(1 * PAGE_SIZE);
		if (!ptr_2) {
			vfree(ptr_1);
			goto leave;
		}

		*((__u8 *)ptr_1) = 0;
		*((__u8 *)ptr_2) = 1;

		vfree(ptr_1);
		vfree(ptr_2);
	}

	/* Success */
	rv = 0;

leave:
	for (i = 0; i < 15000; i++)
		vfree(ptr[i]);

	vfree(ptr);
	return rv;
}

static int full_fit_alloc_test(void)
{
	void **ptr, **junk_ptr, *tmp;
	int junk_length;
	int rv = -1;
	int i;

	/* Roughly 32 MB worth of pages, scaled by fls(num_online_cpus()). */
	junk_length = fls(num_online_cpus());
	junk_length *= (32 * 1024 * 1024 / PAGE_SIZE);

	ptr = vmalloc(sizeof(void *) * junk_length);
	if (!ptr)
		return rv;

	junk_ptr = vmalloc(sizeof(void *) * junk_length);
	if (!junk_ptr) {
		vfree(ptr);
		return rv;
	}

	for (i = 0; i < junk_length; i++) {
		ptr[i] = vmalloc(1 * PAGE_SIZE);
		junk_ptr[i] = vmalloc(1 * PAGE_SIZE);
	}

	for (i = 0; i < junk_length; i++)
		vfree(junk_ptr[i]);

	for (i = 0; i < test_loop_count; i++) {
		tmp = vmalloc(1 * PAGE_SIZE);

		if (!tmp)
			goto error;

		*((__u8 *)tmp) = 1;
		vfree(tmp);
	}

	/* Success */
	rv = 0;

error:
	for (i = 0; i < junk_length; i++)
		vfree(ptr[i]);

	vfree(ptr);
	vfree(junk_ptr);

	return rv;
}

static int fix_size_alloc_test(void)
{
	void *ptr;
	int i;

	for (i = 0; i < test_loop_count; i++) {
		ptr = vmalloc(3 * PAGE_SIZE);

		if (!ptr)
			return -1;

		*((__u8 *)ptr) = 0;

		vfree(ptr);
	}

	return 0;
}

static int
pcpu_alloc_test(void)
{
	int rv = 0;
#ifndef CONFIG_NEED_PER_CPU_KM
	void __percpu **pcpu;
	size_t size, align;
	int i;

	pcpu = vmalloc(sizeof(void __percpu *) * 35000);
	if (!pcpu)
		return -1;

	for (i = 0; i < 35000; i++) {
		unsigned int r;

		get_random_bytes(&r, sizeof(r));
		size = (r % (PAGE_SIZE / 4)) + 1;

		/*
		 * Maximum alignment is 1 << 11, i.e. half of a 4K PAGE_SIZE.
		 */
		get_random_bytes(&r, sizeof(r));
		align = 1 << ((r % 11) + 1);

		pcpu[i] = __alloc_percpu(size, align);
		if (!pcpu[i])
			rv = -1;
	}

	for (i = 0; i < 35000; i++)
		free_percpu(pcpu[i]);

	vfree(pcpu);
#endif
	return rv;
}

struct test_case_desc {
	const char *test_name;
	int (*test_func)(void);
};

static struct test_case_desc test_case_array[] = {
	{ "fix_size_alloc_test", fix_size_alloc_test },
	{ "full_fit_alloc_test", full_fit_alloc_test },
	{ "long_busy_list_alloc_test", long_busy_list_alloc_test },
	{ "random_size_alloc_test", random_size_alloc_test },
	{ "fix_align_alloc_test", fix_align_alloc_test },
	{ "random_size_align_alloc_test", random_size_align_alloc_test },
	{ "align_shift_alloc_test", align_shift_alloc_test },
	{ "pcpu_alloc_test", pcpu_alloc_test },
	/* Add a new test case here. */
};

struct test_case_data {
	int test_failed;
	int test_passed;
	u64 time;
};

/* Split it to get rid of: WARNING: line over 80 characters */
static struct test_case_data
	per_cpu_test_data[NR_CPUS][ARRAY_SIZE(test_case_array)];

static struct test_driver {
	struct task_struct *task;
	unsigned long start;
	unsigned long stop;
	int cpu;
} per_cpu_test_driver[NR_CPUS];

static void shuffle_array(int *arr, int n)
{
	unsigned int rnd;
	int i, j, x;

	for (i = n - 1; i > 0; i--) {
		get_random_bytes(&rnd, sizeof(rnd));

		/* Cut the range. */
		j = rnd % i;

		/* Swap indexes. */
		x = arr[i];
		arr[i] = arr[j];
		arr[j] = x;
	}
}

static int test_func(void *private)
{
	struct test_driver *t = private;
	int random_array[ARRAY_SIZE(test_case_array)];
	int index, i, j;
	ktime_t kt;
	u64 delta;

	if (set_cpus_allowed_ptr(current, cpumask_of(t->cpu)) < 0)
		pr_err("Failed to set affinity to %d CPU\n", t->cpu);

	for (i = 0; i < ARRAY_SIZE(test_case_array); i++)
		random_array[i] = i;

	if (!sequential_test_order)
		shuffle_array(random_array, ARRAY_SIZE(test_case_array));

	/*
	 * Block until initialization is done.
	 */
	down_read(&prepare_for_test_rwsem);

	t->start = get_cycles();
	for (i = 0; i < ARRAY_SIZE(test_case_array); i++) {
		index = random_array[i];

		/*
		 * Skip the tests that are not selected in run_test_mask.
		 */
		if (!(run_test_mask & (1 << index)))
			continue;

		kt = ktime_get();
		for (j = 0; j < test_repeat_count; j++) {
			if (!test_case_array[index].test_func())
				per_cpu_test_data[t->cpu][index].test_passed++;
			else
				per_cpu_test_data[t->cpu][index].test_failed++;
		}

		/*
		 * Take the average time the test took.
		 */
		delta = (u64) ktime_us_delta(ktime_get(), kt);
		do_div(delta, (u32) test_repeat_count);

		per_cpu_test_data[t->cpu][index].time = delta;
	}
	t->stop = get_cycles();

	up_read(&prepare_for_test_rwsem);
	test_report_one_done();

	/*
	 * Wait for the kthread_stop() call.
	 */
	while (!kthread_should_stop())
		msleep(10);

	return 0;
}

static void
init_test_configuration(void)
{
	/*
	 * Reset all data of all CPUs.
	 */
	memset(per_cpu_test_data, 0, sizeof(per_cpu_test_data));

	if (single_cpu_test)
		cpumask_set_cpu(cpumask_first(cpu_online_mask),
			&cpus_run_test_mask);
	else
		cpumask_and(&cpus_run_test_mask, cpu_online_mask,
			cpu_online_mask);

	if (test_repeat_count <= 0)
		test_repeat_count = 1;

	if (test_loop_count <= 0)
		test_loop_count = 1;
}

static void do_concurrent_test(void)
{
	int cpu, ret;

	/*
	 * Set some basic configurations plus sanity check.
	 */
	init_test_configuration();

	/*
	 * Put all workers on hold.
	 */
	down_write(&prepare_for_test_rwsem);

	for_each_cpu(cpu, &cpus_run_test_mask) {
		struct test_driver *t = &per_cpu_test_driver[cpu];

		t->cpu = cpu;
		t->task = kthread_run(test_func, t, "vmalloc_test/%d", cpu);

		if (!IS_ERR(t->task))
			/* Success. */
			atomic_inc(&test_n_undone);
		else
			pr_err("Failed to start kthread for %d CPU\n", cpu);
	}

	/*
	 * Now let the workers do their job.
	 */
	up_write(&prepare_for_test_rwsem);

	/*
	 * Sleep quietly until all workers are done, waking up at a one
	 * second interval. Since the tests can take a lot of time, a
	 * plain wait could trigger the hung-task watchdog and its stack
	 * trace. That is why we use wait_for_completion_timeout() with
	 * an HZ timeout.
	 */
	do {
		ret = wait_for_completion_timeout(&test_all_done_comp, HZ);
	} while (!ret);

	for_each_cpu(cpu, &cpus_run_test_mask) {
		struct test_driver *t = &per_cpu_test_driver[cpu];
		int i;

		if (!IS_ERR(t->task))
			kthread_stop(t->task);

		for (i = 0; i < ARRAY_SIZE(test_case_array); i++) {
			if (!(run_test_mask & (1 << i)))
				continue;

			pr_info(
				"Summary: %s passed: %d failed: %d repeat: %d loops: %d avg: %llu usec\n",
				test_case_array[i].test_name,
				per_cpu_test_data[cpu][i].test_passed,
				per_cpu_test_data[cpu][i].test_failed,
				test_repeat_count, test_loop_count,
				per_cpu_test_data[cpu][i].time);
		}

		pr_info("All tests took CPU%d=%lu cycles\n",
			cpu, t->stop - t->start);
	}
}

static int vmalloc_test_init(void)
{
	do_concurrent_test();
	return -EAGAIN; /* Failing the init call unloads the module right away. */
}

static void vmalloc_test_exit(void)
{
}

module_init(vmalloc_test_init)
module_exit(vmalloc_test_exit)

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Uladzislau Rezki");
MODULE_DESCRIPTION("vmalloc test module");
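
/*
 * Note: results are only reported via printk. Since the init call
 * intentionally returns -EAGAIN, the module does not stay resident;
 * after loading it, the per-test summaries can be inspected with, for
 * example:
 *
 *   dmesg | grep "Summary:"
 *
 * which matches the pr_info() format used in do_concurrent_test() above.
 */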