// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE

#include <linux/limits.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/wait.h>
#include <errno.h>
#include <sys/sysinfo.h>
#include <pthread.h>

#include "../kselftest.h"
#include "cgroup_util.h"


/*
 * Memory cgroup charging is performed using percpu batches 64 pages
 * big (look at MEMCG_CHARGE_BATCH), whereas memory.stat is exact. So
 * the maximum discrepancy between charge and vmstat entries is number
 * of cpus multiplied by 64 pages.
 */
#define MAX_VMSTAT_ERROR (4096 * 64 * get_nprocs())


/*
 * Populate the dcache with negative dentries by stat()-ing non-existent
 * files with long, unique names.
 */
static int alloc_dcache(const char *cgroup, void *arg)
{
	unsigned long i;
	struct stat st;
	char buf[128];

	for (i = 0; i < (unsigned long)arg; i++) {
		snprintf(buf, sizeof(buf),
			 "/something-non-existent-with-a-long-name-%64lu-%d",
			 i, getpid());
		stat(buf, &st);
	}

	return 0;
}

/*
 * This test allocates 100000 negative dentries with long names.
 * Then it checks that "slab" in memory.stat is larger than 1M.
 * Then it sets memory.high to 1M and checks that at least 1/2
 * of the slab memory has been reclaimed.
 */
static int test_kmem_basic(const char *root)
{
	int ret = KSFT_FAIL;
	char *cg = NULL;
	long slab0, slab1, current;

	cg = cg_name(root, "kmem_basic_test");
	if (!cg)
		goto cleanup;

	if (cg_create(cg))
		goto cleanup;

	if (cg_run(cg, alloc_dcache, (void *)100000))
		goto cleanup;

	slab0 = cg_read_key_long(cg, "memory.stat", "slab ");
	if (slab0 < (1 << 20))
		goto cleanup;

	cg_write(cg, "memory.high", "1M");

	/* wait for RCU freeing */
	sleep(1);

	slab1 = cg_read_key_long(cg, "memory.stat", "slab ");
	if (slab1 <= 0)
		goto cleanup;

	current = cg_read_long(cg, "memory.current");
	if (current <= 0)
		goto cleanup;

	if (slab1 < slab0 / 2 && current < slab0 / 2)
		ret = KSFT_PASS;
cleanup:
	cg_destroy(cg);
	free(cg);

	return ret;
}

static void *alloc_kmem_fn(void *arg)
{
	alloc_dcache(NULL, (void *)100);
	return NULL;
}

static int alloc_kmem_smp(const char *cgroup, void *arg)
{
	int nr_threads = 2 * get_nprocs();
	pthread_t *tinfo;
	unsigned long i;
	int ret = -1;

	tinfo = calloc(nr_threads, sizeof(pthread_t));
	if (tinfo == NULL)
		return -1;

	for (i = 0; i < nr_threads; i++) {
		if (pthread_create(&tinfo[i], NULL, &alloc_kmem_fn,
				   (void *)i)) {
			free(tinfo);
			return -1;
		}
	}

	for (i = 0; i < nr_threads; i++) {
		ret = pthread_join(tinfo[i], NULL);
		if (ret)
			break;
	}

	free(tinfo);
	return ret;
}

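/*
 * Run fn(child, arg) in "times" freshly created child cgroups of
 * "parent", destroying each child cgroup right after its run.
 */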
static int cg_run_in_subcgroups(const char *parent,
				int (*fn)(const char *cgroup, void *arg),
				void *arg, int times)
{
	char *child;
	int i;

	for (i = 0; i < times; i++) {
		child = cg_name_indexed(parent, "child", i);
		if (!child)
			return -1;

		if (cg_create(child)) {
			cg_destroy(child);
			free(child);
			return -1;
		}

		if (cg_run(child, fn, arg)) {
			cg_destroy(child);
			free(child);
			return -1;
		}

		cg_destroy(child);
		free(child);
	}

	return 0;
}

/*
 * The test creates and destroys a large number of cgroups. In each cgroup it
 * allocates some slab memory (mostly negative dentries) using 2 * NR_CPUS
 * threads. Then it checks the sanity of numbers on the parent level:
 * the total size of the cgroups should be roughly equal to
 * anon + file + slab + kernel_stack + pagetables + percpu + sock.
 */
static int test_kmem_memcg_deletion(const char *root)
{
	long current, slab, anon, file, kernel_stack, pagetables, percpu, sock, sum;
	int ret = KSFT_FAIL;
	char *parent;

	parent = cg_name(root, "kmem_memcg_deletion_test");
	if (!parent)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_run_in_subcgroups(parent, alloc_kmem_smp, NULL, 100))
		goto cleanup;

	current = cg_read_long(parent, "memory.current");
	slab = cg_read_key_long(parent, "memory.stat", "slab ");
	anon = cg_read_key_long(parent, "memory.stat", "anon ");
	file = cg_read_key_long(parent, "memory.stat", "file ");
	kernel_stack = cg_read_key_long(parent, "memory.stat", "kernel_stack ");
	pagetables = cg_read_key_long(parent, "memory.stat", "pagetables ");
	percpu = cg_read_key_long(parent, "memory.stat", "percpu ");
	sock = cg_read_key_long(parent, "memory.stat", "sock ");
	if (current < 0 || slab < 0 || anon < 0 || file < 0 ||
	    kernel_stack < 0 || pagetables < 0 || percpu < 0 || sock < 0)
		goto cleanup;

	sum = slab + anon + file + kernel_stack + pagetables + percpu + sock;
	if (labs(sum - current) < MAX_VMSTAT_ERROR) {
		ret = KSFT_PASS;
	} else {
		printf("memory.current = %ld\n", current);
		printf("slab + anon + file + kernel_stack + pagetables + percpu + sock = %ld\n",
		       sum);
		printf("slab = %ld\n", slab);
		printf("anon = %ld\n", anon);
		printf("file = %ld\n", file);
		printf("kernel_stack = %ld\n", kernel_stack);
		printf("pagetables = %ld\n", pagetables);
		printf("percpu = %ld\n", percpu);
		printf("sock = %ld\n", sock);
	}

cleanup:
	cg_destroy(parent);
	free(parent);

	return ret;
}

/*
 * The test reads the entire /proc/kpagecgroup. If the operation completes
 * successfully (and the kernel doesn't panic), the test is treated as passed.
 */
static int test_kmem_proc_kpagecgroup(const char *root)
{
	unsigned long buf[128];
	int ret = KSFT_FAIL;
	ssize_t len;
	int fd;

	fd = open("/proc/kpagecgroup", O_RDONLY);
	if (fd < 0)
		return ret;

	do {
		len = read(fd, buf, sizeof(buf));
	} while (len > 0);

	if (len == 0)
		ret = KSFT_PASS;

	close(fd);
	return ret;
}

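/*
 * Helpers for test_kmem_kernel_stacks(): pthread_wait_fn() parks a thread
 * in sleep() so that its kernel stack stays allocated, and
 * spawn_1000_threads() creates 1000 such threads and then checks the
 * kernel_stack counter of the given cgroup.
 */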
static void *pthread_wait_fn(void *arg)
{
	sleep(100);
	return NULL;
}

static int spawn_1000_threads(const char *cgroup, void *arg)
{
	int nr_threads = 1000;
	pthread_t *tinfo;
	unsigned long i;
	long stack;
	int ret = -1;

	tinfo = calloc(nr_threads, sizeof(pthread_t));
	if (tinfo == NULL)
		return -1;

	for (i = 0; i < nr_threads; i++) {
		if (pthread_create(&tinfo[i], NULL, &pthread_wait_fn,
				   (void *)i)) {
			free(tinfo);
			return -1;
		}
	}

	stack = cg_read_key_long(cgroup, "memory.stat", "kernel_stack ");
	if (stack >= 4096 * 1000)
		ret = 0;

	free(tinfo);
	return ret;
}

/*
 * The test spawns a process, which spawns 1000 threads. Then it checks
 * that memory.stat's kernel_stack is at least 1000 pages large.
 */
static int test_kmem_kernel_stacks(const char *root)
{
	int ret = KSFT_FAIL;
	char *cg = NULL;

	cg = cg_name(root, "kmem_kernel_stacks_test");
	if (!cg)
		goto cleanup;

	if (cg_create(cg))
		goto cleanup;

	if (cg_run(cg, spawn_1000_threads, NULL))
		goto cleanup;

	ret = KSFT_PASS;
cleanup:
	cg_destroy(cg);
	free(cg);

	return ret;
}

/*
 * This test sequentially creates 30 child cgroups, allocates some
 * kernel memory in each of them, and deletes them. Then it checks
 * that the number of dying cgroups on the parent level is 0.
 */
static int test_kmem_dead_cgroups(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent;
	long dead;
	int i;

	parent = cg_name(root, "kmem_dead_cgroups_test");
	if (!parent)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_run_in_subcgroups(parent, alloc_dcache, (void *)100, 30))
		goto cleanup;

	for (i = 0; i < 5; i++) {
		dead = cg_read_key_long(parent, "cgroup.stat",
					"nr_dying_descendants ");
		if (dead == 0) {
			ret = KSFT_PASS;
			break;
		}
		/*
		 * Reclaiming cgroups might take some time,
		 * let's wait a bit and repeat.
		 */
		sleep(1);
	}

cleanup:
	cg_destroy(parent);
	free(parent);

	return ret;
}

/*
 * This test creates a sub-tree with 1000 memory cgroups.
 * Then it checks that the memory.current on the parent level
 * is greater than 0 and approximately matches the percpu value
 * from memory.stat.
 */
static int test_percpu_basic(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent, *child;
	long current, percpu;
	int i;

	parent = cg_name(root, "percpu_basic_test");
	if (!parent)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup;

	for (i = 0; i < 1000; i++) {
		child = cg_name_indexed(parent, "child", i);
		if (!child)
			return -1;

		if (cg_create(child))
			goto cleanup_children;

		free(child);
	}

	current = cg_read_long(parent, "memory.current");
	percpu = cg_read_key_long(parent, "memory.stat", "percpu ");

	if (current > 0 && percpu > 0 &&
	    labs(current - percpu) < MAX_VMSTAT_ERROR)
		ret = KSFT_PASS;
	else
		printf("memory.current %ld\npercpu %ld\n",
		       current, percpu);

cleanup_children:
	for (i = 0; i < 1000; i++) {
		child = cg_name_indexed(parent, "child", i);
		cg_destroy(child);
		free(child);
	}

cleanup:
	cg_destroy(parent);
	free(parent);

	return ret;
}

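/*
 * The T() macro pairs each test function with its stringified name,
 * so the runner in main() can report per-test results through the
 * kselftest framework without duplicating the identifiers.
 */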
#define T(x) { x, #x }
struct kmem_test {
	int (*fn)(const char *root);
	const char *name;
} tests[] = {
	T(test_kmem_basic),
	T(test_kmem_memcg_deletion),
	T(test_kmem_proc_kpagecgroup),
	T(test_kmem_kernel_stacks),
	T(test_kmem_dead_cgroups),
	T(test_percpu_basic),
};
#undef T

int main(int argc, char **argv)
{
	char root[PATH_MAX];
	int i, ret = EXIT_SUCCESS;

	if (cg_find_unified_root(root, sizeof(root)))
		ksft_exit_skip("cgroup v2 isn't mounted\n");

	/*
	 * Check that memory controller is available:
	 * memory is listed in cgroup.controllers
	 */
	if (cg_read_strstr(root, "cgroup.controllers", "memory"))
		ksft_exit_skip("memory controller isn't available\n");

	if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
		if (cg_write(root, "cgroup.subtree_control", "+memory"))
			ksft_exit_skip("Failed to set memory controller\n");

	for (i = 0; i < ARRAY_SIZE(tests); i++) {
		switch (tests[i].fn(root)) {
		case KSFT_PASS:
			ksft_test_result_pass("%s\n", tests[i].name);
			break;
		case KSFT_SKIP:
			ksft_test_result_skip("%s\n", tests[i].name);
			break;
		default:
			ret = EXIT_FAILURE;
			ksft_test_result_fail("%s\n", tests[i].name);
			break;
		}
	}

	return ret;
}