// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE

#include <linux/limits.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/wait.h>
#include <errno.h>
#include <sys/sysinfo.h>
#include <pthread.h>

#include "../kselftest.h"
#include "cgroup_util.h"


/*
 * Memory cgroup charging and vmstat data aggregation is performed using
 * percpu batches of 32 pages (see MEMCG_CHARGE_BATCH). So the maximum
 * discrepancy between the charge and vmstat entries is the number of cpus
 * multiplied by 32 pages multiplied by 2.
 */
#define MAX_VMSTAT_ERROR (4096 * 32 * 2 * get_nprocs())


/* Create (unsigned long)arg negative dentries by stat()ing missing paths. */
static int alloc_dcache(const char *cgroup, void *arg)
{
	unsigned long i;
	struct stat st;
	char buf[128];

	for (i = 0; i < (unsigned long)arg; i++) {
		snprintf(buf, sizeof(buf),
			 "/something-non-existent-with-a-long-name-%64lu-%d",
			 i, getpid());
		stat(buf, &st);
	}

	return 0;
}

/*
 * This test allocates 100000 negative dentries with long names.
 * Then it checks that "slab" in memory.stat is larger than 1M.
 * Then it sets memory.high to 1M and checks that at least 1/2
 * of the slab memory has been reclaimed.
 */
static int test_kmem_basic(const char *root)
{
	int ret = KSFT_FAIL;
	char *cg = NULL;
	long slab0, slab1, current;

	cg = cg_name(root, "kmem_basic_test");
	if (!cg)
		goto cleanup;

	if (cg_create(cg))
		goto cleanup;

	if (cg_run(cg, alloc_dcache, (void *)100000))
		goto cleanup;

	slab0 = cg_read_key_long(cg, "memory.stat", "slab ");
	if (slab0 < (1 << 20))
		goto cleanup;

	cg_write(cg, "memory.high", "1M");
	slab1 = cg_read_key_long(cg, "memory.stat", "slab ");
	if (slab1 <= 0)
		goto cleanup;

	current = cg_read_long(cg, "memory.current");
	if (current <= 0)
		goto cleanup;

	if (slab1 < slab0 / 2 && current < slab0 / 2)
		ret = KSFT_PASS;
cleanup:
	cg_destroy(cg);
	free(cg);

	return ret;
}

/* Thread worker for alloc_kmem_smp(): allocate 100 negative dentries. */
static void *alloc_kmem_fn(void *arg)
{
	alloc_dcache(NULL, (void *)100);
	return NULL;
}

/* Allocate slab memory from 2 * nr_cpus concurrently running threads. */
static int alloc_kmem_smp(const char *cgroup, void *arg)
{
	int nr_threads = 2 * get_nprocs();
	pthread_t *tinfo;
	unsigned long i;
	int ret = -1;

	tinfo = calloc(nr_threads, sizeof(pthread_t));
	if (tinfo == NULL)
		return -1;

	for (i = 0; i < nr_threads; i++) {
		if (pthread_create(&tinfo[i], NULL, &alloc_kmem_fn,
				   (void *)i)) {
			free(tinfo);
			return -1;
		}
	}

	for (i = 0; i < nr_threads; i++) {
		ret = pthread_join(tinfo[i], NULL);
		if (ret)
			break;
	}

	free(tinfo);
	return ret;
}

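/*
 * Run fn(arg) sequentially in "times" freshly created child cgroups of
 * "parent", destroying each child cgroup after the function returns.
 */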
static int cg_run_in_subcgroups(const char *parent,
				int (*fn)(const char *cgroup, void *arg),
				void *arg, int times)
{
	char *child;
	int i;

	for (i = 0; i < times; i++) {
		child = cg_name_indexed(parent, "child", i);
		if (!child)
			return -1;

		if (cg_create(child)) {
			cg_destroy(child);
			free(child);
			return -1;
		}

		if (cg_run(child, fn, arg)) {
			cg_destroy(child);
			free(child);
			return -1;
		}

		cg_destroy(child);
		free(child);
	}

	return 0;
}

/*
 * The test creates and destroys a large number of cgroups. In each cgroup it
 * allocates some slab memory (mostly negative dentries) using 2 * NR_CPUS
 * threads. Then it checks the sanity of the numbers on the parent level:
 * the total size of the cgroups should be roughly equal to
 * anon + file + slab + kernel_stack.
 */
static int test_kmem_memcg_deletion(const char *root)
{
	long current, slab, anon, file, kernel_stack, sum;
	int ret = KSFT_FAIL;
	char *parent;

	parent = cg_name(root, "kmem_memcg_deletion_test");
	if (!parent)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_run_in_subcgroups(parent, alloc_kmem_smp, NULL, 100))
		goto cleanup;

	current = cg_read_long(parent, "memory.current");
	slab = cg_read_key_long(parent, "memory.stat", "slab ");
	anon = cg_read_key_long(parent, "memory.stat", "anon ");
	file = cg_read_key_long(parent, "memory.stat", "file ");
	kernel_stack = cg_read_key_long(parent, "memory.stat", "kernel_stack ");
	if (current < 0 || slab < 0 || anon < 0 || file < 0 ||
	    kernel_stack < 0)
		goto cleanup;

	sum = slab + anon + file + kernel_stack;
	if (labs(sum - current) < MAX_VMSTAT_ERROR) {
		ret = KSFT_PASS;
	} else {
		printf("memory.current = %ld\n", current);
		printf("slab + anon + file + kernel_stack = %ld\n", sum);
		printf("slab = %ld\n", slab);
		printf("anon = %ld\n", anon);
		printf("file = %ld\n", file);
		printf("kernel_stack = %ld\n", kernel_stack);
	}

cleanup:
	cg_destroy(parent);
	free(parent);

	return ret;
}

/*
 * The test reads the entire /proc/kpagecgroup. If the operation completed
 * successfully (and the kernel didn't panic), the test is treated as passed.
 */
static int test_kmem_proc_kpagecgroup(const char *root)
{
	unsigned long buf[128];
	int ret = KSFT_FAIL;
	ssize_t len;
	int fd;

	fd = open("/proc/kpagecgroup", O_RDONLY);
	if (fd < 0)
		return ret;

	do {
		len = read(fd, buf, sizeof(buf));
	} while (len > 0);

	if (len == 0)
		ret = KSFT_PASS;

	close(fd);
	return ret;
}

/* Thread worker for spawn_1000_threads(): keep a kernel stack allocated. */
static void *pthread_wait_fn(void *arg)
{
	sleep(100);
	return NULL;
}

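/*
 * Create 1000 threads that just sleep, so their kernel stacks stay
 * allocated while the kernel_stack entry of memory.stat is read.
 */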
static int spawn_1000_threads(const char *cgroup, void *arg)
{
	int nr_threads = 1000;
	pthread_t *tinfo;
	unsigned long i;
	long stack;
	int ret = -1;

	tinfo = calloc(nr_threads, sizeof(pthread_t));
	if (tinfo == NULL)
		return -1;

	for (i = 0; i < nr_threads; i++) {
		if (pthread_create(&tinfo[i], NULL, &pthread_wait_fn,
				   (void *)i)) {
			free(tinfo);
			return -1;
		}
	}

	stack = cg_read_key_long(cgroup, "memory.stat", "kernel_stack ");
	if (stack >= 4096 * 1000)
		ret = 0;

	free(tinfo);
	return ret;
}

/*
 * The test spawns a process which creates 1000 threads. Then it checks
 * that memory.stat's kernel_stack is at least 1000 pages large.
 */
static int test_kmem_kernel_stacks(const char *root)
{
	int ret = KSFT_FAIL;
	char *cg = NULL;

	cg = cg_name(root, "kmem_kernel_stacks_test");
	if (!cg)
		goto cleanup;

	if (cg_create(cg))
		goto cleanup;

	if (cg_run(cg, spawn_1000_threads, NULL))
		goto cleanup;

	ret = KSFT_PASS;
cleanup:
	cg_destroy(cg);
	free(cg);

	return ret;
}

/*
 * This test sequentially creates 30 child cgroups, allocates some
 * kernel memory in each of them, and deletes them. Then it checks
 * that the number of dying cgroups on the parent level is 0.
 */
static int test_kmem_dead_cgroups(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent;
	long dead;
	int i;

	parent = cg_name(root, "kmem_dead_cgroups_test");
	if (!parent)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_run_in_subcgroups(parent, alloc_dcache, (void *)100, 30))
		goto cleanup;

	for (i = 0; i < 5; i++) {
		dead = cg_read_key_long(parent, "cgroup.stat",
					"nr_dying_descendants ");
		if (dead == 0) {
			ret = KSFT_PASS;
			break;
		}
		/*
		 * Reclaiming cgroups might take some time,
		 * let's wait a bit and repeat.
		 */
		sleep(1);
	}

cleanup:
	cg_destroy(parent);
	free(parent);

	return ret;
}

/*
 * This test creates a sub-tree with 1000 memory cgroups.
 * Then it checks that memory.current on the parent level
 * is greater than 0 and approximately matches the percpu value
 * from memory.stat.
 */
static int test_percpu_basic(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent, *child;
	long current, percpu;
	int i;

	parent = cg_name(root, "percpu_basic_test");
	if (!parent)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup;

	for (i = 0; i < 1000; i++) {
		child = cg_name_indexed(parent, "child", i);
		if (!child)
			goto cleanup;

		if (cg_create(child))
			goto cleanup_children;

		free(child);
	}

	current = cg_read_long(parent, "memory.current");
	percpu = cg_read_key_long(parent, "memory.stat", "percpu ");

	if (current > 0 && percpu > 0 && labs(current - percpu) <
	    MAX_VMSTAT_ERROR)
		ret = KSFT_PASS;
	else
		printf("memory.current %ld\npercpu %ld\n",
		       current, percpu);

cleanup_children:
	for (i = 0; i < 1000; i++) {
		child = cg_name_indexed(parent, "child", i);
		cg_destroy(child);
		free(child);
	}

cleanup:
	cg_destroy(parent);
	free(parent);

	return ret;
}

#define T(x) { x, #x }
struct kmem_test {
	int (*fn)(const char *root);
	const char *name;
} tests[] = {
	T(test_kmem_basic),
	T(test_kmem_memcg_deletion),
	T(test_kmem_proc_kpagecgroup),
	T(test_kmem_kernel_stacks),
	T(test_kmem_dead_cgroups),
	T(test_percpu_basic),
};
#undef T

int main(int argc, char **argv)
{
	char root[PATH_MAX];
	int i, ret = EXIT_SUCCESS;

	if (cg_find_unified_root(root, sizeof(root)))
		ksft_exit_skip("cgroup v2 isn't mounted\n");

	/*
	 * Check that memory controller is available:
	 * memory is listed in cgroup.controllers
	 */
	if (cg_read_strstr(root, "cgroup.controllers", "memory"))
		ksft_exit_skip("memory controller isn't available\n");

	if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
		if (cg_write(root, "cgroup.subtree_control", "+memory"))
			ksft_exit_skip("Failed to set memory controller\n");

	for (i = 0; i < ARRAY_SIZE(tests); i++) {
		switch (tests[i].fn(root)) {
		case KSFT_PASS:
			ksft_test_result_pass("%s\n", tests[i].name);
			break;
		case KSFT_SKIP:
			ksft_test_result_skip("%s\n", tests[i].name);
			break;
		default:
			ret = EXIT_FAILURE;
			ksft_test_result_fail("%s\n", tests[i].name);
			break;
		}
	}

	return ret;
}