1 // SPDX-License-Identifier: GPL-2.0 2 #define _GNU_SOURCE 3 4 #include <linux/limits.h> 5 #include <unistd.h> 6 #include <stdio.h> 7 #include <signal.h> 8 #include <sys/sysinfo.h> 9 #include <string.h> 10 #include <sys/wait.h> 11 #include <sys/mman.h> 12 13 #include "../kselftest.h" 14 #include "cgroup_util.h" 15 16 static int read_int(const char *path, size_t *value) 17 { 18 FILE *file; 19 int ret = 0; 20 21 file = fopen(path, "r"); 22 if (!file) 23 return -1; 24 if (fscanf(file, "%ld", value) != 1) 25 ret = -1; 26 fclose(file); 27 return ret; 28 } 29 30 static int set_min_free_kb(size_t value) 31 { 32 FILE *file; 33 int ret; 34 35 file = fopen("/proc/sys/vm/min_free_kbytes", "w"); 36 if (!file) 37 return -1; 38 ret = fprintf(file, "%ld\n", value); 39 fclose(file); 40 return ret; 41 } 42 43 static int read_min_free_kb(size_t *value) 44 { 45 return read_int("/proc/sys/vm/min_free_kbytes", value); 46 } 47 48 static int get_zswap_stored_pages(size_t *value) 49 { 50 return read_int("/sys/kernel/debug/zswap/stored_pages", value); 51 } 52 53 static int get_zswap_written_back_pages(size_t *value) 54 { 55 return read_int("/sys/kernel/debug/zswap/written_back_pages", value); 56 } 57 58 static int allocate_bytes(const char *cgroup, void *arg) 59 { 60 size_t size = (size_t)arg; 61 char *mem = (char *)malloc(size); 62 63 if (!mem) 64 return -1; 65 for (int i = 0; i < size; i += 4095) 66 mem[i] = 'a'; 67 free(mem); 68 return 0; 69 } 70 71 /* 72 * When trying to store a memcg page in zswap, if the memcg hits its memory 73 * limit in zswap, writeback should not be triggered. 74 * 75 * This was fixed with commit 0bdf0efa180a("zswap: do not shrink if cgroup may 76 * not zswap"). Needs to be revised when a per memcg writeback mechanism is 77 * implemented. 78 */ 79 static int test_no_invasive_cgroup_shrink(const char *root) 80 { 81 size_t written_back_before, written_back_after; 82 int ret = KSFT_FAIL; 83 char *test_group; 84 85 /* Set up */ 86 test_group = cg_name(root, "no_shrink_test"); 87 if (!test_group) 88 goto out; 89 if (cg_create(test_group)) 90 goto out; 91 if (cg_write(test_group, "memory.max", "1M")) 92 goto out; 93 if (cg_write(test_group, "memory.zswap.max", "10K")) 94 goto out; 95 if (get_zswap_written_back_pages(&written_back_before)) 96 goto out; 97 98 /* Allocate 10x memory.max to push memory into zswap */ 99 if (cg_run(test_group, allocate_bytes, (void *)MB(10))) 100 goto out; 101 102 /* Verify that no writeback happened because of the memcg allocation */ 103 if (get_zswap_written_back_pages(&written_back_after)) 104 goto out; 105 if (written_back_after == written_back_before) 106 ret = KSFT_PASS; 107 out: 108 cg_destroy(test_group); 109 free(test_group); 110 return ret; 111 } 112 113 struct no_kmem_bypass_child_args { 114 size_t target_alloc_bytes; 115 size_t child_allocated; 116 }; 117 118 static int no_kmem_bypass_child(const char *cgroup, void *arg) 119 { 120 struct no_kmem_bypass_child_args *values = arg; 121 void *allocation; 122 123 allocation = malloc(values->target_alloc_bytes); 124 if (!allocation) { 125 values->child_allocated = true; 126 return -1; 127 } 128 for (long i = 0; i < values->target_alloc_bytes; i += 4095) 129 ((char *)allocation)[i] = 'a'; 130 values->child_allocated = true; 131 pause(); 132 free(allocation); 133 return 0; 134 } 135 136 /* 137 * When pages owned by a memcg are pushed to zswap by kswapd, they should be 138 * charged to that cgroup. This wasn't the case before commit 139 * cd08d80ecdac("mm: correctly charge compressed memory to its memcg"). 140 * 141 * The test first allocates memory in a memcg, then raises min_free_kbytes to 142 * a very high value so that the allocation falls below low wm, then makes 143 * another allocation to trigger kswapd that should push the memcg-owned pages 144 * to zswap and verifies that the zswap pages are correctly charged. 145 * 146 * To be run on a VM with at most 4G of memory. 147 */ 148 static int test_no_kmem_bypass(const char *root) 149 { 150 size_t min_free_kb_high, min_free_kb_low, min_free_kb_original; 151 struct no_kmem_bypass_child_args *values; 152 size_t trigger_allocation_size; 153 int wait_child_iteration = 0; 154 long stored_pages_threshold; 155 struct sysinfo sys_info; 156 int ret = KSFT_FAIL; 157 int child_status; 158 char *test_group; 159 pid_t child_pid; 160 161 /* Read sys info and compute test values accordingly */ 162 if (sysinfo(&sys_info) != 0) 163 return KSFT_FAIL; 164 if (sys_info.totalram > 5000000000) 165 return KSFT_SKIP; 166 values = mmap(0, sizeof(struct no_kmem_bypass_child_args), PROT_READ | 167 PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); 168 if (values == MAP_FAILED) 169 return KSFT_FAIL; 170 if (read_min_free_kb(&min_free_kb_original)) 171 return KSFT_FAIL; 172 min_free_kb_high = sys_info.totalram / 2000; 173 min_free_kb_low = sys_info.totalram / 500000; 174 values->target_alloc_bytes = (sys_info.totalram - min_free_kb_high * 1000) + 175 sys_info.totalram * 5 / 100; 176 stored_pages_threshold = sys_info.totalram / 5 / 4096; 177 trigger_allocation_size = sys_info.totalram / 20; 178 179 /* Set up test memcg */ 180 if (cg_write(root, "cgroup.subtree_control", "+memory")) 181 goto out; 182 test_group = cg_name(root, "kmem_bypass_test"); 183 if (!test_group) 184 goto out; 185 186 /* Spawn memcg child and wait for it to allocate */ 187 set_min_free_kb(min_free_kb_low); 188 if (cg_create(test_group)) 189 goto out; 190 values->child_allocated = false; 191 child_pid = cg_run_nowait(test_group, no_kmem_bypass_child, values); 192 if (child_pid < 0) 193 goto out; 194 while (!values->child_allocated && wait_child_iteration++ < 10000) 195 usleep(1000); 196 197 /* Try to wakeup kswapd and let it push child memory to zswap */ 198 set_min_free_kb(min_free_kb_high); 199 for (int i = 0; i < 20; i++) { 200 size_t stored_pages; 201 char *trigger_allocation = malloc(trigger_allocation_size); 202 203 if (!trigger_allocation) 204 break; 205 for (int i = 0; i < trigger_allocation_size; i += 4095) 206 trigger_allocation[i] = 'b'; 207 usleep(100000); 208 free(trigger_allocation); 209 if (get_zswap_stored_pages(&stored_pages)) 210 break; 211 if (stored_pages < 0) 212 break; 213 /* If memory was pushed to zswap, verify it belongs to memcg */ 214 if (stored_pages > stored_pages_threshold) { 215 int zswapped = cg_read_key_long(test_group, "memory.stat", "zswapped "); 216 int delta = stored_pages * 4096 - zswapped; 217 int result_ok = delta < stored_pages * 4096 / 4; 218 219 ret = result_ok ? KSFT_PASS : KSFT_FAIL; 220 break; 221 } 222 } 223 224 kill(child_pid, SIGTERM); 225 waitpid(child_pid, &child_status, 0); 226 out: 227 set_min_free_kb(min_free_kb_original); 228 cg_destroy(test_group); 229 free(test_group); 230 return ret; 231 } 232 233 #define T(x) { x, #x } 234 struct zswap_test { 235 int (*fn)(const char *root); 236 const char *name; 237 } tests[] = { 238 T(test_no_kmem_bypass), 239 T(test_no_invasive_cgroup_shrink), 240 }; 241 #undef T 242 243 static bool zswap_configured(void) 244 { 245 return access("/sys/module/zswap", F_OK) == 0; 246 } 247 248 int main(int argc, char **argv) 249 { 250 char root[PATH_MAX]; 251 int i, ret = EXIT_SUCCESS; 252 253 if (cg_find_unified_root(root, sizeof(root), NULL)) 254 ksft_exit_skip("cgroup v2 isn't mounted\n"); 255 256 if (!zswap_configured()) 257 ksft_exit_skip("zswap isn't configured\n"); 258 259 /* 260 * Check that memory controller is available: 261 * memory is listed in cgroup.controllers 262 */ 263 if (cg_read_strstr(root, "cgroup.controllers", "memory")) 264 ksft_exit_skip("memory controller isn't available\n"); 265 266 if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) 267 if (cg_write(root, "cgroup.subtree_control", "+memory")) 268 ksft_exit_skip("Failed to set memory controller\n"); 269 270 for (i = 0; i < ARRAY_SIZE(tests); i++) { 271 switch (tests[i].fn(root)) { 272 case KSFT_PASS: 273 ksft_test_result_pass("%s\n", tests[i].name); 274 break; 275 case KSFT_SKIP: 276 ksft_test_result_skip("%s\n", tests[i].name); 277 break; 278 default: 279 ret = EXIT_FAILURE; 280 ksft_test_result_fail("%s\n", tests[i].name); 281 break; 282 } 283 } 284 285 return ret; 286 } 287