1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3
4 #include <linux/limits.h>
5 #include <unistd.h>
6 #include <stdio.h>
7 #include <signal.h>
8 #include <sys/sysinfo.h>
9 #include <string.h>
10 #include <sys/wait.h>
11 #include <sys/mman.h>
12
13 #include "../kselftest.h"
14 #include "cgroup_util.h"
15
/*
 * Parse one unsigned decimal integer from the start of the file at @path
 * and store it in @value.
 *
 * Returns 0 on success, -1 if the file cannot be opened or does not begin
 * with a number.
 */
static int read_int(const char *path, size_t *value)
{
	FILE *file;
	int ret = 0;

	file = fopen(path, "r");
	if (!file)
		return -1;
	/* %zu is the conversion that matches a size_t destination */
	if (fscanf(file, "%zu", value) != 1)
		ret = -1;
	fclose(file);
	return ret;
}
29
/*
 * Write @value to /proc/sys/vm/min_free_kbytes.
 *
 * Returns the number of characters formatted (positive) on success and a
 * negative value if the file cannot be opened, formatting fails, or the
 * buffered data cannot be flushed when the file is closed.
 */
static int set_min_free_kb(size_t value)
{
	FILE *file;
	int ret;

	file = fopen("/proc/sys/vm/min_free_kbytes", "w");
	if (!file)
		return -1;
	ret = fprintf(file, "%zu\n", value);
	/*
	 * stdio buffers the write, so a failure to store the value may only
	 * become visible when the stream is flushed by fclose().
	 */
	if (fclose(file) != 0)
		return -1;
	return ret;
}
42
/*
 * Fetch the current value of /proc/sys/vm/min_free_kbytes into @value.
 * The return value is passed through from read_int().
 */
static int read_min_free_kb(size_t *value)
{
	static const char sysctl_path[] = "/proc/sys/vm/min_free_kbytes";

	return read_int(sysctl_path, value);
}
47
/*
 * Read the zswap debugfs "stored_pages" counter into @value.
 * The return value is passed through from read_int().
 */
static int get_zswap_stored_pages(size_t *value)
{
	static const char counter_path[] = "/sys/kernel/debug/zswap/stored_pages";

	return read_int(counter_path, value);
}
52
/*
 * Read the zswap debugfs "written_back_pages" counter into @value.
 * The return value is passed through from read_int().
 */
static int get_zswap_written_back_pages(size_t *value)
{
	static const char counter_path[] =
		"/sys/kernel/debug/zswap/written_back_pages";

	return read_int(counter_path, value);
}
57
/*
 * Allocate a buffer whose size in bytes is carried in @arg (cast to size_t),
 * write one byte every 4095 bytes across it, then free it.
 *
 * @cgroup is unused; the two-argument signature is the one cg_run() is
 * called with in this file.
 *
 * Returns 0 on success, -1 if the allocation fails.
 */
static int allocate_bytes(const char *cgroup, void *arg)
{
	size_t size = (size_t)arg;
	char *mem = malloc(size);

	if (!mem)
		return -1;
	/* size_t index: matches the type of size and cannot overflow below it */
	for (size_t off = 0; off < size; off += 4095)
		mem[off] = 'a';
	free(mem);
	return 0;
}
70
71 /*
72 * When trying to store a memcg page in zswap, if the memcg hits its memory
73 * limit in zswap, writeback should not be triggered.
74 *
75 * This was fixed with commit 0bdf0efa180a("zswap: do not shrink if cgroup may
76 * not zswap"). Needs to be revised when a per memcg writeback mechanism is
77 * implemented.
78 */
test_no_invasive_cgroup_shrink(const char * root)79 static int test_no_invasive_cgroup_shrink(const char *root)
80 {
81 size_t written_back_before, written_back_after;
82 int ret = KSFT_FAIL;
83 char *test_group;
84
85 /* Set up */
86 test_group = cg_name(root, "no_shrink_test");
87 if (!test_group)
88 goto out;
89 if (cg_create(test_group))
90 goto out;
91 if (cg_write(test_group, "memory.max", "1M"))
92 goto out;
93 if (cg_write(test_group, "memory.zswap.max", "10K"))
94 goto out;
95 if (get_zswap_written_back_pages(&written_back_before))
96 goto out;
97
98 /* Allocate 10x memory.max to push memory into zswap */
99 if (cg_run(test_group, allocate_bytes, (void *)MB(10)))
100 goto out;
101
102 /* Verify that no writeback happened because of the memcg allocation */
103 if (get_zswap_written_back_pages(&written_back_after))
104 goto out;
105 if (written_back_after == written_back_before)
106 ret = KSFT_PASS;
107 out:
108 cg_destroy(test_group);
109 free(test_group);
110 return ret;
111 }
112
/*
 * Arguments and status exchanged between test_no_kmem_bypass() and the
 * child it spawns; the parent places the instance in a shared anonymous
 * mapping so that the child's update is visible to it.
 */
struct no_kmem_bypass_child_args {
	/* Number of bytes the child passes to malloc() and then touches */
	size_t target_alloc_bytes;
	/* Set to true by the child once it has finished (or failed) allocating */
	size_t child_allocated;
};
117
no_kmem_bypass_child(const char * cgroup,void * arg)118 static int no_kmem_bypass_child(const char *cgroup, void *arg)
119 {
120 struct no_kmem_bypass_child_args *values = arg;
121 void *allocation;
122
123 allocation = malloc(values->target_alloc_bytes);
124 if (!allocation) {
125 values->child_allocated = true;
126 return -1;
127 }
128 for (long i = 0; i < values->target_alloc_bytes; i += 4095)
129 ((char *)allocation)[i] = 'a';
130 values->child_allocated = true;
131 pause();
132 free(allocation);
133 return 0;
134 }
135
136 /*
137 * When pages owned by a memcg are pushed to zswap by kswapd, they should be
138 * charged to that cgroup. This wasn't the case before commit
139 * cd08d80ecdac("mm: correctly charge compressed memory to its memcg").
140 *
141 * The test first allocates memory in a memcg, then raises min_free_kbytes to
142 * a very high value so that the allocation falls below low wm, then makes
143 * another allocation to trigger kswapd that should push the memcg-owned pages
144 * to zswap and verifies that the zswap pages are correctly charged.
145 *
146 * To be run on a VM with at most 4G of memory.
147 */
test_no_kmem_bypass(const char * root)148 static int test_no_kmem_bypass(const char *root)
149 {
150 size_t min_free_kb_high, min_free_kb_low, min_free_kb_original;
151 struct no_kmem_bypass_child_args *values;
152 size_t trigger_allocation_size;
153 int wait_child_iteration = 0;
154 long stored_pages_threshold;
155 struct sysinfo sys_info;
156 int ret = KSFT_FAIL;
157 int child_status;
158 char *test_group;
159 pid_t child_pid;
160
161 /* Read sys info and compute test values accordingly */
162 if (sysinfo(&sys_info) != 0)
163 return KSFT_FAIL;
164 if (sys_info.totalram > 5000000000)
165 return KSFT_SKIP;
166 values = mmap(0, sizeof(struct no_kmem_bypass_child_args), PROT_READ |
167 PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
168 if (values == MAP_FAILED)
169 return KSFT_FAIL;
170 if (read_min_free_kb(&min_free_kb_original))
171 return KSFT_FAIL;
172 min_free_kb_high = sys_info.totalram / 2000;
173 min_free_kb_low = sys_info.totalram / 500000;
174 values->target_alloc_bytes = (sys_info.totalram - min_free_kb_high * 1000) +
175 sys_info.totalram * 5 / 100;
176 stored_pages_threshold = sys_info.totalram / 5 / 4096;
177 trigger_allocation_size = sys_info.totalram / 20;
178
179 /* Set up test memcg */
180 if (cg_write(root, "cgroup.subtree_control", "+memory"))
181 goto out;
182 test_group = cg_name(root, "kmem_bypass_test");
183 if (!test_group)
184 goto out;
185
186 /* Spawn memcg child and wait for it to allocate */
187 set_min_free_kb(min_free_kb_low);
188 if (cg_create(test_group))
189 goto out;
190 values->child_allocated = false;
191 child_pid = cg_run_nowait(test_group, no_kmem_bypass_child, values);
192 if (child_pid < 0)
193 goto out;
194 while (!values->child_allocated && wait_child_iteration++ < 10000)
195 usleep(1000);
196
197 /* Try to wakeup kswapd and let it push child memory to zswap */
198 set_min_free_kb(min_free_kb_high);
199 for (int i = 0; i < 20; i++) {
200 size_t stored_pages;
201 char *trigger_allocation = malloc(trigger_allocation_size);
202
203 if (!trigger_allocation)
204 break;
205 for (int i = 0; i < trigger_allocation_size; i += 4095)
206 trigger_allocation[i] = 'b';
207 usleep(100000);
208 free(trigger_allocation);
209 if (get_zswap_stored_pages(&stored_pages))
210 break;
211 if (stored_pages < 0)
212 break;
213 /* If memory was pushed to zswap, verify it belongs to memcg */
214 if (stored_pages > stored_pages_threshold) {
215 int zswapped = cg_read_key_long(test_group, "memory.stat", "zswapped ");
216 int delta = stored_pages * 4096 - zswapped;
217 int result_ok = delta < stored_pages * 4096 / 4;
218
219 ret = result_ok ? KSFT_PASS : KSFT_FAIL;
220 break;
221 }
222 }
223
224 kill(child_pid, SIGTERM);
225 waitpid(child_pid, &child_status, 0);
226 out:
227 set_min_free_kb(min_free_kb_original);
228 cg_destroy(test_group);
229 free(test_group);
230 return ret;
231 }
232
233 #define T(x) { x, #x }
234 struct zswap_test {
235 int (*fn)(const char *root);
236 const char *name;
237 } tests[] = {
238 T(test_no_kmem_bypass),
239 T(test_no_invasive_cgroup_shrink),
240 };
241 #undef T
242
/* Report whether the path /sys/module/zswap exists. */
static bool zswap_configured(void)
{
	if (access("/sys/module/zswap", F_OK) != 0)
		return false;
	return true;
}
247
main(int argc,char ** argv)248 int main(int argc, char **argv)
249 {
250 char root[PATH_MAX];
251 int i, ret = EXIT_SUCCESS;
252
253 if (cg_find_unified_root(root, sizeof(root), NULL))
254 ksft_exit_skip("cgroup v2 isn't mounted\n");
255
256 if (!zswap_configured())
257 ksft_exit_skip("zswap isn't configured\n");
258
259 /*
260 * Check that memory controller is available:
261 * memory is listed in cgroup.controllers
262 */
263 if (cg_read_strstr(root, "cgroup.controllers", "memory"))
264 ksft_exit_skip("memory controller isn't available\n");
265
266 if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
267 if (cg_write(root, "cgroup.subtree_control", "+memory"))
268 ksft_exit_skip("Failed to set memory controller\n");
269
270 for (i = 0; i < ARRAY_SIZE(tests); i++) {
271 switch (tests[i].fn(root)) {
272 case KSFT_PASS:
273 ksft_test_result_pass("%s\n", tests[i].name);
274 break;
275 case KSFT_SKIP:
276 ksft_test_result_skip("%s\n", tests[i].name);
277 break;
278 default:
279 ret = EXIT_FAILURE;
280 ksft_test_result_fail("%s\n", tests[i].name);
281 break;
282 }
283 }
284
285 return ret;
286 }
287