1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright 2020 Google LLC
4  */
5 #define _GNU_SOURCE
6 
7 #include <errno.h>
8 #include <stdlib.h>
9 #include <stdio.h>
10 #include <string.h>
11 #include <sys/mman.h>
12 #include <time.h>
13 #include <stdbool.h>
14 
15 #include "../kselftest.h"
16 
/* Values for the expect_failure field of struct test. */
#define EXPECT_SUCCESS 0
#define EXPECT_FAILURE 1
/* Values for the overlapping field of struct config. */
#define NON_OVERLAPPING 0
#define OVERLAPPING 1
/* Nanoseconds per second, used to flatten a struct timespec. */
#define NS_PER_SEC 1000000000ULL
/* Validation thresholds, expressed in MB (see the -t option). */
#define VALIDATION_DEFAULT_THRESHOLD 4	/* 4MB */
#define VALIDATION_NO_THRESHOLD 0	/* Verify the entire region */

/* Note: evaluates its arguments more than once; avoid side effects. */
#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
26 
/* Parameters describing one remap operation. */
struct config {
	/* Alignment, in bytes, the source address is required to have. */
	unsigned long long src_alignment;
	/* Alignment, in bytes, the destination address is rounded to. */
	unsigned long long dest_alignment;
	/* Size, in bytes, of the region that is mapped and then moved. */
	unsigned long long region_size;
	/* OVERLAPPING or NON_OVERLAPPING: may the destination overlap the source? */
	int overlapping;
};
33 
/* A named test case: a remap configuration plus its expected outcome. */
struct test {
	/* Human-readable name printed in the test result line. */
	const char *name;
	/* Remap parameters handed to remap_region(). */
	struct config config;
	/* EXPECT_FAILURE if the remap is supposed to fail, else EXPECT_SUCCESS. */
	int expect_failure;
};
39 
/* Sizes, in bytes, used for region sizes and alignments in the test table. */
enum {
	_1KB = 1ULL << 10,	/* 1KB -> not page aligned */
	_4KB = 4ULL << 10,
	_8KB = 8ULL << 10,
	_1MB = 1ULL << 20,
	_2MB = 2ULL << 20,
	_4MB = 4ULL << 20,
	_1GB = 1ULL << 30,
	_2GB = 2ULL << 30,
	/* NOTE(review): fixed 2MB/1GB values; presumably the x86-64 4K-page PMD/PUD sizes - confirm for other configs */
	PMD = _2MB,
	PUD = _1GB,
};

/* Expands to the identifier page_size, which must be in scope at the use site. */
#define PTE page_size
54 
/*
 * Builds a struct test compound literal.
 *
 * source_align/destination_align/size fill the embedded struct config,
 * overlaps is OVERLAPPING or NON_OVERLAPPING, should_fail is EXPECT_FAILURE
 * or EXPECT_SUCCESS and test_name is the string reported for the test.
 */
#define MAKE_TEST(source_align, destination_align, size,	\
		  overlaps, should_fail, test_name)		\
(struct test){							\
	.name = test_name,					\
	.config = {						\
		.src_alignment = source_align,			\
		.dest_alignment = destination_align,		\
		.region_size = size,				\
		.overlapping = overlaps,			\
	},							\
	.expect_failure = should_fail				\
}
67 
/*
 * Probe whether [addr, addr + size) can be used as a remap destination.
 *
 * Returns false if the range collides with an existing mapping (e.g. text,
 * stack), otherwise returns true.
 */
static bool is_remap_region_valid(void *addr, unsigned long long size)
{
	void *probe;

	/*
	 * MAP_FIXED_NOREPLACE never clobbers an existing mapping; it fails
	 * with EEXIST instead, which is the collision signal we look for.
	 */
	probe = mmap(addr, size, PROT_READ | PROT_WRITE,
		     MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED, -1, 0);
	if (probe != MAP_FAILED) {
		/* The range was free: drop the probe mapping again. */
		munmap(probe, size);
		return true;
	}

	/* Any failure other than EEXIST is not treated as an overlap. */
	return errno != EEXIST;
}
91 
92 /* Returns mmap_min_addr sysctl tunable from procfs */
get_mmap_min_addr(void)93 static unsigned long long get_mmap_min_addr(void)
94 {
95 	FILE *fp;
96 	int n_matched;
97 	static unsigned long long addr;
98 
99 	if (addr)
100 		return addr;
101 
102 	fp = fopen("/proc/sys/vm/mmap_min_addr", "r");
103 	if (fp == NULL) {
104 		ksft_print_msg("Failed to open /proc/sys/vm/mmap_min_addr: %s\n",
105 			strerror(errno));
106 		exit(KSFT_SKIP);
107 	}
108 
109 	n_matched = fscanf(fp, "%llu", &addr);
110 	if (n_matched != 1) {
111 		ksft_print_msg("Failed to read /proc/sys/vm/mmap_min_addr: %s\n",
112 			strerror(errno));
113 		fclose(fp);
114 		exit(KSFT_SKIP);
115 	}
116 
117 	fclose(fp);
118 	return addr;
119 }
120 
/*
 * Using /proc/self/maps, assert that the specified address range is contained
 * within a single mapping.
 *
 * @maps_fp: open stream on a maps-formatted file; it is rewound first so the
 *           current contents are scanned from the beginning
 * @start:   first address of the range
 * @end:     end address of the range (compared against the end column)
 *
 * Each line is expected to begin with "<hex start>-<hex end>"; lines that do
 * not match are skipped. Returns true if one line covers [start, end].
 */
static bool is_range_mapped(FILE *maps_fp, void *start, void *end)
{
	char *line = NULL;
	size_t len = 0;
	bool success = false;

	rewind(maps_fp);

	while (getline(&line, &len, maps_fp) != -1) {
		unsigned long first_val, second_val;
		char *cursor, *second;

		/*
		 * Parse as unsigned: addresses in the upper half of the
		 * address space would saturate a signed conversion.
		 */
		first_val = strtoul(line, &cursor, 16);
		if (cursor == line || *cursor != '-')
			continue;

		second = cursor + 1;
		second_val = strtoul(second, &cursor, 16);
		if (cursor == second)
			continue;

		if (first_val <= (unsigned long) start &&
		    second_val >= (unsigned long) end) {
			success = true;
			break;
		}
	}

	/* getline() allocates the buffer; release it on every path. */
	free(line);
	return success;
}
147 
/*
 * Validate that merge is called when a mapping is expanded.
 *
 * A three-page mapping is created and its middle page unmapped. The mapping
 * holding the first page is then grown to two pages so that it fills the
 * hole. The pieces should merge, leaving one mapping that spans all three
 * pages, which is checked through @maps_fp.
 */
static void mremap_expand_merge(FILE *maps_fp, unsigned long page_size)
{
	const char *test_name = "mremap expand merge";
	char *base, *grown;
	bool merged = false;

	base = mmap(NULL, 3 * page_size, PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (base == MAP_FAILED) {
		ksft_print_msg("mmap failed: %s\n", strerror(errno));
	} else {
		/* Punch the hole, then grow the first page into it. */
		munmap(base + page_size, page_size);
		grown = mremap(base, page_size, 2 * page_size, 0);

		if (grown != MAP_FAILED) {
			merged = is_range_mapped(maps_fp, base,
						 base + 3 * page_size);
			munmap(base, 3 * page_size);
		} else {
			ksft_print_msg("mremap failed: %s\n", strerror(errno));
			/* Only the first and last pages are still mapped. */
			munmap(base, page_size);
			munmap(base + 2 * page_size, page_size);
		}
	}

	if (!merged)
		ksft_test_result_fail("%s\n", test_name);
	else
		ksft_test_result_pass("%s\n", test_name);
}
187 
/*
 * Variant of mremap_expand_merge(): rather than removing the middle page, the
 * last page is removed and the expansion is requested starting from the
 * second page, i.e. at an offset into the mapping. This should restore the
 * mapping to its original three-page state.
 */
static void mremap_expand_merge_offset(FILE *maps_fp, unsigned long page_size)
{
	const char *test_name = "mremap expand merge offset";
	char *base, *grown;
	bool merged = false;

	base = mmap(NULL, 3 * page_size, PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (base == MAP_FAILED) {
		ksft_print_msg("mmap failed: %s\n", strerror(errno));
	} else {
		/* Unmap final page to ensure we have space to expand. */
		munmap(base + 2 * page_size, page_size);
		grown = mremap(base + page_size, page_size, 2 * page_size, 0);

		if (grown != MAP_FAILED) {
			merged = is_range_mapped(maps_fp, base,
						 base + 3 * page_size);
			munmap(base, 3 * page_size);
		} else {
			ksft_print_msg("mremap failed: %s\n", strerror(errno));
			/* The first two pages are all that is left. */
			munmap(base, 2 * page_size);
		}
	}

	if (!merged)
		ksft_test_result_fail("%s\n", test_name);
	else
		ksft_test_result_pass("%s\n", test_name);
}
226 
227 /*
228  * Returns the start address of the mapping on success, else returns
229  * NULL on failure.
230  */
get_source_mapping(struct config c)231 static void *get_source_mapping(struct config c)
232 {
233 	unsigned long long addr = 0ULL;
234 	void *src_addr = NULL;
235 	unsigned long long mmap_min_addr;
236 
237 	mmap_min_addr = get_mmap_min_addr();
238 
239 retry:
240 	addr += c.src_alignment;
241 	if (addr < mmap_min_addr)
242 		goto retry;
243 
244 	src_addr = mmap((void *) addr, c.region_size, PROT_READ | PROT_WRITE,
245 					MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
246 					-1, 0);
247 	if (src_addr == MAP_FAILED) {
248 		if (errno == EPERM || errno == EEXIST)
249 			goto retry;
250 		goto error;
251 	}
252 	/*
253 	 * Check that the address is aligned to the specified alignment.
254 	 * Addresses which have alignments that are multiples of that
255 	 * specified are not considered valid. For instance, 1GB address is
256 	 * 2MB-aligned, however it will not be considered valid for a
257 	 * requested alignment of 2MB. This is done to reduce coincidental
258 	 * alignment in the tests.
259 	 */
260 	if (((unsigned long long) src_addr & (c.src_alignment - 1)) ||
261 			!((unsigned long long) src_addr & c.src_alignment)) {
262 		munmap(src_addr, c.region_size);
263 		goto retry;
264 	}
265 
266 	if (!src_addr)
267 		goto error;
268 
269 	return src_addr;
270 error:
271 	ksft_print_msg("Failed to map source region: %s\n",
272 			strerror(errno));
273 	return NULL;
274 }
275 
276 /* Returns the time taken for the remap on success else returns -1. */
remap_region(struct config c,unsigned int threshold_mb,char pattern_seed)277 static long long remap_region(struct config c, unsigned int threshold_mb,
278 			      char pattern_seed)
279 {
280 	void *addr, *src_addr, *dest_addr;
281 	unsigned long long i;
282 	struct timespec t_start = {0, 0}, t_end = {0, 0};
283 	long long  start_ns, end_ns, align_mask, ret, offset;
284 	unsigned long long threshold;
285 
286 	if (threshold_mb == VALIDATION_NO_THRESHOLD)
287 		threshold = c.region_size;
288 	else
289 		threshold = MIN(threshold_mb * _1MB, c.region_size);
290 
291 	src_addr = get_source_mapping(c);
292 	if (!src_addr) {
293 		ret = -1;
294 		goto out;
295 	}
296 
297 	/* Set byte pattern */
298 	srand(pattern_seed);
299 	for (i = 0; i < threshold; i++)
300 		memset((char *) src_addr + i, (char) rand(), 1);
301 
302 	/* Mask to zero out lower bits of address for alignment */
303 	align_mask = ~(c.dest_alignment - 1);
304 	/* Offset of destination address from the end of the source region */
305 	offset = (c.overlapping) ? -c.dest_alignment : c.dest_alignment;
306 	addr = (void *) (((unsigned long long) src_addr + c.region_size
307 			  + offset) & align_mask);
308 
309 	/* See comment in get_source_mapping() */
310 	if (!((unsigned long long) addr & c.dest_alignment))
311 		addr = (void *) ((unsigned long long) addr | c.dest_alignment);
312 
313 	/* Don't destroy existing mappings unless expected to overlap */
314 	while (!is_remap_region_valid(addr, c.region_size) && !c.overlapping) {
315 		/* Check for unsigned overflow */
316 		if (addr + c.dest_alignment < addr) {
317 			ksft_print_msg("Couldn't find a valid region to remap to\n");
318 			ret = -1;
319 			goto out;
320 		}
321 		addr += c.dest_alignment;
322 	}
323 
324 	clock_gettime(CLOCK_MONOTONIC, &t_start);
325 	dest_addr = mremap(src_addr, c.region_size, c.region_size,
326 					  MREMAP_MAYMOVE|MREMAP_FIXED, (char *) addr);
327 	clock_gettime(CLOCK_MONOTONIC, &t_end);
328 
329 	if (dest_addr == MAP_FAILED) {
330 		ksft_print_msg("mremap failed: %s\n", strerror(errno));
331 		ret = -1;
332 		goto clean_up_src;
333 	}
334 
335 	/* Verify byte pattern after remapping */
336 	srand(pattern_seed);
337 	for (i = 0; i < threshold; i++) {
338 		char c = (char) rand();
339 
340 		if (((char *) dest_addr)[i] != c) {
341 			ksft_print_msg("Data after remap doesn't match at offset %d\n",
342 				       i);
343 			ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff,
344 					((char *) dest_addr)[i] & 0xff);
345 			ret = -1;
346 			goto clean_up_dest;
347 		}
348 	}
349 
350 	start_ns = t_start.tv_sec * NS_PER_SEC + t_start.tv_nsec;
351 	end_ns = t_end.tv_sec * NS_PER_SEC + t_end.tv_nsec;
352 	ret = end_ns - start_ns;
353 
354 /*
355  * Since the destination address is specified using MREMAP_FIXED, subsequent
356  * mremap will unmap any previous mapping at the address range specified by
357  * dest_addr and region_size. This significantly affects the remap time of
358  * subsequent tests. So we clean up mappings after each test.
359  */
360 clean_up_dest:
361 	munmap(dest_addr, c.region_size);
362 clean_up_src:
363 	munmap(src_addr, c.region_size);
364 out:
365 	return ret;
366 }
367 
run_mremap_test_case(struct test test_case,int * failures,unsigned int threshold_mb,unsigned int pattern_seed)368 static void run_mremap_test_case(struct test test_case, int *failures,
369 				 unsigned int threshold_mb,
370 				 unsigned int pattern_seed)
371 {
372 	long long remap_time = remap_region(test_case.config, threshold_mb,
373 					    pattern_seed);
374 
375 	if (remap_time < 0) {
376 		if (test_case.expect_failure)
377 			ksft_test_result_xfail("%s\n\tExpected mremap failure\n",
378 					      test_case.name);
379 		else {
380 			ksft_test_result_fail("%s\n", test_case.name);
381 			*failures += 1;
382 		}
383 	} else {
384 		/*
385 		 * Comparing mremap time is only applicable if entire region
386 		 * was faulted in.
387 		 */
388 		if (threshold_mb == VALIDATION_NO_THRESHOLD ||
389 		    test_case.config.region_size <= threshold_mb * _1MB)
390 			ksft_test_result_pass("%s\n\tmremap time: %12lldns\n",
391 					      test_case.name, remap_time);
392 		else
393 			ksft_test_result_pass("%s\n", test_case.name);
394 	}
395 }
396 
/* Print the command-line synopsis and option descriptions to stderr. */
static void usage(const char *cmd)
{
	static const char options_help[] =
		"-t\t only validate threshold_mb of the remapped region\n"
		"  \t if 0 is supplied no threshold is used; all tests\n"
		"  \t are run and remapped regions validated fully.\n"
		"  \t The default threshold used is 4MB.\n"
		"-p\t provide a seed to generate the random pattern for\n"
		"  \t validating the remapped region.\n";

	fprintf(stderr,
		"Usage: %s [[-t <threshold_mb>] [-p <pattern_seed>]]\n", cmd);
	fputs(options_help, stderr);
}
408 
/*
 * Convert a decimal option argument to unsigned int.
 *
 * Returns 0 and stores the value in @out on success; returns -1 if @str is
 * empty, does not start with a digit, has trailing characters or does not fit
 * in an unsigned int.
 */
static int parse_uint_arg(const char *str, unsigned int *out)
{
	unsigned long value;
	char *end;

	/* strtoul() accepts a sign and leading blanks; require a plain digit. */
	if (!str || *str < '0' || *str > '9')
		return -1;

	errno = 0;
	value = strtoul(str, &end, 10);
	if (errno || *end != '\0' || value != (unsigned int) value)
		return -1;

	*out = (unsigned int) value;
	return 0;
}

/*
 * Parse the command line.
 *
 * @threshold_mb: set from "-t <threshold_mb>" when the option is given
 * @pattern_seed: set from "-p <pattern_seed>" when the option is given
 *
 * Returns 0 on success. On an unknown option, a malformed numeric argument or
 * a stray positional argument the usage text is printed and -1 is returned.
 */
static int parse_args(int argc, char **argv, unsigned int *threshold_mb,
		      unsigned int *pattern_seed)
{
	const char *optstr = "t:p:";
	int opt;

	while ((opt = getopt(argc, argv, optstr)) != -1) {
		unsigned int *target;

		switch (opt) {
		case 't':
			target = threshold_mb;
			break;
		case 'p':
			target = pattern_seed;
			break;
		default:
			usage(argv[0]);
			return -1;
		}

		/* Reject garbage instead of silently treating it as 0. */
		if (parse_uint_arg(optarg, target) < 0) {
			usage(argv[0]);
			return -1;
		}
	}

	/* No positional arguments are accepted. */
	if (optind < argc) {
		usage(argv[0]);
		return -1;
	}

	return 0;
}
436 
437 #define MAX_TEST 13
438 #define MAX_PERF_TEST 3
main(int argc,char ** argv)439 int main(int argc, char **argv)
440 {
441 	int failures = 0;
442 	int i, run_perf_tests;
443 	unsigned int threshold_mb = VALIDATION_DEFAULT_THRESHOLD;
444 	unsigned int pattern_seed;
445 	int num_expand_tests = 2;
446 	struct test test_cases[MAX_TEST];
447 	struct test perf_test_cases[MAX_PERF_TEST];
448 	int page_size;
449 	time_t t;
450 	FILE *maps_fp;
451 
452 	pattern_seed = (unsigned int) time(&t);
453 
454 	if (parse_args(argc, argv, &threshold_mb, &pattern_seed) < 0)
455 		exit(EXIT_FAILURE);
456 
457 	ksft_print_msg("Test configs:\n\tthreshold_mb=%u\n\tpattern_seed=%u\n\n",
458 		       threshold_mb, pattern_seed);
459 
460 	page_size = sysconf(_SC_PAGESIZE);
461 
462 	/* Expected mremap failures */
463 	test_cases[0] =	MAKE_TEST(page_size, page_size, page_size,
464 				  OVERLAPPING, EXPECT_FAILURE,
465 				  "mremap - Source and Destination Regions Overlapping");
466 
467 	test_cases[1] = MAKE_TEST(page_size, page_size/4, page_size,
468 				  NON_OVERLAPPING, EXPECT_FAILURE,
469 				  "mremap - Destination Address Misaligned (1KB-aligned)");
470 	test_cases[2] = MAKE_TEST(page_size/4, page_size, page_size,
471 				  NON_OVERLAPPING, EXPECT_FAILURE,
472 				  "mremap - Source Address Misaligned (1KB-aligned)");
473 
474 	/* Src addr PTE aligned */
475 	test_cases[3] = MAKE_TEST(PTE, PTE, PTE * 2,
476 				  NON_OVERLAPPING, EXPECT_SUCCESS,
477 				  "8KB mremap - Source PTE-aligned, Destination PTE-aligned");
478 
479 	/* Src addr 1MB aligned */
480 	test_cases[4] = MAKE_TEST(_1MB, PTE, _2MB, NON_OVERLAPPING, EXPECT_SUCCESS,
481 				  "2MB mremap - Source 1MB-aligned, Destination PTE-aligned");
482 	test_cases[5] = MAKE_TEST(_1MB, _1MB, _2MB, NON_OVERLAPPING, EXPECT_SUCCESS,
483 				  "2MB mremap - Source 1MB-aligned, Destination 1MB-aligned");
484 
485 	/* Src addr PMD aligned */
486 	test_cases[6] = MAKE_TEST(PMD, PTE, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
487 				  "4MB mremap - Source PMD-aligned, Destination PTE-aligned");
488 	test_cases[7] =	MAKE_TEST(PMD, _1MB, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
489 				  "4MB mremap - Source PMD-aligned, Destination 1MB-aligned");
490 	test_cases[8] = MAKE_TEST(PMD, PMD, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
491 				  "4MB mremap - Source PMD-aligned, Destination PMD-aligned");
492 
493 	/* Src addr PUD aligned */
494 	test_cases[9] = MAKE_TEST(PUD, PTE, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
495 				  "2GB mremap - Source PUD-aligned, Destination PTE-aligned");
496 	test_cases[10] = MAKE_TEST(PUD, _1MB, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
497 				   "2GB mremap - Source PUD-aligned, Destination 1MB-aligned");
498 	test_cases[11] = MAKE_TEST(PUD, PMD, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
499 				   "2GB mremap - Source PUD-aligned, Destination PMD-aligned");
500 	test_cases[12] = MAKE_TEST(PUD, PUD, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
501 				   "2GB mremap - Source PUD-aligned, Destination PUD-aligned");
502 
503 	perf_test_cases[0] =  MAKE_TEST(page_size, page_size, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
504 					"1GB mremap - Source PTE-aligned, Destination PTE-aligned");
505 	/*
506 	 * mremap 1GB region - Page table level aligned time
507 	 * comparison.
508 	 */
509 	perf_test_cases[1] = MAKE_TEST(PMD, PMD, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
510 				       "1GB mremap - Source PMD-aligned, Destination PMD-aligned");
511 	perf_test_cases[2] = MAKE_TEST(PUD, PUD, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
512 				       "1GB mremap - Source PUD-aligned, Destination PUD-aligned");
513 
514 	run_perf_tests =  (threshold_mb == VALIDATION_NO_THRESHOLD) ||
515 				(threshold_mb * _1MB >= _1GB);
516 
517 	ksft_set_plan(ARRAY_SIZE(test_cases) + (run_perf_tests ?
518 		      ARRAY_SIZE(perf_test_cases) : 0) + num_expand_tests);
519 
520 	for (i = 0; i < ARRAY_SIZE(test_cases); i++)
521 		run_mremap_test_case(test_cases[i], &failures, threshold_mb,
522 				     pattern_seed);
523 
524 	maps_fp = fopen("/proc/self/maps", "r");
525 
526 	if (maps_fp == NULL) {
527 		ksft_print_msg("Failed to read /proc/self/maps: %s\n", strerror(errno));
528 		exit(KSFT_FAIL);
529 	}
530 
531 	mremap_expand_merge(maps_fp, page_size);
532 	mremap_expand_merge_offset(maps_fp, page_size);
533 
534 	fclose(maps_fp);
535 
536 	if (run_perf_tests) {
537 		ksft_print_msg("\n%s\n",
538 		 "mremap HAVE_MOVE_PMD/PUD optimization time comparison for 1GB region:");
539 		for (i = 0; i < ARRAY_SIZE(perf_test_cases); i++)
540 			run_mremap_test_case(perf_test_cases[i], &failures,
541 					     threshold_mb, pattern_seed);
542 	}
543 
544 	if (failures > 0)
545 		ksft_exit_fail();
546 	else
547 		ksft_exit_pass();
548 }
549