// SPDX-License-Identifier: GPL-2.0
// Copyright(c) 2018 Intel Corporation. All rights reserved.

#include <linux/mm.h>
#include <linux/init.h>
#include <linux/mmzone.h>
#include <linux/random.h>
#include <linux/moduleparam.h>
#include "internal.h"
#include "shuffle.h"

DEFINE_STATIC_KEY_FALSE(page_alloc_shuffle_key);
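
/*
 * page_alloc_shuffle_key is a static branch: shuffle checks in the hot
 * allocator paths (see is_shuffle_order() in shuffle.h) compile to a
 * runtime-patched no-op until the key is enabled via the 'shuffle'
 * parameter below.
 */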

static bool shuffle_param;

static __meminit int shuffle_param_set(const char *val,
		const struct kernel_param *kp)
{
	if (param_set_bool(val, kp))
		return -EINVAL;
	if (*(bool *)kp->arg)
		static_branch_enable(&page_alloc_shuffle_key);
	return 0;
}

static const struct kernel_param_ops shuffle_param_ops = {
	.set = shuffle_param_set,
	.get = param_get_bool,
};
module_param_cb(shuffle, &shuffle_param_ops, &shuffle_param, 0400);
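
/*
 * Example (an assumption based on mm/Makefile linking this file into the
 * composite page-alloc.o object, which gives the parameter the
 * "page_alloc." namespace): enabling shuffling from the kernel command
 * line is expected to look like
 *
 *	page_alloc.shuffle=1
 *
 * Note that shuffle_param_set() only ever enables the static key; there
 * is no disable path, and the 0400 permissions leave the sysfs node
 * read-only at runtime.
 */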

/*
 * For two pages to be swapped in the shuffle, they must be free (on a
 * 'free_area' lru), have the same order, and have the same migratetype.
 */
static struct page * __meminit shuffle_valid_page(struct zone *zone,
						  unsigned long pfn, int order)
{
	struct page *page = pfn_to_online_page(pfn);

	/*
	 * Given we're dealing with randomly selected pfns in a zone, we
	 * need to ask questions like...
	 */

	/* ... is the page managed by the buddy? */
	if (!page)
		return NULL;

	/* ... is the page assigned to the same zone? */
	if (page_zone(page) != zone)
		return NULL;

	/* ... is the page free and currently on a free_area list? */
	if (!PageBuddy(page))
		return NULL;

	/*
	 * ... is the page on the same list as the page we will
	 * shuffle it with?
	 */
	if (buddy_order(page) != order)
		return NULL;

	return page;
}
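
/*
 * Note: the PageBuddy() and buddy_order() reads above are only stable
 * while zone->lock is held, which __shuffle_zone() below guarantees for
 * the duration of its scan.
 */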

/*
 * Fisher-Yates shuffle the freelist, which prescribes iterating through an
 * array, pfns in this case, and randomly swapping each entry with another in
 * the span, end_pfn - start_pfn.
 *
 * To keep the implementation simple it does not attempt to correct for sources
 * of bias in the distribution, like modulo bias or pseudo-random number
 * generator bias. I.e. the expectation is that this shuffling raises the bar
 * for attacks that exploit the predictability of page allocations, but it need
 * not be a perfect shuffle.
 */
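
/*
 * For reference, a textbook Fisher-Yates over a plain array looks like
 * the sketch below (illustrative only; not part of this file):
 *
 *	static void fisher_yates(unsigned long *arr, unsigned long n)
 *	{
 *		unsigned long i, j;
 *
 *		for (i = n - 1; i > 0; i--) {
 *			j = get_random_long() % (i + 1);
 *			swap(arr[i], arr[j]);
 *		}
 *	}
 *
 * __shuffle_zone() below approximates this over a zone's pfn span, where
 * an "entry" may be a hole or an unsuitable page, hence
 * shuffle_valid_page() and the SHUFFLE_RETRY loop.
 */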
#define SHUFFLE_RETRY 10
void __meminit __shuffle_zone(struct zone *z)
{
	unsigned long i, flags;
	unsigned long start_pfn = z->zone_start_pfn;
	unsigned long end_pfn = zone_end_pfn(z);
	const int order = SHUFFLE_ORDER;
	const int order_pages = 1 << order;

	spin_lock_irqsave(&z->lock, flags);
	start_pfn = ALIGN(start_pfn, order_pages);
	for (i = start_pfn; i < end_pfn; i += order_pages) {
		unsigned long j;
		int migratetype, retry;
		struct page *page_i, *page_j;

		/*
		 * We expect page_i, in the sub-range of a zone being added
		 * (@start_pfn to @end_pfn), to more likely be valid compared to
		 * page_j, randomly selected in the span @zone_start_pfn to
		 * @zone_start_pfn + @spanned_pages.
		 */
		page_i = shuffle_valid_page(z, i, order);
		if (!page_i)
			continue;

		for (retry = 0; retry < SHUFFLE_RETRY; retry++) {
			/*
			 * Pick a random order-aligned page in the zone span as
			 * a swap target. If the selected pfn is a hole, retry
			 * up to SHUFFLE_RETRY times to find a random valid pfn
			 * in the zone.
			 */
			j = z->zone_start_pfn +
				ALIGN_DOWN(get_random_long() % z->spanned_pages,
						order_pages);
			page_j = shuffle_valid_page(z, j, order);
			if (page_j && page_j != page_i)
				break;
		}
		if (retry >= SHUFFLE_RETRY) {
			pr_debug("%s: failed to swap %#lx\n", __func__, i);
			continue;
		}

		/*
		 * Each migratetype corresponds to its own list; make sure the
		 * types match, otherwise we're moving pages to lists where they
		 * do not belong.
		 */
		migratetype = get_pageblock_migratetype(page_i);
		if (get_pageblock_migratetype(page_j) != migratetype) {
			pr_debug("%s: migratetype mismatch %#lx\n", __func__, i);
			continue;
		}

		list_swap(&page_i->lru, &page_j->lru);

		pr_debug("%s: swap: %#lx -> %#lx\n", __func__, i, j);

		/* take it easy on the zone lock */
		if ((i % (100 * order_pages)) == 0) {
			spin_unlock_irqrestore(&z->lock, flags);
			cond_resched();
			spin_lock_irqsave(&z->lock, flags);
		}
	}
	spin_unlock_irqrestore(&z->lock, flags);
}

/*
 * __shuffle_free_memory - reduce the predictability of the page allocator
 * @pgdat: node page data
 */
void __meminit __shuffle_free_memory(pg_data_t *pgdat)
{
	struct zone *z;

	for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
		shuffle_zone(z);
}
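
/*
 * Callers do not invoke __shuffle_free_memory() or __shuffle_zone()
 * directly; the shuffle_free_memory()/shuffle_zone() wrappers in shuffle.h
 * check page_alloc_shuffle_key first. The call sites themselves (e.g.
 * late-boot initialization of free memory) live outside this file.
 */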

bool shuffle_pick_tail(void)
{
	static u64 rand;
	static u8 rand_bits;
	bool ret;

	/*
	 * The lack of locking is deliberate. If 2 threads race to
	 * update the rand state it just adds to the entropy.
	 */
	if (rand_bits == 0) {
		rand_bits = 64;
		rand = get_random_u64();
	}

	ret = rand & 1;

	rand_bits--;
	rand >>= 1;

	return ret;
}
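
/*
 * Usage sketch (hedged; the add_to_free_list*() names mirror
 * mm/page_alloc.c helpers and are shown for illustration): the buddy free
 * path can consult shuffle_pick_tail() so that a freed high-order buddy
 * lands at a random end of its freelist, roughly:
 *
 *	if (is_shuffle_order(order) && shuffle_pick_tail())
 *		add_to_free_list_tail(page, zone, order, migratetype);
 *	else
 *		add_to_free_list(page, zone, order, migratetype);
 *
 * is_shuffle_order() is provided by shuffle.h and checks
 * page_alloc_shuffle_key before comparing against SHUFFLE_ORDER.
 */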