1e900a918SDan Williams // SPDX-License-Identifier: GPL-2.0
2e900a918SDan Williams // Copyright(c) 2018 Intel Corporation. All rights reserved.
3e900a918SDan Williams
4e900a918SDan Williams #include <linux/mm.h>
5e900a918SDan Williams #include <linux/init.h>
6e900a918SDan Williams #include <linux/mmzone.h>
7e900a918SDan Williams #include <linux/random.h>
8e900a918SDan Williams #include <linux/moduleparam.h>
9e900a918SDan Williams #include "internal.h"
10e900a918SDan Williams #include "shuffle.h"
11e900a918SDan Williams
/*
 * Static key consulted by the page allocator: when enabled, freshly freed
 * pages are randomly placed at the head or tail of the freelist (see
 * shuffle_pick_tail()).  Defaults to off; flipped on via the "shuffle"
 * module parameter below.
 */
DEFINE_STATIC_KEY_FALSE(page_alloc_shuffle_key);

/* Backing storage for the "shuffle" boolean module parameter. */
static bool shuffle_param;
15e900a918SDan Williams
/*
 * Setter for the "shuffle" module parameter: parse @val as a bool into
 * shuffle_param and, when true, enable the page-allocator shuffle static
 * key.  Note the key is only ever enabled here, never disabled.
 */
static __meminit int shuffle_param_set(const char *val,
		const struct kernel_param *kp)
{
	/* Any parse failure is reported uniformly as -EINVAL. */
	if (param_set_bool(val, kp))
		return -EINVAL;
	/* kp->arg points at shuffle_param, updated by param_set_bool() above. */
	if (*(bool *)kp->arg)
		static_branch_enable(&page_alloc_shuffle_key);
	return 0;
}
25*85a34107SLiu Shixin
/* Custom ops: bespoke setter (enables the static key), stock bool getter. */
static const struct kernel_param_ops shuffle_param_ops = {
	.set = shuffle_param_set,
	.get = param_get_bool,
};
/* 0400: root-readable in sysfs, not writable after load/boot. */
module_param_cb(shuffle, &shuffle_param_ops, &shuffle_param, 0400);
31e900a918SDan Williams
32e900a918SDan Williams /*
33e900a918SDan Williams * For two pages to be swapped in the shuffle, they must be free (on a
34e900a918SDan Williams * 'free_area' lru), have the same order, and have the same migratetype.
35e900a918SDan Williams */
shuffle_valid_page(struct zone * zone,unsigned long pfn,int order)364a93025cSDavid Hildenbrand static struct page * __meminit shuffle_valid_page(struct zone *zone,
374a93025cSDavid Hildenbrand unsigned long pfn, int order)
38e900a918SDan Williams {
394a93025cSDavid Hildenbrand struct page *page = pfn_to_online_page(pfn);
40e900a918SDan Williams
41e900a918SDan Williams /*
42e900a918SDan Williams * Given we're dealing with randomly selected pfns in a zone we
43e900a918SDan Williams * need to ask questions like...
44e900a918SDan Williams */
45e900a918SDan Williams
464a93025cSDavid Hildenbrand /* ... is the page managed by the buddy? */
474a93025cSDavid Hildenbrand if (!page)
48e900a918SDan Williams return NULL;
49e900a918SDan Williams
504a93025cSDavid Hildenbrand /* ... is the page assigned to the same zone? */
514a93025cSDavid Hildenbrand if (page_zone(page) != zone)
52e900a918SDan Williams return NULL;
53e900a918SDan Williams
54e900a918SDan Williams /* ...is the page free and currently on a free_area list? */
55e900a918SDan Williams if (!PageBuddy(page))
56e900a918SDan Williams return NULL;
57e900a918SDan Williams
58e900a918SDan Williams /*
59e900a918SDan Williams * ...is the page on the same list as the page we will
60e900a918SDan Williams * shuffle it with?
61e900a918SDan Williams */
62ab130f91SMatthew Wilcox (Oracle) if (buddy_order(page) != order)
63e900a918SDan Williams return NULL;
64e900a918SDan Williams
65e900a918SDan Williams return page;
66e900a918SDan Williams }
67e900a918SDan Williams
68e900a918SDan Williams /*
69e900a918SDan Williams * Fisher-Yates shuffle the freelist which prescribes iterating through an
70e900a918SDan Williams * array, pfns in this case, and randomly swapping each entry with another in
71e900a918SDan Williams * the span, end_pfn - start_pfn.
72e900a918SDan Williams *
73e900a918SDan Williams * To keep the implementation simple it does not attempt to correct for sources
74e900a918SDan Williams * of bias in the distribution, like modulo bias or pseudo-random number
75e900a918SDan Williams * generator bias. I.e. the expectation is that this shuffling raises the bar
76e900a918SDan Williams * for attacks that exploit the predictability of page allocations, but need not
77e900a918SDan Williams * be a perfect shuffle.
78e900a918SDan Williams */
79e900a918SDan Williams #define SHUFFLE_RETRY 10
/*
 * Walk @z in SHUFFLE_ORDER-sized blocks and swap each valid free block's
 * freelist position with that of a randomly chosen free block elsewhere
 * in the zone span.  Runs under the zone lock, which is periodically
 * dropped to keep scheduling latency reasonable.
 */
void __meminit __shuffle_zone(struct zone *z)
{
	unsigned long i, flags;
	unsigned long start_pfn = z->zone_start_pfn;
	unsigned long end_pfn = zone_end_pfn(z);
	const int order = SHUFFLE_ORDER;
	const int order_pages = 1 << order;

	spin_lock_irqsave(&z->lock, flags);
	/* Only order-aligned pfns can head a free block of this order. */
	start_pfn = ALIGN(start_pfn, order_pages);
	for (i = start_pfn; i < end_pfn; i += order_pages) {
		unsigned long j;
		int migratetype, retry;
		struct page *page_i, *page_j;

		/*
		 * We expect page_i, in the sub-range of a zone being added
		 * (@start_pfn to @end_pfn), to more likely be valid compared to
		 * page_j randomly selected in the span @zone_start_pfn to
		 * @spanned_pages.
		 */
		page_i = shuffle_valid_page(z, i, order);
		if (!page_i)
			continue;

		for (retry = 0; retry < SHUFFLE_RETRY; retry++) {
			/*
			 * Pick a random order aligned page in the zone span as
			 * a swap target. If the selected pfn is a hole, retry
			 * up to SHUFFLE_RETRY attempts find a random valid pfn
			 * in the zone.
			 */
			j = z->zone_start_pfn +
				ALIGN_DOWN(get_random_long() % z->spanned_pages,
						order_pages);
			page_j = shuffle_valid_page(z, j, order);
			if (page_j && page_j != page_i)
				break;
		}
		/* All SHUFFLE_RETRY picks landed on holes/invalid blocks. */
		if (retry >= SHUFFLE_RETRY) {
			pr_debug("%s: failed to swap %#lx\n", __func__, i);
			continue;
		}

		/*
		 * Each migratetype corresponds to its own list, make sure the
		 * types match otherwise we're moving pages to lists where they
		 * do not belong.
		 */
		migratetype = get_pageblock_migratetype(page_i);
		if (get_pageblock_migratetype(page_j) != migratetype) {
			pr_debug("%s: migratetype mismatch %#lx\n", __func__, i);
			continue;
		}

		/* Swap the two blocks' positions on their shared freelist. */
		list_swap(&page_i->lru, &page_j->lru);

		pr_debug("%s: swap: %#lx -> %#lx\n", __func__, i, j);

		/* take it easy on the zone lock */
		if ((i % (100 * order_pages)) == 0) {
			spin_unlock_irqrestore(&z->lock, flags);
			cond_resched();
			spin_lock_irqsave(&z->lock, flags);
		}
	}
	spin_unlock_irqrestore(&z->lock, flags);
}
148e900a918SDan Williams
149845be1cdSRandy Dunlap /*
150845be1cdSRandy Dunlap * __shuffle_free_memory - reduce the predictability of the page allocator
151e900a918SDan Williams * @pgdat: node page data
152e900a918SDan Williams */
__shuffle_free_memory(pg_data_t * pgdat)153e900a918SDan Williams void __meminit __shuffle_free_memory(pg_data_t *pgdat)
154e900a918SDan Williams {
155e900a918SDan Williams struct zone *z;
156e900a918SDan Williams
157e900a918SDan Williams for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
158e900a918SDan Williams shuffle_zone(z);
159e900a918SDan Williams }
16097500a4aSDan Williams
shuffle_pick_tail(void)161a2129f24SAlexander Duyck bool shuffle_pick_tail(void)
16297500a4aSDan Williams {
16397500a4aSDan Williams static u64 rand;
16497500a4aSDan Williams static u8 rand_bits;
165a2129f24SAlexander Duyck bool ret;
16697500a4aSDan Williams
16797500a4aSDan Williams /*
16897500a4aSDan Williams * The lack of locking is deliberate. If 2 threads race to
16997500a4aSDan Williams * update the rand state it just adds to the entropy.
17097500a4aSDan Williams */
17197500a4aSDan Williams if (rand_bits == 0) {
17297500a4aSDan Williams rand_bits = 64;
17397500a4aSDan Williams rand = get_random_u64();
17497500a4aSDan Williams }
17597500a4aSDan Williams
176a2129f24SAlexander Duyck ret = rand & 1;
177a2129f24SAlexander Duyck
17897500a4aSDan Williams rand_bits--;
17997500a4aSDan Williams rand >>= 1;
180a2129f24SAlexander Duyck
181a2129f24SAlexander Duyck return ret;
18297500a4aSDan Williams }
183