xref: /openbmc/linux/mm/hugetlb_vmemmap.c (revision 173940b3)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Optimize vmemmap pages associated with HugeTLB
4  *
5  * Copyright (c) 2020, Bytedance. All rights reserved.
6  *
7  *     Author: Muchun Song <songmuchun@bytedance.com>
8  *
9  * See Documentation/vm/vmemmap_dedup.rst
10  */
11 #define pr_fmt(fmt)	"HugeTLB: " fmt
12 
13 #include <linux/memory_hotplug.h>
14 #include "hugetlb_vmemmap.h"
15 
16 /*
17  * There are a lot of struct page structures associated with each HugeTLB page.
18  * For tail pages, the value of compound_head is the same. So we can reuse first
19  * page of head page structures. We map the virtual addresses of all the pages
20  * of tail page structures to the head page struct, and then free these page
21  * frames. Therefore, we need to reserve one pages as vmemmap areas.
22  */
23 #define RESERVE_VMEMMAP_NR		1U
24 #define RESERVE_VMEMMAP_SIZE		(RESERVE_VMEMMAP_NR << PAGE_SHIFT)
25 
26 enum vmemmap_optimize_mode {
27 	VMEMMAP_OPTIMIZE_OFF,
28 	VMEMMAP_OPTIMIZE_ON,
29 };
30 
31 DEFINE_STATIC_KEY_MAYBE(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON,
32 			hugetlb_optimize_vmemmap_key);
33 EXPORT_SYMBOL(hugetlb_optimize_vmemmap_key);
34 
35 static enum vmemmap_optimize_mode vmemmap_optimize_mode =
36 	IS_ENABLED(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON);
37 
38 static void vmemmap_optimize_mode_switch(enum vmemmap_optimize_mode to)
39 {
40 	if (vmemmap_optimize_mode == to)
41 		return;
42 
43 	if (to == VMEMMAP_OPTIMIZE_OFF)
44 		static_branch_dec(&hugetlb_optimize_vmemmap_key);
45 	else
46 		static_branch_inc(&hugetlb_optimize_vmemmap_key);
47 	WRITE_ONCE(vmemmap_optimize_mode, to);
48 }
49 
50 static int __init hugetlb_vmemmap_early_param(char *buf)
51 {
52 	bool enable;
53 	enum vmemmap_optimize_mode mode;
54 
55 	if (kstrtobool(buf, &enable))
56 		return -EINVAL;
57 
58 	mode = enable ? VMEMMAP_OPTIMIZE_ON : VMEMMAP_OPTIMIZE_OFF;
59 	vmemmap_optimize_mode_switch(mode);
60 
61 	return 0;
62 }
63 early_param("hugetlb_free_vmemmap", hugetlb_vmemmap_early_param);
64 
65 /*
66  * Previously discarded vmemmap pages will be allocated and remapping
67  * after this function returns zero.
68  */
69 int hugetlb_vmemmap_alloc(struct hstate *h, struct page *head)
70 {
71 	int ret;
72 	unsigned long vmemmap_addr = (unsigned long)head;
73 	unsigned long vmemmap_end, vmemmap_reuse, vmemmap_pages;
74 
75 	if (!HPageVmemmapOptimized(head))
76 		return 0;
77 
78 	vmemmap_addr	+= RESERVE_VMEMMAP_SIZE;
79 	vmemmap_pages	= hugetlb_optimize_vmemmap_pages(h);
80 	vmemmap_end	= vmemmap_addr + (vmemmap_pages << PAGE_SHIFT);
81 	vmemmap_reuse	= vmemmap_addr - PAGE_SIZE;
82 
83 	/*
84 	 * The pages which the vmemmap virtual address range [@vmemmap_addr,
85 	 * @vmemmap_end) are mapped to are freed to the buddy allocator, and
86 	 * the range is mapped to the page which @vmemmap_reuse is mapped to.
87 	 * When a HugeTLB page is freed to the buddy allocator, previously
88 	 * discarded vmemmap pages must be allocated and remapping.
89 	 */
90 	ret = vmemmap_remap_alloc(vmemmap_addr, vmemmap_end, vmemmap_reuse,
91 				  GFP_KERNEL | __GFP_NORETRY | __GFP_THISNODE);
92 	if (!ret) {
93 		ClearHPageVmemmapOptimized(head);
94 		static_branch_dec(&hugetlb_optimize_vmemmap_key);
95 	}
96 
97 	return ret;
98 }
99 
100 void hugetlb_vmemmap_free(struct hstate *h, struct page *head)
101 {
102 	unsigned long vmemmap_addr = (unsigned long)head;
103 	unsigned long vmemmap_end, vmemmap_reuse, vmemmap_pages;
104 
105 	vmemmap_pages = hugetlb_optimize_vmemmap_pages(h);
106 	if (!vmemmap_pages)
107 		return;
108 
109 	if (READ_ONCE(vmemmap_optimize_mode) == VMEMMAP_OPTIMIZE_OFF)
110 		return;
111 
112 	static_branch_inc(&hugetlb_optimize_vmemmap_key);
113 
114 	vmemmap_addr	+= RESERVE_VMEMMAP_SIZE;
115 	vmemmap_end	= vmemmap_addr + (vmemmap_pages << PAGE_SHIFT);
116 	vmemmap_reuse	= vmemmap_addr - PAGE_SIZE;
117 
118 	/*
119 	 * Remap the vmemmap virtual address range [@vmemmap_addr, @vmemmap_end)
120 	 * to the page which @vmemmap_reuse is mapped to, then free the pages
121 	 * which the range [@vmemmap_addr, @vmemmap_end] is mapped to.
122 	 */
123 	if (vmemmap_remap_free(vmemmap_addr, vmemmap_end, vmemmap_reuse))
124 		static_branch_dec(&hugetlb_optimize_vmemmap_key);
125 	else
126 		SetHPageVmemmapOptimized(head);
127 }
128 
129 void __init hugetlb_vmemmap_init(struct hstate *h)
130 {
131 	unsigned int nr_pages = pages_per_huge_page(h);
132 	unsigned int vmemmap_pages;
133 
134 	/*
135 	 * There are only (RESERVE_VMEMMAP_SIZE / sizeof(struct page)) struct
136 	 * page structs that can be used when CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP,
137 	 * so add a BUILD_BUG_ON to catch invalid usage of the tail struct page.
138 	 */
139 	BUILD_BUG_ON(__NR_USED_SUBPAGE >=
140 		     RESERVE_VMEMMAP_SIZE / sizeof(struct page));
141 
142 	if (!is_power_of_2(sizeof(struct page))) {
143 		pr_warn_once("cannot optimize vmemmap pages because \"struct page\" crosses page boundaries\n");
144 		static_branch_disable(&hugetlb_optimize_vmemmap_key);
145 		return;
146 	}
147 
148 	vmemmap_pages = (nr_pages * sizeof(struct page)) >> PAGE_SHIFT;
149 	/*
150 	 * The head page is not to be freed to buddy allocator, the other tail
151 	 * pages will map to the head page, so they can be freed.
152 	 *
153 	 * Could RESERVE_VMEMMAP_NR be greater than @vmemmap_pages? It is true
154 	 * on some architectures (e.g. aarch64). See Documentation/arm64/
155 	 * hugetlbpage.rst for more details.
156 	 */
157 	if (likely(vmemmap_pages > RESERVE_VMEMMAP_NR))
158 		h->optimize_vmemmap_pages = vmemmap_pages - RESERVE_VMEMMAP_NR;
159 
160 	pr_info("can optimize %d vmemmap pages for %s\n",
161 		h->optimize_vmemmap_pages, h->name);
162 }
163 
164 #ifdef CONFIG_PROC_SYSCTL
165 static int hugetlb_optimize_vmemmap_handler(struct ctl_table *table, int write,
166 					    void *buffer, size_t *length,
167 					    loff_t *ppos)
168 {
169 	int ret;
170 	enum vmemmap_optimize_mode mode;
171 	static DEFINE_MUTEX(sysctl_mutex);
172 
173 	if (write && !capable(CAP_SYS_ADMIN))
174 		return -EPERM;
175 
176 	mutex_lock(&sysctl_mutex);
177 	mode = vmemmap_optimize_mode;
178 	table->data = &mode;
179 	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
180 	if (write && !ret)
181 		vmemmap_optimize_mode_switch(mode);
182 	mutex_unlock(&sysctl_mutex);
183 
184 	return ret;
185 }
186 
187 static struct ctl_table hugetlb_vmemmap_sysctls[] = {
188 	{
189 		.procname	= "hugetlb_optimize_vmemmap",
190 		.maxlen		= sizeof(enum vmemmap_optimize_mode),
191 		.mode		= 0644,
192 		.proc_handler	= hugetlb_optimize_vmemmap_handler,
193 		.extra1		= SYSCTL_ZERO,
194 		.extra2		= SYSCTL_ONE,
195 	},
196 	{ }
197 };
198 
199 static __init int hugetlb_vmemmap_sysctls_init(void)
200 {
201 	/*
202 	 * If "memory_hotplug.memmap_on_memory" is enabled or "struct page"
203 	 * crosses page boundaries, the vmemmap pages cannot be optimized.
204 	 */
205 	if (!mhp_memmap_on_memory() && is_power_of_2(sizeof(struct page)))
206 		register_sysctl_init("vm", hugetlb_vmemmap_sysctls);
207 
208 	return 0;
209 }
210 late_initcall(hugetlb_vmemmap_sysctls_init);
211 #endif /* CONFIG_PROC_SYSCTL */
212