memory_hotplug.c (85a2b4b08f202d67be81e2453064e01572ec19c8) → memory_hotplug.c (2d1f649c7c0855751c7ff43f4e34784061bc72f7)
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * linux/mm/memory_hotplug.c
4 *
5 * Copyright (C)
6 */
7
8#include <linux/stddef.h>

--- 27 unchanged lines hidden ---

36#include <linux/rmap.h>
37#include <linux/module.h>
38
39#include <asm/tlbflush.h>
40
41#include "internal.h"
42#include "shuffle.h"
43
44enum {
45 MEMMAP_ON_MEMORY_DISABLE = 0,
46 MEMMAP_ON_MEMORY_ENABLE,
47 MEMMAP_ON_MEMORY_FORCE,
48};
49
50static int memmap_mode __read_mostly = MEMMAP_ON_MEMORY_DISABLE;
51
52static inline unsigned long memory_block_memmap_size(void)
53{
54 return PHYS_PFN(memory_block_size_bytes()) * sizeof(struct page);
55}
56
57static inline unsigned long memory_block_memmap_on_memory_pages(void)
58{
59 unsigned long nr_pages = PFN_UP(memory_block_memmap_size());
60
61 /*
62 * In "forced" memmap_on_memory mode, we add extra pages to align the
63 * vmemmap size to cover full pageblocks. That way, we can add memory
64 * even if the vmemmap size is not properly aligned; however, we might waste
65 * memory.
66 */
67 if (memmap_mode == MEMMAP_ON_MEMORY_FORCE)
68 return pageblock_align(nr_pages);
69 return nr_pages;
70}
71
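To make the calculation above concrete, here is a minimal user-space sketch of the same arithmetic. All constants are illustrative assumptions (a hypothetical 64 KiB base-page configuration), not values taken from this patch:

#include <stdio.h>

/* Illustrative constants only; not taken from the patch. */
#define PAGE_SIZE_BYTES      (64UL * 1024)           /* hypothetical 64 KiB base pages */
#define STRUCT_PAGE_BYTES    64UL                     /* assumed sizeof(struct page) */
#define MEMORY_BLOCK_BYTES   (256UL * 1024 * 1024)    /* assumed memory block size */
#define PAGEBLOCK_NR_PAGES   32UL                     /* assumed pages per pageblock */

int main(void)
{
	/* bytes of memmap needed to describe one memory block */
	unsigned long memmap_bytes = MEMORY_BLOCK_BYTES / PAGE_SIZE_BYTES * STRUCT_PAGE_BYTES;
	/* PFN_UP(): round up to whole base pages */
	unsigned long nr_pages = (memmap_bytes + PAGE_SIZE_BYTES - 1) / PAGE_SIZE_BYTES;
	/* pageblock_align(): what "force" mode reserves instead */
	unsigned long forced = (nr_pages + PAGEBLOCK_NR_PAGES - 1) /
			       PAGEBLOCK_NR_PAGES * PAGEBLOCK_NR_PAGES;

	printf("vmemmap needs %lu pages; force mode reserves %lu, wasting %lu\n",
	       nr_pages, forced, forced - nr_pages);
	return 0;
}

With these assumed numbers the vmemmap needs 4 pages while a pageblock is 32 pages, so the reservation grows to 32 pages and 28 of them are wasted per memory block; that is the trade-off the pr_info_once() in set_memmap_mode() reports.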
44#ifdef CONFIG_MHP_MEMMAP_ON_MEMORY
45/*
46 * memory_hotplug.memmap_on_memory parameter
47 */
48static bool memmap_on_memory __ro_after_init;
49module_param(memmap_on_memory, bool, 0444);
50MODULE_PARM_DESC(memmap_on_memory, "Enable memmap on memory for memory hotplug");
76static int set_memmap_mode(const char *val, const struct kernel_param *kp)
77{
78 int ret, mode;
79 bool enabled;
80
81 if (sysfs_streq(val, "force") || sysfs_streq(val, "FORCE")) {
82 mode = MEMMAP_ON_MEMORY_FORCE;
83 } else {
84 ret = kstrtobool(val, &enabled);
85 if (ret < 0)
86 return ret;
87 if (enabled)
88 mode = MEMMAP_ON_MEMORY_ENABLE;
89 else
90 mode = MEMMAP_ON_MEMORY_DISABLE;
91 }
92 *((int *)kp->arg) = mode;
93 if (mode == MEMMAP_ON_MEMORY_FORCE) {
94 unsigned long memmap_pages = memory_block_memmap_on_memory_pages();
95
96 pr_info_once("Memory hotplug will waste %ld pages in each memory block\n",
97 memmap_pages - PFN_UP(memory_block_memmap_size()));
98 }
99 return 0;
100}
101
102static int get_memmap_mode(char *buffer, const struct kernel_param *kp)
103{
104 if (*((int *)kp->arg) == MEMMAP_ON_MEMORY_FORCE)
105 return sprintf(buffer, "force\n");
106 return param_get_bool(buffer, kp);
107}
108
109static const struct kernel_param_ops memmap_mode_ops = {
110 .set = set_memmap_mode,
111 .get = get_memmap_mode,
112};
113module_param_cb(memmap_on_memory, &memmap_mode_ops, &memmap_mode, 0444);
114MODULE_PARM_DESC(memmap_on_memory, "Enable memmap on memory for memory hotplug\n"
115 "With value \"force\" it could result in memory wastage due "
116 "to memmap size limitations (Y/N/force)");
117
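As a usage note: memory_hotplug.c is built into the kernel, so this parameter is normally given on the kernel command line, e.g. memory_hotplug.memmap_on_memory=force; besides "force"/"FORCE", set_memmap_mode() accepts the usual boolean spellings understood by kstrtobool() (1/0, y/n, on/off). Because the permissions are 0444, the mode cannot be changed at runtime and is only read back, e.g. via /sys/module/memory_hotplug/parameters/memmap_on_memory, where get_memmap_mode() prints Y, N, or "force".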
52static inline bool mhp_memmap_on_memory(void)
53{
54 return memmap_on_memory;
120 return memmap_mode != MEMMAP_ON_MEMORY_DISABLE;
55}
56#else
57static inline bool mhp_memmap_on_memory(void)
58{
59 return false;
60}
61#endif
62

--- 1179 unchanged lines hidden ---

1242}
1243
1244static int online_memory_block(struct memory_block *mem, void *arg)
1245{
1246 mem->online_type = mhp_default_online_type;
1247 return device_online(&mem->dev);
1248}
1249
1250static inline unsigned long memory_block_memmap_size(void)
1251{
1252 return PHYS_PFN(memory_block_size_bytes()) * sizeof(struct page);
1253}
1254
1255#ifndef arch_supports_memmap_on_memory
1256static inline bool arch_supports_memmap_on_memory(unsigned long vmemmap_size)
1257{
1258 /*
1259 * As default, we want the vmemmap to span a complete PMD such that we
1260 * can map the vmemmap using a single PMD if supported by the
1261 * architecture.
1262 */
1263 return IS_ALIGNED(vmemmap_size, PMD_SIZE);
1264}
1265#endif
1266
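For a sense of scale: on x86-64 the memory block size is typically 128 MiB, i.e. 32768 base pages, and with a 64-byte struct page the memmap then occupies 32768 * 64 bytes = 2 MiB, exactly one PMD, so this default check passes without any forcing.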
1267static bool mhp_supports_memmap_on_memory(unsigned long size)
1268{
1269 unsigned long vmemmap_size = memory_block_memmap_size();
1270 unsigned long remaining_size = size - vmemmap_size;
1331 unsigned long memmap_pages = memory_block_memmap_on_memory_pages();
1271
1272 /*
1273 * Besides having arch support and the feature enabled at runtime, we
1274 * need a few more assumptions to hold true:
1275 *
1276 * a) We span a single memory block: memory onlining/offlining happens
1277 * in memory block granularity. We don't want the vmemmap of online
1278 * memory blocks to reside on offline memory blocks. In the future,

--- 11 unchanged lines hidden ---

1290 *
1291 * TODO: Although we have a check here to make sure that vmemmap pages
1292 * fully populate a PMD, it is not the right place to check for
1293 * this. A much better solution involves improving vmemmap code
1295 * to fall back to base pages when trying to populate vmemmap using
1295 * altmap as an alternative source of memory, and we do not exactly
1296 * populate a single PMD.
1297 */
1298 return mhp_memmap_on_memory() &&
1299 size == memory_block_size_bytes() &&
1300 IS_ALIGNED(remaining_size, (pageblock_nr_pages << PAGE_SHIFT)) &&
1301 arch_supports_memmap_on_memory(vmemmap_size);
1359 if (!mhp_memmap_on_memory() || size != memory_block_size_bytes())
1360 return false;
1361
1362 /*
1363 * Make sure the vmemmap allocation is fully contained
1364 * so that we always allocate vmemmap memory from altmap area.
1365 */
1366 if (!IS_ALIGNED(vmemmap_size, PAGE_SIZE))
1367 return false;
1368
1369 /*
1370 * start pfn should be pageblock_nr_pages aligned for correctly
1371 * setting migrate types
1372 */
1373 if (!pageblock_aligned(memmap_pages))
1374 return false;
1375
1376 if (memmap_pages == PHYS_PFN(memory_block_size_bytes()))
1377 /* Having no effective hotplugged memory doesn't make sense. */
1378 return false;
1379
1380 return arch_supports_memmap_on_memory(vmemmap_size);
1302}
1303
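Tying this to the user-space sketch shown after memory_block_memmap_on_memory_pages(): under that illustrative 64 KiB configuration, the default mode fails the pageblock_aligned() test (4 vmemmap pages against a 32-page pageblock), so hotplug falls back to an ordinary memmap, whereas "force" mode reserves a whole 32-page pageblock and passes, at the cost of 28 unusable pages per block. The final memmap_pages == PHYS_PFN(memory_block_size_bytes()) guard still rejects the degenerate case where the aligned reservation would consume the entire memory block.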
1304/*
1305 * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
1306 * and online/offline operations (triggered e.g. by sysfs).
1307 *
1308 * we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG
1309 */
1310int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
1311{
1312 struct mhp_params params = { .pgprot = pgprot_mhp(PAGE_KERNEL) };
1313 enum memblock_flags memblock_flags = MEMBLOCK_NONE;
1314 struct vmem_altmap mhp_altmap = {};
1393 struct vmem_altmap mhp_altmap = {
1394 .base_pfn = PHYS_PFN(res->start),
1395 .end_pfn = PHYS_PFN(res->end),
1396 };
1315 struct memory_group *group = NULL;
1316 u64 start, size;
1317 bool new_node = false;
1318 int ret;
1319
1320 start = res->start;
1321 size = resource_size(res);
1322

--- 28 unchanged lines hidden ---

1351 goto error;
1352 new_node = ret;
1353
1354 /*
1355 * Self hosted memmap array
1356 */
1357 if (mhp_flags & MHP_MEMMAP_ON_MEMORY) {
1358 if (mhp_supports_memmap_on_memory(size)) {
1359 mhp_altmap.free = PHYS_PFN(size);
1360 mhp_altmap.base_pfn = PHYS_PFN(start);
1441 mhp_altmap.free = memory_block_memmap_on_memory_pages();
1361 params.altmap = &mhp_altmap;
1362 }
1363 /* fall back to not using altmap */
1364 }
1365
1366 /* call arch's memory hotadd */
1367 ret = arch_add_memory(nid, start, size, &params);
1368 if (ret < 0)
1369 goto error;
1370
1371 /* create memory block devices after memory was added */
1372 ret = create_memory_block_devices(start, size, mhp_altmap.alloc,
1373 group);
1453 ret = create_memory_block_devices(start, size, mhp_altmap.free, group);
1374 if (ret) {
1375 arch_remove_memory(start, size, NULL);
1376 goto error;
1377 }
1378
1379 if (new_node) {
1380 /* If sysfs file of new node can't be created, cpu on the node
1381 * can't be hot-added. There is no way to roll back now.

--- 709 unchanged lines hidden ---
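The vmem_altmap prepared in add_memory_resource() above is what lets the architecture code take the memmap pages out of the range being hotplugged itself. The following toy model (not the kernel's struct vmem_altmap or its allocator) sketches that bookkeeping under assumed semantics: free PFNs are set aside at base_pfn, and alloc tracks how many of them have been handed out.

#include <stdio.h>

/* Toy stand-in for the altmap bookkeeping; names and layout are illustrative. */
struct toy_altmap {
	unsigned long base_pfn;	/* first PFN of the hotplugged range */
	unsigned long free;	/* PFNs set aside for the memmap */
	unsigned long alloc;	/* PFNs already handed out */
};

/* Hand out nr PFNs from the reservation; 0 signals exhaustion here. */
static unsigned long toy_altmap_alloc(struct toy_altmap *am, unsigned long nr)
{
	if (am->alloc + nr > am->free)
		return 0;
	am->alloc += nr;
	return am->base_pfn + am->alloc - nr;
}

int main(void)
{
	struct toy_altmap am = { .base_pfn = 0x100000, .free = 32, .alloc = 0 };

	printf("first memmap page placed at pfn %#lx\n", toy_altmap_alloc(&am, 4));
	printf("%lu of %lu reserved pfns used\n", am.alloc, am.free);
	return 0;
}

In the new version, .free is sized to memory_block_memmap_on_memory_pages() rather than to the whole block, and that same count is what gets passed to create_memory_block_devices().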

2091 return -EINVAL;
2092 }
2093
2094 /*
2095 * Let remove_pmd_table->free_hugepage_table do the
2096 * right thing if we used vmem_altmap when hot-adding
2097 * the range.
2098 */
2179 mhp_altmap.base_pfn = PHYS_PFN(start);
2180 mhp_altmap.free = nr_vmemmap_pages;
2099 mhp_altmap.alloc = nr_vmemmap_pages;
2100 altmap = &mhp_altmap;
2101 }
2102 }
2103
2104 /* remove memmap entry */
2105 firmware_map_remove(start, start + size, "System RAM");
2106

--- 170 unchanged lines hidden ---
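On the removal side above, rebuilding mhp_altmap with the matching base_pfn/free/alloc values before tearing the range down is, as the comment says, what lets the page-table teardown treat the vmemmap pages as altmap-backed, so they go away together with the hotplugged range rather than being freed like ordinarily allocated memmap pages.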