1c221c0b0SDave Hansen // SPDX-License-Identifier: GPL-2.0 2c221c0b0SDave Hansen /* Copyright(c) 2016-2019 Intel Corporation. All rights reserved. */ 3c221c0b0SDave Hansen #include <linux/memremap.h> 4c221c0b0SDave Hansen #include <linux/pagemap.h> 5c221c0b0SDave Hansen #include <linux/memory.h> 6c221c0b0SDave Hansen #include <linux/module.h> 7c221c0b0SDave Hansen #include <linux/device.h> 8c221c0b0SDave Hansen #include <linux/pfn_t.h> 9c221c0b0SDave Hansen #include <linux/slab.h> 10c221c0b0SDave Hansen #include <linux/dax.h> 11c221c0b0SDave Hansen #include <linux/fs.h> 12c221c0b0SDave Hansen #include <linux/mm.h> 13c221c0b0SDave Hansen #include <linux/mman.h> 14c221c0b0SDave Hansen #include "dax-private.h" 15c221c0b0SDave Hansen #include "bus.h" 16c221c0b0SDave Hansen 178a725e46SDavid Hildenbrand /* Memory resource name used for add_memory_driver_managed(). */ 188a725e46SDavid Hildenbrand static const char *kmem_name; 198a725e46SDavid Hildenbrand /* Set if any memory will remain added when the driver will be unloaded. */ 208a725e46SDavid Hildenbrand static bool any_hotremove_failed; 218a725e46SDavid Hildenbrand 2260e93dc0SDan Williams static int dax_kmem_range(struct dev_dax *dev_dax, int i, struct range *r) 2359bc8d10SDan Williams { 2460e93dc0SDan Williams struct dev_dax_range *dax_range = &dev_dax->ranges[i]; 2560e93dc0SDan Williams struct range *range = &dax_range->range; 2659bc8d10SDan Williams 2759bc8d10SDan Williams /* memory-block align the hotplug range */ 2860e93dc0SDan Williams r->start = ALIGN(range->start, memory_block_size_bytes()); 2960e93dc0SDan Williams r->end = ALIGN_DOWN(range->end + 1, memory_block_size_bytes()) - 1; 3060e93dc0SDan Williams if (r->start >= r->end) { 3160e93dc0SDan Williams r->start = range->start; 3260e93dc0SDan Williams r->end = range->end; 3360e93dc0SDan Williams return -ENOSPC; 3460e93dc0SDan Williams } 3560e93dc0SDan Williams return 0; 3659bc8d10SDan Williams } 3759bc8d10SDan Williams 38a455aa72SDan Williams struct dax_kmem_data { 39a455aa72SDan Williams const char *res_name; 40a455aa72SDan Williams struct resource *res[]; 41a455aa72SDan Williams }; 42a455aa72SDan Williams 43f11cf813SDan Williams static int dev_dax_kmem_probe(struct dev_dax *dev_dax) 44c221c0b0SDave Hansen { 45f11cf813SDan Williams struct device *dev = &dev_dax->dev; 46a455aa72SDan Williams struct dax_kmem_data *data; 47a455aa72SDan Williams int rc = -ENOMEM; 4860e93dc0SDan Williams int i, mapped = 0; 49c221c0b0SDave Hansen int numa_node; 50c221c0b0SDave Hansen 51c221c0b0SDave Hansen /* 52c221c0b0SDave Hansen * Ensure good NUMA information for the persistent memory. 53c221c0b0SDave Hansen * Without this check, there is a risk that slow memory 54c221c0b0SDave Hansen * could be mixed in a node with faster memory, causing 55c221c0b0SDave Hansen * unavoidable performance issues. 56c221c0b0SDave Hansen */ 57c221c0b0SDave Hansen numa_node = dev_dax->target_node; 58c221c0b0SDave Hansen if (numa_node < 0) { 59f5516ec5SDan Williams dev_warn(dev, "rejecting DAX region with invalid node: %d\n", 60f5516ec5SDan Williams numa_node); 61c221c0b0SDave Hansen return -EINVAL; 62c221c0b0SDave Hansen } 63c221c0b0SDave Hansen 64a455aa72SDan Williams data = kzalloc(sizeof(*data) + sizeof(struct resource *) * dev_dax->nr_range, GFP_KERNEL); 65a455aa72SDan Williams if (!data) 6660858c00SDavid Hildenbrand return -ENOMEM; 6760858c00SDavid Hildenbrand 68a455aa72SDan Williams data->res_name = kstrdup(dev_name(dev), GFP_KERNEL); 69a455aa72SDan Williams if (!data->res_name) 70a455aa72SDan Williams goto err_res_name; 71a455aa72SDan Williams 7260e93dc0SDan Williams for (i = 0; i < dev_dax->nr_range; i++) { 7360e93dc0SDan Williams struct resource *res; 7460e93dc0SDan Williams struct range range; 7560e93dc0SDan Williams 7660e93dc0SDan Williams rc = dax_kmem_range(dev_dax, i, &range); 7760e93dc0SDan Williams if (rc) { 7860e93dc0SDan Williams dev_info(dev, "mapping%d: %#llx-%#llx too small after alignment\n", 7960e93dc0SDan Williams i, range.start, range.end); 8060e93dc0SDan Williams continue; 8160e93dc0SDan Williams } 8260e93dc0SDan Williams 8360858c00SDavid Hildenbrand /* Region is permanently reserved if hotremove fails. */ 84a455aa72SDan Williams res = request_mem_region(range.start, range_len(&range), data->res_name); 850513bd5bSDan Williams if (!res) { 8660e93dc0SDan Williams dev_warn(dev, "mapping%d: %#llx-%#llx could not reserve region\n", 8760e93dc0SDan Williams i, range.start, range.end); 8860e93dc0SDan Williams /* 8960e93dc0SDan Williams * Once some memory has been onlined we can't 9060e93dc0SDan Williams * assume that it can be un-onlined safely. 9160e93dc0SDan Williams */ 9260e93dc0SDan Williams if (mapped) 9360e93dc0SDan Williams continue; 94a455aa72SDan Williams rc = -EBUSY; 95a455aa72SDan Williams goto err_request_mem; 96c221c0b0SDave Hansen } 97a455aa72SDan Williams data->res[i] = res; 98c221c0b0SDave Hansen 99c221c0b0SDave Hansen /* 100c221c0b0SDave Hansen * Set flags appropriate for System RAM. Leave ..._BUSY clear 101c221c0b0SDave Hansen * so that add_memory() can add a child resource. Do not 102c221c0b0SDave Hansen * inherit flags from the parent since it may set new flags 103c221c0b0SDave Hansen * unknown to us that will break add_memory() below. 104c221c0b0SDave Hansen */ 1050513bd5bSDan Williams res->flags = IORESOURCE_SYSTEM_RAM; 106c221c0b0SDave Hansen 1078a725e46SDavid Hildenbrand /* 10860e93dc0SDan Williams * Ensure that future kexec'd kernels will not treat 10960e93dc0SDan Williams * this as RAM automatically. 1108a725e46SDavid Hildenbrand */ 11160e93dc0SDan Williams rc = add_memory_driver_managed(numa_node, range.start, 11260e93dc0SDan Williams range_len(&range), kmem_name); 11360e93dc0SDan Williams 11431e4ca92SPavel Tatashin if (rc) { 11560e93dc0SDan Williams dev_warn(dev, "mapping%d: %#llx-%#llx memory add failed\n", 11660e93dc0SDan Williams i, range.start, range.end); 117a455aa72SDan Williams release_resource(res); 118a455aa72SDan Williams kfree(res); 119a455aa72SDan Williams data->res[i] = NULL; 12060e93dc0SDan Williams if (mapped) 12160e93dc0SDan Williams continue; 122a455aa72SDan Williams goto err_request_mem; 12331e4ca92SPavel Tatashin } 12460e93dc0SDan Williams mapped++; 12560e93dc0SDan Williams } 1267e6b431aSDan Williams 127a455aa72SDan Williams dev_set_drvdata(dev, data); 128c221c0b0SDave Hansen 129c221c0b0SDave Hansen return 0; 130a455aa72SDan Williams 131a455aa72SDan Williams err_request_mem: 132a455aa72SDan Williams kfree(data->res_name); 133a455aa72SDan Williams err_res_name: 134a455aa72SDan Williams kfree(data); 135a455aa72SDan Williams return rc; 136c221c0b0SDave Hansen } 137c221c0b0SDave Hansen 1389f960da7SPavel Tatashin #ifdef CONFIG_MEMORY_HOTREMOVE 139f11cf813SDan Williams static int dev_dax_kmem_remove(struct dev_dax *dev_dax) 1409f960da7SPavel Tatashin { 14160e93dc0SDan Williams int i, success = 0; 142f11cf813SDan Williams struct device *dev = &dev_dax->dev; 143a455aa72SDan Williams struct dax_kmem_data *data = dev_get_drvdata(dev); 1449f960da7SPavel Tatashin 1459f960da7SPavel Tatashin /* 1469f960da7SPavel Tatashin * We have one shot for removing memory, if some memory blocks were not 1479f960da7SPavel Tatashin * offline prior to calling this function remove_memory() will fail, and 1489f960da7SPavel Tatashin * there is no way to hotremove this memory until reboot because device 1499f960da7SPavel Tatashin * unbind will succeed even if we return failure. 1509f960da7SPavel Tatashin */ 15160e93dc0SDan Williams for (i = 0; i < dev_dax->nr_range; i++) { 15260e93dc0SDan Williams struct range range; 15360e93dc0SDan Williams int rc; 15460e93dc0SDan Williams 15560e93dc0SDan Williams rc = dax_kmem_range(dev_dax, i, &range); 15660e93dc0SDan Williams if (rc) 15760e93dc0SDan Williams continue; 15860e93dc0SDan Williams 15960e93dc0SDan Williams rc = remove_memory(dev_dax->target_node, range.start, 16060e93dc0SDan Williams range_len(&range)); 16160e93dc0SDan Williams if (rc == 0) { 162a455aa72SDan Williams release_resource(data->res[i]); 163a455aa72SDan Williams kfree(data->res[i]); 164a455aa72SDan Williams data->res[i] = NULL; 16560e93dc0SDan Williams success++; 16660e93dc0SDan Williams continue; 16760e93dc0SDan Williams } 1688a725e46SDavid Hildenbrand any_hotremove_failed = true; 16960e93dc0SDan Williams dev_err(dev, 17060e93dc0SDan Williams "mapping%d: %#llx-%#llx cannot be hotremoved until the next reboot\n", 17160e93dc0SDan Williams i, range.start, range.end); 1729f960da7SPavel Tatashin } 1739f960da7SPavel Tatashin 17460e93dc0SDan Williams if (success >= dev_dax->nr_range) { 175a455aa72SDan Williams kfree(data->res_name); 176a455aa72SDan Williams kfree(data); 17760e93dc0SDan Williams dev_set_drvdata(dev, NULL); 17860e93dc0SDan Williams } 1799f960da7SPavel Tatashin 1809f960da7SPavel Tatashin return 0; 1819f960da7SPavel Tatashin } 1829f960da7SPavel Tatashin #else 183f11cf813SDan Williams static int dev_dax_kmem_remove(struct dev_dax *dev_dax) 184c221c0b0SDave Hansen { 185c221c0b0SDave Hansen /* 1869f960da7SPavel Tatashin * Without hotremove purposely leak the request_mem_region() for the 1879f960da7SPavel Tatashin * device-dax range and return '0' to ->remove() attempts. The removal 1889f960da7SPavel Tatashin * of the device from the driver always succeeds, but the region is 1899f960da7SPavel Tatashin * permanently pinned as reserved by the unreleased 190c221c0b0SDave Hansen * request_mem_region(). 191c221c0b0SDave Hansen */ 1928a725e46SDavid Hildenbrand any_hotremove_failed = true; 193c221c0b0SDave Hansen return 0; 194c221c0b0SDave Hansen } 1959f960da7SPavel Tatashin #endif /* CONFIG_MEMORY_HOTREMOVE */ 196c221c0b0SDave Hansen 197c221c0b0SDave Hansen static struct dax_device_driver device_dax_kmem_driver = { 198c221c0b0SDave Hansen .probe = dev_dax_kmem_probe, 199c221c0b0SDave Hansen .remove = dev_dax_kmem_remove, 200c221c0b0SDave Hansen }; 201c221c0b0SDave Hansen 202c221c0b0SDave Hansen static int __init dax_kmem_init(void) 203c221c0b0SDave Hansen { 2048a725e46SDavid Hildenbrand int rc; 2058a725e46SDavid Hildenbrand 2068a725e46SDavid Hildenbrand /* Resource name is permanently allocated if any hotremove fails. */ 2078a725e46SDavid Hildenbrand kmem_name = kstrdup_const("System RAM (kmem)", GFP_KERNEL); 2088a725e46SDavid Hildenbrand if (!kmem_name) 2098a725e46SDavid Hildenbrand return -ENOMEM; 2108a725e46SDavid Hildenbrand 2118a725e46SDavid Hildenbrand rc = dax_driver_register(&device_dax_kmem_driver); 2128a725e46SDavid Hildenbrand if (rc) 2138a725e46SDavid Hildenbrand kfree_const(kmem_name); 2148a725e46SDavid Hildenbrand return rc; 215c221c0b0SDave Hansen } 216c221c0b0SDave Hansen 217c221c0b0SDave Hansen static void __exit dax_kmem_exit(void) 218c221c0b0SDave Hansen { 219c221c0b0SDave Hansen dax_driver_unregister(&device_dax_kmem_driver); 2208a725e46SDavid Hildenbrand if (!any_hotremove_failed) 2218a725e46SDavid Hildenbrand kfree_const(kmem_name); 222c221c0b0SDave Hansen } 223c221c0b0SDave Hansen 224c221c0b0SDave Hansen MODULE_AUTHOR("Intel Corporation"); 225c221c0b0SDave Hansen MODULE_LICENSE("GPL v2"); 226c221c0b0SDave Hansen module_init(dax_kmem_init); 227c221c0b0SDave Hansen module_exit(dax_kmem_exit); 228c221c0b0SDave Hansen MODULE_ALIAS_DAX_DEVICE(0); 229