1c221c0b0SDave Hansen // SPDX-License-Identifier: GPL-2.0 2c221c0b0SDave Hansen /* Copyright(c) 2016-2019 Intel Corporation. All rights reserved. */ 3c221c0b0SDave Hansen #include <linux/memremap.h> 4c221c0b0SDave Hansen #include <linux/pagemap.h> 5c221c0b0SDave Hansen #include <linux/memory.h> 6c221c0b0SDave Hansen #include <linux/module.h> 7c221c0b0SDave Hansen #include <linux/device.h> 8c221c0b0SDave Hansen #include <linux/pfn_t.h> 9c221c0b0SDave Hansen #include <linux/slab.h> 10c221c0b0SDave Hansen #include <linux/dax.h> 11c221c0b0SDave Hansen #include <linux/fs.h> 12c221c0b0SDave Hansen #include <linux/mm.h> 13c221c0b0SDave Hansen #include <linux/mman.h> 14c221c0b0SDave Hansen #include "dax-private.h" 15c221c0b0SDave Hansen #include "bus.h" 16c221c0b0SDave Hansen 178a725e46SDavid Hildenbrand /* Memory resource name used for add_memory_driver_managed(). */ 188a725e46SDavid Hildenbrand static const char *kmem_name; 198a725e46SDavid Hildenbrand /* Set if any memory will remain added when the driver will be unloaded. */ 208a725e46SDavid Hildenbrand static bool any_hotremove_failed; 218a725e46SDavid Hildenbrand 2260e93dc0SDan Williams static int dax_kmem_range(struct dev_dax *dev_dax, int i, struct range *r) 2359bc8d10SDan Williams { 2460e93dc0SDan Williams struct dev_dax_range *dax_range = &dev_dax->ranges[i]; 2560e93dc0SDan Williams struct range *range = &dax_range->range; 2659bc8d10SDan Williams 2759bc8d10SDan Williams /* memory-block align the hotplug range */ 2860e93dc0SDan Williams r->start = ALIGN(range->start, memory_block_size_bytes()); 2960e93dc0SDan Williams r->end = ALIGN_DOWN(range->end + 1, memory_block_size_bytes()) - 1; 3060e93dc0SDan Williams if (r->start >= r->end) { 3160e93dc0SDan Williams r->start = range->start; 3260e93dc0SDan Williams r->end = range->end; 3360e93dc0SDan Williams return -ENOSPC; 3460e93dc0SDan Williams } 3560e93dc0SDan Williams return 0; 3659bc8d10SDan Williams } 3759bc8d10SDan Williams 38f11cf813SDan Williams static int dev_dax_kmem_probe(struct dev_dax *dev_dax) 39c221c0b0SDave Hansen { 40f11cf813SDan Williams struct device *dev = &dev_dax->dev; 4160e93dc0SDan Williams int i, mapped = 0; 427e6b431aSDan Williams char *res_name; 43c221c0b0SDave Hansen int numa_node; 44c221c0b0SDave Hansen 45c221c0b0SDave Hansen /* 46c221c0b0SDave Hansen * Ensure good NUMA information for the persistent memory. 47c221c0b0SDave Hansen * Without this check, there is a risk that slow memory 48c221c0b0SDave Hansen * could be mixed in a node with faster memory, causing 49c221c0b0SDave Hansen * unavoidable performance issues. 50c221c0b0SDave Hansen */ 51c221c0b0SDave Hansen numa_node = dev_dax->target_node; 52c221c0b0SDave Hansen if (numa_node < 0) { 53f5516ec5SDan Williams dev_warn(dev, "rejecting DAX region with invalid node: %d\n", 54f5516ec5SDan Williams numa_node); 55c221c0b0SDave Hansen return -EINVAL; 56c221c0b0SDave Hansen } 57c221c0b0SDave Hansen 587e6b431aSDan Williams res_name = kstrdup(dev_name(dev), GFP_KERNEL); 597e6b431aSDan Williams if (!res_name) 6060858c00SDavid Hildenbrand return -ENOMEM; 6160858c00SDavid Hildenbrand 6260e93dc0SDan Williams for (i = 0; i < dev_dax->nr_range; i++) { 6360e93dc0SDan Williams struct resource *res; 6460e93dc0SDan Williams struct range range; 6560e93dc0SDan Williams int rc; 6660e93dc0SDan Williams 6760e93dc0SDan Williams rc = dax_kmem_range(dev_dax, i, &range); 6860e93dc0SDan Williams if (rc) { 6960e93dc0SDan Williams dev_info(dev, "mapping%d: %#llx-%#llx too small after alignment\n", 7060e93dc0SDan Williams i, range.start, range.end); 7160e93dc0SDan Williams continue; 7260e93dc0SDan Williams } 7360e93dc0SDan Williams 7460858c00SDavid Hildenbrand /* Region is permanently reserved if hotremove fails. */ 750513bd5bSDan Williams res = request_mem_region(range.start, range_len(&range), res_name); 760513bd5bSDan Williams if (!res) { 7760e93dc0SDan Williams dev_warn(dev, "mapping%d: %#llx-%#llx could not reserve region\n", 7860e93dc0SDan Williams i, range.start, range.end); 7960e93dc0SDan Williams /* 8060e93dc0SDan Williams * Once some memory has been onlined we can't 8160e93dc0SDan Williams * assume that it can be un-onlined safely. 8260e93dc0SDan Williams */ 8360e93dc0SDan Williams if (mapped) 8460e93dc0SDan Williams continue; 857e6b431aSDan Williams kfree(res_name); 86c221c0b0SDave Hansen return -EBUSY; 87c221c0b0SDave Hansen } 88c221c0b0SDave Hansen 89c221c0b0SDave Hansen /* 90c221c0b0SDave Hansen * Set flags appropriate for System RAM. Leave ..._BUSY clear 91c221c0b0SDave Hansen * so that add_memory() can add a child resource. Do not 92c221c0b0SDave Hansen * inherit flags from the parent since it may set new flags 93c221c0b0SDave Hansen * unknown to us that will break add_memory() below. 94c221c0b0SDave Hansen */ 950513bd5bSDan Williams res->flags = IORESOURCE_SYSTEM_RAM; 96c221c0b0SDave Hansen 978a725e46SDavid Hildenbrand /* 9860e93dc0SDan Williams * Ensure that future kexec'd kernels will not treat 9960e93dc0SDan Williams * this as RAM automatically. 1008a725e46SDavid Hildenbrand */ 10160e93dc0SDan Williams rc = add_memory_driver_managed(numa_node, range.start, 10260e93dc0SDan Williams range_len(&range), kmem_name); 10360e93dc0SDan Williams 10431e4ca92SPavel Tatashin if (rc) { 10560e93dc0SDan Williams dev_warn(dev, "mapping%d: %#llx-%#llx memory add failed\n", 10660e93dc0SDan Williams i, range.start, range.end); 1070513bd5bSDan Williams release_mem_region(range.start, range_len(&range)); 10860e93dc0SDan Williams if (mapped) 10960e93dc0SDan Williams continue; 1107e6b431aSDan Williams kfree(res_name); 111c221c0b0SDave Hansen return rc; 11231e4ca92SPavel Tatashin } 11360e93dc0SDan Williams mapped++; 11460e93dc0SDan Williams } 1157e6b431aSDan Williams 1167e6b431aSDan Williams dev_set_drvdata(dev, res_name); 117c221c0b0SDave Hansen 118c221c0b0SDave Hansen return 0; 119c221c0b0SDave Hansen } 120c221c0b0SDave Hansen 1219f960da7SPavel Tatashin #ifdef CONFIG_MEMORY_HOTREMOVE 122f11cf813SDan Williams static int dev_dax_kmem_remove(struct dev_dax *dev_dax) 1239f960da7SPavel Tatashin { 12460e93dc0SDan Williams int i, success = 0; 125f11cf813SDan Williams struct device *dev = &dev_dax->dev; 1267e6b431aSDan Williams const char *res_name = dev_get_drvdata(dev); 1279f960da7SPavel Tatashin 1289f960da7SPavel Tatashin /* 1299f960da7SPavel Tatashin * We have one shot for removing memory, if some memory blocks were not 1309f960da7SPavel Tatashin * offline prior to calling this function remove_memory() will fail, and 1319f960da7SPavel Tatashin * there is no way to hotremove this memory until reboot because device 1329f960da7SPavel Tatashin * unbind will succeed even if we return failure. 1339f960da7SPavel Tatashin */ 13460e93dc0SDan Williams for (i = 0; i < dev_dax->nr_range; i++) { 13560e93dc0SDan Williams struct range range; 13660e93dc0SDan Williams int rc; 13760e93dc0SDan Williams 13860e93dc0SDan Williams rc = dax_kmem_range(dev_dax, i, &range); 13960e93dc0SDan Williams if (rc) 14060e93dc0SDan Williams continue; 14160e93dc0SDan Williams 14260e93dc0SDan Williams rc = remove_memory(dev_dax->target_node, range.start, 14360e93dc0SDan Williams range_len(&range)); 14460e93dc0SDan Williams if (rc == 0) { 14560e93dc0SDan Williams release_mem_region(range.start, range_len(&range)); 14660e93dc0SDan Williams success++; 14760e93dc0SDan Williams continue; 14860e93dc0SDan Williams } 1498a725e46SDavid Hildenbrand any_hotremove_failed = true; 15060e93dc0SDan Williams dev_err(dev, 15160e93dc0SDan Williams "mapping%d: %#llx-%#llx cannot be hotremoved until the next reboot\n", 15260e93dc0SDan Williams i, range.start, range.end); 1539f960da7SPavel Tatashin } 1549f960da7SPavel Tatashin 15560e93dc0SDan Williams if (success >= dev_dax->nr_range) { 15660858c00SDavid Hildenbrand kfree(res_name); 15760e93dc0SDan Williams dev_set_drvdata(dev, NULL); 15860e93dc0SDan Williams } 1599f960da7SPavel Tatashin 1609f960da7SPavel Tatashin return 0; 1619f960da7SPavel Tatashin } 1629f960da7SPavel Tatashin #else 163f11cf813SDan Williams static int dev_dax_kmem_remove(struct dev_dax *dev_dax) 164c221c0b0SDave Hansen { 165c221c0b0SDave Hansen /* 1669f960da7SPavel Tatashin * Without hotremove purposely leak the request_mem_region() for the 1679f960da7SPavel Tatashin * device-dax range and return '0' to ->remove() attempts. The removal 1689f960da7SPavel Tatashin * of the device from the driver always succeeds, but the region is 1699f960da7SPavel Tatashin * permanently pinned as reserved by the unreleased 170c221c0b0SDave Hansen * request_mem_region(). 171c221c0b0SDave Hansen */ 1728a725e46SDavid Hildenbrand any_hotremove_failed = true; 173c221c0b0SDave Hansen return 0; 174c221c0b0SDave Hansen } 1759f960da7SPavel Tatashin #endif /* CONFIG_MEMORY_HOTREMOVE */ 176c221c0b0SDave Hansen 177c221c0b0SDave Hansen static struct dax_device_driver device_dax_kmem_driver = { 178c221c0b0SDave Hansen .probe = dev_dax_kmem_probe, 179c221c0b0SDave Hansen .remove = dev_dax_kmem_remove, 180c221c0b0SDave Hansen }; 181c221c0b0SDave Hansen 182c221c0b0SDave Hansen static int __init dax_kmem_init(void) 183c221c0b0SDave Hansen { 1848a725e46SDavid Hildenbrand int rc; 1858a725e46SDavid Hildenbrand 1868a725e46SDavid Hildenbrand /* Resource name is permanently allocated if any hotremove fails. */ 1878a725e46SDavid Hildenbrand kmem_name = kstrdup_const("System RAM (kmem)", GFP_KERNEL); 1888a725e46SDavid Hildenbrand if (!kmem_name) 1898a725e46SDavid Hildenbrand return -ENOMEM; 1908a725e46SDavid Hildenbrand 1918a725e46SDavid Hildenbrand rc = dax_driver_register(&device_dax_kmem_driver); 1928a725e46SDavid Hildenbrand if (rc) 1938a725e46SDavid Hildenbrand kfree_const(kmem_name); 1948a725e46SDavid Hildenbrand return rc; 195c221c0b0SDave Hansen } 196c221c0b0SDave Hansen 197c221c0b0SDave Hansen static void __exit dax_kmem_exit(void) 198c221c0b0SDave Hansen { 199c221c0b0SDave Hansen dax_driver_unregister(&device_dax_kmem_driver); 2008a725e46SDavid Hildenbrand if (!any_hotremove_failed) 2018a725e46SDavid Hildenbrand kfree_const(kmem_name); 202c221c0b0SDave Hansen } 203c221c0b0SDave Hansen 204c221c0b0SDave Hansen MODULE_AUTHOR("Intel Corporation"); 205c221c0b0SDave Hansen MODULE_LICENSE("GPL v2"); 206c221c0b0SDave Hansen module_init(dax_kmem_init); 207c221c0b0SDave Hansen module_exit(dax_kmem_exit); 208c221c0b0SDave Hansen MODULE_ALIAS_DAX_DEVICE(0); 209