// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2015 Intel Corporation. All rights reserved. */
#include <linux/device.h>
#include <linux/io.h>
#include <linux/kasan.h>
#include <linux/memory_hotplug.h>
#include <linux/memremap.h>
#include <linux/pfn_t.h>
#include <linux/swap.h>
#include <linux/mmzone.h>
#include <linux/swapops.h>
#include <linux/types.h>
#include <linux/wait_bit.h>
#include <linux/xarray.h>
#include "internal.h"

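/* All registered ZONE_DEVICE ranges, indexed by PFN for get_dev_pagemap(). */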
static DEFINE_XARRAY(pgmap_array);

/*
 * The memremap() and memremap_pages() interfaces are alternately used
 * to map persistent memory namespaces. These interfaces place different
 * constraints on the alignment and size of the mapping (namespace).
 * memremap() can map individual PAGE_SIZE pages. memremap_pages() can
 * only map subsections (2MB), and on at least one architecture (PowerPC)
 * the minimum mapping granularity of memremap_pages() is 16MB.
 *
 * The role of memremap_compat_align() is to communicate the minimum
 * arch supported alignment of a namespace such that it can freely
 * switch modes without violating the arch constraint. Namely, do not
 * allow a namespace to be PAGE_SIZE aligned since that namespace may be
 * reconfigured into a mode that requires SUBSECTION_SIZE alignment.
 */
#ifndef CONFIG_ARCH_HAS_MEMREMAP_COMPAT_ALIGN
unsigned long memremap_compat_align(void)
{
        return SUBSECTION_SIZE;
}
EXPORT_SYMBOL_GPL(memremap_compat_align);
#endif

#ifdef CONFIG_FS_DAX
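/*
 * Counts pgmaps of type MEMORY_DEVICE_FS_DAX.  While it is non-zero,
 * refcount drops on fsdax pages take the devmap managed path, see
 * __put_devmap_managed_page_refs() below.
 */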
DEFINE_STATIC_KEY_FALSE(devmap_managed_key);
EXPORT_SYMBOL(devmap_managed_key);

static void devmap_managed_enable_put(struct dev_pagemap *pgmap)
{
        if (pgmap->type == MEMORY_DEVICE_FS_DAX)
                static_branch_dec(&devmap_managed_key);
}

static void devmap_managed_enable_get(struct dev_pagemap *pgmap)
{
        if (pgmap->type == MEMORY_DEVICE_FS_DAX)
                static_branch_inc(&devmap_managed_key);
}
#else
static void devmap_managed_enable_get(struct dev_pagemap *pgmap)
{
}
static void devmap_managed_enable_put(struct dev_pagemap *pgmap)
{
}
#endif /* CONFIG_FS_DAX */

static void pgmap_array_delete(struct range *range)
{
        xa_store_range(&pgmap_array, PHYS_PFN(range->start), PHYS_PFN(range->end),
                        NULL, GFP_KERNEL);
        synchronize_rcu();
}

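/*
 * First pfn of @range_id for which pfn_to_page() is valid.  Only the
 * first range may carry a vmem_altmap, whose reserved and free pages at
 * the base of the range are skipped.
 */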
static unsigned long pfn_first(struct dev_pagemap *pgmap, int range_id)
{
        struct range *range = &pgmap->ranges[range_id];
        unsigned long pfn = PHYS_PFN(range->start);

        if (range_id)
                return pfn;
        return pfn + vmem_altmap_offset(pgmap_altmap(pgmap));
}

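/* Return true if @pfn falls in one of @pgmap's ranges and has a valid memmap. */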
bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn)
{
        int i;

        for (i = 0; i < pgmap->nr_range; i++) {
                struct range *range = &pgmap->ranges[i];

                if (pfn >= PHYS_PFN(range->start) &&
                    pfn <= PHYS_PFN(range->end))
                        return pfn >= pfn_first(pgmap, i);
        }

        return false;
}

static unsigned long pfn_end(struct dev_pagemap *pgmap, int range_id)
{
        const struct range *range = &pgmap->ranges[range_id];

        return (range->start + range_len(range)) >> PAGE_SHIFT;
}

static unsigned long pfn_len(struct dev_pagemap *pgmap, unsigned long range_id)
{
        return (pfn_end(pgmap, range_id) -
                pfn_first(pgmap, range_id)) >> pgmap->vmemmap_shift;
}

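/*
 * Tear down one range: remove its pages from their zone, undo the arch
 * mapping (device private memory only ever had a memmap, so only that is
 * removed), and drop the range from pgmap_array.
 */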
static void pageunmap_range(struct dev_pagemap *pgmap, int range_id)
{
        struct range *range = &pgmap->ranges[range_id];
        struct page *first_page;

        /* make sure to access a memmap that was actually initialized */
        first_page = pfn_to_page(pfn_first(pgmap, range_id));

        /* pages are dead and unused, undo the arch mapping */
        mem_hotplug_begin();
        remove_pfn_range_from_zone(page_zone(first_page), PHYS_PFN(range->start),
                                   PHYS_PFN(range_len(range)));
        if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
                __remove_pages(PHYS_PFN(range->start),
                               PHYS_PFN(range_len(range)), NULL);
        } else {
                arch_remove_memory(range->start, range_len(range),
                                   pgmap_altmap(pgmap));
                kasan_remove_zero_shadow(__va(range->start), range_len(range));
        }
        mem_hotplug_done();

        untrack_pfn(NULL, PHYS_PFN(range->start), range_len(range), true);
        pgmap_array_delete(range);
}

void memunmap_pages(struct dev_pagemap *pgmap)
{
        int i;

        percpu_ref_kill(&pgmap->ref);
        if (pgmap->type != MEMORY_DEVICE_PRIVATE &&
            pgmap->type != MEMORY_DEVICE_COHERENT)
                for (i = 0; i < pgmap->nr_range; i++)
                        percpu_ref_put_many(&pgmap->ref, pfn_len(pgmap, i));

        wait_for_completion(&pgmap->done);

        for (i = 0; i < pgmap->nr_range; i++)
                pageunmap_range(pgmap, i);
        percpu_ref_exit(&pgmap->ref);

        WARN_ONCE(pgmap->altmap.alloc, "failed to free all reserved pages\n");
        devmap_managed_enable_put(pgmap);
}
EXPORT_SYMBOL_GPL(memunmap_pages);

static void devm_memremap_pages_release(void *data)
{
        memunmap_pages(data);
}

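/* Runs once the last reference to pgmap->ref is gone; unblocks memunmap_pages(). */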
dev_pagemap_percpu_release(struct percpu_ref * ref)16214c5cebaSChristoph Hellwig static void dev_pagemap_percpu_release(struct percpu_ref *ref)
16314c5cebaSChristoph Hellwig {
164b80892caSChristoph Hellwig struct dev_pagemap *pgmap = container_of(ref, struct dev_pagemap, ref);
16514c5cebaSChristoph Hellwig
16614c5cebaSChristoph Hellwig complete(&pgmap->done);
16714c5cebaSChristoph Hellwig }
16814c5cebaSChristoph Hellwig
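/*
 * Hotplug one range: check that it overlaps neither System RAM nor
 * another pgmap, register it in pgmap_array, add the memory (memmap only
 * for device private memory), move it into ZONE_DEVICE and initialize
 * its struct pages.
 */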
static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params,
                int range_id, int nid)
{
        const bool is_private = pgmap->type == MEMORY_DEVICE_PRIVATE;
        struct range *range = &pgmap->ranges[range_id];
        struct dev_pagemap *conflict_pgmap;
        int error, is_ram;

        if (WARN_ONCE(pgmap_altmap(pgmap) && range_id > 0,
                                "altmap not supported for multiple ranges\n"))
                return -EINVAL;

        conflict_pgmap = get_dev_pagemap(PHYS_PFN(range->start), NULL);
        if (conflict_pgmap) {
                WARN(1, "Conflicting mapping in same section\n");
                put_dev_pagemap(conflict_pgmap);
                return -ENOMEM;
        }

        conflict_pgmap = get_dev_pagemap(PHYS_PFN(range->end), NULL);
        if (conflict_pgmap) {
                WARN(1, "Conflicting mapping in same section\n");
                put_dev_pagemap(conflict_pgmap);
                return -ENOMEM;
        }

        is_ram = region_intersects(range->start, range_len(range),
                IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE);

        if (is_ram != REGION_DISJOINT) {
                WARN_ONCE(1, "attempted on %s region %#llx-%#llx\n",
                                is_ram == REGION_MIXED ? "mixed" : "ram",
                                range->start, range->end);
                return -ENXIO;
        }

        error = xa_err(xa_store_range(&pgmap_array, PHYS_PFN(range->start),
                                PHYS_PFN(range->end), pgmap, GFP_KERNEL));
        if (error)
                return error;

        if (nid < 0)
                nid = numa_mem_id();

        error = track_pfn_remap(NULL, &params->pgprot, PHYS_PFN(range->start), 0,
                        range_len(range));
        if (error)
                goto err_pfn_remap;

        if (!mhp_range_allowed(range->start, range_len(range), !is_private)) {
                error = -EINVAL;
                goto err_kasan;
        }

        mem_hotplug_begin();

        /*
         * For device private memory we call add_pages() as we only need to
         * allocate and initialize struct page for the device memory. More-
         * over the device memory is un-accessible thus we do not want to
         * create a linear mapping for the memory like arch_add_memory()
         * would do.
         *
         * For all other device memory types, which are accessible by
         * the CPU, we do want the linear mapping and thus use
         * arch_add_memory().
         */
        if (is_private) {
                error = add_pages(nid, PHYS_PFN(range->start),
                                PHYS_PFN(range_len(range)), params);
        } else {
                error = kasan_add_zero_shadow(__va(range->start), range_len(range));
                if (error) {
                        mem_hotplug_done();
                        goto err_kasan;
                }

                error = arch_add_memory(nid, range->start, range_len(range),
                                        params);
        }

        if (!error) {
                struct zone *zone;

                zone = &NODE_DATA(nid)->node_zones[ZONE_DEVICE];
                move_pfn_range_to_zone(zone, PHYS_PFN(range->start),
                                PHYS_PFN(range_len(range)), params->altmap,
                                MIGRATE_MOVABLE);
        }

        mem_hotplug_done();
        if (error)
                goto err_add_memory;

        /*
         * Initialization of the pages has been deferred until now in order
         * to allow us to do the work while not holding the hotplug lock.
         */
        memmap_init_zone_device(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
                                PHYS_PFN(range->start),
                                PHYS_PFN(range_len(range)), pgmap);
        if (pgmap->type != MEMORY_DEVICE_PRIVATE &&
            pgmap->type != MEMORY_DEVICE_COHERENT)
                percpu_ref_get_many(&pgmap->ref, pfn_len(pgmap, range_id));
        return 0;

err_add_memory:
        if (!is_private)
                kasan_remove_zero_shadow(__va(range->start), range_len(range));
err_kasan:
        untrack_pfn(NULL, PHYS_PFN(range->start), range_len(range), true);
err_pfn_remap:
        pgmap_array_delete(range);
        return error;
}


/*
 * Not device managed version of devm_memremap_pages, undone by
 * memunmap_pages().  Please use devm_memremap_pages if you have a struct
 * device available.
 */
void *memremap_pages(struct dev_pagemap *pgmap, int nid)
{
        struct mhp_params params = {
                .altmap = pgmap_altmap(pgmap),
                .pgmap = pgmap,
                .pgprot = PAGE_KERNEL,
        };
        const int nr_range = pgmap->nr_range;
        int error, i;

        if (WARN_ONCE(!nr_range, "nr_range must be specified\n"))
                return ERR_PTR(-EINVAL);

        switch (pgmap->type) {
        case MEMORY_DEVICE_PRIVATE:
                if (!IS_ENABLED(CONFIG_DEVICE_PRIVATE)) {
                        WARN(1, "Device private memory not supported\n");
                        return ERR_PTR(-EINVAL);
                }
                if (!pgmap->ops || !pgmap->ops->migrate_to_ram) {
                        WARN(1, "Missing migrate_to_ram method\n");
                        return ERR_PTR(-EINVAL);
                }
                if (!pgmap->ops->page_free) {
                        WARN(1, "Missing page_free method\n");
                        return ERR_PTR(-EINVAL);
                }
                if (!pgmap->owner) {
                        WARN(1, "Missing owner\n");
                        return ERR_PTR(-EINVAL);
                }
                break;
        case MEMORY_DEVICE_COHERENT:
                if (!pgmap->ops->page_free) {
                        WARN(1, "Missing page_free method\n");
                        return ERR_PTR(-EINVAL);
                }
                if (!pgmap->owner) {
                        WARN(1, "Missing owner\n");
                        return ERR_PTR(-EINVAL);
                }
                break;
        case MEMORY_DEVICE_FS_DAX:
                if (IS_ENABLED(CONFIG_FS_DAX_LIMITED)) {
                        WARN(1, "File system DAX not supported\n");
                        return ERR_PTR(-EINVAL);
                }
                params.pgprot = pgprot_decrypted(params.pgprot);
                break;
        case MEMORY_DEVICE_GENERIC:
                break;
        case MEMORY_DEVICE_PCI_P2PDMA:
                params.pgprot = pgprot_noncached(params.pgprot);
                break;
        default:
                WARN(1, "Invalid pgmap type %d\n", pgmap->type);
                break;
        }

        init_completion(&pgmap->done);
        error = percpu_ref_init(&pgmap->ref, dev_pagemap_percpu_release, 0,
                                GFP_KERNEL);
        if (error)
                return ERR_PTR(error);

        devmap_managed_enable_get(pgmap);

        /*
         * Clear the pgmap nr_range as it will be incremented for each
         * successfully processed range. This communicates how many
         * regions to unwind in the abort case.
         */
        pgmap->nr_range = 0;
        error = 0;
        for (i = 0; i < nr_range; i++) {
                error = pagemap_range(pgmap, &params, i, nid);
                if (error)
                        break;
                pgmap->nr_range++;
        }

        if (i < nr_range) {
                memunmap_pages(pgmap);
                pgmap->nr_range = nr_range;
                return ERR_PTR(error);
        }

        return __va(pgmap->ranges[0].start);
}
EXPORT_SYMBOL_GPL(memremap_pages);

/**
 * devm_memremap_pages - remap and provide memmap backing for the given resource
 * @dev: hosting device for @pgmap
 * @pgmap: pointer to a struct dev_pagemap
 *
 * Notes:
 * 1/ At a minimum the range and type members of @pgmap must be initialized
 *    by the caller before passing it to this function
 *
 * 2/ The altmap field may optionally be initialized, in which case
 *    PGMAP_ALTMAP_VALID must be set in pgmap->flags.
 *
 * 3/ The ref field may optionally be provided, in which case pgmap->ref must
 *    be 'live' on entry and will be killed and reaped at
 *    devm_memremap_pages_release() time, or if this routine fails.
 *
 * 4/ range is expected to be a host memory range that could feasibly be
 *    treated as a "System RAM" range, i.e. not a device mmio range, but
 *    this is not enforced.
 */
void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
{
        int error;
        void *ret;

        ret = memremap_pages(pgmap, dev_to_node(dev));
        if (IS_ERR(ret))
                return ret;

        error = devm_add_action_or_reset(dev, devm_memremap_pages_release,
                        pgmap);
        if (error)
                return ERR_PTR(error);
        return ret;
}
EXPORT_SYMBOL_GPL(devm_memremap_pages);
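
/*
 * Minimal usage sketch (hypothetical caller, not taken from an in-tree
 * driver): the caller owns the physical range described by @res and
 * fills in the pgmap before handing it off:
 *
 *      pgmap->range = (struct range) {
 *              .start = res->start,
 *              .end = res->end,
 *      };
 *      pgmap->nr_range = 1;
 *      pgmap->type = MEMORY_DEVICE_GENERIC;
 *      addr = devm_memremap_pages(dev, pgmap);
 *      if (IS_ERR(addr))
 *              return PTR_ERR(addr);
 */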

void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap)
{
        devm_release_action(dev, devm_memremap_pages_release, pgmap);
}
EXPORT_SYMBOL_GPL(devm_memunmap_pages);

unsigned long vmem_altmap_offset(struct vmem_altmap *altmap)
{
        /* number of pfns from base where pfn_to_page() is valid */
        if (altmap)
                return altmap->reserve + altmap->free;
        return 0;
}

void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns)
{
        altmap->alloc -= nr_pfns;
}

/**
 * get_dev_pagemap() - take a new live reference on the dev_pagemap for @pfn
 * @pfn: page frame number to lookup page_map
 * @pgmap: optional known pgmap that already has a reference
 *
 * If @pgmap is non-NULL and covers @pfn it will be returned as-is.  If @pgmap
 * is non-NULL but does not cover @pfn the reference to it will be released.
 */
struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
                struct dev_pagemap *pgmap)
{
        resource_size_t phys = PFN_PHYS(pfn);

        /*
         * In the cached case we're already holding a live reference.
         */
        if (pgmap) {
                if (phys >= pgmap->range.start && phys <= pgmap->range.end)
                        return pgmap;
                put_dev_pagemap(pgmap);
        }

        /* fall back to slow path lookup */
        rcu_read_lock();
        pgmap = xa_load(&pgmap_array, PHYS_PFN(phys));
        if (pgmap && !percpu_ref_tryget_live_rcu(&pgmap->ref))
                pgmap = NULL;
        rcu_read_unlock();

        return pgmap;
}
EXPORT_SYMBOL_GPL(get_dev_pagemap);

void free_zone_device_page(struct page *page)
{
        if (WARN_ON_ONCE(!page->pgmap->ops || !page->pgmap->ops->page_free))
                return;

        mem_cgroup_uncharge(page_folio(page));

        /*
         * Note: we don't expect anonymous compound pages yet. Once supported
         * and we could PTE-map them similar to THP, we'd have to clear
         * PG_anon_exclusive on all tail pages.
         */
        VM_BUG_ON_PAGE(PageAnon(page) && PageCompound(page), page);
        if (PageAnon(page))
                __ClearPageAnonExclusive(page);

        /*
         * When a device managed page is freed, the page->mapping field
         * may still contain a (stale) mapping value. For example, the
         * lower bits of page->mapping may still identify the page as an
         * anonymous page. Ultimately, this entire field is just stale
         * and wrong, and it will cause errors if not cleared. One
         * example is:
         *
         *  migrate_vma_pages()
         *    migrate_vma_insert_page()
         *      page_add_new_anon_rmap()
         *        __page_set_anon_rmap()
         *          ...checks page->mapping, via PageAnon(page) call,
         *            and incorrectly concludes that the page is an
         *            anonymous page. Therefore, it incorrectly,
         *            silently fails to set up the new anon rmap.
         *
         * For other types of ZONE_DEVICE pages, migration is either
         * handled differently or not done at all, so there is no need
         * to clear page->mapping.
         */
        page->mapping = NULL;
        page->pgmap->ops->page_free(page);

        if (page->pgmap->type != MEMORY_DEVICE_PRIVATE &&
            page->pgmap->type != MEMORY_DEVICE_COHERENT)
                /*
                 * Reset the page count to 1 to prepare for handing out the page
                 * again.
                 */
                set_page_count(page, 1);
        else
                put_dev_pagemap(page->pgmap);
}

void zone_device_page_init(struct page *page)
{
        /*
         * Drivers shouldn't be allocating pages after calling
         * memunmap_pages().
         */
        WARN_ON_ONCE(!percpu_ref_tryget_live(&page->pgmap->ref));
        set_page_count(page, 1);
        lock_page(page);
}
EXPORT_SYMBOL_GPL(zone_device_page_init);

#ifdef CONFIG_FS_DAX
bool __put_devmap_managed_page_refs(struct page *page, int refs)
{
        if (page->pgmap->type != MEMORY_DEVICE_FS_DAX)
                return false;

        /*
         * fsdax page refcounts are 1-based, rather than 0-based: if
         * refcount is 1, then the page is free and the refcount is
         * stable because nobody holds a reference on the page.
         */
        if (page_ref_sub_return(page, refs) == 1)
                wake_up_var(&page->_refcount);
        return true;
}
EXPORT_SYMBOL(__put_devmap_managed_page_refs);
#endif /* CONFIG_FS_DAX */