xref: /openbmc/linux/arch/x86/mm/ioremap.c (revision de5a44f351ca7efd9add9851b218f5353e2224b7)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Re-map IO memory to kernel address space so that we can access it.
4  * This is needed for high PCI addresses that aren't mapped in the
5  * 640k-1MB IO memory area on PC's
6  *
7  * (C) Copyright 1995 1996 Linus Torvalds
8  */
9 
10 #include <linux/memblock.h>
11 #include <linux/init.h>
12 #include <linux/io.h>
13 #include <linux/ioport.h>
14 #include <linux/slab.h>
15 #include <linux/vmalloc.h>
16 #include <linux/mmiotrace.h>
17 #include <linux/cc_platform.h>
18 #include <linux/efi.h>
19 #include <linux/pgtable.h>
20 #include <linux/kmsan.h>
21 
22 #include <asm/set_memory.h>
23 #include <asm/e820/api.h>
24 #include <asm/efi.h>
25 #include <asm/fixmap.h>
26 #include <asm/tlbflush.h>
27 #include <asm/pgalloc.h>
28 #include <asm/memtype.h>
29 #include <asm/setup.h>
30 
31 #include "physaddr.h"
32 
/*
 * Descriptor controlling ioremap() behavior.
 *
 * It is reset and filled in by __ioremap_check_mem() and then inspected by
 * __ioremap_caller() to decide whether the mapping is allowed and whether
 * it must be encrypted.
 */
struct ioremap_desc {
	/* Bitmask of IORES_MAP_SYSTEM_RAM and/or IORES_MAP_ENCRYPTED. */
	unsigned int flags;
};
39 
40 /*
41  * Fix up the linear direct mapping of the kernel to avoid cache attribute
42  * conflicts.
43  */
44 int ioremap_change_attr(unsigned long vaddr, unsigned long size,
45 			enum page_cache_mode pcm)
46 {
47 	unsigned long nrpages = size >> PAGE_SHIFT;
48 	int err;
49 
50 	switch (pcm) {
51 	case _PAGE_CACHE_MODE_UC:
52 	default:
53 		err = _set_memory_uc(vaddr, nrpages);
54 		break;
55 	case _PAGE_CACHE_MODE_WC:
56 		err = _set_memory_wc(vaddr, nrpages);
57 		break;
58 	case _PAGE_CACHE_MODE_WT:
59 		err = _set_memory_wt(vaddr, nrpages);
60 		break;
61 	case _PAGE_CACHE_MODE_WB:
62 		err = _set_memory_wb(vaddr, nrpages);
63 		break;
64 	}
65 
66 	return err;
67 }
68 
69 /* Does the range (or a subset of) contain normal RAM? */
70 static unsigned int __ioremap_check_ram(struct resource *res)
71 {
72 	unsigned long start_pfn, stop_pfn;
73 	unsigned long i;
74 
75 	if ((res->flags & IORESOURCE_SYSTEM_RAM) != IORESOURCE_SYSTEM_RAM)
76 		return 0;
77 
78 	start_pfn = (res->start + PAGE_SIZE - 1) >> PAGE_SHIFT;
79 	stop_pfn = (res->end + 1) >> PAGE_SHIFT;
80 	if (stop_pfn > start_pfn) {
81 		for (i = 0; i < (stop_pfn - start_pfn); ++i)
82 			if (pfn_valid(start_pfn + i) &&
83 			    !PageReserved(pfn_to_page(start_pfn + i)))
84 				return IORES_MAP_SYSTEM_RAM;
85 	}
86 
87 	return 0;
88 }
89 
90 /*
91  * In a SEV guest, NONE and RESERVED should not be mapped encrypted because
92  * there the whole memory is already encrypted.
93  */
94 static unsigned int __ioremap_check_encrypted(struct resource *res)
95 {
96 	if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
97 		return 0;
98 
99 	switch (res->desc) {
100 	case IORES_DESC_NONE:
101 	case IORES_DESC_RESERVED:
102 		break;
103 	default:
104 		return IORES_MAP_ENCRYPTED;
105 	}
106 
107 	return 0;
108 }
109 
110 /*
111  * The EFI runtime services data area is not covered by walk_mem_res(), but must
112  * be mapped encrypted when SEV is active.
113  */
114 static void __ioremap_check_other(resource_size_t addr, struct ioremap_desc *desc)
115 {
116 	if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
117 		return;
118 
119 	if (x86_platform.hyper.is_private_mmio(addr)) {
120 		desc->flags |= IORES_MAP_ENCRYPTED;
121 		return;
122 	}
123 
124 	if (!IS_ENABLED(CONFIG_EFI))
125 		return;
126 
127 	if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA ||
128 	    (efi_mem_type(addr) == EFI_BOOT_SERVICES_DATA &&
129 	     efi_mem_attributes(addr) & EFI_MEMORY_RUNTIME))
130 		desc->flags |= IORES_MAP_ENCRYPTED;
131 }
132 
133 static int __ioremap_collect_map_flags(struct resource *res, void *arg)
134 {
135 	struct ioremap_desc *desc = arg;
136 
137 	if (!(desc->flags & IORES_MAP_SYSTEM_RAM))
138 		desc->flags |= __ioremap_check_ram(res);
139 
140 	if (!(desc->flags & IORES_MAP_ENCRYPTED))
141 		desc->flags |= __ioremap_check_encrypted(res);
142 
143 	return ((desc->flags & (IORES_MAP_SYSTEM_RAM | IORES_MAP_ENCRYPTED)) ==
144 			       (IORES_MAP_SYSTEM_RAM | IORES_MAP_ENCRYPTED));
145 }
146 
147 /*
148  * To avoid multiple resource walks, this function walks resources marked as
149  * IORESOURCE_MEM and IORESOURCE_BUSY and looking for system RAM and/or a
150  * resource described not as IORES_DESC_NONE (e.g. IORES_DESC_ACPI_TABLES).
151  *
152  * After that, deal with misc other ranges in __ioremap_check_other() which do
153  * not fall into the above category.
154  */
155 static void __ioremap_check_mem(resource_size_t addr, unsigned long size,
156 				struct ioremap_desc *desc)
157 {
158 	u64 start, end;
159 
160 	start = (u64)addr;
161 	end = start + size - 1;
162 	memset(desc, 0, sizeof(struct ioremap_desc));
163 
164 	walk_mem_res(start, end, desc, __ioremap_collect_map_flags);
165 
166 	__ioremap_check_other(addr, desc);
167 }
168 
169 /*
170  * Remap an arbitrary physical address space into the kernel virtual
171  * address space. It transparently creates kernel huge I/O mapping when
172  * the physical address is aligned by a huge page size (1GB or 2MB) and
173  * the requested size is at least the huge page size.
174  *
175  * NOTE: MTRRs can override PAT memory types with a 4KB granularity.
176  * Therefore, the mapping code falls back to use a smaller page toward 4KB
177  * when a mapping range is covered by non-WB type of MTRRs.
178  *
179  * NOTE! We need to allow non-page-aligned mappings too: we will obviously
180  * have to convert them into an offset in a page-aligned mapping, but the
181  * caller shouldn't need to know that small detail.
182  */
183 static void __iomem *
184 __ioremap_caller(resource_size_t phys_addr, unsigned long size,
185 		 enum page_cache_mode pcm, void *caller, bool encrypted)
186 {
187 	unsigned long offset, vaddr;
188 	resource_size_t last_addr;
189 	const resource_size_t unaligned_phys_addr = phys_addr;
190 	const unsigned long unaligned_size = size;
191 	struct ioremap_desc io_desc;
192 	struct vm_struct *area;
193 	enum page_cache_mode new_pcm;
194 	pgprot_t prot;
195 	int retval;
196 	void __iomem *ret_addr;
197 
198 	/* Don't allow wraparound or zero size */
199 	last_addr = phys_addr + size - 1;
200 	if (!size || last_addr < phys_addr)
201 		return NULL;
202 
203 	if (!phys_addr_valid(phys_addr)) {
204 		printk(KERN_WARNING "ioremap: invalid physical address %llx\n",
205 		       (unsigned long long)phys_addr);
206 		WARN_ON_ONCE(1);
207 		return NULL;
208 	}
209 
210 	__ioremap_check_mem(phys_addr, size, &io_desc);
211 
212 	/*
213 	 * Don't allow anybody to remap normal RAM that we're using..
214 	 */
215 	if (io_desc.flags & IORES_MAP_SYSTEM_RAM) {
216 		WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n",
217 			  &phys_addr, &last_addr);
218 		return NULL;
219 	}
220 
221 	/*
222 	 * Mappings have to be page-aligned
223 	 */
224 	offset = phys_addr & ~PAGE_MASK;
225 	phys_addr &= PAGE_MASK;
226 	size = PAGE_ALIGN(last_addr+1) - phys_addr;
227 
228 	/*
229 	 * Mask out any bits not part of the actual physical
230 	 * address, like memory encryption bits.
231 	 */
232 	phys_addr &= PHYSICAL_PAGE_MASK;
233 
234 	retval = memtype_reserve(phys_addr, (u64)phys_addr + size,
235 						pcm, &new_pcm);
236 	if (retval) {
237 		printk(KERN_ERR "ioremap memtype_reserve failed %d\n", retval);
238 		return NULL;
239 	}
240 
241 	if (pcm != new_pcm) {
242 		if (!is_new_memtype_allowed(phys_addr, size, pcm, new_pcm)) {
243 			printk(KERN_ERR
244 		"ioremap error for 0x%llx-0x%llx, requested 0x%x, got 0x%x\n",
245 				(unsigned long long)phys_addr,
246 				(unsigned long long)(phys_addr + size),
247 				pcm, new_pcm);
248 			goto err_free_memtype;
249 		}
250 		pcm = new_pcm;
251 	}
252 
253 	/*
254 	 * If the page being mapped is in memory and SEV is active then
255 	 * make sure the memory encryption attribute is enabled in the
256 	 * resulting mapping.
257 	 * In TDX guests, memory is marked private by default. If encryption
258 	 * is not requested (using encrypted), explicitly set decrypt
259 	 * attribute in all IOREMAPPED memory.
260 	 */
261 	prot = PAGE_KERNEL_IO;
262 	if ((io_desc.flags & IORES_MAP_ENCRYPTED) || encrypted)
263 		prot = pgprot_encrypted(prot);
264 	else
265 		prot = pgprot_decrypted(prot);
266 
267 	switch (pcm) {
268 	case _PAGE_CACHE_MODE_UC:
269 	default:
270 		prot = __pgprot(pgprot_val(prot) |
271 				cachemode2protval(_PAGE_CACHE_MODE_UC));
272 		break;
273 	case _PAGE_CACHE_MODE_UC_MINUS:
274 		prot = __pgprot(pgprot_val(prot) |
275 				cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS));
276 		break;
277 	case _PAGE_CACHE_MODE_WC:
278 		prot = __pgprot(pgprot_val(prot) |
279 				cachemode2protval(_PAGE_CACHE_MODE_WC));
280 		break;
281 	case _PAGE_CACHE_MODE_WT:
282 		prot = __pgprot(pgprot_val(prot) |
283 				cachemode2protval(_PAGE_CACHE_MODE_WT));
284 		break;
285 	case _PAGE_CACHE_MODE_WB:
286 		break;
287 	}
288 
289 	/*
290 	 * Ok, go for it..
291 	 */
292 	area = get_vm_area_caller(size, VM_IOREMAP, caller);
293 	if (!area)
294 		goto err_free_memtype;
295 	area->phys_addr = phys_addr;
296 	vaddr = (unsigned long) area->addr;
297 
298 	if (memtype_kernel_map_sync(phys_addr, size, pcm))
299 		goto err_free_area;
300 
301 	if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot))
302 		goto err_free_area;
303 
304 	ret_addr = (void __iomem *) (vaddr + offset);
305 	mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);
306 
307 	/*
308 	 * Check if the request spans more than any BAR in the iomem resource
309 	 * tree.
310 	 */
311 	if (iomem_map_sanity_check(unaligned_phys_addr, unaligned_size))
312 		pr_warn("caller %pS mapping multiple BARs\n", caller);
313 
314 	return ret_addr;
315 err_free_area:
316 	free_vm_area(area);
317 err_free_memtype:
318 	memtype_free(phys_addr, phys_addr + size);
319 	return NULL;
320 }
321 
322 /**
323  * ioremap     -   map bus memory into CPU space
324  * @phys_addr:    bus address of the memory
325  * @size:      size of the resource to map
326  *
327  * ioremap performs a platform specific sequence of operations to
328  * make bus memory CPU accessible via the readb/readw/readl/writeb/
329  * writew/writel functions and the other mmio helpers. The returned
330  * address is not guaranteed to be usable directly as a virtual
331  * address.
332  *
333  * This version of ioremap ensures that the memory is marked uncachable
334  * on the CPU as well as honouring existing caching rules from things like
335  * the PCI bus. Note that there are other caches and buffers on many
336  * busses. In particular driver authors should read up on PCI writes
337  *
338  * It's useful if some control registers are in such an area and
339  * write combining or read caching is not desirable:
340  *
341  * Must be freed with iounmap.
342  */
343 void __iomem *ioremap(resource_size_t phys_addr, unsigned long size)
344 {
345 	/*
346 	 * Ideally, this should be:
347 	 *	pat_enabled() ? _PAGE_CACHE_MODE_UC : _PAGE_CACHE_MODE_UC_MINUS;
348 	 *
349 	 * Till we fix all X drivers to use ioremap_wc(), we will use
350 	 * UC MINUS. Drivers that are certain they need or can already
351 	 * be converted over to strong UC can use ioremap_uc().
352 	 */
353 	enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC_MINUS;
354 
355 	return __ioremap_caller(phys_addr, size, pcm,
356 				__builtin_return_address(0), false);
357 }
358 EXPORT_SYMBOL(ioremap);
359 
360 /**
361  * ioremap_uc     -   map bus memory into CPU space as strongly uncachable
362  * @phys_addr:    bus address of the memory
363  * @size:      size of the resource to map
364  *
365  * ioremap_uc performs a platform specific sequence of operations to
366  * make bus memory CPU accessible via the readb/readw/readl/writeb/
367  * writew/writel functions and the other mmio helpers. The returned
368  * address is not guaranteed to be usable directly as a virtual
369  * address.
370  *
371  * This version of ioremap ensures that the memory is marked with a strong
372  * preference as completely uncachable on the CPU when possible. For non-PAT
373  * systems this ends up setting page-attribute flags PCD=1, PWT=1. For PAT
374  * systems this will set the PAT entry for the pages as strong UC.  This call
375  * will honor existing caching rules from things like the PCI bus. Note that
376  * there are other caches and buffers on many busses. In particular driver
377  * authors should read up on PCI writes.
378  *
379  * It's useful if some control registers are in such an area and
380  * write combining or read caching is not desirable:
381  *
382  * Must be freed with iounmap.
383  */
384 void __iomem *ioremap_uc(resource_size_t phys_addr, unsigned long size)
385 {
386 	enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC;
387 
388 	return __ioremap_caller(phys_addr, size, pcm,
389 				__builtin_return_address(0), false);
390 }
391 EXPORT_SYMBOL_GPL(ioremap_uc);
392 
393 /**
394  * ioremap_wc	-	map memory into CPU space write combined
395  * @phys_addr:	bus address of the memory
396  * @size:	size of the resource to map
397  *
398  * This version of ioremap ensures that the memory is marked write combining.
399  * Write combining allows faster writes to some hardware devices.
400  *
401  * Must be freed with iounmap.
402  */
403 void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size)
404 {
405 	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WC,
406 					__builtin_return_address(0), false);
407 }
408 EXPORT_SYMBOL(ioremap_wc);
409 
410 /**
411  * ioremap_wt	-	map memory into CPU space write through
412  * @phys_addr:	bus address of the memory
413  * @size:	size of the resource to map
414  *
415  * This version of ioremap ensures that the memory is marked write through.
416  * Write through stores data into memory while keeping the cache up-to-date.
417  *
418  * Must be freed with iounmap.
419  */
420 void __iomem *ioremap_wt(resource_size_t phys_addr, unsigned long size)
421 {
422 	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WT,
423 					__builtin_return_address(0), false);
424 }
425 EXPORT_SYMBOL(ioremap_wt);
426 
427 void __iomem *ioremap_encrypted(resource_size_t phys_addr, unsigned long size)
428 {
429 	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB,
430 				__builtin_return_address(0), true);
431 }
432 EXPORT_SYMBOL(ioremap_encrypted);
433 
434 void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
435 {
436 	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB,
437 				__builtin_return_address(0), false);
438 }
439 EXPORT_SYMBOL(ioremap_cache);
440 
441 void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
442 				unsigned long prot_val)
443 {
444 	return __ioremap_caller(phys_addr, size,
445 				pgprot2cachemode(__pgprot(prot_val)),
446 				__builtin_return_address(0), false);
447 }
448 EXPORT_SYMBOL(ioremap_prot);
449 
450 /**
451  * iounmap - Free a IO remapping
452  * @addr: virtual address from ioremap_*
453  *
454  * Caller must ensure there is only one unmapping for the same pointer.
455  */
456 void iounmap(volatile void __iomem *addr)
457 {
458 	struct vm_struct *p, *o;
459 
460 	if ((void __force *)addr <= high_memory)
461 		return;
462 
463 	/*
464 	 * The PCI/ISA range special-casing was removed from __ioremap()
465 	 * so this check, in theory, can be removed. However, there are
466 	 * cases where iounmap() is called for addresses not obtained via
467 	 * ioremap() (vga16fb for example). Add a warning so that these
468 	 * cases can be caught and fixed.
469 	 */
470 	if ((void __force *)addr >= phys_to_virt(ISA_START_ADDRESS) &&
471 	    (void __force *)addr < phys_to_virt(ISA_END_ADDRESS)) {
472 		WARN(1, "iounmap() called for ISA range not obtained using ioremap()\n");
473 		return;
474 	}
475 
476 	mmiotrace_iounmap(addr);
477 
478 	addr = (volatile void __iomem *)
479 		(PAGE_MASK & (unsigned long __force)addr);
480 
481 	/* Use the vm area unlocked, assuming the caller
482 	   ensures there isn't another iounmap for the same address
483 	   in parallel. Reuse of the virtual address is prevented by
484 	   leaving it in the global lists until we're done with it.
485 	   cpa takes care of the direct mappings. */
486 	p = find_vm_area((void __force *)addr);
487 
488 	if (!p) {
489 		printk(KERN_ERR "iounmap: bad address %p\n", addr);
490 		dump_stack();
491 		return;
492 	}
493 
494 	kmsan_iounmap_page_range((unsigned long)addr,
495 		(unsigned long)addr + get_vm_area_size(p));
496 	memtype_free(p->phys_addr, p->phys_addr + get_vm_area_size(p));
497 
498 	/* Finally remove it */
499 	o = remove_vm_area((void __force *)addr);
500 	BUG_ON(p != o || o == NULL);
501 	kfree(p);
502 }
503 EXPORT_SYMBOL(iounmap);
504 
505 /*
506  * Convert a physical pointer to a virtual kernel pointer for /dev/mem
507  * access
508  */
509 void *xlate_dev_mem_ptr(phys_addr_t phys)
510 {
511 	unsigned long start  = phys &  PAGE_MASK;
512 	unsigned long offset = phys & ~PAGE_MASK;
513 	void *vaddr;
514 
515 	/* memremap() maps if RAM, otherwise falls back to ioremap() */
516 	vaddr = memremap(start, PAGE_SIZE, MEMREMAP_WB);
517 
518 	/* Only add the offset on success and return NULL if memremap() failed */
519 	if (vaddr)
520 		vaddr += offset;
521 
522 	return vaddr;
523 }
524 
525 void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
526 {
527 	memunmap((void *)((unsigned long)addr & PAGE_MASK));
528 }
529 
530 #ifdef CONFIG_AMD_MEM_ENCRYPT
531 /*
532  * Examine the physical address to determine if it is an area of memory
533  * that should be mapped decrypted.  If the memory is not part of the
534  * kernel usable area it was accessed and created decrypted, so these
535  * areas should be mapped decrypted. And since the encryption key can
536  * change across reboots, persistent memory should also be mapped
537  * decrypted.
538  *
539  * If SEV is active, that implies that BIOS/UEFI also ran encrypted so
540  * only persistent memory should be mapped decrypted.
541  */
542 static bool memremap_should_map_decrypted(resource_size_t phys_addr,
543 					  unsigned long size)
544 {
545 	int is_pmem;
546 
547 	/*
548 	 * Check if the address is part of a persistent memory region.
549 	 * This check covers areas added by E820, EFI and ACPI.
550 	 */
551 	is_pmem = region_intersects(phys_addr, size, IORESOURCE_MEM,
552 				    IORES_DESC_PERSISTENT_MEMORY);
553 	if (is_pmem != REGION_DISJOINT)
554 		return true;
555 
556 	/*
557 	 * Check if the non-volatile attribute is set for an EFI
558 	 * reserved area.
559 	 */
560 	if (efi_enabled(EFI_BOOT)) {
561 		switch (efi_mem_type(phys_addr)) {
562 		case EFI_RESERVED_TYPE:
563 			if (efi_mem_attributes(phys_addr) & EFI_MEMORY_NV)
564 				return true;
565 			break;
566 		default:
567 			break;
568 		}
569 	}
570 
571 	/* Check if the address is outside kernel usable area */
572 	switch (e820__get_entry_type(phys_addr, phys_addr + size - 1)) {
573 	case E820_TYPE_RESERVED:
574 	case E820_TYPE_ACPI:
575 	case E820_TYPE_NVS:
576 	case E820_TYPE_UNUSABLE:
577 		/* For SEV, these areas are encrypted */
578 		if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
579 			break;
580 		fallthrough;
581 
582 	case E820_TYPE_PRAM:
583 		return true;
584 	default:
585 		break;
586 	}
587 
588 	return false;
589 }
590 
591 /*
592  * Examine the physical address to determine if it is EFI data. Check
593  * it against the boot params structure and EFI tables and memory types.
594  */
595 static bool memremap_is_efi_data(resource_size_t phys_addr,
596 				 unsigned long size)
597 {
598 	u64 paddr;
599 
600 	/* Check if the address is part of EFI boot/runtime data */
601 	if (!efi_enabled(EFI_BOOT))
602 		return false;
603 
604 	paddr = boot_params.efi_info.efi_memmap_hi;
605 	paddr <<= 32;
606 	paddr |= boot_params.efi_info.efi_memmap;
607 	if (phys_addr == paddr)
608 		return true;
609 
610 	paddr = boot_params.efi_info.efi_systab_hi;
611 	paddr <<= 32;
612 	paddr |= boot_params.efi_info.efi_systab;
613 	if (phys_addr == paddr)
614 		return true;
615 
616 	if (efi_is_table_address(phys_addr))
617 		return true;
618 
619 	switch (efi_mem_type(phys_addr)) {
620 	case EFI_BOOT_SERVICES_DATA:
621 	case EFI_RUNTIME_SERVICES_DATA:
622 		return true;
623 	default:
624 		break;
625 	}
626 
627 	return false;
628 }
629 
630 /*
631  * Examine the physical address to determine if it is boot data by checking
632  * it against the boot params setup_data chain.
633  */
634 static bool memremap_is_setup_data(resource_size_t phys_addr,
635 				   unsigned long size)
636 {
637 	struct setup_indirect *indirect;
638 	struct setup_data *data;
639 	u64 paddr, paddr_next;
640 
641 	paddr = boot_params.hdr.setup_data;
642 	while (paddr) {
643 		unsigned int len;
644 
645 		if (phys_addr == paddr)
646 			return true;
647 
648 		data = memremap(paddr, sizeof(*data),
649 				MEMREMAP_WB | MEMREMAP_DEC);
650 		if (!data) {
651 			pr_warn("failed to memremap setup_data entry\n");
652 			return false;
653 		}
654 
655 		paddr_next = data->next;
656 		len = data->len;
657 
658 		if ((phys_addr > paddr) &&
659 		    (phys_addr < (paddr + sizeof(struct setup_data) + len))) {
660 			memunmap(data);
661 			return true;
662 		}
663 
664 		if (data->type == SETUP_INDIRECT) {
665 			memunmap(data);
666 			data = memremap(paddr, sizeof(*data) + len,
667 					MEMREMAP_WB | MEMREMAP_DEC);
668 			if (!data) {
669 				pr_warn("failed to memremap indirect setup_data\n");
670 				return false;
671 			}
672 
673 			indirect = (struct setup_indirect *)data->data;
674 
675 			if (indirect->type != SETUP_INDIRECT) {
676 				paddr = indirect->addr;
677 				len = indirect->len;
678 			}
679 		}
680 
681 		memunmap(data);
682 
683 		if ((phys_addr > paddr) && (phys_addr < (paddr + len)))
684 			return true;
685 
686 		paddr = paddr_next;
687 	}
688 
689 	return false;
690 }
691 
692 /*
693  * Examine the physical address to determine if it is boot data by checking
694  * it against the boot params setup_data chain (early boot version).
695  */
696 static bool __init early_memremap_is_setup_data(resource_size_t phys_addr,
697 						unsigned long size)
698 {
699 	struct setup_indirect *indirect;
700 	struct setup_data *data;
701 	u64 paddr, paddr_next;
702 
703 	paddr = boot_params.hdr.setup_data;
704 	while (paddr) {
705 		unsigned int len, size;
706 
707 		if (phys_addr == paddr)
708 			return true;
709 
710 		data = early_memremap_decrypted(paddr, sizeof(*data));
711 		if (!data) {
712 			pr_warn("failed to early memremap setup_data entry\n");
713 			return false;
714 		}
715 
716 		size = sizeof(*data);
717 
718 		paddr_next = data->next;
719 		len = data->len;
720 
721 		if ((phys_addr > paddr) &&
722 		    (phys_addr < (paddr + sizeof(struct setup_data) + len))) {
723 			early_memunmap(data, sizeof(*data));
724 			return true;
725 		}
726 
727 		if (data->type == SETUP_INDIRECT) {
728 			size += len;
729 			early_memunmap(data, sizeof(*data));
730 			data = early_memremap_decrypted(paddr, size);
731 			if (!data) {
732 				pr_warn("failed to early memremap indirect setup_data\n");
733 				return false;
734 			}
735 
736 			indirect = (struct setup_indirect *)data->data;
737 
738 			if (indirect->type != SETUP_INDIRECT) {
739 				paddr = indirect->addr;
740 				len = indirect->len;
741 			}
742 		}
743 
744 		early_memunmap(data, size);
745 
746 		if ((phys_addr > paddr) && (phys_addr < (paddr + len)))
747 			return true;
748 
749 		paddr = paddr_next;
750 	}
751 
752 	return false;
753 }
754 
755 /*
756  * Architecture function to determine if RAM remap is allowed. By default, a
757  * RAM remap will map the data as encrypted. Determine if a RAM remap should
758  * not be done so that the data will be mapped decrypted.
759  */
760 bool arch_memremap_can_ram_remap(resource_size_t phys_addr, unsigned long size,
761 				 unsigned long flags)
762 {
763 	if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT))
764 		return true;
765 
766 	if (flags & MEMREMAP_ENC)
767 		return true;
768 
769 	if (flags & MEMREMAP_DEC)
770 		return false;
771 
772 	if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) {
773 		if (memremap_is_setup_data(phys_addr, size) ||
774 		    memremap_is_efi_data(phys_addr, size))
775 			return false;
776 	}
777 
778 	return !memremap_should_map_decrypted(phys_addr, size);
779 }
780 
781 /*
782  * Architecture override of __weak function to adjust the protection attributes
783  * used when remapping memory. By default, early_memremap() will map the data
784  * as encrypted. Determine if an encrypted mapping should not be done and set
785  * the appropriate protection attributes.
786  */
787 pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr,
788 					     unsigned long size,
789 					     pgprot_t prot)
790 {
791 	bool encrypted_prot;
792 
793 	if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT))
794 		return prot;
795 
796 	encrypted_prot = true;
797 
798 	if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) {
799 		if (early_memremap_is_setup_data(phys_addr, size) ||
800 		    memremap_is_efi_data(phys_addr, size))
801 			encrypted_prot = false;
802 	}
803 
804 	if (encrypted_prot && memremap_should_map_decrypted(phys_addr, size))
805 		encrypted_prot = false;
806 
807 	return encrypted_prot ? pgprot_encrypted(prot)
808 			      : pgprot_decrypted(prot);
809 }
810 
811 bool phys_mem_access_encrypted(unsigned long phys_addr, unsigned long size)
812 {
813 	return arch_memremap_can_ram_remap(phys_addr, size, 0);
814 }
815 
816 /* Remap memory with encryption */
817 void __init *early_memremap_encrypted(resource_size_t phys_addr,
818 				      unsigned long size)
819 {
820 	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC);
821 }
822 
823 /*
824  * Remap memory with encryption and write-protected - cannot be called
825  * before pat_init() is called
826  */
827 void __init *early_memremap_encrypted_wp(resource_size_t phys_addr,
828 					 unsigned long size)
829 {
830 	if (!x86_has_pat_wp())
831 		return NULL;
832 	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC_WP);
833 }
834 
835 /* Remap memory without encryption */
836 void __init *early_memremap_decrypted(resource_size_t phys_addr,
837 				      unsigned long size)
838 {
839 	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC);
840 }
841 
842 /*
843  * Remap memory without encryption and write-protected - cannot be called
844  * before pat_init() is called
845  */
846 void __init *early_memremap_decrypted_wp(resource_size_t phys_addr,
847 					 unsigned long size)
848 {
849 	if (!x86_has_pat_wp())
850 		return NULL;
851 	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC_WP);
852 }
853 #endif	/* CONFIG_AMD_MEM_ENCRYPT */
854 
/*
 * One page worth of PTEs used for the boot-time ioremap fixmap slots.
 * early_ioremap_init() clears it and installs it under the PMD that covers
 * FIX_BTMAP_BEGIN; early_ioremap_pte() indexes into it.
 */
static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss;
856 
857 static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
858 {
859 	/* Don't assume we're using swapper_pg_dir at this point */
860 	pgd_t *base = __va(read_cr3_pa());
861 	pgd_t *pgd = &base[pgd_index(addr)];
862 	p4d_t *p4d = p4d_offset(pgd, addr);
863 	pud_t *pud = pud_offset(p4d, addr);
864 	pmd_t *pmd = pmd_offset(pud, addr);
865 
866 	return pmd;
867 }
868 
869 static inline pte_t * __init early_ioremap_pte(unsigned long addr)
870 {
871 	return &bm_pte[pte_index(addr)];
872 }
873 
874 bool __init is_early_ioremap_ptep(pte_t *ptep)
875 {
876 	return ptep >= &bm_pte[0] && ptep < &bm_pte[PAGE_SIZE/sizeof(pte_t)];
877 }
878 
/*
 * Set up the boot-time ioremap area: run the generic early_ioremap_setup(),
 * then clear bm_pte[] and install it under the PMD covering FIX_BTMAP_BEGIN.
 * Also checks, at build time and at run time, that the whole
 * FIX_BTMAP_BEGIN..FIX_BTMAP_END range is covered by that single PMD.
 */
void __init early_ioremap_init(void)
{
	pmd_t *pmd;

	/*
	 * The address just past fixmap slot 0 must be PMD aligned: a build
	 * failure on 64-bit, only a runtime warning on 32-bit.
	 */
#ifdef CONFIG_X86_64
	BUILD_BUG_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
#else
	WARN_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
#endif

	early_ioremap_setup();

	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
	memset(bm_pte, 0, sizeof(bm_pte));
	pmd_populate_kernel(&init_mm, pmd, bm_pte);

	/*
	 * The boot-ioremap range spans multiple pmds, for which
	 * we are not prepared:
	 */
	/*
	 * NOTE(review): __FIXADDR_TOP is overridden only for the check
	 * below, presumably so that __fix_to_virt() is a compile-time
	 * constant there - confirm against the fixmap header.
	 */
#define __FIXADDR_TOP (-PAGE_SIZE)
	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
#undef __FIXADDR_TOP
	/* Runtime version of the same check, with diagnostics on failure. */
	if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
		WARN_ON(1);
		printk(KERN_WARNING "pmd %p != %p\n",
		       pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
			fix_to_virt(FIX_BTMAP_BEGIN));
		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END):   %08lx\n",
			fix_to_virt(FIX_BTMAP_END));

		printk(KERN_WARNING "FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
		printk(KERN_WARNING "FIX_BTMAP_BEGIN:     %d\n",
		       FIX_BTMAP_BEGIN);
	}
}
917 
918 void __init __early_set_fixmap(enum fixed_addresses idx,
919 			       phys_addr_t phys, pgprot_t flags)
920 {
921 	unsigned long addr = __fix_to_virt(idx);
922 	pte_t *pte;
923 
924 	if (idx >= __end_of_fixed_addresses) {
925 		BUG();
926 		return;
927 	}
928 	pte = early_ioremap_pte(addr);
929 
930 	/* Sanitize 'prot' against any unsupported bits: */
931 	pgprot_val(flags) &= __supported_pte_mask;
932 
933 	if (pgprot_val(flags))
934 		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
935 	else
936 		pte_clear(&init_mm, addr, pte);
937 	flush_tlb_one_kernel(addr);
938 }
939