xref: /openbmc/linux/arch/arm/kernel/phys2virt.S (revision 9443076e)
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 *  Copyright (C) 1994-2002 Russell King
 *  Copyright (c) 2003, 2020 ARM Limited
 *  All Rights Reserved
 */

#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/page.h>

/*
 * Byte offsets of the low and high 32-bit words within the 64-bit
 * __pv_offset variable. The two words trade places on big-endian
 * (__ARMEB__) builds.
 */
#ifdef __ARMEB__
#define LOW_OFFSET	0x4
#define HIGH_OFFSET	0x0
#else
#define LOW_OFFSET	0x0
#define HIGH_OFFSET	0x4
#endif

/*
 * __fixup_pv_table - patch the stub instructions with the delta between
 *                    PHYS_OFFSET and PAGE_OFFSET, which is assumed to be
 *                    2 MiB aligned.
 *
 * Called from head.S, which expects the following registers to be preserved:
 *   r1 = machine no, r2 = atags or dtb,
 *   r8 = phys_offset, r9 = cpuid, r10 = procinfo
 *
 * Side effects:
 *   __pv_phys_pfn_offset = r8 >> PAGE_SHIFT
 *   __pv_offset          = r8 - PAGE_OFFSET (low word always written; the
 *                          high word is only written, as all ones, when the
 *                          subtraction borrows)
 *
 * Writes r0, r3, r4, r5 and ip directly, then tail-branches to
 * __fixup_a_pv_table, so that routine's "ret lr" returns to our caller.
 * If the delta is not 2 MiB aligned, this routine never returns.
 *
 * NOTE(review): str_l and adr_l are macros defined outside this file
 * (presumably in <asm/assembler.h>); the third str_l operand looks like a
 * scratch register - confirm against the macro definition.
 */
	__HEAD
ENTRY(__fixup_pv_table)
	mov	r0, r8, lsr #PAGE_SHIFT	@ convert to PFN
	str_l	r0, __pv_phys_pfn_offset, r3

	adr_l	r0, __pv_offset
	subs	r3, r8, #PAGE_OFFSET	@ PHYS_OFFSET - PAGE_OFFSET
	mvn	ip, #0			@ ip = 0xffffffff (flags untouched)
	strcc	ip, [r0, #HIGH_OFFSET]	@ save to __pv_offset high bits
	str	r3, [r0, #LOW_OFFSET]	@ save to __pv_offset low bits

	mov	r0, r3, lsr #21		@ r0 = delta with the low 21 bits dropped
	teq	r3, r0, lsl #21		@ must be 2 MiB aligned
	bne	0f

	adr_l	r4, __pv_table_begin	@ r4 = first table entry
	adr_l	r5, __pv_table_end	@ r5 = end of table
	b	__fixup_a_pv_table

0:	mov	r0, r0			@ deadloop on error
	b	0b
ENDPROC(__fixup_pv_table)

/*
 * __fixup_a_pv_table - patch every instruction listed in a pv table with
 *                      the value currently stored in __pv_offset
 *
 * In:
 *   r4 = address of the first table entry
 *   r5 = address just past the last table entry
 *   lr = return address
 *
 * Each table entry is a 32-bit word. The entry's value plus the address of
 * the entry itself gives the address of the instruction to patch.
 *
 * Overwrites r0, r3, r6, r7, ip and the condition flags, and advances r4
 * until it is no longer below r5.
 */
	.text
__fixup_a_pv_table:
	adr_l	r6, __pv_offset
	ldr	r0, [r6, #HIGH_OFFSET]	@ pv_offset high word
	ldr	r6, [r6, #LOW_OFFSET]	@ pv_offset low word
	cmn	r0, #1			@ Z set if high word is all ones (offset < 0)
#ifdef CONFIG_THUMB2_KERNEL
	@
	@ The Thumb-2 versions of the patchable sequences are
	@
	@ phys-to-virt:			movw	<reg>, #offset<31:21>
	@				lsl	<reg>, #21
	@				sub	<VA>, <PA>, <reg>
	@
	@ virt-to-phys (non-LPAE):	movw	<reg>, #offset<31:21>
	@				lsl	<reg>, #21
	@				add	<PA>, <VA>, <reg>
	@
	@ virt-to-phys (LPAE):		movw	<reg>, #offset<31:21>
	@				lsl	<reg>, #21
	@				adds	<PAlo>, <VA>, <reg>
	@				mov	<PAhi>, #offset<39:32>
	@				adc	<PAhi>, <PAhi>, #0
	@
	@ In the non-LPAE case, all patchable instructions are MOVW
	@ instructions, where we need to patch in the offset into the
	@ second halfword of the opcode (the 16-bit immediate is encoded
	@ as imm4:i:imm3:imm8)
	@
	@       15       11 10  9           4 3    0  15  14  12 11 8 7    0
	@      +-----------+---+-------------+------++---+------+----+------+
	@ MOVW | 1 1 1 1 0 | i | 1 0 0 1 0 0 | imm4 || 0 | imm3 | Rd | imm8 |
	@      +-----------+---+-------------+------++---+------+----+------+
	@
	@ In the LPAE case, we also need to patch in the high word of the
	@ offset into the immediate field of the MOV instruction, or patch it
	@ to a MVN instruction if the offset is negative. In this case, we
	@ need to inspect the first halfword of the opcode, to check whether
	@ it is MOVW or MOV/MVN, and to perform the MOV to MVN patching if
	@ needed. The encoding of the immediate is rather complex for values
	@ of i:imm3 != 0b0000, but fortunately, we never need more than 8 lower
	@ order bits, which can be patched into imm8 directly (and i:imm3
	@ cleared)
	@
	@      15       11 10  9        5         0  15  14  12 11 8 7    0
	@     +-----------+---+---------------------++---+------+----+------+
	@ MOV | 1 1 1 1 0 | i | 0 0 0 1 0 0 1 1 1 1 || 0 | imm3 | Rd | imm8 |
	@ MVN | 1 1 1 1 0 | i | 0 0 0 1 1 0 1 1 1 1 || 0 | imm3 | Rd | imm8 |
	@     +-----------+---+---------------------++---+------+----+------+
	@
	moveq	r0, #0x200000		@ set bit 21, mov to mvn instruction
	lsrs	r3, r6, #29		@ isolate top 3 bits of displacement
	ubfx	r6, r6, #21, #8		@ put bits 28:21 into the MOVW imm8 field
	bfi	r6, r3, #12, #3		@ put bits 31:29 into the MOVW imm3 field
	b	.Lnext
.Lloop:	add	r7, r4			@ r7 = entry value + entry address
	adds	r4, #4			@ clears Z flag
#ifdef CONFIG_ARM_LPAE
	ldrh	ip, [r7]		@ first halfword of the opcode
ARM_BE8(rev16	ip, ip)
	tst	ip, #0x200		@ MOVW has bit 9 set, MVN has it clear
	bne	0f			@ skip to MOVW handling (Z flag is clear)
	bic	ip, #0x20		@ clear bit 5 (MVN -> MOV)
	orr	ip, ip, r0, lsr #16	@ MOV -> MVN if offset < 0
ARM_BE8(rev16	ip, ip)
	strh	ip, [r7]
	@ Z flag is set
0:
#endif
	ldrh	ip, [r7, #2]		@ second halfword of the opcode
ARM_BE8(rev16	ip, ip)
	and	ip, #0xf00		@ clear everything except Rd field
	orreq	ip, r0			@ Z flag set -> MOV/MVN -> patch in high bits
	orrne	ip, r6			@ Z flag clear -> MOVW -> patch in low bits
ARM_BE8(rev16	ip, ip)
	strh	ip, [r7, #2]
#else
#ifdef CONFIG_CPU_ENDIAN_BE8
@ in BE8, we load data in BE, but instructions still in LE
#define PV_BIT24	0x00000001
#define PV_IMM8_MASK	0xff000000
#define PV_IMMR_MSB	0x00080000
#else
#define PV_BIT24	0x01000000
#define PV_IMM8_MASK	0x000000ff
#define PV_IMMR_MSB	0x00000800
#endif

	@
	@ The ARM versions of the patchable sequences are
	@
	@ phys-to-virt:			sub	<VA>, <PA>, #offset<31:24>, lsl #24
	@				sub	<VA>, <PA>, #offset<23:16>, lsl #16
	@
	@ virt-to-phys (non-LPAE):	add	<PA>, <VA>, #offset<31:24>, lsl #24
	@				add	<PA>, <VA>, #offset<23:16>, lsl #16
	@
	@ virt-to-phys (LPAE):		movw	<reg>, #offset<31:20>
	@				adds	<PAlo>, <VA>, <reg>, lsl #20
	@				mov	<PAhi>, #offset<39:32>
	@				adc	<PAhi>, <PAhi>, #0
	@
	@ In the non-LPAE case, all patchable instructions are ADD or SUB
	@ instructions, where we need to patch in the offset into the
	@ immediate field of the opcode, which is emitted with the correct
	@ rotation value. (The effective value of the immediate is imm12<7:0>
	@ rotated right by [2 * imm12<11:8>] bits)
	@
	@      31   28 27      23 22  20 19  16 15  12 11    0
	@      +------+-----------------+------+------+-------+
	@  ADD | cond | 0 0 1 0 1 0 0 0 |  Rn  |  Rd  | imm12 |
	@  SUB | cond | 0 0 1 0 0 1 0 0 |  Rn  |  Rd  | imm12 |
	@  MOV | cond | 0 0 1 1 1 0 1 0 |  Rn  |  Rd  | imm12 |
	@  MVN | cond | 0 0 1 1 1 1 1 0 |  Rn  |  Rd  | imm12 |
	@      +------+-----------------+------+------+-------+
	@
	@ In the LPAE case, we use a MOVW instruction to carry the low offset
	@ word, and patch in the high word of the offset into the immediate
	@ field of the subsequent MOV instruction, or patch it to a MVN
	@ instruction if the offset is negative. We can distinguish MOVW
	@ instructions based on bits 23:22 of the opcode, and ADD/SUB can be
	@ distinguished from MOV/MVN (all using the encodings above) using
	@ bit 24.
	@
	@      31   28 27      23 22  20 19  16 15  12 11    0
	@      +------+-----------------+------+------+-------+
	@ MOVW | cond | 0 0 1 1 0 0 0 0 | imm4 |  Rd  | imm12 |
	@      +------+-----------------+------+------+-------+
	@
	moveq	r0, #0x400000		@ set bit 22, mov to mvn instruction
	mov	r3, r6, lsr #16		@ put offset bits 31-16 into r3
	mov	r6, r6, lsr #24		@ put offset bits 31-24 into r6
	and	r3, r3, #0xf0		@ only keep offset bits 23-20 in r3
	b	.Lnext
.Lloop:	ldr	ip, [r7, r4]		@ opcode at entry value + entry address
#ifdef CONFIG_ARM_LPAE
	tst	ip, #PV_BIT24		@ ADD/SUB have bit 24 clear
	beq	1f
ARM_BE8(rev	ip, ip)
	tst	ip, #0xc00000		@ MOVW has bits 23:22 clear
	bic	ip, ip, #0x400000	@ clear bit 22
	bfc	ip, #0, #12		@ clear imm12 field of MOV[W] instruction
	orreq	ip, ip, r6, lsl #4	@ MOVW -> mask in offset bits 31-24
	orreq	ip, ip, r3, lsr #4	@ MOVW -> mask in offset bits 23-20
	orrne	ip, ip, r0		@ MOV  -> mask in offset bits 7-0 (or bit 22)
ARM_BE8(rev	ip, ip)
	b	2f
1:
#endif
	tst	ip, #PV_IMMR_MSB		@ rotation value >= 16 ?
	bic	ip, ip, #PV_IMM8_MASK
	orreq	ip, ip, r6 ARM_BE8(, lsl #24)	@ mask in offset bits 31-24
	orrne	ip, ip, r3 ARM_BE8(, lsl #24)	@ mask in offset bits 23-20
2:
	str	ip, [r7, r4]
	add	r4, r4, #4
#endif

.Lnext:
	cmp	r4, r5			@ more entries while r4 < r5 (unsigned)
	ldrcc	r7, [r4]		@ use branch for delay slot
	bcc	.Lloop
	ret	lr
ENDPROC(__fixup_a_pv_table)

/*
 * fixup_pv_table - patch the instructions listed in a caller-supplied table
 *
 * In:
 *   r0 = table start
 *   r1 = table size
 *
 * Saves r4-r7 and lr on the stack, converts the arguments into the
 * r4 (start) / r5 (start + size) pair consumed by __fixup_a_pv_table,
 * calls it, then restores r4-r7 and returns by popping the saved lr
 * into pc. r0, r3 and ip are overwritten by the callee and not restored.
 */
ENTRY(fixup_pv_table)
	stmfd	sp!, {r4 - r7, lr}
	mov	r4, r0			@ r0 = table start
	add	r5, r0, r1		@ r1 = table size
	bl	__fixup_a_pv_table
	ldmfd	sp!, {r4 - r7, pc}
ENDPROC(fixup_pv_table)

226eae78e1aSArd Biesheuvel	.data
227eae78e1aSArd Biesheuvel	.align	2
228eae78e1aSArd Biesheuvel	.globl	__pv_phys_pfn_offset
229eae78e1aSArd Biesheuvel	.type	__pv_phys_pfn_offset, %object
230eae78e1aSArd Biesheuvel__pv_phys_pfn_offset:
231eae78e1aSArd Biesheuvel	.word	0
232eae78e1aSArd Biesheuvel	.size	__pv_phys_pfn_offset, . -__pv_phys_pfn_offset
233eae78e1aSArd Biesheuvel
234eae78e1aSArd Biesheuvel	.globl	__pv_offset
235eae78e1aSArd Biesheuvel	.type	__pv_offset, %object
236eae78e1aSArd Biesheuvel__pv_offset:
237eae78e1aSArd Biesheuvel	.quad	0
238eae78e1aSArd Biesheuvel	.size	__pv_offset, . -__pv_offset
239