/* SPDX-License-Identifier: GPL-2.0-only */
/*
 *  Copyright (C) 1994-2002 Russell King
 *  Copyright (c) 2003, 2020 ARM Limited
 *  All Rights Reserved
 */

#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/page.h>

/*
 * Byte offsets of the low and high 32-bit words inside the 64-bit
 * __pv_offset variable; they swap places on big-endian builds.
 */
#ifdef __ARMEB__
#define LOW_OFFSET	0x4
#define HIGH_OFFSET	0x0
#else
#define LOW_OFFSET	0x0
#define HIGH_OFFSET	0x4
#endif

/*
 * __fixup_pv_table - patch the stub instructions with the delta between
 *                    PHYS_OFFSET and PAGE_OFFSET, which is assumed to be
 *                    2 MiB aligned.
 *
 * Called from head.S, which expects the following registers to be preserved:
 *   r1 = machine no, r2 = atags or dtb,
 *   r8 = phys_offset, r9 = cpuid, r10 = procinfo
 *
 * Stores (r8 >> PAGE_SHIFT) in __pv_phys_pfn_offset and (r8 - PAGE_OFFSET)
 * in __pv_offset, then tail-branches to __fixup_a_pv_table with
 * r4/r5 = __pv_table_begin/__pv_table_end.  If the delta is not 2 MiB
 * aligned, the routine spins forever instead of patching.
 */
	__HEAD
ENTRY(__fixup_pv_table)
	mov	r0, r8, lsr #PAGE_SHIFT	@ convert to PFN
	str_l	r0, __pv_phys_pfn_offset, r3

	adr_l	r0, __pv_offset
	subs	r3, r8, #PAGE_OFFSET	@ PHYS_OFFSET - PAGE_OFFSET
	mvn	ip, #0
	@ Carry clear here means the subtraction borrowed, i.e. the delta is
	@ negative: store all-ones into the high word. Otherwise the high word
	@ is left untouched (its initial value in .data is zero).
	strcc	ip, [r0, #HIGH_OFFSET]	@ save to __pv_offset high bits
	str	r3, [r0, #LOW_OFFSET]	@ save to __pv_offset low bits

	mov	r0, r3, lsr #21		@ constant for add/sub instructions
	teq	r3, r0, lsl #21		@ must be 2 MiB aligned
	bne	0f

	adr_l	r4, __pv_table_begin
	adr_l	r5, __pv_table_end
	b	__fixup_a_pv_table

0:	mov	r0, r0			@ deadloop on error
	b	0b
ENDPROC(__fixup_pv_table)

/*
 * __fixup_a_pv_table - patch every instruction referenced by a pv table
 *
 * In:	r4 = address of the first table entry
 *	r5 = address just past the last table entry
 *
 * Each table entry is a 32-bit word that is added to the address of the
 * entry itself to locate the instruction to patch.  The displacement that
 * gets patched in is read from __pv_offset.
 *
 * Clobbers r0, r3, r4, r6, r7, ip and the condition flags; returns via lr.
 */
	.text
__fixup_a_pv_table:
	adr_l	r6, __pv_offset
	ldr	r0, [r6, #HIGH_OFFSET]	@ pv_offset high word
	ldr	r6, [r6, #LOW_OFFSET]	@ pv_offset low word
	@ Z is set iff the high word is 0xffffffff (negative offset); the
	@ MOVEQ at the start of each variant below consumes this flag.
	cmn	r0, #1
#ifdef CONFIG_THUMB2_KERNEL
	@
	@ The Thumb-2 versions of the patchable sequences are
	@
	@ phys-to-virt:			movw	<reg>, #offset<31:21>
	@				lsl	<reg>, #21
	@				sub	<VA>, <PA>, <reg>
	@
	@ virt-to-phys (non-LPAE):	movw	<reg>, #offset<31:21>
	@				lsl	<reg>, #21
	@				add	<PA>, <VA>, <reg>
	@
	@ virt-to-phys (LPAE):		movw	<reg>, #offset<31:21>
	@				lsl	<reg>, #21
	@				adds	<PAlo>, <VA>, <reg>
	@				mov	<PAhi>, #offset<39:32>
	@				adc	<PAhi>, <PAhi>, #0
	@
	@ In the non-LPAE case, all patchable instructions are MOVW
	@ instructions, where we need to patch in the offset into the
	@ second halfword of the opcode (the 16-bit immediate is encoded
	@ as imm4:i:imm3:imm8)
	@
	@       15      11  10   9         4   3  0   15  14 12  11 8  7  0
	@      +-----------+---+-------------+------++---+------+----+------+
	@ MOVW | 1 1 1 1 0 | i | 1 0 0 1 0 0 | imm4 || 0 | imm3 | Rd | imm8 |
	@      +-----------+---+-------------+------++---+------+----+------+
	@
	@ In the LPAE case, we also need to patch in the high word of the
	@ offset into the immediate field of the MOV instruction, or patch it
	@ to a MVN instruction if the offset is negative. In this case, we
	@ need to inspect the first halfword of the opcode, to check whether
	@ it is MOVW or MOV/MVN, and to perform the MOV to MVN patching if
	@ needed. The encoding of the immediate is rather complex for values
	@ of i:imm3 != 0b0000, but fortunately, we never need more than 8 lower
	@ order bits, which can be patched into imm8 directly (and i:imm3
	@ cleared)
	@
	@       15      11  10   9       5         0   15  14 12  11 8  7  0
	@      +-----------+---+---------------------++---+------+----+------+
	@  MOV | 1 1 1 1 0 | i | 0 0 0 1 0 0 1 1 1 1 || 0 | imm3 | Rd | imm8 |
	@  MVN | 1 1 1 1 0 | i | 0 0 0 1 1 0 1 1 1 1 || 0 | imm3 | Rd | imm8 |
	@      +-----------+---+---------------------++---+------+----+------+
	@
	@ Register use in the loop below:
	@   r0 = high word of the offset, or 0x200000 if it was 0xffffffff
	@   r6 = offset<31:21> laid out as the imm3:imm8 bits of the second
	@        MOVW halfword
	@   r7 = address of the instruction being patched
	@
	moveq	r0, #0x200000		@ set bit 21, mov to mvn instruction
	lsrs	r3, r6, #29		@ isolate top 3 bits of displacement
	ubfx	r6, r6, #21, #8		@ put bits 28:21 into the MOVW imm8 field
	bfi	r6, r3, #12, #3		@ put bits 31:29 into the MOVW imm3 field
	b	.Lnext
.Lloop:	add	r7, r4
	adds	r4, #4			@ clears Z flag
#ifdef CONFIG_ARM_LPAE
	ldrh	ip, [r7]
ARM_BE8(rev16	ip, ip)
	tst	ip, #0x200		@ MOVW has bit 9 set, MVN has it clear
	bne	0f			@ skip to MOVW handling (Z flag is clear)
	bic	ip, #0x20		@ clear bit 5 (MVN -> MOV)
	orr	ip, ip, r0, lsr #16	@ MOV -> MVN if offset < 0
ARM_BE8(rev16	ip, ip)
	strh	ip, [r7]
	@ Z flag is set
0:
#endif
	ldrh	ip, [r7, #2]
ARM_BE8(rev16	ip, ip)
	and	ip, #0xf00		@ clear everything except Rd field
	orreq	ip, r0			@ Z flag set -> MOV/MVN -> patch in high bits
	orrne	ip, r6			@ Z flag clear -> MOVW -> patch in low bits
ARM_BE8(rev16	ip, ip)
	strh	ip, [r7, #2]
#else
#ifdef CONFIG_CPU_ENDIAN_BE8
@ in BE8, we load data in BE, but instructions still in LE
#define PV_BIT24	0x00000001
#define PV_IMM8_MASK	0xff000000
#define PV_IMMR_MSB	0x00080000
#else
#define PV_BIT24	0x01000000
#define PV_IMM8_MASK	0x000000ff
#define PV_IMMR_MSB	0x00000800
#endif

	@
	@ The ARM versions of the patchable sequences are
	@
	@ phys-to-virt:			sub	<VA>, <PA>, #offset<31:24>, lsl #24
	@				sub	<VA>, <PA>, #offset<23:16>, lsl #16
	@
	@ virt-to-phys (non-LPAE):	add	<PA>, <VA>, #offset<31:24>, lsl #24
	@				add	<PA>, <VA>, #offset<23:16>, lsl #16
	@
	@ virt-to-phys (LPAE):		movw	<reg>, #offset<31:20>
	@				adds	<PAlo>, <VA>, <reg>, lsl #20
	@				mov	<PAhi>, #offset<39:32>
	@				adc	<PAhi>, <PAhi>, #0
	@
	@ In the non-LPAE case, all patchable instructions are ADD or SUB
	@ instructions, where we need to patch in the offset into the
	@ immediate field of the opcode, which is emitted with the correct
	@ rotation value. (The effective value of the immediate is imm12<7:0>
	@ rotated right by [2 * imm12<11:8>] bits)
	@
	@       31  28 27      23 22 20  19  16 15  12 11    0
	@      +------+-----------------+------+------+-------+
	@  ADD | cond | 0 0 1 0 1 0 0 0 |  Rn  |  Rd  | imm12 |
	@  SUB | cond | 0 0 1 0 0 1 0 0 |  Rn  |  Rd  | imm12 |
	@  MOV | cond | 0 0 1 1 1 0 1 0 |  Rn  |  Rd  | imm12 |
	@  MVN | cond | 0 0 1 1 1 1 1 0 |  Rn  |  Rd  | imm12 |
	@      +------+-----------------+------+------+-------+
	@
	@ In the LPAE case, we use a MOVW instruction to carry the low offset
	@ word, and patch in the high word of the offset into the immediate
	@ field of the subsequent MOV instruction, or patch it to a MVN
	@ instruction if the offset is negative. We can distinguish MOVW
	@ instructions based on bits 23:22 of the opcode, and ADD/SUB can be
	@ distinguished from MOV/MVN (all using the encodings above) using
	@ bit 24.
	@
	@       31  28 27      23 22 20  19  16 15  12 11    0
	@      +------+-----------------+------+------+-------+
	@ MOVW | cond | 0 0 1 1 0 0 0 0 | imm4 |  Rd  | imm12 |
	@      +------+-----------------+------+------+-------+
	@
	@ Register use in the loop below:
	@   r0 = high word of the offset, or 0x400000 if it was 0xffffffff
	@   r3 = offset<23:20> in bits 7:4
	@   r6 = offset<31:24> in bits 7:0
	@   r7 = table entry value, r4 = table entry address (r7 + r4 is the
	@        address of the instruction being patched)
	@
	moveq	r0, #0x400000		@ set bit 22, mov to mvn instruction
	mov	r3, r6, lsr #16		@ put offset bits 31-16 into r3
	mov	r6, r6, lsr #24		@ put offset bits 31-24 into r6
	and	r3, r3, #0xf0		@ only keep offset bits 23-20 in r3
	b	.Lnext
.Lloop:	ldr	ip, [r7, r4]
#ifdef CONFIG_ARM_LPAE
	tst	ip, #PV_BIT24		@ ADD/SUB have bit 24 clear
	beq	1f
ARM_BE8(rev	ip, ip)
	tst	ip, #0xc00000		@ MOVW has bits 23:22 clear
	bic	ip, ip, #0x400000	@ clear bit 22
	bfc	ip, #0, #12		@ clear imm12 field of MOV[W] instruction
	orreq	ip, ip, r6, lsl #4	@ MOVW -> mask in offset bits 31-24
	orreq	ip, ip, r3, lsr #4	@ MOVW -> mask in offset bits 23-20
	orrne	ip, ip, r0		@ MOV -> mask in offset bits 7-0 (or bit 22)
ARM_BE8(rev	ip, ip)
	b	2f
1:
#endif
	tst	ip, #PV_IMMR_MSB		@ rotation value >= 16 ?
	bic	ip, ip, #PV_IMM8_MASK
	orreq	ip, ip, r6 ARM_BE8(, lsl #24)	@ mask in offset bits 31-24
	orrne	ip, ip, r3 ARM_BE8(, lsl #24)	@ mask in offset bits 23-20
2:
	str	ip, [r7, r4]
	add	r4, r4, #4
#endif

	@ Loop control shared by both variants: while r4 is (unsigned) below
	@ r5, fetch the next table entry into r7 and go patch it.
.Lnext:
	cmp	r4, r5
	ldrcc	r7, [r4]		@ use branch for delay slot
	bcc	.Lloop
	ret	lr
ENDPROC(__fixup_a_pv_table)

/*
 * fixup_pv_table - patch the instructions referenced by a given pv table
 *
 * In:	r0 = table start
 *	r1 = table size (added to r0 to form the end address)
 *
 * Saves and restores r4-r7 around the call to __fixup_a_pv_table and
 * returns to the caller by popping the saved lr into pc.
 */
ENTRY(fixup_pv_table)
	stmfd	sp!, {r4 - r7, lr}
	mov	r4, r0			@ r0 = table start
	add	r5, r0, r1		@ r1 = table size
	bl	__fixup_a_pv_table
	ldmfd	sp!, {r4 - r7, pc}
ENDPROC(fixup_pv_table)

	.data
	.align	2
	@ 32-bit word written by __fixup_pv_table: phys_offset >> PAGE_SHIFT
	.globl	__pv_phys_pfn_offset
	.type	__pv_phys_pfn_offset, %object
__pv_phys_pfn_offset:
	.word	0
	.size	__pv_phys_pfn_offset, . -__pv_phys_pfn_offset

	@ 64-bit value written by __fixup_pv_table: PHYS_OFFSET - PAGE_OFFSET
	.globl	__pv_offset
	.type	__pv_offset, %object
__pv_offset:
	.quad	0
	.size	__pv_offset, . -__pv_offset