/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2012
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
#include <asm/page.h>
#include <asm/ppc_asm.h>

/*
 * TH operand values used with dcbt/dcbtst below to program the
 * prefetch streams.
 */
#define CP7_TH_STREAM_ADDR	0b01000	/* operand carries the stream address */
#define CP7_TH_STREAM_CTRL	0b01010	/* operand carries length/depth/GO */

/*
 * copypage_power7 - copy PAGE_SIZE bytes, 128 bytes per loop iteration.
 *
 * In:	r3 = destination ("to"), page aligned
 *	r4 = source ("from"), page aligned
 *
 * With CONFIG_ALTIVEC the routine calls enter_vmx_ops; a non-zero
 * result selects the vector loop, which finishes by tail-calling
 * exit_vmx_ops.  A zero result, or a build without CONFIG_ALTIVEC,
 * selects the integer loop, which borrows r14-r20 and therefore saves
 * and restores them in a STACKFRAMESIZE stack frame.
 *
 * Scratch: r0, r5-r12, ctr, cr0 (plus v0-v7 on the vector path).
 */
_GLOBAL(copypage_power7)
	/*
	 * Prefetch both the source and the destination with the enhanced
	 * touch instructions: stream ID 0 is the load side, stream ID 1
	 * the store side.  Both addresses are page aligned, so their
	 * bottom 7 bits are already clear and need no masking.
	 */
	ori	r9,r3,1				/* "to" address | stream=1 */

#ifdef CONFIG_PPC_64K_PAGES
	lis	r7,0x0E01			/* depth=7,
						 * units/cachelines=512 */
#else
	lis	r7,0x0E00			/* depth=7 */
	ori	r7,r7,0x1000			/* units/cachelines=32 */
#endif
	ori	r10,r7,1			/* same control word, stream=1 */

	lis	r8,0x8000			/* GO=1 */
	clrldi	r8,r8,32			/* drop the upper 32 bits left by lis */

	/* describe read stream 0 */
	dcbt	0,r4,CP7_TH_STREAM_ADDR		/* addr from */
	dcbt	0,r7,CP7_TH_STREAM_CTRL		/* length and depth from */
	/* describe write stream 1 */
	dcbtst	0,r9,CP7_TH_STREAM_ADDR		/* addr to */
	dcbtst	0,r10,CP7_TH_STREAM_CTRL	/* length and depth to */
	eieio					/* presumably orders the setup ahead of GO */
	dcbt	0,r8,CP7_TH_STREAM_CTRL		/* all streams GO */

#ifdef CONFIG_ALTIVEC
	/*
	 * Keep r3/r4 across the call: they are stored into what become
	 * the R31/R30 slots of the frame created by the stdu below.  LR
	 * goes into the LR save slot at 16(r1) of the current frame.
	 */
	mflr	r0
	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	std	r0,16(r1)
	stdu	r1,-STACKFRAMESIZE(r1)
	bl	enter_vmx_ops
	cmpwi	r3,0				/* cr0.eq set => take the integer loop */
	ld	r0,STACKFRAMESIZE+16(r1)
	ld	r3,STK_REG(R31)(r1)
	ld	r4,STK_REG(R30)(r1)
	mtlr	r0

	li	r0,(PAGE_SIZE/128)		/* iterations: 128 bytes each */
	mtctr	r0

	/* The frame is still allocated here; the integer path reuses it. */
	beq	.Lnonvmx_copy

	/* Vector path needs no frame: release it before the loop. */
	addi	r1,r1,STACKFRAMESIZE

	/* Index registers for the eight 16-byte quadwords of a chunk. */
	li	r6,16
	li	r7,32
	li	r8,48
	li	r9,64
	li	r10,80
	li	r11,96
	li	r12,112

	.align	5
.Lvmx_loop:
	/* load 128 bytes from the source ... */
	lvx	v7,0,r4
	lvx	v6,r4,r6
	lvx	v5,r4,r7
	lvx	v4,r4,r8
	lvx	v3,r4,r9
	lvx	v2,r4,r10
	lvx	v1,r4,r11
	lvx	v0,r4,r12
	addi	r4,r4,128
	/* ... and store them to the destination */
	stvx	v7,0,r3
	stvx	v6,r3,r6
	stvx	v5,r3,r7
	stvx	v4,r3,r8
	stvx	v3,r3,r9
	stvx	v2,r3,r10
	stvx	v1,r3,r11
	stvx	v0,r3,r12
	addi	r3,r3,128
	bdnz	.Lvmx_loop

	/* LR already holds our caller's address, so just branch. */
	b	exit_vmx_ops			/* tail call optimise */

#else
	li	r0,(PAGE_SIZE/128)		/* iterations: 128 bytes each */
	mtctr	r0

	/* Frame used below to hold r14-r20. */
	stdu	r1,-STACKFRAMESIZE(r1)
#endif

.Lnonvmx_copy:
	/* r14-r20 serve as extra scratch in the loop; preserve them. */
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)

.Lgpr_loop:
	/* sixteen doubleword loads (r13 is skipped) ... */
	ld	r0,0(r4)
	ld	r5,8(r4)
	ld	r6,16(r4)
	ld	r7,24(r4)
	ld	r8,32(r4)
	ld	r9,40(r4)
	ld	r10,48(r4)
	ld	r11,56(r4)
	ld	r12,64(r4)
	ld	r14,72(r4)
	ld	r15,80(r4)
	ld	r16,88(r4)
	ld	r17,96(r4)
	ld	r18,104(r4)
	ld	r19,112(r4)
	ld	r20,120(r4)
	addi	r4,r4,128
	/* ... followed by the sixteen matching stores */
	std	r0,0(r3)
	std	r5,8(r3)
	std	r6,16(r3)
	std	r7,24(r3)
	std	r8,32(r3)
	std	r9,40(r3)
	std	r10,48(r3)
	std	r11,56(r3)
	std	r12,64(r3)
	std	r14,72(r3)
	std	r15,80(r3)
	std	r16,88(r3)
	std	r17,96(r3)
	std	r18,104(r3)
	std	r19,112(r3)
	std	r20,120(r3)
	addi	r3,r3,128
	bdnz	.Lgpr_loop

	/* Put r14-r20 back, drop the frame and return. */
	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	addi	r1,r1,STACKFRAMESIZE
	blr