/*
 * copy_page, __copy_user_page, __copy_user implementation of SuperH
 *
 * Copyright (C) 2001  Niibe Yutaka & Kaz Kojima
 * Copyright (C) 2002  Toshinobu Sugioka
 * Copyright (C) 2006  Paul Mundt
 */
#include <linux/linkage.h>
#include <asm/page.h>

/*
 * copy_page
 * @to: P1 address
 * @from: P1 address
 *
 * void copy_page(void *to, void *from)
 *
 * Copies PAGE_SIZE bytes from @from to @to, 32 bytes (eight longwords)
 * per loop iteration.  The byte count is rebuilt as
 * (PAGE_SIZE >> 10) << 10 and the loop only terminates when the source
 * pointer is exactly equal to from + PAGE_SIZE, so PAGE_SIZE has to be a
 * multiple of 1024.
 */

/*
 * r0, r1, r2, r3, r4, r5, r6, r7 --- scratch
 * r8 --- from + PAGE_SIZE
 * r9 --- not used
 * r10 --- to
 * r11 --- from
 */
ENTRY(copy_page)
	! Save the registers used as loop state
	mov.l	r8,@-r15
	mov.l	r10,@-r15
	mov.l	r11,@-r15
	mov	r4,r10
	mov	r5,r11
	mov	r5,r8
	! The 8-bit immediate is scaled back up by 10 bits (shll8 + shll2)
	mov	#(PAGE_SIZE >> 10), r0
	shll8	r0
	shll2	r0
	add	r0,r8
	!
1:	mov.l	@r11+,r0
	mov.l	@r11+,r1
	mov.l	@r11+,r2
	mov.l	@r11+,r3
	mov.l	@r11+,r4
	mov.l	@r11+,r5
	mov.l	@r11+,r6
	mov.l	@r11+,r7
	! First longword of the block goes out first; the other seven are
	! stored with pre-decrement from the end of the block downwards.
#if defined(CONFIG_CPU_SH4)
	movca.l	r0,@r10
#else
	mov.l	r0,@r10
#endif
	add	#32,r10
	mov.l	r7,@-r10
	mov.l	r6,@-r10
	mov.l	r5,@-r10
	mov.l	r4,@-r10
	mov.l	r3,@-r10
	mov.l	r2,@-r10
	mov.l	r1,@-r10
	cmp/eq	r11,r8
	bf/s	1b
	 add	#28,r10		! delay slot: r10 is block + 4 here, move to next block
	!
	mov.l	@r15+,r11
	mov.l	@r15+,r10
	mov.l	@r15+,r8
	rts
	 nop

/*
 * __kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n);
 * Return the number of bytes NOT copied
 *
 * Register usage:
 *   r4 --- to (advances as bytes are stored)
 *   r5 --- from (advances as bytes are loaded)
 *   r6 --- n / remaining byte count
 *   r3 --- to + n, used by the fixup code to compute the return value
 *   r2 --- remaining longword count for the unaligned-destination paths
 *   r0 --- return value (0 on success, r3 - r4 after a fault)
 *   r8-r11 are saved on the stack once the copy is known to be 12 bytes
 *   or more, and restored on the common exit path.
 *
 * Every load and store that touches the buffers is wrapped in EX() (or
 * EX_NO_POP() on the small-copy path, where r8-r11 were never pushed).
 * Each wrapper records an __ex_table pair { faulting insn, fixup },
 * pointing at label 6000 (restores r8-r11) or 6005 (no restore).
 */
#define EX(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6000f	;	\
	.previous
#define EX_NO_POP(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6005f	;	\
	.previous
ENTRY(__copy_user)
	! Check if small number of bytes
	mov	#11,r0
	mov	r4,r3
	cmp/gt	r0,r6		! r6 (len) > r0 (11)
	bf/s	.L_cleanup_loop_no_pop
	 add	r6,r3		! r3 = to + len (end of destination)

	! Calculate bytes needed to align to src: r0 = (4 - from) & 3.
	! The pushes of r8-r11 are interleaved with that computation.
	mov.l	r11,@-r15
	neg	r5,r0
	mov.l	r10,@-r15
	add	#4,r0
	mov.l	r9,@-r15
	and	#3,r0
	mov.l	r8,@-r15
	tst	r0,r0
	bt	2f

1:
	! Copy bytes to long word align src
EX(	mov.b	@r5+,r1		)
	dt	r0
	add	#-1,r6
EX(	mov.b	r1,@r4		)
	bf/s	1b
	 add	#1,r4

	! Jump to appropriate routine depending on dest
	! r1 = (to & 3) * 4 indexes the table, r2 = remaining longwords
2:	mov	#3,r1
	mov	r6, r2
	and	r4,r1
	shlr2	r2
	shll2	r1
	mova	.L_jump_tbl,r0
	mov.l	@(r0,r1),r1
	jmp	@r1
	 nop

	.align 2
.L_jump_tbl:
	.long	.L_dest00
	.long	.L_dest01
	.long	.L_dest10
	.long	.L_dest11

/*
 * Come here if there are less than 12 bytes to copy
 *
 * Keep the branch target close, so the bf/s callee doesn't overflow
 * and result in a more expensive branch being inserted. This is the
 * fast-path for small copies, the jump via the jump table will hit the
 * default slow-path cleanup. -PFM.
 */
.L_cleanup_loop_no_pop:
	tst	r6,r6		! Check explicitly for zero
	bt	1f

2:
EX_NO_POP(	mov.b	@r5+,r0		)
	dt	r6
EX_NO_POP(	mov.b	r0,@r4		)
	bf/s	2b
	 add	#1,r4

1:	mov	#0,r0		! normal return
5000:

# Exception handler:
.section .fixup, "ax"
6005:
	mov.l	8000f,r1
	mov	r3,r0
	jmp	@r1
	 sub	r4,r0		! delay slot: r0 = r3 - r4 = bytes not copied
	.align	2
8000:	.long	5000b

.previous
	rts
	 nop

! Destination = 00

.L_dest00:
	! Skip the large copy for small transfers
	mov	#(32+32-4), r0
	cmp/gt	r6, r0		! r0 (60) > r6 (len)
	bt	1f

	! Align dest to a 32 byte boundary: r0 = (32 - to) & 31
	neg	r4,r0
	add	#0x20, r0
	and	#0x1f, r0
	tst	r0, r0
	bt	2f

	sub	r0, r6
	shlr2	r0
3:
EX(	mov.l	@r5+,r1		)
	dt	r0
EX(	mov.l	r1,@r4		)
	bf/s	3b
	 add	#4,r4

	! Main loop: 32 bytes per iteration, repeated while len >= 32
2:
EX(	mov.l	@r5+,r0		)
EX(	mov.l	@r5+,r1		)
EX(	mov.l	@r5+,r2		)
EX(	mov.l	@r5+,r7		)
EX(	mov.l	@r5+,r8		)
EX(	mov.l	@r5+,r9		)
EX(	mov.l	@r5+,r10	)
EX(	mov.l	@r5+,r11	)
#ifdef CONFIG_CPU_SH4
EX(	movca.l	r0,@r4		)
#else
EX(	mov.l	r0,@r4		)
#endif
	add	#-32, r6
EX(	mov.l	r1,@(4,r4)	)
	mov	#32, r0
EX(	mov.l	r2,@(8,r4)	)
	cmp/gt	r6, r0		! r0 (32) > r6 (len)
EX(	mov.l	r7,@(12,r4)	)
EX(	mov.l	r8,@(16,r4)	)
EX(	mov.l	r9,@(20,r4)	)
EX(	mov.l	r10,@(24,r4)	)
EX(	mov.l	r11,@(28,r4)	)
	bf/s	2b
	 add	#32,r4

	! Copy the remaining whole longwords one at a time
1:	mov	r6, r0
	shlr2	r0
	tst	r0, r0
	bt	.L_cleanup
1:
EX(	mov.l	@r5+,r1		)
	dt	r0
EX(	mov.l	r1,@r4		)
	bf/s	1b
	 add	#4,r4

	bra	.L_cleanup
	 nop

! Destination = 10

.L_dest10:
	! r7 = number of 32-byte chunks, r2 = leftover longwords (0..7)
	mov	r2,r7
	shlr2	r7
	shlr	r7
	tst	r7,r7
	mov	#7,r0
	bt/s	1f
	 and	r0,r2
	! Each pass loads eight longwords and stores them as a leading
	! halfword, seven longwords assembled with xtrct, and a trailing
	! halfword.  The T bit set by dt is consumed by the bf/s at the end.
2:
	dt	r7
#ifdef CONFIG_CPU_LITTLE_ENDIAN
EX(	mov.l	@r5+,r0		)
EX(	mov.l	@r5+,r1		)
EX(	mov.l	@r5+,r8		)
EX(	mov.l	@r5+,r9		)
EX(	mov.l	@r5+,r10	)
EX(	mov.w	r0,@r4		)
	add	#2,r4
	xtrct	r1,r0
	xtrct	r8,r1
	xtrct	r9,r8
	xtrct	r10,r9

EX(	mov.l	r0,@r4		)
EX(	mov.l	r1,@(4,r4)	)
EX(	mov.l	r8,@(8,r4)	)
EX(	mov.l	r9,@(12,r4)	)

EX(	mov.l	@r5+,r1		)
EX(	mov.l	@r5+,r8		)
EX(	mov.l	@r5+,r0		)
	xtrct	r1,r10
	xtrct	r8,r1
	xtrct	r0,r8
	shlr16	r0
EX(	mov.l	r10,@(16,r4)	)
EX(	mov.l	r1,@(20,r4)	)
EX(	mov.l	r8,@(24,r4)	)
EX(	mov.w	r0,@(28,r4)	)
	bf/s	2b
	 add	#30,r4
#else
EX(	mov.l	@(28,r5),r0	)
EX(	mov.l	@(24,r5),r8	)
EX(	mov.l	@(20,r5),r9	)
EX(	mov.l	@(16,r5),r10	)
EX(	mov.w	r0,@(30,r4)	)
	add	#-2,r4
	xtrct	r8,r0
	xtrct	r9,r8
	xtrct	r10,r9
EX(	mov.l	r0,@(28,r4)	)
EX(	mov.l	r8,@(24,r4)	)
EX(	mov.l	r9,@(20,r4)	)

EX(	mov.l	@(12,r5),r0	)
EX(	mov.l	@(8,r5),r8	)
	xtrct	r0,r10
EX(	mov.l	@(4,r5),r9	)
	/*
	 * This is a store to the destination buffer like its neighbours,
	 * so it carries an exception table entry as well; a fault here
	 * is then routed to the fixup code instead of being unhandled.
	 */
EX(	mov.l	r10,@(16,r4)	)
EX(	mov.l	@r5,r10		)
	xtrct	r8,r0
	xtrct	r9,r8
	xtrct	r10,r9
EX(	mov.l	r0,@(12,r4)	)
EX(	mov.l	r8,@(8,r4)	)
	swap.w	r10,r0
EX(	mov.l	r9,@(4,r4)	)
EX(	mov.w	r0,@(2,r4)	)

	add	#32,r5
	bf/s	2b
	 add	#34,r4
#endif
	tst	r2,r2
	bt	.L_cleanup

1:	! Read longword, write two words per iteration
EX(	mov.l	@r5+,r0		)
	dt	r2
#ifdef CONFIG_CPU_LITTLE_ENDIAN
EX(	mov.w	r0,@r4		)
	shlr16	r0
EX(	mov.w	r0,@(2,r4)	)
#else
EX(	mov.w	r0,@(2,r4)	)
	shlr16	r0
EX(	mov.w	r0,@r4		)
#endif
	bf/s	1b
	 add	#4,r4

	bra	.L_cleanup
	 nop

! Destination = 01 or 11

.L_dest01:
.L_dest11:
	! Read longword, write byte, word, byte per iteration
	! r2 counts the longwords; falls through to .L_cleanup when done
EX(	mov.l	@r5+,r0		)
	dt	r2
#ifdef CONFIG_CPU_LITTLE_ENDIAN
EX(	mov.b	r0,@r4		)
	shlr8	r0
	add	#1,r4
EX(	mov.w	r0,@r4		)
	shlr16	r0
EX(	mov.b	r0,@(2,r4)	)
	bf/s	.L_dest01
	 add	#3,r4
#else
EX(	mov.b	r0,@(3,r4)	)
	shlr8	r0
	swap.w	r0,r7
EX(	mov.b	r7,@r4		)
	add	#1,r4
EX(	mov.w	r0,@r4		)
	bf/s	.L_dest01
	 add	#3,r4
#endif

! Cleanup last few bytes
.L_cleanup:
	! Only len & 3 bytes can be left once the longword loops are done
	mov	r6,r0
	and	#3,r0
	tst	r0,r0
	bt	.L_exit
	mov	r0,r6

.L_cleanup_loop:
EX(	mov.b	@r5+,r0		)
	dt	r6
EX(	mov.b	r0,@r4		)
	bf/s	.L_cleanup_loop
	 add	#1,r4

.L_exit:
	mov	#0,r0		! normal return

5000:

# Exception handler:
.section .fixup, "ax"
6000:
	mov.l	8000f,r1
	mov	r3,r0
	jmp	@r1
	 sub	r4,r0		! delay slot: r0 = r3 - r4 = bytes not copied
	.align	2
8000:	.long	5000b

.previous
	! Common exit for the normal and the fixup path: restore r8-r11
	mov.l	@r15+,r8
	mov.l	@r15+,r9
	mov.l	@r15+,r10
	rts
	 mov.l	@r15+,r11