/*
 * copy_page, __copy_user_page, __copy_user implementation of SuperH
 *
 * Copyright (C) 2001 Niibe Yutaka & Kaz Kojima
 * Copyright (C) 2002 Toshinobu Sugioka
 * Copyright (C) 2006 Paul Mundt
 */
#include <linux/linkage.h>
#include <asm/page.h>

/*
 * copy_page
 * @to: P1 address
 * @from: P1 address
 *
 * void copy_page(void *to, void *from)
 */

/*
 * r0, r1, r2, r3, r4, r5, r6, r7 --- scratch
 * r8 --- from + PAGE_SIZE
 * r9 --- not used
 * r10 --- to
 * r11 --- from
 */
ENTRY(copy_page)
	mov.l	r8,@-r15
	mov.l	r10,@-r15
	mov.l	r11,@-r15
	mov	r4,r10
	mov	r5,r11
	mov	r5,r8
	mov.l	.Lpsz,r0
	add	r0,r8
	!
1:	mov.l	@r11+,r0
	mov.l	@r11+,r1
	mov.l	@r11+,r2
	mov.l	@r11+,r3
	mov.l	@r11+,r4
	mov.l	@r11+,r5
	mov.l	@r11+,r6
	mov.l	@r11+,r7
#if defined(CONFIG_CPU_SH4)
	movca.l	r0,@r10
	mov	r10,r0
#else
	mov.l	r0,@r10
#endif
	add	#32,r10
	mov.l	r7,@-r10
	mov.l	r6,@-r10
	mov.l	r5,@-r10
	mov.l	r4,@-r10
	mov.l	r3,@-r10
	mov.l	r2,@-r10
	mov.l	r1,@-r10
#if defined(CONFIG_CPU_SH4)
	ocbwb	@r0
#endif
	cmp/eq	r11,r8
	bf/s	1b
	add	#28,r10
	!
	mov.l	@r15+,r11
	mov.l	@r15+,r10
	mov.l	@r15+,r8
	rts
	nop

	.balign 4
.Lpsz:	.long	PAGE_SIZE

/*
 * __kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n);
 * Return the number of bytes NOT copied
 */
#define EX(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6000f	;	\
	.previous
#define EX_NO_POP(...)		\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6005f	;	\
	.previous
ENTRY(__copy_user)
	! Check if small number of bytes
	mov	#11,r0
	mov	r4,r3
	cmp/gt	r0,r6		! r6 (len) > r0 (11)
	bf/s	.L_cleanup_loop_no_pop
	add	r6,r3		! last destination address

	! Calculate bytes needed to align to src
	mov.l	r11,@-r15
	neg	r5,r0
	mov.l	r10,@-r15
	add	#4,r0
	mov.l	r9,@-r15
	and	#3,r0
	mov.l	r8,@-r15
	tst	r0,r0
	bt	2f

1:
	! Copy bytes to long word align src
EX(	mov.b	@r5+,r1	)
	dt	r0
	add	#-1,r6
EX(	mov.b	r1,@r4	)
	bf/s	1b
	add	#1,r4

	! Jump to appropriate routine depending on dest
2:	mov	#3,r1
	mov	r6, r2
	and	r4,r1
	shlr2	r2
	shll2	r1
	mova	.L_jump_tbl,r0
	mov.l	@(r0,r1),r1
	jmp	@r1
	nop

	.align 2
.L_jump_tbl:
	.long	.L_dest00
	.long	.L_dest01
	.long	.L_dest10
	.long	.L_dest11

/*
 * Come here if there are less than 12 bytes to copy
 *
 * Keep the branch target close, so the bf/s callee doesn't overflow
 * and result in a more expensive branch being inserted. This is the
 * fast-path for small copies, the jump via the jump table will hit the
 * default slow-path cleanup. -PFM.
 */
.L_cleanup_loop_no_pop:
	tst	r6,r6		! Check explicitly for zero
	bt	1f

2:
EX_NO_POP(	mov.b	@r5+,r0	)
	dt	r6
EX_NO_POP(	mov.b	r0,@r4	)
	bf/s	2b
	add	#1,r4

1:	mov	#0,r0		! normal return
5000:

# Exception handler:
.section .fixup, "ax"
6005:
	mov.l	8000f,r1
	mov	r3,r0
	jmp	@r1
	sub	r4,r0
	.align	2
8000:	.long	5000b

.previous
	rts
	nop

! Destination = 00

.L_dest00:
	! Skip the large copy for small transfers
	mov	#(32+32-4), r0
	cmp/gt	r6, r0		! r0 (60) > r6 (len)
	bt	1f

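	/*
	 * Large copy, longword-aligned destination: after the 32-byte
	 * alignment below, each loop iteration moves eight longwords.
	 * On SH-4 the movca.l store allocates the destination cache
	 * line, so the block being overwritten is not fetched first.
	 */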
	! Align dest to a 32 byte boundary
	neg	r4,r0
	add	#0x20, r0
	and	#0x1f, r0
	tst	r0, r0
	bt	2f

	sub	r0, r6
	shlr2	r0
3:
EX(	mov.l	@r5+,r1	)
	dt	r0
EX(	mov.l	r1,@r4	)
	bf/s	3b
	add	#4,r4

2:
EX(	mov.l	@r5+,r0	)
EX(	mov.l	@r5+,r1	)
EX(	mov.l	@r5+,r2	)
EX(	mov.l	@r5+,r7	)
EX(	mov.l	@r5+,r8	)
EX(	mov.l	@r5+,r9	)
EX(	mov.l	@r5+,r10	)
EX(	mov.l	@r5+,r11	)
#ifdef CONFIG_CPU_SH4
EX(	movca.l	r0,@r4	)
#else
EX(	mov.l	r0,@r4	)
#endif
	add	#-32, r6
EX(	mov.l	r1,@(4,r4)	)
	mov	#32, r0
EX(	mov.l	r2,@(8,r4)	)
	cmp/gt	r6, r0		! r0 (32) > r6 (len)
EX(	mov.l	r7,@(12,r4)	)
EX(	mov.l	r8,@(16,r4)	)
EX(	mov.l	r9,@(20,r4)	)
EX(	mov.l	r10,@(24,r4)	)
EX(	mov.l	r11,@(28,r4)	)
	bf/s	2b
	add	#32,r4

1:	mov	r6, r0
	shlr2	r0
	tst	r0, r0
	bt	.L_cleanup
1:
EX(	mov.l	@r5+,r1	)
	dt	r0
EX(	mov.l	r1,@r4	)
	bf/s	1b
	add	#4,r4

	bra	.L_cleanup
	nop

! Destination = 10

.L_dest10:
	mov	r2,r7
	shlr2	r7
	shlr	r7
	tst	r7,r7
	mov	#7,r0
	bt/s	1f
	and	r0,r2
2:
	dt	r7
#ifdef CONFIG_CPU_LITTLE_ENDIAN
EX(	mov.l	@r5+,r0	)
EX(	mov.l	@r5+,r1	)
EX(	mov.l	@r5+,r8	)
EX(	mov.l	@r5+,r9	)
EX(	mov.l	@r5+,r10	)
EX(	mov.w	r0,@r4	)
	add	#2,r4
	xtrct	r1,r0
	xtrct	r8,r1
	xtrct	r9,r8
	xtrct	r10,r9

EX(	mov.l	r0,@r4	)
EX(	mov.l	r1,@(4,r4)	)
EX(	mov.l	r8,@(8,r4)	)
EX(	mov.l	r9,@(12,r4)	)

EX(	mov.l	@r5+,r1	)
EX(	mov.l	@r5+,r8	)
EX(	mov.l	@r5+,r0	)
	xtrct	r1,r10
	xtrct	r8,r1
	xtrct	r0,r8
	shlr16	r0
EX(	mov.l	r10,@(16,r4)	)
EX(	mov.l	r1,@(20,r4)	)
EX(	mov.l	r8,@(24,r4)	)
EX(	mov.w	r0,@(28,r4)	)
	bf/s	2b
	add	#30,r4
#else
EX(	mov.l	@(28,r5),r0	)
EX(	mov.l	@(24,r5),r8	)
EX(	mov.l	@(20,r5),r9	)
EX(	mov.l	@(16,r5),r10	)
EX(	mov.w	r0,@(30,r4)	)
	add	#-2,r4
	xtrct	r8,r0
	xtrct	r9,r8
	xtrct	r10,r9
EX(	mov.l	r0,@(28,r4)	)
EX(	mov.l	r8,@(24,r4)	)
EX(	mov.l	r9,@(20,r4)	)

EX(	mov.l	@(12,r5),r0	)
EX(	mov.l	@(8,r5),r8	)
	xtrct	r0,r10
EX(	mov.l	@(4,r5),r9	)
	mov.l	r10,@(16,r4)
EX(	mov.l	@r5,r10	)
	xtrct	r8,r0
	xtrct	r9,r8
	xtrct	r10,r9
EX(	mov.l	r0,@(12,r4)	)
EX(	mov.l	r8,@(8,r4)	)
	swap.w	r10,r0
EX(	mov.l	r9,@(4,r4)	)
EX(	mov.w	r0,@(2,r4)	)

	add	#32,r5
	bf/s	2b
	add	#34,r4
#endif
	tst	r2,r2
	bt	.L_cleanup

1:	! Read longword, write two words per iteration
EX(	mov.l	@r5+,r0	)
	dt	r2
#ifdef CONFIG_CPU_LITTLE_ENDIAN
EX(	mov.w	r0,@r4	)
	shlr16	r0
EX(	mov.w	r0,@(2,r4)	)
#else
EX(	mov.w	r0,@(2,r4)	)
	shlr16	r0
EX(	mov.w	r0,@r4	)
#endif
	bf/s	1b
	add	#4,r4

	bra	.L_cleanup
	nop

! Destination = 01 or 11

.L_dest01:
.L_dest11:
	! Read longword, write byte, word, byte per iteration
EX(	mov.l	@r5+,r0	)
	dt	r2
#ifdef CONFIG_CPU_LITTLE_ENDIAN
EX(	mov.b	r0,@r4	)
	shlr8	r0
	add	#1,r4
EX(	mov.w	r0,@r4	)
	shlr16	r0
EX(	mov.b	r0,@(2,r4)	)
	bf/s	.L_dest01
	add	#3,r4
#else
EX(	mov.b	r0,@(3,r4)	)
	shlr8	r0
	swap.w	r0,r7
EX(	mov.b	r7,@r4	)
	add	#1,r4
EX(	mov.w	r0,@r4	)
	bf/s	.L_dest01
	add	#3,r4
#endif

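/*
 * The jump-table paths above all end up at .L_cleanup with r4 pointing
 * at the next destination byte and only the low two bits of r6 (the
 * bytes left over after the longword copies) still to be copied.  r3
 * still holds the last destination address computed on entry, so the
 * .fixup handler below returns r3 - r4, the number of bytes that were
 * not copied, and leaves through the same pop/rts epilogue as the
 * normal return.
 */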
! Cleanup last few bytes
.L_cleanup:
	mov	r6,r0
	and	#3,r0
	tst	r0,r0
	bt	.L_exit
	mov	r0,r6

.L_cleanup_loop:
EX(	mov.b	@r5+,r0	)
	dt	r6
EX(	mov.b	r0,@r4	)
	bf/s	.L_cleanup_loop
	add	#1,r4

.L_exit:
	mov	#0,r0	! normal return

5000:

# Exception handler:
.section .fixup, "ax"
6000:
	mov.l	8000f,r1
	mov	r3,r0
	jmp	@r1
	sub	r4,r0
	.align	2
8000:	.long	5000b

.previous
	mov.l	@r15+,r8
	mov.l	@r15+,r9
	mov.l	@r15+,r10
	rts
	mov.l	@r15+,r11
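
/*
 * Usage sketch (illustration only, not assembled; dst/src/len are
 * placeholder variables): __copy_user follows the usual uaccess
 * convention of returning the number of bytes it could NOT copy, so a
 * C caller typically treats a nonzero result as a fault:
 *
 *	if (__copy_user(dst, src, len))
 *		return -EFAULT;
 */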