/* copy_user.S: Sparc optimized copy_from_user and copy_to_user code.
 *
 * Copyright(C) 1995 Linus Torvalds
 * Copyright(C) 1996 David S. Miller
 * Copyright(C) 1996 Eddie C. Dost
 * Copyright(C) 1996,1998 Jakub Jelinek
 *
 * derived from:
 *	e-mail between David and Eddie.
 *
 * Returns 0 if successful, otherwise count of bytes not copied yet
 */

#include <asm/ptrace.h>
#include <asm/asmmacro.h>
#include <asm/page.h>
#include <asm/thread_info.h>
#include <asm/export.h>

/* Work around cpp -rob */
#define ALLOC #alloc
#define EXECINSTR #execinstr
#define EX(x,y,a,b)				\
98:	x,y;					\
	.section .fixup,ALLOC,EXECINSTR;	\
	.align	4;				\
99:	ba fixupretl;				\
	 a, b, %g3;				\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	98b, 99b;			\
	.text;					\
	.align	4

#define EX2(x,y,c,d,e,a,b)			\
98:	x,y;					\
	.section .fixup,ALLOC,EXECINSTR;	\
	.align	4;				\
99:	c, d, e;				\
	ba fixupretl;				\
	 a, b, %g3;				\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	98b, 99b;			\
	.text;					\
	.align	4

#define EXO2(x,y)				\
98:	x, y;					\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	98b, 97f;			\
	.text;					\
	.align	4

#define EXT(start,end,handler)			\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	start, 0, end, handler;		\
	.text;					\
	.align	4

/* Please do not change following macros unless you change logic used
 * in .fixup at the end of this file as well
 */

/* Both these macros have to start with exactly the same insn */
#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src + (offset) + 0x00], %t0; \
	ldd	[%src + (offset) + 0x08], %t2; \
	ldd	[%src + (offset) + 0x10], %t4; \
	ldd	[%src + (offset) + 0x18], %t6; \
	st	%t0, [%dst + (offset) + 0x00]; \
	st	%t1, [%dst + (offset) + 0x04]; \
	st	%t2, [%dst + (offset) + 0x08]; \
	st	%t3, [%dst + (offset) + 0x0c]; \
	st	%t4, [%dst + (offset) + 0x10]; \
	st	%t5, [%dst + (offset) + 0x14]; \
	st	%t6, [%dst + (offset) + 0x18]; \
	st	%t7, [%dst + (offset) + 0x1c];

#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src + (offset) + 0x00], %t0; \
	ldd	[%src + (offset) + 0x08], %t2; \
	ldd	[%src + (offset) + 0x10], %t4; \
	ldd	[%src + (offset) + 0x18], %t6; \
	std	%t0, [%dst + (offset) + 0x00]; \
	std	%t2, [%dst + (offset) + 0x08]; \
	std	%t4, [%dst + (offset) + 0x10]; \
	std	%t6, [%dst + (offset) + 0x18];

#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
	ldd	[%src - (offset) - 0x10], %t0; \
	ldd	[%src - (offset) - 0x08], %t2; \
	st	%t0, [%dst - (offset) - 0x10]; \
	st	%t1, [%dst - (offset) - 0x0c]; \
	st	%t2, [%dst - (offset) - 0x08]; \
	st	%t3, [%dst - (offset) - 0x04];

#define MOVE_HALFCHUNK(src, dst, offset, t0, t1, t2, t3) \
	lduh	[%src + (offset) + 0x00], %t0; \
	lduh	[%src + (offset) + 0x02], %t1; \
	lduh	[%src + (offset) + 0x04], %t2; \
	lduh	[%src + (offset) + 0x06], %t3; \
	sth	%t0, [%dst + (offset) + 0x00]; \
	sth	%t1, [%dst + (offset) + 0x02]; \
	sth	%t2, [%dst + (offset) + 0x04]; \
	sth	%t3, [%dst + (offset) + 0x06];

#define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
	ldub	[%src - (offset) - 0x02], %t0; \
	ldub	[%src - (offset) - 0x01], %t1; \
	stb	%t0, [%dst - (offset) - 0x02]; \
	stb	%t1, [%dst - (offset) - 0x01];
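/* Illustrative only: one MOVE_BIGCHUNK (or MOVE_BIGALIGNCHUNK) invocation
 * above moves a 32-byte block through eight 32-bit temporaries.  A rough
 * C sketch of that data movement, using hypothetical names that appear
 * nowhere else in the kernel, would be:
 *
 *	static void move_bigchunk(unsigned int *dst, const unsigned int *src)
 *	{
 *		unsigned int tmp[8];
 *		int i;
 *
 *		for (i = 0; i < 8; i++)		// the four ldd loads
 *			tmp[i] = src[i];
 *		for (i = 0; i < 8; i++)		// the eight st (or four std) stores
 *			dst[i] = tmp[i];
 *	}
 */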
	.text
	.align	4

	.globl	__copy_user_begin
__copy_user_begin:

	.globl	__copy_user
	EXPORT_SYMBOL(__copy_user)
dword_align:
	andcc	%o1, 1, %g0
	be	4f
	 andcc	%o1, 2, %g0

	EXO2(ldub [%o1], %g2)
	add	%o1, 1, %o1
	EXO2(stb %g2, [%o0])
	sub	%o2, 1, %o2
	bne	3f
	 add	%o0, 1, %o0

	EXO2(lduh [%o1], %g2)
	add	%o1, 2, %o1
	EXO2(sth %g2, [%o0])
	sub	%o2, 2, %o2
	b	3f
	 add	%o0, 2, %o0
4:
	EXO2(lduh [%o1], %g2)
	add	%o1, 2, %o1
	EXO2(sth %g2, [%o0])
	sub	%o2, 2, %o2
	b	3f
	 add	%o0, 2, %o0

__copy_user:	/* %o0=dst %o1=src %o2=len */
	xor	%o0, %o1, %o4
1:
	andcc	%o4, 3, %o5
2:
	bne	cannot_optimize
	 cmp	%o2, 15

	bleu	short_aligned_end
	 andcc	%o1, 3, %g0

	bne	dword_align
3:
	 andcc	%o1, 4, %g0

	be	2f
	 mov	%o2, %g1

	EXO2(ld [%o1], %o4)
	sub	%g1, 4, %g1
	EXO2(st %o4, [%o0])
	add	%o1, 4, %o1
	add	%o0, 4, %o0
2:
	andcc	%g1, 0xffffff80, %g7
	be	3f
	 andcc	%o0, 4, %g0

	be	ldd_std + 4
5:
	MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
80:
	EXT(5b, 80b, 50f)
	subcc	%g7, 128, %g7
	add	%o1, 128, %o1
	bne	5b
	 add	%o0, 128, %o0
3:
	andcc	%g1, 0x70, %g7
	be	copy_user_table_end
	 andcc	%g1, 8, %g0

	sethi	%hi(copy_user_table_end), %o5
	srl	%g7, 1, %o4
	add	%g7, %o4, %o4
	add	%o1, %g7, %o1
	sub	%o5, %o4, %o5
	jmpl	%o5 + %lo(copy_user_table_end), %g0
	 add	%o0, %g7, %o0

copy_user_table:
	MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
copy_user_table_end:
	EXT(copy_user_table, copy_user_table_end, 51f)
	be	copy_user_last7
	 andcc	%g1, 4, %g0

	EX(ldd [%o1], %g2, and %g1, 0xf)
	add	%o0, 8, %o0
	add	%o1, 8, %o1
	EX(st %g2, [%o0 - 0x08], and %g1, 0xf)
	EX2(st %g3, [%o0 - 0x04], and %g1, 0xf, %g1, sub %g1, 4)
copy_user_last7:
	be	1f
	 andcc	%g1, 2, %g0

	EX(ld [%o1], %g2, and %g1, 7)
	add	%o1, 4, %o1
	EX(st %g2, [%o0], and %g1, 7)
	add	%o0, 4, %o0
1:
	be	1f
	 andcc	%g1, 1, %g0

	EX(lduh [%o1], %g2, and %g1, 3)
	add	%o1, 2, %o1
	EX(sth %g2, [%o0], and %g1, 3)
	add	%o0, 2, %o0
1:
	be	1f
	 nop

	EX(ldub [%o1], %g2, add %g0, 1)
	EX(stb %g2, [%o0], add %g0, 1)
1:
	retl
	 clr	%o0

ldd_std:
	MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
81:
	EXT(ldd_std, 81b, 52f)
	subcc	%g7, 128, %g7
	add	%o1, 128, %o1
	bne	ldd_std
	 add	%o0, 128, %o0

	andcc	%g1, 0x70, %g7
	be	copy_user_table_end
	 andcc	%g1, 8, %g0

	sethi	%hi(copy_user_table_end), %o5
	srl	%g7, 1, %o4
	add	%g7, %o4, %o4
	add	%o1, %g7, %o1
	sub	%o5, %o4, %o5
	jmpl	%o5 + %lo(copy_user_table_end), %g0
	 add	%o0, %g7, %o0

cannot_optimize:
	bleu	short_end
	 cmp	%o5, 2

	bne	byte_chunk
	 and	%o2, 0xfffffff0, %o3

	andcc	%o1, 1, %g0
	be	10f
	 nop

	EXO2(ldub [%o1], %g2)
	add	%o1, 1, %o1
	EXO2(stb %g2, [%o0])
	sub	%o2, 1, %o2
	andcc	%o2, 0xfffffff0, %o3
	be	short_end
	 add	%o0, 1, %o0
10:
	MOVE_HALFCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
	MOVE_HALFCHUNK(o1, o0, 0x08, g2, g3, g4, g5)
82:
	EXT(10b, 82b, 53f)
	subcc	%o3, 0x10, %o3
	add	%o1, 0x10, %o1
	bne	10b
	 add	%o0, 0x10, %o0
	b	2f
	 and	%o2, 0xe, %o3

byte_chunk:
	MOVE_SHORTCHUNK(o1, o0, -0x02, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x04, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x06, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x08, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x0a, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x0c, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x0e, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x10, g2, g3)
83:
	EXT(byte_chunk, 83b, 54f)
	subcc	%o3, 0x10, %o3
	add	%o1, 0x10, %o1
	bne	byte_chunk
	 add	%o0, 0x10, %o0

short_end:
	and	%o2, 0xe, %o3
2:
	sethi	%hi(short_table_end), %o5
	sll	%o3, 3, %o4
	add	%o0, %o3, %o0
	sub	%o5, %o4, %o5
	add	%o1, %o3, %o1
	jmpl	%o5 + %lo(short_table_end), %g0
	 andcc	%o2, 1, %g0
84:
	MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)
short_table_end:
	EXT(84b, short_table_end, 55f)
	be	1f
	 nop
	EX(ldub [%o1], %g2, add %g0, 1)
	EX(stb %g2, [%o0], add %g0, 1)
1:
	retl
	 clr	%o0

short_aligned_end:
	bne	short_end
	 andcc	%o2, 8, %g0

	be	1f
	 andcc	%o2, 4, %g0

	EXO2(ld [%o1 + 0x00], %g2)
	EXO2(ld [%o1 + 0x04], %g3)
	add	%o1, 8, %o1
	EXO2(st %g2, [%o0 + 0x00])
	EX(st %g3, [%o0 + 0x04], sub %o2, 4)
	add	%o0, 8, %o0
1:
	b	copy_user_last7
	 mov	%o2, %g1

	.section .fixup,#alloc,#execinstr
	.align	4
97:
	mov	%o2, %g3
fixupretl:
	retl
	 mov	%g3, %o0

/* exception routine sets %g2 to (broken_insn - first_insn)>>2 */
50:
/* This magic counts how many bytes are left when a crash in MOVE_BIGCHUNK
 * happens.  It is derived from how much the ldds have read and the sts have
 * stored at the faulting instruction:
 * x = g2 % 12;
 * g3 = g1 + g7 - ((g2 / 12) * 32 + ((x < 4) ? 0 : (x - 4) * 4));
 * o0 += (g2 / 12) * 32;
 */
	cmp	%g2, 12
	add	%o0, %g7, %o0
	bcs	1f
	 cmp	%g2, 24
	bcs	2f
	 cmp	%g2, 36
	bcs	3f
	 nop
	sub	%g2, 12, %g2
	sub	%g7, 32, %g7
3:	sub	%g2, 12, %g2
	sub	%g7, 32, %g7
2:	sub	%g2, 12, %g2
	sub	%g7, 32, %g7
1:	cmp	%g2, 4
	bcs,a	60f
	 clr	%g2
	sub	%g2, 4, %g2
	sll	%g2, 2, %g2
60:	and	%g1, 0x7f, %g3
	sub	%o0, %g7, %o0
	add	%g3, %g7, %g3
	ba	fixupretl
	 sub	%g3, %g2, %g3
51:
/* i = 41 - g2; j = i % 6;
 * g3 = (g1 & 15) + (i / 6) * 16 + ((j < 4) ? (j + 1) * 4 : 16);
 * o0 -= (i / 6) * 16 + 16;
 */
	neg	%g2
	and	%g1, 0xf, %g1
	add	%g2, 41, %g2
	add	%o0, %g1, %o0
1:	cmp	%g2, 6
	bcs,a	2f
	 cmp	%g2, 4
	add	%g1, 16, %g1
	b	1b
	 sub	%g2, 6, %g2
2:	bcc,a	2f
	 mov	16, %g2
	inc	%g2
	sll	%g2, 2, %g2
2:	add	%g1, %g2, %g3
	ba	fixupretl
	 sub	%o0, %g3, %o0
52:
(g2 & 3) * 8 : 0; 426 o0 += (g2 / 8) * 32 */ 427 andn %g2, 7, %g4 428 add %o0, %g7, %o0 429 andcc %g2, 4, %g0 430 and %g2, 3, %g2 431 sll %g4, 2, %g4 432 sll %g2, 3, %g2 433 bne 60b 434 sub %g7, %g4, %g7 435 ba 60b 436 clr %g2 43753: 438/* g3 = o3 + (o2 & 15) - (g2 & 8) - (g2 & 4) ? (g2 & 3) * 2 : 0; 439 o0 += (g2 & 8) */ 440 and %g2, 3, %g4 441 andcc %g2, 4, %g0 442 and %g2, 8, %g2 443 sll %g4, 1, %g4 444 be 1f 445 add %o0, %g2, %o0 446 add %g2, %g4, %g2 4471: and %o2, 0xf, %g3 448 add %g3, %o3, %g3 449 ba fixupretl 450 sub %g3, %g2, %g3 45154: 452/* g3 = o3 + (o2 & 15) - (g2 / 4) * 2 - (g2 & 2) ? (g2 & 1) : 0; 453 o0 += (g2 / 4) * 2 */ 454 srl %g2, 2, %o4 455 and %g2, 1, %o5 456 srl %g2, 1, %g2 457 add %o4, %o4, %o4 458 and %o5, %g2, %o5 459 and %o2, 0xf, %o2 460 add %o0, %o4, %o0 461 sub %o3, %o5, %o3 462 sub %o2, %o4, %o2 463 ba fixupretl 464 add %o2, %o3, %g3 46555: 466/* i = 27 - g2; 467 g3 = (o2 & 1) + i / 4 * 2 + !(i & 3); 468 o0 -= i / 4 * 2 + 1 */ 469 neg %g2 470 and %o2, 1, %o2 471 add %g2, 27, %g2 472 srl %g2, 2, %o5 473 andcc %g2, 3, %g0 474 mov 1, %g2 475 add %o5, %o5, %o5 476 be,a 1f 477 clr %g2 4781: add %g2, %o5, %g3 479 sub %o0, %g3, %o0 480 ba fixupretl 481 add %g3, %o2, %g3 482 483 .globl __copy_user_end 484__copy_user_end: 485