/* copy_user.S: Sparc optimized copy_from_user and copy_to_user code.
 *
 *  Copyright(C) 1995 Linus Torvalds
 *  Copyright(C) 1996 David S. Miller
 *  Copyright(C) 1996 Eddie C. Dost
 *  Copyright(C) 1996,1998 Jakub Jelinek
 *
 * derived from:
 *	e-mail between David and Eddie.
 *
 * Returns 0 if successful, otherwise count of bytes not copied yet
 */

#include <asm/ptrace.h>
#include <asm/asmmacro.h>
#include <asm/page.h>
#include <asm/thread_info.h>
#include <asm/export.h>

/* Work around cpp -rob */
#define ALLOC #alloc
#define EXECINSTR #execinstr
#define EX(x,y,a,b)				\
98:	x,y;					\
	.section .fixup,ALLOC,EXECINSTR;	\
	.align	4;				\
99:	ba fixupretl;				\
	 a, b, %g3;				\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	98b, 99b;			\
	.text;					\
	.align	4

#define EX2(x,y,c,d,e,a,b)			\
98:	x,y;					\
	.section .fixup,ALLOC,EXECINSTR;	\
	.align	4;				\
99:	c, d, e;				\
	ba fixupretl;				\
	 a, b, %g3;				\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	98b, 99b;			\
	.text;					\
	.align	4

#define EXO2(x,y)				\
98:	x, y;					\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	98b, 97f;			\
	.text;					\
	.align	4

#define EXT(start,end,handler)			\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	start, 0, end, handler;		\
	.text;					\
	.align	4

/* Please do not change the following macros unless you also change the
 * logic used in the .fixup section at the end of this file.
 */

/* Both these macros have to start with exactly the same insn */
#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src + (offset) + 0x00], %t0; \
	ldd	[%src + (offset) + 0x08], %t2; \
	ldd	[%src + (offset) + 0x10], %t4; \
	ldd	[%src + (offset) + 0x18], %t6; \
	st	%t0, [%dst + (offset) + 0x00]; \
	st	%t1, [%dst + (offset) + 0x04]; \
	st	%t2, [%dst + (offset) + 0x08]; \
	st	%t3, [%dst + (offset) + 0x0c]; \
	st	%t4, [%dst + (offset) + 0x10]; \
	st	%t5, [%dst + (offset) + 0x14]; \
	st	%t6, [%dst + (offset) + 0x18]; \
	st	%t7, [%dst + (offset) + 0x1c];

#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src + (offset) + 0x00], %t0; \
	ldd	[%src + (offset) + 0x08], %t2; \
	ldd	[%src + (offset) + 0x10], %t4; \
	ldd	[%src + (offset) + 0x18], %t6; \
	std	%t0, [%dst + (offset) + 0x00]; \
	std	%t2, [%dst + (offset) + 0x08]; \
	std	%t4, [%dst + (offset) + 0x10]; \
	std	%t6, [%dst + (offset) + 0x18];

#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
	ldd	[%src - (offset) - 0x10], %t0; \
	ldd	[%src - (offset) - 0x08], %t2; \
	st	%t0, [%dst - (offset) - 0x10]; \
	st	%t1, [%dst - (offset) - 0x0c]; \
	st	%t2, [%dst - (offset) - 0x08]; \
	st	%t3, [%dst - (offset) - 0x04];

#define MOVE_HALFCHUNK(src, dst, offset, t0, t1, t2, t3) \
	lduh	[%src + (offset) + 0x00], %t0; \
	lduh	[%src + (offset) + 0x02], %t1; \
	lduh	[%src + (offset) + 0x04], %t2; \
	lduh	[%src + (offset) + 0x06], %t3; \
	sth	%t0, [%dst + (offset) + 0x00]; \
	sth	%t1, [%dst + (offset) + 0x02]; \
	sth	%t2, [%dst + (offset) + 0x04]; \
	sth	%t3, [%dst + (offset) + 0x06];

#define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
	ldub	[%src - (offset) - 0x02], %t0; \
	ldub	[%src - (offset) - 0x01], %t1; \
	stb	%t0, [%dst - (offset) - 0x02]; \
	stb	%t1, [%dst - (offset) - 0x01];

	.text
	.align	4

	.globl	__copy_user_begin
__copy_user_begin:

	.globl	__copy_user
	EXPORT_SYMBOL(__copy_user)
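
/* %o0 and %o1 are misaligned by the same amount here, so both can be
 * brought to word alignment together: copy one leading byte and/or one
 * halfword until %o1 (and with it %o0) is 4-byte aligned, then rejoin
 * the word-copy path at 3: in __copy_user.
 */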
dword_align:
	andcc	%o1, 1, %g0
	be	4f
	 andcc	%o1, 2, %g0

	EXO2(ldub [%o1], %g2)
	add	%o1, 1, %o1
	EXO2(stb %g2, [%o0])
	sub	%o2, 1, %o2
	bne	3f
	 add	%o0, 1, %o0

	EXO2(lduh [%o1], %g2)
	add	%o1, 2, %o1
	EXO2(sth %g2, [%o0])
	sub	%o2, 2, %o2
	b	3f
	 add	%o0, 2, %o0
4:
	EXO2(lduh [%o1], %g2)
	add	%o1, 2, %o1
	EXO2(sth %g2, [%o0])
	sub	%o2, 2, %o2
	b	3f
	 add	%o0, 2, %o0

__copy_user:	/* %o0=dst %o1=src %o2=len */
	xor	%o0, %o1, %o4
1:
	andcc	%o4, 3, %o5
2:
	bne	cannot_optimize
	 cmp	%o2, 15

	bleu	short_aligned_end
	 andcc	%o1, 3, %g0

	bne	dword_align
3:
	 andcc	%o1, 4, %g0

	be	2f
	 mov	%o2, %g1

	EXO2(ld [%o1], %o4)
	sub	%g1, 4, %g1
	EXO2(st %o4, [%o0])
	add	%o1, 4, %o1
	add	%o0, 4, %o0
2:
	andcc	%g1, 0xffffff80, %g7
	be	3f
	 andcc	%o0, 4, %g0

	/* The delay slot below executes the first ldd of MOVE_BIGCHUNK,
	 * which is identical to the first insn of MOVE_BIGALIGNCHUNK,
	 * so entering ldd_std one instruction in is safe.
	 */
	be	ldd_std + 4
5:
	MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
80:
	EXT(5b, 80b, 50f)
	subcc	%g7, 128, %g7
	add	%o1, 128, %o1
	bne	5b
	 add	%o0, 128, %o0
3:
	andcc	%g1, 0x70, %g7
	be	copy_user_table_end
	 andcc	%g1, 8, %g0

	/* Each MOVE_LASTCHUNK copies 16 bytes and assembles to 6 insns
	 * (24 bytes of code), so the entry point lies %g7 + %g7/2 bytes
	 * before copy_user_table_end.
	 */
	sethi	%hi(copy_user_table_end), %o5
	srl	%g7, 1, %o4
	add	%g7, %o4, %o4
	add	%o1, %g7, %o1
	sub	%o5, %o4, %o5
	jmpl	%o5 + %lo(copy_user_table_end), %g0
	 add	%o0, %g7, %o0

copy_user_table:
	MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
copy_user_table_end:
	EXT(copy_user_table, copy_user_table_end, 51f)
	be	copy_user_last7
	 andcc	%g1, 4, %g0

	EX(ldd	[%o1], %g2, and %g1, 0xf)
	add	%o0, 8, %o0
	add	%o1, 8, %o1
	EX(st	%g2, [%o0 - 0x08], and %g1, 0xf)
	EX2(st	%g3, [%o0 - 0x04], and %g1, 0xf, %g1, sub %g1, 4)
copy_user_last7:
	be	1f
	 andcc	%g1, 2, %g0

	EX(ld	[%o1], %g2, and %g1, 7)
	add	%o1, 4, %o1
	EX(st	%g2, [%o0], and %g1, 7)
	add	%o0, 4, %o0
1:
	be	1f
	 andcc	%g1, 1, %g0

	EX(lduh	[%o1], %g2, and %g1, 3)
	add	%o1, 2, %o1
	EX(sth	%g2, [%o0], and %g1, 3)
	add	%o0, 2, %o0
1:
	be	1f
	 nop

	EX(ldub	[%o1], %g2, add %g0, 1)
	EX(stb	%g2, [%o0], add %g0, 1)
1:
	retl
	 clr	%o0

ldd_std:
	MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
81:
	EXT(ldd_std, 81b, 52f)
	subcc	%g7, 128, %g7
	add	%o1, 128, %o1
	bne	ldd_std
	 add	%o0, 128, %o0

	andcc	%g1, 0x70, %g7
	be	copy_user_table_end
	 andcc	%g1, 8, %g0

	sethi	%hi(copy_user_table_end), %o5
	srl	%g7, 1, %o4
	add	%g7, %o4, %o4
	add	%o1, %g7, %o1
	sub	%o5, %o4, %o5
	jmpl	%o5 + %lo(copy_user_table_end), %g0
	 add	%o0, %g7, %o0
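
/* src and dst cannot be brought to mutual word alignment.  A length of
 * at most 15 bytes is finished at short_end; otherwise copy halfword
 * chunks when src and dst differ by exactly 2 mod 4 (%o5 == 2), or
 * fall back to the byte_chunk loop below.
 */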
cannot_optimize:
	bleu	short_end
	 cmp	%o5, 2

	bne	byte_chunk
	 and	%o2, 0xfffffff0, %o3

	andcc	%o1, 1, %g0
	be	10f
	 nop

	EXO2(ldub [%o1], %g2)
	add	%o1, 1, %o1
	EXO2(stb %g2, [%o0])
	sub	%o2, 1, %o2
	andcc	%o2, 0xfffffff0, %o3
	be	short_end
	 add	%o0, 1, %o0
10:
	MOVE_HALFCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
	MOVE_HALFCHUNK(o1, o0, 0x08, g2, g3, g4, g5)
82:
	EXT(10b, 82b, 53f)
	subcc	%o3, 0x10, %o3
	add	%o1, 0x10, %o1
	bne	10b
	 add	%o0, 0x10, %o0
	b	2f
	 and	%o2, 0xe, %o3

byte_chunk:
	MOVE_SHORTCHUNK(o1, o0, -0x02, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x04, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x06, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x08, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x0a, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x0c, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x0e, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x10, g2, g3)
83:
	EXT(byte_chunk, 83b, 54f)
	subcc	%o3, 0x10, %o3
	add	%o1, 0x10, %o1
	bne	byte_chunk
	 add	%o0, 0x10, %o0

short_end:
	and	%o2, 0xe, %o3
2:
	/* Each MOVE_SHORTCHUNK copies 2 bytes in 4 insns (16 bytes of
	 * code), so the entry point lies %o3 * 8 bytes before
	 * short_table_end.
	 */
	sethi	%hi(short_table_end), %o5
	sll	%o3, 3, %o4
	add	%o0, %o3, %o0
	sub	%o5, %o4, %o5
	add	%o1, %o3, %o1
	jmpl	%o5 + %lo(short_table_end), %g0
	 andcc	%o2, 1, %g0
84:
	MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)
short_table_end:
	EXT(84b, short_table_end, 55f)
	be	1f
	 nop
	EX(ldub	[%o1], %g2, add %g0, 1)
	EX(stb	%g2, [%o0], add %g0, 1)
1:
	retl
	 clr	%o0

short_aligned_end:
	bne	short_end
	 andcc	%o2, 8, %g0

	be	1f
	 andcc	%o2, 4, %g0

	EXO2(ld	[%o1 + 0x00], %g2)
	EXO2(ld	[%o1 + 0x04], %g3)
	add	%o1, 8, %o1
	EXO2(st	%g2, [%o0 + 0x00])
	EX(st	%g3, [%o0 + 0x04], sub %o2, 4)
	add	%o0, 8, %o0
1:
	b	copy_user_last7
	 mov	%o2, %g1
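
/* Fixup code: each handler below computes the number of not-yet-copied
 * bytes into %g3 and returns it through fixupretl.  If the destination
 * is a kernel buffer, the source is a user pointer and the preempt
 * count is zero (i.e. a faulting copy_from_user), the uncopied tail of
 * the kernel buffer is cleared with __bzero first so the caller never
 * sees stale kernel data.
 */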
	.section .fixup,#alloc,#execinstr
	.align	4
97:
	mov	%o2, %g3
fixupretl:
	sethi	%hi(PAGE_OFFSET), %g1
	cmp	%o0, %g1
	blu	1f
	 cmp	%o1, %g1
	bgeu	1f
	 ld	[%g6 + TI_PREEMPT], %g1
	cmp	%g1, 0
	bne	1f
	 nop
	save	%sp, -64, %sp
	mov	%i0, %o0
	call	__bzero
	 mov	%g3, %o1
	restore
1:	retl
	 mov	%g3, %o0

/* exception routine sets %g2 to (broken_insn - first_insn)>>2 */
50:
/* This magic counts how many bytes are left when the crash happens
 * inside MOVE_BIGCHUNK. It is derived from the amount ldd reads,
 * st stores, etc.
 * x = g2 % 12;
 * g3 = g1 + g7 - ((g2 / 12) * 32 + ((x < 4) ? 0 : (x - 4) * 4));
 * o0 += (g2 / 12) * 32;
 */
	cmp	%g2, 12
	add	%o0, %g7, %o0
	bcs	1f
	 cmp	%g2, 24
	bcs	2f
	 cmp	%g2, 36
	bcs	3f
	 nop
	sub	%g2, 12, %g2
	sub	%g7, 32, %g7
3:	sub	%g2, 12, %g2
	sub	%g7, 32, %g7
2:	sub	%g2, 12, %g2
	sub	%g7, 32, %g7
1:	cmp	%g2, 4
	bcs,a	60f
	 clr	%g2
	sub	%g2, 4, %g2
	sll	%g2, 2, %g2
60:	and	%g1, 0x7f, %g3
	sub	%o0, %g7, %o0
	add	%g3, %g7, %g3
	ba	fixupretl
	 sub	%g3, %g2, %g3
51:
/* i = 41 - g2; j = i % 6;
 * g3 = (g1 & 15) + (i / 6) * 16 + ((j < 4) ? (j + 1) * 4 : 16);
 * o0 -= (i / 6) * 16 + ((j < 4) ? (j + 1) * 4 : 16);
 */
	neg	%g2
	and	%g1, 0xf, %g1
	add	%g2, 41, %g2
	add	%o0, %g1, %o0
1:	cmp	%g2, 6
	bcs,a	2f
	 cmp	%g2, 4
	add	%g1, 16, %g1
	b	1b
	 sub	%g2, 6, %g2
2:	bcc,a	2f
	 mov	16, %g2
	inc	%g2
	sll	%g2, 2, %g2
2:	add	%g1, %g2, %g3
	ba	fixupretl
	 sub	%o0, %g3, %o0
52:
/* g3 = g1 + g7 - ((g2 / 8) * 32 + ((g2 & 4) ? (g2 & 3) * 8 : 0));
 * o0 += (g2 / 8) * 32;
 */
	andn	%g2, 7, %g4
	add	%o0, %g7, %o0
	andcc	%g2, 4, %g0
	and	%g2, 3, %g2
	sll	%g4, 2, %g4
	sll	%g2, 3, %g2
	bne	60b
	 sub	%g7, %g4, %g7
	ba	60b
	 clr	%g2
53:
/* g3 = o3 + (o2 & 15) - ((g2 & 8) + ((g2 & 4) ? (g2 & 3) * 2 : 0));
 * o0 += (g2 & 8);
 */
	and	%g2, 3, %g4
	andcc	%g2, 4, %g0
	and	%g2, 8, %g2
	sll	%g4, 1, %g4
	be	1f
	 add	%o0, %g2, %o0
	add	%g2, %g4, %g2
1:	and	%o2, 0xf, %g3
	add	%g3, %o3, %g3
	ba	fixupretl
	 sub	%g3, %g2, %g3
54:
/* g3 = o3 + (o2 & 15) - ((g2 / 4) * 2 + ((g2 & 2) ? (g2 & 1) : 0));
 * o0 += (g2 / 4) * 2;
 */
	srl	%g2, 2, %o4
	and	%g2, 1, %o5
	srl	%g2, 1, %g2
	add	%o4, %o4, %o4
	and	%o5, %g2, %o5
	and	%o2, 0xf, %o2
	add	%o0, %o4, %o0
	sub	%o3, %o5, %o3
	sub	%o2, %o4, %o2
	ba	fixupretl
	 add	%o2, %o3, %g3
55:
/* i = 27 - g2;
 * g3 = (o2 & 1) + (i / 4) * 2 + ((i & 3) ? 1 : 0);
 * o0 -= (i / 4) * 2 + ((i & 3) ? 1 : 0);
 */
	neg	%g2
	and	%o2, 1, %o2
	add	%g2, 27, %g2
	srl	%g2, 2, %o5
	andcc	%g2, 3, %g0
	mov	1, %g2
	add	%o5, %o5, %o5
	be,a	1f
	 clr	%g2
1:	add	%g2, %o5, %g3
	sub	%o0, %g3, %o0
	ba	fixupretl
	 add	%g3, %o2, %g3

	.globl	__copy_user_end
__copy_user_end: