/* copy_user.S: Sparc optimized copy_from_user and copy_to_user code.
 *
 * Copyright(C) 1995 Linus Torvalds
 * Copyright(C) 1996 David S. Miller
 * Copyright(C) 1996 Eddie C. Dost
 * Copyright(C) 1996,1998 Jakub Jelinek
 *
 * derived from:
 *	e-mail between David and Eddie.
 *
 * Returns 0 if successful, otherwise count of bytes not copied yet
 */

#include <asm/ptrace.h>
#include <asm/asmmacro.h>
#include <asm/page.h>
#include <asm/thread_info.h>

/* Work around cpp -rob */
#define ALLOC #alloc
#define EXECINSTR #execinstr
#define EX(x,y,a,b) \
98:	x,y; \
	.section .fixup,ALLOC,EXECINSTR; \
	.align 4; \
99:	ba fixupretl; \
	a, b, %g3; \
	.section __ex_table,ALLOC; \
	.align 4; \
	.word 98b, 99b; \
	.text; \
	.align 4

#define EX2(x,y,c,d,e,a,b) \
98:	x,y; \
	.section .fixup,ALLOC,EXECINSTR; \
	.align 4; \
99:	c, d, e; \
	ba fixupretl; \
	a, b, %g3; \
	.section __ex_table,ALLOC; \
	.align 4; \
	.word 98b, 99b; \
	.text; \
	.align 4

#define EXO2(x,y) \
98:	x, y; \
	.section __ex_table,ALLOC; \
	.align 4; \
	.word 98b, 97f; \
	.text; \
	.align 4

#define EXT(start,end,handler) \
	.section __ex_table,ALLOC; \
	.align 4; \
	.word start, 0, end, handler; \
	.text; \
	.align 4

/* Please do not change following macros unless you change logic used
 * in .fixup at the end of this file as well
 */

/* Both these macros have to start with exactly the same insn */
#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src + (offset) + 0x00], %t0; \
	ldd	[%src + (offset) + 0x08], %t2; \
	ldd	[%src + (offset) + 0x10], %t4; \
	ldd	[%src + (offset) + 0x18], %t6; \
	st	%t0, [%dst + (offset) + 0x00]; \
	st	%t1, [%dst + (offset) + 0x04]; \
	st	%t2, [%dst + (offset) + 0x08]; \
	st	%t3, [%dst + (offset) + 0x0c]; \
	st	%t4, [%dst + (offset) + 0x10]; \
	st	%t5, [%dst + (offset) + 0x14]; \
	st	%t6, [%dst + (offset) + 0x18]; \
	st	%t7, [%dst + (offset) + 0x1c];

#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src + (offset) + 0x00], %t0; \
	ldd	[%src + (offset) + 0x08], %t2; \
	ldd	[%src + (offset) + 0x10], %t4; \
	ldd	[%src + (offset) + 0x18], %t6; \
	std	%t0, [%dst + (offset) + 0x00]; \
	std	%t2, [%dst + (offset) + 0x08]; \
	std	%t4, [%dst + (offset) + 0x10]; \
	std	%t6, [%dst + (offset) + 0x18];

#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
	ldd	[%src - (offset) - 0x10], %t0; \
	ldd	[%src - (offset) - 0x08], %t2; \
	st	%t0, [%dst - (offset) - 0x10]; \
	st	%t1, [%dst - (offset) - 0x0c]; \
	st	%t2, [%dst - (offset) - 0x08]; \
	st	%t3, [%dst - (offset) - 0x04];

#define MOVE_HALFCHUNK(src, dst, offset, t0, t1, t2, t3) \
	lduh	[%src + (offset) + 0x00], %t0; \
	lduh	[%src + (offset) + 0x02], %t1; \
	lduh	[%src + (offset) + 0x04], %t2; \
	lduh	[%src + (offset) + 0x06], %t3; \
	sth	%t0, [%dst + (offset) + 0x00]; \
	sth	%t1, [%dst + (offset) + 0x02]; \
	sth	%t2, [%dst + (offset) + 0x04]; \
	sth	%t3, [%dst + (offset) + 0x06];

#define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
	ldub	[%src - (offset) - 0x02], %t0; \
	ldub	[%src - (offset) - 0x01], %t1; \
	stb	%t0, [%dst - (offset) - 0x02]; \
	stb	%t1, [%dst - (offset) - 0x01];

	.text
	.align 4

	.globl	__copy_user_begin
__copy_user_begin:

	.globl	__copy_user
dword_align:
	andcc	%o1, 1, %g0
	be	4f
	andcc	%o1, 2, %g0

	EXO2(ldub [%o1], %g2)
	add	%o1, 1, %o1
	EXO2(stb %g2, [%o0])
	sub	%o2, 1, %o2
	bne	3f
	add	%o0, 1, %o0

	EXO2(lduh [%o1], %g2)
	add	%o1, 2, %o1
	EXO2(sth %g2, [%o0])
	sub	%o2, 2, %o2
	b	3f
	add	%o0, 2, %o0
4:
	EXO2(lduh [%o1], %g2)
	add	%o1, 2, %o1
	EXO2(sth %g2, [%o0])
	sub	%o2, 2, %o2
	b	3f
	add	%o0, 2, %o0

__copy_user:	/* %o0=dst %o1=src %o2=len */
	xor	%o0, %o1, %o4
1:
	andcc	%o4, 3, %o5
2:
	bne	cannot_optimize
	cmp	%o2, 15

	bleu	short_aligned_end
	andcc	%o1, 3, %g0

	bne	dword_align
3:
	andcc	%o1, 4, %g0

	be	2f
	mov	%o2, %g1

	EXO2(ld [%o1], %o4)
	sub	%g1, 4, %g1
	EXO2(st %o4, [%o0])
	add	%o1, 4, %o1
	add	%o0, 4, %o0
2:
	andcc	%g1, 0xffffff80, %g7
	be	3f
	andcc	%o0, 4, %g0

	be	ldd_std + 4
5:
	MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
80:
	EXT(5b, 80b, 50f)
	subcc	%g7, 128, %g7
	add	%o1, 128, %o1
	bne	5b
	add	%o0, 128, %o0
3:
	andcc	%g1, 0x70, %g7
	be	copy_user_table_end
	andcc	%g1, 8, %g0

	sethi	%hi(copy_user_table_end), %o5
	srl	%g7, 1, %o4
	add	%g7, %o4, %o4
	add	%o1, %g7, %o1
	sub	%o5, %o4, %o5
	jmpl	%o5 + %lo(copy_user_table_end), %g0
	add	%o0, %g7, %o0

copy_user_table:
	MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
copy_user_table_end:
	EXT(copy_user_table, copy_user_table_end, 51f)
	be	copy_user_last7
	andcc	%g1, 4, %g0

	EX(ldd [%o1], %g2, and %g1, 0xf)
	add	%o0, 8, %o0
	add	%o1, 8, %o1
	EX(st %g2, [%o0 - 0x08], and %g1, 0xf)
	EX2(st %g3, [%o0 - 0x04], and %g1, 0xf, %g1, sub %g1, 4)
copy_user_last7:
	be	1f
	andcc	%g1, 2, %g0

	EX(ld [%o1], %g2, and %g1, 7)
	add	%o1, 4, %o1
	EX(st %g2, [%o0], and %g1, 7)
	add	%o0, 4, %o0
1:
	be	1f
	andcc	%g1, 1, %g0

	EX(lduh [%o1], %g2, and %g1, 3)
	add	%o1, 2, %o1
	EX(sth %g2, [%o0], and %g1, 3)
	add	%o0, 2, %o0
1:
	be	1f
	nop

	EX(ldub [%o1], %g2, add %g0, 1)
	EX(stb %g2, [%o0], add %g0, 1)
1:
	retl
	clr	%o0

ldd_std:
	MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
81:
	EXT(ldd_std, 81b, 52f)
	subcc	%g7, 128, %g7
	add	%o1, 128, %o1
	bne	ldd_std
	add	%o0, 128, %o0

	andcc	%g1, 0x70, %g7
	be	copy_user_table_end
	andcc	%g1, 8, %g0

	sethi	%hi(copy_user_table_end), %o5
	srl	%g7, 1, %o4
	add	%g7, %o4, %o4
	add	%o1, %g7, %o1
	sub	%o5, %o4, %o5
	jmpl	%o5 + %lo(copy_user_table_end), %g0
	add	%o0, %g7, %o0

cannot_optimize:
	bleu	short_end
	cmp	%o5, 2

	bne	byte_chunk
	and	%o2, 0xfffffff0, %o3

	andcc	%o1, 1, %g0
	be	10f
	nop

	EXO2(ldub [%o1], %g2)
	add	%o1, 1, %o1
	EXO2(stb %g2, [%o0])
	sub	%o2, 1, %o2
	andcc	%o2, 0xfffffff0, %o3
	be	short_end
	add	%o0, 1, %o0
10:
	MOVE_HALFCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
	MOVE_HALFCHUNK(o1, o0, 0x08, g2, g3, g4, g5)
82:
	EXT(10b, 82b, 53f)
	subcc	%o3, 0x10, %o3
	add	%o1, 0x10, %o1
	bne	10b
	add	%o0, 0x10, %o0
	b	2f
	and	%o2, 0xe, %o3

byte_chunk:
	MOVE_SHORTCHUNK(o1, o0, -0x02, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x04, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x06, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x08, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x0a, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x0c, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x0e, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x10, g2, g3)
83:
	EXT(byte_chunk, 83b, 54f)
	subcc	%o3, 0x10, %o3
	add	%o1, 0x10, %o1
	bne	byte_chunk
	add	%o0, 0x10, %o0

short_end:
	and	%o2, 0xe, %o3
2:
	sethi	%hi(short_table_end), %o5
	sll	%o3, 3, %o4
	add	%o0, %o3, %o0
	sub	%o5, %o4, %o5
	add	%o1, %o3, %o1
	jmpl	%o5 + %lo(short_table_end), %g0
	andcc	%o2, 1, %g0
84:
	MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)
short_table_end:
	EXT(84b, short_table_end, 55f)
	be	1f
	nop
	EX(ldub [%o1], %g2, add %g0, 1)
	EX(stb %g2, [%o0], add %g0, 1)
1:
	retl
	clr	%o0

short_aligned_end:
	bne	short_end
	andcc	%o2, 8, %g0

	be	1f
	andcc	%o2, 4, %g0

	EXO2(ld [%o1 + 0x00], %g2)
	EXO2(ld [%o1 + 0x04], %g3)
	add	%o1, 8, %o1
	EXO2(st %g2, [%o0 + 0x00])
	EX(st %g3, [%o0 + 0x04], sub %o2, 4)
	add	%o0, 8, %o0
1:
	b	copy_user_last7
	mov	%o2, %g1

	.section .fixup,#alloc,#execinstr
	.align 4
97:
	mov	%o2, %g3
fixupretl:
	sethi	%hi(PAGE_OFFSET), %g1
	cmp	%o0, %g1
	blu	1f
	cmp	%o1, %g1
	bgeu	1f
	ld	[%g6 + TI_PREEMPT], %g1
	cmp	%g1, 0
	bne	1f
	nop
	save	%sp, -64, %sp
	mov	%i0, %o0
	call	__bzero
	mov	%g3, %o1
	restore
1:	retl
	mov	%g3, %o0

/* exception routine sets %g2 to (broken_insn - first_insn)>>2 */
50:
/* This magic counts how many bytes are left when crash in MOVE_BIGCHUNK
 * happens. This is derived from the amount ldd reads, st stores, etc.
 * x = g2 % 12;
 * g3 = g1 + g7 - ((g2 / 12) * 32 + (x < 4) ? 0 : (x - 4) * 4);
 * o0 += (g2 / 12) * 32;
 */
	cmp	%g2, 12
	add	%o0, %g7, %o0
	bcs	1f
	cmp	%g2, 24
	bcs	2f
	cmp	%g2, 36
	bcs	3f
	nop
	sub	%g2, 12, %g2
	sub	%g7, 32, %g7
3:	sub	%g2, 12, %g2
	sub	%g7, 32, %g7
2:	sub	%g2, 12, %g2
	sub	%g7, 32, %g7
1:	cmp	%g2, 4
	bcs,a	60f
	clr	%g2
	sub	%g2, 4, %g2
	sll	%g2, 2, %g2
60:	and	%g1, 0x7f, %g3
	sub	%o0, %g7, %o0
	add	%g3, %g7, %g3
	ba	fixupretl
	sub	%g3, %g2, %g3
51:
/* i = 41 - g2; j = i % 6;
 * g3 = (g1 & 15) + (i / 6) * 16 + (j < 4) ? (j + 1) * 4 : 16;
 * o0 -= (i / 6) * 16 + 16;
 */
	neg	%g2
	and	%g1, 0xf, %g1
	add	%g2, 41, %g2
	add	%o0, %g1, %o0
1:	cmp	%g2, 6
	bcs,a	2f
	cmp	%g2, 4
	add	%g1, 16, %g1
	b	1b
	sub	%g2, 6, %g2
2:	bcc,a	2f
	mov	16, %g2
	inc	%g2
	sll	%g2, 2, %g2
2:	add	%g1, %g2, %g3
	ba	fixupretl
	sub	%o0, %g3, %o0
52:
/* g3 = g1 + g7 - (g2 / 8) * 32 + (g2 & 4) ? (g2 & 3) * 8 : 0;
   o0 += (g2 / 8) * 32 */
	andn	%g2, 7, %g4
	add	%o0, %g7, %o0
	andcc	%g2, 4, %g0
	and	%g2, 3, %g2
	sll	%g4, 2, %g4
	sll	%g2, 3, %g2
	bne	60b
	sub	%g7, %g4, %g7
	ba	60b
	clr	%g2
53:
/* g3 = o3 + (o2 & 15) - (g2 & 8) - (g2 & 4) ? (g2 & 3) * 2 : 0;
   o0 += (g2 & 8) */
	and	%g2, 3, %g4
	andcc	%g2, 4, %g0
	and	%g2, 8, %g2
	sll	%g4, 1, %g4
	be	1f
	add	%o0, %g2, %o0
	add	%g2, %g4, %g2
1:	and	%o2, 0xf, %g3
	add	%g3, %o3, %g3
	ba	fixupretl
	sub	%g3, %g2, %g3
54:
/* g3 = o3 + (o2 & 15) - (g2 / 4) * 2 - (g2 & 2) ? (g2 & 1) : 0;
   o0 += (g2 / 4) * 2 */
	srl	%g2, 2, %o4
	and	%g2, 1, %o5
	srl	%g2, 1, %g2
	add	%o4, %o4, %o4
	and	%o5, %g2, %o5
	and	%o2, 0xf, %o2
	add	%o0, %o4, %o0
	sub	%o3, %o5, %o3
	sub	%o2, %o4, %o2
	ba	fixupretl
	add	%o2, %o3, %g3
55:
/* i = 27 - g2;
   g3 = (o2 & 1) + i / 4 * 2 + !(i & 3);
   o0 -= i / 4 * 2 + 1 */
	neg	%g2
	and	%o2, 1, %o2
	add	%g2, 27, %g2
	srl	%g2, 2, %o5
	andcc	%g2, 3, %g0
	mov	1, %g2
	add	%o5, %o5, %o5
	be,a	1f
	clr	%g2
1:	add	%g2, %o5, %g3
	sub	%o0, %g3, %o0
	ba	fixupretl
	add	%g3, %o2, %g3

	.globl	__copy_user_end
__copy_user_end:
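
/* A minimal C sketch of the arithmetic done by the 50: fixup above,
 * following the formula stated in its comment; the function name and
 * prototype here are illustrative only and exist nowhere in the kernel.
 * %g2 is (broken_insn - first_insn) >> 2 inside the unrolled block of
 * four MOVE_BIGCHUNKs; each chunk is 12 insns (4 ldd + 8 st) and moves
 * 32 bytes, and a store at index x within a chunk completes (x - 4)
 * words of it:
 *
 *	unsigned long bigchunk_remaining(unsigned long g1, unsigned long g7,
 *					 unsigned long g2)
 *	{
 *		unsigned long x = g2 % 12;		// insn index within one chunk
 *		unsigned long done = (g2 / 12) * 32	// fully copied chunks
 *			+ (x < 4 ? 0 : (x - 4) * 4);	// words already stored
 *		return g1 + g7 - done;			// bytes not yet copied
 *	}
 *
 * For example, a fault at g2 == 17 (the second store of the second chunk)
 * gives done == 1*32 + (5 - 4)*4 == 36, so %o0 is advanced by 32 and
 * g1 + g7 - 36 bytes are reported as still uncopied.
 */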