/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>

#ifdef __BIG_ENDIAN__
#define sLd sld		/* Shift towards low-numbered address. */
#define sHd srd		/* Shift towards high-numbered address. */
#else
#define sLd srd		/* Shift towards low-numbered address. */
#define sHd sld		/* Shift towards high-numbered address. */
#endif

	.align	7
_GLOBAL_TOC(__copy_tofrom_user)
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	b	__copy_tofrom_user_power7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
_GLOBAL(__copy_tofrom_user_base)
	/* first check for a whole page copy on a page boundary */
	cmpldi	cr1,r5,16
	cmpdi	cr6,r5,4096
	or	r0,r3,r4
	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
	andi.	r0,r0,4095
	std	r3,-24(r1)
	crand	cr0*4+2,cr0*4+2,cr6*4+2
	std	r4,-16(r1)
	std	r5,-8(r1)
	dcbt	0,r4
	beq	.Lcopy_page_4K
	andi.	r6,r6,7
	PPC_MTOCRF(0x01,r5)
	blt	cr1,.Lshort_copy
/* Below we want to nop out the bne if we're on a CPU that has the
 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
 * cleared.
 * At the time of writing the only CPU that has this combination of bits
 * set is Power6.
 */
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	bne	.Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
		    CPU_FTR_UNALIGNED_LD_STD)
.Ldst_aligned:
	addi	r3,r3,-16
BEGIN_FTR_SECTION
	andi.	r0,r4,7
	bne	.Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
	blt	cr1,.Ldo_tail		/* if < 16 bytes to copy */
	srdi	r0,r5,5
	cmpdi	cr1,r0,0
20:	ld	r7,0(r4)
220:	ld	r6,8(r4)
	addi	r4,r4,16
	mtctr	r0
	andi.	r0,r5,0x10
	beq	22f
	addi	r3,r3,16
	addi	r4,r4,-16
	mr	r9,r7
	mr	r8,r6
	beq	cr1,72f
21:	ld	r7,16(r4)
221:	ld	r6,24(r4)
	addi	r4,r4,32
70:	std	r9,0(r3)
270:	std	r8,8(r3)
22:	ld	r9,0(r4)
222:	ld	r8,8(r4)
71:	std	r7,16(r3)
271:	std	r6,24(r3)
	addi	r3,r3,32
	bdnz	21b
72:	std	r9,0(r3)
272:	std	r8,8(r3)
	andi.	r5,r5,0xf
	beq+	3f
	addi	r4,r4,16
.Ldo_tail:
	addi	r3,r3,16
	bf	cr7*4+0,246f
244:	ld	r9,0(r4)
	addi	r4,r4,8
245:	std	r9,0(r3)
	addi	r3,r3,8
246:	bf	cr7*4+1,1f
23:	lwz	r9,0(r4)
	addi	r4,r4,4
73:	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
44:	lhz	r9,0(r4)
	addi	r4,r4,2
74:	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
45:	lbz	r9,0(r4)
75:	stb	r9,0(r3)
3:	li	r3,0
	blr

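/*
 * Source and destination are mutually misaligned.  r0 holds the source
 * misalignment in bytes; each destination doubleword is assembled from
 * two consecutive source doublewords, shifted towards the low (sLd) and
 * high (sHd) address respectively and OR-ed together.
 */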
.Lsrc_unaligned:
	srdi	r6,r5,3
	addi	r5,r5,-16
	subf	r4,r0,r4
	srdi	r7,r5,4
	sldi	r10,r0,3
	cmpldi	cr6,r6,3
	andi.	r5,r5,7
	mtctr	r7
	subfic	r11,r10,64
	add	r5,r5,r0
	bt	cr7*4+0,28f

24:	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
25:	ld	r0,8(r4)
	sLd	r6,r9,r10
26:	ldu	r9,16(r4)
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,79f
27:	ld	r0,8(r4)
	b	2f

28:	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
29:	ldu	r9,8(r4)
	sLd	r8,r0,r10
	addi	r3,r3,-8
	blt	cr6,5f
30:	ld	r0,8(r4)
	sHd	r12,r9,r11
	sLd	r6,r9,r10
31:	ldu	r9,16(r4)
	or	r12,r8,r12
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	addi	r3,r3,16
	beq	cr6,78f

1:	or	r7,r7,r6
32:	ld	r0,8(r4)
76:	std	r12,8(r3)
2:	sHd	r12,r9,r11
	sLd	r6,r9,r10
33:	ldu	r9,16(r4)
	or	r12,r8,r12
77:	stdu	r7,16(r3)
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	bdnz	1b

78:	std	r12,8(r3)
	or	r7,r7,r6
79:	std	r7,16(r3)
5:	sHd	r12,r9,r11
	or	r12,r8,r12
80:	std	r12,24(r3)
	bne	6f
	li	r3,0
	blr
6:	cmpwi	cr1,r5,8
	addi	r3,r3,32
	sLd	r9,r9,r10
	ble	cr1,7f
34:	ld	r0,8(r4)
	sHd	r7,r0,r11
	or	r9,r7,r9
7:
	bf	cr7*4+1,1f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,32
#endif
94:	stw	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,32
#endif
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,16
#endif
95:	sth	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,16
#endif
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,8
#endif
96:	stb	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,8
#endif
3:	li	r3,0
	blr

.Ldst_unaligned:
	PPC_MTOCRF(0x01,r6)	/* put #bytes to 8B bdry into cr7 */
	subf	r5,r6,r5
	li	r7,0
	cmpldi	cr1,r5,16
	bf	cr7*4+3,1f
35:	lbz	r0,0(r4)
81:	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
36:	lhzx	r0,r7,r4
82:	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
37:	lwzx	r0,r7,r4
83:	stwx	r0,r7,r3
3:	PPC_MTOCRF(0x01,r5)
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned

.Lshort_copy:
	bf	cr7*4+0,1f
38:	lwz	r0,0(r4)
39:	lwz	r9,4(r4)
	addi	r4,r4,8
84:	stw	r0,0(r3)
85:	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
40:	lwz	r0,0(r4)
	addi	r4,r4,4
86:	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
41:	lhz	r0,0(r4)
	addi	r4,r4,2
87:	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
42:	lbz	r0,0(r4)
88:	stb	r0,0(r3)
4:	li	r3,0
	blr

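/*
 * Fixup convention: the fixup for a faulting load or store labelled
 * NN: (or 2NN:) is at NN+100 (e.g. 20: is fixed up at 120:); the
 * __ex_table section below pairs each faulting address with its fixup.
 */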
/*
 * exception handlers follow
 * we have to return the number of bytes not copied
 * for an exception on a load, we set the rest of the destination to 0
 */

136:
137:
	add	r3,r3,r7
	b	1f
130:
131:
	addi	r3,r3,8
120:
320:
122:
322:
124:
125:
126:
127:
128:
129:
133:
	addi	r3,r3,8
132:
	addi	r3,r3,8
121:
321:
344:
134:
135:
138:
139:
140:
141:
142:
123:
144:
145:

/*
 * here we have had a fault on a load and r3 points to the first
 * unmodified byte of the destination
 */
1:	ld	r6,-24(r1)
	ld	r4,-16(r1)
	ld	r5,-8(r1)
	subf	r6,r6,r3
	add	r4,r4,r6
	subf	r5,r6,r5	/* #bytes left to go */

/*
 * first see if we can copy any more bytes before hitting another exception
 */
	mtctr	r5
43:	lbz	r0,0(r4)
	addi	r4,r4,1
89:	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	43b
	li	r3,0		/* huh? all copied successfully this time? */
	blr

/*
 * here we have trapped again, need to clear ctr bytes starting at r3
 */
143:	mfctr	r5
	li	r0,0
	mr	r4,r3
	mr	r3,r5		/* return the number of bytes not copied */
1:	andi.	r9,r4,7
	beq	3f
90:	stb	r0,0(r4)
	addic.	r5,r5,-1
	addi	r4,r4,1
	bne	1b
	blr
3:	cmpldi	cr1,r5,8
	srdi	r9,r5,3
	andi.	r5,r5,7
	blt	cr1,93f
	mtctr	r9
91:	std	r0,0(r4)
	addi	r4,r4,8
	bdnz	91b
93:	beqlr
	mtctr	r5
92:	stb	r0,0(r4)
	addi	r4,r4,1
	bdnz	92b
	blr

/*
 * exception handlers for stores: we just need to work
 * out how many bytes weren't copied
 */
182:
183:
	add	r3,r3,r7
	b	1f
371:
180:
	addi	r3,r3,8
171:
177:
179:
	addi	r3,r3,8
370:
372:
176:
178:
	addi	r3,r3,4
185:
	addi	r3,r3,4
170:
172:
345:
173:
174:
175:
181:
184:
186:
187:
188:
189:
194:
195:
196:
1:
	ld	r6,-24(r1)
	ld	r5,-8(r1)
	add	r6,r6,r5
	subf	r3,r3,r6	/* #bytes not copied */
190:
191:
192:
	blr			/* #bytes not copied in r3 */

	.section __ex_table,"a"
	.align	3
	.llong	20b,120b
	.llong	220b,320b
	.llong	21b,121b
	.llong	221b,321b
	.llong	70b,170b
	.llong	270b,370b
	.llong	22b,122b
	.llong	222b,322b
	.llong	71b,171b
	.llong	271b,371b
	.llong	72b,172b
	.llong	272b,372b
	.llong	244b,344b
	.llong	245b,345b
	.llong	23b,123b
	.llong	73b,173b
	.llong	44b,144b
	.llong	74b,174b
	.llong	45b,145b
	.llong	75b,175b
	.llong	24b,124b
	.llong	25b,125b
	.llong	26b,126b
	.llong	27b,127b
	.llong	28b,128b
	.llong	29b,129b
	.llong	30b,130b
	.llong	31b,131b
	.llong	32b,132b
	.llong	76b,176b
	.llong	33b,133b
	.llong	77b,177b
	.llong	78b,178b
	.llong	79b,179b
	.llong	80b,180b
	.llong	34b,134b
	.llong	94b,194b
	.llong	95b,195b
	.llong	96b,196b
	.llong	35b,135b
	.llong	81b,181b
	.llong	36b,136b
	.llong	82b,182b
	.llong	37b,137b
	.llong	83b,183b
	.llong	38b,138b
	.llong	39b,139b
	.llong	84b,184b
	.llong	85b,185b
	.llong	40b,140b
	.llong	86b,186b
	.llong	41b,141b
	.llong	87b,187b
	.llong	42b,142b
	.llong	88b,188b
	.llong	43b,143b
	.llong	89b,189b
	.llong	90b,190b
	.llong	91b,191b
	.llong	92b,192b

	.text

/*
 * Routine to copy a whole page of data, optimized for POWER4.
 * On POWER4 it is more than 50% faster than the simple loop
 * above (following the .Ldst_aligned label).
 */
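/*
 * r20-r31 are saved below the stack pointer (no stack frame is set up).
 * The main loop interleaves six load/store streams spaced one 128-byte
 * cache line apart so that several lines are in flight at once.
 */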
.Lcopy_page_4K:
	std	r31,-32(1)
	std	r30,-40(1)
	std	r29,-48(1)
	std	r28,-56(1)
	std	r27,-64(1)
	std	r26,-72(1)
	std	r25,-80(1)
	std	r24,-88(1)
	std	r23,-96(1)
	std	r22,-104(1)
	std	r21,-112(1)
	std	r20,-120(1)
	li	r5,4096/32 - 1
	addi	r3,r3,-8
	li	r0,5
0:	addi	r5,r5,-24
	mtctr	r0
20:	ld	r22,640(4)
21:	ld	r21,512(4)
22:	ld	r20,384(4)
23:	ld	r11,256(4)
24:	ld	r9,128(4)
25:	ld	r7,0(4)
26:	ld	r25,648(4)
27:	ld	r24,520(4)
28:	ld	r23,392(4)
29:	ld	r10,264(4)
30:	ld	r8,136(4)
31:	ldu	r6,8(4)
	cmpwi	r5,24
1:
32:	std	r22,648(3)
33:	std	r21,520(3)
34:	std	r20,392(3)
35:	std	r11,264(3)
36:	std	r9,136(3)
37:	std	r7,8(3)
38:	ld	r28,648(4)
39:	ld	r27,520(4)
40:	ld	r26,392(4)
41:	ld	r31,264(4)
42:	ld	r30,136(4)
43:	ld	r29,8(4)
44:	std	r25,656(3)
45:	std	r24,528(3)
46:	std	r23,400(3)
47:	std	r10,272(3)
48:	std	r8,144(3)
49:	std	r6,16(3)
50:	ld	r22,656(4)
51:	ld	r21,528(4)
52:	ld	r20,400(4)
53:	ld	r11,272(4)
54:	ld	r9,144(4)
55:	ld	r7,16(4)
56:	std	r28,664(3)
57:	std	r27,536(3)
58:	std	r26,408(3)
59:	std	r31,280(3)
60:	std	r30,152(3)
61:	stdu	r29,24(3)
62:	ld	r25,664(4)
63:	ld	r24,536(4)
64:	ld	r23,408(4)
65:	ld	r10,280(4)
66:	ld	r8,152(4)
67:	ldu	r6,24(4)
	bdnz	1b
68:	std	r22,648(3)
69:	std	r21,520(3)
70:	std	r20,392(3)
71:	std	r11,264(3)
72:	std	r9,136(3)
73:	std	r7,8(3)
74:	addi	r4,r4,640
75:	addi	r3,r3,648
	bge	0b
	mtctr	r5
76:	ld	r7,0(4)
77:	ld	r8,8(4)
78:	ldu	r9,16(4)
3:
79:	ld	r10,8(4)
80:	std	r7,8(3)
81:	ld	r7,16(4)
82:	std	r8,16(3)
83:	ld	r8,24(4)
84:	std	r9,24(3)
85:	ldu	r9,32(4)
86:	stdu	r10,32(3)
	bdnz	3b
4:
87:	ld	r10,8(4)
88:	std	r7,8(3)
89:	std	r8,16(3)
90:	std	r9,24(3)
91:	std	r10,32(3)
9:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	li	r3,0
	blr

/*
 * on an exception, reset to the beginning and jump back into the
 * standard __copy_tofrom_user
 */
100:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	ld	r3,-24(r1)
	ld	r4,-16(r1)
	li	r5,4096
	b	.Ldst_aligned

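/*
 * Any faulting load or store in the page copy above shares the single
 * fixup at 100:, which restores the saved registers and arguments and
 * retries the copy through the standard aligned path.
 */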
	.section __ex_table,"a"
	.align	3
	.llong	20b,100b
	.llong	21b,100b
	.llong	22b,100b
	.llong	23b,100b
	.llong	24b,100b
	.llong	25b,100b
	.llong	26b,100b
	.llong	27b,100b
	.llong	28b,100b
	.llong	29b,100b
	.llong	30b,100b
	.llong	31b,100b
	.llong	32b,100b
	.llong	33b,100b
	.llong	34b,100b
	.llong	35b,100b
	.llong	36b,100b
	.llong	37b,100b
	.llong	38b,100b
	.llong	39b,100b
	.llong	40b,100b
	.llong	41b,100b
	.llong	42b,100b
	.llong	43b,100b
	.llong	44b,100b
	.llong	45b,100b
	.llong	46b,100b
	.llong	47b,100b
	.llong	48b,100b
	.llong	49b,100b
	.llong	50b,100b
	.llong	51b,100b
	.llong	52b,100b
	.llong	53b,100b
	.llong	54b,100b
	.llong	55b,100b
	.llong	56b,100b
	.llong	57b,100b
	.llong	58b,100b
	.llong	59b,100b
	.llong	60b,100b
	.llong	61b,100b
	.llong	62b,100b
	.llong	63b,100b
	.llong	64b,100b
	.llong	65b,100b
	.llong	66b,100b
	.llong	67b,100b
	.llong	68b,100b
	.llong	69b,100b
	.llong	70b,100b
	.llong	71b,100b
	.llong	72b,100b
	.llong	73b,100b
	.llong	74b,100b
	.llong	75b,100b
	.llong	76b,100b
	.llong	77b,100b
	.llong	78b,100b
	.llong	79b,100b
	.llong	80b,100b
	.llong	81b,100b
	.llong	82b,100b
	.llong	83b,100b
	.llong	84b,100b
	.llong	85b,100b
	.llong	86b,100b
	.llong	87b,100b
	.llong	88b,100b
	.llong	89b,100b
	.llong	90b,100b
	.llong	91b,100b
EXPORT_SYMBOL(__copy_tofrom_user)