/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>

#ifdef __BIG_ENDIAN__
#define sLd sld		/* Shift towards low-numbered address. */
#define sHd srd		/* Shift towards high-numbered address. */
#else
#define sLd srd		/* Shift towards low-numbered address. */
#define sHd sld		/* Shift towards high-numbered address. */
#endif

/*
 * __copy_tofrom_user / __copy_tofrom_user_base
 *
 * Register usage, as established by the code below:
 *   r3 = destination address (every store goes through r3)
 *   r4 = source address      (every load goes through r4)
 *   r5 = number of bytes to copy
 * Returns in r3 the number of bytes NOT copied; 0 means the whole
 * request was copied.
 *
 * The incoming r3/r4/r5 are saved at -24/-16/-8(r1), i.e. below the stack
 * pointer and without allocating a frame.  The fault handlers reload them
 * to work out how far the copy got.  The 4K page path additionally saves
 * r20-r31 at -32..-120(r1).
 *
 * cr7 holds the low four bits of the remaining length (loaded with
 * PPC_MTOCRF(0x01,...)) and drives the tail copies:
 *   cr7*4+0 -> 8 bytes, cr7*4+1 -> 4 bytes, cr7*4+2 -> 2 bytes,
 *   cr7*4+3 -> 1 byte.
 *
 * Every load and store that touches the two buffers has a numeric local
 * label N and an EX_TABLE(Nb,<N+100>b) entry.  The fixup labels are laid
 * out as fall-through chains of "addi r3,r3,k" so that, on arrival at the
 * common handler, r3 points at the first destination byte that has not
 * been written.  The exact position of each label in those chains is
 * therefore significant; do not reorder them.
 *
 * Numeric labels 20-91 are defined a second time in .Lcopy_page_4K.  The
 * "Nb" references in each EX_TABLE list bind to the nearest preceding
 * definition, so each list must stay after the code it describes and
 * before the next redefinition.
 */
	.align	7
_GLOBAL_TOC(__copy_tofrom_user)
/*
 * NOTE(review): presumably CPUs with CPU_FTR_VMX_COPY set take the branch
 * to __copy_tofrom_user_power7 and all others fall through the nop --
 * confirm against the feature-fixup macro definitions.
 */
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	b	__copy_tofrom_user_power7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
_GLOBAL(__copy_tofrom_user_base)
	/* first check for a whole page copy on a page boundary */
	cmpldi	cr1,r5,16
	cmpdi	cr6,r5,4096
	or	r0,r3,r4
	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
	andi.	r0,r0,4095
	std	r3,-24(r1)
	/* cr0.eq = (both pointers 4096-aligned) && (length == 4096) */
	crand	cr0*4+2,cr0*4+2,cr6*4+2
	std	r4,-16(r1)
	std	r5,-8(r1)
	dcbt	0,r4
	beq	.Lcopy_page_4K
	andi.	r6,r6,7
	PPC_MTOCRF(0x01,r5)
	blt	cr1,.Lshort_copy
/* Below we want to nop out the bne if we're on a CPU that has the
 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
 * cleared.
 * At the time of writing the only CPU that has this combination of bits
 * set is Power6.
 */
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	bne	.Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
		    CPU_FTR_UNALIGNED_LD_STD)
/*
 * Main doubleword loop.  r3 is biased by -16 so the loop can store at
 * offsets 0/8/16/24.  ctr = length / 32; loads run ahead of the stores.
 * cr1 still holds "remaining length vs 16" on entry here.
 */
.Ldst_aligned:
	addi	r3,r3,-16
BEGIN_FTR_SECTION
	andi.	r0,r4,7
	bne	.Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
	blt	cr1,.Ldo_tail		/* if < 16 bytes to copy */
	srdi	r0,r5,5
	cmpdi	cr1,r0,0		/* cr1.eq: no 32-byte iterations */
20:	ld	r7,0(r4)
220:	ld	r6,8(r4)
	addi	r4,r4,16
	mtctr	r0
	andi.	r0,r5,0x10		/* odd 16-byte chunk present? */
	beq	22f
	addi	r3,r3,16
	addi	r4,r4,-16
	mr	r9,r7
	mr	r8,r6
	beq	cr1,72f			/* ctr is 0: must not enter the bdnz loop */
21:	ld	r7,16(r4)
221:	ld	r6,24(r4)
	addi	r4,r4,32
70:	std	r9,0(r3)
270:	std	r8,8(r3)
22:	ld	r9,0(r4)
222:	ld	r8,8(r4)
71:	std	r7,16(r3)
271:	std	r6,24(r3)
	addi	r3,r3,32
	bdnz	21b
72:	std	r9,0(r3)
272:	std	r8,8(r3)
	andi.	r5,r5,0xf
	beq+	3f
	addi	r4,r4,16
/* Copy the final 0-15 bytes in 8/4/2/1 byte pieces selected by cr7. */
.Ldo_tail:
	addi	r3,r3,16
	bf	cr7*4+0,246f
244:	ld	r9,0(r4)
	addi	r4,r4,8
245:	std	r9,0(r3)
	addi	r3,r3,8
246:	bf	cr7*4+1,1f
23:	lwz	r9,0(r4)
	addi	r4,r4,4
73:	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
44:	lhz	r9,0(r4)
	addi	r4,r4,2
74:	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
45:	lbz	r9,0(r4)
75:	stb	r9,0(r3)
3:	li	r3,0
	blr

/*
 * Destination is 8-byte aligned, source is not.  On entry r0 = r4 & 7.
 * r4 is rounded down to a doubleword boundary, and each destination
 * doubleword is assembled from two adjacent source doublewords with
 * sLd/sHd: r10 = 8 * r0 (shift count), r11 = 64 - r10.
 */
.Lsrc_unaligned:
	srdi	r6,r5,3
	addi	r5,r5,-16
	subf	r4,r0,r4
	srdi	r7,r5,4
	sldi	r10,r0,3
	cmpldi	cr6,r6,3
	andi.	r5,r5,7
	mtctr	r7
	subfic	r11,r10,64
	add	r5,r5,r0
	bt	cr7*4+0,28f

24:	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
25:	ld	r0,8(r4)
	sLd	r6,r9,r10
26:	ldu	r9,16(r4)
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,79f
27:	ld	r0,8(r4)
	b	2f

28:	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
29:	ldu	r9,8(r4)
	sLd	r8,r0,r10
	addi	r3,r3,-8
	blt	cr6,5f
30:	ld	r0,8(r4)
	sHd	r12,r9,r11
	sLd	r6,r9,r10
31:	ldu	r9,16(r4)
	or	r12,r8,r12
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	addi	r3,r3,16
	beq	cr6,78f

1:	or	r7,r7,r6
32:	ld	r0,8(r4)
76:	std	r12,8(r3)
2:	sHd	r12,r9,r11
	sLd	r6,r9,r10
33:	ldu	r9,16(r4)
	or	r12,r8,r12
77:	stdu	r7,16(r3)
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	bdnz	1b

78:	std	r12,8(r3)
	or	r7,r7,r6
79:	std	r7,16(r3)
5:	sHd	r12,r9,r11
	or	r12,r8,r12
80:	std	r12,24(r3)
	bne	6f		/* cr0 is still from the andi. r5,r5,7 above */
	li	r3,0
	blr
	/*
	 * Tail of the unaligned-source path: the remaining bytes are taken
	 * from r9, which is rotated so each store writes the next piece.
	 */
6:	cmpwi	cr1,r5,8
	addi	r3,r3,32
	sLd	r9,r9,r10
	ble	cr1,7f
34:	ld	r0,8(r4)
	sHd	r7,r0,r11
	or	r9,r7,r9
7:
	bf	cr7*4+1,1f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,32
#endif
94:	stw	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,32
#endif
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,16
#endif
95:	sth	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,16
#endif
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,8
#endif
96:	stb	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,8
#endif
3:	li	r3,0
	blr

/*
 * Destination is not 8-byte aligned.  r6 = number of bytes (1-7) needed
 * to reach the next 8-byte boundary.  Copy them in 1/2/4 byte pieces,
 * using r7 as the running offset, then reload cr1/cr7 for the shortened
 * length and rejoin the aligned path.
 */
.Ldst_unaligned:
	PPC_MTOCRF(0x01,r6)		/* put #bytes to 8B bdry into cr7 */
	subf	r5,r6,r5
	li	r7,0
	cmpldi	cr1,r5,16
	bf	cr7*4+3,1f
35:	lbz	r0,0(r4)
81:	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
36:	lhzx	r0,r7,r4
82:	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
37:	lwzx	r0,r7,r4
83:	stwx	r0,r7,r3
3:	PPC_MTOCRF(0x01,r5)
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned

/*
 * Fewer than 16 bytes in total: copy 8 (as two words), 4, 2 and 1 bytes
 * as selected by cr7, with no alignment handling.
 */
.Lshort_copy:
	bf	cr7*4+0,1f
38:	lwz	r0,0(r4)
39:	lwz	r9,4(r4)
	addi	r4,r4,8
84:	stw	r0,0(r3)
85:	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
40:	lwz	r0,0(r4)
	addi	r4,r4,4
86:	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
41:	lhz	r0,0(r4)
	addi	r4,r4,2
87:	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
42:	lbz	r0,0(r4)
88:	stb	r0,0(r3)
4:	li	r3,0
	blr

/*
 * exception handlers follow
 * we have to return the number of bytes not copied
 * for an exception on a load, we set the rest of the destination to 0
 */

/*
 * Load fixups.  Each entry point falls through zero or more
 * "addi r3,r3,8" steps so that r3 ends up at the first unwritten
 * destination byte; 136/137 add the running offset r7 instead.
 */
136:
137:
	add	r3,r3,r7
	b	1f
130:
131:
	addi	r3,r3,8
120:
320:
122:
322:
124:
125:
126:
127:
128:
129:
133:
	addi	r3,r3,8
132:
	addi	r3,r3,8
121:
321:
344:
134:
135:
138:
139:
140:
141:
142:
123:
144:
145:

/*
 * here we have had a fault on a load and r3 points to the first
 * unmodified byte of the destination
 */
1:	ld	r6,-24(r1)
	ld	r4,-16(r1)
	ld	r5,-8(r1)
	subf	r6,r6,r3	/* r6 = bytes already written */
	add	r4,r4,r6	/* r4 = matching source address */
	subf	r5,r6,r5	/* #bytes left to go */

/*
 * first see if we can copy any more bytes before hitting another exception
 */
	mtctr	r5
43:	lbz	r0,0(r4)
	addi	r4,r4,1
89:	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	43b
	li	r3,0		/* huh? all copied successfully this time? */
	blr

/*
 * here we have trapped again, need to clear ctr bytes starting at r3
 */
143:	mfctr	r5
	li	r0,0
	mr	r4,r3
	mr	r3,r5		/* return the number of bytes not copied */
	/* zero byte-by-byte until r4 is 8-byte aligned */
1:	andi.	r9,r4,7
	beq	3f
90:	stb	r0,0(r4)
	addic.	r5,r5,-1
	addi	r4,r4,1
	bne	1b
	blr
	/* then whole doublewords, then the trailing 0-7 bytes */
3:	cmpldi	cr1,r5,8
	srdi	r9,r5,3
	andi.	r5,r5,7
	blt	cr1,93f
	mtctr	r9
91:	std	r0,0(r4)
	addi	r4,r4,8
	bdnz	91b
93:	beqlr
	mtctr	r5
92:	stb	r0,0(r4)
	addi	r4,r4,1
	bdnz	92b
	blr

/*
 * exception handlers for stores: we just need to work
 * out how many bytes weren't copied
 */
182:
183:
	add	r3,r3,r7
	b	1f
371:
180:
	addi	r3,r3,8
171:
177:
179:
	addi	r3,r3,8
370:
372:
176:
178:
	addi	r3,r3,4
185:
	addi	r3,r3,4
170:
172:
345:
173:
174:
175:
181:
184:
186:
187:
188:
189:
194:
195:
196:
1:
	/* r3 = (original dest + original length) - faulting dest address */
	ld	r6,-24(r1)
	ld	r5,-8(r1)
	add	r6,r6,r5
	subf	r3,r3,r6	/* #bytes not copied */
/* a fault while zeroing (90/91/92): r3 was already set at label 143 */
190:
191:
192:
	blr			/* #bytes not copied in r3 */

	EX_TABLE(20b,120b)
	EX_TABLE(220b,320b)
	EX_TABLE(21b,121b)
	EX_TABLE(221b,321b)
	EX_TABLE(70b,170b)
	EX_TABLE(270b,370b)
	EX_TABLE(22b,122b)
	EX_TABLE(222b,322b)
	EX_TABLE(71b,171b)
	EX_TABLE(271b,371b)
	EX_TABLE(72b,172b)
	EX_TABLE(272b,372b)
	EX_TABLE(244b,344b)
	EX_TABLE(245b,345b)
	EX_TABLE(23b,123b)
	EX_TABLE(73b,173b)
	EX_TABLE(44b,144b)
	EX_TABLE(74b,174b)
	EX_TABLE(45b,145b)
	EX_TABLE(75b,175b)
	EX_TABLE(24b,124b)
	EX_TABLE(25b,125b)
	EX_TABLE(26b,126b)
	EX_TABLE(27b,127b)
	EX_TABLE(28b,128b)
	EX_TABLE(29b,129b)
	EX_TABLE(30b,130b)
	EX_TABLE(31b,131b)
	EX_TABLE(32b,132b)
	EX_TABLE(76b,176b)
	EX_TABLE(33b,133b)
	EX_TABLE(77b,177b)
	EX_TABLE(78b,178b)
	EX_TABLE(79b,179b)
	EX_TABLE(80b,180b)
	EX_TABLE(34b,134b)
	EX_TABLE(94b,194b)
	EX_TABLE(95b,195b)
	EX_TABLE(96b,196b)
	EX_TABLE(35b,135b)
	EX_TABLE(81b,181b)
	EX_TABLE(36b,136b)
	EX_TABLE(82b,182b)
	EX_TABLE(37b,137b)
	EX_TABLE(83b,183b)
	EX_TABLE(38b,138b)
	EX_TABLE(39b,139b)
	EX_TABLE(84b,184b)
	EX_TABLE(85b,185b)
	EX_TABLE(40b,140b)
	EX_TABLE(86b,186b)
	EX_TABLE(41b,141b)
	EX_TABLE(87b,187b)
	EX_TABLE(42b,142b)
	EX_TABLE(88b,188b)
	EX_TABLE(43b,143b)
	EX_TABLE(89b,189b)
	EX_TABLE(90b,190b)
	EX_TABLE(91b,191b)
	EX_TABLE(92b,192b)

/*
 * Routine to copy a whole page of data, optimized for POWER4.
 * On POWER4 it is more than 50% faster than the simple loop
 * above (following the .Ldst_aligned label).
 */
/*
 * Reached only when both pointers are 4096-aligned and the length is
 * exactly 4096.  r20-r31 are saved below the stack pointer first.
 *
 * Each pass of the outer loop (label 0) copies 768 bytes as six
 * interleaved streams spaced 128 bytes apart (offsets 0, 128, 256, 384,
 * 512 and 640).  After five passes the remaining 256 bytes are copied
 * 32 bytes at a time by the loop at label 3 and the epilogue at label 4.
 * r3 is biased by -8, so stores at offset k+8 pair with loads at offset k.
 */
.Lcopy_page_4K:
	std	r31,-32(1)
	std	r30,-40(1)
	std	r29,-48(1)
	std	r28,-56(1)
	std	r27,-64(1)
	std	r26,-72(1)
	std	r25,-80(1)
	std	r24,-88(1)
	std	r23,-96(1)
	std	r22,-104(1)
	std	r21,-112(1)
	std	r20,-120(1)
	li	r5,4096/32 - 1
	addi	r3,r3,-8
	li	r0,5
0:	addi	r5,r5,-24
	mtctr	r0
20:	ld	r22,640(4)
21:	ld	r21,512(4)
22:	ld	r20,384(4)
23:	ld	r11,256(4)
24:	ld	r9,128(4)
25:	ld	r7,0(4)
26:	ld	r25,648(4)
27:	ld	r24,520(4)
28:	ld	r23,392(4)
29:	ld	r10,264(4)
30:	ld	r8,136(4)
31:	ldu	r6,8(4)
	cmpwi	r5,24		/* cr0 is consumed by the "bge 0b" below */
1:
32:	std	r22,648(3)
33:	std	r21,520(3)
34:	std	r20,392(3)
35:	std	r11,264(3)
36:	std	r9,136(3)
37:	std	r7,8(3)
38:	ld	r28,648(4)
39:	ld	r27,520(4)
40:	ld	r26,392(4)
41:	ld	r31,264(4)
42:	ld	r30,136(4)
43:	ld	r29,8(4)
44:	std	r25,656(3)
45:	std	r24,528(3)
46:	std	r23,400(3)
47:	std	r10,272(3)
48:	std	r8,144(3)
49:	std	r6,16(3)
50:	ld	r22,656(4)
51:	ld	r21,528(4)
52:	ld	r20,400(4)
53:	ld	r11,272(4)
54:	ld	r9,144(4)
55:	ld	r7,16(4)
56:	std	r28,664(3)
57:	std	r27,536(3)
58:	std	r26,408(3)
59:	std	r31,280(3)
60:	std	r30,152(3)
61:	stdu	r29,24(3)
62:	ld	r25,664(4)
63:	ld	r24,536(4)
64:	ld	r23,408(4)
65:	ld	r10,280(4)
66:	ld	r8,152(4)
67:	ldu	r6,24(4)
	bdnz	1b
68:	std	r22,648(3)
69:	std	r21,520(3)
70:	std	r20,392(3)
71:	std	r11,264(3)
72:	std	r9,136(3)
73:	std	r7,8(3)
74:	addi	r4,r4,640
75:	addi	r3,r3,648
	bge	0b
	mtctr	r5
76:	ld	r7,0(4)
77:	ld	r8,8(4)
78:	ldu	r9,16(4)
3:
79:	ld	r10,8(4)
80:	std	r7,8(3)
81:	ld	r7,16(4)
82:	std	r8,16(3)
83:	ld	r8,24(4)
84:	std	r9,24(3)
85:	ldu	r9,32(4)
86:	stdu	r10,32(3)
	bdnz	3b
4:
87:	ld	r10,8(4)
88:	std	r7,8(3)
89:	std	r8,16(3)
90:	std	r9,24(3)
91:	std	r10,32(3)
9:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	li	r3,0
	blr

/*
 * on an exception, reset to the beginning and jump back into the
 * standard __copy_tofrom_user
 */
/*
 * Restores r20-r31 and the original r3/r4, sets r5 back to 4096 and
 * restarts at .Ldst_aligned, whose own fixups then compute the return
 * value.  cr1 is not written anywhere in the page-copy path, so it still
 * holds the entry-time comparison of the length (4096) against 16.
 */
100:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	ld	r3,-24(r1)
	ld	r4,-16(r1)
	li	r5,4096
	b	.Ldst_aligned

	EX_TABLE(20b,100b)
	EX_TABLE(21b,100b)
	EX_TABLE(22b,100b)
	EX_TABLE(23b,100b)
	EX_TABLE(24b,100b)
	EX_TABLE(25b,100b)
	EX_TABLE(26b,100b)
	EX_TABLE(27b,100b)
	EX_TABLE(28b,100b)
	EX_TABLE(29b,100b)
	EX_TABLE(30b,100b)
	EX_TABLE(31b,100b)
	EX_TABLE(32b,100b)
	EX_TABLE(33b,100b)
	EX_TABLE(34b,100b)
	EX_TABLE(35b,100b)
	EX_TABLE(36b,100b)
	EX_TABLE(37b,100b)
	EX_TABLE(38b,100b)
	EX_TABLE(39b,100b)
	EX_TABLE(40b,100b)
	EX_TABLE(41b,100b)
	EX_TABLE(42b,100b)
	EX_TABLE(43b,100b)
	EX_TABLE(44b,100b)
	EX_TABLE(45b,100b)
	EX_TABLE(46b,100b)
	EX_TABLE(47b,100b)
	EX_TABLE(48b,100b)
	EX_TABLE(49b,100b)
	EX_TABLE(50b,100b)
	EX_TABLE(51b,100b)
	EX_TABLE(52b,100b)
	EX_TABLE(53b,100b)
	EX_TABLE(54b,100b)
	EX_TABLE(55b,100b)
	EX_TABLE(56b,100b)
	EX_TABLE(57b,100b)
	EX_TABLE(58b,100b)
	EX_TABLE(59b,100b)
	EX_TABLE(60b,100b)
	EX_TABLE(61b,100b)
	EX_TABLE(62b,100b)
	EX_TABLE(63b,100b)
	EX_TABLE(64b,100b)
	EX_TABLE(65b,100b)
	EX_TABLE(66b,100b)
	EX_TABLE(67b,100b)
	EX_TABLE(68b,100b)
	EX_TABLE(69b,100b)
	EX_TABLE(70b,100b)
	EX_TABLE(71b,100b)
	EX_TABLE(72b,100b)
	EX_TABLE(73b,100b)
	EX_TABLE(74b,100b)
	EX_TABLE(75b,100b)
	EX_TABLE(76b,100b)
	EX_TABLE(77b,100b)
	EX_TABLE(78b,100b)
	EX_TABLE(79b,100b)
	EX_TABLE(80b,100b)
	EX_TABLE(81b,100b)
	EX_TABLE(82b,100b)
	EX_TABLE(83b,100b)
	EX_TABLE(84b,100b)
	EX_TABLE(85b,100b)
	EX_TABLE(86b,100b)
	EX_TABLE(87b,100b)
	EX_TABLE(88b,100b)
	EX_TABLE(89b,100b)
	EX_TABLE(90b,100b)
	EX_TABLE(91b,100b)

EXPORT_SYMBOL(__copy_tofrom_user)