/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>

#ifdef __BIG_ENDIAN__
#define sLd sld		/* Shift towards low-numbered address. */
#define sHd srd		/* Shift towards high-numbered address. */
#else
#define sLd srd		/* Shift towards low-numbered address. */
#define sHd sld		/* Shift towards high-numbered address. */
#endif

/*
 * __copy_tofrom_user / __copy_tofrom_user_base
 *
 * In:	r3 = destination address
 *	r4 = source address
 *	r5 = number of bytes to copy
 * Out:	r3 = number of bytes NOT copied (0 when everything was copied)
 *
 * The three arguments are saved at -24(r1), -16(r1) and -8(r1); r1 itself
 * is never adjusted.  The fault handlers reload them to work out how far
 * the copy got.  The 4K page path additionally saves r20-r31 at
 * -120(r1)..-32(r1) and restores them before returning.
 *
 * Scratch: r0, r6-r12, ctr, cr0, cr1, cr6, cr7.
 *
 * Every load and store that may fault carries a numeric label N; its
 * fixup lives at label N+100 and the two are tied together by the
 * EX_TABLE() lists further down.  Numeric labels are reused by the 4K
 * page copy, so each EX_TABLE() list must stay between the code it
 * describes and the next reuse of the same label numbers.
 *
 * NOTE(review): the BEGIN_FTR_SECTION/..._END macros are defined outside
 * this file; presumably the alternatives are selected by CPU-feature
 * patching -- confirm against the feature-fixup headers.
 */
	.align	7
_GLOBAL_TOC(__copy_tofrom_user)
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	b	__copy_tofrom_user_power7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
_GLOBAL(__copy_tofrom_user_base)
	/* first check for a whole page copy on a page boundary */
	cmpldi	cr1,r5,16		/* cr1: count vs 16, used for .Lshort_copy */
	cmpdi	cr6,r5,4096
	or	r0,r3,r4
	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
	andi.	r0,r0,4095		/* cr0.eq: src and dest both 4K aligned */
	std	r3,-24(r1)		/* save original dest for the handlers */
	crand	cr0*4+2,cr0*4+2,cr6*4+2	/* cr0.eq &= (count == 4096) */
	std	r4,-16(r1)		/* save original src */
	std	r5,-8(r1)		/* save original count */
	dcbt	0,r4
	beq	.Lcopy_page_4K
	andi.	r6,r6,7			/* cr0.eq: dest already 8-byte aligned */
	PPC_MTOCRF(0x01,r5)		/* low 4 bits of count into cr7 */
	blt	cr1,.Lshort_copy
/* Below we want to nop out the bne if we're on a CPU that has the
 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
 * cleared.
 * At the time of writing the only CPU that has this combination of bits
 * set is Power6.
 */
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	bne	.Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
		    CPU_FTR_UNALIGNED_LD_STD)
.Ldst_aligned:
	/*
	 * From here on r3 lags the next destination byte by a known amount
	 * (16 at this point).  The fixup ladders below add that lag back.
	 */
	addi	r3,r3,-16
BEGIN_FTR_SECTION
	andi.	r0,r4,7			/* r0 = src misalignment in bytes */
	bne	.Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
	blt	cr1,.Ldo_tail		/* if < 16 bytes to copy */
	srdi	r0,r5,5			/* r0 = number of 32-byte chunks */
	cmpdi	cr1,r0,0
20:	ld	r7,0(r4)
220:	ld	r6,8(r4)
	addi	r4,r4,16
	mtctr	r0
	andi.	r0,r5,0x10		/* odd 16-byte chunk present? */
	beq	22f
	addi	r3,r3,16
	addi	r4,r4,-16
	mr	r9,r7
	mr	r8,r6
	beq	cr1,72f			/* no 32-byte chunks: store the 16 and finish */
21:	ld	r7,16(r4)
221:	ld	r6,24(r4)
	addi	r4,r4,32
70:	std	r9,0(r3)
270:	std	r8,8(r3)
22:	ld	r9,0(r4)
222:	ld	r8,8(r4)
71:	std	r7,16(r3)
271:	std	r6,24(r3)
	addi	r3,r3,32
	bdnz	21b
72:	std	r9,0(r3)
272:	std	r8,8(r3)
	andi.	r5,r5,0xf
	beq+	3f
	addi	r4,r4,16
.Ldo_tail:
	/* cr7 bits 0..3 select an 8-, 4-, 2- and 1-byte move respectively */
	addi	r3,r3,16
	bf	cr7*4+0,246f
244:	ld	r9,0(r4)
	addi	r4,r4,8
245:	std	r9,0(r3)
	addi	r3,r3,8
246:	bf	cr7*4+1,1f
23:	lwz	r9,0(r4)
	addi	r4,r4,4
73:	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
44:	lhz	r9,0(r4)
	addi	r4,r4,2
74:	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
45:	lbz	r9,0(r4)
75:	stb	r9,0(r3)
3:	li	r3,0
	blr

.Lsrc_unaligned:
	/*
	 * Destination is 8-byte aligned, source is not (r0 = src & 7).
	 * Load aligned doublewords and merge neighbours with shifts:
	 * r10 = 8 * r0 and r11 = 64 - r10 are the two shift counts.
	 */
	srdi	r6,r5,3
	addi	r5,r5,-16
	subf	r4,r0,r4		/* round src down to an 8-byte boundary */
	srdi	r7,r5,4
	sldi	r10,r0,3
	cmpldi	cr6,r6,3
	andi.	r5,r5,7			/* cr0 is consumed by "bne 6f" below */
	mtctr	r7
	subfic	r11,r10,64
	add	r5,r5,r0
	bt	cr7*4+0,28f

24:	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
25:	ld	r0,8(r4)
	sLd	r6,r9,r10
26:	ldu	r9,16(r4)
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,79f
27:	ld	r0,8(r4)
	b	2f

28:	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
29:	ldu	r9,8(r4)
	sLd	r8,r0,r10
	addi	r3,r3,-8		/* r3 now lags by 24 */
	blt	cr6,5f
30:	ld	r0,8(r4)
	sHd	r12,r9,r11
	sLd	r6,r9,r10
31:	ldu	r9,16(r4)
	or	r12,r8,r12
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	addi	r3,r3,16		/* r3 now lags by 8 */
	beq	cr6,78f

1:	or	r7,r7,r6
32:	ld	r0,8(r4)
76:	std	r12,8(r3)
2:	sHd	r12,r9,r11
	sLd	r6,r9,r10
33:	ldu	r9,16(r4)
	or	r12,r8,r12
77:	stdu	r7,16(r3)
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	bdnz	1b

78:	std	r12,8(r3)
	or	r7,r7,r6
79:	std	r7,16(r3)
5:	sHd	r12,r9,r11
	or	r12,r8,r12
80:	std	r12,24(r3)
	bne	6f			/* leftover bytes (cr0 from andi. above) */
	li	r3,0
	blr
6:	cmpwi	cr1,r5,8
	addi	r3,r3,32		/* r3 = next destination byte */
	sLd	r9,r9,r10
	ble	cr1,7f			/* leftover bytes already held in r9 */
34:	ld	r0,8(r4)
	sHd	r7,r0,r11
	or	r9,r7,r9
7:
	/* store the leftover from r9 in 4/2/1-byte pieces selected by cr7 */
	bf	cr7*4+1,1f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,32
#endif
94:	stw	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,32
#endif
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,16
#endif
95:	sth	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,16
#endif
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,8
#endif
96:	stb	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,8
#endif
3:	li	r3,0
	blr

.Ldst_unaligned:
	/*
	 * Copy r6 (1..7) bytes to bring the destination up to an 8-byte
	 * boundary.  r7 counts the bytes moved so far; the 136/137 and
	 * 182/183 fixups add it to r3.
	 */
	PPC_MTOCRF(0x01,r6)		/* put #bytes to 8B bdry into cr7 */
	subf	r5,r6,r5
	li	r7,0
	cmpldi	cr1,r5,16		/* redo the "< 16 left" test for .Ldst_aligned */
	bf	cr7*4+3,1f
35:	lbz	r0,0(r4)
81:	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
36:	lhzx	r0,r7,r4
82:	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
37:	lwzx	r0,r7,r4
83:	stwx	r0,r7,r3
3:	PPC_MTOCRF(0x01,r5)		/* low 4 bits of the new count into cr7 */
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned

.Lshort_copy:
	/* fewer than 16 bytes: cr7 bits 0..3 select 8, 4, 2 and 1 byte moves */
	bf	cr7*4+0,1f
38:	lwz	r0,0(r4)
39:	lwz	r9,4(r4)
	addi	r4,r4,8
84:	stw	r0,0(r3)
85:	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
40:	lwz	r0,0(r4)
	addi	r4,r4,4
86:	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
41:	lhz	r0,0(r4)
	addi	r4,r4,2
87:	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
42:	lbz	r0,0(r4)
88:	stb	r0,0(r3)
4:	li	r3,0
	blr

/*
 * exception handlers follow
 * we have to return the number of bytes not copied
 * for an exception on a load, we retry the rest of the copy one byte at
 * a time and return however many bytes are still left when that faults
 * again; the destination is not zeroed here
 *
 * Load fixups: each label sits where the addi instructions it falls
 * through add back the amount by which r3 lagged the first unmodified
 * destination byte at the faulting load (24, 16, 8 or 0 bytes).
 */

136:
137:
	add	r3,r3,r7
	b	1f
130:
131:
	addi	r3,r3,8
120:
320:
122:
322:
124:
125:
126:
127:
128:
129:
133:
	addi	r3,r3,8
132:
	addi	r3,r3,8
121:
321:
344:
134:
135:
138:
139:
140:
141:
142:
123:
144:
145:

/*
 * here we have had a fault on a load and r3 points to the first
 * unmodified byte of the destination
 */
1:	ld	r6,-24(r1)
	ld	r4,-16(r1)
	ld	r5,-8(r1)
	subf	r6,r6,r3		/* r6 = bytes already copied */
	add	r4,r4,r6
	subf	r5,r6,r5	/* #bytes left to go */

/*
 * first see if we can copy any more bytes before hitting another exception
 */
	mtctr	r5
43:	lbz	r0,0(r4)
	addi	r4,r4,1
89:	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	43b
	li	r3,0		/* huh? all copied successfully this time? */
	blr

/*
 * here we have trapped again, amount remaining is in ctr.
 */
143:	mfctr	r3
	blr

/*
 * exception handlers for stores: we just need to work
 * out how many bytes weren't copied
 *
 * Same ladder idea as for loads: the labels fall through addi
 * instructions totalling 24, 16, 8, 4 or 0 so that r3 ends up at the
 * first byte that was not stored.
 */
182:
183:
	add	r3,r3,r7
	b	1f
371:
180:
	addi	r3,r3,8
171:
177:
179:
	addi	r3,r3,8
370:
372:
176:
178:
	addi	r3,r3,4
185:
	addi	r3,r3,4
170:
172:
345:
173:
174:
175:
181:
184:
186:
187:
188:
189:
194:
195:
196:
1:
	ld	r6,-24(r1)
	ld	r5,-8(r1)
	add	r6,r6,r5		/* r6 = original dest + count */
	subf	r3,r3,r6	/* #bytes not copied */
	blr

	EX_TABLE(20b,120b)
	EX_TABLE(220b,320b)
	EX_TABLE(21b,121b)
	EX_TABLE(221b,321b)
	EX_TABLE(70b,170b)
	EX_TABLE(270b,370b)
	EX_TABLE(22b,122b)
	EX_TABLE(222b,322b)
	EX_TABLE(71b,171b)
	EX_TABLE(271b,371b)
	EX_TABLE(72b,172b)
	EX_TABLE(272b,372b)
	EX_TABLE(244b,344b)
	EX_TABLE(245b,345b)
	EX_TABLE(23b,123b)
	EX_TABLE(73b,173b)
	EX_TABLE(44b,144b)
	EX_TABLE(74b,174b)
	EX_TABLE(45b,145b)
	EX_TABLE(75b,175b)
	EX_TABLE(24b,124b)
	EX_TABLE(25b,125b)
	EX_TABLE(26b,126b)
	EX_TABLE(27b,127b)
	EX_TABLE(28b,128b)
	EX_TABLE(29b,129b)
	EX_TABLE(30b,130b)
	EX_TABLE(31b,131b)
	EX_TABLE(32b,132b)
	EX_TABLE(76b,176b)
	EX_TABLE(33b,133b)
	EX_TABLE(77b,177b)
	EX_TABLE(78b,178b)
	EX_TABLE(79b,179b)
	EX_TABLE(80b,180b)
	EX_TABLE(34b,134b)
	EX_TABLE(94b,194b)
	EX_TABLE(95b,195b)
	EX_TABLE(96b,196b)
	EX_TABLE(35b,135b)
	EX_TABLE(81b,181b)
	EX_TABLE(36b,136b)
	EX_TABLE(82b,182b)
	EX_TABLE(37b,137b)
	EX_TABLE(83b,183b)
	EX_TABLE(38b,138b)
	EX_TABLE(39b,139b)
	EX_TABLE(84b,184b)
	EX_TABLE(85b,185b)
	EX_TABLE(40b,140b)
	EX_TABLE(86b,186b)
	EX_TABLE(41b,141b)
	EX_TABLE(87b,187b)
	EX_TABLE(42b,142b)
	EX_TABLE(88b,188b)
	EX_TABLE(43b,143b)
	EX_TABLE(89b,189b)

/*
 * Routine to copy a whole page of data, optimized for POWER4.
 * On POWER4 it is more than 50% faster than the simple loop
 * above (following the .Ldst_aligned label).
 *
 * Reached only when source and destination are both 4K aligned and the
 * count is exactly 4096.  r20-r31 are saved below r1 and restored on
 * every exit.  r3 is biased by -8 for the whole routine.  The first
 * phase works on six streams spaced 128 bytes apart; the loop at 3:
 * then moves 32 bytes per iteration.
 */
.Lcopy_page_4K:
	std	r31,-32(r1)
	std	r30,-40(r1)
	std	r29,-48(r1)
	std	r28,-56(r1)
	std	r27,-64(r1)
	std	r26,-72(r1)
	std	r25,-80(r1)
	std	r24,-88(r1)
	std	r23,-96(r1)
	std	r22,-104(r1)
	std	r21,-112(r1)
	std	r20,-120(r1)
	li	r5,4096/32 - 1
	addi	r3,r3,-8
	li	r0,5
0:	addi	r5,r5,-24
	mtctr	r0
20:	ld	r22,640(r4)
21:	ld	r21,512(r4)
22:	ld	r20,384(r4)
23:	ld	r11,256(r4)
24:	ld	r9,128(r4)
25:	ld	r7,0(r4)
26:	ld	r25,648(r4)
27:	ld	r24,520(r4)
28:	ld	r23,392(r4)
29:	ld	r10,264(r4)
30:	ld	r8,136(r4)
31:	ldu	r6,8(r4)
	cmpwi	r5,24			/* cr0 is consumed by "bge 0b" below */
1:
32:	std	r22,648(r3)
33:	std	r21,520(r3)
34:	std	r20,392(r3)
35:	std	r11,264(r3)
36:	std	r9,136(r3)
37:	std	r7,8(r3)
38:	ld	r28,648(r4)
39:	ld	r27,520(r4)
40:	ld	r26,392(r4)
41:	ld	r31,264(r4)
42:	ld	r30,136(r4)
43:	ld	r29,8(r4)
44:	std	r25,656(r3)
45:	std	r24,528(r3)
46:	std	r23,400(r3)
47:	std	r10,272(r3)
48:	std	r8,144(r3)
49:	std	r6,16(r3)
50:	ld	r22,656(r4)
51:	ld	r21,528(r4)
52:	ld	r20,400(r4)
53:	ld	r11,272(r4)
54:	ld	r9,144(r4)
55:	ld	r7,16(r4)
56:	std	r28,664(r3)
57:	std	r27,536(r3)
58:	std	r26,408(r3)
59:	std	r31,280(r3)
60:	std	r30,152(r3)
61:	stdu	r29,24(r3)
62:	ld	r25,664(r4)
63:	ld	r24,536(r4)
64:	ld	r23,408(r4)
65:	ld	r10,280(r4)
66:	ld	r8,152(r4)
67:	ldu	r6,24(r4)
	bdnz	1b
68:	std	r22,648(r3)
69:	std	r21,520(r3)
70:	std	r20,392(r3)
71:	std	r11,264(r3)
72:	std	r9,136(r3)
73:	std	r7,8(r3)
74:	addi	r4,r4,640
75:	addi	r3,r3,648
	bge	0b
	mtctr	r5
76:	ld	r7,0(r4)
77:	ld	r8,8(r4)
78:	ldu	r9,16(r4)
3:
79:	ld	r10,8(r4)
80:	std	r7,8(r3)
81:	ld	r7,16(r4)
82:	std	r8,16(r3)
83:	ld	r8,24(r4)
84:	std	r9,24(r3)
85:	ldu	r9,32(r4)
86:	stdu	r10,32(r3)
	bdnz	3b
4:
87:	ld	r10,8(r4)
88:	std	r7,8(r3)
89:	std	r8,16(r3)
90:	std	r9,24(r3)
91:	std	r10,32(r3)
9:	ld	r20,-120(r1)
	ld	r21,-112(r1)
	ld	r22,-104(r1)
	ld	r23,-96(r1)
	ld	r24,-88(r1)
	ld	r25,-80(r1)
	ld	r26,-72(r1)
	ld	r27,-64(r1)
	ld	r28,-56(r1)
	ld	r29,-48(r1)
	ld	r30,-40(r1)
	ld	r31,-32(r1)
	li	r3,0
	blr

/*
 * on an exception, reset to the beginning and jump back into the
 * standard __copy_tofrom_user
 *
 * r20-r31 are restored first, then the original dest and src are
 * reloaded and the count is set back to 4096, so the generic path redoes
 * the whole page and reports the shortfall if it faults again.
 */
100:	ld	r20,-120(r1)
	ld	r21,-112(r1)
	ld	r22,-104(r1)
	ld	r23,-96(r1)
	ld	r24,-88(r1)
	ld	r25,-80(r1)
	ld	r26,-72(r1)
	ld	r27,-64(r1)
	ld	r28,-56(r1)
	ld	r29,-48(r1)
	ld	r30,-40(r1)
	ld	r31,-32(r1)
	ld	r3,-24(r1)
	ld	r4,-16(r1)
	li	r5,4096
	b	.Ldst_aligned

	EX_TABLE(20b,100b)
	EX_TABLE(21b,100b)
	EX_TABLE(22b,100b)
	EX_TABLE(23b,100b)
	EX_TABLE(24b,100b)
	EX_TABLE(25b,100b)
	EX_TABLE(26b,100b)
	EX_TABLE(27b,100b)
	EX_TABLE(28b,100b)
	EX_TABLE(29b,100b)
	EX_TABLE(30b,100b)
	EX_TABLE(31b,100b)
	EX_TABLE(32b,100b)
	EX_TABLE(33b,100b)
	EX_TABLE(34b,100b)
	EX_TABLE(35b,100b)
	EX_TABLE(36b,100b)
	EX_TABLE(37b,100b)
	EX_TABLE(38b,100b)
	EX_TABLE(39b,100b)
	EX_TABLE(40b,100b)
	EX_TABLE(41b,100b)
	EX_TABLE(42b,100b)
	EX_TABLE(43b,100b)
	EX_TABLE(44b,100b)
	EX_TABLE(45b,100b)
	EX_TABLE(46b,100b)
	EX_TABLE(47b,100b)
	EX_TABLE(48b,100b)
	EX_TABLE(49b,100b)
	EX_TABLE(50b,100b)
	EX_TABLE(51b,100b)
	EX_TABLE(52b,100b)
	EX_TABLE(53b,100b)
	EX_TABLE(54b,100b)
	EX_TABLE(55b,100b)
	EX_TABLE(56b,100b)
	EX_TABLE(57b,100b)
	EX_TABLE(58b,100b)
	EX_TABLE(59b,100b)
	EX_TABLE(60b,100b)
	EX_TABLE(61b,100b)
	EX_TABLE(62b,100b)
	EX_TABLE(63b,100b)
	EX_TABLE(64b,100b)
	EX_TABLE(65b,100b)
	EX_TABLE(66b,100b)
	EX_TABLE(67b,100b)
	EX_TABLE(68b,100b)
	EX_TABLE(69b,100b)
	EX_TABLE(70b,100b)
	EX_TABLE(71b,100b)
	EX_TABLE(72b,100b)
	EX_TABLE(73b,100b)
	EX_TABLE(74b,100b)
	EX_TABLE(75b,100b)
	EX_TABLE(76b,100b)
	EX_TABLE(77b,100b)
	EX_TABLE(78b,100b)
	EX_TABLE(79b,100b)
	EX_TABLE(80b,100b)
	EX_TABLE(81b,100b)
	EX_TABLE(82b,100b)
	EX_TABLE(83b,100b)
	EX_TABLE(84b,100b)
	EX_TABLE(85b,100b)
	EX_TABLE(86b,100b)
	EX_TABLE(87b,100b)
	EX_TABLE(88b,100b)
	EX_TABLE(89b,100b)
	EX_TABLE(90b,100b)
	EX_TABLE(91b,100b)

EXPORT_SYMBOL(__copy_tofrom_user)