/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>

#ifdef __BIG_ENDIAN__
#define sLd sld		/* Shift towards low-numbered address. */
#define sHd srd		/* Shift towards high-numbered address. */
#else
#define sLd srd		/* Shift towards low-numbered address. */
#define sHd sld		/* Shift towards high-numbered address. */
#endif

/*
 * __copy_tofrom_user / __copy_tofrom_user_base
 *
 * In:	r3 = destination address
 *	r4 = source address
 *	r5 = number of bytes to copy
 * Out:	r3 = number of bytes NOT copied (0 when the whole copy succeeded)
 *
 * The three arguments are stashed at -24(r1), -16(r1) and -8(r1) on entry,
 * without moving r1, so that the fault handlers further down can work out
 * how far the copy got.  The page-copy path additionally parks r20-r31 at
 * -32(r1) .. -120(r1) and restores them before leaving.
 *
 * Every load and store that may fault carries a numeric label which is
 * paired with a fixup label through EX_TABLE().
 */
	.align	7
_GLOBAL_TOC(__copy_tofrom_user)
#ifdef CONFIG_PPC_BOOK3S_64
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	b	__copy_tofrom_user_power7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
#endif
_GLOBAL(__copy_tofrom_user_base)
	/* first check for a whole page copy on a page boundary */
	cmpldi	cr1,r5,16	/* cr1: length vs 16, used by the blt's below */
	cmpdi	cr6,r5,4096	/* cr6.eq: length is exactly 4096 */
	or	r0,r3,r4
	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
	andi.	r0,r0,4095	/* cr0.eq: low 12 bits of dest and src all clear */
	std	r3,-24(r1)	/* save the original arguments for the fixups */
	crand	cr0*4+2,cr0*4+2,cr6*4+2	/* cr0.eq &= cr6.eq */
	std	r4,-16(r1)
	std	r5,-8(r1)
	dcbt	0,r4
	beq	.Lcopy_page_4K
	andi.	r6,r6,7		/* r6 = bytes up to the next 8-byte dest boundary */
	PPC_MTOCRF(0x01,r5)	/* low 4 bits of the length into cr7 */
	blt	cr1,.Lshort_copy
/* Below we want to nop out the bne if we're on a CPU that has the
 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
 * cleared.
 * At the time of writing the only CPU that has this combination of bits
 * set is Power6.
 */
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	bne	.Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
		    CPU_FTR_UNALIGNED_LD_STD)
.Ldst_aligned:
	addi	r3,r3,-16	/* bias dest; undone at .Ldo_tail or in the loop */
BEGIN_FTR_SECTION
	andi.	r0,r4,7		/* r0 = source misalignment within a doubleword */
	bne	.Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
	blt	cr1,.Ldo_tail		/* if < 16 bytes to copy */
	srdi	r0,r5,5		/* r0 = number of 32-byte loop iterations */
	cmpdi	cr1,r0,0
20:	ld	r7,0(r4)
220:	ld	r6,8(r4)
	addi	r4,r4,16
	mtctr	r0
	andi.	r0,r5,0x10	/* is there an odd 16-byte chunk? */
	beq	22f
	addi	r3,r3,16
	addi	r4,r4,-16
	mr	r9,r7
	mr	r8,r6
	beq	cr1,72f		/* no full 32-byte iterations at all */
	/* main loop: 32 bytes per iteration, loads run ahead of the stores */
21:	ld	r7,16(r4)
221:	ld	r6,24(r4)
	addi	r4,r4,32
70:	std	r9,0(r3)
270:	std	r8,8(r3)
22:	ld	r9,0(r4)
222:	ld	r8,8(r4)
71:	std	r7,16(r3)
271:	std	r6,24(r3)
	addi	r3,r3,32
	bdnz	21b
72:	std	r9,0(r3)
272:	std	r8,8(r3)
	andi.	r5,r5,0xf	/* anything left below 16 bytes? */
	beq+	3f
	addi	r4,r4,16
	/* tail: the cr7 bits select an 8, 4, 2 and 1 byte move in turn */
.Ldo_tail:
	addi	r3,r3,16
	bf	cr7*4+0,246f
244:	ld	r9,0(r4)
	addi	r4,r4,8
245:	std	r9,0(r3)
	addi	r3,r3,8
246:	bf	cr7*4+1,1f
23:	lwz	r9,0(r4)
	addi	r4,r4,4
73:	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
44:	lhz	r9,0(r4)
	addi	r4,r4,2
74:	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
45:	lbz	r9,0(r4)
75:	stb	r9,0(r3)
3:	li	r3,0		/* everything copied */
	blr

/*
 * Destination is 8-byte aligned but the source is not: read aligned
 * doublewords from the source and merge neighbouring pairs with shifts.
 * On entry r0 holds the source misalignment in bytes (1..7).
 */
.Lsrc_unaligned:
	srdi	r6,r5,3
	addi	r5,r5,-16
	subf	r4,r0,r4	/* round the source down to a doubleword */
	srdi	r7,r5,4
	sldi	r10,r0,3	/* r10 = misalignment in bits */
	cmpldi	cr6,r6,3
	andi.	r5,r5,7
	mtctr	r7
	subfic	r11,r10,64	/* r11 = 64 - r10, the complementary shift */
	add	r5,r5,r0
	bt	cr7*4+0,28f

24:	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
25:	ld	r0,8(r4)
	sLd	r6,r9,r10
26:	ldu	r9,16(r4)
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,79f
27:	ld	r0,8(r4)
	b	2f

28:	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
29:	ldu	r9,8(r4)
	sLd	r8,r0,r10
	addi	r3,r3,-8
	blt	cr6,5f
30:	ld	r0,8(r4)
	sHd	r12,r9,r11
	sLd	r6,r9,r10
31:	ldu	r9,16(r4)
	or	r12,r8,r12
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	addi	r3,r3,16
	beq	cr6,78f

1:	or	r7,r7,r6
32:	ld	r0,8(r4)
76:	std	r12,8(r3)
2:	sHd	r12,r9,r11
	sLd	r6,r9,r10
33:	ldu	r9,16(r4)
	or	r12,r8,r12
77:	stdu	r7,16(r3)
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	bdnz	1b

78:	std	r12,8(r3)
	or	r7,r7,r6
79:	std	r7,16(r3)
5:	sHd	r12,r9,r11
	or	r12,r8,r12
80:	std	r12,24(r3)
	bne	6f		/* cr0 still holds the andi. r5,r5,7 result */
	li	r3,0
	blr
6:	cmpwi	cr1,r5,8
	addi	r3,r3,32
	sLd	r9,r9,r10
	ble	cr1,7f
34:	ld	r0,8(r4)
	sHd	r7,r0,r11
	or	r9,r7,r9
7:
	/* store the trailing 4, 2 and 1 bytes out of r9, rotating as we go */
	bf	cr7*4+1,1f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,32
#endif
94:	stw	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,32
#endif
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,16
#endif
95:	sth	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,16
#endif
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,8
#endif
96:	stb	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,8
#endif
3:	li	r3,0
	blr

/*
 * Destination is not 8-byte aligned: copy r6 (1..7) leading bytes as a
 * 1, 2 and 4 byte move, with r7 as the running offset, then rejoin the
 * aligned path with the pointers advanced and the length reduced.
 */
.Ldst_unaligned:
	PPC_MTOCRF(0x01,r6)		/* put #bytes to 8B bdry into cr7 */
	subf	r5,r6,r5
	li	r7,0
	cmpldi	cr1,r5,16	/* refresh cr1 for the shortened length */
	bf	cr7*4+3,1f
35:	lbz	r0,0(r4)
81:	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
36:	lhzx	r0,r7,r4
82:	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
37:	lwzx	r0,r7,r4
83:	stwx	r0,r7,r3
3:	PPC_MTOCRF(0x01,r5)	/* cr7 = low bits of the remaining length */
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned

/*
 * Fewer than 16 bytes in total: the cr7 bits select an 8 (as two words),
 * 4, 2 and 1 byte move in turn.
 */
.Lshort_copy:
	bf	cr7*4+0,1f
38:	lwz	r0,0(r4)
39:	lwz	r9,4(r4)
	addi	r4,r4,8
84:	stw	r0,0(r3)
85:	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
40:	lwz	r0,0(r4)
	addi	r4,r4,4
86:	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
41:	lhz	r0,0(r4)
	addi	r4,r4,2
87:	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
42:	lbz	r0,0(r4)
88:	stb	r0,0(r3)
4:	li	r3,0
	blr

/*
 * exception handlers follow
 * we have to return the number of bytes not copied
 * for an exception on a load, we retry the rest of the copy one byte at
 * a time and, if that faults as well, return the count still outstanding;
 * the destination is not zeroed here
 */

/*
 * Load fixups.  The labels fall through one another; each addi on the way
 * down moves r3 forward so that it ends up at the first destination byte
 * that has not been written yet.
 */
136:
137:
	add	r3,r3,r7	/* .Ldst_unaligned: r7 is the offset reached */
	b	1f
130:
131:
	addi	r3,r3,8
120:
320:
122:
322:
124:
125:
126:
127:
128:
129:
133:
	addi	r3,r3,8
132:
	addi	r3,r3,8
121:
321:
344:
134:
135:
138:
139:
140:
141:
142:
123:
144:
145:

/*
 * here we have had a fault on a load and r3 points to the first
 * unmodified byte of the destination
 */
1:	ld	r6,-24(r1)	/* original destination */
	ld	r4,-16(r1)	/* original source */
	ld	r5,-8(r1)	/* original length */
	subf	r6,r6,r3	/* r6 = bytes already copied */
	add	r4,r4,r6	/* source address to resume from */
	subf	r5,r6,r5	/* #bytes left to go */

/*
 * first see if we can copy any more bytes before hitting another exception
 */
	mtctr	r5
43:	lbz	r0,0(r4)
	addi	r4,r4,1
89:	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	43b
	li	r3,0		/* huh? all copied successfully this time? */
	blr

/*
 * here we have trapped again, amount remaining is in ctr.
 */
143:	mfctr	r3
	blr

/*
 * exception handlers for stores: we just need to work
 * out how many bytes weren't copied
 */
182:
183:
	add	r3,r3,r7	/* .Ldst_unaligned: r7 is the offset reached */
	b	1f
371:
180:
	addi	r3,r3,8
171:
177:
179:
	addi	r3,r3,8
370:
372:
176:
178:
	addi	r3,r3,4
185:
	addi	r3,r3,4
170:
172:
345:
173:
174:
175:
181:
184:
186:
187:
188:
189:
194:
195:
196:
1:
	ld	r6,-24(r1)	/* original destination */
	ld	r5,-8(r1)	/* original length */
	add	r6,r6,r5	/* r6 = end of the destination buffer */
	subf	r3,r3,r6	/* #bytes not copied */
	blr

/*
 * Fixup table for the paths above.  Each entry pairs a numbered load or
 * store with the handler label to continue at if that access faults
 * (EX_TABLE itself is defined outside this file).
 */
	EX_TABLE(20b,120b)
	EX_TABLE(220b,320b)
	EX_TABLE(21b,121b)
	EX_TABLE(221b,321b)
	EX_TABLE(70b,170b)
	EX_TABLE(270b,370b)
	EX_TABLE(22b,122b)
	EX_TABLE(222b,322b)
	EX_TABLE(71b,171b)
	EX_TABLE(271b,371b)
	EX_TABLE(72b,172b)
	EX_TABLE(272b,372b)
	EX_TABLE(244b,344b)
	EX_TABLE(245b,345b)
	EX_TABLE(23b,123b)
	EX_TABLE(73b,173b)
	EX_TABLE(44b,144b)
	EX_TABLE(74b,174b)
	EX_TABLE(45b,145b)
	EX_TABLE(75b,175b)
	EX_TABLE(24b,124b)
	EX_TABLE(25b,125b)
	EX_TABLE(26b,126b)
	EX_TABLE(27b,127b)
	EX_TABLE(28b,128b)
	EX_TABLE(29b,129b)
	EX_TABLE(30b,130b)
	EX_TABLE(31b,131b)
	EX_TABLE(32b,132b)
	EX_TABLE(76b,176b)
	EX_TABLE(33b,133b)
	EX_TABLE(77b,177b)
	EX_TABLE(78b,178b)
	EX_TABLE(79b,179b)
	EX_TABLE(80b,180b)
	EX_TABLE(34b,134b)
	EX_TABLE(94b,194b)
	EX_TABLE(95b,195b)
	EX_TABLE(96b,196b)
	EX_TABLE(35b,135b)
	EX_TABLE(81b,181b)
	EX_TABLE(36b,136b)
	EX_TABLE(82b,182b)
	EX_TABLE(37b,137b)
	EX_TABLE(83b,183b)
	EX_TABLE(38b,138b)
	EX_TABLE(39b,139b)
	EX_TABLE(84b,184b)
	EX_TABLE(85b,185b)
	EX_TABLE(40b,140b)
	EX_TABLE(86b,186b)
	EX_TABLE(41b,141b)
	EX_TABLE(87b,187b)
	EX_TABLE(42b,142b)
	EX_TABLE(88b,188b)
	EX_TABLE(43b,143b)
	EX_TABLE(89b,189b)

/*
 * Routine to copy a whole page of data, optimized for POWER4.
 * On POWER4 it is more than 50% faster than the simple loop
 * above (following the .Ldst_aligned label).
 *
 * Reached only when the length is 4096 and both pointers have their low
 * 12 bits clear.  r20-r31 are saved below r1 and restored on every exit.
 * The main loop works on six streams spaced 128 bytes apart, so one outer
 * pass moves 768 bytes (24 units of 32 bytes); r5 counts the 32-byte
 * units still to be handled by the simple loop at the end.
 */
.Lcopy_page_4K:
	std	r31,-32(r1)
	std	r30,-40(r1)
	std	r29,-48(r1)
	std	r28,-56(r1)
	std	r27,-64(r1)
	std	r26,-72(r1)
	std	r25,-80(r1)
	std	r24,-88(r1)
	std	r23,-96(r1)
	std	r22,-104(r1)
	std	r21,-112(r1)
	std	r20,-120(r1)
	li	r5,4096/32 - 1
	addi	r3,r3,-8
	li	r0,5
0:	addi	r5,r5,-24
	mtctr	r0
20:	ld	r22,640(r4)
21:	ld	r21,512(r4)
22:	ld	r20,384(r4)
23:	ld	r11,256(r4)
24:	ld	r9,128(r4)
25:	ld	r7,0(r4)
26:	ld	r25,648(r4)
27:	ld	r24,520(r4)
28:	ld	r23,392(r4)
29:	ld	r10,264(r4)
30:	ld	r8,136(r4)
31:	ldu	r6,8(r4)
	cmpwi	r5,24		/* cr0 is consumed by the bge 0b below */
1:
32:	std	r22,648(r3)
33:	std	r21,520(r3)
34:	std	r20,392(r3)
35:	std	r11,264(r3)
36:	std	r9,136(r3)
37:	std	r7,8(r3)
38:	ld	r28,648(r4)
39:	ld	r27,520(r4)
40:	ld	r26,392(r4)
41:	ld	r31,264(r4)
42:	ld	r30,136(r4)
43:	ld	r29,8(r4)
44:	std	r25,656(r3)
45:	std	r24,528(r3)
46:	std	r23,400(r3)
47:	std	r10,272(r3)
48:	std	r8,144(r3)
49:	std	r6,16(r3)
50:	ld	r22,656(r4)
51:	ld	r21,528(r4)
52:	ld	r20,400(r4)
53:	ld	r11,272(r4)
54:	ld	r9,144(r4)
55:	ld	r7,16(r4)
56:	std	r28,664(r3)
57:	std	r27,536(r3)
58:	std	r26,408(r3)
59:	std	r31,280(r3)
60:	std	r30,152(r3)
61:	stdu	r29,24(r3)
62:	ld	r25,664(r4)
63:	ld	r24,536(r4)
64:	ld	r23,408(r4)
65:	ld	r10,280(r4)
66:	ld	r8,152(r4)
67:	ldu	r6,24(r4)
	bdnz	1b
68:	std	r22,648(r3)
69:	std	r21,520(r3)
70:	std	r20,392(r3)
71:	std	r11,264(r3)
72:	std	r9,136(r3)
73:	std	r7,8(r3)
74:	addi	r4,r4,640	/* step both pointers over the other five streams */
75:	addi	r3,r3,648
	bge	0b
	mtctr	r5
	/* what is left of the page: 32 bytes per iteration */
76:	ld	r7,0(r4)
77:	ld	r8,8(r4)
78:	ldu	r9,16(r4)
3:
79:	ld	r10,8(r4)
80:	std	r7,8(r3)
81:	ld	r7,16(r4)
82:	std	r8,16(r3)
83:	ld	r8,24(r4)
84:	std	r9,24(r3)
85:	ldu	r9,32(r4)
86:	stdu	r10,32(r3)
	bdnz	3b
4:
87:	ld	r10,8(r4)
88:	std	r7,8(r3)
89:	std	r8,16(r3)
90:	std	r9,24(r3)
91:	std	r10,32(r3)
9:	ld	r20,-120(r1)
	ld	r21,-112(r1)
	ld	r22,-104(r1)
	ld	r23,-96(r1)
	ld	r24,-88(r1)
	ld	r25,-80(r1)
	ld	r26,-72(r1)
	ld	r27,-64(r1)
	ld	r28,-56(r1)
	ld	r29,-48(r1)
	ld	r30,-40(r1)
	ld	r31,-32(r1)
	li	r3,0
	blr

/*
 * on an exception, reset to the beginning and jump back into the
 * standard __copy_tofrom_user
 *
 * The saved registers and the original dest/source are reloaded and the
 * length is set back to 4096, so the generic path redoes the page and its
 * own handlers work out the residue.
 */
100:	ld	r20,-120(r1)
	ld	r21,-112(r1)
	ld	r22,-104(r1)
	ld	r23,-96(r1)
	ld	r24,-88(r1)
	ld	r25,-80(r1)
	ld	r26,-72(r1)
	ld	r27,-64(r1)
	ld	r28,-56(r1)
	ld	r29,-48(r1)
	ld	r30,-40(r1)
	ld	r31,-32(r1)
	ld	r3,-24(r1)
	ld	r4,-16(r1)
	li	r5,4096
	b	.Ldst_aligned

	/* every numbered instruction in the page copy shares fixup 100 */
	EX_TABLE(20b,100b)
	EX_TABLE(21b,100b)
	EX_TABLE(22b,100b)
	EX_TABLE(23b,100b)
	EX_TABLE(24b,100b)
	EX_TABLE(25b,100b)
	EX_TABLE(26b,100b)
	EX_TABLE(27b,100b)
	EX_TABLE(28b,100b)
	EX_TABLE(29b,100b)
	EX_TABLE(30b,100b)
	EX_TABLE(31b,100b)
	EX_TABLE(32b,100b)
	EX_TABLE(33b,100b)
	EX_TABLE(34b,100b)
	EX_TABLE(35b,100b)
	EX_TABLE(36b,100b)
	EX_TABLE(37b,100b)
	EX_TABLE(38b,100b)
	EX_TABLE(39b,100b)
	EX_TABLE(40b,100b)
	EX_TABLE(41b,100b)
	EX_TABLE(42b,100b)
	EX_TABLE(43b,100b)
	EX_TABLE(44b,100b)
	EX_TABLE(45b,100b)
	EX_TABLE(46b,100b)
	EX_TABLE(47b,100b)
	EX_TABLE(48b,100b)
	EX_TABLE(49b,100b)
	EX_TABLE(50b,100b)
	EX_TABLE(51b,100b)
	EX_TABLE(52b,100b)
	EX_TABLE(53b,100b)
	EX_TABLE(54b,100b)
	EX_TABLE(55b,100b)
	EX_TABLE(56b,100b)
	EX_TABLE(57b,100b)
	EX_TABLE(58b,100b)
	EX_TABLE(59b,100b)
	EX_TABLE(60b,100b)
	EX_TABLE(61b,100b)
	EX_TABLE(62b,100b)
	EX_TABLE(63b,100b)
	EX_TABLE(64b,100b)
	EX_TABLE(65b,100b)
	EX_TABLE(66b,100b)
	EX_TABLE(67b,100b)
	EX_TABLE(68b,100b)
	EX_TABLE(69b,100b)
	EX_TABLE(70b,100b)
	EX_TABLE(71b,100b)
	EX_TABLE(72b,100b)
	EX_TABLE(73b,100b)
	EX_TABLE(74b,100b)
	EX_TABLE(75b,100b)
	EX_TABLE(76b,100b)
	EX_TABLE(77b,100b)
	EX_TABLE(78b,100b)
	EX_TABLE(79b,100b)
	EX_TABLE(80b,100b)
	EX_TABLE(81b,100b)
	EX_TABLE(82b,100b)
	EX_TABLE(83b,100b)
	EX_TABLE(84b,100b)
	EX_TABLE(85b,100b)
	EX_TABLE(86b,100b)
	EX_TABLE(87b,100b)
	EX_TABLE(88b,100b)
	EX_TABLE(89b,100b)
	EX_TABLE(90b,100b)
	EX_TABLE(91b,100b)

EXPORT_SYMBOL(__copy_tofrom_user)