/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>

	.align	7
_GLOBAL(__copy_tofrom_user)
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	b	__copy_tofrom_user_power7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
_GLOBAL(__copy_tofrom_user_base)
	/* first check for a whole page copy on a page boundary */
	cmpldi	cr1,r5,16
	cmpdi	cr6,r5,4096
	or	r0,r3,r4
	neg	r6,r3			/* LS 3 bits = # bytes to 8-byte dest bdry */
	andi.	r0,r0,4095
	std	r3,-24(r1)
	crand	cr0*4+2,cr0*4+2,cr6*4+2
	std	r4,-16(r1)
	std	r5,-8(r1)
	dcbt	0,r4
	beq	.Lcopy_page_4K
	andi.	r6,r6,7
	PPC_MTOCRF(0x01,r5)
	blt	cr1,.Lshort_copy
/* Below we want to nop out the bne if we're on a CPU that has the
 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
 * cleared.
 * At the time of writing the only CPU that has this combination of bits
 * set is Power6.
 */
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	bne	.Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
		    CPU_FTR_UNALIGNED_LD_STD)
.Ldst_aligned:
	addi	r3,r3,-16
BEGIN_FTR_SECTION
	andi.	r0,r4,7
	bne	.Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
	blt	cr1,.Ldo_tail		/* if < 16 bytes to copy */
	srdi	r0,r5,5
	cmpdi	cr1,r0,0
20:	ld	r7,0(r4)
220:	ld	r6,8(r4)
	addi	r4,r4,16
	mtctr	r0
	andi.	r0,r5,0x10
	beq	22f
	addi	r3,r3,16
	addi	r4,r4,-16
	mr	r9,r7
	mr	r8,r6
	beq	cr1,72f
21:	ld	r7,16(r4)
221:	ld	r6,24(r4)
	addi	r4,r4,32
70:	std	r9,0(r3)
270:	std	r8,8(r3)
22:	ld	r9,0(r4)
222:	ld	r8,8(r4)
71:	std	r7,16(r3)
271:	std	r6,24(r3)
	addi	r3,r3,32
	bdnz	21b
72:	std	r9,0(r3)
272:	std	r8,8(r3)
	andi.	r5,r5,0xf
	beq+	3f
	addi	r4,r4,16
.Ldo_tail:
	addi	r3,r3,16
	bf	cr7*4+0,246f
244:	ld	r9,0(r4)
	addi	r4,r4,8
245:	std	r9,0(r3)
	addi	r3,r3,8
246:	bf	cr7*4+1,1f
23:	lwz	r9,0(r4)
	addi	r4,r4,4
73:	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
44:	lhz	r9,0(r4)
	addi	r4,r4,2
74:	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
45:	lbz	r9,0(r4)
75:	stb	r9,0(r3)
3:	li	r3,0
	blr

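/*
 * Source is not 8-byte aligned: load aligned doublewords from the
 * source and assemble each destination doubleword from an adjacent
 * pair with sld/srd.  r10 holds the shift count in bits (8 * the
 * source misalignment) and r11 holds 64 - r10.
 */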
.Lsrc_unaligned:
	srdi	r6,r5,3
	addi	r5,r5,-16
	subf	r4,r0,r4
	srdi	r7,r5,4
	sldi	r10,r0,3
	cmpldi	cr6,r6,3
	andi.	r5,r5,7
	mtctr	r7
	subfic	r11,r10,64
	add	r5,r5,r0
	bt	cr7*4+0,28f

24:	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
25:	ld	r0,8(r4)
	sld	r6,r9,r10
26:	ldu	r9,16(r4)
	srd	r7,r0,r11
	sld	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,79f
27:	ld	r0,8(r4)
	b	2f

28:	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
29:	ldu	r9,8(r4)
	sld	r8,r0,r10
	addi	r3,r3,-8
	blt	cr6,5f
30:	ld	r0,8(r4)
	srd	r12,r9,r11
	sld	r6,r9,r10
31:	ldu	r9,16(r4)
	or	r12,r8,r12
	srd	r7,r0,r11
	sld	r8,r0,r10
	addi	r3,r3,16
	beq	cr6,78f

1:	or	r7,r7,r6
32:	ld	r0,8(r4)
76:	std	r12,8(r3)
2:	srd	r12,r9,r11
	sld	r6,r9,r10
33:	ldu	r9,16(r4)
	or	r12,r8,r12
77:	stdu	r7,16(r3)
	srd	r7,r0,r11
	sld	r8,r0,r10
	bdnz	1b

78:	std	r12,8(r3)
	or	r7,r7,r6
79:	std	r7,16(r3)
5:	srd	r12,r9,r11
	or	r12,r8,r12
80:	std	r12,24(r3)
	bne	6f
	li	r3,0
	blr
6:	cmpwi	cr1,r5,8
	addi	r3,r3,32
	sld	r9,r9,r10
	ble	cr1,7f
34:	ld	r0,8(r4)
	srd	r7,r0,r11
	or	r9,r7,r9
7:
	bf	cr7*4+1,1f
	rotldi	r9,r9,32
94:	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
	rotldi	r9,r9,16
95:	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
	rotldi	r9,r9,8
96:	stb	r9,0(r3)
3:	li	r3,0
	blr

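/*
 * Destination is not 8-byte aligned: copy a byte, halfword and/or
 * word (selected by the low bits of r6, transferred into cr7) until
 * the destination reaches an 8-byte boundary, then rejoin the
 * aligned path at .Ldst_aligned.
 */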
.Ldst_unaligned:
	PPC_MTOCRF(0x01,r6)		/* put #bytes to 8B bdry into cr7 */
	subf	r5,r6,r5
	li	r7,0
	cmpldi	cr1,r5,16
	bf	cr7*4+3,1f
35:	lbz	r0,0(r4)
81:	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
36:	lhzx	r0,r7,r4
82:	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
37:	lwzx	r0,r7,r4
83:	stwx	r0,r7,r3
3:	PPC_MTOCRF(0x01,r5)
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned

.Lshort_copy:
	bf	cr7*4+0,1f
38:	lwz	r0,0(r4)
39:	lwz	r9,4(r4)
	addi	r4,r4,8
84:	stw	r0,0(r3)
85:	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
40:	lwz	r0,0(r4)
	addi	r4,r4,4
86:	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
41:	lhz	r0,0(r4)
	addi	r4,r4,2
87:	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
42:	lbz	r0,0(r4)
88:	stb	r0,0(r3)
4:	li	r3,0
	blr

/*
 * exception handlers follow
 * we have to return the number of bytes not copied
 * for an exception on a load, we set the rest of the destination to 0
 */
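/*
 * Each numbered load/store above is paired with a fixup label below
 * through the __ex_table entries at the end of this file (e.g. 20:
 * pairs with 120:); on a fault the exception handler resumes at the
 * paired fixup label.
 */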
136:
137:
	add	r3,r3,r7
	b	1f
130:
131:
	addi	r3,r3,8
120:
320:
122:
322:
124:
125:
126:
127:
128:
129:
133:
	addi	r3,r3,8
132:
	addi	r3,r3,8
121:
321:
344:
134:
135:
138:
139:
140:
141:
142:
123:
144:
145:

/*
 * here we have had a fault on a load and r3 points to the first
 * unmodified byte of the destination
 */
1:	ld	r6,-24(r1)
	ld	r4,-16(r1)
	ld	r5,-8(r1)
	subf	r6,r6,r3
	add	r4,r4,r6
	subf	r5,r6,r5	/* #bytes left to go */

/*
 * first see if we can copy any more bytes before hitting another exception
 */
	mtctr	r5
43:	lbz	r0,0(r4)
	addi	r4,r4,1
89:	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	43b
	li	r3,0		/* huh? all copied successfully this time? */
	blr

/*
 * here we have trapped again, need to clear ctr bytes starting at r3
 */
143:	mfctr	r5
	li	r0,0
	mr	r4,r3
	mr	r3,r5		/* return the number of bytes not copied */
1:	andi.	r9,r4,7
	beq	3f
90:	stb	r0,0(r4)
	addic.	r5,r5,-1
	addi	r4,r4,1
	bne	1b
	blr
3:	cmpldi	cr1,r5,8
	srdi	r9,r5,3
	andi.	r5,r5,7
	blt	cr1,93f
	mtctr	r9
91:	std	r0,0(r4)
	addi	r4,r4,8
	bdnz	91b
93:	beqlr
	mtctr	r5
92:	stb	r0,0(r4)
	addi	r4,r4,1
	bdnz	92b
	blr

/*
 * exception handlers for stores: we just need to work
 * out how many bytes weren't copied
 */
182:
183:
	add	r3,r3,r7
	b	1f
371:
180:
	addi	r3,r3,8
171:
177:
	addi	r3,r3,8
370:
372:
176:
178:
	addi	r3,r3,4
185:
	addi	r3,r3,4
170:
172:
345:
173:
174:
175:
179:
181:
184:
186:
187:
188:
189:
194:
195:
196:
1:
	ld	r6,-24(r1)
	ld	r5,-8(r1)
	add	r6,r6,r5
	subf	r3,r3,r6	/* #bytes not copied */
190:
191:
192:
	blr			/* #bytes not copied in r3 */

	.section __ex_table,"a"
	.align	3
	.llong	20b,120b
	.llong	220b,320b
	.llong	21b,121b
	.llong	221b,321b
	.llong	70b,170b
	.llong	270b,370b
	.llong	22b,122b
	.llong	222b,322b
	.llong	71b,171b
	.llong	271b,371b
	.llong	72b,172b
	.llong	272b,372b
	.llong	244b,344b
	.llong	245b,345b
	.llong	23b,123b
	.llong	73b,173b
	.llong	44b,144b
	.llong	74b,174b
	.llong	45b,145b
	.llong	75b,175b
	.llong	24b,124b
	.llong	25b,125b
	.llong	26b,126b
	.llong	27b,127b
	.llong	28b,128b
	.llong	29b,129b
	.llong	30b,130b
	.llong	31b,131b
	.llong	32b,132b
	.llong	76b,176b
	.llong	33b,133b
	.llong	77b,177b
	.llong	78b,178b
	.llong	79b,179b
	.llong	80b,180b
	.llong	34b,134b
	.llong	94b,194b
	.llong	95b,195b
	.llong	96b,196b
	.llong	35b,135b
	.llong	81b,181b
	.llong	36b,136b
	.llong	82b,182b
	.llong	37b,137b
	.llong	83b,183b
	.llong	38b,138b
	.llong	39b,139b
	.llong	84b,184b
	.llong	85b,185b
	.llong	40b,140b
	.llong	86b,186b
	.llong	41b,141b
	.llong	87b,187b
	.llong	42b,142b
	.llong	88b,188b
	.llong	43b,143b
	.llong	89b,189b
	.llong	90b,190b
	.llong	91b,191b
	.llong	92b,192b

	.text

/*
 * Routine to copy a whole page of data, optimized for POWER4.
 * On POWER4 it is more than 50% faster than the simple loop
 * above (following the .Ldst_aligned label) but it runs slightly
 * slower on POWER3.
 */
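/*
 * The inner loop below interleaves loads and stores across six
 * streams spaced 128 bytes apart (offsets 0, 128, 256, 384, 512
 * and 640) so that several cache-line fills are in flight at once.
 * r20-r31 are saved in the stack redzone first.
 */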
.Lcopy_page_4K:
	std	r31,-32(1)
	std	r30,-40(1)
	std	r29,-48(1)
	std	r28,-56(1)
	std	r27,-64(1)
	std	r26,-72(1)
	std	r25,-80(1)
	std	r24,-88(1)
	std	r23,-96(1)
	std	r22,-104(1)
	std	r21,-112(1)
	std	r20,-120(1)
	li	r5,4096/32 - 1
	addi	r3,r3,-8
	li	r0,5
0:	addi	r5,r5,-24
	mtctr	r0
20:	ld	r22,640(4)
21:	ld	r21,512(4)
22:	ld	r20,384(4)
23:	ld	r11,256(4)
24:	ld	r9,128(4)
25:	ld	r7,0(4)
26:	ld	r25,648(4)
27:	ld	r24,520(4)
28:	ld	r23,392(4)
29:	ld	r10,264(4)
30:	ld	r8,136(4)
31:	ldu	r6,8(4)
	cmpwi	r5,24
1:
32:	std	r22,648(3)
33:	std	r21,520(3)
34:	std	r20,392(3)
35:	std	r11,264(3)
36:	std	r9,136(3)
37:	std	r7,8(3)
38:	ld	r28,648(4)
39:	ld	r27,520(4)
40:	ld	r26,392(4)
41:	ld	r31,264(4)
42:	ld	r30,136(4)
43:	ld	r29,8(4)
44:	std	r25,656(3)
45:	std	r24,528(3)
46:	std	r23,400(3)
47:	std	r10,272(3)
48:	std	r8,144(3)
49:	std	r6,16(3)
50:	ld	r22,656(4)
51:	ld	r21,528(4)
52:	ld	r20,400(4)
53:	ld	r11,272(4)
54:	ld	r9,144(4)
55:	ld	r7,16(4)
56:	std	r28,664(3)
57:	std	r27,536(3)
58:	std	r26,408(3)
59:	std	r31,280(3)
60:	std	r30,152(3)
61:	stdu	r29,24(3)
62:	ld	r25,664(4)
63:	ld	r24,536(4)
64:	ld	r23,408(4)
65:	ld	r10,280(4)
66:	ld	r8,152(4)
67:	ldu	r6,24(4)
	bdnz	1b
68:	std	r22,648(3)
69:	std	r21,520(3)
70:	std	r20,392(3)
71:	std	r11,264(3)
72:	std	r9,136(3)
73:	std	r7,8(3)
74:	addi	r4,r4,640
75:	addi	r3,r3,648
	bge	0b
	mtctr	r5
76:	ld	r7,0(4)
77:	ld	r8,8(4)
78:	ldu	r9,16(4)
3:
79:	ld	r10,8(4)
80:	std	r7,8(3)
81:	ld	r7,16(4)
82:	std	r8,16(3)
83:	ld	r8,24(4)
84:	std	r9,24(3)
85:	ldu	r9,32(4)
86:	stdu	r10,32(3)
	bdnz	3b
4:
87:	ld	r10,8(4)
88:	std	r7,8(3)
89:	std	r8,16(3)
90:	std	r9,24(3)
91:	std	r10,32(3)
9:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	li	r3,0
	blr

/*
 * on an exception, reset to the beginning and jump back into the
 * standard __copy_tofrom_user
 */
100:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	ld	r3,-24(r1)
	ld	r4,-16(r1)
	li	r5,4096
	b	.Ldst_aligned

	.section __ex_table,"a"
	.align	3
	.llong	20b,100b
	.llong	21b,100b
	.llong	22b,100b
	.llong	23b,100b
	.llong	24b,100b
	.llong	25b,100b
	.llong	26b,100b
	.llong	27b,100b
	.llong	28b,100b
	.llong	29b,100b
	.llong	30b,100b
	.llong	31b,100b
	.llong	32b,100b
	.llong	33b,100b
	.llong	34b,100b
	.llong	35b,100b
	.llong	36b,100b
	.llong	37b,100b
	.llong	38b,100b
	.llong	39b,100b
	.llong	40b,100b
	.llong	41b,100b
	.llong	42b,100b
	.llong	43b,100b
	.llong	44b,100b
	.llong	45b,100b
	.llong	46b,100b
	.llong	47b,100b
	.llong	48b,100b
	.llong	49b,100b
	.llong	50b,100b
	.llong	51b,100b
	.llong	52b,100b
	.llong	53b,100b
	.llong	54b,100b
	.llong	55b,100b
	.llong	56b,100b
	.llong	57b,100b
	.llong	58b,100b
	.llong	59b,100b
	.llong	60b,100b
	.llong	61b,100b
	.llong	62b,100b
	.llong	63b,100b
	.llong	64b,100b
	.llong	65b,100b
	.llong	66b,100b
	.llong	67b,100b
	.llong	68b,100b
	.llong	69b,100b
	.llong	70b,100b
	.llong	71b,100b
	.llong	72b,100b
	.llong	73b,100b
	.llong	74b,100b
	.llong	75b,100b
	.llong	76b,100b
	.llong	77b,100b
	.llong	78b,100b
	.llong	79b,100b
	.llong	80b,100b
	.llong	81b,100b
	.llong	82b,100b
	.llong	83b,100b
	.llong	84b,100b
	.llong	85b,100b
	.llong	86b,100b
	.llong	87b,100b
	.llong	88b,100b
	.llong	89b,100b
	.llong	90b,100b
	.llong	91b,100b