/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>

/*
 * unsigned long __copy_tofrom_user(void *to, const void *from, unsigned long n)
 *
 * In:   r3 = destination, r4 = source, r5 = byte count
 * Out:  r3 = number of bytes NOT copied (0 on complete success); on a
 *       faulting load the remaining destination bytes are zeroed (see the
 *       exception handlers later in this file).
 *
 * r3/r4/r5 are stashed at r1-24/-16/-8 (red zone) so the fixup code can
 * recompute how far we got.  cr7 receives the low bits of the length via
 * PPC_MTOCRF so the tail code can test them with bf/bt.
 */
	.align	7
_GLOBAL(__copy_tofrom_user)
	/* first check for a whole page copy on a page boundary */
	cmpldi	cr1,r5,16
	cmpdi	cr6,r5,4096
	or	r0,r3,r4
	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
	andi.	r0,r0,4095
	std	r3,-24(r1)
	crand	cr0*4+2,cr0*4+2,cr6*4+2	/* page-aligned AND len == 4096 */
	std	r4,-16(r1)
	std	r5,-8(r1)
	dcbt	0,r4		/* prefetch first source line */
	beq	.Lcopy_page_4K
	andi.	r6,r6,7		/* r6 = bytes to 8-byte dest boundary */
	PPC_MTOCRF	0x01,r5	/* put low 4 bits of len into cr7 */
	blt	cr1,.Lshort_copy
	bne	.Ldst_unaligned

/* Destination is 8-byte aligned; copy 16 bytes per loop iteration. */
.Ldst_aligned:
	andi.	r0,r4,7		/* source alignment within a doubleword */
	addi	r3,r3,-16
	bne	.Lsrc_unaligned
	srdi	r7,r5,4		/* r7 = # of 16-byte chunks */
20:	ld	r9,0(r4)
	addi	r4,r4,-8
	mtctr	r7
	andi.	r5,r5,7
	bf	cr7*4+0,22f	/* len bit 3 clear -> even # of doublewords */
	addi	r3,r3,8
	addi	r4,r4,8
	mr	r8,r9
	blt	cr1,72f
21:	ld	r9,8(r4)
70:	std	r8,8(r3)
22:	ldu	r8,16(r4)
71:	stdu	r9,16(r3)
	bdnz	21b
72:	std	r8,8(r3)
	beq+	3f		/* no tail bytes left */
	addi	r3,r3,16
23:	ld	r9,8(r4)
/* Store the final 1-7 bytes held in r9, high-order byte first. */
.Ldo_tail:
	bf	cr7*4+1,1f
	rotldi	r9,r9,32
73:	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
	rotldi	r9,r9,16
74:	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
	rotldi	r9,r9,8
75:	stb	r9,0(r3)
3:	li	r3,0		/* success: 0 bytes not copied */
	blr

/*
 * Source not doubleword-aligned: read aligned doublewords and merge
 * adjacent pairs with sld/srd (shift counts r10 and r11 = 64 - r10).
 */
.Lsrc_unaligned:
	srdi	r6,r5,3
	addi	r5,r5,-16
	subf	r4,r0,r4	/* round source down to doubleword */
	srdi	r7,r5,4
	sldi	r10,r0,3	/* r10 = left-shift (bits) for merging */
	cmpldi	cr6,r6,3
	andi.	r5,r5,7
	mtctr	r7
	subfic	r11,r10,64	/* r11 = complementary right-shift */
	add	r5,r5,r0
	bt	cr7*4+0,28f

24:	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
25:	ld	r0,8(r4)
	sld	r6,r9,r10
26:	ldu	r9,16(r4)
	srd	r7,r0,r11
	sld	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,79f
27:	ld	r0,8(r4)
	b	2f

28:	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
29:	ldu	r9,8(r4)
	sld	r8,r0,r10
	addi	r3,r3,-8
	blt	cr6,5f
30:	ld	r0,8(r4)
	srd	r12,r9,r11
	sld	r6,r9,r10
31:	ldu	r9,16(r4)
	or	r12,r8,r12
	srd	r7,r0,r11
	sld	r8,r0,r10
	addi	r3,r3,16
	beq	cr6,78f

/* Main merge loop: two merged doublewords stored per iteration. */
1:	or	r7,r7,r6
32:	ld	r0,8(r4)
76:	std	r12,8(r3)
2:	srd	r12,r9,r11
	sld	r6,r9,r10
33:	ldu	r9,16(r4)
	or	r12,r8,r12
77:	stdu	r7,16(r3)
	srd	r7,r0,r11
	sld	r8,r0,r10
	bdnz	1b

78:	std	r12,8(r3)
	or	r7,r7,r6
79:	std	r7,16(r3)
5:	srd	r12,r9,r11
	or	r12,r8,r12
80:	std	r12,24(r3)
	bne	6f		/* any tail bytes? */
	li	r3,0
	blr
6:	cmpwi	cr1,r5,8
	addi	r3,r3,32
	sld	r9,r9,r10	/* position tail bytes at top of r9 */
	ble	cr1,.Ldo_tail
34:	ld	r0,8(r4)
	srd	r7,r0,r11
	or	r9,r7,r9	/* tail straddles a doubleword: merge */
	b	.Ldo_tail

/*
 * Destination not 8-byte aligned: copy 1/2/4 bytes (per cr7, set from r6
 * above) to reach an 8-byte boundary, then rejoin the aligned path.
 */
.Ldst_unaligned:
	PPC_MTOCRF	0x01,r6	/* put #bytes to 8B bdry into cr7 */
	subf	r5,r6,r5
	li	r7,0		/* r7 = offset of next byte to copy */
	/* BUGFIX: was "cmpldi r1,r5,16" — the first operand must be a
	 * condition-register field, not GPR r1; entry code and the tail
	 * logic use cr1, and binutils >= 2.26 rejects a bare r1 here. */
	cmpldi	cr1,r5,16
	bf	cr7*4+3,1f
35:	lbz	r0,0(r4)
81:	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
36:	lhzx	r0,r7,r4
82:	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
37:	lwzx	r0,r7,r4
83:	stwx	r0,r7,r3
3:	PPC_MTOCRF	0x01,r5	/* refresh cr7 with remaining-length bits */
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned

/* Copies of fewer than 16 bytes: straight-line 8/4/2/1-byte moves. */
.Lshort_copy:
	bf	cr7*4+0,1f
38:	lwz	r0,0(r4)
39:	lwz	r9,4(r4)
	addi	r4,r4,8
84:	stw	r0,0(r3)
85:	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
40:	lwz	r0,0(r4)
	addi	r4,r4,4
86:	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
41:	lhz	r0,0(r4)
	addi	r4,r4,2
87:	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
42:	lbz	r0,0(r4)
88:	stb	r0,0(r3)
4:	li	r3,0		/* success */
	blr
/*
 * exception handlers follow
 * we have to return the number of bytes not copied
 * for an exception on a load, we set the rest of the destination to 0
 *
 * Each numbered handler label 1NN: pairs with load/store site NN: via the
 * __ex_table at the end of this block.  The handlers adjust r3 to point at
 * the first unmodified destination byte before falling into the common code.
 */

/* faults at 36:/37: — r7 holds the byte offset already copied */
136:
137:
	add	r3,r3,r7
	b	1f
130:
131:
	addi	r3,r3,8		/* these sites had stored 16 bytes past r3 */
120:
122:
124:
125:
126:
127:
128:
129:
133:
	addi	r3,r3,8
121:
132:
	addi	r3,r3,8
123:
134:
135:
138:
139:
140:
141:
142:

/*
 * here we have had a fault on a load and r3 points to the first
 * unmodified byte of the destination
 */
1:	ld	r6,-24(r1)	/* original destination */
	ld	r4,-16(r1)	/* original source */
	ld	r5,-8(r1)	/* original length */
	subf	r6,r6,r3	/* r6 = #bytes already copied */
	add	r4,r4,r6
	subf	r5,r6,r5	/* #bytes left to go */

/*
 * first see if we can copy any more bytes before hitting another exception
 */
	mtctr	r5
43:	lbz	r0,0(r4)
	addi	r4,r4,1
89:	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	43b
	li	r3,0		/* huh? all copied successfully this time? */
	blr

/*
 * here we have trapped again, need to clear ctr bytes starting at r3
 */
143:	mfctr	r5
	li	r0,0
	mr	r4,r3
	mr	r3,r5		/* return the number of bytes not copied */
1:	andi.	r9,r4,7		/* byte-clear up to an 8-byte boundary */
	beq	3f
90:	stb	r0,0(r4)
	addic.	r5,r5,-1
	addi	r4,r4,1
	bne	1b
	blr
3:	cmpldi	cr1,r5,8
	srdi	r9,r5,3		/* r9 = # of whole doublewords to clear */
	andi.	r5,r5,7		/* r5 = leftover bytes */
	blt	cr1,93f
	mtctr	r9
91:	std	r0,0(r4)
	addi	r4,r4,8
	bdnz	91b
93:	beqlr			/* no leftover bytes */
	mtctr	r5
92:	stb	r0,0(r4)
	addi	r4,r4,1
	bdnz	92b
	blr

/*
 * exception handlers for stores: we just need to work
 * out how many bytes weren't copied
 */
182:
183:
	add	r3,r3,r7	/* r7 = offset within the unaligned-head copy */
	b	1f
180:
	addi	r3,r3,8
171:
177:
	addi	r3,r3,8
170:
172:
176:
178:
	addi	r3,r3,4
185:
	addi	r3,r3,4
173:
174:
175:
179:
181:
184:
186:
187:
188:
189:
1:
	ld	r6,-24(r1)	/* original destination */
	ld	r5,-8(r1)	/* original length */
	add	r6,r6,r5	/* r6 = one past the end of the destination */
	subf	r3,r3,r6	/* #bytes not copied */
190:
191:
192:
	blr			/* #bytes not copied in r3 */

	/* fixup table: pairs each faultable access with its handler above */
	.section __ex_table,"a"
	.align	3
	.llong	20b,120b
	.llong	21b,121b
	.llong	70b,170b
	.llong	22b,122b
	.llong	71b,171b
	.llong	72b,172b
	.llong	23b,123b
	.llong	73b,173b
	.llong	74b,174b
	.llong	75b,175b
	.llong	24b,124b
	.llong	25b,125b
	.llong	26b,126b
	.llong	27b,127b
	.llong	28b,128b
	.llong	29b,129b
	.llong	30b,130b
	.llong	31b,131b
	.llong	32b,132b
	.llong	76b,176b
	.llong	33b,133b
	.llong	77b,177b
	.llong	78b,178b
	.llong	79b,179b
	.llong	80b,180b
	.llong	34b,134b
	.llong	35b,135b
	.llong	81b,181b
	.llong	36b,136b
	.llong	82b,182b
	.llong	37b,137b
	.llong	83b,183b
	.llong	38b,138b
	.llong	39b,139b
	.llong	84b,184b
	.llong	85b,185b
	.llong	40b,140b
	.llong	86b,186b
	.llong	41b,141b
	.llong	87b,187b
	.llong	42b,142b
	.llong	88b,188b
	.llong	43b,143b
	.llong	89b,189b
	.llong	90b,190b
	.llong	91b,191b
	.llong	92b,192b

	.text
/*
 * Routine to copy a whole page of data, optimized for POWER4.
 * On POWER4 it is more than 50% faster than the simple loop
 * above (following the .Ldst_aligned label) but it runs slightly
 * slower on POWER3.
 *
 * Reached from __copy_tofrom_user when r3 is page-aligned and r5 == 4096.
 * Loads/stores are interleaved across six streams spaced 128 bytes apart
 * (offsets 0, 128, 256, 384, 512, 640) to keep multiple cache lines in
 * flight.  r20-r31 are callee-saved and spilled to the red zone below r1.
 * Note: bare numbers 1/3/4 in memory operands are registers r1/r3/r4.
 */
.Lcopy_page_4K:
	std	r31,-32(1)
	std	r30,-40(1)
	std	r29,-48(1)
	std	r28,-56(1)
	std	r27,-64(1)
	std	r26,-72(1)
	std	r25,-80(1)
	std	r24,-88(1)
	std	r23,-96(1)
	std	r22,-104(1)
	std	r21,-112(1)
	std	r20,-120(1)
	li	r5,4096/32 - 1	/* r5 counts 32-byte groups remaining */
	addi	r3,r3,-8	/* pre-bias dest for stdu addressing */
	li	r0,5		/* inner-loop trip count */
0:	addi	r5,r5,-24
	mtctr	r0
	/* prime the pipeline: first load from each of the six streams */
20:	ld	r22,640(4)
21:	ld	r21,512(4)
22:	ld	r20,384(4)
23:	ld	r11,256(4)
24:	ld	r9,128(4)
25:	ld	r7,0(4)
26:	ld	r25,648(4)
27:	ld	r24,520(4)
28:	ld	r23,392(4)
29:	ld	r10,264(4)
30:	ld	r8,136(4)
31:	ldu	r6,8(4)
	cmpwi	r5,24		/* sets CR0 for the bge 0b below */
/* software-pipelined core: store previous loads while issuing new ones */
1:
32:	std	r22,648(3)
33:	std	r21,520(3)
34:	std	r20,392(3)
35:	std	r11,264(3)
36:	std	r9,136(3)
37:	std	r7,8(3)
38:	ld	r28,648(4)
39:	ld	r27,520(4)
40:	ld	r26,392(4)
41:	ld	r31,264(4)
42:	ld	r30,136(4)
43:	ld	r29,8(4)
44:	std	r25,656(3)
45:	std	r24,528(3)
46:	std	r23,400(3)
47:	std	r10,272(3)
48:	std	r8,144(3)
49:	std	r6,16(3)
50:	ld	r22,656(4)
51:	ld	r21,528(4)
52:	ld	r20,400(4)
53:	ld	r11,272(4)
54:	ld	r9,144(4)
55:	ld	r7,16(4)
56:	std	r28,664(3)
57:	std	r27,536(3)
58:	std	r26,408(3)
59:	std	r31,280(3)
60:	std	r30,152(3)
61:	stdu	r29,24(3)
62:	ld	r25,664(4)
63:	ld	r24,536(4)
64:	ld	r23,408(4)
65:	ld	r10,280(4)
66:	ld	r8,152(4)
67:	ldu	r6,24(4)
	bdnz	1b
	/* drain the pipeline: store the last batch of loaded values */
68:	std	r22,648(3)
69:	std	r21,520(3)
70:	std	r20,392(3)
71:	std	r11,264(3)
72:	std	r9,136(3)
73:	std	r7,8(3)
74:	addi	r4,r4,640
75:	addi	r3,r3,648
	bge	0b		/* more full six-stream passes to do */
	/* simple 32-bytes-per-iteration loop for the remainder */
	mtctr	r5
76:	ld	r7,0(4)
77:	ld	r8,8(4)
78:	ldu	r9,16(4)
3:
79:	ld	r10,8(4)
80:	std	r7,8(3)
81:	ld	r7,16(4)
82:	std	r8,16(3)
83:	ld	r8,24(4)
84:	std	r9,24(3)
85:	ldu	r9,32(4)
86:	stdu	r10,32(3)
	bdnz	3b
4:
87:	ld	r10,8(4)
88:	std	r7,8(3)
89:	std	r8,16(3)
90:	std	r9,24(3)
91:	std	r10,32(3)
	/* restore the callee-saved registers and return success */
9:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	li	r3,0		/* 0 bytes not copied */
	blr

/*
 * on an exception, reset to the beginning and jump back into the
 * standard __copy_tofrom_user
 */
100:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	ld	r3,-24(r1)	/* reload original dest/src saved at entry */
	ld	r4,-16(r1)
	li	r5,4096
	b	.Ldst_aligned	/* redo the copy; its handlers do the fixup */

	/* every faultable access above restarts via handler 100: */
	.section __ex_table,"a"
	.align	3
	.llong	20b,100b
	.llong	21b,100b
	.llong	22b,100b
	.llong	23b,100b
	.llong	24b,100b
	.llong	25b,100b
	.llong	26b,100b
	.llong	27b,100b
	.llong	28b,100b
	.llong	29b,100b
	.llong	30b,100b
	.llong	31b,100b
	.llong	32b,100b
	.llong	33b,100b
	.llong	34b,100b
	.llong	35b,100b
	.llong	36b,100b
	.llong	37b,100b
	.llong	38b,100b
	.llong	39b,100b
	.llong	40b,100b
	.llong	41b,100b
	.llong	42b,100b
	.llong	43b,100b
	.llong	44b,100b
	.llong	45b,100b
	.llong	46b,100b
	.llong	47b,100b
	.llong	48b,100b
	.llong	49b,100b
	.llong	50b,100b
	.llong	51b,100b
	.llong	52b,100b
	.llong	53b,100b
	.llong	54b,100b
	.llong	55b,100b
	.llong	56b,100b
	.llong	57b,100b
	.llong	58b,100b
	.llong	59b,100b
	.llong	60b,100b
	.llong	61b,100b
	.llong	62b,100b
	.llong	63b,100b
	.llong	64b,100b
	.llong	65b,100b
	.llong	66b,100b
	.llong	67b,100b
	.llong	68b,100b
	.llong	69b,100b
	.llong	70b,100b
	.llong	71b,100b
	.llong	72b,100b
	.llong	73b,100b
	.llong	74b,100b
	.llong	75b,100b
	.llong	76b,100b
	.llong	77b,100b
	.llong	78b,100b
	.llong	79b,100b
	.llong	80b,100b
	.llong	81b,100b
	.llong	82b,100b
	.llong	83b,100b
	.llong	84b,100b
	.llong	85b,100b
	.llong	86b,100b
	.llong	87b,100b
	.llong	88b,100b
	.llong	89b,100b
	.llong	90b,100b
	.llong	91b,100b