/*
 * arch/ppc64/lib/copyuser.S
 *
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>

/*
 * unsigned long __copy_tofrom_user(void *to, const void *from, unsigned long n)
 *
 * In:  r3 = destination, r4 = source, r5 = byte count
 * Out: r3 = 0 on success, otherwise the number of bytes NOT copied
 *
 * Every load/store that can take a user-access fault carries a numeric
 * label NN and has an __ex_table entry sending it to fixup label 1NN
 * below.  For a fault on a load, the rest of the destination is zeroed
 * before returning.  The original r3/r4/r5 are stashed below the stack
 * pointer so the fixups can compute how much was left to copy.
 */
	.align	7
_GLOBAL(__copy_tofrom_user)
	/* first check for a whole page copy on a page boundary */
	cmpldi	cr1,r5,16		/* cr1: short (< 16 byte) copy? */
	cmpdi	cr6,r5,4096		/* cr6: exactly one page? */
	or	r0,r3,r4
	neg	r6,r3			/* LS 3 bits = # bytes to 8-byte dest bdry */
	andi.	r0,r0,4095		/* cr0: src and dst both page-aligned? */
	std	r3,-24(r1)		/* save args for the fault fixups */
	crand	cr0*4+2,cr0*4+2,cr6*4+2	/* EQ = page-aligned && n == 4096 */
	std	r4,-16(r1)
	std	r5,-8(r1)
	dcbt	0,r4			/* touch first source cache block */
	beq	.Lcopy_page_4K
	andi.	r6,r6,7			/* r6 = # bytes to 8-byte dest bdry */
	mtcrf	0x01,r5			/* cr7 = low 4 bits of n (tail sizes) */
	blt	cr1,.Lshort_copy
	bne	.Ldst_unaligned
.Ldst_aligned:
	andi.	r0,r4,7			/* r0 = source misalignment (0-7) */
	addi	r3,r3,-16		/* pre-bias dest for the stdu loop */
	bne	.Lsrc_unaligned
	srdi	r7,r5,4			/* r7 = # of 16-byte chunks */
20:	ld	r9,0(r4)
	addi	r4,r4,-8		/* pre-bias source likewise */
	mtctr	r7
	andi.	r5,r5,7			/* r5 = tail bytes (0-7) */
	bf	cr7*4+0,22f		/* even # of doublewords? skip */
	addi	r3,r3,8
	addi	r4,r4,8
	mr	r8,r9
	blt	cr1,72f
	/* main aligned loop: 16 bytes/iteration, one load kept in flight */
21:	ld	r9,8(r4)
70:	std	r8,8(r3)
22:	ldu	r8,16(r4)
71:	stdu	r9,16(r3)
	bdnz	21b
72:	std	r8,8(r3)
	beq+	3f			/* no tail bytes -> done */
	addi	r3,r3,16
23:	ld	r9,8(r4)
	/* store the final 1-7 bytes held in r9, as directed by cr7 */
.Ldo_tail:
	bf	cr7*4+1,1f
	rotldi	r9,r9,32
73:	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
	rotldi	r9,r9,16
74:	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
	rotldi	r9,r9,8
75:	stb	r9,0(r3)
3:	li	r3,0			/* success: 0 bytes not copied */
	blr

/*
 * Source not 8-byte aligned: read aligned doublewords and merge each
 * adjacent pair with sld/srd by r10 / (64 - r10) bits.
 */
.Lsrc_unaligned:
	srdi	r6,r5,3			/* r6 = # doublewords */
	addi	r5,r5,-16
	subf	r4,r0,r4		/* align source down to 8 bytes */
	srdi	r7,r5,4			/* r7 = main-loop trip count */
	sldi	r10,r0,3		/* r10 = misalignment in bits */
	cmpldi	cr6,r6,3
	andi.	r5,r5,7
	mtctr	r7
	subfic	r11,r10,64		/* r11 = 64 - r10 */
	add	r5,r5,r0
	bt	cr7*4+0,28f		/* odd number of doublewords? */

24:	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
25:	ld	r0,8(r4)
	sld	r6,r9,r10
26:	ldu	r9,16(r4)
	srd	r7,r0,r11
	sld	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,79f
27:	ld	r0,8(r4)
	b	2f

28:	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
29:	ldu	r9,8(r4)
	sld	r8,r0,r10
	addi	r3,r3,-8
	blt	cr6,5f
30:	ld	r0,8(r4)
	srd	r12,r9,r11
	sld	r6,r9,r10
31:	ldu	r9,16(r4)
	or	r12,r8,r12
	srd	r7,r0,r11
	sld	r8,r0,r10
	addi	r3,r3,16
	beq	cr6,78f

	/* steady-state merge loop: 16 bytes stored per iteration */
1:	or	r7,r7,r6
32:	ld	r0,8(r4)
76:	std	r12,8(r3)
2:	srd	r12,r9,r11
	sld	r6,r9,r10
33:	ldu	r9,16(r4)
	or	r12,r8,r12
77:	stdu	r7,16(r3)
	srd	r7,r0,r11
	sld	r8,r0,r10
	bdnz	1b

78:	std	r12,8(r3)
	or	r7,r7,r6
79:	std	r7,16(r3)
5:	srd	r12,r9,r11
	or	r12,r8,r12
80:	std	r12,24(r3)
	bne	6f			/* tail bytes remain? */
	li	r3,0
	blr
6:	cmpwi	cr1,r5,8
	addi	r3,r3,32
	sld	r9,r9,r10
	ble	cr1,.Ldo_tail
34:	ld	r0,8(r4)
	srd	r7,r0,r11
	or	r9,r7,r9
	b	.Ldo_tail

/*
 * Destination not 8-byte aligned: copy r6 (1-7) bytes to reach an
 * 8-byte boundary, then rejoin the aligned path.
 */
.Ldst_unaligned:
	mtcrf	0x01,r6		/* put #bytes to 8B bdry into cr7 */
	subf	r5,r6,r5	/* r5 = count remaining after alignment */
	li	r7,0		/* r7 = offset of next byte to copy */
	cmpldi	r1,r5,16	/* NOTE(review): 'r1' assembles as 1, i.e. cr1 -- confirm intent was cr1 */
	bf	cr7*4+3,1f
35:	lbz	r0,0(r4)
81:	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
36:	lhzx	r0,r7,r4
82:	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
37:	lwzx	r0,r7,r4
83:	stwx	r0,r7,r3
3:	mtcrf	0x01,r5		/* reload tail bits for the aligned path */
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned

/* Copy of 0-15 bytes, sizes taken from cr7 (low 4 bits of n). */
.Lshort_copy:
	bf	cr7*4+0,1f
38:	lwz	r0,0(r4)
39:	lwz	r9,4(r4)
	addi	r4,r4,8
84:	stw	r0,0(r3)
85:	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
40:	lwz	r0,0(r4)
	addi	r4,r4,4
86:	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
41:	lhz	r0,0(r4)
	addi	r4,r4,2
87:	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
42:	lbz	r0,0(r4)
88:	stb	r0,0(r3)
4:	li	r3,0
	blr

/*
 * exception handlers follow
 * we have to return the number of bytes not copied
 * for an exception on a load, we set the rest of the destination to 0
 */

/*
 * Load-fault fixups: fixup 1NN handles a fault at load NN.  Each group
 * adjusts r3 to the first destination byte NOT stored, falling through
 * to accumulate the addi adjustments, then joins 1: below.
 */
136:
137:
	add	r3,r3,r7	/* r3 += bytes done so far in .Ldst_unaligned */
	b	1f
130:
131:
	addi	r3,r3,8
120:
122:
124:
125:
126:
127:
128:
129:
133:
	addi	r3,r3,8
121:
132:
	addi	r3,r3,8
123:
134:
135:
138:
139:
140:
141:
142:

/*
 * here we have had a fault on a load and r3 points to the first
 * unmodified byte of the destination
 */
1:	ld	r6,-24(r1)	/* original destination */
	ld	r4,-16(r1)	/* original source */
	ld	r5,-8(r1)	/* original count */
	subf	r6,r6,r3	/* r6 = bytes successfully copied */
	add	r4,r4,r6
	subf	r5,r6,r5	/* #bytes left to go */

/*
 * first see if we can copy any more bytes before hitting another exception
 */
	mtctr	r5
43:	lbz	r0,0(r4)
	addi	r4,r4,1
89:	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	43b
	li	r3,0		/* huh? all copied successfully this time? */
	blr

/*
 * here we have trapped again, need to clear ctr bytes starting at r3
 */
143:	mfctr	r5		/* ctr = bytes not copied (and to clear) */
	li	r0,0
	mr	r4,r3
	mr	r3,r5		/* return the number of bytes not copied */
	/* clear byte-wise up to an 8-byte boundary ... */
1:	andi.	r9,r4,7
	beq	3f
90:	stb	r0,0(r4)
	addic.	r5,r5,-1
	addi	r4,r4,1
	bne	1b
	blr
	/* ... then doubleword-wise, then the final 0-7 bytes */
3:	cmpldi	cr1,r5,8
	srdi	r9,r5,3
	andi.	r5,r5,7
	blt	cr1,93f
	mtctr	r9
91:	std	r0,0(r4)
	addi	r4,r4,8
	bdnz	91b
93:	beqlr
	mtctr	r5
92:	stb	r0,0(r4)
	addi	r4,r4,1
	bdnz	92b
	blr

/*
 * exception handlers for stores: we just need to work
 * out how many bytes weren't copied
 * (each group advances r3 past stores known to have completed,
 *  then joins 1: to compute dest_end - r3)
 */
182:
183:
	add	r3,r3,r7
	b	1f
180:
	addi	r3,r3,8
171:
177:
	addi	r3,r3,8
170:
172:
176:
178:
	addi	r3,r3,4
185:
	addi	r3,r3,4
173:
174:
175:
179:
181:
184:
186:
187:
188:
189:
1:
	ld	r6,-24(r1)	/* original destination */
	ld	r5,-8(r1)	/* original count */
	add	r6,r6,r5	/* r6 = one past end of destination */
	subf	r3,r3,r6	/* #bytes not copied */
190:
191:
192:
	blr			/* #bytes not copied in r3 */

	/* fault at label NNb -> fixup at label 1NNb */
	.section __ex_table,"a"
	.align	3
	.llong	20b,120b
	.llong	21b,121b
	.llong	70b,170b
	.llong	22b,122b
	.llong	71b,171b
	.llong	72b,172b
	.llong	23b,123b
	.llong	73b,173b
	.llong	74b,174b
	.llong	75b,175b
	.llong	24b,124b
	.llong	25b,125b
	.llong	26b,126b
	.llong	27b,127b
	.llong	28b,128b
	.llong	29b,129b
	.llong	30b,130b
	.llong	31b,131b
	.llong	32b,132b
	.llong	76b,176b
	.llong	33b,133b
	.llong	77b,177b
	.llong	78b,178b
	.llong	79b,179b
	.llong	80b,180b
	.llong	34b,134b
	.llong	35b,135b
	.llong	81b,181b
	.llong	36b,136b
	.llong	82b,182b
	.llong	37b,137b
	.llong	83b,183b
	.llong	38b,138b
	.llong	39b,139b
	.llong	84b,184b
	.llong	85b,185b
	.llong	40b,140b
	.llong	86b,186b
	.llong	41b,141b
	.llong	87b,187b
	.llong	42b,142b
	.llong	88b,188b
	.llong	43b,143b
	.llong	89b,189b
	.llong	90b,190b
	.llong	91b,191b
	.llong	92b,192b

	.text

/*
 * Routine to copy a whole page of data, optimized for POWER4.
 * On POWER4 it is more than 50% faster than the simple loop
 * above (following the .Ldst_aligned label) but it runs slightly
 * slower on POWER3.
 */
.Lcopy_page_4K:
	/* save callee-saved r20-r31 below the SP; no frame is built
	 * (NOTE(review): relies on the area below r1 being safe to use) */
	std	r31,-32(1)
	std	r30,-40(1)
	std	r29,-48(1)
	std	r28,-56(1)
	std	r27,-64(1)
	std	r26,-72(1)
	std	r25,-80(1)
	std	r24,-88(1)
	std	r23,-96(1)
	std	r22,-104(1)
	std	r21,-112(1)
	std	r20,-120(1)
	li	r5,4096/32 - 1	/* r5 counts 32-byte units, biased by 1 */
	addi	r3,r3,-8	/* pre-bias dest for the stdu stores */
	li	r0,5		/* inner-loop trip count */
0:	addi	r5,r5,-24
	mtctr	r0
	/* prime six load streams spaced 128 bytes apart in the source */
20:	ld	r22,640(4)
21:	ld	r21,512(4)
22:	ld	r20,384(4)
23:	ld	r11,256(4)
24:	ld	r9,128(4)
25:	ld	r7,0(4)
26:	ld	r25,648(4)
27:	ld	r24,520(4)
28:	ld	r23,392(4)
29:	ld	r10,264(4)
30:	ld	r8,136(4)
31:	ldu	r6,8(4)
	cmpwi	r5,24		/* cr0 for the 'bge 0b' outer-loop test */
	/* software-pipelined: store prior dwords while loading ahead */
1:
32:	std	r22,648(3)
33:	std	r21,520(3)
34:	std	r20,392(3)
35:	std	r11,264(3)
36:	std	r9,136(3)
37:	std	r7,8(3)
38:	ld	r28,648(4)
39:	ld	r27,520(4)
40:	ld	r26,392(4)
41:	ld	r31,264(4)
42:	ld	r30,136(4)
43:	ld	r29,8(4)
44:	std	r25,656(3)
45:	std	r24,528(3)
46:	std	r23,400(3)
47:	std	r10,272(3)
48:	std	r8,144(3)
49:	std	r6,16(3)
50:	ld	r22,656(4)
51:	ld	r21,528(4)
52:	ld	r20,400(4)
53:	ld	r11,272(4)
54:	ld	r9,144(4)
55:	ld	r7,16(4)
56:	std	r28,664(3)
57:	std	r27,536(3)
58:	std	r26,408(3)
59:	std	r31,280(3)
60:	std	r30,152(3)
61:	stdu	r29,24(3)
62:	ld	r25,664(4)
63:	ld	r24,536(4)
64:	ld	r23,408(4)
65:	ld	r10,280(4)
66:	ld	r8,152(4)
67:	ldu	r6,24(4)
	bdnz	1b
	/* drain the in-flight loads for this outer iteration */
68:	std	r22,648(3)
69:	std	r21,520(3)
70:	std	r20,392(3)
71:	std	r11,264(3)
72:	std	r9,136(3)
73:	std	r7,8(3)
74:	addi	r4,r4,640
75:	addi	r3,r3,648
	bge	0b
	/* copy the remaining r5 32-byte units with a simple loop */
	mtctr	r5
76:	ld	r7,0(4)
77:	ld	r8,8(4)
78:	ldu	r9,16(4)
3:
79:	ld	r10,8(4)
80:	std	r7,8(3)
81:	ld	r7,16(4)
82:	std	r8,16(3)
83:	ld	r8,24(4)
84:	std	r9,24(3)
85:	ldu	r9,32(4)
86:	stdu	r10,32(3)
	bdnz	3b
4:
87:	ld	r10,8(4)
88:	std	r7,8(3)
89:	std	r8,16(3)
90:	std	r9,24(3)
91:	std	r10,32(3)
	/* restore callee-saved registers and return success */
9:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	li	r3,0
	blr

/*
 * on an exception, reset to the beginning and jump back into the
 * standard __copy_tofrom_user
 */
100:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	ld	r3,-24(r1)	/* original arguments saved at entry */
	ld	r4,-16(r1)
	li	r5,4096
	b	.Ldst_aligned	/* retry; its fixups report bytes not copied */

	/* any fault in the page-copy path restarts via 100b */
	.section __ex_table,"a"
	.align	3
	.llong	20b,100b
	.llong	21b,100b
	.llong	22b,100b
	.llong	23b,100b
	.llong	24b,100b
	.llong	25b,100b
	.llong	26b,100b
	.llong	27b,100b
	.llong	28b,100b
	.llong	29b,100b
	.llong	30b,100b
	.llong	31b,100b
	.llong	32b,100b
	.llong	33b,100b
	.llong	34b,100b
	.llong	35b,100b
	.llong	36b,100b
	.llong	37b,100b
	.llong	38b,100b
	.llong	39b,100b
	.llong	40b,100b
	.llong	41b,100b
	.llong	42b,100b
	.llong	43b,100b
	.llong	44b,100b
	.llong	45b,100b
	.llong	46b,100b
	.llong	47b,100b
	.llong	48b,100b
	.llong	49b,100b
	.llong	50b,100b
	.llong	51b,100b
	.llong	52b,100b
	.llong	53b,100b
	.llong	54b,100b
	.llong	55b,100b
	.llong	56b,100b
	.llong	57b,100b
	.llong	58b,100b
	.llong	59b,100b
	.llong	60b,100b
	.llong	61b,100b
	.llong	62b,100b
	.llong	63b,100b
	.llong	64b,100b
	.llong	65b,100b
	.llong	66b,100b
	.llong	67b,100b
	.llong	68b,100b
	.llong	69b,100b
	.llong	70b,100b
	.llong	71b,100b
	.llong	72b,100b
	.llong	73b,100b
	.llong	74b,100b
	.llong	75b,100b
	.llong	76b,100b
	.llong	77b,100b
	.llong	78b,100b
	.llong	79b,100b
	.llong	80b,100b
	.llong	81b,100b
	.llong	82b,100b
	.llong	83b,100b
	.llong	84b,100b
	.llong	85b,100b
	.llong	86b,100b
	.llong	87b,100b
	.llong	88b,100b
	.llong	89b,100b
	.llong	90b,100b
	.llong	91b,100b