/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Quick'n'dirty IP checksum ...
 *
 * Copyright (C) 1998, 1999 Ralf Baechle
 * Copyright (C) 1999 Silicon Graphics, Inc.
 * Copyright (C) 2007 Maciej W. Rozycki
 * Copyright (C) 2014 Imagination Technologies Ltd.
 */
#include <linux/errno.h>
#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/regdef.h>

#ifdef CONFIG_64BIT
/*
 * As we are sharing the code base with the mips32 tree (which uses the o32
 * ABI register definitions), we need to redefine the register definitions
 * from the n64 ABI register naming to the o32 ABI register naming.
 */
#undef t0
#undef t1
#undef t2
#undef t3
#define t0	$8
#define t1	$9
#define t2	$10
#define t3	$11
#define t4	$12
#define t5	$13
#define t6	$14
#define t7	$15

#define USE_DOUBLE
#endif

#ifdef USE_DOUBLE

#define LOAD   ld
#define LOAD32 lwu
#define ADD    daddu
#define NBYTES 8

#else

#define LOAD   lw
#define LOAD32 lw
#define ADD    addu
#define NBYTES 4

#endif /* USE_DOUBLE */

#define UNIT(unit)  ((unit)*NBYTES)

#define ADDC(sum,reg)						\
	ADD	sum, reg;					\
	sltu	v1, sum, reg;					\
	ADD	sum, v1;					\

#define ADDC32(sum,reg)						\
	addu	sum, reg;					\
	sltu	v1, sum, reg;					\
	addu	sum, v1;					\

#define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)	\
	LOAD	_t0, (offset + UNIT(0))(src);			\
	LOAD	_t1, (offset + UNIT(1))(src);			\
	LOAD	_t2, (offset + UNIT(2))(src);			\
	LOAD	_t3, (offset + UNIT(3))(src);			\
	ADDC(sum, _t0);						\
	ADDC(sum, _t1);						\
	ADDC(sum, _t2);						\
	ADDC(sum, _t3)

#ifdef USE_DOUBLE
#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)	\
	CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)
#else
#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)	\
	CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3);	\
	CSUM_BIGCHUNK1(src, offset + 0x10, sum, _t0, _t1, _t2, _t3)
#endif
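
/*
 * ADDC implements the end-around carry required for a ones'-complement
 * sum: after each add, a carry out of the register is folded back into
 * the low bit so the running total stays a valid partial checksum.
 * A rough C sketch of one ADDC step (illustrative only, not part of the
 * generated code; sum and reg stand for the GPR-wide values used here):
 *
 *	sum += reg;
 *	if (sum < reg)		// unsigned wrap-around => a carry was lost
 *		sum += 1;	// fold the carry back in
 *
 * ADDC32 is the same operation done with 32-bit adds, used when the
 * caller's 32-bit partial checksum is merged into the result.
 */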

/*
 * a0: source address
 * a1: length of the area to checksum
 * a2: partial checksum
 */

#define src a0
#define sum v0

	.text
	.set	noreorder
	.align	5
LEAF(csum_partial)
	move	sum, zero
	move	t7, zero

	sltiu	t8, a1, 0x8
	bnez	t8, .Lsmall_csumcpy		/* < 8 bytes to copy */
	 move	t2, a1

	andi	t7, src, 0x1			/* odd buffer? */

.Lhword_align:
	beqz	t7, .Lword_align
	 andi	t8, src, 0x2

	lbu	t0, (src)
	LONG_SUBU	a1, a1, 0x1
#ifdef __MIPSEL__
	sll	t0, t0, 8
#endif
	ADDC(sum, t0)
	PTR_ADDU	src, src, 0x1
	andi	t8, src, 0x2

.Lword_align:
	beqz	t8, .Ldword_align
	 sltiu	t8, a1, 56

	lhu	t0, (src)
	LONG_SUBU	a1, a1, 0x2
	ADDC(sum, t0)
	sltiu	t8, a1, 56
	PTR_ADDU	src, src, 0x2

.Ldword_align:
	bnez	t8, .Ldo_end_words
	 move	t8, a1

	andi	t8, src, 0x4
	beqz	t8, .Lqword_align
	 andi	t8, src, 0x8

	LOAD32	t0, 0x00(src)
	LONG_SUBU	a1, a1, 0x4
	ADDC(sum, t0)
	PTR_ADDU	src, src, 0x4
	andi	t8, src, 0x8

.Lqword_align:
	beqz	t8, .Loword_align
	 andi	t8, src, 0x10

#ifdef USE_DOUBLE
	ld	t0, 0x00(src)
	LONG_SUBU	a1, a1, 0x8
	ADDC(sum, t0)
#else
	lw	t0, 0x00(src)
	lw	t1, 0x04(src)
	LONG_SUBU	a1, a1, 0x8
	ADDC(sum, t0)
	ADDC(sum, t1)
#endif
	PTR_ADDU	src, src, 0x8
	andi	t8, src, 0x10

.Loword_align:
	beqz	t8, .Lbegin_movement
	 LONG_SRL	t8, a1, 0x7

#ifdef USE_DOUBLE
	ld	t0, 0x00(src)
	ld	t1, 0x08(src)
	ADDC(sum, t0)
	ADDC(sum, t1)
#else
	CSUM_BIGCHUNK1(src, 0x00, sum, t0, t1, t3, t4)
#endif
	LONG_SUBU	a1, a1, 0x10
	PTR_ADDU	src, src, 0x10
	LONG_SRL	t8, a1, 0x7

.Lbegin_movement:
	beqz	t8, 1f
	 andi	t2, a1, 0x40

.Lmove_128bytes:
	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4)
	LONG_SUBU	t8, t8, 0x01
	.set	reorder				/* DADDI_WAR */
	PTR_ADDU	src, src, 0x80
	bnez	t8, .Lmove_128bytes
	.set	noreorder

1:
	beqz	t2, 1f
	 andi	t2, a1, 0x20

.Lmove_64bytes:
	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
	PTR_ADDU	src, src, 0x40

1:
	beqz	t2, .Ldo_end_words
	 andi	t8, a1, 0x1c

.Lmove_32bytes:
	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
	andi	t8, a1, 0x1c
	PTR_ADDU	src, src, 0x20

.Ldo_end_words:
	beqz	t8, .Lsmall_csumcpy
	 andi	t2, a1, 0x3
	LONG_SRL	t8, t8, 0x2

.Lend_words:
	LOAD32	t0, (src)
	LONG_SUBU	t8, t8, 0x1
	ADDC(sum, t0)
	.set	reorder				/* DADDI_WAR */
	PTR_ADDU	src, src, 0x4
	bnez	t8, .Lend_words
	.set	noreorder

/* unknown src alignment and < 8 bytes to go */
.Lsmall_csumcpy:
	move	a1, t2

	andi	t0, a1, 4
	beqz	t0, 1f
	 andi	t0, a1, 2

	/* Still a full word to go */
	ulw	t1, (src)
	PTR_ADDIU	src, 4
#ifdef USE_DOUBLE
	dsll	t1, t1, 32			/* clear lower 32bit */
#endif
	ADDC(sum, t1)

1:	move	t1, zero
	beqz	t0, 1f
	 andi	t0, a1, 1

	/* Still a halfword to go */
	ulhu	t1, (src)
	PTR_ADDIU	src, 2

1:	beqz	t0, 1f
	 sll	t1, t1, 16

	lbu	t2, (src)
	 nop

#ifdef __MIPSEB__
	sll	t2, t2, 8
#endif
	or	t1, t2

1:	ADDC(sum, t1)

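	/*
	 * What follows folds the accumulator down to 32 bits and undoes
	 * the effect of an odd starting address.  Under USE_DOUBLE the
	 * sum was kept in a 64-bit register, so its high and low 32-bit
	 * halves are first added together with an end-around carry.  If
	 * the buffer started at an odd address (t7 != 0), every byte was
	 * accumulated one lane away from where a halfword-aligned walk
	 * would have put it, which a ones'-complement sum tolerates as
	 * long as the final result is byte-swapped within each halfword.
	 * A rough C sketch of that fixup (illustrative only):
	 *
	 *	sum = ((sum & 0x00ff00ff) << 8) | ((sum >> 8) & 0x00ff00ff);
	 *
	 * which is what the single wsbh instruction does on MIPS R2.
	 */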
	/* fold checksum */
#ifdef USE_DOUBLE
	dsll32	v1, sum, 0
	daddu	sum, v1
	sltu	v1, sum, v1
	dsra32	sum, sum, 0
	addu	sum, v1
#endif

	/* odd buffer alignment? */
#ifdef CONFIG_CPU_MIPSR2
	wsbh	v1, sum
	movn	sum, v1, t7
#else
	beqz	t7, 1f			/* odd buffer alignment? */
	 lui	v1, 0x00ff
	addu	v1, 0x00ff
	and	t0, sum, v1
	sll	t0, t0, 8
	srl	sum, sum, 8
	and	sum, sum, v1
	or	sum, sum, t0
1:
#endif
	.set	reorder
	/* Add the passed partial csum. */
	ADDC32(sum, a2)
	jr	ra
	.set	noreorder
	END(csum_partial)


/*
 * checksum and copy routines based on memcpy.S
 *
 *	csum_partial_copy_nocheck(src, dst, len, sum)
 *	__csum_partial_copy_kernel(src, dst, len, sum, errp)
 *
 * See "Spec" in memcpy.S for details.  Unlike __copy_user, all
 * functions in this file use the standard calling convention.
 */

#define src a0
#define dst a1
#define len a2
#define psum a3
#define sum v0
#define odd t8
#define errptr t9

/*
 * The exception handler for loads requires that:
 *  1- AT contain the address of the byte just past the end of the source
 *     of the copy,
 *  2- src_entry <= src < AT, and
 *  3- (dst - src) == (dst_entry - src_entry),
 * The _entry suffix denotes values when __copy_user was called.
 *
 * (1) is set up by __csum_partial_copy_from_user and maintained by
 *	not writing AT in __csum_partial_copy
 * (2) is met by incrementing src by the number of bytes copied
 * (3) is met by not doing loads between a pair of increments of dst and src
 *
 * The exception handlers for stores store -EFAULT to errptr and return.
 * These handlers do not need to overwrite any data.
 */

/* Instruction type */
#define LD_INSN 1
#define ST_INSN 2
#define LEGACY_MODE 1
#define EVA_MODE    2
#define USEROP   1
#define KERNELOP 2

/*
 * Wrapper to add an entry in the exception table
 * in case the insn causes a memory exception.
 * Arguments:
 *	insn    : Load/store instruction
 *	type    : Instruction type
 *	reg     : Register
 *	addr    : Address
 *	handler : Exception handler
 */
#define EXC(insn, type, reg, addr, handler)			\
	.if \mode == LEGACY_MODE;				\
9:		insn reg, addr;					\
		.section __ex_table,"a";			\
		PTR	9b, handler;				\
		.previous;					\
	/* This is enabled in EVA mode */			\
	.else;							\
		/* If loading from user or storing to user */	\
		.if ((\from == USEROP) && (type == LD_INSN)) || \
		    ((\to == USEROP) && (type == ST_INSN));	\
9:			__BUILD_EVA_INSN(insn##e, reg, addr);	\
			.section __ex_table,"a";		\
			PTR	9b, handler;			\
			.previous;				\
		.else;						\
			/* EVA without exception */		\
			insn reg, addr;				\
		.endif;						\
	.endif

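/*
 * All potentially faulting loads and stores in the copy go through EXC
 * above: the access is given a local "9:" label and a matching
 * __ex_table entry, i.e. a (faulting address, fixup address) pair that
 * the kernel's exception fixup code uses to resume at the .Ll_exc /
 * .Ls_exc handlers further down instead of oopsing.  In EVA mode only
 * the user-side accesses are rewritten to their EVA forms via
 * __BUILD_EVA_INSN; kernel-side accesses are emitted as plain
 * instructions with no exception table entry.
 */
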
#undef LOAD

#ifdef USE_DOUBLE

#define LOADK	ld /* No exception */
#define LOAD(reg, addr, handler)	EXC(ld, LD_INSN, reg, addr, handler)
#define LOADBU(reg, addr, handler)	EXC(lbu, LD_INSN, reg, addr, handler)
#define LOADL(reg, addr, handler)	EXC(ldl, LD_INSN, reg, addr, handler)
#define LOADR(reg, addr, handler)	EXC(ldr, LD_INSN, reg, addr, handler)
#define STOREB(reg, addr, handler)	EXC(sb, ST_INSN, reg, addr, handler)
#define STOREL(reg, addr, handler)	EXC(sdl, ST_INSN, reg, addr, handler)
#define STORER(reg, addr, handler)	EXC(sdr, ST_INSN, reg, addr, handler)
#define STORE(reg, addr, handler)	EXC(sd, ST_INSN, reg, addr, handler)
#define ADD    daddu
#define SUB    dsubu
#define SRL    dsrl
#define SLL    dsll
#define SLLV   dsllv
#define SRLV   dsrlv
#define NBYTES 8
#define LOG_NBYTES 3

#else

#define LOADK	lw /* No exception */
#define LOAD(reg, addr, handler)	EXC(lw, LD_INSN, reg, addr, handler)
#define LOADBU(reg, addr, handler)	EXC(lbu, LD_INSN, reg, addr, handler)
#define LOADL(reg, addr, handler)	EXC(lwl, LD_INSN, reg, addr, handler)
#define LOADR(reg, addr, handler)	EXC(lwr, LD_INSN, reg, addr, handler)
#define STOREB(reg, addr, handler)	EXC(sb, ST_INSN, reg, addr, handler)
#define STOREL(reg, addr, handler)	EXC(swl, ST_INSN, reg, addr, handler)
#define STORER(reg, addr, handler)	EXC(swr, ST_INSN, reg, addr, handler)
#define STORE(reg, addr, handler)	EXC(sw, ST_INSN, reg, addr, handler)
#define ADD    addu
#define SUB    subu
#define SRL    srl
#define SLL    sll
#define SLLV   sllv
#define SRLV   srlv
#define NBYTES 4
#define LOG_NBYTES 2

#endif /* USE_DOUBLE */

#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define LDFIRST LOADR
#define LDREST	LOADL
#define STFIRST STORER
#define STREST	STOREL
#define SHIFT_DISCARD SLLV
#define SHIFT_DISCARD_REVERT SRLV
#else
#define LDFIRST LOADL
#define LDREST	LOADR
#define STFIRST STOREL
#define STREST	STORER
#define SHIFT_DISCARD SRLV
#define SHIFT_DISCARD_REVERT SLLV
#endif

#define FIRST(unit) ((unit)*NBYTES)
#define REST(unit)  (FIRST(unit)+NBYTES-1)

#define ADDRMASK (NBYTES-1)

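/*
 * LDFIRST/LDREST and STFIRST/STREST pair the unaligned load/store
 * instructions (lwl/lwr, swl/swr, or the 64-bit ldl/ldr, sdl/sdr forms)
 * so that one FIRST plus one REST access moves a whole misaligned word;
 * which of the "left"/"right" forms touches the low-addressed bytes
 * depends on endianness, hence the two mappings above.  SHIFT_DISCARD
 * shifts away the bytes of a partially used word that fall outside the
 * copy, and SHIFT_DISCARD_REVERT shifts the kept bytes back into their
 * original lanes so they are checksummed in the right position.
 */
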
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
	.set	noat
#else
	.set	at=v1
#endif

	.macro __BUILD_CSUM_PARTIAL_COPY_USER mode, from, to, __nocheck

	PTR_ADDU	AT, src, len	/* See (1) above. */
	/* initialize __nocheck if this is the first time we execute this
	 * macro
	 */
#ifdef CONFIG_64BIT
	move	errptr, a4
#else
	lw	errptr, 16(sp)
#endif
	.if \__nocheck == 1
	FEXPORT(csum_partial_copy_nocheck)
	.endif
	move	sum, zero
	move	odd, zero
	/*
	 * Note: dst & src may be unaligned, len may be 0
	 * Temps
	 */
	/*
	 * The "issue break"s below are very approximate.
	 * Issue delays for dcache fills will perturb the schedule, as will
	 * load queue full replay traps, etc.
	 *
	 * If len < NBYTES use byte operations.
	 */
	sltu	t2, len, NBYTES
	and	t1, dst, ADDRMASK
	bnez	t2, .Lcopy_bytes_checklen\@
	 and	t0, src, ADDRMASK
	andi	odd, dst, 0x1			/* odd buffer? */
	bnez	t1, .Ldst_unaligned\@
	 nop
	bnez	t0, .Lsrc_unaligned_dst_aligned\@
	/*
	 * use delay slot for fall-through
	 * src and dst are aligned; need to compute rem
	 */
.Lboth_aligned\@:
	SRL	t0, len, LOG_NBYTES+3	 # +3 for 8 units/iter
	beqz	t0, .Lcleanup_both_aligned\@ # len < 8*NBYTES
	 nop
	SUB	len, 8*NBYTES		# subtract here for bgez loop
	.align	4
1:
	LOAD(t0, UNIT(0)(src), .Ll_exc\@)
	LOAD(t1, UNIT(1)(src), .Ll_exc_copy\@)
	LOAD(t2, UNIT(2)(src), .Ll_exc_copy\@)
	LOAD(t3, UNIT(3)(src), .Ll_exc_copy\@)
	LOAD(t4, UNIT(4)(src), .Ll_exc_copy\@)
	LOAD(t5, UNIT(5)(src), .Ll_exc_copy\@)
	LOAD(t6, UNIT(6)(src), .Ll_exc_copy\@)
	LOAD(t7, UNIT(7)(src), .Ll_exc_copy\@)
	SUB	len, len, 8*NBYTES
	ADD	src, src, 8*NBYTES
	STORE(t0, UNIT(0)(dst), .Ls_exc\@)
	ADDC(sum, t0)
	STORE(t1, UNIT(1)(dst), .Ls_exc\@)
	ADDC(sum, t1)
	STORE(t2, UNIT(2)(dst), .Ls_exc\@)
	ADDC(sum, t2)
	STORE(t3, UNIT(3)(dst), .Ls_exc\@)
	ADDC(sum, t3)
	STORE(t4, UNIT(4)(dst), .Ls_exc\@)
	ADDC(sum, t4)
	STORE(t5, UNIT(5)(dst), .Ls_exc\@)
	ADDC(sum, t5)
	STORE(t6, UNIT(6)(dst), .Ls_exc\@)
	ADDC(sum, t6)
	STORE(t7, UNIT(7)(dst), .Ls_exc\@)
	ADDC(sum, t7)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 8*NBYTES
	bgez	len, 1b
	.set	noreorder
	ADD	len, 8*NBYTES		# revert len (see above)

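	/*
	 * Loop accounting note for the unrolled loop above: len is
	 * biased down by 8*NBYTES before entering it, so the closing
	 * "bgez len" asks "is at least one more full block of 8 words
	 * left?" without needing a separate compare; the bias is
	 * reverted on exit.  All eight loads are issued before the
	 * stores so cache-miss latency can overlap, and each word is
	 * added into sum right after it is stored.
	 */
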
	/*
	 * len == the number of bytes left to copy < 8*NBYTES
	 */
.Lcleanup_both_aligned\@:
#define rem t7
	beqz	len, .Ldone\@
	 sltu	t0, len, 4*NBYTES
	bnez	t0, .Lless_than_4units\@
	 and	rem, len, (NBYTES-1)	# rem = len % NBYTES
	/*
	 * len >= 4*NBYTES
	 */
	LOAD(t0, UNIT(0)(src), .Ll_exc\@)
	LOAD(t1, UNIT(1)(src), .Ll_exc_copy\@)
	LOAD(t2, UNIT(2)(src), .Ll_exc_copy\@)
	LOAD(t3, UNIT(3)(src), .Ll_exc_copy\@)
	SUB	len, len, 4*NBYTES
	ADD	src, src, 4*NBYTES
	STORE(t0, UNIT(0)(dst), .Ls_exc\@)
	ADDC(sum, t0)
	STORE(t1, UNIT(1)(dst), .Ls_exc\@)
	ADDC(sum, t1)
	STORE(t2, UNIT(2)(dst), .Ls_exc\@)
	ADDC(sum, t2)
	STORE(t3, UNIT(3)(dst), .Ls_exc\@)
	ADDC(sum, t3)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 4*NBYTES
	beqz	len, .Ldone\@
	.set	noreorder
.Lless_than_4units\@:
	/*
	 * rem = len % NBYTES
	 */
	beq	rem, len, .Lcopy_bytes\@
	 nop
1:
	LOAD(t0, 0(src), .Ll_exc\@)
	ADD	src, src, NBYTES
	SUB	len, len, NBYTES
	STORE(t0, 0(dst), .Ls_exc\@)
	ADDC(sum, t0)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, NBYTES
	bne	rem, len, 1b
	.set	noreorder

	/*
	 * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
	 * A loop would do only a byte at a time with possible branch
	 * mispredicts.  Can't do an explicit LOAD dst,mask,or,STORE
	 * because can't assume read-access to dst.  Instead, use
	 * STREST dst, which doesn't require read access to dst.
	 *
	 * This code should perform better than a simple loop on modern,
	 * wide-issue mips processors because the code has fewer branches and
	 * more instruction-level parallelism.
	 */
#define bits t2
	beqz	len, .Ldone\@
	 ADD	t1, dst, len	# t1 is just past last byte of dst
	li	bits, 8*NBYTES
	SLL	rem, len, 3	# rem = number of bits to keep
	LOAD(t0, 0(src), .Ll_exc\@)
	SUB	bits, bits, rem # bits = number of bits to discard
	SHIFT_DISCARD t0, t0, bits
	STREST(t0, -1(t1), .Ls_exc\@)
	SHIFT_DISCARD_REVERT t0, t0, bits
	.set	reorder
	ADDC(sum, t0)
	b	.Ldone\@
	.set	noreorder
.Ldst_unaligned\@:
	/*
	 * dst is unaligned
	 * t0 = src & ADDRMASK
	 * t1 = dst & ADDRMASK; T1 > 0
	 * len >= NBYTES
	 *
	 * Copy enough bytes to align dst
	 * Set match = (src and dst have same alignment)
	 */
#define match rem
	LDFIRST(t3, FIRST(0)(src), .Ll_exc\@)
	ADD	t2, zero, NBYTES
	LDREST(t3, REST(0)(src), .Ll_exc_copy\@)
	SUB	t2, t2, t1	# t2 = number of bytes copied
	xor	match, t0, t1
	STFIRST(t3, FIRST(0)(dst), .Ls_exc\@)
	SLL	t4, t1, 3		# t4 = number of bits to discard
	SHIFT_DISCARD t3, t3, t4
	/* no SHIFT_DISCARD_REVERT to handle odd buffer properly */
	ADDC(sum, t3)
	beq	len, t2, .Ldone\@
	 SUB	len, len, t2
	ADD	dst, dst, t2
	beqz	match, .Lboth_aligned\@
	 ADD	src, src, t2

.Lsrc_unaligned_dst_aligned\@:
	SRL	t0, len, LOG_NBYTES+2	 # +2 for 4 units/iter
	beqz	t0, .Lcleanup_src_unaligned\@
	 and	rem, len, (4*NBYTES-1)	 # rem = len % 4*NBYTES
1:
/*
 * Avoid consecutive LD*'s to the same register since some mips
 * implementations can't issue them in the same cycle.
 * It's OK to load FIRST(N+1) before REST(N) because the two addresses
 * are to the same unit (unless src is aligned, but it's not).
 */
	LDFIRST(t0, FIRST(0)(src), .Ll_exc\@)
	LDFIRST(t1, FIRST(1)(src), .Ll_exc_copy\@)
	SUB	len, len, 4*NBYTES
	LDREST(t0, REST(0)(src), .Ll_exc_copy\@)
	LDREST(t1, REST(1)(src), .Ll_exc_copy\@)
	LDFIRST(t2, FIRST(2)(src), .Ll_exc_copy\@)
	LDFIRST(t3, FIRST(3)(src), .Ll_exc_copy\@)
	LDREST(t2, REST(2)(src), .Ll_exc_copy\@)
	LDREST(t3, REST(3)(src), .Ll_exc_copy\@)
	ADD	src, src, 4*NBYTES
#ifdef CONFIG_CPU_SB1
	nop				# improves slotting
#endif
	STORE(t0, UNIT(0)(dst), .Ls_exc\@)
	ADDC(sum, t0)
	STORE(t1, UNIT(1)(dst), .Ls_exc\@)
	ADDC(sum, t1)
	STORE(t2, UNIT(2)(dst), .Ls_exc\@)
	ADDC(sum, t2)
	STORE(t3, UNIT(3)(dst), .Ls_exc\@)
	ADDC(sum, t3)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 4*NBYTES
	bne	len, rem, 1b
	.set	noreorder

.Lcleanup_src_unaligned\@:
	beqz	len, .Ldone\@
	 and	rem, len, NBYTES-1  # rem = len % NBYTES
	beq	rem, len, .Lcopy_bytes\@
	 nop
1:
	LDFIRST(t0, FIRST(0)(src), .Ll_exc\@)
	LDREST(t0, REST(0)(src), .Ll_exc_copy\@)
	ADD	src, src, NBYTES
	SUB	len, len, NBYTES
	STORE(t0, 0(dst), .Ls_exc\@)
	ADDC(sum, t0)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, NBYTES
	bne	len, rem, 1b
	.set	noreorder

.Lcopy_bytes_checklen\@:
	beqz	len, .Ldone\@
	 nop
.Lcopy_bytes\@:
	/* 0 < len < NBYTES */
#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define SHIFT_START 0
#define SHIFT_INC 8
#else
#define SHIFT_START 8*(NBYTES-1)
#define SHIFT_INC -8
#endif
	move	t2, zero	# partial word
	li	t3, SHIFT_START # shift
/* use .Ll_exc_copy here to return correct sum on fault */
#define COPY_BYTE(N)			\
	LOADBU(t0, N(src), .Ll_exc_copy\@);	\
	SUB	len, len, 1;		\
	STOREB(t0, N(dst), .Ls_exc\@);	\
	SLLV	t0, t0, t3;		\
	addu	t3, SHIFT_INC;		\
	beqz	len, .Lcopy_bytes_done\@; \
	 or	t2, t0

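	/*
	 * The sub-word tail is checksummed as if it had been read with
	 * a single word load: each byte is stored to dst and, in
	 * parallel, shifted into the lane it would occupy in a word
	 * (SHIFT_START/SHIFT_INC pick the lane order per endianness)
	 * and OR-ed into t2; the assembled partial word is added to sum
	 * with one ADDC at .Lcopy_bytes_done.  The final byte position
	 * (NBYTES-2) is copied outside the macro since, with
	 * len < NBYTES, reaching it means it is the last byte and no
	 * further branch on len is needed.
	 */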
	COPY_BYTE(0)
	COPY_BYTE(1)
#ifdef USE_DOUBLE
	COPY_BYTE(2)
	COPY_BYTE(3)
	COPY_BYTE(4)
	COPY_BYTE(5)
#endif
	LOADBU(t0, NBYTES-2(src), .Ll_exc_copy\@)
	SUB	len, len, 1
	STOREB(t0, NBYTES-2(dst), .Ls_exc\@)
	SLLV	t0, t0, t3
	or	t2, t0
.Lcopy_bytes_done\@:
	ADDC(sum, t2)
.Ldone\@:
	/* fold checksum */
#ifdef USE_DOUBLE
	dsll32	v1, sum, 0
	daddu	sum, v1
	sltu	v1, sum, v1
	dsra32	sum, sum, 0
	addu	sum, v1
#endif

#ifdef CONFIG_CPU_MIPSR2
	wsbh	v1, sum
	movn	sum, v1, odd
#else
	beqz	odd, 1f			/* odd buffer alignment? */
	 lui	v1, 0x00ff
	addu	v1, 0x00ff
	and	t0, sum, v1
	sll	t0, t0, 8
	srl	sum, sum, 8
	and	sum, sum, v1
	or	sum, sum, t0
1:
#endif
	.set	reorder
	ADDC32(sum, psum)
	jr	ra
	.set	noreorder

.Ll_exc_copy\@:
	/*
	 * Copy bytes from src until faulting load address (or until a
	 * lb faults)
	 *
	 * When reached by a faulting LDFIRST/LDREST, THREAD_BUADDR($28)
	 * may be more than a byte beyond the last address.
	 * Hence, the lb below may get an exception.
	 *
	 * Assumes src < THREAD_BUADDR($28)
	 */
	LOADK	t0, TI_TASK($28)
	 li	t2, SHIFT_START
	LOADK	t0, THREAD_BUADDR(t0)
1:
	LOADBU(t1, 0(src), .Ll_exc\@)
	ADD	src, src, 1
	sb	t1, 0(dst)	# can't fault -- we're copy_from_user
	SLLV	t1, t1, t2
	addu	t2, SHIFT_INC
	ADDC(sum, t1)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 1
	bne	src, t0, 1b
	.set	noreorder
.Ll_exc\@:
	LOADK	t0, TI_TASK($28)
	 nop
	LOADK	t0, THREAD_BUADDR(t0)	# t0 is just past last good address
	 nop
	SUB	len, AT, t0		# len number of uncopied bytes
	/*
	 * Here's where we rely on src and dst being incremented in tandem,
	 *   See (3) above.
	 * dst += (fault addr - src) to put dst at first byte to clear
	 */
	ADD	dst, t0			# compute start address in a1
	SUB	dst, src
	/*
	 * Clear len bytes starting at dst.  Can't call __bzero because it
	 * might modify len.  An inefficient loop for these rare times...
	 */
	.set	reorder				/* DADDI_WAR */
	SUB	src, len, 1
	beqz	len, .Ldone\@
	.set	noreorder
1:	sb	zero, 0(dst)
	ADD	dst, dst, 1
	.set	push
	.set	noat
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
	bnez	src, 1b
	 SUB	src, src, 1
#else
	li	v1, 1
	bnez	src, 1b
	SUB	src, src, v1
#endif
	li	v1, -EFAULT
	b	.Ldone\@
	sw	v1, (errptr)

.Ls_exc\@:
	li	v0, -1 /* invalid checksum */
	li	v1, -EFAULT
	jr	ra
	sw	v1, (errptr)
	.set	pop
	.endm

LEAF(__csum_partial_copy_kernel)
#ifndef CONFIG_EVA
FEXPORT(__csum_partial_copy_to_user)
FEXPORT(__csum_partial_copy_from_user)
#endif
__BUILD_CSUM_PARTIAL_COPY_USER LEGACY_MODE USEROP USEROP 1
END(__csum_partial_copy_kernel)

#ifdef CONFIG_EVA
LEAF(__csum_partial_copy_to_user)
__BUILD_CSUM_PARTIAL_COPY_USER EVA_MODE KERNELOP USEROP 0
END(__csum_partial_copy_to_user)

LEAF(__csum_partial_copy_from_user)
__BUILD_CSUM_PARTIAL_COPY_USER EVA_MODE USEROP KERNELOP 0
END(__csum_partial_copy_from_user)
#endif
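
/*
 * Entry points: without CONFIG_EVA, __csum_partial_copy_to_user and
 * __csum_partial_copy_from_user are plain aliases of
 * __csum_partial_copy_kernel, expanded once in LEGACY_MODE (which, via
 * __nocheck=1, also exports csum_partial_copy_nocheck).  With
 * CONFIG_EVA the user variants are expanded separately so that only
 * the userspace side of each copy uses the EVA load/store forms.
 */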