/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Quick'n'dirty IP checksum ...
 *
 * Copyright (C) 1998, 1999 Ralf Baechle
 * Copyright (C) 1999 Silicon Graphics, Inc.
 * Copyright (C) 2007 Maciej W. Rozycki
 * Copyright (C) 2014 Imagination Technologies Ltd.
 */
#include <linux/errno.h>
#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/export.h>
#include <asm/regdef.h>

#ifdef CONFIG_64BIT
/*
 * As we are sharing code with the mips32 tree (which uses the o32 ABI
 * register definitions), we need to redefine the register definitions from
 * the n64 ABI register naming to the o32 ABI register naming.
 */
#undef t0
#undef t1
#undef t2
#undef t3
#define t0	$8
#define t1	$9
#define t2	$10
#define t3	$11
#define t4	$12
#define t5	$13
#define t6	$14
#define t7	$15

#define USE_DOUBLE
#endif

#ifdef USE_DOUBLE

#define LOAD   ld
#define LOAD32 lwu
#define ADD    daddu
#define NBYTES 8

#else

#define LOAD   lw
#define LOAD32 lw
#define ADD    addu
#define NBYTES 4

#endif /* USE_DOUBLE */

#define UNIT(unit)  ((unit)*NBYTES)

#define ADDC(sum,reg)						\
	.set	push;						\
	.set	noat;						\
	ADD	sum, reg;					\
	sltu	v1, sum, reg;					\
	ADD	sum, v1;					\
	.set	pop

#define ADDC32(sum,reg)						\
	.set	push;						\
	.set	noat;						\
	addu	sum, reg;					\
	sltu	v1, sum, reg;					\
	addu	sum, v1;					\
	.set	pop

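/*
 * ADDC implements the ones'-complement add used by the Internet checksum:
 * after the plain ADD, sltu captures the carry (the sum wrapped iff
 * sum < reg unsigned) and folds it back into the low end.  For example,
 * with a 32-bit sum, 0xffffffff + 0x00000002 wraps to 0x00000001 with a
 * carry, and adding the carry back gives 0x00000002, the ones'-complement
 * result.
 */
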
#define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)	\
	LOAD	_t0, (offset + UNIT(0))(src);			\
	LOAD	_t1, (offset + UNIT(1))(src);			\
	LOAD	_t2, (offset + UNIT(2))(src);			\
	LOAD	_t3, (offset + UNIT(3))(src);			\
	ADDC(_t0, _t1);						\
	ADDC(_t2, _t3);						\
	ADDC(sum, _t0);						\
	ADDC(sum, _t2)

#ifdef USE_DOUBLE
#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)	\
	CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)
#else
#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)	\
	CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3);	\
	CSUM_BIGCHUNK1(src, offset + 0x10, sum, _t0, _t1, _t2, _t3)
#endif

/*
 * a0: source address
 * a1: length of the area to checksum
 * a2: partial checksum
 */

#define src a0
#define sum v0

	.text
	.set	noreorder
	.align	5
LEAF(csum_partial)
EXPORT_SYMBOL(csum_partial)
	move	sum, zero
	move	t7, zero

	sltiu	t8, a1, 0x8
	bnez	t8, .Lsmall_csumcpy		/* < 8 bytes to copy */
	 move	t2, a1

	andi	t7, src, 0x1			/* odd buffer? */

.Lhword_align:
	beqz	t7, .Lword_align
	 andi	t8, src, 0x2

	lbu	t0, (src)
	LONG_SUBU	a1, a1, 0x1
#ifdef __MIPSEL__
	sll	t0, t0, 8
#endif
	ADDC(sum, t0)
	PTR_ADDU	src, src, 0x1
	andi	t8, src, 0x2

.Lword_align:
	beqz	t8, .Ldword_align
	 sltiu	t8, a1, 56

	lhu	t0, (src)
	LONG_SUBU	a1, a1, 0x2
	ADDC(sum, t0)
	sltiu	t8, a1, 56
	PTR_ADDU	src, src, 0x2

.Ldword_align:
	bnez	t8, .Ldo_end_words
	 move	t8, a1

	andi	t8, src, 0x4
	beqz	t8, .Lqword_align
	 andi	t8, src, 0x8

	LOAD32	t0, 0x00(src)
	LONG_SUBU	a1, a1, 0x4
	ADDC(sum, t0)
	PTR_ADDU	src, src, 0x4
	andi	t8, src, 0x8

.Lqword_align:
	beqz	t8, .Loword_align
	 andi	t8, src, 0x10

#ifdef USE_DOUBLE
	ld	t0, 0x00(src)
	LONG_SUBU	a1, a1, 0x8
	ADDC(sum, t0)
#else
	lw	t0, 0x00(src)
	lw	t1, 0x04(src)
	LONG_SUBU	a1, a1, 0x8
	ADDC(sum, t0)
	ADDC(sum, t1)
#endif
	PTR_ADDU	src, src, 0x8
	andi	t8, src, 0x10

.Loword_align:
	beqz	t8, .Lbegin_movement
	 LONG_SRL	t8, a1, 0x7

#ifdef USE_DOUBLE
	ld	t0, 0x00(src)
	ld	t1, 0x08(src)
	ADDC(sum, t0)
	ADDC(sum, t1)
#else
	CSUM_BIGCHUNK1(src, 0x00, sum, t0, t1, t3, t4)
#endif
	LONG_SUBU	a1, a1, 0x10
	PTR_ADDU	src, src, 0x10
	LONG_SRL	t8, a1, 0x7

.Lbegin_movement:
	beqz	t8, 1f
	 andi	t2, a1, 0x40

.Lmove_128bytes:
	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4)
	LONG_SUBU	t8, t8, 0x01
	.set	reorder				/* DADDI_WAR */
	PTR_ADDU	src, src, 0x80
	bnez	t8, .Lmove_128bytes
	.set	noreorder

1:
	beqz	t2, 1f
	 andi	t2, a1, 0x20

.Lmove_64bytes:
	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
	PTR_ADDU	src, src, 0x40

1:
	beqz	t2, .Ldo_end_words
	 andi	t8, a1, 0x1c

.Lmove_32bytes:
	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
	andi	t8, a1, 0x1c
	PTR_ADDU	src, src, 0x20

.Ldo_end_words:
	beqz	t8, .Lsmall_csumcpy
	 andi	t2, a1, 0x3
	LONG_SRL	t8, t8, 0x2

.Lend_words:
	LOAD32	t0, (src)
	LONG_SUBU	t8, t8, 0x1
	ADDC(sum, t0)
	.set	reorder				/* DADDI_WAR */
	PTR_ADDU	src, src, 0x4
	bnez	t8, .Lend_words
	.set	noreorder

/* unknown src alignment and < 8 bytes to go */
.Lsmall_csumcpy:
	move	a1, t2

	andi	t0, a1, 4
	beqz	t0, 1f
	 andi	t0, a1, 2

	/* Still a full word to go */
	ulw	t1, (src)
	PTR_ADDIU	src, 4
#ifdef USE_DOUBLE
	dsll	t1, t1, 32			/* clear lower 32bit */
#endif
	ADDC(sum, t1)

1:	move	t1, zero
	beqz	t0, 1f
	 andi	t0, a1, 1

	/* Still a halfword to go */
	ulhu	t1, (src)
	PTR_ADDIU	src, 2

1:	beqz	t0, 1f
	 sll	t1, t1, 16

	lbu	t2, (src)
	 nop

#ifdef __MIPSEB__
	sll	t2, t2, 8
#endif
	or	t1, t2

1:	ADDC(sum, t1)

	/* fold checksum */
#ifdef USE_DOUBLE
	dsll32	v1, sum, 0
	daddu	sum, v1
	sltu	v1, sum, v1
	dsra32	sum, sum, 0
	addu	sum, v1
#endif

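	/*
	 * The USE_DOUBLE fold above adds the low and high 32-bit halves of
	 * the 64-bit accumulator (the daddu leaves their sum in the upper
	 * half), folds any carry out of bit 63 back in, and shifts the
	 * result down to 32 bits.  If the buffer started on an odd address,
	 * every byte was accumulated at the wrong significance, so the code
	 * below swaps the bytes within each halfword of the sum (wsbh where
	 * available, a shift/mask sequence otherwise) to compensate.
	 */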
	/* odd buffer alignment? */
#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \
    defined(CONFIG_CPU_LOONGSON64)
	.set	push
	.set	arch=mips32r2
	wsbh	v1, sum
	movn	sum, v1, t7
	.set	pop
#else
	beqz	t7, 1f			/* odd buffer alignment? */
	 lui	v1, 0x00ff
	addu	v1, 0x00ff
	and	t0, sum, v1
	sll	t0, t0, 8
	srl	sum, sum, 8
	and	sum, sum, v1
	or	sum, sum, t0
1:
#endif
	.set	reorder
	/* Add the passed partial csum. */
	ADDC32(sum, a2)
	jr	ra
	.set	noreorder
	END(csum_partial)


/*
 * checksum and copy routines based on memcpy.S
 *
 *	csum_partial_copy_nocheck(src, dst, len, sum)
 *	__csum_partial_copy_kernel(src, dst, len, sum, errp)
 *
 * See "Spec" in memcpy.S for details.  Unlike __copy_user, all
 * functions in this file use the standard calling convention.
 */

#define src a0
#define dst a1
#define len a2
#define psum a3
#define sum v0
#define odd t8
#define errptr t9

/*
 * The exception handler for loads requires that:
 *  1- AT contain the address of the byte just past the end of the source
 *     of the copy,
 *  2- src_entry <= src < AT, and
 *  3- (dst - src) == (dst_entry - src_entry).
 * The _entry suffix denotes values when __copy_user was called.
 *
 * (1) is set up by __csum_partial_copy_from_user and maintained by
 *	not writing AT in __csum_partial_copy
 * (2) is met by incrementing src by the number of bytes copied
 * (3) is met by not doing loads between a pair of increments of dst and src
 *
 * The exception handlers for stores store -EFAULT to errptr and return.
 * These handlers do not need to overwrite any data.
 */

/* Instruction type */
#define LD_INSN 1
#define ST_INSN 2
#define LEGACY_MODE 1
#define EVA_MODE    2
#define USEROP   1
#define KERNELOP 2

/*
 * Wrapper to add an entry in the exception table
 * in case the insn causes a memory exception.
 * Arguments:
 *   insn    : Load/store instruction
 *   type    : Instruction type
 *   reg     : Register
 *   addr    : Address
 *   handler : Exception handler
 */
#define EXC(insn, type, reg, addr, handler)			\
	.if \mode == LEGACY_MODE;				\
9:		insn reg, addr;					\
		.section __ex_table,"a";			\
		PTR	9b, handler;				\
		.previous;					\
	/* This is enabled in EVA mode */			\
	.else;							\
		/* If loading from user or storing to user */	\
		.if ((\from == USEROP) && (type == LD_INSN)) || \
		    ((\to == USEROP) && (type == ST_INSN));	\
9:			__BUILD_EVA_INSN(insn##e, reg, addr);	\
			.section __ex_table,"a";		\
			PTR	9b, handler;			\
			.previous;				\
		.else;						\
			/* EVA without exception */		\
			insn reg, addr;				\
		.endif;						\
	.endif

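/*
 * For reference, in LEGACY_MODE a wrapped access such as the 32-bit
 * LOAD(t0, 0(src), handler) expands to roughly
 *
 *	9:	lw	t0, 0(src)
 *		.section __ex_table,"a"
 *		PTR	9b, handler
 *		.previous
 *
 * i.e. the potentially faulting instruction gets an exception-table entry
 * pointing at the requested handler.  In EVA mode, accesses that touch
 * user memory use the EVA instruction variants (lwe, sbe, ...) instead.
 */
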
#undef LOAD

#ifdef USE_DOUBLE

#define LOADK	ld /* No exception */
#define LOAD(reg, addr, handler)	EXC(ld, LD_INSN, reg, addr, handler)
#define LOADBU(reg, addr, handler)	EXC(lbu, LD_INSN, reg, addr, handler)
#define LOADL(reg, addr, handler)	EXC(ldl, LD_INSN, reg, addr, handler)
#define LOADR(reg, addr, handler)	EXC(ldr, LD_INSN, reg, addr, handler)
#define STOREB(reg, addr, handler)	EXC(sb, ST_INSN, reg, addr, handler)
#define STOREL(reg, addr, handler)	EXC(sdl, ST_INSN, reg, addr, handler)
#define STORER(reg, addr, handler)	EXC(sdr, ST_INSN, reg, addr, handler)
#define STORE(reg, addr, handler)	EXC(sd, ST_INSN, reg, addr, handler)
#define ADD    daddu
#define SUB    dsubu
#define SRL    dsrl
#define SLL    dsll
#define SLLV   dsllv
#define SRLV   dsrlv
#define NBYTES 8
#define LOG_NBYTES 3

#else

#define LOADK	lw /* No exception */
#define LOAD(reg, addr, handler)	EXC(lw, LD_INSN, reg, addr, handler)
#define LOADBU(reg, addr, handler)	EXC(lbu, LD_INSN, reg, addr, handler)
#define LOADL(reg, addr, handler)	EXC(lwl, LD_INSN, reg, addr, handler)
#define LOADR(reg, addr, handler)	EXC(lwr, LD_INSN, reg, addr, handler)
#define STOREB(reg, addr, handler)	EXC(sb, ST_INSN, reg, addr, handler)
#define STOREL(reg, addr, handler)	EXC(swl, ST_INSN, reg, addr, handler)
#define STORER(reg, addr, handler)	EXC(swr, ST_INSN, reg, addr, handler)
#define STORE(reg, addr, handler)	EXC(sw, ST_INSN, reg, addr, handler)
#define ADD    addu
#define SUB    subu
#define SRL    srl
#define SLL    sll
#define SLLV   sllv
#define SRLV   srlv
#define NBYTES 4
#define LOG_NBYTES 2

#endif /* USE_DOUBLE */

#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define LDFIRST LOADR
#define LDREST	LOADL
#define STFIRST STORER
#define STREST	STOREL
#define SHIFT_DISCARD SLLV
#define SHIFT_DISCARD_REVERT SRLV
#else
#define LDFIRST LOADL
#define LDREST	LOADR
#define STFIRST STOREL
#define STREST	STORER
#define SHIFT_DISCARD SRLV
#define SHIFT_DISCARD_REVERT SLLV
#endif

#define FIRST(unit) ((unit)*NBYTES)
#define REST(unit)  (FIRST(unit)+NBYTES-1)

#define ADDRMASK (NBYTES-1)

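/*
 * For an unaligned unit, LDFIRST/LDREST (and STFIRST/STREST) pair the
 * left/right partial load/store instructions: FIRST(n) addresses the
 * start of unit n, REST(n) its last byte, and the two accesses together
 * assemble or store one full word.  Which of lwl/lwr (ldl/ldr) handles
 * the leading bytes depends on endianness, hence the two mappings above;
 * SHIFT_DISCARD/SHIFT_DISCARD_REVERT likewise pick the shift direction
 * that drops, or restores, bytes at the correct end of the register.
 */
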
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
	.set	noat
#else
	.set	at=v1
#endif

	.macro __BUILD_CSUM_PARTIAL_COPY_USER mode, from, to, __nocheck

	PTR_ADDU	AT, src, len	/* See (1) above. */
	/* initialize __nocheck if this is the first time we execute this
	 * macro
	 */
#ifdef CONFIG_64BIT
	move	errptr, a4
#else
	lw	errptr, 16(sp)
#endif
	.if \__nocheck == 1
	FEXPORT(csum_partial_copy_nocheck)
	EXPORT_SYMBOL(csum_partial_copy_nocheck)
	.endif
	move	sum, zero
	move	odd, zero
	/*
	 * Note: dst & src may be unaligned, len may be 0
	 * Temps
	 */
	/*
	 * The "issue break"s below are very approximate.
	 * Issue delays for dcache fills will perturb the schedule, as will
	 * load queue full replay traps, etc.
	 *
	 * If len < NBYTES use byte operations.
	 */
	sltu	t2, len, NBYTES
	and	t1, dst, ADDRMASK
	bnez	t2, .Lcopy_bytes_checklen\@
	 and	t0, src, ADDRMASK
	andi	odd, dst, 0x1			/* odd buffer? */
	bnez	t1, .Ldst_unaligned\@
	 nop
	bnez	t0, .Lsrc_unaligned_dst_aligned\@
	/*
	 * use delay slot for fall-through
	 * src and dst are aligned; need to compute rem
	 */
.Lboth_aligned\@:
	 SRL	t0, len, LOG_NBYTES+3		# +3 for 8 units/iter
	beqz	t0, .Lcleanup_both_aligned\@	# len < 8*NBYTES
	 nop
	SUB	len, 8*NBYTES			# subtract here for bgez loop
	.align	4
1:
	LOAD(t0, UNIT(0)(src), .Ll_exc\@)
	LOAD(t1, UNIT(1)(src), .Ll_exc_copy\@)
	LOAD(t2, UNIT(2)(src), .Ll_exc_copy\@)
	LOAD(t3, UNIT(3)(src), .Ll_exc_copy\@)
	LOAD(t4, UNIT(4)(src), .Ll_exc_copy\@)
	LOAD(t5, UNIT(5)(src), .Ll_exc_copy\@)
	LOAD(t6, UNIT(6)(src), .Ll_exc_copy\@)
	LOAD(t7, UNIT(7)(src), .Ll_exc_copy\@)
	SUB	len, len, 8*NBYTES
	ADD	src, src, 8*NBYTES
	STORE(t0, UNIT(0)(dst), .Ls_exc\@)
	ADDC(t0, t1)
	STORE(t1, UNIT(1)(dst), .Ls_exc\@)
	ADDC(sum, t0)
	STORE(t2, UNIT(2)(dst), .Ls_exc\@)
	ADDC(t2, t3)
	STORE(t3, UNIT(3)(dst), .Ls_exc\@)
	ADDC(sum, t2)
	STORE(t4, UNIT(4)(dst), .Ls_exc\@)
	ADDC(t4, t5)
	STORE(t5, UNIT(5)(dst), .Ls_exc\@)
	ADDC(sum, t4)
	STORE(t6, UNIT(6)(dst), .Ls_exc\@)
	ADDC(t6, t7)
	STORE(t7, UNIT(7)(dst), .Ls_exc\@)
	ADDC(sum, t6)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 8*NBYTES
	bgez	len, 1b
	.set	noreorder
	ADD	len, 8*NBYTES			# revert len (see above)

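	/*
	 * Each iteration of the loop above reads eight units into t0..t7,
	 * then writes them back out while folding them into sum: pairs are
	 * combined first (ADDC(t0, t1), ...) and only the pair results are
	 * added to sum, which keeps the serial dependency on sum shorter
	 * than adding all eight words to it directly and leaves room to
	 * interleave the stores.
	 */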
	/*
	 * len == the number of bytes left to copy < 8*NBYTES
	 */
.Lcleanup_both_aligned\@:
#define rem t7
	beqz	len, .Ldone\@
	 sltu	t0, len, 4*NBYTES
	bnez	t0, .Lless_than_4units\@
	 and	rem, len, (NBYTES-1)	# rem = len % NBYTES
	/*
	 * len >= 4*NBYTES
	 */
	LOAD(t0, UNIT(0)(src), .Ll_exc\@)
	LOAD(t1, UNIT(1)(src), .Ll_exc_copy\@)
	LOAD(t2, UNIT(2)(src), .Ll_exc_copy\@)
	LOAD(t3, UNIT(3)(src), .Ll_exc_copy\@)
	SUB	len, len, 4*NBYTES
	ADD	src, src, 4*NBYTES
	STORE(t0, UNIT(0)(dst), .Ls_exc\@)
	ADDC(t0, t1)
	STORE(t1, UNIT(1)(dst), .Ls_exc\@)
	ADDC(sum, t0)
	STORE(t2, UNIT(2)(dst), .Ls_exc\@)
	ADDC(t2, t3)
	STORE(t3, UNIT(3)(dst), .Ls_exc\@)
	ADDC(sum, t2)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 4*NBYTES
	beqz	len, .Ldone\@
	.set	noreorder
.Lless_than_4units\@:
	/*
	 * rem = len % NBYTES
	 */
	beq	rem, len, .Lcopy_bytes\@
	 nop
1:
	LOAD(t0, 0(src), .Ll_exc\@)
	ADD	src, src, NBYTES
	SUB	len, len, NBYTES
	STORE(t0, 0(dst), .Ls_exc\@)
	ADDC(sum, t0)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, NBYTES
	bne	rem, len, 1b
	.set	noreorder

	/*
	 * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
	 * A loop would do only a byte at a time with possible branch
	 * mispredicts.  Can't do an explicit LOAD dst,mask,or,STORE
	 * because can't assume read-access to dst.  Instead, use
	 * STREST dst, which doesn't require read access to dst.
	 *
	 * This code should perform better than a simple loop on modern,
	 * wide-issue mips processors because the code has fewer branches and
	 * more instruction-level parallelism.
	 */
#define bits t2
	beqz	len, .Ldone\@
	 ADD	t1, dst, len	# t1 is just past last byte of dst
	li	bits, 8*NBYTES
	SLL	rem, len, 3	# rem = number of bits to keep
	LOAD(t0, 0(src), .Ll_exc\@)
	SUB	bits, bits, rem # bits = number of bits to discard
	SHIFT_DISCARD t0, t0, bits
	STREST(t0, -1(t1), .Ls_exc\@)
	SHIFT_DISCARD_REVERT t0, t0, bits
	.set	reorder
	ADDC(sum, t0)
	b	.Ldone\@
	.set	noreorder
.Ldst_unaligned\@:
	/*
	 * dst is unaligned
	 * t0 = src & ADDRMASK
	 * t1 = dst & ADDRMASK; t1 > 0
	 * len >= NBYTES
	 *
	 * Copy enough bytes to align dst
	 * Set match = (src and dst have same alignment)
	 */
#define match rem
	LDFIRST(t3, FIRST(0)(src), .Ll_exc\@)
	ADD	t2, zero, NBYTES
	LDREST(t3, REST(0)(src), .Ll_exc_copy\@)
	SUB	t2, t2, t1	# t2 = number of bytes copied
	xor	match, t0, t1
	STFIRST(t3, FIRST(0)(dst), .Ls_exc\@)
	SLL	t4, t1, 3	# t4 = number of bits to discard
	SHIFT_DISCARD t3, t3, t4
	/* no SHIFT_DISCARD_REVERT to handle odd buffer properly */
	ADDC(sum, t3)
	beq	len, t2, .Ldone\@
	 SUB	len, len, t2
	ADD	dst, dst, t2
	beqz	match, .Lboth_aligned\@
	 ADD	src, src, t2

.Lsrc_unaligned_dst_aligned\@:
	SRL	t0, len, LOG_NBYTES+2	# +2 for 4 units/iter
	beqz	t0, .Lcleanup_src_unaligned\@
	 and	rem, len, (4*NBYTES-1)	# rem = len % 4*NBYTES
1:
/*
 * Avoid consecutive LD*'s to the same register since some mips
 * implementations can't issue them in the same cycle.
 * It's OK to load FIRST(N+1) before REST(N) because the two addresses
 * are to the same unit (unless src is aligned, but it's not).
 */
	LDFIRST(t0, FIRST(0)(src), .Ll_exc\@)
	LDFIRST(t1, FIRST(1)(src), .Ll_exc_copy\@)
	SUB	len, len, 4*NBYTES
	LDREST(t0, REST(0)(src), .Ll_exc_copy\@)
	LDREST(t1, REST(1)(src), .Ll_exc_copy\@)
	LDFIRST(t2, FIRST(2)(src), .Ll_exc_copy\@)
	LDFIRST(t3, FIRST(3)(src), .Ll_exc_copy\@)
	LDREST(t2, REST(2)(src), .Ll_exc_copy\@)
	LDREST(t3, REST(3)(src), .Ll_exc_copy\@)
	ADD	src, src, 4*NBYTES
#ifdef CONFIG_CPU_SB1
	nop				# improves slotting
#endif
	STORE(t0, UNIT(0)(dst), .Ls_exc\@)
	ADDC(t0, t1)
	STORE(t1, UNIT(1)(dst), .Ls_exc\@)
	ADDC(sum, t0)
	STORE(t2, UNIT(2)(dst), .Ls_exc\@)
	ADDC(t2, t3)
	STORE(t3, UNIT(3)(dst), .Ls_exc\@)
	ADDC(sum, t2)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 4*NBYTES
	bne	len, rem, 1b
	.set	noreorder

.Lcleanup_src_unaligned\@:
	beqz	len, .Ldone\@
	 and	rem, len, NBYTES-1	# rem = len % NBYTES
	beq	rem, len, .Lcopy_bytes\@
	 nop
1:
	LDFIRST(t0, FIRST(0)(src), .Ll_exc\@)
	LDREST(t0, REST(0)(src), .Ll_exc_copy\@)
	ADD	src, src, NBYTES
	SUB	len, len, NBYTES
	STORE(t0, 0(dst), .Ls_exc\@)
	ADDC(sum, t0)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, NBYTES
	bne	len, rem, 1b
	.set	noreorder

.Lcopy_bytes_checklen\@:
	beqz	len, .Ldone\@
	 nop
.Lcopy_bytes\@:
	/* 0 < len < NBYTES */
#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define SHIFT_START 0
#define SHIFT_INC 8
#else
#define SHIFT_START 8*(NBYTES-1)
#define SHIFT_INC -8
#endif
	move	t2, zero	# partial word
	li	t3, SHIFT_START # shift
/* use .Ll_exc_copy here to return correct sum on fault */
#define COPY_BYTE(N)				\
	LOADBU(t0, N(src), .Ll_exc_copy\@);	\
	SUB	len, len, 1;			\
	STOREB(t0, N(dst), .Ls_exc\@);		\
	SLLV	t0, t0, t3;			\
	addu	t3, SHIFT_INC;			\
	beqz	len, .Lcopy_bytes_done\@;	\
	 or	t2, t0

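/*
 * COPY_BYTE copies one byte and, before merging it into the partial word
 * in t2, shifts it to the byte position it occupied in memory
 * (SHIFT_START/SHIFT_INC walk that position in the endian-appropriate
 * direction), so the trailing bytes enter the checksum with the same
 * significance they would have had as part of a full word.
 */
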
	COPY_BYTE(0)
	COPY_BYTE(1)
#ifdef USE_DOUBLE
	COPY_BYTE(2)
	COPY_BYTE(3)
	COPY_BYTE(4)
	COPY_BYTE(5)
#endif
	LOADBU(t0, NBYTES-2(src), .Ll_exc_copy\@)
	SUB	len, len, 1
	STOREB(t0, NBYTES-2(dst), .Ls_exc\@)
	SLLV	t0, t0, t3
	or	t2, t0
.Lcopy_bytes_done\@:
	ADDC(sum, t2)
.Ldone\@:
	/* fold checksum */
	.set	push
	.set	noat
#ifdef USE_DOUBLE
	dsll32	v1, sum, 0
	daddu	sum, v1
	sltu	v1, sum, v1
	dsra32	sum, sum, 0
	addu	sum, v1
#endif

#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \
    defined(CONFIG_CPU_LOONGSON64)
	.set	push
	.set	arch=mips32r2
	wsbh	v1, sum
	movn	sum, v1, odd
	.set	pop
#else
	beqz	odd, 1f			/* odd buffer alignment? */
	 lui	v1, 0x00ff
	addu	v1, 0x00ff
	and	t0, sum, v1
	sll	t0, t0, 8
	srl	sum, sum, 8
	and	sum, sum, v1
	or	sum, sum, t0
1:
#endif
	.set	pop
	.set	reorder
	ADDC32(sum, psum)
	jr	ra
	.set	noreorder

.Ll_exc_copy\@:
	/*
	 * Copy bytes from src until faulting load address (or until a
	 * lb faults)
	 *
	 * When reached by a faulting LDFIRST/LDREST, THREAD_BUADDR($28)
	 * may be more than a byte beyond the last address.
	 * Hence, the lb below may get an exception.
	 *
	 * Assumes src < THREAD_BUADDR($28)
	 */
	LOADK	t0, TI_TASK($28)
	 li	t2, SHIFT_START
	LOADK	t0, THREAD_BUADDR(t0)
1:
	LOADBU(t1, 0(src), .Ll_exc\@)
	ADD	src, src, 1
	sb	t1, 0(dst)	# can't fault -- we're copy_from_user
	SLLV	t1, t1, t2
	addu	t2, SHIFT_INC
	ADDC(sum, t1)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 1
	bne	src, t0, 1b
	.set	noreorder
.Ll_exc\@:
	LOADK	t0, TI_TASK($28)
	 nop
	LOADK	t0, THREAD_BUADDR(t0)	# t0 is just past last good address
	 nop
	SUB	len, AT, t0		# len number of uncopied bytes
	/*
	 * Here's where we rely on src and dst being incremented in tandem,
	 *   See (3) above.
	 * dst += (fault addr - src) to put dst at first byte to clear
	 */
	ADD	dst, t0			# compute start address in a1
	SUB	dst, src
	/*
	 * Clear len bytes starting at dst.  Can't call __bzero because it
	 * might modify len.  An inefficient loop for these rare times...
	 */
	.set	reorder				/* DADDI_WAR */
	SUB	src, len, 1
	beqz	len, .Ldone\@
	.set	noreorder
1:	sb	zero, 0(dst)
	ADD	dst, dst, 1
	.set	push
	.set	noat
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
	bnez	src, 1b
	 SUB	src, src, 1
#else
	li	v1, 1
	bnez	src, 1b
	 SUB	src, src, v1
#endif
	li	v1, -EFAULT
	b	.Ldone\@
	 sw	v1, (errptr)

.Ls_exc\@:
	li	v0, -1			/* invalid checksum */
	li	v1, -EFAULT
	jr	ra
	 sw	v1, (errptr)
	.set	pop
	.endm

LEAF(__csum_partial_copy_kernel)
EXPORT_SYMBOL(__csum_partial_copy_kernel)
#ifndef CONFIG_EVA
FEXPORT(__csum_partial_copy_to_user)
EXPORT_SYMBOL(__csum_partial_copy_to_user)
FEXPORT(__csum_partial_copy_from_user)
EXPORT_SYMBOL(__csum_partial_copy_from_user)
#endif
__BUILD_CSUM_PARTIAL_COPY_USER LEGACY_MODE USEROP USEROP 1
END(__csum_partial_copy_kernel)

#ifdef CONFIG_EVA
LEAF(__csum_partial_copy_to_user)
__BUILD_CSUM_PARTIAL_COPY_USER EVA_MODE KERNELOP USEROP 0
END(__csum_partial_copy_to_user)

LEAF(__csum_partial_copy_from_user)
__BUILD_CSUM_PARTIAL_COPY_USER EVA_MODE USEROP KERNELOP 0
END(__csum_partial_copy_from_user)
#endif