/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Unified implementation of memcpy, memmove and the __copy_user backend.
 *
 * Copyright (C) 1998, 99, 2000, 01, 2002 Ralf Baechle (ralf@gnu.org)
 * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc.
 * Copyright (C) 2002 Broadcom, Inc.
 *   memcpy/copy_user author: Mark Vandevoorde
 *
 * Mnemonic names for arguments to memcpy/__copy_user
 */

#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/regdef.h>

#define dst a0
#define src a1
#define len a2

/*
 * Spec
 *
 * memcpy copies len bytes from src to dst and sets v0 to dst.
 * It assumes that
 *   - src and dst don't overlap
 *   - src is readable
 *   - dst is writable
 * memcpy uses the standard calling convention
 *
 * __copy_user copies up to len bytes from src to dst and sets a2 (len) to
 * the number of uncopied bytes due to an exception caused by a read or write.
 * __copy_user assumes that src and dst don't overlap, and that the call is
 * implementing one of the following:
 *   copy_to_user
 *     - src is readable  (no exceptions when reading src)
 *   copy_from_user
 *     - dst is writable  (no exceptions when writing dst)
 * __copy_user uses a non-standard calling convention; see
 * arch/mips/include/asm/uaccess.h
 *
 * When an exception happens on a load, the handler must
 * ensure that all of the destination buffer is overwritten to prevent
 * leaking information to user mode programs.
 */

/*
 * Implementation
 */

/*
 * The exception handler for loads requires that:
 *  1- AT contain the address of the byte just past the end of the source
 *     of the copy,
 *  2- src_entry <= src < AT, and
 *  3- (dst - src) == (dst_entry - src_entry),
 * The _entry suffix denotes values when __copy_user was called.
 *
 * (1) is set up by uaccess.h and maintained by not writing AT in copy_user
 * (2) is met by incrementing src by the number of bytes copied
 * (3) is met by not doing loads between a pair of increments of dst and src
 *
 * The exception handlers for stores adjust len (if necessary) and return.
 * These handlers do not need to overwrite any data.
 *
 * For __rmemcpy and memmove an exception is always a kernel bug, therefore
 * they're not protected.
 */
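
/*
 * Rough C-level sketch of the __copy_user contract above.  Illustrative
 * only: real callers go through the non-standard register convention in
 * arch/mips/include/asm/uaccess.h rather than an ordinary C call.
 *
 *	size_t left = __copy_user(to, from, n);
 *	// left == 0: all n bytes were copied
 *	// left != 0: at most 'left' trailing bytes were not copied; on a
 *	//            faulting load the destination tail has been
 *	//            zero-filled (see l_exc below), so no kernel data can
 *	//            leak to user space.
 */
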
#define EXC(inst_reg,addr,handler)		\
9:	inst_reg, addr;				\
	.section __ex_table,"a";		\
	PTR	9b, handler;			\
	.previous

/*
 * Only the 64-bit kernel can make use of 64-bit registers.
 */
#ifdef CONFIG_64BIT
#define USE_DOUBLE
#endif

#ifdef USE_DOUBLE

#define LOAD	ld
#define LOADL	ldl
#define LOADR	ldr
#define STOREL	sdl
#define STORER	sdr
#define STORE	sd
#define ADD	daddu
#define SUB	dsubu
#define SRL	dsrl
#define SRA	dsra
#define SLL	dsll
#define SLLV	dsllv
#define SRLV	dsrlv
#define NBYTES	8
#define LOG_NBYTES 3

/*
 * As we share the code base with the mips32 tree (which uses the o32 ABI
 * register definitions), we need to redefine the register names from the
 * n64 ABI naming to the o32 ABI naming.
 */
#undef t0
#undef t1
#undef t2
#undef t3
#define t0	$8
#define t1	$9
#define t2	$10
#define t3	$11
#define t4	$12
#define t5	$13
#define t6	$14
#define t7	$15

#else

#define LOAD	lw
#define LOADL	lwl
#define LOADR	lwr
#define STOREL	swl
#define STORER	swr
#define STORE	sw
#define ADD	addu
#define SUB	subu
#define SRL	srl
#define SLL	sll
#define SRA	sra
#define SLLV	sllv
#define SRLV	srlv
#define NBYTES	4
#define LOG_NBYTES 2

#endif /* USE_DOUBLE */

#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define LDFIRST	LOADR
#define LDREST	LOADL
#define STFIRST	STORER
#define STREST	STOREL
#define SHIFT_DISCARD SLLV
#else
#define LDFIRST	LOADL
#define LDREST	LOADR
#define STFIRST	STOREL
#define STREST	STORER
#define SHIFT_DISCARD SRLV
#endif

#define FIRST(unit) ((unit)*NBYTES)
#define REST(unit)  (FIRST(unit)+NBYTES-1)
#define UNIT(unit)  FIRST(unit)

#define ADDRMASK (NBYTES-1)

	.text
	.set	noreorder
	.set	noat

/*
 * t7 is used as a flag to note inatomic mode.
 */
LEAF(__copy_user_inatomic)
	b	__copy_user_common
	li	t7, 1
	END(__copy_user_inatomic)

/*
 * A combined memcpy/__copy_user
 * __copy_user sets len to 0 for success; else to an upper bound of
 * the number of uncopied bytes.
 * memcpy sets v0 to dst.
 */
	.align	5
LEAF(memcpy)					/* a0=dst a1=src a2=len */
	move	v0, dst				/* return value */
__memcpy:
FEXPORT(__copy_user)
	li	t7, 0				/* not inatomic */
__copy_user_common:
	/*
	 * Note: dst & src may be unaligned, len may be 0
	 * Temps
	 */
	#
	# Octeon doesn't care if the destination is unaligned.  The hardware
	# can fix it faster than we can special case the assembly.
	#
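
	#
	# The length checks below do two things: they pick the largest copy
	# loop that still fits (len < NBYTES, < 4*NBYTES, < 8*NBYTES,
	# < 16*NBYTES), and they guard the prefetches -- pref 0, 128(src) is
	# only issued once len >= 129, and pref 0, 256(src) only while
	# len >= 257, so a prefetch never touches an address outside the
	# source buffer.  For example (64-bit kernel, hypothetical len = 200):
	# the 16-word loop runs, but only the 128-byte-ahead prefetch is used.
	#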
	pref	0, 0(src)
	sltu	t0, len, NBYTES		# Check if < 1 word
	bnez	t0, copy_bytes_checklen
	and	t0, src, ADDRMASK	# Check if src unaligned
	bnez	t0, src_unaligned
	sltu	t0, len, 4*NBYTES	# Check if < 4 words
	bnez	t0, less_than_4units
	sltu	t0, len, 8*NBYTES	# Check if < 8 words
	bnez	t0, less_than_8units
	sltu	t0, len, 16*NBYTES	# Check if < 16 words
	bnez	t0, cleanup_both_aligned
	sltu	t0, len, 128+1		# Check if len < 129
	bnez	t0, 1f			# Skip prefetch if len is too short
	sltu	t0, len, 256+1		# Check if len < 257
	bnez	t0, 1f			# Skip prefetch if len is too short
	pref	0, 128(src)		# We must not prefetch invalid addresses
	#
	# This is where we loop if there is more than 128 bytes left
2:	pref	0, 256(src)		# We must not prefetch invalid addresses
	#
	# This is where we loop if we can't prefetch anymore
1:
EXC(	LOAD	t0, UNIT(0)(src),	l_exc)
EXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
EXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
EXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
	SUB	len, len, 16*NBYTES
EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p16u)
EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p15u)
EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p14u)
EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p13u)
EXC(	LOAD	t0, UNIT(4)(src),	l_exc_copy)
EXC(	LOAD	t1, UNIT(5)(src),	l_exc_copy)
EXC(	LOAD	t2, UNIT(6)(src),	l_exc_copy)
EXC(	LOAD	t3, UNIT(7)(src),	l_exc_copy)
EXC(	STORE	t0, UNIT(4)(dst),	s_exc_p12u)
EXC(	STORE	t1, UNIT(5)(dst),	s_exc_p11u)
EXC(	STORE	t2, UNIT(6)(dst),	s_exc_p10u)
	ADD	src, src, 16*NBYTES
EXC(	STORE	t3, UNIT(7)(dst),	s_exc_p9u)
	ADD	dst, dst, 16*NBYTES
EXC(	LOAD	t0, UNIT(-8)(src),	l_exc_copy)
EXC(	LOAD	t1, UNIT(-7)(src),	l_exc_copy)
EXC(	LOAD	t2, UNIT(-6)(src),	l_exc_copy)
EXC(	LOAD	t3, UNIT(-5)(src),	l_exc_copy)
EXC(	STORE	t0, UNIT(-8)(dst),	s_exc_p8u)
EXC(	STORE	t1, UNIT(-7)(dst),	s_exc_p7u)
EXC(	STORE	t2, UNIT(-6)(dst),	s_exc_p6u)
EXC(	STORE	t3, UNIT(-5)(dst),	s_exc_p5u)
EXC(	LOAD	t0, UNIT(-4)(src),	l_exc_copy)
EXC(	LOAD	t1, UNIT(-3)(src),	l_exc_copy)
EXC(	LOAD	t2, UNIT(-2)(src),	l_exc_copy)
EXC(	LOAD	t3, UNIT(-1)(src),	l_exc_copy)
EXC(	STORE	t0, UNIT(-4)(dst),	s_exc_p4u)
EXC(	STORE	t1, UNIT(-3)(dst),	s_exc_p3u)
EXC(	STORE	t2, UNIT(-2)(dst),	s_exc_p2u)
EXC(	STORE	t3, UNIT(-1)(dst),	s_exc_p1u)
	sltu	t0, len, 256+1		# See if we can prefetch more
	beqz	t0, 2b
	sltu	t0, len, 128		# See if we can loop again
	beqz	t0, 1b
	nop
	#
	# Jump here if there are less than 16*NBYTES left.
	#
cleanup_both_aligned:
	beqz	len, done
	sltu	t0, len, 8*NBYTES
	bnez	t0, less_than_8units
	nop
EXC(	LOAD	t0, UNIT(0)(src),	l_exc)
EXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
EXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
EXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
	SUB	len, len, 8*NBYTES
EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p8u)
EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p7u)
EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p6u)
EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p5u)
EXC(	LOAD	t0, UNIT(4)(src),	l_exc_copy)
EXC(	LOAD	t1, UNIT(5)(src),	l_exc_copy)
EXC(	LOAD	t2, UNIT(6)(src),	l_exc_copy)
EXC(	LOAD	t3, UNIT(7)(src),	l_exc_copy)
EXC(	STORE	t0, UNIT(4)(dst),	s_exc_p4u)
EXC(	STORE	t1, UNIT(5)(dst),	s_exc_p3u)
EXC(	STORE	t2, UNIT(6)(dst),	s_exc_p2u)
EXC(	STORE	t3, UNIT(7)(dst),	s_exc_p1u)
	ADD	src, src, 8*NBYTES
	beqz	len, done
	ADD	dst, dst, 8*NBYTES
	#
	# Jump here if there are less than 8*NBYTES left.
	#
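
	#
	# Note on the s_exc_pNu handlers used above: N names how many NBYTES
	# units must be added back to len if that store faults.  For example,
	# in the 16-unit loop len is decremented by 16*NBYTES before the first
	# store; if the store of UNIT(1) then faults, only UNIT(0) reached
	# memory, so 15 units of the block (plus the remaining len) are still
	# uncopied -- hence s_exc_p15u.
	#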
less_than_8units:
	sltu	t0, len, 4*NBYTES
	bnez	t0, less_than_4units
	nop
EXC(	LOAD	t0, UNIT(0)(src),	l_exc)
EXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
EXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
EXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
	SUB	len, len, 4*NBYTES
EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p4u)
EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p3u)
EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p2u)
EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p1u)
	ADD	src, src, 4*NBYTES
	beqz	len, done
	ADD	dst, dst, 4*NBYTES
	#
	# Jump here if there are less than 4*NBYTES left.  This means
	# we may need to copy up to 3 NBYTES words.
	#
less_than_4units:
	sltu	t0, len, 1*NBYTES
	bnez	t0, copy_bytes_checklen
	nop
	#
	# 1) Copy NBYTES, then check length again
	#
EXC(	LOAD	t0, 0(src),		l_exc)
	SUB	len, len, NBYTES
	sltu	t1, len, 8
EXC(	STORE	t0, 0(dst),		s_exc_p1u)
	ADD	src, src, NBYTES
	bnez	t1, copy_bytes_checklen
	ADD	dst, dst, NBYTES
	#
	# 2) Copy NBYTES, then check length again
	#
EXC(	LOAD	t0, 0(src),		l_exc)
	SUB	len, len, NBYTES
	sltu	t1, len, 8
EXC(	STORE	t0, 0(dst),		s_exc_p1u)
	ADD	src, src, NBYTES
	bnez	t1, copy_bytes_checklen
	ADD	dst, dst, NBYTES
	#
	# 3) Copy NBYTES, then check length again
	#
EXC(	LOAD	t0, 0(src),		l_exc)
	SUB	len, len, NBYTES
	ADD	src, src, NBYTES
	ADD	dst, dst, NBYTES
	b	copy_bytes_checklen
EXC(	STORE	t0, -8(dst),		s_exc_p1u)

src_unaligned:
#define rem t8
	SRL	t0, len, LOG_NBYTES+2	# +2 for 4 units/iter
	beqz	t0, cleanup_src_unaligned
	and	rem, len, (4*NBYTES-1)	# rem = len % 4*NBYTES
1:
/*
 * Avoid consecutive LD*'s to the same register since some mips
 * implementations can't issue them in the same cycle.
 * It's OK to load FIRST(N+1) before REST(N) because the two addresses
 * are to the same unit (unless src is aligned, but it's not).
 */
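
/*
 * Worked example of the LDFIRST/LDREST pairing (hypothetical address,
 * little-endian 64-bit kernel): if src == base+3 with base 8-byte aligned,
 * LDFIRST t0, FIRST(0)(src) expands to "ldr t0, 0(src)" and pulls the five
 * bytes at base+3..base+7 into the low end of t0, while LDREST t0,
 * REST(0)(src) expands to "ldl t0, 7(src)" and pulls the three bytes at
 * base+8..base+10 into the high end, leaving t0 holding the 8 unaligned
 * source bytes.  The big-endian pairing works the same way with the roles
 * of LOADL/LOADR swapped.
 */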
EXC(	LDFIRST	t0, FIRST(0)(src),	l_exc)
EXC(	LDFIRST	t1, FIRST(1)(src),	l_exc_copy)
	SUB	len, len, 4*NBYTES
EXC(	LDREST	t0, REST(0)(src),	l_exc_copy)
EXC(	LDREST	t1, REST(1)(src),	l_exc_copy)
EXC(	LDFIRST	t2, FIRST(2)(src),	l_exc_copy)
EXC(	LDFIRST	t3, FIRST(3)(src),	l_exc_copy)
EXC(	LDREST	t2, REST(2)(src),	l_exc_copy)
EXC(	LDREST	t3, REST(3)(src),	l_exc_copy)
	ADD	src, src, 4*NBYTES
EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p4u)
EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p3u)
EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p2u)
EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p1u)
	bne	len, rem, 1b
	ADD	dst, dst, 4*NBYTES

cleanup_src_unaligned:
	beqz	len, done
	and	rem, len, NBYTES-1	# rem = len % NBYTES
	beq	rem, len, copy_bytes
	nop
1:
EXC(	LDFIRST	t0, FIRST(0)(src),	l_exc)
EXC(	LDREST	t0, REST(0)(src),	l_exc_copy)
	SUB	len, len, NBYTES
EXC(	STORE	t0, 0(dst),		s_exc_p1u)
	ADD	src, src, NBYTES
	bne	len, rem, 1b
	ADD	dst, dst, NBYTES

copy_bytes_checklen:
	beqz	len, done
	nop
copy_bytes:
	/* 0 < len < NBYTES */
#define COPY_BYTE(N)			\
EXC(	lb	t0, N(src), l_exc);	\
	SUB	len, len, 1;		\
	beqz	len, done;		\
EXC(	sb	t0, N(dst), s_exc_p1)

	COPY_BYTE(0)
	COPY_BYTE(1)
#ifdef USE_DOUBLE
	COPY_BYTE(2)
	COPY_BYTE(3)
	COPY_BYTE(4)
	COPY_BYTE(5)
#endif
EXC(	lb	t0, NBYTES-2(src), l_exc)
	SUB	len, len, 1
	jr	ra
EXC(	sb	t0, NBYTES-2(dst), s_exc_p1)
done:
	jr	ra
	nop
	END(memcpy)

l_exc_copy:
	/*
	 * Copy bytes from src until faulting load address (or until a
	 * lb faults)
	 *
	 * When reached by a faulting LDFIRST/LDREST, THREAD_BUADDR($28)
	 * may be more than a byte beyond the last address.
	 * Hence, the lb below may get an exception.
	 *
	 * Assumes src < THREAD_BUADDR($28)
	 */
	LOAD	t0, TI_TASK($28)
	LOAD	t0, THREAD_BUADDR(t0)
1:
EXC(	lb	t1, 0(src),	l_exc)
	ADD	src, src, 1
	sb	t1, 0(dst)	# can't fault -- we're copy_from_user
	bne	src, t0, 1b
	ADD	dst, dst, 1
l_exc:
	LOAD	t0, TI_TASK($28)
	LOAD	t0, THREAD_BUADDR(t0)	# t0 is just past last good address
	SUB	len, AT, t0		# len number of uncopied bytes
	bnez	t7, 2f			/* Skip the zeroing out part if inatomic */
	/*
	 * Here's where we rely on src and dst being incremented in tandem,
	 * See (3) above.
	 * dst += (fault addr - src) to put dst at first byte to clear
	 */
	ADD	dst, t0			# compute start address in a1
	SUB	dst, src
	/*
	 * Clear len bytes starting at dst.  Can't call __bzero because it
	 * might modify len.  An inefficient loop for these rare times...
	 */
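
	/*
	 * Purely illustrative numbers for the fixup above: say __copy_user
	 * was entered with dst = 0x1000, src = 0x2000, len = 0x100, so
	 * uaccess.h set AT = 0x2100 (one byte past the source end).  If a
	 * load faults once the copy has advanced to src = 0x2048 (so
	 * dst = 0x1048 and BUADDR = 0x2048), then len = AT - BUADDR = 0xb8
	 * uncopied bytes, dst stays at dst + (BUADDR - src) = 0x1048, and
	 * the loop below zeroes those 0xb8 bytes so no stale kernel data is
	 * left in the destination buffer.
	 */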
	beqz	len, done
	SUB	src, len, 1
1:	sb	zero, 0(dst)
	ADD	dst, dst, 1
	bnez	src, 1b
	SUB	src, src, 1
2:	jr	ra
	nop


#define SEXC(n)				\
s_exc_p ## n ## u:			\
	jr	ra;			\
	ADD	len, len, n*NBYTES

SEXC(16)
SEXC(15)
SEXC(14)
SEXC(13)
SEXC(12)
SEXC(11)
SEXC(10)
SEXC(9)
SEXC(8)
SEXC(7)
SEXC(6)
SEXC(5)
SEXC(4)
SEXC(3)
SEXC(2)
SEXC(1)

s_exc_p1:
	jr	ra
	ADD	len, len, 1
s_exc:
	jr	ra
	nop

	.align	5
LEAF(memmove)
	ADD	t0, a0, a2
	ADD	t1, a1, a2
	sltu	t0, a1, t0			# dst + len <= src -> memcpy
	sltu	t1, a0, t1			# dst >= src + len -> memcpy
	and	t0, t1
	beqz	t0, __memcpy
	move	v0, a0				/* return value */
	beqz	a2, r_out
	END(memmove)

	/* fall through to __rmemcpy */
LEAF(__rmemcpy)					/* a0=dst a1=src a2=len */
	sltu	t0, a1, a0
	beqz	t0, r_end_bytes_up		# src >= dst
	nop
	ADD	a0, a2				# dst = dst + len
	ADD	a1, a2				# src = src + len

r_end_bytes:
	lb	t0, -1(a1)
	SUB	a2, a2, 0x1
	sb	t0, -1(a0)
	SUB	a1, a1, 0x1
	bnez	a2, r_end_bytes
	SUB	a0, a0, 0x1

r_out:
	jr	ra
	move	a2, zero

r_end_bytes_up:
	lb	t0, (a1)
	SUB	a2, a2, 0x1
	sb	t0, (a0)
	ADD	a1, a1, 0x1
	bnez	a2, r_end_bytes_up
	ADD	a0, a0, 0x1

	jr	ra
	move	a2, zero
	END(__rmemcpy)
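
/*
 * Worked example of the overlap test in memmove above (hypothetical
 * addresses): with dst = 0x140, src = 0x100, len = 0x80 we have
 * src < dst + len (0x100 < 0x1c0) and dst < src + len (0x140 < 0x180),
 * so the regions overlap and memmove falls through to __rmemcpy, which
 * sees src < dst and copies backwards from the end so that no source
 * byte is overwritten before it has been read.
 */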