/* SPDX-License-Identifier: GPL-2.0 */
/* copy_user.S: Sparc optimized copy_from_user and copy_to_user code.
 *
 *  Copyright(C) 1995 Linus Torvalds
 *  Copyright(C) 1996 David S. Miller
 *  Copyright(C) 1996 Eddie C. Dost
 *  Copyright(C) 1996,1998 Jakub Jelinek
 *
 * derived from:
 *	e-mail between David and Eddie.
 *
 * Returns 0 if successful, otherwise count of bytes not copied yet
 *
 * Register roles in this file:
 *   %o0 = dst, %o1 = src, %o2 = len on entry to __copy_user.
 *   %g1 = working copy of the remaining length on the word-aligned
 *         paths (the unrolled chunk macros use %o2 as a scratch register).
 *   %g7 = byte count still to be handled by the current 128-byte loop
 *         or by the 16-byte jump table.
 *   %o3 = the same role on the cannot_optimize / short_end paths.
 *   %g2-%g5, %o2-%o5 = scratch for the chunk macros.
 *   %g3 = value computed by a fixup; fixupretl moves it to %o0.
 *
 * Formatting convention: an instruction indented by one extra space sits
 * in the delay slot of the branch/jmpl/retl directly above it.  Many
 * delay slots set condition codes that are consumed by a LATER branch;
 * loads, stores, add, sub, mov, srl, sll and sethi leave them untouched.
 */

#include <asm/ptrace.h>
#include <asm/asmmacro.h>
#include <asm/page.h>
#include <asm/thread_info.h>
#include <asm/export.h>

/* Work around cpp -rob */
/* Section flags are spelled through these two macros so that they can be
 * used inside the function-like macros below (presumably because a bare
 * '#' in such a macro body is treated by cpp as the stringify operator).
 */
#define ALLOC #alloc
#define EXECINSTR #execinstr

/* EX(x,y,a,b): emit the user-access instruction "x,y" at local label 98
 * and a two-instruction stub at 99 in .fixup that branches to fixupretl
 * with "a, b, %g3" in the delay slot, i.e. the stub computes the return
 * value into %g3.  The pair (98b, 99b) is recorded in __ex_table.
 */
#define EX(x,y,a,b) 				\
98: 	x,y;					\
	.section .fixup,ALLOC,EXECINSTR;	\
	.align	4;				\
99:	ba fixupretl;				\
	 a, b, %g3;				\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	98b, 99b;			\
	.text;					\
	.align	4

/* EX2(x,y,c,d,e,a,b): like EX, but the stub first executes "c, d, e"
 * and then branches to fixupretl with "a, b, %g3" in the delay slot.
 */
#define EX2(x,y,c,d,e,a,b) 			\
98: 	x,y;					\
	.section .fixup,ALLOC,EXECINSTR;	\
	.align	4;				\
99:	c, d, e;				\
	ba fixupretl;				\
	 a, b, %g3;				\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	98b, 99b;			\
	.text;					\
	.align	4

/* EXO2(x,y): emit "x, y" at 98 with the shared fixup 97 (defined in the
 * .fixup section near the end of this file), which returns the current
 * value of %o2.
 */
#define EXO2(x,y) 				\
98: 	x, y;					\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	98b, 97f;			\
	.text;					\
	.align	4

/* EXT(start,end,handler): emit a range entry "start, 0, end, handler"
 * into __ex_table covering the instructions from start up to end.
 * As noted at the handlers below, the exception routine passes the
 * index of the faulting instruction within the range in %g2.
 */
#define EXT(start,end,handler)			\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	start, 0, end, handler;		\
	.text;					\
	.align	4

/* Please do not change following macros unless you change logic used
 * in .fixup at the end of this file as well
 */

/* Both these macros have to start with exactly the same insn */
/* (__copy_user branches to "ldd_std + 4" with the first insn of
 * MOVE_BIGCHUNK in the delay slot, so that insn stands in for the first
 * insn of MOVE_BIGALIGNCHUNK.)
 *
 * MOVE_BIGCHUNK: copy 32 bytes at src/dst + offset with four ldd loads
 * followed by eight word stores: 12 instructions.  t0/t1, t2/t3, t4/t5
 * and t6/t7 are the register pairs filled by each ldd.
 */
#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src + (offset) + 0x00], %t0; \
	ldd	[%src + (offset) + 0x08], %t2; \
	ldd	[%src + (offset) + 0x10], %t4; \
	ldd	[%src + (offset) + 0x18], %t6; \
	st	%t0, [%dst + (offset) + 0x00]; \
	st	%t1, [%dst + (offset) + 0x04]; \
	st	%t2, [%dst + (offset) + 0x08]; \
	st	%t3, [%dst + (offset) + 0x0c]; \
	st	%t4, [%dst + (offset) + 0x10]; \
	st	%t5, [%dst + (offset) + 0x14]; \
	st	%t6, [%dst + (offset) + 0x18]; \
	st	%t7, [%dst + (offset) + 0x1c];

/* MOVE_BIGALIGNCHUNK: copy 32 bytes with four ldd loads followed by four
 * std stores: 8 instructions.  Used when the destination also passes the
 * "andcc %o0, 4" test in __copy_user.
 */
#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src + (offset) + 0x00], %t0; \
	ldd	[%src + (offset) + 0x08], %t2; \
	ldd	[%src + (offset) + 0x10], %t4; \
	ldd	[%src + (offset) + 0x18], %t6; \
	std	%t0, [%dst + (offset) + 0x00]; \
	std	%t2, [%dst + (offset) + 0x08]; \
	std	%t4, [%dst + (offset) + 0x10]; \
	std	%t6, [%dst + (offset) + 0x18];

/* MOVE_LASTCHUNK: copy the 16 bytes that END (offset) bytes before
 * src/dst: two ldd loads and four word stores, 6 instructions.  The
 * negative addressing matches the jump table, where src and dst are
 * advanced past the data before jumping into the table.
 */
#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
	ldd	[%src - (offset) - 0x10], %t0; \
	ldd	[%src - (offset) - 0x08], %t2; \
	st	%t0, [%dst - (offset) - 0x10]; \
	st	%t1, [%dst - (offset) - 0x0c]; \
	st	%t2, [%dst - (offset) - 0x08]; \
	st	%t3, [%dst - (offset) - 0x04];

/* MOVE_HALFCHUNK: copy 8 bytes as four halfword loads followed by four
 * halfword stores: 8 instructions.
 */
#define MOVE_HALFCHUNK(src, dst, offset, t0, t1, t2, t3) \
	lduh	[%src + (offset) + 0x00], %t0; \
	lduh	[%src + (offset) + 0x02], %t1; \
	lduh	[%src + (offset) + 0x04], %t2; \
	lduh	[%src + (offset) + 0x06], %t3; \
	sth	%t0, [%dst + (offset) + 0x00]; \
	sth	%t1, [%dst + (offset) + 0x02]; \
	sth	%t2, [%dst + (offset) + 0x04]; \
	sth	%t3, [%dst + (offset) + 0x06];

/* MOVE_SHORTCHUNK: copy the 2 bytes that END (offset) bytes before
 * src/dst: two byte loads followed by two byte stores, 4 instructions.
 * byte_chunk passes negative offsets to address forwards instead.
 */
#define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
	ldub	[%src - (offset) - 0x02], %t0; \
	ldub	[%src - (offset) - 0x01], %t1; \
	stb	%t0, [%dst - (offset) - 0x02]; \
	stb	%t1, [%dst - (offset) - 0x01];

	.text
	.align	4

	.globl  __copy_user_begin
__copy_user_begin:

	.globl	__copy_user
	EXPORT_SYMBOL(__copy_user)

/* dword_align: reached from __copy_user when (src & 3) != 0.  Copies one
 * byte and/or one halfword so that src becomes word aligned, keeping
 * %o0/%o1/%o2 up to date, then continues at label 3 in __copy_user.
 */
dword_align:
	andcc	%o1, 1, %g0
	be	4f
	 andcc	%o1, 2, %g0		/* flags consumed by "bne 3f" below */

	/* src is odd: move one byte */
	EXO2(ldub [%o1], %g2)
	add	%o1, 1, %o1
	EXO2(stb %g2, [%o0])
	sub	%o2, 1, %o2
	bne	3f			/* bit 1 of the original src was set */
	 add	%o0, 1, %o0

	/* one more halfword is needed to reach a word boundary */
	EXO2(lduh [%o1], %g2)
	add	%o1, 2, %o1
	EXO2(sth %g2, [%o0])
	sub	%o2, 2, %o2
	b	3f
	 add	%o0, 2, %o0
4:
	/* src is even but not word aligned: move one halfword */
	EXO2(lduh [%o1], %g2)
	add	%o1, 2, %o1
	EXO2(sth %g2, [%o0])
	sub	%o2, 2, %o2
	b	3f
	 add	%o0, 2, %o0

/* __copy_user: copy %o2 bytes from %o1 to %o0.
 * Returns 0 in %o0 on success; if one of the annotated loads or stores
 * faults, the fixup code returns the number of bytes not copied.
 */
__copy_user:	/* %o0=dst %o1=src %o2=len */
	xor	%o0, %o1, %o4
1:
	andcc	%o4, 3, %o5		/* %o5 = (dst ^ src) & 3 */
2:
	bne	cannot_optimize		/* src and dst differ in their low 2 bits */
	 cmp	%o2, 15			/* flags also used at cannot_optimize */

	bleu	short_aligned_end	/* len <= 15 */
	 andcc	%o1, 3, %g0		/* flags also used at short_aligned_end */

	bne	dword_align		/* src is not word aligned yet */
3:
	 andcc	%o1, 4, %g0

	be	2f
	 mov	%o2, %g1		/* %g1 = remaining length from here on */

	/* bit 2 of src is set: move one word */
	EXO2(ld [%o1], %o4)
	sub	%g1, 4, %g1
	EXO2(st %o4, [%o0])
	add	%o1, 4, %o1
	add	%o0, 4, %o0
2:
	andcc	%g1, 0xffffff80, %g7	/* %g7 = bytes to move in 128-byte blocks */
	be	3f
	 andcc	%o0, 4, %g0

	/* bit 2 of dst clear: use the std variant.  The delay slot is the
	 * first insn of MOVE_BIGCHUNK below, which is identical to the
	 * first insn at ldd_std, hence the "+ 4".
	 */
	be	ldd_std + 4
5:
	MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
80:
	EXT(5b, 80b, 50f)
	subcc	%g7, 128, %g7
	add	%o1, 128, %o1
	bne	5b
	 add	%o0, 128, %o0
3:
	andcc	%g1, 0x70, %g7		/* %g7 = bytes to move in 16-byte chunks */
	be	copy_user_table_end
	 andcc	%g1, 8, %g0		/* flags consumed at copy_user_table_end */

	/* Computed jump into copy_user_table: every 16 data bytes take
	 * 6 instructions = 24 code bytes, so the entry point lies
	 * %g7 + %g7 / 2 bytes before copy_user_table_end.  src and dst
	 * are advanced by %g7 first; the table addresses backwards.
	 */
	sethi	%hi(copy_user_table_end), %o5
	srl	%g7, 1, %o4
	add	%g7, %o4, %o4
	add	%o1, %g7, %o1
	sub	%o5, %o4, %o5
	jmpl	%o5 + %lo(copy_user_table_end), %g0
	 add	%o0, %g7, %o0

copy_user_table:
	MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
copy_user_table_end:
	EXT(copy_user_table, copy_user_table_end, 51f)
	be	copy_user_last7		/* bit 3 of %g1 clear */
	 andcc	%g1, 4, %g0		/* flags consumed at copy_user_last7 */

	/* move 8 bytes */
	EX(ldd	[%o1], %g2, and %g1, 0xf)
	add	%o0, 8, %o0
	add	%o1, 8, %o1
	EX(st	%g2, [%o0 - 0x08], and %g1, 0xf)
	EX2(st	%g3, [%o0 - 0x04], and %g1, 0xf, %g1, sub %g1, 4)

/* copy_user_last7: move the final 0-7 bytes selected by bits 2, 1 and 0
 * of %g1.  Expects the flags of "andcc <len>, 4, %g0" on entry.
 */
copy_user_last7:
	be	1f
	 andcc	%g1, 2, %g0

	EX(ld	[%o1], %g2, and %g1, 7)
	add	%o1, 4, %o1
	EX(st	%g2, [%o0], and %g1, 7)
	add	%o0, 4, %o0
1:
	be	1f
	 andcc	%g1, 1, %g0

	EX(lduh	[%o1], %g2, and %g1, 3)
	add	%o1, 2, %o1
	EX(sth	%g2, [%o0], and %g1, 3)
	add	%o0, 2, %o0
1:
	be	1f
	 nop

	EX(ldub	[%o1], %g2, add %g0, 1)
	EX(stb	%g2, [%o0], add %g0, 1)
1:
	retl
	 clr	%o0			/* success: return 0 */

/* ldd_std: 128-byte loop using std stores, followed by the same dispatch
 * into copy_user_table as at label 3 in __copy_user.
 */
ldd_std:
	MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
81:
	EXT(ldd_std, 81b, 52f)
	subcc	%g7, 128, %g7
	add	%o1, 128, %o1
	bne	ldd_std
	 add	%o0, 128, %o0

	andcc	%g1, 0x70, %g7
	be	copy_user_table_end
	 andcc	%g1, 8, %g0

	sethi	%hi(copy_user_table_end), %o5
	srl	%g7, 1, %o4
	add	%g7, %o4, %o4
	add	%o1, %g7, %o1
	sub	%o5, %o4, %o5
	jmpl	%o5 + %lo(copy_user_table_end), %g0
	 add	%o0, %g7, %o0

/* cannot_optimize: src and dst differ in their low two bits.
 * On entry the flags are those of "cmp %o2, 15" and %o5 = (dst ^ src) & 3.
 */
cannot_optimize:
	bleu	short_end		/* len <= 15 */
	 cmp	%o5, 2

	bne	byte_chunk		/* %o5 != 2: copy byte by byte */
	 and	%o2, 0xfffffff0, %o3	/* %o3 = bytes to move in 16-byte blocks */

	/* %o5 == 2: halfword copies are possible once src is even */
	andcc	%o1, 1, %g0
	be	10f
	 nop

	EXO2(ldub [%o1], %g2)
	add	%o1, 1, %o1
	EXO2(stb %g2, [%o0])
	sub	%o2, 1, %o2
	andcc	%o2, 0xfffffff0, %o3
	be	short_end		/* fewer than 16 bytes left */
	 add	%o0, 1, %o0
10:
	MOVE_HALFCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
	MOVE_HALFCHUNK(o1, o0, 0x08, g2, g3, g4, g5)
82:
	EXT(10b, 82b, 53f)
	subcc	%o3, 0x10, %o3
	add	%o1, 0x10, %o1
	bne	10b
	 add	%o0, 0x10, %o0
	b	2f			/* label 2 inside short_end */
	 and	%o2, 0xe, %o3		/* same as the insn at short_end */

/* byte_chunk: move 16 bytes per iteration with byte loads and stores
 * (32 instructions), then fall through to short_end.
 */
byte_chunk:
	MOVE_SHORTCHUNK(o1, o0, -0x02, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x04, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x06, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x08, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x0a, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x0c, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x0e, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x10, g2, g3)
83:
	EXT(byte_chunk, 83b, 54f)
	subcc	%o3, 0x10, %o3
	add	%o1, 0x10, %o1
	bne	byte_chunk
	 add	%o0, 0x10, %o0

/* short_end: move the last (%o2 & 0xe) bytes through a computed jump
 * into the table at 84, then the odd byte if bit 0 of %o2 is set.
 * Every 2 data bytes take 4 instructions = 16 code bytes, hence the
 * shift by 3.  src and dst are advanced first; the table addresses
 * backwards.
 */
short_end:
	and	%o2, 0xe, %o3
2:
	sethi	%hi(short_table_end), %o5
	sll	%o3, 3, %o4
	add	%o0, %o3, %o0
	sub	%o5, %o4, %o5
	add	%o1, %o3, %o1
	jmpl	%o5 + %lo(short_table_end), %g0
	 andcc	%o2, 1, %g0		/* flags consumed at short_table_end */
84:
	MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)
short_table_end:
	EXT(84b, short_table_end, 55f)
	be	1f
	 nop
	EX(ldub	[%o1], %g2, add %g0, 1)
	EX(stb	%g2, [%o0], add %g0, 1)
1:
	retl
	 clr	%o0			/* success: return 0 */

/* short_aligned_end: len <= 15 and src, dst agree in their low two bits.
 * On entry the flags are those of "andcc %o1, 3, %g0".
 */
short_aligned_end:
	bne	short_end		/* src not word aligned: go bytewise */
	 andcc	%o2, 8, %g0

	be	1f
	 andcc	%o2, 4, %g0		/* flags consumed at copy_user_last7 */

	/* move 8 bytes as two words */
	EXO2(ld	[%o1 + 0x00], %g2)
	EXO2(ld	[%o1 + 0x04], %g3)
	add	%o1, 8, %o1
	EXO2(st	%g2, [%o0 + 0x00])
	EX(st	%g3, [%o0 + 0x04], sub %o2, 4)
	add	%o0, 8, %o0
1:
	b	copy_user_last7
	 mov	%o2, %g1

	.section .fixup,#alloc,#execinstr
	.align	4
/* 97: shared fixup of every EXO2() access: return the current %o2. */
97:
	mov	%o2, %g3
/* fixupretl: common exit of all fixups: return %g3. */
fixupretl:
	retl
	 mov	%g3, %o0

/* exception routine sets %g2 to (broken_insn - first_insn)>>2 */
/* NOTE(review): every handler below also adjusts %o0, but %o0 is
 * overwritten with %g3 in the delay slot of fixupretl, so within this
 * file those adjustments do not influence the returned value.
 */
50:
/* This magic counts how many bytes are left when crash in MOVE_BIGCHUNK
 * happens. This is derived from the amount ldd reads, st stores, etc.
 * Each chunk is 12 insns: 0-3 are the loads, 4-11 the word stores.
 * x = g2 % 12;
 * g3 = (g1 & 0x7f) + g7 - ((g2 / 12) * 32 + ((x < 4) ? 0 : (x - 4) * 4));
 * o0 += (g2 / 12) * 32;
 */
	cmp	%g2, 12
	add	%o0, %g7, %o0
	bcs	1f
	 cmp	%g2, 24
	bcs	2f
	 cmp	%g2, 36
	bcs	3f
	 nop
	sub	%g2, 12, %g2
	sub	%g7, 32, %g7
3:	sub	%g2, 12, %g2
	sub	%g7, 32, %g7
2:	sub	%g2, 12, %g2
	sub	%g7, 32, %g7
1:	cmp	%g2, 4
	bcs,a	60f
	 clr	%g2			/* fault in a load: chunk not stored at all */
	sub	%g2, 4, %g2
	sll	%g2, 2, %g2		/* bytes of this chunk already stored */
60:	and	%g1, 0x7f, %g3
	sub	%o0, %g7, %o0
	add	%g3, %g7, %g3
	ba	fixupretl
	 sub	%g3, %g2, %g3
51:
/* Fault inside copy_user_table (7 chunks of 6 insns: 2 loads, 4 stores).
 * i = 41 - g2; j = i % 6;
 * g3 = (g1 & 15) + (i / 6) * 16 + ((j < 4) ? (j + 1) * 4 : 16);
 * o0 += (g1 & 15) - g3;
 */
	neg	%g2
	and	%g1, 0xf, %g1
	add	%g2, 41, %g2
	add	%o0, %g1, %o0
1:	cmp	%g2, 6
	bcs,a	2f
	 cmp	%g2, 4			/* executed only when leaving the loop */
	add	%g1, 16, %g1
	b	1b
	 sub	%g2, 6, %g2
2:	bcc,a	2f
	 mov	16, %g2			/* j >= 4: fault in a load */
	inc	%g2
	sll	%g2, 2, %g2
2:	add	%g1, %g2, %g3
	ba	fixupretl
	 sub	%o0, %g3, %o0
52:
/* Fault inside the ldd_std loop (8 insns per chunk: 4 ldd, 4 std).
   g3 = (g1 & 0x7f) + g7 - ((g2 / 8) * 32 + ((g2 & 4) ? (g2 & 3) * 8 : 0));
   o0 += (g2 / 8) * 32 */
	andn	%g2, 7, %g4
	add	%o0, %g7, %o0
	andcc	%g2, 4, %g0
	and	%g2, 3, %g2
	sll	%g4, 2, %g4
	sll	%g2, 3, %g2
	bne	60b
	 sub	%g7, %g4, %g7
	ba	60b
	 clr	%g2
53:
/* Fault inside the halfword loop at 10 (2 x 8 insns: 4 lduh, 4 sth).
   g3 = o3 + (o2 & 15) - ((g2 & 8) + ((g2 & 4) ? (g2 & 3) * 2 : 0));
   o0 += (g2 & 8) */
	and	%g2, 3, %g4
	andcc	%g2, 4, %g0
	and	%g2, 8, %g2
	sll	%g4, 1, %g4
	be	1f
	 add	%o0, %g2, %o0
	add	%g2, %g4, %g2
1:	and	%o2, 0xf, %g3
	add	%g3, %o3, %g3
	ba	fixupretl
	 sub	%g3, %g2, %g3
54:
/* Fault inside byte_chunk (groups of 4 insns: 2 ldub, 2 stb).
   g3 = o3 + (o2 & 15) - (g2 / 4) * 2 - ((g2 & 2) ? (g2 & 1) : 0);
   o0 += (g2 / 4) * 2 */
	srl	%g2, 2, %o4
	and	%g2, 1, %o5
	srl	%g2, 1, %g2
	add	%o4, %o4, %o4
	and	%o5, %g2, %o5
	and	%o2, 0xf, %o2
	add	%o0, %o4, %o0
	sub	%o3, %o5, %o3
	sub	%o2, %o4, %o2
	ba	fixupretl
	 add	%o2, %o3, %g3
55:
/* Fault inside the short table at 84 (7 groups of 4 insns: 2 ldub, 2 stb).
   i = 27 - g2;     number of table insns after the faulting one
   g3 = (o2 & 1) + i / 4 * 2 + ((i & 3) ? 2 : 1);
   o0 -= i / 4 * 2 + ((i & 3) ? 2 : 1)
   Within the faulting group nothing has been stored yet unless the
   fault is in the second stb ((i & 3) == 0), in which case one byte has.
   The previous code added ((i & 3) ? 1 : 0) here, which under-reported
   the count by one and returned 0 for a 2-byte copy whose last store
   faulted. */
	neg	%g2
	and	%o2, 1, %o2
	add	%g2, 27, %g2
	srl	%g2, 2, %o5
	andcc	%g2, 3, %g0
	mov	2, %g2
	add	%o5, %o5, %o5
	be,a	1f
	 mov	1, %g2			/* executed only when (i & 3) == 0 */
1:	add	%g2, %o5, %g3
	sub	%o0, %g3, %o0
	ba	fixupretl
	 add	%g3, %o2, %g3

	.globl  __copy_user_end
__copy_user_end: