/*
 * User Space Access Routines
 *
 * Copyright (C) 2000-2002 Hewlett-Packard (John Marvin)
 * Copyright (C) 2000 Richard Hirst <rhirst with parisc-linux.org>
 * Copyright (C) 2001 Matthieu Delahaye <delahaym at esiee.fr>
 * Copyright (C) 2003 Randolph Chung <tausq with parisc-linux.org>
 * Copyright (C) 2017 Helge Deller <deller@gmx.de>
 * Copyright (C) 2017 John David Anglin <dave.anglin@bell.net>
 *
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/*
 * These routines still have plenty of room for optimization
 * (word & doubleword load/store, dual issue, store hints, etc.).
 */

/*
 * The following routines assume that space register 3 (sr3) contains
 * the space id associated with the current user's address space.
 */


	.text

#include <asm/assembly.h>
#include <asm/errno.h>
#include <linux/linkage.h>

	/*
	 * get_sr gets the appropriate space value into
	 * sr1 for kernel/user space access, depending
	 * on the flag stored in the task structure.
	 */

	.macro get_sr
	mfctl	%cr30,%r1
	ldw	TI_SEGMENT(%r1),%r22
	mfsp	%sr3,%r1
	or,<>	%r22,%r0,%r0
	copy	%r0,%r1
	mtsp	%r1,%sr1
	.endm

	.macro fixup_branch lbl
	ldil	L%\lbl, %r1
	ldo	R%\lbl(%r1), %r1
	bv	%r0(%r1)
	.endm

	/*
	 * unsigned long lclear_user(void *to, unsigned long n)
	 *
	 * Returns 0 for success.
	 * Otherwise, returns the number of bytes not transferred.
	 */

ENTRY_CFI(lclear_user)
	.proc
	.callinfo NO_CALLS
	.entry
	comib,=,n   0,%r25,$lclu_done
	get_sr
$lclu_loop:
	addib,<>    -1,%r25,$lclu_loop
1:	stbs,ma     %r0,1(%sr1,%r26)

$lclu_done:
	bv          %r0(%r2)
	copy        %r25,%r28
	.exit
ENDPROC_CFI(lclear_user)

	.section .fixup,"ax"
2:	fixup_branch $lclu_done
	ldo         1(%r25),%r25
	.previous

	ASM_EXCEPTIONTABLE_ENTRY(1b,2b)

	.procend
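	/*
	 * A note on fault handling in this file: every access to user
	 * space is tagged with a numeric local label (such as "1:"
	 * above), and a matching ASM_EXCEPTIONTABLE_ENTRY() records that
	 * instruction's address together with a fixup address.  If the
	 * access faults, the exception handler branches to the fixup
	 * code instead of taking the kernel down.  For lclear_user, the
	 * fixup adds the byte that could not be stored back onto %r25
	 * (the count was already decremented by the addib) and jumps to
	 * $lclu_done, so the caller sees the number of bytes left
	 * uncleared.
	 */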
	/*
	 * long lstrnlen_user(char *s, long n)
	 *
	 * Returns 0 if exception before zero byte or reaching N,
	 *         N+1 if N would be exceeded,
	 *         else strlen + 1 (i.e. includes zero byte).
	 */

ENTRY_CFI(lstrnlen_user)
	.proc
	.callinfo NO_CALLS
	.entry
	comib,=     0,%r25,$lslen_nzero
	copy	    %r26,%r24
	get_sr
1:	ldbs,ma     1(%sr1,%r26),%r1
$lslen_loop:
	comib,=,n   0,%r1,$lslen_done
	addib,<>    -1,%r25,$lslen_loop
2:	ldbs,ma     1(%sr1,%r26),%r1
$lslen_done:
	bv          %r0(%r2)
	sub	    %r26,%r24,%r28
	.exit

$lslen_nzero:
	b           $lslen_done
	ldo         1(%r26),%r26	/* special case for N == 0 */
ENDPROC_CFI(lstrnlen_user)

	.section .fixup,"ax"
3:	fixup_branch $lslen_done
	copy        %r24,%r26		/* reset r26 so 0 is returned on fault */
	.previous

	ASM_EXCEPTIONTABLE_ENTRY(1b,3b)
	ASM_EXCEPTIONTABLE_ENTRY(2b,3b)

	.procend



/*
 * unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
 *
 * Inputs:
 * - sr1 already contains space of source region
 * - sr2 already contains space of destination region
 *
 * Returns:
 * - number of bytes that could not be copied.
 *   On success, this will be zero.
 *
 * This code is based on a C implementation of a copy routine written by
 * Randolph Chung, which in turn was derived from glibc.
 *
 * Several strategies are used to get the best performance under various
 * conditions.  In the optimal case, we copy in loops that move 32 or 16
 * bytes at a time using general registers.  Unaligned copies are handled
 * either by aligning the destination and then using a shift-and-write
 * method, or in a few cases by falling back to a byte-at-a-time copy.
 *
 * Testing with various alignments and buffer sizes shows that this code is
 * often >10x faster than a simple byte-at-a-time copy, even for strangely
 * aligned operands.  It is interesting to note that the glibc version of
 * memcpy (written in C) is actually quite fast already.  This routine is
 * able to beat it by 30-40% for aligned copies because of the loop
 * unrolling, but in some cases the glibc version is still slightly faster.
 * This lends credibility to the idea that gcc can generate very good code
 * as long as we are careful.
 *
 * Possible optimizations:
 * - add cache prefetching
 * - try not to use the post-increment address modifiers; they may create
 *   additional interlocks.  The assumption is that those were only
 *   efficient on old machines (pre-PA8000 processors).
 */

	dst = arg0
	src = arg1
	len = arg2
	end = arg3
	t1  = r19
	t2  = r20
	t3  = r21
	t4  = r22
	srcspc = sr1
	dstspc = sr2

	t0 = r1
	a1 = t1
	a2 = t2
	a3 = t3
	a0 = t4

	save_src = ret0
	save_dst = ret1
	save_len = r31

ENTRY_CFI(pa_memcpy)
	.proc
	.callinfo NO_CALLS
	.entry

	/* Last destination address */
	add	dst,len,end

	/* short copy with less than 16 bytes? */
	cmpib,>>=,n 15,len,.Lbyte_loop

	/* same alignment? */
	xor	src,dst,t0
	extru	t0,31,2,t1
	cmpib,<>,n  0,t1,.Lunaligned_copy
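	/*
	 * Aligned fast path: src and dst have the same word alignment at
	 * this point.  On 64-bit kernels, if both operands can also be
	 * brought to doubleword alignment, dst is first aligned to 8
	 * bytes and then 32 bytes are moved per iteration with ldd/std
	 * (.Lcopy_loop_16), with .Lword_loop handling the remaining
	 * words.  Otherwise (and on 32-bit kernels), dst is aligned to 4
	 * bytes and 16 bytes are moved per iteration with ldw/stw
	 * (.Lcopy_loop_4).  Whatever is left over falls through to the
	 * byte-at-a-time loop (.Lbyte_loop).
	 */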
#ifdef CONFIG_64BIT
	/* only do 64-bit copies if we can get aligned. */
	extru	t0,31,3,t1
	cmpib,<>,n  0,t1,.Lalign_loop32

	/* loop until we are 64-bit aligned */
.Lalign_loop64:
	extru	dst,31,3,t1
	cmpib,=,n	0,t1,.Lcopy_loop_16
20:	ldb,ma	1(srcspc,src),t1
21:	stb,ma	t1,1(dstspc,dst)
	b	.Lalign_loop64
	ldo	-1(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

	ldi	31,t0
.Lcopy_loop_16:
	cmpb,COND(>>=),n t0,len,.Lword_loop

10:	ldd	0(srcspc,src),t1
11:	ldd	8(srcspc,src),t2
	ldo	16(src),src
12:	std,ma	t1,8(dstspc,dst)
13:	std,ma	t2,8(dstspc,dst)
14:	ldd	0(srcspc,src),t1
15:	ldd	8(srcspc,src),t2
	ldo	16(src),src
16:	std,ma	t1,8(dstspc,dst)
17:	std,ma	t2,8(dstspc,dst)

	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy16_fault)
	ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy16_fault)
	ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)

	b	.Lcopy_loop_16
	ldo	-32(len),len

.Lword_loop:
	cmpib,COND(>>=),n 3,len,.Lbyte_loop
20:	ldw,ma	4(srcspc,src),t1
21:	stw,ma	t1,4(dstspc,dst)
	b	.Lword_loop
	ldo	-4(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

#endif /* CONFIG_64BIT */

	/* loop until we are 32-bit aligned */
.Lalign_loop32:
	extru	dst,31,2,t1
	cmpib,=,n	0,t1,.Lcopy_loop_4
20:	ldb,ma	1(srcspc,src),t1
21:	stb,ma	t1,1(dstspc,dst)
	b	.Lalign_loop32
	ldo	-1(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)


.Lcopy_loop_4:
	cmpib,COND(>>=),n 15,len,.Lbyte_loop

10:	ldw	0(srcspc,src),t1
11:	ldw	4(srcspc,src),t2
12:	stw,ma	t1,4(dstspc,dst)
13:	stw,ma	t2,4(dstspc,dst)
14:	ldw	8(srcspc,src),t1
15:	ldw	12(srcspc,src),t2
	ldo	16(src),src
16:	stw,ma	t1,4(dstspc,dst)
17:	stw,ma	t2,4(dstspc,dst)

	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy8_fault)
	ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy8_fault)
	ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)

	b	.Lcopy_loop_4
	ldo	-16(len),len

.Lbyte_loop:
	cmpclr,COND(<>) len,%r0,%r0
	b,n	.Lcopy_done
20:	ldb	0(srcspc,src),t1
	ldo	1(src),src
21:	stb,ma	t1,1(dstspc,dst)
	b	.Lbyte_loop
	ldo	-1(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

.Lcopy_done:
	bv	%r0(%r2)
	sub	end,dst,ret0
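	/*
	 * For reference, a rough, stand-alone C sketch of the
	 * shift-and-merge technique used by the .Lcase / .Ldo loops
	 * below.  This is not the kernel code and not Randolph Chung's
	 * original; the name shift_merge_copy is made up for the
	 * illustration.  It assumes a big-endian host (as on PA-RISC),
	 * so the word shifts line up with what shrpw does, and it
	 * assumes the caller guarantees that whole source words may be
	 * read, just as the assembly reads the word containing the
	 * rounded-down src address.
	 *
	 *	#include <assert.h>
	 *	#include <stddef.h>
	 *	#include <stdint.h>
	 *	#include <string.h>
	 *
	 *	// Copy len bytes to a word-aligned dst from a source that
	 *	// starts ofs bytes (1..3) past the word-aligned address ws.
	 *	static void shift_merge_copy(uint32_t *dst, const uint32_t *ws,
	 *				     unsigned int ofs, size_t len)
	 *	{
	 *		uint32_t lo = ws[0], hi;
	 *		size_t n = len / 4, i;
	 *
	 *		assert(ofs >= 1 && ofs <= 3);
	 *		for (i = 0; i < n; i++) {
	 *			hi = ws[i + 1];
	 *			// low (4 - ofs) bytes of lo, then the high ofs
	 *			// bytes of hi: the big-endian equivalent of
	 *			// shrpw with %sar = 32 - 8*ofs
	 *			dst[i] = (lo << (8 * ofs)) |
	 *				 (hi >> (8 * (4 - ofs)));
	 *			lo = hi;
	 *		}
	 *		// the tail (len % 4 bytes) goes byte by byte, as the
	 *		// assembly does via .Lbyte_loop
	 *		memcpy((unsigned char *)dst + 4 * n,
	 *		       (const unsigned char *)ws + ofs + 4 * n,
	 *		       len % 4);
	 *	}
	 *
	 * Example use: shift_merge_copy(out, words, 2, 14) copies 14
	 * bytes starting two bytes into the word array "words" (which
	 * must hold at least four readable words) into the word-aligned
	 * buffer "out" (at least 14 bytes).
	 */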
	/* src and dst are not aligned the same way. */
	/* need to go the hard way */
.Lunaligned_copy:
	/* align until dst is 32bit-word-aligned */
	extru	dst,31,2,t1
	cmpib,COND(=),n	0,t1,.Lcopy_dstaligned
20:	ldb	0(srcspc,src),t1
	ldo	1(src),src
21:	stb,ma	t1,1(dstspc,dst)
	b	.Lunaligned_copy
	ldo	-1(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

.Lcopy_dstaligned:

	/* store src, dst and len in safe place */
	copy	src,save_src
	copy	dst,save_dst
	copy	len,save_len

	/* len now needs to give the number of words to copy */
	SHRREG	len,2,len

	/*
	 * Copy from a not-aligned src to an aligned dst using shifts.
	 * Handles 4 words per loop.
	 */

	depw,z src,28,2,t0
	subi 32,t0,t0
	mtsar t0
	extru len,31,2,t0
	cmpib,= 2,t0,.Lcase2
	/* Make src aligned by rounding it down. */
	depi 0,31,2,src

	cmpiclr,<> 3,t0,%r0
	b,n .Lcase3
	cmpiclr,<> 1,t0,%r0
	b,n .Lcase1
.Lcase0:
	cmpb,= %r0,len,.Lcda_finish
	nop

1:	ldw,ma 4(srcspc,src), a3
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma 4(srcspc,src), a0
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	b,n .Ldo3
.Lcase1:
1:	ldw,ma 4(srcspc,src), a2
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma 4(srcspc,src), a3
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	ldo -1(len),len
	cmpb,=,n %r0,len,.Ldo0
.Ldo4:
1:	ldw,ma 4(srcspc,src), a0
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw a2, a3, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo3:
1:	ldw,ma 4(srcspc,src), a1
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw a3, a0, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo2:
1:	ldw,ma 4(srcspc,src), a2
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw a0, a1, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo1:
1:	ldw,ma 4(srcspc,src), a3
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw a1, a2, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
	ldo -4(len),len
	cmpb,<> %r0,len,.Ldo4
	nop
.Ldo0:
	shrpw a2, a3, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)

.Lcda_rdfault:
.Lcda_finish:
	/* calculate new src, dst and len and jump to byte-copy loop */
	sub	dst,save_dst,t0
	add	save_src,t0,src
	b	.Lbyte_loop
	sub	save_len,t0,len

.Lcase3:
1:	ldw,ma 4(srcspc,src), a0
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma 4(srcspc,src), a1
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	b .Ldo2
	ldo 1(len),len
.Lcase2:
1:	ldw,ma 4(srcspc,src), a1
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma 4(srcspc,src), a2
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	b .Ldo1
	ldo 2(len),len


	/* fault exception fixup handlers: */
#ifdef CONFIG_64BIT
.Lcopy16_fault:
10:	b .Lcopy_done
	std,ma t1,8(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
#endif

.Lcopy8_fault:
10:	b .Lcopy_done
	stw,ma t1,4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)

	.exit
ENDPROC_CFI(pa_memcpy)
	.procend

	.end