/*
 * User Space Access Routines
 *
 * Copyright (C) 2000-2002 Hewlett-Packard (John Marvin)
 * Copyright (C) 2000 Richard Hirst <rhirst with parisc-linux.org>
 * Copyright (C) 2001 Matthieu Delahaye <delahaym at esiee.fr>
 * Copyright (C) 2003 Randolph Chung <tausq with parisc-linux.org>
 * Copyright (C) 2017 Helge Deller <deller@gmx.de>
 * Copyright (C) 2017 John David Anglin <dave.anglin@bell.net>
 *
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/*
 * These routines still have plenty of room for optimization
 * (word & doubleword load/store, dual issue, store hints, etc.).
 */

/*
 * The following routines assume that space register 3 (sr3) contains
 * the space id associated with the current user's address space.
 */


	.text

#include <asm/assembly.h>
#include <asm/errno.h>
#include <linux/linkage.h>

	/*
	 * get_sr gets the appropriate space value into
	 * sr1 for kernel/user space access, depending
	 * on the flag stored in the task structure.
	 */

	.macro	get_sr
	mfctl	%cr30,%r1
	ldw	TI_SEGMENT(%r1),%r22
	mfsp	%sr3,%r1
	or,<>	%r22,%r0,%r0
	copy	%r0,%r1
	mtsp	%r1,%sr1
	.endm

	.macro	fixup_branch lbl
	ldil	L%\lbl, %r1
	ldo	R%\lbl(%r1), %r1
	bv	%r0(%r1)
	.endm

	/*
	 * unsigned long lclear_user(void *to, unsigned long n)
	 *
	 * Returns 0 for success;
	 * otherwise, returns the number of bytes not transferred.
	 */

ENTRY_CFI(lclear_user)
	.proc
	.callinfo NO_CALLS
	.entry
	comib,=,n   0,%r25,$lclu_done
	get_sr
$lclu_loop:
	addib,<>    -1,%r25,$lclu_loop
1:      stbs,ma     %r0,1(%sr1,%r26)

$lclu_done:
	bv          %r0(%r2)
	copy        %r25,%r28
	.exit
ENDPROC_CFI(lclear_user)

	.section .fixup,"ax"
2:	fixup_branch $lclu_done
	ldo         1(%r25),%r25
	.previous

	ASM_EXCEPTIONTABLE_ENTRY(1b,2b)

	.procend
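	/*
	 * For reference only: a minimal C sketch of the semantics above
	 * (not the generated code, and not an existing kernel function).
	 * user_store_byte() is a hypothetical helper standing in for the
	 * faulting store plus its exception-table fixup.
	 *
	 *	unsigned long lclear_user_sketch(unsigned char *to,
	 *					 unsigned long n)
	 *	{
	 *		while (n) {
	 *			if (user_store_byte(to, 0))
	 *				break;		// fault: n bytes left
	 *			to++;
	 *			n--;
	 *		}
	 *		return n;			// 0 on success
	 *	}
	 */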

	/*
	 * long lstrnlen_user(char *s, long n)
	 *
	 * Returns 0 if exception before zero byte or reaching N,
	 *         N+1 if N would be exceeded,
	 *         else strlen + 1 (i.e. includes zero byte).
	 */

ENTRY_CFI(lstrnlen_user)
	.proc
	.callinfo NO_CALLS
	.entry
	comib,=     0,%r25,$lslen_nzero
	copy	    %r26,%r24
	get_sr
1:      ldbs,ma     1(%sr1,%r26),%r1
$lslen_loop:
	comib,=,n   0,%r1,$lslen_done
	addib,<>    -1,%r25,$lslen_loop
2:      ldbs,ma     1(%sr1,%r26),%r1
$lslen_done:
	bv          %r0(%r2)
	sub	    %r26,%r24,%r28
	.exit

$lslen_nzero:
	b           $lslen_done
	ldo         1(%r26),%r26 /* special case for N == 0 */
ENDPROC_CFI(lstrnlen_user)

	.section .fixup,"ax"
3:      fixup_branch $lslen_done
	copy        %r24,%r26    /* reset r26 so 0 is returned on fault */
	.previous

	ASM_EXCEPTIONTABLE_ENTRY(1b,3b)
	ASM_EXCEPTIONTABLE_ENTRY(2b,3b)

	.procend
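	/*
	 * For reference only: a minimal C sketch of the return convention
	 * above (semantics only, not instruction for instruction).  The
	 * helper user_load_byte() is hypothetical and stands in for the
	 * faulting load plus its exception-table fixup; note that n == 0
	 * falls out as the "N would be exceeded" case and returns 1.
	 *
	 *	long lstrnlen_user_sketch(const char *s, long n)
	 *	{
	 *		long i;
	 *		unsigned char c;
	 *
	 *		for (i = 0; i < n; i++) {
	 *			if (user_load_byte(s + i, &c))
	 *				return 0;	// fault
	 *			if (c == '\0')
	 *				return i + 1;	// strlen + 1
	 *		}
	 *		return n + 1;			// no NUL within n bytes
	 *	}
	 */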


/*
 * unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
 *
 * Inputs:
 * - sr1 already contains space of source region
 * - sr2 already contains space of destination region
 *
 * Returns:
 * - number of bytes that could not be copied.
 *   On success, this will be zero.
 *
 * This code is based on a C implementation of a copy routine written by
 * Randolph Chung, which in turn was derived from glibc.
 *
 * Several strategies are tried to get the best performance for various
 * conditions. In the optimal case, we copy with loops that move 32 or 16
 * bytes at a time using general registers.  Unaligned copies are handled
 * either by aligning the destination and then using a shift-and-write
 * method, or in a few cases by falling back to a byte-at-a-time copy.
 *
 * Testing with various alignments and buffer sizes shows that this code is
 * often >10x faster than a simple byte-at-a-time copy, even for strangely
 * aligned operands. It is interesting to note that the glibc version of memcpy
 * (written in C) is actually quite fast already. This routine is able to beat
 * it by 30-40% for aligned copies because of the loop unrolling, but in some
 * cases the glibc version is still slightly faster. This lends more
 * credibility to the idea that gcc can generate very good code as long as we
 * are careful.
 *
 * Possible optimizations:
 * - add cache prefetching
 * - try not to use the post-increment address modifiers; they may create
 *   additional interlocks. The assumption is that those were only efficient
 *   on old machines (pre PA8000 processors).
 */
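/*
 * For illustration only: a rough C sketch of the shift-and-write idea used
 * below when src and dst are not aligned the same way (assuming 32-bit
 * big-endian words, as on PA-RISC).  dst has already been word-aligned, src
 * is rounded down to a word boundary, and each output word is merged from
 * two adjacent source words, much like the shrpw/%sar pairs in the .Lcase
 * and .Ldo paths.  The sketch ignores faults and C aliasing rules, and
 * assumes off is 1..3, since identical alignment is handled elsewhere.
 *
 *	static void shift_merge_sketch(unsigned int *dst,
 *				       const unsigned char *src,
 *				       unsigned long nwords)
 *	{
 *		unsigned long off = (unsigned long)src & 3;
 *		const unsigned int *s = (const unsigned int *)(src - off);
 *		unsigned int sh = 8 * off;
 *		unsigned int hi = *s++;			// preload first word
 *
 *		while (nwords--) {
 *			unsigned int lo = *s++;
 *			*dst++ = (hi << sh) | (lo >> (32 - sh));
 *			hi = lo;
 *		}
 *	}
 */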

	dst = arg0
	src = arg1
	len = arg2
	end = arg3
	t1  = r19
	t2  = r20
	t3  = r21
	t4  = r22
	srcspc = sr1
	dstspc = sr2

	t0 = r1
	a1 = t1
	a2 = t2
	a3 = t3
	a0 = t4

	save_src = ret0
	save_dst = ret1
	save_len = r31

ENTRY_CFI(pa_memcpy)
	.proc
	.callinfo NO_CALLS
	.entry

	/* Last destination address */
	add	dst,len,end

	/* short copy with less than 16 bytes? */
	cmpib,COND(>>=),n 15,len,.Lbyte_loop

	/* same alignment? */
	xor	src,dst,t0
	extru	t0,31,2,t1
	cmpib,<>,n  0,t1,.Lunaligned_copy

#ifdef CONFIG_64BIT
	/* only do 64-bit copies if we can get aligned. */
	extru	t0,31,3,t1
	cmpib,<>,n  0,t1,.Lalign_loop32

	/* loop until we are 64-bit aligned */
.Lalign_loop64:
	extru	dst,31,3,t1
	cmpib,=,n	0,t1,.Lcopy_loop_16_start
20:	ldb,ma	1(srcspc,src),t1
21:	stb,ma	t1,1(dstspc,dst)
	b	.Lalign_loop64
	ldo	-1(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

.Lcopy_loop_16_start:
	ldi	31,t0
.Lcopy_loop_16:
	cmpb,COND(>>=),n t0,len,.Lword_loop

10:	ldd	0(srcspc,src),t1
11:	ldd	8(srcspc,src),t2
	ldo	16(src),src
12:	std,ma	t1,8(dstspc,dst)
13:	std,ma	t2,8(dstspc,dst)
14:	ldd	0(srcspc,src),t1
15:	ldd	8(srcspc,src),t2
	ldo	16(src),src
16:	std,ma	t1,8(dstspc,dst)
17:	std,ma	t2,8(dstspc,dst)

	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy16_fault)
	ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy16_fault)
	ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)

	b	.Lcopy_loop_16
	ldo	-32(len),len

.Lword_loop:
	cmpib,COND(>>=),n 3,len,.Lbyte_loop
20:	ldw,ma	4(srcspc,src),t1
21:	stw,ma	t1,4(dstspc,dst)
	b	.Lword_loop
	ldo	-4(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

#endif /* CONFIG_64BIT */

	/* loop until we are 32-bit aligned */
.Lalign_loop32:
	extru	dst,31,2,t1
	cmpib,=,n	0,t1,.Lcopy_loop_8
20:	ldb,ma	1(srcspc,src),t1
21:	stb,ma	t1,1(dstspc,dst)
	b	.Lalign_loop32
	ldo	-1(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)


.Lcopy_loop_8:
	cmpib,COND(>>=),n 15,len,.Lbyte_loop

10:	ldw	0(srcspc,src),t1
11:	ldw	4(srcspc,src),t2
12:	stw,ma	t1,4(dstspc,dst)
13:	stw,ma	t2,4(dstspc,dst)
14:	ldw	8(srcspc,src),t1
15:	ldw	12(srcspc,src),t2
	ldo	16(src),src
16:	stw,ma	t1,4(dstspc,dst)
17:	stw,ma	t2,4(dstspc,dst)

	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy8_fault)
	ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy8_fault)
	ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)

	b	.Lcopy_loop_8
	ldo	-16(len),len

.Lbyte_loop:
	cmpclr,COND(<>) len,%r0,%r0
	b,n	.Lcopy_done
20:	ldb	0(srcspc,src),t1
	ldo	1(src),src
21:	stb,ma	t1,1(dstspc,dst)
	b	.Lbyte_loop
	ldo	-1(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

.Lcopy_done:
	bv	%r0(%r2)
	sub	end,dst,ret0


	/* src and dst are not aligned the same way. */
	/* need to go the hard way */
.Lunaligned_copy:
	/* align until dst is 32bit-word-aligned */
	extru	dst,31,2,t1
	cmpib,=,n	0,t1,.Lcopy_dstaligned
20:	ldb	0(srcspc,src),t1
	ldo	1(src),src
21:	stb,ma	t1,1(dstspc,dst)
	b	.Lunaligned_copy
	ldo	-1(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

.Lcopy_dstaligned:

	/* store src, dst and len in safe place */
	copy	src,save_src
	copy	dst,save_dst
	copy	len,save_len

	/* len now needs to give the number of words to copy */
	SHRREG	len,2,len

	/*
	 * Copy from an unaligned src to an aligned dst using shifts.
	 * Handles 4 words per loop.
	 */

	depw,z src,28,2,t0
	subi 32,t0,t0
	mtsar t0
	extru len,31,2,t0
	cmpib,= 2,t0,.Lcase2
	/* Make src aligned by rounding it down. */
	depi 0,31,2,src

	cmpiclr,<> 3,t0,%r0
	b,n .Lcase3
	cmpiclr,<> 1,t0,%r0
	b,n .Lcase1
.Lcase0:
	cmpb,COND(=) %r0,len,.Lcda_finish
	nop

1:	ldw,ma 4(srcspc,src), a3
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma 4(srcspc,src), a0
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	b,n .Ldo3
.Lcase1:
1:	ldw,ma 4(srcspc,src), a2
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma 4(srcspc,src), a3
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	ldo -1(len),len
	cmpb,COND(=),n %r0,len,.Ldo0
.Ldo4:
1:	ldw,ma 4(srcspc,src), a0
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw a2, a3, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo3:
1:	ldw,ma 4(srcspc,src), a1
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw a3, a0, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo2:
1:	ldw,ma 4(srcspc,src), a2
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw a0, a1, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo1:
1:	ldw,ma 4(srcspc,src), a3
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw a1, a2, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
	ldo -4(len),len
	cmpb,COND(<>) %r0,len,.Ldo4
	nop
.Ldo0:
	shrpw a2, a3, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)

.Lcda_rdfault:
.Lcda_finish:
	/* calculate new src, dst and len and jump to byte-copy loop */
	sub	dst,save_dst,t0
	add	save_src,t0,src
	b	.Lbyte_loop
	sub	save_len,t0,len

.Lcase3:
1:	ldw,ma 4(srcspc,src), a0
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma 4(srcspc,src), a1
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	b .Ldo2
	ldo 1(len),len
.Lcase2:
1:	ldw,ma 4(srcspc,src), a1
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma 4(srcspc,src), a2
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	b .Ldo1
	ldo 2(len),len


	/* fault exception fixup handlers: */
#ifdef CONFIG_64BIT
.Lcopy16_fault:
	b	.Lcopy_done
10:	std,ma	t1,8(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
#endif

.Lcopy8_fault:
	b	.Lcopy_done
10:	stw,ma	t1,4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)

	.exit
ENDPROC_CFI(pa_memcpy)
	.procend

	.end