/*
 *    User Space Access Routines
 *
 *    Copyright (C) 2000-2002 Hewlett-Packard (John Marvin)
 *    Copyright (C) 2000 Richard Hirst <rhirst with parisc-linux.org>
 *    Copyright (C) 2001 Matthieu Delahaye <delahaym at esiee.fr>
 *    Copyright (C) 2003 Randolph Chung <tausq with parisc-linux.org>
 *    Copyright (C) 2017 Helge Deller <deller@gmx.de>
 *    Copyright (C) 2017 John David Anglin <dave.anglin@bell.net>
 *
 *
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2, or (at your option)
 *    any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/*
 * These routines still have plenty of room for optimization
 * (word & doubleword load/store, dual issue, store hints, etc.).
 */

/*
 * The following routines assume that space register 3 (sr3) contains
 * the space id associated with the current user's address space.
 */


	.text

#include <asm/assembly.h>
#include <asm/errno.h>
#include <linux/linkage.h>

	/*
	 * get_sr gets the appropriate space value into
	 * sr1 for kernel/user space access, depending
	 * on the flag stored in the task structure.
	 * When TI_SEGMENT is zero (kernel segment), sr1 is set to
	 * space 0; otherwise it is loaded from sr3 (the user space id).
	 */

	.macro	get_sr
	mfctl	%cr30,%r1
	ldw	TI_SEGMENT(%r1),%r22
	mfsp	%sr3,%r1
	or,<>	%r22,%r0,%r0
	copy	%r0,%r1
	mtsp	%r1,%sr1
	.endm

	/*
	 * unsigned long lclear_user(void *to, unsigned long n)
	 *
	 * Returns 0 for success;
	 * otherwise, returns the number of bytes not transferred.
	 */

ENTRY_CFI(lclear_user)
	.proc
	.callinfo NO_CALLS
	.entry
	comib,=,n	0,%r25,$lclu_done
	get_sr
$lclu_loop:
	addib,<>	-1,%r25,$lclu_loop
1:	stbs,ma		%r0,1(%sr1,%r26)

$lclu_done:
	bv		%r0(%r2)
	copy		%r25,%r28

2:	b		$lclu_done
	ldo		1(%r25),%r25

	ASM_EXCEPTIONTABLE_ENTRY(1b,2b)

	.exit
ENDPROC_CFI(lclear_user)


	.procend

	/*
	 * long lstrnlen_user(char *s, long n)
	 *
	 * Returns 0 if exception before zero byte or reaching N,
	 *         N+1 if N would be exceeded,
	 *         else strlen + 1 (i.e. includes zero byte).
	 */

ENTRY_CFI(lstrnlen_user)
	.proc
	.callinfo NO_CALLS
	.entry
	comib,=		0,%r25,$lslen_nzero
	copy		%r26,%r24
	get_sr
1:	ldbs,ma		1(%sr1,%r26),%r1
$lslen_loop:
	comib,=,n	0,%r1,$lslen_done
	addib,<>	-1,%r25,$lslen_loop
2:	ldbs,ma		1(%sr1,%r26),%r1
$lslen_done:
	bv		%r0(%r2)
	sub		%r26,%r24,%r28
	.exit

$lslen_nzero:
	b		$lslen_done
	ldo		1(%r26),%r26	/* special case for N == 0 */

3:	b		$lslen_done
	copy		%r24,%r26	/* reset r26 so 0 is returned on fault */

	ASM_EXCEPTIONTABLE_ENTRY(1b,3b)
	ASM_EXCEPTIONTABLE_ENTRY(2b,3b)

ENDPROC_CFI(lstrnlen_user)

	.procend

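	/*
	 * Illustrative only: a rough C equivalent of lstrnlen_user above
	 * (the local variable names are made up for the sketch).  It
	 * assumes a faulting user-space load is redirected by the
	 * exception table to the fixup at local label 3, which makes the
	 * routine return 0.  Note that the byte load sitting in the branch
	 * delay slot runs one extra time when the count is exhausted,
	 * which is how the N+1 return value arises.
	 *
	 *	long lstrnlen_user(const char *s, long n)
	 *	{
	 *		const char *p = s;
	 *		char c;
	 *
	 *		if (n == 0)
	 *			return 1;	<- N == 0 special case
	 *		c = *p++;		<- may fault: return 0
	 *		for (;;) {
	 *			if (c == 0)
	 *				break;	<- NUL found: strlen + 1
	 *			n--;
	 *			c = *p++;	<- delay slot: runs even when n hits 0
	 *			if (n == 0)
	 *				break;	<- limit exceeded: N + 1
	 *		}
	 *		return p - s;
	 *	}
	 */
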
/*
 * unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
 *
 * Inputs:
 * - sr1 already contains space of source region
 * - sr2 already contains space of destination region
 *
 * Returns:
 * - number of bytes that could not be copied.
 *   On success, this will be zero.
 *
 * This code is based on a C implementation of a copy routine written by
 * Randolph Chung, which in turn was derived from glibc.
 *
 * Several strategies are tried to get the best performance for various
 * conditions. In the optimal case, we copy using loops that move 32 or 16
 * bytes at a time using general registers. Unaligned copies are handled
 * either by aligning the destination and then using a shift-and-write
 * method, or in a few cases by falling back to a byte-at-a-time copy.
 *
 * Testing with various alignments and buffer sizes shows that this code is
 * often >10x faster than a simple byte-at-a-time copy, even for strangely
 * aligned operands. It is interesting to note that the glibc version of memcpy
 * (written in C) is actually quite fast already. This routine is able to beat
 * it by 30-40% for aligned copies because of the loop unrolling, but in some
 * cases the glibc version is still slightly faster. This lends more
 * credibility to the claim that gcc can generate very good code as long as we
 * are careful.
 *
 * Possible optimizations:
 * - add cache prefetching
 * - try not to use the post-increment address modifiers; they may create
 *   additional interlocks. The assumption is that those were only efficient
 *   on old machines (pre-PA8000 processors).
 */

	dst = arg0
	src = arg1
	len = arg2
	end = arg3
	t1  = r19
	t2  = r20
	t3  = r21
	t4  = r22
	srcspc = sr1
	dstspc = sr2

	t0 = r1
	a1 = t1
	a2 = t2
	a3 = t3
	a0 = t4

	save_src = ret0
	save_dst = ret1
	save_len = r31

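	/*
	 * Rough sketch of the control flow below, in C-like pseudocode.
	 * This is illustrative only, not a literal translation: the real
	 * code keeps everything in the registers aliased above, and every
	 * exit path returns "end - dst", i.e. the number of bytes left
	 * uncopied (zero on success).
	 *
	 *	if (len < 16)
	 *		goto byte_loop;
	 *	if ((src ^ dst) & 3)
	 *		goto unaligned_copy;	<- shift-and-write path
	 *	#ifdef CONFIG_64BIT
	 *	if (!((src ^ dst) & 7)) {
	 *		byte-copy until dst is 8-byte aligned;
	 *		copy 32 bytes per iteration (ldd/std) while len > 31;
	 *		copy 4 bytes at a time while len > 3;
	 *		goto byte_loop;
	 *	}
	 *	#endif
	 *	byte-copy until dst is 4-byte aligned;
	 *	copy 16 bytes per iteration (ldw/stw) while len > 15;
	 *	byte_loop:
	 *	copy the remaining bytes one at a time;
	 */
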
ENTRY_CFI(pa_memcpy)
	.proc
	.callinfo NO_CALLS
	.entry

	/* Last destination address */
	add	dst,len,end

	/* short copy with less than 16 bytes? */
	cmpib,COND(>>=),n 15,len,.Lbyte_loop

	/* same alignment? */
	xor	src,dst,t0
	extru	t0,31,2,t1
	cmpib,<>,n	0,t1,.Lunaligned_copy

#ifdef CONFIG_64BIT
	/* only do 64-bit copies if we can get aligned. */
	extru	t0,31,3,t1
	cmpib,<>,n	0,t1,.Lalign_loop32

	/* loop until we are 64-bit aligned */
.Lalign_loop64:
	extru	dst,31,3,t1
	cmpib,=,n	0,t1,.Lcopy_loop_16_start
20:	ldb,ma	1(srcspc,src),t1
21:	stb,ma	t1,1(dstspc,dst)
	b	.Lalign_loop64
	ldo	-1(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

.Lcopy_loop_16_start:
	ldi	31,t0
.Lcopy_loop_16:
	cmpb,COND(>>=),n t0,len,.Lword_loop

10:	ldd	0(srcspc,src),t1
11:	ldd	8(srcspc,src),t2
	ldo	16(src),src
12:	std,ma	t1,8(dstspc,dst)
13:	std,ma	t2,8(dstspc,dst)
14:	ldd	0(srcspc,src),t1
15:	ldd	8(srcspc,src),t2
	ldo	16(src),src
16:	std,ma	t1,8(dstspc,dst)
17:	std,ma	t2,8(dstspc,dst)

	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy16_fault)
	ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy16_fault)
	ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)

	b	.Lcopy_loop_16
	ldo	-32(len),len

.Lword_loop:
	cmpib,COND(>>=),n 3,len,.Lbyte_loop
20:	ldw,ma	4(srcspc,src),t1
21:	stw,ma	t1,4(dstspc,dst)
	b	.Lword_loop
	ldo	-4(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

#endif /* CONFIG_64BIT */

	/* loop until we are 32-bit aligned */
.Lalign_loop32:
	extru	dst,31,2,t1
	cmpib,=,n	0,t1,.Lcopy_loop_8
20:	ldb,ma	1(srcspc,src),t1
21:	stb,ma	t1,1(dstspc,dst)
	b	.Lalign_loop32
	ldo	-1(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)


.Lcopy_loop_8:
	cmpib,COND(>>=),n 15,len,.Lbyte_loop

10:	ldw	0(srcspc,src),t1
11:	ldw	4(srcspc,src),t2
12:	stw,ma	t1,4(dstspc,dst)
13:	stw,ma	t2,4(dstspc,dst)
14:	ldw	8(srcspc,src),t1
15:	ldw	12(srcspc,src),t2
	ldo	16(src),src
16:	stw,ma	t1,4(dstspc,dst)
17:	stw,ma	t2,4(dstspc,dst)

	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy8_fault)
	ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy8_fault)
	ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)

	b	.Lcopy_loop_8
	ldo	-16(len),len

.Lbyte_loop:
	cmpclr,COND(<>) len,%r0,%r0
	b,n	.Lcopy_done
20:	ldb	0(srcspc,src),t1
	ldo	1(src),src
21:	stb,ma	t1,1(dstspc,dst)
	b	.Lbyte_loop
	ldo	-1(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

.Lcopy_done:
	bv	%r0(%r2)
	sub	end,dst,ret0

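	/*
	 * The unaligned path below first byte-copies until dst is word
	 * aligned, then reassembles each destination word from two
	 * adjacent, aligned source words with a funnel shift (shrpw via
	 * %sar).  A rough C-like sketch, illustrative only (the real code
	 * unrolls this four words per iteration and dispatches on len % 4
	 * through the .Lcase and .Ldo labels; a read fault simply falls
	 * back to the byte loop, which re-takes the fault and reports the
	 * correct residual count):
	 *
	 *	o = src & 3;		<- 1..3: equal alignment handled above
	 *	src -= o;		<- round down, all loads now aligned
	 *	prev = *src++;
	 *	while (words--) {
	 *		cur = *src++;
	 *		*dst++ = (prev << (8 * o)) | (cur >> (32 - 8 * o));
	 *		prev = cur;	<- big-endian byte merge
	 *	}
	 *	byte-copy any remaining tail bytes;
	 */
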
	/* src and dst are not aligned the same way. */
	/* need to go the hard way */
.Lunaligned_copy:
	/* align until dst is 32bit-word-aligned */
	extru	dst,31,2,t1
	cmpib,=,n	0,t1,.Lcopy_dstaligned
20:	ldb	0(srcspc,src),t1
	ldo	1(src),src
21:	stb,ma	t1,1(dstspc,dst)
	b	.Lunaligned_copy
	ldo	-1(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

.Lcopy_dstaligned:

	/* store src, dst and len in safe place */
	copy	src,save_src
	copy	dst,save_dst
	copy	len,save_len

	/* len now needs to give the number of words to copy */
	SHRREG	len,2,len

	/*
	 * Copy from a not-aligned src to an aligned dst using shifts.
	 * Handles 4 words per loop.
	 */

	depw,z src,28,2,t0
	subi 32,t0,t0
	mtsar t0
	extru len,31,2,t0
	cmpib,= 2,t0,.Lcase2
	/* Make src aligned by rounding it down. */
	depi 0,31,2,src

	cmpiclr,<> 3,t0,%r0
	b,n .Lcase3
	cmpiclr,<> 1,t0,%r0
	b,n .Lcase1
.Lcase0:
	cmpb,COND(=) %r0,len,.Lcda_finish
	nop

1:	ldw,ma 4(srcspc,src), a3
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma 4(srcspc,src), a0
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	b,n .Ldo3
.Lcase1:
1:	ldw,ma 4(srcspc,src), a2
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma 4(srcspc,src), a3
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	ldo -1(len),len
	cmpb,COND(=),n %r0,len,.Ldo0
.Ldo4:
1:	ldw,ma 4(srcspc,src), a0
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw a2, a3, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo3:
1:	ldw,ma 4(srcspc,src), a1
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw a3, a0, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo2:
1:	ldw,ma 4(srcspc,src), a2
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw a0, a1, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo1:
1:	ldw,ma 4(srcspc,src), a3
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw a1, a2, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
	ldo -4(len),len
	cmpb,COND(<>) %r0,len,.Ldo4
	nop
.Ldo0:
	shrpw a2, a3, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)

.Lcda_rdfault:
.Lcda_finish:
	/* calculate new src, dst and len and jump to byte-copy loop */
	sub	dst,save_dst,t0
	add	save_src,t0,src
	b	.Lbyte_loop
	sub	save_len,t0,len

.Lcase3:
1:	ldw,ma 4(srcspc,src), a0
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma 4(srcspc,src), a1
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	b .Ldo2
	ldo 1(len),len
.Lcase2:
1:	ldw,ma 4(srcspc,src), a1
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma 4(srcspc,src), a2
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	b .Ldo1
	ldo 2(len),len


	/* fault exception fixup handlers: */
#ifdef CONFIG_64BIT
.Lcopy16_fault:
	b	.Lcopy_done
10:	std,ma t1,8(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
#endif

.Lcopy8_fault:
	b	.Lcopy_done
10:	stw,ma t1,4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)

	.exit
ENDPROC_CFI(pa_memcpy)
	.procend

	.end