xref: /openbmc/linux/arch/ia64/lib/copy_page.S (revision c900529f3d9161bfde5cca0754f83b4d3c3e0220)
/* SPDX-License-Identifier: GPL-2.0 */
/*
 *
 * Optimized version of the standard copy_page() function
 *
 * Inputs:
 *	in0:	address of target page
 *	in1:	address of source page
 * Output:
 *	no return value
 *
 * Copyright (C) 1999, 2001 Hewlett-Packard Co
 *	Stephane Eranian <eranian@hpl.hp.com>
 *	David Mosberger <davidm@hpl.hp.com>
 *
 * 4/06/01 davidm	Tuned to make it perform well both for cached and uncached copies.
 */
#include <linux/export.h>
#include <asm/asmmacro.h>
#include <asm/page.h>

/*
 * Software-pipeline parameters for the copy loop in copy_page().
 *
 * PIPE_DEPTH is the number of pipeline stages: it sizes every rotating
 * register array and the rotating predicate array, and is the value loaded
 * into ar.ec.  EPI names the predicate of the last stage, which guards the
 * stores.
 */
#define PIPE_DEPTH	3
#define EPI		p[PIPE_DEPTH-1]

/* Static-register aliases used by copy_page(). */
#define lcount		r16	/* loop count loaded into ar.lc (PAGE_SIZE/64-1) */
#define saved_pr	r17	/* caller's predicate registers */
#define saved_lc	r18	/* caller's ar.lc */
#define saved_pfs	r19	/* caller's ar.pfs, captured by alloc */
#define src1		r20	/* load pointer, starts at in1 */
#define src2		r21	/* load pointer, starts at in1+8 */
#define tgt1		r22	/* store pointer, starts at in0 */
#define tgt2		r23	/* store pointer, starts at in0+8 */
#define srcf		r24	/* source lfetch pointer, starts at in1+512 */
#define tgtf		r25	/* target lfetch pointer, starts at in0+512 */
#define tgt_last	r26	/* in0 + PAGE_SIZE: first byte past the target page */

/*
 * Size of the rotating register region passed to alloc: 8 arrays of
 * PIPE_DEPTH registers each, rounded up to a multiple of 8.
 */
#define Nrot		((8*PIPE_DEPTH+7)&~7)

/*
 * copy_page: copy PAGE_SIZE bytes from the page at in1 to the page at in0.
 *
 * Inputs:
 *	in0:	address of target page
 *	in1:	address of source page
 * Output:
 *	no return value
 *
 * The copy is a software-pipelined counted loop (br.ctop) that moves
 * 64 bytes per iteration: eight ld8s into stage 0 of the rotating arrays
 * t1..t8 and eight st8s out of stage PIPE_DEPTH-1, so each store writes the
 * value loaded PIPE_DEPTH-1 iterations earlier.  Loads and stores are split
 * over two pointers each (src1/src2 and tgt1/tgt2), 8 bytes apart and
 * advancing by 16.  ar.lc is set to PAGE_SIZE/64-1 and ar.ec to PIPE_DEPTH,
 * which gives PAGE_SIZE/64 loading iterations followed by PIPE_DEPTH-1
 * drain iterations.
 *
 * srcf/tgtf run 512 bytes ahead of the copy and issue one lfetch each per
 * iteration while tgtf is still below the end of the target page.
 *
 * ar.pfs, ar.lc and predicates p16-p63 are saved on entry and restored
 * before returning; r16-r26 and p6 are overwritten.
 */
GLOBAL_ENTRY(copy_page)
	.prologue
	// Register frame: 3 inputs, Nrot-3 locals, 0 outputs; Nrot rotating.
	.save ar.pfs, saved_pfs
	alloc saved_pfs=ar.pfs,3,Nrot-3,0,Nrot

	// One rotating array per 8-byte lane, PIPE_DEPTH stages deep, plus the
	// rotating stage predicates p[0..PIPE_DEPTH-1].
	.rotr t1[PIPE_DEPTH], t2[PIPE_DEPTH], t3[PIPE_DEPTH], t4[PIPE_DEPTH], \
	      t5[PIPE_DEPTH], t6[PIPE_DEPTH], t7[PIPE_DEPTH], t8[PIPE_DEPTH]
	.rotp p[PIPE_DEPTH]

	.save ar.lc, saved_lc
	mov saved_lc=ar.lc
	mov ar.ec=PIPE_DEPTH			// epilogue count = number of stages

	mov lcount=PAGE_SIZE/64-1		// 64 bytes are copied per iteration
	.save pr, saved_pr
	mov saved_pr=pr
	mov pr.rot=1<<16			// p16 = 1, other rotating predicates = 0

	.body

	mov src1=in1
	adds src2=8,in1
	mov tgt_last = PAGE_SIZE
	;;
	adds tgt2=8,in0
	add srcf=512,in1			// prefetch pointers start 512 bytes ahead
	mov ar.lc=lcount
	mov tgt1=in0
	add tgtf=512,in0
	add tgt_last = tgt_last, in0		// tgt_last = in0 + PAGE_SIZE
	;;
1:
	// Bytes 0-15 of the current 64-byte chunk: load stage (p[0]) and
	// store stage (EPI) of the pipeline.
(p[0])	ld8 t1[0]=[src1],16
(EPI)	st8 [tgt1]=t1[PIPE_DEPTH-1],16
(p[0])	ld8 t2[0]=[src2],16
(EPI)	st8 [tgt2]=t2[PIPE_DEPTH-1],16
	cmp.ltu p6,p0 = tgtf, tgt_last		// p6 = tgtf still inside target page
	;;
	// Bytes 16-31.
(p[0])	ld8 t3[0]=[src1],16
(EPI)	st8 [tgt1]=t3[PIPE_DEPTH-1],16
(p[0])	ld8 t4[0]=[src2],16
(EPI)	st8 [tgt2]=t4[PIPE_DEPTH-1],16
	;;
	// Bytes 32-47.
(p[0])	ld8 t5[0]=[src1],16
(EPI)	st8 [tgt1]=t5[PIPE_DEPTH-1],16
(p[0])	ld8 t6[0]=[src2],16
(EPI)	st8 [tgt2]=t6[PIPE_DEPTH-1],16
	;;
	// Bytes 48-63.
(p[0])	ld8 t7[0]=[src1],16
(EPI)	st8 [tgt1]=t7[PIPE_DEPTH-1],16
(p[0])	ld8 t8[0]=[src2],16
(EPI)	st8 [tgt2]=t8[PIPE_DEPTH-1],16

	// Prefetch one step (64 bytes) further on both pages; both lfetches
	// are gated by the target-side bound check above.
(p6)	lfetch [srcf], 64
(p6)	lfetch [tgtf], 64
	br.ctop.sptk.few 1b
	;;
	mov pr=saved_pr,0xffffffffffff0000	// restore predicates
	mov ar.pfs=saved_pfs
	mov ar.lc=saved_lc
	br.ret.sptk.many rp
END(copy_page)
EXPORT_SYMBOL(copy_page)