xref: /openbmc/linux/arch/ia64/lib/clear_page.S (revision c900529f3d9161bfde5cca0754f83b4d3c3e0220)
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 1999-2002 Hewlett-Packard Co
 *	Stephane Eranian <eranian@hpl.hp.com>
 *	David Mosberger-Tang <davidm@hpl.hp.com>
 * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com>
 *
 * 1/06/01 davidm	Tuned for Itanium.
 * 2/12/02 kchen	Tuned for both Itanium and McKinley
 * 3/08/02 davidm	Some more tweaking
 */

#include <linux/export.h>
#include <asm/asmmacro.h>
#include <asm/page.h>

/*
 * CPU-specific tuning for clear_page:
 *   L3_LINE_SIZE   - stride in bytes between the look-ahead stores.
 *   PREFETCH_LINES - number of lines by which the look-ahead stores are
 *                    started ahead of the main clearing loop.
 */
#ifdef CONFIG_ITANIUM
# define L3_LINE_SIZE	64	// Itanium L3 line size
# define PREFETCH_LINES	9	// magic number
#else
# define L3_LINE_SIZE	128	// McKinley L3 line size
# define PREFETCH_LINES	12	// magic number
#endif

/*
 * Register aliases used by clear_page:
 *   saved_lc  - holds the caller's ar.lc until it is restored on exit
 *   dst_fetch - pointer of the look-ahead store stream
 *   dst1-dst4 - pointers of the main store streams (initialised to
 *               page+16, page+32, page+48 and page+64)
 *   dst_last  - limit that dst_fetch is compared against
 */
#define saved_lc	r2
#define dst_fetch	r3
#define dst1		r8
#define dst2		r9
#define dst3		r10
#define dst4		r11

#define dst_last	r31

/*
 * clear_page - fill one PAGE_SIZE-byte page with zeroes.
 *
 * In:	in0 = address of the page (the only input register, see .regstk).
 *	No alignment check is performed here; callers presumably pass a
 *	page-aligned address - confirm against callers.
 * Out:	none (r8 is used as a scratch pointer and holds no result).
 * Clobbers: r2, r3, r8-r11, r16, r31, p8.  ar.lc is copied to saved_lc
 *	on entry and restored before returning.
 *
 * Every store is "stf.spill.nta [ptr] = f0, inc": it writes 16 bytes of
 * zero (f0 always reads as 0.0) and post-increments the pointer by inc.
 * Two kinds of store stream cooperate:
 *
 *   - dst_fetch runs PREFETCH_LINES lines ahead and writes only the first
 *     16 bytes of each L3_LINE_SIZE-byte line (presumably so the line is
 *     already owned by the cache when the remaining stores arrive);
 *   - dst1..dst4 follow behind and write the remaining 16-byte slots of
 *     every line (offsets 16, 32, 48, ... within the line).
 */
GLOBAL_ENTRY(clear_page)
	.prologue
	.regstk 1,0,0,0				// one input register (in0), nothing else
	mov r16 = PAGE_SIZE/L3_LINE_SIZE-1	// main loop count, -1=repeat/until
	.save ar.lc, saved_lc			// unwind info: ar.lc lives in saved_lc
	mov saved_lc = ar.lc

	.body
	mov ar.lc = (PREFETCH_LINES - 1)	// look-ahead loop runs PREFETCH_LINES times
	mov dst_fetch = in0
	adds dst1 = 16, in0
	adds dst2 = 32, in0
	;;
	// Look-ahead phase: zero the first 16 bytes of each of the first
	// PREFETCH_LINES lines, advancing dst_fetch by one line per pass.
.fetch:	stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE
	adds dst3 = 48, in0		// executing this multiple times is harmless
	br.cloop.sptk.few .fetch
	;;
	// dst_fetch is now in0 + PREFETCH_LINES*L3_LINE_SIZE, so the sum
	// below yields dst_last = in0 + PAGE_SIZE (one past the page).
	addl dst_last = (PAGE_SIZE - PREFETCH_LINES*L3_LINE_SIZE), dst_fetch
	mov ar.lc = r16			// one L3 line per iteration
	adds dst4 = 64, in0		// only stored through on the McKinley path
	;;
#ifdef CONFIG_ITANIUM
	// Optimized for Itanium
	// 64-byte line: offsets 16 and 32 here, offset 48 after the #endif.
1:	stf.spill.nta [dst1] = f0, 64
	stf.spill.nta [dst2] = f0, 64
	cmp.lt p8,p0=dst_fetch, dst_last	// p8: look-ahead still inside the page
	;;
#else
	// Optimized for McKinley
	// 128-byte line: offsets 16, 32, 48, 64 in the first group, then
	// 80 and 96 here and 112 after the #endif.  dst4 advances by a whole
	// line because it is stored through only once per iteration.
1:	stf.spill.nta [dst1] = f0, 64
	stf.spill.nta [dst2] = f0, 64
	stf.spill.nta [dst3] = f0, 64
	stf.spill.nta [dst4] = f0, 128
	cmp.lt p8,p0=dst_fetch, dst_last	// p8: look-ahead still inside the page
	;;
	stf.spill.nta [dst1] = f0, 64
	stf.spill.nta [dst2] = f0, 64
#endif
	stf.spill.nta [dst3] = f0, 64
	// Keep the look-ahead stream going, but never store at or beyond
	// dst_last (the end of the page).
(p8)	stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE
	br.cloop.sptk.few 1b
	;;
	mov ar.lc = saved_lc		// restore lc
	br.ret.sptk.many rp
END(clear_page)
EXPORT_SYMBOL(clear_page)