/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 1999-2002 Hewlett-Packard Co
 *	Stephane Eranian <eranian@hpl.hp.com>
 *	David Mosberger-Tang <davidm@hpl.hp.com>
 * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com>
 *
 * 1/06/01 davidm	Tuned for Itanium.
 * 2/12/02 kchen	Tuned for both Itanium and McKinley
 * 3/08/02 davidm	Some more tweaking
 */

#include <linux/export.h>
#include <asm/asmmacro.h>
#include <asm/page.h>

// Per-CPU tuning: size of one L3 cache line and how many lines the
// "fetch" pointer runs ahead of the main store pointers.
#ifdef CONFIG_ITANIUM
# define L3_LINE_SIZE	64	// Itanium L3 line size
# define PREFETCH_LINES	9	// magic number
#else
# define L3_LINE_SIZE	128	// McKinley L3 line size
# define PREFETCH_LINES	12	// magic number
#endif

// Register roles (all scratch except ar.lc, which is saved and restored):
#define saved_lc	r2	// caller's ar.lc
#define dst_fetch	r3	// run-ahead pointer: offset 0 of each line
#define dst1		r8	// store pointer, starts at in0 + 16
#define dst2		r9	// store pointer, starts at in0 + 32
#define dst3		r10	// store pointer, starts at in0 + 48
#define dst4		r11	// store pointer, starts at in0 + 64

#define dst_last	r31	// in0 + PAGE_SIZE: limit for dst_fetch

//
// clear_page: fill one page with zeros.
//
// In:     in0 = address of the page to clear (the only input, see .regstk).
//         NOTE(review): presumably page-aligned; stf.spill needs 16-byte
//         alignment -- confirm against callers.
// Out:    nothing.
// Clobb:  r2, r3, r8-r11, r16, r31, p8.  ar.lc is preserved.
//
// Method: every store is "stf.spill.nta [ptr] = f0, inc", i.e. a 16-byte
// store of f0 (which always reads as +0.0, so all-zero bits) with a
// non-temporal hint, followed by a post-increment of the pointer.
//
//  1. The .fetch loop writes the first 16 bytes of each of the first
//     PREFETCH_LINES cache lines through dst_fetch.
//  2. The main loop runs PAGE_SIZE/L3_LINE_SIZE times.  Each iteration
//     fills bytes 16..L3_LINE_SIZE-1 of one line through dst1..dst4 and,
//     while dst_fetch is still inside the page (p8), writes bytes 0..15
//     of the line PREFETCH_LINES ahead through dst_fetch.
//
GLOBAL_ENTRY(clear_page)
	.prologue
	.regstk 1,0,0,0
	mov r16 = PAGE_SIZE/L3_LINE_SIZE-1	// main loop count, -1=repeat/until
	.save ar.lc, saved_lc
	mov saved_lc = ar.lc

	.body
	mov ar.lc = (PREFETCH_LINES - 1)	// .fetch runs PREFETCH_LINES times
	mov dst_fetch = in0
	adds dst1 = 16, in0
	adds dst2 = 32, in0
	;;
.fetch:	stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE
	adds dst3 = 48, in0		// executing this multiple times is harmless
	br.cloop.sptk.few .fetch
	;;
	// Here dst_fetch = in0 + PREFETCH_LINES*L3_LINE_SIZE, so the sum
	// below yields dst_last = in0 + PAGE_SIZE.
	addl dst_last = (PAGE_SIZE - PREFETCH_LINES*L3_LINE_SIZE), dst_fetch
	mov ar.lc = r16			// one L3 line per iteration
	adds dst4 = 64, in0		// only stored through in the McKinley loop
	;;
#ifdef CONFIG_ITANIUM
	// Optimized for Itanium
	// 64-byte line: offsets 16 and 32 here, 48 in the shared tail below.
1:	stf.spill.nta [dst1] = f0, 64
	stf.spill.nta [dst2] = f0, 64
	cmp.lt p8,p0=dst_fetch, dst_last	// p8: run-ahead store still in page
	;;
#else
	// Optimized for McKinley
	// 128-byte line: offsets 16, 32, 48, 64 in the first group, then
	// 80, 96 here and 112 in the shared tail below.
1:	stf.spill.nta [dst1] = f0, 64
	stf.spill.nta [dst2] = f0, 64
	stf.spill.nta [dst3] = f0, 64
	stf.spill.nta [dst4] = f0, 128
	cmp.lt p8,p0=dst_fetch, dst_last	// p8: run-ahead store still in page
	;;
	stf.spill.nta [dst1] = f0, 64
	stf.spill.nta [dst2] = f0, 64
#endif
	stf.spill.nta [dst3] = f0, 64
(p8)	stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE	// offset 0 of a line ahead
	br.cloop.sptk.few 1b
	;;
	mov ar.lc = saved_lc		// restore lc
	br.ret.sptk.many rp
END(clear_page)
EXPORT_SYMBOL(clear_page)