Lines Matching +full:- +full:- +full:disable +full:- +full:cloop
1 /* SPDX-License-Identifier: GPL-2.0 */
9 * Copyright (C) 1998-2001, 2003, 2005 Hewlett-Packard Co
10 * David Mosberger-Tang <davidm@hpl.hp.com>
18 * -Optimize __ia64_save_fpu() and __ia64_load_fpu() for Itanium 2.
29 #include <asm/asm-offsets.h>
50 mov ar.lc=IA64_NUM_DBG_REGS-1;; \
55 br.cloop.sptk.many 1b
58 mov ar.lc=IA64_NUM_DBG_REGS-1;; \
62 br.cloop.sptk.many _lbl
89 mov ar.lc=0x08-1;; \
97 br.cloop.sptk.few RestRR
281 // set IVT entry point---can't access I/O ports without it
315 mov r16=-1
316 (isBP) br.cond.dpnt .load_current // BP stack is on region 5 --- no need to map it
326 dep r2=-1,r3,61,3 // IMVA of task
352 * still needs to be there because time-critical stuff such as the context
357 addl r12=IA64_STK_OFFSET-IA64_PT_REGS_SIZE-16,r2
376 adds r18=-1,r18;;
386 (isBP) mov IA64_KR(PER_CPU_DATA)=r19 // per-CPU base for cpu0
387 (isAP) mov IA64_KR(PER_CPU_DATA)=r0 // clear physical per-CPU base
393 (isBP) dep r28=-1,r28,61,3 // make address virtual
423 mov ar.lc=IA64_NUM_DBG_REGS-1
437 br.cloop.sptk.many 1b
448 mov ar.lc=IA64_NUM_DBG_REGS-1
449 mov r18=-1
461 br.cloop.sptk.many 1b
469 adds loc0=96*16-16,in0
470 adds loc1=96*16-16-128,in0
472 stf.spill.nta [loc0]=f127,-256
473 stf.spill.nta [loc1]=f119,-256
475 stf.spill.nta [loc0]=f111,-256
476 stf.spill.nta [loc1]=f103,-256
478 stf.spill.nta [loc0]=f95,-256
479 stf.spill.nta [loc1]=f87,-256
481 stf.spill.nta [loc0]=f79,-256
482 stf.spill.nta [loc1]=f71,-256
484 stf.spill.nta [loc0]=f63,-256
485 stf.spill.nta [loc1]=f55,-256
486 adds loc2=96*16-32,in0
488 stf.spill.nta [loc0]=f47,-256
489 stf.spill.nta [loc1]=f39,-256
490 adds loc3=96*16-32-128,in0
492 stf.spill.nta [loc2]=f126,-256
493 stf.spill.nta [loc3]=f118,-256
495 stf.spill.nta [loc2]=f110,-256
496 stf.spill.nta [loc3]=f102,-256
498 stf.spill.nta [loc2]=f94,-256
499 stf.spill.nta [loc3]=f86,-256
501 stf.spill.nta [loc2]=f78,-256
502 stf.spill.nta [loc3]=f70,-256
504 stf.spill.nta [loc2]=f62,-256
505 stf.spill.nta [loc3]=f54,-256
506 adds loc0=96*16-48,in0
508 stf.spill.nta [loc2]=f46,-256
509 stf.spill.nta [loc3]=f38,-256
510 adds loc1=96*16-48-128,in0
512 stf.spill.nta [loc0]=f125,-256
513 stf.spill.nta [loc1]=f117,-256
515 stf.spill.nta [loc0]=f109,-256
516 stf.spill.nta [loc1]=f101,-256
518 stf.spill.nta [loc0]=f93,-256
519 stf.spill.nta [loc1]=f85,-256
521 stf.spill.nta [loc0]=f77,-256
522 stf.spill.nta [loc1]=f69,-256
524 stf.spill.nta [loc0]=f61,-256
525 stf.spill.nta [loc1]=f53,-256
526 adds loc2=96*16-64,in0
528 stf.spill.nta [loc0]=f45,-256
529 stf.spill.nta [loc1]=f37,-256
530 adds loc3=96*16-64-128,in0
532 stf.spill.nta [loc2]=f124,-256
533 stf.spill.nta [loc3]=f116,-256
535 stf.spill.nta [loc2]=f108,-256
536 stf.spill.nta [loc3]=f100,-256
538 stf.spill.nta [loc2]=f92,-256
539 stf.spill.nta [loc3]=f84,-256
541 stf.spill.nta [loc2]=f76,-256
542 stf.spill.nta [loc3]=f68,-256
544 stf.spill.nta [loc2]=f60,-256
545 stf.spill.nta [loc3]=f52,-256
546 adds loc0=96*16-80,in0
548 stf.spill.nta [loc2]=f44,-256
549 stf.spill.nta [loc3]=f36,-256
550 adds loc1=96*16-80-128,in0
552 stf.spill.nta [loc0]=f123,-256
553 stf.spill.nta [loc1]=f115,-256
555 stf.spill.nta [loc0]=f107,-256
556 stf.spill.nta [loc1]=f99,-256
558 stf.spill.nta [loc0]=f91,-256
559 stf.spill.nta [loc1]=f83,-256
561 stf.spill.nta [loc0]=f75,-256
562 stf.spill.nta [loc1]=f67,-256
564 stf.spill.nta [loc0]=f59,-256
565 stf.spill.nta [loc1]=f51,-256
566 adds loc2=96*16-96,in0
568 stf.spill.nta [loc0]=f43,-256
569 stf.spill.nta [loc1]=f35,-256
570 adds loc3=96*16-96-128,in0
572 stf.spill.nta [loc2]=f122,-256
573 stf.spill.nta [loc3]=f114,-256
575 stf.spill.nta [loc2]=f106,-256
576 stf.spill.nta [loc3]=f98,-256
578 stf.spill.nta [loc2]=f90,-256
579 stf.spill.nta [loc3]=f82,-256
581 stf.spill.nta [loc2]=f74,-256
582 stf.spill.nta [loc3]=f66,-256
584 stf.spill.nta [loc2]=f58,-256
585 stf.spill.nta [loc3]=f50,-256
586 adds loc0=96*16-112,in0
588 stf.spill.nta [loc2]=f42,-256
589 stf.spill.nta [loc3]=f34,-256
590 adds loc1=96*16-112-128,in0
592 stf.spill.nta [loc0]=f121,-256
593 stf.spill.nta [loc1]=f113,-256
595 stf.spill.nta [loc0]=f105,-256
596 stf.spill.nta [loc1]=f97,-256
598 stf.spill.nta [loc0]=f89,-256
599 stf.spill.nta [loc1]=f81,-256
601 stf.spill.nta [loc0]=f73,-256
602 stf.spill.nta [loc1]=f65,-256
604 stf.spill.nta [loc0]=f57,-256
605 stf.spill.nta [loc1]=f49,-256
606 adds loc2=96*16-128,in0
608 stf.spill.nta [loc0]=f41,-256
609 stf.spill.nta [loc1]=f33,-256
610 adds loc3=96*16-128-128,in0
612 stf.spill.nta [loc2]=f120,-256
613 stf.spill.nta [loc3]=f112,-256
615 stf.spill.nta [loc2]=f104,-256
616 stf.spill.nta [loc3]=f96,-256
618 stf.spill.nta [loc2]=f88,-256
619 stf.spill.nta [loc3]=f80,-256
621 stf.spill.nta [loc2]=f72,-256
622 stf.spill.nta [loc3]=f64,-256
624 stf.spill.nta [loc2]=f56,-256
625 stf.spill.nta [loc3]=f48,-256
638 mov loc1=-1024+16
826 * the remaining registers with simply mov instructions (F-unit).
886 rsm psr.i | psr.ic // disable interrupts and interrupt collection
896 add r3=1f-ia64_switch_mode_phys,r15
903 // going to physical mode, use tpa to translate virt->phys
934 rsm psr.i | psr.ic // disable interrupts and interrupt collection
944 add r3=1f-ia64_switch_mode_virt,r15
950 // - for code addresses, set upper bits of addr to KERNEL_START
951 // - for stack addresses, copy from input argument
953 dep r3=0,r3,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT
954 dep r14=0,r14,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT
983 // force loop to be 32-byte aligned (GAS bug means we cannot use .align
986 1: br.cloop.sptk.few 1b
993 * Return a CPU-local timestamp in nano-seconds. This timestamp is
998 * The return-value of sched_clock() is NOT supposed to wrap-around.
1000 * Fortunately, with a 64-bit cycle-counter ticking at 100GHz, even
1005 * (ia64_get_itc() * local_cpu_data->nsec_per_cyc) >> IA64_NSEC_PER_CYC_SHIFT
1007 * except that the multiplication and the shift are done with 128-bit
1008 * intermediate precision so that we can produce a full 64-bit result.
1012 mov.m r9=ar.itc // fetch cycle-counter (35 cyc)
1018 xmpy.lu f10=f9,f8 // calculate low 64 bits of 128-bit product (4 cyc)
1019 xmpy.hu f11=f9,f8 // calculate high 64 bits of 128-bit product
1037 xmpy.lu f10=f9,f8 // calculate low 64 bits of 128-bit product (4 cyc)
1038 xmpy.hu f11=f9,f8 // calculate high 64 bits of 128-bit product
1131 mov pr=r17,-1;;