/*
 * Here is where the ball gets rolling as far as the kernel is concerned.
 * When control is transferred to _start, the bootloader has already
 * loaded us to the correct address.  All that's left to do here is
 * to set up the kernel's global pointer and jump to the kernel
 * entry point.
 *
 * Copyright (C) 1998-2001, 2003, 2005 Hewlett-Packard Co
 *	David Mosberger-Tang <davidm@hpl.hp.com>
 *	Stephane Eranian <eranian@hpl.hp.com>
 * Copyright (C) 1999 VA Linux Systems
 * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
 * Copyright (C) 1999 Intel Corp.
 * Copyright (C) 1999 Asit Mallick <Asit.K.Mallick@intel.com>
 * Copyright (C) 1999 Don Dugger <Don.Dugger@intel.com>
 * Copyright (C) 2002 Fenghua Yu <fenghua.yu@intel.com>
 *	- Optimize __ia64_save_fpu() and __ia64_load_fpu() for Itanium 2.
 */

#include <linux/config.h>

#include <asm/asmmacro.h>
#include <asm/fpu.h>
#include <asm/kregs.h>
#include <asm/mmu_context.h>
#include <asm/offsets.h>
#include <asm/pal.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
#include <asm/system.h>

	.section __special_page_section,"ax"

	.global empty_zero_page
empty_zero_page:
	.skip PAGE_SIZE

	.global swapper_pg_dir
swapper_pg_dir:
	.skip PAGE_SIZE

	.rodata
halt_msg:
	stringz "Halting kernel\n"

	.text

	.global start_ap

	/*
	 * Start the kernel.  When the bootloader passes control to _start(),
	 * r28 holds the physical address of the boot parameter area.
	 * Execution reaches this point in physical mode.
	 */
GLOBAL_ENTRY(_start)
start_ap:
	.prologue
	.save rp, r0		// terminate unwind chain with a NULL rp
	.body

	rsm psr.i | psr.ic
	;;
	srlz.i
	;;
	/*
	 * Initialize kernel region registers:
	 *	rr[0]: VHPT enabled, page size = PAGE_SHIFT
	 *	rr[1]: VHPT enabled, page size = PAGE_SHIFT
	 *	rr[2]: VHPT enabled, page size = PAGE_SHIFT
	 *	rr[3]: VHPT enabled, page size = PAGE_SHIFT
	 *	rr[4]: VHPT enabled, page size = PAGE_SHIFT
	 *	rr[5]: VHPT enabled, page size = PAGE_SHIFT
	 *	rr[6]: VHPT disabled, page size = IA64_GRANULE_SHIFT
	 *	rr[7]: VHPT disabled, page size = IA64_GRANULE_SHIFT
	 * We initialize all of them to prevent inadvertently assuming
	 * something about the state of address translation early in boot.
	 */
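	/*
	 * Illustration only (a sketch, not part of the boot path): each
	 * region-register value written below packs the region ID, the
	 * preferred page size, and the VHPT-enable bit, matching the
	 * ia64_rid()/PAGE_SHIFT arithmetic in the mov instructions that
	 * follow (rr_value() is a hypothetical helper name):
	 *
	 *	unsigned long rr_value(unsigned long rid, unsigned long ps,
	 *			       unsigned long vhpt_on)
	 *	{
	 *		return (rid << 8) | (ps << 2) | vhpt_on;
	 *	}
	 *
	 * so regions 0-5 get (ia64_rid(...) << 8) | (PAGE_SHIFT << 2) | 1
	 * and regions 6-7 get (ia64_rid(...) << 8) | (IA64_GRANULE_SHIFT << 2).
	 */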
	mov r6=((ia64_rid(IA64_REGION_ID_KERNEL, (0<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
	movl r7=(0<<61)
	mov r8=((ia64_rid(IA64_REGION_ID_KERNEL, (1<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
	movl r9=(1<<61)
	mov r10=((ia64_rid(IA64_REGION_ID_KERNEL, (2<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
	movl r11=(2<<61)
	mov r12=((ia64_rid(IA64_REGION_ID_KERNEL, (3<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
	movl r13=(3<<61)
	mov r14=((ia64_rid(IA64_REGION_ID_KERNEL, (4<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
	movl r15=(4<<61)
	mov r16=((ia64_rid(IA64_REGION_ID_KERNEL, (5<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
	movl r17=(5<<61)
	mov r18=((ia64_rid(IA64_REGION_ID_KERNEL, (6<<61)) << 8) | (IA64_GRANULE_SHIFT << 2))
	movl r19=(6<<61)
	mov r20=((ia64_rid(IA64_REGION_ID_KERNEL, (7<<61)) << 8) | (IA64_GRANULE_SHIFT << 2))
	movl r21=(7<<61)
	;;
	mov rr[r7]=r6
	mov rr[r9]=r8
	mov rr[r11]=r10
	mov rr[r13]=r12
	mov rr[r15]=r14
	mov rr[r17]=r16
	mov rr[r19]=r18
	mov rr[r21]=r20
	;;
	/*
	 * Now pin mappings into the TLB for kernel text and data
	 */
	mov r18=KERNEL_TR_PAGE_SHIFT<<2
	movl r17=KERNEL_START
	;;
	mov cr.itir=r18
	mov cr.ifa=r17
	mov r16=IA64_TR_KERNEL
	mov r3=ip
	movl r18=PAGE_KERNEL
	;;
	dep r2=0,r3,0,KERNEL_TR_PAGE_SHIFT
	;;
	or r18=r2,r18
	;;
	srlz.i
	;;
	itr.i itr[r16]=r18
	;;
	itr.d dtr[r16]=r18
	;;
	srlz.i

	/*
	 * Switch into virtual mode:
	 */
	movl r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN \
		  |IA64_PSR_DI)
	;;
	mov cr.ipsr=r16
	movl r17=1f
	;;
	mov cr.iip=r17
	mov cr.ifs=r0
	;;
	rfi
	;;
1:	// now we are in virtual mode

	// set IVT entry point---can't access I/O ports without it
	movl r3=ia64_ivt
	;;
	mov cr.iva=r3
	movl r2=FPSR_DEFAULT
	;;
	srlz.i
	movl gp=__gp

	mov ar.fpsr=r2
	;;
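	/*
	 * For reference (a sketch using the same macros as above, not extra
	 * boot code): the pinned translation inserted above was built as
	 *
	 *	itir = KERNEL_TR_PAGE_SHIFT << 2;	// page size of the pin
	 *	ifa  = KERNEL_START;			// virtual address
	 *	pte  = PAGE_KERNEL |
	 *	       (ip & ~((1UL << KERNEL_TR_PAGE_SHIFT) - 1));
	 *
	 * i.e. the current physical instruction address, rounded down to the
	 * mapped page size and merged with the kernel page attributes, goes
	 * into both the instruction and data TRs (slot IA64_TR_KERNEL).
	 */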

#define isAP	p2	// are we an Application Processor?
#define isBP	p3	// are we the Bootstrap Processor?

#ifdef CONFIG_SMP
	/*
	 * Find the init_task for the currently booting CPU.  At poweron, and in
	 * UP mode, task_for_booting_cpu is NULL.
	 */
	movl r3=task_for_booting_cpu
	;;
	ld8 r3=[r3]
	movl r2=init_task
	;;
	cmp.eq isBP,isAP=r3,r0
	;;
(isAP)	mov r2=r3
#else
	movl r2=init_task
	cmp.eq isBP,isAP=r0,r0
#endif
	;;
	tpa r3=r2		// r3 == phys addr of task struct
	mov r16=-1
(isBP)	br.cond.dpnt .load_current // BP stack is on region 5 --- no need to map it

	// load mapping for stack (virtaddr in r2, physaddr in r3)
	rsm psr.ic
	movl r17=PAGE_KERNEL
	;;
	srlz.d
	dep r18=0,r3,0,12
	;;
	or r18=r17,r18
	dep r2=-1,r3,61,3	// IMVA of task
	;;
	mov r17=rr[r2]
	shr.u r16=r3,IA64_GRANULE_SHIFT
	;;
	dep r17=0,r17,8,24
	;;
	mov cr.itir=r17
	mov cr.ifa=r2

	mov r19=IA64_TR_CURRENT_STACK
	;;
	itr.d dtr[r19]=r18
	;;
	ssm psr.ic
	srlz.d
	;;

.load_current:
	// load the "current" pointer (r13) and ar.k6 with the current task
	mov IA64_KR(CURRENT)=r2		// virtual address
	mov IA64_KR(CURRENT_STACK)=r16
	mov r13=r2
	/*
	 * Reserve space at the top of the stack for "struct pt_regs".  Kernel
	 * threads don't store interesting values in that structure, but the
	 * space still needs to be there because time-critical stuff such as
	 * context switching can be implemented more efficiently (for example,
	 * __switch_to() always sets the psr.dfh bit of the task it is
	 * switching to).
	 */
	addl r12=IA64_STK_OFFSET-IA64_PT_REGS_SIZE-16,r2
	addl r2=IA64_RBS_OFFSET,r2	// initialize the RSE
	mov ar.rsc=0			// place RSE in enforced lazy mode
	;;
	loadrs				// clear the dirty partition
	;;
	mov ar.bspstore=r2		// establish the new RSE stack
	;;
	mov ar.rsc=0x3			// place RSE in eager mode

(isBP)	dep r28=-1,r28,61,3	// make address virtual
(isBP)	movl r2=ia64_boot_param
	;;
(isBP)	st8 [r2]=r28		// save the address of the boot param area passed by the bootloader

#ifdef CONFIG_SMP
(isAP)	br.call.sptk.many rp=start_secondary
.ret0:
(isAP)	br.cond.sptk self
#endif

	// This is executed by the bootstrap processor (bsp) only:

#ifdef CONFIG_IA64_FW_EMU
	// initialize PAL & SAL emulator:
	br.call.sptk.many rp=sys_fw_init
.ret1:
#endif
	br.call.sptk.many rp=start_kernel
.ret2:	addl r3=@ltoff(halt_msg),gp
	;;
	alloc r2=ar.pfs,8,0,2,0
	;;
	ld8 out0=[r3]
	br.call.sptk.many b0=console_print

self:	hint @pause
	br.sptk.many self		// endless loop
END(_start)
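
/*
 * Illustration only (a sketch, not executed): the kernel stack set up in
 * _start above is a single block of IA64_STK_OFFSET bytes shared between
 * the memory stack and the register backing store:
 *
 *	task struct			at offset 0 (r13, "current")
 *	register backing store		grows up from IA64_RBS_OFFSET
 *					(ar.bspstore)
 *	struct pt_regs + 16 scratch	at the very top
 *	memory stack			grows down from
 *					IA64_STK_OFFSET - IA64_PT_REGS_SIZE - 16
 *					(sp, i.e. r12)
 *
 * which is exactly what the two addl instructions before "mov ar.rsc=0"
 * compute from the task pointer in r2.
 */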

GLOBAL_ENTRY(ia64_save_debug_regs)
	alloc r16=ar.pfs,1,0,0,0
	mov r20=ar.lc			// preserve ar.lc
	mov ar.lc=IA64_NUM_DBG_REGS-1
	mov r18=0
	add r19=IA64_NUM_DBG_REGS*8,in0
	;;
1:	mov r16=dbr[r18]
#ifdef CONFIG_ITANIUM
	;;
	srlz.d
#endif
	mov r17=ibr[r18]
	add r18=1,r18
	;;
	st8.nta [in0]=r16,8
	st8.nta [r19]=r17,8
	br.cloop.sptk.many 1b
	;;
	mov ar.lc=r20			// restore ar.lc
	br.ret.sptk.many rp
END(ia64_save_debug_regs)

GLOBAL_ENTRY(ia64_load_debug_regs)
	alloc r16=ar.pfs,1,0,0,0
	lfetch.nta [in0]
	mov r20=ar.lc			// preserve ar.lc
	add r19=IA64_NUM_DBG_REGS*8,in0
	mov ar.lc=IA64_NUM_DBG_REGS-1
	mov r18=-1
	;;
1:	ld8.nta r16=[in0],8
	ld8.nta r17=[r19],8
	add r18=1,r18
	;;
	mov dbr[r18]=r16
#ifdef CONFIG_ITANIUM
	;;
	srlz.d				// Errata 132 (NoFix status)
#endif
	mov ibr[r18]=r17
	br.cloop.sptk.many 1b
	;;
	mov ar.lc=r20			// restore ar.lc
	br.ret.sptk.many rp
END(ia64_load_debug_regs)
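
/*
 * A non-normative sketch of what __ia64_save_fpu below does: in0 points to
 * a 96-entry save area for the "high" floating-point partition, and each
 * register fN is spilled in 16-byte format at in0 + (N - 32) * 16, i.e.
 *
 *	for (n = 32; n < 128; n++)
 *		spill_area[n - 32] = f[n];	// stf.spill format
 *
 * The hand-scheduled version streams the stores through pointer pairs
 * (loc0/loc1, loc2/loc3) that step backwards by 256 bytes, so consecutive
 * stores hit different cache lines; this is the Itanium 2 optimization
 * mentioned in the header.  __ia64_load_fpu reverses the process with four
 * ascending ldf.fill streams (strides loc0=512 and loc1=-1024+16).
 */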

GLOBAL_ENTRY(__ia64_save_fpu)
	alloc r2=ar.pfs,1,4,0,0
	adds loc0=96*16-16,in0
	adds loc1=96*16-16-128,in0
	;;
	stf.spill.nta [loc0]=f127,-256
	stf.spill.nta [loc1]=f119,-256
	;;
	stf.spill.nta [loc0]=f111,-256
	stf.spill.nta [loc1]=f103,-256
	;;
	stf.spill.nta [loc0]=f95,-256
	stf.spill.nta [loc1]=f87,-256
	;;
	stf.spill.nta [loc0]=f79,-256
	stf.spill.nta [loc1]=f71,-256
	;;
	stf.spill.nta [loc0]=f63,-256
	stf.spill.nta [loc1]=f55,-256
	adds loc2=96*16-32,in0
	;;
	stf.spill.nta [loc0]=f47,-256
	stf.spill.nta [loc1]=f39,-256
	adds loc3=96*16-32-128,in0
	;;
	stf.spill.nta [loc2]=f126,-256
	stf.spill.nta [loc3]=f118,-256
	;;
	stf.spill.nta [loc2]=f110,-256
	stf.spill.nta [loc3]=f102,-256
	;;
	stf.spill.nta [loc2]=f94,-256
	stf.spill.nta [loc3]=f86,-256
	;;
	stf.spill.nta [loc2]=f78,-256
	stf.spill.nta [loc3]=f70,-256
	;;
	stf.spill.nta [loc2]=f62,-256
	stf.spill.nta [loc3]=f54,-256
	adds loc0=96*16-48,in0
	;;
	stf.spill.nta [loc2]=f46,-256
	stf.spill.nta [loc3]=f38,-256
	adds loc1=96*16-48-128,in0
	;;
	stf.spill.nta [loc0]=f125,-256
	stf.spill.nta [loc1]=f117,-256
	;;
	stf.spill.nta [loc0]=f109,-256
	stf.spill.nta [loc1]=f101,-256
	;;
	stf.spill.nta [loc0]=f93,-256
	stf.spill.nta [loc1]=f85,-256
	;;
	stf.spill.nta [loc0]=f77,-256
	stf.spill.nta [loc1]=f69,-256
	;;
	stf.spill.nta [loc0]=f61,-256
	stf.spill.nta [loc1]=f53,-256
	adds loc2=96*16-64,in0
	;;
	stf.spill.nta [loc0]=f45,-256
	stf.spill.nta [loc1]=f37,-256
	adds loc3=96*16-64-128,in0
	;;
	stf.spill.nta [loc2]=f124,-256
	stf.spill.nta [loc3]=f116,-256
	;;
	stf.spill.nta [loc2]=f108,-256
	stf.spill.nta [loc3]=f100,-256
	;;
	stf.spill.nta [loc2]=f92,-256
	stf.spill.nta [loc3]=f84,-256
	;;
	stf.spill.nta [loc2]=f76,-256
	stf.spill.nta [loc3]=f68,-256
	;;
	stf.spill.nta [loc2]=f60,-256
	stf.spill.nta [loc3]=f52,-256
	adds loc0=96*16-80,in0
	;;
	stf.spill.nta [loc2]=f44,-256
	stf.spill.nta [loc3]=f36,-256
	adds loc1=96*16-80-128,in0
	;;
	stf.spill.nta [loc0]=f123,-256
	stf.spill.nta [loc1]=f115,-256
	;;
	stf.spill.nta [loc0]=f107,-256
	stf.spill.nta [loc1]=f99,-256
	;;
	stf.spill.nta [loc0]=f91,-256
	stf.spill.nta [loc1]=f83,-256
	;;
	stf.spill.nta [loc0]=f75,-256
	stf.spill.nta [loc1]=f67,-256
	;;
	stf.spill.nta [loc0]=f59,-256
	stf.spill.nta [loc1]=f51,-256
	adds loc2=96*16-96,in0
	;;
	stf.spill.nta [loc0]=f43,-256
	stf.spill.nta [loc1]=f35,-256
	adds loc3=96*16-96-128,in0
	;;
	stf.spill.nta [loc2]=f122,-256
	stf.spill.nta [loc3]=f114,-256
	;;
	stf.spill.nta [loc2]=f106,-256
	stf.spill.nta [loc3]=f98,-256
	;;
	stf.spill.nta [loc2]=f90,-256
	stf.spill.nta [loc3]=f82,-256
	;;
	stf.spill.nta [loc2]=f74,-256
	stf.spill.nta [loc3]=f66,-256
	;;
	stf.spill.nta [loc2]=f58,-256
	stf.spill.nta [loc3]=f50,-256
	adds loc0=96*16-112,in0
	;;
	stf.spill.nta [loc2]=f42,-256
	stf.spill.nta [loc3]=f34,-256
	adds loc1=96*16-112-128,in0
	;;
	stf.spill.nta [loc0]=f121,-256
	stf.spill.nta [loc1]=f113,-256
	;;
	stf.spill.nta [loc0]=f105,-256
	stf.spill.nta [loc1]=f97,-256
	;;
	stf.spill.nta [loc0]=f89,-256
	stf.spill.nta [loc1]=f81,-256
	;;
	stf.spill.nta [loc0]=f73,-256
	stf.spill.nta [loc1]=f65,-256
	;;
	stf.spill.nta [loc0]=f57,-256
	stf.spill.nta [loc1]=f49,-256
	adds loc2=96*16-128,in0
	;;
	stf.spill.nta [loc0]=f41,-256
	stf.spill.nta [loc1]=f33,-256
	adds loc3=96*16-128-128,in0
	;;
	stf.spill.nta [loc2]=f120,-256
	stf.spill.nta [loc3]=f112,-256
	;;
	stf.spill.nta [loc2]=f104,-256
	stf.spill.nta [loc3]=f96,-256
	;;
	stf.spill.nta [loc2]=f88,-256
	stf.spill.nta [loc3]=f80,-256
	;;
	stf.spill.nta [loc2]=f72,-256
	stf.spill.nta [loc3]=f64,-256
	;;
	stf.spill.nta [loc2]=f56,-256
	stf.spill.nta [loc3]=f48,-256
	;;
	stf.spill.nta [loc2]=f40
	stf.spill.nta [loc3]=f32
	br.ret.sptk.many rp
END(__ia64_save_fpu)

GLOBAL_ENTRY(__ia64_load_fpu)
	alloc r2=ar.pfs,1,2,0,0
	adds r3=128,in0
	adds r14=256,in0
	adds r15=384,in0
	mov loc0=512
	mov loc1=-1024+16
	;;
	ldf.fill.nta f32=[in0],loc0
	ldf.fill.nta f40=[ r3],loc0
	ldf.fill.nta f48=[r14],loc0
	ldf.fill.nta f56=[r15],loc0
	;;
	ldf.fill.nta f64=[in0],loc0
	ldf.fill.nta f72=[ r3],loc0
	ldf.fill.nta f80=[r14],loc0
	ldf.fill.nta f88=[r15],loc0
	;;
	ldf.fill.nta f96=[in0],loc1
	ldf.fill.nta f104=[ r3],loc1
	ldf.fill.nta f112=[r14],loc1
	ldf.fill.nta f120=[r15],loc1
	;;
	ldf.fill.nta f33=[in0],loc0
	ldf.fill.nta f41=[ r3],loc0
	ldf.fill.nta f49=[r14],loc0
	ldf.fill.nta f57=[r15],loc0
	;;
	ldf.fill.nta f65=[in0],loc0
	ldf.fill.nta f73=[ r3],loc0
	ldf.fill.nta f81=[r14],loc0
	ldf.fill.nta f89=[r15],loc0
	;;
	ldf.fill.nta f97=[in0],loc1
	ldf.fill.nta f105=[ r3],loc1
	ldf.fill.nta f113=[r14],loc1
	ldf.fill.nta f121=[r15],loc1
	;;
	ldf.fill.nta f34=[in0],loc0
	ldf.fill.nta f42=[ r3],loc0
	ldf.fill.nta f50=[r14],loc0
	ldf.fill.nta f58=[r15],loc0
	;;
	ldf.fill.nta f66=[in0],loc0
	ldf.fill.nta f74=[ r3],loc0
	ldf.fill.nta f82=[r14],loc0
	ldf.fill.nta f90=[r15],loc0
	;;
	ldf.fill.nta f98=[in0],loc1
	ldf.fill.nta f106=[ r3],loc1
	ldf.fill.nta f114=[r14],loc1
	ldf.fill.nta f122=[r15],loc1
	;;
	ldf.fill.nta f35=[in0],loc0
	ldf.fill.nta f43=[ r3],loc0
	ldf.fill.nta f51=[r14],loc0
	ldf.fill.nta f59=[r15],loc0
	;;
	ldf.fill.nta f67=[in0],loc0
	ldf.fill.nta f75=[ r3],loc0
	ldf.fill.nta f83=[r14],loc0
	ldf.fill.nta f91=[r15],loc0
	;;
	ldf.fill.nta f99=[in0],loc1
	ldf.fill.nta f107=[ r3],loc1
	ldf.fill.nta f115=[r14],loc1
	ldf.fill.nta f123=[r15],loc1
	;;
	ldf.fill.nta f36=[in0],loc0
	ldf.fill.nta f44=[ r3],loc0
	ldf.fill.nta f52=[r14],loc0
	ldf.fill.nta f60=[r15],loc0
	;;
	ldf.fill.nta f68=[in0],loc0
	ldf.fill.nta f76=[ r3],loc0
	ldf.fill.nta f84=[r14],loc0
	ldf.fill.nta f92=[r15],loc0
	;;
	ldf.fill.nta f100=[in0],loc1
	ldf.fill.nta f108=[ r3],loc1
	ldf.fill.nta f116=[r14],loc1
	ldf.fill.nta f124=[r15],loc1
	;;
	ldf.fill.nta f37=[in0],loc0
	ldf.fill.nta f45=[ r3],loc0
	ldf.fill.nta f53=[r14],loc0
	ldf.fill.nta f61=[r15],loc0
	;;
	ldf.fill.nta f69=[in0],loc0
	ldf.fill.nta f77=[ r3],loc0
	ldf.fill.nta f85=[r14],loc0
	ldf.fill.nta f93=[r15],loc0
	;;
	ldf.fill.nta f101=[in0],loc1
	ldf.fill.nta f109=[ r3],loc1
	ldf.fill.nta f117=[r14],loc1
	ldf.fill.nta f125=[r15],loc1
	;;
	ldf.fill.nta f38 =[in0],loc0
	ldf.fill.nta f46 =[ r3],loc0
	ldf.fill.nta f54 =[r14],loc0
	ldf.fill.nta f62 =[r15],loc0
	;;
	ldf.fill.nta f70 =[in0],loc0
	ldf.fill.nta f78 =[ r3],loc0
	ldf.fill.nta f86 =[r14],loc0
	ldf.fill.nta f94 =[r15],loc0
	;;
	ldf.fill.nta f102=[in0],loc1
	ldf.fill.nta f110=[ r3],loc1
	ldf.fill.nta f118=[r14],loc1
	ldf.fill.nta f126=[r15],loc1
	;;
	ldf.fill.nta f39 =[in0],loc0
	ldf.fill.nta f47 =[ r3],loc0
	ldf.fill.nta f55 =[r14],loc0
	ldf.fill.nta f63 =[r15],loc0
	;;
	ldf.fill.nta f71 =[in0],loc0
	ldf.fill.nta f79 =[ r3],loc0
	ldf.fill.nta f87 =[r14],loc0
	ldf.fill.nta f95 =[r15],loc0
	;;
	ldf.fill.nta f103=[in0]
	ldf.fill.nta f111=[ r3]
	ldf.fill.nta f119=[r14]
	ldf.fill.nta f127=[r15]
	br.ret.sptk.many rp
END(__ia64_load_fpu)
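
/*
 * Sketch for readers (not executed): __ia64_init_fpu below simply zeroes
 * the high floating-point partition,
 *
 *	for (n = 32; n < 128; n++)
 *		f[n] = 0.0;
 *
 * but it mixes ldfps (M-unit, two registers per load of the f0 pair
 * spilled at [sp]), setf.s from r0 (M-unit), and mov f=f0 (F-unit) so the
 * work is spread across execution ports; the comment inside the routine
 * explains the bundle-count/cycle tradeoff.
 */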

GLOBAL_ENTRY(__ia64_init_fpu)
	stf.spill [sp]=f0		// M3
	mov	 f32=f0			// F
	nop.b	 0

	ldfps	 f33,f34=[sp]		// M0
	ldfps	 f35,f36=[sp]		// M1
	mov	 f37=f0			// F
	;;

	setf.s	 f38=r0			// M2
	setf.s	 f39=r0			// M3
	mov	 f40=f0			// F

	ldfps	 f41,f42=[sp]		// M0
	ldfps	 f43,f44=[sp]		// M1
	mov	 f45=f0			// F

	setf.s	 f46=r0			// M2
	setf.s	 f47=r0			// M3
	mov	 f48=f0			// F

	ldfps	 f49,f50=[sp]		// M0
	ldfps	 f51,f52=[sp]		// M1
	mov	 f53=f0			// F

	setf.s	 f54=r0			// M2
	setf.s	 f55=r0			// M3
	mov	 f56=f0			// F

	ldfps	 f57,f58=[sp]		// M0
	ldfps	 f59,f60=[sp]		// M1
	mov	 f61=f0			// F

	setf.s	 f62=r0			// M2
	setf.s	 f63=r0			// M3
	mov	 f64=f0			// F

	ldfps	 f65,f66=[sp]		// M0
	ldfps	 f67,f68=[sp]		// M1
	mov	 f69=f0			// F

	setf.s	 f70=r0			// M2
	setf.s	 f71=r0			// M3
	mov	 f72=f0			// F

	ldfps	 f73,f74=[sp]		// M0
	ldfps	 f75,f76=[sp]		// M1
	mov	 f77=f0			// F

	setf.s	 f78=r0			// M2
	setf.s	 f79=r0			// M3
	mov	 f80=f0			// F

	ldfps	 f81,f82=[sp]		// M0
	ldfps	 f83,f84=[sp]		// M1
	mov	 f85=f0			// F

	setf.s	 f86=r0			// M2
	setf.s	 f87=r0			// M3
	mov	 f88=f0			// F

	/*
	 * When the instructions are cached, it would be faster to initialize
	 * the remaining registers with simple mov instructions (F-unit).
	 * That gets the time down to ~29 cycles.  However, it would use up
	 * 33 bundles, whereas continuing with the above pattern yields
	 * 10 bundles and ~30 cycles.
	 */

	ldfps	 f89,f90=[sp]		// M0
	ldfps	 f91,f92=[sp]		// M1
	mov	 f93=f0			// F

	setf.s	 f94=r0			// M2
	setf.s	 f95=r0			// M3
	mov	 f96=f0			// F

	ldfps	 f97,f98=[sp]		// M0
	ldfps	 f99,f100=[sp]		// M1
	mov	 f101=f0		// F

	setf.s	 f102=r0		// M2
	setf.s	 f103=r0		// M3
	mov	 f104=f0		// F

	ldfps	 f105,f106=[sp]		// M0
	ldfps	 f107,f108=[sp]		// M1
	mov	 f109=f0		// F

	setf.s	 f110=r0		// M2
	setf.s	 f111=r0		// M3
	mov	 f112=f0		// F

	ldfps	 f113,f114=[sp]		// M0
	ldfps	 f115,f116=[sp]		// M1
	mov	 f117=f0		// F

	setf.s	 f118=r0		// M2
	setf.s	 f119=r0		// M3
	mov	 f120=f0		// F

	ldfps	 f121,f122=[sp]		// M0
	ldfps	 f123,f124=[sp]		// M1
	mov	 f125=f0		// F

	setf.s	 f126=r0		// M2
	setf.s	 f127=r0		// M3
	br.ret.sptk.many rp		// F
END(__ia64_init_fpu)

/*
 * Switch execution mode from virtual to physical
 *
 * Inputs:
 *	r16 = new psr to establish
 * Outputs:
 *	r19 = old virtual address of ar.bsp
 *	r20 = old virtual address of sp
 *
 * Note: RSE must already be in enforced lazy mode
 */
GLOBAL_ENTRY(ia64_switch_mode_phys)
 {
	alloc r2=ar.pfs,0,0,0,0
	rsm psr.i | psr.ic		// disable interrupts and interrupt collection
	mov r15=ip
 }
	;;
 {
	flushrs				// must be first insn in group
	srlz.i
 }
	;;
	mov cr.ipsr=r16			// set new PSR
	add r3=1f-ia64_switch_mode_phys,r15

	mov r19=ar.bsp
	mov r20=sp
	mov r14=rp			// get return address into a general register
	;;

	// going to physical mode, use tpa to translate virt->phys
	tpa r17=r19
	tpa r3=r3
	tpa sp=sp
	tpa r14=r14
	;;

	mov r18=ar.rnat			// save ar.rnat
	mov ar.bspstore=r17		// this steps on ar.rnat
	mov cr.iip=r3
	mov cr.ifs=r0
	;;
	mov ar.rnat=r18			// restore ar.rnat
	rfi				// must be last insn in group
	;;
1:	mov rp=r14
	br.ret.sptk.many rp
END(ia64_switch_mode_phys)

/*
 * Switch execution mode from physical to virtual
 *
 * Inputs:
 *	r16 = new psr to establish
 *	r19 = new bspstore to establish
 *	r20 = new sp to establish
 *
 * Note: RSE must already be in enforced lazy mode
 */
GLOBAL_ENTRY(ia64_switch_mode_virt)
 {
	alloc r2=ar.pfs,0,0,0,0
	rsm psr.i | psr.ic		// disable interrupts and interrupt collection
	mov r15=ip
 }
	;;
 {
	flushrs				// must be first insn in group
	srlz.i
 }
	;;
	mov cr.ipsr=r16			// set new PSR
	add r3=1f-ia64_switch_mode_virt,r15

	mov r14=rp			// get return address into a general register
	;;

	// going to virtual
	//   - for code addresses, set upper bits of addr to KERNEL_START
	//   - for stack addresses, copy from input argument
	movl r18=KERNEL_START
	dep r3=0,r3,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT
	dep r14=0,r14,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT
	mov sp=r20
	;;
	or r3=r3,r18
	or r14=r14,r18
	;;

	mov r18=ar.rnat			// save ar.rnat
	mov ar.bspstore=r19		// this steps on ar.rnat
	mov cr.iip=r3
	mov cr.ifs=r0
	;;
	mov ar.rnat=r18			// restore ar.rnat
	rfi				// must be last insn in group
	;;
1:	mov rp=r14
	br.ret.sptk.many rp
END(ia64_switch_mode_virt)
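
/*
 * A non-authoritative sketch of the protocol shared by the two routines
 * above (pseudo-C over the same registers; the helper names are
 * illustrative, not real functions):
 *
 *	rsm(PSR_I | PSR_IC);		// no interrupts, no collection
 *	flushrs();			// dirty RSE frames -> backing store
 *	cr.ipsr = new_psr;		// r16
 *	cr.iip  = translated("1f");	// tpa(), or merge with KERNEL_START
 *	cr.ifs  = 0;			// do not restore a register frame
 *	nat = ar.rnat;			// bspstore write clobbers ar.rnat
 *	ar.bspstore = translated_bsp;
 *	ar.rnat = nat;
 *	rfi();				// "return" into the other mode at 1f
 */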

GLOBAL_ENTRY(ia64_delay_loop)
	.prologue
{	nop 0			// work around GAS unwind info generation bug...
	.save ar.lc,r2
	mov r2=ar.lc
	.body
	;;
	mov ar.lc=r32
}
	;;
	// force loop to be 32-byte aligned (GAS bug means we cannot use .align
	// inside function body without corrupting unwind info).
{	nop 0 }
1:	br.cloop.sptk.few 1b
	;;
	mov ar.lc=r2
	br.ret.sptk.many rp
END(ia64_delay_loop)

/*
 * Return a CPU-local timestamp in nanoseconds.  This timestamp is NOT
 * synchronized across CPUs, so its return value must never be compared
 * against the values returned on another CPU.  The usage in kernel/sched.c
 * ensures that.
 *
 * The return value of sched_clock() is NOT supposed to wrap around.
 * If it did, it would cause some scheduling hiccups (at worst).
 * Fortunately, with a 64-bit cycle counter ticking at 100 GHz, even
 * that would happen only once every 5+ years.
 *
 * The code below basically calculates:
 *
 *	(ia64_get_itc() * local_cpu_data->nsec_per_cyc) >> IA64_NSEC_PER_CYC_SHIFT
 *
 * except that the multiplication and the shift are done with 128-bit
 * intermediate precision so that we can produce a full 64-bit result.
 */
GLOBAL_ENTRY(sched_clock)
	addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
	mov.m r9=ar.itc		// fetch cycle-counter			(35 cyc)
	;;
	ldf8 f8=[r8]
	;;
	setf.sig f9=r9		// certain to stall, so issue it _after_ ldf8...
	;;
	xmpy.lu f10=f9,f8	// calculate low 64 bits of 128-bit product	(4 cyc)
	xmpy.hu f11=f9,f8	// calculate high 64 bits of 128-bit product
	;;
	getf.sig r8=f10		//						(5 cyc)
	getf.sig r9=f11
	;;
	shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT
	br.ret.sptk.many rp
END(sched_clock)
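
/*
 * Equivalent C, for illustration only (sched_clock_sketch is a hypothetical
 * name; GCC's unsigned __int128 stands in for the xmpy.lu/xmpy.hu pair plus
 * the shrp funnel shift):
 *
 *	unsigned long sched_clock_sketch(unsigned long itc,
 *					 unsigned long nsec_per_cyc)
 *	{
 *		unsigned __int128 prod = (unsigned __int128) itc * nsec_per_cyc;
 *		return (unsigned long) (prod >> IA64_NSEC_PER_CYC_SHIFT);
 *	}
 *
 * nsec_per_cyc is a fixed-point scale factor, so the shift recovers an
 * integral nanosecond count from the 128-bit product.
 */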

GLOBAL_ENTRY(start_kernel_thread)
	.prologue
	.save rp, r0			// this is the end of the call-chain
	.body
	alloc r2 = ar.pfs, 0, 0, 2, 0
	mov out0 = r9
	mov out1 = r11;;
	br.call.sptk.many rp = kernel_thread_helper;;
	mov out0 = r8
	br.call.sptk.many rp = sys_exit;;
1:	br.sptk.few 1b			// not reached
END(start_kernel_thread)

#ifdef CONFIG_IA64_BRL_EMU

/*
 * Assembly routines used by brl_emu.c to set preserved register state.
 */

#define SET_REG(reg)			\
 GLOBAL_ENTRY(ia64_set_##reg);		\
	alloc r16=ar.pfs,1,0,0,0;	\
	mov reg=r32;			\
	;;				\
	br.ret.sptk.many rp;		\
 END(ia64_set_##reg)

SET_REG(b1);
SET_REG(b2);
SET_REG(b3);
SET_REG(b4);
SET_REG(b5);

#endif /* CONFIG_IA64_BRL_EMU */

#ifdef CONFIG_SMP
	/*
	 * This routine handles spinlock contention.  It uses a non-standard calling
	 * convention to avoid converting leaf routines into interior routines.  Because
	 * of this special convention, there are several restrictions:
	 *
	 * - do not use gp-relative variables: this code is called from the kernel
	 *   and from modules, so r1 is undefined.
	 * - do not use stacked registers: the caller owns them.
	 * - do not use the scratch stack space: the caller owns it.
	 * - do not use any registers other than the ones listed below.
	 *
	 * Inputs:
	 *   ar.pfs - saved CFM of caller
	 *   ar.ccv - 0 (and available for use)
	 *   r27    - flags from spin_lock_irqsave or 0.  Must be preserved.
	 *   r28    - available for use.
	 *   r29    - available for use.
	 *   r30    - available for use.
	 *   r31    - address of lock, available for use.
	 *   b6     - return address
	 *   p14    - available for use.
	 *   p15    - used to track flag status.
	 *
	 * If you patch this code to use more registers, do not forget to update
	 * the clobber lists for spin_lock() in include/asm-ia64/spinlock.h.
	 */

#if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3)

GLOBAL_ENTRY(ia64_spinlock_contention_pre3_4)
	.prologue
	.save ar.pfs, r0	// this code effectively has a zero frame size
	.save rp, r28
	.body
	nop 0
	tbit.nz p15,p0=r27,IA64_PSR_I_BIT
	.restore sp		// pop existing prologue after next insn
	mov b6 = r28
	.prologue
	.save ar.pfs, r0
	.altrp b6
	.body
	;;
(p15)	ssm psr.i		// reenable interrupts if they were on
				// DavidM says that srlz.d is slow and is not required in this case
.wait:
	// exponential backoff, kdb, lockmeter etc. go in here
	hint @pause
	ld4 r30=[r31]		// don't use ld4.bias; if it's contended, we won't write the word
	nop 0
	;;
	cmp4.ne p14,p0=r30,r0
(p14)	br.cond.sptk.few .wait
(p15)	rsm psr.i		// disable interrupts if we reenabled them
	br.cond.sptk.few b6	// lock is now free, try to acquire
	.global ia64_spinlock_contention_pre3_4_end	// for kernprof
ia64_spinlock_contention_pre3_4_end:
END(ia64_spinlock_contention_pre3_4)

#else

GLOBAL_ENTRY(ia64_spinlock_contention)
	.prologue
	.altrp b6
	.body
	tbit.nz p15,p0=r27,IA64_PSR_I_BIT
	;;
.wait:
(p15)	ssm psr.i		// reenable interrupts if they were on
				// DavidM says that srlz.d is slow and is not required in this case
.wait2:
	// exponential backoff, kdb, lockmeter etc. go in here
	hint @pause
	ld4 r30=[r31]		// don't use ld4.bias; if it's contended, we won't write the word
	;;
	cmp4.ne p14,p0=r30,r0
	mov r30 = 1
(p14)	br.cond.sptk.few .wait2
(p15)	rsm psr.i		// disable interrupts if we reenabled them
	;;
	cmpxchg4.acq r30=[r31], r30, ar.ccv
	;;
	cmp4.ne p14,p0=r0,r30
(p14)	br.cond.sptk.few .wait

	br.ret.sptk.many b6	// lock is now taken
END(ia64_spinlock_contention)

#endif

#endif /* CONFIG_SMP */
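
/*
 * For reference only (not part of this file's code): the newer contention
 * path above is roughly this C loop, where "flags_had_psr_i" corresponds to
 * predicate p15 and the compare-exchange uses acquire semantics with
 * ar.ccv == 0:
 *
 *	do {
 *		if (flags_had_psr_i)
 *			local_irq_enable();
 *		while (*lock != 0)
 *			cpu_relax();		// hint @pause
 *		if (flags_had_psr_i)
 *			local_irq_disable();
 *	} while (cmpxchg_acq(lock, 0, 1) != 0);
 */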