/*
 * This file contains the light-weight system call handlers (fsyscall-handlers).
 *
 * Copyright (C) 2003 Hewlett-Packard Co
 * 	David Mosberger-Tang <davidm@hpl.hp.com>
 *
 * 25-Sep-03 davidm	Implement fsys_rt_sigprocmask().
 * 18-Feb-03 louisk	Implement fsys_gettimeofday().
 * 28-Feb-03 davidm	Fixed several bugs in fsys_gettimeofday().  Tuned it some more,
 *			probably broke it along the way... ;-)
 * 13-Jul-04 clameter   Implement fsys_clock_gettime and revise fsys_gettimeofday to make
 *                      it capable of using memory based clocks without falling back to C code.
 * 08-Feb-07 Fenghua Yu Implement fsys_getcpu.
 *
 */

#include <asm/asmmacro.h>
#include <asm/errno.h>
#include <asm/asm-offsets.h>
#include <asm/percpu.h>
#include <asm/thread_info.h>
#include <asm/sal.h>
#include <asm/signal.h>
#include <asm/system.h>
#include <asm/unistd.h>

#include "entry.h"
#include "paravirt_inst.h"

/*
 * See Documentation/ia64/fsys.txt for details on fsyscalls.
 *
 * On entry to an fsyscall handler:
 *   r10	= 0 (i.e., defaults to "successful syscall return")
 *   r11	= saved ar.pfs (a user-level value)
 *   r15	= system call number
 *   r16	= "current" task pointer (in normal kernel-mode, this is in r13)
 *   r32-r39 = system call arguments
 *   b6	= return address (a user-level value)
 *   ar.pfs = previous frame-state (a user-level value)
 *   PSR.be = cleared to zero (i.e., little-endian byte order is in effect)
 *   all other registers may contain values passed in from user-mode
 *
 * On return from an fsyscall handler:
 *   r11	= saved ar.pfs (as passed into the fsyscall handler)
 *   r15	= system call number (as passed into the fsyscall handler)
 *   r32-r39 = system call arguments (as passed into the fsyscall handler)
 *   b6	= return address (as passed into the fsyscall handler)
 *   ar.pfs = previous frame-state (as passed into the fsyscall handler)
 */

/*
 * fsys_ni_syscall: fail the call.  Returns with r8 = ENOSYS and r10 = -1
 * (r10 = 0 is the "successful syscall return" default, see above).
 */
ENTRY(fsys_ni_syscall)
	.prologue
	.altrp b6
	.body
	mov r8=ENOSYS
	mov r10=-1
	FSYS_RETURN
END(fsys_ni_syscall)

/*
 * fsys_getpid: return in r8 the pid number of current->group_leader at the
 * level recorded in its struct pid (pid->numbers[pid->level].nr).
 * Falls back to the heavy-weight syscall if any TIF_ALLWORK_MASK flag is set.
 * r17 is zeroed before returning; r9 only ever holds masked thread flags.
 */
ENTRY(fsys_getpid)
	.prologue
	.altrp b6
	.body
	add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
	;;
	ld8 r17=[r17]				// r17 = current->group_leader
	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
	;;
	ld4 r9=[r9]
	add r17=IA64_TASK_TGIDLINK_OFFSET,r17
	;;
	and r9=TIF_ALLWORK_MASK,r9
	ld8 r17=[r17]				// r17 = current->group_leader->pids[PIDTYPE_PID].pid
	;;
	add r8=IA64_PID_LEVEL_OFFSET,r17
	;;
	ld4 r8=[r8]				// r8 = pid->level
	add r17=IA64_PID_UPID_OFFSET,r17	// r17 = &pid->numbers[0]
	;;
	shl r8=r8,IA64_UPID_SHIFT
	;;
	add r17=r17,r8				// r17 = &pid->numbers[pid->level]
	;;
	ld4 r8=[r17]				// r8 = pid->numbers[pid->level].nr
	;;
	mov r17=0
	;;
	cmp.ne p8,p0=0,r9
(p8)	br.spnt.many fsys_fallback_syscall
	FSYS_RETURN
END(fsys_getpid)

/*
 * fsys_getppid: return current->group_leader->real_parent->tgid in r8.
 *
 * On SMP the real_parent pointer is re-read after loading tgid and the
 * sequence is retried if it changed in between.  In both configurations the
 * handler falls back to the heavy-weight syscall when any TIF_ALLWORK_MASK
 * flag is set (p8).  r17-r19 are zeroed before returning to user level.
 */
ENTRY(fsys_getppid)
	.prologue
	.altrp b6
	.body
	add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
	;;
	ld8 r17=[r17]				// r17 = current->group_leader
	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
	;;

	ld4 r9=[r9]
	add r17=IA64_TASK_REAL_PARENT_OFFSET,r17 // r17 = &current->group_leader->real_parent
	;;
	and r9=TIF_ALLWORK_MASK,r9

1:	ld8 r18=[r17]				// r18 = current->group_leader->real_parent
	;;
	cmp.ne p8,p0=0,r9
	add r8=IA64_TASK_TGID_OFFSET,r18	// r8 = &current->group_leader->real_parent->tgid
	;;

	/*
	 * The .acq is needed to ensure that the read of tgid has returned its data before
	 * we re-check "real_parent".
	 */
	ld4.acq r8=[r8]				// r8 = current->group_leader->real_parent->tgid
#ifdef CONFIG_SMP
	/*
	 * Re-read current->group_leader->real_parent.
	 */
	ld8 r19=[r17]				// r19 = current->group_leader->real_parent
(p8)	br.spnt.many fsys_fallback_syscall
	;;
	cmp.ne p6,p0=r18,r19			// did real_parent change?
	mov r19=0			// i must not leak kernel bits...
(p6)	br.cond.spnt.few 1b			// yes -> redo the read of tgid and the check
	;;
	mov r17=0			// i must not leak kernel bits...
	mov r18=0			// i must not leak kernel bits...
#else
	/*
	 * Pending work (p8) must be honoured on UP kernels too; without this
	 * branch the TIF_ALLWORK_MASK test above had no effect on !CONFIG_SMP.
	 */
(p8)	br.spnt.many fsys_fallback_syscall
	;;
	mov r17=0			// i must not leak kernel bits...
	mov r18=0			// i must not leak kernel bits...
	mov r19=0			// i must not leak kernel bits...
#endif
	FSYS_RETURN
END(fsys_getppid)

/*
 * fsys_set_tid_address: store the argument (r32) at
 * current + IA64_TASK_CLEAR_CHILD_TID_OFFSET, or -1 if r32 is a NaT, and
 * return current's pid number (pid->numbers[pid->level].nr) in r8.
 *
 * Note that the store is issued in the same instruction group as the
 * fall-back branch, i.e. it is performed even when pending work (p8) sends
 * us to fsys_fallback_syscall.
 */
ENTRY(fsys_set_tid_address)
	.prologue
	.altrp b6
	.body
	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
	add r17=IA64_TASK_TGIDLINK_OFFSET,r16
	;;
	ld4 r9=[r9]
	tnat.z p6,p7=r32		// check argument register for being NaT
	ld8 r17=[r17]				// r17 = current->pids[PIDTYPE_PID].pid
	;;
	and r9=TIF_ALLWORK_MASK,r9
	add r8=IA64_PID_LEVEL_OFFSET,r17
	add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16
	;;
	ld4 r8=[r8]				// r8 = pid->level
	add r17=IA64_PID_UPID_OFFSET,r17	// r17 = &pid->numbers[0]
	;;
	shl r8=r8,IA64_UPID_SHIFT
	;;
	add r17=r17,r8				// r17 = &pid->numbers[pid->level]
	;;
	ld4 r8=[r17]				// r8 = pid->numbers[pid->level].nr
	;;
	cmp.ne p8,p0=0,r9
	mov r17=-1
	;;
(p6)	st8 [r18]=r32
(p7)	st8 [r18]=r17
(p8)	br.spnt.many fsys_fallback_syscall
	;;
	mov r17=0			// i must not leak kernel bits...
	mov r18=0			// i must not leak kernel bits...
	FSYS_RETURN
END(fsys_set_tid_address)

#if IA64_GTOD_LOCK_OFFSET !=0
#error fsys_gettimeofday incompatible with changes to struct fsyscall_gtod_data_t
#endif
#if IA64_ITC_JITTER_OFFSET !=0
#error fsys_gettimeofday incompatible with changes to struct itc_jitter_data_t
#endif
/*
 * CLOCK_DIVIDE_BY_1000 and CLOCK_ADD_MONOTONIC are bits 14 and 15 so that
 * "mov pr = r30,0xc000" below loads them straight into p14 and p15.
 */
#define CLOCK_REALTIME 0
#define CLOCK_MONOTONIC 1
#define CLOCK_DIVIDE_BY_1000 0x4000
#define CLOCK_ADD_MONOTONIC 0x8000

/*
 * fsys_gettimeofday: r32 = pointer to the result, r33 = second argument
 * (only checked for NaT).  Sets the CLOCK_DIVIDE_BY_1000 flag and continues
 * at .gettime, which is shared with fsys_clock_gettime.
 *
 * The .fail_einval and .fail_efault exits defined at the end of this
 * function are also used by the other handlers in this file.
 */
ENTRY(fsys_gettimeofday)
	.prologue
	.altrp b6
	.body
	mov r31 = r32
	tnat.nz p6,p0 = r33		// guard against NaT argument
(p6)    br.cond.spnt.few .fail_einval
	mov r30 = CLOCK_DIVIDE_BY_1000
	;;
.gettime:
	// Register map
	// Incoming r31 = pointer to address where to place result
	//          r30 = flags determining how time is processed
	// r2,r3 = temp r4-r7 preserved
	// r8 = result nanoseconds
	// r9 = result seconds
	// r10 = temporary storage for clock difference
	// r11 = preserved: saved ar.pfs
	// r12 = preserved: memory stack
	// r13 = preserved: thread pointer
	// r14 = address of mask / mask value
	// r15 = preserved: system call number
	// r16 = preserved: current task pointer
	// r17 = (not used)
	// r18 = (not used)
	// r19 = address of itc_lastcycle
	// r20 = struct fsyscall_gtod_data (= address of gtod_lock.sequence)
	// r21 = address of mmio_ptr
	// r22 = address of wall_time or monotonic_time
	// r23 = address of shift / value
	// r24 = address mult factor / cycle_last value
	// r25 = itc_lastcycle value
	// r26 = address clocksource cycle_last
	// r27 = (not used)
	// r28 = sequence number at the beginning of critical section
	// r29 = address of itc_jitter
	// r30 = time processing flags / memory address
	// r31 = pointer to result
	// Predicates
	// p6,p7 short term use
	// p8 = timesource ar.itc
	// p9 = timesource mmio64
	// p10 = timesource mmio32 - not used
	// p11 = timesource not to be handled by asm code
	// p12 = memory time source ( = p9 | p10) - not used
	// p13 = do cmpxchg with itc_lastcycle
	// p14 = Divide by 1000
	// p15 = Add monotonic
	//
	// Note that instructions are optimized for McKinley. McKinley can
	// process two bundles simultaneously and therefore we continuously
	// try to feed the CPU two bundles and then a stop.

	add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
	tnat.nz p6,p0 = r31		// guard against NaT argument
(p6)	br.cond.spnt.few .fail_einval
	movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address
	;;
	ld4 r2 = [r2]			// process work pending flags
	movl r29 = itc_jitter_data	// itc_jitter
	add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20	// wall_time
	add r21 = IA64_CLKSRC_MMIO_OFFSET,r20
	mov pr = r30,0xc000	// Set predicates according to function
	;;
	and r2 = TIF_ALLWORK_MASK,r2
	add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29
(p15)	add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20	// monotonic_time
	;;
	add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20	// clksrc_cycle_last
	cmp.ne p6, p0 = 0, r2	// Fallback if work is scheduled
(p6)	br.cond.spnt.many fsys_fallback_syscall
	;;
	// Begin critical section
.time_redo:
	ld4.acq r28 = [r20]	// gtod_lock.sequence, Must take first
	;;
	and r28 = ~1,r28	// And make sequence even to force retry if odd
	;;
	ld8 r30 = [r21]		// clocksource->mmio_ptr
	add r24 = IA64_CLKSRC_MULT_OFFSET,r20
	ld4 r2 = [r29]		// itc_jitter value
	add r23 = IA64_CLKSRC_SHIFT_OFFSET,r20
	add r14 = IA64_CLKSRC_MASK_OFFSET,r20
	;;
	ld4 r3 = [r24]		// clocksource mult value
	ld8 r14 = [r14]         // clocksource mask value
	cmp.eq p8,p9 = 0,r30	// use cpu timer if no mmio_ptr
	;;
	setf.sig f7 = r3	// Setup for mult scaling of counter
(p8)	cmp.ne p13,p0 = r2,r0	// need itc_jitter compensation, set p13
	ld4 r23 = [r23]		// clocksource shift value
	ld8 r24 = [r26]		// get clksrc_cycle_last value
(p9)	cmp.eq p13,p0 = 0,r30	// if mmio_ptr, clear p13 jitter control
	;;
	.pred.rel.mutex p8,p9
	MOV_FROM_ITC(p8, p6, r2, r10)	// CPU_TIMER. 36 clocks latency!!!
(p9)	ld8 r2 = [r30]		// MMIO_TIMER. Could also have latency issues..
(p13)	ld8 r25 = [r19]		// get itc_lastcycle value
	ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET	// tv_sec
	;;
	ld8 r8 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET	// tv_nsec
(p13)	sub r3 = r25,r2		// Diff needed before comparison (thanks davidm)
	;;
(p13)	cmp.gt.unc p6,p7 = r3,r0 // check if it is less than last. p6,p7 cleared
	sub r10 = r2,r24	// current_cycle - last_cycle
	;;
(p6)	sub r10 = r25,r24	// time we got was less than last_cycle
(p7)	mov ar.ccv = r25	// more than last_cycle. Prep for cmpxchg
	;;
(p7)	cmpxchg8.rel r3 = [r19],r2,ar.ccv
	;;
(p7)	cmp.ne p7,p0 = r25,r3	// if cmpxchg not successful
	;;
(p7)	sub r10 = r3,r24	//	then use new last_cycle instead
	;;
	and r10 = r10,r14	// Apply mask
	;;
	setf.sig f8 = r10
	nop.i 123
	;;
	// fault check takes 5 cycles and we have spare time
EX(.fail_efault, probe.w.fault r31, 3)
	xmpy.l f8 = f8,f7	// nsec_per_cyc*(counter-last_counter)
	;;
	getf.sig r2 = f8
	mf
	;;
	ld4 r10 = [r20]		// gtod_lock.sequence
	shr.u r2 = r2,r23	// shift by factor
	;;
	add r8 = r8,r2		// Add xtime.nsecs
	cmp4.ne p7,p0 = r28,r10
(p7)	br.cond.dpnt.few .time_redo	// sequence number changed, redo
	// End critical section.
	// Now r8=tv->tv_nsec and r9=tv->tv_sec
	mov r10 = r0
	movl r2 = 1000000000
	add r23 = IA64_TIMESPEC_TV_NSEC_OFFSET, r31
(p14)	movl r3 = 2361183241434822607	// Prep for / 1000 hack (2^68 / 125, rounded up)
	;;
.time_normalize:
	mov r21 = r8
	cmp.ge p6,p0 = r8,r2
(p14)	shr.u r20 = r8, 3 // We can repeat this if necessary just wasting time
	;;
(p14)	setf.sig f8 = r20
(p6)	sub r8 = r8,r2
(p6)	add r9 = 1,r9		// two nops before the branch.
(p14)	setf.sig f7 = r3	// Chances for repeats are 1 in 10000 for gettod
(p6)	br.cond.dpnt.few .time_normalize
	;;
	// Divided by 8 through shift. Now divide by 125
	// The compiler was able to do that with a multiply
	// and a shift and we do the same
EX(.fail_efault, probe.w.fault r23, 3)	// This also costs 5 cycles
(p14)	xmpy.hu f8 = f8, f7		// xmpy has 5 cycles latency so use it
	;;
(p14)	getf.sig r2 = f8
	;;
	mov r8 = r0
(p14)	shr.u r21 = r2, 4
	;;
EX(.fail_efault, st8 [r31] = r9)
EX(.fail_efault, st8 [r23] = r21)
	FSYS_RETURN
.fail_einval:
	mov r8 = EINVAL
	mov r10 = -1
	FSYS_RETURN
.fail_efault:
	mov r8 = EFAULT
	mov r10 = -1
	FSYS_RETURN
END(fsys_gettimeofday)

/*
 * fsys_clock_gettime: r32 = clock id, r33 = pointer to the result.
 * Clock ids above CLOCK_MONOTONIC (unsigned compare) go to the heavy-weight
 * syscall.  Otherwise "r32 << 15" yields 0 for CLOCK_REALTIME and
 * CLOCK_ADD_MONOTONIC (0x8000) for CLOCK_MONOTONIC, and the work is done by
 * the shared .gettime code in fsys_gettimeofday.
 */
ENTRY(fsys_clock_gettime)
	.prologue
	.altrp b6
	.body
	cmp4.ltu p6, p0 = CLOCK_MONOTONIC, r32
	// Fallback if this is not CLOCK_REALTIME or CLOCK_MONOTONIC
(p6)	br.spnt.few fsys_fallback_syscall
	mov r31 = r33
	shl r30 = r32,15
	br.many .gettime
END(fsys_clock_gettime)

/*
 * long fsys_rt_sigprocmask (int how, sigset_t *set, sigset_t *oset, size_t sigsetsize).
 *
 * Outline of the code below:
 *  - EINVAL if how > SIG_SETMASK, sigsetsize != _NSIG_WORDS*8, or how/sigsetsize is NaT;
 *    EFAULT if set or oset is NaT.
 *  - Falls back to the heavy-weight syscall on pending work (TIF_ALLWORK_MASK),
 *    on siglock contention (SMP), or if a signal would be pending under the
 *    new mask (or group_stop_count > 0).
 *  - set == NULL skips straight to .store_mask, which writes the mask
 *    read at entry to *oset if oset != NULL.
 *  - The update itself runs with interrupts masked (RSM_PSR_I/SSM_PSR_I) and,
 *    on SMP, with current->sighand->siglock held via an open-coded ticket lock.
 */
#if _NSIG_WORDS != 1
# error Sorry, fsys_rt_sigprocmask() needs to be updated for _NSIG_WORDS != 1.
#endif
ENTRY(fsys_rt_sigprocmask)
	.prologue
	.altrp b6
	.body

	add r2=IA64_TASK_BLOCKED_OFFSET,r16
	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
	cmp4.ltu p6,p0=SIG_SETMASK,r32

	cmp.ne p15,p0=r0,r34			// oset != NULL?
	tnat.nz p8,p0=r34
	add r31=IA64_TASK_SIGHAND_OFFSET,r16
	;;
	ld8 r3=[r2]				// read/prefetch current->blocked
	ld4 r9=[r9]
	tnat.nz.or p6,p0=r35

	cmp.ne.or p6,p0=_NSIG_WORDS*8,r35
	tnat.nz.or p6,p0=r32
(p6)	br.spnt.few .fail_einval		// fail with EINVAL
	;;
#ifdef CONFIG_SMP
	ld8 r31=[r31]				// r31 <- current->sighand
#endif
	and r9=TIF_ALLWORK_MASK,r9
	tnat.nz.or p8,p0=r33
	;;
	cmp.ne p7,p0=0,r9
	cmp.eq p6,p0=r0,r33			// set == NULL?
	add r31=IA64_SIGHAND_SIGLOCK_OFFSET,r31	// r31 <- current->sighand->siglock
(p8)	br.spnt.few .fail_efault		// fail with EFAULT
(p7)	br.spnt.many fsys_fallback_syscall	// got pending kernel work...
(p6)	br.dpnt.many .store_mask		// -> short-circuit to just reading the signal mask

	/* Argh, we actually have to do some work and _update_ the signal mask: */

EX(.fail_efault, probe.r.fault r33, 3)		// verify user has read-access to *set
EX(.fail_efault, ld8 r14=[r33])			// r14 <- *set
	mov r17=(1 << (SIGKILL - 1)) | (1 << (SIGSTOP - 1))
	;;

	RSM_PSR_I(p0, r18, r19)			// mask interrupt delivery
	andcm r14=r14,r17			// filter out SIGKILL & SIGSTOP
	mov r8=EINVAL			// default to EINVAL

#ifdef CONFIG_SMP
	// __ticket_spin_trylock(r31)
	ld4 r17=[r31]
	;;
	mov.m ar.ccv=r17
	extr.u r9=r17,17,15
	adds r19=1,r17
	extr.u r18=r17,0,15
	;;
	cmp.eq p6,p7=r9,r18
	;;
(p6)	cmpxchg4.acq r9=[r31],r19,ar.ccv
(p6)	dep.z r20=r19,1,15		// next serving ticket for unlock
(p7)	br.cond.spnt.many .lock_contention
	;;
	cmp4.eq p0,p7=r9,r17
	adds r31=2,r31
(p7)	br.cond.spnt.many .lock_contention
	ld8 r3=[r2]			// re-read current->blocked now that we hold the lock
	;;
#else
	ld8 r3=[r2]			// re-read current->blocked now that we hold the lock
#endif
	add r18=IA64_TASK_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r16
	add r19=IA64_TASK_SIGNAL_OFFSET,r16
	cmp4.eq p6,p0=SIG_BLOCK,r32
	;;
	ld8 r19=[r19]			// r19 <- current->signal
	cmp4.eq p7,p0=SIG_UNBLOCK,r32
	cmp4.eq p8,p0=SIG_SETMASK,r32
	;;
	ld8 r18=[r18]			// r18 <- current->pending.signal
	.pred.rel.mutex p6,p7,p8
(p6)	or r14=r3,r14			// SIG_BLOCK
(p7)	andcm r14=r3,r14		// SIG_UNBLOCK

(p8)	mov r14=r14			// SIG_SETMASK
(p6)	mov r8=0			// clear error code
	// recalc_sigpending()
	add r17=IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,r19

	add r19=IA64_SIGNAL_SHARED_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r19
	;;
	ld4 r17=[r17]		// r17 <- current->signal->group_stop_count
(p7)	mov r8=0		// clear error code

	ld8 r19=[r19]		// r19 <- current->signal->shared_pending
	;;
	cmp4.gt p6,p7=r17,r0	// p6/p7 <- (current->signal->group_stop_count > 0)?
(p8)	mov r8=0		// clear error code

	or r18=r18,r19		// r18 <- current->pending | current->signal->shared_pending
	;;
	// r18 <- (current->pending | current->signal->shared_pending) & ~current->blocked:
	andcm r18=r18,r14
	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
	;;

(p7)	cmp.ne.or.andcm p6,p7=r18,r0		// p6/p7 <- signal pending
	mov r19=0					// i must not leak kernel bits...
(p6)	br.cond.dpnt.many .sig_pending
	;;

1:	ld4 r17=[r9]				// r17 <- current->thread_info->flags
	;;
	mov ar.ccv=r17
	and r18=~_TIF_SIGPENDING,r17		// r18 <- r17 & ~(1 << TIF_SIGPENDING)
	;;

	st8 [r2]=r14				// update current->blocked with new mask
	cmpxchg4.acq r8=[r9],r18,ar.ccv		// current->thread_info->flags <- r18
	;;
	cmp.ne p6,p0=r17,r8			// update failed?
(p6)	br.cond.spnt.few 1b			// yes -> retry

#ifdef CONFIG_SMP
	// __ticket_spin_unlock(r31)
	st2.rel [r31]=r20
	mov r20=0					// i must not leak kernel bits...
#endif
	SSM_PSR_I(p0, p9, r31)
	;;

	srlz.d					// ensure psr.i is set again
	mov r18=0					// i must not leak kernel bits...

.store_mask:
EX(.fail_efault, (p15) probe.w.fault r34, 3)	// verify user has write-access to *oset
EX(.fail_efault, (p15) st8 [r34]=r3)
	mov r2=0					// i must not leak kernel bits...
	mov r3=0					// i must not leak kernel bits...
	mov r8=0					// return 0
	mov r9=0					// i must not leak kernel bits...
	mov r14=0					// i must not leak kernel bits...
	mov r17=0					// i must not leak kernel bits...
	mov r31=0					// i must not leak kernel bits...
	FSYS_RETURN

.sig_pending:
#ifdef CONFIG_SMP
	// __ticket_spin_unlock(r31)
	st2.rel [r31]=r20			// release the lock
#endif
	SSM_PSR_I(p0, p9, r17)
	;;
	srlz.d
	br.sptk.many fsys_fallback_syscall	// with signal pending, do the heavy-weight syscall

#ifdef CONFIG_SMP
.lock_contention:
	/* Rather than spinning here, fall back on doing a heavy-weight syscall.  */
	SSM_PSR_I(p0, p9, r17)
	;;
	srlz.d
	br.sptk.many fsys_fallback_syscall
#endif
END(fsys_rt_sigprocmask)

/*
 * fsys_getcpu doesn't use the third parameter in this implementation. It reads
 * current_thread_info()->cpu and corresponding node in cpu_to_node_map.
 *
 *   r32 = pointer that receives the cpu number (4-byte store), may be NULL
 *   r33 = pointer that receives the node number (2-byte store), may be NULL
 *
 * A NaT in either argument yields EINVAL.  A NULL pointer is skipped: both
 * the write-access probe and the store are predicated on the pointer being
 * non-NULL (p6 for r32, p7 for r33), so asking for only one of the two
 * values no longer fails with EFAULT.  Without CONFIG_NUMA the node is
 * reported as 0.
 *
 * NOTE(review): the node is written with st2, i.e. only the low 16 bits of
 * the destination are updated - confirm this matches the width user space
 * expects.
 */
ENTRY(fsys_getcpu)
	.prologue
	.altrp b6
	.body
	;;
	add r2=TI_FLAGS+IA64_TASK_SIZE,r16
	tnat.nz p6,p0 = r32			// guard against NaT argument
	add r3=TI_CPU+IA64_TASK_SIZE,r16
	;;
	ld4 r3=[r3]				// M r3 = thread_info->cpu
	ld4 r2=[r2]				// M r2 = thread_info->flags
(p6)    br.cond.spnt.few .fail_einval		// B
	;;
	tnat.nz p7,p0 = r33			// I guard against NaT argument
(p7)    br.cond.spnt.few .fail_einval		// B
	;;
	// The NaT checks are done with p6/p7; reuse them as "pointer given" flags.
	cmp.ne p6,p0=r32,r0			// A p6 <- (cpu pointer != NULL)
	cmp.ne p7,p0=r33,r0			// A p7 <- (node pointer != NULL)
	;;
#ifdef CONFIG_NUMA
	movl r17=cpu_to_node_map
	;;
EX(.fail_efault, (p6) probe.w.fault r32, 3)	// M This takes 5 cycles
EX(.fail_efault, (p7) probe.w.fault r33, 3)	// M This takes 5 cycles
	shladd r18=r3,1,r17
	;;
	ld2 r20=[r18]				// r20 = cpu_to_node_map[cpu]
	and r2 = TIF_ALLWORK_MASK,r2
	;;
	cmp.ne p8,p0=0,r2
(p8)	br.spnt.many fsys_fallback_syscall
	;;
	;;
EX(.fail_efault, (p6) st4 [r32] = r3)
EX(.fail_efault, (p7) st2 [r33] = r20)
	mov r8=0
	;;
#else
EX(.fail_efault, (p6) probe.w.fault r32, 3)	// M This takes 5 cycles
EX(.fail_efault, (p7) probe.w.fault r33, 3)	// M This takes 5 cycles
	and r2 = TIF_ALLWORK_MASK,r2
	;;
	cmp.ne p8,p0=0,r2
(p8)	br.spnt.many fsys_fallback_syscall
	;;
EX(.fail_efault, (p6) st4 [r32] = r3)
EX(.fail_efault, (p7) st2 [r33] = r0)
	mov r8=0
	;;
#endif
	FSYS_RETURN
END(fsys_getcpu)

/*
 * fsys_fallback_syscall: common exit for handlers that give up on the
 * light-weight path.  Looks up the heavy-weight entry point in
 * sys_call_table (index r15 - 1024), masks interrupts, loads the registers
 * that paravirt_fsys_bubble_down expects (r18, r21, r26, r27, r29) and
 * falls through into it.
 */
ENTRY(fsys_fallback_syscall)
	.prologue
	.altrp b6
	.body
	/*
	 * We only get here from light-weight syscall handlers.  Thus, we already
	 * know that r15 contains a valid syscall number.  No need to re-check.
	 */
	adds r17=-1024,r15
	movl r14=sys_call_table
	;;
	RSM_PSR_I(p0, r26, r27)
	shladd r18=r17,3,r14
	;;
	ld8 r18=[r18]				// load normal (heavy-weight) syscall entry-point
	MOV_FROM_PSR(p0, r29, r26)		// read psr (12 cyc load latency)
	mov r27=ar.rsc
	mov r21=ar.fpsr
	mov r26=ar.pfs
END(fsys_fallback_syscall)
	/* FALL THROUGH */
GLOBAL_ENTRY(paravirt_fsys_bubble_down)
	.prologue
	.altrp b6
	.body
	/*
	 * We get here for syscalls that don't have a lightweight
	 * handler.  For those, we need to bubble down into the kernel
	 * and that requires setting up a minimal pt_regs structure,
	 * and initializing the CPU state more or less as if an
	 * interruption had occurred.  To make syscall-restarts work,
	 * we setup pt_regs such that cr_iip points to the second
	 * instruction in syscall_via_break.  Decrementing the IP
	 * hence will restart the syscall via break and not
	 * decrementing IP will return us to the caller, as usual.
	 * Note that we preserve the value of psr.pp rather than
	 * initializing it from dcr.pp.  This makes it possible to
	 * distinguish fsyscall execution from other privileged
	 * execution.
	 *
	 * On entry:
	 *	- normal fsyscall handler register usage, except
	 *	  that we also have:
	 *	- r18: address of syscall entry point
	 *	- r21: ar.fpsr
	 *	- r26: ar.pfs
	 *	- r27: ar.rsc
	 *	- r29: psr
	 *
	 * We used to clear some PSR bits here but that requires slow
	 * serialization.  Fortunately, that isn't really necessary.
	 * The rationale is as follows: we used to clear bits
	 * ~PSR_PRESERVED_BITS in PSR.L.  Since
	 * PSR_PRESERVED_BITS==PSR.{UP,MFL,MFH,PK,DT,PP,SP,RT,IC}, we
	 * ended up clearing PSR.{BE,AC,I,DFL,DFH,DI,DB,SI,TB}.
	 * However,
	 *
	 * PSR.BE : already is turned off in __kernel_syscall_via_epc()
	 * PSR.AC : don't care (kernel normally turns PSR.AC on)
	 * PSR.I  : already turned off by the time paravirt_fsys_bubble_down gets
	 *	    invoked
	 * PSR.DFL: always 0 (kernel never turns it on)
	 * PSR.DFH: don't care --- kernel never touches f32-f127 on its own
	 *	    initiative
	 * PSR.DI : always 0 (kernel never turns it on)
	 * PSR.SI : always 0 (kernel never turns it on)
	 * PSR.DB : don't care --- kernel never enables kernel-level
	 *	    breakpoints
	 * PSR.TB : must be 0 already; if it wasn't zero on entry to
	 *          __kernel_syscall_via_epc, the branch to paravirt_fsys_bubble_down
	 *          will trigger a taken branch; the taken-trap-handler then
	 *          converts the syscall into a break-based system-call.
	 */
	/*
	 * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc.
	 * The rest we have to synthesize.
	 */
#	define PSR_ONE_BITS		((3 << IA64_PSR_CPL0_BIT)	\
					 | (0x1 << IA64_PSR_RI_BIT)	\
					 | IA64_PSR_BN | IA64_PSR_I)

	invala					// M0|1
	movl r14=ia64_ret_from_syscall		// X

	nop.m 0
	movl r28=__kernel_syscall_via_break	// X	create cr.iip
	;;

	mov r2=r16				// A    get task addr to addl-addressable register
	adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // A
	mov r31=pr				// I0   save pr (2 cyc)
	;;
	st1 [r16]=r0				// M2|3 clear current->thread.on_ustack flag
	addl r22=IA64_RBS_OFFSET,r2		// A    compute base of RBS
	add r3=TI_FLAGS+IA64_TASK_SIZE,r2	// A
	;;
	ld4 r3=[r3]				// M0|1 r3 = current_thread_info()->flags
	lfetch.fault.excl.nt1 [r22]		// M0|1 prefetch register backing-store
	nop.i 0
	;;
	mov ar.rsc=0				// M2   set enforced lazy mode, pl 0, LE, loadrs=0
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
	MOV_FROM_ITC(p0, p6, r30, r23)		// M    get cycle for accounting
#else
	nop.m 0
#endif
	nop.i 0
	;;
	mov r23=ar.bspstore			// M2 (12 cyc) save ar.bspstore
	mov.m r24=ar.rnat			// M2 (5 cyc) read ar.rnat (dual-issues!)
	nop.i 0
	;;
	mov ar.bspstore=r22			// M2 (6 cyc) switch to kernel RBS
	movl r8=PSR_ONE_BITS			// X
	;;
	mov r25=ar.unat				// M2 (5 cyc) save ar.unat
	mov r19=b6				// I0   save b6 (2 cyc)
	mov r20=r1				// A    save caller's gp in r20
	;;
	or r29=r8,r29				// A    construct cr.ipsr value to save
	mov b6=r18				// I0   copy syscall entry-point to b6 (7 cyc)
	addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // A compute base of memory stack

	mov r18=ar.bsp				// M2   save (kernel) ar.bsp (12 cyc)
	cmp.ne pKStk,pUStk=r0,r0		// A    set pKStk <- 0, pUStk <- 1
	br.call.sptk.many b7=ia64_syscall_setup	// B
	;;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
	// mov.m r30=ar.itc is called in advance
	add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2
	add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2
	;;
	ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP	// time at last check in kernel
	ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE	// time at leave kernel
	;;
	ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME	// cumulated stime
	ld8 r21=[r17]				// cumulated utime
	sub r22=r19,r18				// stime before leave kernel
	;;
	st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP	// update stamp
	sub r18=r30,r19				// elapsed time in user mode
	;;
	add r20=r20,r22				// sum stime
	add r21=r21,r18				// sum utime
	;;
	st8 [r16]=r20				// update stime
	st8 [r17]=r21				// update utime
	;;
#endif
	mov ar.rsc=0x3				// M2   set eager mode, pl 0, LE, loadrs=0
	mov rp=r14				// I0   set the real return addr
	and r3=_TIF_SYSCALL_TRACEAUDIT,r3	// A
	;;
	SSM_PSR_I(p0, p6, r22)			// M2   we're on kernel stacks now, reenable irqs
	cmp.eq p8,p0=r3,r0			// A
(p10)	br.cond.spnt.many ia64_ret_from_syscall	// B    return if bad call-frame or r15 is a NaT

	nop.m 0
(p8)	br.call.sptk.many b6=b6			// B    (ignore return address)
	br.cond.spnt ia64_trace_syscall		// B
END(paravirt_fsys_bubble_down)

	.rodata
	.align 8
	.globl paravirt_fsyscall_table

	data8 paravirt_fsys_bubble_down
/*
 * paravirt_fsyscall_table: one 8-byte slot per system call.  The numbering
 * comments start at 1025 for the second slot, i.e. the first slot is syscall
 * number 1024 (the same bias fsys_fallback_syscall subtracts from r15).
 * A slot holds the address of the light-weight handler for that syscall,
 * or 0 when there is none.
 * NOTE(review): the code that indexes this table is not in this file;
 * presumably a 0 slot makes the caller bubble down into the heavy-weight
 * path - confirm against the syscall entry code.
 */
paravirt_fsyscall_table:
	data8 fsys_ni_syscall
	data8 0				// exit			// 1025
	data8 0				// read
	data8 0				// write
	data8 0				// open
	data8 0				// close
	data8 0				// creat		// 1030
	data8 0				// link
	data8 0				// unlink
	data8 0				// execve
	data8 0				// chdir
	data8 0				// fchdir		// 1035
	data8 0				// utimes
	data8 0				// mknod
	data8 0				// chmod
	data8 0				// chown
	data8 0				// lseek		// 1040
	data8 fsys_getpid		// getpid
	data8 fsys_getppid		// getppid
	data8 0				// mount
	data8 0				// umount
	data8 0				// setuid		// 1045
	data8 0				// getuid
	data8 0				// geteuid
	data8 0				// ptrace
	data8 0				// access
	data8 0				// sync			// 1050
	data8 0				// fsync
	data8 0				// fdatasync
	data8 0				// kill
	data8 0				// rename
	data8 0				// mkdir		// 1055
	data8 0				// rmdir
	data8 0				// dup
	data8 0				// pipe
	data8 0				// times
	data8 0				// brk			// 1060
	data8 0				// setgid
	data8 0				// getgid
	data8 0				// getegid
	data8 0				// acct
	data8 0				// ioctl		// 1065
	data8 0				// fcntl
	data8 0				// umask
	data8 0				// chroot
	data8 0				// ustat
	data8 0				// dup2			// 1070
	data8 0				// setreuid
	data8 0				// setregid
	data8 0				// getresuid
	data8 0				// setresuid
	data8 0				// getresgid		// 1075
	data8 0				// setresgid
	data8 0				// getgroups
	data8 0				// setgroups
	data8 0				// getpgid
	data8 0				// setpgid		// 1080
	data8 0				// setsid
	data8 0				// getsid
	data8 0				// sethostname
	data8 0				// setrlimit
	data8 0				// getrlimit		// 1085
	data8 0				// getrusage
	data8 fsys_gettimeofday		// gettimeofday
	data8 0				// settimeofday
	data8 0				// select
	data8 0				// poll			// 1090
	data8 0				// symlink
	data8 0				// readlink
	data8 0				// uselib
	data8 0				// swapon
	data8 0				// swapoff		// 1095
	data8 0				// reboot
	data8 0				// truncate
	data8 0				// ftruncate
	data8 0				// fchmod
	data8 0				// fchown		// 1100
	data8 0				// getpriority
	data8 0				// setpriority
	data8 0				// statfs
	data8 0				// fstatfs
	data8 0				// gettid		// 1105
	data8 0				// semget
	data8 0				// semop
	data8 0				// semctl
	data8 0				// msgget
	data8 0				// msgsnd		// 1110
	data8 0				// msgrcv
	data8 0				// msgctl
	data8 0				// shmget
	data8 0				// shmat
	data8 0				// shmdt		// 1115
	data8 0				// shmctl
	data8 0				// syslog
	data8 0				// setitimer
	data8 0				// getitimer
	data8 0					 		// 1120
	data8 0
	data8 0
	data8 0				// vhangup
	data8 0				// lchown
	data8 0				// remap_file_pages	// 1125
	data8 0				// wait4
	data8 0				// sysinfo
	data8 0				// clone
	data8 0				// setdomainname
	data8 0				// newuname		// 1130
	data8 0				// adjtimex
	data8 0
	data8 0				// init_module
	data8 0				// delete_module
	data8 0							// 1135
	data8 0
	data8 0				// quotactl
	data8 0				// bdflush
	data8 0				// sysfs
	data8 0				// personality		// 1140
	data8 0				// afs_syscall
	data8 0				// setfsuid
	data8 0				// setfsgid
	data8 0				// getdents
	data8 0				// flock		// 1145
	data8 0				// readv
	data8 0				// writev
	data8 0				// pread64
	data8 0				// pwrite64
	data8 0				// sysctl		// 1150
	data8 0				// mmap
	data8 0				// munmap
	data8 0				// mlock
	data8 0				// mlockall
	data8 0				// mprotect		// 1155
	data8 0				// mremap
	data8 0				// msync
	data8 0				// munlock
	data8 0				// munlockall
	data8 0				// sched_getparam	// 1160
	data8 0				// sched_setparam
	data8 0				// sched_getscheduler
	data8 0				// sched_setscheduler
	data8 0				// sched_yield
	data8 0				// sched_get_priority_max	// 1165
	data8 0				// sched_get_priority_min
	data8 0				// sched_rr_get_interval
	data8 0				// nanosleep
	data8 0				// nfsservctl
	data8 0				// prctl		// 1170
	data8 0				// getpagesize
	data8 0				// mmap2
	data8 0				// pciconfig_read
	data8 0				// pciconfig_write
	data8 0				// perfmonctl		// 1175
	data8 0				// sigaltstack
	data8 0				// rt_sigaction
	data8 0				// rt_sigpending
	data8 fsys_rt_sigprocmask	// rt_sigprocmask
	data8 0				// rt_sigqueueinfo	// 1180
	data8 0				// rt_sigreturn
	data8 0				// rt_sigsuspend
	data8 0				// rt_sigtimedwait
	data8 0				// getcwd
	data8 0				// capget		// 1185
	data8 0				// capset
	data8 0				// sendfile
	data8 0
	data8 0
	data8 0				// socket		// 1190
	data8 0				// bind
	data8 0				// connect
	data8 0				// listen
	data8 0				// accept
	data8 0				// getsockname		// 1195
	data8 0				// getpeername
	data8 0				// socketpair
	data8 0				// send
	data8 0				// sendto
	data8 0				// recv			// 1200
	data8 0				// recvfrom
	data8 0				// shutdown
	data8 0				// setsockopt
	data8 0				// getsockopt
	data8 0				// sendmsg		// 1205
	data8 0				// recvmsg
	data8 0				// pivot_root
	data8 0				// mincore
	data8 0				// madvise
	data8 0				// newstat		// 1210
	data8 0				// newlstat
	data8 0				// newfstat
	data8 0				// clone2
	data8 0				// getdents64
	data8 0				// getunwind		// 1215
	data8 0				// readahead
	data8 0				// setxattr
	data8 0				// lsetxattr
	data8 0				// fsetxattr
	data8 0				// getxattr		// 1220
	data8 0				// lgetxattr
	data8 0				// fgetxattr
	data8 0				// listxattr
	data8 0				// llistxattr
	data8 0				// flistxattr		// 1225
	data8 0				// removexattr
	data8 0				// lremovexattr
	data8 0				// fremovexattr
	data8 0				// tkill
	data8 0				// futex		// 1230
	data8 0				// sched_setaffinity
	data8 0				// sched_getaffinity
	data8 fsys_set_tid_address	// set_tid_address
	data8 0				// fadvise64_64
	data8 0				// tgkill		// 1235
	data8 0				// exit_group
	data8 0				// lookup_dcookie
	data8 0				// io_setup
	data8 0				// io_destroy
	data8 0				// io_getevents		// 1240
	data8 0				// io_submit
	data8 0				// io_cancel
	data8 0				// epoll_create
	data8 0				// epoll_ctl
	data8 0				// epoll_wait		// 1245
	data8 0				// restart_syscall
	data8 0				// semtimedop
	data8 0				// timer_create
	data8 0				// timer_settime
	data8 0				// timer_gettime 	// 1250
	data8 0				// timer_getoverrun
	data8 0				// timer_delete
	data8 0				// clock_settime
	data8 fsys_clock_gettime	// clock_gettime
	data8 0				// clock_getres		// 1255
	data8 0				// clock_nanosleep
	data8 0				// fstatfs64
	data8 0				// statfs64
	data8 0				// mbind
	data8 0				// get_mempolicy	// 1260
	data8 0				// set_mempolicy
	data8 0				// mq_open
	data8 0				// mq_unlink
	data8 0				// mq_timedsend
	data8 0				// mq_timedreceive	// 1265
	data8 0				// mq_notify
	data8 0				// mq_getsetattr
	data8 0				// kexec_load
	data8 0				// vserver
	data8 0				// waitid		// 1270
	data8 0				// add_key
	data8 0				// request_key
	data8 0				// keyctl
	data8 0				// ioprio_set
	data8 0				// ioprio_get		// 1275
	data8 0				// move_pages
	data8 0				// inotify_init
	data8 0				// inotify_add_watch
	data8 0				// inotify_rm_watch
	data8 0				// migrate_pages	// 1280
	data8 0				// openat
	data8 0				// mkdirat
	data8 0				// mknodat
	data8 0				// fchownat
	data8 0				// futimesat		// 1285
	data8 0				// newfstatat
	data8 0				// unlinkat
	data8 0				// renameat
	data8 0				// linkat
	data8 0				// symlinkat		// 1290
	data8 0				// readlinkat
	data8 0				// fchmodat
	data8 0				// faccessat
	data8 0
	data8 0							// 1295
	data8 0				// unshare
	data8 0				// splice
	data8 0				// set_robust_list
	data8 0				// get_robust_list
	data8 0				// sync_file_range	// 1300
	data8 0				// tee
	data8 0				// vmsplice
	data8 0
	data8 fsys_getcpu		// getcpu		// 1304

	// fill in zeros for the remaining entries
	// (pads the table out to NR_syscalls slots of 8 bytes each)
	.zero:
	.space paravirt_fsyscall_table + 8*NR_syscalls - .zero, 0