/*
 * This file contains the light-weight system call handlers (fsyscall-handlers).
 *
 * Copyright (C) 2003 Hewlett-Packard Co
 * 	David Mosberger-Tang <davidm@hpl.hp.com>
 *
 * 25-Sep-03 davidm	Implement fsys_rt_sigprocmask().
 * 18-Feb-03 louisk	Implement fsys_gettimeofday().
 * 28-Feb-03 davidm	Fixed several bugs in fsys_gettimeofday().  Tuned it some more,
 *			probably broke it along the way... ;-)
 * 13-Jul-04 clameter   Implement fsys_clock_gettime and revise fsys_gettimeofday to make
 *                      it capable of using memory based clocks without falling back to C code.
 * 08-Feb-07 Fenghua Yu Implement fsys_getcpu.
 *
 */

#include <asm/asmmacro.h>
#include <asm/errno.h>
#include <asm/asm-offsets.h>
#include <asm/percpu.h>
#include <asm/thread_info.h>
#include <asm/sal.h>
#include <asm/signal.h>
#include <asm/system.h>
#include <asm/unistd.h>

#include "entry.h"

/*
 * See Documentation/ia64/fsys.txt for details on fsyscalls.
 *
 * On entry to an fsyscall handler:
 *   r10	= 0 (i.e., defaults to "successful syscall return")
 *   r11	= saved ar.pfs (a user-level value)
 *   r15	= system call number
 *   r16	= "current" task pointer (in normal kernel-mode, this is in r13)
 *   r32-r39	= system call arguments
 *   b6		= return address (a user-level value)
 *   ar.pfs	= previous frame-state (a user-level value)
 *   PSR.be	= cleared to zero (i.e., little-endian byte order is in effect)
 *   all other registers may contain values passed in from user-mode
 *
 * On return from an fsyscall handler:
 *   r11	= saved ar.pfs (as passed into the fsyscall handler)
 *   r15	= system call number (as passed into the fsyscall handler)
 *   r32-r39	= system call arguments (as passed into the fsyscall handler)
 *   b6		= return address (as passed into the fsyscall handler)
 *   ar.pfs	= previous frame-state (as passed into the fsyscall handler)
 */

/*
 * fsys_ni_syscall: handler for table slot 0.  Always fails: returns with
 * r8 = ENOSYS and r10 = -1 (error indication).
 */
ENTRY(fsys_ni_syscall)
	.prologue
	.altrp b6
	.body
	mov r8=ENOSYS
	mov r10=-1
	FSYS_RETURN
END(fsys_ni_syscall)

/*
 * fsys_getpid: return current->tgid in r8.  Falls back to the heavy-weight
 * syscall path if any TIF_ALLWORK_MASK bit is set in the thread-info flags.
 */
ENTRY(fsys_getpid)
	.prologue
	.altrp b6
	.body
	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
	;;
	ld4 r9=[r9]
	add r8=IA64_TASK_TGID_OFFSET,r16
	;;
	and r9=TIF_ALLWORK_MASK,r9
	ld4 r8=[r8]				// r8 = current->tgid
	;;
	cmp.ne p8,p0=0,r9
(p8)	br.spnt.many fsys_fallback_syscall
	FSYS_RETURN
END(fsys_getpid)

/*
 * fsys_getppid: return current->group_leader->real_parent->tgid in r8.
 * On SMP, real_parent is re-read after the tgid load and the sequence is
 * retried if it changed in between.  Falls back to the heavy-weight syscall
 * path if any TIF_ALLWORK_MASK bit is set.  Scratch registers r17-r19 are
 * zeroed before returning so that no kernel pointers reach user level.
 */
ENTRY(fsys_getppid)
	.prologue
	.altrp b6
	.body
	add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
	;;
	ld8 r17=[r17]				// r17 = current->group_leader
	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
	;;

	ld4 r9=[r9]
	add r17=IA64_TASK_REAL_PARENT_OFFSET,r17 // r17 = &current->group_leader->real_parent
	;;
	and r9=TIF_ALLWORK_MASK,r9

1:	ld8 r18=[r17]				// r18 = current->group_leader->real_parent
	;;
	cmp.ne p8,p0=0,r9
	add r8=IA64_TASK_TGID_OFFSET,r18	// r8 = &current->group_leader->real_parent->tgid
	;;

	/*
	 * The .acq is needed to ensure that the read of tgid has returned its data before
	 * we re-check "real_parent".
	 */
	ld4.acq r8=[r8]				// r8 = current->group_leader->real_parent->tgid
#ifdef CONFIG_SMP
	/*
	 * Re-read current->group_leader->real_parent.
	 */
	ld8 r19=[r17]				// r19 = current->group_leader->real_parent
(p8)	br.spnt.many fsys_fallback_syscall
	;;
	cmp.ne p6,p0=r18,r19			// did real_parent change?
	mov r19=0			// i must not leak kernel bits...
(p6)	br.cond.spnt.few 1b			// yes -> redo the read of tgid and the check
	;;
	mov r17=0			// i must not leak kernel bits...
	mov r18=0			// i must not leak kernel bits...
#else
	mov r17=0			// i must not leak kernel bits...
	mov r18=0			// i must not leak kernel bits...
	mov r19=0			// i must not leak kernel bits...
#endif
	FSYS_RETURN
END(fsys_getppid)

/*
 * fsys_set_tid_address: store the argument (r32) into
 * current->clear_child_tid -- or -1 if r32 is a NaT -- and return
 * current->pid in r8.  The store is issued before the pending-work check;
 * if any TIF_ALLWORK_MASK bit is set, control then branches to the
 * heavy-weight syscall path.
 */
ENTRY(fsys_set_tid_address)
	.prologue
	.altrp b6
	.body
	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
	;;
	ld4 r9=[r9]
	tnat.z p6,p7=r32		// check argument register for being NaT
	;;
	and r9=TIF_ALLWORK_MASK,r9
	add r8=IA64_TASK_PID_OFFSET,r16
	add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16
	;;
	ld4 r8=[r8]
	cmp.ne p8,p0=0,r9
	mov r17=-1
	;;
(p6)	st8 [r18]=r32
(p7)	st8 [r18]=r17
(p8)	br.spnt.many fsys_fallback_syscall
	;;
	mov r17=0			// i must not leak kernel bits...
	mov r18=0			// i must not leak kernel bits...
	FSYS_RETURN
END(fsys_set_tid_address)

#if IA64_GTOD_LOCK_OFFSET !=0
#error fsys_gettimeofday incompatible with changes to struct fsyscall_gtod_data_t
#endif
#if IA64_ITC_JITTER_OFFSET !=0
#error fsys_gettimeofday incompatible with changes to struct itc_jitter_data_t
#endif
#define CLOCK_REALTIME 0
#define CLOCK_MONOTONIC 1
#define CLOCK_DIVIDE_BY_1000 0x4000
#define CLOCK_ADD_MONOTONIC 0x8000

/*
 * fsys_gettimeofday: r32 = user pointer for the result, r33 = second
 * argument (only checked for NaT here).  Sets the "divide by 1000" flag
 * and runs the shared .gettime code, which is also entered from
 * fsys_clock_gettime.  The .fail_einval/.fail_efault exits defined here
 * are shared with the handlers further down in this file.
 */
ENTRY(fsys_gettimeofday)
	.prologue
	.altrp b6
	.body
	mov r31 = r32
	tnat.nz p6,p0 = r33		// guard against NaT argument
(p6)	br.cond.spnt.few .fail_einval
	mov r30 = CLOCK_DIVIDE_BY_1000
	;;
.gettime:
	// Register map
	// Incoming r31 = pointer to address where to place result
	//          r30 = flags determining how time is processed
	// r2,r3	= temp r4-r7 preserved
	// r8		= result nanoseconds
	// r9		= result seconds
	// r10		= temporary storage for clock difference
	// r11		= preserved: saved ar.pfs
	// r12		= preserved: memory stack
	// r13		= preserved: thread pointer
	// r14		= address of mask / mask value
	// r15		= preserved: system call number
	// r16		= preserved: current task pointer
	// r17		= (not used)
	// r18		= (not used)
	// r19		= address of itc_lastcycle
	// r20		= struct fsyscall_gtod_data (= address of gtod_lock.sequence)
	// r21		= address of mmio_ptr
	// r22		= address of wall_time or monotonic_time
	// r23		= address of shift / value
	// r24		= address mult factor / cycle_last value
	// r25		= itc_lastcycle value
	// r26		= address clocksource cycle_last
	// r27		= (not used)
	// r28		= sequence number at the beginning of critical section
	// r29		= address of itc_jitter
	// r30		= time processing flags / memory address
	// r31		= pointer to result
	// Predicates
	// p6,p7	short term use
	// p8		= timesource ar.itc
	// p9		= timesource mmio64
	// p10		= timesource mmio32 - not used
	// p11		= timesource not to be handled by asm code
	// p12		= memory time source ( = p9 | p10) - not used
	// p13		= do cmpxchg with itc_lastcycle
	// p14		= Divide by 1000
	// p15		= Add monotonic
	//
	// Note that instructions are optimized for McKinley. McKinley can
	// process two bundles simultaneously and therefore we continuously
	// try to feed the CPU two bundles and then a stop.
	//
	// Additional note that code has changed a lot. Optimization is TBD.
	// Comments begin with "?" are maybe outdated.
	tnat.nz p6,p0 = r31	// ? branch deferred to fit later bundle
	mov pr = r30,0xc000	// Set predicates according to function
	add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
	movl r20 = fsyscall_gtod_data	// load fsyscall gettimeofday data address
	;;
	movl r29 = itc_jitter_data	// itc_jitter
	add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20	// wall_time
	ld4 r2 = [r2]		// process work pending flags
	;;
(p15)	add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20	// monotonic_time
	add r21 = IA64_CLKSRC_MMIO_OFFSET,r20
	add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29
	and r2 = TIF_ALLWORK_MASK,r2
(p6)	br.cond.spnt.few .fail_einval	// ? deferred branch
	;;
	add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20	// clksrc_cycle_last
	cmp.ne p6, p0 = 0, r2	// Fallback if work is scheduled
(p6)	br.cond.spnt.many fsys_fallback_syscall
	;;
	// Begin critical section
.time_redo:
	ld4.acq r28 = [r20]	// gtod_lock.sequence, Must take first
	;;
	and r28 = ~1,r28	// And make sequence even to force retry if odd
	;;
	ld8 r30 = [r21]		// clocksource->mmio_ptr
	add r24 = IA64_CLKSRC_MULT_OFFSET,r20
	ld4 r2 = [r29]		// itc_jitter value
	add r23 = IA64_CLKSRC_SHIFT_OFFSET,r20
	add r14 = IA64_CLKSRC_MASK_OFFSET,r20
	;;
	ld4 r3 = [r24]		// clocksource mult value
	ld8 r14 = [r14]         // clocksource mask value
	cmp.eq p8,p9 = 0,r30	// use cpu timer if no mmio_ptr
	;;
	setf.sig f7 = r3	// Setup for mult scaling of counter
(p8)	cmp.ne p13,p0 = r2,r0	// need itc_jitter compensation, set p13
	ld4 r23 = [r23]		// clocksource shift value
	ld8 r24 = [r26]		// get clksrc_cycle_last value
(p9)	cmp.eq p13,p0 = 0,r30	// if mmio_ptr, clear p13 jitter control
	;;
	.pred.rel.mutex p8,p9
(p8)	mov r2 = ar.itc		// CPU_TIMER. 36 clocks latency!!!
(p9)	ld8 r2 = [r30]		// MMIO_TIMER. Could also have latency issues..
(p13)	ld8 r25 = [r19]		// get itc_lastcycle value
	;;		// ? could be removed by moving the last add upward
	ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET	// tv_sec
	;;
	ld8 r8 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET	// tv_nsec
(p13)	sub r3 = r25,r2		// Diff needed before comparison (thanks davidm)
	;;
(p13)	cmp.gt.unc p6,p7 = r3,r0 // check if it is less than last. p6,p7 cleared
	sub r10 = r2,r24	// current_cycle - last_cycle
	;;
(p6)	sub r10 = r25,r24	// time we got was less than last_cycle
(p7)	mov ar.ccv = r25	// more than last_cycle. Prep for cmpxchg
	;;
(p7)	cmpxchg8.rel r3 = [r19],r2,ar.ccv
	;;
(p7)	cmp.ne p7,p0 = r25,r3	// if cmpxchg not successful
	;;
(p7)	sub r10 = r3,r24	//        then use new last_cycle instead
	;;
	and r10 = r10,r14	// Apply mask
	;;
	setf.sig f8 = r10
	nop.i 123
	;;
	// fault check takes 5 cycles and we have spare time
EX(.fail_efault, probe.w.fault r31, 3)
	xmpy.l f8 = f8,f7	// nsec_per_cyc*(counter-last_counter)
	;;
	// ? simulate tbit.nz.or p7,p0 = r28,0
	getf.sig r2 = f8
	mf
	;;
	ld4 r10 = [r20]		// gtod_lock.sequence
	shr.u r2 = r2,r23	// shift by factor
	;;		// ? overloaded 3 bundles!
	add r8 = r8,r2		// Add xtime.nsecs
	cmp4.ne p7,p0 = r28,r10
(p7)	br.cond.dpnt.few .time_redo	// sequence number changed, redo
	// End critical section.
	// Now r8=tv->tv_nsec and r9=tv->tv_sec
	mov r10 = r0
	movl r2 = 1000000000
	add r23 = IA64_TIMESPEC_TV_NSEC_OFFSET, r31
(p14)	movl r3 = 2361183241434822607	// Prep for / 1000 hack
	;;
.time_normalize:
	mov r21 = r8
	cmp.ge p6,p0 = r8,r2
(p14)	shr.u r20 = r8, 3 // We can repeat this if necessary just wasting time
	;;
(p14)	setf.sig f8 = r20
(p6)	sub r8 = r8,r2
(p6)	add r9 = 1,r9		// two nops before the branch.
(p14)	setf.sig f7 = r3	// Chances for repeats are 1 in 10000 for gettod
(p6)	br.cond.dpnt.few .time_normalize
	;;
	// Divided by 8 though shift. Now divide by 125
	// The compiler was able to do that with a multiply
	// and a shift and we do the same
EX(.fail_efault, probe.w.fault r23, 3)	// This also costs 5 cycles
(p14)	xmpy.hu f8 = f8, f7		// xmpy has 5 cycles latency so use it
	;;
	mov r8 = r0
(p14)	getf.sig r2 = f8
	;;
(p14)	shr.u r21 = r2, 4
	;;
EX(.fail_efault, st8 [r31] = r9)
EX(.fail_efault, st8 [r23] = r21)
	FSYS_RETURN
.fail_einval:
	mov r8 = EINVAL
	mov r10 = -1
	FSYS_RETURN
.fail_efault:
	mov r8 = EFAULT
	mov r10 = -1
	FSYS_RETURN
END(fsys_gettimeofday)

/*
 * fsys_clock_gettime: r32 = clock id, r33 = user pointer for the result.
 * Only CLOCK_REALTIME and CLOCK_MONOTONIC are handled here; any other id
 * goes to the heavy-weight syscall.  The clock id is shifted left by 15 so
 * that CLOCK_MONOTONIC (1) becomes the CLOCK_ADD_MONOTONIC flag (0x8000)
 * expected in r30 by .gettime.
 */
ENTRY(fsys_clock_gettime)
	.prologue
	.altrp b6
	.body
	cmp4.ltu p6, p0 = CLOCK_MONOTONIC, r32
	// Fallback if this is not CLOCK_REALTIME or CLOCK_MONOTONIC
(p6)	br.spnt.few fsys_fallback_syscall
	mov r31 = r33
	shl r30 = r32,15
	br.many .gettime
END(fsys_clock_gettime)

/*
 * long fsys_rt_sigprocmask (int how, sigset_t *set, sigset_t *oset, size_t sigsetsize).
 */
#if _NSIG_WORDS != 1
# error Sorry, fsys_rt_sigprocmask() needs to be updated for _NSIG_WORDS != 1.
#endif
ENTRY(fsys_rt_sigprocmask)
	.prologue
	.altrp b6
	.body

	add r2=IA64_TASK_BLOCKED_OFFSET,r16
	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
	cmp4.ltu p6,p0=SIG_SETMASK,r32

	cmp.ne p15,p0=r0,r34			// oset != NULL?
	tnat.nz p8,p0=r34
	add r31=IA64_TASK_SIGHAND_OFFSET,r16
	;;
	ld8 r3=[r2]				// read/prefetch current->blocked
	ld4 r9=[r9]
	tnat.nz.or p6,p0=r35

	cmp.ne.or p6,p0=_NSIG_WORDS*8,r35
	tnat.nz.or p6,p0=r32
(p6)	br.spnt.few .fail_einval		// fail with EINVAL
	;;
#ifdef CONFIG_SMP
	ld8 r31=[r31]				// r31 <- current->sighand
#endif
	and r9=TIF_ALLWORK_MASK,r9
	tnat.nz.or p8,p0=r33
	;;
	cmp.ne p7,p0=0,r9
	cmp.eq p6,p0=r0,r33			// set == NULL?
	add r31=IA64_SIGHAND_SIGLOCK_OFFSET,r31	// r31 <- current->sighand->siglock
(p8)	br.spnt.few .fail_efault		// fail with EFAULT
(p7)	br.spnt.many fsys_fallback_syscall	// got pending kernel work...
(p6)	br.dpnt.many .store_mask		// -> short-circuit to just reading the signal mask

	/* Argh, we actually have to do some work and _update_ the signal mask: */

EX(.fail_efault, probe.r.fault r33, 3)		// verify user has read-access to *set
EX(.fail_efault, ld8 r14=[r33])			// r14 <- *set
	mov r17=(1 << (SIGKILL - 1)) | (1 << (SIGSTOP - 1))
	;;

	rsm psr.i				// mask interrupt delivery
	mov ar.ccv=0
	andcm r14=r14,r17			// filter out SIGKILL & SIGSTOP

#ifdef CONFIG_SMP
	mov r17=1
	;;
	cmpxchg4.acq r18=[r31],r17,ar.ccv	// try to acquire the lock
	mov r8=EINVAL			// default to EINVAL
	;;
	ld8 r3=[r2]			// re-read current->blocked now that we hold the lock
	cmp4.ne p6,p0=r18,r0
(p6)	br.cond.spnt.many .lock_contention
	;;
#else
	ld8 r3=[r2]			// re-read current->blocked now that we hold the lock
	mov r8=EINVAL			// default to EINVAL
#endif
	add r18=IA64_TASK_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r16
	add r19=IA64_TASK_SIGNAL_OFFSET,r16
	cmp4.eq p6,p0=SIG_BLOCK,r32
	;;
	ld8 r19=[r19]			// r19 <- current->signal
	cmp4.eq p7,p0=SIG_UNBLOCK,r32
	cmp4.eq p8,p0=SIG_SETMASK,r32
	;;
	ld8 r18=[r18]			// r18 <- current->pending.signal
	.pred.rel.mutex p6,p7,p8
(p6)	or r14=r3,r14			// SIG_BLOCK
(p7)	andcm r14=r3,r14		// SIG_UNBLOCK

(p8)	mov r14=r14			// SIG_SETMASK
(p6)	mov r8=0			// clear error code
	// recalc_sigpending()
	add r17=IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,r19

	add r19=IA64_SIGNAL_SHARED_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r19
	;;
	ld4 r17=[r17]		// r17 <- current->signal->group_stop_count
(p7)	mov r8=0		// clear error code

	ld8 r19=[r19]		// r19 <- current->signal->shared_pending
	;;
	cmp4.gt p6,p7=r17,r0	// p6/p7 <- (current->signal->group_stop_count > 0)?
(p8)	mov r8=0		// clear error code

	or r18=r18,r19		// r18 <- current->pending | current->signal->shared_pending
	;;
	// r18 <- (current->pending | current->signal->shared_pending) & ~current->blocked:
	andcm r18=r18,r14
	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
	;;

(p7)	cmp.ne.or.andcm p6,p7=r18,r0		// p6/p7 <- signal pending
	mov r19=0					// i must not leak kernel bits...
(p6)	br.cond.dpnt.many .sig_pending
	;;

1:	ld4 r17=[r9]				// r17 <- current->thread_info->flags
	;;
	mov ar.ccv=r17
	and r18=~_TIF_SIGPENDING,r17		// r18 <- r17 & ~(1 << TIF_SIGPENDING)
	;;

	st8 [r2]=r14				// update current->blocked with new mask
	cmpxchg4.acq r8=[r9],r18,ar.ccv		// current->thread_info->flags <- r18
	;;
	cmp.ne p6,p0=r17,r8			// update failed?
(p6)	br.cond.spnt.few 1b			// yes -> retry

#ifdef CONFIG_SMP
	st4.rel [r31]=r0			// release the lock
#endif
	ssm psr.i
	;;

	srlz.d					// ensure psr.i is set again
	mov r18=0					// i must not leak kernel bits...

.store_mask:
EX(.fail_efault, (p15) probe.w.fault r34, 3)	// verify user has write-access to *oset
EX(.fail_efault, (p15) st8 [r34]=r3)
	mov r2=0					// i must not leak kernel bits...
	mov r3=0					// i must not leak kernel bits...
	mov r8=0				// return 0
	mov r9=0					// i must not leak kernel bits...
	mov r14=0					// i must not leak kernel bits...
	mov r17=0					// i must not leak kernel bits...
	mov r31=0					// i must not leak kernel bits...
	FSYS_RETURN

.sig_pending:
#ifdef CONFIG_SMP
	st4.rel [r31]=r0			// release the lock
#endif
	ssm psr.i
	;;
	srlz.d
	br.sptk.many fsys_fallback_syscall	// with signal pending, do the heavy-weight syscall

#ifdef CONFIG_SMP
.lock_contention:
	/* Rather than spinning here, fall back on doing a heavy-weight syscall.  */
	ssm psr.i
	;;
	srlz.d
	br.sptk.many fsys_fallback_syscall
#endif
END(fsys_rt_sigprocmask)

/*
 * fsys_getcpu doesn't use the third parameter in this implementation. It reads
 * current_thread_info()->cpu and corresponding node in cpu_to_node_map.
 *
 * r32 = user pointer receiving the cpu number, r33 = user pointer receiving
 * the node number.  Either pointer may be NULL, in which case the
 * corresponding probe and store are skipped (p6 <- r32 != NULL,
 * p7 <- r33 != NULL), so that a NULL argument does not produce EFAULT.
 * A NaT argument fails with EINVAL.
 */
ENTRY(fsys_getcpu)
	.prologue
	.altrp b6
	.body
	;;
	add r2=TI_FLAGS+IA64_TASK_SIZE,r16
	tnat.nz p6,p0 = r32			// guard against NaT argument
	add r3=TI_CPU+IA64_TASK_SIZE,r16
	;;
	ld4 r3=[r3]				// M r3 = thread_info->cpu
	ld4 r2=[r2]				// M r2 = thread_info->flags
(p6)	br.cond.spnt.few .fail_einval		// B
	;;
	tnat.nz p7,p0 = r33			// I guard against NaT argument
(p7)	br.cond.spnt.few .fail_einval		// B
	;;
	cmp.ne p6,p0=r32,r0			// p6 <- cpu pointer != NULL
	cmp.ne p7,p0=r33,r0			// p7 <- node pointer != NULL
	;;
#ifdef CONFIG_NUMA
	movl r17=cpu_to_node_map
	;;
EX(.fail_efault, (p6) probe.w.fault r32, 3)		// M This takes 5 cycles
EX(.fail_efault, (p7) probe.w.fault r33, 3)		// M This takes 5 cycles
	shladd r18=r3,1,r17
	;;
	ld2 r20=[r18]				// r20 = cpu_to_node_map[cpu]
	and r2 = TIF_ALLWORK_MASK,r2
	;;
	cmp.ne p8,p0=0,r2
(p8)	br.spnt.many fsys_fallback_syscall
	;;
	;;
EX(.fail_efault, (p6) st4 [r32] = r3)
EX(.fail_efault, (p7) st2 [r33] = r20)
	mov r8=0
	;;
#else
EX(.fail_efault, (p6) probe.w.fault r32, 3)		// M This takes 5 cycles
EX(.fail_efault, (p7) probe.w.fault r33, 3)		// M This takes 5 cycles
	and r2 = TIF_ALLWORK_MASK,r2
	;;
	cmp.ne p8,p0=0,r2
(p8)	br.spnt.many fsys_fallback_syscall
	;;
EX(.fail_efault, (p6) st4 [r32] = r3)
EX(.fail_efault, (p7) st2 [r33] = r0)
	mov r8=0
	;;
#endif
	FSYS_RETURN
END(fsys_getcpu)

/*
 * fsys_fallback_syscall: entered from the light-weight handlers above when
 * they cannot complete the call themselves.  Loads the heavy-weight entry
 * point for syscall r15 from sys_call_table, masks interrupts, captures
 * psr/ar.rsc/ar.fpsr/ar.pfs into the registers fsys_bubble_down expects,
 * and falls through into fsys_bubble_down.
 */
ENTRY(fsys_fallback_syscall)
	.prologue
	.altrp b6
	.body
	/*
	 * We only get here from light-weight syscall handlers.  Thus, we already
	 * know that r15 contains a valid syscall number.  No need to re-check.
	 */
	adds r17=-1024,r15
	movl r14=sys_call_table
	;;
	rsm psr.i
	shladd r18=r17,3,r14
	;;
	ld8 r18=[r18]				// load normal (heavy-weight) syscall entry-point
	mov r29=psr				// read psr (12 cyc load latency)
	mov r27=ar.rsc
	mov r21=ar.fpsr
	mov r26=ar.pfs
END(fsys_fallback_syscall)
	/* FALL THROUGH */
GLOBAL_ENTRY(fsys_bubble_down)
	.prologue
	.altrp b6
	.body
	/*
	 * We get here for syscalls that don't have a lightweight
	 * handler.  For those, we need to bubble down into the kernel
	 * and that requires setting up a minimal pt_regs structure,
	 * and initializing the CPU state more or less as if an
	 * interruption had occurred.  To make syscall-restarts work,
	 * we setup pt_regs such that cr_iip points to the second
	 * instruction in syscall_via_break.  Decrementing the IP
	 * hence will restart the syscall via break and not
	 * decrementing IP will return us to the caller, as usual.
	 * Note that we preserve the value of psr.pp rather than
	 * initializing it from dcr.pp.  This makes it possible to
	 * distinguish fsyscall execution from other privileged
	 * execution.
	 *
	 * On entry:
	 *	- normal fsyscall handler register usage, except
	 *	  that we also have:
	 *	- r18: address of syscall entry point
	 *	- r21: ar.fpsr
	 *	- r26: ar.pfs
	 *	- r27: ar.rsc
	 *	- r29: psr
	 *
	 * We used to clear some PSR bits here but that requires slow
	 * serialization.  Fortunately, that isn't really necessary.
	 * The rationale is as follows: we used to clear bits
	 * ~PSR_PRESERVED_BITS in PSR.L.  Since
	 * PSR_PRESERVED_BITS==PSR.{UP,MFL,MFH,PK,DT,PP,SP,RT,IC}, we
	 * ended up clearing PSR.{BE,AC,I,DFL,DFH,DI,DB,SI,TB}.
	 * However,
	 *
	 * PSR.BE : already is turned off in __kernel_syscall_via_epc()
	 * PSR.AC : don't care (kernel normally turns PSR.AC on)
	 * PSR.I  : already turned off by the time fsys_bubble_down gets
	 *	    invoked
	 * PSR.DFL: always 0 (kernel never turns it on)
	 * PSR.DFH: don't care --- kernel never touches f32-f127 on its own
	 *	    initiative
	 * PSR.DI : always 0 (kernel never turns it on)
	 * PSR.SI : always 0 (kernel never turns it on)
	 * PSR.DB : don't care --- kernel never enables kernel-level
	 *	    breakpoints
	 * PSR.TB : must be 0 already; if it wasn't zero on entry to
	 *          __kernel_syscall_via_epc, the branch to fsys_bubble_down
	 *          will trigger a taken branch; the taken-trap-handler then
	 *          converts the syscall into a break-based system-call.
	 */
	/*
	 * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc.
	 * The rest we have to synthesize.
	 */
#	define PSR_ONE_BITS		((3 << IA64_PSR_CPL0_BIT)	\
					 | (0x1 << IA64_PSR_RI_BIT)	\
					 | IA64_PSR_BN | IA64_PSR_I)

	invala					// M0|1
	movl r14=ia64_ret_from_syscall		// X

	nop.m 0
	movl r28=__kernel_syscall_via_break	// X	create cr.iip
	;;

	mov r2=r16				// A    get task addr to addl-addressable register
	adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // A
	mov r31=pr				// I0   save pr (2 cyc)
	;;
	st1 [r16]=r0				// M2|3 clear current->thread.on_ustack flag
	addl r22=IA64_RBS_OFFSET,r2		// A    compute base of RBS
	add r3=TI_FLAGS+IA64_TASK_SIZE,r2	// A
	;;
	ld4 r3=[r3]				// M0|1 r3 = current_thread_info()->flags
	lfetch.fault.excl.nt1 [r22]		// M0|1 prefetch register backing-store
	nop.i 0
	;;
	mov ar.rsc=0				// M2   set enforced lazy mode, pl 0, LE, loadrs=0
	nop.m 0
	nop.i 0
	;;
	mov r23=ar.bspstore			// M2 (12 cyc) save ar.bspstore
	mov.m r24=ar.rnat			// M2 (5 cyc) read ar.rnat (dual-issues!)
	nop.i 0
	;;
	mov ar.bspstore=r22			// M2 (6 cyc) switch to kernel RBS
	movl r8=PSR_ONE_BITS			// X
	;;
	mov r25=ar.unat				// M2 (5 cyc) save ar.unat
	mov r19=b6				// I0   save b6 (2 cyc)
	mov r20=r1				// A    save caller's gp in r20
	;;
	or r29=r8,r29				// A    construct cr.ipsr value to save
	mov b6=r18				// I0   copy syscall entry-point to b6 (7 cyc)
	addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // A compute base of memory stack

	mov r18=ar.bsp				// M2   save (kernel) ar.bsp (12 cyc)
	cmp.ne pKStk,pUStk=r0,r0		// A    set pKStk <- 0, pUStk <- 1
	br.call.sptk.many b7=ia64_syscall_setup	// B
	;;
	mov ar.rsc=0x3				// M2   set eager mode, pl 0, LE, loadrs=0
	mov rp=r14				// I0   set the real return addr
	and r3=_TIF_SYSCALL_TRACEAUDIT,r3	// A
	;;
	ssm psr.i				// M2   we're on kernel stacks now, reenable irqs
	cmp.eq p8,p0=r3,r0			// A
(p10)	br.cond.spnt.many ia64_ret_from_syscall	// B    return if bad call-frame or r15 is a NaT

	nop.m 0
(p8)	br.call.sptk.many b6=b6			// B    (ignore return address)
	br.cond.spnt ia64_trace_syscall		// B
END(fsys_bubble_down)

	.rodata
	.align 8
	.globl fsyscall_table

	/*
	 * One 8-byte slot per syscall, in syscall-number order (the running
	 * numbers are noted every fifth entry).  A zero slot means there is no
	 * light-weight handler for that syscall.
	 */
	data8 fsys_bubble_down
fsyscall_table:
	data8 fsys_ni_syscall
	data8 0				// exit			// 1025
	data8 0				// read
	data8 0				// write
	data8 0				// open
	data8 0				// close
	data8 0				// creat		// 1030
	data8 0				// link
	data8 0				// unlink
	data8 0				// execve
	data8 0				// chdir
	data8 0				// fchdir		// 1035
	data8 0				// utimes
	data8 0				// mknod
	data8 0				// chmod
	data8 0				// chown
	data8 0				// lseek		// 1040
	data8 fsys_getpid		// getpid
	data8 fsys_getppid		// getppid
	data8 0				// mount
	data8 0				// umount
	data8 0				// setuid		// 1045
	data8 0				// getuid
	data8 0				// geteuid
	data8 0				// ptrace
	data8 0				// access
	data8 0				// sync			// 1050
	data8 0				// fsync
	data8 0				// fdatasync
	data8 0				// kill
	data8 0				// rename
	data8 0				// mkdir		// 1055
	data8 0				// rmdir
	data8 0				// dup
	data8 0				// pipe
	data8 0				// times
	data8 0				// brk			// 1060
	data8 0				// setgid
	data8 0				// getgid
	data8 0				// getegid
	data8 0				// acct
	data8 0				// ioctl		// 1065
	data8 0				// fcntl
	data8 0				// umask
	data8 0				// chroot
	data8 0				// ustat
	data8 0				// dup2			// 1070
	data8 0				// setreuid
	data8 0				// setregid
	data8 0				// getresuid
	data8 0				// setresuid
	data8 0				// getresgid		// 1075
	data8 0				// setresgid
	data8 0				// getgroups
	data8 0				// setgroups
	data8 0				// getpgid
	data8 0				// setpgid		// 1080
	data8 0				// setsid
	data8 0				// getsid
	data8 0				// sethostname
	data8 0				// setrlimit
	data8 0				// getrlimit		// 1085
	data8 0				// getrusage
	data8 fsys_gettimeofday		// gettimeofday
	data8 0				// settimeofday
	data8 0				// select
	data8 0				// poll			// 1090
	data8 0				// symlink
	data8 0				// readlink
	data8 0				// uselib
	data8 0				// swapon
	data8 0				// swapoff		// 1095
	data8 0				// reboot
	data8 0				// truncate
	data8 0				// ftruncate
	data8 0				// fchmod
	data8 0				// fchown		// 1100
	data8 0				// getpriority
	data8 0				// setpriority
	data8 0				// statfs
	data8 0				// fstatfs
	data8 0				// gettid		// 1105
	data8 0				// semget
	data8 0				// semop
	data8 0				// semctl
	data8 0				// msgget
	data8 0				// msgsnd		// 1110
	data8 0				// msgrcv
	data8 0				// msgctl
	data8 0				// shmget
	data8 0				// shmat
	data8 0				// shmdt		// 1115
	data8 0				// shmctl
	data8 0				// syslog
	data8 0				// setitimer
	data8 0				// getitimer
	data8 0							// 1120
	data8 0
	data8 0
	data8 0				// vhangup
	data8 0				// lchown
	data8 0				// remap_file_pages	// 1125
	data8 0				// wait4
	data8 0				// sysinfo
	data8 0				// clone
	data8 0				// setdomainname
	data8 0				// newuname		// 1130
	data8 0				// adjtimex
	data8 0
	data8 0				// init_module
	data8 0				// delete_module
	data8 0							// 1135
	data8 0
	data8 0				// quotactl
	data8 0				// bdflush
	data8 0				// sysfs
	data8 0				// personality		// 1140
	data8 0				// afs_syscall
	data8 0				// setfsuid
	data8 0				// setfsgid
	data8 0				// getdents
	data8 0				// flock		// 1145
	data8 0				// readv
	data8 0				// writev
	data8 0				// pread64
	data8 0				// pwrite64
	data8 0				// sysctl		// 1150
	data8 0				// mmap
	data8 0				// munmap
	data8 0				// mlock
	data8 0				// mlockall
	data8 0				// mprotect		// 1155
	data8 0				// mremap
	data8 0				// msync
	data8 0				// munlock
	data8 0				// munlockall
	data8 0				// sched_getparam	// 1160
	data8 0				// sched_setparam
	data8 0				// sched_getscheduler
	data8 0				// sched_setscheduler
	data8 0				// sched_yield
	data8 0				// sched_get_priority_max	// 1165
	data8 0				// sched_get_priority_min
	data8 0				// sched_rr_get_interval
	data8 0				// nanosleep
	data8 0				// nfsservctl
	data8 0				// prctl		// 1170
	data8 0				// getpagesize
	data8 0				// mmap2
	data8 0				// pciconfig_read
	data8 0				// pciconfig_write
	data8 0				// perfmonctl		// 1175
	data8 0				// sigaltstack
	data8 0				// rt_sigaction
	data8 0				// rt_sigpending
	data8 fsys_rt_sigprocmask	// rt_sigprocmask
	data8 0				// rt_sigqueueinfo	// 1180
	data8 0				// rt_sigreturn
	data8 0				// rt_sigsuspend
	data8 0				// rt_sigtimedwait
	data8 0				// getcwd
	data8 0				// capget		// 1185
	data8 0				// capset
	data8 0				// sendfile
	data8 0
	data8 0
	data8 0				// socket		// 1190
	data8 0				// bind
	data8 0				// connect
	data8 0				// listen
	data8 0				// accept
	data8 0				// getsockname		// 1195
	data8 0				// getpeername
	data8 0				// socketpair
	data8 0				// send
	data8 0				// sendto
	data8 0				// recv			// 1200
	data8 0				// recvfrom
	data8 0				// shutdown
	data8 0				// setsockopt
	data8 0				// getsockopt
	data8 0				// sendmsg		// 1205
	data8 0				// recvmsg
	data8 0				// pivot_root
	data8 0				// mincore
	data8 0				// madvise
	data8 0				// newstat		// 1210
	data8 0				// newlstat
	data8 0				// newfstat
	data8 0				// clone2
	data8 0				// getdents64
	data8 0				// getunwind		// 1215
	data8 0				// readahead
	data8 0				// setxattr
	data8 0				// lsetxattr
	data8 0				// fsetxattr
	data8 0				// getxattr		// 1220
	data8 0				// lgetxattr
	data8 0				// fgetxattr
	data8 0				// listxattr
	data8 0				// llistxattr
	data8 0				// flistxattr		// 1225
	data8 0				// removexattr
	data8 0				// lremovexattr
	data8 0				// fremovexattr
	data8 0				// tkill
	data8 0				// futex		// 1230
	data8 0				// sched_setaffinity
	data8 0				// sched_getaffinity
	data8 fsys_set_tid_address	// set_tid_address
	data8 0				// fadvise64_64
	data8 0				// tgkill		// 1235
	data8 0				// exit_group
	data8 0				// lookup_dcookie
	data8 0				// io_setup
	data8 0				// io_destroy
	data8 0				// io_getevents		// 1240
	data8 0				// io_submit
	data8 0				// io_cancel
	data8 0				// epoll_create
	data8 0				// epoll_ctl
	data8 0				// epoll_wait		// 1245
	data8 0				// restart_syscall
	data8 0				// semtimedop
	data8 0				// timer_create
	data8 0				// timer_settime
	data8 0				// timer_gettime 	// 1250
	data8 0				// timer_getoverrun
	data8 0				// timer_delete
	data8 0				// clock_settime
	data8 fsys_clock_gettime	// clock_gettime
	data8 0				// clock_getres		// 1255
	data8 0				// clock_nanosleep
	data8 0				// fstatfs64
	data8 0				// statfs64
	data8 0				// mbind
	data8 0				// get_mempolicy	// 1260
	data8 0				// set_mempolicy
	data8 0				// mq_open
	data8 0				// mq_unlink
	data8 0				// mq_timedsend
	data8 0				// mq_timedreceive	// 1265
	data8 0				// mq_notify
	data8 0				// mq_getsetattr
	data8 0				// kexec_load
	data8 0				// vserver
	data8 0				// waitid		// 1270
	data8 0				// add_key
	data8 0				// request_key
	data8 0				// keyctl
	data8 0				// ioprio_set
	data8 0				// ioprio_get		// 1275
	data8 0				// move_pages
	data8 0				// inotify_init
	data8 0				// inotify_add_watch
	data8 0				// inotify_rm_watch
	data8 0				// migrate_pages	// 1280
	data8 0				// openat
	data8 0				// mkdirat
	data8 0				// mknodat
	data8 0				// fchownat
	data8 0				// futimesat		// 1285
	data8 0				// newfstatat
	data8 0				// unlinkat
	data8 0				// renameat
	data8 0				// linkat
	data8 0				// symlinkat		// 1290
	data8 0				// readlinkat
	data8 0				// fchmodat
	data8 0				// faccessat
	data8 0
	data8 0							// 1295
	data8 0				// unshare
	data8 0				// splice
	data8 0				// set_robust_list
	data8 0				// get_robust_list
	data8 0				// sync_file_range	// 1300
	data8 0				// tee
	data8 0				// vmsplice
	data8 0
	data8 fsys_getcpu		// getcpu		// 1304

	// fill in zeros for the remaining entries
	.zero:
	.space fsyscall_table + 8*NR_syscalls - .zero, 0