/*
 * This file contains the light-weight system call handlers (fsyscall-handlers).
 *
 * Copyright (C) 2003 Hewlett-Packard Co
 * 	David Mosberger-Tang <davidm@hpl.hp.com>
 *
 * 25-Sep-03 davidm	Implement fsys_rt_sigprocmask().
 * 18-Feb-03 louisk	Implement fsys_gettimeofday().
 * 28-Feb-03 davidm	Fixed several bugs in fsys_gettimeofday().  Tuned it some more,
 *			probably broke it along the way... ;-)
 * 13-Jul-04 clameter   Implement fsys_clock_gettime and revise fsys_gettimeofday to make
 *                      it capable of using memory based clocks without falling back to C code.
 */

#include <asm/asmmacro.h>
#include <asm/errno.h>
#include <asm/asm-offsets.h>
#include <asm/percpu.h>
#include <asm/thread_info.h>
#include <asm/sal.h>
#include <asm/signal.h>
#include <asm/system.h>
#include <asm/unistd.h>

#include "entry.h"

/*
 * See Documentation/ia64/fsys.txt for details on fsyscalls.
 *
 * On entry to an fsyscall handler:
 *   r10	= 0 (i.e., defaults to "successful syscall return")
 *   r11	= saved ar.pfs (a user-level value)
 *   r15	= system call number
 *   r16	= "current" task pointer (in normal kernel-mode, this is in r13)
 *   r32-r39	= system call arguments
 *   b6		= return address (a user-level value)
 *   ar.pfs	= previous frame-state (a user-level value)
 *   PSR.be	= cleared to zero (i.e., little-endian byte order is in effect)
 *   all other registers may contain values passed in from user-mode
 *
 * On return from an fsyscall handler:
 *   r11	= saved ar.pfs (as passed into the fsyscall handler)
 *   r15	= system call number (as passed into the fsyscall handler)
 *   r32-r39	= system call arguments (as passed into the fsyscall handler)
 *   b6		= return address (as passed into the fsyscall handler)
 *   ar.pfs	= previous frame-state (as passed into the fsyscall handler)
 */
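
/*
 * For example, the error paths in this file (fsys_ni_syscall, .fail_einval,
 * .fail_efault) all return by loading
 *	mov r8=<errno>		// positive error number
 *	mov r10=-1		// flag the syscall as failed
 *	FSYS_RETURN
 * while handlers that succeed leave r10 at its default of 0 and place their
 * result (if any) in r8.
 */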

ENTRY(fsys_ni_syscall)
	.prologue
	.altrp b6
	.body
	mov r8=ENOSYS
	mov r10=-1
	FSYS_RETURN
END(fsys_ni_syscall)

ENTRY(fsys_getpid)
	.prologue
	.altrp b6
	.body
	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
	;;
	ld4 r9=[r9]
	add r8=IA64_TASK_TGID_OFFSET,r16
	;;
	and r9=TIF_ALLWORK_MASK,r9
	ld4 r8=[r8]				// r8 = current->tgid
	;;
	cmp.ne p8,p0=0,r9
(p8)	br.spnt.many fsys_fallback_syscall
	FSYS_RETURN
END(fsys_getpid)

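/*
 * C-level sketch of what fsys_getppid computes (illustrative only; the asm
 * below relies on the ld4.acq ordering and re-checks real_parent on SMP):
 *
 *	do {
 *		parent = current->group_leader->real_parent;
 *		tgid   = parent->tgid;
 *	} while (parent != current->group_leader->real_parent);  // re-check is SMP-only
 *	return tgid;
 */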
ENTRY(fsys_getppid)
	.prologue
	.altrp b6
	.body
	add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
	;;
	ld8 r17=[r17]				// r17 = current->group_leader
	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
	;;

	ld4 r9=[r9]
	add r17=IA64_TASK_REAL_PARENT_OFFSET,r17 // r17 = &current->group_leader->real_parent
	;;
	and r9=TIF_ALLWORK_MASK,r9

1:	ld8 r18=[r17]				// r18 = current->group_leader->real_parent
	;;
	cmp.ne p8,p0=0,r9
	add r8=IA64_TASK_TGID_OFFSET,r18	// r8 = &current->group_leader->real_parent->tgid
	;;

	/*
	 * The .acq is needed to ensure that the read of tgid has returned its data before
	 * we re-check "real_parent".
	 */
	ld4.acq r8=[r8]				// r8 = current->group_leader->real_parent->tgid
#ifdef CONFIG_SMP
	/*
	 * Re-read current->group_leader->real_parent.
	 */
	ld8 r19=[r17]				// r19 = current->group_leader->real_parent
(p8)	br.spnt.many fsys_fallback_syscall
	;;
	cmp.ne p6,p0=r18,r19			// did real_parent change?
	mov r19=0			// i must not leak kernel bits...
(p6)	br.cond.spnt.few 1b			// yes -> redo the read of tgid and the check
	;;
	mov r17=0			// i must not leak kernel bits...
	mov r18=0			// i must not leak kernel bits...
#else
	mov r17=0			// i must not leak kernel bits...
	mov r18=0			// i must not leak kernel bits...
	mov r19=0			// i must not leak kernel bits...
#endif
	FSYS_RETURN
END(fsys_getppid)

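/*
 * C-level sketch of fsys_set_tid_address (illustrative only):
 *	current->clear_child_tid = tidptr;	// or -1 if the argument is a NaT
 *	return current->pid;
 */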
ENTRY(fsys_set_tid_address)
	.prologue
	.altrp b6
	.body
	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
	;;
	ld4 r9=[r9]
	tnat.z p6,p7=r32		// check argument register for being NaT
	;;
	and r9=TIF_ALLWORK_MASK,r9
	add r8=IA64_TASK_PID_OFFSET,r16
	add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16
	;;
	ld4 r8=[r8]
	cmp.ne p8,p0=0,r9
	mov r17=-1
	;;
(p6)	st8 [r18]=r32
(p7)	st8 [r18]=r17
(p8)	br.spnt.many fsys_fallback_syscall
	;;
	mov r17=0			// i must not leak kernel bits...
	mov r18=0			// i must not leak kernel bits...
	FSYS_RETURN
END(fsys_set_tid_address)

/*
 * Ensure that the time interpolator structure is compatible with the asm code
 */
#if IA64_TIME_INTERPOLATOR_SOURCE_OFFSET != 0 || IA64_TIME_INTERPOLATOR_SHIFT_OFFSET != 2 \
	|| IA64_TIME_INTERPOLATOR_JITTER_OFFSET != 3 || IA64_TIME_INTERPOLATOR_NSEC_OFFSET != 4
#error fsys_gettimeofday incompatible with changes to struct time_interpolator
#endif
#define CLOCK_REALTIME 0
#define CLOCK_MONOTONIC 1
#define CLOCK_DIVIDE_BY_1000 0x4000
#define CLOCK_ADD_MONOTONIC 0x8000
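
/*
 * How these flags are used (informal summary): fsys_gettimeofday enters
 * .gettime with r30 = CLOCK_DIVIDE_BY_1000, while fsys_clock_gettime
 * computes r30 = clockid << 15, so CLOCK_MONOTONIC (1) becomes
 * CLOCK_ADD_MONOTONIC (0x8000) and CLOCK_REALTIME (0) sets no flag.
 * "mov pr = r30,0xc000" below copies exactly these two bits into p14
 * (divide the result by 1000) and p15 (add wall_to_monotonic).
 */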

ENTRY(fsys_gettimeofday)
	.prologue
	.altrp b6
	.body
	mov r31 = r32
	tnat.nz p6,p0 = r33		// guard against NaT argument
(p6)    br.cond.spnt.few .fail_einval
	mov r30 = CLOCK_DIVIDE_BY_1000
	;;
.gettime:
	// Register map
	// Incoming r31 = pointer to address where to place result
	//          r30 = flags determining how time is processed
	// r2,r3 = temp r4-r7 preserved
	// r8 = result nanoseconds
	// r9 = result seconds
	// r10 = temporary storage for clock difference
	// r11 = preserved: saved ar.pfs
	// r12 = preserved: memory stack
	// r13 = preserved: thread pointer
	// r14 = address of mask / mask
	// r15 = preserved: system call number
	// r16 = preserved: current task pointer
	// r17 = wall_to_monotonic value (tv_sec, then tv_nsec)
	// r18 = time_interpolator->offset
	// r19 = address of wall_to_monotonic
	// r20 = pointer to struct time_interpolator / pointer to time_interpolator->address
	// r21 = shift factor
	// r22 = address of time_interpolator->last_counter
	// r23 = address of time_interpolator->last_cycle
	// r24 = address of time_interpolator->offset
	// r25 = last_cycle value
	// r26 = last_counter value
	// r27 = pointer to xtime
	// r28 = sequence number at the beginning of critical section
	// r29 = address of seqlock
	// r30 = time processing flags / memory address
	// r31 = pointer to result
	// Predicates
	// p6,p7 short term use
	// p8 = timesource ar.itc
	// p9 = timesource mmio64
	// p10 = timesource mmio32
	// p11 = timesource not to be handled by asm code
	// p12 = memory time source ( = p9 | p10)
	// p13 = do cmpxchg with time_interpolator_last_cycle
	// p14 = Divide by 1000
	// p15 = Add monotonic
	//
	// Note that instructions are optimized for McKinley. McKinley can process two
	// bundles simultaneously and therefore we continuously try to feed the CPU
	// two bundles and then a stop.
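	//
	// C-level sketch of the loop below (illustrative only; the real code
	// also handles the jitter/cmpxchg path and the MMIO time sources):
	//
	//	do {
	//		seq   = read_seqbegin(&xtime_lock);
	//		delta = (get_cycles() - time_interpolator->last_counter)
	//			& time_interpolator->mask;
	//		sec   = xtime.tv_sec;
	//		nsec  = xtime.tv_nsec + time_interpolator->offset
	//			+ ((delta * time_interpolator->nsec_per_cyc)
	//			   >> time_interpolator->shift);
	//		if (monotonic) {
	//			sec  += wall_to_monotonic.tv_sec;
	//			nsec += wall_to_monotonic.tv_nsec;
	//		}
	//	} while (read_seqretry(&xtime_lock, seq));
	//	while (nsec >= 1000000000) { nsec -= 1000000000; sec++; }
	//	// gettimeofday additionally divides nsec by 1000 to get tv_usec
	//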
	tnat.nz p6,p0 = r31	// branch deferred since it does not fit into bundle structure
	mov pr = r30,0xc000	// Set predicates according to function
	add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
	movl r20 = time_interpolator
	;;
	ld8 r20 = [r20]		// get pointer to time_interpolator structure
	movl r29 = xtime_lock
	ld4 r2 = [r2]		// process work pending flags
	movl r27 = xtime
	;;	// only one bundle here
	ld8 r21 = [r20]		// first quad with control information
	and r2 = TIF_ALLWORK_MASK,r2
(p6)    br.cond.spnt.few .fail_einval	// deferred branch
	;;
	add r10 = IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET,r20
	extr r3 = r21,32,32	// time_interpolator->nsec_per_cyc
	extr r8 = r21,0,16	// time_interpolator->source
	cmp.ne p6, p0 = 0, r2	// Fallback if work is scheduled
(p6)    br.cond.spnt.many fsys_fallback_syscall
	;;
	cmp.eq p8,p12 = 0,r8	// Check for cpu timer
	cmp.eq p9,p0 = 1,r8	// MMIO64 ?
	extr r2 = r21,24,8	// time_interpolator->jitter
	cmp.eq p10,p0 = 2,r8	// MMIO32 ?
	cmp.ltu p11,p0 = 2,r8	// function or other clock
(p11)	br.cond.spnt.many fsys_fallback_syscall
	;;
	setf.sig f7 = r3	// Setup for scaling of counter
(p15)	movl r19 = wall_to_monotonic
(p12)	ld8 r30 = [r10]
	cmp.ne p13,p0 = r2,r0	// need jitter compensation?
	extr r21 = r21,16,8	// shift factor
	;;
.time_redo:
	.pred.rel.mutex p8,p9,p10
	ld4.acq r28 = [r29]	// xtime_lock.sequence. Must come first for locking purposes
(p8)	mov r2 = ar.itc		// CPU_TIMER. 36 clocks latency!!!
	add r22 = IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET,r20
(p9)	ld8 r2 = [r30]		// readq(ti->address). Could also have latency issues..
(p10)	ld4 r2 = [r30]		// readl(ti->address)
(p13)	add r23 = IA64_TIME_INTERPOLATOR_LAST_CYCLE_OFFSET,r20
	;;			// could be removed by moving the last add upward
	ld8 r26 = [r22]		// time_interpolator->last_counter
(p13)	ld8 r25 = [r23]		// time_interpolator->last_cycle
	add r24 = IA64_TIME_INTERPOLATOR_OFFSET_OFFSET,r20
(p15)	ld8 r17 = [r19],IA64_TIMESPEC_TV_NSEC_OFFSET
	ld8 r9 = [r27],IA64_TIMESPEC_TV_NSEC_OFFSET
	add r14 = IA64_TIME_INTERPOLATOR_MASK_OFFSET, r20
	;;
	ld8 r18 = [r24]		// time_interpolator->offset
	ld8 r8 = [r27],-IA64_TIMESPEC_TV_NSEC_OFFSET	// xtime.tv_nsec
(p13)	sub r3 = r25,r2	// Diff needed before comparison (thanks davidm)
	;;
	ld8 r14 = [r14]		// time_interpolator->mask
(p13)	cmp.gt.unc p6,p7 = r3,r0	// check if it is less than last. p6,p7 cleared
	sub r10 = r2,r26	// current_counter - last_counter
	;;
(p6)	sub r10 = r25,r26	// time we got was less than last_cycle
(p7)	mov ar.ccv = r25	// more than last_cycle. Prep for cmpxchg
	;;
	and r10 = r10,r14	// Apply mask
	;;
	setf.sig f8 = r10
	nop.i 123
	;;
(p7)	cmpxchg8.rel r3 = [r23],r2,ar.ccv
EX(.fail_efault, probe.w.fault r31, 3)	// This takes 5 cycles and we have spare time
	xmpy.l f8 = f8,f7	// nsec_per_cyc*(counter-last_counter)
(p15)	add r9 = r9,r17		// Add wall to monotonic.secs to result secs
	;;
(p15)	ld8 r17 = [r19],-IA64_TIMESPEC_TV_NSEC_OFFSET
(p7)	cmp.ne p7,p0 = r25,r3	// if cmpxchg not successful redo
	// simulate tbit.nz.or p7,p0 = r28,0
	and r28 = ~1,r28	// Make sequence even to force retry if odd
	getf.sig r2 = f8
	mf
	add r8 = r8,r18		// Add time interpolator offset
	;;
	ld4 r10 = [r29]		// xtime_lock.sequence
(p15)	add r8 = r8, r17	// Add monotonic.nsecs to nsecs
	shr.u r2 = r2,r21
	;;		// overloaded 3 bundles!
	// End critical section.
	add r8 = r8,r2		// Add xtime.nsecs
	cmp4.ne.or p7,p0 = r28,r10
(p7)	br.cond.dpnt.few .time_redo	// sequence number changed ?
	// Now r8=tv->tv_nsec and r9=tv->tv_sec
	mov r10 = r0
	movl r2 = 1000000000
	add r23 = IA64_TIMESPEC_TV_NSEC_OFFSET, r31
(p14)	movl r3 = 2361183241434822607	// Prep for / 1000 hack
	;;
.time_normalize:
	mov r21 = r8
	cmp.ge p6,p0 = r8,r2
(p14)	shr.u r20 = r8, 3		// We can repeat this if necessary just wasting some time
	;;
(p14)	setf.sig f8 = r20
(p6)	sub r8 = r8,r2
(p6)	add r9 = 1,r9			// two nops before the branch.
(p14)	setf.sig f7 = r3		// Chances for repeats are 1 in 10000 for gettod
(p6)	br.cond.dpnt.few .time_normalize
	;;
	// Divided by 8 through shift. Now divide by 125
	// The compiler was able to do that with a multiply
	// and a shift and we do the same
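	//
	// Worked out (informally): for the normalized nsec value (< 10^9),
	//	nsec/1000 = (nsec/8)/125 = ((nsec >> 3) * ceil(2^68/125)) >> 68
	// and 2361183241434822607 is exactly ceil(2^68/125).  The xmpy.hu
	// below yields the upper 64 bits of the product (i.e. >> 64) and the
	// final shr.u by 4 supplies the remaining four bits of the shift.
	//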
EX(.fail_efault, probe.w.fault r23, 3)		// This also costs 5 cycles
(p14)	xmpy.hu f8 = f8, f7			// xmpy has 5 cycles latency so use it...
	;;
	mov r8 = r0
(p14)	getf.sig r2 = f8
	;;
(p14)	shr.u r21 = r2, 4
	;;
EX(.fail_efault, st8 [r31] = r9)
EX(.fail_efault, st8 [r23] = r21)
	FSYS_RETURN
.fail_einval:
	mov r8 = EINVAL
	mov r10 = -1
	FSYS_RETURN
.fail_efault:
	mov r8 = EFAULT
	mov r10 = -1
	FSYS_RETURN
END(fsys_gettimeofday)

ENTRY(fsys_clock_gettime)
	.prologue
	.altrp b6
	.body
	cmp4.ltu p6, p0 = CLOCK_MONOTONIC, r32
	// Fallback if this is not CLOCK_REALTIME or CLOCK_MONOTONIC
(p6)	br.spnt.few fsys_fallback_syscall
	mov r31 = r33
	shl r30 = r32,15
	br.many .gettime
END(fsys_clock_gettime)

/*
 * long fsys_rt_sigprocmask (int how, sigset_t *set, sigset_t *oset, size_t sigsetsize).
 */
#if _NSIG_WORDS != 1
# error Sorry, fsys_rt_sigprocmask() needs to be updated for _NSIG_WORDS != 1.
#endif
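/*
 * Roughly, the handler below implements the following (a C-level sketch only;
 * it ignores the NaT checks, the fallback paths, and the siglock and
 * TIF_SIGPENDING manipulation that the asm performs explicitly):
 *
 *	if (how > SIG_SETMASK || sigsetsize != _NSIG_WORDS*8)
 *		return -EINVAL;
 *	old = current->blocked;
 *	if (set) {
 *		new = *set & ~(sigmask(SIGKILL) | sigmask(SIGSTOP));
 *		switch (how) {
 *		case SIG_BLOCK:   current->blocked = old | new;  break;
 *		case SIG_UNBLOCK: current->blocked = old & ~new; break;
 *		case SIG_SETMASK: current->blocked = new;        break;
 *		}
 *		recalc_sigpending();	// falls back if a signal is now pending
 *	}
 *	if (oset)
 *		*oset = old;
 *	return 0;
 */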
ENTRY(fsys_rt_sigprocmask)
	.prologue
	.altrp b6
	.body

	add r2=IA64_TASK_BLOCKED_OFFSET,r16
	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
	cmp4.ltu p6,p0=SIG_SETMASK,r32

	cmp.ne p15,p0=r0,r34			// oset != NULL?
	tnat.nz p8,p0=r34
	add r31=IA64_TASK_SIGHAND_OFFSET,r16
	;;
	ld8 r3=[r2]				// read/prefetch current->blocked
	ld4 r9=[r9]
	tnat.nz.or p6,p0=r35

	cmp.ne.or p6,p0=_NSIG_WORDS*8,r35
	tnat.nz.or p6,p0=r32
(p6)	br.spnt.few .fail_einval		// fail with EINVAL
	;;
#ifdef CONFIG_SMP
	ld8 r31=[r31]				// r31 <- current->sighand
#endif
	and r9=TIF_ALLWORK_MASK,r9
	tnat.nz.or p8,p0=r33
	;;
	cmp.ne p7,p0=0,r9
	cmp.eq p6,p0=r0,r33			// set == NULL?
	add r31=IA64_SIGHAND_SIGLOCK_OFFSET,r31	// r31 <- current->sighand->siglock
(p8)	br.spnt.few .fail_efault		// fail with EFAULT
(p7)	br.spnt.many fsys_fallback_syscall	// got pending kernel work...
(p6)	br.dpnt.many .store_mask		// -> short-circuit to just reading the signal mask

	/* Argh, we actually have to do some work and _update_ the signal mask: */

EX(.fail_efault, probe.r.fault r33, 3)		// verify user has read-access to *set
EX(.fail_efault, ld8 r14=[r33])			// r14 <- *set
	mov r17=(1 << (SIGKILL - 1)) | (1 << (SIGSTOP - 1))
	;;

	rsm psr.i				// mask interrupt delivery
	mov ar.ccv=0
	andcm r14=r14,r17			// filter out SIGKILL & SIGSTOP

#ifdef CONFIG_SMP
	mov r17=1
	;;
	cmpxchg4.acq r18=[r31],r17,ar.ccv	// try to acquire the lock
	mov r8=EINVAL			// default to EINVAL
	;;
	ld8 r3=[r2]			// re-read current->blocked now that we hold the lock
	cmp4.ne p6,p0=r18,r0
(p6)	br.cond.spnt.many .lock_contention
	;;
#else
	ld8 r3=[r2]			// re-read current->blocked now that we hold the lock
	mov r8=EINVAL			// default to EINVAL
#endif
	add r18=IA64_TASK_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r16
	add r19=IA64_TASK_SIGNAL_OFFSET,r16
	cmp4.eq p6,p0=SIG_BLOCK,r32
	;;
	ld8 r19=[r19]			// r19 <- current->signal
	cmp4.eq p7,p0=SIG_UNBLOCK,r32
	cmp4.eq p8,p0=SIG_SETMASK,r32
	;;
	ld8 r18=[r18]			// r18 <- current->pending.signal
	.pred.rel.mutex p6,p7,p8
(p6)	or r14=r3,r14			// SIG_BLOCK
(p7)	andcm r14=r3,r14		// SIG_UNBLOCK

(p8)	mov r14=r14			// SIG_SETMASK
(p6)	mov r8=0			// clear error code
	// recalc_sigpending()
	add r17=IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,r19

	add r19=IA64_SIGNAL_SHARED_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r19
	;;
	ld4 r17=[r17]		// r17 <- current->signal->group_stop_count
(p7)	mov r8=0		// clear error code

	ld8 r19=[r19]		// r19 <- current->signal->shared_pending
	;;
	cmp4.gt p6,p7=r17,r0	// p6/p7 <- (current->signal->group_stop_count > 0)?
(p8)	mov r8=0		// clear error code

	or r18=r18,r19		// r18 <- current->pending | current->signal->shared_pending
	;;
	// r18 <- (current->pending | current->signal->shared_pending) & ~current->blocked:
	andcm r18=r18,r14
	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
	;;

(p7)	cmp.ne.or.andcm p6,p7=r18,r0		// p6/p7 <- signal pending
	mov r19=0					// i must not leak kernel bits...
(p6)	br.cond.dpnt.many .sig_pending
	;;

1:	ld4 r17=[r9]				// r17 <- current->thread_info->flags
	;;
	mov ar.ccv=r17
	and r18=~_TIF_SIGPENDING,r17		// r18 <- r17 & ~(1 << TIF_SIGPENDING)
	;;

	st8 [r2]=r14				// update current->blocked with new mask
	cmpxchg4.acq r8=[r9],r18,ar.ccv		// current->thread_info->flags <- r18
	;;
	cmp.ne p6,p0=r17,r8			// update failed?
(p6)	br.cond.spnt.few 1b			// yes -> retry

#ifdef CONFIG_SMP
	st4.rel [r31]=r0			// release the lock
#endif
	ssm psr.i
	;;

	srlz.d					// ensure psr.i is set again
	mov r18=0					// i must not leak kernel bits...

.store_mask:
EX(.fail_efault, (p15) probe.w.fault r34, 3)	// verify user has write-access to *oset
EX(.fail_efault, (p15) st8 [r34]=r3)
	mov r2=0					// i must not leak kernel bits...
	mov r3=0					// i must not leak kernel bits...
	mov r8=0				// return 0
	mov r9=0					// i must not leak kernel bits...
	mov r14=0					// i must not leak kernel bits...
	mov r17=0					// i must not leak kernel bits...
	mov r31=0					// i must not leak kernel bits...
	FSYS_RETURN

.sig_pending:
#ifdef CONFIG_SMP
	st4.rel [r31]=r0			// release the lock
#endif
	ssm psr.i
	;;
	srlz.d
	br.sptk.many fsys_fallback_syscall	// with signal pending, do the heavy-weight syscall

#ifdef CONFIG_SMP
.lock_contention:
	/* Rather than spinning here, fall back on doing a heavy-weight syscall.  */
	ssm psr.i
	;;
	srlz.d
	br.sptk.many fsys_fallback_syscall
#endif
END(fsys_rt_sigprocmask)

ENTRY(fsys_fallback_syscall)
	.prologue
	.altrp b6
	.body
	/*
	 * We only get here from light-weight syscall handlers.  Thus, we already
	 * know that r15 contains a valid syscall number.  No need to re-check.
	 */
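	/*
	 * ia64 system call numbers start at 1024, so "r15 - 1024" below is the
	 * index into sys_call_table, and the shladd scales it by the 8-byte
	 * size of each table entry.
	 */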
	adds r17=-1024,r15
	movl r14=sys_call_table
	;;
	rsm psr.i
	shladd r18=r17,3,r14
	;;
	ld8 r18=[r18]				// load normal (heavy-weight) syscall entry-point
	mov r29=psr				// read psr (12 cyc load latency)
	mov r27=ar.rsc
	mov r21=ar.fpsr
	mov r26=ar.pfs
END(fsys_fallback_syscall)
	/* FALL THROUGH */
GLOBAL_ENTRY(fsys_bubble_down)
	.prologue
	.altrp b6
	.body
	/*
	 * We get here for syscalls that don't have a lightweight
	 * handler.  For those, we need to bubble down into the kernel
	 * and that requires setting up a minimal pt_regs structure,
	 * and initializing the CPU state more or less as if an
	 * interruption had occurred.  To make syscall-restarts work,
	 * we setup pt_regs such that cr_iip points to the second
	 * instruction in syscall_via_break.  Decrementing the IP
	 * hence will restart the syscall via break and not
	 * decrementing IP will return us to the caller, as usual.
	 * Note that we preserve the value of psr.pp rather than
	 * initializing it from dcr.pp.  This makes it possible to
	 * distinguish fsyscall execution from other privileged
	 * execution.
	 *
	 * On entry:
	 *	- normal fsyscall handler register usage, except
	 *	  that we also have:
	 *	- r18: address of syscall entry point
	 *	- r21: ar.fpsr
	 *	- r26: ar.pfs
	 *	- r27: ar.rsc
	 *	- r29: psr
	 *
	 * We used to clear some PSR bits here but that requires slow
	 * serialization.  Fortunately, that isn't really necessary.
	 * The rationale is as follows: we used to clear bits
	 * ~PSR_PRESERVED_BITS in PSR.L.  Since
	 * PSR_PRESERVED_BITS==PSR.{UP,MFL,MFH,PK,DT,PP,SP,RT,IC}, we
	 * ended up clearing PSR.{BE,AC,I,DFL,DFH,DI,DB,SI,TB}.
	 * However,
	 *
	 * PSR.BE : already is turned off in __kernel_syscall_via_epc()
	 * PSR.AC : don't care (kernel normally turns PSR.AC on)
	 * PSR.I  : already turned off by the time fsys_bubble_down gets
	 *	    invoked
	 * PSR.DFL: always 0 (kernel never turns it on)
	 * PSR.DFH: don't care --- kernel never touches f32-f127 on its own
	 *	    initiative
	 * PSR.DI : always 0 (kernel never turns it on)
	 * PSR.SI : always 0 (kernel never turns it on)
	 * PSR.DB : don't care --- kernel never enables kernel-level
	 *	    breakpoints
	 * PSR.TB : must be 0 already; if it wasn't zero on entry to
	 *          __kernel_syscall_via_epc, the branch to fsys_bubble_down
	 *          will trigger a taken branch; the taken-trap-handler then
	 *          converts the syscall into a break-based system-call.
	 */
	/*
	 * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc.
	 * The rest we have to synthesize.
	 */
#	define PSR_ONE_BITS		((3 << IA64_PSR_CPL0_BIT)	\
					 | (0x1 << IA64_PSR_RI_BIT)	\
					 | IA64_PSR_BN | IA64_PSR_I)
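	/*
	 * In other words, the cr.ipsr value constructed below ("or r29=r8,r29")
	 * is the saved user psr with cpl forced to 3, ri set to 1 (so an rfi
	 * resumes at the second slot of __kernel_syscall_via_break, matching
	 * the restart scheme described above), and the BN and I bits set.
	 */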

	invala					// M0|1
	movl r14=ia64_ret_from_syscall		// X

	nop.m 0
	movl r28=__kernel_syscall_via_break	// X	create cr.iip
	;;

	mov r2=r16				// A    get task addr to addl-addressable register
	adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // A
	mov r31=pr				// I0   save pr (2 cyc)
	;;
	st1 [r16]=r0				// M2|3 clear current->thread.on_ustack flag
	addl r22=IA64_RBS_OFFSET,r2		// A    compute base of RBS
	add r3=TI_FLAGS+IA64_TASK_SIZE,r2	// A
	;;
	ld4 r3=[r3]				// M0|1 r3 = current_thread_info()->flags
	lfetch.fault.excl.nt1 [r22]		// M0|1 prefetch register backing-store
	nop.i 0
	;;
	mov ar.rsc=0				// M2   set enforced lazy mode, pl 0, LE, loadrs=0
	nop.m 0
	nop.i 0
	;;
	mov r23=ar.bspstore			// M2 (12 cyc) save ar.bspstore
	mov.m r24=ar.rnat			// M2 (5 cyc) read ar.rnat (dual-issues!)
	nop.i 0
	;;
	mov ar.bspstore=r22			// M2 (6 cyc) switch to kernel RBS
	movl r8=PSR_ONE_BITS			// X
	;;
	mov r25=ar.unat				// M2 (5 cyc) save ar.unat
	mov r19=b6				// I0   save b6 (2 cyc)
	mov r20=r1				// A    save caller's gp in r20
	;;
	or r29=r8,r29				// A    construct cr.ipsr value to save
	mov b6=r18				// I0   copy syscall entry-point to b6 (7 cyc)
	addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // A compute base of memory stack

	mov r18=ar.bsp				// M2   save (kernel) ar.bsp (12 cyc)
	cmp.ne pKStk,pUStk=r0,r0		// A    set pKStk <- 0, pUStk <- 1
	br.call.sptk.many b7=ia64_syscall_setup	// B
	;;
	mov ar.rsc=0x3				// M2   set eager mode, pl 0, LE, loadrs=0
	mov rp=r14				// I0   set the real return addr
	and r3=_TIF_SYSCALL_TRACEAUDIT,r3	// A
	;;
	ssm psr.i				// M2   we're on kernel stacks now, reenable irqs
	cmp.eq p8,p0=r3,r0			// A
(p10)	br.cond.spnt.many ia64_ret_from_syscall	// B    return if bad call-frame or r15 is a NaT

	nop.m 0
(p8)	br.call.sptk.many b6=b6			// B    (ignore return address)
	br.cond.spnt ia64_trace_syscall		// B
END(fsys_bubble_down)

	.rodata
	.align 8
	.globl fsyscall_table

	data8 fsys_bubble_down
fsyscall_table:
	data8 fsys_ni_syscall
	data8 0				// exit			// 1025
	data8 0				// read
	data8 0				// write
	data8 0				// open
	data8 0				// close
	data8 0				// creat		// 1030
	data8 0				// link
	data8 0				// unlink
	data8 0				// execve
	data8 0				// chdir
	data8 0				// fchdir		// 1035
	data8 0				// utimes
	data8 0				// mknod
	data8 0				// chmod
	data8 0				// chown
	data8 0				// lseek		// 1040
	data8 fsys_getpid		// getpid
	data8 fsys_getppid		// getppid
	data8 0				// mount
	data8 0				// umount
	data8 0				// setuid		// 1045
	data8 0				// getuid
	data8 0				// geteuid
	data8 0				// ptrace
	data8 0				// access
	data8 0				// sync			// 1050
	data8 0				// fsync
	data8 0				// fdatasync
	data8 0				// kill
	data8 0				// rename
	data8 0				// mkdir		// 1055
	data8 0				// rmdir
	data8 0				// dup
	data8 0				// pipe
	data8 0				// times
	data8 0				// brk			// 1060
	data8 0				// setgid
	data8 0				// getgid
	data8 0				// getegid
	data8 0				// acct
	data8 0				// ioctl		// 1065
	data8 0				// fcntl
	data8 0				// umask
	data8 0				// chroot
	data8 0				// ustat
	data8 0				// dup2			// 1070
	data8 0				// setreuid
	data8 0				// setregid
	data8 0				// getresuid
	data8 0				// setresuid
	data8 0				// getresgid		// 1075
	data8 0				// setresgid
	data8 0				// getgroups
	data8 0				// setgroups
	data8 0				// getpgid
	data8 0				// setpgid		// 1080
	data8 0				// setsid
	data8 0				// getsid
	data8 0				// sethostname
	data8 0				// setrlimit
	data8 0				// getrlimit		// 1085
	data8 0				// getrusage
	data8 fsys_gettimeofday		// gettimeofday
	data8 0				// settimeofday
	data8 0				// select
	data8 0				// poll			// 1090
	data8 0				// symlink
	data8 0				// readlink
	data8 0				// uselib
	data8 0				// swapon
	data8 0				// swapoff		// 1095
	data8 0				// reboot
	data8 0				// truncate
	data8 0				// ftruncate
	data8 0				// fchmod
	data8 0				// fchown		// 1100
	data8 0				// getpriority
	data8 0				// setpriority
	data8 0				// statfs
	data8 0				// fstatfs
	data8 0				// gettid		// 1105
	data8 0				// semget
	data8 0				// semop
	data8 0				// semctl
	data8 0				// msgget
	data8 0				// msgsnd		// 1110
	data8 0				// msgrcv
	data8 0				// msgctl
	data8 0				// shmget
	data8 0				// shmat
	data8 0				// shmdt		// 1115
	data8 0				// shmctl
	data8 0				// syslog
	data8 0				// setitimer
	data8 0				// getitimer
	data8 0							// 1120
	data8 0
	data8 0
	data8 0				// vhangup
	data8 0				// lchown
	data8 0				// remap_file_pages	// 1125
	data8 0				// wait4
	data8 0				// sysinfo
	data8 0				// clone
	data8 0				// setdomainname
	data8 0				// newuname		// 1130
	data8 0				// adjtimex
	data8 0
	data8 0				// init_module
	data8 0				// delete_module
	data8 0							// 1135
	data8 0
	data8 0				// quotactl
	data8 0				// bdflush
	data8 0				// sysfs
	data8 0				// personality		// 1140
	data8 0				// afs_syscall
	data8 0				// setfsuid
	data8 0				// setfsgid
	data8 0				// getdents
	data8 0				// flock		// 1145
	data8 0				// readv
	data8 0				// writev
	data8 0				// pread64
	data8 0				// pwrite64
	data8 0				// sysctl		// 1150
	data8 0				// mmap
	data8 0				// munmap
	data8 0				// mlock
	data8 0				// mlockall
	data8 0				// mprotect		// 1155
	data8 0				// mremap
	data8 0				// msync
	data8 0				// munlock
	data8 0				// munlockall
	data8 0				// sched_getparam	// 1160
	data8 0				// sched_setparam
	data8 0				// sched_getscheduler
	data8 0				// sched_setscheduler
	data8 0				// sched_yield
	data8 0				// sched_get_priority_max	// 1165
	data8 0				// sched_get_priority_min
	data8 0				// sched_rr_get_interval
	data8 0				// nanosleep
	data8 0				// nfsservctl
	data8 0				// prctl		// 1170
	data8 0				// getpagesize
	data8 0				// mmap2
	data8 0				// pciconfig_read
	data8 0				// pciconfig_write
	data8 0				// perfmonctl		// 1175
	data8 0				// sigaltstack
	data8 0				// rt_sigaction
	data8 0				// rt_sigpending
	data8 fsys_rt_sigprocmask	// rt_sigprocmask
	data8 0				// rt_sigqueueinfo	// 1180
	data8 0				// rt_sigreturn
	data8 0				// rt_sigsuspend
	data8 0				// rt_sigtimedwait
	data8 0				// getcwd
	data8 0				// capget		// 1185
	data8 0				// capset
	data8 0				// sendfile
	data8 0
	data8 0
	data8 0				// socket		// 1190
	data8 0				// bind
	data8 0				// connect
	data8 0				// listen
	data8 0				// accept
	data8 0				// getsockname		// 1195
	data8 0				// getpeername
	data8 0				// socketpair
	data8 0				// send
	data8 0				// sendto
	data8 0				// recv			// 1200
	data8 0				// recvfrom
	data8 0				// shutdown
	data8 0				// setsockopt
	data8 0				// getsockopt
	data8 0				// sendmsg		// 1205
	data8 0				// recvmsg
	data8 0				// pivot_root
	data8 0				// mincore
	data8 0				// madvise
	data8 0				// newstat		// 1210
	data8 0				// newlstat
	data8 0				// newfstat
	data8 0				// clone2
	data8 0				// getdents64
	data8 0				// getunwind		// 1215
	data8 0				// readahead
	data8 0				// setxattr
	data8 0				// lsetxattr
	data8 0				// fsetxattr
	data8 0				// getxattr		// 1220
	data8 0				// lgetxattr
	data8 0				// fgetxattr
	data8 0				// listxattr
	data8 0				// llistxattr
	data8 0				// flistxattr		// 1225
	data8 0				// removexattr
	data8 0				// lremovexattr
	data8 0				// fremovexattr
	data8 0				// tkill
	data8 0				// futex		// 1230
	data8 0				// sched_setaffinity
	data8 0				// sched_getaffinity
	data8 fsys_set_tid_address	// set_tid_address
	data8 0				// fadvise64_64
	data8 0				// tgkill		// 1235
	data8 0				// exit_group
	data8 0				// lookup_dcookie
	data8 0				// io_setup
	data8 0				// io_destroy
	data8 0				// io_getevents		// 1240
	data8 0				// io_submit
	data8 0				// io_cancel
	data8 0				// epoll_create
	data8 0				// epoll_ctl
	data8 0				// epoll_wait		// 1245
	data8 0				// restart_syscall
	data8 0				// semtimedop
	data8 0				// timer_create
	data8 0				// timer_settime
	data8 0				// timer_gettime	// 1250
	data8 0				// timer_getoverrun
	data8 0				// timer_delete
	data8 0				// clock_settime
	data8 fsys_clock_gettime	// clock_gettime
	data8 0				// clock_getres		// 1255
	data8 0				// clock_nanosleep
	data8 0				// fstatfs64
	data8 0				// statfs64
	data8 0
	data8 0							// 1260
	data8 0
	data8 0				// mq_open
	data8 0				// mq_unlink
	data8 0				// mq_timedsend
	data8 0				// mq_timedreceive	// 1265
	data8 0				// mq_notify
	data8 0				// mq_getsetattr
	data8 0				// kexec_load
	data8 0
	data8 0							// 1270
	data8 0
	data8 0
	data8 0
	data8 0
	data8 0							// 1275
	data8 0
	data8 0
	data8 0
	data8 0

	.org fsyscall_table + 8*NR_syscalls	// guard against failures to increase NR_syscalls