xref: /openbmc/linux/arch/ia64/kernel/fsys.S (revision 82ced6fd)
1/*
2 * This file contains the light-weight system call handlers (fsyscall-handlers).
3 *
4 * Copyright (C) 2003 Hewlett-Packard Co
5 * 	David Mosberger-Tang <davidm@hpl.hp.com>
6 *
7 * 25-Sep-03 davidm	Implement fsys_rt_sigprocmask().
8 * 18-Feb-03 louisk	Implement fsys_gettimeofday().
9 * 28-Feb-03 davidm	Fixed several bugs in fsys_gettimeofday().  Tuned it some more,
10 *			probably broke it along the way... ;-)
11 * 13-Jul-04 clameter   Implement fsys_clock_gettime and revise fsys_gettimeofday to make
12 *                      it capable of using memory based clocks without falling back to C code.
13 * 08-Feb-07 Fenghua Yu Implement fsys_getcpu.
14 *
15 */
16
17#include <asm/asmmacro.h>
18#include <asm/errno.h>
19#include <asm/asm-offsets.h>
20#include <asm/percpu.h>
21#include <asm/thread_info.h>
22#include <asm/sal.h>
23#include <asm/signal.h>
24#include <asm/system.h>
25#include <asm/unistd.h>
26
27#include "entry.h"
28#include "paravirt_inst.h"
29
30/*
31 * See Documentation/ia64/fsys.txt for details on fsyscalls.
32 *
33 * On entry to an fsyscall handler:
34 *   r10	= 0 (i.e., defaults to "successful syscall return")
35 *   r11	= saved ar.pfs (a user-level value)
36 *   r15	= system call number
37 *   r16	= "current" task pointer (in normal kernel-mode, this is in r13)
38 *   r32-r39	= system call arguments
39 *   b6		= return address (a user-level value)
40 *   ar.pfs	= previous frame-state (a user-level value)
41 *   PSR.be	= cleared to zero (i.e., little-endian byte order is in effect)
42 *   all other registers may contain values passed in from user-mode
43 *
44 * On return from an fsyscall handler:
45 *   r11	= saved ar.pfs (as passed into the fsyscall handler)
46 *   r15	= system call number (as passed into the fsyscall handler)
47 *   r32-r39	= system call arguments (as passed into the fsyscall handler)
48 *   b6		= return address (as passed into the fsyscall handler)
49 *   ar.pfs	= previous frame-state (as passed into the fsyscall handler)
50 */
51
/*
 * fsys_ni_syscall: light-weight handler that reports "not implemented".
 * It returns straight away with r8 = ENOSYS and r10 = -1; writing -1 to r10
 * overrides the "successful syscall return" default (r10 = 0) that is in
 * place on entry to every fsyscall handler.
 */
52ENTRY(fsys_ni_syscall)
53	.prologue
54	.altrp b6				// return address is kept in b6 (see entry conventions)
55	.body
56	mov r8=ENOSYS				// error code handed back to the caller
57	mov r10=-1				// mark the return as a failure
58	FSYS_RETURN
59END(fsys_ni_syscall)
60
/*
 * fsys_getpid: light-weight getpid().
 * Follows current->group_leader to its struct pid and returns, in r8, the
 * number stored in pid->numbers[pid->level].  If any TIF_ALLWORK_MASK bit is
 * set in the thread flags, the call is handed to fsys_fallback_syscall
 * instead of returning directly.
 */
61ENTRY(fsys_getpid)
62	.prologue
63	.altrp b6
64	.body
65	add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16	// r17 = &current->group_leader
66	;;
67	ld8 r17=[r17]				// r17 = current->group_leader
68	add r9=TI_FLAGS+IA64_TASK_SIZE,r16	// r9 = address of the thread flags word
69	;;
70	ld4 r9=[r9]				// r9 = thread flags
71	add r17=IA64_TASK_TGIDLINK_OFFSET,r17
72	;;
73	and r9=TIF_ALLWORK_MASK,r9		// keep only the "work pending" bits
74	ld8 r17=[r17]				// r17 = current->group_leader->pids[PIDTYPE_PID].pid
75	;;
76	add r8=IA64_PID_LEVEL_OFFSET,r17	// r8 = &pid->level
77	;;
78	ld4 r8=[r8]				// r8 = pid->level
79	add r17=IA64_PID_UPID_OFFSET,r17	// r17 = &pid->numbers[0]
80	;;
81	shl r8=r8,IA64_UPID_SHIFT		// turn the level into a byte offset into numbers[]
82	;;
83	add r17=r17,r8				// r17 = &pid->numbers[pid->level]
84	;;
85	ld4 r8=[r17]				// r8 = pid->numbers[pid->level].nr
86	;;
87	mov r17=0				// scrub the kernel pointer before returning
88	;;
89	cmp.ne p8,p0=0,r9			// p8 <- some work flag is set
90(p8)	br.spnt.many fsys_fallback_syscall	// work pending -> take the heavy-weight path
91	FSYS_RETURN
92END(fsys_getpid)
93
/*
 * fsys_getppid: light-weight getppid().
 * Returns, in r8, the tgid of current->group_leader->real_parent.
 * With CONFIG_SMP the real_parent pointer is read again after the tgid load
 * and the whole read is repeated (label 1) if the pointer changed in between.
 * r17-r19 are zeroed before returning so that no kernel pointers reach user
 * level.
 *
 * NOTE(review): p8 (a TIF_ALLWORK_MASK bit is set) is only tested inside the
 * CONFIG_SMP branch; in the !CONFIG_SMP branch it is computed but never used,
 * so pending work does not trigger the fallback there -- confirm this is
 * intended.
 */
94ENTRY(fsys_getppid)
95	.prologue
96	.altrp b6
97	.body
98	add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16	// r17 = &current->group_leader
99	;;
100	ld8 r17=[r17]				// r17 = current->group_leader
101	add r9=TI_FLAGS+IA64_TASK_SIZE,r16	// r9 = address of the thread flags word
102	;;
103
104	ld4 r9=[r9]				// r9 = thread flags
105	add r17=IA64_TASK_REAL_PARENT_OFFSET,r17 // r17 = &current->group_leader->real_parent
106	;;
107	and r9=TIF_ALLWORK_MASK,r9		// keep only the "work pending" bits
108
1091:	ld8 r18=[r17]				// r18 = current->group_leader->real_parent
110	;;
111	cmp.ne p8,p0=0,r9			// p8 <- some work flag is set
112	add r8=IA64_TASK_TGID_OFFSET,r18	// r8 = &current->group_leader->real_parent->tgid
113	;;
114
115	/*
116	 * The .acq is needed to ensure that the read of tgid has returned its data before
117	 * we re-check "real_parent".
118	 */
119	ld4.acq r8=[r8]				// r8 = current->group_leader->real_parent->tgid
120#ifdef CONFIG_SMP
121	/*
122	 * Re-read current->group_leader->real_parent.
123	 */
124	ld8 r19=[r17]				// r19 = current->group_leader->real_parent
125(p8)	br.spnt.many fsys_fallback_syscall	// work pending -> take the heavy-weight path
126	;;
127	cmp.ne p6,p0=r18,r19			// did real_parent change?
128	mov r19=0			// i must not leak kernel bits...
129(p6)	br.cond.spnt.few 1b			// yes -> redo the read of tgid and the check
130	;;
131	mov r17=0			// i must not leak kernel bits...
132	mov r18=0			// i must not leak kernel bits...
133#else
134	mov r17=0			// i must not leak kernel bits...
135	mov r18=0			// i must not leak kernel bits...
136	mov r19=0			// i must not leak kernel bits...
137#endif
138	FSYS_RETURN
139END(fsys_getppid)
140
/*
 * fsys_set_tid_address: light-weight set_tid_address().
 *   r32 = new value for current->clear_child_tid
 * Stores r32 into current->clear_child_tid (or -1 when the argument register
 * is NaT) and returns, in r8, the number in pid->numbers[pid->level] of
 * current's own struct pid.
 *
 * The store is placed ahead of the work-pending branch, so it has already
 * been issued when the code diverts to fsys_fallback_syscall.
 */
141ENTRY(fsys_set_tid_address)
142	.prologue
143	.altrp b6
144	.body
145	add r9=TI_FLAGS+IA64_TASK_SIZE,r16	// r9 = address of the thread flags word
146	add r17=IA64_TASK_TGIDLINK_OFFSET,r16
147	;;
148	ld4 r9=[r9]				// r9 = thread flags
149	tnat.z p6,p7=r32		// check argument register for being NaT (p6: valid, p7: NaT)
150	ld8 r17=[r17]				// r17 = current->pids[PIDTYPE_PID].pid
151	;;
152	and r9=TIF_ALLWORK_MASK,r9		// keep only the "work pending" bits
153	add r8=IA64_PID_LEVEL_OFFSET,r17	// r8 = &pid->level
154	add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16	// r18 = &current->clear_child_tid
155	;;
156	ld4 r8=[r8]				// r8 = pid->level
157	add r17=IA64_PID_UPID_OFFSET,r17	// r17 = &pid->numbers[0]
158	;;
159	shl r8=r8,IA64_UPID_SHIFT		// turn the level into a byte offset into numbers[]
160	;;
161	add r17=r17,r8				// r17 = &pid->numbers[pid->level]
162	;;
163	ld4 r8=[r17]				// r8 = pid->numbers[pid->level].nr
164	;;
165	cmp.ne p8,p0=0,r9			// p8 <- some work flag is set
166	mov r17=-1				// value stored when the argument is NaT
167	;;
168(p6)	st8 [r18]=r32				// valid argument: clear_child_tid = r32
169(p7)	st8 [r18]=r17				// NaT argument: clear_child_tid = -1
170(p8)	br.spnt.many fsys_fallback_syscall	// work pending -> take the heavy-weight path
171	;;
172	mov r17=0			// i must not leak kernel bits...
173	mov r18=0			// i must not leak kernel bits...
174	FSYS_RETURN
175END(fsys_set_tid_address)
176
177#if IA64_GTOD_LOCK_OFFSET !=0
178#error fsys_gettimeofday incompatible with changes to struct fsyscall_gtod_data_t
179#endif
180#if IA64_ITC_JITTER_OFFSET !=0
181#error fsys_gettimeofday incompatible with changes to struct itc_jitter_data_t
182#endif
// Both offsets must be zero because the code below uses the structure base
// addresses (r20, r29) directly as the addresses of those first members.
183#define CLOCK_REALTIME 0
184#define CLOCK_MONOTONIC 1
// Processing flags carried in r30.  "mov pr = r30,0xc000" copies bits 14 and 15
// into predicates p14 and p15, so 0x4000 selects p14 and 0x8000 selects p15.
185#define CLOCK_DIVIDE_BY_1000 0x4000
186#define CLOCK_ADD_MONOTONIC 0x8000
187
/*
 * fsys_gettimeofday: light-weight gettimeofday().
 *   r32 = pointer to the result (seconds, then the sub-second field)
 *   r33 = second argument; only checked for NaT, otherwise unused here
 *
 * The body from .gettime onwards is shared with fsys_clock_gettime; the two
 * entry points only differ in what they place in r31 (result pointer) and
 * r30 (processing flags).  This entry sets CLOCK_DIVIDE_BY_1000 so that the
 * nanosecond value is divided by 1000 before it is stored.
 *
 * Outcomes:
 *   - success: result stored through r31, r8 = 0, r10 = 0
 *   - NaT argument: r8 = EINVAL, r10 = -1 (.fail_einval)
 *   - fault while probing/storing the result: r8 = EFAULT, r10 = -1 (.fail_efault)
 *   - a TIF_ALLWORK_MASK bit is set: branch to fsys_fallback_syscall
 *
 * .fail_einval and .fail_efault are also branched to by fsys_rt_sigprocmask
 * and fsys_getcpu further down in this file.
 */
188ENTRY(fsys_gettimeofday)
189	.prologue
190	.altrp b6
191	.body
192	mov r31 = r32			// r31 = result pointer
193	tnat.nz p6,p0 = r33		// guard against NaT argument
194(p6)    br.cond.spnt.few .fail_einval
195	mov r30 = CLOCK_DIVIDE_BY_1000	// request the divide-by-1000 step (p14)
196	;;
197.gettime:
198	// Register map
199	// Incoming r31 = pointer to address where to place result
200	//          r30 = flags determining how time is processed
201	// r2,r3 = temp r4-r7 preserved
202	// r8 = result nanoseconds
203	// r9 = result seconds
204	// r10 = temporary storage for clock difference
205	// r11 = preserved: saved ar.pfs
206	// r12 = preserved: memory stack
207	// r13 = preserved: thread pointer
208	// r14 = address of mask / mask value
209	// r15 = preserved: system call number
210	// r16 = preserved: current task pointer
211	// r17 = (not used)
212	// r18 = (not used)
213	// r19 = address of itc_lastcycle
214	// r20 = struct fsyscall_gtod_data (= address of gtod_lock.sequence)
215	// r21 = address of mmio_ptr
216	// r22 = address of wall_time or monotonic_time
217	// r23 = address of shift / value
218	// r24 = address mult factor / cycle_last value
219	// r25 = itc_lastcycle value
220	// r26 = address clocksource cycle_last
221	// r27 = (not used)
222	// r28 = sequence number at the beginning of critical section
223	// r29 = address of itc_jitter
224	// r30 = time processing flags / memory address
225	// r31 = pointer to result
226	// Predicates
227	// p6,p7 short term use
228	// p8 = timesource ar.itc
229	// p9 = timesource mmio64
230	// p10 = timesource mmio32 - not used
231	// p11 = timesource not to be handled by asm code
232	// p12 = memory time source ( = p9 | p10) - not used
233	// p13 = do cmpxchg with itc_lastcycle
234	// p14 = Divide by 1000
235	// p15 = Add monotonic
236	//
237	// Note that instructions are optimized for McKinley. McKinley can
238	// process two bundles simultaneously and therefore we continuously
239	// try to feed the CPU two bundles and then a stop.
240
241	add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
242	tnat.nz p6,p0 = r31		// guard against NaT argument
243(p6)	br.cond.spnt.few .fail_einval
244	movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address
245	;;
246	ld4 r2 = [r2]			// process work pending flags
247	movl r29 = itc_jitter_data	// itc_jitter
248	add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20	// wall_time
249	add r21 = IA64_CLKSRC_MMIO_OFFSET,r20
250	mov pr = r30,0xc000	// Set predicates according to function
251	;;
252	and r2 = TIF_ALLWORK_MASK,r2
253	add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29
254(p15)	add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20	// monotonic_time
255	;;
256	add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20	// clksrc_cycle_last
257	cmp.ne p6, p0 = 0, r2	// Fallback if work is scheduled
258(p6)	br.cond.spnt.many fsys_fallback_syscall
259	;;
260	// Begin critical section
	// The sequence word is sampled here and compared again at the end of the
	// section; any difference (or an odd starting value) restarts at .time_redo.
261.time_redo:
262	ld4.acq r28 = [r20]	// gtod_lock.sequence, Must take first
263	;;
264	and r28 = ~1,r28	// And make sequence even to force retry if odd
265	;;
266	ld8 r30 = [r21]		// clocksource->mmio_ptr
267	add r24 = IA64_CLKSRC_MULT_OFFSET,r20
268	ld4 r2 = [r29]		// itc_jitter value
269	add r23 = IA64_CLKSRC_SHIFT_OFFSET,r20
270	add r14 = IA64_CLKSRC_MASK_OFFSET,r20
271	;;
272	ld4 r3 = [r24]		// clocksource mult value
273	ld8 r14 = [r14]         // clocksource mask value
274	cmp.eq p8,p9 = 0,r30	// use cpu timer if no mmio_ptr
275	;;
276	setf.sig f7 = r3	// Setup for mult scaling of counter
277(p8)	cmp.ne p13,p0 = r2,r0	// need itc_jitter compensation, set p13
278	ld4 r23 = [r23]		// clocksource shift value
279	ld8 r24 = [r26]		// get clksrc_cycle_last value
280(p9)	cmp.eq p13,p0 = 0,r30	// if mmio_ptr, clear p13 jitter control
281	;;
282	.pred.rel.mutex p8,p9
283	MOV_FROM_ITC(p8, p6, r2, r10)	// CPU_TIMER. 36 clocks latency!!!
284(p9)	ld8 r2 = [r30]		// MMIO_TIMER. Could also have latency issues..
285(p13)	ld8 r25 = [r19]		// get itc_lastcycle value
286	ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET	// tv_sec
287	;;
288	ld8 r8 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET	// tv_nsec
289(p13)	sub r3 = r25,r2		// Diff needed before comparison (thanks davidm)
290	;;
291(p13)	cmp.gt.unc p6,p7 = r3,r0 // check if it is less than last. p6,p7 cleared
292	sub r10 = r2,r24	// current_cycle - last_cycle
293	;;
294(p6)	sub r10 = r25,r24	// time we got was less than last_cycle
295(p7)	mov ar.ccv = r25	// more than last_cycle. Prep for cmpxchg
296	;;
297(p7)	cmpxchg8.rel r3 = [r19],r2,ar.ccv
298	;;
299(p7)	cmp.ne p7,p0 = r25,r3	// if cmpxchg not successful
300	;;
301(p7)	sub r10 = r3,r24	// then use new last_cycle instead
302	;;
303	and r10 = r10,r14	// Apply mask
304	;;
305	setf.sig f8 = r10
306	nop.i 123
307	;;
308	// fault check takes 5 cycles and we have spare time
309EX(.fail_efault, probe.w.fault r31, 3)
310	xmpy.l f8 = f8,f7	// nsec_per_cyc*(counter-last_counter)
311	;;
312	getf.sig r2 = f8
313	mf
314	;;
315	ld4 r10 = [r20]		// gtod_lock.sequence
316	shr.u r2 = r2,r23	// shift by factor
317	;;
318	add r8 = r8,r2		// Add xtime.nsecs
319	cmp4.ne p7,p0 = r28,r10
320(p7)	br.cond.dpnt.few .time_redo	// sequence number changed, redo
321	// End critical section.
322	// Now r8=tv->tv_nsec and r9=tv->tv_sec
323	mov r10 = r0		// r10 was used as a temporary; restore the "success" value 0
324	movl r2 = 1000000000	// nanoseconds per second
325	add r23 = IA64_TIMESPEC_TV_NSEC_OFFSET, r31	// r23 = address of the sub-second result field
326(p14)	movl r3 = 2361183241434822607	// Prep for / 1000 hack (constant is 2^68/125 rounded up)
327	;;
	// Carry whole seconds out of r8 into r9 until r8 < 1000000000.
328.time_normalize:
329	mov r21 = r8		// r21 = value to store when no division is requested
330	cmp.ge p6,p0 = r8,r2
331(p14)	shr.u r20 = r8, 3 // We can repeat this if necessary just wasting time
332	;;
333(p14)	setf.sig f8 = r20
334(p6)	sub r8 = r8,r2
335(p6)	add r9 = 1,r9		// two nops before the branch.
336(p14)	setf.sig f7 = r3	// Chances for repeats are 1 in 10000 for gettod
337(p6)	br.cond.dpnt.few .time_normalize
338	;;
339	// Divided by 8 through shift. Now divide by 125
340	// The compiler was able to do that with a multiply
341	// and a shift and we do the same
	// (high 64 bits of the product, then a further shift by 4, i.e. >> 68 overall)
342EX(.fail_efault, probe.w.fault r23, 3)	// This also costs 5 cycles
343(p14)	xmpy.hu f8 = f8, f7		// xmpy has 5 cycles latency so use it
344	;;
345(p14)	getf.sig r2 = f8
346	;;
347	mov r8 = r0		// return value 0
348(p14)	shr.u r21 = r2, 4	// r21 = nanoseconds / 1000
349	;;
350EX(.fail_efault, st8 [r31] = r9)	// store seconds
351EX(.fail_efault, st8 [r23] = r21)	// store the sub-second field
352	FSYS_RETURN
	// Shared error exits: error code in r8, r10 = -1 marks the failure.
353.fail_einval:
354	mov r8 = EINVAL
355	mov r10 = -1
356	FSYS_RETURN
357.fail_efault:
358	mov r8 = EFAULT
359	mov r10 = -1
360	FSYS_RETURN
361END(fsys_gettimeofday)
362
/*
 * fsys_clock_gettime: light-weight clock_gettime().
 *   r32 = clock id, r33 = pointer to the result
 * Only CLOCK_REALTIME (0) and CLOCK_MONOTONIC (1) are handled here; any other
 * id (unsigned 32-bit compare) goes to fsys_fallback_syscall.  The work itself
 * is done by the shared .gettime body inside fsys_gettimeofday, entered with
 * r31 = result pointer and r30 = clock id << 15, which turns CLOCK_MONOTONIC
 * into the CLOCK_ADD_MONOTONIC flag (0x8000) and CLOCK_REALTIME into no flags.
 */
363ENTRY(fsys_clock_gettime)
364	.prologue
365	.altrp b6
366	.body
367	cmp4.ltu p6, p0 = CLOCK_MONOTONIC, r32	// p6 <- clock id > CLOCK_MONOTONIC
368	// Fallback if this is not CLOCK_REALTIME or CLOCK_MONOTONIC
369(p6)	br.spnt.few fsys_fallback_syscall
370	mov r31 = r33				// r31 = result pointer expected by .gettime
371	shl r30 = r32,15			// r30 = processing flags derived from the clock id
372	br.many .gettime
373END(fsys_clock_gettime)
374
375/*
376 * long fsys_rt_sigprocmask (int how, sigset_t *set, sigset_t *oset, size_t sigsetsize).
 *
 * Arguments arrive in r32 (how), r33 (set), r34 (oset) and r35 (sigsetsize).
 *
 * Outcomes:
 *   - EINVAL (.fail_einval): how > SIG_SETMASK (unsigned 32-bit compare),
 *     sigsetsize != _NSIG_WORDS*8, or how/sigsetsize is NaT
 *   - EFAULT (.fail_efault): set or oset is NaT, or a user access faults
 *   - fsys_fallback_syscall: a TIF_ALLWORK_MASK bit is set, the siglock is
 *     already held (CONFIG_SMP), or the new mask would leave a signal pending
 *     or group_stop_count > 0
 *   - otherwise current->blocked is updated (unless set == NULL), the previous
 *     mask is written to *oset (unless oset == NULL) and r8 = 0 is returned
 *
 * Interrupt delivery is masked from RSM_PSR_I until the matching SSM_PSR_I;
 * with CONFIG_SMP the siglock is additionally held over the same stretch.
 * The .fail_einval/.fail_efault labels live in fsys_gettimeofday.
377 */
378#if _NSIG_WORDS != 1
379# error Sorry, fsys_rt_sigprocmask() needs to be updated for _NSIG_WORDS != 1.
380#endif
381ENTRY(fsys_rt_sigprocmask)
382	.prologue
383	.altrp b6
384	.body
385
386	add r2=IA64_TASK_BLOCKED_OFFSET,r16		// r2 = &current->blocked
387	add r9=TI_FLAGS+IA64_TASK_SIZE,r16		// r9 = address of the thread flags word
388	cmp4.ltu p6,p0=SIG_SETMASK,r32			// p6 <- how is out of range
389
390	cmp.ne p15,p0=r0,r34			// oset != NULL?
391	tnat.nz p8,p0=r34			// p8 <- oset is NaT
392	add r31=IA64_TASK_SIGHAND_OFFSET,r16
393	;;
394	ld8 r3=[r2]				// read/prefetch current->blocked
395	ld4 r9=[r9]				// r9 = thread flags
396	tnat.nz.or p6,p0=r35			// p6 |= sigsetsize is NaT
397
398	cmp.ne.or p6,p0=_NSIG_WORDS*8,r35	// p6 |= sigsetsize has the wrong value
399	tnat.nz.or p6,p0=r32			// p6 |= how is NaT
400(p6)	br.spnt.few .fail_einval		// fail with EINVAL
401	;;
402#ifdef CONFIG_SMP
403	ld8 r31=[r31]				// r31 <- current->sighand
404#endif
405	and r9=TIF_ALLWORK_MASK,r9		// keep only the "work pending" bits
406	tnat.nz.or p8,p0=r33			// p8 |= set is NaT
407	;;
408	cmp.ne p7,p0=0,r9			// p7 <- some work flag is set
409	cmp.eq p6,p0=r0,r33			// set == NULL?
410	add r31=IA64_SIGHAND_SIGLOCK_OFFSET,r31	// r31 <- current->sighand->siglock
411(p8)	br.spnt.few .fail_efault		// fail with EFAULT
412(p7)	br.spnt.many fsys_fallback_syscall	// got pending kernel work...
413(p6)	br.dpnt.many .store_mask		// -> short-circuit to just reading the signal mask
414
415	/* Argh, we actually have to do some work and _update_ the signal mask: */
416
417EX(.fail_efault, probe.r.fault r33, 3)		// verify user has read-access to *set
418EX(.fail_efault, ld8 r14=[r33])			// r14 <- *set
419	mov r17=(1 << (SIGKILL - 1)) | (1 << (SIGSTOP - 1))
420	;;
421
422	RSM_PSR_I(p0, r18, r19)			// mask interrupt delivery
423	mov ar.ccv=0				// compare value for the lock cmpxchg below
424	andcm r14=r14,r17			// filter out SIGKILL & SIGSTOP
425
426#ifdef CONFIG_SMP
427	mov r17=1				// value that marks the lock as taken
428	;;
429	cmpxchg4.acq r18=[r31],r17,ar.ccv	// try to acquire the lock
430	mov r8=EINVAL			// default to EINVAL
431	;;
432	ld8 r3=[r2]			// re-read current->blocked now that we hold the lock
433	cmp4.ne p6,p0=r18,r0		// previous lock value non-zero -> somebody else holds it
434(p6)	br.cond.spnt.many .lock_contention
435	;;
436#else
437	ld8 r3=[r2]			// re-read current->blocked now that we hold the lock
438	mov r8=EINVAL			// default to EINVAL
439#endif
440	add r18=IA64_TASK_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r16
441	add r19=IA64_TASK_SIGNAL_OFFSET,r16
442	cmp4.eq p6,p0=SIG_BLOCK,r32
443	;;
444	ld8 r19=[r19]			// r19 <- current->signal
445	cmp4.eq p7,p0=SIG_UNBLOCK,r32
446	cmp4.eq p8,p0=SIG_SETMASK,r32
447	;;
448	ld8 r18=[r18]			// r18 <- current->pending.signal
449	.pred.rel.mutex p6,p7,p8
	// r14 becomes the new blocked mask; r3 still holds the old one.
450(p6)	or r14=r3,r14			// SIG_BLOCK
451(p7)	andcm r14=r3,r14		// SIG_UNBLOCK
452
453(p8)	mov r14=r14			// SIG_SETMASK
454(p6)	mov r8=0			// clear error code
455	// recalc_sigpending()
456	add r17=IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,r19
457
458	add r19=IA64_SIGNAL_SHARED_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r19
459	;;
460	ld4 r17=[r17]		// r17 <- current->signal->group_stop_count
461(p7)	mov r8=0		// clear error code
462
463	ld8 r19=[r19]		// r19 <- current->signal->shared_pending
464	;;
465	cmp4.gt p6,p7=r17,r0	// p6/p7 <- (current->signal->group_stop_count > 0)?
466(p8)	mov r8=0		// clear error code
467
468	or r18=r18,r19		// r18 <- current->pending | current->signal->shared_pending
469	;;
470	// r18 <- (current->pending | current->signal->shared_pending) & ~current->blocked:
471	andcm r18=r18,r14
472	add r9=TI_FLAGS+IA64_TASK_SIZE,r16	// r9 = address of the thread flags word again
473	;;
474
475(p7)	cmp.ne.or.andcm p6,p7=r18,r0		// p6/p7 <- signal pending
476	mov r19=0					// i must not leak kernel bits...
477(p6)	br.cond.dpnt.many .sig_pending
478	;;
479
	// Nothing pending: commit the new mask and clear _TIF_SIGPENDING with a
	// compare-and-exchange loop on the flags word.
4801:	ld4 r17=[r9]				// r17 <- current->thread_info->flags
481	;;
482	mov ar.ccv=r17
483	and r18=~_TIF_SIGPENDING,r17		// r18 <- r17 & ~(1 << TIF_SIGPENDING)
484	;;
485
486	st8 [r2]=r14				// update current->blocked with new mask
487	cmpxchg4.acq r8=[r9],r18,ar.ccv		// current->thread_info->flags <- r18
488	;;
489	cmp.ne p6,p0=r17,r8			// update failed?
490(p6)	br.cond.spnt.few 1b			// yes -> retry
491
492#ifdef CONFIG_SMP
493	st4.rel [r31]=r0			// release the lock
494#endif
495	SSM_PSR_I(p0, p9, r31)
496	;;
497
498	srlz.d					// ensure psr.i is set again
499	mov r18=0					// i must not leak kernel bits...
500
	// Reached both after an update and directly when set == NULL; r3 holds the
	// mask that was in effect before any change.
501.store_mask:
502EX(.fail_efault, (p15) probe.w.fault r34, 3)	// verify user has write-access to *oset
503EX(.fail_efault, (p15) st8 [r34]=r3)
504	mov r2=0					// i must not leak kernel bits...
505	mov r3=0					// i must not leak kernel bits...
506	mov r8=0				// return 0
507	mov r9=0					// i must not leak kernel bits...
508	mov r14=0					// i must not leak kernel bits...
509	mov r17=0					// i must not leak kernel bits...
510	mov r31=0					// i must not leak kernel bits...
511	FSYS_RETURN
512
	// The new mask has not been stored at this point; the heavy-weight syscall
	// redoes the operation.
513.sig_pending:
514#ifdef CONFIG_SMP
515	st4.rel [r31]=r0			// release the lock
516#endif
517	SSM_PSR_I(p0, p9, r17)
518	;;
519	srlz.d
520	br.sptk.many fsys_fallback_syscall	// with signal pending, do the heavy-weight syscall
521
522#ifdef CONFIG_SMP
523.lock_contention:
524	/* Rather than spinning here, fall back on doing a heavy-weight syscall.  */
525	SSM_PSR_I(p0, p9, r17)
526	;;
527	srlz.d
528	br.sptk.many fsys_fallback_syscall
529#endif
530END(fsys_rt_sigprocmask)
531
532/*
533 * fsys_getcpu doesn't use the third parameter in this implementation. It reads
534 * current_thread_info()->cpu and corresponding node in cpu_to_node_map.
 *
 * Arguments: r32 = where to store the cpu number (4-byte store),
 *            r33 = where to store the node number (2-byte store).
 * With CONFIG_NUMA the node is cpu_to_node_map[cpu], read as a 2-byte entry;
 * without it the node is stored as 0.
 *
 * Outcomes: r8 = 0 on success; EINVAL (.fail_einval) if r32 or r33 is NaT;
 * EFAULT (.fail_efault) if probing or storing through either pointer faults;
 * fsys_fallback_syscall if a TIF_ALLWORK_MASK bit is set.  The two failure
 * labels live in fsys_gettimeofday.
 *
 * NOTE(review): both pointers are probed unconditionally, so a NULL cpu or
 * node pointer presumably ends in .fail_efault instead of being skipped --
 * confirm against the heavy-weight getcpu implementation.
535 */
536ENTRY(fsys_getcpu)
537	.prologue
538	.altrp b6
539	.body
540	;;
541	add r2=TI_FLAGS+IA64_TASK_SIZE,r16	// r2 = &thread_info->flags
542	tnat.nz p6,p0 = r32			// guard against NaT argument
543	add r3=TI_CPU+IA64_TASK_SIZE,r16	// r3 = &thread_info->cpu
544	;;
545	ld4 r3=[r3]				// M r3 = thread_info->cpu
546	ld4 r2=[r2]				// M r2 = thread_info->flags
547(p6)    br.cond.spnt.few .fail_einval		// B
548	;;
549	tnat.nz p7,p0 = r33			// I guard against NaT argument
550(p7)    br.cond.spnt.few .fail_einval		// B
551#ifdef CONFIG_NUMA
552	movl r17=cpu_to_node_map
553	;;
554EX(.fail_efault, probe.w.fault r32, 3)		// M This takes 5 cycles
555EX(.fail_efault, probe.w.fault r33, 3)		// M This takes 5 cycles
556	shladd r18=r3,1,r17			// r18 = &cpu_to_node_map[cpu] (2 bytes per entry)
557	;;
558	ld2 r20=[r18]				// r20 = cpu_to_node_map[cpu]
559	and r2 = TIF_ALLWORK_MASK,r2		// keep only the "work pending" bits
560	;;
561	cmp.ne p8,p0=0,r2			// p8 <- some work flag is set
562(p8)	br.spnt.many fsys_fallback_syscall
563	;;
564	;;
565EX(.fail_efault, st4 [r32] = r3)		// store the cpu number
566EX(.fail_efault, st2 [r33] = r20)		// store the node number
567	mov r8=0				// return 0
568	;;
569#else
570EX(.fail_efault, probe.w.fault r32, 3)		// M This takes 5 cycles
571EX(.fail_efault, probe.w.fault r33, 3)		// M This takes 5 cycles
572	and r2 = TIF_ALLWORK_MASK,r2		// keep only the "work pending" bits
573	;;
574	cmp.ne p8,p0=0,r2			// p8 <- some work flag is set
575(p8)	br.spnt.many fsys_fallback_syscall
576	;;
577EX(.fail_efault, st4 [r32] = r3)		// store the cpu number
578EX(.fail_efault, st2 [r33] = r0)		// no NUMA: node is always 0
579	mov r8=0				// return 0
580	;;
581#endif
582	FSYS_RETURN
583END(fsys_getcpu)
584
/*
 * fsys_fallback_syscall: hand a light-weight call over to the normal
 * (heavy-weight) syscall implementation.
 * Looks up the handler for syscall number r15 in sys_call_table (index
 * r15 - 1024, 8 bytes per entry), masks interrupt delivery, and loads the
 * registers that paravirt_fsys_bubble_down expects on entry:
 *   r18 = syscall entry point, r21 = ar.fpsr, r26 = ar.pfs,
 *   r27 = ar.rsc, r29 = psr
 * There is no return or branch at the end: execution continues directly in
 * paravirt_fsys_bubble_down, which follows this routine.
 */
585ENTRY(fsys_fallback_syscall)
586	.prologue
587	.altrp b6
588	.body
589	/*
590	 * We only get here from light-weight syscall handlers.  Thus, we already
591	 * know that r15 contains a valid syscall number.  No need to re-check.
592	 */
593	adds r17=-1024,r15			// r17 = index into sys_call_table
594	movl r14=sys_call_table
595	;;
596	RSM_PSR_I(p0, r26, r27)			// mask interrupt delivery
597	shladd r18=r17,3,r14			// r18 = &sys_call_table[r17]
598	;;
599	ld8 r18=[r18]				// load normal (heavy-weight) syscall entry-point
600	MOV_FROM_PSR(p0, r29, r26)		// read psr (12 cyc load latency)
601	mov r27=ar.rsc
602	mov r21=ar.fpsr
603	mov r26=ar.pfs
604END(fsys_fallback_syscall)
605	/* FALL THROUGH */
/*
 * paravirt_fsys_bubble_down: switch from the light-weight fsyscall context to
 * a normal kernel syscall context and invoke the heavy-weight handler.
 * Reached by falling through from fsys_fallback_syscall; the
 * paravirt_fsyscall_table data in this file also references it.  The long
 * comment inside the body describes the entry state and the PSR reasoning.
 *
 * In outline, as visible below: clear current->thread.on_ustack, switch the
 * register backing store to the kernel RBS, build the cr.ipsr value, call
 * ia64_syscall_setup, optionally update the CPU-time accounting stamps,
 * re-enable interrupts, then either call the handler in b6 with
 * ia64_ret_from_syscall as return address or branch to ia64_trace_syscall
 * when a _TIF_SYSCALL_TRACEAUDIT bit is set.
 */
606GLOBAL_ENTRY(paravirt_fsys_bubble_down)
607	.prologue
608	.altrp b6
609	.body
610	/*
611	 * We get here for syscalls that don't have a lightweight
612	 * handler.  For those, we need to bubble down into the kernel
613	 * and that requires setting up a minimal pt_regs structure,
614	 * and initializing the CPU state more or less as if an
615	 * interruption had occurred.  To make syscall-restarts work,
616	 * we setup pt_regs such that cr_iip points to the second
617	 * instruction in syscall_via_break.  Decrementing the IP
618	 * hence will restart the syscall via break and not
619	 * decrementing IP will return us to the caller, as usual.
620	 * Note that we preserve the value of psr.pp rather than
621	 * initializing it from dcr.pp.  This makes it possible to
622	 * distinguish fsyscall execution from other privileged
623	 * execution.
624	 *
625	 * On entry:
626	 *	- normal fsyscall handler register usage, except
627	 *	  that we also have:
628	 *	- r18: address of syscall entry point
629	 *	- r21: ar.fpsr
630	 *	- r26: ar.pfs
631	 *	- r27: ar.rsc
632	 *	- r29: psr
633	 *
634	 * We used to clear some PSR bits here but that requires slow
635	 * serialization.  Fortunately, that isn't really necessary.
636	 * The rationale is as follows: we used to clear bits
637	 * ~PSR_PRESERVED_BITS in PSR.L.  Since
638	 * PSR_PRESERVED_BITS==PSR.{UP,MFL,MFH,PK,DT,PP,SP,RT,IC}, we
639	 * ended up clearing PSR.{BE,AC,I,DFL,DFH,DI,DB,SI,TB}.
640	 * However,
641	 *
642	 * PSR.BE : already is turned off in __kernel_syscall_via_epc()
643	 * PSR.AC : don't care (kernel normally turns PSR.AC on)
644	 * PSR.I  : already turned off by the time paravirt_fsys_bubble_down gets
645	 *	    invoked
646	 * PSR.DFL: always 0 (kernel never turns it on)
647	 * PSR.DFH: don't care --- kernel never touches f32-f127 on its own
648	 *	    initiative
649	 * PSR.DI : always 0 (kernel never turns it on)
650	 * PSR.SI : always 0 (kernel never turns it on)
651	 * PSR.DB : don't care --- kernel never enables kernel-level
652	 *	    breakpoints
653	 * PSR.TB : must be 0 already; if it wasn't zero on entry to
654	 *          __kernel_syscall_via_epc, the branch to paravirt_fsys_bubble_down
655	 *          will trigger a taken branch; the taken-trap-handler then
656	 *          converts the syscall into a break-based system-call.
657	 */
658	/*
659	 * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc.
660	 * The rest we have to synthesize.
661	 */
662#	define PSR_ONE_BITS		((3 << IA64_PSR_CPL0_BIT)	\
663					 | (0x1 << IA64_PSR_RI_BIT)	\
664					 | IA64_PSR_BN | IA64_PSR_I)
665
666	invala					// M0|1
667	movl r14=ia64_ret_from_syscall		// X
668
669	nop.m 0
670	movl r28=__kernel_syscall_via_break	// X	create cr.iip
671	;;
672
673	mov r2=r16				// A    get task addr to addl-addressable register
674	adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // A
675	mov r31=pr				// I0   save pr (2 cyc)
676	;;
677	st1 [r16]=r0				// M2|3 clear current->thread.on_ustack flag
678	addl r22=IA64_RBS_OFFSET,r2		// A    compute base of RBS
679	add r3=TI_FLAGS+IA64_TASK_SIZE,r2	// A
680	;;
681	ld4 r3=[r3]				// M0|1 r3 = current_thread_info()->flags
682	lfetch.fault.excl.nt1 [r22]		// M0|1 prefetch register backing-store
683	nop.i 0
684	;;
685	mov ar.rsc=0				// M2   set enforced lazy mode, pl 0, LE, loadrs=0
686#ifdef CONFIG_VIRT_CPU_ACCOUNTING
687	MOV_FROM_ITC(p0, p6, r30, r23)		// M    get cycle for accounting
688#else
689	nop.m 0
690#endif
691	nop.i 0
692	;;
693	mov r23=ar.bspstore			// M2 (12 cyc) save ar.bspstore
694	mov.m r24=ar.rnat			// M2 (5 cyc) read ar.rnat (dual-issues!)
695	nop.i 0
696	;;
697	mov ar.bspstore=r22			// M2 (6 cyc) switch to kernel RBS
698	movl r8=PSR_ONE_BITS			// X
699	;;
700	mov r25=ar.unat				// M2 (5 cyc) save ar.unat
701	mov r19=b6				// I0   save b6 (2 cyc)
702	mov r20=r1				// A    save caller's gp in r20
703	;;
704	or r29=r8,r29				// A    construct cr.ipsr value to save
705	mov b6=r18				// I0   copy syscall entry-point to b6 (7 cyc)
706	addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // A compute base of memory stack
707
708	mov r18=ar.bsp				// M2   save (kernel) ar.bsp (12 cyc)
709	cmp.ne pKStk,pUStk=r0,r0		// A    set pKStk <- 0, pUStk <- 1
710	br.call.sptk.many b7=ia64_syscall_setup	// B
711	;;
712#ifdef CONFIG_VIRT_CPU_ACCOUNTING
713	// mov.m r30=ar.itc is called in advance
	// Fold the elapsed times into the cumulated stime/utime counters and
	// record r30 as the new stamp.
714	add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2
715	add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2
716	;;
717	ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP	// time at last check in kernel
718	ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE	// time at leave kernel
719	;;
720	ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME	// cumulated stime
721	ld8 r21=[r17]				// cumulated utime
722	sub r22=r19,r18				// stime before leave kernel
723	;;
724	st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP	// update stamp
725	sub r18=r30,r19				// elapsed time in user mode
726	;;
727	add r20=r20,r22				// sum stime
728	add r21=r21,r18				// sum utime
729	;;
730	st8 [r16]=r20				// update stime
731	st8 [r17]=r21				// update utime
732	;;
733#endif
734	mov ar.rsc=0x3				// M2   set eager mode, pl 0, LE, loadrs=0
735	mov rp=r14				// I0   set the real return addr
736	and r3=_TIF_SYSCALL_TRACEAUDIT,r3	// A
737	;;
738	SSM_PSR_I(p0, p6, r22)			// M2   we're on kernel stacks now, reenable irqs
739	cmp.eq p8,p0=r3,r0			// A    p8 <- no trace/audit flag is set
740(p10)	br.cond.spnt.many ia64_ret_from_syscall	// B    return if bad call-frame or r15 is a NaT
741
742	nop.m 0
743(p8)	br.call.sptk.many b6=b6			// B    (ignore return address)
744	br.cond.spnt ia64_trace_syscall		// B
745END(paravirt_fsys_bubble_down)
746
/*
 * paravirt_fsyscall_table: one 8-byte entry per system call, starting with
 * syscall number 1024 (the running numbers in the row comments below give the
 * syscall number of every fifth row).  An entry holds the address of the
 * light-weight handler defined in this file, or 0 where this file provides
 * none.  The space directive at the end pads the table with zeros up to
 * NR_syscalls entries.
 *
 * The 8 bytes immediately before the table label hold the address of
 * paravirt_fsys_bubble_down.
 *
 * NOTE(review): the code that consumes this table is not in this file --
 * confirm there how zero entries and the word at (table - 8) are used.
 */
747	.rodata
748	.align 8
749	.globl paravirt_fsyscall_table
750
751	data8 paravirt_fsys_bubble_down	// stored just ahead of the table label
752paravirt_fsyscall_table:
753	data8 fsys_ni_syscall		// entry 0 (syscall number 1024)
754	data8 0				// exit			// 1025
755	data8 0				// read
756	data8 0				// write
757	data8 0				// open
758	data8 0				// close
759	data8 0				// creat		// 1030
760	data8 0				// link
761	data8 0				// unlink
762	data8 0				// execve
763	data8 0				// chdir
764	data8 0				// fchdir		// 1035
765	data8 0				// utimes
766	data8 0				// mknod
767	data8 0				// chmod
768	data8 0				// chown
769	data8 0				// lseek		// 1040
770	data8 fsys_getpid		// getpid
771	data8 fsys_getppid		// getppid
772	data8 0				// mount
773	data8 0				// umount
774	data8 0				// setuid		// 1045
775	data8 0				// getuid
776	data8 0				// geteuid
777	data8 0				// ptrace
778	data8 0				// access
779	data8 0				// sync			// 1050
780	data8 0				// fsync
781	data8 0				// fdatasync
782	data8 0				// kill
783	data8 0				// rename
784	data8 0				// mkdir		// 1055
785	data8 0				// rmdir
786	data8 0				// dup
787	data8 0				// pipe
788	data8 0				// times
789	data8 0				// brk			// 1060
790	data8 0				// setgid
791	data8 0				// getgid
792	data8 0				// getegid
793	data8 0				// acct
794	data8 0				// ioctl		// 1065
795	data8 0				// fcntl
796	data8 0				// umask
797	data8 0				// chroot
798	data8 0				// ustat
799	data8 0				// dup2			// 1070
800	data8 0				// setreuid
801	data8 0				// setregid
802	data8 0				// getresuid
803	data8 0				// setresuid
804	data8 0				// getresgid		// 1075
805	data8 0				// setresgid
806	data8 0				// getgroups
807	data8 0				// setgroups
808	data8 0				// getpgid
809	data8 0				// setpgid		// 1080
810	data8 0				// setsid
811	data8 0				// getsid
812	data8 0				// sethostname
813	data8 0				// setrlimit
814	data8 0				// getrlimit		// 1085
815	data8 0				// getrusage
816	data8 fsys_gettimeofday		// gettimeofday
817	data8 0				// settimeofday
818	data8 0				// select
819	data8 0				// poll			// 1090
820	data8 0				// symlink
821	data8 0				// readlink
822	data8 0				// uselib
823	data8 0				// swapon
824	data8 0				// swapoff		// 1095
825	data8 0				// reboot
826	data8 0				// truncate
827	data8 0				// ftruncate
828	data8 0				// fchmod
829	data8 0				// fchown		// 1100
830	data8 0				// getpriority
831	data8 0				// setpriority
832	data8 0				// statfs
833	data8 0				// fstatfs
834	data8 0				// gettid		// 1105
835	data8 0				// semget
836	data8 0				// semop
837	data8 0				// semctl
838	data8 0				// msgget
839	data8 0				// msgsnd		// 1110
840	data8 0				// msgrcv
841	data8 0				// msgctl
842	data8 0				// shmget
843	data8 0				// shmat
844	data8 0				// shmdt		// 1115
845	data8 0				// shmctl
846	data8 0				// syslog
847	data8 0				// setitimer
848	data8 0				// getitimer
849	data8 0					 		// 1120
850	data8 0
851	data8 0
852	data8 0				// vhangup
853	data8 0				// lchown
854	data8 0				// remap_file_pages	// 1125
855	data8 0				// wait4
856	data8 0				// sysinfo
857	data8 0				// clone
858	data8 0				// setdomainname
859	data8 0				// newuname		// 1130
860	data8 0				// adjtimex
861	data8 0
862	data8 0				// init_module
863	data8 0				// delete_module
864	data8 0							// 1135
865	data8 0
866	data8 0				// quotactl
867	data8 0				// bdflush
868	data8 0				// sysfs
869	data8 0				// personality		// 1140
870	data8 0				// afs_syscall
871	data8 0				// setfsuid
872	data8 0				// setfsgid
873	data8 0				// getdents
874	data8 0				// flock		// 1145
875	data8 0				// readv
876	data8 0				// writev
877	data8 0				// pread64
878	data8 0				// pwrite64
879	data8 0				// sysctl		// 1150
880	data8 0				// mmap
881	data8 0				// munmap
882	data8 0				// mlock
883	data8 0				// mlockall
884	data8 0				// mprotect		// 1155
885	data8 0				// mremap
886	data8 0				// msync
887	data8 0				// munlock
888	data8 0				// munlockall
889	data8 0				// sched_getparam	// 1160
890	data8 0				// sched_setparam
891	data8 0				// sched_getscheduler
892	data8 0				// sched_setscheduler
893	data8 0				// sched_yield
894	data8 0				// sched_get_priority_max	// 1165
895	data8 0				// sched_get_priority_min
896	data8 0				// sched_rr_get_interval
897	data8 0				// nanosleep
898	data8 0				// nfsservctl
899	data8 0				// prctl		// 1170
900	data8 0				// getpagesize
901	data8 0				// mmap2
902	data8 0				// pciconfig_read
903	data8 0				// pciconfig_write
904	data8 0				// perfmonctl		// 1175
905	data8 0				// sigaltstack
906	data8 0				// rt_sigaction
907	data8 0				// rt_sigpending
908	data8 fsys_rt_sigprocmask	// rt_sigprocmask
909	data8 0				// rt_sigqueueinfo	// 1180
910	data8 0				// rt_sigreturn
911	data8 0				// rt_sigsuspend
912	data8 0				// rt_sigtimedwait
913	data8 0				// getcwd
914	data8 0				// capget		// 1185
915	data8 0				// capset
916	data8 0				// sendfile
917	data8 0
918	data8 0
919	data8 0				// socket		// 1190
920	data8 0				// bind
921	data8 0				// connect
922	data8 0				// listen
923	data8 0				// accept
924	data8 0				// getsockname		// 1195
925	data8 0				// getpeername
926	data8 0				// socketpair
927	data8 0				// send
928	data8 0				// sendto
929	data8 0				// recv			// 1200
930	data8 0				// recvfrom
931	data8 0				// shutdown
932	data8 0				// setsockopt
933	data8 0				// getsockopt
934	data8 0				// sendmsg		// 1205
935	data8 0				// recvmsg
936	data8 0				// pivot_root
937	data8 0				// mincore
938	data8 0				// madvise
939	data8 0				// newstat		// 1210
940	data8 0				// newlstat
941	data8 0				// newfstat
942	data8 0				// clone2
943	data8 0				// getdents64
944	data8 0				// getunwind		// 1215
945	data8 0				// readahead
946	data8 0				// setxattr
947	data8 0				// lsetxattr
948	data8 0				// fsetxattr
949	data8 0				// getxattr		// 1220
950	data8 0				// lgetxattr
951	data8 0				// fgetxattr
952	data8 0				// listxattr
953	data8 0				// llistxattr
954	data8 0				// flistxattr		// 1225
955	data8 0				// removexattr
956	data8 0				// lremovexattr
957	data8 0				// fremovexattr
958	data8 0				// tkill
959	data8 0				// futex		// 1230
960	data8 0				// sched_setaffinity
961	data8 0				// sched_getaffinity
962	data8 fsys_set_tid_address	// set_tid_address
963	data8 0				// fadvise64_64
964	data8 0				// tgkill		// 1235
965	data8 0				// exit_group
966	data8 0				// lookup_dcookie
967	data8 0				// io_setup
968	data8 0				// io_destroy
969	data8 0				// io_getevents		// 1240
970	data8 0				// io_submit
971	data8 0				// io_cancel
972	data8 0				// epoll_create
973	data8 0				// epoll_ctl
974	data8 0				// epoll_wait		// 1245
975	data8 0				// restart_syscall
976	data8 0				// semtimedop
977	data8 0				// timer_create
978	data8 0				// timer_settime
979	data8 0				// timer_gettime 	// 1250
980	data8 0				// timer_getoverrun
981	data8 0				// timer_delete
982	data8 0				// clock_settime
983	data8 fsys_clock_gettime	// clock_gettime
984	data8 0				// clock_getres		// 1255
985	data8 0				// clock_nanosleep
986	data8 0				// fstatfs64
987	data8 0				// statfs64
988	data8 0				// mbind
989	data8 0				// get_mempolicy	// 1260
990	data8 0				// set_mempolicy
991	data8 0				// mq_open
992	data8 0				// mq_unlink
993	data8 0				// mq_timedsend
994	data8 0				// mq_timedreceive	// 1265
995	data8 0				// mq_notify
996	data8 0				// mq_getsetattr
997	data8 0				// kexec_load
998	data8 0				// vserver
999	data8 0				// waitid		// 1270
1000	data8 0				// add_key
1001	data8 0				// request_key
1002	data8 0				// keyctl
1003	data8 0				// ioprio_set
1004	data8 0				// ioprio_get		// 1275
1005	data8 0				// move_pages
1006	data8 0				// inotify_init
1007	data8 0				// inotify_add_watch
1008	data8 0				// inotify_rm_watch
1009	data8 0				// migrate_pages	// 1280
1010	data8 0				// openat
1011	data8 0				// mkdirat
1012	data8 0				// mknodat
1013	data8 0				// fchownat
1014	data8 0				// futimesat		// 1285
1015	data8 0				// newfstatat
1016	data8 0				// unlinkat
1017	data8 0				// renameat
1018	data8 0				// linkat
1019	data8 0				// symlinkat		// 1290
1020	data8 0				// readlinkat
1021	data8 0				// fchmodat
1022	data8 0				// faccessat
1023	data8 0
1024	data8 0							// 1295
1025	data8 0				// unshare
1026	data8 0				// splice
1027	data8 0				// set_robust_list
1028	data8 0				// get_robust_list
1029	data8 0				// sync_file_range	// 1300
1030	data8 0				// tee
1031	data8 0				// vmsplice
1032	data8 0
1033	data8 fsys_getcpu		// getcpu		// 1304
1034
1035	// fill in zeros for the remaining entries
1036	.zero:
1037	.space paravirt_fsyscall_table + 8*NR_syscalls - .zero, 0
1038