xref: /openbmc/linux/arch/arm64/include/asm/assembler.h (revision 4ee812f6)
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /*
3  * Based on arch/arm/include/asm/assembler.h, arch/arm/mm/proc-macros.S
4  *
5  * Copyright (C) 1996-2000 Russell King
6  * Copyright (C) 2012 ARM Ltd.
7  */
8 #ifndef __ASSEMBLY__
9 #error "Only include this from assembly code"
10 #endif
11 
12 #ifndef __ASM_ASSEMBLER_H
13 #define __ASM_ASSEMBLER_H
14 
15 #include <asm-generic/export.h>
16 
17 #include <asm/asm-offsets.h>
18 #include <asm/cpufeature.h>
19 #include <asm/cputype.h>
20 #include <asm/debug-monitors.h>
21 #include <asm/page.h>
22 #include <asm/pgtable-hwdef.h>
23 #include <asm/ptrace.h>
24 #include <asm/thread_info.h>
25 
/*
 * save_and_disable_daif - read the DAIF register into \flags, then set all
 * four mask bits through DAIFSet.
 */
	.macro	save_and_disable_daif, flags
	mrs	\flags, daif			// keep the previous mask state
	msr	daifset, #(8 | 4 | 2 | 1)	// same immediate as #0xf
	.endm
30 
/*
 * disable_daif - set all four DAIF mask bits without saving the old state.
 */
	.macro	disable_daif
	msr	daifset, #(8 | 4 | 2 | 1)	// same immediate as #0xf
	.endm
34 
/*
 * enable_daif - clear all four DAIF mask bits.
 */
	.macro	enable_daif
	msr	daifclr, #(8 | 4 | 2 | 1)	// same immediate as #0xf
	.endm
38 
/*
 * restore_daif - write \flags back to the DAIF register.
 *
 *	flags:	register holding the value to restore (required), e.g. one
 *		filled in by save_and_disable_daif
 */
	.macro	restore_daif, flags:req
	msr	daif, \flags
	.endm
42 
/*
 * inherit_daif - load DAIF with the D, A, I and F bits found in \pstate.
 *
 *	pstate:	register holding a saved PSTATE value (required, preserved)
 *	tmp:	scratch register (required, corrupted)
 *
 * Only valid for an AArch64 pstate: PSR_D_BIT is different for AArch32.
 */
	.macro	inherit_daif, pstate:req, tmp:req
	and	\tmp, \pstate, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT)
	msr	daif, \tmp
	.endm
48 
/*
 * enable_da_f - clear every DAIF mask bit except the IRQ one (bit value 2).
 * IRQ is the lowest priority flag, so the rest are unmasked unconditionally.
 */
	.macro	enable_da_f
	msr	daifclr, #(1 | 4 | 8)
	.endm
53 
/*
 * save_and_disable_irq - read DAIF into \flags, then set only the IRQ mask
 * bit. Pair with restore_irq to put the saved state back.
 */
	.macro	save_and_disable_irq, flags
	mrs	\flags, daif			// keep the previous mask state
	msr	daifset, #2			// 2 selects the IRQ mask bit
	.endm
61 
/*
 * restore_irq - write \flags (as produced by save_and_disable_irq) back to
 * DAIF. The whole register is written, not just the IRQ bit.
 */
	.macro	restore_irq, flags
	msr	daif, \flags
	.endm
65 
/*
 * enable_dbg - clear the debug mask bit (immediate 8) through DAIFClr.
 */
	.macro	enable_dbg
	msr	daifclr, #8
	.endm
69 
/*
 * disable_step_tsk - clear DBG_MDSCR_SS in MDSCR_EL1 for a task that has
 * TIF_SINGLESTEP set.
 *
 *	flgs:	register holding the thread flags (preserved)
 *	tmp:	scratch register, corrupted only when the flag is set
 *
 * When TIF_SINGLESTEP is clear the whole update is skipped.
 */
	.macro	disable_step_tsk, flgs, tmp
	tbz	\flgs, #TIF_SINGLESTEP, 9990f	// flag clear: nothing to do
	mrs	\tmp, mdscr_el1
	bic	\tmp, \tmp, #DBG_MDSCR_SS
	msr	mdscr_el1, \tmp
	isb					// Synchronise with enable_dbg
9990:
	.endm
78 
/*
 * enable_step_tsk - set DBG_MDSCR_SS in MDSCR_EL1 for a task that has
 * TIF_SINGLESTEP set. Must be called with daif masked.
 *
 *	flgs:	register holding the thread flags (preserved)
 *	tmp:	scratch register, corrupted only when the flag is set
 *
 * Unlike disable_step_tsk, no isb follows the register write here.
 */
	.macro	enable_step_tsk, flgs, tmp
	tbz	\flgs, #TIF_SINGLESTEP, 9990f	// flag clear: nothing to do
	mrs	\tmp, mdscr_el1
	orr	\tmp, \tmp, #DBG_MDSCR_SS
	msr	mdscr_el1, \tmp
9990:
	.endm
87 
/*
 * smp_dmb - SMP data memory barrier.
 *
 *	opt:	barrier option, passed unchanged as the operand of dmb
 */
	.macro	smp_dmb, opt
	dmb	\opt
	.endm
94 
/*
 * esb - RAS Error Synchronization barrier.
 *
 * Emitted as "hint #16" when CONFIG_ARM64_RAS_EXTN is enabled and as a plain
 * nop otherwise, so the macro always occupies exactly one instruction.
 */
	.macro	esb
#ifndef CONFIG_ARM64_RAS_EXTN
	nop
#else
	hint	#16
#endif
	.endm
105 
/*
 * csdb - value prediction barrier, emitted through its hint encoding.
 */
	.macro	csdb
	hint	#20
	.endm
112 
/*
 * sb - speculation barrier.
 *
 * The sequence assembled for CPUs without ARM64_HAS_SB is "dsb nsh; isb".
 * The alternative for CPUs with the capability is SB_BARRIER_INSN followed
 * by a nop, which keeps both sequences two instructions long.
 */
	.macro	sb
alternative_if_not ARM64_HAS_SB
	dsb	nsh
	isb
alternative_else
	SB_BARRIER_INSN
	nop
alternative_endif
	.endm
125 
/*
 * nops - emit a run of nop instructions.
 *
 *	num:	how many nops to emit (used as the .rept count)
 */
	.macro	nops, num
	.rept	\num
	nop
	.endr
	.endm
134 
/*
 * _asm_extable - emit an entry into the exception table.
 *
 *	from:	label of the instruction the entry covers
 *	to:	label to continue at (the fixup)
 *
 * The entry is two 32-bit values placed in the __ex_table section on an
 * 8-byte boundary, each written as an offset from the location counter.
 */
	.macro		_asm_extable, from, to
	.pushsection	__ex_table, "a"
	.p2align	3			// same as .align 3 on AArch64
	.long		(\from - .), (\to - .)
	.popsection
	.endm
144 
/*
 * USER(fixup, insn...) - emit @insn under the local label 9999 and add an
 * exception table entry that maps that label to @fixup.
 */
#define USER(fixup, insn...)			\
9999:	insn;					\
	_asm_extable	9999b, fixup
148 
/*
 * Register aliases: lets code refer to x30 by name.
 */
lr	.req	x30			// link register
153 
/*
 * ventry - one vector table entry: align to 128 bytes (2^7), then branch
 * to \label.
 */
	.macro	ventry	label
	.p2align 7				// same as .align 7 on AArch64
	b	\label
	.endm
161 
/*
 * CPU_BE(code) / CPU_LE(code) - select code by configured endianness.
 *
 * CPU_BE() expands to its argument only when CONFIG_CPU_BIG_ENDIAN is set,
 * CPU_LE() only when it is not. The other one expands to nothing.
 */
#ifdef CONFIG_CPU_BIG_ENDIAN
#define CPU_BE(code...) code
#define CPU_LE(code...)
#else
#define CPU_BE(code...)
#define CPU_LE(code...) code
#endif
179 
/*
 * regs_to_64 - build a 64-bit value in \rd from two registers that each
 * carry 32 bits: \rd = \lbits | (\hbits << 32).
 *
 * The second and third macro arguments swap roles on big endian kernels, so
 * callers pass the two registers in the same order for either endianness.
 */
#ifdef CONFIG_CPU_BIG_ENDIAN
	.macro	regs_to_64, rd, hbits, lbits
#else
	.macro	regs_to_64, rd, lbits, hbits
#endif
	orr	\rd, \lbits, \hbits, lsl #32
	.endm
192 
/*
 * Pseudo-ops for PC-relative adr/ldr/str <reg>, <symbol> where
 * <symbol> is within the range +/- 4 GB of the PC.
 *
 * adr_l - load the address of a symbol
 *
 *	dst:	destination register (64 bit wide)
 *	sym:	name of the symbol
 */
	.macro	adr_l, dst, sym
	adrp	\dst, \sym			// 4 KB page of the symbol
	add	\dst, \dst, #:lo12:\sym		// plus the offset inside that page
	.endm
205 
/*
 * ldr_l - load the value stored at a symbol
 *
 *	dst:	destination register (32 or 64 bit wide)
 *	sym:	name of the symbol
 *	tmp:	optional 64-bit scratch register. Needed when <dst> is a
 *		32-bit wide register, because such a register cannot hold
 *		the address. When omitted, <dst> itself holds the page
 *		address before being overwritten by the load.
 */
	.macro	ldr_l, dst, sym, tmp=
	.ifnb	\tmp
	adrp	\tmp, \sym
	ldr	\dst, [\tmp, :lo12:\sym]
	.else
	adrp	\dst, \sym
	ldr	\dst, [\dst, :lo12:\sym]
	.endif
	.endm
222 
/*
 * str_l - store a register to a symbol
 *
 *	src:	source register (32 or 64 bit wide), preserved
 *	sym:	name of the symbol
 *	tmp:	mandatory 64-bit scratch register. It receives the page
 *		address so that <src> does not have to be overwritten.
 */
	.macro	str_l, src, sym, tmp
	adrp	\tmp, \sym
	str	\src, [\tmp, :lo12:\sym]
	.endm
233 
/*
 * adr_this_cpu - address of a per-cpu variable on the current CPU
 *
 *	dst:	result of per_cpu(sym, smp_processor_id()) (can be SP)
 *	sym:	name of the per-cpu variable
 *	tmp:	scratch register (corrupted)
 *
 * The value added to the symbol address is read from tpidr_el1, or from
 * tpidr_el2 once the ARM64_HAS_VIRT_HOST_EXTN alternative is applied.
 */
	.macro adr_this_cpu, dst, sym, tmp
	adrp	\tmp, \sym
	add	\dst, \tmp, :lo12:\sym
alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
	mrs	\tmp, tpidr_el1
alternative_else
	mrs	\tmp, tpidr_el2
alternative_endif
	add	\dst, \dst, \tmp
	.endm
249 
/*
 * ldr_this_cpu - load a per-cpu variable on the current CPU
 *
 *	dst:	result of READ_ONCE(per_cpu(sym, smp_processor_id()))
 *	sym:	name of the per-cpu variable
 *	tmp:	scratch register (corrupted)
 *
 * <dst> first holds the symbol address. The load then indexes it with the
 * value read from tpidr_el1, or from tpidr_el2 once the
 * ARM64_HAS_VIRT_HOST_EXTN alternative is applied.
 */
	.macro ldr_this_cpu dst, sym, tmp
	adrp	\dst, \sym			// open-coded adr_l \dst, \sym
	add	\dst, \dst, :lo12:\sym
alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
	mrs	\tmp, tpidr_el1
alternative_else
	mrs	\tmp, tpidr_el2
alternative_endif
	ldr	\dst, [\dst, \tmp]
	.endm
264 
/*
 * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm)
 *
 *	rd:	receives the mm pointer
 *	rn:	holds the vma pointer
 */
	.macro	vma_vm_mm, rd, rn
	ldr	\rd, [\rn, #VMA_VM_MM]
	.endm
271 
/*
 * mmid - get context id from mm pointer (mm->context.id)
 *
 *	rd:	receives the context id
 *	rn:	holds the mm pointer
 */
	.macro	mmid, rd, rn
	ldr	\rd, [\rn, #MM_CONTEXT_ID]
	.endm

/*
 * read_ctr - read CTR_EL0 into \reg.
 *
 * If the system has mismatched register fields
 * (ARM64_MISMATCHED_CACHE_TYPE), the alternative loads the system wide safe
 * value from arm64_ftr_reg_ctrel0.sys_val instead of reading the register.
 * The nop after the mrs keeps the default sequence the same length as the
 * two-instruction ldr_l expansion.
 */
	.macro	read_ctr, reg
alternative_if_not ARM64_MISMATCHED_CACHE_TYPE
	mrs	\reg, ctr_el0			// read CTR
	nop					// pad to two instructions
alternative_else
	ldr_l	\reg, arm64_ftr_reg_ctrel0 + ARM64_FTR_SYSVAL
alternative_endif
	.endm
290 
291 
/*
 * raw_dcache_line_size - get the minimum D-cache line size on this CPU
 * from the CTR register.
 *
 *	reg:	receives the line size in bytes
 *	tmp:	scratch register (corrupted)
 *
 * The size is 4 << CTR_EL0[19:16].
 */
	.macro	raw_dcache_line_size, reg, tmp
	mrs	\tmp, ctr_el0			// read CTR
	ubfx	\tmp, \tmp, #16, #4		// bits [19:16]: line size encoding
	mov	\reg, #4			// bytes per word
	lsl	\reg, \reg, \tmp		// actual cache line size
	.endm
302 
/*
 * dcache_line_size - get the safe D-cache line size across all CPUs
 *
 *	reg:	receives the line size in bytes
 *	tmp:	scratch register (corrupted)
 *
 * Same computation as raw_dcache_line_size, but the CTR value comes from
 * read_ctr rather than from a direct register read.
 */
	.macro	dcache_line_size, reg, tmp
	read_ctr	\tmp
	ubfx		\tmp, \tmp, #16, #4	// bits [19:16]: line size encoding
	mov		\reg, #4		// bytes per word
	lsl		\reg, \reg, \tmp	// actual cache line size
	.endm
312 
/*
 * raw_icache_line_size - get the minimum I-cache line size on this CPU
 * from the CTR register.
 *
 *	reg:	receives the line size in bytes
 *	tmp:	scratch register (corrupted)
 *
 * The size is 4 << CTR_EL0[3:0].
 */
	.macro	raw_icache_line_size, reg, tmp
	mrs	\tmp, ctr_el0			// read CTR
	and	\tmp, \tmp, #0xf		// bits [3:0]: line size encoding
	mov	\reg, #4			// bytes per word
	lsl	\reg, \reg, \tmp		// actual cache line size
	.endm
323 
/*
 * icache_line_size - get the safe I-cache line size across all CPUs
 *
 *	reg:	receives the line size in bytes
 *	tmp:	scratch register (corrupted)
 *
 * Same computation as raw_icache_line_size, but the CTR value comes from
 * read_ctr rather than from a direct register read.
 */
	.macro	icache_line_size, reg, tmp
	read_ctr	\tmp
	and		\tmp, \tmp, #0xf	// bits [3:0]: line size encoding
	mov		\reg, #4		// bytes per word
	lsl		\reg, \reg, \tmp	// actual cache line size
	.endm
333 
/*
 * tcr_set_t0sz - update TCR.T0SZ so that we can load the ID map
 *
 *	valreg:	register holding the TCR value, updated in place
 *	t0sz:	register whose low TCR_TxSZ_WIDTH bits are inserted at
 *		TCR_T0SZ_OFFSET
 */
	.macro	tcr_set_t0sz, valreg, t0sz
	bfi	\valreg, \t0sz, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
	.endm
340 
/*
 * tcr_set_t1sz - update TCR.T1SZ
 *
 *	valreg:	register holding the TCR value, updated in place
 *	t1sz:	register whose low TCR_TxSZ_WIDTH bits are inserted at
 *		TCR_T1SZ_OFFSET
 */
	.macro	tcr_set_t1sz, valreg, t1sz
	bfi	\valreg, \t1sz, #TCR_T1SZ_OFFSET, #TCR_TxSZ_WIDTH
	.endm
347 
/*
 * tcr_compute_pa_size - set TCR.(I)PS to the highest supported
 * ID_AA64MMFR0_EL1.PARange value
 *
 *	tcr:		register with the TCR_ELx value to be updated
 *	pos:		IPS or PS bitfield position
 *	tmp{0,1}:	temporary registers (corrupted)
 *
 * The 3-bit value read from the ID register is capped at
 * ID_AA64MMFR0_PARANGE_MAX before it is inserted. Condition flags are
 * corrupted by the comparison.
 */
	.macro	tcr_compute_pa_size, tcr, pos, tmp0, tmp1
	mrs	\tmp0, ID_AA64MMFR0_EL1
	// Narrow PARange to fit the PS field in TCR_ELx
	ubfx	\tmp0, \tmp0, #ID_AA64MMFR0_PARANGE_SHIFT, #3
	mov	\tmp1, #ID_AA64MMFR0_PARANGE_MAX
	cmp	\tmp0, \tmp1
	csel	\tmp0, \tmp0, \tmp1, ls		// tmp0 = min(tmp0, tmp1), unsigned
	bfi	\tcr, \tmp0, \pos, #3
	.endm
365 
/*
 * __dcache_op_workaround_clean_cache - helper for dcache_by_line_op
 *
 * Issues "dc \op, \kaddr" by default. On CPUs with
 * ARM64_WORKAROUND_CLEAN_CACHE the alternative issues "dc civac, \kaddr"
 * instead, whatever \op was requested.
 */
	.macro __dcache_op_workaround_clean_cache, op, kaddr
alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
	dc	\op, \kaddr
alternative_else
	dc	civac, \kaddr
alternative_endif
	.endm

/*
 * dcache_by_line_op - perform a data cache maintenance operation for the
 * interval [kaddr, kaddr + size)
 *
 * 	op:		operation passed to dc instruction
 * 	domain:		domain used in dsb instruction
 * 	kaddr:		starting virtual address of the region
 * 	size:		size of the region
 * 	Corrupts:	kaddr, size, tmp1, tmp2, condition flags
 *
 * \kaddr is first rounded down to a cache line boundary, and \size is
 * turned into the end address. The loop body always runs at least once.
 * cvau and cvac go through the clean-cache workaround helper. cvap and
 * cvadp are emitted as raw sys encodings rather than as dc mnemonics.
 */
	.macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2
	dcache_line_size \tmp1, \tmp2
	add	\size, \kaddr, \size		// size = end address
	sub	\tmp2, \tmp1, #1		// tmp2 = line size - 1 (mask)
	bic	\kaddr, \kaddr, \tmp2		// align start down to a line
9998:
	.ifc	\op, cvau
	__dcache_op_workaround_clean_cache \op, \kaddr
	.else
	  .ifc	\op, cvac
	__dcache_op_workaround_clean_cache \op, \kaddr
	  .else
	    .ifc	\op, cvap
	sys	3, c7, c12, 1, \kaddr	// dc cvap
	    .else
	      .ifc	\op, cvadp
	sys	3, c7, c13, 1, \kaddr	// dc cvadp
	      .else
	dc	\op, \kaddr
	      .endif
	    .endif
	  .endif
	.endif
	add	\kaddr, \kaddr, \tmp1		// advance one cache line
	cmp	\kaddr, \size
	b.lo	9998b				// loop while kaddr < end
	dsb	\domain
	.endm
412 
/*
 * invalidate_icache_by_line - perform an instruction cache maintenance for
 * the interval [start, end)
 *
 * 	start, end:	virtual addresses describing the region (preserved)
 *	label:		A label to branch to on user fault.
 * 	Corrupts:	tmp1, tmp2, condition flags
 *
 * \tmp1 holds the I-cache line size and \tmp2 walks the region, starting
 * from \start rounded down to a line boundary. The loop body always runs at
 * least once and is followed by "dsb ish; isb".
 */
	.macro invalidate_icache_by_line start, end, tmp1, tmp2, label
	icache_line_size \tmp1, \tmp2
	sub	\tmp2, \tmp1, #1		// tmp2 = line size - 1 (mask)
	bic	\tmp2, \start, \tmp2		// tmp2 = start aligned down
9997:
USER(\label, ic	ivau, \tmp2)			// invalidate I line PoU
	add	\tmp2, \tmp2, \tmp1		// advance one cache line
	cmp	\tmp2, \end
	b.lo	9997b				// loop while tmp2 < end
	dsb	ish
	isb
	.endm
433 
/*
 * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present
 *
 *	tmpreg:	scratch register (corrupted)
 *
 * The 4-bit PMUVer field of ID_AA64DFR0_EL1 is extracted as a signed value.
 * The register is zeroed only when that value is at least 1. Condition
 * flags are corrupted.
 */
	.macro	reset_pmuserenr_el0, tmpreg
	mrs	\tmpreg, id_aa64dfr0_el1
	sbfx	\tmpreg, \tmpreg, #ID_AA64DFR0_PMUVER_SHIFT, #4
	cmp	\tmpreg, #1			// Skip if no PMU present
	b.lt	9000f
	msr	pmuserenr_el0, xzr		// Disable PMU access from EL0
9000:
	.endm
445 
/*
 * copy_page - copy src to dest using temp registers t1-t8
 *
 * Each loop iteration moves 64 bytes: four ldp loads from \src, then four
 * stnp stores to \dest. Both pointers are advanced as the copy proceeds.
 * The loop stops once \src reaches a PAGE_SIZE boundary, so exactly one
 * page is copied when \src starts out page aligned.
 *
 * Corrupts: src, dest, t1-t8, condition flags.
 */
	.macro copy_page dest:req src:req t1:req t2:req t3:req t4:req t5:req t6:req t7:req t8:req
9998:	ldp	\t1, \t2, [\src]
	ldp	\t3, \t4, [\src, #16]
	ldp	\t5, \t6, [\src, #32]
	ldp	\t7, \t8, [\src, #48]
	add	\src, \src, #64			// 64 bytes consumed
	stnp	\t1, \t2, [\dest]
	stnp	\t3, \t4, [\dest, #16]
	stnp	\t5, \t6, [\dest, #32]
	stnp	\t7, \t8, [\dest, #48]
	add	\dest, \dest, #64		// 64 bytes produced
	tst	\src, #(PAGE_SIZE - 1)		// reached a page boundary?
	b.ne	9998b
	.endm
463 
/*
 * ENDPIPROC(sym) - close a function and annotate it as position
 * independent, i.e., safe to be called before the kernel virtual mapping
 * is activated.
 *
 * Defines a global function symbol __pi_<sym> with the same value and size
 * as <sym>, then ends <sym> with ENDPROC().
 */
#define ENDPIPROC(sym)			\
	.globl	__pi_##sym;		\
	.type 	__pi_##sym, %function;	\
	.set	__pi_##sym, sym;	\
	.size	__pi_##sym, . - sym;	\
	ENDPROC(sym)
474 
/*
 * NOKPROBE(fn) - annotate a function as being unsuitable for kprobes.
 *
 * With CONFIG_KPROBES the address of <fn> is added as a 64-bit entry to the
 * "_kprobe_blacklist" section. Without it the macro expands to nothing.
 */
#ifndef CONFIG_KPROBES
#define NOKPROBE(fn)
#else
#define NOKPROBE(fn)				\
	.pushsection "_kprobe_blacklist", "aw";	\
	.quad	fn;				\
	.popsection;
#endif
486 
/*
 * EXPORT_SYMBOL_NOKASAN(name) - same as EXPORT_SYMBOL(name), except that it
 * expands to nothing when CONFIG_KASAN is enabled.
 */
#ifndef CONFIG_KASAN
#define EXPORT_SYMBOL_NOKASAN(name)	EXPORT_SYMBOL(name)
#else
#define EXPORT_SYMBOL_NOKASAN(name)
#endif
492 
/*
 * le64sym - emit a 64-bit absolute little endian symbol reference in a way
 * that ensures that it will be resolved at build time, even when building a
 * PIE binary.
 *
 * The value is written as two 32-bit words, <sym>_lo32 followed by
 * <sym>_hi32. This requires cooperation from the linker script, which must
 * emit the lo32/hi32 halves individually.
 */
	.macro	le64sym, sym
	.long	\sym\()_lo32			// low word first
	.long	\sym\()_hi32
	.endm
503 
/*
 * mov_q - move an immediate constant into a 64-bit register using
 *         between 2 and 4 movz/movk instructions (depending on the
 *         magnitude and sign of the operand)
 *
 *	reg:	destination register
 *	val:	constant expression
 *
 * Instruction count by case:
 *   2 - (val >> 31) is 0 or 0x1ffffffff:	movz g1_s, movk g0
 *   3 - (val >> 47) is 0 or 0x1ffff:		movz g2_s, movk g1, movk g0
 *   4 - anything else:			movz g3, movk g2, movk g1, movk g0
 */
	.macro	mov_q, reg, val
	.if (((\val) >> 31) == 0 || ((\val) >> 31) == 0x1ffffffff)
	movz	\reg, :abs_g1_s:\val
	.else
	  .if (((\val) >> 47) == 0 || ((\val) >> 47) == 0x1ffff)
	movz	\reg, :abs_g2_s:\val
	  .else
	movz	\reg, :abs_g3:\val
	movk	\reg, :abs_g2_nc:\val
	  .endif
	movk	\reg, :abs_g1_nc:\val		// shared by the 3 and 4 insn cases
	.endif
	movk	\reg, :abs_g0_nc:\val		// shared by every case
	.endm
523 
/*
 * get_current_task - return the current task_struct.
 *
 *	rd:	receives the pointer, which is read from sp_el0
 */
	.macro	get_current_task, rd
	mrs	\rd, sp_el0
	.endm
530 
/*
 * offset_ttbr1 - offset ttbr1 to allow for 48-bit kernel VAs set with
 * 52-bit PTRS_PER_PGD.
 *
 * 	ttbr:	Value of ttbr to set, modified.
 * 	tmp:	scratch register (corrupted when CONFIG_ARM64_VA_BITS_52)
 *
 * orr is used as it can cover the immediate value (and is idempotent).
 * The offset is skipped at run time when the LVA field of
 * ID_AA64MMFR2_EL1 is non-zero. Without CONFIG_ARM64_VA_BITS_52 the macro
 * expands to nothing.
 */
	.macro	offset_ttbr1, ttbr, tmp
#ifdef CONFIG_ARM64_VA_BITS_52
	mrs_s	\tmp, SYS_ID_AA64MMFR2_EL1
	and	\tmp, \tmp, #(0xf << ID_AA64MMFR2_LVA_SHIFT)
	cbnz	\tmp, .Lskipoffs_\@		// LVA field set: leave ttbr alone
	orr	\ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET
.Lskipoffs_\@ :
#endif
	.endm
546 
/*
 * restore_ttbr1 - perform the reverse of offset_ttbr1.
 *
 * 	ttbr:	ttbr value, modified.
 *
 * bic is used as it can cover the immediate value and, in future, won't need
 * to be nop'ed out when dealing with 52-bit kernel VAs. Unlike
 * offset_ttbr1, the bits are cleared without any run-time check. Without
 * CONFIG_ARM64_VA_BITS_52 the macro expands to nothing.
 */
	.macro	restore_ttbr1, ttbr
#ifdef CONFIG_ARM64_VA_BITS_52
	bic	\ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET
#endif
	.endm
557 
/*
 * phys_to_ttbr - arrange a physical address in a TTBR register, taking care
 * of 52-bit addresses.
 *
 * 	phys:	physical address, preserved
 * 	ttbr:	returns the TTBR value
 *
 * With CONFIG_ARM64_PA_BITS_52, \phys is ORed with itself shifted right by
 * 46 (so bits [51:48] also appear at [5:2]) and the result is masked with
 * TTBR_BADDR_MASK_52. Otherwise this is a plain register move.
 */
	.macro	phys_to_ttbr, ttbr, phys
#ifndef CONFIG_ARM64_PA_BITS_52
	mov	\ttbr, \phys
#else
	orr	\ttbr, \phys, \phys, lsr #46
	and	\ttbr, \ttbr, #TTBR_BADDR_MASK_52
#endif
	.endm
573 
/*
 * phys_to_pte - arrange a physical address in the layout used by a PTE.
 *
 * 	pte:	returns the PTE address bits
 * 	phys:	physical address, preserved
 *
 * With CONFIG_ARM64_PA_BITS_52, \phys is ORed with itself shifted right by
 * 36 (so bits [51:48] also appear at [15:12]) and the result is masked with
 * PTE_ADDR_MASK. Otherwise this is a plain register move.
 */
	.macro	phys_to_pte, pte, phys
#ifndef CONFIG_ARM64_PA_BITS_52
	mov	\pte, \phys
#else
	/*
	 * We assume \phys is 64K aligned and this is guaranteed by only
	 * supporting this configuration with 64K pages.
	 */
	orr	\pte, \phys, \phys, lsr #36
	and	\pte, \pte, #PTE_ADDR_MASK
#endif
	.endm
586 
/*
 * pte_to_phys - recover the physical address stored in a PTE.
 *
 * 	phys:	returns the physical address
 * 	pte:	page table entry, preserved
 *
 * With CONFIG_ARM64_PA_BITS_52 the result has bits [51:48] taken from
 * pte[15:12], bits [47:16] taken from pte[47:16], and bits [15:0] zero.
 * Otherwise the PTE is simply masked with PTE_ADDR_MASK.
 */
	.macro	pte_to_phys, phys, pte
#ifndef CONFIG_ARM64_PA_BITS_52
	and	\phys, \pte, #PTE_ADDR_MASK
#else
	ubfiz	\phys, \pte, #(48 - 16 - 12), #16	// pte[15:0] -> phys[35:20]
	bfxil	\phys, \pte, #16, #32			// pte[47:16] -> phys[31:0]
	lsl	\phys, \phys, #16			// move everything into place
#endif
	.endm
596 
/*
 * tcr_clear_errata_bits - clear TCR bits that trigger an erratum on this CPU.
 *
 *	tcr:		register holding the TCR value, updated in place
 *	tmp1, tmp2:	scratch registers
 *
 * With CONFIG_FUJITSU_ERRATUM_010001, MIDR_EL1 is masked and compared
 * against the affected part. On a match the bits in
 * TCR_CLEAR_FUJITSU_ERRATUM_010001 are cleared from \tcr. The scratch
 * registers and the condition flags are corrupted in that configuration.
 * Without the option the macro expands to nothing.
 */
	.macro	tcr_clear_errata_bits, tcr, tmp1, tmp2
#ifdef CONFIG_FUJITSU_ERRATUM_010001
	mrs	\tmp1, midr_el1

	mov_q	\tmp2, MIDR_FUJITSU_ERRATUM_010001_MASK
	and	\tmp1, \tmp1, \tmp2
	mov_q	\tmp2, MIDR_FUJITSU_ERRATUM_010001
	cmp	\tmp1, \tmp2
	b.ne	10f				// not an affected CPU

	mov_q	\tmp2, TCR_CLEAR_FUJITSU_ERRATUM_010001
	bic	\tcr, \tcr, \tmp2
10:
#endif /* CONFIG_FUJITSU_ERRATUM_010001 */
	.endm
615 
/*
 * pre_disable_mmu_workaround - erratum workaround to use before disabling
 * the MMU.
 *
 * With CONFIG_QCOM_FALKOR_ERRATUM_E1041 this inserts an ISB. Place the
 * macro immediately prior to the MSR that will change SCTLR_ELn[M] from a
 * value of 1 to 0. Without the option the macro expands to nothing.
 */
	.macro pre_disable_mmu_workaround
#ifdef CONFIG_QCOM_FALKOR_ERRATUM_E1041
	isb
#endif
	.endm
625 
/*
 * frame_push - Push @regcount callee saved registers to the stack,
 *              starting at x19, as well as x29/x30, and set x29 to
 *              the new value of sp. Add @extra bytes of stack space
 *              for locals.
 *
 *	regcount:	number of registers to save (required)
 *	extra:		optional number of additional bytes to allocate
 *
 * Thin wrapper that invokes __frame with the "st" operation.
 */
	.macro		frame_push, regcount:req, extra
	__frame		st, \regcount, \extra
	.endm
635 
/*
 * frame_pop  - Pop the callee saved registers from the stack that were
 *              pushed in the most recent call to frame_push, as well
 *              as x29/x30 and any extra stack space that may have been
 *              allocated.
 *
 * Takes no arguments: the layout is the one recorded by the preceding
 * frame_push. Thin wrapper that invokes __frame with the "ld" operation.
 */
	.macro		frame_pop
	__frame		ld
	.endm
645 
/*
 * __frame_regs - save or restore one register pair slot of the frame
 *
 *	reg1, reg2:	the two registers that belong to this slot
 *	op:		"st" or "ld"; "r" or "p" is appended to form the
 *			str/ldr or stp/ldp mnemonic
 *	num:		index of \reg1 counted from 1 (1, 3, 5, 7 or 9)
 *
 * Compared against the register count recorded in .Lframe_regcount:
 *   count == num:	only \reg1 is transferred
 *   count >  num:	both registers are transferred
 *   count <  num:	nothing is emitted
 * The slot lives at [sp, #(num + 1) * 8].
 */
	.macro		__frame_regs, reg1, reg2, op, num
	.if		.Lframe_regcount == \num
	\op\()r		\reg1, [sp, #(\num + 1) * 8]
	.elseif		.Lframe_regcount > \num
	\op\()p		\reg1, \reg2, [sp, #(\num + 1) * 8]
	.endif
	.endm
653 
/*
 * __frame - common implementation of frame_push ("st") and frame_pop ("ld")
 *
 *	op:		"st" to create a frame, "ld" to tear it down
 *	regcount:	number of callee saved registers, 0 to 10 ("st" only)
 *	extra:		extra bytes for locals, a multiple of 16 ("st" only)
 *
 * A push records the layout in three assembler symbols
 * (.Lframe_regcount, .Lframe_extra, .Lframe_local_offset), and the
 * matching pop reads them back. .Lframe_regcount is set to -1 while no
 * frame is open, which is how nesting and unmatched pops are detected.
 *
 * Frame layout, from the new sp upwards: x29/x30, then the saved registers
 * starting with x19, then the @extra bytes.
 */
	.macro		__frame, op, regcount, extra=0
	.ifc		\op, st
	.if		(\regcount) < 0 || (\regcount) > 10
	.error		"regcount should be in the range [0 ... 10]"
	.endif
	.if		((\extra) % 16) != 0
	.error		"extra should be a multiple of 16 bytes"
	.endif
	.ifdef		.Lframe_regcount
	.if		.Lframe_regcount != -1
	.error		"frame_push/frame_pop may not be nested"
	.endif
	.endif
	/*
	 * Parenthesise the macro arguments, as the range checks above do, so
	 * that an expression argument cannot re-associate with the arithmetic.
	 */
	.set		.Lframe_regcount, (\regcount)
	.set		.Lframe_extra, (\extra)
	.set		.Lframe_local_offset, (((\regcount) + 3) / 2) * 16
	stp		x29, x30, [sp, #-.Lframe_local_offset - .Lframe_extra]!
	mov		x29, sp
	.endif

	.ifc		\op, ld
	/*
	 * A pop with no push anywhere earlier in the file: mark "no open
	 * frame" so that the .if tests in __frame_regs see a defined symbol
	 * and the explicit diagnostic below is reported.
	 */
	.ifndef		.Lframe_regcount
	.set		.Lframe_regcount, -1
	.endif
	.endif

	__frame_regs	x19, x20, \op, 1
	__frame_regs	x21, x22, \op, 3
	__frame_regs	x23, x24, \op, 5
	__frame_regs	x25, x26, \op, 7
	__frame_regs	x27, x28, \op, 9

	.ifc		\op, ld
	.if		.Lframe_regcount == -1
	.error		"frame_pop without a matching frame_push"
	.endif
	ldp		x29, x30, [sp], #.Lframe_local_offset + .Lframe_extra
	.set		.Lframe_regcount, -1	// frame is closed again
	.endif
	.endm
688 
/*
 * Check whether to yield to another runnable task from kernel mode NEON code
 * (which runs with preemption disabled).
 *
 * The full form is a three part sequence:
 *
 * if_will_cond_yield_neon
 *        // pre-yield patchup code
 * do_cond_yield_neon
 *        // post-yield patchup code
 * endif_yield_neon    <label>
 *
 * <label> is optional and marks the point where execution will resume after
 * a yield has been performed. If omitted, execution resumes right after the
 * endif_yield_neon invocation. The entire sequence, including the provided
 * patchup code, is omitted from the image if CONFIG_PREEMPT is not defined.
 *
 * When no patchup code is required, the sequence may be abbreviated to
 *
 * cond_yield_neon <label>
 *
 * which is what this macro expands to.
 *
 * The patchup code does not support assembler directives that change the
 * output section; any use of such directives is undefined.
 *
 * The yield itself consists of the following:
 * - Check whether the preempt count is exactly 1 and a reschedule is also
 *   needed. If so, calling of preempt_enable() in kernel_neon_end() will
 *   trigger a reschedule. If it is not the case, yielding is pointless.
 * - Disable and re-enable kernel mode NEON, and branch to the yield fixup
 *   code.
 *
 * This macro sequence may clobber all CPU state that is not guaranteed by the
 * AAPCS to be preserved across an ordinary function call.
 */

	.macro		cond_yield_neon, lbl
	if_will_cond_yield_neon			// test, then open the yield path
	do_cond_yield_neon			// the yield itself
	endif_yield_neon	\lbl		// close the path, resume at \lbl
	.endm
729 
/*
 * if_will_cond_yield_neon - open a conditional NEON yield sequence
 *
 * With CONFIG_PREEMPT: loads the TSK_TI_PREEMPT field of the current task,
 * subtracts PREEMPT_DISABLE_OFFSET and branches to the yield path when the
 * result is zero. The yield path is placed in subsection 1, and everything
 * assembled after this macro lands there until endif_yield_neon switches
 * back. Corrupts x0.
 *
 * Without CONFIG_PREEMPT: switches to the ".discard.cond_yield_neon"
 * section, so the code that follows up to endif_yield_neon is assembled
 * there.
 */
	.macro		if_will_cond_yield_neon
#ifndef CONFIG_PREEMPT
	.section	".discard.cond_yield_neon", "ax"
#else
	get_current_task	x0
	ldr		x0, [x0, #TSK_TI_PREEMPT]
	sub		x0, x0, #PREEMPT_DISABLE_OFFSET
	cbz		x0, .Lyield_\@
	/* fall through to endif_yield_neon */
	.subsection	1
.Lyield_\@ :
#endif
	.endm
743 
/*
 * do_cond_yield_neon - the yield step of the conditional NEON yield
 * sequence: call kernel_neon_end, then kernel_neon_begin. Being ordinary
 * calls, they overwrite x30.
 */
	.macro		do_cond_yield_neon
	bl		kernel_neon_end
	bl		kernel_neon_begin
	.endm
748 
/*
 * endif_yield_neon - close a conditional NEON yield sequence
 *
 *	lbl:	optional label to resume at after a yield. When blank,
 *		execution resumes right after this macro.
 *
 * Ends the yield path with a branch, then uses .previous to return to the
 * section that was active before if_will_cond_yield_neon switched away.
 */
	.macro		endif_yield_neon, lbl
	.ifb		\lbl
	b		.Lyield_out_\@
	.else
	b		\lbl
	.endif
	.previous
.Lyield_out_\@ :
	.endm
758 
759 #endif	/* __ASM_ASSEMBLER_H */
760