xref: /openbmc/linux/arch/arm64/include/asm/assembler.h (revision 8365a898)
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /*
3  * Based on arch/arm/include/asm/assembler.h, arch/arm/mm/proc-macros.S
4  *
5  * Copyright (C) 1996-2000 Russell King
6  * Copyright (C) 2012 ARM Ltd.
7  */
8 #ifndef __ASSEMBLY__
9 #error "Only include this from assembly code"
10 #endif
11 
12 #ifndef __ASM_ASSEMBLER_H
13 #define __ASM_ASSEMBLER_H
14 
15 #include <asm-generic/export.h>
16 
17 #include <asm/asm-offsets.h>
18 #include <asm/cpufeature.h>
19 #include <asm/cputype.h>
20 #include <asm/debug-monitors.h>
21 #include <asm/page.h>
22 #include <asm/pgtable-hwdef.h>
23 #include <asm/ptrace.h>
24 #include <asm/thread_info.h>
25 
/*
 * save_and_disable_daif - read the DAIF register into @flags, then set all
 * four mask bits with DAIFSet #0xf.
 *
 *	flags:	register receiving the DAIF value read before masking
 */
26 	.macro save_and_disable_daif, flags
27 	mrs	\flags, daif
28 	msr	daifset, #0xf
29 	.endm
30 
/* disable_daif - set all four DAIF mask bits (DAIFSet #0xf); nothing is saved. */
31 	.macro disable_daif
32 	msr	daifset, #0xf
33 	.endm
34 
/* enable_daif - clear all four DAIF mask bits (DAIFClr #0xf). */
35 	.macro enable_daif
36 	msr	daifclr, #0xf
37 	.endm
38 
/*
 * restore_daif - write @flags to the DAIF register.
 *
 *	flags:	register holding the DAIF value to install (required argument)
 */
39 	.macro	restore_daif, flags:req
40 	msr	daif, \flags
41 	.endm
42 
43 	/* IRQ is the lowest priority flag, unconditionally unmask the rest. */
44 	.macro enable_da_f
45 	msr	daifclr, #(8 | 4 | 1)	// clear D (8), A (4) and F (1); the I bit (2) is left untouched
46 	.endm
47 
48 /*
49  * Save/restore interrupts.
50  */
/*
 * save_and_disable_irq - read the DAIF register into @flags, then set only
 * the I mask bit (DAIFSet #2).
 *
 *	flags:	register receiving the DAIF value read before masking
 */
51 	.macro	save_and_disable_irq, flags
52 	mrs	\flags, daif
53 	msr	daifset, #2
54 	.endm
55 
/*
 * restore_irq - write @flags to the DAIF register. The whole register is
 * written, not just the I bit.
 */
56 	.macro	restore_irq, flags
57 	msr	daif, \flags
58 	.endm
59 
/* enable_dbg - clear the D mask bit only (DAIFClr #8). */
60 	.macro	enable_dbg
61 	msr	daifclr, #8
62 	.endm
63 
/*
 * disable_step_tsk - if bit TIF_SINGLESTEP is set in @flgs, clear
 * DBG_MDSCR_SS in MDSCR_EL1 and issue an ISB; otherwise do nothing.
 *
 *	flgs:	register holding the flags word to test
 *	tmp:	scratch register, written only when the bit is set
 */
64 	.macro	disable_step_tsk, flgs, tmp
65 	tbz	\flgs, #TIF_SINGLESTEP, 9990f	// bit clear: skip the update
66 	mrs	\tmp, mdscr_el1
67 	bic	\tmp, \tmp, #DBG_MDSCR_SS
68 	msr	mdscr_el1, \tmp
69 	isb	// Synchronise with enable_dbg
70 9990:
71 	.endm
72 
73 	/* call with daif masked */
/*
 * enable_step_tsk - if bit TIF_SINGLESTEP is set in @flgs, set DBG_MDSCR_SS
 * in MDSCR_EL1; otherwise do nothing. No ISB is issued by this macro.
 *
 *	flgs:	register holding the flags word to test
 *	tmp:	scratch register, written only when the bit is set
 */
74 	.macro	enable_step_tsk, flgs, tmp
75 	tbz	\flgs, #TIF_SINGLESTEP, 9990f	// bit clear: skip the update
76 	mrs	\tmp, mdscr_el1
77 	orr	\tmp, \tmp, #DBG_MDSCR_SS
78 	msr	mdscr_el1, \tmp
79 9990:
80 	.endm
81 
82 /*
83  * RAS Error Synchronization barrier
84  */
/*
 * esb - always one instruction: "hint #16" when CONFIG_ARM64_RAS_EXTN is
 * defined, a plain nop otherwise.
 */
85 	.macro  esb
86 #ifdef CONFIG_ARM64_RAS_EXTN
87 	hint    #16
88 #else
89 	nop
90 #endif
91 	.endm
92 
93 /*
94  * Value prediction barrier
95  */
/* csdb - emitted through its hint encoding (hint #20), not by mnemonic. */
96 	.macro	csdb
97 	hint	#20
98 	.endm
99 
100 /*
101  * Speculation barrier
102  */
/*
 * sb - two-instruction slot. The default sequence is "dsb nsh; isb"; the
 * alternative for ARM64_HAS_SB is SB_BARRIER_INSN followed by a nop, so
 * both sequences have the same length.
 */
103 	.macro	sb
104 alternative_if_not ARM64_HAS_SB
105 	dsb	nsh
106 	isb
107 alternative_else
108 	SB_BARRIER_INSN
109 	nop
110 alternative_endif
111 	.endm
112 
113 /*
114  * NOP sequence
115  */
/*
 * nops - emit @num consecutive nop instructions.
 *
 *	num:	repeat count handed to .rept
 */
116 	.macro	nops, num
117 	.rept	\num
118 	nop
119 	.endr
120 	.endm
121 
122 /*
123  * Emit an entry into the exception table
124  */
/*
 *	from:	label of the instruction the entry is for
 *	to:	label recorded as the target for that instruction
 *
 * The entry is placed in section __ex_table, aligned to 2^3 bytes, and
 * consists of two 32-bit words, each holding its label as an offset from
 * the current location (.).
 */
125 	.macro		_asm_extable, from, to
126 	.pushsection	__ex_table, "a"
127 	.align		3
128 	.long		(\from - .), (\to - .)
129 	.popsection
130 	.endm
131 
/*
 * USER(l, x...) - emit the statement(s) x at local label 9999 and add an
 * exception table entry pairing that label with l.
 */
132 #define USER(l, x...)				\
133 9999:	x;					\
134 	_asm_extable	9999b, l
135 
136 /*
137  * Register aliases.
138  */
139 lr	.req	x30		// link register: "lr" can be written wherever x30 is meant
140 
141 /*
142  * Vector entry
143  */
/*
 * ventry - align to a 2^7 (128) byte boundary and emit a branch to @label.
 *
 *	label:	branch target for this vector slot
 */
144 	 .macro	ventry	label
145 	.align	7
146 	b	\label
147 	.endm
148 
149 /*
150  * Select code when configured for BE.
151  */
/* CPU_BE(code...) expands to its arguments under CONFIG_CPU_BIG_ENDIAN, else to nothing. */
152 #ifdef CONFIG_CPU_BIG_ENDIAN
153 #define CPU_BE(code...) code
154 #else
155 #define CPU_BE(code...)
156 #endif
157 
158 /*
159  * Select code when configured for LE.
160  */
/* CPU_LE(code...) expands to nothing under CONFIG_CPU_BIG_ENDIAN, else to its arguments. */
161 #ifdef CONFIG_CPU_BIG_ENDIAN
162 #define CPU_LE(code...)
163 #else
164 #define CPU_LE(code...) code
165 #endif
166 
167 /*
168  * Define a macro that constructs a 64-bit value by concatenating two
169  * 32-bit registers. Note that on big endian systems the order of the
170  * registers is swapped.
171  */
/*
 * Only the order of the second and third macro arguments depends on the
 * endianness; the instruction emitted is the same in both cases.
 */
172 #ifndef CONFIG_CPU_BIG_ENDIAN
173 	.macro	regs_to_64, rd, lbits, hbits
174 #else
175 	.macro	regs_to_64, rd, hbits, lbits
176 #endif
177 	orr	\rd, \lbits, \hbits, lsl #32	// rd = lbits | (hbits << 32)
178 	.endm
179 
180 /*
181  * Pseudo-ops for PC-relative adr/ldr/str <reg>, <symbol> where
182  * <symbol> is within the range +/- 4 GB of the PC.
183  */
184 	/*
185 	 * @dst: destination register (64 bit wide)
186 	 * @sym: name of the symbol
187 	 */
188 	.macro	adr_l, dst, sym
189 	adrp	\dst, \sym			// address of the 4 KiB page containing sym
190 	add	\dst, \dst, :lo12:\sym		// add the low 12 bits of the address
191 	.endm
192 
193 	/*
194 	 * @dst: destination register (32 or 64 bit wide)
195 	 * @sym: name of the symbol
196 	 * @tmp: optional 64-bit scratch register to be used if <dst> is a
197 	 *       32-bit wide register, in which case it cannot be used to hold
198 	 *       the address
199 	 */
/*
 * Always expands to two instructions (adrp + ldr). Without @tmp the page
 * address is built in @dst itself and then overwritten by the load.
 */
200 	.macro	ldr_l, dst, sym, tmp=
201 	.ifb	\tmp
202 	adrp	\dst, \sym			// no tmp given: dst holds the page address
203 	ldr	\dst, [\dst, :lo12:\sym]
204 	.else
205 	adrp	\tmp, \sym			// tmp holds the page address
206 	ldr	\dst, [\tmp, :lo12:\sym]
207 	.endif
208 	.endm
209 
210 	/*
211 	 * @src: source register (32 or 64 bit wide)
212 	 * @sym: name of the symbol
213 	 * @tmp: mandatory 64-bit scratch register to calculate the address
214 	 *       while <src> needs to be preserved.
215 	 */
216 	.macro	str_l, src, sym, tmp
217 	adrp	\tmp, \sym			// tmp = page address of sym (tmp is clobbered)
218 	str	\src, [\tmp, :lo12:\sym]	// store src at page + low 12 bits
219 	.endm
220 
221 	/*
222 	 * @dst: Result of per_cpu(sym, smp_processor_id()) (can be SP)
223 	 * @sym: The name of the per-cpu variable
224 	 * @tmp: scratch register
225 	 */
/*
 * The symbol address is formed in @dst, then the value of tpidr_el1 (or
 * tpidr_el2 in the ARM64_HAS_VIRT_HOST_EXTN alternative) is added to it.
 * @tmp is clobbered.
 */
226 	.macro adr_this_cpu, dst, sym, tmp
227 	adrp	\tmp, \sym
228 	add	\dst, \tmp, #:lo12:\sym		// dst = address of sym
229 alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
230 	mrs	\tmp, tpidr_el1
231 alternative_else
232 	mrs	\tmp, tpidr_el2
233 alternative_endif
234 	add	\dst, \dst, \tmp		// add the value read from tpidr_elX
235 	.endm
236 
237 	/*
238 	 * @dst: Result of READ_ONCE(per_cpu(sym, smp_processor_id()))
239 	 * @sym: The name of the per-cpu variable
240 	 * @tmp: scratch register
241 	 */
/*
 * The symbol address is formed in @dst with adr_l, then a load is done from
 * that address plus the value of tpidr_el1 (or tpidr_el2 in the
 * ARM64_HAS_VIRT_HOST_EXTN alternative). @tmp is clobbered.
 */
242 	.macro ldr_this_cpu dst, sym, tmp
243 	adr_l	\dst, \sym
244 alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
245 	mrs	\tmp, tpidr_el1
246 alternative_else
247 	mrs	\tmp, tpidr_el2
248 alternative_endif
249 	ldr	\dst, [\dst, \tmp]		// load from sym address + tpidr_elX value
250 	.endm
251 
252 /*
253  * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm)
254  */
/*
 *	rd:	destination register for the loaded pointer
 *	rn:	register holding the vma pointer
 */
255 	.macro	vma_vm_mm, rd, rn
256 	ldr	\rd, [\rn, #VMA_VM_MM]		// load from offset VMA_VM_MM of the vma
257 	.endm
258 
259 /*
260  * read_ctr - read CTR_EL0. If the system has mismatched register fields,
261  * provide the system wide safe value from arm64_ftr_reg_ctrel0.sys_val
262  */
/*
 *	reg:	destination register
 *
 * The nop after the mrs pads the default sequence to two instructions, the
 * same length as the ldr_l (adrp + ldr) used by the
 * ARM64_MISMATCHED_CACHE_TYPE alternative.
 */
263 	.macro	read_ctr, reg
264 alternative_if_not ARM64_MISMATCHED_CACHE_TYPE
265 	mrs	\reg, ctr_el0			// read CTR
266 	nop
267 alternative_else
268 	ldr_l	\reg, arm64_ftr_reg_ctrel0 + ARM64_FTR_SYSVAL
269 alternative_endif
270 	.endm
271 
272 
273 /*
274  * raw_dcache_line_size - get the minimum D-cache line size on this CPU
275  * from the CTR register.
276  */
/*
 *	reg:	result, computed as 4 << CTR_EL0[19:16]
 *	tmp:	scratch register, clobbered
 */
277 	.macro	raw_dcache_line_size, reg, tmp
278 	mrs	\tmp, ctr_el0			// read CTR
279 	ubfm	\tmp, \tmp, #16, #19		// cache line size encoding
280 	mov	\reg, #4			// bytes per word
281 	lsl	\reg, \reg, \tmp		// actual cache line size
282 	.endm
283 
284 /*
285  * dcache_line_size - get the safe D-cache line size across all CPUs
286  */
/*
 *	reg:	result, computed as 4 << bits [19:16] of the read_ctr value
 *	tmp:	scratch register, clobbered
 */
287 	.macro	dcache_line_size, reg, tmp
288 	read_ctr	\tmp
289 	ubfm		\tmp, \tmp, #16, #19	// cache line size encoding
290 	mov		\reg, #4		// bytes per word
291 	lsl		\reg, \reg, \tmp	// actual cache line size
292 	.endm
293 
294 /*
295  * raw_icache_line_size - get the minimum I-cache line size on this CPU
296  * from the CTR register.
297  */
/*
 *	reg:	result, computed as 4 << CTR_EL0[3:0]
 *	tmp:	scratch register, clobbered
 */
298 	.macro	raw_icache_line_size, reg, tmp
299 	mrs	\tmp, ctr_el0			// read CTR
300 	and	\tmp, \tmp, #0xf		// cache line size encoding
301 	mov	\reg, #4			// bytes per word
302 	lsl	\reg, \reg, \tmp		// actual cache line size
303 	.endm
304 
305 /*
306  * icache_line_size - get the safe I-cache line size across all CPUs
307  */
/*
 *	reg:	result, computed as 4 << bits [3:0] of the read_ctr value
 *	tmp:	scratch register, clobbered
 */
308 	.macro	icache_line_size, reg, tmp
309 	read_ctr	\tmp
310 	and		\tmp, \tmp, #0xf	// cache line size encoding
311 	mov		\reg, #4		// bytes per word
312 	lsl		\reg, \reg, \tmp	// actual cache line size
313 	.endm
314 
315 /*
316  * tcr_set_t0sz - update TCR.T0SZ so that we can load the ID map
317  */
/*
 *	valreg:	register holding the TCR value, updated in place
 *	t0sz:	register whose low TCR_TxSZ_WIDTH bits are inserted at bit
 *		position TCR_T0SZ_OFFSET
 */
318 	.macro	tcr_set_t0sz, valreg, t0sz
319 	bfi	\valreg, \t0sz, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
320 	.endm
321 
322 /*
323  * tcr_set_t1sz - update TCR.T1SZ
324  */
/*
 *	valreg:	register holding the TCR value, updated in place
 *	t1sz:	register whose low TCR_TxSZ_WIDTH bits are inserted at bit
 *		position TCR_T1SZ_OFFSET
 */
325 	.macro	tcr_set_t1sz, valreg, t1sz
326 	bfi	\valreg, \t1sz, #TCR_T1SZ_OFFSET, #TCR_TxSZ_WIDTH
327 	.endm
328 
329 /*
330  * tcr_compute_pa_size - set TCR.(I)PS to the highest supported
331  * ID_AA64MMFR0_EL1.PARange value
332  *
333  *	tcr:		register with the TCR_ELx value to be updated
334  *	pos:		IPS or PS bitfield position
335  *	tmp{0,1}:	temporary registers
336  */
337 	.macro	tcr_compute_pa_size, tcr, pos, tmp0, tmp1
338 	mrs	\tmp0, ID_AA64MMFR0_EL1
339 	// Narrow PARange to fit the PS field in TCR_ELx
340 	ubfx	\tmp0, \tmp0, #ID_AA64MMFR0_PARANGE_SHIFT, #3
341 	mov	\tmp1, #ID_AA64MMFR0_PARANGE_MAX
342 	cmp	\tmp0, \tmp1
343 	csel	\tmp0, \tmp1, \tmp0, hi		// clamp: use the MAX value when the field exceeds it
344 	bfi	\tcr, \tmp0, \pos, #3		// insert the 3-bit result into tcr at pos
345 	.endm
346 
347 /*
348  * Macro to perform a data cache maintenance for the interval
349  * [kaddr, kaddr + size)
350  *
351  * 	op:		operation passed to dc instruction
352  * 	domain:		domain used in dsb instruciton
353  * 	kaddr:		starting virtual address of the region
354  * 	size:		size of the region
355  * 	Corrupts:	kaddr, size, tmp1, tmp2
356  */
/*
 * __dcache_op_workaround_clean_cache - helper for dcache_by_line_op (which
 * is what the comment block above describes). Emits one instruction:
 * "dc <op>, <kaddr>" by default, or "dc civac, <kaddr>" as the
 * ARM64_WORKAROUND_CLEAN_CACHE alternative.
 */
357 	.macro __dcache_op_workaround_clean_cache, op, kaddr
358 alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
359 	dc	\op, \kaddr
360 alternative_else
361 	dc	civac, \kaddr
362 alternative_endif
363 	.endm
364 
/*
 * dcache_by_line_op - apply "dc <op>" line by line over
 * [kaddr, kaddr + size), then issue "dsb <domain>".
 *
 * The step is the value produced by dcache_line_size. @kaddr is first rounded
 * down to a multiple of that step, and the bounds check sits at the bottom of
 * the loop, so the operation is issued at least once. cvau and cvac go
 * through __dcache_op_workaround_clean_cache, cvap and cvadp are emitted as
 * raw sys encodings, and any other @op is handed to dc unchanged.
 * On exit @size holds the end address and @kaddr is at or past it.
 */
365 	.macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2
366 	dcache_line_size \tmp1, \tmp2
367 	add	\size, \kaddr, \size		// size now holds the end address
368 	sub	\tmp2, \tmp1, #1		// line size - 1, used as a mask
369 	bic	\kaddr, \kaddr, \tmp2		// round kaddr down to a line boundary
370 9998:
371 	.ifc	\op, cvau
372 	__dcache_op_workaround_clean_cache \op, \kaddr
373 	.else
374 	.ifc	\op, cvac
375 	__dcache_op_workaround_clean_cache \op, \kaddr
376 	.else
377 	.ifc	\op, cvap
378 	sys	3, c7, c12, 1, \kaddr	// dc cvap
379 	.else
380 	.ifc	\op, cvadp
381 	sys	3, c7, c13, 1, \kaddr	// dc cvadp
382 	.else
383 	dc	\op, \kaddr
384 	.endif
385 	.endif
386 	.endif
387 	.endif
388 	add	\kaddr, \kaddr, \tmp1		// advance by one line
389 	cmp	\kaddr, \size
390 	b.lo	9998b				// loop while kaddr is below the end (unsigned)
391 	dsb	\domain
392 	.endm
393 
394 /*
395  * Macro to perform an instruction cache maintenance for the interval
396  * [start, end)
397  *
398  * 	start, end:	virtual addresses describing the region
399  *	label:		A label to branch to on user fault.
400  * 	Corrupts:	tmp1, tmp2
401  */
/*
 * @start and @end are only read. The running address is kept in @tmp2 and
 * the step, taken from icache_line_size, in @tmp1. The bounds check sits at
 * the bottom of the loop, so at least one "ic ivau" is issued. The sequence
 * ends with "dsb ish; isb".
 */
402 	.macro invalidate_icache_by_line start, end, tmp1, tmp2, label
403 	icache_line_size \tmp1, \tmp2
404 	sub	\tmp2, \tmp1, #1		// line size - 1, used as a mask
405 	bic	\tmp2, \start, \tmp2		// tmp2 = start rounded down to a line boundary
406 9997:
407 USER(\label, ic	ivau, \tmp2)			// invalidate I line PoU
408 	add	\tmp2, \tmp2, \tmp1		// advance by one line
409 	cmp	\tmp2, \end
410 	b.lo	9997b				// loop while below end (unsigned)
411 	dsb	ish
412 	isb
413 	.endm
414 
415 /*
416  * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present
417  */
/*
 *	tmpreg:	scratch register, clobbered
 *
 * The 4-bit field at ID_AA64DFR0_PMUVER_SHIFT is extracted with sign
 * extension, so the signed "less than 1" test skips the write both for a
 * field value of 0 and for any value with the top bit of the field set.
 */
418 	.macro	reset_pmuserenr_el0, tmpreg
419 	mrs	\tmpreg, id_aa64dfr0_el1
420 	sbfx	\tmpreg, \tmpreg, #ID_AA64DFR0_PMUVER_SHIFT, #4
421 	cmp	\tmpreg, #1			// Skip if no PMU present
422 	b.lt	9000f
423 	msr	pmuserenr_el0, xzr		// Disable PMU access from EL0
424 9000:
425 	.endm
426 
427 /*
428  * reset_amuserenr_el0 - reset AMUSERENR_EL0 if AMUv1 present
429  */
/*
 *	tmpreg:	scratch register, clobbered
 *
 * The 4-bit field at ID_AA64PFR0_AMU_SHIFT is extracted unsigned; the write
 * is skipped only when that field is zero.
 */
430 	.macro	reset_amuserenr_el0, tmpreg
431 	mrs	\tmpreg, id_aa64pfr0_el1	// Check ID_AA64PFR0_EL1
432 	ubfx	\tmpreg, \tmpreg, #ID_AA64PFR0_AMU_SHIFT, #4
433 	cbz	\tmpreg, .Lskip_\@		// Skip if no AMU present
434 	msr_s	SYS_AMUSERENR_EL0, xzr		// Disable AMU access from EL0
435 .Lskip_\@:
436 	.endm
437 /*
438  * copy_page - copy src to dest using temp registers t1-t8
439  */
/*
 * Each iteration loads 64 bytes from @src with four ldp and writes them to
 * @dest with four stnp, advancing both pointers by 64. The loop ends once
 * @src becomes a multiple of PAGE_SIZE. All arguments are required; @dest,
 * @src and t1-t8 are modified, as are the condition flags (tst).
 */
440 	.macro copy_page dest:req src:req t1:req t2:req t3:req t4:req t5:req t6:req t7:req t8:req
441 9998:	ldp	\t1, \t2, [\src]
442 	ldp	\t3, \t4, [\src, #16]
443 	ldp	\t5, \t6, [\src, #32]
444 	ldp	\t7, \t8, [\src, #48]
445 	add	\src, \src, #64
446 	stnp	\t1, \t2, [\dest]
447 	stnp	\t3, \t4, [\dest, #16]
448 	stnp	\t5, \t6, [\dest, #32]
449 	stnp	\t7, \t8, [\dest, #48]
450 	add	\dest, \dest, #64
451 	tst	\src, #(PAGE_SIZE - 1)		// low bits all zero once src reaches a PAGE_SIZE multiple
452 	b.ne	9998b
453 	.endm
454 
455 /*
456  * Annotate a function as being unsuitable for kprobes.
457  */
/*
 * With CONFIG_KPROBES, NOKPROBE(x) stores x as a 64-bit value in section
 * "_kprobe_blacklist"; without it the macro expands to nothing.
 */
458 #ifdef CONFIG_KPROBES
459 #define NOKPROBE(x)				\
460 	.pushsection "_kprobe_blacklist", "aw";	\
461 	.quad	x;				\
462 	.popsection;
463 #else
464 #define NOKPROBE(x)
465 #endif
466 
/*
 * EXPORT_SYMBOL_NOKASAN(name) - expands to EXPORT_SYMBOL(name), except under
 * CONFIG_KASAN where it expands to nothing.
 */
467 #ifdef CONFIG_KASAN
468 #define EXPORT_SYMBOL_NOKASAN(name)
469 #else
470 #define EXPORT_SYMBOL_NOKASAN(name)	EXPORT_SYMBOL(name)
471 #endif
472 
473 	/*
474 	 * Emit a 64-bit absolute little endian symbol reference in a way that
475 	 * ensures that it will be resolved at build time, even when building a
476 	 * PIE binary. This requires cooperation from the linker script, which
477 	 * must emit the lo32/hi32 halves individually.
478 	 */
479 	.macro	le64sym, sym
480 	.long	\sym\()_lo32			// first word: symbol <sym>_lo32
481 	.long	\sym\()_hi32			// second word: symbol <sym>_hi32
482 	.endm
483 
484 	/*
485 	 * mov_q - move an immediate constant into a 64-bit register using
486 	 *         between 2 and 4 movz/movk instructions (depending on the
487 	 *         magnitude and sign of the operand)
488 	 */
/*
 * The sequence is chosen at assembly time from the value of @val:
 *  - bits 63..31 all equal (val fits a sign-extended 32-bit quantity):
 *    movz g1_s + movk g0, two instructions;
 *  - bits 63..47 all equal (val fits a sign-extended 48-bit quantity):
 *    movz g2_s + movk g1 + movk g0, three instructions;
 *  - otherwise: movz g3 + movk g2 + movk g1 + movk g0, four instructions.
 *
 *	reg:	destination register
 *	val:	constant to load; it is also evaluated in the .if expressions
 */
489 	.macro	mov_q, reg, val
490 	.if (((\val) >> 31) == 0 || ((\val) >> 31) == 0x1ffffffff)
491 	movz	\reg, :abs_g1_s:\val
492 	.else
493 	.if (((\val) >> 47) == 0 || ((\val) >> 47) == 0x1ffff)
494 	movz	\reg, :abs_g2_s:\val
495 	.else
496 	movz	\reg, :abs_g3:\val
497 	movk	\reg, :abs_g2_nc:\val
498 	.endif
499 	movk	\reg, :abs_g1_nc:\val
500 	.endif
501 	movk	\reg, :abs_g0_nc:\val
502 	.endm
503 
504 /*
505  * Return the current task_struct.
506  */
507 	.macro	get_current_task, rd
508 	mrs	\rd, sp_el0			// the value is read from SP_EL0
509 	.endm
510 
511 /*
512  * Offset ttbr1 to allow for 48-bit kernel VAs set with 52-bit PTRS_PER_PGD.
513  * orr is used as it can cover the immediate value (and is idempotent).
514  * In future this may be nop'ed out when dealing with 52-bit kernel VAs.
515  * 	ttbr: Value of ttbr to set, modified.
516  */
/*
 *	tmp:	scratch register, clobbered when CONFIG_ARM64_VA_BITS_52 is set
 *
 * Expands to nothing without CONFIG_ARM64_VA_BITS_52. With it, the orr is
 * skipped at run time when the 4-bit field at ID_AA64MMFR2_LVA_SHIFT of
 * ID_AA64MMFR2_EL1 is non-zero.
 */
517 	.macro	offset_ttbr1, ttbr, tmp
518 #ifdef CONFIG_ARM64_VA_BITS_52
519 	mrs_s	\tmp, SYS_ID_AA64MMFR2_EL1
520 	and	\tmp, \tmp, #(0xf << ID_AA64MMFR2_LVA_SHIFT)
521 	cbnz	\tmp, .Lskipoffs_\@
522 	orr	\ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET
523 .Lskipoffs_\@ :
524 #endif
525 	.endm
526 
527 /*
528  * Perform the reverse of offset_ttbr1.
529  * bic is used as it can cover the immediate value and, in future, won't need
530  * to be nop'ed out when dealing with 52-bit kernel VAs.
531  */
/*
 *	ttbr:	register holding the ttbr value, modified in place
 *
 * Expands to nothing without CONFIG_ARM64_VA_BITS_52. With it, the bic is
 * unconditional: there is no run-time feature check here.
 */
532 	.macro	restore_ttbr1, ttbr
533 #ifdef CONFIG_ARM64_VA_BITS_52
534 	bic	\ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET
535 #endif
536 	.endm
537 
538 /*
539  * Arrange a physical address in a TTBR register, taking care of 52-bit
540  * addresses.
541  *
542  * 	phys:	physical address, preserved
543  * 	ttbr:	returns the TTBR value
544  */
545 	.macro	phys_to_ttbr, ttbr, phys
546 #ifdef CONFIG_ARM64_PA_BITS_52
547 	orr	\ttbr, \phys, \phys, lsr #46	// OR in a copy shifted right by 46: bits [51:48] land in [5:2]
548 	and	\ttbr, \ttbr, #TTBR_BADDR_MASK_52	// keep only the bits selected by the mask
549 #else
550 	mov	\ttbr, \phys			// plain copy
551 #endif
552 	.endm
553 
/*
 * phys_to_pte - arrange the physical address in @phys into the layout used
 * in a page table entry and return it in @pte. @phys is only read.
 */
554 	.macro	phys_to_pte, pte, phys
555 #ifdef CONFIG_ARM64_PA_BITS_52
556 	/*
557 	 * We assume \phys is 64K aligned and this is guaranteed by only
558 	 * supporting this configuration with 64K pages.
559 	 */
560 	orr	\pte, \phys, \phys, lsr #36	// OR in a copy shifted right by 36: bits [51:48] land in [15:12]
561 	and	\pte, \pte, #PTE_ADDR_MASK	// keep only the bits selected by the mask
562 #else
563 	mov	\pte, \phys			// plain copy
564 #endif
565 	.endm
566 
/*
 * pte_to_phys - extract the physical address from the page table entry in
 * @pte and return it in @phys. @pte is only read.
 *
 * In the CONFIG_ARM64_PA_BITS_52 case the result has pte[47:16] in bits
 * [47:16], pte[15:12] in bits [51:48] and zeroes elsewhere.
 */
567 	.macro	pte_to_phys, phys, pte
568 #ifdef CONFIG_ARM64_PA_BITS_52
569 	ubfiz	\phys, \pte, #(48 - 16 - 12), #16	// phys[35:20] = pte[15:0], rest zero
570 	bfxil	\phys, \pte, #16, #32		// phys[31:0] = pte[47:16]
571 	lsl	\phys, \phys, #16		// move everything up by 16 bits
572 #else
573 	and	\phys, \pte, #PTE_ADDR_MASK
574 #endif
575 	.endm
576 
577 /*
578  * tcr_clear_errata_bits - Clear TCR bits that trigger an erratum on this CPU.
579  */
/*
 *	tcr:		register holding the TCR value, updated in place
 *	tmp{1,2}:	scratch registers, clobbered
 *
 * Expands to nothing unless CONFIG_FUJITSU_ERRATUM_010001 is defined. The
 * comparison against MIDR_EL1 also changes the condition flags.
 */
580 	.macro	tcr_clear_errata_bits, tcr, tmp1, tmp2
581 #ifdef CONFIG_FUJITSU_ERRATUM_010001
582 	mrs	\tmp1, midr_el1
583 
584 	mov_q	\tmp2, MIDR_FUJITSU_ERRATUM_010001_MASK
585 	and	\tmp1, \tmp1, \tmp2		// masked MIDR_EL1 value
586 	mov_q	\tmp2, MIDR_FUJITSU_ERRATUM_010001
587 	cmp	\tmp1, \tmp2
588 	b.ne	10f				// no match: leave tcr untouched
589 
590 	mov_q	\tmp2, TCR_CLEAR_FUJITSU_ERRATUM_010001
591 	bic	\tcr, \tcr, \tmp2		// clear the listed bits in tcr
592 10:
593 #endif /* CONFIG_FUJITSU_ERRATUM_010001 */
594 	.endm
595 
596 /**
597  * Erratum workaround prior to disabling the MMU. Insert an ISB immediately prior
598  * to executing the MSR that will change SCTLR_ELn[M] from a value of 1 to 0.
599  */
/* Emits a single isb under CONFIG_QCOM_FALKOR_ERRATUM_E1041, nothing otherwise. */
600 	.macro pre_disable_mmu_workaround
601 #ifdef CONFIG_QCOM_FALKOR_ERRATUM_E1041
602 	isb
603 #endif
604 	.endm
605 
606 	/*
607 	 * frame_push - Push @regcount callee saved registers to the stack,
608 	 *              starting at x19, as well as x29/x30, and set x29 to
609 	 *              the new value of sp. Add @extra bytes of stack space
610 	 *              for locals.
611 	 */
/*
 *	regcount:	number of registers to store, required; __frame
 *			rejects values outside [0 ... 10]
 *	extra:		extra stack bytes, passed through to __frame, which
 *			declares a default of 0 for it and requires a
 *			multiple of 16
 */
612 	.macro		frame_push, regcount:req, extra
613 	__frame		st, \regcount, \extra
614 	.endm
615 
616 	/*
617 	 * frame_pop  - Pop the callee saved registers from the stack that were
618 	 *              pushed in the most recent call to frame_push, as well
619 	 *              as x29/x30 and any extra stack space that may have been
620 	 *              allocated.
621 	 */
/*
 * Takes no arguments: the register count and frame size are the assembly-time
 * symbols recorded by __frame during the preceding frame_push.
 */
622 	.macro		frame_pop
623 	__frame		ld
624 	.endm
625 
/*
 * __frame_regs - emit the store or load for one register pair slot.
 *
 *	reg1, reg2:	the pair of registers belonging to this slot
 *	op:		"st" or "ld"; "r" or "p" is appended to form the
 *			mnemonic (str/ldr or stp/ldp)
 *	num:		1-based index of reg1 among the saved registers
 *
 * If exactly @num registers were requested, only @reg1 is transferred; if
 * more than @num were requested, both are. Otherwise nothing is emitted.
 * The slot lives at [sp, #(num + 1) * 8].
 */
626 	.macro		__frame_regs, reg1, reg2, op, num
627 	.if		.Lframe_regcount == \num
628 	\op\()r		\reg1, [sp, #(\num + 1) * 8]
629 	.elseif		.Lframe_regcount > \num
630 	\op\()p		\reg1, \reg2, [sp, #(\num + 1) * 8]
631 	.endif
632 	.endm
633 
/*
 * __frame - common body of frame_push (op = st) and frame_pop (op = ld).
 *
 *	op:		"st" to build the frame, "ld" to tear it down
 *	regcount:	number of registers to save starting at x19 (st only)
 *	extra:		extra bytes to allocate, default 0 (st only)
 *
 * For "st" the arguments are validated at assembly time, the state is
 * recorded in the symbols .Lframe_regcount, .Lframe_extra and
 * .Lframe_local_offset, and x29/x30 are pushed with a pre-indexed stp that
 * allocates .Lframe_local_offset + .Lframe_extra bytes; x29 is then set to sp.
 * .Lframe_local_offset is ((regcount + 3) / 2) * 16, i.e. 16 bytes for
 * x29/x30 plus 16 bytes per (possibly half-used) register pair.
 *
 * For "ld" the registers are reloaded, x29/x30 are popped with a post-indexed
 * ldp that releases the same number of bytes, and .Lframe_regcount is reset to
 * -1, the value that marks "no frame open".
 */
634 	.macro		__frame, op, regcount, extra=0
635 	.ifc		\op, st
636 	.if		(\regcount) < 0 || (\regcount) > 10
637 	.error		"regcount should be in the range [0 ... 10]"
638 	.endif
639 	.if		((\extra) % 16) != 0
640 	.error		"extra should be a multiple of 16 bytes"
641 	.endif
642 	.ifdef		.Lframe_regcount
643 	.if		.Lframe_regcount != -1
644 	.error		"frame_push/frame_pop may not be nested"
645 	.endif
646 	.endif
647 	.set		.Lframe_regcount, \regcount
648 	.set		.Lframe_extra, \extra
649 	.set		.Lframe_local_offset, ((\regcount + 3) / 2) * 16
650 	stp		x29, x30, [sp, #-.Lframe_local_offset - .Lframe_extra]!
651 	mov		x29, sp
652 	.endif
653 
654 	__frame_regs	x19, x20, \op, 1
655 	__frame_regs	x21, x22, \op, 3
656 	__frame_regs	x23, x24, \op, 5
657 	__frame_regs	x25, x26, \op, 7
658 	__frame_regs	x27, x28, \op, 9
659 
660 	.ifc		\op, ld
661 	.if		.Lframe_regcount == -1
662 	.error		"frame_push/frame_pop may not be nested"
663 	.endif
664 	ldp		x29, x30, [sp], #.Lframe_local_offset + .Lframe_extra
665 	.set		.Lframe_regcount, -1
666 	.endif
667 	.endm
668 
669 /*
670  * Check whether to yield to another runnable task from kernel mode NEON code
671  * (which runs with preemption disabled).
672  *
673  * if_will_cond_yield_neon
674  *        // pre-yield patchup code
675  * do_cond_yield_neon
676  *        // post-yield patchup code
677  * endif_yield_neon    <label>
678  *
679  * where <label> is optional, and marks the point where execution will resume
680  * after a yield has been performed. If omitted, execution resumes right after
681  * the endif_yield_neon invocation. Note that the entire sequence, including
682  * the provided patchup code, will be omitted from the image if
683  * CONFIG_PREEMPTION is not defined.
684  *
685  * As a convenience, in the case where no patchup code is required, the above
686  * sequence may be abbreviated to
687  *
688  * cond_yield_neon <label>
689  *
690  * Note that the patchup code does not support assembler directives that change
691  * the output section, any use of such directives is undefined.
692  *
693  * The yield itself consists of the following:
694  * - Check whether the preempt count is exactly 1 and a reschedule is also
695  *   needed. If so, calling of preempt_enable() in kernel_neon_end() will
696  *   trigger a reschedule. If it is not the case, yielding is pointless.
697  * - Disable and re-enable kernel mode NEON, and branch to the yield fixup
698  *   code.
699  *
700  * This macro sequence may clobber all CPU state that is not guaranteed by the
701  * AAPCS to be preserved across an ordinary function call.
702  */
703 
/*
 * cond_yield_neon - the three yield macros back to back, with no patchup
 * code in between.
 *
 *	lbl:	optional label handed to endif_yield_neon
 */
704 	.macro		cond_yield_neon, lbl
705 	if_will_cond_yield_neon
706 	do_cond_yield_neon
707 	endif_yield_neon	\lbl
708 	.endm
709 
/*
 * if_will_cond_yield_neon - open the yield sequence.
 *
 * With CONFIG_PREEMPTION: load the value at offset TSK_TI_PREEMPT of the
 * current task into x0, subtract PREEMPT_DISABLE_OFFSET and branch to the
 * yield path if the result is zero. Everything assembled after this macro,
 * up to the .previous in endif_yield_neon, goes into subsection 1.
 * Clobbers x0.
 *
 * Without CONFIG_PREEMPTION: no instructions are emitted here and the code
 * that follows is redirected into section ".discard.cond_yield_neon".
 */
710 	.macro		if_will_cond_yield_neon
711 #ifdef CONFIG_PREEMPTION
712 	get_current_task	x0
713 	ldr		x0, [x0, #TSK_TI_PREEMPT]
714 	sub		x0, x0, #PREEMPT_DISABLE_OFFSET
715 	cbz		x0, .Lyield_\@
716 	/* fall through to endif_yield_neon */
717 	.subsection	1
718 .Lyield_\@ :
719 #else
720 	.section	".discard.cond_yield_neon", "ax"
721 #endif
722 	.endm
723 
/*
 * do_cond_yield_neon - the yield itself: call kernel_neon_end, then
 * kernel_neon_begin. Both are bl calls, so x30 is overwritten.
 */
724 	.macro		do_cond_yield_neon
725 	bl		kernel_neon_end
726 	bl		kernel_neon_begin
727 	.endm
728 
/*
 * endif_yield_neon - close the yield sequence.
 *
 *	lbl:	optional label to branch to at the end of the yield path; when
 *		blank, the branch goes to a local label placed right after
 *		this macro
 *
 * .previous switches back to the section that was active before the matching
 * if_will_cond_yield_neon changed it.
 */
729 	.macro		endif_yield_neon, lbl
730 	.ifnb		\lbl
731 	b		\lbl
732 	.else
733 	b		.Lyield_out_\@
734 	.endif
735 	.previous
736 .Lyield_out_\@ :
737 	.endm
738 
739 /*
740  * This macro emits a program property note section identifying
741  * architecture features which require special handling, mainly for
742  * use in assembly files included in the VDSO.
743  */
744 
/* Note type and property type words written by emit_aarch64_feature_1_and. */
745 #define NT_GNU_PROPERTY_TYPE_0  5
746 #define GNU_PROPERTY_AARCH64_FEATURE_1_AND      0xc0000000
747 
/* Individual feature bits that can be combined into the property value. */
748 #define GNU_PROPERTY_AARCH64_FEATURE_1_BTI      (1U << 0)
749 #define GNU_PROPERTY_AARCH64_FEATURE_1_PAC      (1U << 1)
750 
/*
 * The default feature set (BTI | PAC) is only defined under
 * CONFIG_ARM64_BTI_KERNEL.
 */
751 #ifdef CONFIG_ARM64_BTI_KERNEL
752 #define GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT		\
753 		((GNU_PROPERTY_AARCH64_FEATURE_1_BTI |	\
754 		  GNU_PROPERTY_AARCH64_FEATURE_1_PAC))
755 #endif
756 
757 #ifdef GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT
/*
 * emit_aarch64_feature_1_and - emit one note into .note.gnu.property.
 *
 *	feat:	32-bit feature word stored as the property data; defaults to
 *		GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT
 *
 * Layout, in order: name size (2f - 1f), descriptor size (6f - 3f), note
 * type, the name string "GNU", then the descriptor made of the property
 * type, the data size (5f - 4f) and the data itself, padded to 2^3 bytes.
 */
758 .macro emit_aarch64_feature_1_and, feat=GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT
759 	.pushsection .note.gnu.property, "a"
760 	.align  3
761 	.long   2f - 1f
762 	.long   6f - 3f
763 	.long   NT_GNU_PROPERTY_TYPE_0
764 1:      .string "GNU"
765 2:
766 	.align  3
767 3:      .long   GNU_PROPERTY_AARCH64_FEATURE_1_AND
768 	.long   5f - 4f
769 4:
770 	/*
771 	 * This is described with an array of char in the Linux API
772 	 * spec but the text and all other usage (including binutils,
773 	 * clang and GCC) treat this as a 32 bit value so no swizzling
774 	 * is required for big endian.
775 	 */
776 	.long   \feat
777 5:
778 	.align  3
779 6:
780 	.popsection
781 .endm
782 
783 #else
/* No default feature set is defined: the macro exists but emits nothing. */
784 .macro emit_aarch64_feature_1_and, feat=0
785 .endm
786 
787 #endif /* GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT */
788 
789 #endif	/* __ASM_ASSEMBLER_H */
790