xref: /openbmc/linux/arch/arm64/include/asm/assembler.h (revision ca48739e)
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /*
3  * Based on arch/arm/include/asm/assembler.h, arch/arm/mm/proc-macros.S
4  *
5  * Copyright (C) 1996-2000 Russell King
6  * Copyright (C) 2012 ARM Ltd.
7  */
8 #ifndef __ASSEMBLY__
9 #error "Only include this from assembly code"
10 #endif
11 
12 #ifndef __ASM_ASSEMBLER_H
13 #define __ASM_ASSEMBLER_H
14 
15 #include <asm-generic/export.h>
16 
17 #include <asm/asm-offsets.h>
18 #include <asm/cpufeature.h>
19 #include <asm/cputype.h>
20 #include <asm/debug-monitors.h>
21 #include <asm/page.h>
22 #include <asm/pgtable-hwdef.h>
23 #include <asm/ptrace.h>
24 #include <asm/thread_info.h>
25 
26 	.macro save_and_disable_daif, flags
27 	mrs	\flags, daif
28 	msr	daifset, #0xf
29 	.endm
30 
31 	.macro disable_daif
32 	msr	daifset, #0xf
33 	.endm
34 
35 	.macro enable_daif
36 	msr	daifclr, #0xf
37 	.endm
38 
39 	.macro	restore_daif, flags:req
40 	msr	daif, \flags
41 	.endm
42 
43 	/* IRQ is the lowest priority flag, unconditionally unmask the rest. */
44 	.macro enable_da_f
45 	msr	daifclr, #(8 | 4 | 1)
46 	.endm
47 
48 /*
49  * Save/restore interrupts.
50  */
51 	.macro	save_and_disable_irq, flags
52 	mrs	\flags, daif
53 	msr	daifset, #2
54 	.endm
55 
56 	.macro	restore_irq, flags
57 	msr	daif, \flags
58 	.endm
59 
60 	.macro	enable_dbg
61 	msr	daifclr, #8
62 	.endm
63 
64 	.macro	disable_step_tsk, flgs, tmp
65 	tbz	\flgs, #TIF_SINGLESTEP, 9990f
66 	mrs	\tmp, mdscr_el1
67 	bic	\tmp, \tmp, #DBG_MDSCR_SS
68 	msr	mdscr_el1, \tmp
69 	isb	// Synchronise with enable_dbg
70 9990:
71 	.endm
72 
73 	/* call with daif masked */
74 	.macro	enable_step_tsk, flgs, tmp
75 	tbz	\flgs, #TIF_SINGLESTEP, 9990f
76 	mrs	\tmp, mdscr_el1
77 	orr	\tmp, \tmp, #DBG_MDSCR_SS
78 	msr	mdscr_el1, \tmp
79 9990:
80 	.endm
81 
/*
 * RAS Error Synchronization barrier.
 *
 * Emitted as "hint #16" when CONFIG_ARM64_RAS_EXTN is enabled and as a
 * plain nop otherwise, so the macro always occupies one instruction.
 */
	.macro	esb
#ifndef CONFIG_ARM64_RAS_EXTN
	nop
#else
	hint	#16
#endif
	.endm
92 
/*
 * Value prediction barrier, emitted through its hint-space encoding.
 */
	.macro	csdb
	hint	#20
	.endm
99 
/*
 * Speculation barrier.
 *
 * Two alternative sequences are provided, selected on ARM64_HAS_SB:
 * "dsb nsh; isb", or SB_BARRIER_INSN padded with a nop so that both
 * sequences are two instructions long.
 */
	.macro	sb
alternative_if_not ARM64_HAS_SB
	dsb	nsh
	isb
alternative_else
	SB_BARRIER_INSN
	nop
alternative_endif
	.endm
112 
/*
 * NOP sequence.
 *
 *	num:	how many nop instructions to emit
 */
	.macro	nops, num
	.rept	\num
	nop
	.endr
	.endm
121 
/*
 * Emit an entry into the exception table.
 *
 *	from:	label of the instruction the entry is keyed on
 *	to:	label paired with it (the fixup target)
 *
 * The entry lives in the 8-byte aligned __ex_table section and consists
 * of two 32-bit values, each being the distance from the field itself to
 * the label it describes.
 */
	.macro		_asm_extable, from, to
	.pushsection	__ex_table, "a"
	.p2align	3
	.long		(\from - .)
	.long		(\to - .)
	.popsection
	.endm
131 
/*
 * USER(fixup, insn...) - emit insn at local label 9999 and add an exception
 * table entry that pairs that instruction with the fixup label.
 */
#define USER(fixup, insn...)			\
9999:	insn;					\
	_asm_extable	9999b, fixup
135 
/*
 * Register aliases.
 */
lr	.req	x30		// x30 is the link register
140 
/*
 * Vector entry: a branch to the given label, placed on a 128-byte
 * (2^7) boundary.
 */
	.macro	ventry, label
	.p2align	7
	b	\label
	.endm
148 
/*
 * Select code when configured for BE: CPU_BE(code) expands to its
 * arguments on big-endian builds and to nothing otherwise.
 */
#ifndef CONFIG_CPU_BIG_ENDIAN
#define CPU_BE(code...)
#else
#define CPU_BE(code...) code
#endif
157 
/*
 * Select code when configured for LE: CPU_LE(code) expands to its
 * arguments on little-endian builds and to nothing otherwise.
 */
#ifndef CONFIG_CPU_BIG_ENDIAN
#define CPU_LE(code...) code
#else
#define CPU_LE(code...)
#endif
166 
/*
 * Build a 64-bit value in rd out of two registers that each carry 32
 * bits: rd = lbits | (hbits << 32).
 *
 * The positional order of the two source registers depends on the
 * endianness: (rd, lbits, hbits) on little endian, (rd, hbits, lbits) on
 * big endian.
 */
#ifdef CONFIG_CPU_BIG_ENDIAN
	.macro	regs_to_64, rd, hbits, lbits
#else
	.macro	regs_to_64, rd, lbits, hbits
#endif
	orr	\rd, \lbits, \hbits, lsl #32
	.endm
179 
180 /*
181  * Pseudo-ops for PC-relative adr/ldr/str <reg>, <symbol> where
182  * <symbol> is within the range +/- 4 GB of the PC.
183  */
184 	/*
185 	 * @dst: destination register (64 bit wide)
186 	 * @sym: name of the symbol
187 	 */
188 	.macro	adr_l, dst, sym
189 	adrp	\dst, \sym
190 	add	\dst, \dst, :lo12:\sym
191 	.endm
192 
193 	/*
194 	 * @dst: destination register (32 or 64 bit wide)
195 	 * @sym: name of the symbol
196 	 * @tmp: optional 64-bit scratch register to be used if <dst> is a
197 	 *       32-bit wide register, in which case it cannot be used to hold
198 	 *       the address
199 	 */
200 	.macro	ldr_l, dst, sym, tmp=
201 	.ifb	\tmp
202 	adrp	\dst, \sym
203 	ldr	\dst, [\dst, :lo12:\sym]
204 	.else
205 	adrp	\tmp, \sym
206 	ldr	\dst, [\tmp, :lo12:\sym]
207 	.endif
208 	.endm
209 
210 	/*
211 	 * @src: source register (32 or 64 bit wide)
212 	 * @sym: name of the symbol
213 	 * @tmp: mandatory 64-bit scratch register to calculate the address
214 	 *       while <src> needs to be preserved.
215 	 */
216 	.macro	str_l, src, sym, tmp
217 	adrp	\tmp, \sym
218 	str	\src, [\tmp, :lo12:\sym]
219 	.endm
220 
221 	/*
222 	 * @dst: destination register
223 	 */
224 #if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__)
225 	.macro	this_cpu_offset, dst
226 	mrs	\dst, tpidr_el2
227 	.endm
228 #else
229 	.macro	this_cpu_offset, dst
230 alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
231 	mrs	\dst, tpidr_el1
232 alternative_else
233 	mrs	\dst, tpidr_el2
234 alternative_endif
235 	.endm
236 #endif
237 
238 	/*
239 	 * @dst: Result of per_cpu(sym, smp_processor_id()) (can be SP)
240 	 * @sym: The name of the per-cpu variable
241 	 * @tmp: scratch register
242 	 */
243 	.macro adr_this_cpu, dst, sym, tmp
244 	adrp	\tmp, \sym
245 	add	\dst, \tmp, #:lo12:\sym
246 	this_cpu_offset \tmp
247 	add	\dst, \dst, \tmp
248 	.endm
249 
250 	/*
251 	 * @dst: Result of READ_ONCE(per_cpu(sym, smp_processor_id()))
252 	 * @sym: The name of the per-cpu variable
253 	 * @tmp: scratch register
254 	 */
255 	.macro ldr_this_cpu dst, sym, tmp
256 	adr_l	\dst, \sym
257 	this_cpu_offset \tmp
258 	ldr	\dst, [\dst, \tmp]
259 	.endm
260 
/*
 * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm)
 *
 *	rd:	destination register for the mm pointer
 *	rn:	register holding the vma pointer
 */
	.macro	vma_vm_mm, rd, rn
	ldr	\rd, [\rn, #VMA_VM_MM]
	.endm
267 
/*
 * read_ctr - read CTR_EL0. If the system has mismatched register fields,
 * provide the system wide safe value from arm64_ftr_reg_ctrel0.sys_val
 *
 * The direct mrs is followed by a nop so that it is as long as the
 * two-instruction ldr_l sequence it is an alternative to.
 */
	.macro	read_ctr, reg
alternative_if_not ARM64_MISMATCHED_CACHE_TYPE
	mrs	\reg, ctr_el0			// hardware value of CTR
	nop
alternative_else
	ldr_l	\reg, arm64_ftr_reg_ctrel0 + ARM64_FTR_SYSVAL
alternative_endif
	.endm
280 
281 
/*
 * raw_dcache_line_size - get the minimum D-cache line size on this CPU
 * from the CTR register.
 *
 *	reg:	receives the line size in bytes
 *	tmp:	scratch register, left holding the size encoding
 */
	.macro	raw_dcache_line_size, reg, tmp
	mrs	\tmp, ctr_el0			// read CTR
	ubfx	\tmp, \tmp, #16, #4		// bits [19:16]: size encoding
	mov	\reg, #4			// bytes per word
	lsl	\reg, \reg, \tmp		// reg = 4 << encoding
	.endm
292 
/*
 * dcache_line_size - get the safe D-cache line size across all CPUs
 *
 * Same computation as raw_dcache_line_size, except that the CTR value is
 * obtained through read_ctr.
 *
 *	reg:	receives the line size in bytes
 *	tmp:	scratch register, left holding the size encoding
 */
	.macro	dcache_line_size, reg, tmp
	read_ctr	\tmp
	ubfx		\tmp, \tmp, #16, #4	// bits [19:16]: size encoding
	mov		\reg, #4		// bytes per word
	lsl		\reg, \reg, \tmp	// reg = 4 << encoding
	.endm
302 
/*
 * raw_icache_line_size - get the minimum I-cache line size on this CPU
 * from the CTR register.
 *
 *	reg:	receives the line size in bytes
 *	tmp:	scratch register, left holding the size encoding
 */
	.macro	raw_icache_line_size, reg, tmp
	mrs	\tmp, ctr_el0			// read CTR
	and	\tmp, \tmp, #15			// bits [3:0]: size encoding
	mov	\reg, #4			// bytes per word
	lsl	\reg, \reg, \tmp		// reg = 4 << encoding
	.endm
313 
/*
 * icache_line_size - get the safe I-cache line size across all CPUs
 *
 * Same computation as raw_icache_line_size, except that the CTR value is
 * obtained through read_ctr.
 *
 *	reg:	receives the line size in bytes
 *	tmp:	scratch register, left holding the size encoding
 */
	.macro	icache_line_size, reg, tmp
	read_ctr	\tmp
	and		\tmp, \tmp, #15		// bits [3:0]: size encoding
	mov		\reg, #4		// bytes per word
	lsl		\reg, \reg, \tmp	// reg = 4 << encoding
	.endm
323 
/*
 * tcr_set_t0sz - update TCR.T0SZ so that we can load the ID map
 *
 *	valreg:	register holding the TCR value, updated in place
 *	t0sz:	register whose low TCR_TxSZ_WIDTH bits are inserted at
 *		TCR_T0SZ_OFFSET
 */
	.macro	tcr_set_t0sz, valreg, t0sz
	bfi	\valreg, \t0sz, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
	.endm
330 
/*
 * tcr_set_t1sz - update TCR.T1SZ
 *
 *	valreg:	register holding the TCR value, updated in place
 *	t1sz:	register whose low TCR_TxSZ_WIDTH bits are inserted at
 *		TCR_T1SZ_OFFSET
 */
	.macro	tcr_set_t1sz, valreg, t1sz
	bfi	\valreg, \t1sz, #TCR_T1SZ_OFFSET, #TCR_TxSZ_WIDTH
	.endm
337 
/*
 * tcr_compute_pa_size - set TCR.(I)PS to the highest supported
 * ID_AA64MMFR0_EL1.PARange value
 *
 *	tcr:		register with the TCR_ELx value to be updated
 *	pos:		IPS or PS bitfield position
 *	tmp{0,1}:	temporary registers
 *
 * The value written is min(PARange, ID_AA64MMFR0_PARANGE_MAX). The
 * condition flags are clobbered by the comparison.
 */
	.macro	tcr_compute_pa_size, tcr, pos, tmp0, tmp1
	mrs	\tmp0, ID_AA64MMFR0_EL1
	// Narrow PARange to fit the PS field in TCR_ELx
	ubfx	\tmp0, \tmp0, #ID_AA64MMFR0_PARANGE_SHIFT, #3
	mov	\tmp1, #ID_AA64MMFR0_PARANGE_MAX
	cmp	\tmp0, \tmp1
	csel	\tmp0, \tmp0, \tmp1, ls		// keep the smaller of the two
	bfi	\tcr, \tmp0, \pos, #3
	.endm
355 
356 /*
357  * Macro to perform a data cache maintenance for the interval
358  * [kaddr, kaddr + size)
359  *
360  * 	op:		operation passed to dc instruction
 * 	domain:		domain used in dsb instruction
362  * 	kaddr:		starting virtual address of the region
363  * 	size:		size of the region
364  * 	Corrupts:	kaddr, size, tmp1, tmp2
365  */
366 	.macro __dcache_op_workaround_clean_cache, op, kaddr
367 alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
368 	dc	\op, \kaddr
369 alternative_else
370 	dc	civac, \kaddr
371 alternative_endif
372 	.endm
373 
374 	.macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2
375 	dcache_line_size \tmp1, \tmp2
376 	add	\size, \kaddr, \size
377 	sub	\tmp2, \tmp1, #1
378 	bic	\kaddr, \kaddr, \tmp2
379 9998:
380 	.ifc	\op, cvau
381 	__dcache_op_workaround_clean_cache \op, \kaddr
382 	.else
383 	.ifc	\op, cvac
384 	__dcache_op_workaround_clean_cache \op, \kaddr
385 	.else
386 	.ifc	\op, cvap
387 	sys	3, c7, c12, 1, \kaddr	// dc cvap
388 	.else
389 	.ifc	\op, cvadp
390 	sys	3, c7, c13, 1, \kaddr	// dc cvadp
391 	.else
392 	dc	\op, \kaddr
393 	.endif
394 	.endif
395 	.endif
396 	.endif
397 	add	\kaddr, \kaddr, \tmp1
398 	cmp	\kaddr, \size
399 	b.lo	9998b
400 	dsb	\domain
401 	.endm
402 
/*
 * Macro to perform an instruction cache maintenance for the interval
 * [start, end)
 *
 * 	start, end:	virtual addresses describing the region
 *	label:		A label to branch to on user fault.
 * 	Corrupts:	tmp1, tmp2
 *
 * start and end are only read. The walk begins at start rounded down to
 * an I-cache line, issues "ic ivau" once per line, and ends with
 * "dsb ish; isb". The condition flags are clobbered.
 */
	.macro invalidate_icache_by_line start, end, tmp1, tmp2, label
	icache_line_size \tmp1, \tmp2
	sub	\tmp2, \tmp1, #1		// mask = line size - 1
	bic	\tmp2, \start, \tmp2		// tmp2 = line containing start
.Licache_line_\@:
USER(\label, ic	ivau, \tmp2)			// invalidate I line PoU
	add	\tmp2, \tmp2, \tmp1		// advance to the next line
	cmp	\tmp2, \end
	b.lo	.Licache_line_\@
	dsb	ish
	isb
	.endm
423 
/*
 * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present
 *
 * The PMUVer field is sign-extended, so the write is skipped both when
 * the field is zero and when its top bit is set. The condition flags are
 * clobbered.
 *
 *	tmpreg:	scratch register
 */
	.macro	reset_pmuserenr_el0, tmpreg
	mrs	\tmpreg, id_aa64dfr0_el1
	sbfx	\tmpreg, \tmpreg, #ID_AA64DFR0_PMUVER_SHIFT, #4
	cmp	\tmpreg, #1			// Skip if no PMU present
	b.lt	.Lskip_pmu_\@
	msr	pmuserenr_el0, xzr		// Disable PMU access from EL0
.Lskip_pmu_\@:
	.endm
435 
/*
 * reset_amuserenr_el0 - reset AMUSERENR_EL0 if AMUv1 present
 *
 * The write is skipped when the 4-bit AMU field of ID_AA64PFR0_EL1 reads
 * as zero.
 *
 *	tmpreg:	scratch register
 */
	.macro	reset_amuserenr_el0, tmpreg
	mrs	\tmpreg, id_aa64pfr0_el1	// Check ID_AA64PFR0_EL1
	ubfx	\tmpreg, \tmpreg, #ID_AA64PFR0_AMU_SHIFT, #4
	cbz	\tmpreg, .Lskip_amu_\@		// Skip if no AMU present
	msr_s	SYS_AMUSERENR_EL0, xzr		// Disable AMU access from EL0
.Lskip_amu_\@:
	.endm
/*
 * copy_page - copy src to dest using temp registers t1-t8
 *
 * Each pass moves 64 bytes: four ldp loads followed by four stnp stores.
 * The loop stops once src reaches a PAGE_SIZE boundary. Both src and
 * dest are advanced past the copied data, and the condition flags are
 * clobbered.
 */
	.macro copy_page dest:req src:req t1:req t2:req t3:req t4:req t5:req t6:req t7:req t8:req
.Lcopy_chunk_\@:
	ldp	\t1, \t2, [\src]
	ldp	\t3, \t4, [\src, #16]
	ldp	\t5, \t6, [\src, #32]
	ldp	\t7, \t8, [\src, #48]
	add	\src, \src, #64
	stnp	\t1, \t2, [\dest]
	stnp	\t3, \t4, [\dest, #16]
	stnp	\t5, \t6, [\dest, #32]
	stnp	\t7, \t8, [\dest, #48]
	add	\dest, \dest, #64
	tst	\src, #(PAGE_SIZE - 1)		// more to copy in this page?
	b.ne	.Lcopy_chunk_\@
	.endm
463 
/*
 * Annotate a function as being unsuitable for kprobes.
 *
 * With CONFIG_KPROBES the address of x is recorded as a 64-bit entry in
 * the "_kprobe_blacklist" section; without it the macro expands to
 * nothing.
 */
#ifndef CONFIG_KPROBES
#define NOKPROBE(x)
#else
#define NOKPROBE(x)				\
	.pushsection "_kprobe_blacklist", "aw";	\
	.quad	x;				\
	.popsection;
#endif
475 
/*
 * EXPORT_SYMBOL_NOKASAN(name) is EXPORT_SYMBOL(name), unless generic or
 * software tag-based KASAN is enabled, in which case it expands to nothing.
 */
#if !defined(CONFIG_KASAN_GENERIC) && !defined(CONFIG_KASAN_SW_TAGS)
#define EXPORT_SYMBOL_NOKASAN(name)	EXPORT_SYMBOL(name)
#else
#define EXPORT_SYMBOL_NOKASAN(name)
#endif
481 
482 	/*
483 	 * Emit a 64-bit absolute little endian symbol reference in a way that
484 	 * ensures that it will be resolved at build time, even when building a
485 	 * PIE binary. This requires cooperation from the linker script, which
486 	 * must emit the lo32/hi32 halves individually.
487 	 */
488 	.macro	le64sym, sym
489 	.long	\sym\()_lo32
490 	.long	\sym\()_hi32
491 	.endm
492 
493 	/*
494 	 * mov_q - move an immediate constant into a 64-bit register using
495 	 *         between 2 and 4 movz/movk instructions (depending on the
496 	 *         magnitude and sign of the operand)
497 	 */
498 	.macro	mov_q, reg, val
499 	.if (((\val) >> 31) == 0 || ((\val) >> 31) == 0x1ffffffff)
500 	movz	\reg, :abs_g1_s:\val
501 	.else
502 	.if (((\val) >> 47) == 0 || ((\val) >> 47) == 0x1ffff)
503 	movz	\reg, :abs_g2_s:\val
504 	.else
505 	movz	\reg, :abs_g3:\val
506 	movk	\reg, :abs_g2_nc:\val
507 	.endif
508 	movk	\reg, :abs_g1_nc:\val
509 	.endif
510 	movk	\reg, :abs_g0_nc:\val
511 	.endm
512 
/*
 * Return the current task_struct, which is read from sp_el0.
 *
 *	rd:	destination register
 */
	.macro	get_current_task, rd
	mrs	\rd, sp_el0
	.endm
519 
/*
 * Offset ttbr1 to allow for 48-bit kernel VAs set with 52-bit PTRS_PER_PGD.
 * orr is used as it can cover the immediate value (and is idempotent).
 * In future this may be nop'ed out when dealing with 52-bit kernel VAs.
 * 	ttbr: Value of ttbr to set, modified.
 * 	tmp:  scratch register, only used with CONFIG_ARM64_VA_BITS_52.
 *
 * The offset is skipped when the LVA field of ID_AA64MMFR2_EL1 is
 * non-zero. Without CONFIG_ARM64_VA_BITS_52 nothing is emitted.
 */
	.macro	offset_ttbr1, ttbr, tmp
#ifdef CONFIG_ARM64_VA_BITS_52
	mrs_s	\tmp, SYS_ID_AA64MMFR2_EL1
	and	\tmp, \tmp, #(0xf << ID_AA64MMFR2_LVA_SHIFT)
	cbnz	\tmp, .Lttbr1_no_offset_\@
	orr	\ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET
.Lttbr1_no_offset_\@ :
#endif
	.endm
535 
/*
 * Perform the reverse of offset_ttbr1.
 * bic is used as it can cover the immediate value and, in future, won't need
 * to be nop'ed out when dealing with 52-bit kernel VAs.
 * 	ttbr: Value of ttbr to restore, modified.
 *
 * Without CONFIG_ARM64_VA_BITS_52 nothing is emitted.
 */
	.macro	restore_ttbr1, ttbr
#ifdef CONFIG_ARM64_VA_BITS_52
	bic	\ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET
#endif
	.endm
546 
/*
 * Arrange a physical address in a TTBR register, taking care of 52-bit
 * addresses.
 *
 * 	phys:	physical address, preserved
 * 	ttbr:	returns the TTBR value
 *
 * With CONFIG_ARM64_PA_BITS_52 the address is ORed with a copy of itself
 * shifted right by 46 and the result is masked with TTBR_BADDR_MASK_52.
 * Otherwise the address is copied unchanged.
 */
	.macro	phys_to_ttbr, ttbr, phys
#ifndef CONFIG_ARM64_PA_BITS_52
	mov	\ttbr, \phys
#else
	orr	\ttbr, \phys, \phys, lsr #46
	and	\ttbr, \ttbr, #TTBR_BADDR_MASK_52
#endif
	.endm
562 
563 	.macro	phys_to_pte, pte, phys
564 #ifdef CONFIG_ARM64_PA_BITS_52
565 	/*
566 	 * We assume \phys is 64K aligned and this is guaranteed by only
567 	 * supporting this configuration with 64K pages.
568 	 */
569 	orr	\pte, \phys, \phys, lsr #36
570 	and	\pte, \pte, #PTE_ADDR_MASK
571 #else
572 	mov	\pte, \phys
573 #endif
574 	.endm
575 
576 	.macro	pte_to_phys, phys, pte
577 #ifdef CONFIG_ARM64_PA_BITS_52
578 	ubfiz	\phys, \pte, #(48 - 16 - 12), #16
579 	bfxil	\phys, \pte, #16, #32
580 	lsl	\phys, \phys, #16
581 #else
582 	and	\phys, \pte, #PTE_ADDR_MASK
583 #endif
584 	.endm
585 
/*
 * tcr_clear_errata_bits - Clear TCR bits that trigger an erratum on this CPU.
 *
 *	tcr:		register holding the TCR value, updated in place
 *	tmp{1,2}:	temporary registers
 *
 * With CONFIG_FUJITSU_ERRATUM_010001, MIDR_EL1 is masked and compared
 * against the affected part, and TCR_CLEAR_FUJITSU_ERRATUM_010001 is
 * cleared from tcr on a match; the condition flags are clobbered.
 * Without that option nothing is emitted.
 */
	.macro	tcr_clear_errata_bits, tcr, tmp1, tmp2
#ifdef CONFIG_FUJITSU_ERRATUM_010001
	mrs	\tmp1, midr_el1
	mov_q	\tmp2, MIDR_FUJITSU_ERRATUM_010001_MASK
	and	\tmp1, \tmp1, \tmp2
	mov_q	\tmp2, MIDR_FUJITSU_ERRATUM_010001
	cmp	\tmp1, \tmp2
	b.ne	.Lnot_affected_\@

	mov_q	\tmp2, TCR_CLEAR_FUJITSU_ERRATUM_010001
	bic	\tcr, \tcr, \tmp2
.Lnot_affected_\@:
#endif /* CONFIG_FUJITSU_ERRATUM_010001 */
	.endm
604 
/*
 * Erratum workaround to apply before disabling the MMU: insert an ISB
 * immediately before the MSR that changes SCTLR_ELn[M] from 1 to 0.
 *
 * Only emitted with CONFIG_QCOM_FALKOR_ERRATUM_E1041; otherwise the macro
 * is empty.
 */
	.macro pre_disable_mmu_workaround
#ifdef CONFIG_QCOM_FALKOR_ERRATUM_E1041
	isb
#endif
	.endm
614 
615 	/*
616 	 * frame_push - Push @regcount callee saved registers to the stack,
617 	 *              starting at x19, as well as x29/x30, and set x29 to
618 	 *              the new value of sp. Add @extra bytes of stack space
619 	 *              for locals.
620 	 */
621 	.macro		frame_push, regcount:req, extra
622 	__frame		st, \regcount, \extra
623 	.endm
624 
625 	/*
626 	 * frame_pop  - Pop the callee saved registers from the stack that were
627 	 *              pushed in the most recent call to frame_push, as well
628 	 *              as x29/x30 and any extra stack space that may have been
629 	 *              allocated.
630 	 */
631 	.macro		frame_pop
632 	__frame		ld
633 	.endm
634 
635 	.macro		__frame_regs, reg1, reg2, op, num
636 	.if		.Lframe_regcount == \num
637 	\op\()r		\reg1, [sp, #(\num + 1) * 8]
638 	.elseif		.Lframe_regcount > \num
639 	\op\()p		\reg1, \reg2, [sp, #(\num + 1) * 8]
640 	.endif
641 	.endm
642 
643 	.macro		__frame, op, regcount, extra=0
644 	.ifc		\op, st
645 	.if		(\regcount) < 0 || (\regcount) > 10
646 	.error		"regcount should be in the range [0 ... 10]"
647 	.endif
648 	.if		((\extra) % 16) != 0
649 	.error		"extra should be a multiple of 16 bytes"
650 	.endif
651 	.ifdef		.Lframe_regcount
652 	.if		.Lframe_regcount != -1
653 	.error		"frame_push/frame_pop may not be nested"
654 	.endif
655 	.endif
656 	.set		.Lframe_regcount, \regcount
657 	.set		.Lframe_extra, \extra
658 	.set		.Lframe_local_offset, ((\regcount + 3) / 2) * 16
659 	stp		x29, x30, [sp, #-.Lframe_local_offset - .Lframe_extra]!
660 	mov		x29, sp
661 	.endif
662 
663 	__frame_regs	x19, x20, \op, 1
664 	__frame_regs	x21, x22, \op, 3
665 	__frame_regs	x23, x24, \op, 5
666 	__frame_regs	x25, x26, \op, 7
667 	__frame_regs	x27, x28, \op, 9
668 
669 	.ifc		\op, ld
670 	.if		.Lframe_regcount == -1
671 	.error		"frame_push/frame_pop may not be nested"
672 	.endif
673 	ldp		x29, x30, [sp], #.Lframe_local_offset + .Lframe_extra
674 	.set		.Lframe_regcount, -1
675 	.endif
676 	.endm
677 
/*
 * Set SCTLR_EL1 to the passed value, and invalidate the local icache
 * in the process. This is called when setting the MMU on.
 *
 *	reg:	register holding the new SCTLR_EL1 value
 */
.macro set_sctlr_el1, reg
	msr	sctlr_el1, \reg
	isb
	/*
	 * Instructions may have been fetched speculatively from the PoC
	 * even though they were dynamically patched at the PoU. Invalidate
	 * the local I-cache so that any such stale instructions are
	 * discarded.
	 */
	ic	iallu
	dsb	nsh
	isb
.endm
694 
695 /*
696  * Check whether to yield to another runnable task from kernel mode NEON code
697  * (which runs with preemption disabled).
698  *
699  * if_will_cond_yield_neon
700  *        // pre-yield patchup code
701  * do_cond_yield_neon
702  *        // post-yield patchup code
703  * endif_yield_neon    <label>
704  *
705  * where <label> is optional, and marks the point where execution will resume
706  * after a yield has been performed. If omitted, execution resumes right after
707  * the endif_yield_neon invocation. Note that the entire sequence, including
708  * the provided patchup code, will be omitted from the image if
709  * CONFIG_PREEMPTION is not defined.
710  *
711  * As a convenience, in the case where no patchup code is required, the above
712  * sequence may be abbreviated to
713  *
714  * cond_yield_neon <label>
715  *
 * Note that the patchup code does not support assembler directives that
 * change the output section; any use of such directives is undefined.
718  *
719  * The yield itself consists of the following:
720  * - Check whether the preempt count is exactly 1 and a reschedule is also
721  *   needed. If so, calling of preempt_enable() in kernel_neon_end() will
722  *   trigger a reschedule. If it is not the case, yielding is pointless.
723  * - Disable and re-enable kernel mode NEON, and branch to the yield fixup
724  *   code.
725  *
726  * This macro sequence may clobber all CPU state that is not guaranteed by the
727  * AAPCS to be preserved across an ordinary function call.
728  */
729 
730 	.macro		cond_yield_neon, lbl
731 	if_will_cond_yield_neon
732 	do_cond_yield_neon
733 	endif_yield_neon	\lbl
734 	.endm
735 
736 	.macro		if_will_cond_yield_neon
737 #ifdef CONFIG_PREEMPTION
738 	get_current_task	x0
739 	ldr		x0, [x0, #TSK_TI_PREEMPT]
740 	sub		x0, x0, #PREEMPT_DISABLE_OFFSET
741 	cbz		x0, .Lyield_\@
742 	/* fall through to endif_yield_neon */
743 	.subsection	1
744 .Lyield_\@ :
745 #else
746 	.section	".discard.cond_yield_neon", "ax"
747 #endif
748 	.endm
749 
750 	.macro		do_cond_yield_neon
751 	bl		kernel_neon_end
752 	bl		kernel_neon_begin
753 	.endm
754 
755 	.macro		endif_yield_neon, lbl
756 	.ifnb		\lbl
757 	b		\lbl
758 	.else
759 	b		.Lyield_out_\@
760 	.endif
761 	.previous
762 .Lyield_out_\@ :
763 	.endm
764 
765 	/*
766 	 * Check whether preempt-disabled code should yield as soon as it
767 	 * is able. This is the case if re-enabling preemption a single
768 	 * time results in a preempt count of zero, and the TIF_NEED_RESCHED
769 	 * flag is set. (Note that the latter is stored negated in the
770 	 * top word of the thread_info::preempt_count field)
771 	 */
772 	.macro		cond_yield, lbl:req, tmp:req
773 #ifdef CONFIG_PREEMPTION
774 	get_current_task \tmp
775 	ldr		\tmp, [\tmp, #TSK_TI_PREEMPT]
776 	sub		\tmp, \tmp, #PREEMPT_DISABLE_OFFSET
777 	cbz		\tmp, \lbl
778 #endif
779 	.endm
780 
781 /*
782  * This macro emits a program property note section identifying
783  * architecture features which require special handling, mainly for
784  * use in assembly files included in the VDSO.
785  */
786 
/* Note type and property type written by emit_aarch64_feature_1_and */
#define NT_GNU_PROPERTY_TYPE_0			5
#define GNU_PROPERTY_AARCH64_FEATURE_1_AND	0xc0000000

/* Individual feature bits of the property value */
#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI	(1U << 0)
#define GNU_PROPERTY_AARCH64_FEATURE_1_PAC	(1U << 1)

/* Only kernels built with BTI get a default feature set */
#ifdef CONFIG_ARM64_BTI_KERNEL
#define GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT		\
		((GNU_PROPERTY_AARCH64_FEATURE_1_BTI |	\
		  GNU_PROPERTY_AARCH64_FEATURE_1_PAC))
#endif
798 
/*
 * emit_aarch64_feature_1_and - emit a .note.gnu.property note carrying a
 * single GNU_PROPERTY_AARCH64_FEATURE_1_AND property whose 32-bit value is
 * feat.
 *
 * Without GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT the macro still exists,
 * but is empty.
 */
#ifndef GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT
.macro emit_aarch64_feature_1_and, feat=0
.endm

#else
.macro emit_aarch64_feature_1_and, feat=GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT
	.pushsection .note.gnu.property, "a"
	.p2align	3
	.long   2f - 1f				// size of the name
	.long   6f - 3f				// size of the descriptor
	.long   NT_GNU_PROPERTY_TYPE_0
1:      .string "GNU"
2:
	.p2align	3
3:      .long   GNU_PROPERTY_AARCH64_FEATURE_1_AND
	.long   5f - 4f				// size of the property data
4:
	/*
	 * This is described with an array of char in the Linux API
	 * spec but the text and all other usage (including binutils,
	 * clang and GCC) treat this as a 32 bit value so no swizzling
	 * is required for big endian.
	 */
	.long   \feat
5:
	.p2align	3
6:
	.popsection
.endm

#endif /* GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT */
830 
831 #endif	/* __ASM_ASSEMBLER_H */
832