xref: /openbmc/linux/arch/arm64/include/asm/assembler.h (revision 87fcfa7b7fe6bf819033fe827a27f710e38639b5)
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /*
3  * Based on arch/arm/include/asm/assembler.h, arch/arm/mm/proc-macros.S
4  *
5  * Copyright (C) 1996-2000 Russell King
6  * Copyright (C) 2012 ARM Ltd.
7  */
8 #ifndef __ASSEMBLY__
9 #error "Only include this from assembly code"
10 #endif
11 
12 #ifndef __ASM_ASSEMBLER_H
13 #define __ASM_ASSEMBLER_H
14 
15 #include <asm-generic/export.h>
16 
17 #include <asm/asm-offsets.h>
18 #include <asm/cpufeature.h>
19 #include <asm/cputype.h>
20 #include <asm/debug-monitors.h>
21 #include <asm/page.h>
22 #include <asm/pgtable-hwdef.h>
23 #include <asm/ptrace.h>
24 #include <asm/thread_info.h>
25 
26 	.macro save_and_disable_daif, flags
27 	mrs	\flags, daif
28 	msr	daifset, #0xf
29 	.endm
30 
31 	.macro disable_daif
32 	msr	daifset, #0xf
33 	.endm
34 
35 	.macro enable_daif
36 	msr	daifclr, #0xf
37 	.endm
38 
39 	.macro	restore_daif, flags:req
40 	msr	daif, \flags
41 	.endm
42 
43 	/* IRQ is the lowest priority flag, unconditionally unmask the rest. */
44 	.macro enable_da_f
45 	msr	daifclr, #(8 | 4 | 1)
46 	.endm
47 
48 /*
49  * Save/restore interrupts.
50  */
51 	.macro	save_and_disable_irq, flags
52 	mrs	\flags, daif
53 	msr	daifset, #2
54 	.endm
55 
56 	.macro	restore_irq, flags
57 	msr	daif, \flags
58 	.endm
59 
60 	.macro	enable_dbg
61 	msr	daifclr, #8
62 	.endm
63 
64 	.macro	disable_step_tsk, flgs, tmp
65 	tbz	\flgs, #TIF_SINGLESTEP, 9990f
66 	mrs	\tmp, mdscr_el1
67 	bic	\tmp, \tmp, #DBG_MDSCR_SS
68 	msr	mdscr_el1, \tmp
69 	isb	// Synchronise with enable_dbg
70 9990:
71 	.endm
72 
73 	/* call with daif masked */
74 	.macro	enable_step_tsk, flgs, tmp
75 	tbz	\flgs, #TIF_SINGLESTEP, 9990f
76 	mrs	\tmp, mdscr_el1
77 	orr	\tmp, \tmp, #DBG_MDSCR_SS
78 	msr	mdscr_el1, \tmp
79 9990:
80 	.endm
81 
/*
 * RAS Error Synchronization barrier.
 *
 * Expands to "hint #16" when CONFIG_ARM64_RAS_EXTN is set and to a plain
 * nop otherwise, so the macro always occupies exactly one instruction.
 */
	.macro	esb
#ifndef CONFIG_ARM64_RAS_EXTN
	nop
#else
	hint	#16
#endif
	.endm
92 
/*
 * Value prediction barrier, emitted through its hint-space encoding.
 */
	.macro	csdb
	hint	#20
	.endm
99 
/*
 * Speculation barrier.
 *
 * Default sequence is "dsb nsh; isb". When the ARM64_HAS_SB alternative is
 * applied it is replaced by SB_BARRIER_INSN followed by a nop, which keeps
 * both sequences two instructions long.
 */
	.macro	sb
alternative_if_not ARM64_HAS_SB
	dsb	nsh
	isb
alternative_else
	SB_BARRIER_INSN
	nop
alternative_endif
	.endm
112 
/*
 * NOP sequence: emit \num consecutive nop instructions.
 *
 *	num:	repeat count, evaluated by the assembler's .rept
 */
	.macro	nops, num
	.rept	\num
	nop
	.endr
	.endm
121 
/*
 * Emit an entry into the exception table.
 *
 * The entry is appended to the __ex_table section, 8-byte aligned, as two
 * 32-bit values: \from and \to, each stored as an offset from the current
 * location ("- .") rather than as an absolute address.
 *
 *	from:	label of the instruction that may fault
 *	to:	label of the fixup code
 */
	.macro		_asm_extable, from, to
	.pushsection	__ex_table, "a"
	.align		3
	.long		(\from - .), (\to - .)
	.popsection
	.endm
131 
/*
 * USER(l, x...) - emit the statement(s) x under the local label 9999 and
 * add an exception table entry pairing that location with label l.
 */
#define USER(l, x...)				\
9999:	x;					\
	_asm_extable	9999b, l
135 
/*
 * Register aliases.
 *
 * "lr" may be used wherever x30 (the link register) is meant.
 */
lr	.req	x30
140 
/*
 * Vector entry: align to 2^7 = 128 bytes and branch to \label.
 */
	.macro	ventry	label
	.align	7
	b	\label
	.endm
148 
/*
 * Select code when configured for BE: CPU_BE(code) expands to its argument
 * on CONFIG_CPU_BIG_ENDIAN builds and to nothing otherwise.
 */
#ifndef CONFIG_CPU_BIG_ENDIAN
#define CPU_BE(code...)
#else
#define CPU_BE(code...) code
#endif
157 
/*
 * Select code when configured for LE: CPU_LE(code) expands to its argument
 * unless CONFIG_CPU_BIG_ENDIAN is set, in which case it expands to nothing.
 */
#ifndef CONFIG_CPU_BIG_ENDIAN
#define CPU_LE(code...) code
#else
#define CPU_LE(code...)
#endif
166 
/*
 * Build a 64-bit value out of two 32-bit halves held in registers:
 *
 *	rd = lbits | (hbits << 32)
 *
 * Only the order of the 2nd and 3rd macro parameters depends on endianness;
 * on big endian kernels the caller's operands are taken as (hbits, lbits)
 * instead of (lbits, hbits). The instruction itself is the same.
 */
#ifdef CONFIG_CPU_BIG_ENDIAN
	.macro	regs_to_64, rd, hbits, lbits
#else
	.macro	regs_to_64, rd, lbits, hbits
#endif
	orr	\rd, \lbits, \hbits, lsl #32
	.endm
179 
180 /*
181  * Pseudo-ops for PC-relative adr/ldr/str <reg>, <symbol> where
182  * <symbol> is within the range +/- 4 GB of the PC.
183  */
184 	/*
185 	 * @dst: destination register (64 bit wide)
186 	 * @sym: name of the symbol
187 	 */
188 	.macro	adr_l, dst, sym
189 	adrp	\dst, \sym
190 	add	\dst, \dst, :lo12:\sym
191 	.endm
192 
193 	/*
194 	 * @dst: destination register (32 or 64 bit wide)
195 	 * @sym: name of the symbol
196 	 * @tmp: optional 64-bit scratch register to be used if <dst> is a
197 	 *       32-bit wide register, in which case it cannot be used to hold
198 	 *       the address
199 	 */
200 	.macro	ldr_l, dst, sym, tmp=
201 	.ifb	\tmp
202 	adrp	\dst, \sym
203 	ldr	\dst, [\dst, :lo12:\sym]
204 	.else
205 	adrp	\tmp, \sym
206 	ldr	\dst, [\tmp, :lo12:\sym]
207 	.endif
208 	.endm
209 
210 	/*
211 	 * @src: source register (32 or 64 bit wide)
212 	 * @sym: name of the symbol
213 	 * @tmp: mandatory 64-bit scratch register to calculate the address
214 	 *       while <src> needs to be preserved.
215 	 */
216 	.macro	str_l, src, sym, tmp
217 	adrp	\tmp, \sym
218 	str	\src, [\tmp, :lo12:\sym]
219 	.endm
220 
221 	/*
222 	 * @dst: Result of per_cpu(sym, smp_processor_id()) (can be SP)
223 	 * @sym: The name of the per-cpu variable
224 	 * @tmp: scratch register
225 	 */
226 	.macro adr_this_cpu, dst, sym, tmp
227 	adrp	\tmp, \sym
228 	add	\dst, \tmp, #:lo12:\sym
229 alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
230 	mrs	\tmp, tpidr_el1
231 alternative_else
232 	mrs	\tmp, tpidr_el2
233 alternative_endif
234 	add	\dst, \dst, \tmp
235 	.endm
236 
237 	/*
238 	 * @dst: Result of READ_ONCE(per_cpu(sym, smp_processor_id()))
239 	 * @sym: The name of the per-cpu variable
240 	 * @tmp: scratch register
241 	 */
242 	.macro ldr_this_cpu dst, sym, tmp
243 	adr_l	\dst, \sym
244 alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
245 	mrs	\tmp, tpidr_el1
246 alternative_else
247 	mrs	\tmp, tpidr_el2
248 alternative_endif
249 	ldr	\dst, [\dst, \tmp]
250 	.endm
251 
/*
 * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm)
 *
 *	rd:	receives the value loaded from offset VMA_VM_MM
 *	rn:	vma pointer
 */
	.macro	vma_vm_mm, rd, rn
	ldr	\rd, [\rn, #VMA_VM_MM]
	.endm
258 
/*
 * mmid - get context id from mm pointer (mm->context.id)
 *
 *	rd:	receives the value loaded from offset MM_CONTEXT_ID
 *	rn:	mm pointer
 */
	.macro	mmid, rd, rn
	ldr	\rd, [\rn, #MM_CONTEXT_ID]
	.endm
/*
 * read_ctr - read CTR_EL0. If the system has mismatched register fields,
 * provide the system wide safe value from arm64_ftr_reg_ctrel0.sys_val
 *
 *	reg:	64-bit destination register
 *
 * The replacement (ldr_l without a scratch register) is two instructions,
 * hence the nop padding after the mrs in the default sequence.
 */
	.macro	read_ctr, reg
alternative_if_not ARM64_MISMATCHED_CACHE_TYPE
	mrs	\reg, ctr_el0			// this CPU's own CTR
	nop
alternative_else
	ldr_l	\reg, arm64_ftr_reg_ctrel0 + ARM64_FTR_SYSVAL
alternative_endif
	.endm
277 
278 
/*
 * raw_dcache_line_size - get the minimum D-cache line size on this CPU
 * from the CTR register.
 *
 *	reg:	result, in bytes: 4 << CTR_EL0[19:16]
 *	tmp:	scratch register
 */
	.macro	raw_dcache_line_size, reg, tmp
	mrs	\tmp, ctr_el0			// this CPU's own CTR
	ubfx	\tmp, \tmp, #16, #4		// bits [19:16]: log2(words per line)
	mov	\reg, #4			// bytes per word
	lsl	\reg, \reg, \tmp		// line size in bytes
	.endm
289 
/*
 * dcache_line_size - get the safe D-cache line size across all CPUs
 *
 * Same computation as raw_dcache_line_size, but the CTR value is obtained
 * through read_ctr.
 *
 *	reg:	result, in bytes: 4 << CTR[19:16]
 *	tmp:	scratch register
 */
	.macro	dcache_line_size, reg, tmp
	read_ctr	\tmp
	ubfx		\tmp, \tmp, #16, #4	// bits [19:16]: log2(words per line)
	mov		\reg, #4		// bytes per word
	lsl		\reg, \reg, \tmp	// line size in bytes
	.endm
299 
/*
 * raw_icache_line_size - get the minimum I-cache line size on this CPU
 * from the CTR register.
 *
 *	reg:	result, in bytes: 4 << CTR_EL0[3:0]
 *	tmp:	scratch register
 */
	.macro	raw_icache_line_size, reg, tmp
	mrs	\tmp, ctr_el0			// this CPU's own CTR
	and	\tmp, \tmp, #15			// bits [3:0]: log2(words per line)
	mov	\reg, #4			// bytes per word
	lsl	\reg, \reg, \tmp		// line size in bytes
	.endm
310 
/*
 * icache_line_size - get the safe I-cache line size across all CPUs
 *
 * Same computation as raw_icache_line_size, but the CTR value is obtained
 * through read_ctr.
 *
 *	reg:	result, in bytes: 4 << CTR[3:0]
 *	tmp:	scratch register
 */
	.macro	icache_line_size, reg, tmp
	read_ctr	\tmp
	and		\tmp, \tmp, #15		// bits [3:0]: log2(words per line)
	mov		\reg, #4		// bytes per word
	lsl		\reg, \reg, \tmp	// line size in bytes
	.endm
320 
/*
 * tcr_set_t0sz - update TCR.T0SZ so that we can load the ID map
 *
 *	valreg:	register holding the TCR value, modified in place
 *	t0sz:	register whose low TCR_TxSZ_WIDTH bits are inserted at
 *		bit TCR_T0SZ_OFFSET
 */
	.macro	tcr_set_t0sz, valreg, t0sz
	bfi	\valreg, \t0sz, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
	.endm
327 
/*
 * tcr_set_t1sz - update TCR.T1SZ
 *
 *	valreg:	register holding the TCR value, modified in place
 *	t1sz:	register whose low TCR_TxSZ_WIDTH bits are inserted at
 *		bit TCR_T1SZ_OFFSET
 */
	.macro	tcr_set_t1sz, valreg, t1sz
	bfi	\valreg, \t1sz, #TCR_T1SZ_OFFSET, #TCR_TxSZ_WIDTH
	.endm
334 
/*
 * tcr_compute_pa_size - set TCR.(I)PS to the highest supported
 * ID_AA64MMFR0_EL1.PARange value
 *
 *	tcr:		register with the TCR_ELx value to be updated
 *	pos:		IPS or PS bitfield position
 *	tmp{0,1}:	temporary registers
 *
 * The value written is min(PARange[2:0], ID_AA64MMFR0_PARANGE_MAX),
 * compared as unsigned numbers. Condition flags are clobbered.
 */
	.macro	tcr_compute_pa_size, tcr, pos, tmp0, tmp1
	mrs	\tmp0, ID_AA64MMFR0_EL1
	// Narrow PARange to fit the PS field in TCR_ELx
	ubfx	\tmp0, \tmp0, #ID_AA64MMFR0_PARANGE_SHIFT, #3
	mov	\tmp1, #ID_AA64MMFR0_PARANGE_MAX
	cmp	\tmp0, \tmp1
	csel	\tmp0, \tmp1, \tmp0, hi		// clamp to the maximum
	bfi	\tcr, \tmp0, \pos, #3		// insert the 3-bit field
	.endm
352 
/*
 * Helper for dcache_by_line_op: issue "dc \op, \kaddr", or "dc civac, \kaddr"
 * instead when the ARM64_WORKAROUND_CLEAN_CACHE alternative is applied.
 */
	.macro __dcache_op_workaround_clean_cache, op, kaddr
alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
	dc	\op, \kaddr
alternative_else
	dc	civac, \kaddr
alternative_endif
	.endm

/*
 * Macro to perform a data cache maintenance for the interval
 * [kaddr, kaddr + size)
 *
 * 	op:		operation passed to dc instruction
 * 	domain:		domain used in dsb instruction
 * 	kaddr:		starting virtual address of the region
 * 	size:		size of the region
 * 	Corrupts:	kaddr, size, tmp1, tmp2
 *
 * kaddr is rounded down to a cache line boundary and size is turned into
 * the end address. The loop body runs before the bounds check, so at least
 * one line is always operated on. Condition flags are clobbered.
 */
	.macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2
	dcache_line_size \tmp1, \tmp2		// tmp1 = line size in bytes
	add	\size, \kaddr, \size		// size = end address
	sub	\tmp2, \tmp1, #1		// tmp2 = line mask
	bic	\kaddr, \kaddr, \tmp2		// align start down
9998:
	.ifc	\op, cvau
		__dcache_op_workaround_clean_cache \op, \kaddr
	.else
	  .ifc	\op, cvac
		__dcache_op_workaround_clean_cache \op, \kaddr
	  .else
	    .ifc	\op, cvap
		sys	3, c7, c12, 1, \kaddr	// dc cvap
	    .else
	      .ifc	\op, cvadp
		sys	3, c7, c13, 1, \kaddr	// dc cvadp
	      .else
		dc	\op, \kaddr
	      .endif
	    .endif
	  .endif
	.endif
	add	\kaddr, \kaddr, \tmp1		// next line
	cmp	\kaddr, \size
	b.lo	9998b
	dsb	\domain
	.endm
399 
/*
 * Macro to perform an instruction cache maintenance for the interval
 * [start, end)
 *
 * 	start, end:	virtual addresses describing the region
 *	label:		A label to branch to on user fault.
 * 	Corrupts:	tmp1, tmp2
 *
 * start and end themselves are only read. tmp1 holds the I-cache line size
 * and tmp2 walks the region from start rounded down to a line boundary.
 * The "ic ivau" runs before the bounds check, so at least one line is
 * always invalidated. Condition flags are clobbered.
 */
	.macro invalidate_icache_by_line start, end, tmp1, tmp2, label
	icache_line_size \tmp1, \tmp2		// tmp1 = line size in bytes
	sub	\tmp2, \tmp1, #1		// tmp2 = line mask
	bic	\tmp2, \start, \tmp2		// tmp2 = aligned cursor
9997:
USER(\label, ic	ivau, \tmp2)			// invalidate I line PoU
	add	\tmp2, \tmp2, \tmp1		// next line
	cmp	\tmp2, \end
	b.lo	9997b
	dsb	ish
	isb
	.endm
420 
/*
 * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present
 *
 *	tmpreg:	scratch register
 *
 * The 4-bit PMUVer field is extracted as a signed number, so the write is
 * skipped both for 0 and for field values with the top bit set (which come
 * out negative). Condition flags are clobbered.
 */
	.macro	reset_pmuserenr_el0, tmpreg
	mrs	\tmpreg, id_aa64dfr0_el1
	sbfx	\tmpreg, \tmpreg, #ID_AA64DFR0_PMUVER_SHIFT, #4
	cmp	\tmpreg, #1			// Skip if no PMU present
	b.lt	9000f
	msr	pmuserenr_el0, xzr		// Disable PMU access from EL0
9000:
	.endm
432 
/*
 * copy_page - copy src to dest using temp registers t1-t8
 *
 * Each pass moves 64 bytes (four ldp from src, four stnp to dest) and
 * advances both pointers. The loop stops once src has no bits set below
 * PAGE_SIZE, i.e. when it reaches a page boundary.
 *
 * Corrupts: src, dest, t1-t8 and the condition flags.
 */
	.macro copy_page dest:req src:req t1:req t2:req t3:req t4:req t5:req t6:req t7:req t8:req
9998:	ldp	\t1, \t2, [\src]
	ldp	\t3, \t4, [\src, #16]
	ldp	\t5, \t6, [\src, #32]
	ldp	\t7, \t8, [\src, #48]
	add	\src, \src, #64
	stnp	\t1, \t2, [\dest]
	stnp	\t3, \t4, [\dest, #16]
	stnp	\t5, \t6, [\dest, #32]
	stnp	\t7, \t8, [\dest, #48]
	add	\dest, \dest, #64
	tst	\src, #(PAGE_SIZE - 1)		// reached the end of the page?
	b.ne	9998b
	.endm
450 
/*
 * Annotate a function as being unsuitable for kprobes.
 *
 * With CONFIG_KPROBES, NOKPROBE(x) records x as a 64-bit entry in the
 * "_kprobe_blacklist" section; without it the macro expands to nothing.
 */
#ifndef CONFIG_KPROBES
#define NOKPROBE(x)
#else
#define NOKPROBE(x)				\
	.pushsection "_kprobe_blacklist", "aw";	\
	.quad	x;				\
	.popsection;
#endif
462 
/*
 * EXPORT_SYMBOL_NOKASAN(name) is EXPORT_SYMBOL(name) unless CONFIG_KASAN is
 * enabled, in which case it expands to nothing.
 */
#ifndef CONFIG_KASAN
#define EXPORT_SYMBOL_NOKASAN(name)	EXPORT_SYMBOL(name)
#else
#define EXPORT_SYMBOL_NOKASAN(name)
#endif
468 
469 	/*
470 	 * Emit a 64-bit absolute little endian symbol reference in a way that
471 	 * ensures that it will be resolved at build time, even when building a
472 	 * PIE binary. This requires cooperation from the linker script, which
473 	 * must emit the lo32/hi32 halves individually.
474 	 */
475 	.macro	le64sym, sym
476 	.long	\sym\()_lo32
477 	.long	\sym\()_hi32
478 	.endm
479 
480 	/*
481 	 * mov_q - move an immediate constant into a 64-bit register using
482 	 *         between 2 and 4 movz/movk instructions (depending on the
483 	 *         magnitude and sign of the operand)
484 	 */
485 	.macro	mov_q, reg, val
486 	.if (((\val) >> 31) == 0 || ((\val) >> 31) == 0x1ffffffff)
487 	movz	\reg, :abs_g1_s:\val
488 	.else
489 	.if (((\val) >> 47) == 0 || ((\val) >> 47) == 0x1ffff)
490 	movz	\reg, :abs_g2_s:\val
491 	.else
492 	movz	\reg, :abs_g3:\val
493 	movk	\reg, :abs_g2_nc:\val
494 	.endif
495 	movk	\reg, :abs_g1_nc:\val
496 	.endif
497 	movk	\reg, :abs_g0_nc:\val
498 	.endm
499 
/*
 * Return the current task_struct.
 *
 *	rd:	receives the contents of sp_el0
 */
	.macro	get_current_task, rd
	mrs	\rd, sp_el0
	.endm
506 
/*
 * Offset ttbr1 to allow for 48-bit kernel VAs set with 52-bit PTRS_PER_PGD.
 * orr is used as it can cover the immediate value (and is idempotent).
 * In future this may be nop'ed out when dealing with 52-bit kernel VAs.
 * 	ttbr: Value of ttbr to set, modified.
 * 	tmp:  scratch register
 *
 * Expands to nothing unless CONFIG_ARM64_VA_BITS_52 is set. When it is,
 * the offset is applied only if the ID_AA64MMFR2_EL1 LVA field reads zero.
 */
	.macro	offset_ttbr1, ttbr, tmp
#ifdef CONFIG_ARM64_VA_BITS_52
	mrs_s	\tmp, SYS_ID_AA64MMFR2_EL1
	and	\tmp, \tmp, #(0xf << ID_AA64MMFR2_LVA_SHIFT)
	cbnz	\tmp, .Lskipoffs_\@		// LVA field non-zero: leave ttbr alone
	orr	\ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET
.Lskipoffs_\@ :
#endif
	.endm
522 
/*
 * Perform the reverse of offset_ttbr1.
 * bic is used as it can cover the immediate value and, in future, won't need
 * to be nop'ed out when dealing with 52-bit kernel VAs.
 * 	ttbr: Value of ttbr, modified.
 *
 * Expands to nothing unless CONFIG_ARM64_VA_BITS_52 is set; unlike
 * offset_ttbr1 there is no runtime check.
 */
	.macro	restore_ttbr1, ttbr
#ifdef CONFIG_ARM64_VA_BITS_52
	bic	\ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET
#endif
	.endm
533 
/*
 * Arrange a physical address in a TTBR register, taking care of 52-bit
 * addresses.
 *
 * 	phys:	physical address, preserved
 * 	ttbr:	returns the TTBR value
 *
 * With CONFIG_ARM64_PA_BITS_52 a copy of phys shifted right by 46 is
 * merged in (so phys[51:48] lands in bits [5:2]) and the result is masked
 * with TTBR_BADDR_MASK_52. Otherwise this is a plain register move.
 */
	.macro	phys_to_ttbr, ttbr, phys
#ifndef CONFIG_ARM64_PA_BITS_52
	mov	\ttbr, \phys
#else
	orr	\ttbr, \phys, \phys, lsr #46
	and	\ttbr, \ttbr, #TTBR_BADDR_MASK_52
#endif
	.endm
549 
550 	.macro	phys_to_pte, pte, phys
551 #ifdef CONFIG_ARM64_PA_BITS_52
552 	/*
553 	 * We assume \phys is 64K aligned and this is guaranteed by only
554 	 * supporting this configuration with 64K pages.
555 	 */
556 	orr	\pte, \phys, \phys, lsr #36
557 	and	\pte, \pte, #PTE_ADDR_MASK
558 #else
559 	mov	\pte, \phys
560 #endif
561 	.endm
562 
563 	.macro	pte_to_phys, phys, pte
564 #ifdef CONFIG_ARM64_PA_BITS_52
565 	ubfiz	\phys, \pte, #(48 - 16 - 12), #16
566 	bfxil	\phys, \pte, #16, #32
567 	lsl	\phys, \phys, #16
568 #else
569 	and	\phys, \pte, #PTE_ADDR_MASK
570 #endif
571 	.endm
572 
/*
 * tcr_clear_errata_bits - Clear TCR bits that trigger an erratum on this CPU.
 *
 *	tcr:		register holding the TCR value, modified in place
 *	tmp{1,2}:	temporary registers
 *
 * Expands to nothing unless CONFIG_FUJITSU_ERRATUM_010001 is set. When it
 * is, MIDR_EL1 is masked with MIDR_FUJITSU_ERRATUM_010001_MASK and compared
 * with MIDR_FUJITSU_ERRATUM_010001; only on a match are the
 * TCR_CLEAR_FUJITSU_ERRATUM_010001 bits cleared in \tcr. Condition flags
 * are clobbered.
 *
 * The skip target is a per-expansion .L label rather than a small numeric
 * label, so that a caller's own "10f"/"10b" reference spanning this macro
 * cannot bind to it by accident.
 */
	.macro	tcr_clear_errata_bits, tcr, tmp1, tmp2
#ifdef CONFIG_FUJITSU_ERRATUM_010001
	mrs	\tmp1, midr_el1

	mov_q	\tmp2, MIDR_FUJITSU_ERRATUM_010001_MASK
	and	\tmp1, \tmp1, \tmp2
	mov_q	\tmp2, MIDR_FUJITSU_ERRATUM_010001
	cmp	\tmp1, \tmp2
	b.ne	.Lskip_erratum_010001_\@	// not an affected CPU

	mov_q	\tmp2, TCR_CLEAR_FUJITSU_ERRATUM_010001
	bic	\tcr, \tcr, \tmp2
.Lskip_erratum_010001_\@ :
#endif /* CONFIG_FUJITSU_ERRATUM_010001 */
	.endm
591 
/*
 * Errata workaround prior to disabling the MMU. Insert an ISB immediately
 * prior to executing the MSR that will change SCTLR_ELn[M] from a value of
 * 1 to 0.
 *
 * Expands to nothing unless CONFIG_QCOM_FALKOR_ERRATUM_E1041 is set.
 */
	.macro pre_disable_mmu_workaround
#ifdef CONFIG_QCOM_FALKOR_ERRATUM_E1041
	isb
#endif
	.endm
601 
602 	/*
603 	 * frame_push - Push @regcount callee saved registers to the stack,
604 	 *              starting at x19, as well as x29/x30, and set x29 to
605 	 *              the new value of sp. Add @extra bytes of stack space
606 	 *              for locals.
607 	 */
608 	.macro		frame_push, regcount:req, extra
609 	__frame		st, \regcount, \extra
610 	.endm
611 
612 	/*
613 	 * frame_pop  - Pop the callee saved registers from the stack that were
614 	 *              pushed in the most recent call to frame_push, as well
615 	 *              as x29/x30 and any extra stack space that may have been
616 	 *              allocated.
617 	 */
618 	.macro		frame_pop
619 	__frame		ld
620 	.endm
621 
622 	.macro		__frame_regs, reg1, reg2, op, num
623 	.if		.Lframe_regcount == \num
624 	\op\()r		\reg1, [sp, #(\num + 1) * 8]
625 	.elseif		.Lframe_regcount > \num
626 	\op\()p		\reg1, \reg2, [sp, #(\num + 1) * 8]
627 	.endif
628 	.endm
629 
630 	.macro		__frame, op, regcount, extra=0
631 	.ifc		\op, st
632 	.if		(\regcount) < 0 || (\regcount) > 10
633 	.error		"regcount should be in the range [0 ... 10]"
634 	.endif
635 	.if		((\extra) % 16) != 0
636 	.error		"extra should be a multiple of 16 bytes"
637 	.endif
638 	.ifdef		.Lframe_regcount
639 	.if		.Lframe_regcount != -1
640 	.error		"frame_push/frame_pop may not be nested"
641 	.endif
642 	.endif
643 	.set		.Lframe_regcount, \regcount
644 	.set		.Lframe_extra, \extra
645 	.set		.Lframe_local_offset, ((\regcount + 3) / 2) * 16
646 	stp		x29, x30, [sp, #-.Lframe_local_offset - .Lframe_extra]!
647 	mov		x29, sp
648 	.endif
649 
650 	__frame_regs	x19, x20, \op, 1
651 	__frame_regs	x21, x22, \op, 3
652 	__frame_regs	x23, x24, \op, 5
653 	__frame_regs	x25, x26, \op, 7
654 	__frame_regs	x27, x28, \op, 9
655 
656 	.ifc		\op, ld
657 	.if		.Lframe_regcount == -1
658 	.error		"frame_push/frame_pop may not be nested"
659 	.endif
660 	ldp		x29, x30, [sp], #.Lframe_local_offset + .Lframe_extra
661 	.set		.Lframe_regcount, -1
662 	.endif
663 	.endm
664 
665 /*
666  * Check whether to yield to another runnable task from kernel mode NEON code
667  * (which runs with preemption disabled).
668  *
669  * if_will_cond_yield_neon
670  *        // pre-yield patchup code
671  * do_cond_yield_neon
672  *        // post-yield patchup code
673  * endif_yield_neon    <label>
674  *
675  * where <label> is optional, and marks the point where execution will resume
676  * after a yield has been performed. If omitted, execution resumes right after
677  * the endif_yield_neon invocation. Note that the entire sequence, including
678  * the provided patchup code, will be omitted from the image if
679  * CONFIG_PREEMPTION is not defined.
680  *
681  * As a convenience, in the case where no patchup code is required, the above
682  * sequence may be abbreviated to
683  *
684  * cond_yield_neon <label>
685  *
686  * Note that the patchup code does not support assembler directives that change
687  * the output section, any use of such directives is undefined.
688  *
689  * The yield itself consists of the following:
690  * - Check whether the preempt count is exactly 1 and a reschedule is also
691  *   needed. If so, calling of preempt_enable() in kernel_neon_end() will
692  *   trigger a reschedule. If it is not the case, yielding is pointless.
693  * - Disable and re-enable kernel mode NEON, and branch to the yield fixup
694  *   code.
695  *
696  * This macro sequence may clobber all CPU state that is not guaranteed by the
697  * AAPCS to be preserved across an ordinary function call.
698  */
699 
700 	.macro		cond_yield_neon, lbl
701 	if_will_cond_yield_neon
702 	do_cond_yield_neon
703 	endif_yield_neon	\lbl
704 	.endm
705 
706 	.macro		if_will_cond_yield_neon
707 #ifdef CONFIG_PREEMPTION
708 	get_current_task	x0
709 	ldr		x0, [x0, #TSK_TI_PREEMPT]
710 	sub		x0, x0, #PREEMPT_DISABLE_OFFSET
711 	cbz		x0, .Lyield_\@
712 	/* fall through to endif_yield_neon */
713 	.subsection	1
714 .Lyield_\@ :
715 #else
716 	.section	".discard.cond_yield_neon", "ax"
717 #endif
718 	.endm
719 
720 	.macro		do_cond_yield_neon
721 	bl		kernel_neon_end
722 	bl		kernel_neon_begin
723 	.endm
724 
725 	.macro		endif_yield_neon, lbl
726 	.ifnb		\lbl
727 	b		\lbl
728 	.else
729 	b		.Lyield_out_\@
730 	.endif
731 	.previous
732 .Lyield_out_\@ :
733 	.endm
734 
735 #endif	/* __ASM_ASSEMBLER_H */
736