1// SPDX-License-Identifier: GPL-2.0-only
2// Copyright (C) 2021 ARM Limited.
3// Original author: Mark Brown <broonie@kernel.org>
4//
5// Scalable Matrix Extension ZA context switch test
6// Repeatedly writes unique test patterns into each ZA tile
7// and reads them back to verify integrity.
8//
// for x in `seq 1 NR_CPUS`; do za-test & pids=$pids\ $! ; done
10// (leave it running for as long as you want...)
11// kill $pids
12
13#include <asm/unistd.h>
14#include "assembler.h"
15#include "asm-offsets.h"
16#include "sme-inst.h"
17
18.arch_extension sve
19
20#define MAXVL     2048
21#define MAXVL_B   (MAXVL / 8)
22
// Backing storage, placed in .data:
//   zaref   - in-memory shadow of the ZA contents, sized for the largest
//             supported vector length (MAXVL_B rows of MAXVL_B bytes)
//   scratch - staging buffer large enough for one maximum-length vector
.pushsection .data
.p2align 4
zaref:
	.space	MAXVL_B * MAXVL_B
scratch:
	.space	MAXVL_B
.popsection
33
// memcpy: minimal byte-at-a-time copy
// x0: destination address
// x1: source address
// x2: number of bytes to copy (may be zero)
// Clobbers x0-x3
function memcpy
	cbz	x2, .Lmemcpy_done	// nothing to do for a zero length

.Lmemcpy_byte:
	ldrb	w3, [x1], #1		// fetch one byte, advance the source
	strb	w3, [x0], #1		// store it, advance the destination
	subs	x2, x2, #1
	b.ne	.Lmemcpy_byte

.Lmemcpy_done:
	ret
endfunction
45
// pattern: build the reference data for one ZA row in the scratch buffer
// x0: pid
// x1: row in ZA
// x2: generation
// Clobbers x0, x1, x3
//
// The inputs are packed into a 32-bit word that is written to each of the
// MAXVL_B / 4 32-bit lanes of the scratch buffer, with the lane index
// counting up from zero in the low byte:
// bits 31:28	generation number (increments once per test_loop)
// bits 27:16	pid
// bits 15: 8	row number
// bits  7: 0	32-bit lane index
function pattern
	mov	w3, wzr			// lane index starts at 0
	bfi	w3, w2, #28, #4		// Generation
	bfi	w3, w0, #16, #12	// PID
	bfi	w3, w1, #8, #8		// Row

	mov	w1, #MAXVL_B / 4	// number of 32-bit lanes to fill
	ldr	x0, =scratch

.Lpattern_lane:
	str	w3, [x0], #4
	subs	w1, w1, #1
	add	w3, w3, #1		// Lane (add leaves the flags from subs intact)
	b.ne	.Lpattern_lane

	ret
endfunction
74
// _adrza: compute the address of the shadow data for a ZA horizontal vector
// xd:    destination register; receives zaref + x<nrtmp> * xn
// xn:    register holding the vector (row) index
// nrtmp: *number* of a temporary X register; it is left holding the value
//        read by "rdsvl <nrtmp>, 1", which callers use as the vector
//        length in bytes
// xd must be distinct from both xn and x<nrtmp>.
.macro _adrza xd, xn, nrtmp
	rdsvl	\nrtmp, 1
	ldr	\xd, =zaref
	madd	\xd, x\nrtmp, \xn, \xd
.endm
81
// setup_za: write a fresh test pattern to one ZA horizontal vector and to
// its shadow copy in zaref
// x0: pid
// x1: row number
// x2: generation
// Clobbers at least x0-x5 and x12
function setup_za
	mov	x12, x1			// row number doubles as the vector select
	mov	x4, x30			// return address parked in x4 across the calls

	bl	pattern			// Get pattern in scratch buffer

	_adrza	x0, x12, 2		// x0 = shadow row, x2 = vector length in bytes
	ldr	x1, =scratch
	mov	x5, x0			// keep the shadow pointer for the ZA load
	bl	memcpy			// shadow row <- scratch, x2 bytes

	_ldr_za 12, 5			// load vector w12 from pointer x5

	ret	x4
endfunction
100
// memcmp: byte-wise comparison of two buffers that aborts on a mismatch
// x0: first buffer
// x1: second buffer
// x2: number of bytes to compare
// Returns only if all bytes match; on the first difference control is
// transferred to barf with x0-x2 restored to their values on entry.
// Clobbers x3-x5 (x0-x2 are reloaded from the stack before leaving).
function memcmp
	cbz	x2, .Lmemcmp_ret	// a zero length trivially matches

	// Stash the arguments: barf wants the original pointers and size
	stp	x0, x1, [sp, #-0x20]!
	str	x2, [sp, #0x10]

	mov	x5, xzr			// x5 = byte offset into both buffers
.Lmemcmp_next:
	ldrb	w3, [x0, x5]
	ldrb	w4, [x1, x5]
	add	x5, x5, #1
	cmp	w3, w4
	b.ne	.Lmemcmp_out		// mismatch: leave the loop with NE set
	subs	x2, x2, #1
	b.ne	.Lmemcmp_next		// drops out with EQ once every byte matched

.Lmemcmp_out:
	// None of the instructions below alter the flags, so NE still means
	// "mismatch" when the conditional branch is reached.
	ldp	x0, x1, [sp]
	ldr	x2, [sp, #0x10]
	add	sp, sp, #0x20
	b.ne	barf

.Lmemcmp_ret:
	ret
endfunction
126
// check_za: store one ZA horizontal vector to the scratch buffer and compare
// it with its shadow copy in zaref; a mismatch ends up in barf via memcmp
// x0: row number
// Clobbers x0-x7 and x12.
function check_za
	mov	x3, x30			// return address kept in x3 across memfill_ae

	mov	x12, x0			// vector select for the ZA store below
	_adrza	x5, x0, 6		// x5 = expected data, x6 = vector length in bytes
	ldr	x7, =scratch		// x7 is scratch
	mov	x4, x0

	mov	x1, x6			// Poison scratch (x6 bytes) before the store
	mov	x0, x7
	bl	memfill_ae

	_str_za 12, 7			// save vector w12 to pointer x7

	mov	x30, x3			// tail call: memcmp returns to our caller
	mov	x2, x6
	mov	x1, x7
	mov	x0, x5
	b	memcmp
endfunction
150
// irritator_handler: signal handler installed for SIGUSR1 by _start
// x2: pointer to the signal ucontext
//
// Any SME register modified here can cause corruption in the main
// thread -- but *only* the locations modified here.
function irritator_handler
	// Bump the irritation signal count, which lives in the interrupted
	// context's saved x23
	ldr	x3, [x2, #ucontext_regs + 8 * 23]
	add	x3, x3, #1
	str	x3, [x2, #ucontext_regs + 8 * 23]

	// Corrupt some random ZA data (currently compiled out)
#if 0
	adr	x0, .text + (irritator_handler - .text) / 16 * 16
	movi	v0.8b, #1
	movi	v9.16b, #2
	movi	v31.8b, #3
#endif

	ret
endfunction
169
// tickle_handler: signal handler installed for SIGUSR2 by _start
// x2: pointer to the signal ucontext
// Does nothing beyond counting the signal.
function tickle_handler
	// Bump the signal count, which lives in the interrupted context's
	// saved x23
	ldr	x3, [x2, #ucontext_regs + 8 * 23]
	add	x3, x3, #1
	str	x3, [x2, #ucontext_regs + 8 * 23]

	ret
endfunction
178
// terminate_handler: signal handler installed for SIGINT and SIGTERM
// w0: signal number
// x2: pointer to the signal ucontext
// Prints the signal number together with the iteration (x22) and signal
// (x23) counts saved in the ucontext, then exits with status 0.
// Does not return.
function terminate_handler
	mov	x20, x2			// ucontext, needed after the print helpers
	mov	w21, w0			// signal number, likewise

	puts	"Terminated by signal "
	mov	w0, w21
	bl	putdec
	puts	", no error, iterations="
	ldr	x0, [x20, #ucontext_regs + 8 * 22]
	bl	putdec
	puts	", signals="
	ldr	x0, [x20, #ucontext_regs + 8 * 23]
	bl	putdecn

	mov	x8, #__NR_exit
	mov	x0, xzr			// exit status 0
	svc	#0
endfunction
197
// setsignal: install a signal handler using the rt_sigaction syscall
// w0: signal number
// x1: sa_action
// w2: sa_flags
// Clobbers x0-x6,x8
// If the syscall returns non-zero, prints a message and branches to .Labort.
function setsignal
	// Frame layout: saved x30 at [sp], struct sigaction at [sp, #16]
	str	x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]!

	// Move the arguments out of the way of memclr
	mov	w6, w2
	mov	x5, x1
	mov	w4, w0

	// Start from an all-zero struct sigaction
	mov	x1, #sa_sz
	add	x0, sp, #16
	bl	memclr

	add	x1, sp, #16
	str	x5, [x1, #sa_handler]
	str	w6, [x1, #sa_flags]
	mov	w0, w4
	mov	x3, #sa_mask_sz
	mov	x2, xzr			// no old action requested
	mov	x8, #__NR_rt_sigaction
	svc	#0

	cbnz	w0, .Lsetsignal_fail

	ldr	x30, [sp], #((sa_sz + 15) / 16 * 16 + 16)
	ret

.Lsetsignal_fail:
	puts	"sigaction failure\n"
	b	.Labort
endfunction
230
// Main program entry point
//
// Installs the signal handlers, enables ZA, then loops forever: fill every
// ZA row and its shadow with a fresh pattern, make a syscall to encourage a
// context switch, and check every row against its shadow.
//
// Registers held for the lifetime of the test:
// x19: vector length in bits, as read at startup
// x20: our PID
// x21: countdown counter for the per-row set up / verify loops
// x22: generation number (reported as the iteration count)
// x23: signal count, bumped by the signal handlers through the ucontext
// x24: number of rows to verify (vector length in bytes)
.globl _start
function _start
_start:
	mov	x23, #0		// signal count

	mov	w0, #SIGINT
	adr	x1, terminate_handler
	mov	w2, #SA_SIGINFO
	bl	setsignal

	mov	w0, #SIGTERM
	adr	x1, terminate_handler
	mov	w2, #SA_SIGINFO
	bl	setsignal

	mov	w0, #SIGUSR1
	adr	x1, irritator_handler
	mov	w2, #SA_SIGINFO
	orr	w2, w2, #SA_NODEFER
	bl	setsignal

	mov	w0, #SIGUSR2
	adr	x1, tickle_handler
	mov	w2, #SA_SIGINFO
	orr	w2, w2, #SA_NODEFER
	bl	setsignal

	// No newline here: the text is continued by "vector length:" below
	puts	"Streaming mode "
	smstart_za

	// Sanity-check and report the vector length

	rdsvl	19, 8
	cmp	x19, #128
	b.lo	1f
	cmp	x19, #2048
	b.hi	1f
	tst	x19, #(8 - 1)
	b.eq	2f

1:	puts	"bad vector length: "
	mov	x0, x19
	bl	putdecn
	b	.Labort

2:	puts	"vector length:\t"
	mov	x0, x19
	bl	putdec
	puts	" bits\n"

	// Obtain our PID, to ensure test pattern uniqueness between processes
	mov	x8, #__NR_getpid
	svc	#0
	mov	x20, x0

	puts	"PID:\t"
	mov	x0, x20
	bl	putdecn

	mov	x22, #0		// generation number, increments per iteration
.Ltest_loop:
	rdsvl	0, 8		// The vector length must not change under us
	cmp	x0, x19
	b.ne	vl_barf

	rdsvl	21, 1		// Set up ZA & shadow with test pattern
0:	mov	x0, x20
	sub	x1, x21, #1	// rows are written from the last down to row 0
	mov	x2, x22
	bl	setup_za
	subs	x21, x21, #1
	b.ne	0b

	mov	x8, #__NR_sched_yield	// encourage preemption
	svc	#0

	mrs	x0, S3_3_C4_C2_2	// SVCR should have ZA=1,SM=0
	and	x1, x0, #3
	cmp	x1, #2
	b.ne	svcr_barf

	rdsvl	21, 1			// Verify that the data made it through
	rdsvl	24, 1			// x24 - x21 is the row being checked
0:	sub	x0, x24, x21
	bl	check_za
	subs	x21, x21, #1
	b.ne	0b

	add	x22, x22, #1	// Everything still working
	b	.Ltest_loop

.Labort:
	// Signal only ourselves: a PID of 0 would make kill target every
	// process in our process group.
	mov	x8, #__NR_getpid
	svc	#0
	mov	x1, #SIGABRT
	mov	x8, #__NR_kill
	svc	#0

	// Only reached if SIGABRT did not terminate us; never run off the
	// end of the function into the code that follows.
	mov	x0, #1
	mov	x8, #__NR_exit
	svc	#0
endfunction
330
// barf: report a ZA data mismatch and abort the process
// x0: address of the expected data
// x1: address of the data actually read back
// x2: size of the data in bytes
// Also reads state maintained by _start: x20 (PID), x22 (iteration) and
// x21/x24 (verify loop counter and row count).
// Does not return.
function barf
// fpsimd.c activity log dump hack
//	ldr	w0, =0xdeadc0de
//	mov	w8, #__NR_exit
//	svc	#0
// end hack
	smstop
	mov	x10, x0	// expected data
	mov	x11, x1	// actual data
	mov	x12, x2	// data size

	puts	"Mismatch: PID="
	mov	x0, x20
	bl	putdec
	puts	", iteration="
	mov	x0, x22
	bl	putdec
	puts	", row="
	sub	x0, x24, x21	// the row number _start passed to check_za
	bl	putdecn
	puts	"\tExpected ["
	mov	x0, x10
	mov	x1, x12
	bl	dumphex
	puts	"]\n\tGot      ["
	mov	x0, x11
	mov	x1, x12
	bl	dumphex
	puts	"]\n"

	mov	x8, #__NR_getpid	// send SIGABRT to ourselves only
	svc	#0
// fpsimd.c activity log dump hack
//	ldr	w0, =0xdeadc0de
//	mov	w8, #__NR_exit
//	svc	#0
// ^ end of hack
	mov	x1, #SIGABRT
	mov	x8, #__NR_kill
	svc	#0

	// Only reached if SIGABRT did not terminate us; exit rather than
	// running off the end of the function into the code that follows.
	mov	x0, #1
	mov	x8, #__NR_exit
	svc	#0
endfunction
375
// vl_barf: report an unexpected active vector length and exit
// x0: the vector length value that failed the check
// Does not return; the process exits with status 1.
function vl_barf
	mov	x10, x0		// keep the value clear of the puts call

	puts	"Bad active VL: "
	mov	x0, x10
	bl	putdecn

	mov	x0, #1		// exit status is the first syscall argument
	mov	x8, #__NR_exit
	svc	#0
endfunction
387
// svcr_barf: report an unexpected SVCR value and exit
// x0: the SVCR value that failed the check
// Does not return; the process exits with status 1.
function svcr_barf
	mov	x10, x0		// keep the value clear of the puts call

	puts	"Bad SVCR: "
	mov	x0, x10
	bl	putdecn

	mov	x0, #1		// exit status is the first syscall argument
	mov	x8, #__NR_exit
	svc	#0
endfunction
399