1// SPDX-License-Identifier: GPL-2.0-only
2// Copyright (C) 2021 ARM Limited.
3// Original author: Mark Brown <broonie@kernel.org>
4//
5// Scalable Matrix Extension ZA context switch test
6// Repeatedly writes unique test patterns into each ZA tile
7// and reads them back to verify integrity.
8//
9// for x in `seq 1 NR_CPUS`; do sve-test & pids=$pids\ $! ; done
10// (leave it running for as long as you want...)
11// kill $pids
12
13#include <asm/unistd.h>
14#include "assembler.h"
15#include "asm-offsets.h"
16#include "sme-inst.h"
17
// NOTE(review): presumably required so the assembler accepts the vector
// extension encodings used further down -- confirm against sme-inst.h.
.arch_extension sve

// Largest vector length catered for, in bits (_start aborts above 2048 bits)
#define MAXVL     2048
// The same limit in bytes; sizes the zaref and scratch buffers
#define MAXVL_B   (MAXVL / 8)
22
// Backing storage, placed in .data and 16-byte aligned:
//   zaref   - in-memory shadow of the ZA contents, sized for the largest
//             supported vector length (MAXVL_B rows of MAXVL_B bytes)
//   scratch - room for a single maximum-length vector
// The section in use before this point is restored afterwards.
.pushsection .data
.balign 16
zaref:
	.space	MAXVL_B * MAXVL_B
scratch:
	.space	MAXVL_B
.popsection
33
// Byte-wise copy of x2 bytes from the buffer at x1 to the buffer at x0.
// A length of zero copies nothing.
// Clobbers x0-x3 and the condition flags.
function memcpy
	cmp	x2, #0			// empty copy: nothing to do
	b.eq	.Lmemcpy_done
.Lmemcpy_byte:
	ldrb	w3, [x1], #1		// fetch next source byte, advance source
	subs	x2, x2, #1		// one fewer byte outstanding
	strb	w3, [x0], #1		// store it, advance destination (flags kept)
	b.ne	.Lmemcpy_byte
.Lmemcpy_done:
	ret
endfunction
45
// Generate a test pattern for storage in ZA
// x0: pid
// x1: row in ZA
// x2: generation
//
// The inputs are packed into a 32-bit seed word:
// bits 31:28	generation number (increments once per test_loop)
// bits 27:16	pid
// bits 15: 8	row number
// bits  7: 0	32-bit lane index (zero in the seed)
//
// The seed is stored to the first 32-bit lane of the scratch buffer and is
// incremented by one for each following lane, until MAXVL_B bytes are filled.
// Clobbers x0, x1, x3 and the condition flags.
function pattern
	ubfiz	w3, w0, #16, #12	// PID; all other bits start out clear
	bfi	w3, w2, #28, #4		// Generation
	bfi	w3, w1, #8, #8		// Row

	mov	w1, #MAXVL_B / 4	// number of 32-bit lanes to write
	ldr	x0, =scratch

.Lpattern_lane:
	str	w3, [x0], #4
	subs	w1, w1, #1
	add	w3, w3, #1		// Lane (plain add, flags from subs kept)
	b.ne	.Lpattern_lane

	ret
endfunction
74
// Get the address of shadow data for ZA horizontal vector xn
//   xd:    destination register; receives zaref + (result of rdsvl 1) * xn
//   xn:    register holding the vector (row) number
//   nrtmp: bare register NUMBER (no x prefix); x<nrtmp> is overwritten with
//          the result of rdsvl 1 and callers reuse it as the row length
// xd is written before xn is read, so they must be different registers.
.macro _adrza xd, xn, nrtmp
	ldr	\xd, =zaref
	rdsvl	\nrtmp, 1		// row stride
	madd	\xd, x\nrtmp, \xn, \xd	// xd = zaref + stride * xn
.endm
81
// Write the test pattern for one row into both its shadow copy and ZA.
// x0: pid
// x1: row number
// x2: generation
// The return address is parked in x4 and the row and shadow pointer in
// x12 and x5, so this relies on pattern and memcpy leaving those alone.
// Clobbers x0-x5, x12 and the condition flags.
function setup_za
	mov	x12, x1			// row doubles as the ZA vector select
	mov	x4, x30			// the bl instructions below overwrite x30

	bl	pattern			// scratch := pattern(pid, row, generation)

	_adrza	x0, x12, 2		// x0 = shadow address of the row, x2 = row length
	ldr	x1, =scratch
	mov	x5, x0			// memcpy advances x0; keep a copy for the load
	bl	memcpy			// shadow row := scratch (x2 bytes)

	_ldr_za 12, 5			// load vector w12 from pointer x5

	ret	x4
endfunction
100
// Compare x2 bytes at address x0 against x2 bytes at address x1.
// Returns only when every byte matches (or x2 is zero).  On the first
// mismatch it branches to barf with x0, x1 and x2 restored to the values
// that were passed in, and the program is aborted there.
// Clobbers x0-x5.
function memcmp
	cbz	x2, .Lmemcmp_ret

	// barf reports the original arguments, so park them on the stack
	stp	x0, x1, [sp, #-0x20]!
	str	x2, [sp, #0x10]

	mov	x5, #0			// x5 = byte offset
.Lmemcmp_next:
	ldrb	w3, [x0, x5]
	ldrb	w4, [x1, x5]
	cmp	w3, w4
	add	x5, x5, #1		// plain add: flags from cmp stay intact
	b.ne	.Lmemcmp_finish		// mismatch: leave the loop with NE
	subs	x2, x2, #1
	b.ne	.Lmemcmp_next		// falls through with EQ once x2 hits zero

.Lmemcmp_finish:
	// The loads below do not touch the flags: NE still means mismatch
	ldr	x2, [sp, #0x10]
	ldp	x0, x1, [sp], #0x20
	b.ne	barf

.Lmemcmp_ret:
	ret
endfunction
126
// Read one ZA vector back into scratch and compare it with its shadow copy
// in memory; a mismatch aborts via memcmp.
// x0: row number
// The return address is kept in x3 across the memfill_ae call, and x5, x6,
// x7 and x12 are expected to survive that call as well.
// Clobbers x0-x7 and x12.
function check_za
	mov	x12, x0			// vector select for _str_za
	mov	x3, x30			// the bl below overwrites x30

	_adrza	x5, x0, 6		// x5 = expected data, x6 = row length
	ldr	x7, =scratch		// x7 = where the vector is read back to
	mov	x4, x0			// copy of the row; not read again in here

	mov	x1, x6
	mov	x0, x7
	bl	memfill_ae		// Poison scratch

	_str_za 12, 7			// save vector w12 to pointer x7

	mov	x30, x3			// let memcmp return straight to our caller
	mov	x2, x6
	mov	x1, x7
	mov	x0, x5
	b	memcmp
endfunction
150
// Any SME register modified here can cause corruption in the main
// thread -- but *only* the locations modified here.
//
// Installed by _start for SIGUSR1 with SA_SIGINFO | SA_NODEFER.
// x2: ucontext pointer.  The count is bumped in the saved image of x23
//     rather than in the live register.
// Clobbers x0.
function irritator_handler
	// Increment the irritation signal count (x23):
	ldr	x0, [x2, #ucontext_regs + 8 * 23]
	add	x0, x0, #1
	str	x0, [x2, #ucontext_regs + 8 * 23]

	// Corrupt some random ZA data
	// (compiled out: nothing is corrupted at present)
#if 0
	adr	x0, .text + (irritator_handler - .text) / 16 * 16
	movi	v0.8b, #1
	movi	v9.16b, #2
	movi	v31.8b, #3
#endif

	ret
endfunction
169
// Handler installed by _start for SIGINT and SIGTERM (with SA_SIGINFO).
// w0: signal number
// x2: ucontext pointer; the iteration count (x22) and the irritation signal
//     count (x23) are read from the saved registers
// Prints a summary line, then exits with status 0.  Does not return.
function terminate_handler
	mov	x20, x2			// ucontext pointer, needed after the calls
	mov	w21, w0			// signal number

	puts	"Terminated by signal "
	mov	w0, w21
	bl	putdec

	puts	", no error, iterations="
	ldr	x0, [x20, #ucontext_regs + 8 * 22]
	bl	putdec

	puts	", signals="
	ldr	x0, [x20, #ucontext_regs + 8 * 23]
	bl	putdecn

	mov	x8, #__NR_exit
	mov	x0, #0			// exit status
	svc	#0
endfunction
188
// Install a signal handler with rt_sigaction.
// w0: signal number
// x1: sa_action
// w2: sa_flags
// A zeroed sigaction structure is built on the stack, with only the
// handler and flags filled in.  If the syscall result is non-zero a
// message is printed and the program aborts via .Labort.
// Clobbers x0-x6,x8

// Stack frame: saved x30 in the first 16 bytes, followed by the sigaction
// structure rounded up to a multiple of 16 bytes.
#define SETSIGNAL_FRAME	((sa_sz + 15) / 16 * 16 + 16)

function setsignal
	str	x30, [sp, #-SETSIGNAL_FRAME]!

	// Stash the arguments: x0-x2 are reused for the calls below
	mov	x5, x1
	mov	w6, w2
	mov	w4, w0

	mov	x1, #sa_sz
	add	x0, sp, #16
	bl	memclr			// start from an all-zero structure

	add	x1, sp, #16		// x1 = new action
	str	x5, [x1, #sa_handler]
	str	w6, [x1, #sa_flags]
	mov	w0, w4			// signal number
	mov	x2, #0			// no old action wanted
	mov	x3, #sa_mask_sz
	mov	x8, #__NR_rt_sigaction
	svc	#0

	cbnz	w0, .Lsetsignal_fail

	ldr	x30, [sp], #SETSIGNAL_FRAME
	ret

.Lsetsignal_fail:
	puts	"sigaction failure\n"
	b	.Labort
endfunction
221
// Main program entry point
//
// Enables ZA, validates and reports the vector length, installs the signal
// handlers and then loops forever: fill every ZA row (and its shadow) with a
// fresh pattern, make a syscall, then verify every row against the shadow.
//
// Long-lived registers:
//   x19: vector length in bits, sampled at startup
//   x20: PID
//   x21: loop countdown (rows remaining)
//   x22: generation number / iteration count (read by terminate_handler)
//   x23: irritation signal count (updated by irritator_handler)
//   x24: number of rows, used during the verify pass
//
// The function macro already emits the _start label, so it must not be
// defined a second time here.
.globl _start
function _start
	puts	"Streaming mode "
	smstart_za

	// Sanity-check and report the vector length

	rdsvl	19, 8
	cmp	x19, #128
	b.lo	1f
	cmp	x19, #2048
	b.hi	1f
	tst	x19, #(8 - 1)
	b.eq	2f

1:	puts	"bad vector length: "
	mov	x0, x19
	bl	putdecn
	b	.Labort

2:	puts	"vector length:\t"
	mov	x0, x19
	bl	putdec
	puts	" bits\n"

	// Obtain our PID, to ensure test pattern uniqueness between processes
	mov	x8, #__NR_getpid
	svc	#0
	mov	x20, x0

	puts	"PID:\t"
	mov	x0, x20
	bl	putdecn

	mov	x23, #0		// Irritation signal count

	mov	w0, #SIGINT
	adr	x1, terminate_handler
	mov	w2, #SA_SIGINFO
	bl	setsignal

	mov	w0, #SIGTERM
	adr	x1, terminate_handler
	mov	w2, #SA_SIGINFO
	bl	setsignal

	mov	w0, #SIGUSR1
	adr	x1, irritator_handler
	mov	w2, #SA_SIGINFO
	orr	w2, w2, #SA_NODEFER
	bl	setsignal

	mov	x22, #0		// generation number, increments per iteration
.Ltest_loop:
	rdsvl	0, 8		// The vector length must not change under us
	cmp	x0, x19
	b.ne	vl_barf

	rdsvl	21, 1		// Set up ZA & shadow with test pattern
0:	mov	x0, x20
	sub	x1, x21, #1	// rows are filled from the last down to row 0
	mov	x2, x22
	bl	setup_za
	subs	x21, x21, #1
	b.ne	0b

	and	x8, x22, #127		// Every 128 iterations...
	cbz	x8, 0f
	mov	x8, #__NR_getpid	// (otherwise minimal syscall)
	b	1f
0:
	mov	x8, #__NR_sched_yield	// ...encourage preemption
1:
	svc	#0

	mrs	x0, S3_3_C4_C2_2	// SVCR should have ZA=1,SM=0
	and	x1, x0, #3
	cmp	x1, #2
	b.ne	svcr_barf

	rdsvl	21, 1			// Verify that the data made it through
	rdsvl	24, 1			// x24 - x21 is the row being checked
0:	sub	x0, x24, x21
	bl	check_za
	subs	x21, x21, #1
	bne	0b

	add	x22, x22, #1	// Everything still working
	b	.Ltest_loop

	// NOTE(review): pid 0 makes kill signal the whole process group, not
	// just this process -- confirm that this is intended.
.Labort:
	mov	x0, #0
	mov	x1, #SIGABRT
	mov	x8, #__NR_kill
	svc	#0
endfunction
320
// Report a mismatch between ZA and its shadow, then abort with SIGABRT.
// Reached by a branch from memcmp; does not return.
// x0: expected data
// x1: actual data
// x2: data size
// Also reads state left in place by the verify loop in _start:
// x20 (PID), x22 (iteration), x21 (rows remaining) and x24 (number of rows).
function barf
// fpsimd.c activity log dump hack
//	ldr	w0, =0xdeadc0de
//	mov	w8, #__NR_exit
//	svc	#0
// end hack
	smstop
	mov	x10, x0	// expected data
	mov	x11, x1	// actual data
	mov	x12, x2	// data size

	// The verify loop counts x21 down from the number of rows and checks
	// row (x24 - x21), so convert the countdown into the row that failed.
	sub	x21, x24, x21

	puts	"Mismatch: PID="
	mov	x0, x20
	bl	putdec
	puts	", iteration="
	mov	x0, x22
	bl	putdec
	puts	", row="
	mov	x0, x21
	bl	putdecn
	puts	"\tExpected ["
	mov	x0, x10
	mov	x1, x12
	bl	dumphex
	puts	"]\n\tGot      ["
	mov	x0, x11
	mov	x1, x12
	bl	dumphex
	puts	"]\n"

	mov	x8, #__NR_getpid	// x0 = our PID, the target for kill
	svc	#0
// fpsimd.c activity log dump hack
//	ldr	w0, =0xdeadc0de
//	mov	w8, #__NR_exit
//	svc	#0
// ^ end of hack
	mov	x1, #SIGABRT
	mov	x8, #__NR_kill
	svc	#0
//	mov	x8, #__NR_exit
//	mov	x1, #1
//	svc	#0
endfunction
365
// Report that the active vector length no longer matches the value sampled
// at startup, then exit with status 1.  Does not return.
// x0: the vector length that was just read, in bits
function vl_barf
	mov	x10, x0			// keep the value across the puts call

	puts	"Bad active VL: "
	mov	x0, x10
	bl	putdecn

	mov	x0, #1			// exit status is the first syscall argument
	mov	x8, #__NR_exit
	svc	#0
endfunction
377
// Report an unexpected SVCR value (the main loop expects ZA=1, SM=0), then
// exit with status 1.  Does not return.
// x0: the SVCR value that was read
function svcr_barf
	mov	x10, x0			// keep the value across the puts call

	puts	"Bad SVCR: "
	mov	x0, x10
	bl	putdecn

	mov	x0, #1			// exit status is the first syscall argument
	mov	x8, #__NR_exit
	svc	#0
endfunction
389