1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright 2020, Gustavo Luiz Duarte, IBM Corp.
4  *
5  * This test starts a transaction and triggers a signal, forcing a pagefault to
6  * happen when the kernel signal handling code touches the user signal stack.
7  *
8  * In order to avoid pre-faulting the signal stack memory and to force the
9  * pagefault to happen precisely in the kernel signal handling code, the
10  * pagefault handling is done in userspace using the userfaultfd facility.
11  *
12  * Further pagefaults are triggered by crafting the signal handler's ucontext
13  * to point to additional memory regions managed by the userfaultfd, so using
14  * the same mechanism used to avoid pre-faulting the signal stack memory.
15  *
16  * On failure (bug is present) kernel crashes or never returns control back to
17  * userspace. If bug is not present, tests completes almost immediately.
18  */
19 
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <linux/userfaultfd.h>
24 #include <poll.h>
25 #include <unistd.h>
26 #include <sys/ioctl.h>
27 #include <sys/syscall.h>
28 #include <fcntl.h>
29 #include <sys/mman.h>
30 #include <pthread.h>
31 #include <signal.h>
32 #include <errno.h>
33 
34 #include "tm.h"
35 
36 
#define UF_MEM_SIZE 655360	/* 10 x 64k pages */

/* Memory handled by userfaultfd */
static char *uf_mem;
/* Next free byte within uf_mem; kept page-aligned by get_uf_mem() */
static size_t uf_mem_offset = 0;

/*
 * Data that will be copied into the faulting pages (instead of zero-filled
 * pages). This is used to make the test more reliable and avoid segfaulting
 * when we return from the signal handler. Since we are making the signal
 * handler's ucontext point to newly allocated memory, when that memory is
 * paged-in it will contain the expected content.
 */
static char backing_mem[UF_MEM_SIZE];

/* System page size; initialized in setup_uf_mem() via sysconf() */
static size_t pagesize;
53 
54 /*
55  * Return a chunk of at least 'size' bytes of memory that will be handled by
56  * userfaultfd. If 'backing_data' is not NULL, its content will be save to
57  * 'backing_mem' and then copied into the faulting pages when the page fault
58  * is handled.
59  */
60 void *get_uf_mem(size_t size, void *backing_data)
61 {
62 	void *ret;
63 
64 	if (uf_mem_offset + size > UF_MEM_SIZE) {
65 		fprintf(stderr, "Requesting more uf_mem than expected!\n");
66 		exit(EXIT_FAILURE);
67 	}
68 
69 	ret = &uf_mem[uf_mem_offset];
70 
71 	/* Save the data that will be copied into the faulting page */
72 	if (backing_data != NULL)
73 		memcpy(&backing_mem[uf_mem_offset], backing_data, size);
74 
75 	/* Reserve the requested amount of uf_mem */
76 	uf_mem_offset += size;
77 	/* Keep uf_mem_offset aligned to the page size (round up) */
78 	uf_mem_offset = (uf_mem_offset + pagesize - 1) & ~(pagesize - 1);
79 
80 	return ret;
81 }
82 
83 void *fault_handler_thread(void *arg)
84 {
85 	struct uffd_msg msg;	/* Data read from userfaultfd */
86 	long uffd;		/* userfaultfd file descriptor */
87 	struct uffdio_copy uffdio_copy;
88 	struct pollfd pollfd;
89 	ssize_t nread, offset;
90 
91 	uffd = (long) arg;
92 
93 	for (;;) {
94 		pollfd.fd = uffd;
95 		pollfd.events = POLLIN;
96 		if (poll(&pollfd, 1, -1) == -1) {
97 			perror("poll() failed");
98 			exit(EXIT_FAILURE);
99 		}
100 
101 		nread = read(uffd, &msg, sizeof(msg));
102 		if (nread == 0) {
103 			fprintf(stderr, "read(): EOF on userfaultfd\n");
104 			exit(EXIT_FAILURE);
105 		}
106 
107 		if (nread == -1) {
108 			perror("read() failed");
109 			exit(EXIT_FAILURE);
110 		}
111 
112 		/* We expect only one kind of event */
113 		if (msg.event != UFFD_EVENT_PAGEFAULT) {
114 			fprintf(stderr, "Unexpected event on userfaultfd\n");
115 			exit(EXIT_FAILURE);
116 		}
117 
118 		/*
119 		 * We need to handle page faults in units of pages(!).
120 		 * So, round faulting address down to page boundary.
121 		 */
122 		uffdio_copy.dst = msg.arg.pagefault.address & ~(pagesize-1);
123 
124 		offset = (char *) uffdio_copy.dst - uf_mem;
125 		uffdio_copy.src = (unsigned long) &backing_mem[offset];
126 
127 		uffdio_copy.len = pagesize;
128 		uffdio_copy.mode = 0;
129 		uffdio_copy.copy = 0;
130 		if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy) == -1) {
131 			perror("ioctl-UFFDIO_COPY failed");
132 			exit(EXIT_FAILURE);
133 		}
134 	}
135 }
136 
137 void setup_uf_mem(void)
138 {
139 	long uffd;		/* userfaultfd file descriptor */
140 	pthread_t thr;
141 	struct uffdio_api uffdio_api;
142 	struct uffdio_register uffdio_register;
143 	int ret;
144 
145 	pagesize = sysconf(_SC_PAGE_SIZE);
146 
147 	/* Create and enable userfaultfd object */
148 	uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
149 	if (uffd == -1) {
150 		perror("userfaultfd() failed");
151 		exit(EXIT_FAILURE);
152 	}
153 	uffdio_api.api = UFFD_API;
154 	uffdio_api.features = 0;
155 	if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) {
156 		perror("ioctl-UFFDIO_API failed");
157 		exit(EXIT_FAILURE);
158 	}
159 
160 	/*
161 	 * Create a private anonymous mapping. The memory will be demand-zero
162 	 * paged, that is, not yet allocated. When we actually touch the memory
163 	 * the related page will be allocated via the userfaultfd mechanism.
164 	 */
165 	uf_mem = mmap(NULL, UF_MEM_SIZE, PROT_READ | PROT_WRITE,
166 		      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
167 	if (uf_mem == MAP_FAILED) {
168 		perror("mmap() failed");
169 		exit(EXIT_FAILURE);
170 	}
171 
172 	/*
173 	 * Register the memory range of the mapping we've just mapped to be
174 	 * handled by the userfaultfd object. In 'mode' we request to track
175 	 * missing pages (i.e. pages that have not yet been faulted-in).
176 	 */
177 	uffdio_register.range.start = (unsigned long) uf_mem;
178 	uffdio_register.range.len = UF_MEM_SIZE;
179 	uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
180 	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) {
181 		perror("ioctl-UFFDIO_REGISTER");
182 		exit(EXIT_FAILURE);
183 	}
184 
185 	/* Create a thread that will process the userfaultfd events */
186 	ret = pthread_create(&thr, NULL, fault_handler_thread, (void *) uffd);
187 	if (ret != 0) {
188 		fprintf(stderr, "pthread_create(): Error. Returned %d\n", ret);
189 		exit(EXIT_FAILURE);
190 	}
191 }
192 
/*
 * SIGTRAP handler, run on the userfaultfd-backed alternative stack.
 *
 * Assumption: the signal was delivered while userspace was in transactional or
 * suspended state, i.e. uc->uc_link != NULL (uc_link holds the checkpointed
 * context the kernel built for the transaction).
 *
 * Redirects parts of the ucontext into fresh, never-faulted uf_mem chunks so
 * the kernel takes further pagefaults (serviced by fault_handler_thread) when
 * it touches them on signal return.
 */
void signal_handler(int signo, siginfo_t *si, void *uc)
{
	ucontext_t *ucp = uc;

	/* Skip 'trap' after returning, otherwise we get a SIGTRAP again */
	ucp->uc_link->uc_mcontext.regs->nip += 4;

	/*
	 * Point both vector-register save areas at uf_mem; get_uf_mem()
	 * stashes the current contents so the pages fault back in with the
	 * expected data.
	 */
	ucp->uc_mcontext.v_regs =
		get_uf_mem(sizeof(elf_vrreg_t), ucp->uc_mcontext.v_regs);

	ucp->uc_link->uc_mcontext.v_regs =
		get_uf_mem(sizeof(elf_vrreg_t), ucp->uc_link->uc_mcontext.v_regs);

	/*
	 * Relocate the checkpointed ucontext itself. This must stay last:
	 * the two statements above dereference the original uc_link.
	 */
	ucp->uc_link = get_uf_mem(sizeof(ucontext_t), ucp->uc_link);
}
212 
/*
 * Probe for userfaultfd support by issuing the syscall with invalid flags.
 * If the syscall exists the call either succeeds or fails with something
 * other than ENOSYS; only a missing syscall yields ENOSYS.
 */
bool have_userfaultfd(void)
{
	long ret;

	errno = 0;
	ret = syscall(__NR_userfaultfd, -1);

	if (ret == 0)
		return true;

	return errno != ENOSYS;
}
222 
223 int tm_signal_pagefault(void)
224 {
225 	struct sigaction sa;
226 	stack_t ss;
227 
228 	SKIP_IF(!have_htm());
229 	SKIP_IF(!have_userfaultfd());
230 
231 	setup_uf_mem();
232 
233 	/*
234 	 * Set an alternative stack that will generate a page fault when the
235 	 * signal is raised. The page fault will be treated via userfaultfd,
236 	 * i.e. via fault_handler_thread.
237 	 */
238 	ss.ss_sp = get_uf_mem(SIGSTKSZ, NULL);
239 	ss.ss_size = SIGSTKSZ;
240 	ss.ss_flags = 0;
241 	if (sigaltstack(&ss, NULL) == -1) {
242 		perror("sigaltstack() failed");
243 		exit(EXIT_FAILURE);
244 	}
245 
246 	sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
247 	sa.sa_sigaction = signal_handler;
248 	if (sigaction(SIGTRAP, &sa, NULL) == -1) {
249 		perror("sigaction() failed");
250 		exit(EXIT_FAILURE);
251 	}
252 
253 	/* Trigger a SIGTRAP in transactional state */
254 	asm __volatile__(
255 			"tbegin.;"
256 			"beq    1f;"
257 			"trap;"
258 			"1: ;"
259 			: : : "memory");
260 
261 	/* Trigger a SIGTRAP in suspended state */
262 	asm __volatile__(
263 			"tbegin.;"
264 			"beq    1f;"
265 			"tsuspend.;"
266 			"trap;"
267 			"tresume.;"
268 			"1: ;"
269 			: : : "memory");
270 
271 	return EXIT_SUCCESS;
272 }
273 
274 int main(int argc, char **argv)
275 {
276 	/*
277 	 * Depending on kernel config, the TM Bad Thing might not result in a
278 	 * crash, instead the kernel never returns control back to userspace, so
279 	 * set a tight timeout. If the test passes it completes almost
280 	 * immediately.
281 	 */
282 	test_harness_set_timeout(2);
283 	return test_harness(tm_signal_pagefault, "tm_signal_pagefault");
284 }
285