1 // SPDX-License-Identifier: GPL-2.0+
2 
#include <asm/unistd.h>
#include <linux/hw_breakpoint.h>
#include <linux/ptrace.h>
#include <memory.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>

#include "utils.h"
11 
12 /*
13  * Child subroutine that performs a load on the address, then traps
14  */
15 void same_watch_addr_child(unsigned long *addr);
16 
17 /* Address of the ld instruction in same_watch_addr_child() */
18 extern char same_watch_addr_load[];
19 
20 /* Address of the end trap instruction in same_watch_addr_child() */
21 extern char same_watch_addr_trap[];
22 
23 /*
24  * Child subroutine that performs a load on the first address, then a load on
25  * the second address (with no instructions separating this from the first
26  * load), then traps.
27  */
28 void perf_then_ptrace_child(unsigned long *first_addr, unsigned long *second_addr);
29 
30 /* Address of the first ld instruction in perf_then_ptrace_child() */
31 extern char perf_then_ptrace_load1[];
32 
33 /* Address of the second ld instruction in perf_then_ptrace_child() */
34 extern char perf_then_ptrace_load2[];
35 
36 /* Address of the end trap instruction in perf_then_ptrace_child() */
37 extern char perf_then_ptrace_trap[];
38 
/*
 * Issues the ptrace system call (__NR_ptrace) through syscall() with the
 * given request, target pid, addr and data arguments, and hands back
 * whatever syscall() returned.
 */
static inline long sys_ptrace(long request, pid_t pid, unsigned long addr, unsigned long data)
{
	long ret;

	ret = syscall(__NR_ptrace, request, pid, addr, data);

	return ret;
}
43 
ptrace_traceme(void)44 static long ptrace_traceme(void)
45 {
46 	return sys_ptrace(PTRACE_TRACEME, 0, 0, 0);
47 }
48 
ptrace_getregs(pid_t pid,struct pt_regs * result)49 static long ptrace_getregs(pid_t pid, struct pt_regs *result)
50 {
51 	return sys_ptrace(PTRACE_GETREGS, pid, 0, (unsigned long)result);
52 }
53 
ptrace_setregs(pid_t pid,struct pt_regs * result)54 static long ptrace_setregs(pid_t pid, struct pt_regs *result)
55 {
56 	return sys_ptrace(PTRACE_SETREGS, pid, 0, (unsigned long)result);
57 }
58 
ptrace_cont(pid_t pid,long signal)59 static long ptrace_cont(pid_t pid, long signal)
60 {
61 	return sys_ptrace(PTRACE_CONT, pid, 0, signal);
62 }
63 
ptrace_singlestep(pid_t pid,long signal)64 static long ptrace_singlestep(pid_t pid, long signal)
65 {
66 	return sys_ptrace(PTRACE_SINGLESTEP, pid, 0, signal);
67 }
68 
ppc_ptrace_gethwdbginfo(pid_t pid,struct ppc_debug_info * dbginfo)69 static long ppc_ptrace_gethwdbginfo(pid_t pid, struct ppc_debug_info *dbginfo)
70 {
71 	return sys_ptrace(PPC_PTRACE_GETHWDBGINFO, pid, 0, (unsigned long)dbginfo);
72 }
73 
ppc_ptrace_sethwdbg(pid_t pid,struct ppc_hw_breakpoint * bp_info)74 static long ppc_ptrace_sethwdbg(pid_t pid, struct ppc_hw_breakpoint *bp_info)
75 {
76 	return sys_ptrace(PPC_PTRACE_SETHWDEBUG, pid, 0, (unsigned long)bp_info);
77 }
78 
ppc_ptrace_delhwdbg(pid_t pid,int bp_id)79 static long ppc_ptrace_delhwdbg(pid_t pid, int bp_id)
80 {
81 	return sys_ptrace(PPC_PTRACE_DELHWDEBUG, pid, 0L, bp_id);
82 }
83 
ptrace_getreg_pc(pid_t pid,void ** pc)84 static long ptrace_getreg_pc(pid_t pid, void **pc)
85 {
86 	struct pt_regs regs;
87 	long err;
88 
89 	err = ptrace_getregs(pid, &regs);
90 	if (err)
91 		return err;
92 
93 	*pc = (void *)regs.nip;
94 
95 	return 0;
96 }
97 
ptrace_setreg_pc(pid_t pid,void * pc)98 static long ptrace_setreg_pc(pid_t pid, void *pc)
99 {
100 	struct pt_regs regs;
101 	long err;
102 
103 	err = ptrace_getregs(pid, &regs);
104 	if (err)
105 		return err;
106 
107 	regs.nip = (unsigned long)pc;
108 
109 	err = ptrace_setregs(pid, &regs);
110 	if (err)
111 		return err;
112 
113 	return 0;
114 }
115 
/*
 * Issues the perf_event_open system call (__NR_perf_event_open) through
 * syscall() with the given arguments. The syscall() result is returned
 * converted to int.
 */
static int perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu,
			   int group_fd, unsigned long flags)
{
	long ret;

	ret = syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);

	return ret;
}
121 
/*
 * Initialises @attr as a read breakpoint event (PERF_TYPE_BREAKPOINT with
 * HW_BREAKPOINT_R) covering @len bytes starting at @addr, with the
 * exclude_kernel and exclude_hv flags set. Every other field is zeroed.
 */
static void perf_user_event_attr_set(struct perf_event_attr *attr, void *addr, u64 len)
{
	/* Start from an all-zero structure so unset fields are well defined */
	memset(attr, 0, sizeof(*attr));

	attr->size		= sizeof(*attr);
	attr->type		= PERF_TYPE_BREAKPOINT;

	attr->bp_type		= HW_BREAKPOINT_R;
	attr->bp_addr		= (u64)addr;
	attr->bp_len		= len;

	attr->exclude_hv	= 1;
	attr->exclude_kernel	= 1;
}
134 
/*
 * Opens a perf read-breakpoint event on @child_pid watching @len bytes at
 * @addr. The event is opened with cpu = -1, group_fd = -1 and no flags.
 *
 * Returns the perf_event_open() result; callers in this file treat a
 * negative value as failure.
 */
static int perf_watchpoint_open(pid_t child_pid, void *addr, u64 len)
{
	struct perf_event_attr attr;
	int fd;

	perf_user_event_attr_set(&attr, addr, len);
	fd = perf_event_open(&attr, child_pid, -1, -1, 0);

	return fd;
}
142 
/*
 * Reads the current value of a perf counter into *@count.
 *
 * The counter is fetched with a single read() of sizeof(*count) bytes on
 * @perf_fd, which are interpreted as a u64.
 *
 * Returns 0 if exactly that many bytes were read, -1 otherwise (read
 * error or short read).
 */
static int perf_read_counter(int perf_fd, u64 *count)
{
	ssize_t got = read(perf_fd, count, sizeof(*count));

	return ((size_t)got == sizeof(*count)) ? 0 : -1;
}
156 
ppc_ptrace_init_breakpoint(struct ppc_hw_breakpoint * info,int type,void * addr,int len)157 static void ppc_ptrace_init_breakpoint(struct ppc_hw_breakpoint *info,
158 				       int type, void *addr, int len)
159 {
160 	info->version = 1;
161 	info->trigger_type = type;
162 	info->condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
163 	info->addr = (u64)addr;
164 	info->addr2 = (u64)addr + len;
165 	info->condition_value = 0;
166 	if (!len)
167 		info->addr_mode = PPC_BREAKPOINT_MODE_EXACT;
168 	else
169 		info->addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE;
170 }
171 
172 /*
173  * Checks if we can place at least 2 watchpoints on the child process
174  */
check_watchpoints(pid_t pid)175 static int check_watchpoints(pid_t pid)
176 {
177 	struct ppc_debug_info dbginfo;
178 
179 	FAIL_IF_MSG(ppc_ptrace_gethwdbginfo(pid, &dbginfo), "PPC_PTRACE_GETHWDBGINFO failed");
180 	SKIP_IF_MSG(dbginfo.num_data_bps <= 1, "Not enough data watchpoints (need at least 2)");
181 
182 	return 0;
183 }
184 
/*
 * fork() wrapper that prepares the child for being ptraced.
 *
 * The child requests tracing and raises SIGSTOP on itself; the parent
 * waits for the child and checks that it is stopped. Both processes return
 * 0 from here on success, with the fork() result stored in *@pid (0 in the
 * child). The child stays stopped until the parent resumes it, e.g. with
 * ptrace_cont().
 */
static int ptrace_fork_child(pid_t *pid)
{
	int status;
	pid_t child = fork();

	*pid = child;
	FAIL_IF_MSG(child < 0, "Failed to fork child");

	if (child == 0) {
		/* Child: become a tracee, then stop so the parent can sync */
		FAIL_IF_EXIT_MSG(ptrace_traceme(), "PTRACE_TRACEME failed");
		FAIL_IF_EXIT_MSG(raise(SIGSTOP), "Child failed to raise SIGSTOP");
		return 0;
	}

	/* Parent: synchronise on child SIGSTOP */
	FAIL_IF_MSG(waitpid(child, &status, 0) == -1, "Failed to wait for child");
	FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped");

	return 0;
}
210 
211 /*
212  * Tests the interaction between ptrace and perf watching the same data.
213  *
214  * We expect ptrace to take 'priority', as it is has before-execute
215  * semantics.
216  *
217  * The perf counter should not be incremented yet because perf has after-execute
218  * semantics. E.g., if ptrace changes the child PC, we don't even execute the
219  * instruction at all.
220  *
221  * When the child is stopped for ptrace, we test both continue and single step.
222  * Both should increment the perf counter. We also test changing the PC somewhere
223  * different and stepping, which should not increment the perf counter.
224  */
same_watch_addr_test(void)225 int same_watch_addr_test(void)
226 {
227 	struct ppc_hw_breakpoint bp_info;	/* ptrace breakpoint info */
228 	int bp_id;	/* Breakpoint handle of ptrace watchpoint */
229 	int perf_fd;	/* File descriptor of perf performance counter */
230 	u64 perf_count;	/* Most recently fetched perf performance counter value */
231 	pid_t pid;	/* PID of child process */
232 	void *pc;	/* Most recently fetched child PC value */
233 	int status;	/* Stop status of child after waitpid */
234 	unsigned long value;	/* Dummy value to be read/written to by child */
235 	int err;
236 
237 	err = ptrace_fork_child(&pid);
238 	if (err)
239 		return err;
240 
241 	if (!pid) {
242 		same_watch_addr_child(&value);
243 		exit(1);
244 	}
245 
246 	err = check_watchpoints(pid);
247 	if (err)
248 		return err;
249 
250 	/* Place a perf watchpoint counter on value */
251 	perf_fd = perf_watchpoint_open(pid, &value, sizeof(value));
252 	FAIL_IF_MSG(perf_fd < 0, "Failed to open perf performance counter");
253 
254 	/* Place a ptrace watchpoint on value */
255 	ppc_ptrace_init_breakpoint(&bp_info, PPC_BREAKPOINT_TRIGGER_READ, &value, sizeof(value));
256 	bp_id = ppc_ptrace_sethwdbg(pid, &bp_info);
257 	FAIL_IF_MSG(bp_id < 0, "Failed to set ptrace watchpoint");
258 
259 	/* Let the child run. It should stop on the ptrace watchpoint */
260 	FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child");
261 
262 	FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child");
263 	FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped");
264 	FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC");
265 	FAIL_IF_MSG(pc != same_watch_addr_load, "Child did not stop on load instruction");
266 
267 	/*
268 	 * We stopped before executing the load, so perf should not have
269 	 * recorded any events yet
270 	 */
271 	FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter");
272 	FAIL_IF_MSG(perf_count != 0, "perf recorded unexpected event");
273 
274 	/* Single stepping over the load should increment the perf counter */
275 	FAIL_IF_MSG(ptrace_singlestep(pid, 0), "Failed to single step child");
276 
277 	FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child");
278 	FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped");
279 	FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC");
280 	FAIL_IF_MSG(pc != same_watch_addr_load + 4, "Failed to single step load instruction");
281 	FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter");
282 	FAIL_IF_MSG(perf_count != 1, "perf counter did not increment");
283 
284 	/*
285 	 * Set up a ptrace watchpoint on the value again and trigger it.
286 	 * The perf counter should not have incremented because we do not
287 	 * execute the load yet.
288 	 */
289 	FAIL_IF_MSG(ppc_ptrace_delhwdbg(pid, bp_id), "Failed to remove old ptrace watchpoint");
290 	bp_id = ppc_ptrace_sethwdbg(pid, &bp_info);
291 	FAIL_IF_MSG(bp_id < 0, "Failed to set ptrace watchpoint");
292 	FAIL_IF_MSG(ptrace_setreg_pc(pid, same_watch_addr_load), "Failed to set child PC");
293 	FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child");
294 
295 	FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child");
296 	FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped");
297 	FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC");
298 	FAIL_IF_MSG(pc != same_watch_addr_load, "Child did not stop on load trap");
299 	FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter");
300 	FAIL_IF_MSG(perf_count != 1, "perf counter should not have changed");
301 
302 	/* Continuing over the load should increment the perf counter */
303 	FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child");
304 
305 	FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child");
306 	FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped");
307 	FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC");
308 	FAIL_IF_MSG(pc != same_watch_addr_trap, "Child did not stop on end trap");
309 	FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter");
310 	FAIL_IF_MSG(perf_count != 2, "perf counter did not increment");
311 
312 	/*
313 	 * If we set the child PC back to the load instruction, then continue,
314 	 * we should reach the end trap (because ptrace is one-shot) and have
315 	 * another perf event.
316 	 */
317 	FAIL_IF_MSG(ptrace_setreg_pc(pid, same_watch_addr_load), "Failed to set child PC");
318 	FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child");
319 
320 	FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child");
321 	FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped");
322 	FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC");
323 	FAIL_IF_MSG(pc != same_watch_addr_trap, "Child did not stop on end trap");
324 	FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter");
325 	FAIL_IF_MSG(perf_count != 3, "perf counter did not increment");
326 
327 	/*
328 	 * If we set the child PC back to the load instruction, set a ptrace
329 	 * watchpoint on the load, then continue, we should immediately get
330 	 * the ptrace trap without incrementing the perf counter
331 	 */
332 	FAIL_IF_MSG(ppc_ptrace_delhwdbg(pid, bp_id), "Failed to remove old ptrace watchpoint");
333 	bp_id = ppc_ptrace_sethwdbg(pid, &bp_info);
334 	FAIL_IF_MSG(bp_id < 0, "Failed to set ptrace watchpoint");
335 	FAIL_IF_MSG(ptrace_setreg_pc(pid, same_watch_addr_load), "Failed to set child PC");
336 	FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child");
337 
338 	FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child");
339 	FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped");
340 	FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC");
341 	FAIL_IF_MSG(pc != same_watch_addr_load, "Child did not stop on load instruction");
342 	FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter");
343 	FAIL_IF_MSG(perf_count != 3, "perf counter should not have changed");
344 
345 	/*
346 	 * If we change the PC while stopped on the load instruction, we should
347 	 * not increment the perf counter (because ptrace is before-execute,
348 	 * perf is after-execute).
349 	 */
350 	FAIL_IF_MSG(ptrace_setreg_pc(pid, same_watch_addr_load + 4), "Failed to set child PC");
351 	FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child");
352 
353 	FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child");
354 	FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped");
355 	FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC");
356 	FAIL_IF_MSG(pc != same_watch_addr_trap, "Child did not stop on end trap");
357 	FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter");
358 	FAIL_IF_MSG(perf_count != 3, "perf counter should not have changed");
359 
360 	/* Clean up child */
361 	FAIL_IF_MSG(kill(pid, SIGKILL) != 0, "Failed to kill child");
362 
363 	return 0;
364 }
365 
366 /*
367  * Tests the interaction between ptrace and perf when:
368  * 1. perf watches a value
369  * 2. ptrace watches a different value
370  * 3. The perf value is read, then the ptrace value is read immediately after
371  *
372  * A breakpoint implementation may accidentally misattribute/skip one of
373  * the ptrace or perf handlers, as interrupt based work is done after perf
374  * and before ptrace.
375  *
376  * We expect the perf counter to increment before the ptrace watchpoint
377  * triggers.
378  */
perf_then_ptrace_test(void)379 int perf_then_ptrace_test(void)
380 {
381 	struct ppc_hw_breakpoint bp_info;	/* ptrace breakpoint info */
382 	int bp_id;	/* Breakpoint handle of ptrace watchpoint */
383 	int perf_fd;	/* File descriptor of perf performance counter */
384 	u64 perf_count;	/* Most recently fetched perf performance counter value */
385 	pid_t pid;	/* PID of child process */
386 	void *pc;	/* Most recently fetched child PC value */
387 	int status;	/* Stop status of child after waitpid */
388 	unsigned long perf_value;	/* Dummy value to be watched by perf */
389 	unsigned long ptrace_value;	/* Dummy value to be watched by ptrace */
390 	int err;
391 
392 	err = ptrace_fork_child(&pid);
393 	if (err)
394 		return err;
395 
396 	/*
397 	 * If we are the child, run a subroutine that reads the perf value,
398 	 * then reads the ptrace value with consecutive load instructions
399 	 */
400 	if (!pid) {
401 		perf_then_ptrace_child(&perf_value, &ptrace_value);
402 		exit(0);
403 	}
404 
405 	err = check_watchpoints(pid);
406 	if (err)
407 		return err;
408 
409 	/* Place a perf watchpoint counter */
410 	perf_fd = perf_watchpoint_open(pid, &perf_value, sizeof(perf_value));
411 	FAIL_IF_MSG(perf_fd < 0, "Failed to open perf performance counter");
412 
413 	/* Place a ptrace watchpoint */
414 	ppc_ptrace_init_breakpoint(&bp_info, PPC_BREAKPOINT_TRIGGER_READ,
415 				   &ptrace_value, sizeof(ptrace_value));
416 	bp_id = ppc_ptrace_sethwdbg(pid, &bp_info);
417 	FAIL_IF_MSG(bp_id < 0, "Failed to set ptrace watchpoint");
418 
419 	/* Let the child run. It should stop on the ptrace watchpoint */
420 	FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child");
421 
422 	FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child");
423 	FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped");
424 	FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC");
425 	FAIL_IF_MSG(pc != perf_then_ptrace_load2, "Child did not stop on ptrace load");
426 
427 	/* perf should have recorded the first load */
428 	FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter");
429 	FAIL_IF_MSG(perf_count != 1, "perf counter did not increment");
430 
431 	/* Clean up child */
432 	FAIL_IF_MSG(kill(pid, SIGKILL) != 0, "Failed to kill child");
433 
434 	return 0;
435 }
436 
main(int argc,char * argv[])437 int main(int argc, char *argv[])
438 {
439 	int err = 0;
440 
441 	err |= test_harness(same_watch_addr_test, "same_watch_addr");
442 	err |= test_harness(perf_then_ptrace_test, "perf_then_ptrace");
443 
444 	return err;
445 }
446