xref: /openbmc/debug-trigger/main.c (revision be3dd0e6)
1 // SPDX-License-Identifier: Apache-2.0
2 // Copyright (C) 2021 IBM Corp.
3 
4 /*
5  * debug-trigger listens for an external signal that the BMC is in some way unresponsive. When a
6  * signal is received it triggers a crash to collect debug data and reboots the system in the hope
7  * that it will recover.
8  *
9  * Usage: debug-trigger [SOURCE] [SINK]
10  *
11  * Options:
12  *  --sink-actions=ACTION
13  *	Set the class of sink action(s) to be used. Can take the value of 'sysrq' or 'dbus'.
14  *	Defaults to 'sysrq'.
15  *
16  * Examples:
17  *  debug-trigger
18  *	Set the source as stdin, the sink as stdout, and use the default 'sysrq' set of sink
19  *	actions. Useful for testing.
20  *
21  *  debug-trigger --sink-actions=sysrq
22  *	Explicitly use the 'sysrq' set of sink actions with stdin as the source and stdout as the
23  *	sink.
24  *
25  *  debug-trigger /dev/serio_raw0 /proc/sysrq-trigger
26  *	Open /dev/serio_raw0 as the source and /proc/sysrq-trigger as the sink, with the default
27  *	'sysrq' set of sink actions. When 'D' is read from /dev/serio_raw0 'c' will be written to
28  *	/proc/sysrq-trigger, causing a kernel panic. When 'R' is read from /dev/serio_raw0 'b' will
29  *	be written to /proc/sysrq-trigger, causing an immediate reboot of the system.
30  *
31  *  debug-trigger --sink-actions=dbus /dev/serio_raw0
32  *	Open /dev/serio_raw0 as the source and configure the 'dbus' set of sink actions. When 'D' is
33  *	read from /dev/serio_raw0 create a dump via phosphor-debug-collector by calling through its
34  *	D-Bus interface, then reboot the system by starting systemd's 'reboot.target'
35  */
36 #define _GNU_SOURCE
37 
38 #include "config.h"
39 
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <libgen.h>
#include <limits.h>
#include <linux/reboot.h>
#include <poll.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/reboot.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>
56 
57 #define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0]))
58 
59 struct sd_bus;
60 
/*
 * Operations implemented by a debug source.
 *
 * poll: fetch the next command byte from the source into *op. process() keeps calling it for as
 *       long as it returns 0 and reports a negative return value via strerror() of its negation.
 */
struct debug_source_ops {
	int (*poll)(void *ctx, char *op);
};
64 
/*
 * A debug source: a set of operations paired with the context pointer that is handed back to
 * each of those operations as their ctx argument.
 */
struct debug_source {
	const struct debug_source_ops *ops;
	void *ctx;
};
69 
/* Context for the basic source: the file descriptor command bytes are read from. */
struct debug_source_basic {
	int source;
};
73 
/* Indices into debug_source_dbus.pfds: the command source and the D-Bus connection */
#define DBUS_SOURCE_PFD_SOURCE	0
#define DBUS_SOURCE_PFD_DBUS	1

/*
 * Context for the D-Bus aware source: the bus connection plus the pair of descriptors that are
 * polled together, so that command bytes and bus traffic are serviced from the same loop.
 */
struct debug_source_dbus {
	struct sd_bus *bus;
	struct pollfd pfds[2];
};
80 
/*
 * Operations implemented by a debug sink.
 *
 * debug:  invoked by process() when the 'D' command is received.
 * reboot: invoked by process() when the 'R' command is received.
 */
struct debug_sink_ops {
	void (*debug)(void *ctx);
	void (*reboot)(void *ctx);
};
85 
/*
 * A debug sink: a set of operations paired with the context pointer that is handed back to each
 * of those operations as their ctx argument.
 */
struct debug_sink {
	const struct debug_sink_ops *ops;
	void *ctx;
};
90 
/* Context for the sysrq sink: the file descriptor the sysrq command characters are written to. */
struct debug_sink_sysrq {
	int sink;
};
94 
/* Context for the D-Bus sink: the bus connection used to issue the dump and reboot requests. */
struct debug_sink_dbus {
	struct sd_bus *bus;
};
98 
sysrq_sink_debug(void * ctx)99 static void sysrq_sink_debug(void *ctx)
100 {
101 	struct debug_sink_sysrq *sysrq = ctx;
102 	/* https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/admin-guide/sysrq.rst?h=v5.16#n93 */
103 	static const char action = 'c';
104 	ssize_t rc;
105 
106 	sync();
107 
108 	if ((rc = write(sysrq->sink, &action, sizeof(action))) == sizeof(action))
109 		return;
110 
111 	if (rc == -1) {
112 		warn("Failed to execute debug command");
113 	} else {
114 		warnx("Failed to execute debug command: %zd", rc);
115 	}
116 }
117 
sysrq_sink_reboot(void * ctx)118 static void sysrq_sink_reboot(void *ctx)
119 {
120 	struct debug_sink_sysrq *sysrq = ctx;
121 	/* https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/admin-guide/sysrq.rst?h=v5.16#n90 */
122 	static const char action = 'b';
123 	ssize_t rc;
124 
125 	sync();
126 
127 	if ((rc = write(sysrq->sink, &action, sizeof(action))) == sizeof(action))
128 		return;
129 
130 	if (rc == -1) {
131 		warn("Failed to reboot BMC");
132 	} else {
133 		warnx("Failed to reboot BMC: %zd", rc);
134 	}
135 }
136 
basic_source_poll(void * ctx,char * op)137 static int basic_source_poll(void *ctx, char *op)
138 {
139 	struct debug_source_basic *basic = ctx;
140 	ssize_t ingress;
141 
142 	if ((ingress = read(basic->source, op, 1)) != 1) {
143 		if (ingress < 0) {
144 			warn("Failed to read from basic source");
145 			return -errno;
146 		}
147 
148 		/* Unreachable */
149 		errx(EXIT_FAILURE, "Bad read, requested 1 got %zd", ingress);
150 	}
151 
152 	return 0;
153 }
154 
155 const struct debug_sink_ops sysrq_sink_ops = {
156 	.debug = sysrq_sink_debug,
157 	.reboot = sysrq_sink_reboot,
158 };
159 
160 const struct debug_source_ops basic_source_ops = {
161 	.poll = basic_source_poll,
162 };
163 
164 #if HAVE_SYSTEMD
165 #include <systemd/sd-bus.h>
166 
167 static void dbus_sink_reboot(void *ctx);
/*
 * Match callback for PropertiesChanged signals on the dump object registered by
 * dbus_sink_debug(). userdata is the struct debug_sink_dbus for the sink.
 *
 * The signal body (sa{sv}as) is walked looking for the "Status" property of the
 * xyz.openbmc_project.Common.Progress interface. Once that property reports completion the
 * current match slot is released and the system reboot is requested.
 *
 * Returns 0 when the signal was handled or was not of interest, or the negative value returned
 * by sd-bus if the message could not be parsed.
 */
static int dbus_sink_dump_progress(sd_bus_message *m, void *userdata,
				   sd_bus_error *ret_error __attribute__((unused)))
{
	static const char progress_iface[] = "xyz.openbmc_project.Common.Progress";
	static const char completed[] =
		"xyz.openbmc_project.Common.Progress.OperationStatus.Completed";
	struct debug_sink_dbus *sink = userdata;
	const char *status = NULL;
	const char *iface;
	int rc;

	/* First element of the signal body: the interface whose properties changed */
	rc = sd_bus_message_read_basic(m, 's', &iface);
	if (rc < 0) {
		warnx("Failed to extract interface from PropertiesChanged signal: %s",
		      strerror(-rc));
		return rc;
	}

	/* Updates for any other interface are of no interest here */
	if (strcmp(iface, progress_iface) != 0)
		return 0;

	/* Second element: the dictionary of changed properties */
	rc = sd_bus_message_enter_container(m, 'a', "{sv}");
	if (rc <= 0)
		return rc;

	for (;;) {
		const char *member;

		rc = sd_bus_message_enter_container(m, 'e', "sv");
		if (rc < 0)
			return rc;

		/* No further dictionary entries */
		if (rc == 0)
			break;

		rc = sd_bus_message_read_basic(m, 's', &member);
		if (rc < 0) {
			warnx("Failed to extract member name from PropertiesChanged signal: %s",
			      strerror(-rc));
			return rc;
		}

		if (strcmp(member, "Status") != 0) {
			rc = sd_bus_message_skip(m, "v");
			if (rc < 0) {
				warnx("Failed to skip variant for unrecognised member %s in PropertiesChanged signal: %s",
				      member, strerror(-rc));
				return rc;
			}
		} else {
			rc = sd_bus_message_enter_container(m, 'v', "s");
			if (rc < 0) {
				warnx("Failed to enter variant container in PropertiesChanged signal: %s",
				      strerror(-rc));
				return rc;
			}

			/* Only read and leave the variant if it was actually entered */
			if (rc > 0) {
				rc = sd_bus_message_read_basic(m, 's', &status);
				if (rc < 0) {
					warnx("Failed to extract status value from PropertiesChanged signal: %s",
					      strerror(-rc));
					return rc;
				}

				sd_bus_message_exit_container(m);
			}
		}

		/* Leave the dictionary entry */
		sd_bus_message_exit_container(m);
	}

	/* Leave the dictionary itself */
	sd_bus_message_exit_container(m);

	if (status == NULL)
		return 0;

	printf("Dump progress on %s: %s\n", sd_bus_message_get_path(m), status);

	/* If we're finished with the dump, reboot the system */
	if (strcmp(status, completed) == 0) {
		sd_bus_slot_unref(sd_bus_get_current_slot(sink->bus));
		dbus_sink_reboot(userdata);
	}

	return 0;
}
261 
dbus_sink_debug(void * ctx)262 static void dbus_sink_debug(void *ctx)
263 {
264 	sd_bus_error ret_error = SD_BUS_ERROR_NULL;
265 	struct debug_sink_dbus *dbus = ctx;
266 	sd_bus_message *reply;
267 	sd_bus_slot *slot;
268 	const char *path;
269 	char *status;
270 	int rc;
271 
272 	/* Start a BMC dump */
273 	rc = sd_bus_call_method(dbus->bus,
274 				"xyz.openbmc_project.Dump.Manager",
275 				"/xyz/openbmc_project/dump/bmc",
276 				"xyz.openbmc_project.Dump.Create",
277 				"CreateDump",
278 				&ret_error,
279 				&reply, "a{sv}", 0);
280 	if (rc < 0) {
281 		warnx("Failed to call CreateDump: %s", strerror(-rc));
282 		return;
283 	}
284 
285 	/* Extract the dump path */
286 	rc = sd_bus_message_read_basic(reply, 'o', &path);
287 	if (rc < 0) {
288 		warnx("Failed to extract dump object path: %s", strerror(-rc));
289 		goto cleanup_reply;
290 	}
291 
292 	/* Set up a match watching for completion of the dump */
293 	rc = sd_bus_match_signal(dbus->bus,
294 				 &slot,
295 				 "xyz.openbmc_project.Dump.Manager",
296 				 path,
297 				 "org.freedesktop.DBus.Properties",
298 				 "PropertiesChanged",
299 				 dbus_sink_dump_progress,
300 				 ctx);
301 	if (rc < 0) {
302 		warnx("Failed to add signal match for progress status on dump object %s: %s",
303 		      path, strerror(-rc));
304 		goto cleanup_reply;
305 	}
306 
307 	/*
308 	 * Mark the slot as 'floating'. If a slot is _not_ marked as floating it holds a reference
309 	 * to the bus, and the bus will stay alive so long as the slot is referenced. If the slot is
310 	 * instead marked floating the relationship is inverted: The lifetime of the slot is defined
311 	 * in terms of the bus, which means we relieve ourselves of having to track the lifetime of
312 	 * the slot.
313 	 *
314 	 * For more details see `man 3 sd_bus_slot_set_floating`, also documented here:
315 	 *
316 	 * https://www.freedesktop.org/software/systemd/man/sd_bus_slot_set_floating.html
317 	 */
318 	rc = sd_bus_slot_set_floating(slot, 0);
319 	if (rc < 0) {
320 		warnx("Failed to mark progress match slot on %s as floating: %s",
321 		      path, strerror(-rc));
322 		goto cleanup_reply;
323 	}
324 
325 	printf("Registered progress match on dump object %s\n", path);
326 
327 	/* Now that the match is set up, check the current value in case we missed any updates */
328 	rc = sd_bus_get_property_string(dbus->bus,
329 					"xyz.openbmc_project.Dump.Manager",
330 					path,
331 					"xyz.openbmc_project.Common.Progress",
332 					"Status",
333 					&ret_error,
334 					&status);
335 	if (rc < 0) {
336 		warnx("Failed to get progress status property on dump object %s: %s",
337 		      path, strerror(-rc));
338 		sd_bus_slot_unref(slot);
339 		goto cleanup_reply;
340 	}
341 
342 	printf("Dump state for %s is currently %s\n", path, status);
343 
344 	/*
345 	 * If we're finished with the dump, reboot the system. If the dump isn't finished the reboot
346 	 * will instead take place via the dbus_sink_dump_progress() callback on the match.
347 	 */
348 	if (!strcmp(status, "xyz.openbmc_project.Common.Progress.OperationStatus.Completed")) {
349 		sd_bus_slot_unref(slot);
350 		dbus_sink_reboot(ctx);
351 	}
352 
353 cleanup_reply:
354 	sd_bus_message_unref(reply);
355 }
356 
dbus_sink_reboot(void * ctx)357 static void dbus_sink_reboot(void *ctx)
358 {
359 	sd_bus_error ret_error = SD_BUS_ERROR_NULL;
360 	struct debug_sink_dbus *dbus = ctx;
361 	sd_bus_message *reply;
362 	int rc;
363 
364 	warnx("Rebooting the system");
365 
366 	rc = sd_bus_call_method(dbus->bus,
367 				"org.freedesktop.systemd1",
368 				"/org/freedesktop/systemd1",
369 				"org.freedesktop.systemd1.Manager",
370 				"StartUnit",
371 				&ret_error,
372 				&reply,
373 				"ss",
374 				"reboot.target",
375 				"replace-irreversibly");
376 	if (rc < 0) {
377 		warnx("Failed to start reboot.target: %s", strerror(-rc));
378 	}
379 }
380 
dbus_source_poll(void * ctx,char * op)381 static int dbus_source_poll(void *ctx, char *op)
382 {
383 	struct debug_source_dbus *dbus = ctx;
384 	int rc;
385 
386 	while (1) {
387 		struct timespec tsto, *ptsto;
388 		uint64_t dbusto;
389 
390 		/* See SD_BUS_GET_FD(3) */
391 		dbus->pfds[DBUS_SOURCE_PFD_DBUS].fd = sd_bus_get_fd(dbus->bus);
392 		dbus->pfds[DBUS_SOURCE_PFD_DBUS].events = sd_bus_get_events(dbus->bus);
393 		rc = sd_bus_get_timeout(dbus->bus, &dbusto);
394 		if (rc < 0)
395 			return rc;
396 
397 		if (dbusto == UINT64_MAX) {
398 			ptsto = NULL;
399 		} else if (dbus->pfds[DBUS_SOURCE_PFD_DBUS].events == 0) {
400 			ptsto = NULL;
401 		} else {
402 #define MSEC_PER_SEC 1000U
403 #define USEC_PER_SEC (MSEC_PER_SEC * 1000U)
404 #define NSEC_PER_SEC (USEC_PER_SEC * 1000U)
405 #define NSEC_PER_USEC (NSEC_PER_SEC / USEC_PER_SEC)
406 			tsto.tv_sec = dbusto / USEC_PER_SEC;
407 			tsto.tv_nsec = (dbusto % USEC_PER_SEC) * NSEC_PER_USEC;
408 			ptsto = &tsto;
409 		}
410 
411 		if ((rc = ppoll(dbus->pfds, ARRAY_SIZE(dbus->pfds), ptsto, NULL)) < 0) {
412 			warn("Failed polling source fds");
413 			return -errno;
414 		}
415 
416 		if (dbus->pfds[DBUS_SOURCE_PFD_SOURCE].revents) {
417 			ssize_t ingress;
418 
419 			if ((ingress = read(dbus->pfds[DBUS_SOURCE_PFD_SOURCE].fd, op, 1)) != 1) {
420 				if (ingress < 0) {
421 					warn("Failed to read from basic source");
422 					return -errno;
423 				}
424 
425 				errx(EXIT_FAILURE, "Bad read, requested 1 got %zd", ingress);
426 			}
427 
428 			return 0;
429 		}
430 
431 		if (dbus->pfds[DBUS_SOURCE_PFD_DBUS].revents) {
432 			if ((rc = sd_bus_process(dbus->bus, NULL)) < 0) {
433 				warnx("Failed processing inbound D-Bus messages: %s",
434 				      strerror(-rc));
435 				return rc;
436 			}
437 		}
438 	}
439 }
440 #else
/* Stand-in for the 'dbus' debug action when built without systemd: only emits a warning. */
static void dbus_sink_debug(void *ctx)
{
	(void)ctx;

	warnx("%s: Configured without systemd, dbus sinks disabled", __func__);
}
445 
/* Stand-in for the 'dbus' reboot action when built without systemd: only emits a warning. */
static void dbus_sink_reboot(void *ctx)
{
	(void)ctx;

	warnx("%s: Configured without systemd, dbus sinks disabled", __func__);
}
450 
/*
 * Stand-in for the D-Bus aware poll operation when built without systemd. There is nothing to
 * poll, so the program exits with EXIT_FAILURE rather than returning.
 */
static int dbus_source_poll(void *ctx, char *op)
{
	(void)ctx;
	(void)op;

	/* Name the function in the message, matching the sibling sink stand-ins */
	errx(EXIT_FAILURE, "%s: Configured without systemd, dbus sources disabled", __func__);
}
455 #endif
456 
457 const struct debug_sink_ops dbus_sink_ops = {
458 	.debug = dbus_sink_debug,
459 	.reboot = dbus_sink_reboot,
460 };
461 
462 const struct debug_source_ops dbus_source_ops = {
463 	.poll = dbus_source_poll,
464 };
465 
process(struct debug_source * source,struct debug_sink * sink)466 static int process(struct debug_source *source, struct debug_sink *sink)
467 {
468 	char command;
469 	int rc;
470 
471 	while (!(rc = source->ops->poll(source->ctx, &command))) {
472 		switch (command) {
473 		case 'D':
474 			warnx("Debug action triggered\n");
475 			sink->ops->debug(sink->ctx);
476 			break;
477 		case 'R':
478 			warnx("Reboot action triggered\n");
479 			sink->ops->reboot(sink->ctx);
480 			break;
481 		default:
482 			warnx("Unexpected command: 0x%02x (%c)", command, command);
483 		}
484 	}
485 
486 	if (rc < 0)
487 		warnx("Failed to poll source: %s", strerror(-rc));
488 
489 	return rc;
490 }
491 
main(int argc,char * const argv[])492 int main(int argc, char * const argv[])
493 {
494 	struct debug_source_basic basic_source;
495 	struct debug_source_dbus dbus_source;
496 	struct debug_sink_sysrq sysrq_sink;
497 	struct debug_sink_dbus dbus_sink;
498 	const char *sink_actions = NULL;
499 	struct debug_source source;
500 	struct debug_sink sink;
501 	char devnode[PATH_MAX];
502 	char *devid;
503 	int sourcefd;
504 	int sinkfd;
505 
506 	/* Option processing */
507 	while (1) {
508 		static struct option long_options[] = {
509 			{"sink-actions", required_argument, 0, 's'},
510 			{0, 0, 0, 0},
511 		};
512 		int c;
513 
514 		c = getopt_long(argc, argv, "", long_options, NULL);
515 		if (c == -1)
516 			break;
517 
518 		switch (c) {
519 		case 's':
520 			sink_actions = optarg;
521 			break;
522 		default:
523 			break;
524 		}
525 	}
526 
527 	/*
528 	 * The default behaviour sets the source file descriptor as stdin and the sink file
529 	 * descriptor as stdout. This allows trivial testing on the command-line with just a
530 	 * keyboard and without crashing the system.
531 	 */
532 	sourcefd = 0;
533 	sinkfd = 1;
534 
535 	/* Handle the source path argument, if any */
536 	if (optind < argc) {
537 		char devpath[PATH_MAX];
538 
539 		/*
540 		 * To make our lives easy with udev we take the basename of the source argument and
541 		 * look for it in /dev. This allows us to use %p (the devpath specifier) in the udev
542 		 * rule to pass the device of interest to the systemd unit.
543 		 */
544 		strncpy(devpath, argv[optind], sizeof(devpath));
545 		devpath[PATH_MAX - 1] = '\0';
546 		devid = basename(devpath);
547 
548 		strncpy(devnode, "/dev/", sizeof(devnode));
549 		strncat(devnode, devid, sizeof(devnode) - strlen("/dev/"));
550 		devnode[PATH_MAX - 1] = '\0';
551 
552 		if ((sourcefd = open(devnode, O_RDONLY)) == -1)
553 			err(EXIT_FAILURE, "Failed to open source %s", devnode);
554 
555 		optind++;
556 	}
557 
558 	/*
559 	 * Handle the sink path argument, if any. If sink_actions hasn't been set via the
560 	 * --sink-actions option, then default to 'sysrq'. Otherwise, if --sink-actions=sysrq has
561 	 * been passed, do as we're told and use the 'sysrq' sink actions.
562 	 */
563 	if (!sink_actions || !strcmp("sysrq", sink_actions)) {
564 		if (optind < argc) {
565 			/*
566 			 * Just open the sink path directly. If we ever need different behaviour
567 			 * then we patch this bit when we know what we need.
568 			 */
569 			if ((sinkfd = open(argv[optind], O_WRONLY)) == -1)
570 				err(EXIT_FAILURE, "Failed to open sink %s", argv[optind]);
571 
572 			optind++;
573 		}
574 
575 		basic_source.source = sourcefd;
576 		source.ops = &basic_source_ops;
577 		source.ctx = &basic_source;
578 
579 		sysrq_sink.sink = sinkfd;
580 		sink.ops = &sysrq_sink_ops;
581 		sink.ctx = &sysrq_sink;
582 	}
583 
584 	/* Set up the dbus sink actions if requested via --sink-actions=dbus */
585 	if (sink_actions && !strcmp("dbus", sink_actions)) {
586 		sd_bus *bus;
587 		int rc;
588 
589 		rc = sd_bus_open_system(&bus);
590 		if (rc < 0) {
591 			errx(EXIT_FAILURE, "Failed to connect to the system bus: %s",
592 			       strerror(-rc));
593 		}
594 
595 		dbus_source.bus = bus;
596 		dbus_source.pfds[DBUS_SOURCE_PFD_SOURCE].fd = sourcefd;
597 		dbus_source.pfds[DBUS_SOURCE_PFD_SOURCE].events = POLLIN;
598 		source.ops = &dbus_source_ops;
599 		source.ctx = &dbus_source;
600 
601 		dbus_sink.bus = bus;
602 		sink.ops = &dbus_sink_ops;
603 		sink.ctx = &dbus_sink;
604 	}
605 
606 	/* Check we're done with the command-line */
607 	if (optind < argc)
608 		errx(EXIT_FAILURE, "Found %d unexpected arguments", argc - optind);
609 
610 	if (!(source.ops && source.ctx))
611 		errx(EXIT_FAILURE, "Invalid source configuration");
612 
613 	if (!(sink.ops && sink.ctx))
614 		errx(EXIT_FAILURE, "Unrecognised sink: %s", sink_actions);
615 
616 	/* Trigger the actions on the sink when we receive an event from the source */
617 	if (process(&source, &sink) < 0)
618 		errx(EXIT_FAILURE, "Failure while processing command stream");
619 
620 	return 0;
621 }
622