1 // SPDX-License-Identifier: Apache-2.0
2 // Copyright (C) 2021 IBM Corp.
3
4 /*
5 * debug-trigger listens for an external signal that the BMC is in some way unresponsive. When a
6 * signal is received it triggers a crash to collect debug data and reboots the system in the hope
7 * that it will recover.
8 *
9 * Usage: debug-trigger [SOURCE] [SINK]
10 *
11 * Options:
12 * --sink-actions=ACTION
13 * Set the class of sink action(s) to be used. Can take the value of 'sysrq' or 'dbus'.
14 * Defaults to 'sysrq'.
15 *
16 * Examples:
17 * debug-trigger
18 * Set the source as stdin, the sink as stdout, and use the default 'sysrq' set of sink
19 * actions. Useful for testing.
20 *
21 * debug-trigger --sink-actions=sysrq
22 * Explicitly use the 'sysrq' set of sink actions with stdin as the source and stdout as the
23 * sink.
24 *
25 * debug-trigger /dev/serio_raw0 /proc/sysrq-trigger
26 * Open /dev/serio_raw0 as the source and /proc/sysrq-trigger as the sink, with the default
27 * 'sysrq' set of sink actions. When 'D' is read from /dev/serio_raw0 'c' will be written to
28 * /proc/sysrq-trigger, causing a kernel panic. When 'R' is read from /dev/serio_raw0 'b' will
29 * be written to /proc/sysrq-trigger, causing an immediate reboot of the system.
30 *
 * debug-trigger --sink-actions=dbus /dev/serio_raw0
32 * Open /dev/serio_raw0 as the source and configure the 'dbus' set of sink actions. When 'D' is
33 * read from /dev/serio_raw0 create a dump via phosphor-debug-collector by calling through its
34 * D-Bus interface, then reboot the system by starting systemd's 'reboot.target'
35 */
36 #define _GNU_SOURCE
37
38 #include "config.h"
39
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <libgen.h>
#include <limits.h>
#include <linux/reboot.h>
#include <poll.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/reboot.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>
56
57 #define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0]))
58
59 struct sd_bus;
60
/*
 * Operations implemented by a command source.
 *
 * poll: fetch the next command byte into *op. process() treats a return of 0 as "a command is
 *       available in *op" and any non-zero return as the end of the command stream; negative
 *       values are reported through strerror(-rc).
 */
struct debug_source_ops {
	int (*poll)(void *ctx, char *op);
};
64
/* A command source: a set of operations paired with the context passed as their first argument */
struct debug_source {
	const struct debug_source_ops *ops;
	void *ctx;
};
69
/* Context for basic_source_poll(): the file descriptor that command bytes are read from */
struct debug_source_basic {
	int source;
};
73
/*
 * Context for dbus_source_poll(): the sd-bus connection to service and the descriptors polled
 * alongside it. The pfds array is indexed by the DBUS_SOURCE_PFD_* constants.
 */
struct debug_source_dbus {
	struct sd_bus *bus;
/* Slot in pfds holding the descriptor that command bytes are read from */
#define DBUS_SOURCE_PFD_SOURCE 0
/* Slot in pfds holding the sd-bus connection's descriptor */
#define DBUS_SOURCE_PFD_DBUS 1
	struct pollfd pfds[2];
};
80
/*
 * Actions implemented by a sink.
 *
 * debug:  invoked by process() when the 'D' command is received
 * reboot: invoked by process() when the 'R' command is received
 *
 * Neither action reports a result to the caller.
 */
struct debug_sink_ops {
	void (*debug)(void *ctx);
	void (*reboot)(void *ctx);
};
85
/* A sink: a set of actions paired with the context passed as their only argument */
struct debug_sink {
	const struct debug_sink_ops *ops;
	void *ctx;
};
90
/* Context for the sysrq sink actions: the file descriptor the sysrq action byte is written to */
struct debug_sink_sysrq {
	int sink;
};
94
/* Context for the dbus sink actions: the sd-bus connection used to issue method calls */
struct debug_sink_dbus {
	struct sd_bus *bus;
};
98
sysrq_sink_debug(void * ctx)99 static void sysrq_sink_debug(void *ctx)
100 {
101 struct debug_sink_sysrq *sysrq = ctx;
102 /* https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/admin-guide/sysrq.rst?h=v5.16#n93 */
103 static const char action = 'c';
104 ssize_t rc;
105
106 sync();
107
108 if ((rc = write(sysrq->sink, &action, sizeof(action))) == sizeof(action))
109 return;
110
111 if (rc == -1) {
112 warn("Failed to execute debug command");
113 } else {
114 warnx("Failed to execute debug command: %zd", rc);
115 }
116 }
117
sysrq_sink_reboot(void * ctx)118 static void sysrq_sink_reboot(void *ctx)
119 {
120 struct debug_sink_sysrq *sysrq = ctx;
121 /* https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/admin-guide/sysrq.rst?h=v5.16#n90 */
122 static const char action = 'b';
123 ssize_t rc;
124
125 sync();
126
127 if ((rc = write(sysrq->sink, &action, sizeof(action))) == sizeof(action))
128 return;
129
130 if (rc == -1) {
131 warn("Failed to reboot BMC");
132 } else {
133 warnx("Failed to reboot BMC: %zd", rc);
134 }
135 }
136
basic_source_poll(void * ctx,char * op)137 static int basic_source_poll(void *ctx, char *op)
138 {
139 struct debug_source_basic *basic = ctx;
140 ssize_t ingress;
141
142 if ((ingress = read(basic->source, op, 1)) != 1) {
143 if (ingress < 0) {
144 warn("Failed to read from basic source");
145 return -errno;
146 }
147
148 /* Unreachable */
149 errx(EXIT_FAILURE, "Bad read, requested 1 got %zd", ingress);
150 }
151
152 return 0;
153 }
154
/* Sink actions that write sysrq action bytes; the context is a struct debug_sink_sysrq */
const struct debug_sink_ops sysrq_sink_ops = {
	.debug = sysrq_sink_debug,
	.reboot = sysrq_sink_reboot,
};
159
/* Source operations that read commands from a plain file descriptor; the context is a struct debug_source_basic */
const struct debug_source_ops basic_source_ops = {
	.poll = basic_source_poll,
};
163
164 #if HAVE_SYSTEMD
165 #include <systemd/sd-bus.h>
166
167 static void dbus_sink_reboot(void *ctx);
/*
 * Match callback for PropertiesChanged signals on the dump object registered by
 * dbus_sink_debug().
 *
 * Walks the changed-properties dictionary of the signal looking for the "Status" member of the
 * xyz.openbmc_project.Common.Progress interface. When a status is found it is printed, and if it
 * reports completion the current match slot is released and the system is rebooted via
 * dbus_sink_reboot().
 *
 * m:         the PropertiesChanged signal message
 * userdata:  pointer to the struct debug_sink_dbus supplied when the match was registered
 * ret_error: unused
 *
 * Returns 0 when the signal was handled or ignored, or the negative value returned by the failing
 * sd-bus message call.
 */
static int dbus_sink_dump_progress(sd_bus_message *m, void *userdata,
				   sd_bus_error *ret_error __attribute__((unused)))
{
	struct debug_sink_dbus *sink_ctx = userdata;
	const char *status = NULL;
	const char *iface;
	int rc;

	/* The signal body has the signature sa{sv}as; start with the interface name */
	rc = sd_bus_message_read_basic(m, 's', &iface);
	if (rc < 0) {
		warnx("Failed to extract interface from PropertiesChanged signal: %s",
		      strerror(-rc));
		return rc;
	}

	/* Only updates to the Progress interface are of interest */
	if (strcmp(iface, "xyz.openbmc_project.Common.Progress") != 0)
		return 0;

	/* Negative is an error to propagate, zero means there is no dictionary to walk */
	rc = sd_bus_message_enter_container(m, 'a', "{sv}");
	if (rc <= 0)
		return rc;

	for (;;) {
		const char *member;

		rc = sd_bus_message_enter_container(m, 'e', "sv");
		if (rc < 0)
			return rc;

		/* No further dictionary entries */
		if (rc == 0)
			break;

		rc = sd_bus_message_read_basic(m, 's', &member);
		if (rc < 0) {
			warnx("Failed to extract member name from PropertiesChanged signal: %s",
			      strerror(-rc));
			return rc;
		}

		if (strcmp(member, "Status") == 0) {
			rc = sd_bus_message_enter_container(m, 'v', "s");
			if (rc < 0) {
				warnx("Failed to enter variant container in PropertiesChanged signal: %s",
				      strerror(-rc));
				return rc;
			}

			/* Only read and leave the variant if it was actually entered */
			if (rc > 0) {
				rc = sd_bus_message_read_basic(m, 's', &status);
				if (rc < 0) {
					warnx("Failed to extract status value from PropertiesChanged signal: %s",
					      strerror(-rc));
					return rc;
				}

				sd_bus_message_exit_container(m);
			}
		} else {
			rc = sd_bus_message_skip(m, "v");
			if (rc < 0) {
				warnx("Failed to skip variant for unrecognised member %s in PropertiesChanged signal: %s",
				      member, strerror(-rc));
				return rc;
			}
		}

		/* Leave the dictionary entry */
		sd_bus_message_exit_container(m);
	}

	/* Leave the dictionary array */
	sd_bus_message_exit_container(m);

	/* The signal did not carry a Status update */
	if (status == NULL)
		return 0;

	printf("Dump progress on %s: %s\n", sd_bus_message_get_path(m), status);

	/* If we're finished with the dump, reboot the system */
	if (strcmp(status, "xyz.openbmc_project.Common.Progress.OperationStatus.Completed") == 0) {
		sd_bus_slot_unref(sd_bus_get_current_slot(sink_ctx->bus));
		dbus_sink_reboot(userdata);
	}

	return 0;
}
261
dbus_sink_debug(void * ctx)262 static void dbus_sink_debug(void *ctx)
263 {
264 sd_bus_error ret_error = SD_BUS_ERROR_NULL;
265 struct debug_sink_dbus *dbus = ctx;
266 sd_bus_message *reply;
267 sd_bus_slot *slot;
268 const char *path;
269 char *status;
270 int rc;
271
272 /* Start a BMC dump */
273 rc = sd_bus_call_method(dbus->bus,
274 "xyz.openbmc_project.Dump.Manager",
275 "/xyz/openbmc_project/dump/bmc",
276 "xyz.openbmc_project.Dump.Create",
277 "CreateDump",
278 &ret_error,
279 &reply, "a{sv}", 0);
280 if (rc < 0) {
281 warnx("Failed to call CreateDump: %s", strerror(-rc));
282 return;
283 }
284
285 /* Extract the dump path */
286 rc = sd_bus_message_read_basic(reply, 'o', &path);
287 if (rc < 0) {
288 warnx("Failed to extract dump object path: %s", strerror(-rc));
289 goto cleanup_reply;
290 }
291
292 /* Set up a match watching for completion of the dump */
293 rc = sd_bus_match_signal(dbus->bus,
294 &slot,
295 "xyz.openbmc_project.Dump.Manager",
296 path,
297 "org.freedesktop.DBus.Properties",
298 "PropertiesChanged",
299 dbus_sink_dump_progress,
300 ctx);
301 if (rc < 0) {
302 warnx("Failed to add signal match for progress status on dump object %s: %s",
303 path, strerror(-rc));
304 goto cleanup_reply;
305 }
306
307 /*
308 * Mark the slot as 'floating'. If a slot is _not_ marked as floating it holds a reference
309 * to the bus, and the bus will stay alive so long as the slot is referenced. If the slot is
310 * instead marked floating the relationship is inverted: The lifetime of the slot is defined
311 * in terms of the bus, which means we relieve ourselves of having to track the lifetime of
312 * the slot.
313 *
314 * For more details see `man 3 sd_bus_slot_set_floating`, also documented here:
315 *
316 * https://www.freedesktop.org/software/systemd/man/sd_bus_slot_set_floating.html
317 */
318 rc = sd_bus_slot_set_floating(slot, 0);
319 if (rc < 0) {
320 warnx("Failed to mark progress match slot on %s as floating: %s",
321 path, strerror(-rc));
322 goto cleanup_reply;
323 }
324
325 printf("Registered progress match on dump object %s\n", path);
326
327 /* Now that the match is set up, check the current value in case we missed any updates */
328 rc = sd_bus_get_property_string(dbus->bus,
329 "xyz.openbmc_project.Dump.Manager",
330 path,
331 "xyz.openbmc_project.Common.Progress",
332 "Status",
333 &ret_error,
334 &status);
335 if (rc < 0) {
336 warnx("Failed to get progress status property on dump object %s: %s",
337 path, strerror(-rc));
338 sd_bus_slot_unref(slot);
339 goto cleanup_reply;
340 }
341
342 printf("Dump state for %s is currently %s\n", path, status);
343
344 /*
345 * If we're finished with the dump, reboot the system. If the dump isn't finished the reboot
346 * will instead take place via the dbus_sink_dump_progress() callback on the match.
347 */
348 if (!strcmp(status, "xyz.openbmc_project.Common.Progress.OperationStatus.Completed")) {
349 sd_bus_slot_unref(slot);
350 dbus_sink_reboot(ctx);
351 }
352
353 cleanup_reply:
354 sd_bus_message_unref(reply);
355 }
356
dbus_sink_reboot(void * ctx)357 static void dbus_sink_reboot(void *ctx)
358 {
359 sd_bus_error ret_error = SD_BUS_ERROR_NULL;
360 struct debug_sink_dbus *dbus = ctx;
361 sd_bus_message *reply;
362 int rc;
363
364 warnx("Rebooting the system");
365
366 rc = sd_bus_call_method(dbus->bus,
367 "org.freedesktop.systemd1",
368 "/org/freedesktop/systemd1",
369 "org.freedesktop.systemd1.Manager",
370 "StartUnit",
371 &ret_error,
372 &reply,
373 "ss",
374 "reboot.target",
375 "replace-irreversibly");
376 if (rc < 0) {
377 warnx("Failed to start reboot.target: %s", strerror(-rc));
378 }
379 }
380
dbus_source_poll(void * ctx,char * op)381 static int dbus_source_poll(void *ctx, char *op)
382 {
383 struct debug_source_dbus *dbus = ctx;
384 int rc;
385
386 while (1) {
387 struct timespec tsto, *ptsto;
388 uint64_t dbusto;
389
390 /* See SD_BUS_GET_FD(3) */
391 dbus->pfds[DBUS_SOURCE_PFD_DBUS].fd = sd_bus_get_fd(dbus->bus);
392 dbus->pfds[DBUS_SOURCE_PFD_DBUS].events = sd_bus_get_events(dbus->bus);
393 rc = sd_bus_get_timeout(dbus->bus, &dbusto);
394 if (rc < 0)
395 return rc;
396
397 if (dbusto == UINT64_MAX) {
398 ptsto = NULL;
399 } else if (dbus->pfds[DBUS_SOURCE_PFD_DBUS].events == 0) {
400 ptsto = NULL;
401 } else {
402 #define MSEC_PER_SEC 1000U
403 #define USEC_PER_SEC (MSEC_PER_SEC * 1000U)
404 #define NSEC_PER_SEC (USEC_PER_SEC * 1000U)
405 #define NSEC_PER_USEC (NSEC_PER_SEC / USEC_PER_SEC)
406 tsto.tv_sec = dbusto / USEC_PER_SEC;
407 tsto.tv_nsec = (dbusto % USEC_PER_SEC) * NSEC_PER_USEC;
408 ptsto = &tsto;
409 }
410
411 if ((rc = ppoll(dbus->pfds, ARRAY_SIZE(dbus->pfds), ptsto, NULL)) < 0) {
412 warn("Failed polling source fds");
413 return -errno;
414 }
415
416 if (dbus->pfds[DBUS_SOURCE_PFD_SOURCE].revents) {
417 ssize_t ingress;
418
419 if ((ingress = read(dbus->pfds[DBUS_SOURCE_PFD_SOURCE].fd, op, 1)) != 1) {
420 if (ingress < 0) {
421 warn("Failed to read from basic source");
422 return -errno;
423 }
424
425 errx(EXIT_FAILURE, "Bad read, requested 1 got %zd", ingress);
426 }
427
428 return 0;
429 }
430
431 if (dbus->pfds[DBUS_SOURCE_PFD_DBUS].revents) {
432 if ((rc = sd_bus_process(dbus->bus, NULL)) < 0) {
433 warnx("Failed processing inbound D-Bus messages: %s",
434 strerror(-rc));
435 return rc;
436 }
437 }
438 }
439 }
440 #else
/*
 * Stand-in for the D-Bus debug action in builds without systemd support: reports on stderr that
 * dbus sinks are unavailable and does nothing else.
 */
static void dbus_sink_debug(void *ctx)
{
	/* There is no bus context to act on in this configuration */
	(void)ctx;

	warnx("%s: Configured without systemd, dbus sinks disabled", __func__);
}
445
/*
 * Stand-in for the D-Bus reboot action in builds without systemd support: reports on stderr that
 * dbus sinks are unavailable and does nothing else.
 */
static void dbus_sink_reboot(void *ctx)
{
	/* There is no bus context to act on in this configuration */
	(void)ctx;

	warnx("%s: Configured without systemd, dbus sinks disabled", __func__);
}
450
/*
 * Stand-in for the D-Bus-aware source in builds without systemd support. There is nothing to
 * poll, so the process exits with EXIT_FAILURE after reporting the problem; the function never
 * returns.
 */
static int dbus_source_poll(void *ctx, char *op)
{
	(void)ctx;
	(void)op;

	/* The format needs a conversion for __func__, matching the sink stubs' messages */
	errx(EXIT_FAILURE, "%s: Configured without systemd, dbus sources disabled", __func__);
}
455 #endif
456
/*
 * Sink actions selected by --sink-actions=dbus; the context is a struct debug_sink_dbus. The
 * handlers are either the sd-bus implementations or, when built without systemd, stubs that only
 * emit a warning.
 */
const struct debug_sink_ops dbus_sink_ops = {
	.debug = dbus_sink_debug,
	.reboot = dbus_sink_reboot,
};
461
/*
 * Source operations selected by --sink-actions=dbus; the context is a struct debug_source_dbus.
 * The poll handler is either the sd-bus implementation or, when built without systemd, a stub
 * that exits the process.
 */
const struct debug_source_ops dbus_source_ops = {
	.poll = dbus_source_poll,
};
465
process(struct debug_source * source,struct debug_sink * sink)466 static int process(struct debug_source *source, struct debug_sink *sink)
467 {
468 char command;
469 int rc;
470
471 while (!(rc = source->ops->poll(source->ctx, &command))) {
472 switch (command) {
473 case 'D':
474 warnx("Debug action triggered\n");
475 sink->ops->debug(sink->ctx);
476 break;
477 case 'R':
478 warnx("Reboot action triggered\n");
479 sink->ops->reboot(sink->ctx);
480 break;
481 default:
482 warnx("Unexpected command: 0x%02x (%c)", command, command);
483 }
484 }
485
486 if (rc < 0)
487 warnx("Failed to poll source: %s", strerror(-rc));
488
489 return rc;
490 }
491
main(int argc,char * const argv[])492 int main(int argc, char * const argv[])
493 {
494 struct debug_source_basic basic_source;
495 struct debug_source_dbus dbus_source;
496 struct debug_sink_sysrq sysrq_sink;
497 struct debug_sink_dbus dbus_sink;
498 const char *sink_actions = NULL;
499 struct debug_source source;
500 struct debug_sink sink;
501 char devnode[PATH_MAX];
502 char *devid;
503 int sourcefd;
504 int sinkfd;
505
506 /* Option processing */
507 while (1) {
508 static struct option long_options[] = {
509 {"sink-actions", required_argument, 0, 's'},
510 {0, 0, 0, 0},
511 };
512 int c;
513
514 c = getopt_long(argc, argv, "", long_options, NULL);
515 if (c == -1)
516 break;
517
518 switch (c) {
519 case 's':
520 sink_actions = optarg;
521 break;
522 default:
523 break;
524 }
525 }
526
527 /*
528 * The default behaviour sets the source file descriptor as stdin and the sink file
529 * descriptor as stdout. This allows trivial testing on the command-line with just a
530 * keyboard and without crashing the system.
531 */
532 sourcefd = 0;
533 sinkfd = 1;
534
535 /* Handle the source path argument, if any */
536 if (optind < argc) {
537 char devpath[PATH_MAX];
538
539 /*
540 * To make our lives easy with udev we take the basename of the source argument and
541 * look for it in /dev. This allows us to use %p (the devpath specifier) in the udev
542 * rule to pass the device of interest to the systemd unit.
543 */
544 strncpy(devpath, argv[optind], sizeof(devpath));
545 devpath[PATH_MAX - 1] = '\0';
546 devid = basename(devpath);
547
548 strncpy(devnode, "/dev/", sizeof(devnode));
549 strncat(devnode, devid, sizeof(devnode) - strlen("/dev/"));
550 devnode[PATH_MAX - 1] = '\0';
551
552 if ((sourcefd = open(devnode, O_RDONLY)) == -1)
553 err(EXIT_FAILURE, "Failed to open source %s", devnode);
554
555 optind++;
556 }
557
558 /*
559 * Handle the sink path argument, if any. If sink_actions hasn't been set via the
560 * --sink-actions option, then default to 'sysrq'. Otherwise, if --sink-actions=sysrq has
561 * been passed, do as we're told and use the 'sysrq' sink actions.
562 */
563 if (!sink_actions || !strcmp("sysrq", sink_actions)) {
564 if (optind < argc) {
565 /*
566 * Just open the sink path directly. If we ever need different behaviour
567 * then we patch this bit when we know what we need.
568 */
569 if ((sinkfd = open(argv[optind], O_WRONLY)) == -1)
570 err(EXIT_FAILURE, "Failed to open sink %s", argv[optind]);
571
572 optind++;
573 }
574
575 basic_source.source = sourcefd;
576 source.ops = &basic_source_ops;
577 source.ctx = &basic_source;
578
579 sysrq_sink.sink = sinkfd;
580 sink.ops = &sysrq_sink_ops;
581 sink.ctx = &sysrq_sink;
582 }
583
584 /* Set up the dbus sink actions if requested via --sink-actions=dbus */
585 if (sink_actions && !strcmp("dbus", sink_actions)) {
586 sd_bus *bus;
587 int rc;
588
589 rc = sd_bus_open_system(&bus);
590 if (rc < 0) {
591 errx(EXIT_FAILURE, "Failed to connect to the system bus: %s",
592 strerror(-rc));
593 }
594
595 dbus_source.bus = bus;
596 dbus_source.pfds[DBUS_SOURCE_PFD_SOURCE].fd = sourcefd;
597 dbus_source.pfds[DBUS_SOURCE_PFD_SOURCE].events = POLLIN;
598 source.ops = &dbus_source_ops;
599 source.ctx = &dbus_source;
600
601 dbus_sink.bus = bus;
602 sink.ops = &dbus_sink_ops;
603 sink.ctx = &dbus_sink;
604 }
605
606 /* Check we're done with the command-line */
607 if (optind < argc)
608 errx(EXIT_FAILURE, "Found %d unexpected arguments", argc - optind);
609
610 if (!(source.ops && source.ctx))
611 errx(EXIT_FAILURE, "Invalid source configuration");
612
613 if (!(sink.ops && sink.ctx))
614 errx(EXIT_FAILURE, "Unrecognised sink: %s", sink_actions);
615
616 /* Trigger the actions on the sink when we receive an event from the source */
617 if (process(&source, &sink) < 0)
618 errx(EXIT_FAILURE, "Failure while processing command stream");
619
620 return 0;
621 }
622