xref: /openbmc/phosphor-state-manager/obmcutil (revision 4e792702211e5a8bad6379b4cd16d8b0437a4eb9)
1#!/bin/bash -e
2
3set -euo pipefail
4
5# ============================================================================
6# Constants - D-Bus Interfaces and Objects
7# ============================================================================
8readonly INTERFACE_ROOT="xyz.openbmc_project"
9readonly STATE_INTERFACE="${INTERFACE_ROOT}.State"
10readonly CONTROL_INTERFACE="${INTERFACE_ROOT}.Control"
11
12readonly OBJECT_ROOT="/xyz/openbmc_project"
13readonly STATE_OBJECT="${OBJECT_ROOT}/state"
14readonly CONTROL_OBJECT="${OBJECT_ROOT}/control"
15
16# ============================================================================
17# Constants - Systemd Targets and Directories
18# ============================================================================
19readonly BMC_SYSTEMD_RUN_DIR="/run/systemd/system/"
20readonly HOST_TIMEOUT_TARGET="obmc-host-timeout@0.target"
21readonly HOST_CRASH_TARGET="obmc-host-crash@0.target"
22
23# ============================================================================
24# Constants - BMC Reboot Guard
25# ============================================================================
26readonly BMC_REBOOT_GUARD_FILE="reboot-guard.conf"
27readonly BMC_REBOOT_GUARD_UNITS=("reboot" "poweroff" "halt")
28
29# ============================================================================
30# Constants - Default Values
31# ============================================================================
32readonly DEFAULT_WAIT_TIMEOUT=30
33
34# ============================================================================
35# Usage and Help
36# ============================================================================
37readonly OPTS="bmcstate,bootprogress,chassiskill,chassisoff,chassison,chassisstate,hoststate,\
38osstate,power,poweroff,poweron,state,status,hostrebootoff,hostrebooton,recoveryoff,recoveryon,\
39bmcrebootoff, bmcrebooton, listbootblock listlogs showlog deletelogs, stopofftargets"
40
41readonly USAGE="Usage: obmcutil [-h] [--wait] [--verbose] [--id=<INSTANCE_ID>]
42{$OPTS}"
43
44## NOTE: By declaring these globally instead of passing them through the
45## intermediary functions, which may not be "best practice", the readability
46## and cleanliness of the code should at least be increased.
47
48# ============================================================================
49# Global Variables
50# ============================================================================
51# The command passed in to be executed (e.g. poweron/off, status, etc.)
52# This will be be used in some instances of error reporting
53G_ORIG_CMD=
54# The state an interface should be in after executing the requested command.
55G_REQUESTED_STATE=
56# The query to run during a poweron/off or chassison/off to check that
57# the requested state (G_REQUESTED_STATE) of the interface has been reached.
58G_QUERY=
59# Wait the set period of time for state transitions to be successful before
60# continuing on with the program or reporting an error if timeout reached.
61G_WAIT=
62# Print the journal to the console
63G_VERBOSE=
64# Instance id, default 0
65G_INSTANCE_ID="0"
66# Force a command even if system state is not correct
67G_FORCE=
68
69# ============================================================================
70# Service Cache - Avoid repeated mapper lookups
71# ============================================================================
72declare -A G_SERVICE_CACHE
73
74# ============================================================================
75# Helper Functions
76# ============================================================================
77
78# Get service for an object with caching to avoid repeated mapper calls
79function get_service()
80{
81    local object="$1"
82
83    # Check cache first
84    if [ -n "${G_SERVICE_CACHE[$object]:-}" ]; then
85        echo "${G_SERVICE_CACHE[$object]}"
86        return 0
87    fi
88
89    # Call mapper and cache result
90    local service
91    service=$(mapper get-service "$object")
92    G_SERVICE_CACHE[$object]="$service"
93    echo "$service"
94}
95
96function print_help()
97{
98    echo "$USAGE"
99    echo ""
100    echo "positional arguments:"
101    echo "  {$OPTS}"
102    echo ""
103    echo "Examples:"
104    echo ""
105    echo "obmcutil hostrebootoff Disable auto reboot of Host from Quiesce state"
106    echo "obmcutil hostrebootoffonetime Disable auto reboot of Host from"
107    echo "                              Quiesce state for a single boot"
108    echo "obmcutil hostrebooton   Enable auto reboot of Host from Quiesce state"
109    echo ""
110    echo "obmcutil bmcrebootoff   Disable reboot of BMC"
111    echo "obmcutil bmcrebooton    Enable reboot of BMC"
112    echo ""
113    echo "obmcutil recoveryoff    Disable handling boot watchdog timeout and host crash"
114    echo "                        Also, disable BMC and Host auto reboots"
115    echo ""
116    echo "obmcutil recoveryon     Enable handling boot watchdog timeout and host crash"
117    echo "                        Also, enable BMC and Host auto reboots"
118    echo ""
119    echo "obmcutil recoverystatus Display the status of handling boot watchdog timeout and host crash"
120    echo "                        and also the status of BMC and Host auto reboots setting"
121    echo ""
122    echo "obmcutil listbootblock  Check for and list any errors blocking the boot"
123    echo "                        of the system"
124    echo ""
125    echo "obmcutil listlogs       List all phosphor-logging entries on the"
126    echo "                        system"
127    echo ""
128    echo "obmcutil showlog <log>  Display details of input log. Format of <log>"
129    echo "                        should match listlogs output"
130    echo ""
131    echo "obmcutil deletelogs     Delete all phosphor-logging entries from"
132    echo "                        system"
133    echo "obmcutil stopofftargets Manually stop all obmc targets in power off"
134    echo "                        path"
135    echo ""
136    echo "optional arguments (must precede the positional options above):"
137    echo "  -h, --help          show this help message and exit"
138    echo "  -w, --wait          block until state transition succeeds or fails"
139    echo "  -v, --verbose       print the journal to stdout if --wait is supplied"
140    echo "  -i, -id             instance id, default 0"
141    echo "  -f, --force         force issuing the command ignoring preconditions (use with caution)"
142    exit 0
143}
144
145function run_timeout()
146{
147    local timeout="$1"; shift
148    local cmd="$*"
149    local verbose_child=
150
151    if [ -n "$G_VERBOSE" ]; then
152        journalctl -f &
153        verbose_child=$!
154    fi
155
156    $cmd
157
158    # Run a background query for the transition to the expected state
159    # This will be killed if the transition doesn't succeed within
160    # a timeout period.
161    (
162        while ! grep -q "$G_REQUESTED_STATE" <<< "$(handle_cmd "$G_QUERY")" ; do
163            sleep 1
164        done
165    ) &
166    wait_child=$!
167
168    # Could be bad if process is killed before 'timeout' occurs if
169    # transition doesn't succeed.
170    trap -- "" SIGTERM
171
172    # Workaround for lack of 'timeout' command.
173    (
174        sleep "$timeout"
175        kill $wait_child
176    ) > /dev/null 2>&1 &
177
178    if ! wait $wait_child; then
179        echo "Unable to confirm '$G_ORIG_CMD' success" \
180            "within timeout period (${timeout}s)"
181    fi
182
183    if [ -n "$verbose_child" ]; then
184        kill $verbose_child
185    fi
186}
187
188function run_cmd()
189{
190    local cmd="$*";
191
192    if [ -n "$G_WAIT" ]; then
193        run_timeout "$G_WAIT" "$cmd"
194    else
195        $cmd
196    fi
197}
198
199function set_property()
200{
201    run_cmd busctl set-property "$@"
202}
203
204function get_property()
205{
206    G_WAIT=""
207    run_cmd busctl get-property "$@"
208}
209
210# Generic function to query and display state properties
211function state_query()
212{
213    local state
214    state=$(get_property "$@" | cut -d '"' -f2)
215    printf "%-20s: %s\n" "$4" "$state"
216}
217
218# Helper to query state for a given object type
219function query_state()
220{
221    local object_type="$1"
222    local interface="$2"
223    local property="$3"
224    local instance="${4:-$G_INSTANCE_ID}"
225
226    local object="${STATE_OBJECT}/${object_type}${instance}"
227    local service
228    service=$(get_service "$object")
229    state_query "$service" "$object" "$interface" "$property"
230}
231
232# Helper to set a state transition property
233function set_state_transition()
234{
235    local object_type="$1"
236    local interface="$2"
237    local property="$3"
238    local value="$4"
239    local instance="${5:-$G_INSTANCE_ID}"
240
241    local object="${STATE_OBJECT}/${object_type}${instance}"
242    local service
243    service=$(get_service "$object")
244    set_property "$service" "$object" "$interface" "$property" "s" "$value"
245}
246
247# Helper to set a control property
248function set_control_property()
249{
250    local object_path="$1"
251    local interface="$2"
252    local property="$3"
253    local type="$4"
254    local value="$5"
255
256    local service
257    service=$(get_service "$object_path")
258    set_property "$service" "$object_path" "$interface" "$property" "$type" "$value"
259}
260
261function print_usage_err()
262{
263    echo "ERROR: $1" >&2
264    echo "$USAGE"
265    exit 1
266}
267
268function mask_systemd_target()
269{
270    local target="$*"
271    systemctl mask "$target" || {
272        echo "ERROR: Failed to mask systemd target: $target" >&2
273        return 1
274    }
275}
276
277function unmask_systemd_target()
278{
279    local target="$*"
280    systemctl unmask "$target" || {
281        echo "ERROR: Failed to unmask systemd target: $target" >&2
282        return 1
283    }
284}
285
286function get_systemd_target_state()
287{
288    local target="$*"
289    local enabled_state
290    enabled_state=$(systemctl is-enabled "$target" 2>/dev/null) || enabled_state="unknown"
291    echo "$enabled_state"
292}
293
294function disable_bmc_reboot()
295{
296    local unit
297    for unit in "${BMC_REBOOT_GUARD_UNITS[@]}"; do
298        mkdir -p "${BMC_SYSTEMD_RUN_DIR}${unit}.target.d"
299        echo -e "[Unit]\nRefuseManualStart=yes" >> "${BMC_SYSTEMD_RUN_DIR}${unit}.target.d/${BMC_REBOOT_GUARD_FILE}"
300    done
301}
302
303function enable_bmc_reboot()
304{
305    local unit
306    for unit in "${BMC_REBOOT_GUARD_UNITS[@]}"; do
307        rm -rf "${BMC_SYSTEMD_RUN_DIR}${unit}.target.d"
308    done
309}
310
311function get_bmc_reboot_status()
312{
313    local unit
314    for unit in "${BMC_REBOOT_GUARD_UNITS[@]}"; do
315        if [ -e "${BMC_SYSTEMD_RUN_DIR}${unit}.target.d/${BMC_REBOOT_GUARD_FILE}" ]; then
316            echo "off"
317            return 0
318        fi
319    done
320    echo "on"
321    return 0
322}
323
324function get_host_reboot_status()
325{
326    local OBJECT=$CONTROL_OBJECT/host$G_INSTANCE_ID/auto_reboot
327    local SERVICE
328    SERVICE=$(get_service "$OBJECT")
329    local INTERFACE=$CONTROL_INTERFACE.Boot.RebootPolicy
330    local PROPERTY=AutoReboot
331    local output
332    output="$(get_property "$SERVICE" "$OBJECT" $INTERFACE $PROPERTY)"
333    echo "${output//b /}"
334}
335
336# will write blocking errors to stdout
337function check_boot_block_errors()
338{
339    local blockArray=()
340    local subtree assocs entry berror
341
342    # Look for any objects under logging that implement the
343    # xyz.openbmc_project.Logging.ErrorBlocksTransition
344    subtree="$(busctl call xyz.openbmc_project.ObjectMapper \
345               /xyz/openbmc_project/object_mapper \
346               xyz.openbmc_project.ObjectMapper \
347               GetSubTree sias "/xyz/openbmc_project/logging/" 0 1 \
348               xyz.openbmc_project.Logging.ErrorBlocksTransition)"
349
350    subtree="${subtree//\"/}"
351
352    for entry in $subtree; do
353        if [[ ${entry} == xyz/openbmc_project/logging/block* ]]; then
354            blockArray+=( "$entry" )
355        fi
356    done
357
358    # now find associated error log for each boot block error
359    for berror in "${blockArray[@]}"; do
360        assocs="$(busctl call xyz.openbmc_project.Logging "$berror" \
361                  org.freedesktop.DBus.Properties Get \
362                  ss xyz.openbmc_project.Association.Definitions Associations)"
363
364        assocs="${assocs//\"/}"
365
366        for entry in $assocs; do
367            if [[ ${entry} == xyz/openbmc_project/logging/entry* ]]; then
368                echo "Blocking Error: $entry"
369            fi
370        done
371    done
372}
373
374# check if system is in transitioning state for chassis or host and
375# reject request if it is (if force option not set)
376function check_chassis_host_states()
377{
378    # If user has --force enabled, no check
379    if [ -n "$G_FORCE" ]; then
380        return 0
381    fi
382
383    local OBJECT=$STATE_OBJECT/chassis$G_INSTANCE_ID
384    local SERVICE
385    SERVICE=$(get_service "$OBJECT")
386    local INTERFACE=$STATE_INTERFACE.Chassis
387    local PROPERTY=CurrentPowerState
388    local state
389    state=$(get_property "$SERVICE" "$OBJECT" "$INTERFACE $PROPERTY" | cut -d '"' -f2)
390    if [[ ${state} =~ "xyz.openbmc_project.State.Chassis.PowerState.Transitioning"* ]]; then
391        echo "Chassis is $state, request rejected, use --force to override"
392        exit 1
393    fi
394
395    OBJECT=$STATE_OBJECT/host$G_INSTANCE_ID
396    SERVICE=$(get_service "$OBJECT")
397    INTERFACE=$STATE_INTERFACE.Host
398    PROPERTY=CurrentHostState
399    state=$(get_property "$SERVICE" "$OBJECT" "$INTERFACE $PROPERTY" | cut -d '"' -f2)
400    if [[ ${state} =~ "xyz.openbmc_project.State.Host.HostState.Transitioning"* ]]; then
401        echo "Host is $state, request rejected, use --force to override"
402        exit 1
403    fi
404}
405
406# helper function to check for boot block errors and notify user
407function check_and_warn_boot_block()
408{
409    blockingErrors=$(check_boot_block_errors)
410    if [ -n "$blockingErrors" ]; then
411        echo !!!!!!!!!!
412        echo "WARNING! System has blocking errors that will prevent boot"
413        echo "$blockingErrors"
414        echo !!!!!!!!!!
415    fi
416}
417
418# list all phosphor-logging entries
419function list_logs()
420{
421    # Look for any objects under logging that implement the
422    # xyz.openbmc_project.Logging.Entry
423    busctl -j call xyz.openbmc_project.ObjectMapper \
424        /xyz/openbmc_project/object_mapper \
425        xyz.openbmc_project.ObjectMapper \
426        GetSubTreePaths sias "/xyz/openbmc_project/logging/" 0 1 \
427        xyz.openbmc_project.Logging.Entry
428}
429
430# display input log details
431function show_log()
432{
433    busctl -j call xyz.openbmc_project.Logging \
434        "$1" \
435        org.freedesktop.DBus.Properties \
436        GetAll s xyz.openbmc_project.Logging.Entry
437}
438
439# delete all phosphor-logging entries
440function delete_logs()
441{
442    busctl call xyz.openbmc_project.Logging \
443        /xyz/openbmc_project/logging \
444        xyz.openbmc_project.Collection.DeleteAll DeleteAll
445}
446
447# stop all targets associated with powering off a system
448function stop_off_targets()
449{
450    systemctl stop \
451        obmc-chassis-powered-off@0.target \
452        obmc-host-stop-pre@0.target \
453        obmc-host-stopped@0.target \
454        obmc-host-stopping@0.target \
455        obmc-power-off@0.target \
456        obmc-power-stop-pre@0.target \
457        obmc-power-stop@0.target
458}
459
460function handle_cmd()
461{
462    case "$1" in
463        chassisoff)
464            check_chassis_host_states
465            local INTERFACE=$STATE_INTERFACE.Chassis
466            G_REQUESTED_STATE=$INTERFACE.PowerState.Off
467            G_QUERY="chassisstate"
468            set_state_transition "chassis" "$INTERFACE" "RequestedPowerTransition" "$INTERFACE.Transition.Off"
469            ;;
470        chassison)
471            check_chassis_host_states
472            check_and_warn_boot_block
473            local INTERFACE=$STATE_INTERFACE.Chassis
474            G_REQUESTED_STATE=$INTERFACE.PowerState.On
475            G_QUERY="chassisstate"
476            set_state_transition "chassis" "$INTERFACE" "RequestedPowerTransition" "$INTERFACE.Transition.On"
477            ;;
478        poweroff)
479            check_chassis_host_states
480            local INTERFACE=$STATE_INTERFACE.Host
481            G_REQUESTED_STATE=$INTERFACE.HostState.Off
482            G_QUERY="hoststate"
483            set_state_transition "host" "$INTERFACE" "RequestedHostTransition" "$INTERFACE.Transition.Off"
484            ;;
485        poweron)
486            check_chassis_host_states
487            check_and_warn_boot_block
488            local INTERFACE=$STATE_INTERFACE.Host
489            G_REQUESTED_STATE=$INTERFACE.HostState.Running
490            G_QUERY="hoststate"
491            set_state_transition "host" "$INTERFACE" "RequestedHostTransition" "$INTERFACE.Transition.On"
492            ;;
493        bmcstate)
494            query_state "bmc" "$STATE_INTERFACE.BMC" "CurrentBMCState" "0"
495            ;;
496        chassisstate)
497            query_state "chassis" "$STATE_INTERFACE.Chassis" "CurrentPowerState"
498            ;;
499        hoststate)
500            query_state "host" "$STATE_INTERFACE.Host" "CurrentHostState"
501            ;;
502        osstate)
503            query_state "host" "$STATE_INTERFACE.OperatingSystem.Status" "OperatingSystemState"
504            ;;
505        state|status)
506            for query in bmcstate chassisstate hoststate bootprogress osstate
507            do
508                handle_cmd $query
509            done
510            check_and_warn_boot_block
511            ;;
512        bootprogress)
513            query_state "host" "$STATE_INTERFACE.Boot.Progress" "BootProgress"
514            ;;
515        power)
516            local OBJECT=/org/openbmc/control/power0
517            local SERVICE
518            SERVICE=$(get_service "$OBJECT")
519            local INTERFACE=org.openbmc.control.Power
520            local property STATE
521            for property in pgood state pgood_timeout; do
522                # get_property can potentially return several
523                # different formats of values, so we do the parsing outside
524                # of get_property depending on the query. These queries
525                # return 'i VALUE' formatted strings.
526                STATE=$(get_property "$SERVICE" "$OBJECT" "$INTERFACE" "$property")
527                STATE="${STATE#i }"
528                STATE="${STATE#i	}"
529                printf "%s = %s\n" "$property" "$STATE"
530            done
531            ;;
532        chassiskill)
533            /usr/libexec/chassiskill
534            ;;
535        hostrebootoff)
536            set_control_property "$CONTROL_OBJECT/host$G_INSTANCE_ID/auto_reboot" \
537                "$CONTROL_INTERFACE.Boot.RebootPolicy" "AutoReboot" "b" "false"
538            ;;
539        hostrebootoffonetime)
540            set_control_property "$CONTROL_OBJECT/host$G_INSTANCE_ID/auto_reboot/one_time" \
541                "$CONTROL_INTERFACE.Boot.RebootPolicy" "AutoReboot" "b" "false"
542            ;;
543        hostrebooton)
544            set_control_property "$CONTROL_OBJECT/host$G_INSTANCE_ID/auto_reboot" \
545                "$CONTROL_INTERFACE.Boot.RebootPolicy" "AutoReboot" "b" "true"
546            ;;
547        bmcrebootoff)
548            disable_bmc_reboot
549            ;;
550        bmcrebooton)
551            enable_bmc_reboot
552            ;;
553        recoveryoff)
554            handle_cmd hostrebootoff
555            handle_cmd bmcrebootoff
556            mask_systemd_target $HOST_TIMEOUT_TARGET
557            mask_systemd_target $HOST_CRASH_TARGET
558            ;;
559        recoveryon)
560            handle_cmd hostrebooton
561            handle_cmd bmcrebooton
562            unmask_systemd_target $HOST_TIMEOUT_TARGET
563            unmask_systemd_target $HOST_CRASH_TARGET
564            ;;
565        recoverystatus)
566            host_reboot_state=$(get_host_reboot_status)
567            if [[ $host_reboot_state == "true" ]]; then
568                host_reboot_status=1
569            else
570                host_reboot_status=0
571            fi
572
573            bmc_reboot_state=$(get_bmc_reboot_status)
574            if [[ $bmc_reboot_state == "on" ]]; then
575                bmc_reboot_status=1
576            else
577                bmc_reboot_status=0
578            fi
579
580            host_timeout_target_state=$(get_systemd_target_state $HOST_TIMEOUT_TARGET)
581            if [[ $host_timeout_target_state == "masked" ]]; then
582                host_timeout_status=0
583            else
584                host_timeout_status=1
585            fi
586
587            host_crash_target_state=$(get_systemd_target_state $HOST_CRASH_TARGET)
588            if [[ $host_crash_target_state == "masked" ]]; then
589                host_crash_status=0
590            else
591                host_crash_status=1
592            fi
593
594            if (( host_reboot_status && bmc_reboot_status && host_timeout_status && host_crash_status )); then
595                echo "recovery: On"
596            elif (( !host_reboot_status && !bmc_reboot_status && !host_timeout_status && !host_crash_status )); then
597                echo "recovery: Off"
598            else
599                echo "recovery: Undefined"
600            fi
601
602            declare -A status
603            status[0]="Off"
604            status[1]="On"
605
606            printf "  %-11s: %s\n" "hostReboot" "${status[$host_reboot_status]}"
607            printf "  %-11s: %s\n" "bmcReboot" "${status[$bmc_reboot_status]}"
608            printf "  %-11s: %s\n" "hostTimeout" "${status[$host_timeout_status]}"
609            printf "  %-11s: %s\n" "hostCrash" "${status[$host_crash_status]}"
610            ;;
611        listbootblock)
612            blockingErrors=$(check_boot_block_errors)
613            if [ -z "$blockingErrors" ]; then
614                echo "No blocking errors present"
615            else
616                echo "$blockingErrors"
617            fi
618            ;;
619        listlogs)
620            list_logs
621            ;;
622        showlog)
623            if [ -z "${2:-}" ]; then
624                print_usage_err "showlog requires a log path argument"
625            fi
626            show_log "$2"
627            ;;
628        deletelogs)
629            delete_logs
630            ;;
631        stopofftargets)
632            stop_off_targets
633            ;;
634        *)
635            print_usage_err "Invalid command '$1'"
636            ;;
637    esac
638}
639
640# Validate at least one argument is provided
641if [ $# -eq 0 ]; then
642    print_usage_err "No command specified"
643fi
644
645shiftcnt=0
646for arg in "$@"; do
647    case $arg in
648        -w|--wait)
649            G_WAIT=$DEFAULT_WAIT_TIMEOUT
650            shiftcnt=$((shiftcnt+1))
651            continue
652            ;;
653        -h|--help)
654            print_help
655            ;;
656        -v|--verbose)
657            G_VERBOSE=y
658            shiftcnt=$((shiftcnt+1))
659            ;;
660        -i=*|--id=*)
661            G_INSTANCE_ID="${arg#*=}"
662            # Validate instance ID is a number
663            if ! [[ "$G_INSTANCE_ID" =~ ^[0-9]+$ ]]; then
664                print_usage_err "Instance ID must be a number: $G_INSTANCE_ID"
665            fi
666            shiftcnt=$((shiftcnt+1))
667            ;;
668        -f|--force)
669            G_FORCE=y
670            shiftcnt=$((shiftcnt+1))
671            ;;
672        -*)
673            print_usage_err "Unknown option: $arg"
674            ;;
675        *)
676            G_ORIG_CMD=$arg
677            # shift out the optional parameters
678            shift $shiftcnt
679            # pass all arguments to handle_cmd in case command takes additional
680            # parameters
681            handle_cmd "$@"
682            break
683            ;;
684    esac
685done
686
687# If we got here without executing a command, only options were provided
688if [ -z "$G_ORIG_CMD" ]; then
689    print_usage_err "No command specified"
690fi
691