1 extern "C" 2 { 3 #include "libpdbg.h" 4 } 5 6 #include "extensions/phal/common_utils.hpp" 7 #include "extensions/phal/create_pel.hpp" 8 #include "extensions/phal/pdbg_utils.hpp" 9 #include "p10_cfam.hpp" 10 #include "registration.hpp" 11 12 #include <phosphor-logging/log.hpp> 13 #include <sdbusplus/bus.hpp> 14 15 #include <cstdio> 16 #include <fstream> 17 #include <memory> 18 19 namespace openpower 20 { 21 namespace phal 22 { 23 24 using namespace openpower::cfam::p10; 25 using namespace phosphor::logging; 26 27 /** Best effort function to create a BMC dump */ 28 void createBmcDump() 29 { 30 auto bus = sdbusplus::bus::new_default(); 31 32 auto method = bus.new_method_call( 33 "xyz.openbmc_project.Dump.Manager", "/xyz/openbmc_project/dump/bmc", 34 "xyz.openbmc_project.Dump.Create", "CreateDump"); 35 method.append( 36 std::vector< 37 std::pair<std::string, std::variant<std::string, uint64_t>>>()); 38 try 39 { 40 bus.call_noreply(method); 41 } 42 catch (const sdbusplus::exception_t& e) 43 { 44 log<level::ERR>("Exception raised creating BMC dump", 45 entry("EXCEPTION=%s", e.what())); 46 // just continue, failing to collect a dump should not cause further 47 // issues in this path 48 } 49 } 50 51 /** 52 * This is the backup plan to ensuring the host is not running before the 53 * BMC issues a power off to the system. Prior to this procedure being called, 54 * the BMC has tried all other communication mechanisms to talk with the host 55 * and they have failed. The design is that the host firmware will write the 56 * value 0xA5000001 to Mailbox scratch register 12 when they are up and running 57 * to a point where communication to the BMC is no longer required to function. 58 * On a power off or shutdown this register is cleared by the host and BMC 59 * firmware. If the BMC sees the 0xA5000001 pattern in the scratch register 60 * then it assumes the host is running and will leave power on to the system. 61 */ 62 void checkHostRunning() 63 { 64 struct pdbg_target* procTarget; 65 66 try 67 { 68 phal_init(); 69 } 70 catch (const std::exception& ex) 71 { 72 // This should "never" happen so just throw the exception and let 73 // our systemd error handling process this 74 log<level::ERR>("Exception raised during init PHAL", 75 entry("EXCEPTION=%s", ex.what())); 76 throw std::runtime_error("PHAL initialization failed"); 77 } 78 79 pdbg_for_each_class_target("proc", procTarget) 80 { 81 // Only check the primary proc 82 if (!isPrimaryProc(procTarget)) 83 { 84 continue; 85 } 86 87 uint32_t val = 0; 88 constexpr uint32_t HOST_RUNNING_INDICATION = 0xA5000001; 89 auto rc = getCFAM(procTarget, P10_SCRATCH_REG_12, val); 90 if ((rc == 0) && (val != HOST_RUNNING_INDICATION)) 91 { 92 log<level::INFO>("CFAM read indicates host is not running", 93 entry("CFAM=0x%X", val)); 94 return; 95 } 96 97 if (rc != 0) 98 { 99 // On error, we have to assume host is up so just fall through 100 // to code below 101 log<level::ERR>("CFAM read error, assume host is running"); 102 } 103 else if (val == HOST_RUNNING_INDICATION) 104 { 105 // This is not good. Normal communication path to host did not work 106 // but CFAM indicates host is running. 107 log<level::ERR>("CFAM read indicates host is running"); 108 } 109 110 // Create an error so user knows system is in a bad state 111 openpower::pel::createPEL("org.open_power.PHAL.Error.HostRunning"); 112 113 // Create file for host instance and create in filesystem to 114 // indicate to services that host is running. 115 // This file is cleared by the phosphor-state-manager once the host 116 // start target completes. 117 constexpr auto HOST_RUNNING_FILE = "/run/openbmc/host@%d-on"; 118 auto size = std::snprintf(nullptr, 0, HOST_RUNNING_FILE, 0); 119 size++; // null 120 std::unique_ptr<char[]> buf(new char[size]); 121 std::snprintf(buf.get(), size, HOST_RUNNING_FILE, 0); 122 std::ofstream outfile(buf.get()); 123 outfile.close(); 124 125 // Try to create BMC dump for further debug 126 createBmcDump(); 127 128 return; 129 } 130 131 // We should "never" make it here. If we did it implies no primary processor 132 // was found. Once again, rely on systemd recovery if this happens 133 log<level::ERR>("No primary processor found in checkHostRunning"); 134 throw std::runtime_error("No primary processor found in checkHostRunning"); 135 } 136 137 /** 138 * The BMC is to make a best effort to clear the CFAM register used by PHYP 139 * to indicate it is running when the host is stopped. This procedure will do 140 * that. 141 */ 142 void clearHostRunning() 143 { 144 struct pdbg_target* procTarget; 145 log<level::INFO>("Entering clearHostRunning"); 146 147 try 148 { 149 phal_init(); 150 } 151 catch (const std::exception& ex) 152 { 153 // This should "never" happen so just throw the exception and let 154 // our systemd error handling process this 155 log<level::ERR>("Exception raised during init PHAL", 156 entry("EXCEPTION=%s", ex.what())); 157 throw std::runtime_error("PHAL initialization failed"); 158 } 159 160 pdbg_for_each_class_target("proc", procTarget) 161 { 162 // Only check the primary proc 163 if (!isPrimaryProc(procTarget)) 164 { 165 continue; 166 } 167 168 constexpr uint32_t HOST_NOT_RUNNING_INDICATION = 0; 169 auto rc = putCFAM(procTarget, P10_SCRATCH_REG_12, 170 HOST_NOT_RUNNING_INDICATION); 171 if (rc != 0) 172 { 173 log<level::ERR>("CFAM write to clear host running status failed"); 174 } 175 176 // It's best effort, so just return either way 177 return; 178 } 179 log<level::ERR>("No primary processor found in clearHostRunning"); 180 } 181 182 REGISTER_PROCEDURE("checkHostRunning", checkHostRunning) 183 REGISTER_PROCEDURE("clearHostRunning", clearHostRunning) 184 185 } // namespace phal 186 } // namespace openpower 187