1 #include "watchdog.hpp" 2 3 #include "ipmid.hpp" 4 #include "watchdog_service.hpp" 5 6 #include <endian.h> 7 #include <ipmid/api.h> 8 9 #include <cstdint> 10 #include <phosphor-logging/elog-errors.hpp> 11 #include <phosphor-logging/elog.hpp> 12 #include <phosphor-logging/log.hpp> 13 #include <string> 14 #include <xyz/openbmc_project/Common/error.hpp> 15 16 using phosphor::logging::commit; 17 using phosphor::logging::level; 18 using phosphor::logging::log; 19 using sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure; 20 21 static bool lastCallSuccessful = false; 22 23 void reportError() 24 { 25 // We don't want to fill the SEL with errors if the daemon dies and doesn't 26 // come back but the watchdog keeps on ticking. Instead, we only report the 27 // error if we haven't reported one since the last successful call 28 if (!lastCallSuccessful) 29 { 30 return; 31 } 32 lastCallSuccessful = false; 33 34 // TODO: This slow down the end of the IPMI transaction waiting 35 // for the commit to finish. commit<>() can take at least 5 seconds 36 // to complete. 5s is very slow for an IPMI command and ends up 37 // congesting the IPMI channel needlessly, especially if the watchdog 38 // is ticking fairly quickly and we have some transient issues. 39 commit<InternalFailure>(); 40 } 41 42 ipmi_ret_t ipmi_app_watchdog_reset(ipmi_netfn_t netfn, ipmi_cmd_t cmd, 43 ipmi_request_t request, 44 ipmi_response_t response, 45 ipmi_data_len_t data_len, 46 ipmi_context_t context) 47 { 48 // We never return data with this command so immediately get rid of it 49 *data_len = 0; 50 51 try 52 { 53 WatchdogService wd_service; 54 55 // Notify the caller if we haven't initialized our timer yet 56 // so it can configure actions and timeouts 57 if (!wd_service.getInitialized()) 58 { 59 lastCallSuccessful = true; 60 return IPMI_WDOG_CC_NOT_INIT; 61 } 62 63 // The ipmi standard dictates we enable the watchdog during reset 64 wd_service.resetTimeRemaining(true); 65 lastCallSuccessful = true; 66 return IPMI_CC_OK; 67 } 68 catch (const InternalFailure& e) 69 { 70 reportError(); 71 return IPMI_CC_UNSPECIFIED_ERROR; 72 } 73 catch (const std::exception& e) 74 { 75 const std::string e_str = std::string("wd_reset: ") + e.what(); 76 log<level::ERR>(e_str.c_str()); 77 reportError(); 78 return IPMI_CC_UNSPECIFIED_ERROR; 79 } 80 catch (...) 81 { 82 log<level::ERR>("wd_reset: Unknown Error"); 83 reportError(); 84 return IPMI_CC_UNSPECIFIED_ERROR; 85 } 86 } 87 88 static constexpr uint8_t wd_dont_stop = 0x1 << 6; 89 static constexpr uint8_t wd_timeout_action_mask = 0x3; 90 91 static constexpr uint8_t wdTimerUseMask = 0x7; 92 93 enum class IpmiAction : uint8_t 94 { 95 None = 0x0, 96 HardReset = 0x1, 97 PowerOff = 0x2, 98 PowerCycle = 0x3, 99 }; 100 101 /** @brief Converts an IPMI Watchdog Action to DBUS defined action 102 * @param[in] ipmi_action The IPMI Watchdog Action 103 * @return The Watchdog Action that the ipmi_action maps to 104 */ 105 WatchdogService::Action ipmiActionToWdAction(IpmiAction ipmi_action) 106 { 107 switch (ipmi_action) 108 { 109 case IpmiAction::None: 110 { 111 return WatchdogService::Action::None; 112 } 113 case IpmiAction::HardReset: 114 { 115 return WatchdogService::Action::HardReset; 116 } 117 case IpmiAction::PowerOff: 118 { 119 return WatchdogService::Action::PowerOff; 120 } 121 case IpmiAction::PowerCycle: 122 { 123 return WatchdogService::Action::PowerCycle; 124 } 125 default: 126 { 127 throw std::domain_error("IPMI Action is invalid"); 128 } 129 } 130 } 131 132 enum class IpmiTimerUse : uint8_t 133 { 134 Reserved = 0x0, 135 BIOSFRB2 = 0x1, 136 BIOSPOST = 0x2, 137 OSLoad = 0x3, 138 SMSOS = 0x4, 139 OEM = 0x5, 140 }; 141 142 WatchdogService::TimerUse ipmiTimerUseToWdTimerUse(IpmiTimerUse ipmiTimerUse) 143 { 144 switch (ipmiTimerUse) 145 { 146 case IpmiTimerUse::Reserved: 147 { 148 return WatchdogService::TimerUse::Reserved; 149 } 150 case IpmiTimerUse::BIOSFRB2: 151 { 152 return WatchdogService::TimerUse::BIOSFRB2; 153 } 154 case IpmiTimerUse::BIOSPOST: 155 { 156 return WatchdogService::TimerUse::BIOSPOST; 157 } 158 case IpmiTimerUse::OSLoad: 159 { 160 return WatchdogService::TimerUse::OSLoad; 161 } 162 case IpmiTimerUse::SMSOS: 163 { 164 return WatchdogService::TimerUse::SMSOS; 165 } 166 case IpmiTimerUse::OEM: 167 { 168 return WatchdogService::TimerUse::OEM; 169 } 170 default: 171 { 172 return WatchdogService::TimerUse::Reserved; 173 } 174 } 175 } 176 177 struct wd_set_req 178 { 179 uint8_t timer_use; 180 uint8_t timer_action; 181 uint8_t pretimeout; // (seconds) 182 uint8_t expire_flags; 183 uint16_t initial_countdown; // Little Endian (deciseconds) 184 } __attribute__((packed)); 185 static_assert(sizeof(wd_set_req) == 6, "wd_set_req has invalid size."); 186 static_assert(sizeof(wd_set_req) <= MAX_IPMI_BUFFER, 187 "wd_get_res can't fit in request buffer."); 188 189 ipmi_ret_t ipmi_app_watchdog_set(ipmi_netfn_t netfn, ipmi_cmd_t cmd, 190 ipmi_request_t request, 191 ipmi_response_t response, 192 ipmi_data_len_t data_len, 193 ipmi_context_t context) 194 { 195 // Extract the request data 196 if (*data_len < sizeof(wd_set_req)) 197 { 198 *data_len = 0; 199 return IPMI_CC_REQ_DATA_LEN_INVALID; 200 } 201 wd_set_req req; 202 memcpy(&req, request, sizeof(req)); 203 req.initial_countdown = le16toh(req.initial_countdown); 204 *data_len = 0; 205 206 try 207 { 208 WatchdogService wd_service; 209 // Stop the timer if the don't stop bit is not set 210 if (!(req.timer_use & wd_dont_stop)) 211 { 212 wd_service.setEnabled(false); 213 } 214 215 // Set the action based on the request 216 const auto ipmi_action = 217 static_cast<IpmiAction>(req.timer_action & wd_timeout_action_mask); 218 wd_service.setExpireAction(ipmiActionToWdAction(ipmi_action)); 219 220 const auto ipmiTimerUse = 221 static_cast<IpmiTimerUse>(req.timer_use & wdTimerUseMask); 222 wd_service.setTimerUse(ipmiTimerUseToWdTimerUse(ipmiTimerUse)); 223 224 // Set the new interval and the time remaining deci -> mill seconds 225 const uint64_t interval = req.initial_countdown * 100; 226 wd_service.setInterval(interval); 227 wd_service.setTimeRemaining(interval); 228 229 // Mark as initialized so that future resets behave correctly 230 wd_service.setInitialized(true); 231 232 lastCallSuccessful = true; 233 return IPMI_CC_OK; 234 } 235 catch (const std::domain_error&) 236 { 237 return IPMI_CC_INVALID_FIELD_REQUEST; 238 } 239 catch (const InternalFailure& e) 240 { 241 reportError(); 242 return IPMI_CC_UNSPECIFIED_ERROR; 243 } 244 catch (const std::exception& e) 245 { 246 const std::string e_str = std::string("wd_set: ") + e.what(); 247 log<level::ERR>(e_str.c_str()); 248 reportError(); 249 return IPMI_CC_UNSPECIFIED_ERROR; 250 } 251 catch (...) 252 { 253 log<level::ERR>("wd_set: Unknown Error"); 254 reportError(); 255 return IPMI_CC_UNSPECIFIED_ERROR; 256 } 257 } 258 259 /** @brief Converts a DBUS Watchdog Action to IPMI defined action 260 * @param[in] wd_action The DBUS Watchdog Action 261 * @return The IpmiAction that the wd_action maps to 262 */ 263 IpmiAction wdActionToIpmiAction(WatchdogService::Action wd_action) 264 { 265 switch (wd_action) 266 { 267 case WatchdogService::Action::None: 268 { 269 return IpmiAction::None; 270 } 271 case WatchdogService::Action::HardReset: 272 { 273 return IpmiAction::HardReset; 274 } 275 case WatchdogService::Action::PowerOff: 276 { 277 return IpmiAction::PowerOff; 278 } 279 case WatchdogService::Action::PowerCycle: 280 { 281 return IpmiAction::PowerCycle; 282 } 283 default: 284 { 285 // We have no method via IPMI to signal that the action is unknown 286 // or unmappable in some way. 287 // Just ignore the error and return NONE so the host can reconcile. 288 return IpmiAction::None; 289 } 290 } 291 } 292 293 IpmiTimerUse wdTimerUseToIpmiTimerUse(WatchdogService::TimerUse wdTimerUse) 294 { 295 switch (wdTimerUse) 296 { 297 case WatchdogService::TimerUse::Reserved: 298 { 299 return IpmiTimerUse::Reserved; 300 } 301 case WatchdogService::TimerUse::BIOSFRB2: 302 { 303 return IpmiTimerUse::BIOSFRB2; 304 } 305 case WatchdogService::TimerUse::BIOSPOST: 306 { 307 return IpmiTimerUse::BIOSPOST; 308 } 309 case WatchdogService::TimerUse::OSLoad: 310 { 311 return IpmiTimerUse::OSLoad; 312 } 313 314 case WatchdogService::TimerUse::SMSOS: 315 { 316 return IpmiTimerUse::SMSOS; 317 } 318 case WatchdogService::TimerUse::OEM: 319 { 320 return IpmiTimerUse::OEM; 321 } 322 default: 323 { 324 return IpmiTimerUse::Reserved; 325 } 326 } 327 } 328 329 struct wd_get_res 330 { 331 uint8_t timer_use; 332 uint8_t timer_action; 333 uint8_t pretimeout; 334 uint8_t expire_flags; 335 uint16_t initial_countdown; // Little Endian (deciseconds) 336 uint16_t present_countdown; // Little Endian (deciseconds) 337 } __attribute__((packed)); 338 static_assert(sizeof(wd_get_res) == 8, "wd_get_res has invalid size."); 339 static_assert(sizeof(wd_get_res) <= MAX_IPMI_BUFFER, 340 "wd_get_res can't fit in response buffer."); 341 342 static constexpr uint8_t wd_dont_log = 0x1 << 7; 343 static constexpr uint8_t wd_running = 0x1 << 6; 344 345 ipmi_ret_t ipmi_app_watchdog_get(ipmi_netfn_t netfn, ipmi_cmd_t cmd, 346 ipmi_request_t request, 347 ipmi_response_t response, 348 ipmi_data_len_t data_len, 349 ipmi_context_t context) 350 { 351 // Assume we will fail and send no data outside the return code 352 *data_len = 0; 353 354 try 355 { 356 WatchdogService wd_service; 357 WatchdogService::Properties wd_prop = wd_service.getProperties(); 358 359 // Build and return the response 360 wd_get_res res; 361 res.timer_use = wd_dont_log; 362 res.timer_action = 363 static_cast<uint8_t>(wdActionToIpmiAction(wd_prop.expireAction)); 364 365 // Interval and timeRemaining need converted from milli -> deci seconds 366 res.initial_countdown = htole16(wd_prop.interval / 100); 367 if (wd_prop.enabled) 368 { 369 res.timer_use |= wd_running; 370 res.present_countdown = htole16(wd_prop.timeRemaining / 100); 371 } 372 else 373 { 374 res.present_countdown = res.initial_countdown; 375 } 376 377 res.timer_use |= 378 static_cast<uint8_t>(wdTimerUseToIpmiTimerUse(wd_prop.timerUse)); 379 380 // TODO: Do something about having pretimeout support 381 res.pretimeout = 0; 382 res.expire_flags = 0; 383 memcpy(response, &res, sizeof(res)); 384 *data_len = sizeof(res); 385 lastCallSuccessful = true; 386 return IPMI_CC_OK; 387 } 388 catch (const InternalFailure& e) 389 { 390 reportError(); 391 return IPMI_CC_UNSPECIFIED_ERROR; 392 } 393 catch (const std::exception& e) 394 { 395 const std::string e_str = std::string("wd_get: ") + e.what(); 396 log<level::ERR>(e_str.c_str()); 397 reportError(); 398 return IPMI_CC_UNSPECIFIED_ERROR; 399 } 400 catch (...) 401 { 402 log<level::ERR>("wd_get: Unknown Error"); 403 reportError(); 404 return IPMI_CC_UNSPECIFIED_ERROR; 405 } 406 } 407