1 #pragma once 2 3 #include "occ_pass_through.hpp" 4 #include "occ_status.hpp" 5 #include "pldm.hpp" 6 7 #ifdef PHAL_SUPPORT 8 #include <libphal.H> 9 #endif 10 #include "powercap.hpp" 11 #include "powermode.hpp" 12 #include "utils.hpp" 13 14 #include <sdbusplus/bus.hpp> 15 #include <sdeventplus/event.hpp> 16 #include <sdeventplus/source/signal.hpp> 17 #include <sdeventplus/utility/timer.hpp> 18 #include <stdplus/signal.hpp> 19 20 #include <cstring> 21 #include <functional> 22 #include <vector> 23 24 namespace sdbusRule = sdbusplus::bus::match::rules; 25 namespace open_power 26 { 27 namespace occ 28 { 29 30 enum occFruType 31 { 32 processorCore = 0, 33 internalMemCtlr = 1, 34 dimm = 2, 35 memCtrlAndDimm = 3, 36 VRMVdd = 6, 37 PMIC = 7, 38 memCtlrExSensor = 8, 39 processorIoRing = 9 40 }; 41 42 /** @brief Default time, in seconds, between OCC poll commands */ 43 constexpr unsigned int defaultPollingInterval = 5; 44 45 constexpr auto AMBIENT_PATH = 46 "/xyz/openbmc_project/sensors/temperature/Ambient_Virtual_Temp"; 47 constexpr auto AMBIENT_INTERFACE = "xyz.openbmc_project.Sensor.Value"; 48 constexpr auto AMBIENT_PROP = "Value"; 49 constexpr auto ALTITUDE_PATH = "/xyz/openbmc_project/sensors/altitude/Altitude"; 50 constexpr auto ALTITUDE_INTERFACE = "xyz.openbmc_project.Sensor.Value"; 51 constexpr auto ALTITUDE_PROP = "Value"; 52 53 constexpr auto EXTN_LABEL_PWRM_MEMORY_POWER = "5057524d"; 54 constexpr auto EXTN_LABEL_PWRP_PROCESSOR_POWER = "50575250"; 55 56 /** @class Manager 57 * @brief Builds and manages OCC objects 58 */ 59 struct Manager 60 { 61 public: 62 Manager() = delete; 63 Manager(const Manager&) = delete; 64 Manager& operator=(const Manager&) = delete; 65 Manager(Manager&&) = delete; 66 Manager& operator=(Manager&&) = delete; 67 ~Manager() = default; 68 69 /** @brief Adds OCC pass-through and status objects on the bus 70 * when corresponding CPU inventory is created. 71 * 72 * @param[in] event - Unique ptr reference to sd_event 73 */ Manageropen_power::occ::Manager74 explicit Manager(EventPtr& event) : 75 event(event), pollInterval(defaultPollingInterval), 76 sdpEvent(sdeventplus::Event::get_default()), 77 _pollTimer( 78 std::make_unique< 79 sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>( 80 sdpEvent, std::bind(&Manager::pollerTimerExpired, this))), 81 ambientPropChanged( 82 utils::getBus(), 83 sdbusRule::member("PropertiesChanged") + 84 sdbusRule::path(AMBIENT_PATH) + 85 sdbusRule::argN(0, AMBIENT_INTERFACE) + 86 sdbusRule::interface("org.freedesktop.DBus.Properties"), 87 std::bind(&Manager::ambientCallback, this, std::placeholders::_1)), 88 discoverTimer( 89 std::make_unique< 90 sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>( 91 sdpEvent, std::bind(&Manager::findAndCreateObjects, this))), 92 waitForAllOccsTimer( 93 std::make_unique< 94 sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>( 95 sdpEvent, std::bind(&Manager::occsNotAllRunning, this))), 96 throttlePldmTraceTimer( 97 std::make_unique< 98 sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>( 99 sdpEvent, std::bind(&Manager::throttlePldmTraceExpired, this))) 100 { 101 findAndCreateObjects(); 102 103 readAltitude(); 104 } 105 106 void createPldmHandle(); 107 108 /** @brief Return the number of bound OCCs */ getNumOCCsopen_power::occ::Manager109 inline auto getNumOCCs() const 110 { 111 return activeCount; 112 } 113 114 /** @brief Called by a Device to report that the SBE timed out 115 * and appropriate action should be taken 116 * 117 * @param[in] instance - the OCC instance id 118 */ 119 void sbeTimeout(unsigned int instance); 120 121 /** @brief Return the latest ambient and altitude readings 122 * 123 * @param[out] ambientValid - true if ambientTemp is valid 124 * @param[out] ambient - ambient temperature in degrees C 125 * @param[out] altitude - altitude in meters 126 */ 127 void getAmbientData(bool& ambientValid, uint8_t& ambientTemp, 128 uint16_t& altitude) const; 129 130 /** @brief Notify pcap object to update bounds */ 131 void updatePcapBounds() const; 132 133 /** 134 * @brief Set all sensor values of this OCC to NaN. 135 * @param[in] id - Id of the OCC. 136 * */ 137 void setSensorValueToNaN(uint32_t id) const; 138 139 /** @brief Set all sensor values of this OCC to NaN and non functional. 140 * 141 * @param[in] id - Id of the OCC. 142 */ 143 void setSensorValueToNonFunctional(uint32_t id) const; 144 145 /** @brief Clear any state flags that need to be reset when the host state 146 * is off */ 147 void hostPoweredOff(); 148 149 /** @brief Collect data to include in BMC dumps 150 * This will get called when app receives a SIGUSR1 signal 151 */ 152 void collectDumpData(sdeventplus::source::Signal&, 153 const struct signalfd_siginfo*); 154 155 /** @brief Name of file to put the occ-control dump data */ 156 static const std::string dumpFile; 157 158 private: 159 /** @brief Creates the OCC D-Bus objects. 160 */ 161 void findAndCreateObjects(); 162 163 /** @brief Callback that responds to cpu creation in the inventory - 164 * by creating the needed objects. 165 * 166 * @param[in] msg - bus message 167 * 168 * @returns 0 to indicate success 169 */ 170 int cpuCreated(sdbusplus::message_t& msg); 171 172 /** @brief Create child OCC objects. 173 * 174 * @param[in] occ - the occ name, such as occ0. 175 */ 176 void createObjects(const std::string& occ); 177 178 /** @brief Callback handler invoked by Status object when the OccActive 179 * property is changed. This is needed to make sure that the 180 * error detection is started only after all the OCCs are bound. 181 * Similarly, when one of the OCC gets its OccActive property 182 * un-set, then the OCC error detection needs to be stopped on 183 * all the OCCs 184 * 185 * @param[in] status - OccActive status 186 */ 187 void statusCallBack(instanceID instance, bool status); 188 189 /** @brief Set flag that a PM Complex reset is needed (to be initiated 190 * later) */ 191 void resetOccRequest(instanceID instance); 192 193 /** @brief Initiate the request to reset the PM Complex (PLDM -> HBRT) */ 194 void initiateOccRequest(instanceID instance); 195 196 /** @brief Sends a Heartbeat command to host control command handler */ 197 void sendHeartBeat(); 198 199 /** @brief reference to sd_event wrapped in unique_ptr */ 200 EventPtr& event; 201 202 /** @brief OCC pass-through objects */ 203 std::vector<std::unique_ptr<PassThrough>> passThroughObjects; 204 205 /** @brief OCC Status objects */ 206 std::vector<std::unique_ptr<Status>> statusObjects; 207 208 /** @brief Power cap monitor and occ notification object */ 209 std::unique_ptr<open_power::occ::powercap::PowerCap> pcap; 210 211 /** @brief Power mode monitor and notification object */ 212 std::unique_ptr<open_power::occ::powermode::PowerMode> pmode; 213 214 /** @brief sbdbusplus match objects */ 215 std::vector<sdbusplus::bus::match_t> cpuMatches; 216 217 /** @brief Number of OCCs that are bound */ 218 uint8_t activeCount = 0; 219 220 /** @brief Number of seconds between poll commands */ 221 uint8_t pollInterval; 222 223 /** @brief Ambient temperature of the system in degrees C */ 224 uint8_t ambient = 0xFF; // default: not available 225 226 /** @brief Altitude of the system in meters */ 227 uint16_t altitude = 0xFFFF; // default: not available 228 229 /** @brief Poll timer event */ 230 sdeventplus::Event sdpEvent; 231 232 /** @brief Flags to indicate if waiting for all of the OCC active sensors to 233 * come online */ 234 bool waitingForAllOccActiveSensors = false; 235 236 /** @brief Set containing intance numbers of any OCCs that became active 237 * while waiting for status objects to be created */ 238 std::set<uint8_t> queuedActiveState; 239 240 /** 241 * @brief The timer to be used once the OCC goes active. When it expires, 242 * a POLL command will be sent to the OCC and then timer restarted. 243 */ 244 std::unique_ptr< 245 sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>> 246 _pollTimer; 247 248 /** @brief Subscribe to ambient temperature changed events */ 249 sdbusplus::bus::match_t ambientPropChanged; 250 251 /** @brief Flag to indicate that a PM complex reset needs to happen */ 252 bool resetRequired = false; 253 /** @brief Instance number of the OCC/processor that triggered the reset */ 254 uint8_t resetInstance = 255; 255 /** @brief Set when a PM complex reset has been issued (to prevent multiple 256 * requests) */ 257 bool resetInProgress = false; 258 259 /** @brief Callback handler invoked by the PLDM event handler when state of 260 * the OCC is toggled by the host. The caller passes the instance 261 * of the OCC and state of the OCC. 262 * 263 * @param[in] instance - instance of the OCC 264 * @param[in] status - true when the OCC goes active and false when the OCC 265 * goes inactive 266 * 267 * @return true if setting the state of OCC is successful and false if it 268 * fails. 269 */ 270 bool updateOCCActive(instanceID instance, bool status); 271 272 /** @brief Callback handler invoked by the PLDM event handler when mode of 273 * the OCC SAFE MODE is inacted or cleared. 274 */ 275 void updateOccSafeMode(bool safeState); 276 277 /** @brief Callback handler invoked by PLDM sensor change when 278 * the HRESET succeeds or fails. 279 * 280 * @param[in] instance - the SBE instance id 281 * @param[in] success - true if the HRESET succeeded, otherwise false 282 */ 283 void sbeHRESETResult(instanceID instance, bool success); 284 285 #ifdef PHAL_SUPPORT 286 /** @brief Helper function to check whether an SBE dump should be collected 287 * now. 288 * 289 * @param[in] instance - the SBE instance id 290 * 291 * @return true if an SBE dump should be collected and false if not 292 */ 293 bool sbeCanDump(unsigned int instance); 294 295 /** @brief Helper function to set the SBE state through PDBG/PHAL 296 * 297 * @param[in] instance - instance of the SBE 298 * @param[in] state - the state to which the SBE should be set 299 * 300 */ 301 void setSBEState(unsigned int instance, enum sbe_state state); 302 303 /** @brief Helper function to get the SBE instance PDBG processor target 304 * 305 * @param[in] instance - the SBE instance id 306 * 307 * @return a pointer to the PDBG target 308 */ 309 struct pdbg_target* getPdbgTarget(unsigned int instance); 310 311 /** @brief Whether pdbg_targets_init has been called */ 312 bool pdbgInitialized = false; 313 #endif 314 315 std::unique_ptr<pldm::Interface> pldmHandle = nullptr; 316 317 /** 318 * @brief Timer used when discovering OCCs in /dev. 319 */ 320 std::unique_ptr< 321 sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>> 322 discoverTimer; 323 324 /** 325 * @brief Used when discovering /dev/occ objects to know if 326 * any were added since the last check. 327 */ 328 std::vector<int> prevOCCSearch; 329 330 /** 331 * @brief Timer used when waiting for OCCs to go active. 332 */ 333 std::unique_ptr< 334 sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>> 335 waitForAllOccsTimer; 336 337 /** 338 * @brief Timer used to throttle PLDM traces when there are problems 339 determining the OCC status via pldm. Used to prevent excessive 340 journal traces. 341 */ 342 std::unique_ptr< 343 sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>> 344 throttlePldmTraceTimer; 345 /** 346 * @brief onPldmTimeoutCreatePel flag will be used to indicate if 347 * a PEL should get created when the throttlePldmTraceTimer expires. 348 * The first time the throttlePldmTraceTimer expires, the traces 349 * will be throttled and then the timer gets restarted. The 350 * next time the timer expires, a PEL will get created. 351 */ 352 bool onPldmTimeoutCreatePel = false; 353 354 /** @brief Check if all of the OCC Active sensors are available and if not 355 * restart the discoverTimer 356 */ 357 void throttlePldmTraceExpired(); 358 359 /** @brief Create a PEL when the code is not able to obtain the OCC PDRs 360 * via PLDM. This is called when the throttlePldmTraceTimer expires. 361 */ 362 void createPldmSensorPEL(); 363 364 /** @brief Called when code times out waiting for all OCCs to be running or 365 * after the app is restarted (Status does not callback into 366 * Manager). 367 */ 368 void occsNotAllRunning(); 369 370 /** @brief Check if all of the OCC Active sensors are available and if not 371 * restart the discoverTimer 372 */ 373 void checkAllActiveSensors(); 374 375 /** 376 * @brief Called when poll timer expires and forces a POLL command to the 377 * OCC. The poll timer will then be restarted. 378 * */ 379 void pollerTimerExpired(); 380 381 /** 382 * @brief Finds the OCC devices in /dev 383 * 384 * @return The IDs of the OCCs - 0, 1, etc. 385 */ 386 std::vector<int> findOCCsInDev(); 387 388 /** 389 * @brief Gets the occ sensor values. 390 * @param[in] occ - pointer to OCCs Status object 391 * */ 392 void getSensorValues(std::unique_ptr<Status>& occ); 393 394 /** 395 * @brief Trigger OCC driver to read the temperature sensors. 396 * @param[in] path - path of the OCC sensors. 397 * @param[in] id - Id of the OCC. 398 * */ 399 void readTempSensors(const fs::path& path, uint32_t id); 400 401 /** 402 * @brief Trigger OCC driver to read the extended sensors. 403 * @param[in] path - path of the OCC sensors. 404 * @param[in] id - Id of the OCC. 405 * */ 406 void readExtnSensors(const fs::path& path, uint32_t id); 407 408 /** 409 * @brief Trigger OCC driver to read the power sensors. 410 * @param[in] path - path of the OCC sensors. 411 * @param[in] id - Id of the OCC. 412 * */ 413 void readPowerSensors(const fs::path& path, uint32_t id); 414 415 /** @brief Store the existing OCC sensors on D-BUS */ 416 std::map<std::string, uint32_t> existingSensors; 417 418 /** @brief Get FunctionID from the `powerX_label` file. 419 * @param[in] value - the value of the `powerX_label` file. 420 * @returns FunctionID of the power sensors. 421 */ 422 std::optional<std::string> getPowerLabelFunctionID( 423 const std::string& value); 424 425 /** @brief The power sensor names map */ 426 const std::map<std::string, std::string> powerSensorName = { 427 {"system", "total_power"}, {"1", "p0_mem_power"}, 428 {"2", "p1_mem_power"}, {"3", "p2_mem_power"}, 429 {"4", "p3_mem_power"}, {"5", "p0_power"}, 430 {"6", "p1_power"}, {"7", "p2_power"}, 431 {"8", "p3_power"}, {"9", "p0_cache_power"}, 432 {"10", "p1_cache_power"}, {"11", "p2_cache_power"}, 433 {"12", "p3_cache_power"}, {"13", "io_a_power"}, 434 {"14", "io_b_power"}, {"15", "io_c_power"}, 435 {"16", "fans_a_power"}, {"17", "fans_b_power"}, 436 {"18", "storage_a_power"}, {"19", "storage_b_power"}, 437 {"23", "mem_cache_power"}, {"25", "p0_mem_0_power"}, 438 {"26", "p0_mem_1_power"}, {"27", "p0_mem_2_power"}, 439 {"35", "pcie_dcm0_power"}, {"36", "pcie_dcm1_power"}, 440 {"37", "pcie_dcm2_power"}, {"38", "pcie_dcm3_power"}, 441 {"39", "io_dcm0_power"}, {"40", "io_dcm1_power"}, 442 {"41", "io_dcm2_power"}, {"42", "io_dcm3_power"}, 443 {"43", "avdd_total_power"}}; 444 445 /** @brief The dimm temperature sensor names map */ 446 const std::map<uint32_t, std::string> dimmTempSensorName = { 447 {internalMemCtlr, "_intmb_temp"}, 448 {dimm, "_dram_temp"}, 449 {memCtrlAndDimm, "_dram_extmb_temp"}, 450 {PMIC, "_pmic_temp"}, 451 {memCtlrExSensor, "_extmb_temp"}}; 452 453 /** @brief The dimm DVFS temperature sensor names map */ 454 const std::map<uint32_t, std::string> dimmDVFSSensorName = { 455 {internalMemCtlr, "dimm_intmb_dvfs_temp"}, 456 {dimm, "dimm_dram_dvfs_temp"}, 457 {memCtrlAndDimm, "dimm_dram_extmb_dvfs_temp"}, 458 {PMIC, "dimm_pmic_dvfs_temp"}, 459 {memCtlrExSensor, "dimm_extmb_dvfs_temp"}}; 460 461 /** @brief Read the altitude from DBus */ 462 void readAltitude(); 463 464 /** @brief Callback function when ambient temperature changes 465 * 466 * @param[in] msg - Data associated with subscribed signal 467 */ 468 void ambientCallback(sdbusplus::message_t& msg); 469 470 /** @brief Confirm that a single OCC master was found and start presence 471 * monitoring 472 */ 473 void validateOccMaster(); 474 }; 475 476 } // namespace occ 477 } // namespace open_power 478