1 #include "config.h" 2 3 #include "occ_manager.hpp" 4 5 #include "i2c_occ.hpp" 6 #include "occ_dbus.hpp" 7 #include "utils.hpp" 8 9 #include <phosphor-logging/elog-errors.hpp> 10 #include <phosphor-logging/log.hpp> 11 #include <xyz/openbmc_project/Common/error.hpp> 12 13 #include <chrono> 14 #include <cmath> 15 #include <filesystem> 16 #include <regex> 17 18 namespace open_power 19 { 20 namespace occ 21 { 22 23 constexpr uint32_t fruTypeNotAvailable = 0xFF; 24 constexpr auto fruTypeSuffix = "fru_type"; 25 constexpr auto faultSuffix = "fault"; 26 constexpr auto inputSuffix = "input"; 27 28 using namespace phosphor::logging; 29 30 template <typename T> 31 T readFile(const std::string& path) 32 { 33 std::ifstream ifs; 34 ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit | 35 std::ifstream::eofbit); 36 T data; 37 38 try 39 { 40 ifs.open(path); 41 ifs >> data; 42 ifs.close(); 43 } 44 catch (const std::exception& e) 45 { 46 auto err = errno; 47 throw std::system_error(err, std::generic_category()); 48 } 49 50 return data; 51 } 52 53 void Manager::findAndCreateObjects() 54 { 55 #ifndef POWER10 56 for (auto id = 0; id < MAX_CPUS; ++id) 57 { 58 // Create one occ per cpu 59 auto occ = std::string(OCC_NAME) + std::to_string(id); 60 createObjects(occ); 61 } 62 #else 63 // Create the OCCs based on on the /dev/occX devices 64 auto occs = findOCCsInDev(); 65 66 if (occs.empty() || (prevOCCSearch.size() != occs.size())) 67 { 68 // Something changed or no OCCs yet, try again in 10s. 69 // Note on the first pass prevOCCSearch will be empty, 70 // so there will be at least one delay to give things 71 // a chance to settle. 72 prevOCCSearch = occs; 73 74 using namespace std::literals::chrono_literals; 75 discoverTimer->restartOnce(10s); 76 } 77 else 78 { 79 discoverTimer.reset(); 80 81 // createObjects requires OCC0 first. 82 std::sort(occs.begin(), occs.end()); 83 84 for (auto id : occs) 85 { 86 createObjects(std::string(OCC_NAME) + std::to_string(id)); 87 } 88 } 89 #endif 90 } 91 92 std::vector<int> Manager::findOCCsInDev() 93 { 94 std::vector<int> occs; 95 std::regex expr{R"(occ(\d+)$)"}; 96 97 for (auto& file : fs::directory_iterator("/dev")) 98 { 99 std::smatch match; 100 std::string path{file.path().string()}; 101 if (std::regex_search(path, match, expr)) 102 { 103 auto num = std::stoi(match[1].str()); 104 105 // /dev numbering starts at 1, ours starts at 0. 106 occs.push_back(num - 1); 107 } 108 } 109 110 return occs; 111 } 112 113 int Manager::cpuCreated(sdbusplus::message::message& msg) 114 { 115 namespace fs = std::filesystem; 116 117 sdbusplus::message::object_path o; 118 msg.read(o); 119 fs::path cpuPath(std::string(std::move(o))); 120 121 auto name = cpuPath.filename().string(); 122 auto index = name.find(CPU_NAME); 123 name.replace(index, std::strlen(CPU_NAME), OCC_NAME); 124 125 createObjects(name); 126 127 return 0; 128 } 129 130 void Manager::createObjects(const std::string& occ) 131 { 132 auto path = fs::path(OCC_CONTROL_ROOT) / occ; 133 134 passThroughObjects.emplace_back( 135 std::make_unique<PassThrough>(path.c_str())); 136 137 statusObjects.emplace_back(std::make_unique<Status>( 138 event, path.c_str(), *this, 139 std::bind(std::mem_fn(&Manager::statusCallBack), this, 140 std::placeholders::_1) 141 #ifdef PLDM 142 , 143 std::bind(std::mem_fn(&pldm::Interface::resetOCC), pldmHandle.get(), 144 std::placeholders::_1) 145 #endif 146 )); 147 148 // Create the power cap monitor object for master occ (0) 149 if (!pcap) 150 { 151 pcap = std::make_unique<open_power::occ::powercap::PowerCap>( 152 *statusObjects.front()); 153 } 154 155 #ifdef POWER10 156 // Create the power mode monitor object for master occ (0) 157 if (!pmode) 158 { 159 pmode = std::make_unique<open_power::occ::powermode::PowerMode>( 160 *statusObjects.front()); 161 } 162 #endif 163 } 164 165 void Manager::statusCallBack(bool status) 166 { 167 using InternalFailure = 168 sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure; 169 170 // At this time, it won't happen but keeping it 171 // here just in case something changes in the future 172 if ((activeCount == 0) && (!status)) 173 { 174 log<level::ERR>("Invalid update on OCCActive"); 175 elog<InternalFailure>(); 176 } 177 178 activeCount += status ? 1 : -1; 179 180 // Only start presence detection if all the OCCs are bound 181 if (activeCount == statusObjects.size()) 182 { 183 for (auto& obj : statusObjects) 184 { 185 obj->addPresenceWatchMaster(); 186 } 187 } 188 189 if ((!_pollTimer->isEnabled()) && (activeCount > 0)) 190 { 191 log<level::INFO>( 192 fmt::format( 193 "Manager::statusCallBack(): {} OCCs will be polled every {} seconds", 194 activeCount, pollInterval) 195 .c_str()); 196 197 // Send poll and start OCC poll timer 198 pollerTimerExpired(); 199 } 200 else if ((_pollTimer->isEnabled()) && (activeCount == 0)) 201 { 202 // Stop OCC poll timer 203 log<level::INFO>( 204 "Manager::statusCallBack(): OCCs are not running, stopping poll timer"); 205 _pollTimer->setEnabled(false); 206 207 #ifdef READ_OCC_SENSORS 208 for (auto& obj : statusObjects) 209 { 210 setSensorValueToNaN(obj->getOccInstanceID()); 211 } 212 #endif 213 } 214 } 215 216 #ifdef I2C_OCC 217 void Manager::initStatusObjects() 218 { 219 // Make sure we have a valid path string 220 static_assert(sizeof(DEV_PATH) != 0); 221 222 auto deviceNames = i2c_occ::getOccHwmonDevices(DEV_PATH); 223 auto occMasterName = deviceNames.front(); 224 for (auto& name : deviceNames) 225 { 226 i2c_occ::i2cToDbus(name); 227 name = std::string(OCC_NAME) + '_' + name; 228 auto path = fs::path(OCC_CONTROL_ROOT) / name; 229 statusObjects.emplace_back( 230 std::make_unique<Status>(event, path.c_str(), *this)); 231 } 232 // The first device is master occ 233 pcap = std::make_unique<open_power::occ::powercap::PowerCap>( 234 *statusObjects.front(), occMasterName); 235 #ifdef POWER10 236 pmode = std::make_unique<open_power::occ::powermode::PowerMode>( 237 *statusObjects.front()); 238 #endif 239 } 240 #endif 241 242 #ifdef PLDM 243 bool Manager::updateOCCActive(instanceID instance, bool status) 244 { 245 return (statusObjects[instance])->occActive(status); 246 } 247 #endif 248 249 void Manager::pollerTimerExpired() 250 { 251 if (activeCount == 0) 252 { 253 // No OCCs running, so poll timer will not be restarted 254 log<level::INFO>( 255 "Manager::pollerTimerExpire(): No OCCs running, poll timer not restarted"); 256 } 257 258 if (!_pollTimer) 259 { 260 log<level::ERR>( 261 "Manager::pollerTimerExpired() ERROR: Timer not defined"); 262 return; 263 } 264 265 for (auto& obj : statusObjects) 266 { 267 // Read sysfs to force kernel to poll OCC 268 obj->readOccState(); 269 270 #ifdef READ_OCC_SENSORS 271 // Read occ sensor values 272 auto id = obj->getOccInstanceID(); 273 if (!obj->occActive()) 274 { 275 // Occ not activated 276 setSensorValueToNaN(id); 277 continue; 278 } 279 getSensorValues(id, obj->isMasterOcc()); 280 #endif 281 } 282 283 // Restart OCC poll timer 284 _pollTimer->restartOnce(std::chrono::seconds(pollInterval)); 285 } 286 287 #ifdef READ_OCC_SENSORS 288 void Manager::readTempSensors(const fs::path& path, uint32_t id) 289 { 290 std::regex expr{"temp\\d+_label$"}; // Example: temp5_label 291 for (auto& file : fs::directory_iterator(path)) 292 { 293 if (!std::regex_search(file.path().string(), expr)) 294 { 295 continue; 296 } 297 298 uint32_t labelValue{0}; 299 300 try 301 { 302 labelValue = readFile<uint32_t>(file.path()); 303 } 304 catch (const std::system_error& e) 305 { 306 log<level::DEBUG>( 307 fmt::format("readTempSensors: Failed reading {}, errno = {}", 308 file.path().string(), e.code().value()) 309 .c_str()); 310 continue; 311 } 312 313 const std::string& tempLabel = "label"; 314 const std::string filePathString = file.path().string().substr( 315 0, file.path().string().length() - tempLabel.length()); 316 317 uint32_t fruTypeValue{0}; 318 try 319 { 320 fruTypeValue = readFile<uint32_t>(filePathString + fruTypeSuffix); 321 } 322 catch (const std::system_error& e) 323 { 324 log<level::DEBUG>( 325 fmt::format("readTempSensors: Failed reading {}, errno = {}", 326 filePathString + fruTypeSuffix, e.code().value()) 327 .c_str()); 328 continue; 329 } 330 331 std::string sensorPath = 332 OCC_SENSORS_ROOT + std::string("/temperature/"); 333 334 if (fruTypeValue == VRMVdd) 335 { 336 sensorPath.append("vrm_vdd" + std::to_string(id) + "_temp"); 337 } 338 else 339 { 340 uint16_t type = (labelValue & 0xFF000000) >> 24; 341 uint16_t instanceID = labelValue & 0x0000FFFF; 342 343 if (type == OCC_DIMM_TEMP_SENSOR_TYPE) 344 { 345 if (fruTypeValue == fruTypeNotAvailable) 346 { 347 // Not all DIMM related temps are available to read 348 // (no _input file in this case) 349 continue; 350 } 351 auto iter = dimmTempSensorName.find(fruTypeValue); 352 if (iter == dimmTempSensorName.end()) 353 { 354 log<level::ERR>( 355 fmt::format( 356 "readTempSensors: Fru type error! fruTypeValue = {}) ", 357 fruTypeValue) 358 .c_str()); 359 continue; 360 } 361 362 sensorPath.append("dimm" + std::to_string(instanceID) + 363 iter->second); 364 } 365 else if (type == OCC_CPU_TEMP_SENSOR_TYPE) 366 { 367 if (fruTypeValue != processorCore) 368 { 369 // TODO: support IO ring temp 370 continue; 371 } 372 373 // The OCC reports small core temps, of which there are 374 // two per big core. All current P10 systems are in big 375 // core mode, so use a big core name. 376 uint16_t coreNum = instanceID / 2; 377 uint16_t tempNum = instanceID % 2; 378 sensorPath.append("proc" + std::to_string(id) + "_core" + 379 std::to_string(coreNum) + "_" + 380 std::to_string(tempNum) + "_temp"); 381 } 382 else 383 { 384 continue; 385 } 386 } 387 388 uint32_t faultValue{0}; 389 try 390 { 391 faultValue = readFile<uint32_t>(filePathString + faultSuffix); 392 } 393 catch (const std::system_error& e) 394 { 395 log<level::DEBUG>( 396 fmt::format("readTempSensors: Failed reading {}, errno = {}", 397 filePathString + faultSuffix, e.code().value()) 398 .c_str()); 399 continue; 400 } 401 402 // At this point, the sensor will be created for sure. 403 if (existingSensors.find(sensorPath) == existingSensors.end()) 404 { 405 open_power::occ::dbus::OccDBusSensors::getOccDBus() 406 .setChassisAssociation(sensorPath); 407 } 408 409 if (faultValue != 0) 410 { 411 open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue( 412 sensorPath, std::numeric_limits<double>::quiet_NaN()); 413 414 open_power::occ::dbus::OccDBusSensors::getOccDBus() 415 .setOperationalStatus(sensorPath, false); 416 417 continue; 418 } 419 420 double tempValue{0}; 421 422 try 423 { 424 tempValue = readFile<double>(filePathString + inputSuffix); 425 } 426 catch (const std::system_error& e) 427 { 428 log<level::DEBUG>( 429 fmt::format("readTempSensors: Failed reading {}, errno = {}", 430 filePathString + inputSuffix, e.code().value()) 431 .c_str()); 432 continue; 433 } 434 435 open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue( 436 sensorPath, tempValue * std::pow(10, -3)); 437 438 open_power::occ::dbus::OccDBusSensors::getOccDBus() 439 .setOperationalStatus(sensorPath, true); 440 441 existingSensors[sensorPath] = id; 442 } 443 return; 444 } 445 446 std::optional<std::string> 447 Manager::getPowerLabelFunctionID(const std::string& value) 448 { 449 // If the value is "system", then the FunctionID is "system". 450 if (value == "system") 451 { 452 return value; 453 } 454 455 // If the value is not "system", then the label value have 3 numbers, of 456 // which we only care about the middle one: 457 // <sensor id>_<function id>_<apss channel> 458 // eg: The value is "0_10_5" , then the FunctionID is "10". 459 if (value.find("_") == std::string::npos) 460 { 461 return std::nullopt; 462 } 463 464 auto powerLabelValue = value.substr((value.find("_") + 1)); 465 466 if (powerLabelValue.find("_") == std::string::npos) 467 { 468 return std::nullopt; 469 } 470 471 return powerLabelValue.substr(0, powerLabelValue.find("_")); 472 } 473 474 void Manager::readPowerSensors(const fs::path& path, uint32_t id) 475 { 476 std::regex expr{"power\\d+_label$"}; // Example: power5_label 477 for (auto& file : fs::directory_iterator(path)) 478 { 479 if (!std::regex_search(file.path().string(), expr)) 480 { 481 continue; 482 } 483 484 std::string labelValue; 485 try 486 { 487 labelValue = readFile<std::string>(file.path()); 488 } 489 catch (const std::system_error& e) 490 { 491 log<level::DEBUG>( 492 fmt::format("readPowerSensors: Failed reading {}, errno = {}", 493 file.path().string(), e.code().value()) 494 .c_str()); 495 continue; 496 } 497 498 auto functionID = getPowerLabelFunctionID(labelValue); 499 if (functionID == std::nullopt) 500 { 501 continue; 502 } 503 504 const std::string& tempLabel = "label"; 505 const std::string filePathString = file.path().string().substr( 506 0, file.path().string().length() - tempLabel.length()); 507 508 std::string sensorPath = OCC_SENSORS_ROOT + std::string("/power/"); 509 510 auto iter = powerSensorName.find(*functionID); 511 if (iter == powerSensorName.end()) 512 { 513 continue; 514 } 515 sensorPath.append(iter->second); 516 517 double tempValue{0}; 518 519 try 520 { 521 tempValue = readFile<double>(filePathString + inputSuffix); 522 } 523 catch (const std::system_error& e) 524 { 525 log<level::DEBUG>( 526 fmt::format("readTempSensors: Failed reading {}, errno = {}", 527 filePathString + inputSuffix, e.code().value()) 528 .c_str()); 529 continue; 530 } 531 532 open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue( 533 sensorPath, tempValue * std::pow(10, -3) * std::pow(10, -3)); 534 535 open_power::occ::dbus::OccDBusSensors::getOccDBus() 536 .setOperationalStatus(sensorPath, true); 537 538 if (existingSensors.find(sensorPath) == existingSensors.end()) 539 { 540 open_power::occ::dbus::OccDBusSensors::getOccDBus() 541 .setChassisAssociation(sensorPath); 542 } 543 544 existingSensors[sensorPath] = id; 545 } 546 return; 547 } 548 549 void Manager::setSensorValueToNaN(uint32_t id) 550 { 551 for (const auto& [sensorPath, occId] : existingSensors) 552 { 553 if (occId == id) 554 { 555 open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue( 556 sensorPath, std::numeric_limits<double>::quiet_NaN()); 557 } 558 } 559 return; 560 } 561 562 void Manager::getSensorValues(uint32_t id, bool masterOcc) 563 { 564 const auto occ = std::string("occ-hwmon.") + std::to_string(id + 1); 565 566 fs::path fileName{OCC_HWMON_PATH + occ + "/hwmon/"}; 567 568 // Need to get the hwmonXX directory name, there better only be 1 dir 569 assert(std::distance(fs::directory_iterator(fileName), 570 fs::directory_iterator{}) == 1); 571 // Now set our path to this full path, including this hwmonXX directory 572 fileName = fs::path(*fs::directory_iterator(fileName)); 573 574 // Read temperature sensors 575 readTempSensors(fileName, id); 576 577 if (masterOcc) 578 { 579 // Read power sensors 580 readPowerSensors(fileName, id); 581 } 582 583 return; 584 } 585 #endif 586 587 } // namespace occ 588 } // namespace open_power 589