1 #include "config.h" 2 3 #include "occ_manager.hpp" 4 5 #include "i2c_occ.hpp" 6 #include "occ_dbus.hpp" 7 #include "utils.hpp" 8 9 #include <phosphor-logging/elog-errors.hpp> 10 #include <phosphor-logging/log.hpp> 11 #include <xyz/openbmc_project/Common/error.hpp> 12 13 #include <chrono> 14 #include <cmath> 15 #include <filesystem> 16 #include <regex> 17 18 namespace open_power 19 { 20 namespace occ 21 { 22 23 constexpr uint32_t fruTypeNotAvailable = 0xFF; 24 constexpr auto fruTypeSuffix = "fru_type"; 25 constexpr auto faultSuffix = "fault"; 26 constexpr auto inputSuffix = "input"; 27 28 using namespace phosphor::logging; 29 30 template <typename T> 31 T readFile(const std::string& path) 32 { 33 std::ifstream ifs; 34 ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit | 35 std::ifstream::eofbit); 36 T data; 37 38 try 39 { 40 ifs.open(path); 41 ifs >> data; 42 ifs.close(); 43 } 44 catch (const std::exception& e) 45 { 46 auto err = errno; 47 throw std::system_error(err, std::generic_category()); 48 } 49 50 return data; 51 } 52 53 void Manager::findAndCreateObjects() 54 { 55 #ifndef POWER10 56 for (auto id = 0; id < MAX_CPUS; ++id) 57 { 58 // Create one occ per cpu 59 auto occ = std::string(OCC_NAME) + std::to_string(id); 60 createObjects(occ); 61 } 62 #else 63 // Create the OCCs based on on the /dev/occX devices 64 auto occs = findOCCsInDev(); 65 66 if (occs.empty() || (prevOCCSearch.size() != occs.size())) 67 { 68 // Something changed or no OCCs yet, try again in 10s. 69 // Note on the first pass prevOCCSearch will be empty, 70 // so there will be at least one delay to give things 71 // a chance to settle. 72 prevOCCSearch = occs; 73 74 using namespace std::literals::chrono_literals; 75 discoverTimer->restartOnce(10s); 76 } 77 else 78 { 79 discoverTimer.reset(); 80 81 // createObjects requires OCC0 first. 82 std::sort(occs.begin(), occs.end()); 83 84 for (auto id : occs) 85 { 86 createObjects(std::string(OCC_NAME) + std::to_string(id)); 87 } 88 } 89 #endif 90 } 91 92 std::vector<int> Manager::findOCCsInDev() 93 { 94 std::vector<int> occs; 95 std::regex expr{R"(occ(\d+)$)"}; 96 97 for (auto& file : fs::directory_iterator("/dev")) 98 { 99 std::smatch match; 100 std::string path{file.path().string()}; 101 if (std::regex_search(path, match, expr)) 102 { 103 auto num = std::stoi(match[1].str()); 104 105 // /dev numbering starts at 1, ours starts at 0. 106 occs.push_back(num - 1); 107 } 108 } 109 110 return occs; 111 } 112 113 int Manager::cpuCreated(sdbusplus::message::message& msg) 114 { 115 namespace fs = std::filesystem; 116 117 sdbusplus::message::object_path o; 118 msg.read(o); 119 fs::path cpuPath(std::string(std::move(o))); 120 121 auto name = cpuPath.filename().string(); 122 auto index = name.find(CPU_NAME); 123 name.replace(index, std::strlen(CPU_NAME), OCC_NAME); 124 125 createObjects(name); 126 127 return 0; 128 } 129 130 void Manager::createObjects(const std::string& occ) 131 { 132 auto path = fs::path(OCC_CONTROL_ROOT) / occ; 133 134 passThroughObjects.emplace_back( 135 std::make_unique<PassThrough>(path.c_str())); 136 137 statusObjects.emplace_back(std::make_unique<Status>( 138 event, path.c_str(), *this, 139 std::bind(std::mem_fn(&Manager::statusCallBack), this, 140 std::placeholders::_1) 141 #ifdef PLDM 142 , 143 std::bind(std::mem_fn(&pldm::Interface::resetOCC), pldmHandle.get(), 144 std::placeholders::_1) 145 #endif 146 )); 147 148 // Create the power cap monitor object for master occ (0) 149 if (!pcap) 150 { 151 pcap = std::make_unique<open_power::occ::powercap::PowerCap>( 152 *statusObjects.front()); 153 } 154 155 #ifdef POWER10 156 // Create the power mode monitor object for master occ (0) 157 if (!pmode) 158 { 159 pmode = std::make_unique<open_power::occ::powermode::PowerMode>( 160 *statusObjects.front()); 161 } 162 // Create the idle power saver monitor object for master occ (0) 163 if (!pips) 164 { 165 pips = std::make_unique<open_power::occ::powermode::PowerIPS>( 166 *statusObjects.front()); 167 } 168 #endif 169 } 170 171 void Manager::statusCallBack(bool status) 172 { 173 using InternalFailure = 174 sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure; 175 176 // At this time, it won't happen but keeping it 177 // here just in case something changes in the future 178 if ((activeCount == 0) && (!status)) 179 { 180 log<level::ERR>("Invalid update on OCCActive"); 181 elog<InternalFailure>(); 182 } 183 184 activeCount += status ? 1 : -1; 185 186 // Only start presence detection if all the OCCs are bound 187 if (activeCount == statusObjects.size()) 188 { 189 for (auto& obj : statusObjects) 190 { 191 obj->addPresenceWatchMaster(); 192 } 193 } 194 195 if ((!_pollTimer->isEnabled()) && (activeCount > 0)) 196 { 197 log<level::INFO>( 198 fmt::format( 199 "Manager::statusCallBack(): {} OCCs will be polled every {} seconds", 200 activeCount, pollInterval) 201 .c_str()); 202 203 // Send poll and start OCC poll timer 204 pollerTimerExpired(); 205 } 206 else if ((_pollTimer->isEnabled()) && (activeCount == 0)) 207 { 208 // Stop OCC poll timer 209 log<level::INFO>( 210 "Manager::statusCallBack(): OCCs are not running, stopping poll timer"); 211 _pollTimer->setEnabled(false); 212 213 #ifdef READ_OCC_SENSORS 214 for (auto& obj : statusObjects) 215 { 216 setSensorValueToNaN(obj->getOccInstanceID()); 217 } 218 #endif 219 } 220 } 221 222 #ifdef I2C_OCC 223 void Manager::initStatusObjects() 224 { 225 // Make sure we have a valid path string 226 static_assert(sizeof(DEV_PATH) != 0); 227 228 auto deviceNames = i2c_occ::getOccHwmonDevices(DEV_PATH); 229 auto occMasterName = deviceNames.front(); 230 for (auto& name : deviceNames) 231 { 232 i2c_occ::i2cToDbus(name); 233 name = std::string(OCC_NAME) + '_' + name; 234 auto path = fs::path(OCC_CONTROL_ROOT) / name; 235 statusObjects.emplace_back( 236 std::make_unique<Status>(event, path.c_str(), *this)); 237 } 238 // The first device is master occ 239 pcap = std::make_unique<open_power::occ::powercap::PowerCap>( 240 *statusObjects.front(), occMasterName); 241 #ifdef POWER10 242 pmode = std::make_unique<open_power::occ::powermode::PowerMode>( 243 *statusObjects.front()); 244 pips = std::make_unique<open_power::occ::powermode::PowerIPS>( 245 *statusObjects.front()); 246 #endif 247 } 248 #endif 249 250 #ifdef PLDM 251 bool Manager::updateOCCActive(instanceID instance, bool status) 252 { 253 return (statusObjects[instance])->occActive(status); 254 } 255 #endif 256 257 void Manager::pollerTimerExpired() 258 { 259 if (activeCount == 0) 260 { 261 // No OCCs running, so poll timer will not be restarted 262 log<level::INFO>( 263 "Manager::pollerTimerExpire(): No OCCs running, poll timer not restarted"); 264 } 265 266 if (!_pollTimer) 267 { 268 log<level::ERR>( 269 "Manager::pollerTimerExpired() ERROR: Timer not defined"); 270 return; 271 } 272 273 for (auto& obj : statusObjects) 274 { 275 // Read sysfs to force kernel to poll OCC 276 obj->readOccState(); 277 278 #ifdef READ_OCC_SENSORS 279 // Read occ sensor values 280 auto id = obj->getOccInstanceID(); 281 if (!obj->occActive()) 282 { 283 // Occ not activated 284 setSensorValueToNaN(id); 285 continue; 286 } 287 getSensorValues(id, obj->isMasterOcc()); 288 #endif 289 } 290 291 // Restart OCC poll timer 292 _pollTimer->restartOnce(std::chrono::seconds(pollInterval)); 293 } 294 295 #ifdef READ_OCC_SENSORS 296 void Manager::readTempSensors(const fs::path& path, uint32_t id) 297 { 298 std::regex expr{"temp\\d+_label$"}; // Example: temp5_label 299 for (auto& file : fs::directory_iterator(path)) 300 { 301 if (!std::regex_search(file.path().string(), expr)) 302 { 303 continue; 304 } 305 306 uint32_t labelValue{0}; 307 308 try 309 { 310 labelValue = readFile<uint32_t>(file.path()); 311 } 312 catch (const std::system_error& e) 313 { 314 log<level::DEBUG>( 315 fmt::format("readTempSensors: Failed reading {}, errno = {}", 316 file.path().string(), e.code().value()) 317 .c_str()); 318 continue; 319 } 320 321 const std::string& tempLabel = "label"; 322 const std::string filePathString = file.path().string().substr( 323 0, file.path().string().length() - tempLabel.length()); 324 325 uint32_t fruTypeValue{0}; 326 try 327 { 328 fruTypeValue = readFile<uint32_t>(filePathString + fruTypeSuffix); 329 } 330 catch (const std::system_error& e) 331 { 332 log<level::DEBUG>( 333 fmt::format("readTempSensors: Failed reading {}, errno = {}", 334 filePathString + fruTypeSuffix, e.code().value()) 335 .c_str()); 336 continue; 337 } 338 339 std::string sensorPath = 340 OCC_SENSORS_ROOT + std::string("/temperature/"); 341 342 if (fruTypeValue == VRMVdd) 343 { 344 sensorPath.append("vrm_vdd" + std::to_string(id) + "_temp"); 345 } 346 else 347 { 348 uint16_t type = (labelValue & 0xFF000000) >> 24; 349 uint16_t instanceID = labelValue & 0x0000FFFF; 350 351 if (type == OCC_DIMM_TEMP_SENSOR_TYPE) 352 { 353 if (fruTypeValue == fruTypeNotAvailable) 354 { 355 // Not all DIMM related temps are available to read 356 // (no _input file in this case) 357 continue; 358 } 359 auto iter = dimmTempSensorName.find(fruTypeValue); 360 if (iter == dimmTempSensorName.end()) 361 { 362 log<level::ERR>( 363 fmt::format( 364 "readTempSensors: Fru type error! fruTypeValue = {}) ", 365 fruTypeValue) 366 .c_str()); 367 continue; 368 } 369 370 sensorPath.append("dimm" + std::to_string(instanceID) + 371 iter->second); 372 } 373 else if (type == OCC_CPU_TEMP_SENSOR_TYPE) 374 { 375 if (fruTypeValue != processorCore) 376 { 377 // TODO: support IO ring temp 378 continue; 379 } 380 381 // The OCC reports small core temps, of which there are 382 // two per big core. All current P10 systems are in big 383 // core mode, so use a big core name. 384 uint16_t coreNum = instanceID / 2; 385 uint16_t tempNum = instanceID % 2; 386 sensorPath.append("proc" + std::to_string(id) + "_core" + 387 std::to_string(coreNum) + "_" + 388 std::to_string(tempNum) + "_temp"); 389 } 390 else 391 { 392 continue; 393 } 394 } 395 396 uint32_t faultValue{0}; 397 try 398 { 399 faultValue = readFile<uint32_t>(filePathString + faultSuffix); 400 } 401 catch (const std::system_error& e) 402 { 403 log<level::DEBUG>( 404 fmt::format("readTempSensors: Failed reading {}, errno = {}", 405 filePathString + faultSuffix, e.code().value()) 406 .c_str()); 407 continue; 408 } 409 410 // At this point, the sensor will be created for sure. 411 if (existingSensors.find(sensorPath) == existingSensors.end()) 412 { 413 open_power::occ::dbus::OccDBusSensors::getOccDBus() 414 .setChassisAssociation(sensorPath); 415 } 416 417 if (faultValue != 0) 418 { 419 open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue( 420 sensorPath, std::numeric_limits<double>::quiet_NaN()); 421 422 open_power::occ::dbus::OccDBusSensors::getOccDBus() 423 .setOperationalStatus(sensorPath, false); 424 425 continue; 426 } 427 428 double tempValue{0}; 429 430 try 431 { 432 tempValue = readFile<double>(filePathString + inputSuffix); 433 } 434 catch (const std::system_error& e) 435 { 436 log<level::DEBUG>( 437 fmt::format("readTempSensors: Failed reading {}, errno = {}", 438 filePathString + inputSuffix, e.code().value()) 439 .c_str()); 440 continue; 441 } 442 443 open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue( 444 sensorPath, tempValue * std::pow(10, -3)); 445 446 open_power::occ::dbus::OccDBusSensors::getOccDBus() 447 .setOperationalStatus(sensorPath, true); 448 449 existingSensors[sensorPath] = id; 450 } 451 return; 452 } 453 454 std::optional<std::string> 455 Manager::getPowerLabelFunctionID(const std::string& value) 456 { 457 // If the value is "system", then the FunctionID is "system". 458 if (value == "system") 459 { 460 return value; 461 } 462 463 // If the value is not "system", then the label value have 3 numbers, of 464 // which we only care about the middle one: 465 // <sensor id>_<function id>_<apss channel> 466 // eg: The value is "0_10_5" , then the FunctionID is "10". 467 if (value.find("_") == std::string::npos) 468 { 469 return std::nullopt; 470 } 471 472 auto powerLabelValue = value.substr((value.find("_") + 1)); 473 474 if (powerLabelValue.find("_") == std::string::npos) 475 { 476 return std::nullopt; 477 } 478 479 return powerLabelValue.substr(0, powerLabelValue.find("_")); 480 } 481 482 void Manager::readPowerSensors(const fs::path& path, uint32_t id) 483 { 484 std::regex expr{"power\\d+_label$"}; // Example: power5_label 485 for (auto& file : fs::directory_iterator(path)) 486 { 487 if (!std::regex_search(file.path().string(), expr)) 488 { 489 continue; 490 } 491 492 std::string labelValue; 493 try 494 { 495 labelValue = readFile<std::string>(file.path()); 496 } 497 catch (const std::system_error& e) 498 { 499 log<level::DEBUG>( 500 fmt::format("readPowerSensors: Failed reading {}, errno = {}", 501 file.path().string(), e.code().value()) 502 .c_str()); 503 continue; 504 } 505 506 auto functionID = getPowerLabelFunctionID(labelValue); 507 if (functionID == std::nullopt) 508 { 509 continue; 510 } 511 512 const std::string& tempLabel = "label"; 513 const std::string filePathString = file.path().string().substr( 514 0, file.path().string().length() - tempLabel.length()); 515 516 std::string sensorPath = OCC_SENSORS_ROOT + std::string("/power/"); 517 518 auto iter = powerSensorName.find(*functionID); 519 if (iter == powerSensorName.end()) 520 { 521 continue; 522 } 523 sensorPath.append(iter->second); 524 525 double tempValue{0}; 526 527 try 528 { 529 tempValue = readFile<double>(filePathString + inputSuffix); 530 } 531 catch (const std::system_error& e) 532 { 533 log<level::DEBUG>( 534 fmt::format("readTempSensors: Failed reading {}, errno = {}", 535 filePathString + inputSuffix, e.code().value()) 536 .c_str()); 537 continue; 538 } 539 540 open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue( 541 sensorPath, tempValue * std::pow(10, -3) * std::pow(10, -3)); 542 543 open_power::occ::dbus::OccDBusSensors::getOccDBus() 544 .setOperationalStatus(sensorPath, true); 545 546 if (existingSensors.find(sensorPath) == existingSensors.end()) 547 { 548 open_power::occ::dbus::OccDBusSensors::getOccDBus() 549 .setChassisAssociation(sensorPath); 550 } 551 552 existingSensors[sensorPath] = id; 553 } 554 return; 555 } 556 557 void Manager::setSensorValueToNaN(uint32_t id) 558 { 559 for (const auto& [sensorPath, occId] : existingSensors) 560 { 561 if (occId == id) 562 { 563 open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue( 564 sensorPath, std::numeric_limits<double>::quiet_NaN()); 565 } 566 } 567 return; 568 } 569 570 void Manager::getSensorValues(uint32_t id, bool masterOcc) 571 { 572 const auto occ = std::string("occ-hwmon.") + std::to_string(id + 1); 573 574 fs::path fileName{OCC_HWMON_PATH + occ + "/hwmon/"}; 575 576 // Need to get the hwmonXX directory name, there better only be 1 dir 577 assert(std::distance(fs::directory_iterator(fileName), 578 fs::directory_iterator{}) == 1); 579 // Now set our path to this full path, including this hwmonXX directory 580 fileName = fs::path(*fs::directory_iterator(fileName)); 581 582 // Read temperature sensors 583 readTempSensors(fileName, id); 584 585 if (masterOcc) 586 { 587 // Read power sensors 588 readPowerSensors(fileName, id); 589 } 590 591 return; 592 } 593 #endif 594 595 } // namespace occ 596 } // namespace open_power 597