1 #include "config.h" 2 3 #include "occ_manager.hpp" 4 5 #include "i2c_occ.hpp" 6 #include "occ_dbus.hpp" 7 #include "utils.hpp" 8 9 #include <phosphor-logging/elog-errors.hpp> 10 #include <phosphor-logging/log.hpp> 11 #include <xyz/openbmc_project/Common/error.hpp> 12 13 #include <chrono> 14 #include <cmath> 15 #include <experimental/filesystem> 16 #include <regex> 17 18 namespace open_power 19 { 20 namespace occ 21 { 22 23 constexpr uint32_t fruTypeNotAvailable = 0xFF; 24 constexpr auto fruTypeSuffix = "fru_type"; 25 constexpr auto faultSuffix = "fault"; 26 constexpr auto inputSuffix = "input"; 27 28 using namespace phosphor::logging; 29 30 template <typename T> 31 T readFile(const std::string& path) 32 { 33 std::ifstream ifs; 34 ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit | 35 std::ifstream::eofbit); 36 T data; 37 38 try 39 { 40 ifs.open(path); 41 ifs >> data; 42 ifs.close(); 43 } 44 catch (const std::exception& e) 45 { 46 auto err = errno; 47 throw std::system_error(err, std::generic_category()); 48 } 49 50 return data; 51 } 52 53 void Manager::findAndCreateObjects() 54 { 55 #ifndef POWER10 56 for (auto id = 0; id < MAX_CPUS; ++id) 57 { 58 // Create one occ per cpu 59 auto occ = std::string(OCC_NAME) + std::to_string(id); 60 createObjects(occ); 61 } 62 #else 63 // Create the OCCs based on on the /dev/occX devices 64 auto occs = findOCCsInDev(); 65 66 if (occs.empty() || (prevOCCSearch.size() != occs.size())) 67 { 68 // Something changed or no OCCs yet, try again in 10s. 69 // Note on the first pass prevOCCSearch will be empty, 70 // so there will be at least one delay to give things 71 // a chance to settle. 72 prevOCCSearch = occs; 73 74 using namespace std::literals::chrono_literals; 75 discoverTimer->restartOnce(10s); 76 } 77 else 78 { 79 discoverTimer.reset(); 80 81 // createObjects requires OCC0 first. 82 std::sort(occs.begin(), occs.end()); 83 84 for (auto id : occs) 85 { 86 createObjects(std::string(OCC_NAME) + std::to_string(id)); 87 } 88 } 89 #endif 90 } 91 92 std::vector<int> Manager::findOCCsInDev() 93 { 94 std::vector<int> occs; 95 std::regex expr{R"(occ(\d+)$)"}; 96 97 for (auto& file : fs::directory_iterator("/dev")) 98 { 99 std::smatch match; 100 std::string path{file.path().string()}; 101 if (std::regex_search(path, match, expr)) 102 { 103 auto num = std::stoi(match[1].str()); 104 105 // /dev numbering starts at 1, ours starts at 0. 106 occs.push_back(num - 1); 107 } 108 } 109 110 return occs; 111 } 112 113 int Manager::cpuCreated(sdbusplus::message::message& msg) 114 { 115 namespace fs = std::experimental::filesystem; 116 117 sdbusplus::message::object_path o; 118 msg.read(o); 119 fs::path cpuPath(std::string(std::move(o))); 120 121 auto name = cpuPath.filename().string(); 122 auto index = name.find(CPU_NAME); 123 name.replace(index, std::strlen(CPU_NAME), OCC_NAME); 124 125 createObjects(name); 126 127 return 0; 128 } 129 130 void Manager::createObjects(const std::string& occ) 131 { 132 auto path = fs::path(OCC_CONTROL_ROOT) / occ; 133 134 passThroughObjects.emplace_back( 135 std::make_unique<PassThrough>(path.c_str())); 136 137 statusObjects.emplace_back(std::make_unique<Status>( 138 event, path.c_str(), *this, 139 std::bind(std::mem_fn(&Manager::statusCallBack), this, 140 std::placeholders::_1) 141 #ifdef PLDM 142 , 143 std::bind(std::mem_fn(&pldm::Interface::resetOCC), pldmHandle.get(), 144 std::placeholders::_1) 145 #endif 146 )); 147 148 // Create the power cap monitor object for master occ (0) 149 if (!pcap) 150 { 151 pcap = std::make_unique<open_power::occ::powercap::PowerCap>( 152 *statusObjects.front()); 153 } 154 155 #ifdef POWER10 156 // Create the power mode monitor object for master occ (0) 157 if (!pmode) 158 { 159 pmode = std::make_unique<open_power::occ::powermode::PowerMode>( 160 *statusObjects.front()); 161 } 162 #endif 163 } 164 165 void Manager::statusCallBack(bool status) 166 { 167 using InternalFailure = 168 sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure; 169 170 // At this time, it won't happen but keeping it 171 // here just in case something changes in the future 172 if ((activeCount == 0) && (!status)) 173 { 174 log<level::ERR>("Invalid update on OCCActive"); 175 elog<InternalFailure>(); 176 } 177 178 activeCount += status ? 1 : -1; 179 180 // Only start presence detection if all the OCCs are bound 181 if (activeCount == statusObjects.size()) 182 { 183 for (auto& obj : statusObjects) 184 { 185 obj->addPresenceWatchMaster(); 186 } 187 } 188 189 if ((!_pollTimer->isEnabled()) && (activeCount > 0)) 190 { 191 log<level::INFO>( 192 fmt::format( 193 "Manager::statusCallBack(): {} OCCs will be polled every {} seconds", 194 activeCount, pollInterval) 195 .c_str()); 196 197 // Send poll and start OCC poll timer 198 pollerTimerExpired(); 199 } 200 else if ((_pollTimer->isEnabled()) && (activeCount == 0)) 201 { 202 // Stop OCC poll timer 203 log<level::INFO>( 204 "Manager::statusCallBack(): OCCs are not running, stopping poll timer"); 205 _pollTimer->setEnabled(false); 206 207 #ifdef READ_OCC_SENSORS 208 for (auto& obj : statusObjects) 209 { 210 setSensorValueToNaN(obj->getOccInstanceID()); 211 } 212 #endif 213 } 214 } 215 216 #ifdef I2C_OCC 217 void Manager::initStatusObjects() 218 { 219 // Make sure we have a valid path string 220 static_assert(sizeof(DEV_PATH) != 0); 221 222 auto deviceNames = i2c_occ::getOccHwmonDevices(DEV_PATH); 223 auto occMasterName = deviceNames.front(); 224 for (auto& name : deviceNames) 225 { 226 i2c_occ::i2cToDbus(name); 227 name = std::string(OCC_NAME) + '_' + name; 228 auto path = fs::path(OCC_CONTROL_ROOT) / name; 229 statusObjects.emplace_back( 230 std::make_unique<Status>(event, path.c_str(), *this)); 231 } 232 // The first device is master occ 233 pcap = std::make_unique<open_power::occ::powercap::PowerCap>( 234 *statusObjects.front(), occMasterName); 235 #ifdef POWER10 236 pmode = std::make_unique<open_power::occ::powermode::PowerMode>( 237 *statusObjects.front()); 238 #endif 239 } 240 #endif 241 242 #ifdef PLDM 243 bool Manager::updateOCCActive(instanceID instance, bool status) 244 { 245 return (statusObjects[instance])->occActive(status); 246 } 247 #endif 248 249 void Manager::pollerTimerExpired() 250 { 251 if (activeCount == 0) 252 { 253 // No OCCs running, so poll timer will not be restarted 254 log<level::INFO>( 255 "Manager::pollerTimerExpire(): No OCCs running, poll timer not restarted"); 256 } 257 258 if (!_pollTimer) 259 { 260 log<level::ERR>( 261 "Manager::pollerTimerExpired() ERROR: Timer not defined"); 262 return; 263 } 264 265 for (auto& obj : statusObjects) 266 { 267 // Read sysfs to force kernel to poll OCC 268 obj->readOccState(); 269 270 #ifdef READ_OCC_SENSORS 271 // Read occ sensor values 272 auto id = obj->getOccInstanceID(); 273 if (!obj->occActive()) 274 { 275 // Occ not activated 276 setSensorValueToNaN(id); 277 continue; 278 } 279 getSensorValues(id, obj->isMasterOcc()); 280 #endif 281 } 282 283 // Restart OCC poll timer 284 _pollTimer->restartOnce(std::chrono::seconds(pollInterval)); 285 } 286 287 #ifdef READ_OCC_SENSORS 288 void Manager::readTempSensors(const fs::path& path, uint32_t id) 289 { 290 std::regex expr{"temp\\d+_label$"}; // Example: temp5_label 291 for (auto& file : fs::directory_iterator(path)) 292 { 293 if (!std::regex_search(file.path().string(), expr)) 294 { 295 continue; 296 } 297 298 uint32_t labelValue{0}; 299 300 try 301 { 302 labelValue = readFile<uint32_t>(file.path()); 303 } 304 catch (const std::system_error& e) 305 { 306 log<level::DEBUG>( 307 fmt::format("readTempSensors: Failed reading {}, errno = {}", 308 file.path().string(), e.code().value()) 309 .c_str()); 310 continue; 311 } 312 313 const std::string& tempLabel = "label"; 314 const std::string filePathString = file.path().string().substr( 315 0, file.path().string().length() - tempLabel.length()); 316 317 uint32_t fruTypeValue{0}; 318 try 319 { 320 fruTypeValue = readFile<uint32_t>(filePathString + fruTypeSuffix); 321 } 322 catch (const std::system_error& e) 323 { 324 log<level::DEBUG>( 325 fmt::format("readTempSensors: Failed reading {}, errno = {}", 326 filePathString + fruTypeSuffix, e.code().value()) 327 .c_str()); 328 continue; 329 } 330 331 std::string sensorPath = 332 OCC_SENSORS_ROOT + std::string("/temperature/"); 333 334 if (fruTypeValue == VRMVdd) 335 { 336 sensorPath.append("vrm_vdd" + std::to_string(id) + "_temp"); 337 } 338 else 339 { 340 uint16_t type = (labelValue & 0xFF000000) >> 24; 341 uint16_t instanceID = labelValue & 0x0000FFFF; 342 343 if (type == OCC_DIMM_TEMP_SENSOR_TYPE) 344 { 345 if (fruTypeValue == fruTypeNotAvailable) 346 { 347 // Not all DIMM related temps are available to read 348 // (no _input file in this case) 349 continue; 350 } 351 auto iter = dimmTempSensorName.find(fruTypeValue); 352 if (iter == dimmTempSensorName.end()) 353 { 354 log<level::ERR>( 355 fmt::format( 356 "readTempSensors: Fru type error! fruTypeValue = {}) ", 357 fruTypeValue) 358 .c_str()); 359 continue; 360 } 361 362 sensorPath.append("dimm" + std::to_string(instanceID) + 363 iter->second); 364 } 365 else if (type == OCC_CPU_TEMP_SENSOR_TYPE) 366 { 367 if (fruTypeValue != processorCore) 368 { 369 // TODO: support IO ring temp 370 continue; 371 } 372 373 sensorPath.append("proc" + std::to_string(id) + "_core" + 374 std::to_string(instanceID) + "_temp"); 375 } 376 else 377 { 378 continue; 379 } 380 } 381 382 uint32_t faultValue{0}; 383 try 384 { 385 faultValue = readFile<uint32_t>(filePathString + faultSuffix); 386 } 387 catch (const std::system_error& e) 388 { 389 log<level::DEBUG>( 390 fmt::format("readTempSensors: Failed reading {}, errno = {}", 391 filePathString + faultSuffix, e.code().value()) 392 .c_str()); 393 continue; 394 } 395 396 if (faultValue != 0) 397 { 398 open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue( 399 sensorPath, std::numeric_limits<double>::quiet_NaN()); 400 401 open_power::occ::dbus::OccDBusSensors::getOccDBus() 402 .setOperationalStatus(sensorPath, false); 403 404 continue; 405 } 406 407 double tempValue{0}; 408 409 try 410 { 411 tempValue = readFile<double>(filePathString + inputSuffix); 412 } 413 catch (const std::system_error& e) 414 { 415 log<level::DEBUG>( 416 fmt::format("readTempSensors: Failed reading {}, errno = {}", 417 filePathString + inputSuffix, e.code().value()) 418 .c_str()); 419 continue; 420 } 421 422 open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue( 423 sensorPath, tempValue * std::pow(10, -3)); 424 425 open_power::occ::dbus::OccDBusSensors::getOccDBus() 426 .setOperationalStatus(sensorPath, true); 427 428 existingSensors[sensorPath] = id; 429 } 430 return; 431 } 432 433 std::optional<std::string> 434 Manager::getPowerLabelFunctionID(const std::string& value) 435 { 436 // If the value is "system", then the FunctionID is "system". 437 if (value == "system") 438 { 439 return value; 440 } 441 442 // If the value is not "system", then the label value have 3 numbers, of 443 // which we only care about the middle one: 444 // <sensor id>_<function id>_<apss channel> 445 // eg: The value is "0_10_5" , then the FunctionID is "10". 446 if (value.find("_") == std::string::npos) 447 { 448 return std::nullopt; 449 } 450 451 auto powerLabelValue = value.substr((value.find("_") + 1)); 452 453 if (powerLabelValue.find("_") == std::string::npos) 454 { 455 return std::nullopt; 456 } 457 458 return powerLabelValue.substr(0, powerLabelValue.find("_")); 459 } 460 461 void Manager::readPowerSensors(const fs::path& path, uint32_t id) 462 { 463 std::regex expr{"power\\d+_label$"}; // Example: power5_label 464 for (auto& file : fs::directory_iterator(path)) 465 { 466 if (!std::regex_search(file.path().string(), expr)) 467 { 468 continue; 469 } 470 471 std::string labelValue; 472 try 473 { 474 labelValue = readFile<std::string>(file.path()); 475 } 476 catch (const std::system_error& e) 477 { 478 log<level::DEBUG>( 479 fmt::format("readPowerSensors: Failed reading {}, errno = {}", 480 file.path().string(), e.code().value()) 481 .c_str()); 482 continue; 483 } 484 485 auto functionID = getPowerLabelFunctionID(labelValue); 486 if (functionID == std::nullopt) 487 { 488 continue; 489 } 490 491 const std::string& tempLabel = "label"; 492 const std::string filePathString = file.path().string().substr( 493 0, file.path().string().length() - tempLabel.length()); 494 495 std::string sensorPath = OCC_SENSORS_ROOT + std::string("/power/"); 496 497 auto iter = powerSensorName.find(*functionID); 498 if (iter == powerSensorName.end()) 499 { 500 continue; 501 } 502 sensorPath.append(iter->second); 503 504 double tempValue{0}; 505 506 try 507 { 508 tempValue = readFile<double>(filePathString + inputSuffix); 509 } 510 catch (const std::system_error& e) 511 { 512 log<level::DEBUG>( 513 fmt::format("readTempSensors: Failed reading {}, errno = {}", 514 filePathString + inputSuffix, e.code().value()) 515 .c_str()); 516 continue; 517 } 518 519 open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue( 520 sensorPath, tempValue * std::pow(10, -3) * std::pow(10, -3)); 521 522 open_power::occ::dbus::OccDBusSensors::getOccDBus() 523 .setOperationalStatus(sensorPath, true); 524 525 existingSensors[sensorPath] = id; 526 } 527 return; 528 } 529 530 void Manager::setSensorValueToNaN(uint32_t id) 531 { 532 for (const auto& [sensorPath, occId] : existingSensors) 533 { 534 if (occId == id) 535 { 536 open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue( 537 sensorPath, std::numeric_limits<double>::quiet_NaN()); 538 } 539 } 540 return; 541 } 542 543 void Manager::getSensorValues(uint32_t id, bool masterOcc) 544 { 545 const auto occ = std::string("occ-hwmon.") + std::to_string(id + 1); 546 547 fs::path fileName{OCC_HWMON_PATH + occ + "/hwmon/"}; 548 549 // Need to get the hwmonXX directory name, there better only be 1 dir 550 assert(std::distance(fs::directory_iterator(fileName), 551 fs::directory_iterator{}) == 1); 552 // Now set our path to this full path, including this hwmonXX directory 553 fileName = fs::path(*fs::directory_iterator(fileName)); 554 555 // Read temperature sensors 556 readTempSensors(fileName, id); 557 558 if (masterOcc) 559 { 560 // Read power sensors 561 readPowerSensors(fileName, id); 562 } 563 564 return; 565 } 566 #endif 567 568 } // namespace occ 569 } // namespace open_power 570