1 #include "config.h" 2 3 #include "occ_manager.hpp" 4 5 #include "i2c_occ.hpp" 6 #include "occ_dbus.hpp" 7 #include "occ_errors.hpp" 8 #include "utils.hpp" 9 10 #include <phosphor-logging/elog-errors.hpp> 11 #include <phosphor-logging/log.hpp> 12 #include <xyz/openbmc_project/Common/error.hpp> 13 14 #include <chrono> 15 #include <cmath> 16 #include <filesystem> 17 #include <fstream> 18 #include <regex> 19 20 namespace open_power 21 { 22 namespace occ 23 { 24 25 constexpr uint32_t fruTypeNotAvailable = 0xFF; 26 constexpr auto fruTypeSuffix = "fru_type"; 27 constexpr auto faultSuffix = "fault"; 28 constexpr auto inputSuffix = "input"; 29 constexpr auto maxSuffix = "max"; 30 31 const auto HOST_ON_FILE = "/run/openbmc/host@0-on"; 32 33 using namespace phosphor::logging; 34 using namespace std::literals::chrono_literals; 35 36 template <typename T> 37 T readFile(const std::string& path) 38 { 39 std::ifstream ifs; 40 ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit | 41 std::ifstream::eofbit); 42 T data; 43 44 try 45 { 46 ifs.open(path); 47 ifs >> data; 48 ifs.close(); 49 } 50 catch (const std::exception& e) 51 { 52 auto err = errno; 53 throw std::system_error(err, std::generic_category()); 54 } 55 56 return data; 57 } 58 59 // findAndCreateObjects(): 60 // Takes care of getting the required objects created and 61 // finds the available devices/processors. 
// (function is called every time the discoverTimer expires)
// - create the PowerMode object to control OCC modes
// - create statusObjects for each OCC device found
// - waits for OCC Active sensors PDRs to become available
// - restart discoverTimer if all data is not available yet
//
// When POWER10 is not defined, one set of OCC objects is simply created for
// each of the MAX_CPUS processors and no timer is restarted.
void Manager::findAndCreateObjects()
{
#ifndef POWER10
    for (auto id = 0; id < MAX_CPUS; ++id)
    {
        // Create one occ per cpu
        auto occ = std::string(OCC_NAME) + std::to_string(id);
        createObjects(occ);
    }
#else
    if (!pmode)
    {
        // Create the power mode object
        pmode = std::make_unique<powermode::PowerMode>(
            *this, powermode::PMODE_PATH, powermode::PIPS_PATH, event);
    }

    if (!fs::exists(HOST_ON_FILE))
    {
        // Set once the status objects exist; they are only created one time
        // for the life of the process.
        static bool statusObjCreated = false;
        if (!statusObjCreated)
        {
            // Create the OCCs based on the /dev/occX devices
            auto occs = findOCCsInDev();

            if (occs.empty() || (prevOCCSearch.size() != occs.size()))
            {
                // Something changed or no OCCs yet, try again in 10s.
                // Note on the first pass prevOCCSearch will be empty,
                // so there will be at least one delay to give things
                // a chance to settle.
                prevOCCSearch = occs;

                log<level::INFO>(
                    std::format(
                        "Manager::findAndCreateObjects(): Waiting for OCCs (currently {})",
                        occs.size())
                        .c_str());

                discoverTimer->restartOnce(10s);
            }
            else
            {
                // All OCCs appear to be available, create status objects

                // createObjects requires OCC0 first.
                std::sort(occs.begin(), occs.end());

                log<level::INFO>(
                    std::format(
                        "Manager::findAndCreateObjects(): Creating {} OCC Status Objects",
                        occs.size())
                        .c_str());
                for (auto id : occs)
                {
                    createObjects(std::string(OCC_NAME) + std::to_string(id));
                }
                statusObjCreated = true;
                waitingForAllOccActiveSensors = true;

                // Find/update the processor path associated with each OCC
                for (auto& obj : statusObjects)
                {
                    obj->updateProcAssociation();
                }
            }
        }

        if (statusObjCreated && waitingForAllOccActiveSensors)
        {
            // Limits the host wait/running traces to one per transition
            static bool tracedHostWait = false;
            if (utils::isHostRunning())
            {
                if (tracedHostWait)
                {
                    log<level::INFO>(
                        "Manager::findAndCreateObjects(): Host is running");
                    tracedHostWait = false;
                }
                checkAllActiveSensors();
            }
            else
            {
                if (!tracedHostWait)
                {
                    log<level::INFO>(
                        "Manager::findAndCreateObjects(): Waiting for host to start");
                    tracedHostWait = true;
                }
                discoverTimer->restartOnce(30s);
#ifdef PLDM
                if (throttlePldmTraceTimer->isEnabled())
                {
                    // Host is no longer running, disable throttle timer and
                    // make sure traces are not throttled
                    log<level::INFO>(
                        "findAndCreateObjects(): disabling sensor timer");
                    throttlePldmTraceTimer->setEnabled(false);
                    pldmHandle->setTraceThrottle(false);
                }
#endif
            }
        }
    }
    else
    {
        // HOST_ON_FILE is still present: check again in 10s
        log<level::INFO>(
            std::format(
                "Manager::findAndCreateObjects(): Waiting for {} to complete...",
                HOST_ON_FILE)
                .c_str());
        discoverTimer->restartOnce(10s);
    }
#endif
}

#ifdef POWER10
// Check if all occActive sensors are available
//
// While the host is running, every status object that is neither active nor
// has received its PLDM sensor is checked: an instance found in
// queuedActiveState is marked active, otherwise the scan stops at the first
// missing sensor. When everything is available the discovery timer is
// disabled (and a pending reset is initiated); otherwise the discovery timer
// is restarted for another pass in 10s.
void Manager::checkAllActiveSensors()
{
    // These persist across calls. Note that allActiveSensorAvailable is only
    // recomputed while the host is running, so it keeps its previous value
    // on calls made while the host is down.
    static bool allActiveSensorAvailable = false;
    static bool tracedSensorWait = false;
    static bool waitingForHost = false;

    if (open_power::occ::utils::isHostRunning())
    {
        if (waitingForHost)
        {
            waitingForHost = false;
            log<level::INFO>("checkAllActiveSensors(): Host is now running");
        }

        // Start with the assumption that all are available
        allActiveSensorAvailable = true;
        for (auto& obj : statusObjects)
        {
            if ((!obj->occActive()) && (!obj->getPldmSensorReceived()))
            {
                auto instance = obj->getOccInstanceID();
                // Check if sensor was queued while waiting for discovery
                auto match = queuedActiveState.find(instance);
                if (match != queuedActiveState.end())
                {
                    queuedActiveState.erase(match);
                    log<level::INFO>(
                        std::format(
                            "checkAllActiveSensors(): OCC{} is ACTIVE (queued)",
                            instance)
                            .c_str());
                    obj->occActive(true);
                }
                else
                {
                    allActiveSensorAvailable = false;
                    if (!tracedSensorWait)
                    {
                        log<level::INFO>(
                            std::format(
                                "checkAllActiveSensors(): Waiting on OCC{} Active sensor",
                                instance)
                                .c_str());
                        tracedSensorWait = true;
#ifdef PLDM
                        // Make sure PLDM traces are not throttled
                        pldmHandle->setTraceThrottle(false);
                        // Start timer to throttle PLDM traces when timer
                        // expires
                        onPldmTimeoutCreatePel = false;
                        throttlePldmTraceTimer->restartOnce(5min);
#endif
                    }
#ifdef PLDM
                    // Ignore active sensor check if the OCCs are being reset
                    if (!resetInProgress)
                    {
                        pldmHandle->checkActiveSensor(obj->getOccInstanceID());
                    }
#endif
                    // Only the first missing sensor is handled per pass
                    break;
                }
            }
        }
    }
    else
    {
        if (!waitingForHost)
        {
            waitingForHost = true;
            log<level::INFO>(
                "checkAllActiveSensors(): Waiting for host to start");
#ifdef PLDM
            if (throttlePldmTraceTimer->isEnabled())
            {
                // Host is no longer running, disable throttle timer and
                // make sure traces are not throttled
                log<level::INFO>(
                    "checkAllActiveSensors(): disabling sensor timer");
                throttlePldmTraceTimer->setEnabled(false);
                pldmHandle->setTraceThrottle(false);
            }
#endif
        }
    }

    if (allActiveSensorAvailable)
    {
        // All sensors were found, disable the discovery timer
        if (discoverTimer->isEnabled())
        {
            discoverTimer->setEnabled(false);
        }
#ifdef PLDM
        if (throttlePldmTraceTimer->isEnabled())
        {
            // Disable throttle timer and make sure traces are not throttled
            throttlePldmTraceTimer->setEnabled(false);
            pldmHandle->setTraceThrottle(false);
        }
#endif
        if (waitingForAllOccActiveSensors)
        {
            log<level::INFO>(
                "checkAllActiveSensors(): OCC Active sensors are available");
            waitingForAllOccActiveSensors = false;

            if (resetRequired)
            {
                initiateOccRequest(resetInstance);

                if (!waitForAllOccsTimer->isEnabled())
                {
                    log<level::WARNING>(
                        "occsNotAllRunning: Restarting waitForAllOccTimer");
                    // restart occ wait timer to check status after reset
                    // completes
                    waitForAllOccsTimer->restartOnce(60s);
                }
            }
        }
        queuedActiveState.clear();
        tracedSensorWait = false;
    }
    else
    {
        // Not all sensors were available, so keep waiting
        if (!tracedSensorWait)
        {
            log<level::INFO>(
                "checkAllActiveSensors(): Waiting for OCC Active sensors to become available");
            tracedSensorWait = true;
        }
        discoverTimer->restartOnce(10s);
    }
}
#endif

// findOCCsInDev():
// Scans /dev for entries whose path ends in "occ<N>" and returns the
// zero-based instance (N - 1) of each one, in directory iteration order
// (callers sort the result when they need an order).
// An entry whose number cannot be converted to an int is logged and skipped
// rather than letting the conversion exception escape.
std::vector<int> Manager::findOCCsInDev()
{
    std::vector<int> occs;
    std::regex expr{R"(occ(\d+)$)"};

    for (auto& file : fs::directory_iterator("/dev"))
    {
        std::smatch match;
        std::string path{file.path().string()};
        if (!std::regex_search(path, match, expr))
        {
            continue;
        }

        try
        {
            auto num = std::stoi(match[1].str());

            // /dev numbering starts at 1, ours starts at 0.
            occs.push_back(num - 1);
        }
        catch (const std::exception& e)
        {
            // std::stoi throws when the digits do not fit in an int
            log<level::ERR>(
                std::format("Manager::findOCCsInDev(): Ignoring {}: {}", path,
                            e.what())
                    .c_str());
        }
    }

    return occs;
}

// cpuCreated():
// D-Bus message handler: reads an object path from 'msg', takes its last
// path component, replaces the CPU_NAME portion with OCC_NAME and creates
// the OCC objects for the resulting name.
// A path whose last component does not contain CPU_NAME is logged and
// ignored. Always returns 0.
int Manager::cpuCreated(sdbusplus::message_t& msg)
{
    namespace fs = std::filesystem;

    sdbusplus::message::object_path o;
    msg.read(o);
    fs::path cpuPath(std::string(std::move(o)));

    auto name = cpuPath.filename().string();
    auto index = name.find(CPU_NAME);
    if (index == std::string::npos)
    {
        // std::string::replace() would throw std::out_of_range for npos
        log<level::ERR>(
            std::format(
                "Manager::cpuCreated(): Ignoring {}, it does not contain {}",
                name, CPU_NAME)
                .c_str());
        return 0;
    }
    name.replace(index, std::strlen(CPU_NAME), OCC_NAME);

    createObjects(name);

    return 0;
}

// createObjects():
// Creates the Status and PassThrough objects for the OCC named 'occ' (both
// at OCC_CONTROL_ROOT/<occ>), and creates the PowerCap object from the first
// Status object if it does not exist yet.
// If the new Status object reports that it is the master OCC, the poll timer
// is disabled and (POWER10) the master path is set on the PowerMode object.
void Manager::createObjects(const std::string& occ)
{
    auto path = fs::path(OCC_CONTROL_ROOT) / occ;

    statusObjects.emplace_back(std::make_unique<Status>(
        event, path.c_str(), *this,
#ifdef POWER10
        pmode,
#endif
        std::bind(std::mem_fn(&Manager::statusCallBack), this,
                  std::placeholders::_1, std::placeholders::_2)
#ifdef PLDM
            ,
        // Callback will set flag indicating reset needs to be done
        // instead of immediately issuing a reset via PLDM.
        std::bind(std::mem_fn(&Manager::resetOccRequest), this,
                  std::placeholders::_1)
#endif
            ));

    // Create the power cap monitor object
    if (!pcap)
    {
        pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
            *statusObjects.back());
    }

    if (statusObjects.back()->isMasterOcc())
    {
        log<level::INFO>(
            std::format("Manager::createObjects(): OCC{} is the master",
                        statusObjects.back()->getOccInstanceID())
                .c_str());
        _pollTimer->setEnabled(false);

#ifdef POWER10
        // Set the master OCC on the PowerMode object
        pmode->setMasterOcc(path);
#endif
    }

    passThroughObjects.emplace_back(std::make_unique<PassThrough>(
        path.c_str()
#ifdef POWER10
            ,
        pmode
#endif
        ));
}

// If a reset is not already outstanding, set a flag to indicate that a reset is
// needed.
void Manager::resetOccRequest(instanceID instance)
{
    if (!resetRequired)
    {
        // First request: remember which OCC asked for the reset
        resetRequired = true;
        resetInstance = instance;
        log<level::ERR>(
            std::format(
                "resetOccRequest: PM Complex reset was requested due to OCC{}",
                instance)
                .c_str());
    }
    else if (instance != resetInstance)
    {
        // A reset is already pending for a different OCC; a repeated request
        // from the same OCC is dropped without a trace.
        log<level::WARNING>(
            std::format(
                "resetOccRequest: Ignoring PM Complex reset request for OCC{}, because reset already outstanding for OCC{}",
                instance, resetInstance)
                .c_str());
    }
}

// If a reset has not been started, initiate an OCC reset via PLDM
// (the PLDM request is only sent when PLDM is defined). Starting a reset
// sets resetInProgress and clears resetRequired; a request made while a
// reset is already in progress is only traced.
void Manager::initiateOccRequest(instanceID instance)
{
    if (!resetInProgress)
    {
        resetInProgress = true;
        resetInstance = instance;
        log<level::ERR>(
            std::format(
                "initiateOccRequest: Initiating PM Complex reset due to OCC{}",
                instance)
                .c_str());
#ifdef PLDM
        pldmHandle->resetOCC(instance);
#endif
        resetRequired = false;
    }
    else
    {
        log<level::WARNING>(
            std::format(
                "initiateOccRequest: Ignoring PM Complex reset request for OCC{}, because reset already in process for OCC{}",
                instance, resetInstance)
                .c_str());
    }
}

// statusCallBack():
// Called when OCC 'instance' changes state (status == true: went active,
// false: went away). Maintains activeCount and, from it:
// - starts/stops the poll timer and (POWER10) the wait-for-all-OCCs timer
// - once every status object is active, either initiates a pending reset
//   or validates the master OCC
// - clears resetInProgress once no OCC is active any more
// An activation reported while a reset is in progress is ignored entirely.
void Manager::statusCallBack(instanceID instance, bool status)
{
    if (status)
    {
        if (resetInProgress)
        {
            log<level::INFO>(
                std::format(
                    "statusCallBack: Ignoring OCC{} activate because a reset has been initiated due to OCC{}",
                    instance, resetInstance)
                    .c_str());
            return;
        }

        // OCC went active
        ++activeCount;

#ifdef POWER10
        if (activeCount == 1)
        {
            // First OCC went active (allow some time for all OCCs to go active)
            waitForAllOccsTimer->restartOnce(60s);
        }
#endif

        if (activeCount == statusObjects.size())
        {
#ifdef POWER10
            // All OCCs are now running
            if (waitForAllOccsTimer->isEnabled())
            {
                // stop occ wait timer
                waitForAllOccsTimer->setEnabled(false);
            }

            // All OCCs have been found, check if we need a reset
            if (resetRequired)
            {
                initiateOccRequest(resetInstance);

                if (!waitForAllOccsTimer->isEnabled())
                {
                    log<level::WARNING>(
                        "occsNotAllRunning: Restarting waitForAllOccTimer");
                    // restart occ wait timer
                    waitForAllOccsTimer->restartOnce(60s);
                }
            }
            else
            {
                // Verify master OCC and start presence monitor
                validateOccMaster();
            }
#else
            // Verify master OCC and start presence monitor
            validateOccMaster();
#endif
        }

        // Start poll timer if not already started
        if (!_pollTimer->isEnabled())
        {
            log<level::INFO>(
                std::format("Manager: OCCs will be polled every {} seconds",
                            pollInterval)
                    .c_str());

            // Send poll and start OCC poll timer
            pollerTimerExpired();
        }
    }
    else
    {
        // OCC went away
        if (activeCount > 0)
        {
            --activeCount;
        }
        else
        {
            log<level::INFO>(
                std::format("OCC{} disabled, but currently no active OCCs",
                            instance)
                    .c_str());
        }

        if (activeCount == 0)
        {
            // No OCCs are running

            if (resetInProgress)
            {
                // All OCC active sensors are clear (reset should be in
                // progress)
                log<level::INFO>(
                    std::format(
                        "statusCallBack: Clearing resetInProgress (activeCount={}, OCC{}, status={})",
                        activeCount, instance, status)
                        .c_str());
                resetInProgress = false;
                resetInstance = 255;
            }

            // Stop OCC poll timer
            if (_pollTimer->isEnabled())
            {
                log<level::INFO>(
                    "Manager::statusCallBack(): OCCs are not running, stopping poll timer");
                _pollTimer->setEnabled(false);
            }

#ifdef POWER10
            // stop wait timer
            if (waitForAllOccsTimer->isEnabled())
            {
                waitForAllOccsTimer->setEnabled(false);
            }
#endif
        }
        else if (resetInProgress)
        {
            log<level::INFO>(
                std::format(
                    "statusCallBack: Skipping clear of resetInProgress (activeCount={}, OCC{}, status={})",
                    activeCount, instance, status)
                    .c_str());
        }
#ifdef READ_OCC_SENSORS
        // Clear OCC sensors
        setSensorValueToNaN(instance);
#endif
    }

#ifdef POWER10
    if (waitingForAllOccActiveSensors)
    {
        if (utils::isHostRunning())
        {
            checkAllActiveSensors();
        }
    }
#endif
}

#ifdef I2C_OCC
// initStatusObjects():
// Creates one Status object for every OCC hwmon device reported for
// DEV_PATH, then the PowerCap object (from the first Status object) and,
// for POWER10, the PowerMode object.
// When no device is found the PowerCap object is not created and no master
// OCC is set, since there is no first device to use.
void Manager::initStatusObjects()
{
    // Make sure we have a valid path string
    static_assert(sizeof(DEV_PATH) != 0);

    // D-Bus path of the first device; needed after the loop for POWER10
    [[maybe_unused]] fs::path masterPath;

    auto deviceNames = i2c_occ::getOccHwmonDevices(DEV_PATH);
    for (auto& name : deviceNames)
    {
        i2c_occ::i2cToDbus(name);
        name = std::string(OCC_NAME) + '_' + name;
        auto path = fs::path(OCC_CONTROL_ROOT) / name;
        if (masterPath.empty())
        {
            masterPath = path;
        }
        statusObjects.emplace_back(
            std::make_unique<Status>(event, path.c_str(), *this));
    }

    if (statusObjects.empty())
    {
        log<level::ERR>(
            "Manager::initStatusObjects(): No OCC hwmon devices were found");
    }
    else
    {
        // The first device is master occ
        pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
            *statusObjects.front());
    }
#ifdef POWER10
    // NOTE(review): findAndCreateObjects() also passes 'event' when it
    // constructs PowerMode - confirm this three argument form is still valid.
    pmode = std::make_unique<powermode::PowerMode>(*this, powermode::PMODE_PATH,
                                                   powermode::PIPS_PATH);
    if (!masterPath.empty())
    {
        // Set the master OCC on the PowerMode object
        pmode->setMasterOcc(masterPath);
    }
#endif
}
#endif

#ifdef PLDM
// sbeTimeout():
// If a status object exists for 'instance' and that OCC is active, marks the
// SBE as not usable and requests an HRESET through PLDM. Otherwise nothing
// is done.
void Manager::sbeTimeout(unsigned int instance)
{
    auto obj = std::find_if(statusObjects.begin(), statusObjects.end(),
                            [instance](const auto& statusObj) {
                                return instance ==
                                       statusObj->getOccInstanceID();
                            });

    if (obj != statusObjects.end() && (*obj)->occActive())
    {
        log<level::INFO>(
            std::format("SBE timeout, requesting HRESET (OCC{})", instance)
                .c_str());

        setSBEState(instance, SBE_STATE_NOT_USABLE);

        pldmHandle->sendHRESET(instance);
    }
}

// updateOCCActive():
// Applies an OCC active state change for 'instance'.
// - Status object exists: the new state is passed to it and its result is
//   returned, unless the OCC is reported active while the host is not
//   running; in that case the sensor-received flag is cleared, discovery is
//   re-armed (POWER10) and false is returned.
// - No status object yet: an active state is remembered in
//   queuedActiveState (and forgotten again on an inactive state) and false
//   is returned.
bool Manager::updateOCCActive(instanceID instance, bool status)
{
    auto obj = std::find_if(statusObjects.begin(), statusObjects.end(),
                            [instance](const auto& statusObj) {
                                return instance ==
                                       statusObj->getOccInstanceID();
                            });

    const bool hostRunning = open_power::occ::utils::isHostRunning();
    if (obj != statusObjects.end())
    {
        if (!hostRunning && status)
        {
            log<level::WARNING>(
                std::format(
                    "updateOCCActive: Host is not running yet (OCC{} active={}), clearing sensor received",
                    instance, status)
                    .c_str());
            (*obj)->setPldmSensorReceived(false);
            if (!waitingForAllOccActiveSensors)
            {
                log<level::INFO>(
                    "updateOCCActive: Waiting for Host and all OCC Active Sensors");
                waitingForAllOccActiveSensors = true;
            }
#ifdef POWER10
            discoverTimer->restartOnce(30s);
#endif
            return false;
        }

        (*obj)->setPldmSensorReceived(true);
        return (*obj)->occActive(status);
    }

    // No status object exists for this instance
    if (hostRunning)
    {
        log<level::WARNING>(
            std::format(
                "updateOCCActive: No status object to update for OCC{} (active={})",
                instance, status)
                .c_str());
    }
    else if (status)
    {
        log<level::WARNING>(
            std::format(
                "updateOCCActive: No status objects and Host is not running yet (OCC{} active={})",
                instance, status)
                .c_str());
    }

    if (status)
    {
        // OCC went active
        queuedActiveState.insert(instance);
    }
    else
    {
        auto match = queuedActiveState.find(instance);
        if (match != queuedActiveState.end())
        {
            // OCC was disabled
            queuedActiveState.erase(match);
        }
    }
    return false;
}

// Called upon a PLDM event to set the PowerMode Safe Mode state for the system.
void Manager::updateOccSafeMode(bool safeMode)
{
#ifdef POWER10
    pmode->updateDbusSafeMode(safeMode);
#endif
    // Update the processor throttle status on dbus
    for (auto& obj : statusObjects)
    {
        obj->updateThrottle(safeMode, THROTTLED_SAFE);
    }
}

// sbeHRESETResult():
// Handles the result of an SBE HRESET for 'instance'.
// - success: the SBE state is set to booted and nothing else is done
// - failure: the SBE state is set to failed; if a dump is allowed a PEL is
//   created and an SBE dump is requested over D-Bus; finally a PM Complex
//   reset is requested.
void Manager::sbeHRESETResult(instanceID instance, bool success)
{
    if (success)
    {
        log<level::INFO>(
            std::format("HRESET succeeded (OCC{})", instance).c_str());

        setSBEState(instance, SBE_STATE_BOOTED);

        return;
    }

    setSBEState(instance, SBE_STATE_FAILED);

    if (sbeCanDump(instance))
    {
        log<level::INFO>(
            std::format("HRESET failed (OCC{}), triggering SBE dump", instance)
                .c_str());

        auto& bus = utils::getBus();
        uint32_t src6 = instance << 16;
        uint32_t logId =
            FFDC::createPEL("org.open_power.Processor.Error.SbeChipOpTimeout",
                            src6, "SBE command timeout");

        try
        {
            constexpr auto interface = "xyz.openbmc_project.Dump.Create";
            constexpr auto function = "CreateDump";

            std::string service =
                utils::getService(OP_DUMP_OBJ_PATH, interface);
            auto method = bus.new_method_call(service.c_str(), OP_DUMP_OBJ_PATH,
                                              interface, function);

            std::map<std::string, std::variant<std::string, uint64_t>>
                createParams{
                    {"com.ibm.Dump.Create.CreateParameters.ErrorLogId",
                     uint64_t(logId)},
                    {"com.ibm.Dump.Create.CreateParameters.DumpType",
                     "com.ibm.Dump.Create.DumpType.SBE"},
                    {"com.ibm.Dump.Create.CreateParameters.FailingUnitId",
                     uint64_t(instance)},
                };

            method.append(createParams);

            auto response = bus.call(method);
        }
        catch (const sdbusplus::exception_t& e)
        {
            constexpr auto ERROR_DUMP_DISABLED =
                "xyz.openbmc_project.Dump.Create.Error.Disabled";
            // name() returns a C string, so compare the text; comparing the
            // two pointers with == only tests whether they are the same
            // address.
            const char* errName = e.name();
            if ((errName != nullptr) &&
                (std::string(errName) == ERROR_DUMP_DISABLED))
            {
                log<level::INFO>("Dump is disabled, skipping");
            }
            else
            {
                log<level::ERR>("Dump failed");
            }
        }
    }

    // SBE Reset failed, try PM Complex reset
    log<level::ERR>("sbeHRESETResult: Forcing PM Complex reset");
    resetOccRequest(instance);
}

// sbeCanDump():
// Returns false only when the SBE queries report that a dump is not allowed
// or that an SBE vital attention is active. Returns true in every other
// case, including when the pdbg target cannot be found or a query throws.
bool Manager::sbeCanDump(unsigned int instance)
{
    struct pdbg_target* proc = getPdbgTarget(instance);

    if (!proc)
    {
        // allow the dump in the error case
        return true;
    }

    try
    {
        if (!openpower::phal::sbe::isDumpAllowed(proc))
        {
            return false;
        }

        if (openpower::phal::pdbg::isSbeVitalAttnActive(proc))
        {
            return false;
        }
    }
    catch (const openpower::phal::exception::SbeError&)
    {
        log<level::INFO>("Failed to query SBE state");
    }

    // allow the dump in the error case
    return true;
}

// setSBEState():
// Sets the SBE state of the processor matching 'instance'. Nothing is done
// when no pdbg target is found; a failure to set the state is only logged.
void Manager::setSBEState(unsigned int instance, enum sbe_state state)
{
    struct pdbg_target* proc = getPdbgTarget(instance);

    if (!proc)
    {
        return;
    }

    try
    {
        openpower::phal::sbe::setState(proc, state);
    }
    catch (const openpower::phal::exception::SbeError& e)
    {
        log<level::ERR>(
            std::format("Failed to set SBE state: {}", e.what()).c_str());
    }
}

// getPdbgTarget():
// Returns the "proc" class pdbg target whose index equals 'instance',
// initializing pdbg on first use. Returns nullptr (after logging) when pdbg
// cannot be initialized or no matching target exists.
struct pdbg_target* Manager::getPdbgTarget(unsigned int instance)
{
    if (!pdbgInitialized)
    {
        try
        {
            openpower::phal::pdbg::init();
            pdbgInitialized = true;
        }
        catch (const openpower::phal::exception::PdbgError&)
        {
            log<level::ERR>("pdbg initialization failed");
            return nullptr;
        }
    }

    struct pdbg_target* proc = nullptr;
    pdbg_for_each_class_target("proc", proc)
    {
        if (pdbg_target_index(proc) == instance)
        {
            return proc;
        }
    }

    log<level::ERR>("Failed to get pdbg target");
    return nullptr;
}
#endif

// pollerTimerExpired():
// Poll pass over all OCCs.
// - (POWER10) if a reset is required, it is initiated here and the pass ends
//   without polling and without restarting the poll timer
// - every active OCC has its state read (and its sensors, when
//   READ_OCC_SENSORS is defined); inactive OCCs get their sensors set to NaN
// - the poll timer is restarted only while at least one OCC is active
void Manager::pollerTimerExpired()
{
    if (!_pollTimer)
    {
        log<level::ERR>("pollerTimerExpired() ERROR: Timer not defined");
        return;
    }

#ifdef POWER10
    if (resetRequired)
    {
        log<level::ERR>("pollerTimerExpired() - Initiating PM Complex reset");
        initiateOccRequest(resetInstance);

        if (!waitForAllOccsTimer->isEnabled())
        {
            log<level::WARNING>(
                "pollerTimerExpired: Restarting waitForAllOccTimer");
            // restart occ wait timer
            waitForAllOccsTimer->restartOnce(60s);
        }
        return;
    }
#endif

    for (auto& obj : statusObjects)
    {
        if (!obj->occActive())
        {
            // OCC is not running yet
#ifdef READ_OCC_SENSORS
            auto id = obj->getOccInstanceID();
            setSensorValueToNaN(id);
#endif
            continue;
        }

        // Read sysfs to force kernel to poll OCC
        obj->readOccState();

#ifdef READ_OCC_SENSORS
        // Read occ sensor values
        getSensorValues(obj);
#endif
    }

    if (activeCount > 0)
    {
        // Restart OCC poll timer
        _pollTimer->restartOnce(std::chrono::seconds(pollInterval));
    }
    else
    {
        // No OCCs running, so poll timer will not be restarted
        log<level::INFO>(
            "Manager::pollerTimerExpired: poll timer will not be restarted");
    }
}

#ifdef READ_OCC_SENSORS
// readTempSensors():
// Reads every temp<N>_label file in the hwmon directory 'path' together
// with its fru_type / fault / input (and, once per DVFS path, max) sibling
// files, maps each one to a D-Bus temperature sensor path and publishes the
// values for OCC 'occInstance'.
// Readings are divided by 1000 when published. A sensor whose fault file is
// non-zero is published as NaN and marked non-functional.
void Manager::readTempSensors(const fs::path& path, uint32_t occInstance)
{
    // There may be more than one sensor with the same FRU type
    // and label so make two passes: the first to read the temps
    // from sysfs, and the second to put them on D-Bus after
    // resolving any conflicts.
    std::map<std::string, double> sensorData;

    std::regex expr{"temp\\d+_label$"}; // Example: temp5_label
    for (auto& file : fs::directory_iterator(path))
    {
        const std::string labelPath = file.path().string();
        if (!std::regex_search(labelPath, expr))
        {
            continue;
        }

        uint32_t labelValue{0};

        try
        {
            labelValue = readFile<uint32_t>(labelPath);
        }
        catch (const std::system_error& e)
        {
            log<level::DEBUG>(
                std::format("readTempSensors: Failed reading {}, errno = {}",
                            labelPath, e.code().value())
                    .c_str());
            continue;
        }

        // Strip the trailing "label" to get the common prefix of this
        // sensor's files (e.g. ".../temp5_")
        const std::string tempLabel{"label"};
        const std::string filePathString =
            labelPath.substr(0, labelPath.length() - tempLabel.length());

        uint32_t fruTypeValue{0};
        try
        {
            fruTypeValue = readFile<uint32_t>(filePathString + fruTypeSuffix);
        }
        catch (const std::system_error& e)
        {
            log<level::DEBUG>(
                std::format("readTempSensors: Failed reading {}, errno = {}",
                            filePathString + fruTypeSuffix, e.code().value())
                    .c_str());
            continue;
        }

        std::string sensorPath =
            OCC_SENSORS_ROOT + std::string("/temperature/");

        std::string dvfsTempPath;

        if (fruTypeValue == VRMVdd)
        {
            sensorPath.append(
                "vrm_vdd" + std::to_string(occInstance) + "_temp");
        }
        else if (fruTypeValue == processorIoRing)
        {
            sensorPath.append(
                "proc" + std::to_string(occInstance) + "_ioring_temp");
            dvfsTempPath = std::string{OCC_SENSORS_ROOT} + "/temperature/proc" +
                           std::to_string(occInstance) + "_ioring_dvfs_temp";
        }
        else
        {
            // The label carries the sensor type in its top byte and the
            // sensor instance in its low 16 bits
            uint16_t type = (labelValue & 0xFF000000) >> 24;
            uint16_t instanceID = labelValue & 0x0000FFFF;

            if (type == OCC_DIMM_TEMP_SENSOR_TYPE)
            {
                if (fruTypeValue == fruTypeNotAvailable)
                {
                    // Not all DIMM related temps are available to read
                    // (no _input file in this case)
                    continue;
                }
                auto iter = dimmTempSensorName.find(fruTypeValue);
                if (iter == dimmTempSensorName.end())
                {
                    log<level::ERR>(
                        std::format(
                            "readTempSensors: Fru type error! fruTypeValue = {}) ",
                            fruTypeValue)
                            .c_str());
                    continue;
                }

                sensorPath.append(
                    "dimm" + std::to_string(instanceID) + iter->second);

                // NOTE(review): at() throws if dimmDVFSSensorName has no
                // entry for a FRU type that dimmTempSensorName does have -
                // confirm both tables always carry the same keys.
                dvfsTempPath = std::string{OCC_SENSORS_ROOT} + "/temperature/" +
                               dimmDVFSSensorName.at(fruTypeValue);
            }
            else if (type == OCC_CPU_TEMP_SENSOR_TYPE)
            {
                if (fruTypeValue == processorCore)
                {
                    // The OCC reports small core temps, of which there are
                    // two per big core.  All current P10 systems are in big
                    // core mode, so use a big core name.
                    uint16_t coreNum = instanceID / 2;
                    uint16_t tempNum = instanceID % 2;
                    sensorPath.append("proc" + std::to_string(occInstance) +
                                      "_core" + std::to_string(coreNum) + "_" +
                                      std::to_string(tempNum) + "_temp");

                    dvfsTempPath =
                        std::string{OCC_SENSORS_ROOT} + "/temperature/proc" +
                        std::to_string(occInstance) + "_core_dvfs_temp";
                }
                else
                {
                    continue;
                }
            }
            else
            {
                continue;
            }
        }

        // The dvfs temp file only needs to be read once per chip per type.
        if (!dvfsTempPath.empty() &&
            !dbus::OccDBusSensors::getOccDBus().hasDvfsTemp(dvfsTempPath))
        {
            try
            {
                auto dvfsValue = readFile<double>(filePathString + maxSuffix);

                dbus::OccDBusSensors::getOccDBus().setDvfsTemp(
                    dvfsTempPath, dvfsValue * std::pow(10, -3));
            }
            catch (const std::system_error& e)
            {
                log<level::DEBUG>(
                    std::format(
                        "readTempSensors: Failed reading {}, errno = {}",
                        filePathString + maxSuffix, e.code().value())
                        .c_str());
            }
        }

        uint32_t faultValue{0};
        try
        {
            faultValue = readFile<uint32_t>(filePathString + faultSuffix);
        }
        catch (const std::system_error& e)
        {
            log<level::DEBUG>(
                std::format("readTempSensors: Failed reading {}, errno = {}",
                            filePathString + faultSuffix, e.code().value())
                    .c_str());
            continue;
        }

        double tempValue{0};
        // NOTE: if OCC sends back 0xFF, kernel sets this fault value to 1.
        if (faultValue != 0)
        {
            tempValue = std::numeric_limits<double>::quiet_NaN();
        }
        else
        {
            // Read the temperature
            try
            {
                tempValue = readFile<double>(filePathString + inputSuffix);
            }
            catch (const std::system_error& e)
            {
                log<level::DEBUG>(
                    std::format(
                        "readTempSensors: Failed reading {}, errno = {}",
                        filePathString + inputSuffix, e.code().value())
                        .c_str());

                // if errno == EAGAIN(Resource temporarily unavailable) then set
                // temp to 0, to avoid using old temp, and affecting FAN
                // Control.
                if (e.code().value() == EAGAIN)
                {
                    tempValue = 0;
                }
                // else the errno would be something like
                //     EBADF(Bad file descriptor)
                // or ENOENT(No such file or directory)
                else
                {
                    continue;
                }
            }
        }

        // If this object path already has a value, only overwrite
        // it if the previous one was an NaN or a smaller value.
        auto existing = sensorData.find(sensorPath);
        if (existing != sensorData.end())
        {
            // Multiple sensors found for this FRU type
            if ((std::isnan(existing->second) && (tempValue == 0)) ||
                ((existing->second == 0) && std::isnan(tempValue)))
            {
                // One of the redundant sensors has failed (0xFF/nan), and the
                // other sensor has no reading (0), so set the FRU to NaN to
                // force fan increase
                tempValue = std::numeric_limits<double>::quiet_NaN();
                existing->second = tempValue;
            }
            if (std::isnan(existing->second) || (tempValue > existing->second))
            {
                existing->second = tempValue;
            }
        }
        else
        {
            // First sensor for this FRU type
            sensorData[sensorPath] = tempValue;
        }
    }

    // Now publish the values on D-Bus.
    for (const auto& [objectPath, value] : sensorData)
    {
        dbus::OccDBusSensors::getOccDBus().setValue(objectPath,
                                                    value * std::pow(10, -3));

        dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
            objectPath, !std::isnan(value));

        if (existingSensors.find(objectPath) == existingSensors.end())
        {
            dbus::OccDBusSensors::getOccDBus().setChassisAssociation(
                objectPath);
        }

        existingSensors[objectPath] = occInstance;
    }
}

// getPowerLabelFunctionID():
// Extracts the function ID from a power sensor label.
// Returns "system" for the label "system", the text between the first and
// second '_' for a label of the form
//     <sensor id>_<function id>_<apss channel>
// (eg: "0_10_5" gives "10"), and std::nullopt when the label has fewer than
// two '_' characters.
std::optional<std::string>
    Manager::getPowerLabelFunctionID(const std::string& value)
{
    // If the value is "system", then the FunctionID is "system".
    if (value == "system")
    {
        return value;
    }

    const auto first = value.find('_');
    if (first == std::string::npos)
    {
        return std::nullopt;
    }

    const auto second = value.find('_', first + 1);
    if (second == std::string::npos)
    {
        return std::nullopt;
    }

    // Only the middle field is of interest
    return value.substr(first + 1, second - first - 1);
}

// readPowerSensors():
// Reads every power<N>_label file in the hwmon directory 'path', and for
// each label whose function ID is found in powerSensorName publishes the
// matching power<N>_input reading on D-Bus in Watts (the reading is scaled
// by 10^-6) for OCC 'id'.
void Manager::readPowerSensors(const fs::path& path, uint32_t id)
{
    std::regex expr{"power\\d+_label$"}; // Example: power5_label
    for (auto& file : fs::directory_iterator(path))
    {
        const std::string labelPath = file.path().string();
        if (!std::regex_search(labelPath, expr))
        {
            continue;
        }

        std::string labelValue;
        try
        {
            labelValue = readFile<std::string>(labelPath);
        }
        catch (const std::system_error& e)
        {
            log<level::DEBUG>(
                std::format("readPowerSensors: Failed reading {}, errno = {}",
                            labelPath, e.code().value())
                    .c_str());
            continue;
        }

        auto functionID = getPowerLabelFunctionID(labelValue);
        if (functionID == std::nullopt)
        {
            continue;
        }

        // Strip the trailing "label" to get the common prefix of this
        // sensor's files (e.g. ".../power5_")
        const std::string tempLabel{"label"};
        const std::string filePathString =
            labelPath.substr(0, labelPath.length() - tempLabel.length());

        std::string sensorPath = OCC_SENSORS_ROOT + std::string("/power/");

        auto iter = powerSensorName.find(*functionID);
        if (iter == powerSensorName.end())
        {
            continue;
        }
        sensorPath.append(iter->second);

        double tempValue{0};

        try
        {
            tempValue = readFile<double>(filePathString + inputSuffix);
        }
        catch (const std::system_error& e)
        {
            log<level::DEBUG>(
                std::format("readPowerSensors: Failed reading {}, errno = {}",
                            filePathString + inputSuffix, e.code().value())
                    .c_str());
            continue;
        }

        dbus::OccDBusSensors::getOccDBus().setUnit(
            sensorPath, "xyz.openbmc_project.Sensor.Value.Unit.Watts");

        dbus::OccDBusSensors::getOccDBus().setValue(
            sensorPath, tempValue * std::pow(10, -3) * std::pow(10, -3));

        dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
            sensorPath, true);

        if (existingSensors.find(sensorPath) == existingSensors.end())
        {
            dbus::OccDBusSensors::getOccDBus().setChassisAssociation(
                sensorPath);
        }

        existingSensors[sensorPath] = id;
    }
}

// setSensorValueToNaN():
// Sets the value of every known sensor belonging to OCC 'id' to NaN while
// leaving its operational status set to true.
void Manager::setSensorValueToNaN(uint32_t id) const
{
    for (const auto& [sensorPath, occId] : existingSensors)
    {
        if (occId == id)
        {
            dbus::OccDBusSensors::getOccDBus().setValue(
                sensorPath, std::numeric_limits<double>::quiet_NaN());

            dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
                sensorPath, true);
        }
    }
}

// setSensorValueToNonFunctional():
// Sets the value of every known sensor belonging to OCC 'id' to NaN and
// sets its operational status to false.
void Manager::setSensorValueToNonFunctional(uint32_t id) const
{
    for (const auto& [sensorPath, occId] : existingSensors)
    {
        if (occId == id)
        {
            dbus::OccDBusSensors::getOccDBus().setValue(
                sensorPath, std::numeric_limits<double>::quiet_NaN());

            dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
                sensorPath, false);
        }
    }
}

// getSensorValues():
// Reads the temperature sensors of 'occ' (and the power sensors when it is
// the master OCC) from its hwmon directory. A missing hwmon directory is
// logged once per OCC until the directory shows up again.
void Manager::getSensorValues(std::unique_ptr<Status>& occ)
{
    // One "already traced" flag per OCC instance. An instance ID that does
    // not fit in the table is never used as an index; its error is simply
    // traced on every call.
    constexpr uint32_t maxTracedOccs = 8;
    static bool tracedError[maxTracedOccs] = {false};

    const fs::path sensorPath = occ->getHwmonPath();
    const uint32_t id = occ->getOccInstanceID();
    const bool haveTraceFlag = (id < maxTracedOccs);

    if (fs::exists(sensorPath))
    {
        // Read temperature sensors
        readTempSensors(sensorPath, id);

        if (occ->isMasterOcc())
        {
            // Read power sensors
            readPowerSensors(sensorPath, id);
        }
        if (haveTraceFlag)
        {
            tracedError[id] = false;
        }
    }
    else
    {
        if (!haveTraceFlag || !tracedError[id])
        {
            log<level::ERR>(
                std::format(
                    "Manager::getSensorValues: OCC{} sensor path missing: {}",
                    id, sensorPath.c_str())
                    .c_str());
            if (haveTraceFlag)
            {
                tracedError[id] = true;
            }
        }
    }
}
#endif

// Read the altitude from DBus
//
// A reading below 0xFFFF is stored in 'altitude' rounded to the nearest
// meter (negative readings are stored as 0). Any other reading is traced as
// invalid and leaves 'altitude' unchanged. If the property cannot be read,
// 'altitude' is set to 0xFFFF. Each kind of failure is traced once until
// the next good reading.
void Manager::readAltitude()
{
    static bool traceAltitudeErr = true;

    utils::PropertyValue altitudeProperty{};
    try
    {
        altitudeProperty = utils::getProperty(ALTITUDE_PATH, ALTITUDE_INTERFACE,
                                              ALTITUDE_PROP);
        auto sensorVal = std::get<double>(altitudeProperty);
        if (sensorVal < 0xFFFF)
        {
            if (sensorVal < 0)
            {
                altitude = 0;
            }
            else
            {
                // Round to nearest meter
                altitude = uint16_t(sensorVal + 0.5);
            }
            log<level::DEBUG>(std::format("readAltitude: sensor={} ({}m)",
                                          sensorVal, altitude)
                                  .c_str());
            traceAltitudeErr = true;
        }
        else
        {
            if (traceAltitudeErr)
            {
                traceAltitudeErr = false;
                log<level::DEBUG>(
                    std::format("Invalid altitude value: {}", sensorVal)
                        .c_str());
            }
        }
    }
    catch (const sdbusplus::exception_t& e)
    {
        if (traceAltitudeErr)
        {
            traceAltitudeErr = false;
            log<level::INFO>(
                std::format("Unable to read Altitude: {}", e.what()).c_str());
        }
        altitude = 0xFFFF; // not available
    }
}

// Callback function when ambient temperature changes
void Manager::ambientCallback(sdbusplus::message_t& msg)
{
    double currentTemp = 0;
    uint8_t truncatedTemp = 0xFF;
    std::string msgSensor;
    std::map<std::string, std::variant<double>> msgData;
    msg.read(msgSensor, msgData);

    auto valPropMap = msgData.find(AMBIENT_PROP);
    if (valPropMap == msgData.end())
    {
        log<level::DEBUG>("ambientCallback: Unknown ambient property changed");
        return;
    }
    currentTemp = std::get<double>(valPropMap->second);
    if (std::isnan(currentTemp))
    {
        truncatedTemp = 0xFF;
    }
    else
    {
        if (currentTemp < 0)
        {
            truncatedTemp = 0;
        }
        else
        {
            // Round to nearest degree C
            truncatedTemp =
uint8_t(currentTemp + 0.5); 1449 } 1450 } 1451 1452 // If ambient changes, notify OCCs 1453 if (truncatedTemp != ambient) 1454 { 1455 log<level::DEBUG>( 1456 std::format("ambientCallback: Ambient change from {} to {}C", 1457 ambient, currentTemp) 1458 .c_str()); 1459 1460 ambient = truncatedTemp; 1461 if (altitude == 0xFFFF) 1462 { 1463 // No altitude yet, try reading again 1464 readAltitude(); 1465 } 1466 1467 log<level::DEBUG>( 1468 std::format("ambientCallback: Ambient: {}C, altitude: {}m", ambient, 1469 altitude) 1470 .c_str()); 1471 #ifdef POWER10 1472 // Send ambient and altitude to all OCCs 1473 for (auto& obj : statusObjects) 1474 { 1475 if (obj->occActive()) 1476 { 1477 obj->sendAmbient(ambient, altitude); 1478 } 1479 } 1480 #endif // POWER10 1481 } 1482 } 1483 1484 // return the current ambient and altitude readings 1485 void Manager::getAmbientData(bool& ambientValid, uint8_t& ambientTemp, 1486 uint16_t& altitudeValue) const 1487 { 1488 ambientValid = true; 1489 ambientTemp = ambient; 1490 altitudeValue = altitude; 1491 1492 if (ambient == 0xFF) 1493 { 1494 ambientValid = false; 1495 } 1496 } 1497 1498 #ifdef POWER10 1499 // Called when waitForAllOccsTimer expires 1500 // After the first OCC goes active, this timer will be started (60 seconds) 1501 void Manager::occsNotAllRunning() 1502 { 1503 if (resetInProgress) 1504 { 1505 log<level::WARNING>( 1506 "occsNotAllRunning: Ignoring waitForAllOccsTimer because reset is in progress"); 1507 return; 1508 } 1509 if (activeCount != statusObjects.size()) 1510 { 1511 // Not all OCCs went active 1512 log<level::WARNING>( 1513 std::format( 1514 "occsNotAllRunning: Active OCC count ({}) does not match expected count ({})", 1515 activeCount, statusObjects.size()) 1516 .c_str()); 1517 // Procs may be garded, so may be expected 1518 } 1519 1520 if (resetRequired) 1521 { 1522 initiateOccRequest(resetInstance); 1523 1524 if (!waitForAllOccsTimer->isEnabled()) 1525 { 1526 log<level::WARNING>( 1527 "occsNotAllRunning: 
Restarting waitForAllOccTimer"); 1528 // restart occ wait timer 1529 waitForAllOccsTimer->restartOnce(60s); 1530 } 1531 } 1532 else 1533 { 1534 validateOccMaster(); 1535 } 1536 } 1537 1538 #ifdef PLDM 1539 // Called when throttlePldmTraceTimer expires. 1540 // If this timer expires, that indicates there are no OCC active sensor PDRs 1541 // found which will trigger pldm traces to be throttled. 1542 // The second time this timer expires, a PEL will get created. 1543 void Manager::throttlePldmTraceExpired() 1544 { 1545 if (utils::isHostRunning()) 1546 { 1547 if (!onPldmTimeoutCreatePel) 1548 { 1549 // Throttle traces 1550 pldmHandle->setTraceThrottle(true); 1551 // Restart timer to log a PEL when timer expires 1552 onPldmTimeoutCreatePel = true; 1553 throttlePldmTraceTimer->restartOnce(40min); 1554 } 1555 else 1556 { 1557 log<level::ERR>( 1558 "throttlePldmTraceExpired(): OCC active sensors still not available!"); 1559 // Create PEL 1560 createPldmSensorPEL(); 1561 } 1562 } 1563 else 1564 { 1565 // Make sure traces are not throttled 1566 pldmHandle->setTraceThrottle(false); 1567 log<level::INFO>( 1568 "throttlePldmTraceExpired(): host it not running ignoring sensor timer"); 1569 } 1570 } 1571 1572 void Manager::createPldmSensorPEL() 1573 { 1574 Error::Descriptor d = Error::Descriptor(MISSING_OCC_SENSORS_PATH); 1575 std::map<std::string, std::string> additionalData; 1576 1577 additionalData.emplace("_PID", std::to_string(getpid())); 1578 1579 log<level::INFO>( 1580 std::format( 1581 "createPldmSensorPEL(): Unable to find PLDM sensors for the OCCs") 1582 .c_str()); 1583 1584 auto& bus = utils::getBus(); 1585 1586 try 1587 { 1588 FFDCFiles ffdc; 1589 // Add occ-control journal traces to PEL FFDC 1590 auto occJournalFile = 1591 FFDC::addJournalEntries(ffdc, "openpower-occ-control", 40); 1592 1593 static constexpr auto loggingObjectPath = 1594 "/xyz/openbmc_project/logging"; 1595 static constexpr auto opLoggingInterface = "org.open_power.Logging.PEL"; 1596 std::string 
service = 1597 utils::getService(loggingObjectPath, opLoggingInterface); 1598 auto method = 1599 bus.new_method_call(service.c_str(), loggingObjectPath, 1600 opLoggingInterface, "CreatePELWithFFDCFiles"); 1601 1602 // Set level to Warning (Predictive). 1603 auto level = 1604 sdbusplus::xyz::openbmc_project::Logging::server::convertForMessage( 1605 sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level:: 1606 Warning); 1607 1608 method.append(d.path, level, additionalData, ffdc); 1609 bus.call(method); 1610 } 1611 catch (const sdbusplus::exception_t& e) 1612 { 1613 log<level::ERR>( 1614 std::format("Failed to create MISSING_OCC_SENSORS PEL: {}", 1615 e.what()) 1616 .c_str()); 1617 } 1618 } 1619 #endif // PLDM 1620 #endif // POWER10 1621 1622 // Verify single master OCC and start presence monitor 1623 void Manager::validateOccMaster() 1624 { 1625 int masterInstance = -1; 1626 for (auto& obj : statusObjects) 1627 { 1628 auto instance = obj->getOccInstanceID(); 1629 #ifdef POWER10 1630 if (!obj->occActive()) 1631 { 1632 if (utils::isHostRunning()) 1633 { 1634 // Check if sensor was queued while waiting for discovery 1635 auto match = queuedActiveState.find(instance); 1636 if (match != queuedActiveState.end()) 1637 { 1638 queuedActiveState.erase(match); 1639 log<level::INFO>( 1640 std::format( 1641 "validateOccMaster: OCC{} is ACTIVE (queued)", 1642 instance) 1643 .c_str()); 1644 obj->occActive(true); 1645 } 1646 else 1647 { 1648 // OCC does not appear to be active yet, check active sensor 1649 #ifdef PLDM 1650 pldmHandle->checkActiveSensor(instance); 1651 #endif 1652 if (obj->occActive()) 1653 { 1654 log<level::INFO>( 1655 std::format( 1656 "validateOccMaster: OCC{} is ACTIVE after reading sensor", 1657 instance) 1658 .c_str()); 1659 } 1660 } 1661 } 1662 else 1663 { 1664 log<level::WARNING>( 1665 std::format( 1666 "validateOccMaster: HOST is not running (OCC{})", 1667 instance) 1668 .c_str()); 1669 return; 1670 } 1671 } 1672 #endif // POWER10 1673 1674 if 
(obj->isMasterOcc()) 1675 { 1676 obj->addPresenceWatchMaster(); 1677 1678 if (masterInstance == -1) 1679 { 1680 masterInstance = instance; 1681 } 1682 else 1683 { 1684 log<level::ERR>( 1685 std::format( 1686 "validateOccMaster: Multiple OCC masters! ({} and {})", 1687 masterInstance, instance) 1688 .c_str()); 1689 // request reset 1690 obj->deviceError(Error::Descriptor(PRESENCE_ERROR_PATH)); 1691 } 1692 } 1693 } 1694 1695 if (masterInstance < 0) 1696 { 1697 log<level::ERR>( 1698 std::format("validateOccMaster: Master OCC not found! (of {} OCCs)", 1699 statusObjects.size()) 1700 .c_str()); 1701 // request reset 1702 statusObjects.front()->deviceError( 1703 Error::Descriptor(PRESENCE_ERROR_PATH)); 1704 } 1705 else 1706 { 1707 log<level::INFO>( 1708 std::format("validateOccMaster: OCC{} is master of {} OCCs", 1709 masterInstance, activeCount) 1710 .c_str()); 1711 #ifdef POWER10 1712 pmode->updateDbusSafeMode(false); 1713 #endif 1714 } 1715 } 1716 1717 void Manager::updatePcapBounds() const 1718 { 1719 if (pcap) 1720 { 1721 pcap->updatePcapBounds(); 1722 } 1723 } 1724 1725 } // namespace occ 1726 } // namespace open_power 1727