1 #include "config.h" 2 3 #include "occ_manager.hpp" 4 5 #include "i2c_occ.hpp" 6 #include "occ_dbus.hpp" 7 #include "occ_errors.hpp" 8 #include "utils.hpp" 9 10 #include <phosphor-logging/elog-errors.hpp> 11 #include <phosphor-logging/lg2.hpp> 12 #include <xyz/openbmc_project/Common/error.hpp> 13 14 #include <chrono> 15 #include <cmath> 16 #include <filesystem> 17 #include <fstream> 18 #include <regex> 19 20 namespace open_power 21 { 22 namespace occ 23 { 24 25 constexpr uint32_t fruTypeNotAvailable = 0xFF; 26 constexpr auto fruTypeSuffix = "fru_type"; 27 constexpr auto faultSuffix = "fault"; 28 constexpr auto inputSuffix = "input"; 29 constexpr auto maxSuffix = "max"; 30 31 const auto HOST_ON_FILE = "/run/openbmc/host@0-on"; 32 33 using namespace phosphor::logging; 34 using namespace std::literals::chrono_literals; 35 36 template <typename T> 37 T readFile(const std::string& path) 38 { 39 std::ifstream ifs; 40 ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit | 41 std::ifstream::eofbit); 42 T data; 43 44 try 45 { 46 ifs.open(path); 47 ifs >> data; 48 ifs.close(); 49 } 50 catch (const std::exception& e) 51 { 52 auto err = errno; 53 throw std::system_error(err, std::generic_category()); 54 } 55 56 return data; 57 } 58 59 // findAndCreateObjects(): 60 // Takes care of getting the required objects created and 61 // finds the available devices/processors. 62 // (function is called everytime the discoverTimer expires) 63 // - create the PowerMode object to control OCC modes 64 // - create statusObjects for each OCC device found 65 // - waits for OCC Active sensors PDRs to become available 66 // - restart discoverTimer if all data is not available yet 67 void Manager::findAndCreateObjects() 68 { 69 #ifndef POWER10 70 for (auto id = 0; id < MAX_CPUS; ++id) 71 { 72 // Create one occ per cpu 73 auto occ = std::string(OCC_NAME) + std::to_string(id); 74 createObjects(occ); 75 } 76 #else 77 if (!pmode) 78 { 79 // Create the power mode object 80 pmode = std::make_unique<powermode::PowerMode>( 81 *this, powermode::PMODE_PATH, powermode::PIPS_PATH, event); 82 } 83 84 if (!fs::exists(HOST_ON_FILE)) 85 { 86 static bool statusObjCreated = false; 87 if (!statusObjCreated) 88 { 89 // Create the OCCs based on on the /dev/occX devices 90 auto occs = findOCCsInDev(); 91 92 if (occs.empty() || (prevOCCSearch.size() != occs.size())) 93 { 94 // Something changed or no OCCs yet, try again in 10s. 95 // Note on the first pass prevOCCSearch will be empty, 96 // so there will be at least one delay to give things 97 // a chance to settle. 98 prevOCCSearch = occs; 99 100 lg2::info( 101 "Manager::findAndCreateObjects(): Waiting for OCCs (currently {QTY})", 102 "QTY", occs.size()); 103 104 discoverTimer->restartOnce(10s); 105 } 106 else 107 { 108 // All OCCs appear to be available, create status objects 109 110 // createObjects requires OCC0 first. 111 std::sort(occs.begin(), occs.end()); 112 113 lg2::info( 114 "Manager::findAndCreateObjects(): Creating {QTY} OCC Status Objects", 115 "QTY", occs.size()); 116 for (auto id : occs) 117 { 118 createObjects(std::string(OCC_NAME) + std::to_string(id)); 119 } 120 statusObjCreated = true; 121 waitingForAllOccActiveSensors = true; 122 123 // Find/update the processor path associated with each OCC 124 for (auto& obj : statusObjects) 125 { 126 obj->updateProcAssociation(); 127 } 128 } 129 } 130 131 if (statusObjCreated && waitingForAllOccActiveSensors) 132 { 133 static bool tracedHostWait = false; 134 if (utils::isHostRunning()) 135 { 136 if (tracedHostWait) 137 { 138 lg2::info( 139 "Manager::findAndCreateObjects(): Host is running"); 140 tracedHostWait = false; 141 } 142 checkAllActiveSensors(); 143 } 144 else 145 { 146 if (!tracedHostWait) 147 { 148 lg2::info( 149 "Manager::findAndCreateObjects(): Waiting for host to start"); 150 tracedHostWait = true; 151 } 152 discoverTimer->restartOnce(30s); 153 #ifdef PLDM 154 if (throttlePldmTraceTimer->isEnabled()) 155 { 156 // Host is no longer running, disable throttle timer and 157 // make sure traces are not throttled 158 lg2::info("findAndCreateObjects(): disabling sensor timer"); 159 throttlePldmTraceTimer->setEnabled(false); 160 pldmHandle->setTraceThrottle(false); 161 } 162 #endif 163 } 164 } 165 } 166 else 167 { 168 lg2::info( 169 "Manager::findAndCreateObjects(): Waiting for {FILE} to complete...", 170 "FILE", HOST_ON_FILE); 171 discoverTimer->restartOnce(10s); 172 } 173 #endif 174 } 175 176 #ifdef POWER10 177 // Check if all occActive sensors are available 178 void Manager::checkAllActiveSensors() 179 { 180 static bool allActiveSensorAvailable = false; 181 static bool tracedSensorWait = false; 182 static bool waitingForHost = false; 183 184 if (open_power::occ::utils::isHostRunning()) 185 { 186 if (waitingForHost) 187 { 188 waitingForHost = false; 189 lg2::info("checkAllActiveSensors(): Host is now running"); 190 } 191 192 // Start with the assumption that all are available 193 allActiveSensorAvailable = true; 194 for (auto& obj : statusObjects) 195 { 196 if ((!obj->occActive()) && (!obj->getPldmSensorReceived())) 197 { 198 auto instance = obj->getOccInstanceID(); 199 // Check if sensor was queued while waiting for discovery 200 auto match = queuedActiveState.find(instance); 201 if (match != queuedActiveState.end()) 202 { 203 queuedActiveState.erase(match); 204 lg2::info( 205 "checkAllActiveSensors(): OCC{INST} is ACTIVE (queued)", 206 "INST", instance); 207 obj->occActive(true); 208 } 209 else 210 { 211 allActiveSensorAvailable = false; 212 if (!tracedSensorWait) 213 { 214 lg2::info( 215 "checkAllActiveSensors(): Waiting on OCC{INST} Active sensor", 216 "INST", instance); 217 tracedSensorWait = true; 218 #ifdef PLDM 219 // Make sure PLDM traces are not throttled 220 pldmHandle->setTraceThrottle(false); 221 // Start timer to throttle PLDM traces when timer 222 // expires 223 onPldmTimeoutCreatePel = false; 224 throttlePldmTraceTimer->restartOnce(5min); 225 #endif 226 } 227 #ifdef PLDM 228 // Ignore active sensor check if the OCCs are being reset 229 if (!resetInProgress) 230 { 231 pldmHandle->checkActiveSensor(obj->getOccInstanceID()); 232 } 233 #endif 234 break; 235 } 236 } 237 } 238 } 239 else 240 { 241 if (!waitingForHost) 242 { 243 waitingForHost = true; 244 lg2::info("checkAllActiveSensors(): Waiting for host to start"); 245 #ifdef PLDM 246 if (throttlePldmTraceTimer->isEnabled()) 247 { 248 // Host is no longer running, disable throttle timer and 249 // make sure traces are not throttled 250 lg2::info("checkAllActiveSensors(): disabling sensor timer"); 251 throttlePldmTraceTimer->setEnabled(false); 252 pldmHandle->setTraceThrottle(false); 253 } 254 #endif 255 } 256 } 257 258 if (allActiveSensorAvailable) 259 { 260 // All sensors were found, disable the discovery timer 261 if (discoverTimer->isEnabled()) 262 { 263 discoverTimer->setEnabled(false); 264 } 265 #ifdef PLDM 266 if (throttlePldmTraceTimer->isEnabled()) 267 { 268 // Disable throttle timer and make sure traces are not throttled 269 throttlePldmTraceTimer->setEnabled(false); 270 pldmHandle->setTraceThrottle(false); 271 } 272 #endif 273 if (waitingForAllOccActiveSensors) 274 { 275 lg2::info( 276 "checkAllActiveSensors(): OCC Active sensors are available"); 277 waitingForAllOccActiveSensors = false; 278 279 if (resetRequired) 280 { 281 initiateOccRequest(resetInstance); 282 283 if (!waitForAllOccsTimer->isEnabled()) 284 { 285 lg2::warning( 286 "occsNotAllRunning: Restarting waitForAllOccTimer"); 287 // restart occ wait timer to check status after reset 288 // completes 289 waitForAllOccsTimer->restartOnce(60s); 290 } 291 } 292 } 293 queuedActiveState.clear(); 294 tracedSensorWait = false; 295 } 296 else 297 { 298 // Not all sensors were available, so keep waiting 299 if (!tracedSensorWait) 300 { 301 lg2::info( 302 "checkAllActiveSensors(): Waiting for OCC Active sensors to become available"); 303 tracedSensorWait = true; 304 } 305 discoverTimer->restartOnce(10s); 306 } 307 } 308 #endif 309 310 std::vector<int> Manager::findOCCsInDev() 311 { 312 std::vector<int> occs; 313 std::regex expr{R"(occ(\d+)$)"}; 314 315 for (auto& file : fs::directory_iterator("/dev")) 316 { 317 std::smatch match; 318 std::string path{file.path().string()}; 319 if (std::regex_search(path, match, expr)) 320 { 321 auto num = std::stoi(match[1].str()); 322 323 // /dev numbering starts at 1, ours starts at 0. 324 occs.push_back(num - 1); 325 } 326 } 327 328 return occs; 329 } 330 331 int Manager::cpuCreated(sdbusplus::message_t& msg) 332 { 333 namespace fs = std::filesystem; 334 335 sdbusplus::message::object_path o; 336 msg.read(o); 337 fs::path cpuPath(std::string(std::move(o))); 338 339 auto name = cpuPath.filename().string(); 340 auto index = name.find(CPU_NAME); 341 name.replace(index, std::strlen(CPU_NAME), OCC_NAME); 342 343 createObjects(name); 344 345 return 0; 346 } 347 348 void Manager::createObjects(const std::string& occ) 349 { 350 auto path = fs::path(OCC_CONTROL_ROOT) / occ; 351 352 statusObjects.emplace_back(std::make_unique<Status>( 353 event, path.c_str(), *this, 354 #ifdef POWER10 355 pmode, 356 #endif 357 std::bind(std::mem_fn(&Manager::statusCallBack), this, 358 std::placeholders::_1, std::placeholders::_2) 359 #ifdef PLDM 360 , 361 // Callback will set flag indicating reset needs to be done 362 // instead of immediately issuing a reset via PLDM. 363 std::bind(std::mem_fn(&Manager::resetOccRequest), this, 364 std::placeholders::_1) 365 #endif 366 )); 367 368 // Create the power cap monitor object 369 if (!pcap) 370 { 371 pcap = std::make_unique<open_power::occ::powercap::PowerCap>( 372 *statusObjects.back()); 373 } 374 375 if (statusObjects.back()->isMasterOcc()) 376 { 377 lg2::info("Manager::createObjects(): OCC{INST} is the master", "INST", 378 statusObjects.back()->getOccInstanceID()); 379 _pollTimer->setEnabled(false); 380 381 #ifdef POWER10 382 // Set the master OCC on the PowerMode object 383 pmode->setMasterOcc(path); 384 #endif 385 } 386 387 passThroughObjects.emplace_back(std::make_unique<PassThrough>( 388 path.c_str() 389 #ifdef POWER10 390 , 391 pmode 392 #endif 393 )); 394 } 395 396 // If a reset is not already outstanding, set a flag to indicate that a reset is 397 // needed. 398 void Manager::resetOccRequest(instanceID instance) 399 { 400 if (!resetRequired) 401 { 402 resetRequired = true; 403 resetInstance = instance; 404 lg2::error( 405 "resetOccRequest: PM Complex reset was requested due to OCC{INST}", 406 "INST", instance); 407 } 408 else if (instance != resetInstance) 409 { 410 lg2::warning( 411 "resetOccRequest: Ignoring PM Complex reset request for OCC{INST}, because reset already outstanding for OCC{RINST}", 412 "INST", instance, "RINST", resetInstance); 413 } 414 } 415 416 // If a reset has not been started, initiate an OCC reset via PLDM 417 void Manager::initiateOccRequest(instanceID instance) 418 { 419 if (!resetInProgress) 420 { 421 resetInProgress = true; 422 resetInstance = instance; 423 lg2::error( 424 "initiateOccRequest: Initiating PM Complex reset due to OCC{INST}", 425 "INST", instance); 426 #ifdef PLDM 427 pldmHandle->resetOCC(instance); 428 #endif 429 resetRequired = false; 430 } 431 else 432 { 433 lg2::warning( 434 "initiateOccRequest: Ignoring PM Complex reset request for OCC{INST}, because reset already in process for OCC{RINST}", 435 "INST", instance, "RINST", resetInstance); 436 } 437 } 438 439 void Manager::statusCallBack(instanceID instance, bool status) 440 { 441 if (status == true) 442 { 443 if (resetInProgress) 444 { 445 lg2::info( 446 "statusCallBack: Ignoring OCC{INST} activate because a reset has been initiated due to OCC{INST}", 447 "INST", instance, "RINST", resetInstance); 448 return; 449 } 450 451 // OCC went active 452 ++activeCount; 453 454 #ifdef POWER10 455 if (activeCount == 1) 456 { 457 // First OCC went active (allow some time for all OCCs to go active) 458 waitForAllOccsTimer->restartOnce(60s); 459 } 460 #endif 461 462 if (activeCount == statusObjects.size()) 463 { 464 #ifdef POWER10 465 // All OCCs are now running 466 if (waitForAllOccsTimer->isEnabled()) 467 { 468 // stop occ wait timer 469 waitForAllOccsTimer->setEnabled(false); 470 } 471 472 // All OCCs have been found, check if we need a reset 473 if (resetRequired) 474 { 475 initiateOccRequest(resetInstance); 476 477 if (!waitForAllOccsTimer->isEnabled()) 478 { 479 lg2::warning( 480 "occsNotAllRunning: Restarting waitForAllOccTimer"); 481 // restart occ wait timer 482 waitForAllOccsTimer->restartOnce(60s); 483 } 484 } 485 else 486 { 487 // Verify master OCC and start presence monitor 488 validateOccMaster(); 489 } 490 #else 491 // Verify master OCC and start presence monitor 492 validateOccMaster(); 493 #endif 494 } 495 496 // Start poll timer if not already started 497 if (!_pollTimer->isEnabled()) 498 { 499 lg2::info("Manager: OCCs will be polled every {TIME} seconds", 500 "TIME", pollInterval); 501 502 // Send poll and start OCC poll timer 503 pollerTimerExpired(); 504 } 505 } 506 else 507 { 508 // OCC went away 509 if (activeCount > 0) 510 { 511 --activeCount; 512 } 513 else 514 { 515 lg2::info("OCC{INST} disabled, but currently no active OCCs", 516 "INST", instance); 517 } 518 519 if (activeCount == 0) 520 { 521 // No OCCs are running 522 523 if (resetInProgress) 524 { 525 // All OCC active sensors are clear (reset should be in 526 // progress) 527 lg2::info( 528 "statusCallBack: Clearing resetInProgress (activeCount={COUNT}, OCC{INST}, status={STATUS})", 529 "COUNT", activeCount, "INST", instance, "STATUS", status); 530 resetInProgress = false; 531 resetInstance = 255; 532 } 533 534 // Stop OCC poll timer 535 if (_pollTimer->isEnabled()) 536 { 537 lg2::info( 538 "Manager::statusCallBack(): OCCs are not running, stopping poll timer"); 539 _pollTimer->setEnabled(false); 540 } 541 542 #ifdef POWER10 543 // stop wait timer 544 if (waitForAllOccsTimer->isEnabled()) 545 { 546 waitForAllOccsTimer->setEnabled(false); 547 } 548 #endif 549 } 550 else if (resetInProgress) 551 { 552 lg2::info( 553 "statusCallBack: Skipping clear of resetInProgress (activeCount={COUNT}, OCC{INST}, status={STATUS})", 554 "COUNT", activeCount, "INST", instance, "STATUS", status); 555 } 556 #ifdef READ_OCC_SENSORS 557 // Clear OCC sensors 558 setSensorValueToNaN(instance); 559 #endif 560 } 561 562 #ifdef POWER10 563 if (waitingForAllOccActiveSensors) 564 { 565 if (utils::isHostRunning()) 566 { 567 checkAllActiveSensors(); 568 } 569 } 570 #endif 571 } 572 573 #ifdef I2C_OCC 574 void Manager::initStatusObjects() 575 { 576 // Make sure we have a valid path string 577 static_assert(sizeof(DEV_PATH) != 0); 578 579 auto deviceNames = i2c_occ::getOccHwmonDevices(DEV_PATH); 580 for (auto& name : deviceNames) 581 { 582 i2c_occ::i2cToDbus(name); 583 name = std::string(OCC_NAME) + '_' + name; 584 auto path = fs::path(OCC_CONTROL_ROOT) / name; 585 statusObjects.emplace_back( 586 std::make_unique<Status>(event, path.c_str(), *this)); 587 } 588 // The first device is master occ 589 pcap = std::make_unique<open_power::occ::powercap::PowerCap>( 590 *statusObjects.front()); 591 #ifdef POWER10 592 pmode = std::make_unique<powermode::PowerMode>(*this, powermode::PMODE_PATH, 593 powermode::PIPS_PATH); 594 // Set the master OCC on the PowerMode object 595 pmode->setMasterOcc(path); 596 #endif 597 } 598 #endif 599 600 #ifdef PLDM 601 void Manager::sbeTimeout(unsigned int instance) 602 { 603 auto obj = std::find_if(statusObjects.begin(), statusObjects.end(), 604 [instance](const auto& obj) { 605 return instance == obj->getOccInstanceID(); 606 }); 607 608 if (obj != statusObjects.end() && (*obj)->occActive()) 609 { 610 lg2::info("SBE timeout, requesting HRESET (OCC{INST})", "INST", 611 instance); 612 613 setSBEState(instance, SBE_STATE_NOT_USABLE); 614 615 pldmHandle->sendHRESET(instance); 616 } 617 } 618 619 bool Manager::updateOCCActive(instanceID instance, bool status) 620 { 621 auto obj = std::find_if(statusObjects.begin(), statusObjects.end(), 622 [instance](const auto& obj) { 623 return instance == obj->getOccInstanceID(); 624 }); 625 626 const bool hostRunning = open_power::occ::utils::isHostRunning(); 627 if (obj != statusObjects.end()) 628 { 629 if (!hostRunning && (status == true)) 630 { 631 lg2::warning( 632 "updateOCCActive: Host is not running yet (OCC{INST} active={STAT}), clearing sensor received", 633 "INST", instance, "STAT", status); 634 (*obj)->setPldmSensorReceived(false); 635 if (!waitingForAllOccActiveSensors) 636 { 637 lg2::info( 638 "updateOCCActive: Waiting for Host and all OCC Active Sensors"); 639 waitingForAllOccActiveSensors = true; 640 } 641 #ifdef POWER10 642 discoverTimer->restartOnce(30s); 643 #endif 644 return false; 645 } 646 else 647 { 648 (*obj)->setPldmSensorReceived(true); 649 return (*obj)->occActive(status); 650 } 651 } 652 else 653 { 654 if (hostRunning) 655 { 656 lg2::warning( 657 "updateOCCActive: No status object to update for OCC{INST} (active={STAT})", 658 "INST", instance, "STAT", status); 659 } 660 else 661 { 662 if (status == true) 663 { 664 lg2::warning( 665 "updateOCCActive: No status objects and Host is not running yet (OCC{INST} active={STAT})", 666 "INST", instance, "STAT", status); 667 } 668 } 669 if (status == true) 670 { 671 // OCC went active 672 queuedActiveState.insert(instance); 673 } 674 else 675 { 676 auto match = queuedActiveState.find(instance); 677 if (match != queuedActiveState.end()) 678 { 679 // OCC was disabled 680 queuedActiveState.erase(match); 681 } 682 } 683 return false; 684 } 685 } 686 687 // Called upon pldm event To set powermode Safe Mode State for system. 688 void Manager::updateOccSafeMode(bool safeMode) 689 { 690 #ifdef POWER10 691 pmode->updateDbusSafeMode(safeMode); 692 #endif 693 // Update the processor throttle status on dbus 694 for (auto& obj : statusObjects) 695 { 696 obj->updateThrottle(safeMode, THROTTLED_SAFE); 697 } 698 } 699 700 void Manager::sbeHRESETResult(instanceID instance, bool success) 701 { 702 if (success) 703 { 704 lg2::info("HRESET succeeded (OCC{INST})", "INST", instance); 705 706 setSBEState(instance, SBE_STATE_BOOTED); 707 708 return; 709 } 710 711 setSBEState(instance, SBE_STATE_FAILED); 712 713 if (sbeCanDump(instance)) 714 { 715 lg2::info("HRESET failed (OCC{INST}), triggering SBE dump", "INST", 716 instance); 717 718 auto& bus = utils::getBus(); 719 uint32_t src6 = instance << 16; 720 uint32_t logId = 721 FFDC::createPEL("org.open_power.Processor.Error.SbeChipOpTimeout", 722 src6, "SBE command timeout"); 723 724 try 725 { 726 constexpr auto interface = "xyz.openbmc_project.Dump.Create"; 727 constexpr auto function = "CreateDump"; 728 729 std::string service = 730 utils::getService(OP_DUMP_OBJ_PATH, interface); 731 auto method = bus.new_method_call(service.c_str(), OP_DUMP_OBJ_PATH, 732 interface, function); 733 734 std::map<std::string, std::variant<std::string, uint64_t>> 735 createParams{ 736 {"com.ibm.Dump.Create.CreateParameters.ErrorLogId", 737 uint64_t(logId)}, 738 {"com.ibm.Dump.Create.CreateParameters.DumpType", 739 "com.ibm.Dump.Create.DumpType.SBE"}, 740 {"com.ibm.Dump.Create.CreateParameters.FailingUnitId", 741 uint64_t(instance)}, 742 }; 743 744 method.append(createParams); 745 746 auto response = bus.call(method); 747 } 748 catch (const sdbusplus::exception_t& e) 749 { 750 constexpr auto ERROR_DUMP_DISABLED = 751 "xyz.openbmc_project.Dump.Create.Error.Disabled"; 752 if (e.name() == ERROR_DUMP_DISABLED) 753 { 754 lg2::info("Dump is disabled, skipping"); 755 } 756 else 757 { 758 lg2::error("Dump failed"); 759 } 760 } 761 } 762 763 // SBE Reset failed, try PM Complex reset 764 lg2::error("sbeHRESETResult: Forcing PM Complex reset"); 765 resetOccRequest(instance); 766 } 767 768 bool Manager::sbeCanDump(unsigned int instance) 769 { 770 struct pdbg_target* proc = getPdbgTarget(instance); 771 772 if (!proc) 773 { 774 // allow the dump in the error case 775 return true; 776 } 777 778 try 779 { 780 if (!openpower::phal::sbe::isDumpAllowed(proc)) 781 { 782 return false; 783 } 784 785 if (openpower::phal::pdbg::isSbeVitalAttnActive(proc)) 786 { 787 return false; 788 } 789 } 790 catch (openpower::phal::exception::SbeError& e) 791 { 792 lg2::info("Failed to query SBE state"); 793 } 794 795 // allow the dump in the error case 796 return true; 797 } 798 799 void Manager::setSBEState(unsigned int instance, enum sbe_state state) 800 { 801 struct pdbg_target* proc = getPdbgTarget(instance); 802 803 if (!proc) 804 { 805 return; 806 } 807 808 try 809 { 810 openpower::phal::sbe::setState(proc, state); 811 } 812 catch (const openpower::phal::exception::SbeError& e) 813 { 814 lg2::error("Failed to set SBE state: {ERROR}", "ERROR", e.what()); 815 } 816 } 817 818 struct pdbg_target* Manager::getPdbgTarget(unsigned int instance) 819 { 820 if (!pdbgInitialized) 821 { 822 try 823 { 824 openpower::phal::pdbg::init(); 825 pdbgInitialized = true; 826 } 827 catch (const openpower::phal::exception::PdbgError& e) 828 { 829 lg2::error("pdbg initialization failed"); 830 return nullptr; 831 } 832 } 833 834 struct pdbg_target* proc = nullptr; 835 pdbg_for_each_class_target("proc", proc) 836 { 837 if (pdbg_target_index(proc) == instance) 838 { 839 return proc; 840 } 841 } 842 843 lg2::error("Failed to get pdbg target"); 844 return nullptr; 845 } 846 #endif 847 848 void Manager::pollerTimerExpired() 849 { 850 if (!_pollTimer) 851 { 852 lg2::error("pollerTimerExpired() ERROR: Timer not defined"); 853 return; 854 } 855 856 #ifdef POWER10 857 if (resetRequired) 858 { 859 lg2::error("pollerTimerExpired() - Initiating PM Complex reset"); 860 initiateOccRequest(resetInstance); 861 862 if (!waitForAllOccsTimer->isEnabled()) 863 { 864 lg2::warning("pollerTimerExpired: Restarting waitForAllOccTimer"); 865 // restart occ wait timer 866 waitForAllOccsTimer->restartOnce(60s); 867 } 868 return; 869 } 870 #endif 871 872 for (auto& obj : statusObjects) 873 { 874 if (!obj->occActive()) 875 { 876 // OCC is not running yet 877 #ifdef READ_OCC_SENSORS 878 auto id = obj->getOccInstanceID(); 879 setSensorValueToNaN(id); 880 #endif 881 continue; 882 } 883 884 // Read sysfs to force kernel to poll OCC 885 obj->readOccState(); 886 887 #ifdef READ_OCC_SENSORS 888 // Read occ sensor values 889 getSensorValues(obj); 890 #endif 891 } 892 893 if (activeCount > 0) 894 { 895 // Restart OCC poll timer 896 _pollTimer->restartOnce(std::chrono::seconds(pollInterval)); 897 } 898 else 899 { 900 // No OCCs running, so poll timer will not be restarted 901 lg2::info( 902 "Manager::pollerTimerExpired: poll timer will not be restarted"); 903 } 904 } 905 906 #ifdef READ_OCC_SENSORS 907 void Manager::readTempSensors(const fs::path& path, uint32_t occInstance) 908 { 909 // There may be more than one sensor with the same FRU type 910 // and label so make two passes: the first to read the temps 911 // from sysfs, and the second to put them on D-Bus after 912 // resolving any conflicts. 913 std::map<std::string, double> sensorData; 914 915 std::regex expr{"temp\\d+_label$"}; // Example: temp5_label 916 for (auto& file : fs::directory_iterator(path)) 917 { 918 if (!std::regex_search(file.path().string(), expr)) 919 { 920 continue; 921 } 922 923 uint32_t labelValue{0}; 924 925 try 926 { 927 labelValue = readFile<uint32_t>(file.path()); 928 } 929 catch (const std::system_error& e) 930 { 931 lg2::debug( 932 "readTempSensors: Failed reading {PATH}, errno = {ERROR}", 933 "PATH", file.path().string(), "ERROR", e.code().value()); 934 continue; 935 } 936 937 const std::string& tempLabel = "label"; 938 const std::string filePathString = file.path().string().substr( 939 0, file.path().string().length() - tempLabel.length()); 940 941 uint32_t fruTypeValue{0}; 942 try 943 { 944 fruTypeValue = readFile<uint32_t>(filePathString + fruTypeSuffix); 945 } 946 catch (const std::system_error& e) 947 { 948 lg2::debug( 949 "readTempSensors: Failed reading {PATH}, errno = {ERROR}", 950 "PATH", filePathString + fruTypeSuffix, "ERROR", 951 e.code().value()); 952 continue; 953 } 954 955 std::string sensorPath = 956 OCC_SENSORS_ROOT + std::string("/temperature/"); 957 958 std::string dvfsTempPath; 959 960 if (fruTypeValue == VRMVdd) 961 { 962 sensorPath.append( 963 "vrm_vdd" + std::to_string(occInstance) + "_temp"); 964 } 965 else if (fruTypeValue == processorIoRing) 966 { 967 sensorPath.append( 968 "proc" + std::to_string(occInstance) + "_ioring_temp"); 969 dvfsTempPath = std::string{OCC_SENSORS_ROOT} + "/temperature/proc" + 970 std::to_string(occInstance) + "_ioring_dvfs_temp"; 971 } 972 else 973 { 974 uint16_t type = (labelValue & 0xFF000000) >> 24; 975 uint16_t instanceID = labelValue & 0x0000FFFF; 976 977 if (type == OCC_DIMM_TEMP_SENSOR_TYPE) 978 { 979 if (fruTypeValue == fruTypeNotAvailable) 980 { 981 // Not all DIMM related temps are available to read 982 // (no _input file in this case) 983 continue; 984 } 985 auto iter = dimmTempSensorName.find(fruTypeValue); 986 if (iter == dimmTempSensorName.end()) 987 { 988 lg2::error( 989 "readTempSensors: Fru type error! fruTypeValue = {FRU}) ", 990 "FRU", fruTypeValue); 991 continue; 992 } 993 994 sensorPath.append( 995 "dimm" + std::to_string(instanceID) + iter->second); 996 997 dvfsTempPath = std::string{OCC_SENSORS_ROOT} + "/temperature/" + 998 dimmDVFSSensorName.at(fruTypeValue); 999 } 1000 else if (type == OCC_CPU_TEMP_SENSOR_TYPE) 1001 { 1002 if (fruTypeValue == processorCore) 1003 { 1004 // The OCC reports small core temps, of which there are 1005 // two per big core. All current P10 systems are in big 1006 // core mode, so use a big core name. 1007 uint16_t coreNum = instanceID / 2; 1008 uint16_t tempNum = instanceID % 2; 1009 sensorPath.append("proc" + std::to_string(occInstance) + 1010 "_core" + std::to_string(coreNum) + "_" + 1011 std::to_string(tempNum) + "_temp"); 1012 1013 dvfsTempPath = 1014 std::string{OCC_SENSORS_ROOT} + "/temperature/proc" + 1015 std::to_string(occInstance) + "_core_dvfs_temp"; 1016 } 1017 else 1018 { 1019 continue; 1020 } 1021 } 1022 else 1023 { 1024 continue; 1025 } 1026 } 1027 1028 // The dvfs temp file only needs to be read once per chip per type. 1029 if (!dvfsTempPath.empty() && 1030 !dbus::OccDBusSensors::getOccDBus().hasDvfsTemp(dvfsTempPath)) 1031 { 1032 try 1033 { 1034 auto dvfsValue = readFile<double>(filePathString + maxSuffix); 1035 1036 dbus::OccDBusSensors::getOccDBus().setDvfsTemp( 1037 dvfsTempPath, dvfsValue * std::pow(10, -3)); 1038 } 1039 catch (const std::system_error& e) 1040 { 1041 lg2::debug( 1042 "readTempSensors: Failed reading {PATH}, errno = {ERROR}", 1043 "PATH", filePathString + maxSuffix, "ERROR", 1044 e.code().value()); 1045 } 1046 } 1047 1048 uint32_t faultValue{0}; 1049 try 1050 { 1051 faultValue = readFile<uint32_t>(filePathString + faultSuffix); 1052 } 1053 catch (const std::system_error& e) 1054 { 1055 lg2::debug( 1056 "readTempSensors: Failed reading {PATH}, errno = {ERROR}", 1057 "PATH", filePathString + faultSuffix, "ERROR", 1058 e.code().value()); 1059 continue; 1060 } 1061 1062 double tempValue{0}; 1063 // NOTE: if OCC sends back 0xFF, kernal sets this fault value to 1. 1064 if (faultValue != 0) 1065 { 1066 tempValue = std::numeric_limits<double>::quiet_NaN(); 1067 } 1068 else 1069 { 1070 // Read the temperature 1071 try 1072 { 1073 tempValue = readFile<double>(filePathString + inputSuffix); 1074 } 1075 catch (const std::system_error& e) 1076 { 1077 lg2::debug( 1078 "readTempSensors: Failed reading {PATH}, errno = {ERROR}", 1079 "PATH", filePathString + inputSuffix, "ERROR", 1080 e.code().value()); 1081 1082 // if errno == EAGAIN(Resource temporarily unavailable) then set 1083 // temp to 0, to avoid using old temp, and affecting FAN 1084 // Control. 1085 if (e.code().value() == EAGAIN) 1086 { 1087 tempValue = 0; 1088 } 1089 // else the errno would be something like 1090 // EBADF(Bad file descriptor) 1091 // or ENOENT(No such file or directory) 1092 else 1093 { 1094 continue; 1095 } 1096 } 1097 } 1098 1099 // If this object path already has a value, only overwite 1100 // it if the previous one was an NaN or a smaller value. 1101 auto existing = sensorData.find(sensorPath); 1102 if (existing != sensorData.end()) 1103 { 1104 // Multiple sensors found for this FRU type 1105 if ((std::isnan(existing->second) && (tempValue == 0)) || 1106 ((existing->second == 0) && std::isnan(tempValue))) 1107 { 1108 // One of the redundant sensors has failed (0xFF/nan), and the 1109 // other sensor has no reading (0), so set the FRU to NaN to 1110 // force fan increase 1111 tempValue = std::numeric_limits<double>::quiet_NaN(); 1112 existing->second = tempValue; 1113 } 1114 if (std::isnan(existing->second) || (tempValue > existing->second)) 1115 { 1116 existing->second = tempValue; 1117 } 1118 } 1119 else 1120 { 1121 // First sensor for this FRU type 1122 sensorData[sensorPath] = tempValue; 1123 } 1124 } 1125 1126 // Now publish the values on D-Bus. 1127 for (const auto& [objectPath, value] : sensorData) 1128 { 1129 dbus::OccDBusSensors::getOccDBus().setValue(objectPath, 1130 value * std::pow(10, -3)); 1131 1132 dbus::OccDBusSensors::getOccDBus().setOperationalStatus( 1133 objectPath, !std::isnan(value)); 1134 1135 if (existingSensors.find(objectPath) == existingSensors.end()) 1136 { 1137 dbus::OccDBusSensors::getOccDBus().setChassisAssociation( 1138 objectPath, {"all_sensors"}); 1139 } 1140 1141 existingSensors[objectPath] = occInstance; 1142 } 1143 } 1144 1145 std::optional<std::string> 1146 Manager::getPowerLabelFunctionID(const std::string& value) 1147 { 1148 // If the value is "system", then the FunctionID is "system". 1149 if (value == "system") 1150 { 1151 return value; 1152 } 1153 1154 // If the value is not "system", then the label value have 3 numbers, of 1155 // which we only care about the middle one: 1156 // <sensor id>_<function id>_<apss channel> 1157 // eg: The value is "0_10_5" , then the FunctionID is "10". 1158 if (value.find("_") == std::string::npos) 1159 { 1160 return std::nullopt; 1161 } 1162 1163 auto powerLabelValue = value.substr((value.find("_") + 1)); 1164 1165 if (powerLabelValue.find("_") == std::string::npos) 1166 { 1167 return std::nullopt; 1168 } 1169 1170 return powerLabelValue.substr(0, powerLabelValue.find("_")); 1171 } 1172 1173 void Manager::readPowerSensors(const fs::path& path, uint32_t id) 1174 { 1175 std::regex expr{"power\\d+_label$"}; // Example: power5_label 1176 for (auto& file : fs::directory_iterator(path)) 1177 { 1178 if (!std::regex_search(file.path().string(), expr)) 1179 { 1180 continue; 1181 } 1182 1183 std::string labelValue; 1184 try 1185 { 1186 labelValue = readFile<std::string>(file.path()); 1187 } 1188 catch (const std::system_error& e) 1189 { 1190 lg2::debug( 1191 "readPowerSensors: Failed reading {PATH}, errno = {ERROR}", 1192 "PATH", file.path().string(), "ERROR", e.code().value()); 1193 continue; 1194 } 1195 1196 auto functionID = getPowerLabelFunctionID(labelValue); 1197 if (functionID == std::nullopt) 1198 { 1199 continue; 1200 } 1201 1202 const std::string& tempLabel = "label"; 1203 const std::string filePathString = file.path().string().substr( 1204 0, file.path().string().length() - tempLabel.length()); 1205 1206 std::string sensorPath = OCC_SENSORS_ROOT + std::string("/power/"); 1207 1208 auto iter = powerSensorName.find(*functionID); 1209 if (iter == powerSensorName.end()) 1210 { 1211 continue; 1212 } 1213 sensorPath.append(iter->second); 1214 1215 double tempValue{0}; 1216 1217 try 1218 { 1219 tempValue = readFile<double>(filePathString + inputSuffix); 1220 } 1221 catch (const std::system_error& e) 1222 { 1223 lg2::debug( 1224 "readPowerSensors: Failed reading {PATH}, errno = {ERROR}", 1225 "PATH", filePathString + inputSuffix, "ERROR", 1226 e.code().value()); 1227 continue; 1228 } 1229 1230 dbus::OccDBusSensors::getOccDBus().setUnit( 1231 sensorPath, "xyz.openbmc_project.Sensor.Value.Unit.Watts"); 1232 1233 dbus::OccDBusSensors::getOccDBus().setValue( 1234 sensorPath, tempValue * std::pow(10, -3) * std::pow(10, -3)); 1235 1236 dbus::OccDBusSensors::getOccDBus().setOperationalStatus( 1237 sensorPath, true); 1238 1239 if (existingSensors.find(sensorPath) == existingSensors.end()) 1240 { 1241 std::vector<int> occs; 1242 std::vector<std::string> fTypeList = {"all_sensors"}; 1243 if (iter->second == "total_power") 1244 { 1245 // Total system power has its own chassis association 1246 fTypeList.push_back("total_power"); 1247 } 1248 dbus::OccDBusSensors::getOccDBus().setChassisAssociation( 1249 sensorPath, fTypeList); 1250 } 1251 1252 existingSensors[sensorPath] = id; 1253 } 1254 return; 1255 } 1256 1257 void Manager::setSensorValueToNaN(uint32_t id) const 1258 { 1259 for (const auto& [sensorPath, occId] : existingSensors) 1260 { 1261 if (occId == id) 1262 { 1263 dbus::OccDBusSensors::getOccDBus().setValue( 1264 sensorPath, std::numeric_limits<double>::quiet_NaN()); 1265 1266 dbus::OccDBusSensors::getOccDBus().setOperationalStatus( 1267 sensorPath, true); 1268 } 1269 } 1270 return; 1271 } 1272 1273 void Manager::setSensorValueToNonFunctional(uint32_t id) const 1274 { 1275 for (const auto& [sensorPath, occId] : existingSensors) 1276 { 1277 if (occId == id) 1278 { 1279 dbus::OccDBusSensors::getOccDBus().setValue( 1280 sensorPath, std::numeric_limits<double>::quiet_NaN()); 1281 1282 dbus::OccDBusSensors::getOccDBus().setOperationalStatus( 1283 sensorPath, false); 1284 } 1285 } 1286 return; 1287 } 1288 1289 void Manager::getSensorValues(std::unique_ptr<Status>& occ) 1290 { 1291 static bool tracedError[8] = {0}; 1292 const fs::path sensorPath = occ->getHwmonPath(); 1293 const uint32_t id = occ->getOccInstanceID(); 1294 1295 if (fs::exists(sensorPath)) 1296 { 1297 // Read temperature sensors 1298 readTempSensors(sensorPath, id); 1299 1300 if (occ->isMasterOcc()) 1301 { 1302 // Read power sensors 1303 readPowerSensors(sensorPath, id); 1304 } 1305 tracedError[id] = false; 1306 } 1307 else 1308 { 1309 if (!tracedError[id]) 1310 { 1311 lg2::error( 1312 "Manager::getSensorValues: OCC{INST} sensor path missing: {PATH}", 1313 "INST", id, "PATH", sensorPath); 1314 tracedError[id] = true; 1315 } 1316 } 1317 1318 return; 1319 } 1320 #endif 1321 1322 // Read the altitude from DBus 1323 void Manager::readAltitude() 1324 { 1325 static bool traceAltitudeErr = true; 1326 1327 utils::PropertyValue altitudeProperty{}; 1328 try 1329 { 1330 altitudeProperty = utils::getProperty(ALTITUDE_PATH, ALTITUDE_INTERFACE, 1331 ALTITUDE_PROP); 1332 auto sensorVal = std::get<double>(altitudeProperty); 1333 if (sensorVal < 0xFFFF) 1334 { 1335 if (sensorVal < 0) 1336 { 1337 altitude = 0; 1338 } 1339 else 1340 { 1341 // Round to nearest meter 1342 altitude = uint16_t(sensorVal + 0.5); 1343 } 1344 lg2::debug("readAltitude: sensor={VALUE} ({ALT}m)", "VALUE", 1345 sensorVal, "ALT", altitude); 1346 traceAltitudeErr = true; 1347 } 1348 else 1349 { 1350 if (traceAltitudeErr) 1351 { 1352 traceAltitudeErr = false; 1353 lg2::debug("Invalid altitude value: {ALT}", "ALT", sensorVal); 1354 } 1355 } 1356 } 1357 catch (const sdbusplus::exception_t& e) 1358 { 1359 if (traceAltitudeErr) 1360 { 1361 traceAltitudeErr = false; 1362 lg2::info("Unable to read Altitude: {ERROR}", "ERROR", e.what()); 1363 } 1364 altitude = 0xFFFF; // not available 1365 } 1366 } 1367 1368 // Callback function when ambient temperature changes 1369 void Manager::ambientCallback(sdbusplus::message_t& msg) 1370 { 1371 double currentTemp = 0; 1372 uint8_t truncatedTemp = 0xFF; 1373 std::string msgSensor; 1374 std::map<std::string, std::variant<double>> msgData; 1375 msg.read(msgSensor, msgData); 1376 1377 auto valPropMap = msgData.find(AMBIENT_PROP); 1378 if (valPropMap == msgData.end()) 1379 { 1380 lg2::debug("ambientCallback: Unknown ambient property changed"); 1381 return; 1382 } 1383 currentTemp = std::get<double>(valPropMap->second); 1384 if (std::isnan(currentTemp)) 1385 { 1386 truncatedTemp = 0xFF; 1387 } 1388 else 1389 { 1390 if (currentTemp < 0) 1391 { 1392 truncatedTemp = 0; 1393 } 1394 else 1395 { 1396 // Round to nearest degree C 1397 truncatedTemp = uint8_t(currentTemp + 0.5); 1398 } 1399 } 1400 1401 // If ambient changes, notify OCCs 1402 if (truncatedTemp != ambient) 1403 { 1404 lg2::debug("ambientCallback: Ambient change from {OLD} to {NEW}C", 1405 "OLD", ambient, "NEW", currentTemp); 1406 1407 ambient = truncatedTemp; 1408 if (altitude == 0xFFFF) 1409 { 1410 // No altitude yet, try reading again 1411 readAltitude(); 1412 } 1413 1414 lg2::debug("ambientCallback: Ambient: {TEMP}C, altitude: {ALT}m", 1415 "TEMP", ambient, "ALT", altitude); 1416 #ifdef POWER10 1417 // Send ambient and altitude to all OCCs 1418 for (auto& obj : statusObjects) 1419 { 1420 if (obj->occActive()) 1421 { 1422 obj->sendAmbient(ambient, altitude); 1423 } 1424 } 1425 #endif // POWER10 1426 } 1427 } 1428 1429 // return the current ambient and altitude readings 1430 void Manager::getAmbientData(bool& ambientValid, uint8_t& ambientTemp, 1431 uint16_t& altitudeValue) const 1432 { 1433 ambientValid = true; 1434 ambientTemp = ambient; 1435 altitudeValue = altitude; 1436 1437 if (ambient == 0xFF) 1438 { 1439 ambientValid = false; 1440 } 1441 } 1442 1443 #ifdef POWER10 1444 // Called when waitForAllOccsTimer expires 1445 // After the first OCC goes active, this timer will be started (60 seconds) 1446 void Manager::occsNotAllRunning() 1447 { 1448 if (resetInProgress) 1449 { 1450 lg2::warning( 1451 "occsNotAllRunning: Ignoring waitForAllOccsTimer because reset is in progress"); 1452 return; 1453 } 1454 if (activeCount != statusObjects.size()) 1455 { 1456 // Not all OCCs went active 1457 lg2::warning( 1458 "occsNotAllRunning: Active OCC count ({COUNT}) does not match expected count ({EXP})", 1459 "COUNT", activeCount, "EXP", statusObjects.size()); 1460 // Procs may be garded, so may be expected 1461 } 1462 1463 if (resetRequired) 1464 { 1465 initiateOccRequest(resetInstance); 1466 1467 if (!waitForAllOccsTimer->isEnabled()) 1468 { 1469 lg2::warning("occsNotAllRunning: Restarting waitForAllOccTimer"); 1470 // restart occ wait timer 1471 waitForAllOccsTimer->restartOnce(60s); 1472 } 1473 } 1474 else 1475 { 1476 validateOccMaster(); 1477 } 1478 } 1479 1480 #ifdef PLDM 1481 // Called when throttlePldmTraceTimer expires. 1482 // If this timer expires, that indicates there are no OCC active sensor PDRs 1483 // found which will trigger pldm traces to be throttled. 1484 // The second time this timer expires, a PEL will get created. 1485 void Manager::throttlePldmTraceExpired() 1486 { 1487 if (utils::isHostRunning()) 1488 { 1489 if (!onPldmTimeoutCreatePel) 1490 { 1491 // Throttle traces 1492 pldmHandle->setTraceThrottle(true); 1493 // Restart timer to log a PEL when timer expires 1494 onPldmTimeoutCreatePel = true; 1495 throttlePldmTraceTimer->restartOnce(40min); 1496 } 1497 else 1498 { 1499 lg2::error( 1500 "throttlePldmTraceExpired(): OCC active sensors still not available!"); 1501 // Create PEL 1502 createPldmSensorPEL(); 1503 } 1504 } 1505 else 1506 { 1507 // Make sure traces are not throttled 1508 pldmHandle->setTraceThrottle(false); 1509 lg2::info( 1510 "throttlePldmTraceExpired(): host it not running ignoring sensor timer"); 1511 } 1512 } 1513 1514 void Manager::createPldmSensorPEL() 1515 { 1516 Error::Descriptor d = Error::Descriptor(MISSING_OCC_SENSORS_PATH); 1517 std::map<std::string, std::string> additionalData; 1518 1519 additionalData.emplace("_PID", std::to_string(getpid())); 1520 1521 lg2::info( 1522 "createPldmSensorPEL(): Unable to find PLDM sensors for the OCCs"); 1523 1524 auto& bus = utils::getBus(); 1525 1526 try 1527 { 1528 FFDCFiles ffdc; 1529 // Add occ-control journal traces to PEL FFDC 1530 auto occJournalFile = 1531 FFDC::addJournalEntries(ffdc, "openpower-occ-control", 40); 1532 1533 static constexpr auto loggingObjectPath = 1534 "/xyz/openbmc_project/logging"; 1535 static constexpr auto opLoggingInterface = "org.open_power.Logging.PEL"; 1536 std::string service = 1537 utils::getService(loggingObjectPath, opLoggingInterface); 1538 auto method = 1539 bus.new_method_call(service.c_str(), loggingObjectPath, 1540 opLoggingInterface, "CreatePELWithFFDCFiles"); 1541 1542 // Set level to Warning (Predictive). 1543 auto level = 1544 sdbusplus::xyz::openbmc_project::Logging::server::convertForMessage( 1545 sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level:: 1546 Warning); 1547 1548 method.append(d.path, level, additionalData, ffdc); 1549 bus.call(method); 1550 } 1551 catch (const sdbusplus::exception_t& e) 1552 { 1553 lg2::error("Failed to create MISSING_OCC_SENSORS PEL: {ERROR}", "ERROR", 1554 e.what()); 1555 } 1556 } 1557 #endif // PLDM 1558 #endif // POWER10 1559 1560 // Verify single master OCC and start presence monitor 1561 void Manager::validateOccMaster() 1562 { 1563 int masterInstance = -1; 1564 for (auto& obj : statusObjects) 1565 { 1566 auto instance = obj->getOccInstanceID(); 1567 #ifdef POWER10 1568 if (!obj->occActive()) 1569 { 1570 if (utils::isHostRunning()) 1571 { 1572 // Check if sensor was queued while waiting for discovery 1573 auto match = queuedActiveState.find(instance); 1574 if (match != queuedActiveState.end()) 1575 { 1576 queuedActiveState.erase(match); 1577 lg2::info("validateOccMaster: OCC{INST} is ACTIVE (queued)", 1578 "INST", instance); 1579 obj->occActive(true); 1580 } 1581 else 1582 { 1583 // OCC does not appear to be active yet, check active sensor 1584 #ifdef PLDM 1585 pldmHandle->checkActiveSensor(instance); 1586 #endif 1587 if (obj->occActive()) 1588 { 1589 lg2::info( 1590 "validateOccMaster: OCC{INST} is ACTIVE after reading sensor", 1591 "INST", instance); 1592 } 1593 } 1594 } 1595 else 1596 { 1597 lg2::warning( 1598 "validateOccMaster: HOST is not running (OCC{INST})", 1599 "INST", instance); 1600 return; 1601 } 1602 } 1603 #endif // POWER10 1604 1605 if (obj->isMasterOcc()) 1606 { 1607 obj->addPresenceWatchMaster(); 1608 1609 if (masterInstance == -1) 1610 { 1611 masterInstance = instance; 1612 } 1613 else 1614 { 1615 lg2::error( 1616 "validateOccMaster: Multiple OCC masters! ({MAST1} and {MAST2})", 1617 "MAST1", masterInstance, "MAST2", instance); 1618 // request reset 1619 obj->deviceError(Error::Descriptor(PRESENCE_ERROR_PATH)); 1620 } 1621 } 1622 } 1623 1624 if (masterInstance < 0) 1625 { 1626 lg2::error("validateOccMaster: Master OCC not found! (of {NUM} OCCs)", 1627 "NUM", statusObjects.size()); 1628 // request reset 1629 statusObjects.front()->deviceError( 1630 Error::Descriptor(PRESENCE_ERROR_PATH)); 1631 } 1632 else 1633 { 1634 lg2::info("validateOccMaster: OCC{INST} is master of {COUNT} OCCs", 1635 "INST", masterInstance, "COUNT", activeCount); 1636 #ifdef POWER10 1637 pmode->updateDbusSafeMode(false); 1638 #endif 1639 } 1640 } 1641 1642 void Manager::updatePcapBounds() const 1643 { 1644 if (pcap) 1645 { 1646 pcap->updatePcapBounds(); 1647 } 1648 } 1649 1650 } // namespace occ 1651 } // namespace open_power 1652