1 #include "config.h"
2
3 #include "occ_manager.hpp"
4
5 #include "i2c_occ.hpp"
6 #include "occ_dbus.hpp"
7 #include "occ_errors.hpp"
8 #include "utils.hpp"
9
10 #include <phosphor-logging/elog-errors.hpp>
11 #include <phosphor-logging/lg2.hpp>
12 #include <xyz/openbmc_project/Common/error.hpp>
13
14 #include <chrono>
15 #include <cmath>
16 #include <filesystem>
17 #include <fstream>
18 #include <regex>
19
20 namespace open_power
21 {
22 namespace occ
23 {
24
// fru_type value for which readTempSensors() skips a DIMM temperature sensor
constexpr uint32_t fruTypeNotAvailable = 0xFF;

// Suffixes appended to a sensor's sysfs file prefix (the label file name with
// its trailing "label" removed) to locate the related files.
constexpr auto fruTypeSuffix = "fru_type";
constexpr auto faultSuffix = "fault";
constexpr auto inputSuffix = "input";
constexpr auto maxSuffix = "max";

// findAndCreateObjects() only searches for OCC devices while this file does
// not exist; while it exists the discovery timer is simply restarted.
constexpr auto HOST_ON_FILE = "/run/openbmc/host@0-on";
32
33 using namespace phosphor::logging;
34 using namespace std::literals::chrono_literals;
35
/**
 * @brief Read a single value of type T from the start of a file.
 *
 * The value is extracted with operator>>, so leading whitespace is skipped
 * and extraction stops at the first character that cannot belong to T.
 *
 * @tparam T - type of the value to extract
 * @param[in] path - file to read
 *
 * @return the extracted value
 *
 * @throws std::system_error (generic category) if the file cannot be opened
 *         or the extraction fails. The code is the errno left by the failing
 *         operation, or EIO when the failure did not set errno.
 */
template <typename T>
T readFile(const std::string& path)
{
    std::ifstream ifs;
    // Turn every stream failure, including reaching end-of-file, into an
    // exception so it is handled in one place below.
    ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit |
                   std::ifstream::eofbit);
    T data;

    // Clear errno so a value left behind by an earlier, unrelated call is
    // never reported as the reason this read failed.
    errno = 0;

    try
    {
        ifs.open(path);
        ifs >> data;
        ifs.close();
    }
    catch (const std::exception&)
    {
        // Failures such as a parse error or an empty file do not set errno;
        // report those as EIO rather than as error code 0.
        const auto err = (errno != 0) ? errno : EIO;
        throw std::system_error(err, std::generic_category());
    }

    return data;
}
58
59 // findAndCreateObjects():
60 // Takes care of getting the required objects created and
61 // finds the available devices/processors.
62 // (function is called everytime the discoverTimer expires)
63 // - create the PowerMode object to control OCC modes
64 // - create statusObjects for each OCC device found
65 // - waits for OCC Active sensors PDRs to become available
66 // - restart discoverTimer if all data is not available yet
findAndCreateObjects()67 void Manager::findAndCreateObjects()
68 {
69 #ifndef POWER10
70 for (auto id = 0; id < MAX_CPUS; ++id)
71 {
72 // Create one occ per cpu
73 auto occ = std::string(OCC_NAME) + std::to_string(id);
74 createObjects(occ);
75 }
76 #else
77 if (!pmode)
78 {
79 // Create the power mode object
80 pmode = std::make_unique<powermode::PowerMode>(
81 *this, powermode::PMODE_PATH, powermode::PIPS_PATH, event);
82 }
83
84 if (!fs::exists(HOST_ON_FILE))
85 {
86 static bool statusObjCreated = false;
87 if (!statusObjCreated)
88 {
89 // Create the OCCs based on on the /dev/occX devices
90 auto occs = findOCCsInDev();
91
92 if (occs.empty() || (prevOCCSearch.size() != occs.size()))
93 {
94 // Something changed or no OCCs yet, try again in 10s.
95 // Note on the first pass prevOCCSearch will be empty,
96 // so there will be at least one delay to give things
97 // a chance to settle.
98 prevOCCSearch = occs;
99
100 lg2::info(
101 "Manager::findAndCreateObjects(): Waiting for OCCs (currently {QTY})",
102 "QTY", occs.size());
103
104 discoverTimer->restartOnce(10s);
105 }
106 else
107 {
108 // All OCCs appear to be available, create status objects
109
110 // createObjects requires OCC0 first.
111 std::sort(occs.begin(), occs.end());
112
113 lg2::info(
114 "Manager::findAndCreateObjects(): Creating {QTY} OCC Status Objects",
115 "QTY", occs.size());
116 for (auto id : occs)
117 {
118 createObjects(std::string(OCC_NAME) + std::to_string(id));
119 }
120 statusObjCreated = true;
121 waitingForAllOccActiveSensors = true;
122
123 // Find/update the processor path associated with each OCC
124 for (auto& obj : statusObjects)
125 {
126 obj->updateProcAssociation();
127 }
128 }
129 }
130
131 if (statusObjCreated && waitingForAllOccActiveSensors)
132 {
133 static bool tracedHostWait = false;
134 if (utils::isHostRunning())
135 {
136 if (tracedHostWait)
137 {
138 lg2::info(
139 "Manager::findAndCreateObjects(): Host is running");
140 tracedHostWait = false;
141 }
142 checkAllActiveSensors();
143 }
144 else
145 {
146 if (!tracedHostWait)
147 {
148 lg2::info(
149 "Manager::findAndCreateObjects(): Waiting for host to start");
150 tracedHostWait = true;
151 }
152 discoverTimer->restartOnce(30s);
153 #ifdef PLDM
154 if (throttlePldmTraceTimer->isEnabled())
155 {
156 // Host is no longer running, disable throttle timer and
157 // make sure traces are not throttled
158 lg2::info("findAndCreateObjects(): disabling sensor timer");
159 throttlePldmTraceTimer->setEnabled(false);
160 pldmHandle->setTraceThrottle(false);
161 }
162 #endif
163 }
164 }
165 }
166 else
167 {
168 lg2::info(
169 "Manager::findAndCreateObjects(): Waiting for {FILE} to complete...",
170 "FILE", HOST_ON_FILE);
171 discoverTimer->restartOnce(10s);
172 }
173 #endif
174 }
175
176 #ifdef POWER10
177 // Check if all occActive sensors are available
checkAllActiveSensors()178 void Manager::checkAllActiveSensors()
179 {
180 static bool allActiveSensorAvailable = false;
181 static bool tracedSensorWait = false;
182 static bool waitingForHost = false;
183
184 if (open_power::occ::utils::isHostRunning())
185 {
186 if (waitingForHost)
187 {
188 waitingForHost = false;
189 lg2::info("checkAllActiveSensors(): Host is now running");
190 }
191
192 // Start with the assumption that all are available
193 allActiveSensorAvailable = true;
194 for (auto& obj : statusObjects)
195 {
196 if ((!obj->occActive()) && (!obj->getPldmSensorReceived()))
197 {
198 auto instance = obj->getOccInstanceID();
199 // Check if sensor was queued while waiting for discovery
200 auto match = queuedActiveState.find(instance);
201 if (match != queuedActiveState.end())
202 {
203 queuedActiveState.erase(match);
204 lg2::info(
205 "checkAllActiveSensors(): OCC{INST} is ACTIVE (queued)",
206 "INST", instance);
207 obj->occActive(true);
208 }
209 else
210 {
211 allActiveSensorAvailable = false;
212 if (!tracedSensorWait)
213 {
214 lg2::info(
215 "checkAllActiveSensors(): Waiting on OCC{INST} Active sensor",
216 "INST", instance);
217 tracedSensorWait = true;
218 #ifdef PLDM
219 // Make sure PLDM traces are not throttled
220 pldmHandle->setTraceThrottle(false);
221 // Start timer to throttle PLDM traces when timer
222 // expires
223 onPldmTimeoutCreatePel = false;
224 throttlePldmTraceTimer->restartOnce(5min);
225 #endif
226 }
227 #ifdef PLDM
228 // Ignore active sensor check if the OCCs are being reset
229 if (!resetInProgress)
230 {
231 pldmHandle->checkActiveSensor(obj->getOccInstanceID());
232 }
233 #endif
234 break;
235 }
236 }
237 }
238 }
239 else
240 {
241 if (!waitingForHost)
242 {
243 waitingForHost = true;
244 lg2::info("checkAllActiveSensors(): Waiting for host to start");
245 #ifdef PLDM
246 if (throttlePldmTraceTimer->isEnabled())
247 {
248 // Host is no longer running, disable throttle timer and
249 // make sure traces are not throttled
250 lg2::info("checkAllActiveSensors(): disabling sensor timer");
251 throttlePldmTraceTimer->setEnabled(false);
252 pldmHandle->setTraceThrottle(false);
253 }
254 #endif
255 }
256 }
257
258 if (allActiveSensorAvailable)
259 {
260 // All sensors were found, disable the discovery timer
261 if (discoverTimer->isEnabled())
262 {
263 discoverTimer->setEnabled(false);
264 }
265 #ifdef PLDM
266 if (throttlePldmTraceTimer->isEnabled())
267 {
268 // Disable throttle timer and make sure traces are not throttled
269 throttlePldmTraceTimer->setEnabled(false);
270 pldmHandle->setTraceThrottle(false);
271 }
272 #endif
273 if (waitingForAllOccActiveSensors)
274 {
275 lg2::info(
276 "checkAllActiveSensors(): OCC Active sensors are available");
277 waitingForAllOccActiveSensors = false;
278
279 if (resetRequired)
280 {
281 initiateOccRequest(resetInstance);
282
283 if (!waitForAllOccsTimer->isEnabled())
284 {
285 lg2::warning(
286 "occsNotAllRunning: Restarting waitForAllOccTimer");
287 // restart occ wait timer to check status after reset
288 // completes
289 waitForAllOccsTimer->restartOnce(60s);
290 }
291 }
292 }
293 queuedActiveState.clear();
294 tracedSensorWait = false;
295 }
296 else
297 {
298 // Not all sensors were available, so keep waiting
299 if (!tracedSensorWait)
300 {
301 lg2::info(
302 "checkAllActiveSensors(): Waiting for OCC Active sensors to become available");
303 tracedSensorWait = true;
304 }
305 discoverTimer->restartOnce(10s);
306 }
307 }
308 #endif
309
findOCCsInDev()310 std::vector<int> Manager::findOCCsInDev()
311 {
312 std::vector<int> occs;
313 std::regex expr{R"(occ(\d+)$)"};
314
315 for (auto& file : fs::directory_iterator("/dev"))
316 {
317 std::smatch match;
318 std::string path{file.path().string()};
319 if (std::regex_search(path, match, expr))
320 {
321 auto num = std::stoi(match[1].str());
322
323 // /dev numbering starts at 1, ours starts at 0.
324 occs.push_back(num - 1);
325 }
326 }
327
328 return occs;
329 }
330
cpuCreated(sdbusplus::message_t & msg)331 int Manager::cpuCreated(sdbusplus::message_t& msg)
332 {
333 namespace fs = std::filesystem;
334
335 sdbusplus::message::object_path o;
336 msg.read(o);
337 fs::path cpuPath(std::string(std::move(o)));
338
339 auto name = cpuPath.filename().string();
340 auto index = name.find(CPU_NAME);
341 name.replace(index, std::strlen(CPU_NAME), OCC_NAME);
342
343 createObjects(name);
344
345 return 0;
346 }
347
createObjects(const std::string & occ)348 void Manager::createObjects(const std::string& occ)
349 {
350 auto path = fs::path(OCC_CONTROL_ROOT) / occ;
351
352 statusObjects.emplace_back(std::make_unique<Status>(
353 event, path.c_str(), *this,
354 #ifdef POWER10
355 pmode,
356 #endif
357 std::bind(std::mem_fn(&Manager::statusCallBack), this,
358 std::placeholders::_1, std::placeholders::_2)
359 #ifdef PLDM
360 ,
361 // Callback will set flag indicating reset needs to be done
362 // instead of immediately issuing a reset via PLDM.
363 std::bind(std::mem_fn(&Manager::resetOccRequest), this,
364 std::placeholders::_1)
365 #endif
366 ));
367
368 // Create the power cap monitor object
369 if (!pcap)
370 {
371 pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
372 *statusObjects.back());
373 }
374
375 if (statusObjects.back()->isMasterOcc())
376 {
377 lg2::info("Manager::createObjects(): OCC{INST} is the master", "INST",
378 statusObjects.back()->getOccInstanceID());
379 _pollTimer->setEnabled(false);
380
381 #ifdef POWER10
382 // Set the master OCC on the PowerMode object
383 pmode->setMasterOcc(path);
384 #endif
385 }
386
387 passThroughObjects.emplace_back(std::make_unique<PassThrough>(
388 path.c_str()
389 #ifdef POWER10
390 ,
391 pmode
392 #endif
393 ));
394 }
395
396 // If a reset is not already outstanding, set a flag to indicate that a reset is
397 // needed.
resetOccRequest(instanceID instance)398 void Manager::resetOccRequest(instanceID instance)
399 {
400 if (!resetRequired)
401 {
402 resetRequired = true;
403 resetInstance = instance;
404 lg2::error(
405 "resetOccRequest: PM Complex reset was requested due to OCC{INST}",
406 "INST", instance);
407 }
408 else if (instance != resetInstance)
409 {
410 lg2::warning(
411 "resetOccRequest: Ignoring PM Complex reset request for OCC{INST}, because reset already outstanding for OCC{RINST}",
412 "INST", instance, "RINST", resetInstance);
413 }
414 }
415
416 // If a reset has not been started, initiate an OCC reset via PLDM
initiateOccRequest(instanceID instance)417 void Manager::initiateOccRequest(instanceID instance)
418 {
419 if (!resetInProgress)
420 {
421 resetInProgress = true;
422 resetInstance = instance;
423 lg2::error(
424 "initiateOccRequest: Initiating PM Complex reset due to OCC{INST}",
425 "INST", instance);
426 #ifdef PLDM
427 pldmHandle->resetOCC(instance);
428 #endif
429 resetRequired = false;
430 }
431 else
432 {
433 lg2::warning(
434 "initiateOccRequest: Ignoring PM Complex reset request for OCC{INST}, because reset already in process for OCC{RINST}",
435 "INST", instance, "RINST", resetInstance);
436 }
437 }
438
statusCallBack(instanceID instance,bool status)439 void Manager::statusCallBack(instanceID instance, bool status)
440 {
441 if (status == true)
442 {
443 if (resetInProgress)
444 {
445 lg2::info(
446 "statusCallBack: Ignoring OCC{INST} activate because a reset has been initiated due to OCC{INST}",
447 "INST", instance, "RINST", resetInstance);
448 return;
449 }
450
451 // OCC went active
452 ++activeCount;
453
454 #ifdef POWER10
455 if (activeCount == 1)
456 {
457 // First OCC went active (allow some time for all OCCs to go active)
458 waitForAllOccsTimer->restartOnce(60s);
459 }
460 #endif
461
462 if (activeCount == statusObjects.size())
463 {
464 #ifdef POWER10
465 // All OCCs are now running
466 if (waitForAllOccsTimer->isEnabled())
467 {
468 // stop occ wait timer
469 waitForAllOccsTimer->setEnabled(false);
470 }
471
472 // All OCCs have been found, check if we need a reset
473 if (resetRequired)
474 {
475 initiateOccRequest(resetInstance);
476
477 if (!waitForAllOccsTimer->isEnabled())
478 {
479 lg2::warning(
480 "occsNotAllRunning: Restarting waitForAllOccTimer");
481 // restart occ wait timer
482 waitForAllOccsTimer->restartOnce(60s);
483 }
484 }
485 else
486 {
487 // Verify master OCC and start presence monitor
488 validateOccMaster();
489 }
490 #else
491 // Verify master OCC and start presence monitor
492 validateOccMaster();
493 #endif
494 }
495
496 // Start poll timer if not already started
497 if (!_pollTimer->isEnabled())
498 {
499 lg2::info("Manager: OCCs will be polled every {TIME} seconds",
500 "TIME", pollInterval);
501
502 // Send poll and start OCC poll timer
503 pollerTimerExpired();
504 }
505 }
506 else
507 {
508 // OCC went away
509 if (activeCount > 0)
510 {
511 --activeCount;
512 }
513 else
514 {
515 lg2::info("OCC{INST} disabled, but currently no active OCCs",
516 "INST", instance);
517 }
518
519 if (activeCount == 0)
520 {
521 // No OCCs are running
522
523 if (resetInProgress)
524 {
525 // All OCC active sensors are clear (reset should be in
526 // progress)
527 lg2::info(
528 "statusCallBack: Clearing resetInProgress (activeCount={COUNT}, OCC{INST}, status={STATUS})",
529 "COUNT", activeCount, "INST", instance, "STATUS", status);
530 resetInProgress = false;
531 resetInstance = 255;
532 }
533
534 // Stop OCC poll timer
535 if (_pollTimer->isEnabled())
536 {
537 lg2::info(
538 "Manager::statusCallBack(): OCCs are not running, stopping poll timer");
539 _pollTimer->setEnabled(false);
540 }
541
542 #ifdef POWER10
543 // stop wait timer
544 if (waitForAllOccsTimer->isEnabled())
545 {
546 waitForAllOccsTimer->setEnabled(false);
547 }
548 #endif
549 }
550 else if (resetInProgress)
551 {
552 lg2::info(
553 "statusCallBack: Skipping clear of resetInProgress (activeCount={COUNT}, OCC{INST}, status={STATUS})",
554 "COUNT", activeCount, "INST", instance, "STATUS", status);
555 }
556 #ifdef READ_OCC_SENSORS
557 // Clear OCC sensors
558 setSensorValueToNaN(instance);
559 #endif
560 }
561
562 #ifdef POWER10
563 if (waitingForAllOccActiveSensors)
564 {
565 if (utils::isHostRunning())
566 {
567 checkAllActiveSensors();
568 }
569 }
570 #endif
571 }
572
573 #ifdef I2C_OCC
initStatusObjects()574 void Manager::initStatusObjects()
575 {
576 // Make sure we have a valid path string
577 static_assert(sizeof(DEV_PATH) != 0);
578
579 auto deviceNames = i2c_occ::getOccHwmonDevices(DEV_PATH);
580 for (auto& name : deviceNames)
581 {
582 i2c_occ::i2cToDbus(name);
583 name = std::string(OCC_NAME) + '_' + name;
584 auto path = fs::path(OCC_CONTROL_ROOT) / name;
585 statusObjects.emplace_back(
586 std::make_unique<Status>(event, path.c_str(), *this));
587 }
588 // The first device is master occ
589 pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
590 *statusObjects.front());
591 #ifdef POWER10
592 pmode = std::make_unique<powermode::PowerMode>(*this, powermode::PMODE_PATH,
593 powermode::PIPS_PATH);
594 // Set the master OCC on the PowerMode object
595 pmode->setMasterOcc(path);
596 #endif
597 }
598 #endif
599
600 #ifdef PLDM
sbeTimeout(unsigned int instance)601 void Manager::sbeTimeout(unsigned int instance)
602 {
603 auto obj = std::find_if(statusObjects.begin(), statusObjects.end(),
604 [instance](const auto& obj) {
605 return instance == obj->getOccInstanceID();
606 });
607
608 if (obj != statusObjects.end() && (*obj)->occActive())
609 {
610 lg2::info("SBE timeout, requesting HRESET (OCC{INST})", "INST",
611 instance);
612
613 setSBEState(instance, SBE_STATE_NOT_USABLE);
614
615 pldmHandle->sendHRESET(instance);
616 }
617 }
618
updateOCCActive(instanceID instance,bool status)619 bool Manager::updateOCCActive(instanceID instance, bool status)
620 {
621 auto obj = std::find_if(statusObjects.begin(), statusObjects.end(),
622 [instance](const auto& obj) {
623 return instance == obj->getOccInstanceID();
624 });
625
626 const bool hostRunning = open_power::occ::utils::isHostRunning();
627 if (obj != statusObjects.end())
628 {
629 if (!hostRunning && (status == true))
630 {
631 lg2::warning(
632 "updateOCCActive: Host is not running yet (OCC{INST} active={STAT}), clearing sensor received",
633 "INST", instance, "STAT", status);
634 (*obj)->setPldmSensorReceived(false);
635 if (!waitingForAllOccActiveSensors)
636 {
637 lg2::info(
638 "updateOCCActive: Waiting for Host and all OCC Active Sensors");
639 waitingForAllOccActiveSensors = true;
640 }
641 #ifdef POWER10
642 discoverTimer->restartOnce(30s);
643 #endif
644 return false;
645 }
646 else
647 {
648 (*obj)->setPldmSensorReceived(true);
649 return (*obj)->occActive(status);
650 }
651 }
652 else
653 {
654 if (hostRunning)
655 {
656 lg2::warning(
657 "updateOCCActive: No status object to update for OCC{INST} (active={STAT})",
658 "INST", instance, "STAT", status);
659 }
660 else
661 {
662 if (status == true)
663 {
664 lg2::warning(
665 "updateOCCActive: No status objects and Host is not running yet (OCC{INST} active={STAT})",
666 "INST", instance, "STAT", status);
667 }
668 }
669 if (status == true)
670 {
671 // OCC went active
672 queuedActiveState.insert(instance);
673 }
674 else
675 {
676 auto match = queuedActiveState.find(instance);
677 if (match != queuedActiveState.end())
678 {
679 // OCC was disabled
680 queuedActiveState.erase(match);
681 }
682 }
683 return false;
684 }
685 }
686
687 // Called upon pldm event To set powermode Safe Mode State for system.
updateOccSafeMode(bool safeMode)688 void Manager::updateOccSafeMode(bool safeMode)
689 {
690 #ifdef POWER10
691 pmode->updateDbusSafeMode(safeMode);
692 #endif
693 // Update the processor throttle status on dbus
694 for (auto& obj : statusObjects)
695 {
696 obj->updateThrottle(safeMode, THROTTLED_SAFE);
697 }
698 }
699
sbeHRESETResult(instanceID instance,bool success)700 void Manager::sbeHRESETResult(instanceID instance, bool success)
701 {
702 if (success)
703 {
704 lg2::info("HRESET succeeded (OCC{INST})", "INST", instance);
705
706 setSBEState(instance, SBE_STATE_BOOTED);
707
708 return;
709 }
710
711 setSBEState(instance, SBE_STATE_FAILED);
712
713 if (sbeCanDump(instance))
714 {
715 lg2::info("HRESET failed (OCC{INST}), triggering SBE dump", "INST",
716 instance);
717
718 auto& bus = utils::getBus();
719 uint32_t src6 = instance << 16;
720 uint32_t logId =
721 FFDC::createPEL("org.open_power.Processor.Error.SbeChipOpTimeout",
722 src6, "SBE command timeout");
723
724 try
725 {
726 constexpr auto interface = "xyz.openbmc_project.Dump.Create";
727 constexpr auto function = "CreateDump";
728
729 std::string service =
730 utils::getService(OP_DUMP_OBJ_PATH, interface);
731 auto method = bus.new_method_call(service.c_str(), OP_DUMP_OBJ_PATH,
732 interface, function);
733
734 std::map<std::string, std::variant<std::string, uint64_t>>
735 createParams{
736 {"com.ibm.Dump.Create.CreateParameters.ErrorLogId",
737 uint64_t(logId)},
738 {"com.ibm.Dump.Create.CreateParameters.DumpType",
739 "com.ibm.Dump.Create.DumpType.SBE"},
740 {"com.ibm.Dump.Create.CreateParameters.FailingUnitId",
741 uint64_t(instance)},
742 };
743
744 method.append(createParams);
745
746 auto response = bus.call(method);
747 }
748 catch (const sdbusplus::exception_t& e)
749 {
750 constexpr auto ERROR_DUMP_DISABLED =
751 "xyz.openbmc_project.Dump.Create.Error.Disabled";
752 if (e.name() == ERROR_DUMP_DISABLED)
753 {
754 lg2::info("Dump is disabled, skipping");
755 }
756 else
757 {
758 lg2::error("Dump failed");
759 }
760 }
761 }
762
763 // SBE Reset failed, try PM Complex reset
764 lg2::error("sbeHRESETResult: Forcing PM Complex reset");
765 resetOccRequest(instance);
766 }
767
sbeCanDump(unsigned int instance)768 bool Manager::sbeCanDump(unsigned int instance)
769 {
770 struct pdbg_target* proc = getPdbgTarget(instance);
771
772 if (!proc)
773 {
774 // allow the dump in the error case
775 return true;
776 }
777
778 try
779 {
780 if (!openpower::phal::sbe::isDumpAllowed(proc))
781 {
782 return false;
783 }
784
785 if (openpower::phal::pdbg::isSbeVitalAttnActive(proc))
786 {
787 return false;
788 }
789 }
790 catch (openpower::phal::exception::SbeError& e)
791 {
792 lg2::info("Failed to query SBE state");
793 }
794
795 // allow the dump in the error case
796 return true;
797 }
798
setSBEState(unsigned int instance,enum sbe_state state)799 void Manager::setSBEState(unsigned int instance, enum sbe_state state)
800 {
801 struct pdbg_target* proc = getPdbgTarget(instance);
802
803 if (!proc)
804 {
805 return;
806 }
807
808 try
809 {
810 openpower::phal::sbe::setState(proc, state);
811 }
812 catch (const openpower::phal::exception::SbeError& e)
813 {
814 lg2::error("Failed to set SBE state: {ERROR}", "ERROR", e.what());
815 }
816 }
817
getPdbgTarget(unsigned int instance)818 struct pdbg_target* Manager::getPdbgTarget(unsigned int instance)
819 {
820 if (!pdbgInitialized)
821 {
822 try
823 {
824 openpower::phal::pdbg::init();
825 pdbgInitialized = true;
826 }
827 catch (const openpower::phal::exception::PdbgError& e)
828 {
829 lg2::error("pdbg initialization failed");
830 return nullptr;
831 }
832 }
833
834 struct pdbg_target* proc = nullptr;
835 pdbg_for_each_class_target("proc", proc)
836 {
837 if (pdbg_target_index(proc) == instance)
838 {
839 return proc;
840 }
841 }
842
843 lg2::error("Failed to get pdbg target");
844 return nullptr;
845 }
846 #endif
847
pollerTimerExpired()848 void Manager::pollerTimerExpired()
849 {
850 if (!_pollTimer)
851 {
852 lg2::error("pollerTimerExpired() ERROR: Timer not defined");
853 return;
854 }
855
856 #ifdef POWER10
857 if (resetRequired)
858 {
859 lg2::error("pollerTimerExpired() - Initiating PM Complex reset");
860 initiateOccRequest(resetInstance);
861
862 if (!waitForAllOccsTimer->isEnabled())
863 {
864 lg2::warning("pollerTimerExpired: Restarting waitForAllOccTimer");
865 // restart occ wait timer
866 waitForAllOccsTimer->restartOnce(60s);
867 }
868 return;
869 }
870 #endif
871
872 for (auto& obj : statusObjects)
873 {
874 if (!obj->occActive())
875 {
876 // OCC is not running yet
877 #ifdef READ_OCC_SENSORS
878 auto id = obj->getOccInstanceID();
879 setSensorValueToNaN(id);
880 #endif
881 continue;
882 }
883
884 // Read sysfs to force kernel to poll OCC
885 obj->readOccState();
886
887 #ifdef READ_OCC_SENSORS
888 // Read occ sensor values
889 getSensorValues(obj);
890 #endif
891 }
892
893 if (activeCount > 0)
894 {
895 // Restart OCC poll timer
896 _pollTimer->restartOnce(std::chrono::seconds(pollInterval));
897 }
898 else
899 {
900 // No OCCs running, so poll timer will not be restarted
901 lg2::info(
902 "Manager::pollerTimerExpired: poll timer will not be restarted");
903 }
904 }
905
906 #ifdef READ_OCC_SENSORS
readTempSensors(const fs::path & path,uint32_t occInstance)907 void Manager::readTempSensors(const fs::path& path, uint32_t occInstance)
908 {
909 // There may be more than one sensor with the same FRU type
910 // and label so make two passes: the first to read the temps
911 // from sysfs, and the second to put them on D-Bus after
912 // resolving any conflicts.
913 std::map<std::string, double> sensorData;
914
915 std::regex expr{"temp\\d+_label$"}; // Example: temp5_label
916 for (auto& file : fs::directory_iterator(path))
917 {
918 if (!std::regex_search(file.path().string(), expr))
919 {
920 continue;
921 }
922
923 uint32_t labelValue{0};
924
925 try
926 {
927 labelValue = readFile<uint32_t>(file.path());
928 }
929 catch (const std::system_error& e)
930 {
931 lg2::debug(
932 "readTempSensors: Failed reading {PATH}, errno = {ERROR}",
933 "PATH", file.path().string(), "ERROR", e.code().value());
934 continue;
935 }
936
937 const std::string& tempLabel = "label";
938 const std::string filePathString = file.path().string().substr(
939 0, file.path().string().length() - tempLabel.length());
940
941 uint32_t fruTypeValue{0};
942 try
943 {
944 fruTypeValue = readFile<uint32_t>(filePathString + fruTypeSuffix);
945 }
946 catch (const std::system_error& e)
947 {
948 lg2::debug(
949 "readTempSensors: Failed reading {PATH}, errno = {ERROR}",
950 "PATH", filePathString + fruTypeSuffix, "ERROR",
951 e.code().value());
952 continue;
953 }
954
955 std::string sensorPath =
956 OCC_SENSORS_ROOT + std::string("/temperature/");
957
958 std::string dvfsTempPath;
959
960 if (fruTypeValue == VRMVdd)
961 {
962 sensorPath.append(
963 "vrm_vdd" + std::to_string(occInstance) + "_temp");
964 }
965 else if (fruTypeValue == processorIoRing)
966 {
967 sensorPath.append(
968 "proc" + std::to_string(occInstance) + "_ioring_temp");
969 dvfsTempPath = std::string{OCC_SENSORS_ROOT} + "/temperature/proc" +
970 std::to_string(occInstance) + "_ioring_dvfs_temp";
971 }
972 else
973 {
974 uint16_t type = (labelValue & 0xFF000000) >> 24;
975 uint16_t instanceID = labelValue & 0x0000FFFF;
976
977 if (type == OCC_DIMM_TEMP_SENSOR_TYPE)
978 {
979 if (fruTypeValue == fruTypeNotAvailable)
980 {
981 // Not all DIMM related temps are available to read
982 // (no _input file in this case)
983 continue;
984 }
985 auto iter = dimmTempSensorName.find(fruTypeValue);
986 if (iter == dimmTempSensorName.end())
987 {
988 lg2::error(
989 "readTempSensors: Fru type error! fruTypeValue = {FRU}) ",
990 "FRU", fruTypeValue);
991 continue;
992 }
993
994 sensorPath.append(
995 "dimm" + std::to_string(instanceID) + iter->second);
996
997 dvfsTempPath = std::string{OCC_SENSORS_ROOT} + "/temperature/" +
998 dimmDVFSSensorName.at(fruTypeValue);
999 }
1000 else if (type == OCC_CPU_TEMP_SENSOR_TYPE)
1001 {
1002 if (fruTypeValue == processorCore)
1003 {
1004 // The OCC reports small core temps, of which there are
1005 // two per big core. All current P10 systems are in big
1006 // core mode, so use a big core name.
1007 uint16_t coreNum = instanceID / 2;
1008 uint16_t tempNum = instanceID % 2;
1009 sensorPath.append("proc" + std::to_string(occInstance) +
1010 "_core" + std::to_string(coreNum) + "_" +
1011 std::to_string(tempNum) + "_temp");
1012
1013 dvfsTempPath =
1014 std::string{OCC_SENSORS_ROOT} + "/temperature/proc" +
1015 std::to_string(occInstance) + "_core_dvfs_temp";
1016 }
1017 else
1018 {
1019 continue;
1020 }
1021 }
1022 else
1023 {
1024 continue;
1025 }
1026 }
1027
1028 // The dvfs temp file only needs to be read once per chip per type.
1029 if (!dvfsTempPath.empty() &&
1030 !dbus::OccDBusSensors::getOccDBus().hasDvfsTemp(dvfsTempPath))
1031 {
1032 try
1033 {
1034 auto dvfsValue = readFile<double>(filePathString + maxSuffix);
1035
1036 dbus::OccDBusSensors::getOccDBus().setDvfsTemp(
1037 dvfsTempPath, dvfsValue * std::pow(10, -3));
1038 }
1039 catch (const std::system_error& e)
1040 {
1041 lg2::debug(
1042 "readTempSensors: Failed reading {PATH}, errno = {ERROR}",
1043 "PATH", filePathString + maxSuffix, "ERROR",
1044 e.code().value());
1045 }
1046 }
1047
1048 uint32_t faultValue{0};
1049 try
1050 {
1051 faultValue = readFile<uint32_t>(filePathString + faultSuffix);
1052 }
1053 catch (const std::system_error& e)
1054 {
1055 lg2::debug(
1056 "readTempSensors: Failed reading {PATH}, errno = {ERROR}",
1057 "PATH", filePathString + faultSuffix, "ERROR",
1058 e.code().value());
1059 continue;
1060 }
1061
1062 double tempValue{0};
1063 // NOTE: if OCC sends back 0xFF, kernal sets this fault value to 1.
1064 if (faultValue != 0)
1065 {
1066 tempValue = std::numeric_limits<double>::quiet_NaN();
1067 }
1068 else
1069 {
1070 // Read the temperature
1071 try
1072 {
1073 tempValue = readFile<double>(filePathString + inputSuffix);
1074 }
1075 catch (const std::system_error& e)
1076 {
1077 lg2::debug(
1078 "readTempSensors: Failed reading {PATH}, errno = {ERROR}",
1079 "PATH", filePathString + inputSuffix, "ERROR",
1080 e.code().value());
1081
1082 // if errno == EAGAIN(Resource temporarily unavailable) then set
1083 // temp to 0, to avoid using old temp, and affecting FAN
1084 // Control.
1085 if (e.code().value() == EAGAIN)
1086 {
1087 tempValue = 0;
1088 }
1089 // else the errno would be something like
1090 // EBADF(Bad file descriptor)
1091 // or ENOENT(No such file or directory)
1092 else
1093 {
1094 continue;
1095 }
1096 }
1097 }
1098
1099 // If this object path already has a value, only overwite
1100 // it if the previous one was an NaN or a smaller value.
1101 auto existing = sensorData.find(sensorPath);
1102 if (existing != sensorData.end())
1103 {
1104 // Multiple sensors found for this FRU type
1105 if ((std::isnan(existing->second) && (tempValue == 0)) ||
1106 ((existing->second == 0) && std::isnan(tempValue)))
1107 {
1108 // One of the redundant sensors has failed (0xFF/nan), and the
1109 // other sensor has no reading (0), so set the FRU to NaN to
1110 // force fan increase
1111 tempValue = std::numeric_limits<double>::quiet_NaN();
1112 existing->second = tempValue;
1113 }
1114 if (std::isnan(existing->second) || (tempValue > existing->second))
1115 {
1116 existing->second = tempValue;
1117 }
1118 }
1119 else
1120 {
1121 // First sensor for this FRU type
1122 sensorData[sensorPath] = tempValue;
1123 }
1124 }
1125
1126 // Now publish the values on D-Bus.
1127 for (const auto& [objectPath, value] : sensorData)
1128 {
1129 dbus::OccDBusSensors::getOccDBus().setValue(objectPath,
1130 value * std::pow(10, -3));
1131
1132 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
1133 objectPath, !std::isnan(value));
1134
1135 if (existingSensors.find(objectPath) == existingSensors.end())
1136 {
1137 dbus::OccDBusSensors::getOccDBus().setChassisAssociation(
1138 objectPath, {"all_sensors"});
1139 }
1140
1141 existingSensors[objectPath] = occInstance;
1142 }
1143 }
1144
1145 std::optional<std::string>
getPowerLabelFunctionID(const std::string & value)1146 Manager::getPowerLabelFunctionID(const std::string& value)
1147 {
1148 // If the value is "system", then the FunctionID is "system".
1149 if (value == "system")
1150 {
1151 return value;
1152 }
1153
1154 // If the value is not "system", then the label value have 3 numbers, of
1155 // which we only care about the middle one:
1156 // <sensor id>_<function id>_<apss channel>
1157 // eg: The value is "0_10_5" , then the FunctionID is "10".
1158 if (value.find("_") == std::string::npos)
1159 {
1160 return std::nullopt;
1161 }
1162
1163 auto powerLabelValue = value.substr((value.find("_") + 1));
1164
1165 if (powerLabelValue.find("_") == std::string::npos)
1166 {
1167 return std::nullopt;
1168 }
1169
1170 return powerLabelValue.substr(0, powerLabelValue.find("_"));
1171 }
1172
readPowerSensors(const fs::path & path,uint32_t id)1173 void Manager::readPowerSensors(const fs::path& path, uint32_t id)
1174 {
1175 std::regex expr{"power\\d+_label$"}; // Example: power5_label
1176 for (auto& file : fs::directory_iterator(path))
1177 {
1178 if (!std::regex_search(file.path().string(), expr))
1179 {
1180 continue;
1181 }
1182
1183 std::string labelValue;
1184 try
1185 {
1186 labelValue = readFile<std::string>(file.path());
1187 }
1188 catch (const std::system_error& e)
1189 {
1190 lg2::debug(
1191 "readPowerSensors: Failed reading {PATH}, errno = {ERROR}",
1192 "PATH", file.path().string(), "ERROR", e.code().value());
1193 continue;
1194 }
1195
1196 auto functionID = getPowerLabelFunctionID(labelValue);
1197 if (functionID == std::nullopt)
1198 {
1199 continue;
1200 }
1201
1202 const std::string& tempLabel = "label";
1203 const std::string filePathString = file.path().string().substr(
1204 0, file.path().string().length() - tempLabel.length());
1205
1206 std::string sensorPath = OCC_SENSORS_ROOT + std::string("/power/");
1207
1208 auto iter = powerSensorName.find(*functionID);
1209 if (iter == powerSensorName.end())
1210 {
1211 continue;
1212 }
1213 sensorPath.append(iter->second);
1214
1215 double tempValue{0};
1216
1217 try
1218 {
1219 tempValue = readFile<double>(filePathString + inputSuffix);
1220 }
1221 catch (const std::system_error& e)
1222 {
1223 lg2::debug(
1224 "readPowerSensors: Failed reading {PATH}, errno = {ERROR}",
1225 "PATH", filePathString + inputSuffix, "ERROR",
1226 e.code().value());
1227 continue;
1228 }
1229
1230 dbus::OccDBusSensors::getOccDBus().setUnit(
1231 sensorPath, "xyz.openbmc_project.Sensor.Value.Unit.Watts");
1232
1233 dbus::OccDBusSensors::getOccDBus().setValue(
1234 sensorPath, tempValue * std::pow(10, -3) * std::pow(10, -3));
1235
1236 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
1237 sensorPath, true);
1238
1239 if (existingSensors.find(sensorPath) == existingSensors.end())
1240 {
1241 std::vector<int> occs;
1242 std::vector<std::string> fTypeList = {"all_sensors"};
1243 if (iter->second == "total_power")
1244 {
1245 // Total system power has its own chassis association
1246 fTypeList.push_back("total_power");
1247 }
1248 dbus::OccDBusSensors::getOccDBus().setChassisAssociation(
1249 sensorPath, fTypeList);
1250 }
1251
1252 existingSensors[sensorPath] = id;
1253 }
1254 return;
1255 }
1256
setSensorValueToNaN(uint32_t id) const1257 void Manager::setSensorValueToNaN(uint32_t id) const
1258 {
1259 for (const auto& [sensorPath, occId] : existingSensors)
1260 {
1261 if (occId == id)
1262 {
1263 dbus::OccDBusSensors::getOccDBus().setValue(
1264 sensorPath, std::numeric_limits<double>::quiet_NaN());
1265
1266 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
1267 sensorPath, true);
1268 }
1269 }
1270 return;
1271 }
1272
setSensorValueToNonFunctional(uint32_t id) const1273 void Manager::setSensorValueToNonFunctional(uint32_t id) const
1274 {
1275 for (const auto& [sensorPath, occId] : existingSensors)
1276 {
1277 if (occId == id)
1278 {
1279 dbus::OccDBusSensors::getOccDBus().setValue(
1280 sensorPath, std::numeric_limits<double>::quiet_NaN());
1281
1282 dbus::OccDBusSensors::getOccDBus().setOperationalStatus(
1283 sensorPath, false);
1284 }
1285 }
1286 return;
1287 }
1288
getSensorValues(std::unique_ptr<Status> & occ)1289 void Manager::getSensorValues(std::unique_ptr<Status>& occ)
1290 {
1291 static bool tracedError[8] = {0};
1292 const fs::path sensorPath = occ->getHwmonPath();
1293 const uint32_t id = occ->getOccInstanceID();
1294
1295 if (fs::exists(sensorPath))
1296 {
1297 // Read temperature sensors
1298 readTempSensors(sensorPath, id);
1299
1300 if (occ->isMasterOcc())
1301 {
1302 // Read power sensors
1303 readPowerSensors(sensorPath, id);
1304 }
1305 tracedError[id] = false;
1306 }
1307 else
1308 {
1309 if (!tracedError[id])
1310 {
1311 lg2::error(
1312 "Manager::getSensorValues: OCC{INST} sensor path missing: {PATH}",
1313 "INST", id, "PATH", sensorPath);
1314 tracedError[id] = true;
1315 }
1316 }
1317
1318 return;
1319 }
1320 #endif
1321
1322 // Read the altitude from DBus
readAltitude()1323 void Manager::readAltitude()
1324 {
1325 static bool traceAltitudeErr = true;
1326
1327 utils::PropertyValue altitudeProperty{};
1328 try
1329 {
1330 altitudeProperty = utils::getProperty(ALTITUDE_PATH, ALTITUDE_INTERFACE,
1331 ALTITUDE_PROP);
1332 auto sensorVal = std::get<double>(altitudeProperty);
1333 if (sensorVal < 0xFFFF)
1334 {
1335 if (sensorVal < 0)
1336 {
1337 altitude = 0;
1338 }
1339 else
1340 {
1341 // Round to nearest meter
1342 altitude = uint16_t(sensorVal + 0.5);
1343 }
1344 lg2::debug("readAltitude: sensor={VALUE} ({ALT}m)", "VALUE",
1345 sensorVal, "ALT", altitude);
1346 traceAltitudeErr = true;
1347 }
1348 else
1349 {
1350 if (traceAltitudeErr)
1351 {
1352 traceAltitudeErr = false;
1353 lg2::debug("Invalid altitude value: {ALT}", "ALT", sensorVal);
1354 }
1355 }
1356 }
1357 catch (const sdbusplus::exception_t& e)
1358 {
1359 if (traceAltitudeErr)
1360 {
1361 traceAltitudeErr = false;
1362 lg2::info("Unable to read Altitude: {ERROR}", "ERROR", e.what());
1363 }
1364 altitude = 0xFFFF; // not available
1365 }
1366 }
1367
1368 // Callback function when ambient temperature changes
ambientCallback(sdbusplus::message_t & msg)1369 void Manager::ambientCallback(sdbusplus::message_t& msg)
1370 {
1371 double currentTemp = 0;
1372 uint8_t truncatedTemp = 0xFF;
1373 std::string msgSensor;
1374 std::map<std::string, std::variant<double>> msgData;
1375 msg.read(msgSensor, msgData);
1376
1377 auto valPropMap = msgData.find(AMBIENT_PROP);
1378 if (valPropMap == msgData.end())
1379 {
1380 lg2::debug("ambientCallback: Unknown ambient property changed");
1381 return;
1382 }
1383 currentTemp = std::get<double>(valPropMap->second);
1384 if (std::isnan(currentTemp))
1385 {
1386 truncatedTemp = 0xFF;
1387 }
1388 else
1389 {
1390 if (currentTemp < 0)
1391 {
1392 truncatedTemp = 0;
1393 }
1394 else
1395 {
1396 // Round to nearest degree C
1397 truncatedTemp = uint8_t(currentTemp + 0.5);
1398 }
1399 }
1400
1401 // If ambient changes, notify OCCs
1402 if (truncatedTemp != ambient)
1403 {
1404 lg2::debug("ambientCallback: Ambient change from {OLD} to {NEW}C",
1405 "OLD", ambient, "NEW", currentTemp);
1406
1407 ambient = truncatedTemp;
1408 if (altitude == 0xFFFF)
1409 {
1410 // No altitude yet, try reading again
1411 readAltitude();
1412 }
1413
1414 lg2::debug("ambientCallback: Ambient: {TEMP}C, altitude: {ALT}m",
1415 "TEMP", ambient, "ALT", altitude);
1416 #ifdef POWER10
1417 // Send ambient and altitude to all OCCs
1418 for (auto& obj : statusObjects)
1419 {
1420 if (obj->occActive())
1421 {
1422 obj->sendAmbient(ambient, altitude);
1423 }
1424 }
1425 #endif // POWER10
1426 }
1427 }
1428
1429 // return the current ambient and altitude readings
getAmbientData(bool & ambientValid,uint8_t & ambientTemp,uint16_t & altitudeValue) const1430 void Manager::getAmbientData(bool& ambientValid, uint8_t& ambientTemp,
1431 uint16_t& altitudeValue) const
1432 {
1433 ambientValid = true;
1434 ambientTemp = ambient;
1435 altitudeValue = altitude;
1436
1437 if (ambient == 0xFF)
1438 {
1439 ambientValid = false;
1440 }
1441 }
1442
1443 #ifdef POWER10
1444 // Called when waitForAllOccsTimer expires
1445 // After the first OCC goes active, this timer will be started (60 seconds)
occsNotAllRunning()1446 void Manager::occsNotAllRunning()
1447 {
1448 if (resetInProgress)
1449 {
1450 lg2::warning(
1451 "occsNotAllRunning: Ignoring waitForAllOccsTimer because reset is in progress");
1452 return;
1453 }
1454 if (activeCount != statusObjects.size())
1455 {
1456 // Not all OCCs went active
1457 lg2::warning(
1458 "occsNotAllRunning: Active OCC count ({COUNT}) does not match expected count ({EXP})",
1459 "COUNT", activeCount, "EXP", statusObjects.size());
1460 // Procs may be garded, so may be expected
1461 }
1462
1463 if (resetRequired)
1464 {
1465 initiateOccRequest(resetInstance);
1466
1467 if (!waitForAllOccsTimer->isEnabled())
1468 {
1469 lg2::warning("occsNotAllRunning: Restarting waitForAllOccTimer");
1470 // restart occ wait timer
1471 waitForAllOccsTimer->restartOnce(60s);
1472 }
1473 }
1474 else
1475 {
1476 validateOccMaster();
1477 }
1478 }
1479
1480 #ifdef PLDM
1481 // Called when throttlePldmTraceTimer expires.
1482 // If this timer expires, that indicates there are no OCC active sensor PDRs
1483 // found which will trigger pldm traces to be throttled.
1484 // The second time this timer expires, a PEL will get created.
throttlePldmTraceExpired()1485 void Manager::throttlePldmTraceExpired()
1486 {
1487 if (utils::isHostRunning())
1488 {
1489 if (!onPldmTimeoutCreatePel)
1490 {
1491 // Throttle traces
1492 pldmHandle->setTraceThrottle(true);
1493 // Restart timer to log a PEL when timer expires
1494 onPldmTimeoutCreatePel = true;
1495 throttlePldmTraceTimer->restartOnce(40min);
1496 }
1497 else
1498 {
1499 lg2::error(
1500 "throttlePldmTraceExpired(): OCC active sensors still not available!");
1501 // Create PEL
1502 createPldmSensorPEL();
1503 }
1504 }
1505 else
1506 {
1507 // Make sure traces are not throttled
1508 pldmHandle->setTraceThrottle(false);
1509 lg2::info(
1510 "throttlePldmTraceExpired(): host it not running ignoring sensor timer");
1511 }
1512 }
1513
createPldmSensorPEL()1514 void Manager::createPldmSensorPEL()
1515 {
1516 Error::Descriptor d = Error::Descriptor(MISSING_OCC_SENSORS_PATH);
1517 std::map<std::string, std::string> additionalData;
1518
1519 additionalData.emplace("_PID", std::to_string(getpid()));
1520
1521 lg2::info(
1522 "createPldmSensorPEL(): Unable to find PLDM sensors for the OCCs");
1523
1524 auto& bus = utils::getBus();
1525
1526 try
1527 {
1528 FFDCFiles ffdc;
1529 // Add occ-control journal traces to PEL FFDC
1530 auto occJournalFile =
1531 FFDC::addJournalEntries(ffdc, "openpower-occ-control", 40);
1532
1533 static constexpr auto loggingObjectPath =
1534 "/xyz/openbmc_project/logging";
1535 static constexpr auto opLoggingInterface = "org.open_power.Logging.PEL";
1536 std::string service =
1537 utils::getService(loggingObjectPath, opLoggingInterface);
1538 auto method =
1539 bus.new_method_call(service.c_str(), loggingObjectPath,
1540 opLoggingInterface, "CreatePELWithFFDCFiles");
1541
1542 // Set level to Warning (Predictive).
1543 auto level =
1544 sdbusplus::xyz::openbmc_project::Logging::server::convertForMessage(
1545 sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level::
1546 Warning);
1547
1548 method.append(d.path, level, additionalData, ffdc);
1549 bus.call(method);
1550 }
1551 catch (const sdbusplus::exception_t& e)
1552 {
1553 lg2::error("Failed to create MISSING_OCC_SENSORS PEL: {ERROR}", "ERROR",
1554 e.what());
1555 }
1556 }
1557 #endif // PLDM
1558 #endif // POWER10
1559
1560 // Verify single master OCC and start presence monitor
validateOccMaster()1561 void Manager::validateOccMaster()
1562 {
1563 int masterInstance = -1;
1564 for (auto& obj : statusObjects)
1565 {
1566 auto instance = obj->getOccInstanceID();
1567 #ifdef POWER10
1568 if (!obj->occActive())
1569 {
1570 if (utils::isHostRunning())
1571 {
1572 // Check if sensor was queued while waiting for discovery
1573 auto match = queuedActiveState.find(instance);
1574 if (match != queuedActiveState.end())
1575 {
1576 queuedActiveState.erase(match);
1577 lg2::info("validateOccMaster: OCC{INST} is ACTIVE (queued)",
1578 "INST", instance);
1579 obj->occActive(true);
1580 }
1581 else
1582 {
1583 // OCC does not appear to be active yet, check active sensor
1584 #ifdef PLDM
1585 pldmHandle->checkActiveSensor(instance);
1586 #endif
1587 if (obj->occActive())
1588 {
1589 lg2::info(
1590 "validateOccMaster: OCC{INST} is ACTIVE after reading sensor",
1591 "INST", instance);
1592 }
1593 }
1594 }
1595 else
1596 {
1597 lg2::warning(
1598 "validateOccMaster: HOST is not running (OCC{INST})",
1599 "INST", instance);
1600 return;
1601 }
1602 }
1603 #endif // POWER10
1604
1605 if (obj->isMasterOcc())
1606 {
1607 obj->addPresenceWatchMaster();
1608
1609 if (masterInstance == -1)
1610 {
1611 masterInstance = instance;
1612 }
1613 else
1614 {
1615 lg2::error(
1616 "validateOccMaster: Multiple OCC masters! ({MAST1} and {MAST2})",
1617 "MAST1", masterInstance, "MAST2", instance);
1618 // request reset
1619 obj->deviceError(Error::Descriptor(PRESENCE_ERROR_PATH));
1620 }
1621 }
1622 }
1623
1624 if (masterInstance < 0)
1625 {
1626 lg2::error("validateOccMaster: Master OCC not found! (of {NUM} OCCs)",
1627 "NUM", statusObjects.size());
1628 // request reset
1629 statusObjects.front()->deviceError(
1630 Error::Descriptor(PRESENCE_ERROR_PATH));
1631 }
1632 else
1633 {
1634 lg2::info("validateOccMaster: OCC{INST} is master of {COUNT} OCCs",
1635 "INST", masterInstance, "COUNT", activeCount);
1636 #ifdef POWER10
1637 pmode->updateDbusSafeMode(false);
1638 #endif
1639 }
1640 }
1641
updatePcapBounds() const1642 void Manager::updatePcapBounds() const
1643 {
1644 if (pcap)
1645 {
1646 pcap->updatePcapBounds();
1647 }
1648 }
1649
1650 } // namespace occ
1651 } // namespace open_power
1652