xref: /openbmc/openpower-occ-control/occ_manager.cpp (revision 2f9f9bba661dcae2f0dd05ea6ddae9eb11a909d9)
1 #include "config.h"
2 
3 #include "occ_manager.hpp"
4 
5 #include "i2c_occ.hpp"
6 #include "occ_dbus.hpp"
7 #include "utils.hpp"
8 
9 #include <phosphor-logging/elog-errors.hpp>
10 #include <phosphor-logging/log.hpp>
11 #include <xyz/openbmc_project/Common/error.hpp>
12 
13 #include <chrono>
14 #include <cmath>
15 #include <filesystem>
16 #include <regex>
17 
18 namespace open_power
19 {
20 namespace occ
21 {
22 
// fru_type value for which readTempSensors() skips a DIMM temperature sensor.
constexpr uint32_t fruTypeNotAvailable{0xFF};

// Attribute-file suffixes. Each one is appended to a sensor's hwmon path
// prefix (the label file path with the trailing "label" removed).
constexpr const char* fruTypeSuffix = "fru_type";
constexpr const char* faultSuffix = "fault";
constexpr const char* inputSuffix = "input";
27 
28 using namespace phosphor::logging;
29 
// Read one formatted value of type T from the file at `path`.
//
// The value is extracted with operator>>, so leading whitespace is skipped
// and, for strings, only the first whitespace-delimited token is returned.
//
// @param path  File to read.
// @return      The extracted value.
// @throws std::system_error (generic category) if the file cannot be opened
//         or no value can be extracted. The code is the errno left by the
//         failing operation, or EIO when errno was not set (for example on
//         an empty file or unparsable content), so it is never 0.
template <typename T>
T readFile(const std::string& path)
{
    // Turn the errno observed after a failure into an exception, never
    // reporting "success" (0) as the error code.
    const auto fail = [](int err) {
        return std::system_error(
            err != 0 ? err : static_cast<int>(std::errc::io_error),
            std::generic_category());
    };

    // Clear stale values so a failure below is not blamed on an unrelated
    // earlier error.
    errno = 0;

    std::ifstream ifs{path};
    if (!ifs.is_open())
    {
        throw fail(errno);
    }

    // Reaching end-of-file while extracting is not an error by itself: a
    // file holding "42" with no trailing newline is a successful read. Only
    // failbit/badbit (checked by operator!) mean no value was obtained.
    T data{};
    if (!(ifs >> data))
    {
        throw fail(errno);
    }

    return data;
}
52 
53 void Manager::findAndCreateObjects()
54 {
55 #ifndef POWER10
56     for (auto id = 0; id < MAX_CPUS; ++id)
57     {
58         // Create one occ per cpu
59         auto occ = std::string(OCC_NAME) + std::to_string(id);
60         createObjects(occ);
61     }
62 #else
63     // Create the OCCs based on on the /dev/occX devices
64     auto occs = findOCCsInDev();
65 
66     if (occs.empty() || (prevOCCSearch.size() != occs.size()))
67     {
68         // Something changed or no OCCs yet, try again in 10s.
69         // Note on the first pass prevOCCSearch will be empty,
70         // so there will be at least one delay to give things
71         // a chance to settle.
72         prevOCCSearch = occs;
73 
74         using namespace std::literals::chrono_literals;
75         discoverTimer->restartOnce(10s);
76     }
77     else
78     {
79         discoverTimer.reset();
80 
81         // createObjects requires OCC0 first.
82         std::sort(occs.begin(), occs.end());
83 
84         for (auto id : occs)
85         {
86             createObjects(std::string(OCC_NAME) + std::to_string(id));
87         }
88     }
89 #endif
90 }
91 
92 std::vector<int> Manager::findOCCsInDev()
93 {
94     std::vector<int> occs;
95     std::regex expr{R"(occ(\d+)$)"};
96 
97     for (auto& file : fs::directory_iterator("/dev"))
98     {
99         std::smatch match;
100         std::string path{file.path().string()};
101         if (std::regex_search(path, match, expr))
102         {
103             auto num = std::stoi(match[1].str());
104 
105             // /dev numbering starts at 1, ours starts at 0.
106             occs.push_back(num - 1);
107         }
108     }
109 
110     return occs;
111 }
112 
113 int Manager::cpuCreated(sdbusplus::message::message& msg)
114 {
115     namespace fs = std::filesystem;
116 
117     sdbusplus::message::object_path o;
118     msg.read(o);
119     fs::path cpuPath(std::string(std::move(o)));
120 
121     auto name = cpuPath.filename().string();
122     auto index = name.find(CPU_NAME);
123     name.replace(index, std::strlen(CPU_NAME), OCC_NAME);
124 
125     createObjects(name);
126 
127     return 0;
128 }
129 
130 void Manager::createObjects(const std::string& occ)
131 {
132     auto path = fs::path(OCC_CONTROL_ROOT) / occ;
133 
134     passThroughObjects.emplace_back(
135         std::make_unique<PassThrough>(path.c_str()));
136 
137     statusObjects.emplace_back(std::make_unique<Status>(
138         event, path.c_str(), *this,
139         std::bind(std::mem_fn(&Manager::statusCallBack), this,
140                   std::placeholders::_1)
141 #ifdef PLDM
142             ,
143         std::bind(std::mem_fn(&pldm::Interface::resetOCC), pldmHandle.get(),
144                   std::placeholders::_1)
145 #endif
146             ));
147 
148     // Create the power cap monitor object for master occ (0)
149     if (!pcap)
150     {
151         pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
152             *statusObjects.front());
153     }
154 
155 #ifdef POWER10
156     // Create the power mode monitor object for master occ (0)
157     if (!pmode)
158     {
159         pmode = std::make_unique<open_power::occ::powermode::PowerMode>(
160             *statusObjects.front());
161     }
162     // Create the idle power saver monitor object for master occ (0)
163     if (!pips)
164     {
165         pips = std::make_unique<open_power::occ::powermode::PowerIPS>(
166             *statusObjects.front());
167     }
168 #endif
169 }
170 
171 void Manager::statusCallBack(bool status)
172 {
173     using InternalFailure =
174         sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;
175 
176     // At this time, it won't happen but keeping it
177     // here just in case something changes in the future
178     if ((activeCount == 0) && (!status))
179     {
180         log<level::ERR>("Invalid update on OCCActive");
181         elog<InternalFailure>();
182     }
183 
184     activeCount += status ? 1 : -1;
185 
186     // Only start presence detection if all the OCCs are bound
187     if (activeCount == statusObjects.size())
188     {
189         for (auto& obj : statusObjects)
190         {
191             obj->addPresenceWatchMaster();
192         }
193     }
194 
195     if ((!_pollTimer->isEnabled()) && (activeCount > 0))
196     {
197         log<level::INFO>(
198             fmt::format(
199                 "Manager::statusCallBack(): {} OCCs will be polled every {} seconds",
200                 activeCount, pollInterval)
201                 .c_str());
202 
203         // Send poll and start OCC poll timer
204         pollerTimerExpired();
205     }
206     else if ((_pollTimer->isEnabled()) && (activeCount == 0))
207     {
208         // Stop OCC poll timer
209         log<level::INFO>(
210             "Manager::statusCallBack(): OCCs are not running, stopping poll timer");
211         _pollTimer->setEnabled(false);
212 
213 #ifdef READ_OCC_SENSORS
214         for (auto& obj : statusObjects)
215         {
216             setSensorValueToNaN(obj->getOccInstanceID());
217         }
218 #endif
219     }
220 }
221 
#ifdef I2C_OCC
// Create one Status object per OCC hwmon device reported for DEV_PATH, then
// the objects attached to the first (master) one: the power cap monitor and,
// with POWER10, the power mode and idle power saver monitors.
//
// If no devices are reported nothing is created and an error is logged.
void Manager::initStatusObjects()
{
    // Make sure we have a valid path string
    static_assert(sizeof(DEV_PATH) != 0);

    auto deviceNames = i2c_occ::getOccHwmonDevices(DEV_PATH);
    if (deviceNames.empty())
    {
        // front() on an empty container is undefined behaviour, and there
        // would be no status object to attach the master-only objects to.
        log<level::ERR>(
            fmt::format(
                "Manager::initStatusObjects(): No OCC hwmon devices found in {}",
                DEV_PATH)
                .c_str());
        return;
    }

    // Copy the master's device name now: the loop below rewrites every
    // entry of deviceNames in place.
    auto occMasterName = deviceNames.front();
    for (auto& name : deviceNames)
    {
        i2c_occ::i2cToDbus(name);
        name = std::string(OCC_NAME) + '_' + name;
        auto path = fs::path(OCC_CONTROL_ROOT) / name;
        statusObjects.emplace_back(
            std::make_unique<Status>(event, path.c_str(), *this));
    }
    // The first device is master occ
    pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
        *statusObjects.front(), occMasterName);
#ifdef POWER10
    pmode = std::make_unique<open_power::occ::powermode::PowerMode>(
        *statusObjects.front());
    pips = std::make_unique<open_power::occ::powermode::PowerIPS>(
        *statusObjects.front());
#endif
}
#endif
249 
#ifdef PLDM
// Forward an active-state update to the status object at index `instance`.
//
// @param instance  Index into statusObjects.
// @param status    New active state to pass to Status::occActive().
// @return          The result of occActive(status), or false when there is
//                  no status object at that index.
//                  NOTE(review): false for an unknown instance assumes the
//                  caller treats it as "not active" - confirm with the
//                  caller.
bool Manager::updateOCCActive(instanceID instance, bool status)
{
    // Indexing past the end of statusObjects is undefined behaviour, so
    // reject instances no object has been created for.
    const auto index = static_cast<std::size_t>(instance);
    if (index >= statusObjects.size())
    {
        log<level::ERR>(
            fmt::format(
                "Manager::updateOCCActive(): No status object for OCC instance {}",
                index)
                .c_str());
        return false;
    }

    return statusObjects[index]->occActive(status);
}
#endif
256 
257 void Manager::pollerTimerExpired()
258 {
259     if (activeCount == 0)
260     {
261         // No OCCs running, so poll timer will not be restarted
262         log<level::INFO>(
263             "Manager::pollerTimerExpire(): No OCCs running, poll timer not restarted");
264     }
265 
266     if (!_pollTimer)
267     {
268         log<level::ERR>(
269             "Manager::pollerTimerExpired() ERROR: Timer not defined");
270         return;
271     }
272 
273     for (auto& obj : statusObjects)
274     {
275         // Read sysfs to force kernel to poll OCC
276         obj->readOccState();
277 
278 #ifdef READ_OCC_SENSORS
279         // Read occ sensor values
280         auto id = obj->getOccInstanceID();
281         if (!obj->occActive())
282         {
283             // Occ not activated
284             setSensorValueToNaN(id);
285             continue;
286         }
287         getSensorValues(id, obj->isMasterOcc());
288 #endif
289     }
290 
291     // Restart OCC poll timer
292     _pollTimer->restartOnce(std::chrono::seconds(pollInterval));
293 }
294 
295 #ifdef READ_OCC_SENSORS
296 void Manager::readTempSensors(const fs::path& path, uint32_t id)
297 {
298     std::regex expr{"temp\\d+_label$"}; // Example: temp5_label
299     for (auto& file : fs::directory_iterator(path))
300     {
301         if (!std::regex_search(file.path().string(), expr))
302         {
303             continue;
304         }
305 
306         uint32_t labelValue{0};
307 
308         try
309         {
310             labelValue = readFile<uint32_t>(file.path());
311         }
312         catch (const std::system_error& e)
313         {
314             log<level::DEBUG>(
315                 fmt::format("readTempSensors: Failed reading {}, errno = {}",
316                             file.path().string(), e.code().value())
317                     .c_str());
318             continue;
319         }
320 
321         const std::string& tempLabel = "label";
322         const std::string filePathString = file.path().string().substr(
323             0, file.path().string().length() - tempLabel.length());
324 
325         uint32_t fruTypeValue{0};
326         try
327         {
328             fruTypeValue = readFile<uint32_t>(filePathString + fruTypeSuffix);
329         }
330         catch (const std::system_error& e)
331         {
332             log<level::DEBUG>(
333                 fmt::format("readTempSensors: Failed reading {}, errno = {}",
334                             filePathString + fruTypeSuffix, e.code().value())
335                     .c_str());
336             continue;
337         }
338 
339         std::string sensorPath =
340             OCC_SENSORS_ROOT + std::string("/temperature/");
341 
342         if (fruTypeValue == VRMVdd)
343         {
344             sensorPath.append("vrm_vdd" + std::to_string(id) + "_temp");
345         }
346         else
347         {
348             uint16_t type = (labelValue & 0xFF000000) >> 24;
349             uint16_t instanceID = labelValue & 0x0000FFFF;
350 
351             if (type == OCC_DIMM_TEMP_SENSOR_TYPE)
352             {
353                 if (fruTypeValue == fruTypeNotAvailable)
354                 {
355                     // Not all DIMM related temps are available to read
356                     // (no _input file in this case)
357                     continue;
358                 }
359                 auto iter = dimmTempSensorName.find(fruTypeValue);
360                 if (iter == dimmTempSensorName.end())
361                 {
362                     log<level::ERR>(
363                         fmt::format(
364                             "readTempSensors: Fru type error! fruTypeValue = {}) ",
365                             fruTypeValue)
366                             .c_str());
367                     continue;
368                 }
369 
370                 sensorPath.append("dimm" + std::to_string(instanceID) +
371                                   iter->second);
372             }
373             else if (type == OCC_CPU_TEMP_SENSOR_TYPE)
374             {
375                 if (fruTypeValue != processorCore)
376                 {
377                     // TODO: support IO ring temp
378                     continue;
379                 }
380 
381                 // The OCC reports small core temps, of which there are
382                 // two per big core.  All current P10 systems are in big
383                 // core mode, so use a big core name.
384                 uint16_t coreNum = instanceID / 2;
385                 uint16_t tempNum = instanceID % 2;
386                 sensorPath.append("proc" + std::to_string(id) + "_core" +
387                                   std::to_string(coreNum) + "_" +
388                                   std::to_string(tempNum) + "_temp");
389             }
390             else
391             {
392                 continue;
393             }
394         }
395 
396         uint32_t faultValue{0};
397         try
398         {
399             faultValue = readFile<uint32_t>(filePathString + faultSuffix);
400         }
401         catch (const std::system_error& e)
402         {
403             log<level::DEBUG>(
404                 fmt::format("readTempSensors: Failed reading {}, errno = {}",
405                             filePathString + faultSuffix, e.code().value())
406                     .c_str());
407             continue;
408         }
409 
410         // At this point, the sensor will be created for sure.
411         if (existingSensors.find(sensorPath) == existingSensors.end())
412         {
413             open_power::occ::dbus::OccDBusSensors::getOccDBus()
414                 .setChassisAssociation(sensorPath);
415         }
416 
417         if (faultValue != 0)
418         {
419             open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue(
420                 sensorPath, std::numeric_limits<double>::quiet_NaN());
421 
422             open_power::occ::dbus::OccDBusSensors::getOccDBus()
423                 .setOperationalStatus(sensorPath, false);
424 
425             continue;
426         }
427 
428         double tempValue{0};
429 
430         try
431         {
432             tempValue = readFile<double>(filePathString + inputSuffix);
433         }
434         catch (const std::system_error& e)
435         {
436             log<level::DEBUG>(
437                 fmt::format("readTempSensors: Failed reading {}, errno = {}",
438                             filePathString + inputSuffix, e.code().value())
439                     .c_str());
440             continue;
441         }
442 
443         open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue(
444             sensorPath, tempValue * std::pow(10, -3));
445 
446         open_power::occ::dbus::OccDBusSensors::getOccDBus()
447             .setOperationalStatus(sensorPath, true);
448 
449         existingSensors[sensorPath] = id;
450     }
451     return;
452 }
453 
454 std::optional<std::string>
455     Manager::getPowerLabelFunctionID(const std::string& value)
456 {
457     // If the value is "system", then the FunctionID is "system".
458     if (value == "system")
459     {
460         return value;
461     }
462 
463     // If the value is not "system", then the label value have 3 numbers, of
464     // which we only care about the middle one:
465     // <sensor id>_<function id>_<apss channel>
466     // eg: The value is "0_10_5" , then the FunctionID is "10".
467     if (value.find("_") == std::string::npos)
468     {
469         return std::nullopt;
470     }
471 
472     auto powerLabelValue = value.substr((value.find("_") + 1));
473 
474     if (powerLabelValue.find("_") == std::string::npos)
475     {
476         return std::nullopt;
477     }
478 
479     return powerLabelValue.substr(0, powerLabelValue.find("_"));
480 }
481 
482 void Manager::readPowerSensors(const fs::path& path, uint32_t id)
483 {
484     std::regex expr{"power\\d+_label$"}; // Example: power5_label
485     for (auto& file : fs::directory_iterator(path))
486     {
487         if (!std::regex_search(file.path().string(), expr))
488         {
489             continue;
490         }
491 
492         std::string labelValue;
493         try
494         {
495             labelValue = readFile<std::string>(file.path());
496         }
497         catch (const std::system_error& e)
498         {
499             log<level::DEBUG>(
500                 fmt::format("readPowerSensors: Failed reading {}, errno = {}",
501                             file.path().string(), e.code().value())
502                     .c_str());
503             continue;
504         }
505 
506         auto functionID = getPowerLabelFunctionID(labelValue);
507         if (functionID == std::nullopt)
508         {
509             continue;
510         }
511 
512         const std::string& tempLabel = "label";
513         const std::string filePathString = file.path().string().substr(
514             0, file.path().string().length() - tempLabel.length());
515 
516         std::string sensorPath = OCC_SENSORS_ROOT + std::string("/power/");
517 
518         auto iter = powerSensorName.find(*functionID);
519         if (iter == powerSensorName.end())
520         {
521             continue;
522         }
523         sensorPath.append(iter->second);
524 
525         double tempValue{0};
526 
527         try
528         {
529             tempValue = readFile<double>(filePathString + inputSuffix);
530         }
531         catch (const std::system_error& e)
532         {
533             log<level::DEBUG>(
534                 fmt::format("readTempSensors: Failed reading {}, errno = {}",
535                             filePathString + inputSuffix, e.code().value())
536                     .c_str());
537             continue;
538         }
539 
540         open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue(
541             sensorPath, tempValue * std::pow(10, -3) * std::pow(10, -3));
542 
543         open_power::occ::dbus::OccDBusSensors::getOccDBus()
544             .setOperationalStatus(sensorPath, true);
545 
546         if (existingSensors.find(sensorPath) == existingSensors.end())
547         {
548             open_power::occ::dbus::OccDBusSensors::getOccDBus()
549                 .setChassisAssociation(sensorPath);
550         }
551 
552         existingSensors[sensorPath] = id;
553     }
554     return;
555 }
556 
557 void Manager::setSensorValueToNaN(uint32_t id)
558 {
559     for (const auto& [sensorPath, occId] : existingSensors)
560     {
561         if (occId == id)
562         {
563             open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue(
564                 sensorPath, std::numeric_limits<double>::quiet_NaN());
565         }
566     }
567     return;
568 }
569 
570 void Manager::getSensorValues(uint32_t id, bool masterOcc)
571 {
572     const auto occ = std::string("occ-hwmon.") + std::to_string(id + 1);
573 
574     fs::path fileName{OCC_HWMON_PATH + occ + "/hwmon/"};
575 
576     // Need to get the hwmonXX directory name, there better only be 1 dir
577     assert(std::distance(fs::directory_iterator(fileName),
578                          fs::directory_iterator{}) == 1);
579     // Now set our path to this full path, including this hwmonXX directory
580     fileName = fs::path(*fs::directory_iterator(fileName));
581 
582     // Read temperature sensors
583     readTempSensors(fileName, id);
584 
585     if (masterOcc)
586     {
587         // Read power sensors
588         readPowerSensors(fileName, id);
589     }
590 
591     return;
592 }
593 #endif
594 
595 } // namespace occ
596 } // namespace open_power
597