xref: /openbmc/openpower-occ-control/occ_manager.cpp (revision 9ed399d987706c5816f0f2ef850af347eac988d9)
1 #include "config.h"
2 
3 #include "occ_manager.hpp"
4 
5 #include "i2c_occ.hpp"
6 #include "occ_dbus.hpp"
7 #include "utils.hpp"
8 
9 #include <phosphor-logging/elog-errors.hpp>
10 #include <phosphor-logging/log.hpp>
11 #include <xyz/openbmc_project/Common/error.hpp>
12 
13 #include <chrono>
14 #include <cmath>
15 #include <filesystem>
16 #include <regex>
17 
18 namespace open_power
19 {
20 namespace occ
21 {
22 
23 constexpr uint32_t fruTypeNotAvailable = 0xFF;
24 constexpr auto fruTypeSuffix = "fru_type";
25 constexpr auto faultSuffix = "fault";
26 constexpr auto inputSuffix = "input";
27 
28 using namespace phosphor::logging;
29 
/**
 * @brief Read one formatted value of type T from the start of a file.
 *
 * The stream is configured to throw on failbit, badbit and eofbit, so a
 * missing file, an unparsable value, or hitting end-of-file while
 * extracting are all reported as errors.
 *
 * @tparam T     Type extracted with operator>> (e.g. uint32_t, double,
 *               std::string).
 * @param[in] path  File to read.
 *
 * @return The extracted value.
 *
 * @throws std::system_error carrying errno when it was set by the failing
 *         operation, otherwise std::errc::io_error. The code is never 0.
 */
template <typename T>
T readFile(const std::string& path)
{
    std::ifstream ifs;
    ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit |
                   std::ifstream::eofbit);
    T data{};

    // Clear errno so that a stale value left behind by some unrelated,
    // earlier call is not reported as the cause of this failure.
    errno = 0;

    try
    {
        ifs.open(path);
        ifs >> data;
        ifs.close();
    }
    catch (const std::exception&)
    {
        const auto err = errno;

        // Stream-level failures such as a parse error do not set errno.
        // Report those as a generic I/O error rather than as "success".
        throw std::system_error(
            (err != 0) ? std::error_code(err, std::generic_category())
                       : std::make_error_code(std::errc::io_error));
    }

    return data;
}
52 
53 void Manager::findAndCreateObjects()
54 {
55 #ifndef POWER10
56     for (auto id = 0; id < MAX_CPUS; ++id)
57     {
58         // Create one occ per cpu
59         auto occ = std::string(OCC_NAME) + std::to_string(id);
60         createObjects(occ);
61     }
62 #else
63     // Create the OCCs based on on the /dev/occX devices
64     auto occs = findOCCsInDev();
65 
66     if (occs.empty() || (prevOCCSearch.size() != occs.size()))
67     {
68         // Something changed or no OCCs yet, try again in 10s.
69         // Note on the first pass prevOCCSearch will be empty,
70         // so there will be at least one delay to give things
71         // a chance to settle.
72         prevOCCSearch = occs;
73 
74         using namespace std::literals::chrono_literals;
75         discoverTimer->restartOnce(10s);
76     }
77     else
78     {
79         discoverTimer.reset();
80 
81         // createObjects requires OCC0 first.
82         std::sort(occs.begin(), occs.end());
83 
84         for (auto id : occs)
85         {
86             createObjects(std::string(OCC_NAME) + std::to_string(id));
87         }
88     }
89 #endif
90 }
91 
92 std::vector<int> Manager::findOCCsInDev()
93 {
94     std::vector<int> occs;
95     std::regex expr{R"(occ(\d+)$)"};
96 
97     for (auto& file : fs::directory_iterator("/dev"))
98     {
99         std::smatch match;
100         std::string path{file.path().string()};
101         if (std::regex_search(path, match, expr))
102         {
103             auto num = std::stoi(match[1].str());
104 
105             // /dev numbering starts at 1, ours starts at 0.
106             occs.push_back(num - 1);
107         }
108     }
109 
110     return occs;
111 }
112 
113 int Manager::cpuCreated(sdbusplus::message::message& msg)
114 {
115     namespace fs = std::filesystem;
116 
117     sdbusplus::message::object_path o;
118     msg.read(o);
119     fs::path cpuPath(std::string(std::move(o)));
120 
121     auto name = cpuPath.filename().string();
122     auto index = name.find(CPU_NAME);
123     name.replace(index, std::strlen(CPU_NAME), OCC_NAME);
124 
125     createObjects(name);
126 
127     return 0;
128 }
129 
130 void Manager::createObjects(const std::string& occ)
131 {
132     auto path = fs::path(OCC_CONTROL_ROOT) / occ;
133 
134     passThroughObjects.emplace_back(
135         std::make_unique<PassThrough>(path.c_str()));
136 
137     statusObjects.emplace_back(std::make_unique<Status>(
138         event, path.c_str(), *this,
139         std::bind(std::mem_fn(&Manager::statusCallBack), this,
140                   std::placeholders::_1)
141 #ifdef PLDM
142             ,
143         std::bind(std::mem_fn(&pldm::Interface::resetOCC), pldmHandle.get(),
144                   std::placeholders::_1)
145 #endif
146             ));
147 
148     // Create the power cap monitor object for master occ (0)
149     if (!pcap)
150     {
151         pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
152             *statusObjects.front());
153     }
154 
155 #ifdef POWER10
156     // Create the power mode monitor object for master occ (0)
157     if (!pmode)
158     {
159         pmode = std::make_unique<open_power::occ::powermode::PowerMode>(
160             *statusObjects.front());
161     }
162 #endif
163 }
164 
165 void Manager::statusCallBack(bool status)
166 {
167     using InternalFailure =
168         sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;
169 
170     // At this time, it won't happen but keeping it
171     // here just in case something changes in the future
172     if ((activeCount == 0) && (!status))
173     {
174         log<level::ERR>("Invalid update on OCCActive");
175         elog<InternalFailure>();
176     }
177 
178     activeCount += status ? 1 : -1;
179 
180     // Only start presence detection if all the OCCs are bound
181     if (activeCount == statusObjects.size())
182     {
183         for (auto& obj : statusObjects)
184         {
185             obj->addPresenceWatchMaster();
186         }
187     }
188 
189     if ((!_pollTimer->isEnabled()) && (activeCount > 0))
190     {
191         log<level::INFO>(
192             fmt::format(
193                 "Manager::statusCallBack(): {} OCCs will be polled every {} seconds",
194                 activeCount, pollInterval)
195                 .c_str());
196 
197         // Send poll and start OCC poll timer
198         pollerTimerExpired();
199     }
200     else if ((_pollTimer->isEnabled()) && (activeCount == 0))
201     {
202         // Stop OCC poll timer
203         log<level::INFO>(
204             "Manager::statusCallBack(): OCCs are not running, stopping poll timer");
205         _pollTimer->setEnabled(false);
206 
207 #ifdef READ_OCC_SENSORS
208         for (auto& obj : statusObjects)
209         {
210             setSensorValueToNaN(obj->getOccInstanceID());
211         }
212 #endif
213     }
214 }
215 
#ifdef I2C_OCC
/**
 * @brief Create a status object for every OCC hwmon device under DEV_PATH.
 *
 * The first device returned by getOccHwmonDevices() is treated as the
 * master OCC and is used for the power cap (and POWER10 power mode)
 * object.
 */
void Manager::initStatusObjects()
{
    // Make sure we have a valid path string
    static_assert(sizeof(DEV_PATH) != 0);

    auto hwmonDevices = i2c_occ::getOccHwmonDevices(DEV_PATH);

    // Take a copy now: the loop below rewrites every entry in place.
    auto masterDevice = hwmonDevices.front();

    for (auto& device : hwmonDevices)
    {
        i2c_occ::i2cToDbus(device);
        device = std::string(OCC_NAME) + '_' + device;

        const auto objPath = fs::path(OCC_CONTROL_ROOT) / device;
        statusObjects.emplace_back(
            std::make_unique<Status>(event, objPath.c_str(), *this));
    }

    // The first device is master occ
    pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
        *statusObjects.front(), masterDevice);
#ifdef POWER10
    pmode = std::make_unique<open_power::occ::powermode::PowerMode>(
        *statusObjects.front());
#endif
}
#endif
241 
#ifdef PLDM
/**
 * @brief Forward an OCC active/inactive update to its status object.
 *
 * @param[in] instance  Index into statusObjects of the OCC to update.
 * @param[in] status    New active state.
 *
 * @return The result of Status::occActive(status), or false when no
 *         status object exists for the given instance.
 */
bool Manager::updateOCCActive(instanceID instance, bool status)
{
    // Indexing past the end of the vector is undefined behaviour, so
    // reject instances that have no status object (yet).
    if (static_cast<size_t>(instance) >= statusObjects.size())
    {
        log<level::ERR>(
            fmt::format(
                "Manager::updateOCCActive(): no status object for OCC instance {}",
                static_cast<size_t>(instance))
                .c_str());
        return false;
    }

    return (statusObjects[instance])->occActive(status);
}
#endif
248 
249 void Manager::pollerTimerExpired()
250 {
251     if (activeCount == 0)
252     {
253         // No OCCs running, so poll timer will not be restarted
254         log<level::INFO>(
255             "Manager::pollerTimerExpire(): No OCCs running, poll timer not restarted");
256     }
257 
258     if (!_pollTimer)
259     {
260         log<level::ERR>(
261             "Manager::pollerTimerExpired() ERROR: Timer not defined");
262         return;
263     }
264 
265     for (auto& obj : statusObjects)
266     {
267         // Read sysfs to force kernel to poll OCC
268         obj->readOccState();
269 
270 #ifdef READ_OCC_SENSORS
271         // Read occ sensor values
272         auto id = obj->getOccInstanceID();
273         if (!obj->occActive())
274         {
275             // Occ not activated
276             setSensorValueToNaN(id);
277             continue;
278         }
279         getSensorValues(id, obj->isMasterOcc());
280 #endif
281     }
282 
283     // Restart OCC poll timer
284     _pollTimer->restartOnce(std::chrono::seconds(pollInterval));
285 }
286 
287 #ifdef READ_OCC_SENSORS
288 void Manager::readTempSensors(const fs::path& path, uint32_t id)
289 {
290     std::regex expr{"temp\\d+_label$"}; // Example: temp5_label
291     for (auto& file : fs::directory_iterator(path))
292     {
293         if (!std::regex_search(file.path().string(), expr))
294         {
295             continue;
296         }
297 
298         uint32_t labelValue{0};
299 
300         try
301         {
302             labelValue = readFile<uint32_t>(file.path());
303         }
304         catch (const std::system_error& e)
305         {
306             log<level::DEBUG>(
307                 fmt::format("readTempSensors: Failed reading {}, errno = {}",
308                             file.path().string(), e.code().value())
309                     .c_str());
310             continue;
311         }
312 
313         const std::string& tempLabel = "label";
314         const std::string filePathString = file.path().string().substr(
315             0, file.path().string().length() - tempLabel.length());
316 
317         uint32_t fruTypeValue{0};
318         try
319         {
320             fruTypeValue = readFile<uint32_t>(filePathString + fruTypeSuffix);
321         }
322         catch (const std::system_error& e)
323         {
324             log<level::DEBUG>(
325                 fmt::format("readTempSensors: Failed reading {}, errno = {}",
326                             filePathString + fruTypeSuffix, e.code().value())
327                     .c_str());
328             continue;
329         }
330 
331         std::string sensorPath =
332             OCC_SENSORS_ROOT + std::string("/temperature/");
333 
334         if (fruTypeValue == VRMVdd)
335         {
336             sensorPath.append("vrm_vdd" + std::to_string(id) + "_temp");
337         }
338         else
339         {
340             uint16_t type = (labelValue & 0xFF000000) >> 24;
341             uint16_t instanceID = labelValue & 0x0000FFFF;
342 
343             if (type == OCC_DIMM_TEMP_SENSOR_TYPE)
344             {
345                 if (fruTypeValue == fruTypeNotAvailable)
346                 {
347                     // Not all DIMM related temps are available to read
348                     // (no _input file in this case)
349                     continue;
350                 }
351                 auto iter = dimmTempSensorName.find(fruTypeValue);
352                 if (iter == dimmTempSensorName.end())
353                 {
354                     log<level::ERR>(
355                         fmt::format(
356                             "readTempSensors: Fru type error! fruTypeValue = {}) ",
357                             fruTypeValue)
358                             .c_str());
359                     continue;
360                 }
361 
362                 sensorPath.append("dimm" + std::to_string(instanceID) +
363                                   iter->second);
364             }
365             else if (type == OCC_CPU_TEMP_SENSOR_TYPE)
366             {
367                 if (fruTypeValue != processorCore)
368                 {
369                     // TODO: support IO ring temp
370                     continue;
371                 }
372 
373                 // The OCC reports small core temps, of which there are
374                 // two per big core.  All current P10 systems are in big
375                 // core mode, so use a big core name.
376                 uint16_t coreNum = instanceID / 2;
377                 uint16_t tempNum = instanceID % 2;
378                 sensorPath.append("proc" + std::to_string(id) + "_core" +
379                                   std::to_string(coreNum) + "_" +
380                                   std::to_string(tempNum) + "_temp");
381             }
382             else
383             {
384                 continue;
385             }
386         }
387 
388         uint32_t faultValue{0};
389         try
390         {
391             faultValue = readFile<uint32_t>(filePathString + faultSuffix);
392         }
393         catch (const std::system_error& e)
394         {
395             log<level::DEBUG>(
396                 fmt::format("readTempSensors: Failed reading {}, errno = {}",
397                             filePathString + faultSuffix, e.code().value())
398                     .c_str());
399             continue;
400         }
401 
402         // At this point, the sensor will be created for sure.
403         if (existingSensors.find(sensorPath) == existingSensors.end())
404         {
405             open_power::occ::dbus::OccDBusSensors::getOccDBus()
406                 .setChassisAssociation(sensorPath);
407         }
408 
409         if (faultValue != 0)
410         {
411             open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue(
412                 sensorPath, std::numeric_limits<double>::quiet_NaN());
413 
414             open_power::occ::dbus::OccDBusSensors::getOccDBus()
415                 .setOperationalStatus(sensorPath, false);
416 
417             continue;
418         }
419 
420         double tempValue{0};
421 
422         try
423         {
424             tempValue = readFile<double>(filePathString + inputSuffix);
425         }
426         catch (const std::system_error& e)
427         {
428             log<level::DEBUG>(
429                 fmt::format("readTempSensors: Failed reading {}, errno = {}",
430                             filePathString + inputSuffix, e.code().value())
431                     .c_str());
432             continue;
433         }
434 
435         open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue(
436             sensorPath, tempValue * std::pow(10, -3));
437 
438         open_power::occ::dbus::OccDBusSensors::getOccDBus()
439             .setOperationalStatus(sensorPath, true);
440 
441         existingSensors[sensorPath] = id;
442     }
443     return;
444 }
445 
446 std::optional<std::string>
447     Manager::getPowerLabelFunctionID(const std::string& value)
448 {
449     // If the value is "system", then the FunctionID is "system".
450     if (value == "system")
451     {
452         return value;
453     }
454 
455     // If the value is not "system", then the label value have 3 numbers, of
456     // which we only care about the middle one:
457     // <sensor id>_<function id>_<apss channel>
458     // eg: The value is "0_10_5" , then the FunctionID is "10".
459     if (value.find("_") == std::string::npos)
460     {
461         return std::nullopt;
462     }
463 
464     auto powerLabelValue = value.substr((value.find("_") + 1));
465 
466     if (powerLabelValue.find("_") == std::string::npos)
467     {
468         return std::nullopt;
469     }
470 
471     return powerLabelValue.substr(0, powerLabelValue.find("_"));
472 }
473 
474 void Manager::readPowerSensors(const fs::path& path, uint32_t id)
475 {
476     std::regex expr{"power\\d+_label$"}; // Example: power5_label
477     for (auto& file : fs::directory_iterator(path))
478     {
479         if (!std::regex_search(file.path().string(), expr))
480         {
481             continue;
482         }
483 
484         std::string labelValue;
485         try
486         {
487             labelValue = readFile<std::string>(file.path());
488         }
489         catch (const std::system_error& e)
490         {
491             log<level::DEBUG>(
492                 fmt::format("readPowerSensors: Failed reading {}, errno = {}",
493                             file.path().string(), e.code().value())
494                     .c_str());
495             continue;
496         }
497 
498         auto functionID = getPowerLabelFunctionID(labelValue);
499         if (functionID == std::nullopt)
500         {
501             continue;
502         }
503 
504         const std::string& tempLabel = "label";
505         const std::string filePathString = file.path().string().substr(
506             0, file.path().string().length() - tempLabel.length());
507 
508         std::string sensorPath = OCC_SENSORS_ROOT + std::string("/power/");
509 
510         auto iter = powerSensorName.find(*functionID);
511         if (iter == powerSensorName.end())
512         {
513             continue;
514         }
515         sensorPath.append(iter->second);
516 
517         double tempValue{0};
518 
519         try
520         {
521             tempValue = readFile<double>(filePathString + inputSuffix);
522         }
523         catch (const std::system_error& e)
524         {
525             log<level::DEBUG>(
526                 fmt::format("readTempSensors: Failed reading {}, errno = {}",
527                             filePathString + inputSuffix, e.code().value())
528                     .c_str());
529             continue;
530         }
531 
532         open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue(
533             sensorPath, tempValue * std::pow(10, -3) * std::pow(10, -3));
534 
535         open_power::occ::dbus::OccDBusSensors::getOccDBus()
536             .setOperationalStatus(sensorPath, true);
537 
538         if (existingSensors.find(sensorPath) == existingSensors.end())
539         {
540             open_power::occ::dbus::OccDBusSensors::getOccDBus()
541                 .setChassisAssociation(sensorPath);
542         }
543 
544         existingSensors[sensorPath] = id;
545     }
546     return;
547 }
548 
549 void Manager::setSensorValueToNaN(uint32_t id)
550 {
551     for (const auto& [sensorPath, occId] : existingSensors)
552     {
553         if (occId == id)
554         {
555             open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue(
556                 sensorPath, std::numeric_limits<double>::quiet_NaN());
557         }
558     }
559     return;
560 }
561 
562 void Manager::getSensorValues(uint32_t id, bool masterOcc)
563 {
564     const auto occ = std::string("occ-hwmon.") + std::to_string(id + 1);
565 
566     fs::path fileName{OCC_HWMON_PATH + occ + "/hwmon/"};
567 
568     // Need to get the hwmonXX directory name, there better only be 1 dir
569     assert(std::distance(fs::directory_iterator(fileName),
570                          fs::directory_iterator{}) == 1);
571     // Now set our path to this full path, including this hwmonXX directory
572     fileName = fs::path(*fs::directory_iterator(fileName));
573 
574     // Read temperature sensors
575     readTempSensors(fileName, id);
576 
577     if (masterOcc)
578     {
579         // Read power sensors
580         readPowerSensors(fileName, id);
581     }
582 
583     return;
584 }
585 #endif
586 
587 } // namespace occ
588 } // namespace open_power
589