1 #include "config.h"
2 
3 #include "occ_manager.hpp"
4 
5 #include "i2c_occ.hpp"
6 #include "occ_dbus.hpp"
7 #include "utils.hpp"
8 
9 #include <phosphor-logging/elog-errors.hpp>
10 #include <phosphor-logging/log.hpp>
11 #include <xyz/openbmc_project/Common/error.hpp>
12 
13 #include <chrono>
14 #include <cmath>
15 #include <experimental/filesystem>
16 #include <regex>
17 
18 namespace open_power
19 {
20 namespace occ
21 {
22 
// fru_type value for which readTempSensors() skips a DIMM temperature sensor
// (no matching _input file exists in that case).
constexpr uint32_t fruTypeNotAvailable{0xFF};

// Attribute-name suffixes appended to a sensor's "<name>_" file prefix.
constexpr const char* fruTypeSuffix{"fru_type"};
constexpr const char* faultSuffix{"fault"};
constexpr const char* inputSuffix{"input"};
27 
28 using namespace phosphor::logging;
29 
/**
 * @brief Read a single value of type T from the start of a file.
 *
 * @tparam T - Any type that can be extracted from a std::ifstream with
 *             operator>>
 * @param[in] path - File to read
 *
 * @return The extracted value
 *
 * @throws std::system_error (generic_category) if the file cannot be opened
 *         or no value can be extracted. The error code is the errno left by
 *         the failing operation, or EIO when the failure did not set errno
 *         (for example when the content cannot be parsed as T).
 */
template <typename T>
T readFile(const std::string& path)
{
    std::ifstream ifs;
    // eofbit is deliberately left out of the mask: hitting end-of-file while
    // extracting a value that has no trailing newline is not an error.
    // An empty file still raises failbit and is reported.
    ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit);
    T data;

    // Drop any stale errno so the code reported below belongs to this read
    errno = 0;

    try
    {
        ifs.open(path);
        ifs >> data;
        ifs.close();
    }
    catch (const std::exception&)
    {
        // Stream failures such as a parse error do not set errno; never
        // hand callers a system_error carrying code 0.
        const auto err = (errno != 0) ? errno : EIO;
        throw std::system_error(err, std::generic_category());
    }

    return data;
}
52 
53 void Manager::findAndCreateObjects()
54 {
55 #ifndef POWER10
56     for (auto id = 0; id < MAX_CPUS; ++id)
57     {
58         // Create one occ per cpu
59         auto occ = std::string(OCC_NAME) + std::to_string(id);
60         createObjects(occ);
61     }
62 #else
63     // Create the OCCs based on on the /dev/occX devices
64     auto occs = findOCCsInDev();
65 
66     if (occs.empty() || (prevOCCSearch.size() != occs.size()))
67     {
68         // Something changed or no OCCs yet, try again in 10s.
69         // Note on the first pass prevOCCSearch will be empty,
70         // so there will be at least one delay to give things
71         // a chance to settle.
72         prevOCCSearch = occs;
73 
74         using namespace std::literals::chrono_literals;
75         discoverTimer->restartOnce(10s);
76     }
77     else
78     {
79         discoverTimer.reset();
80 
81         // createObjects requires OCC0 first.
82         std::sort(occs.begin(), occs.end());
83 
84         for (auto id : occs)
85         {
86             createObjects(std::string(OCC_NAME) + std::to_string(id));
87         }
88     }
89 #endif
90 }
91 
92 std::vector<int> Manager::findOCCsInDev()
93 {
94     std::vector<int> occs;
95     std::regex expr{R"(occ(\d+)$)"};
96 
97     for (auto& file : fs::directory_iterator("/dev"))
98     {
99         std::smatch match;
100         std::string path{file.path().string()};
101         if (std::regex_search(path, match, expr))
102         {
103             auto num = std::stoi(match[1].str());
104 
105             // /dev numbering starts at 1, ours starts at 0.
106             occs.push_back(num - 1);
107         }
108     }
109 
110     return occs;
111 }
112 
113 int Manager::cpuCreated(sdbusplus::message::message& msg)
114 {
115     namespace fs = std::experimental::filesystem;
116 
117     sdbusplus::message::object_path o;
118     msg.read(o);
119     fs::path cpuPath(std::string(std::move(o)));
120 
121     auto name = cpuPath.filename().string();
122     auto index = name.find(CPU_NAME);
123     name.replace(index, std::strlen(CPU_NAME), OCC_NAME);
124 
125     createObjects(name);
126 
127     return 0;
128 }
129 
130 void Manager::createObjects(const std::string& occ)
131 {
132     auto path = fs::path(OCC_CONTROL_ROOT) / occ;
133 
134     passThroughObjects.emplace_back(
135         std::make_unique<PassThrough>(path.c_str()));
136 
137     statusObjects.emplace_back(std::make_unique<Status>(
138         event, path.c_str(), *this,
139         std::bind(std::mem_fn(&Manager::statusCallBack), this,
140                   std::placeholders::_1)
141 #ifdef PLDM
142             ,
143         std::bind(std::mem_fn(&pldm::Interface::resetOCC), pldmHandle.get(),
144                   std::placeholders::_1)
145 #endif
146             ));
147 
148     // Create the power cap monitor object for master occ (0)
149     if (!pcap)
150     {
151         pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
152             *statusObjects.front());
153     }
154 
155 #ifdef POWER10
156     // Create the power mode monitor object for master occ (0)
157     if (!pmode)
158     {
159         pmode = std::make_unique<open_power::occ::powermode::PowerMode>(
160             *statusObjects.front());
161     }
162 #endif
163 }
164 
165 void Manager::statusCallBack(bool status)
166 {
167     using InternalFailure =
168         sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;
169 
170     // At this time, it won't happen but keeping it
171     // here just in case something changes in the future
172     if ((activeCount == 0) && (!status))
173     {
174         log<level::ERR>("Invalid update on OCCActive");
175         elog<InternalFailure>();
176     }
177 
178     activeCount += status ? 1 : -1;
179 
180     // Only start presence detection if all the OCCs are bound
181     if (activeCount == statusObjects.size())
182     {
183         for (auto& obj : statusObjects)
184         {
185             obj->addPresenceWatchMaster();
186         }
187     }
188 
189     if ((!_pollTimer->isEnabled()) && (activeCount > 0))
190     {
191         log<level::INFO>(
192             fmt::format(
193                 "Manager::statusCallBack(): {} OCCs will be polled every {} seconds",
194                 activeCount, pollInterval)
195                 .c_str());
196 
197         // Send poll and start OCC poll timer
198         pollerTimerExpired();
199     }
200     else if ((_pollTimer->isEnabled()) && (activeCount == 0))
201     {
202         // Stop OCC poll timer
203         log<level::INFO>(
204             "Manager::statusCallBack(): OCCs are not running, stopping poll timer");
205         _pollTimer->setEnabled(false);
206 
207 #ifdef READ_OCC_SENSORS
208         for (auto& obj : statusObjects)
209         {
210             setSensorValueToNaN(obj->getOccInstanceID());
211         }
212 #endif
213     }
214 }
215 
216 #ifdef I2C_OCC
217 void Manager::initStatusObjects()
218 {
219     // Make sure we have a valid path string
220     static_assert(sizeof(DEV_PATH) != 0);
221 
222     auto deviceNames = i2c_occ::getOccHwmonDevices(DEV_PATH);
223     auto occMasterName = deviceNames.front();
224     for (auto& name : deviceNames)
225     {
226         i2c_occ::i2cToDbus(name);
227         name = std::string(OCC_NAME) + '_' + name;
228         auto path = fs::path(OCC_CONTROL_ROOT) / name;
229         statusObjects.emplace_back(
230             std::make_unique<Status>(event, path.c_str(), *this));
231     }
232     // The first device is master occ
233     pcap = std::make_unique<open_power::occ::powercap::PowerCap>(
234         *statusObjects.front(), occMasterName);
235 #ifdef POWER10
236     pmode = std::make_unique<open_power::occ::powermode::PowerMode>(
237         *statusObjects.front());
238 #endif
239 }
240 #endif
241 
242 #ifdef PLDM
243 bool Manager::updateOCCActive(instanceID instance, bool status)
244 {
245     return (statusObjects[instance])->occActive(status);
246 }
247 #endif
248 
249 void Manager::pollerTimerExpired()
250 {
251     if (activeCount == 0)
252     {
253         // No OCCs running, so poll timer will not be restarted
254         log<level::INFO>(
255             "Manager::pollerTimerExpire(): No OCCs running, poll timer not restarted");
256     }
257 
258     if (!_pollTimer)
259     {
260         log<level::ERR>(
261             "Manager::pollerTimerExpired() ERROR: Timer not defined");
262         return;
263     }
264 
265     for (auto& obj : statusObjects)
266     {
267         // Read sysfs to force kernel to poll OCC
268         obj->readOccState();
269 
270 #ifdef READ_OCC_SENSORS
271         // Read occ sensor values
272         auto id = obj->getOccInstanceID();
273         if (!obj->occActive())
274         {
275             // Occ not activated
276             setSensorValueToNaN(id);
277             continue;
278         }
279         getSensorValues(id, obj->isMasterOcc());
280 #endif
281     }
282 
283     // Restart OCC poll timer
284     _pollTimer->restartOnce(std::chrono::seconds(pollInterval));
285 }
286 
287 #ifdef READ_OCC_SENSORS
288 void Manager::readTempSensors(const fs::path& path, uint32_t id)
289 {
290     std::regex expr{"temp\\d+_label$"}; // Example: temp5_label
291     for (auto& file : fs::directory_iterator(path))
292     {
293         if (!std::regex_search(file.path().string(), expr))
294         {
295             continue;
296         }
297 
298         uint32_t labelValue{0};
299 
300         try
301         {
302             labelValue = readFile<uint32_t>(file.path());
303         }
304         catch (const std::system_error& e)
305         {
306             log<level::DEBUG>(
307                 fmt::format("readTempSensors: Failed reading {}, errno = {}",
308                             file.path().string(), e.code().value())
309                     .c_str());
310             continue;
311         }
312 
313         const std::string& tempLabel = "label";
314         const std::string filePathString = file.path().string().substr(
315             0, file.path().string().length() - tempLabel.length());
316 
317         uint32_t fruTypeValue{0};
318         try
319         {
320             fruTypeValue = readFile<uint32_t>(filePathString + fruTypeSuffix);
321         }
322         catch (const std::system_error& e)
323         {
324             log<level::DEBUG>(
325                 fmt::format("readTempSensors: Failed reading {}, errno = {}",
326                             filePathString + fruTypeSuffix, e.code().value())
327                     .c_str());
328             continue;
329         }
330 
331         std::string sensorPath =
332             OCC_SENSORS_ROOT + std::string("/temperature/");
333 
334         if (fruTypeValue == VRMVdd)
335         {
336             sensorPath.append("vrm_vdd" + std::to_string(id) + "_temp");
337         }
338         else
339         {
340             uint16_t type = (labelValue & 0xFF000000) >> 24;
341             uint16_t instanceID = labelValue & 0x0000FFFF;
342 
343             if (type == OCC_DIMM_TEMP_SENSOR_TYPE)
344             {
345                 if (fruTypeValue == fruTypeNotAvailable)
346                 {
347                     // Not all DIMM related temps are available to read
348                     // (no _input file in this case)
349                     continue;
350                 }
351                 auto iter = dimmTempSensorName.find(fruTypeValue);
352                 if (iter == dimmTempSensorName.end())
353                 {
354                     log<level::ERR>(
355                         fmt::format(
356                             "readTempSensors: Fru type error! fruTypeValue = {}) ",
357                             fruTypeValue)
358                             .c_str());
359                     continue;
360                 }
361 
362                 sensorPath.append("dimm" + std::to_string(instanceID) +
363                                   iter->second);
364             }
365             else if (type == OCC_CPU_TEMP_SENSOR_TYPE)
366             {
367                 if (fruTypeValue != processorCore)
368                 {
369                     // TODO: support IO ring temp
370                     continue;
371                 }
372 
373                 sensorPath.append("proc" + std::to_string(id) + "_core" +
374                                   std::to_string(instanceID) + "_temp");
375             }
376             else
377             {
378                 continue;
379             }
380         }
381 
382         uint32_t faultValue{0};
383         try
384         {
385             faultValue = readFile<uint32_t>(filePathString + faultSuffix);
386         }
387         catch (const std::system_error& e)
388         {
389             log<level::DEBUG>(
390                 fmt::format("readTempSensors: Failed reading {}, errno = {}",
391                             filePathString + faultSuffix, e.code().value())
392                     .c_str());
393             continue;
394         }
395 
396         if (faultValue != 0)
397         {
398             open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue(
399                 sensorPath, std::numeric_limits<double>::quiet_NaN());
400 
401             open_power::occ::dbus::OccDBusSensors::getOccDBus()
402                 .setOperationalStatus(sensorPath, false);
403 
404             continue;
405         }
406 
407         double tempValue{0};
408 
409         try
410         {
411             tempValue = readFile<double>(filePathString + inputSuffix);
412         }
413         catch (const std::system_error& e)
414         {
415             log<level::DEBUG>(
416                 fmt::format("readTempSensors: Failed reading {}, errno = {}",
417                             filePathString + inputSuffix, e.code().value())
418                     .c_str());
419             continue;
420         }
421 
422         open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue(
423             sensorPath, tempValue * std::pow(10, -3));
424 
425         open_power::occ::dbus::OccDBusSensors::getOccDBus()
426             .setOperationalStatus(sensorPath, true);
427 
428         existingSensors[sensorPath] = id;
429     }
430     return;
431 }
432 
433 std::optional<std::string>
434     Manager::getPowerLabelFunctionID(const std::string& value)
435 {
436     // If the value is "system", then the FunctionID is "system".
437     if (value == "system")
438     {
439         return value;
440     }
441 
442     // If the value is not "system", then the label value have 3 numbers, of
443     // which we only care about the middle one:
444     // <sensor id>_<function id>_<apss channel>
445     // eg: The value is "0_10_5" , then the FunctionID is "10".
446     if (value.find("_") == std::string::npos)
447     {
448         return std::nullopt;
449     }
450 
451     auto powerLabelValue = value.substr((value.find("_") + 1));
452 
453     if (powerLabelValue.find("_") == std::string::npos)
454     {
455         return std::nullopt;
456     }
457 
458     return powerLabelValue.substr(0, powerLabelValue.find("_"));
459 }
460 
461 void Manager::readPowerSensors(const fs::path& path, uint32_t id)
462 {
463     std::regex expr{"power\\d+_label$"}; // Example: power5_label
464     for (auto& file : fs::directory_iterator(path))
465     {
466         if (!std::regex_search(file.path().string(), expr))
467         {
468             continue;
469         }
470 
471         std::string labelValue;
472         try
473         {
474             labelValue = readFile<std::string>(file.path());
475         }
476         catch (const std::system_error& e)
477         {
478             log<level::DEBUG>(
479                 fmt::format("readPowerSensors: Failed reading {}, errno = {}",
480                             file.path().string(), e.code().value())
481                     .c_str());
482             continue;
483         }
484 
485         auto functionID = getPowerLabelFunctionID(labelValue);
486         if (functionID == std::nullopt)
487         {
488             continue;
489         }
490 
491         const std::string& tempLabel = "label";
492         const std::string filePathString = file.path().string().substr(
493             0, file.path().string().length() - tempLabel.length());
494 
495         std::string sensorPath = OCC_SENSORS_ROOT + std::string("/power/");
496 
497         auto iter = powerSensorName.find(*functionID);
498         if (iter == powerSensorName.end())
499         {
500             continue;
501         }
502         sensorPath.append(iter->second);
503 
504         double tempValue{0};
505 
506         try
507         {
508             tempValue = readFile<double>(filePathString + inputSuffix);
509         }
510         catch (const std::system_error& e)
511         {
512             log<level::DEBUG>(
513                 fmt::format("readTempSensors: Failed reading {}, errno = {}",
514                             filePathString + inputSuffix, e.code().value())
515                     .c_str());
516             continue;
517         }
518 
519         open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue(
520             sensorPath, tempValue * std::pow(10, -3) * std::pow(10, -3));
521 
522         open_power::occ::dbus::OccDBusSensors::getOccDBus()
523             .setOperationalStatus(sensorPath, true);
524 
525         existingSensors[sensorPath] = id;
526     }
527     return;
528 }
529 
530 void Manager::setSensorValueToNaN(uint32_t id)
531 {
532     for (const auto& [sensorPath, occId] : existingSensors)
533     {
534         if (occId == id)
535         {
536             open_power::occ::dbus::OccDBusSensors::getOccDBus().setValue(
537                 sensorPath, std::numeric_limits<double>::quiet_NaN());
538         }
539     }
540     return;
541 }
542 
543 void Manager::getSensorValues(uint32_t id, bool masterOcc)
544 {
545     const auto occ = std::string("occ-hwmon.") + std::to_string(id + 1);
546 
547     fs::path fileName{OCC_HWMON_PATH + occ + "/hwmon/"};
548 
549     // Need to get the hwmonXX directory name, there better only be 1 dir
550     assert(std::distance(fs::directory_iterator(fileName),
551                          fs::directory_iterator{}) == 1);
552     // Now set our path to this full path, including this hwmonXX directory
553     fileName = fs::path(*fs::directory_iterator(fileName));
554 
555     // Read temperature sensors
556     readTempSensors(fileName, id);
557 
558     if (masterOcc)
559     {
560         // Read power sensors
561         readPowerSensors(fileName, id);
562     }
563 
564     return;
565 }
566 #endif
567 
568 } // namespace occ
569 } // namespace open_power
570